|
|
|
@ -1,4 +1,4 @@
|
|
|
|
|
#lang racket/base
|
|
|
|
|
#lang br
|
|
|
|
|
|
|
|
|
|
(require racket/match
|
|
|
|
|
racket/list
|
|
|
|
@ -45,83 +45,83 @@
|
|
|
|
|
;; FIXME: clean up code.
|
|
|
|
|
(define (make-permissive-tokenizer tokenizer token-type-hash)
|
|
|
|
|
(define tokenizer-thunk (cond
|
|
|
|
|
[(sequence? tokenizer)
|
|
|
|
|
(sequence->generator tokenizer)]
|
|
|
|
|
[(procedure? tokenizer)
|
|
|
|
|
tokenizer]))
|
|
|
|
|
[(sequence? tokenizer)
|
|
|
|
|
(sequence->generator tokenizer)]
|
|
|
|
|
[(procedure? tokenizer)
|
|
|
|
|
tokenizer]))
|
|
|
|
|
|
|
|
|
|
;; lookup: symbol any pos pos -> position-token
|
|
|
|
|
(define (lookup type val start-pos end-pos)
|
|
|
|
|
(lex:position-token
|
|
|
|
|
((hash-ref token-type-hash type
|
|
|
|
|
(lambda ()
|
|
|
|
|
((current-tokenizer-error-handler) (format "~a" type) val
|
|
|
|
|
(lex:position-offset start-pos)
|
|
|
|
|
(lex:position-line start-pos)
|
|
|
|
|
(lex:position-col start-pos)
|
|
|
|
|
(and (number? (lex:position-offset start-pos))
|
|
|
|
|
(number? (lex:position-offset end-pos))
|
|
|
|
|
(- (lex:position-offset end-pos)
|
|
|
|
|
(lex:position-offset start-pos))))))
|
|
|
|
|
(lambda ()
|
|
|
|
|
((current-tokenizer-error-handler) (format "~a" type) val
|
|
|
|
|
(lex:position-offset start-pos)
|
|
|
|
|
(lex:position-line start-pos)
|
|
|
|
|
(lex:position-col start-pos)
|
|
|
|
|
(and (number? (lex:position-offset start-pos))
|
|
|
|
|
(number? (lex:position-offset end-pos))
|
|
|
|
|
(- (lex:position-offset end-pos)
|
|
|
|
|
(lex:position-offset start-pos))))))
|
|
|
|
|
val)
|
|
|
|
|
start-pos end-pos))
|
|
|
|
|
|
|
|
|
|
(define (permissive-tokenizer)
|
|
|
|
|
(define next-token (tokenizer-thunk))
|
|
|
|
|
(let loop ([next-token next-token])
|
|
|
|
|
(match next-token
|
|
|
|
|
[(or (? eof-object?) (? void?))
|
|
|
|
|
(lookup 'EOF eof no-position no-position)]
|
|
|
|
|
|
|
|
|
|
[(? symbol?)
|
|
|
|
|
(lookup next-token next-token no-position no-position)]
|
|
|
|
|
|
|
|
|
|
[(? string?)
|
|
|
|
|
(lookup (string->symbol next-token) next-token no-position no-position)]
|
|
|
|
|
|
|
|
|
|
[(? char?)
|
|
|
|
|
(lookup (string->symbol (string next-token)) next-token no-position no-position)]
|
|
|
|
|
|
|
|
|
|
;; Compatibility
|
|
|
|
|
[(? lex:token?)
|
|
|
|
|
(loop (token (lex:token-name next-token)
|
|
|
|
|
(lex:token-value next-token)))]
|
|
|
|
|
|
|
|
|
|
[(token-struct type val offset line column span skip?)
|
|
|
|
|
(cond [skip?
|
|
|
|
|
;; skip whitespace, and just tokenize again.
|
|
|
|
|
(permissive-tokenizer)]
|
|
|
|
|
|
|
|
|
|
[(hash-has-key? token-type-hash type)
|
|
|
|
|
(define start-pos (lex:position offset line column))
|
|
|
|
|
;; try to synthesize a consistent end position.
|
|
|
|
|
(define end-pos (lex:position (if (and (number? offset) (number? span))
|
|
|
|
|
(+ offset span)
|
|
|
|
|
offset)
|
|
|
|
|
line
|
|
|
|
|
(if (and (number? column) (number? span))
|
|
|
|
|
(+ column span)
|
|
|
|
|
column)))
|
|
|
|
|
(lookup type val start-pos end-pos)]
|
|
|
|
|
[else
|
|
|
|
|
;; We ran into a token of unrecognized type. Let's raise an appropriate error.
|
|
|
|
|
((current-tokenizer-error-handler) type val
|
|
|
|
|
offset line column span)])]
|
|
|
|
|
|
|
|
|
|
[(lex:position-token t s e)
|
|
|
|
|
(define a-position-token (loop t))
|
|
|
|
|
(lex:position-token (lex:position-token-token a-position-token)
|
|
|
|
|
(if (no-position? (lex:position-token-start-pos a-position-token))
|
|
|
|
|
s
|
|
|
|
|
(lex:position-token-start-pos a-position-token))
|
|
|
|
|
(if (no-position? (lex:position-token-end-pos a-position-token))
|
|
|
|
|
e
|
|
|
|
|
(lex:position-token-end-pos a-position-token)))]
|
|
|
|
|
|
|
|
|
|
[else
|
|
|
|
|
;; Otherwise, we have no idea how to treat this as a token.
|
|
|
|
|
((current-tokenizer-error-handler) 'unknown-type (format "~a" next-token)
|
|
|
|
|
#f #f #f #f)])))
|
|
|
|
|
(match next-token
|
|
|
|
|
[(or (? eof-object?) (? void?))
|
|
|
|
|
(lookup 'EOF eof no-position no-position)]
|
|
|
|
|
|
|
|
|
|
[(? symbol?)
|
|
|
|
|
(lookup next-token next-token no-position no-position)]
|
|
|
|
|
|
|
|
|
|
[(? string?)
|
|
|
|
|
(lookup (string->symbol next-token) next-token no-position no-position)]
|
|
|
|
|
|
|
|
|
|
[(? char?)
|
|
|
|
|
(lookup (string->symbol (string next-token)) next-token no-position no-position)]
|
|
|
|
|
|
|
|
|
|
;; Compatibility
|
|
|
|
|
[(? lex:token?)
|
|
|
|
|
(loop (token (lex:token-name next-token)
|
|
|
|
|
(lex:token-value next-token)))]
|
|
|
|
|
|
|
|
|
|
[(token-struct type val offset line column span skip?)
|
|
|
|
|
(cond [skip?
|
|
|
|
|
;; skip whitespace, and just tokenize again.
|
|
|
|
|
(permissive-tokenizer)]
|
|
|
|
|
|
|
|
|
|
[(hash-has-key? token-type-hash type)
|
|
|
|
|
(define start-pos (lex:position offset line column))
|
|
|
|
|
;; try to synthesize a consistent end position.
|
|
|
|
|
(define end-pos (lex:position (if (and (number? offset) (number? span))
|
|
|
|
|
(+ offset span)
|
|
|
|
|
offset)
|
|
|
|
|
line
|
|
|
|
|
(if (and (number? column) (number? span))
|
|
|
|
|
(+ column span)
|
|
|
|
|
column)))
|
|
|
|
|
(lookup type val start-pos end-pos)]
|
|
|
|
|
[else
|
|
|
|
|
;; We ran into a token of unrecognized type. Let's raise an appropriate error.
|
|
|
|
|
((current-tokenizer-error-handler) type val
|
|
|
|
|
offset line column span)])]
|
|
|
|
|
|
|
|
|
|
[(lex:position-token t s e)
|
|
|
|
|
(define a-position-token (loop t))
|
|
|
|
|
(lex:position-token (lex:position-token-token a-position-token)
|
|
|
|
|
(if (no-position? (lex:position-token-start-pos a-position-token))
|
|
|
|
|
s
|
|
|
|
|
(lex:position-token-start-pos a-position-token))
|
|
|
|
|
(if (no-position? (lex:position-token-end-pos a-position-token))
|
|
|
|
|
e
|
|
|
|
|
(lex:position-token-end-pos a-position-token)))]
|
|
|
|
|
|
|
|
|
|
[else
|
|
|
|
|
;; Otherwise, we have no idea how to treat this as a token.
|
|
|
|
|
((current-tokenizer-error-handler) 'unknown-type (format "~a" next-token)
|
|
|
|
|
#f #f #f #f)])))
|
|
|
|
|
permissive-tokenizer)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -140,19 +140,25 @@
|
|
|
|
|
(lex:position-offset start-pos))
|
|
|
|
|
#f)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#|
|
|
|
|
|
MB: the next three functions control the parse tree output.
|
|
|
|
|
This would be the place to check a syntax property for hiding.
|
|
|
|
|
|#
|
|
|
|
|
;; We create a syntax using read-syntax; by definition, it should have the
|
|
|
|
|
;; original? property set to #t, which we then copy over to syntaxes constructed
|
|
|
|
|
;; with atomic-datum->syntax and rule-components->syntax.
|
|
|
|
|
(define stx-with-original?-property
|
|
|
|
|
(read-syntax #f (open-input-string "original")))
|
|
|
|
|
(read-syntax #f (open-input-string "meaningless-string")))
|
|
|
|
|
|
|
|
|
|
(define elided (gensym))
|
|
|
|
|
|
|
|
|
|
;; atomic-datum->syntax: datum position position
|
|
|
|
|
;; Helper that does the ugly work in wrapping a datum into a syntax
|
|
|
|
|
;; with source location.
|
|
|
|
|
(define (atomic-datum->syntax d start-pos end-pos)
|
|
|
|
|
(datum->syntax #f d (positions->srcloc start-pos end-pos) stx-with-original?-property))
|
|
|
|
|
(define (atomic-datum->syntax d start-pos end-pos [hide? #f])
|
|
|
|
|
(if hide?
|
|
|
|
|
elided
|
|
|
|
|
(datum->syntax #f d (positions->srcloc start-pos end-pos) stx-with-original?-property)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -160,11 +166,10 @@
|
|
|
|
|
;; Creates an stx out of the rule name and its components.
|
|
|
|
|
;; The location information of the rule spans that of its components.
|
|
|
|
|
(define (rule-components->syntax rule-name/false #:srcloc [srcloc #f] . components)
|
|
|
|
|
(define flattened-components (apply append components))
|
|
|
|
|
(define flattened-elided-components (filter-not (λ(c) (eq? c elided)) (apply append components)))
|
|
|
|
|
(datum->syntax #f
|
|
|
|
|
(apply append
|
|
|
|
|
(list
|
|
|
|
|
(datum->syntax #f rule-name/false srcloc stx-with-original?-property))
|
|
|
|
|
components)
|
|
|
|
|
(cons
|
|
|
|
|
(datum->syntax #f rule-name/false srcloc stx-with-original?-property)
|
|
|
|
|
flattened-elided-components)
|
|
|
|
|
srcloc
|
|
|
|
|
stx-with-original?-property))
|
|
|
|
|