use lexer-srloc

dev-srcloc
Matthew Butterick 8 years ago
parent 6613a3e62c
commit 52874c63d5

@ -5,7 +5,7 @@
(provide (all-defined-out)) (provide (all-defined-out))
(define basic-lexer (define basic-lexer
(lexer-src-pos (lexer-srcloc
[(eof) eof] [(eof) eof]
[whitespace (token lexeme #:skip? #t)] [whitespace (token lexeme #:skip? #t)]
[(from/to "rem" "\n") (token 'REM (string-downcase lexeme))] [(from/to "rem" "\n") (token 'REM (string-downcase lexeme))]

@ -118,6 +118,16 @@
e e
(lex:position-token-end-pos a-position-token)))] (lex:position-token-end-pos a-position-token)))]
[(lex:srcloc-token t loc)
(define a-position-token (loop t))
(lex:position-token (lex:position-token-token a-position-token)
(if (no-position? (lex:position-token-start-pos a-position-token))
(lex:position (srcloc-position loc) (srcloc-line loc) (srcloc-column loc))
(lex:position-token-start-pos a-position-token))
(if (no-position? (lex:position-token-start-pos a-position-token))
(lex:position (+ (srcloc-position loc) (srcloc-span loc)) #f #f)
(lex:position-token-end-pos a-position-token)))]
[else [else
;; Otherwise, we have no idea how to treat this as a token. ;; Otherwise, we have no idea how to treat this as a token.
((current-tokenizer-error-handler) 'unknown-type (format "~a" next-token) ((current-tokenizer-error-handler) 'unknown-type (format "~a" next-token)

@ -35,17 +35,22 @@
;; This is intended to be a general token structure constructor that's nice ;; This is intended to be a general token structure constructor that's nice
;; to work with. ;; to work with.
;; It should cooperate with the tokenizers constructed with make-permissive-tokenizer. ;; It should cooperate with the tokenizers constructed with make-permissive-tokenizer.
(define token (define (token type ;; (U symbol string)
(lambda (type ;; (U symbol string) [val #f] ;; any
[val #f] ;; any [srcloc #f]
#:position [position #f] ;; (U #f number) #:position [position #f] ;; (U #f number)
#:line [line #f] ;; (U #f number) #:line [line #f] ;; (U #f number)
#:column [column #f] ;; (U #f number) #:column [column #f] ;; (U #f number)
#:span [span #f] ;; boolean #:span [span #f] ;; boolean
#:skip? [skip? #f]) #:skip? [skip? #f])
(token-struct (if (string? type) (string->symbol type) type) (token-struct (if (string? type) (string->symbol type) type)
val val
position line column span skip?))) ;; keyword values take precedence over srcloc values
(or position (and srcloc (srcloc-position srcloc)))
(or line (and srcloc (srcloc-line srcloc)))
(or column (and srcloc (srcloc-column srcloc)))
(or span (and srcloc (srcloc-span srcloc)))
skip?))
;; When bad things happen, we need to emit errors with source location. ;; When bad things happen, we need to emit errors with source location.
@ -62,10 +67,14 @@
in)) in))
(define token-producer (tokenize input-port)) (define token-producer (tokenize input-port))
(for/list ([token (in-producer token-producer (λ(tok) (for/list ([token (in-producer token-producer (λ(tok)
;; position-tokens are produced by lexer-src-pos (eq? eof (cond
(eq? eof (if (position-token? tok) ;; position-tokens are produced by lexer-src-pos,
(position-token-token tok) [(position-token? tok)
tok))))]) (position-token-token tok)]
;; and srcloc-tokens by lexer-srcloc
[(srcloc-token? tok)
(srcloc-token-token tok)]
[else tok]))))])
token)) token))
(provide trim-ends) (provide trim-ends)
@ -86,7 +95,7 @@
[(_ . STRS) [(_ . STRS)
(with-syntax ([(UCSTR ...) (map (compose1 string-upcase syntax->datum) (syntax->list #'STRS))] (with-syntax ([(UCSTR ...) (map (compose1 string-upcase syntax->datum) (syntax->list #'STRS))]
[(LCSTR ...) (map (compose1 string-downcase syntax->datum) (syntax->list #'STRS))]) [(LCSTR ...) (map (compose1 string-downcase syntax->datum) (syntax->list #'STRS))])
#'(union (union UCSTR ...) (union LCSTR ...)))]))) #'(union (union UCSTR ...) (union LCSTR ...)))])))
;; change names of lexer abbreviations to be consistent with Racket srcloc conventions ;; change names of lexer abbreviations to be consistent with Racket srcloc conventions

Loading…
Cancel
Save