*** empty log message ***

original commit: 012bc92ebc45f34e232e084a278f17f411bb7449
tokens
Scott Owens 22 years ago
parent 6f77b77e30
commit 9c730a5f6e

@ -34,17 +34,17 @@
[(: #\tab #\space) (calcl input-port)] [(: #\tab #\space) (calcl input-port)]
;; The parser will treat the return of 'newline the same as (token-newline) ;; The parser will treat the return of 'newline the same as (token-newline)
[#\newline 'newline] [#\newline 'newline]
[(: = + - * / ^) (string->symbol (get-lexeme))] [(: = + - * / ^) (string->symbol lexeme)]
["(" 'OP] ["(" 'OP]
[")" 'CP] [")" 'CP]
[sin (token-FNCT sin)] [sin (token-FNCT sin)]
;; It the parens are left off of an "invocation" of an abbreviation, it means the ;; It the parens are left off of an "invocation" of an abbreviation, it means the
;; character sequence instead. ;; character sequence instead.
[(+ (: (lower-letter) (upper-letter))) (token-VAR (string->symbol (get-lexeme)))] [(+ (: (lower-letter) (upper-letter))) (token-VAR (string->symbol lexeme))]
[(+ (digit)) (token-NUM (string->number (get-lexeme)))] [(+ (digit)) (token-NUM (string->number lexeme))]
;; Strings which dr/mzscheme does not think of as symbols (such as . or ,) must be ;; Strings which dr/mzscheme does not think of as symbols (such as . or ,) must be
;; entered as a string or character. "." would also be ok. ;; entered as a string or character. "." would also be ok.
[(@ (+ (digit)) #\. (* (digit))) (token-NUM (string->number (get-lexeme)))])) [(@ (+ (digit)) #\. (* (digit))) (token-NUM (string->number lexeme))]))
(define calcp (define calcp

@ -21,20 +21,20 @@
["#t" (token-DATUM #t)] ["#t" (token-DATUM #t)]
["#f" (token-DATUM #f)] ["#f" (token-DATUM #f)]
[(@ "#\\" (any)) (token-DATUM (caddr (string->list (get-lexeme))))] [(@ "#\\" (any)) (token-DATUM (caddr (string->list lexeme)))]
["#\\space" (token-DATUM #\space)] ["#\\space" (token-DATUM #\space)]
["#\\newline" (token-DATUM #\newline)] ["#\\newline" (token-DATUM #\newline)]
[(: (@ (initial) (* (subsequent))) + - "...") (token-DATUM (string->symbol (get-lexeme)))] [(: (@ (initial) (* (subsequent))) + - "...") (token-DATUM (string->symbol lexeme))]
[#\" (token-DATUM (list->string (get-string-token input-port)))] [#\" (token-DATUM (list->string (get-string-token input-port)))]
[#\( 'OP] [#\( 'OP]
[#\) 'CP] [#\) 'CP]
[#\[ 'OP] [#\[ 'OP]
[#\] 'CP] [#\] 'CP]
["#(" 'HASHOP] ["#(" 'HASHOP]
[(num2) (token-DATUM (string->number (get-lexeme) 2))] [(num2) (token-DATUM (string->number lexeme 2))]
[(num8) (token-DATUM (string->number (get-lexeme) 8))] [(num8) (token-DATUM (string->number lexeme 8))]
[(num10) (token-DATUM (string->number (get-lexeme) 10))] [(num10) (token-DATUM (string->number lexeme 10))]
[(num16) (token-DATUM (string->number (get-lexeme) 16))] [(num16) (token-DATUM (string->number lexeme 16))]
["'" 'QUOTE] ["'" 'QUOTE]
["`" 'QUASIQUOTE] ["`" 'QUASIQUOTE]
["," 'UNQUOTE] ["," 'UNQUOTE]
@ -44,7 +44,7 @@
(define get-string-token (define get-string-token
(lexer (lexer
[(^ #\" #\\) (cons (car (string->list (get-lexeme))) [(^ #\" #\\) (cons (car (string->list lexeme))
(get-string-token input-port))] (get-string-token input-port))]
[(@ #\\ #\\) (cons #\\ (get-string-token input-port))] [(@ #\\ #\\) (cons #\\ (get-string-token input-port))]
[(@ #\\ #\") (cons #\" (get-string-token input-port))] [(@ #\\ #\") (cons #\" (get-string-token input-port))]
@ -232,14 +232,13 @@
(sexp-list [() null] (sexp-list [() null]
[(sexp-list sexp) (cons $2 $1)])))) [(sexp-list sexp) (cons $2 $1)]))))
(define (rs sn ip off) (define (rs sn ip)
(port-count-lines! ip) (port-count-lines! ip)
((scheme-parser sn) (lambda () (scheme-lexer ip)))) ((scheme-parser sn) (lambda () (scheme-lexer ip))))
(define readsyntax (define readsyntax
(case-lambda ((sn) (rs sn (current-input-port) (list 0 0 0))) (case-lambda ((sn) (rs sn (current-input-port)))
((sn ip) (rs sn ip (list 0 0 0))) ((sn ip) (rs sn ip))))
((sn ip off) (rs sn ip off))))
(provide (rename readsyntax read-syntax)) (provide (rename readsyntax read-syntax))

@ -11,10 +11,10 @@
(lib "cffi.ss" "compiler")) (lib "cffi.ss" "compiler"))
(provide lexer lexer-src-pos define-lex-abbrev define-lex-abbrevs (provide lexer lexer-src-pos define-lex-abbrev define-lex-abbrevs
get-position position-offset position-line position-col position? position-offset position-line position-col position?
define-tokens define-empty-tokens) define-tokens define-empty-tokens)
(define-syntaxes (lexer-experiment lexer-src-pos-experiment) (define-syntaxes (lexer-exp lexer-src-pos-exp)
(let ((build-lexer (let ((build-lexer
(lambda (wrap?) (lambda (wrap?)
(lambda (stx) (lambda (stx)
@ -121,9 +121,9 @@
0 0
ip)) ip))
(let ((first-pos (get-position ip))) (let ((first-pos (get-position ip)))
(let-values (((longest-match-length longest-match-action) (let-values (((longest-match-length length longest-match-action)
(lexer ip peek-string))) (lexer ip peek-string)))
(do-match ip first-pos longest-match-length (vector-ref actions longest-match-action) wrap?))))) (do-match ip first-pos longest-match-length length (vector-ref actions longest-match-action) wrap?)))))
(define (lexer-body start-state trans-table eof-table actions no-lookahead wrap?) (define (lexer-body start-state trans-table eof-table actions no-lookahead wrap?)
(lambda (ip) (lambda (ip)
@ -158,12 +158,13 @@
(+ (char->integer (string-ref char 0)) (* 256 state))))))) (+ (char->integer (string-ref char 0)) (* 256 state)))))))
(cond (cond
((not next-state) ((not next-state)
(do-match ip first-pos longest-match-length longest-match-action wrap?)) (do-match ip first-pos longest-match-length length longest-match-action wrap?))
((vector-ref no-lookahead next-state) ((vector-ref no-lookahead next-state)
(let ((act (vector-ref actions next-state))) (let ((act (vector-ref actions next-state)))
(do-match ip (do-match ip
first-pos first-pos
(if act length longest-match-length) (if act length longest-match-length)
length
(if act act longest-match-action) (if act act longest-match-action)
wrap?))) wrap?)))
(else (else
@ -177,41 +178,46 @@
(if act (if act
length length
longest-match-length)))))))))) longest-match-length))))))))))
(define (do-match lb first-pos longest-match-length longest-match-action wrap?) (define id (lambda (x) x))
(let* ((match (read-string longest-match-length lb))
(end-pos (get-position lb))) (define (do-match lb first-pos longest-match-length length longest-match-action wrap?)
(if (not longest-match-action) (if (not longest-match-action)
(raise-read-error (let* ((match (read-string length lb))
(format "lexer: No match found in input starting with: ~a" match) (end-pos (get-position lb)))
#f (if (not longest-match-action)
(position-line first-pos) (raise-read-error
(position-col first-pos) (format "lexer: No match found in input starting with: ~a" match)
(position-offset first-pos) #f
(- (position-offset end-pos) (position-offset first-pos)))) (position-line first-pos)
(cond (position-col first-pos)
(wrap? (position-offset first-pos)
(let/ec ret (- (position-offset end-pos) (position-offset first-pos)))))
(list (longest-match-action (let* ((match (read-string longest-match-length lb))
(lambda () first-pos) (end-pos (get-position lb)))
(lambda () end-pos) (cond
(lambda () match) (wrap?
ret (let/ec ret
lb) (list (longest-match-action
first-pos first-pos
end-pos))) end-pos
(else match
(longest-match-action ret
(lambda () first-pos) lb)
(lambda () end-pos) first-pos
(lambda () match) end-pos)))
(lambda (x) x) (else
lb))))) (longest-match-action
first-pos
end-pos
match
id
lb))))))
(define-struct position (offset line col)) (define-struct position (offset line col))
(define (get-position ip) (define (get-position ip)
(let-values (((line col off) (port-next-location ip))) (let-values (((line col off) (port-next-location ip)))
(make-position off line col))) (make-position (add1 off) (if line (add1 line) #f) (if col (add1 col) #f))))
) )

Loading…
Cancel
Save