*** empty log message ***

original commit: 012bc92ebc45f34e232e084a278f17f411bb7449
tokens
Scott Owens 22 years ago
parent 6f77b77e30
commit 9c730a5f6e

@ -34,17 +34,17 @@
[(: #\tab #\space) (calcl input-port)]
;; The parser will treat the return of 'newline the same as (token-newline)
[#\newline 'newline]
[(: = + - * / ^) (string->symbol (get-lexeme))]
[(: = + - * / ^) (string->symbol lexeme)]
["(" 'OP]
[")" 'CP]
[sin (token-FNCT sin)]
;; It the parens are left off of an "invocation" of an abbreviation, it means the
;; character sequence instead.
[(+ (: (lower-letter) (upper-letter))) (token-VAR (string->symbol (get-lexeme)))]
[(+ (digit)) (token-NUM (string->number (get-lexeme)))]
[(+ (: (lower-letter) (upper-letter))) (token-VAR (string->symbol lexeme))]
[(+ (digit)) (token-NUM (string->number lexeme))]
;; Strings which dr/mzscheme does not think of as symbols (such as . or ,) must be
;; entered as a string or character. "." would also be ok.
[(@ (+ (digit)) #\. (* (digit))) (token-NUM (string->number (get-lexeme)))]))
[(@ (+ (digit)) #\. (* (digit))) (token-NUM (string->number lexeme))]))
(define calcp

@ -21,20 +21,20 @@
["#t" (token-DATUM #t)]
["#f" (token-DATUM #f)]
[(@ "#\\" (any)) (token-DATUM (caddr (string->list (get-lexeme))))]
[(@ "#\\" (any)) (token-DATUM (caddr (string->list lexeme)))]
["#\\space" (token-DATUM #\space)]
["#\\newline" (token-DATUM #\newline)]
[(: (@ (initial) (* (subsequent))) + - "...") (token-DATUM (string->symbol (get-lexeme)))]
[(: (@ (initial) (* (subsequent))) + - "...") (token-DATUM (string->symbol lexeme))]
[#\" (token-DATUM (list->string (get-string-token input-port)))]
[#\( 'OP]
[#\) 'CP]
[#\[ 'OP]
[#\] 'CP]
["#(" 'HASHOP]
[(num2) (token-DATUM (string->number (get-lexeme) 2))]
[(num8) (token-DATUM (string->number (get-lexeme) 8))]
[(num10) (token-DATUM (string->number (get-lexeme) 10))]
[(num16) (token-DATUM (string->number (get-lexeme) 16))]
[(num2) (token-DATUM (string->number lexeme 2))]
[(num8) (token-DATUM (string->number lexeme 8))]
[(num10) (token-DATUM (string->number lexeme 10))]
[(num16) (token-DATUM (string->number lexeme 16))]
["'" 'QUOTE]
["`" 'QUASIQUOTE]
["," 'UNQUOTE]
@ -44,7 +44,7 @@
(define get-string-token
(lexer
[(^ #\" #\\) (cons (car (string->list (get-lexeme)))
[(^ #\" #\\) (cons (car (string->list lexeme))
(get-string-token input-port))]
[(@ #\\ #\\) (cons #\\ (get-string-token input-port))]
[(@ #\\ #\") (cons #\" (get-string-token input-port))]
@ -232,14 +232,13 @@
(sexp-list [() null]
[(sexp-list sexp) (cons $2 $1)]))))
(define (rs sn ip off)
(define (rs sn ip)
(port-count-lines! ip)
((scheme-parser sn) (lambda () (scheme-lexer ip))))
(define readsyntax
(case-lambda ((sn) (rs sn (current-input-port) (list 0 0 0)))
((sn ip) (rs sn ip (list 0 0 0)))
((sn ip off) (rs sn ip off))))
(case-lambda ((sn) (rs sn (current-input-port)))
((sn ip) (rs sn ip))))
(provide (rename readsyntax read-syntax))

@ -11,10 +11,10 @@
(lib "cffi.ss" "compiler"))
(provide lexer lexer-src-pos define-lex-abbrev define-lex-abbrevs
get-position position-offset position-line position-col position?
position-offset position-line position-col position?
define-tokens define-empty-tokens)
(define-syntaxes (lexer-experiment lexer-src-pos-experiment)
(define-syntaxes (lexer-exp lexer-src-pos-exp)
(let ((build-lexer
(lambda (wrap?)
(lambda (stx)
@ -121,9 +121,9 @@
0
ip))
(let ((first-pos (get-position ip)))
(let-values (((longest-match-length longest-match-action)
(let-values (((longest-match-length length longest-match-action)
(lexer ip peek-string)))
(do-match ip first-pos longest-match-length (vector-ref actions longest-match-action) wrap?)))))
(do-match ip first-pos longest-match-length length (vector-ref actions longest-match-action) wrap?)))))
(define (lexer-body start-state trans-table eof-table actions no-lookahead wrap?)
(lambda (ip)
@ -158,12 +158,13 @@
(+ (char->integer (string-ref char 0)) (* 256 state)))))))
(cond
((not next-state)
(do-match ip first-pos longest-match-length longest-match-action wrap?))
(do-match ip first-pos longest-match-length length longest-match-action wrap?))
((vector-ref no-lookahead next-state)
(let ((act (vector-ref actions next-state)))
(do-match ip
first-pos
(if act length longest-match-length)
length
(if act act longest-match-action)
wrap?)))
(else
@ -177,41 +178,46 @@
(if act
length
longest-match-length))))))))))
(define (do-match lb first-pos longest-match-length longest-match-action wrap?)
(let* ((match (read-string longest-match-length lb))
(end-pos (get-position lb)))
(if (not longest-match-action)
(raise-read-error
(format "lexer: No match found in input starting with: ~a" match)
#f
(position-line first-pos)
(position-col first-pos)
(position-offset first-pos)
(- (position-offset end-pos) (position-offset first-pos))))
(cond
(wrap?
(let/ec ret
(list (longest-match-action
(lambda () first-pos)
(lambda () end-pos)
(lambda () match)
ret
lb)
first-pos
end-pos)))
(else
(longest-match-action
(lambda () first-pos)
(lambda () end-pos)
(lambda () match)
(lambda (x) x)
lb)))))
(define id (lambda (x) x))
(define (do-match lb first-pos longest-match-length length longest-match-action wrap?)
(if (not longest-match-action)
(let* ((match (read-string length lb))
(end-pos (get-position lb)))
(if (not longest-match-action)
(raise-read-error
(format "lexer: No match found in input starting with: ~a" match)
#f
(position-line first-pos)
(position-col first-pos)
(position-offset first-pos)
(- (position-offset end-pos) (position-offset first-pos)))))
(let* ((match (read-string longest-match-length lb))
(end-pos (get-position lb)))
(cond
(wrap?
(let/ec ret
(list (longest-match-action
first-pos
end-pos
match
ret
lb)
first-pos
end-pos)))
(else
(longest-match-action
first-pos
end-pos
match
id
lb))))))
(define-struct position (offset line col))
(define (get-position ip)
(let-values (((line col off) (port-next-location ip)))
(make-position off line col)))
(make-position (add1 off) (if line (add1 line) #f) (if col (add1 col) #f))))
)

Loading…
Cancel
Save