diff --git a/collects/parser-tools/examples/calc.ss b/collects/parser-tools/examples/calc.ss index 3f0f125..caeaa4f 100644 --- a/collects/parser-tools/examples/calc.ss +++ b/collects/parser-tools/examples/calc.ss @@ -34,17 +34,17 @@ [(: #\tab #\space) (calcl input-port)] ;; The parser will treat the return of 'newline the same as (token-newline) [#\newline 'newline] - [(: = + - * / ^) (string->symbol (get-lexeme))] + [(: = + - * / ^) (string->symbol lexeme)] ["(" 'OP] [")" 'CP] [sin (token-FNCT sin)] ;; It the parens are left off of an "invocation" of an abbreviation, it means the ;; character sequence instead. - [(+ (: (lower-letter) (upper-letter))) (token-VAR (string->symbol (get-lexeme)))] - [(+ (digit)) (token-NUM (string->number (get-lexeme)))] + [(+ (: (lower-letter) (upper-letter))) (token-VAR (string->symbol lexeme))] + [(+ (digit)) (token-NUM (string->number lexeme))] ;; Strings which dr/mzscheme does not think of as symbols (such as . or ,) must be ;; entered as a string or character. "." would also be ok. - [(@ (+ (digit)) #\. (* (digit))) (token-NUM (string->number (get-lexeme)))])) + [(@ (+ (digit)) #\. (* (digit))) (token-NUM (string->number lexeme))])) (define calcp diff --git a/collects/parser-tools/examples/read.ss b/collects/parser-tools/examples/read.ss index 518cb9f..3a974b4 100644 --- a/collects/parser-tools/examples/read.ss +++ b/collects/parser-tools/examples/read.ss @@ -21,20 +21,20 @@ ["#t" (token-DATUM #t)] ["#f" (token-DATUM #f)] - [(@ "#\\" (any)) (token-DATUM (caddr (string->list (get-lexeme))))] + [(@ "#\\" (any)) (token-DATUM (caddr (string->list lexeme)))] ["#\\space" (token-DATUM #\space)] ["#\\newline" (token-DATUM #\newline)] - [(: (@ (initial) (* (subsequent))) + - "...") (token-DATUM (string->symbol (get-lexeme)))] + [(: (@ (initial) (* (subsequent))) + - "...") (token-DATUM (string->symbol lexeme))] [#\" (token-DATUM (list->string (get-string-token input-port)))] [#\( 'OP] [#\) 'CP] [#\[ 'OP] [#\] 'CP] ["#(" 'HASHOP] - [(num2) (token-DATUM (string->number (get-lexeme) 2))] - [(num8) (token-DATUM (string->number (get-lexeme) 8))] - [(num10) (token-DATUM (string->number (get-lexeme) 10))] - [(num16) (token-DATUM (string->number (get-lexeme) 16))] + [(num2) (token-DATUM (string->number lexeme 2))] + [(num8) (token-DATUM (string->number lexeme 8))] + [(num10) (token-DATUM (string->number lexeme 10))] + [(num16) (token-DATUM (string->number lexeme 16))] ["'" 'QUOTE] ["`" 'QUASIQUOTE] ["," 'UNQUOTE] @@ -44,7 +44,7 @@ (define get-string-token (lexer - [(^ #\" #\\) (cons (car (string->list (get-lexeme))) + [(^ #\" #\\) (cons (car (string->list lexeme)) (get-string-token input-port))] [(@ #\\ #\\) (cons #\\ (get-string-token input-port))] [(@ #\\ #\") (cons #\" (get-string-token input-port))] @@ -232,14 +232,13 @@ (sexp-list [() null] [(sexp-list sexp) (cons $2 $1)])))) - (define (rs sn ip off) + (define (rs sn ip) (port-count-lines! ip) ((scheme-parser sn) (lambda () (scheme-lexer ip)))) (define readsyntax - (case-lambda ((sn) (rs sn (current-input-port) (list 0 0 0))) - ((sn ip) (rs sn ip (list 0 0 0))) - ((sn ip off) (rs sn ip off)))) + (case-lambda ((sn) (rs sn (current-input-port))) + ((sn ip) (rs sn ip)))) (provide (rename readsyntax read-syntax)) diff --git a/collects/parser-tools/lex.ss b/collects/parser-tools/lex.ss index 6824cd6..6d57ad8 100644 --- a/collects/parser-tools/lex.ss +++ b/collects/parser-tools/lex.ss @@ -11,10 +11,10 @@ (lib "cffi.ss" "compiler")) (provide lexer lexer-src-pos define-lex-abbrev define-lex-abbrevs - get-position position-offset position-line position-col position? + position-offset position-line position-col position? define-tokens define-empty-tokens) - (define-syntaxes (lexer-experiment lexer-src-pos-experiment) + (define-syntaxes (lexer-exp lexer-src-pos-exp) (let ((build-lexer (lambda (wrap?) (lambda (stx) @@ -121,9 +121,9 @@ 0 ip)) (let ((first-pos (get-position ip))) - (let-values (((longest-match-length longest-match-action) + (let-values (((longest-match-length length longest-match-action) (lexer ip peek-string))) - (do-match ip first-pos longest-match-length (vector-ref actions longest-match-action) wrap?))))) + (do-match ip first-pos longest-match-length length (vector-ref actions longest-match-action) wrap?))))) (define (lexer-body start-state trans-table eof-table actions no-lookahead wrap?) (lambda (ip) @@ -158,12 +158,13 @@ (+ (char->integer (string-ref char 0)) (* 256 state))))))) (cond ((not next-state) - (do-match ip first-pos longest-match-length longest-match-action wrap?)) + (do-match ip first-pos longest-match-length length longest-match-action wrap?)) ((vector-ref no-lookahead next-state) (let ((act (vector-ref actions next-state))) (do-match ip first-pos (if act length longest-match-length) + length (if act act longest-match-action) wrap?))) (else @@ -177,41 +178,46 @@ (if act length longest-match-length)))))))))) - - (define (do-match lb first-pos longest-match-length longest-match-action wrap?) - (let* ((match (read-string longest-match-length lb)) - (end-pos (get-position lb))) - (if (not longest-match-action) - (raise-read-error - (format "lexer: No match found in input starting with: ~a" match) - #f - (position-line first-pos) - (position-col first-pos) - (position-offset first-pos) - (- (position-offset end-pos) (position-offset first-pos)))) - (cond - (wrap? - (let/ec ret - (list (longest-match-action - (lambda () first-pos) - (lambda () end-pos) - (lambda () match) - ret - lb) - first-pos - end-pos))) - (else - (longest-match-action - (lambda () first-pos) - (lambda () end-pos) - (lambda () match) - (lambda (x) x) - lb))))) + + (define id (lambda (x) x)) + + (define (do-match lb first-pos longest-match-length length longest-match-action wrap?) + (if (not longest-match-action) + (let* ((match (read-string length lb)) + (end-pos (get-position lb))) + (if (not longest-match-action) + (raise-read-error + (format "lexer: No match found in input starting with: ~a" match) + #f + (position-line first-pos) + (position-col first-pos) + (position-offset first-pos) + (- (position-offset end-pos) (position-offset first-pos))))) + (let* ((match (read-string longest-match-length lb)) + (end-pos (get-position lb))) + (cond + (wrap? + (let/ec ret + (list (longest-match-action + first-pos + end-pos + match + ret + lb) + first-pos + end-pos))) + (else + (longest-match-action + first-pos + end-pos + match + id + lb)))))) (define-struct position (offset line col)) (define (get-position ip) (let-values (((line col off) (port-next-location ip))) - (make-position off line col))) + (make-position (add1 off) (if line (add1 line) #f) (if col (add1 col) #f)))) )