From 596cdd18f7f6ad840b7a4b16702908b644d98a28 Mon Sep 17 00:00:00 2001 From: Scott Owens Date: Thu, 12 Aug 2004 22:58:19 +0000 Subject: [PATCH] *** empty log message *** original commit: 6140639bdc5fbe802148e163b44a0dbe6a6d5f94 --- collects/parser-tools/lex-plt-v200.ss | 24 ++ collects/parser-tools/lex-sre.ss | 6 +- collects/parser-tools/lex.ss | 25 +- .../parser-tools/private-lex/token-syntax.ss | 1 - collects/parser-tools/private-lex/token.ss | 48 +++- collects/parser-tools/private-yacc/grammar.ss | 1 - collects/parser-tools/private-yacc/graph.ss | 1 - .../private-yacc/input-file-parser.ss | 1 - collects/parser-tools/private-yacc/lalr.ss | 1 - collects/parser-tools/private-yacc/lr0.ss | 1 - .../private-yacc/parser-actions.ss | 1 - .../private-yacc/parser-builder.ss | 1 - collects/parser-tools/private-yacc/table.ss | 1 - .../parser-tools/private-yacc/yacc-helper.ss | 1 - collects/parser-tools/yacc-to-scheme.ss | 30 +-- collects/parser-tools/yacc.ss | 238 +++++++++--------- 16 files changed, 215 insertions(+), 166 deletions(-) create mode 100644 collects/parser-tools/lex-plt-v200.ss diff --git a/collects/parser-tools/lex-plt-v200.ss b/collects/parser-tools/lex-plt-v200.ss new file mode 100644 index 0000000..939c51a --- /dev/null +++ b/collects/parser-tools/lex-plt-v200.ss @@ -0,0 +1,24 @@ +(module lex-plt-v200 mzscheme + (require (lib "lex.ss" "parser-tools") + (prefix : (lib "lex-sre.ss" "parser-tools"))) + + (provide epsilon + ~ + (rename :* *) + (rename :+ +) + (rename :? ?) + (rename :or :) + (rename :& &) + (rename :: @) + (rename :~ ^) + (rename :/ -)) + + (define-lex-trans epsilon + (syntax-rules () + ((_) ""))) + + (define-lex-trans ~ + (syntax-rules () + ((_ re) (complement re))))) + + diff --git a/collects/parser-tools/lex-sre.ss b/collects/parser-tools/lex-sre.ss index a5560b1..6c8d777 100644 --- a/collects/parser-tools/lex-sre.ss +++ b/collects/parser-tools/lex-sre.ss @@ -13,7 +13,8 @@ & ~ (rename sre-- -) - (rename sre-/ /)/-only-chars) + (rename sre-/ /) + /-only-chars) (define-lex-trans sre-* (syntax-rules () @@ -72,6 +73,7 @@ ((_ re ...) (char-complement (union re ...))))) + ;; char-set difference (define-lex-trans (sre-- stx) (syntax-case stx () ((_) @@ -79,7 +81,7 @@ "must have at least one argument" stx)) ((_ big-re re ...) - (syntax (intersect big-re (complement (union re) ...)))))) + (syntax (intersect big-re (~ (union re) ...)))))) (define-lex-trans (sre-/ stx) (syntax-case stx () diff --git a/collects/parser-tools/lex.ss b/collects/parser-tools/lex.ss index b5a9a7a..d40f42a 100644 --- a/collects/parser-tools/lex.ss +++ b/collects/parser-tools/lex.ss @@ -11,14 +11,23 @@ (require (lib "readerr.ss" "syntax") - (lib "cffi.ss" "compiler") "private-lex/token.ss") (provide lexer lexer-src-pos define-lex-abbrev define-lex-abbrevs define-lex-trans - position-offset position-line position-col position? - define-tokens define-empty-tokens token-name token-value token? file-path + + ;; Dealing with tokens and related structures + define-tokens define-empty-tokens token-name token-value token? + (struct position (offset line col)) + (struct position-token (token start-pos end-pos)) + + ;; File path for highlighting errors while lexing + file-path + + ;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4. any-char any-string nothing alphabetic lower-case upper-case title-case numeric symbolic punctuation graphic whitespace blank iso-control + + ;; A regular expression operator char-set) (define file-path (make-parameter #f)) @@ -260,18 +269,16 @@ (cond (wrap? (let/ec ret - (list (action first-pos end-pos value ret ip) - first-pos - end-pos))) + (make-position-token (action first-pos end-pos value ret ip) + first-pos + end-pos))) (else (action first-pos end-pos value id ip))))) - (define-struct position (offset line col)) (define (get-position ip) (let-values (((line col off) (port-next-location ip))) (make-position off line col))) - - + (define-syntax (create-unicode-abbrevs stx) (syntax-case stx () ((_ ctxt) diff --git a/collects/parser-tools/private-lex/token-syntax.ss b/collects/parser-tools/private-lex/token-syntax.ss index a66f33b..c1f1492 100644 --- a/collects/parser-tools/private-lex/token-syntax.ss +++ b/collects/parser-tools/private-lex/token-syntax.ss @@ -1,4 +1,3 @@ -#cs (module token-syntax mzscheme ;; The things needed at compile time to handle definition of tokens diff --git a/collects/parser-tools/private-lex/token.ss b/collects/parser-tools/private-lex/token.ss index 7d23f67..9c8007b 100644 --- a/collects/parser-tools/private-lex/token.ss +++ b/collects/parser-tools/private-lex/token.ss @@ -1,14 +1,45 @@ -#cs (module token mzscheme (require-for-syntax "token-syntax.ss") ;; Defining tokens - (provide define-tokens define-empty-tokens make-token token-name token-value token?) + (provide define-tokens define-empty-tokens make-token token? + (protect (rename token-name real-token-name)) + (protect (rename token-value real-token-value)) + (rename token-name* token-name) + (rename token-value* token-value) + (struct position (offset line col)) + (struct position-token (token start-pos end-pos))) - (define-struct token (name value) (make-inspector)) + + ;; A token is either + ;; - symbol + ;; - (make-token symbol any) + (define-struct token (name value)) + ;; token-name*: token -> symbol + (define (token-name* t) + (cond + ((symbol? t) t) + ((token? t) (token-name t)) + (else (raise-type-error + 'token-name + "symbol or struct:token" + 0 + t)))) + + ;; token-value*: token -> any + (define (token-value* t) + (cond + ((symbol? t) #f) + ((token? t) (token-value t)) + (else (raise-type-error + 'token-value + "symbol or struct:token" + 0 + t)))) + (define-syntaxes (define-tokens define-empty-tokens) (let ((define-tokens-helper (lambda (stx empty?) @@ -36,8 +67,10 @@ n n) ,@(if empty? '() '(x))) - (make-token ',n ,(if empty? #f 'x)))) - (syntax->list (syntax (terms ...))))) + ,(if empty? + `',n + `(make-token ',n x)))) + (syntax->list (syntax (terms ...))))) stx)) ((_ ...) (raise-syntax-error @@ -47,5 +80,8 @@ (values (lambda (stx) (define-tokens-helper stx #f)) (lambda (stx) (define-tokens-helper stx #t))))) -) + + (define-struct position (offset line col)) + (define-struct position-token (token start-pos end-pos)) + ) diff --git a/collects/parser-tools/private-yacc/grammar.ss b/collects/parser-tools/private-yacc/grammar.ss index 4e7ad7e..87362e2 100644 --- a/collects/parser-tools/private-yacc/grammar.ss +++ b/collects/parser-tools/private-yacc/grammar.ss @@ -1,7 +1,6 @@ ;; Constructs to create and access grammars, the internal ;; representation of the input to the parser generator. -#cs (module grammar mzscheme (require (lib "class.ss") diff --git a/collects/parser-tools/private-yacc/graph.ss b/collects/parser-tools/private-yacc/graph.ss index 67b7155..02e28df 100644 --- a/collects/parser-tools/private-yacc/graph.ss +++ b/collects/parser-tools/private-yacc/graph.ss @@ -1,4 +1,3 @@ -#cs (module graph mzscheme (provide digraph) diff --git a/collects/parser-tools/private-yacc/input-file-parser.ss b/collects/parser-tools/private-yacc/input-file-parser.ss index fbc8568..8c688f7 100644 --- a/collects/parser-tools/private-yacc/input-file-parser.ss +++ b/collects/parser-tools/private-yacc/input-file-parser.ss @@ -1,4 +1,3 @@ -#cs (module input-file-parser mzscheme ;; routines for parsing the input to the parser generator and producing a diff --git a/collects/parser-tools/private-yacc/lalr.ss b/collects/parser-tools/private-yacc/lalr.ss index 3163dba..148d52b 100644 --- a/collects/parser-tools/private-yacc/lalr.ss +++ b/collects/parser-tools/private-yacc/lalr.ss @@ -1,4 +1,3 @@ -#cs (module lalr mzscheme ;; Compute LALR lookaheads from DeRemer and Pennello 1982 diff --git a/collects/parser-tools/private-yacc/lr0.ss b/collects/parser-tools/private-yacc/lr0.ss index 7bb4580..c894962 100644 --- a/collects/parser-tools/private-yacc/lr0.ss +++ b/collects/parser-tools/private-yacc/lr0.ss @@ -1,4 +1,3 @@ -#cs (module lr0 mzscheme ;; Handle the LR0 automaton diff --git a/collects/parser-tools/private-yacc/parser-actions.ss b/collects/parser-tools/private-yacc/parser-actions.ss index 825a18f..bd3a57a 100644 --- a/collects/parser-tools/private-yacc/parser-actions.ss +++ b/collects/parser-tools/private-yacc/parser-actions.ss @@ -1,4 +1,3 @@ -#cs (module parser-actions mzscheme ;; The entries into the action table diff --git a/collects/parser-tools/private-yacc/parser-builder.ss b/collects/parser-tools/private-yacc/parser-builder.ss index ca84f5e..ae552c0 100644 --- a/collects/parser-tools/private-yacc/parser-builder.ss +++ b/collects/parser-tools/private-yacc/parser-builder.ss @@ -1,4 +1,3 @@ -#cs (module parser-builder mzscheme (require "input-file-parser.ss" diff --git a/collects/parser-tools/private-yacc/table.ss b/collects/parser-tools/private-yacc/table.ss index 11f487d..3f6767b 100644 --- a/collects/parser-tools/private-yacc/table.ss +++ b/collects/parser-tools/private-yacc/table.ss @@ -1,4 +1,3 @@ -#cs (module table mzscheme ;; Routine to build the LALR table diff --git a/collects/parser-tools/private-yacc/yacc-helper.ss b/collects/parser-tools/private-yacc/yacc-helper.ss index ab2f5bf..c5fa0b1 100644 --- a/collects/parser-tools/private-yacc/yacc-helper.ss +++ b/collects/parser-tools/private-yacc/yacc-helper.ss @@ -1,4 +1,3 @@ -#cs (module yacc-helper mzscheme (require (lib "list.ss") diff --git a/collects/parser-tools/yacc-to-scheme.ss b/collects/parser-tools/yacc-to-scheme.ss index 3fd6a25..1e6eaca 100644 --- a/collects/parser-tools/yacc-to-scheme.ss +++ b/collects/parser-tools/yacc-to-scheme.ss @@ -1,5 +1,6 @@ (module yacc-to-scheme mzscheme (require (lib "lex.ss" "parser-tools") + (prefix : (lib "lex-sre.ss" "parser-tools")) (lib "yacc.ss" "parser-tools") (lib "readerr.ss" "syntax") (lib "list.ss")) @@ -7,24 +8,24 @@ (define match-double-string (lexer - ((^ #\" #\\) (cons (car (string->list lexeme)) - (match-double-string input-port))) - ((@ #\\ (- #\000 #\377)) (cons (string-ref lexeme 1) (match-double-string input-port))) + ((:* (:~ #\" #\\)) (append (string->list lexeme) + (match-double-string input-port))) + ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port))) (#\" null))) (define match-single-string (lexer - ((^ #\' #\\) (cons (car (string->list lexeme)) - (match-single-string input-port))) - ((@ #\\ (- #\000 #\377)) (cons (string-ref lexeme 1) (match-single-string input-port))) + ((:* (:~ #\' #\\)) (append (string->list lexeme) + (match-single-string input-port))) + ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port))) (#\' null))) (define-lex-abbrevs - (letter (: (- "a" "z") (- "A" "Z"))) - (digit (- "0" "9")) - (initial (: letter "!" "$" "%" "&" "*" "/" "<" "=" ">" "?" "^" "_" "~" "@")) - (subsequent (: initial digit "+" "-" "." "@")) - (comment (@ "/*" (* (: (^ "*") (@ "*" (^ "/")))) "*/"))) + (letter (:or (:/ "a" "z") (:/ "A" "Z"))) + (digit (:/ "0" "9")) + (initial (:or letter (char-set "!$%&*/<=>?^_~@"))) + (subsequent (:or initial digit (char-set "+-.@"))) + (comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/"))) (define-empty-tokens x (EOF PIPE |:| SEMI |%%| %prec)) @@ -34,14 +35,15 @@ (define get-token-grammar (lexer-src-pos ("%%" '|%%|) - ((: ":") (string->symbol lexeme)) + (":" (string->symbol lexeme)) ("%prec" (string->symbol lexeme)) (#\| 'PIPE) - ((+ (: #\newline #\tab " " comment (@ "{" (* (^ "}")) "}"))) (return-without-pos (get-token-grammar input-port))) + ((:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}"))) + (return-without-pos (get-token-grammar input-port))) (#\; 'SEMI) (#\' (token-STRING (string->symbol (list->string (match-single-string input-port))))) (#\" (token-STRING (string->symbol (list->string (match-double-string input-port))))) - ((@ initial (* subsequent)) (token-SYM (string->symbol lexeme))))) + ((:: initial (:* subsequent)) (token-SYM (string->symbol lexeme))))) (define (parse-grammar enter-term enter-empty-term enter-non-term) (parser diff --git a/collects/parser-tools/yacc.ss b/collects/parser-tools/yacc.ss index ccb5a90..0ab4922 100644 --- a/collects/parser-tools/yacc.ss +++ b/collects/parser-tools/yacc.ss @@ -1,4 +1,3 @@ -#cs (module yacc mzscheme (require-for-syntax "private-yacc/parser-builder.ss" @@ -6,11 +5,12 @@ (require "private-yacc/array2d.ss" "private-lex/token.ss" "private-yacc/parser-actions.ss" + (lib "etc.ss") (lib "pretty.ss") (lib "readerr.ss" "syntax")) (provide parser) - + (define-syntax (parser stx) (syntax-case stx () ((_ args ...) @@ -163,7 +163,7 @@ (raise-syntax-error #f "parser must have the form (parser args ...)" stx)))) - + (define (reduce-stack stack num ret-vals src-pos) (cond ((> num 0) @@ -177,136 +177,123 @@ (cons (stack-frame-value top-frame) ret-vals)))) (reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))) (else (values stack ret-vals)))) - - (define-struct stack-frame (state value start-pos end-pos) (make-inspector)) - + + ;; extract-helper : (symbol or make-token) any any -> symbol any any any + (define (extract-helper tok v1 v2) + (cond + ((symbol? tok) + (values tok #f v1 v2)) + ((token? tok) + (values (real-token-name tok) (real-token-value tok) v1 v2)) + (else (raise-type-error 'parser + "symbol or struct:token" + 0 + tok)))) + + ;; extract-src-pos : position-token -> symbol any any any + (define (extract-src-pos ip) + (cond + ((position-token? ip) + (extract-helper (position-token-token ip) + (position-token-start-pos ip) + (position-token-end-pos ip))) + (else + (raise-type-error 'parser + "struct:position-token" + 0 + ip)))) + + ;; extract-no-src-pos : (symbol or make-token) -> symbol any any any + (define (extract-no-src-pos ip) + (extract-helper ip #f #f)) + + (define-struct stack-frame (state value start-pos end-pos)) + (define (make-empty-stack i) (list (make-stack-frame i #f #f #f))) - - (define (false-thunk) #f) - + ;; The table format is an array2d that maps each state/term pair to either ;; an accept, shift or reduce structure - or a #f. Except that we will encode ;; by changing (make-accept) -> 'accept, (make-shift i) -> i and ;; (make-reduce i1 i2 i3) -> #(i1 i2 i3) - (define (parser-body debug err starts ends table term-sym->index actions src-pos) - (letrec ((input->token + (define (parser-body debug? err starts ends table term-sym->index actions src-pos) + (local ((define extract (if src-pos - (lambda (ip) - (cond - ((and (list? ip) (= 3 (length ip))) - (let ((tok (car ip))) - (cond - ((symbol? tok) (make-token tok #f)) - ((token? tok) tok) - (else (raise-type-error 'parser - "(list (token or symbol) position position)" - 0 - ip))))) - (else - (raise-type-error 'parser - "(list (token or symbol) position position)" - 0 - ip)))) - (lambda (ip) - (cond - ((symbol? ip) (make-token ip #f)) - ((token? ip) ip) - (else (raise-type-error 'parser "token or symbol" 0 ip)))))) - - (fix-error - (lambda (stack tok ip get-token) - (when debug (pretty-print stack)) - (letrec ((remove-input - (lambda () - (if (memq (token-name tok) ends) - (raise-read-error "parser: Cannot continue after error" - #f #f #f #f #f) - (let ((a (find-action stack tok ip))) - (cond - ((shift? a) - ;;(printf "shift:~a~n" (shift-state a)) - (cons (if src-pos - (make-stack-frame (shift-state a) - (token-value tok) - (cadr ip) - (caddr ip)) - (make-stack-frame (shift-state a) - (token-value tok) - #f - #f)) - stack)) - (else - ;;(printf "discard input:~a~n" tok) - (set! ip (get-token)) - (set! tok (input->token ip)) - (remove-input))))))) - (remove-states - (lambda () - (let ((a (find-action stack (make-token 'error #f) ip))) + extract-src-pos + extract-no-src-pos)) + + (define (fix-error stack tok val start-pos end-pos get-token) + (when debug? (pretty-print stack)) + (local ((define (remove-input tok val start-pos end-pos) + (if (memq tok ends) + (raise-read-error "parser: Cannot continue after error" + #f #f #f #f #f) + (let ((a (find-action stack tok val start-pos end-pos))) (cond ((shift? a) ;;(printf "shift:~a~n" (shift-state a)) - (set! stack - (cons - (if src-pos - (make-stack-frame (shift-state a) - #f - (cadr ip) - (caddr ip)) - (make-stack-frame (shift-state a) - #f - #f - #f)) - stack)) - (remove-input)) + (cons (make-stack-frame (shift-state a) + val + start-pos + end-pos) + stack)) (else - ;;(printf "discard state:~a~n" (car stack)) - (cond - ((< (length stack) 2) - (raise-read-error "parser: Cannot continue after error" - #f #f #f #f #f)) - (else - (set! stack (cdr stack)) - (remove-states))))))))) - (remove-states)))) - - (find-action - (lambda (stack tok ip) - (let ((token-index (hash-table-get term-sym->index - (token-name tok) - false-thunk))) - (if token-index - (array2d-ref table - (stack-frame-state (car stack)) - token-index) - (begin - (if src-pos - (err #f (token-name tok) (token-value tok) (cadr ip) (caddr ip)) - (err #f (token-name tok) (token-value tok))) - (raise-read-error (format "parser: got token of unknown type ~a" (token-name tok)) - #f #f #f #f #f)))))) - (make-parser - (lambda (start-number) - (lambda (get-token) - (let parsing-loop ((stack (make-empty-stack start-number)) - (ip (get-token))) - (let* ((tok (input->token ip)) - (action (find-action stack tok ip))) + ;;(printf "discard input:~a~n" tok) + (let-values (((tok val start-pos end-pos) + (extract (get-token)))) + (remove-input tok val start-pos end-pos)))))))) + (let remove-states () + (let ((a (find-action stack 'error #f start-pos end-pos))) + (cond + ((shift? a) + ;;(printf "shift:~a~n" (shift-state a)) + (set! stack + (cons + (make-stack-frame (shift-state a) + #f + start-pos + end-pos) + stack)) + (remove-input)) + (else + ;;(printf "discard state:~a~n" (car stack)) + (cond + ((< (length stack) 2) + (raise-read-error "parser: Cannot continue after error" + #f #f #f #f #f)) + (else + (set! stack (cdr stack)) + (remove-states))))))))) + + (define (find-action stack tok val start-pos end-pos) + (let ((token-index (hash-table-get term-sym->index + tok + (lambda () #f)))) + (if token-index + (array2d-ref table + (stack-frame-state (car stack)) + token-index) + (begin + (if src-pos + (err #f tok val start-pos end-pos) + (err #f tok val)) + (raise-read-error (format "parser: got token of unknown type ~a" tok) + #f #f #f #f #f))))) + (define (make-parser start-number) + (lambda (get-token) + (let parsing-loop ((stack (make-empty-stack start-number)) + (ip (get-token))) + (let-values (((tok val start-pos end-pos) + (extract ip))) + (let ((action (find-action stack tok val start-pos end-pos))) (cond ((shift? action) ;; (printf "shift:~a~n" (shift-state action)) - (let ((val (token-value tok))) - (parsing-loop (cons (if src-pos - (make-stack-frame (shift-state action) - val - (cadr ip) - (caddr ip)) - (make-stack-frame (shift-state action) - val - #f - #f)) - stack) - (get-token)))) + (parsing-loop (cons (make-stack-frame (shift-state action) + val + start-pos + end-pos) + stack) + (get-token))) ((reduce? action) ;; (printf "reduce:~a~n" (reduce-prod-num action)) (let-values (((new-stack args) @@ -322,9 +309,9 @@ (make-stack-frame goto (apply (vector-ref actions (reduce-prod-num action)) args) - (if (null? args) (cadr ip) (cadr args)) + (if (null? args) start-pos (cadr args)) (if (null? args) - (caddr ip) + end-pos (list-ref args (- (* (reduce-rhs-length action) 3) 1)))) (make-stack-frame goto @@ -338,9 +325,10 @@ (stack-frame-value (car stack))) (else (if src-pos - (err #t (token-name tok) (token-value tok) (cadr ip) (caddr ip)) - (err #t (token-name tok) (token-value tok))) - (parsing-loop (fix-error stack tok ip get-token) (get-token)))))))))) + (err #t tok val start-pos end-pos) + (err #t tok val)) + (parsing-loop (fix-error stack tok val start-pos end-pos get-token) + (get-token)))))))))) (cond ((null? (cdr starts)) (make-parser 0)) (else