From 01364cc5f33af16e1535033181e32a9b75d8e0ac Mon Sep 17 00:00:00 2001 From: Scott Owens Date: Sun, 27 Oct 2002 06:57:31 +0000 Subject: [PATCH] i original commit: 5564d30b2666489fc516a7791e488d537e6287fd --- collects/parser-tools/private-yacc/grammar.ss | 90 ++++++- .../private-yacc/input-file-parser.ss | 166 ++++-------- .../private-yacc/parser-builder.ss | 23 +- collects/parser-tools/yacc.ss | 248 +++++++++--------- 4 files changed, 270 insertions(+), 257 deletions(-) diff --git a/collects/parser-tools/private-yacc/grammar.ss b/collects/parser-tools/private-yacc/grammar.ss index 3076714..9e09ff5 100644 --- a/collects/parser-tools/private-yacc/grammar.ss +++ b/collects/parser-tools/private-yacc/grammar.ss @@ -1,7 +1,9 @@ #cs (module grammar mzscheme - (require (lib "class.ss")) + (require (lib "class.ss") + (lib "list.ss") + "yacc-helper.ss") ;; Constructs to create and access grammars, the internal ;; representation of the input to the parser generator. @@ -148,16 +150,46 @@ ;; where the nth element in the outermost list is the list of productions with the nth non-term as lhs (init prods) ;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable - (init-field terms non-terms nullable-non-terms end-terms) + (init-field terms non-terms end-terms) - ;; indexed by the index of the non-term - contains the list of productions for that non-term - (define nt->prods (list->vector prods)) ;; list of all productions (define all-prods (apply append prods)) (define num-prods (length all-prods)) (define num-terms (length terms)) (define num-non-terms (length non-terms)) + (let ((count 0)) + (for-each + (lambda (nt) + (set-non-term-index! nt count) + (set! count (add1 count))) + non-terms)) + + (let ((count 0)) + (for-each + (lambda (t) + (set-term-index! t count) + (set! count (add1 count))) + terms)) + + (let ((count 0)) + (for-each + (lambda (prod) + (set-prod-index! prod count) + (set! count (add1 count))) + all-prods)) + + ;; indexed by the index of the non-term - contains the list of productions for that non-term + (define nt->prods + (let ((v (make-vector (length prods) #f))) + (for-each (lambda (prods) + (vector-set! v (non-term-index (prod-lhs (car prods))) prods)) + prods) + v)) + + (define nullable-non-terms + (nullable all-prods num-non-terms)) + (define/public (get-num-terms) num-terms) (define/public (get-num-non-terms) num-non-terms) @@ -193,8 +225,56 @@ (define/public (nullable-after-dot?-thunk) (lambda (item) (nullable-after-dot? item))))) - + + + ;; nullable: production list * int -> non-term set + ;; determines which non-terminals can derive epsilon + (define (nullable prods num-nts) + (letrec ((nullable (make-vector num-nts #f)) + (added #f) + + ;; possible-nullable: producion list -> production list + ;; Removes all productions that have a terminal + (possible-nullable + (lambda (prods) + (filter (lambda (prod) + (vector-andmap non-term? (prod-rhs prod))) + prods))) + + ;; set-nullables: production list -> production list + ;; makes one pass through the productions, adding the ones + ;; known to be nullable now to nullable and returning a list + ;; of productions that we don't know about yet. + (set-nullables + (lambda (prods) + (cond + ((null? prods) null) + ((vector-ref nullable + (gram-sym-index (prod-lhs (car prods)))) + (set-nullables (cdr prods))) + ((vector-andmap (lambda (nt) + (vector-ref nullable (gram-sym-index nt))) + (prod-rhs (car prods))) + (vector-set! nullable + (gram-sym-index (prod-lhs (car prods))) + #t) + (set! added #t) + (set-nullables (cdr prods))) + (else + (cons (car prods) + (set-nullables (cdr prods)))))))) + + (let loop ((P (possible-nullable prods))) + (cond + ((null? P) nullable) + (else + (set! added #f) + (let ((new-P (set-nullables P))) + (if added + (loop new-P) + nullable))))))) + ;; ------------------------ Productions --------------------------- ;; production = (make-prod non-term (gram-sym vector) int prec syntax-object) diff --git a/collects/parser-tools/private-yacc/input-file-parser.ss b/collects/parser-tools/private-yacc/input-file-parser.ss index d0fa35f..9c28848 100644 --- a/collects/parser-tools/private-yacc/input-file-parser.ss +++ b/collects/parser-tools/private-yacc/input-file-parser.ss @@ -7,10 +7,13 @@ (require "yacc-helper.ss" "../private-lex/token-syntax.ss" "grammar.ss" - (lib "list.ss") - (lib "class.ss")) + (lib "class.ss") + (lib "contracts.ss")) + - (provide parse-input get-term-list) + (provide/contract + (parse-input ((listof syntax?) (listof syntax?) syntax? (or/f false? syntax?) syntax? syntax? any? . -> . (is-a?/c grammar%))) + (get-term-list (syntax? . -> . (listof syntax?)))) (define stx-for-original-property (read-syntax #f (open-input-string "original"))) @@ -44,54 +47,6 @@ `(,(datum->syntax-object act name b stx-for-original-property) ,@(get-args (add1 i) (cdr rhs))))))))))) - ;; nullable: production list * int -> non-term set - ;; determines which non-terminals can derive epsilon - (define (nullable prods num-nts) - (letrec ((nullable (make-vector num-nts #f)) - (added #f) - - ;; possible-nullable: producion list -> production list - ;; Removes all productions that have a terminal - (possible-nullable - (lambda (prods) - (filter (lambda (prod) - (vector-andmap non-term? (prod-rhs prod))) - prods))) - - ;; set-nullables: production list -> production list - ;; makes one pass through the productions, adding the ones - ;; known to be nullable now to nullable and returning a list - ;; of productions that we don't know about yet. - (set-nullables - (lambda (prods) - (cond - ((null? prods) null) - ((vector-ref nullable - (gram-sym-index (prod-lhs (car prods)))) - (set-nullables (cdr prods))) - ((vector-andmap (lambda (nt) - (vector-ref nullable (gram-sym-index nt))) - (prod-rhs (car prods))) - (vector-set! nullable - (gram-sym-index (prod-lhs (car prods))) - #t) - (set! added #t) - (set-nullables (cdr prods))) - (else - (cons (car prods) - (set-nullables (cdr prods)))))))) - - (let loop ((P (possible-nullable prods))) - (cond - ((null? P) nullable) - (else - (set! added #f) - (let ((new-P (set-nullables P))) - (if added - (loop new-P) - nullable))))))) - - ;; Given the list of terminal symbols and the precedence/associativity definitions, ;; builds terminal structures (See grammar.ss) ;; build-terms: symbol list * symbol list list -> term list @@ -115,16 +70,12 @@ (set! counter (add1 counter)))) precs) - (set! counter 0) - ;; Build the terminal structures (map (lambda (term-sym) - (begin0 - (make-term term-sym - counter - (hash-table-get prec-table term-sym (lambda () #f))) - (set! counter (add1 counter)))) + (make-term term-sym + #f + (hash-table-get prec-table term-sym (lambda () #f)))) term-list))) ;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.ss) @@ -158,12 +109,9 @@ "Token declaration must be (tokens symbol ...)" so)))) - ;; parse-input: syntax-object * syntax-object list * syntax-object^4 * boolean-> grammar (define (parse-input start ends term-defs prec-decls prods runtime src-pos) - (let* ((counter 0) + (let* ((start-syms (map syntax-object->datum start)) - (start-sym (syntax-object->datum start)) - (list-of-terms (map syntax-object->datum (get-term-list term-defs))) (end-terms @@ -177,7 +125,7 @@ end) (syntax-object->datum end))) ends)) - + ;; Get the list of terminals out of input-terms (list-of-non-terms @@ -263,13 +211,8 @@ (terms (build-terms list-of-terms precs)) - (non-terms (begin - (set! counter 2) - (map (lambda (non-term) - (begin0 - (make-non-term non-term counter) - (set! counter (add1 counter)))) - list-of-non-terms))) + (non-terms (map (lambda (non-term) (make-non-term non-term #f)) + list-of-non-terms)) (term-table (make-hash-table)) (non-term-table (make-hash-table))) @@ -337,11 +280,10 @@ (eq? (syntax-object->datum a) (syntax-object->datum b))) ((prod-rhs action) (let ((p (parse-prod (syntax prod-rhs)))) - (set! counter (add1 counter)) (make-prod nt p - counter + #f (let loop ((i (sub1 (vector-length p)))) (if (>= i 0) (let ((gs (vector-ref p i))) @@ -353,11 +295,10 @@ ((prod-rhs (prec term) action) (identifier? (syntax term)) (let ((p (parse-prod (syntax prod-rhs)))) - (set! counter (add1 counter)) (make-prod nt p - counter + #f (term-prec (hash-table-get term-table @@ -382,9 +323,8 @@ (syntax-case prods-so () ((nt productions ...) (> (length (syntax->list (syntax (productions ...)))) 0) - (let ((nt (hash-table-get - non-term-table - (syntax-object->datum (syntax nt))))) + (let ((nt (hash-table-get non-term-table + (syntax-object->datum (syntax nt))))) (map (lambda (p) (parse-prod+action nt p)) (syntax->list (syntax (productions ...)))))) (_ @@ -393,55 +333,43 @@ "A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side" prods-so)))))) - (if (not (memq start-sym list-of-non-terms)) - (raise-syntax-error - 'parser-start - (format "Start symbol ~a not defined as a non-terminal" - start-sym) - start)) + (for-each + (lambda (sstx ssym) + (unless (memq ssym list-of-non-terms) + (raise-syntax-error + 'parser-start + (format "Start symbol ~a not defined as a non-terminal" ssym) + sstx))) + start start-syms) - (set! counter (length end-terms)) - (let* ((start (make-non-term (gensym) 0)) - (end-non-term (make-non-term (gensym) 1)) + (let* ((starts (map (lambda (x) (make-non-term (gensym) #f)) start-syms)) + (end-non-terms (map (lambda (x) (make-non-term (gensym) #f)) start-syms)) (parsed-prods (map parse-prods-for-nt (cdr (syntax->list prods)))) - (counter2 0) (prods - `((,(make-prod start (vector end-non-term) 0 #f #f)) - ,(map - (lambda (end) - (set! counter2 (add1 counter2)) - (make-prod end-non-term - (vector - (hash-table-get non-term-table start-sym) - (hash-table-get term-table end)) - counter2 - #f - (datum->syntax-object - runtime - `(lambda (x) x)))) - end-terms) - ,@parsed-prods)) - (nulls (nullable (apply append prods) - (+ 2 (length non-terms))))) - + `(,@(map (lambda (start end-non-term) + (list (make-prod start (vector end-non-term) #f #f #f))) + starts end-non-terms) + ,@(map + (lambda (end-nt start-sym) + (map + (lambda (end) + (make-prod end-nt + (vector + (hash-table-get non-term-table start-sym) + (hash-table-get term-table end)) + #f + #f + (datum->syntax-object + runtime + `(lambda (x) x)))) + end-terms)) + end-non-terms start-syms) + ,@parsed-prods))) -;; (printf "nullable: {~a}~n~n" -;; (apply string-append -;; (let loop ((i 0)) -;; (cond -;; ((>= i (vector-length nulls)) null) -;; ((vector-ref nulls i) -;; (cons -;; (format "~a " -;; (gram-sym-symbol -;; (list-ref (cons start (cons end-non-term non-terms)) i))) -;; (loop (add1 i)))) -;; (else (loop (add1 i))))))) (make-object grammar% prods terms - (cons start (cons end-non-term non-terms)) - nulls + (append starts (append end-non-terms non-terms)) (map (lambda (term-name) (hash-table-get term-table term-name)) end-terms))))))) diff --git a/collects/parser-tools/private-yacc/parser-builder.ss b/collects/parser-tools/private-yacc/parser-builder.ss index 41f14ca..4e6e5d5 100644 --- a/collects/parser-tools/private-yacc/parser-builder.ss +++ b/collects/parser-tools/private-yacc/parser-builder.ss @@ -4,9 +4,12 @@ (require "input-file-parser.ss" "grammar.ss" "table.ss" - (lib "class.ss")) + (lib "class.ss") + (lib "contracts.ss")) - (provide build-parser) + (provide/contract + (build-parser ((string? any? any? syntax? (listof syntax?) (listof syntax?) + (or/f syntax? false?) syntax? syntax?) . ->* . (any? any? any? any?)))) (define (strip so) (syntax-local-introduce @@ -20,14 +23,14 @@ (syntax-case prods () ((_ (bind ((bound ...) x ...) ...) ...) (let ((binds (syntax->list (syntax (bind ...)))) - (bounds (cons start - (apply - append - (map syntax->list - (apply - append - (map syntax->list - (syntax->list (syntax (((bound ...) ...) ...))))))))) + (bounds (append start + (apply + append + (map syntax->list + (apply + append + (map syntax->list + (syntax->list (syntax (((bound ...) ...) ...))))))))) (terms (get-term-list terms)) (precs (if precs (syntax-case precs () diff --git a/collects/parser-tools/yacc.ss b/collects/parser-tools/yacc.ss index a54223c..e65ebe4 100644 --- a/collects/parser-tools/yacc.ss +++ b/collects/parser-tools/yacc.ss @@ -53,42 +53,43 @@ (if tokens (raise-syntax-error #f "Multiple tokens declarations" stx) (set! tokens arg))) - ((start symbol) - (cond - ((not (identifier? (syntax symbol))) - (raise-syntax-error - 'parser-start - "Start non-terminal must be a symbol" - (syntax symbol))) - (start - (raise-syntax-error #f "Multiple start declarations" stx)) - (else - (set! start (syntax symbol))))) + ((start symbol ...) + (let ((symbols (syntax->list (syntax (symbol ...))))) + (for-each + (lambda (sym) + (unless (identifier? sym) + (raise-syntax-error 'parser-start + "Start symbol must be a symbol" + sym))) + symbols) + (when start + (raise-syntax-error #f "Multiple start declarations" stx)) + (when (null? symbols) + (raise-syntax-error 'parser-start + "Missing start symbol" + stx)) + (set! start symbols))) ((end symbols ...) - (begin + (let ((symbols (syntax->list (syntax (symbols ...))))) (for-each (lambda (sym) - (if (not (identifier? sym)) - (raise-syntax-error - 'parser-end - "End token must be a symbol" - sym))) - (syntax->list (syntax (symbols ...)))) - (let ((d (duplicate-list? (syntax-object->datum - (syntax (symbols ...)))))) - (if d - (raise-syntax-error - 'parser-end - (format "Duplicate end token definition for ~a" d) - arg))) - (if (= 0 (length (syntax->list (syntax (symbols ...))))) - (raise-syntax-error - 'parser-end - "end declaration must contain at least 1 token" - arg)) - (if end + (unless (identifier? sym) + (raise-syntax-error 'parser-end + "End token must be a symbol" + sym))) + symbols) + (let ((d (duplicate-list? (map syntax-object->datum symbols)))) + (when d + (raise-syntax-error 'parser-end + (format "Duplicate end token definition for ~a" d) + arg)) + (when (null? symbols) + (raise-syntax-error 'parser-end + "end declaration must contain at least 1 token" + arg)) + (when end (raise-syntax-error #f "Multiple end declarations" stx)) - (set! end (syntax->list (syntax (symbols ...)))))) + (set! end symbols)))) ((precs decls ...) (if precs (raise-syntax-error #f "Multiple precs declarations" stx) @@ -100,26 +101,25 @@ ((yacc-output filename) (cond ((not (string? (syntax-object->datum (syntax filename)))) - (raise-syntax-error - 'parser-yacc-output - "Yacc-output filename must be a string" - (syntax filename))) + (raise-syntax-error 'parser-yacc-output + "Yacc-output filename must be a string" + (syntax filename))) (yacc-output (raise-syntax-error #f "Multiple yacc-output declarations" stx)) (else (set! yacc-output (syntax-object->datum (syntax filename)))))) (_ (raise-syntax-error 'parser-args "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" arg)))) (syntax->list (syntax (args ...)))) - (if (not tokens) - (raise-syntax-error #f "missing tokens declaration" stx)) - (if (not error) - (raise-syntax-error #f "missing error declaration" stx)) - (if (not grammar) - (raise-syntax-error #f "missing grammar declaration" stx)) - (if (not end) - (raise-syntax-error #f "missing end declaration" stx)) - (if (not start) - (raise-syntax-error #f "missing start declaration" stx)) + (unless tokens + (raise-syntax-error #f "missing tokens declaration" stx)) + (unless error + (raise-syntax-error #f "missing error declaration" stx)) + (unless grammar + (raise-syntax-error #f "missing grammar declaration" stx)) + (unless end + (raise-syntax-error #f "missing end declaration" stx)) + (unless start + (raise-syntax-error #f "missing start declaration" stx)) (let-values (((table term-sym->index actions check-syntax-fix) (build-parser (if debug debug "") src-pos @@ -130,23 +130,23 @@ precs grammar stx))) - (if (and yacc-output (not (string=? yacc-output ""))) - (with-handlers [(exn:i/o:filesystem? - (lambda (e) - (fprintf - (current-error-port) - "Cannot write yacc-output to file \"~a\". ~a~n" - (exn:i/o:filesystem-pathname e) - (exn:i/o:filesystem-detail e))))] - (call-with-output-file yacc-output - (lambda (port) - (display-yacc (syntax-object->datum grammar) - tokens - (syntax-object->datum start) - (if precs - (syntax-object->datum precs) - #f) - port))))) + (when (and yacc-output (not (string=? yacc-output ""))) + (with-handlers [(exn:i/o:filesystem? + (lambda (e) + (fprintf + (current-error-port) + "Cannot write yacc-output to file \"~a\". ~a~n" + (exn:i/o:filesystem-pathname e) + (exn:i/o:filesystem-detail e))))] + (call-with-output-file yacc-output + (lambda (port) + (display-yacc (syntax-object->datum grammar) + tokens + (syntax-object->datum start) + (if precs + (syntax-object->datum precs) + #f) + port))))) (with-syntax ((check-syntax-fix check-syntax-fix) (err error) (ends end) @@ -160,10 +160,9 @@ check-syntax-fix (parser-body debug err (quote ends) table term-sym->index actions src-pos))))))) (_ - (raise-syntax-error - #f - "parser must have the form (parser args ...)" - stx)))) + (raise-syntax-error #f + "parser must have the form (parser args ...)" + stx)))) (define (reduce-stack stack num ret-vals src-pos) (cond @@ -181,7 +180,7 @@ (define-struct stack-frame (state value start-pos end-pos) (make-inspector)) - (define empty-stack (list (make-stack-frame 0 #f #f #f))) + (define (make-empty-stack i) (list (make-stack-frame i #f #f #f))) (define (false-thunk) #f) @@ -285,59 +284,62 @@ (err #f (token-name tok) (token-value tok) (cadr ip) (caddr ip)) (err #f (token-name tok) (token-value tok))) (raise-read-error (format "parser: got token of unknown type ~a" (token-name tok)) - #f #f #f #f #f))))))) - (lambda (get-token) - (let parsing-loop ((stack empty-stack) - (ip (get-token))) - (let* ((tok (input->token ip)) - (action (find-action stack tok ip))) - (cond - ((shift? action) - ;; (printf "shift:~a~n" (shift-state action)) - (let ((val (token-value tok))) - (parsing-loop (cons (if src-pos - (make-stack-frame (shift-state action) - val - (cadr ip) - (caddr ip)) - (make-stack-frame (shift-state action) - val - #f - #f)) - stack) - (get-token)))) - ((reduce? action) - ;; (printf "reduce:~a~n" (reduce-prod-num action)) - (let-values (((new-stack args) - (reduce-stack stack - (reduce-rhs-length action) - null - src-pos))) - (let* ((A (reduce-lhs-num action)) - (goto (array2d-ref table (stack-frame-state (car new-stack)) A))) - (parsing-loop (cons - (if src-pos - (make-stack-frame goto - (apply (vector-ref actions (reduce-prod-num action)) args) - (if (null? args) (cadr ip) (cadr args)) - (if (null? args) - (caddr ip) - (list-ref args (- (* (reduce-rhs-length action) 3) 1)))) - (make-stack-frame goto - (apply (vector-ref actions (reduce-prod-num action)) args) - #f - #f)) - new-stack) - ip)))) - ((accept? action) - ;; (printf "accept~n") - (stack-frame-value (car stack))) - (else - (if src-pos - (err #t (token-name tok) (token-value tok) (cadr ip) (caddr ip)) - (err #t (token-name tok) (token-value tok))) - (parsing-loop (fix-error stack tok ip get-token) (get-token))))))))) - - - + #f #f #f #f #f)))))) + (make-parser + (lambda (start-number) + (lambda (get-token) + (let parsing-loop ((stack (make-empty-stack start-number)) + (ip (get-token))) + (let* ((tok (input->token ip)) + (action (find-action stack tok ip))) + (cond + ((shift? action) + ;; (printf "shift:~a~n" (shift-state action)) + (let ((val (token-value tok))) + (parsing-loop (cons (if src-pos + (make-stack-frame (shift-state action) + val + (cadr ip) + (caddr ip)) + (make-stack-frame (shift-state action) + val + #f + #f)) + stack) + (get-token)))) + ((reduce? action) + ;; (printf "reduce:~a~n" (reduce-prod-num action)) + (let-values (((new-stack args) + (reduce-stack stack + (reduce-rhs-length action) + null + src-pos))) + (let* ((A (reduce-lhs-num action)) + (goto (array2d-ref table (stack-frame-state (car new-stack)) A))) + (parsing-loop + (cons + (if src-pos + (make-stack-frame + goto + (apply (vector-ref actions (reduce-prod-num action)) args) + (if (null? args) (cadr ip) (cadr args)) + (if (null? args) + (caddr ip) + (list-ref args (- (* (reduce-rhs-length action) 3) 1)))) + (make-stack-frame + goto + (apply (vector-ref actions (reduce-prod-num action)) args) + #f + #f)) + new-stack) + ip)))) + ((accept? action) + ;; (printf "accept~n") + (stack-frame-value (car stack))) + (else + (if src-pos + (err #t (token-name tok) (token-value tok) (cadr ip) (caddr ip)) + (err #t (token-name tok) (token-value tok))) + (parsing-loop (fix-error stack tok ip get-token) (get-token)))))))))) + (make-parser 0))) ) \ No newline at end of file