diff --git a/collects/parser-tools/private-yacc/grammar.ss b/collects/parser-tools/private-yacc/grammar.ss index e0b0a55..48898fa 100644 --- a/collects/parser-tools/private-yacc/grammar.ss +++ b/collects/parser-tools/private-yacc/grammar.ss @@ -1,53 +1,45 @@ #cs (module grammar mzscheme + (require (lib "class.ss")) + ;; Constructs to create and access grammars, the internal ;; representation of the input to the parser generator. (provide - (rename export-make-item make-item) + make-item make-term make-non-term make-prec make-prod - (rename make-gram make-grammar) ;; Things that work on items start-item? item-prod item->string - sym-at-dot move-dot-right move-dot-right! itemstring - non-term? term? nullable? non-termbit-vector term-index non-term-index ;; Things that work on precs prec-num prec-assoc - ;;Things that work on grammars - get-nt-prods get-init-prod - (rename gram-non-terms grammar-non-terms) - (rename gram-terms grammar-terms) - (rename gram-num-prods grammar-num-prods) - (rename gram-prods grammar-prods) - (rename gram-end-terms grammar-end-terms) - + grammar% + ;; Things that work on productions prod-index prod-prec prod-rhs prod-lhs prod-action) ;;---------------------- LR items -------------------------- - ;; LR-item = (make-item production nat (int | #f)) - ;; The n field contains the least integer such the item is nullable - ;; after the dot if the dot is to the right of the nth position. - (define-struct item (prod dot-pos n) (make-inspector)) + ;; LR-item = (make-item production nat) + ;; The dot-pos field is the index of the element in the rhs + ;; of prod that the dot immediately preceeds. + ;; Thus 0 <= dot-pos <= (vector-length rhs). + (define-struct item (prod dot-pos) (make-inspector)) - (define (export-make-item a b) - (make-item a b #f)) - - ;; item bool ;; Lexicographic comparison on two items. (define (item bool + ;; The start production always has index 0 (define (start-item? i) (= 0 (non-term-index (prod-lhs (item-prod i))))) @@ -70,43 +64,18 @@ (cond ((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f) (else (make-item (item-prod i) - (add1 (item-dot-pos i)) - (item-n i))))) - - ;; move-dot-right!: LR-item -> LR-item | #f - ;; moves the dot to the right in the item, unless it is at its - ;; rightmost, then it returns false - (define (move-dot-right! i) - (cond - ((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f) - (else (set-item-dot-pos! i (add1 (item-dot-pos i))) - i))) + (add1 (item-dot-pos i)))))) ;; sym-at-dot: LR-item -> gram-sym | #f ;; returns the symbol after the dot in the item or #f if there is none (define (sym-at-dot i) - (let ((dp (item-dot-pos i))) + (let ((dp (item-dot-pos i)) + (rhs (prod-rhs (item-prod i)))) (cond - ((= dp (vector-length (prod-rhs (item-prod i)))) #f) - (else (vector-ref (prod-rhs (item-prod i)) dp))))) + ((= dp (vector-length rhs)) #f) + (else (vector-ref rhs dp))))) - ;; nullable-after-dot?: LR1-iten * grammar -> bool - ;; determines if the string after the dot is nullable - (define (nullable-after-dot? i g) - (let ((i-n (item-n i))) - (cond - (i-n (>= (item-dot-pos i) i-n)) - (else - (let ((str (prod-rhs (item-prod i)))) - (let loop ((c (sub1 (vector-length str)))) - (cond - ((= c -1) (set-item-n! i 0)) - ((term? (vector-ref str c)) (set-item-n! i (add1 c))) - ((nullable? g (vector-ref str c)) (loop (sub1 c))) - (else (set-item-n! i (add1 c)))))) - (>= (item-dot-pos i) (item-n i)))))) - - + ;; print-item: LR-item -> (define (item->string it) (let ((print-sym (lambda (i) @@ -130,6 +99,8 @@ ;; gram-sym = (make-term symbol int prec) ;; | (make-non-term symbol int) + ;; Each term has a unique index 0 <= index < number of terms + ;; Each non-term has a unique index 0 <= index < number of non-terms (define-struct term (sym index prec) (make-inspector)) (define-struct non-term (sym index) (make-inspector)) @@ -152,6 +123,9 @@ (define (gram-sym->string gs) (symbol->string (gram-sym-symbol gs))) + ;; term-list->bit-vector: term list -> int + ;; Creates a number where the nth bit is 1 if the term with index n is in + ;; the list, and whose nth bit is 0 otherwise (define (term-list->bit-vector terms) (cond ((null? terms) 0) @@ -166,44 +140,57 @@ (define-struct prec (num assoc) (make-inspector)) ;; ------------------------- Grammar ------------------------------ - - ;; grammar = (make-gram (production list vector) - ;; (production list) - ;; (bool vector) - ;; (non-term list) - ;; (term list) - ;; int - ;; (term list)) - ;; - ;; The nt-prods field is indexed by the number assigned to the non-term and - ;; contains the list of productions for that non-term - ;; The prods field contains a list of all productions - ;; The nulls field is indexed by the index for a non-term and is trus iff - ;; the non-term is nullable - (define-struct gram - (nt-prods prods nulls non-terms terms num-prods end-terms) - (make-inspector)) - - - ;; get-nt-prods: grammar * non-term -> production list - ;; returns the productions for the given non-term - (define (get-nt-prods g nt) - (vector-ref (gram-nt-prods g) (non-term-index nt))) - - - ;; get-init-prod: grammar -> production - ;; gets the starting production - (define (get-init-prod g) - (car (vector-ref (gram-nt-prods g) 0))) + (define grammar% + (class object% + (super-instantiate ()) + ;; prods: production list list + ;; where the nth element in the outermost list is the list of productions with the nth non-term as lhs + (init prods) + ;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable + (init-field terms non-terms nullable-non-terms end-terms) - - (define (nullable? g nt) - (vector-ref (gram-nulls g) (non-term-index nt))) - + ;; indexed by the index of the non-term - contains the list of productions for that non-term + (define nt->prods (list->vector prods)) + ;; list of all productions + (define all-prods (apply append prods)) + (define num-prods (length all-prods)) + (define num-terms (length terms)) + (define num-non-terms (length non-terms)) + + (define/public (get-num-terms) num-terms) + (define/public (get-num-non-terms) num-non-terms) + + (define/public (get-prods-for-non-term nt) + (vector-ref nt->prods (non-term-index nt))) + (define/public (get-prods) all-prods) + (define/public (get-init-prod) + (car (vector-ref nt->prods 0))) + + (define/public (get-terms) terms) + (define/public (get-non-terms) non-terms) + + (define/public (get-num-prods) num-prods) + (define/public (get-end-terms) end-terms) + + (define/public (nullable-non-term? nt) + (vector-ref nullable-non-terms (non-term-index nt))) + + (define/public (nullable-after-dot? item) + (let* ((rhs (prod-rhs (item-prod item))) + (prod-length (vector-length rhs))) + (let loop ((i (item-dot-pos item))) + (cond + ((< i prod-length) + (if (and (non-term? (vector-ref rhs i)) (nullable-non-term? (vector-ref rhs i))) + (loop (add1 i)) + #f)) + ((= i prod-length) #t))))))) + ;; ------------------------ Productions --------------------------- ;; production = (make-prod non-term (gram-sym vector) int prec syntax-object) + ;; Each production has a unique index 0 <= index <= number of productions (define-struct prod (lhs rhs index prec action) (make-inspector)) ) diff --git a/collects/parser-tools/private-yacc/input-file-parser.ss b/collects/parser-tools/private-yacc/input-file-parser.ss index ea23bc2..22b07e3 100644 --- a/collects/parser-tools/private-yacc/input-file-parser.ss +++ b/collects/parser-tools/private-yacc/input-file-parser.ss @@ -4,7 +4,11 @@ ;; routines for parsing the input to the parser generator and producing a ;; grammar (See grammar.ss) - (require "yacc-helper.ss" "../private-lex/token-syntax.ss" "grammar.ss" (lib "list.ss")) + (require "yacc-helper.ss" + "../private-lex/token-syntax.ss" + "grammar.ss" + (lib "list.ss") + (lib "class.ss")) (provide parse-input get-term-list) @@ -418,13 +422,11 @@ ;; (list-ref (cons start (cons end-non-term non-terms)) i))) ;; (loop (add1 i)))) ;; (else (loop (add1 i))))))) - (make-grammar - (list->vector prods) - (apply append prods) - nulls - (cons start (cons end-non-term non-terms)) - terms - (add1 counter) - (map (lambda (term-name) - (hash-table-get term-table term-name)) - end-terms))))))) + (make-object grammar% + prods + terms + (cons start (cons end-non-term non-terms)) + nulls + (map (lambda (term-name) + (hash-table-get term-table term-name)) + end-terms))))))) diff --git a/collects/parser-tools/private-yacc/lalr.ss b/collects/parser-tools/private-yacc/lalr.ss index f3ae43c..6f057eb 100644 --- a/collects/parser-tools/private-yacc/lalr.ss +++ b/collects/parser-tools/private-yacc/lalr.ss @@ -5,16 +5,22 @@ (require "lr0.ss" "grammar.ss" - "graph.ss" "array2d.ss" (lib "list.ss") (lib "class.ss")) (provide compute-LA) - ;; compute-DR: LR0-automaton * grammar -> (trans-key -> term list) + (define (list-head l n) + (cond + ((= 0 n) null) + (else (cons (car l) (list-head (cdr l) (sub1 n)))))) + + + ;; compute-DR: LR0-automaton * grammar -> (trans-key -> term set) ;; computes for each state, non-term transition pair, the terminals ;; which can transition out of the resulting state + ;; output term set is represented in bit-vector form (define (compute-DR a g) (lambda (tk) (let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) @@ -22,7 +28,7 @@ (filter (lambda (term) (send a run-automaton r term)) - (grammar-terms g)))))) + (send g get-terms)))))) ;; compute-reads: ;; LR0-automaton * grammar -> (trans-key -> trans-key list) @@ -31,88 +37,150 @@ (let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) (map (lambda (x) (make-trans-key r x)) (filter (lambda (non-term) - (and (nullable? g non-term) + (and (send g nullable-non-term? non-term) (send a run-automaton r non-term))) - (grammar-non-terms g)))))) + (send g get-non-terms )))))) - ;; compute-read: LR0-automaton * grammar -> (trans-key -> term list) + ;; compute-read: LR0-automaton * grammar -> (trans-key -> term set) + ;; output term set is represented in bit-vector form (define (compute-read a g) (let* ((dr (compute-DR a g)) (reads (compute-reads a g))) (digraph-tk->terml (send a get-mapped-non-term-keys) reads dr - (vector-length (send a get-states)) - (length (grammar-terms g)) - (length (grammar-non-terms g))))) + (send a get-num-states) + (send g get-num-terms) + (send g get-num-non-terms)))) + + ;; run-lr0-backward: lr0-automaton * gram-sym list * kernel * int -> kernel list + ;; returns the list of all k such that state k transitions to state start on the + ;; transitions in rhs (in order) + (define (run-lr0-backward a rhs start num-states) + (let loop ((states (list start)) + (rhs (reverse rhs))) + (cond + ((null? rhs) states) + (else (loop (kernel-list-remove-duplicates + (send a run-automaton-back states (car rhs)) + num-states) + (cdr rhs)))))) + + ;; prod->items-for-include: grammar * prod * non-term -> lr0-item list + ;; returns the list of all (B -> beta . nt gamma) such that prod = (B -> beta nt gamma) + ;; and gamma =>* epsilon + (define (prod->items-for-include g prod nt) + (let* ((rhs (prod-rhs prod)) + (rhs-l (vector-length rhs))) + (append (if (and (> rhs-l 0) (eq? nt (vector-ref rhs (sub1 rhs-l)))) + (list (make-item prod (sub1 rhs-l))) + null) + (let loop ((i (sub1 rhs-l))) + (cond + ((and (> i 0) + (non-term? (vector-ref rhs i)) + (send g nullable-non-term? (vector-ref rhs i))) + (if (eq? nt (vector-ref rhs (sub1 i))) + (cons (make-item prod (sub1 i)) + (loop (sub1 i))) + (loop (sub1 i)))) + (else null)))))) + ;; prod-list->items-for-include: grammar * prod list * non-term -> lr0-item list + ;; return the list of all (B -> beta . nt gamma) such that (B -> beta nt gamma) in prod-list + ;; and gamma =>* epsilon + (define (prod-list->items-for-include g prod-list nt) + (apply append (map (lambda (prod) (prod->items-for-include g prod nt)) prod-list))) - ;; comput-includes-and-lookback: - ;; lr0-automaton * grammar -> (value (trans-key -> trans-key list) - ;; (kernel * prod -> trans-key list)) - (define (compute-includes-and-lookback a g) - (let* ((non-terms (grammar-non-terms g)) - (num-states (vector-length (send a get-states))) - (num-non-terms (length non-terms)) - (includes (make-array2d num-states num-non-terms null)) - (lookback (make-array2d num-states - (grammar-num-prods g) - null))) - (send a for-each-state - (lambda (state) - (for-each - (lambda (non-term) - (for-each - (lambda (prod) - (let loop ((i (make-item prod 0)) - (p state)) - (if (and p i) - (let* ((next-sym (sym-at-dot i)) - (new-i (move-dot-right i))) - (if (and (non-term? next-sym) - (nullable-after-dot? new-i g)) - (array2d-add! includes - (kernel-index p) - (gram-sym-index next-sym) - (make-trans-key state non-term))) - (if (not new-i) - (array2d-add! lookback - (kernel-index p) - (prod-index prod) - (make-trans-key state non-term))) - (if next-sym - (loop new-i - (send a run-automaton p next-sym))))))) - (get-nt-prods g non-term))) - non-terms))) - - (values (lambda (tk) - (array2d-ref includes - (kernel-index (trans-key-st tk)) - (gram-sym-index (trans-key-gs tk)))) - (lambda (state prod) - (array2d-ref lookback - (kernel-index state) - (prod-index prod)))))) - - ;; compute-follow: LR0-automaton * grammar -> (trans-key -> term list) + ;; comput-includes: lr0-automaton * grammar -> (trans-key -> trans-key list) + (define (compute-includes a g) + (let ((non-terms (send g get-non-terms)) + (num-states (send a get-num-states))) + (lambda (tk) + (let ((goal-state (trans-key-st tk)) + (non-term (trans-key-gs tk))) + (apply append + (map (lambda (B) + (map (lambda (state) + (make-trans-key state B)) + (kernel-list-remove-duplicates + (let ((items (prod-list->items-for-include g (send g get-prods-for-non-term B) non-term))) + (apply append + (map (lambda (item) + (let ((rhs (prod-rhs (item-prod item)))) + (run-lr0-backward a + (list-head (vector->list rhs) + (- (vector-length rhs) + (item-dot-pos item))) + goal-state + num-states))) + items))) + num-states))) + non-terms)))))) + + + ;; comput-includes: lr0-automaton * grammar -> (trans-key -> trans-key list) +; (define (compute-includes a g) +; (let* ((non-terms (send g get-non-terms)) +; (num-states (vector-length (send a get-states))) +; (num-non-terms (length non-terms)) +; (includes (make-array2d num-states num-non-terms null))) +; (send a for-each-state +; (lambda (state) +; (for-each +; (lambda (non-term) +; (for-each +; (lambda (prod) +; (let loop ((i (make-item prod 0)) +; (p state)) +; (if (and p i) +; (let* ((next-sym (sym-at-dot i)) +; (new-i (move-dot-right i))) +; (if (and (non-term? next-sym) +; (send g nullable-after-dot? new-i)) +; (array2d-add! includes +; (kernel-index p) +; (gram-sym-index next-sym) +; (make-trans-key state non-term))) +; (if next-sym +; (loop new-i +; (send a run-automaton p next-sym))))))) +; (send g get-prods-for-non-term non-term))) +; non-terms))) +; +; (lambda (tk) +; (array2d-ref includes +; (kernel-index (trans-key-st tk)) +; (gram-sym-index (trans-key-gs tk)))))) +; + ; compute-lookback: lr0-automaton * grammar -> (kernel * proc -> trans-key list) + (define (compute-lookback a g) + (let ((num-states (send a get-num-states))) + (lambda (state prod) + (map (lambda (k) (make-trans-key k (prod-lhs prod))) + (run-lr0-backward a (vector->list (prod-rhs prod)) state num-states))))) + + ;; compute-follow: LR0-automaton * grammar -> (trans-key -> term set) + ;; output term set is represented in bit-vector form (define (compute-follow a g includes) (let ((read (compute-read a g))) (digraph-tk->terml (send a get-mapped-non-term-keys) - includes - read - (vector-length (send a get-states)) - (length (grammar-terms g)) - (length (grammar-non-terms g))))) - - ;; compute-LA: LR0-automaton * grammar -> (kernel * prod -> term list) + includes + read + (send a get-num-states) + (send g get-num-terms) + (send g get-num-non-terms)))) + + ;; compute-LA: LR0-automaton * grammar -> (kernel * prod -> term set) + ;; output term set is represented in bit-vector form (define (compute-LA a g) - (let-values (((includes lookback) (time (compute-includes-and-lookback a g)))) - (let ((follow (time (compute-follow a g includes)))) - (lambda (k p) - (let* ((l (lookback k p)) - (f (map follow l))) - (apply bitwise-ior (cons 0 f))))))) + (let* ((includes (compute-includes a g)) + (lookback (compute-lookback a g)) + (follow (compute-follow a g includes))) + (lambda (k p) + (let* ((l (lookback k p)) + (f (map follow l))) + (apply bitwise-ior (cons 0 f)))))) (define (print-DR dr a g) @@ -141,7 +209,7 @@ state (gram-sym-symbol non-term) (print-output res))))) - (grammar-non-terms g)))) + (send g get-non-terms)))) (newline)) (define (print-input-st-prod f name a g print-output) @@ -159,8 +227,8 @@ (kernel-index state) (prod-index prod) (print-output res))))) - (get-nt-prods g non-term))) - (grammar-non-terms g))))) + (send g get-prods-for-non-term non-term))) + (send g get-non-terms))))) (define (print-output-terms r) (map diff --git a/collects/parser-tools/private-yacc/lr0.ss b/collects/parser-tools/private-yacc/lr0.ss index bda4d3c..99bc050 100644 --- a/collects/parser-tools/private-yacc/lr0.ss +++ b/collects/parser-tools/private-yacc/lr0.ss @@ -16,10 +16,13 @@ ;; kernel = (make-kernel (LR1-item list) index) ;; the list must be kept sorted according to item kernel list (define (kernel-list-remove-duplicates k num-states) (let ((v (make-vector num-states #f))) (for-each @@ -28,19 +31,20 @@ k) (let loop ((i 0)) (cond - ((< i num-states) - (let ((k (vector-ref v i))) - (if k - (cons k (loop (add1 i))) - (loop (add1 i))))) - (else null))))) - + ((< i num-states) + (let ((k (vector-ref v i))) + (if k + (cons k (loop (add1 i))) + (loop (add1 i))))) + (else null))))) + ;; LR0-automaton = object of class lr0% (define lr0% (class object% (super-instantiate ()) + ;; Hash tables that map a trans-keys to a kernel (init term-hash non-term-hash) (init-field states epsilons num-terms num-non-terms) @@ -64,6 +68,9 @@ (define/public (get-states) states) + (define/public (get-num-states) + (vector-length states)) + (define/public (get-epsilon-trans) epsilons) @@ -157,8 +164,8 @@ (define (build-lr0-automaton grammar) ; (printf "LR(0) automaton:~n") (letrec ( - (terms (list->vector (grammar-terms grammar))) - (non-terms (list->vector (grammar-non-terms grammar))) + (terms (list->vector (send grammar get-terms))) + (non-terms (list->vector (send grammar get-non-terms))) (num-non-terms (vector-length non-terms)) (num-gram-syms (+ num-non-terms (vector-length terms))) (epsilons (make-hash-table 'equal)) @@ -170,12 +177,12 @@ ;; steps. Assumes that each non-term can be reduced to a string ;; of terms. (first-non-term - (digraph (grammar-non-terms grammar) + (digraph (send grammar get-non-terms) (lambda (nt) (filter non-term? (map (lambda (prod) (sym-at-dot (make-item prod 0))) - (get-nt-prods grammar nt)))) + (send grammar get-prods-for-non-term nt)))) (lambda (nt) (list nt)) (union non-termlist table))))) - (num-non-terms (length (grammar-non-terms grammar))) + (num-non-terms (send grammar get-num-non-terms)) (token-code `(let ((ht (make-hash-table))) @@ -83,11 +84,11 @@ `(hash-table-put! ht ',(gram-sym-symbol term) ,(+ num-non-terms (gram-sym-index term)))) - (grammar-terms grammar)) + (send grammar get-terms)) ht))) (actions-code - `(vector ,@(map prod-action (grammar-prods grammar))))) + `(vector ,@(map prod-action (send grammar get-prods))))) (values table-code token-code actions-code diff --git a/collects/parser-tools/private-yacc/table.ss b/collects/parser-tools/private-yacc/table.ss index 866d096..dd3ae70 100644 --- a/collects/parser-tools/private-yacc/table.ss +++ b/collects/parser-tools/private-yacc/table.ss @@ -216,18 +216,18 @@ ;; buile-table: grammar * string -> action2d-array (define (build-table g file suppress) (let* ((a (time (build-lr0-automaton g))) - (terms (grammar-terms g)) - (non-terms (grammar-non-terms g)) + (terms (send g get-terms)) + (non-terms (send g get-non-terms)) (get-term (list->vector terms)) (get-non-term (list->vector non-terms)) - (get-prod (list->vector (grammar-prods g))) + (get-prod (list->vector (send g get-prods))) (num-terms (vector-length get-term)) (num-non-terms (vector-length get-non-term)) (end-term-indexes (map (lambda (term) (+ num-non-terms (gram-sym-index term))) - (grammar-end-terms g))) + (send g get-end-terms))) (num-gram-syms (+ num-terms num-non-terms)) (table (make-array2d (vector-length (send a get-states)) num-gram-syms #f)) (array2d-add! @@ -239,7 +239,7 @@ (array2d-set! v i1 i2 (cons a old)))) (else (if (not (equal? a old)) (array2d-set! v i1 i2 (list a old)))))))) - (get-lookahead (compute-LA a g))) + (get-lookahead (time (compute-LA a g)))) (time (send a for-each-state (lambda (state) @@ -299,7 +299,7 @@ (exn:i/o:filesystem-detail e))))] (call-with-output-file file (lambda (port) - (display-parser a table get-term get-non-term (grammar-prods g) + (display-parser a table get-term get-non-term (send g get-prods) port))))) (resolve-conflicts a table num-terms num-non-terms suppress) table))