From 9b439ae7a67885e4ea41056c6a4a3bec9bd7bf27 Mon Sep 17 00:00:00 2001 From: Scott Owens Date: Wed, 1 May 2002 23:38:25 +0000 Subject: [PATCH] *** empty log message *** original commit: ea6305faf60d9adc3868cab1789827ba67cb4e67 --- collects/parser-tools/private-yacc/grammar.ss | 43 +++-- collects/parser-tools/private-yacc/graph.ss | 38 ++-- collects/parser-tools/private-yacc/lalr.ss | 130 ++++++++++---- collects/parser-tools/private-yacc/lr0.ss | 165 ++++++++++++----- collects/parser-tools/private-yacc/table.ss | 169 +++++++++--------- 5 files changed, 341 insertions(+), 204 deletions(-) diff --git a/collects/parser-tools/private-yacc/grammar.ss b/collects/parser-tools/private-yacc/grammar.ss index dc3c992..e0b0a55 100644 --- a/collects/parser-tools/private-yacc/grammar.ss +++ b/collects/parser-tools/private-yacc/grammar.ss @@ -15,7 +15,7 @@ ;; Things that work on items start-item? item-prod item->string - sym-at-dot move-dot-right itemstring @@ -72,28 +72,39 @@ (else (make-item (item-prod i) (add1 (item-dot-pos i)) (item-n i))))) - + + ;; move-dot-right!: LR-item -> LR-item | #f + ;; moves the dot to the right in the item, unless it is at its + ;; rightmost, then it returns false + (define (move-dot-right! i) + (cond + ((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f) + (else (set-item-dot-pos! i (add1 (item-dot-pos i))) + i))) + ;; sym-at-dot: LR-item -> gram-sym | #f ;; returns the symbol after the dot in the item or #f if there is none (define (sym-at-dot i) - (cond - ((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f) - (else (vector-ref (prod-rhs (item-prod i)) (item-dot-pos i))))) + (let ((dp (item-dot-pos i))) + (cond + ((= dp (vector-length (prod-rhs (item-prod i)))) #f) + (else (vector-ref (prod-rhs (item-prod i)) dp))))) ;; nullable-after-dot?: LR1-iten * grammar -> bool ;; determines if the string after the dot is nullable (define (nullable-after-dot? i g) - (cond - ((item-n i) => (lambda (x) (>= (item-dot-pos i) x))) - (else - (let ((str (prod-rhs (item-prod i)))) - (let loop ((c (sub1 (vector-length str)))) - (cond - ((= c -1) (set-item-n! i 0)) - ((term? (vector-ref str c)) (set-item-n! i (add1 c))) - ((nullable? g (vector-ref str c)) (loop (sub1 c))) - (else (set-item-n! i (add1 c)))))) - (>= (item-dot-pos i) (item-n i))))) + (let ((i-n (item-n i))) + (cond + (i-n (>= (item-dot-pos i) i-n)) + (else + (let ((str (prod-rhs (item-prod i)))) + (let loop ((c (sub1 (vector-length str)))) + (cond + ((= c -1) (set-item-n! i 0)) + ((term? (vector-ref str c)) (set-item-n! i (add1 c))) + ((nullable? g (vector-ref str c)) (loop (sub1 c))) + (else (set-item-n! i (add1 c)))))) + (>= (item-dot-pos i) (item-n i)))))) ;; print-item: LR-item -> diff --git a/collects/parser-tools/private-yacc/graph.ss b/collects/parser-tools/private-yacc/graph.ss index d42f8b0..67b7155 100644 --- a/collects/parser-tools/private-yacc/graph.ss +++ b/collects/parser-tools/private-yacc/graph.ss @@ -11,38 +11,27 @@ ;; DeRemer and Pennello 1982 ;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} ;; We use a hash-table to represent the result function 'a -> 'b set, so - ;; the values of type 'a must be comparable with equal?. + ;; the values of type 'a must be comparable with eq?. (define (digraph nodes edges f- union fail) (letrec ( ;; Will map elements of 'a to 'b sets - (results (make-hash-table 'equal)) + (results (make-hash-table)) (f (lambda (x) (hash-table-get results x fail))) ;; Maps elements of 'a to integers. - (N (make-hash-table 'equal)) + (N (make-hash-table)) (get-N (lambda (x) (hash-table-get N x zero-thunk))) (set-N (lambda (x d) (hash-table-put! N x d))) + + (stack null) + (push (lambda (x) + (set! stack (cons x stack)))) + (pop (lambda () + (begin0 + (car stack) + (set! stack (cdr stack))))) + (depth (lambda () (length stack))) -; (stack null) -; (push (lambda (x) -; (set! stack (cons x stack)))) -; (pop (lambda () -; (begin0 -; (car stack) -; (set! stack (cdr stack))))) -; (depth (lambda () (length stack))) - (stack (make-vector 1000 #f)) - (stack-pointer 0) - (push (lambda (x) - (vector-set! stack stack-pointer x) - (set! stack-pointer (add1 stack-pointer)))) - (pop (lambda () - (set! stack-pointer (sub1 stack-pointer)) - (vector-ref stack stack-pointer))) - (depth (lambda () stack-pointer)) - - - ;; traverse: 'a -> (traverse (lambda (x) @@ -62,13 +51,14 @@ (let loop ((p (pop))) (set-N p +inf.0) (hash-table-put! results p (f x)) - (if (not (equal? x p)) + (if (not (eq? x p)) (loop (pop))))))))) (for-each (lambda (x) (if (= 0 (get-N x)) (traverse x))) nodes) f)) + ) diff --git a/collects/parser-tools/private-yacc/lalr.ss b/collects/parser-tools/private-yacc/lalr.ss index 876f8c7..f3ae43c 100644 --- a/collects/parser-tools/private-yacc/lalr.ss +++ b/collects/parser-tools/private-yacc/lalr.ss @@ -7,7 +7,8 @@ "grammar.ss" "graph.ss" "array2d.ss" - (lib "list.ss")) + (lib "list.ss") + (lib "class.ss")) (provide compute-LA) @@ -16,33 +17,34 @@ ;; which can transition out of the resulting state (define (compute-DR a g) (lambda (tk) - (let ((r (run-automaton (trans-key-st tk) (trans-key-gs tk) a))) + (let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) (term-list->bit-vector (filter (lambda (term) - (run-automaton r term a)) + (send a run-automaton r term)) (grammar-terms g)))))) ;; compute-reads: ;; LR0-automaton * grammar -> (trans-key -> trans-key list) (define (compute-reads a g) (lambda (tk) - (let ((r (run-automaton (trans-key-st tk) (trans-key-gs tk) a))) + (let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) (map (lambda (x) (make-trans-key r x)) (filter (lambda (non-term) (and (nullable? g non-term) - (run-automaton r non-term a))) + (send a run-automaton r non-term))) (grammar-non-terms g)))))) ;; compute-read: LR0-automaton * grammar -> (trans-key -> term list) (define (compute-read a g) (let* ((dr (compute-DR a g)) (reads (compute-reads a g))) - (digraph (get-mapped-lr0-non-term-keys a) - reads - dr - bitwise-ior - (lambda () 0)))) + (digraph-tk->terml (send a get-mapped-non-term-keys) + reads + dr + (vector-length (send a get-states)) + (length (grammar-terms g)) + (length (grammar-non-terms g))))) ;; comput-includes-and-lookback: @@ -50,13 +52,13 @@ ;; (kernel * prod -> trans-key list)) (define (compute-includes-and-lookback a g) (let* ((non-terms (grammar-non-terms g)) - (num-states (vector-length (lr0-states a))) + (num-states (vector-length (send a get-states))) (num-non-terms (length non-terms)) (includes (make-array2d num-states num-non-terms null)) (lookback (make-array2d num-states (grammar-num-prods g) null))) - (for-each-state + (send a for-each-state (lambda (state) (for-each (lambda (non-term) @@ -65,8 +67,8 @@ (let loop ((i (make-item prod 0)) (p state)) (if (and p i) - (let ((new-i (move-dot-right i)) - (next-sym (sym-at-dot i))) + (let* ((next-sym (sym-at-dot i)) + (new-i (move-dot-right i))) (if (and (non-term? next-sym) (nullable-after-dot? new-i g)) (array2d-add! includes @@ -80,10 +82,10 @@ (make-trans-key state non-term))) (if next-sym (loop new-i - (run-automaton p next-sym a))))))) + (send a run-automaton p next-sym))))))) (get-nt-prods g non-term))) - non-terms)) - a) + non-terms))) + (values (lambda (tk) (array2d-ref includes (kernel-index (trans-key-st tk)) @@ -93,15 +95,15 @@ (kernel-index state) (prod-index prod)))))) - ;; compute-follow: LR0-automaton * grammar -> (trans-key -> term list) (define (compute-follow a g includes) (let ((read (compute-read a g))) - (digraph (get-mapped-lr0-non-term-keys a) - includes - read - bitwise-ior - (lambda () 0)))) + (digraph-tk->terml (send a get-mapped-non-term-keys) + includes + read + (vector-length (send a get-states)) + (length (grammar-terms g)) + (length (grammar-non-terms g))))) ;; compute-LA: LR0-automaton * grammar -> (kernel * prod -> term list) (define (compute-LA a g) @@ -128,7 +130,7 @@ (define (print-input-st-sym f name a g print-output) (printf "~a:~n" name) - (for-each-state + (send a for-each-state (lambda (state) (for-each (lambda (non-term) @@ -139,13 +141,12 @@ state (gram-sym-symbol non-term) (print-output res))))) - (grammar-non-terms g))) - a) + (grammar-non-terms g)))) (newline)) (define (print-input-st-prod f name a g print-output) (printf "~a:~n" name) - (for-each-state + (send a for-each-state (lambda (state) (for-each (lambda (non-term) @@ -159,8 +160,7 @@ (prod-index prod) (print-output res))))) (get-nt-prods g non-term))) - (grammar-non-terms g))) - a)) + (grammar-non-terms g))))) (define (print-output-terms r) (map @@ -176,7 +176,77 @@ (gram-sym-symbol (trans-key-gs p)))) r)) - + ;; digraph-tk->terml: + ;; (trans-key list) * (trans-key -> trans-key list) * (trans-key -> term list) * int * int * int + ;; -> (trans-key -> term list) + ;; DeRemer and Pennello 1982 + ;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} + ;; A specialization of digraph in the file graph.ss + (define (digraph-tk->terml nodes edges f- num-states num-terms num-non-terms) + (letrec ( + ;; Will map elements of trans-key to term sets represented as bit vectors + (results-terms (make-array2d num-states num-terms 0)) + (results-non-terms (make-array2d num-states num-non-terms 0)) + + ;; Maps elements of trans-keys to integers. + (N-terms (make-array2d num-states num-terms 0)) + (N-non-terms (make-array2d num-states num-non-terms 0)) + + (lookup-tk-map + (lambda (map-term map-non-term) + (lambda (tk) + (let ((st (trans-key-st tk)) + (gs (trans-key-gs tk))) + (if (term? gs) + (array2d-ref map-term (kernel-index st) (term-index gs)) + (array2d-ref map-non-term (kernel-index st) (non-term-index gs))))))) + (add-tk-map + (lambda (map-term map-non-term) + (lambda (tk v) + (let ((st (trans-key-st tk)) + (gs (trans-key-gs tk))) + (if (term? gs) + (array2d-set! map-term (kernel-index st) (term-index gs) v) + (array2d-set! map-non-term (kernel-index st) (non-term-index gs) v)))))) + + (get-N (lookup-tk-map N-terms N-non-terms)) + (set-N (add-tk-map N-terms N-non-terms)) + (get-f (lookup-tk-map results-terms results-non-terms)) + (set-f (add-tk-map results-terms results-non-terms)) + + (stack null) + (push (lambda (x) + (set! stack (cons x stack)))) + (pop (lambda () + (begin0 + (car stack) + (set! stack (cdr stack))))) + (depth (lambda () (length stack))) + + ;; traverse: 'a -> + (traverse + (lambda (x) + (push x) + (let ((d (depth))) + (set-N x d) + (set-f x (f- x)) + (for-each (lambda (y) + (if (= 0 (get-N y)) + (traverse y)) + (set-f x (bitwise-ior (get-f x) (get-f y))) + (set-N x (min (get-N x) (get-N y)))) + (edges x)) + (if (= d (get-N x)) + (let loop ((p (pop))) + (set-N p +inf.0) + (set-f p (get-f x)) + (if (not (equal? x p)) + (loop (pop))))))))) + (for-each (lambda (x) + (if (= 0 (get-N x)) + (traverse x))) + nodes) + get-f)) ) diff --git a/collects/parser-tools/private-yacc/lr0.ss b/collects/parser-tools/private-yacc/lr0.ss index ea2b233..bda4d3c 100644 --- a/collects/parser-tools/private-yacc/lr0.ss +++ b/collects/parser-tools/private-yacc/lr0.ss @@ -6,12 +6,117 @@ (require "grammar.ss" "graph.ss" "array2d.ss" - (lib "list.ss")) + (lib "list.ss") + (lib "class.ss")) - (provide build-lr0-automaton run-automaton (struct trans-key (st gs)) - get-mapped-lr0-non-term-keys lr0-states lr0-epsilon-trans - kernel-items kernel-index for-each-state) + (provide build-lr0-automaton lr0% + (struct trans-key (st gs)) + kernel-items kernel-index kernel-list-remove-duplicates) + + ;; kernel = (make-kernel (LR1-item list) index) + ;; the list must be kept sorted according to item kernel | #f + ;; returns the state that the transition trans-key provides or #f + ;; if there is no such state + (define/public (run-automaton k s) + (if (term? s) + (array2d-ref term-transitions (kernel-index k) (term-index s)) + (array2d-ref non-term-transitions (kernel-index k) (non-term-index s)))) + + (define/public (run-automaton-back k s) + (apply append + (if (term? s) + (map (lambda (k) + (array2d-ref reverse-term-transitions (kernel-index k) (term-index s))) + k) + (map (lambda (k) + (array2d-ref reverse-non-term-transitions (kernel-index k) (non-term-index s))) + k)))))) + + (define (make-lr0-table auto-hash states syms) + (let ((t (make-array2d states syms #f))) + (hash-table-map auto-hash + (lambda (k v) + (array2d-set! t + (kernel-index (trans-key-st k)) + (gram-sym-index (trans-key-gs k)) + v))) + t)) + + (define (reverse-hash hash) + (let ((reverse-hash (make-hash-table 'equal)) + (hash-table-add! + (lambda (ht k v) + (hash-table-put! ht k (cons v (hash-table-get ht k (lambda () null))))))) + (hash-table-for-each hash + (lambda (k v) + (hash-table-add! reverse-hash + (make-trans-key v (trans-key-gs k)) + (trans-key-st k)))) + reverse-hash)) + + (define (union comp (eq? a b) (define (kernel->string k) @@ -56,16 +142,7 @@ (kernel-items k)) "}"))) - ;; run-automaton: kernel * gram-sym * LR0-automaton -> kernel | #f - ;; returns the state that the transition trans-key provides or #f - ;; if there is no such state - (define (run-automaton k s a) - (if (term? s) - (array2d-ref (lr0-term-transitions a) (kernel-index k) (term-index s)) - (array2d-ref (lr0-non-term-transitions a) (kernel-index k) (non-term-index s)))) - (define get-mapped-lr0-non-term-keys lr0-mapped-non-terms) - (define (add-lr0-transition! ttable nttable key value) (hash-table-put! (if (term? (trans-key-gs key)) @@ -73,17 +150,7 @@ nttable) key value)) - - (define (make-lr0-table auto-hash states syms) - (let ((t (make-array2d states syms #f))) - (hash-table-map auto-hash - (lambda (k v) - (array2d-set! t - (kernel-index (trans-key-st k)) - (gram-sym-index (trans-key-gs k)) - v))) - t)) - + ;; build-LR0-automaton: grammar -> LR0-automaton ;; Constructs the kernels of the sets of LR(0) items of g @@ -263,11 +330,13 @@ (seen-kernels null)) (cond ((and (empty-queue? new-kernels) (null? old-kernels)) - (make-lr0 (make-lr0-table automaton-term (length seen-kernels) (vector-length terms)) - (make-lr0-table automaton-non-term (length seen-kernels) (vector-length non-terms)) - (hash-table-map automaton-non-term (lambda (k v) k)) - (list->vector (reverse! seen-kernels)) - epsilons)) + (make-object lr0% + automaton-term + automaton-non-term + (list->vector (reverse! seen-kernels)) + epsilons + (vector-length terms) + num-non-terms)) ((null? old-kernels) (loop (deq! new-kernels) seen-kernels)) (else diff --git a/collects/parser-tools/private-yacc/table.ss b/collects/parser-tools/private-yacc/table.ss index 1dd0200..866d096 100644 --- a/collects/parser-tools/private-yacc/table.ss +++ b/collects/parser-tools/private-yacc/table.ss @@ -8,7 +8,8 @@ "array2d.ss" "lalr.ss" "parser-actions.ss" - (lib "list.ss")) + (lib "list.ss") + (lib "class.ss")) (provide build-table) @@ -65,7 +66,7 @@ (gram-sym-symbol (prod-lhs prod)) (map gram-sym-symbol (vector->list (prod-rhs prod))))) prods) - (for-each-state + (send a for-each-state (lambda (state) (fprintf port "State ~a~n" (kernel-index state)) (for-each (lambda (item) @@ -112,8 +113,7 @@ port))) (loop (add1 j))))) - (newline port)) - a) + (newline port))) (if (> SR-conflicts 0) (fprintf port "~a shift/reduce conflicts~n" SR-conflicts)) @@ -141,7 +141,7 @@ (loop (car rest) (cdr rest))) (else (loop current-guess (cdr rest)))))) (else entry))))) - (for-each-state + (send a for-each-state (lambda (state) (let loop ((term 0)) (if (< term num-terms) @@ -151,8 +151,7 @@ (array2d-ref table (kernel-index state) (+ num-non-terms term)))) - (loop (add1 term)))))) - a) + (loop (add1 term))))))) (if (not suppress) (begin (if (> SR-conflicts 0) @@ -168,7 +167,7 @@ (define (resolve-prec-conflicts a table get-term get-prod num-terms num-non-terms) - (for-each-state + (send a for-each-state (lambda (state) (let loop ((term 0)) (if (< term num-terms) @@ -210,102 +209,100 @@ ((eq? 'right (prec-assoc s-prec)) shift) (else #f))))))) - (loop (add1 term)))))) - a)) + (loop (add1 term)))))))) ;; In the result table the first index is the state and the second is the ;; term/non-term index (with the non-terms coming first) ;; buile-table: grammar * string -> action2d-array (define (build-table g file suppress) (let* ((a (time (build-lr0-automaton g))) - (terms (grammar-terms g)) - (non-terms (grammar-non-terms g)) - (get-term (list->vector terms)) - (get-non-term (list->vector non-terms)) - (get-prod (list->vector (grammar-prods g))) - (num-terms (vector-length get-term)) - (num-non-terms (vector-length get-non-term)) + (terms (grammar-terms g)) + (non-terms (grammar-non-terms g)) + (get-term (list->vector terms)) + (get-non-term (list->vector non-terms)) + (get-prod (list->vector (grammar-prods g))) + (num-terms (vector-length get-term)) + (num-non-terms (vector-length get-non-term)) (end-term-indexes (map (lambda (term) (+ num-non-terms (gram-sym-index term))) (grammar-end-terms g))) - (num-gram-syms (+ num-terms num-non-terms)) - (table (make-array2d (vector-length (lr0-states a)) num-gram-syms #f)) - (array2d-add! - (lambda (v i1 i2 a) - (let ((old (array2d-ref v i1 i2))) - (cond - ((not old) (array2d-set! v i1 i2 a)) - ((list? old) (if (not (member a old)) - (array2d-set! v i1 i2 (cons a old)))) - (else (if (not (equal? a old)) - (array2d-set! v i1 i2 (list a old)))))))) - (get-lookahead (compute-LA a g))) + (num-gram-syms (+ num-terms num-non-terms)) + (table (make-array2d (vector-length (send a get-states)) num-gram-syms #f)) + (array2d-add! + (lambda (v i1 i2 a) + (let ((old (array2d-ref v i1 i2))) + (cond + ((not old) (array2d-set! v i1 i2 a)) + ((list? old) (if (not (member a old)) + (array2d-set! v i1 i2 (cons a old)))) + (else (if (not (equal? a old)) + (array2d-set! v i1 i2 (list a old)))))))) + (get-lookahead (compute-LA a g))) (time - (for-each-state - (lambda (state) - (let loop ((i 0)) - (if (< i num-gram-syms) - (begin - (let* ((s (if (< i num-non-terms) - (vector-ref get-non-term i) - (vector-ref get-term (- i num-non-terms)))) - (goto - (run-automaton state s a))) - (if goto - (array2d-set! table - (kernel-index state) - i + (send a for-each-state + (lambda (state) + (let loop ((i 0)) + (if (< i num-gram-syms) + (begin + (let* ((s (if (< i num-non-terms) + (vector-ref get-non-term i) + (vector-ref get-term (- i num-non-terms)))) + (goto + (send a run-automaton state s))) + (if goto + (array2d-set! table + (kernel-index state) + i + (cond + ((< i num-non-terms) + (kernel-index goto)) + ((member i end-term-indexes) + (make-accept)) + (else + (make-shift + (kernel-index goto))))))) + (loop (add1 i))))) + + (for-each + (lambda (item) + (let ((item-prod (item-prod item))) + (bit-vector-for-each + (lambda (term-index) + (array2d-add! table + (kernel-index state) + (+ num-non-terms term-index) (cond - ((< i num-non-terms) - (kernel-index goto)) - ((member i end-term-indexes) - (make-accept)) - (else - (make-shift - (kernel-index goto))))))) - (loop (add1 i))))) - - (for-each - (lambda (item) - (let ((item-prod (item-prod item))) - (bit-vector-for-each - (lambda (term-index) - (array2d-add! table - (kernel-index state) - (+ num-non-terms term-index) - (cond - ((not (start-item? item)) - (make-reduce - (prod-index item-prod) - (gram-sym-index (prod-lhs item-prod)) - (vector-length (prod-rhs item-prod))))))) - (get-lookahead state item-prod)))) - - (append (hash-table-get (lr0-epsilon-trans a) state (lambda () null)) - (filter (lambda (item) - (not (move-dot-right item))) - (kernel-items state))))) - a)) - + ((not (start-item? item)) + (make-reduce + (prod-index item-prod) + (gram-sym-index (prod-lhs item-prod)) + (vector-length (prod-rhs item-prod))))))) + (get-lookahead state item-prod)))) + + (append (hash-table-get (send a get-epsilon-trans) state (lambda () null)) + (filter (lambda (item) + (not (move-dot-right item))) + (kernel-items state))))))) + (resolve-prec-conflicts a table get-term get-prod num-terms num-non-terms) - + (if (not (string=? file "")) - (with-handlers [(exn:i/o:filesystem? - (lambda (e) - (fprintf - (current-error-port) - "Cannot write debug output to file \"~a\". ~a~n" - (exn:i/o:filesystem-pathname e) - (exn:i/o:filesystem-detail e))))] - (call-with-output-file file - (lambda (port) - (display-parser a table get-term get-non-term (grammar-prods g) + (with-handlers [(exn:i/o:filesystem? + (lambda (e) + (fprintf + (current-error-port) + "Cannot write debug output to file \"~a\". ~a~n" + (exn:i/o:filesystem-pathname e) + (exn:i/o:filesystem-detail e))))] + (call-with-output-file file + (lambda (port) + (display-parser a table get-term get-non-term (grammar-prods g) port))))) (resolve-conflicts a table num-terms num-non-terms suppress) table)) ) - +