diff --git a/collects/parser-tools/private-yacc/grammar.ss b/collects/parser-tools/private-yacc/grammar.ss new file mode 100644 index 0000000..d45b4a3 --- /dev/null +++ b/collects/parser-tools/private-yacc/grammar.ss @@ -0,0 +1,193 @@ +#cs +(module grammar mzscheme + + ;; Constructs to create and access grammars, the internal + ;; representation of the input to the parser generator. + + (require (lib "list.ss") + "yacc-helper.ss") + + (provide + + (rename export-make-item make-item) + make-term + make-non-term + make-prec + make-prod + (rename make-gram make-grammar) + + ;; Things that work on items + start-item? item-prod item-prod-index item->string + sym-at-dot move-dot-right itemstring + non-term? term? nullable? non-term bool + ;; Lexicographic comparison on two items. + (define (item LR-item | #f + ;; moves the dot to the right in the item, unless it is at its + ;; rightmost, then it returns false + (define (move-dot-right i) + (cond + ((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f) + (else (make-item (item-prod i) + (add1 (item-dot-pos i)) + (item-n i))))) + + ;; sym-at-dot: LR-item -> gram-sym | #f + ;; returns the symbol after the dot in the item or #f if there is none + (define (sym-at-dot i) + (cond + ((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f) + (else (vector-ref (prod-rhs (item-prod i)) (item-dot-pos i))))) + + ;; nullable-after-dot?: LR1-iten * grammar -> bool + ;; determines if the string after the dot is nullable + (define (nullable-after-dot? i g) + (cond + ((item-n i) => (lambda (x) (>= (item-dot-pos i) x))) + (else + (let ((str (prod-rhs (item-prod i)))) + (let loop ((c (sub1 (vector-length str)))) + (cond + ((= c -1) (set-item-n! i 0)) + ((term? (vector-ref str c)) (set-item-n! i (add1 c))) + ((nullable? g (vector-ref str c)) (loop (sub1 c))) + (else (set-item-n! i (add1 c)))))) + (>= (item-dot-pos i) (item-n i))))) + + + ;; print-item: LR-item -> + (define (item->string it) + (let ((print-sym (lambda (i) + (let ((gs (vector-ref (prod-rhs (item-prod it)) i))) + (cond + ((term? gs) (format "~a " (term-sym gs))) + (else (format "~a " (non-term-sym gs)))))))) + (string-append + (format "~a -> " (non-term-sym (prod-lhs (item-prod it)))) + (let loop ((i 0)) + (cond + ((= i (vector-length (prod-rhs (item-prod it)))) + (if (= i (item-dot-pos it)) + ". " + "")) + ((= i (item-dot-pos it)) + (string-append ". " (print-sym i) (loop (add1 i)))) + (else (string-append (print-sym i) (loop (add1 i))))))))) + + ;; --------------------- Grammar Symbols -------------------------- + + ;; gram-sym = (make-term symbol int prec) + ;; | (make-non-term symbol int) + (define-struct term (sym index prec)) + (define-struct non-term (sym index)) + + (define (non-termstring gs) + (symbol->string (gram-sym-symbol gs))) + + ;; ------------------------- Precedences --------------------------- + + ;; a precedence declaration. the sym should be 'left 'right or 'nonassoc + ;; prec = (make-prec int sym) + ;; | #f + (define-struct prec (num assoc)) + + ;; ------------------------- Grammar ------------------------------ + + ;; grammar = (make-gram (production list vector) + ;; (production list) + ;; (bool vector) + ;; (non-term list) + ;; (term list) + ;; int) + ;; + ;; The nt-prods field is indexed by the number assigned to the non-term and + ;; contains the list of productions for that non-term + ;; The prods field contains a list of all productions + ;; The nulls field is indexed by the index for a non-term and is trus iff + ;; the non-term is nullable + (define-struct gram + (nt-prods prods nulls non-terms terms num-prods)) + + + ;; get-nt-prods: grammar * non-term -> production list + ;; returns the productions for the given non-term + (define (get-nt-prods g nt) + (vector-ref (gram-nt-prods g) (non-term-index nt))) + + + ;; get-init-prod: grammar -> production + ;; gets the starting production + (define (get-init-prod g) + (car (vector-ref (gram-nt-prods g) 0))) + + + + (define (nullable? g nt) + (vector-ref (gram-nulls g) (non-term-index nt))) + + + ;; ------------------------ Productions --------------------------- + + ;; production = (make-prod non-term (gram-sym vector) int prec) + (define-struct prod (lhs rhs index prec)) +) diff --git a/collects/parser-tools/private-yacc/graph.ss b/collects/parser-tools/private-yacc/graph.ss new file mode 100644 index 0000000..3252576 --- /dev/null +++ b/collects/parser-tools/private-yacc/graph.ss @@ -0,0 +1,60 @@ +#cs +(module graph mzscheme + + (provide digraph) + + ;; digraph: + ;; ('a list) * ('a -> 'a list) * ('a -> 'b) * ('b * 'b -> 'b) * 'b + ;; -> ('a -> 'b) + ;; DeRemer and Pennello 1982 + ;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} + ;; We use a hash-table to represent the result function 'a -> 'b set, so + ;; the values of type 'a must be comparable with equal?. + (define (digraph nodes edges f- union fail) + (letrec ( + ;; Will map elements of 'a to 'b sets + (results (make-hash-table 'equal)) + (f (lambda (x) (hash-table-get results x (lambda () fail)))) + + ;; Maps elements of 'a to integers. + (N (make-hash-table 'equal)) + (get-N (lambda (x) (hash-table-get N x (lambda () 0)))) + (set-N (lambda (x d) (hash-table-put! N x d))) + + (stack null) + (push (lambda (x) (set! stack (cons x stack)))) + (pop (lambda () (begin0 (car stack) (set! stack (cdr stack))))) + (depth (lambda () (length stack))) + + ;; traverse: 'a -> + (traverse + (lambda (x) + (push x) + (let ((d (depth))) + (set-N x d) + (hash-table-put! results x (f- x)) + (for-each (lambda (y) + (if (= 0 (get-N y)) + (traverse y)) + (hash-table-put! results + x + (union (f x) (f y))) + (set-N x (min (get-N x) (get-N y)))) + (edges x)) + (if (= d (get-N x)) + (let loop ((p (pop))) + (set-N p +inf.0) + (hash-table-put! results p (f x)) + (if (not (equal? x p)) + (loop (pop))))))))) + (for-each (lambda (x) + (if (= 0 (get-N x)) + (traverse x))) + nodes) + f)) +) + + + + + diff --git a/collects/parser-tools/private-yacc/input-file-parser.ss b/collects/parser-tools/private-yacc/input-file-parser.ss new file mode 100644 index 0000000..f212ec1 --- /dev/null +++ b/collects/parser-tools/private-yacc/input-file-parser.ss @@ -0,0 +1,356 @@ +#cs +(module input-file-parser mzscheme + + ;; routines for parsing the input to the parser generator and producing a + ;; grammar (See grammar.ss) + + (require "yacc-helper.ss" "terminal-syntax.ss" "grammar.ss" (lib "list.ss")) + + (provide parse-input) + + ;; nullable: production list * int -> non-term set + ;; determines which non-terminals can derive epsilon + (define (nullable prods num-nts) + (letrec ((nullable (make-vector num-nts #f)) + (added #f) + + ;; possible-nullable: producion list -> production list + ;; Removes all productions that have a terminal + (possible-nullable + (lambda (prods) + (filter (lambda (prod) + (vector-andmap non-term? (prod-rhs prod))) + prods))) + + ;; set-nullables: production list -> production list + ;; makes one pass through the productions, adding the ones + ;; known to be nullable now to nullable and returning a list + ;; of productions that we don't know about yet. + (set-nullables + (lambda (prods) + (cond + ((null? prods) null) + ((vector-ref nullable + (gram-sym-index (prod-lhs (car prods)))) + (set-nullables (cdr prods))) + ((vector-andmap (lambda (nt) + (vector-ref nullable (gram-sym-index nt))) + (prod-rhs (car prods))) + (vector-set! nullable + (gram-sym-index (prod-lhs (car prods))) + #t) + (set! added #t) + (set-nullables (cdr prods))) + (else + (cons (car prods) + (set-nullables (cdr prods)))))))) + + (let loop ((P (possible-nullable prods))) + (cond + ((null? P) nullable) + (else + (set! added #f) + (let ((new-P (set-nullables P))) + (if added + (loop new-P) + nullable))))))) + + + ;; Given the list of terminal symbols and the precedence/associativity definitions, + ;; builds terminal structures (See grammar.ss) + ;; build-terms: symbol list * symbol list list -> term list + (define (build-terms term-list precs) + (let ((counter 0) + + ;;(term-list (cons (gensym) term-list)) + + ;; Will map a terminal symbol to its precedence/associativity + (prec-table (make-hash-table))) + + ;; Fill the prec table + (for-each + (lambda (p-decl) + (begin0 + (let ((assoc (car p-decl))) + (for-each + (lambda (term-sym) + (hash-table-put! prec-table term-sym (make-prec counter assoc))) + (cdr p-decl))) + (set! counter (add1 counter)))) + precs) + + (set! counter 0) + + ;; Build the terminal structures + (map + (lambda (term-sym) + (begin0 + (make-term term-sym + counter + (hash-table-get prec-table term-sym (lambda () #f))) + (set! counter (add1 counter)))) + term-list))) + + ;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.xx) + ;; get-terms-from-def: syntax-object -> symbol list + (define (get-terms-from-def term-syn) + (let ((t (syntax-local-value term-syn (lambda () #f)))) + (cond + ((terminals-def? t) (terminals-def-t t)) + (else + (raise-syntax-error + 'parser-tokens + "undefined token group" + term-syn))))) + + ;; parse-input: syntax-object^4 * string -> grammar + (define (parse-input start term-defs prec-decls prods) + (let* ((counter 0) + + (start-sym (syntax-object->datum start)) + + ;; Get the list of terminals out of input-terms + (list-of-terms + (syntax-case term-defs () + ((term-def ...) + (andmap identifier? (syntax->list term-defs)) + (remove-duplicates + (apply append + (map get-terms-from-def + (syntax->list term-defs))))) + (_ + (raise-syntax-error + 'parser-tokens + "Token list must be (symbol ...)" + term-defs)))) + + + (list-of-non-terms + (syntax-case prods () + (((non-term production ...) ...) + (begin + (for-each + (lambda (nts) + (if (memq (syntax-object->datum nts) list-of-terms) + (raise-syntax-error + 'parser-non-terminals + (format "~a used as both token and non-terminal" + (syntax-object->datum nts)) + nts))) + (syntax->list (syntax (non-term ...)))) + + (if (not (memq start-sym + (syntax-object->datum (syntax (non-term ...))))) + (raise-syntax-error + 'parser-start + (format "Start symbol ~a not defined as a non-terminal" + start-sym) + start)) + + (let ((dup (duplicate-list? (syntax-object->datum + (syntax (non-term ...)))))) + (if dup + (raise-syntax-error + 'parser-non-terminals + (format "non-terminal ~a defined multiple times" + dup) + prods))) + + (syntax-object->datum (syntax (non-term ...))))) + (_ + (raise-syntax-error + 'parser-productions + "Productions must be of the form ((non-terminal productions ...) ...)" + prods)))) + + ;; Check the precedence declarations for errors and turn them into data + (precs + (syntax-case prec-decls () + (((type term ...) ...) + (let ((p-terms + (apply append (syntax-object->datum + (syntax ((term ...) ...)))))) + (cond + ((duplicate-list? p-terms) => + (lambda (d) + (raise-syntax-error + 'parser-precedences + (format "duplicate precedence declaration for token ~a" + d) + prec-decls))) + (else + (for-each + (lambda (a) + (for-each + (lambda (t) + (if (not (memq (syntax-object->datum t) + list-of-terms)) + (raise-syntax-error + 'parser-precedences + (format + "Precedence declared for non-token ~a" + (syntax-object->datum t)) + t))) + (syntax->list a))) + (syntax->list (syntax ((term ...) ...)))) + (for-each + (lambda (type) + (if (not (memq (syntax-object->datum type) + `(left right nonassoc))) + (raise-syntax-error + 'parser-precedences + "Associativity must be left, right or nonassoc" + type))) + (syntax->list (syntax (type ...)))) + (syntax-object->datum prec-decls))))) + (_ + (raise-syntax-error + 'parser-precedences + "Precedence declaration must be of the form ((assoc term ...) ...) where assoc is left, right or nonassoc" + prec-decls)))) + + (terms (build-terms list-of-terms precs)) + + (non-terms (begin + (set! counter 1) + (map (lambda (non-term) + (begin0 + (make-non-term non-term counter) + (set! counter (add1 counter)))) + list-of-non-terms))) + (term-table (make-hash-table)) + (non-term-table (make-hash-table))) + + (for-each (lambda (t) + (hash-table-put! term-table (gram-sym-symbol t) t)) + terms) + + (for-each (lambda (nt) + (hash-table-put! non-term-table (gram-sym-symbol nt) nt)) + non-terms) + + (set! counter 1) + + (let* ( + ;; parse-prod: syntax-object -> gram-sym vector + (parse-prod + (lambda (prod-so) + (syntax-case prod-so () + ((prod-rhs-sym ...) + (andmap identifier? (syntax->list prod-so)) + (list->vector + (map (lambda (s) + (hash-table-get + term-table + (syntax-object->datum s) + (lambda () + (hash-table-get + non-term-table + (syntax-object->datum s) + (lambda () + (raise-syntax-error + 'parser-production-rhs + (format + "~a is not declared as a terminal or non-terminal" + (syntax-object->datum s)) + s)))))) + (syntax->list prod-so)))) + (_ + (raise-syntax-error + 'parser-production-rhs + "production right-hand-side must have form (symbol ...)" + prod-so))))) + + ;; parse-prod+action: non-term * syntax-object -> production + (parse-prod+action + (lambda (nt prod-so) + (syntax-case prod-so (prec) + ((prod-rhs action) + (let ((p (parse-prod (syntax prod-rhs)))) + (begin0 + (make-prod + nt + p + counter + (let loop ((i (sub1 (vector-length p)))) + (if (>= i 0) + (let ((gs (vector-ref p i))) + (if (term? gs) + (term-prec gs) + (loop (sub1 i)))) + #f))) + (set! counter (add1 counter))))) + ((prod-rhs (prec term) action) + (identifier? (syntax term)) + (begin0 + (make-prod + nt + (parse-prod (syntax prod-rhs)) + counter + (term-prec + (hash-table-get + term-table + (syntax-object->datum (syntax term)) + (lambda () + (raise-syntax-error + 'parser-production-rhs + (format + "unrecognized terminal ~a in precedence declaration" + (syntax-object->datum (syntax term))) + (syntax term))))) + (set! counter (add1 counter))))) + (_ + (raise-syntax-error + 'parser-production-rhs + "production must have form [(symbol ...) expression] or [(symbol ...) (prec symbol) expression]" + prod-so))))) + + ;; parse-prod-for-nt: syntax-object -> production list + (parse-prods-for-nt + (lambda (prods-so) + (syntax-case prods-so () + ((nt productions ...) + (> (length (syntax->list (syntax (productions ...)))) 0) + (let* ((prods (syntax-e prods-so)) + (nt (hash-table-get non-term-table + (syntax-e (car prods))))) + (map (lambda (p) (parse-prod+action nt p)) + (cdr prods)))) + (_ + (raise-syntax-error + 'parser-productions + "A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side" + prods-so)))))) + + (let* ((start (make-non-term 'Start 0)) + (prods + (cons + (list (make-prod start + (vector (hash-table-get non-term-table + start-sym)) + 0 + #f)) + (map parse-prods-for-nt (syntax->list prods)))) + (nulls (nullable (apply append prods) + (add1 (length non-terms))))) + + + (printf "nullable: {~a}~n~n" + (apply string-append + (let loop ((i 0)) + (cond + ((>= i (vector-length nulls)) null) + ((vector-ref nulls i) + (cons + (format "~a" + (gram-sym-symbol + (list-ref (cons start non-terms) i))) + (loop (add1 i)))) + (else (loop (add1 i))))))) + (make-grammar + (list->vector prods) + (apply append prods) + nulls + (cons start non-terms) + terms + counter)))))) diff --git a/collects/parser-tools/private-yacc/lalr.ss b/collects/parser-tools/private-yacc/lalr.ss new file mode 100644 index 0000000..524fd49 --- /dev/null +++ b/collects/parser-tools/private-yacc/lalr.ss @@ -0,0 +1,126 @@ +#cs +(module lalr mzscheme + + ;; Compute LALR lookaheads from DeRemer and Pennello 1982 + + (require "lr0.ss" + "grammar.ss" + "graph.ss" + "array2d.ss" + (lib "list.ss")) + + (provide compute-LA) + + (define (array2d-add! a i1 i2 v) + (let ((old (array2d-ref a i1 i2))) + (array2d-set! a i1 i2 (cons v old)))) + + ;; compute-DR: LR0-automaton * grammar -> (trans-key -> term list) + ;; computes for each state, non-term transition pair, the terminals + ;; which can transition out of the resulting state + (define (compute-DR a g) + (lambda (tk) + (let ((r (run-automaton (trans-key-st tk) (trans-key-gs tk) a))) + (filter + (lambda (term) + (run-automaton r term a)) + (grammar-terms g))))) + + ;; compute-reads: + ;; LR0-automaton * grammar -> (trans-key -> trans-key list) + (define (compute-reads a g) + (lambda (tk) + (let ((r (run-automaton (trans-key-st tk) (trans-key-gs tk) a))) + (map (lambda (x) (make-trans-key r x)) + (filter (lambda (non-term) + (and (nullable? g non-term) + (run-automaton r non-term a))) + (grammar-non-terms g)))))) + + ;; compute-read: LR0-automaton * grammar -> (trans-key -> term list) + (define (compute-read a g) + (let* ((dr (compute-DR a g)) + (reads (compute-reads a g))) + (digraph (filter (lambda (x) (non-term? (trans-key-gs x))) + (hash-table-map (lr0-transitions a) (lambda (k v) k))) + reads + dr + (union term (value (trans-key -> trans-key list) + ;; (kernel * prod -> trans-key list)) + (define (compute-includes-and-lookback a g) + (let* ((states (lr0-states a)) + (non-terms (grammar-non-terms g)) + (num-states (vector-length states)) + (num-non-terms (length non-terms)) + (includes (make-array2d num-states num-non-terms null)) + (lookback (make-array2d num-states + (grammar-num-prods g) + null))) + (let loop ((state 0)) + (if (< state num-states) + (begin + (for-each + (lambda (non-term) + (for-each + (lambda (prod) + (let loop ((i (make-item prod 0)) + (p (vector-ref states state))) + (if (and p i) + (begin + (if (and (non-term? (sym-at-dot i)) + (nullable-after-dot? (move-dot-right i) + g)) + (array2d-add! includes + (kernel-index p) + (gram-sym-index + (sym-at-dot i)) + (make-trans-key + (vector-ref states state) + non-term))) + (if (not (move-dot-right i)) + (array2d-add! lookback + (kernel-index p) + (prod-index prod) + (make-trans-key + (vector-ref states state) + non-term))) + (loop (move-dot-right i) + (run-automaton p (sym-at-dot i) a)))))) + (get-nt-prods g non-term))) + non-terms) + (loop (add1 state))))) + (values (lambda (tk) + (array2d-ref includes + (kernel-index (trans-key-st tk)) + (gram-sym-index (trans-key-gs tk)))) + (lambda (state prod) + (array2d-ref lookback + (kernel-index state) + (prod-index prod)))))) + + + ;; compute-follow: LR0-automaton * grammar -> (trans-key -> term list) + (define (compute-follow a g includes) + (let ((read (compute-read a g))) + (digraph (filter (lambda (x) (non-term? (trans-key-gs x))) + (hash-table-map (lr0-transitions a) (lambda (k v) k))) + includes + read + (union term (kernel * prod -> term list) + (define (compute-LA a g) + (let-values (((includes lookback) (compute-includes-and-lookback a g))) + (let ((follow (compute-follow a g includes))) + (lambda (k p) + (let* ((l (lookback k p)) + (f (map follow l))) + (apply append f)))))) + +) \ No newline at end of file diff --git a/collects/parser-tools/private-yacc/lr0.ss b/collects/parser-tools/private-yacc/lr0.ss new file mode 100644 index 0000000..e083a67 --- /dev/null +++ b/collects/parser-tools/private-yacc/lr0.ss @@ -0,0 +1,247 @@ +#cs +(module lr0 mzscheme + + ;; Handle the LR0 automaton + + (require "grammar.ss" + "graph.ss" + (lib "list.ss")) + + (provide union build-lr0-automaton run-automaton (struct trans-key (st gs)) + lr0-transitions lr0-states kernel-items kernel-index) + + (define (union comp (eq? a b) + (define (kernel->string k) + (apply string-append + `("{" ,@(map (lambda (i) (string-append (item->string i) ", ")) + (kernel-items k)) + "}"))) + + ;; run-automaton: kernel * gram-sym * LR0-automaton -> kernel | #f + ;; returns the state that the transition trans-key provides or #f + ;; if there is no such state + (define (run-automaton k s a) + (hash-table-get (lr0-transitions a) (make-trans-key k s) (lambda () #f))) + + + ;; build-LR0-automaton: grammar -> LR0-automaton + ;; Constructs the kernels of the sets of LR(0) items of g + (define (build-lr0-automaton grammar) +; (printf "LR(0) automaton:~n") + (letrec ( + (terms (list->vector (grammar-terms grammar))) + (non-terms (list->vector (grammar-non-terms grammar))) + (num-non-terms (vector-length non-terms)) + (num-gram-syms (+ num-non-terms (vector-length terms))) + + + ;; first-non-term: non-term -> non-term list + ;; given a non-terminal symbol C, return those non-terminal + ;; symbols A s.t. C -> An for some string of terminals and + ;; non-terminals n where -> means a rightmost derivation in many + ;; steps. Assumes that each non-term can be reduces to a string + ;; of terms. + (first-non-term + (digraph (grammar-non-terms grammar) + (lambda (nt) + (filter non-term? + (map (lambda (prod) + (sym-at-dot (make-item prod 0))) + (get-nt-prods grammar nt)))) + (lambda (nt) (list nt)) + (union non-term LR1-item list + ;; Creates a set of items containing i s.t. if A -> n.Xm is in it, + ;; X -> .o is in it too. + (LR0-closure + (lambda (i) + (cond + ((null? i) null) + (else + (let ((next-gsym (sym-at-dot (car i)))) + (cond + ((non-term? next-gsym) + (cons (car i) + (append + (apply append + (map (lambda (non-term) + (map (lambda (x) + (make-item x 0)) + (get-nt-prods grammar + non-term))) + (first-non-term next-gsym))) + (LR0-closure (cdr i))))) + (else + (cons (car i) (LR0-closure (cdr i)))))))))) + + + + ;; maps trans-keys to kernels + (automaton (make-hash-table 'equal)) + + ;; keeps the kernels we have seen, so we can have a unique + ;; list for each kernel + (kernels (make-hash-table 'equal)) + + (counter 1) + + ;; goto: LR1-item list -> LR1-item list list + ;; creates new kernels by moving the dot in each item in the + ;; LR0-closure of kernel to the right, and grouping them by + ;; the term/non-term moved over. Returns the kernels not + ;; yet seen, and places the trans-keys into automaton + (goto + (lambda (kernel) + (let ( + ;; maps each gram-syms to a list of items + + (table (make-vector num-gram-syms null)) + + ;; add-item!: + ;; (item list) vector * item -> + ;; adds i into the table grouped with the grammar + ;; symbol following its dot + (add-item! + (lambda (table i) + (let ((gs (sym-at-dot i))) + (if gs + (let* ((add (if (term? gs) + num-non-terms + 0)) + (already + (vector-ref table + (+ add + (gram-sym-index gs))))) + (if (not (member i already)) + (vector-set! table + (+ add (gram-sym-index gs)) + (cons i already))))))))) + + ;; Group the items of the LR0 closure of the kernel + ;; by the character after the dot + (for-each (lambda (item) + (add-item! table item)) + (LR0-closure (kernel-items kernel))) + + + ;; each group is a new kernel, with the dot advanced. + ;; sorts the items in a kernel so kernels can be compared + ;; with equal? for using the table kernels to make sure + ;; only one representitive of each kernel is created + (filter + (lambda (x) x) + (map + (lambda (i) + (let* ((gs (car i)) + (items (cadr i)) + (new #f) + (new-kernel (quicksort + (filter (lambda (x) x) + (map move-dot-right items)) + item ~a on ~a~n" +; (kernel->string kernel) +; (kernel->string unique-kernel) +; (gram-sym-symbol gs)) + (if new + unique-kernel + #f))) + (let loop ((i 0)) + (cond + ((< i num-non-terms) + (let ((items (vector-ref table i))) + (cond + ((null? items) (loop (add1 i))) + (else + (cons (list (vector-ref non-terms i) items) + (loop (add1 i))))))) + ((< i num-gram-syms) + (let ((items (vector-ref table i))) + (cond + ((null? items) (loop (add1 i))) + (else + (cons (list (vector-ref terms (- i num-non-terms)) + items) + (loop (add1 i))))))) + (else null)))))))) + + + (start (list (make-item (get-init-prod grammar) 0))) + (startk (make-kernel start 0)) + (new-kernels (make-queue))) + + (hash-table-put! kernels start startk) + (let loop ((old-kernels (list startk)) + (seen-kernels null)) + (cond + ((and (empty-queue? new-kernels) (null? old-kernels)) + (make-lr0 automaton (list->vector (reverse! seen-kernels)))) + ((null? old-kernels) + (loop (deq! new-kernels) seen-kernels)) + (else + (enq! new-kernels (goto (car old-kernels))) + (loop (cdr old-kernels) (cons (car old-kernels) seen-kernels))))))) + + (define-struct q (f l)) + (define (empty-queue? q) + (null? (q-f q))) + (define (make-queue) + (make-q null null)) + (define (enq! q i) + (if (empty-queue? q) + (let ((i (list i))) + (set-q-l! q i) + (set-q-f! q i)) + (begin + (set-cdr! (q-l q) (list i)) + (set-q-l! q (cdr (q-l q)))))) + (define (deq! q) + (begin0 + (car (q-f q)) + (set-q-f! q (cdr (q-f q))))) + +) \ No newline at end of file diff --git a/collects/parser-tools/private-yacc/parser-actions.ss b/collects/parser-tools/private-yacc/parser-actions.ss new file mode 100644 index 0000000..1728145 --- /dev/null +++ b/collects/parser-tools/private-yacc/parser-actions.ss @@ -0,0 +1,19 @@ +#cs +(module parser-actions mzscheme + + ;; The entries into the action table + + (provide shift? reduce? accept? + shift-state reduce-prod-num reduce-lhs-num reduce-rhs-length + make-shift make-reduce) + + ;; action = (shift int) + ;; | (reduce int int int) + ;; | (accept) + ;; | int + ;; | #f + + (define-struct shift (state)) + (define-struct reduce (prod-num lhs-num rhs-length)) + (define-struct accept ()) + ) diff --git a/collects/parser-tools/private-yacc/parser-builder.ss b/collects/parser-tools/private-yacc/parser-builder.ss new file mode 100644 index 0000000..5d8df62 --- /dev/null +++ b/collects/parser-tools/private-yacc/parser-builder.ss @@ -0,0 +1,77 @@ +#cs +(module parser-builder mzscheme + + (require "input-file-parser.ss" + "table.ss" + "parser-actions.ss" + "grammar.ss" + (lib "pretty.ss")) + + (provide build-parser) + + (define (build-parser start input-terms assocs prods filename runtime src) + (let* ((grammar (parse-input start input-terms assocs prods)) + (table (build-table grammar "")) + (table-code + (cons 'vector + (map (lambda (action) + (cond + ((shift? action) + `(make-shift ,(shift-state action))) + ((reduce? action) + `(make-reduce ,(reduce-prod-num action) + ,(reduce-lhs-num action) + ,(reduce-rhs-length action))) + ((accept? action) + `(make-accept)) + (else action))) + (vector->list table)))) + + (num-non-terms (length (grammar-non-terms grammar))) + + (token-code + `(let ((ht (make-hash-table))) + (begin + ,@(map (lambda (term) + `(hash-table-put! ht + ',(gram-sym-symbol term) + ,(+ num-non-terms (gram-sym-index term)))) + (grammar-terms grammar)) + ht))) + + (parser-code + `(letrec ((term-sym->index ,token-code) + (table ,table-code) + (pop-2x + (lambda (s n) + (if (> n 0) + (pop-2x (cdr (cdr s)) (sub1 n)) + s)))) + (lambda (get-token) + (let loop ((stack (list 0))) + (let* ((next (get-token)) + (s (car stack)) + (a (hash-table-get term-sym->index + (if (token? next) + (token-name next) + next))) + (action (array2d-ref table s a))) + (cond + ((shift? action) + (loop (cons (shift-state action) (cons a stack)))) + ((reduce? action) + (display (reduce-prod-num action)) + (newline) + (let* ((A (reduce-lhs-num action)) + (new-stack (pop-2x stack (reduce-rhs-length action))) + (goto (array2d-ref table (car new-stack) A))) + (loop (cons goto (cons A new-stack))))) + ((accept? action) + (printf "accept~n"))))))))) + (pretty-print parser-code) + (newline) + (datum->syntax-object + runtime + parser-code + src)))) + \ No newline at end of file diff --git a/collects/parser-tools/private-yacc/table.ss b/collects/parser-tools/private-yacc/table.ss new file mode 100644 index 0000000..b0e12ef --- /dev/null +++ b/collects/parser-tools/private-yacc/table.ss @@ -0,0 +1,286 @@ +#cs +(module table mzscheme + + ;; Routine to build the LALR table + + (require "grammar.ss" + "lr0.ss" + "array2d.ss" + "lalr.ss" + "parser-actions.ss" + (lib "list.ss")) + + (provide build-table) + + ;; print-entry: symbol * action * output-port -> + ;; prints the action a for lookahead sym to port + (define (print-entry sym a port) + (let ((s "\t~a\t\t\t\t\t~a\t~a\n")) + (cond + ((shift? a) + (fprintf port s sym "shift" (shift-state a))) + ((reduce? a) + (fprintf port s sym "reduce" (reduce-prod-num a))) + ((accept? a) + (fprintf port s sym "accept" "")) + (a + (fprintf port s sym "goto" a))))) + + + ;; count: ('a -> bool) * 'a list -> num + ;; counts the number of elements in list that satisfy pred + (define (count pred list) + (cond + ((null? list) 0) + ((pred (car list)) (+ 1 (count pred (cdr list)))) + (else (count pred (cdr list))))) + + + ;; display-parser: + ;; action array2d * term vector * non-term vector * kernel vector * + ;; output-port -> + ;; Prints out the parser given by table. + (define (display-parser table terms non-terms states port) + (let* ((num-terms (vector-length terms)) + (num-non-terms (vector-length non-terms)) + (num-gram-syms (+ num-terms num-non-terms)) + (num-states (vector-length states)) + (SR-conflicts 0) + (RR-conflicts 0)) + (let loop ((i 0)) + (if (< i num-states) + (begin + (fprintf port "State ~a~n" i) + (for-each (lambda (item) + (fprintf port "\t~a~n" (item->string item))) + (kernel-items (vector-ref states i))) + (newline port) + (let loop ((j 0)) + (if (< j num-terms) + (begin + (let ((act (array2d-ref + table + i + (+ j num-non-terms)))) + (cond + ((list? act) + (fprintf port "begin conflict:~n") + (if (> (count reduce? act) 1) + (set! RR-conflicts (add1 RR-conflicts))) + (if (> (count shift? act) 0) + (set! SR-conflicts (add1 SR-conflicts))) + (map (lambda (x) + (print-entry + (gram-sym-symbol (vector-ref terms j)) + x + port)) + act) + (fprintf port "end conflict~n")) + (act (print-entry + (gram-sym-symbol (vector-ref terms j)) + act + port)))) + (loop (add1 j))))) + + (newline port) + + (let loop ((j 0)) + (if (< j num-non-terms) + (begin + (let ((s (array2d-ref table i j))) + (if s + (print-entry + (gram-sym-symbol (vector-ref non-terms j)) + s + port))) + (loop (add1 j))))) + + (newline port) + (loop (add1 i))))) + (if (> SR-conflicts 0) + (fprintf port "~a shift/reduce conflicts~n" SR-conflicts)) + (if (> RR-conflicts 0) + (fprintf port "~a reduce/reduce conflicts~n" RR-conflicts)))) + + (define (resolve-conflicts table num-states num-terms num-non-terms) + (letrec ((SR-conflicts 0) + (RR-conflicts 0) + (get-action + (lambda (entry) + (cond + ((list? entry) + (if (> (count shift? entry) 0) + (set! SR-conflicts (add1 SR-conflicts))) + (if (> (count reduce? entry) 1) + (set! RR-conflicts (add1 RR-conflicts))) + (let loop ((current-guess (make-reduce +inf.0 -1 -1)) + (rest entry)) + (cond + ((null? rest) current-guess) + ((shift? (car rest)) (car rest)) + ((< (reduce-prod-num (car rest)) + (reduce-prod-num current-guess)) + (loop (car rest) (cdr rest))) + (else (loop current-guess (cdr rest)))))) + (else entry))))) + (let loop ((state 0)) + (if (< state num-states) + (begin + (let loop ((term 0)) + (if (< term num-terms) + (begin + (array2d-set! table state (+ num-non-terms term) + (get-action + (array2d-ref table + state + (+ num-non-terms term)))) + (loop (add1 term))))) + (loop (add1 state))))) + (if (> SR-conflicts 0) + (fprintf (current-error-port) + "~a shift/reduce conflicts~n" + SR-conflicts)) + (if (> RR-conflicts 0) + (fprintf (current-error-port) + "~a reduce/reduce conflicts~n" + RR-conflicts)))) + + + + (define (resolve-prec-conflicts table get-term get-prod + num-states num-terms num-non-terms) + (let loop ((state 0)) + (if (< state num-states) + (begin + (let loop ((term 0)) + (if (< term num-terms) + (begin + (let ((action (array2d-ref table + state + (+ num-non-terms term)))) + (if (and (list? action) + (= 2 (length action)) + (or (shift? (car action)) + (shift? (cadr action)))) + (let* ((shift (if (shift? (car action)) + (car action) + (cadr action))) + (reduce (if (shift? (car action)) + (cadr action) + (car action))) + (s-prec (term-prec + (vector-ref get-term + term))) + (r-prec (prod-prec + (vector-ref + get-prod + (reduce-prod-num reduce))))) + (if (and s-prec r-prec) + (array2d-set! + table + state + (+ num-non-terms term) + (cond + ((< (prec-num s-prec) + (prec-num r-prec)) + reduce) + ((> (prec-num s-prec) + (prec-num r-prec)) + shift) + ((eq? 'left (prec-assoc s-prec)) + reduce) + ((eq? 'right (prec-assoc s-prec)) + shift) + (else #f))))))) + (loop (add1 term))))) + (loop (add1 state)))))) + + ;; In the result table the first index is the state and the second is the + ;; term/non-term index (with the non-terms coming first) + ;; buile-table: grammar * string -> action2d-array + (define (build-table g file) + (let* ((a (build-lr0-automaton g)) + (terms (grammar-terms g)) + (non-terms (grammar-non-terms g)) + (get-state (lr0-states a)) + (get-term (list->vector terms)) + (get-non-term (list->vector non-terms)) + (get-prod (list->vector (grammar-prods g))) + (num-states (vector-length get-state)) + (num-terms (vector-length get-term)) + (num-non-terms (vector-length get-non-term)) + (num-gram-syms (+ num-terms num-non-terms)) + (table (make-array2d num-states num-gram-syms #f)) + (array2d-add! + (lambda (v i1 i2 a) + (let ((old (array2d-ref v i1 i2))) + (cond + ((not old) (array2d-set! v i1 i2 a)) + ((list? old) (if (not (member a old)) + (array2d-set! v i1 i2 (cons a old)))) + (else (if (not (equal? a old)) + (array2d-set! v i1 i2 (list a old)))))))) + (get-lookahead (compute-LA a g))) + + (let loop ((state 0)) + (if (< state num-states) + (begin + (let loop ((i 0)) + (if (< i num-gram-syms) + (begin + (let* ((s (if (< i num-non-terms) + (vector-ref get-non-term i) + (vector-ref get-term (- i num-non-terms)))) + (goto + (run-automaton (vector-ref get-state state) + s + a))) + (if goto + (array2d-set! table + state + i + (if (< i num-non-terms) + (kernel-index goto) + (make-shift + (kernel-index goto)))))) + (loop (add1 i))))) + (let ((items + (filter (lambda (item) + (not (move-dot-right item))) + (kernel-items + (vector-ref get-state state))))) + (for-each + (lambda (item) + (for-each + (lambda (t) + (array2d-add! table + state + (+ num-non-terms (gram-sym-index t)) + (cond + ((not (start-item? item)) + (make-reduce + (item-prod-index item) + (gram-sym-index (prod-lhs (item-prod item))) + (vector-length (prod-rhs (item-prod item)))))))) + (get-lookahead (vector-ref get-state state) + (item-prod item)))) + items)) + + (loop (add1 state))))) + (resolve-prec-conflicts table get-term get-prod num-states num-terms + num-non-terms) + (if (not (string=? file "")) + (with-handlers [(exn:i/o:filesystem? + (lambda (e) + (fprintf + (current-error-port) + "Cannot write debug output to file \"~a\". ~a~n" + (exn:i/o:filesystem-pathname e) + (exn:i/o:filesystem-detail e))))] + (call-with-output-file file + (lambda (port) + (display-parser table get-term get-non-term get-state port))))) + (resolve-conflicts table num-states num-terms num-non-terms) + table)) +) + diff --git a/collects/parser-tools/private-yacc/terminal-syntax.ss b/collects/parser-tools/private-yacc/terminal-syntax.ss new file mode 100644 index 0000000..fd97ab7 --- /dev/null +++ b/collects/parser-tools/private-yacc/terminal-syntax.ss @@ -0,0 +1,35 @@ +#cs +(module terminal-syntax mzscheme + + ;; The things needed at compile time to handle definition of tokens + + (provide make-terminals-def terminals-def-t terminals-def? define-tokens-helper) + + (define-struct terminals-def (t)) + + (define (define-tokens-helper stx hack empty?) + (syntax-case stx () + ((_ name (terms ...)) + (andmap identifier? (syntax->list (syntax (terms ...)))) + (datum->syntax-object + hack + `(begin + (define-syntax ,(syntax name) + (make-terminals-def ',(syntax (terms ...)))) + ,@(map + (lambda (n) + `(define (,(datum->syntax-object + n + (string->symbol + (format "token-~a" (syntax-object->datum n))) + n) + ,@(if empty? '() '(x))) + (make-token ',n ,(if empty? #f 'x)))) + (syntax->list (syntax (terms ...))))) + stx)) + ((_ ...) + (raise-syntax-error + #f + "must have the form (define-tokens name (symbol ...))" + stx)))) +) diff --git a/collects/parser-tools/private-yacc/terminal.ss b/collects/parser-tools/private-yacc/terminal.ss new file mode 100644 index 0000000..fa3d317 --- /dev/null +++ b/collects/parser-tools/private-yacc/terminal.ss @@ -0,0 +1,24 @@ +#cs +(module terminal mzscheme + + ;; Defining tokens + + (require-for-syntax "terminal-syntax.ss") + + (provide define-tokens define-empty-tokens token-name token-value token?) + + (define-struct token (name value)) + + (define-syntax define-tokens + (lambda (stx) + (syntax-case stx () + ((_ name ...) + (define-tokens-helper stx #'hack #f))))) + + (define-syntax define-empty-tokens + (lambda (stx) + (syntax-case stx () + ((_ name ...) + (define-tokens-helper stx #'hack #t))))) +) + diff --git a/collects/parser-tools/private-yacc/yacc-helper.ss b/collects/parser-tools/private-yacc/yacc-helper.ss new file mode 100644 index 0000000..60f90ee --- /dev/null +++ b/collects/parser-tools/private-yacc/yacc-helper.ss @@ -0,0 +1,63 @@ +#cs +(module yacc-helper mzscheme + + ;; General helper routines + + (provide duplicate-list? remove-duplicates overlap? vector-andmap) + + (define (vector-andmap f v) + (let loop ((i 0)) + (cond + ((= i (vector-length v)) #t) + (else (if (f (vector-ref v i)) + (loop (add1 i)) + #f))))) + + ;; duplicate-list?: symbol list -> #f | symbol + ;; returns a symbol that exists twice in l, or false if no such symbol + ;; exists + (define (duplicate-list? l) + (letrec ((t (make-hash-table)) + (dl? (lambda (l) + (cond + ((null? l) #f) + ((hash-table-get t (car l) (lambda () #f)) => + (lambda (x) x)) + (else + (hash-table-put! t (car l) (car l)) + (dl? (cdr l))))))) + (dl? l))) + + ;; remove-duplicates: symbol list -> symbol list + ;; removes the duplicates from the lists + (define (remove-duplicates sl) + (let ((t (make-hash-table))) + (letrec ((x + (lambda (sl) + (cond + ((null? sl) sl) + ((hash-table-get t (car sl) (lambda () #f)) + (x (cdr sl))) + (else + (hash-table-put! t (car sl) #t) + (cons (car sl) (x (cdr sl)))))))) + (x sl)))) + + ;; overlap?: symbol list * symbol list -> #f | symbol + ;; Returns an symbol in l1 intersect l2, or #f is no such symbol exists + (define (overlap? l1 l2) + (let/ec ret + (let ((t (make-hash-table))) + (for-each (lambda (s1) + (hash-table-put! t s1 s1)) + l1) + (for-each (lambda (s2) + (cond + ((hash-table-get t s2 (lambda () #f)) => + (lambda (o) (ret o))))) + l2) + #f))) + + +) + diff --git a/collects/parser-tools/yacc.ss b/collects/parser-tools/yacc.ss index e69de29..0742b2d 100644 --- a/collects/parser-tools/yacc.ss +++ b/collects/parser-tools/yacc.ss @@ -0,0 +1,40 @@ +#cs +(module yacc mzscheme + + (require-for-syntax "private-yacc/parser-builder.ss") + (require "private-yacc/terminal.ss" + "private-yacc/parser-actions.ss" + "private-yacc/array2d.ss") + + (provide define-tokens define-empty-tokens parser parser-debug) + + (define-syntax parser + (lambda (stx) + (syntax-case stx () + ((_ start input-terms assocs prods) + (build-parser (syntax start) (syntax input-terms) + (syntax assocs) (syntax prods) + "" #'here stx)) + (_ + (raise-syntax-error + #f + "parser must have the form (parser start-symbol tokens precedence/associativity productions)" + stx))))) + + + (define-syntax parser-debug + (lambda (stx) + (syntax-case stx () + ((_ filename start input-terms assocs prods) + (string? (syntax-object->datum (syntax filename))) + (build-parser (syntax start) (syntax input-terms) + (syntax assocs) (syntax prods) + (syntax-object->datum (syntax filename)) + #'here stx)) + (_ + (raise-syntax-error + #f + "parser must have the form (parser-debug filename start-symbol tokens precedence/associativity productions) where filename is a string" + stx))))) + +) \ No newline at end of file