*** empty log message ***

original commit: 250709526458a0af817fe09a7abc5d85b5639640
tokens
Scott Owens 23 years ago
parent 442aca5621
commit 96200c5baf

@ -0,0 +1,193 @@
#cs
(module grammar mzscheme
;; Constructs to create and access grammars, the internal
;; representation of the input to the parser generator.
(require (lib "list.ss")
"yacc-helper.ss")
(provide
(rename export-make-item make-item)
make-term
make-non-term
make-prec
make-prod
(rename make-gram make-grammar)
;; Things that work on items
start-item? item-prod item-prod-index item->string
sym-at-dot move-dot-right item<? nullable-after-dot?
;; Things that operate on grammar symbols
gram-sym-symbol gram-sym-index term-prec gram-sym->string
non-term? term? nullable? non-term<? term<?
;; Things that work on precs
prec-num prec-assoc
;;Things that work on grammars
get-nt-prods get-init-prod
(rename gram-non-terms grammar-non-terms)
(rename gram-terms grammar-terms)
(rename gram-num-prods grammar-num-prods)
(rename gram-prods grammar-prods)
;; Things that work on productions
prod-index prod-prec prod-rhs prod-lhs)
;;---------------------- LR items --------------------------
;; LR-item = (make-item production nat (int | #f))
;; The n field contains the least integer such the item is nullable
;; after the dot if the dot is to the right of the nth position.
(define-struct item (prod dot-pos n))
(define (export-make-item a b)
(make-item a b #f))
(define (item-prod-index x)
(prod-index (item-prod x)))
;; item<?: LR-item * LR-item -> bool
;; Lexicographic comparison on two items.
(define (item<? i1 i2)
(let ((p1 (prod-index (item-prod i1)))
(p2 (prod-index (item-prod i2))))
(or (< p1 p2)
(and (= p1 p2)
(let ((d1 (item-dot-pos i1))
(d2 (item-dot-pos i2)))
(< d1 d2))))))
(define (start-item? i)
(= 0 (non-term-index (prod-lhs (item-prod i)))))
;; move-dot-right: LR-item -> LR-item | #f
;; moves the dot to the right in the item, unless it is at its
;; rightmost, then it returns false
(define (move-dot-right i)
(cond
((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f)
(else (make-item (item-prod i)
(add1 (item-dot-pos i))
(item-n i)))))
;; sym-at-dot: LR-item -> gram-sym | #f
;; returns the symbol after the dot in the item or #f if there is none
(define (sym-at-dot i)
(cond
((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f)
(else (vector-ref (prod-rhs (item-prod i)) (item-dot-pos i)))))
;; nullable-after-dot?: LR1-iten * grammar -> bool
;; determines if the string after the dot is nullable
(define (nullable-after-dot? i g)
(cond
((item-n i) => (lambda (x) (>= (item-dot-pos i) x)))
(else
(let ((str (prod-rhs (item-prod i))))
(let loop ((c (sub1 (vector-length str))))
(cond
((= c -1) (set-item-n! i 0))
((term? (vector-ref str c)) (set-item-n! i (add1 c)))
((nullable? g (vector-ref str c)) (loop (sub1 c)))
(else (set-item-n! i (add1 c))))))
(>= (item-dot-pos i) (item-n i)))))
;; print-item: LR-item ->
(define (item->string it)
(let ((print-sym (lambda (i)
(let ((gs (vector-ref (prod-rhs (item-prod it)) i)))
(cond
((term? gs) (format "~a " (term-sym gs)))
(else (format "~a " (non-term-sym gs))))))))
(string-append
(format "~a -> " (non-term-sym (prod-lhs (item-prod it))))
(let loop ((i 0))
(cond
((= i (vector-length (prod-rhs (item-prod it))))
(if (= i (item-dot-pos it))
". "
""))
((= i (item-dot-pos it))
(string-append ". " (print-sym i) (loop (add1 i))))
(else (string-append (print-sym i) (loop (add1 i)))))))))
;; --------------------- Grammar Symbols --------------------------
;; gram-sym = (make-term symbol int prec)
;; | (make-non-term symbol int)
(define-struct term (sym index prec))
(define-struct non-term (sym index))
(define (non-term<? nt1 nt2)
(< (non-term-index nt1) (non-term-index nt2)))
(define (term<? nt1 nt2)
(< (term-index nt1) (term-index nt2)))
(define (gram-sym-index gs)
(cond
((term? gs) (term-index gs))
(else (non-term-index gs))))
(define (gram-sym-symbol gs)
(cond
((term? gs) (term-sym gs))
(else (non-term-sym gs))))
(define (gram-sym->string gs)
(symbol->string (gram-sym-symbol gs)))
;; ------------------------- Precedences ---------------------------
;; a precedence declaration. the sym should be 'left 'right or 'nonassoc
;; prec = (make-prec int sym)
;; | #f
(define-struct prec (num assoc))
;; ------------------------- Grammar ------------------------------
;; grammar = (make-gram (production list vector)
;; (production list)
;; (bool vector)
;; (non-term list)
;; (term list)
;; int)
;;
;; The nt-prods field is indexed by the number assigned to the non-term and
;; contains the list of productions for that non-term
;; The prods field contains a list of all productions
;; The nulls field is indexed by the index for a non-term and is trus iff
;; the non-term is nullable
(define-struct gram
(nt-prods prods nulls non-terms terms num-prods))
;; get-nt-prods: grammar * non-term -> production list
;; returns the productions for the given non-term
(define (get-nt-prods g nt)
(vector-ref (gram-nt-prods g) (non-term-index nt)))
;; get-init-prod: grammar -> production
;; gets the starting production
(define (get-init-prod g)
(car (vector-ref (gram-nt-prods g) 0)))
(define (nullable? g nt)
(vector-ref (gram-nulls g) (non-term-index nt)))
;; ------------------------ Productions ---------------------------
;; production = (make-prod non-term (gram-sym vector) int prec)
(define-struct prod (lhs rhs index prec))
)

@ -0,0 +1,60 @@
#cs
(module graph mzscheme
(provide digraph)
;; digraph:
;; ('a list) * ('a -> 'a list) * ('a -> 'b) * ('b * 'b -> 'b) * 'b
;; -> ('a -> 'b)
;; DeRemer and Pennello 1982
;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)}
;; We use a hash-table to represent the result function 'a -> 'b set, so
;; the values of type 'a must be comparable with equal?.
(define (digraph nodes edges f- union fail)
(letrec (
;; Will map elements of 'a to 'b sets
(results (make-hash-table 'equal))
(f (lambda (x) (hash-table-get results x (lambda () fail))))
;; Maps elements of 'a to integers.
(N (make-hash-table 'equal))
(get-N (lambda (x) (hash-table-get N x (lambda () 0))))
(set-N (lambda (x d) (hash-table-put! N x d)))
(stack null)
(push (lambda (x) (set! stack (cons x stack))))
(pop (lambda () (begin0 (car stack) (set! stack (cdr stack)))))
(depth (lambda () (length stack)))
;; traverse: 'a ->
(traverse
(lambda (x)
(push x)
(let ((d (depth)))
(set-N x d)
(hash-table-put! results x (f- x))
(for-each (lambda (y)
(if (= 0 (get-N y))
(traverse y))
(hash-table-put! results
x
(union (f x) (f y)))
(set-N x (min (get-N x) (get-N y))))
(edges x))
(if (= d (get-N x))
(let loop ((p (pop)))
(set-N p +inf.0)
(hash-table-put! results p (f x))
(if (not (equal? x p))
(loop (pop)))))))))
(for-each (lambda (x)
(if (= 0 (get-N x))
(traverse x)))
nodes)
f))
)

@ -0,0 +1,356 @@
#cs
(module input-file-parser mzscheme
;; routines for parsing the input to the parser generator and producing a
;; grammar (See grammar.ss)
(require "yacc-helper.ss" "terminal-syntax.ss" "grammar.ss" (lib "list.ss"))
(provide parse-input)
;; nullable: production list * int -> non-term set
;; determines which non-terminals can derive epsilon
(define (nullable prods num-nts)
(letrec ((nullable (make-vector num-nts #f))
(added #f)
;; possible-nullable: producion list -> production list
;; Removes all productions that have a terminal
(possible-nullable
(lambda (prods)
(filter (lambda (prod)
(vector-andmap non-term? (prod-rhs prod)))
prods)))
;; set-nullables: production list -> production list
;; makes one pass through the productions, adding the ones
;; known to be nullable now to nullable and returning a list
;; of productions that we don't know about yet.
(set-nullables
(lambda (prods)
(cond
((null? prods) null)
((vector-ref nullable
(gram-sym-index (prod-lhs (car prods))))
(set-nullables (cdr prods)))
((vector-andmap (lambda (nt)
(vector-ref nullable (gram-sym-index nt)))
(prod-rhs (car prods)))
(vector-set! nullable
(gram-sym-index (prod-lhs (car prods)))
#t)
(set! added #t)
(set-nullables (cdr prods)))
(else
(cons (car prods)
(set-nullables (cdr prods))))))))
(let loop ((P (possible-nullable prods)))
(cond
((null? P) nullable)
(else
(set! added #f)
(let ((new-P (set-nullables P)))
(if added
(loop new-P)
nullable)))))))
;; Given the list of terminal symbols and the precedence/associativity definitions,
;; builds terminal structures (See grammar.ss)
;; build-terms: symbol list * symbol list list -> term list
(define (build-terms term-list precs)
(let ((counter 0)
;;(term-list (cons (gensym) term-list))
;; Will map a terminal symbol to its precedence/associativity
(prec-table (make-hash-table)))
;; Fill the prec table
(for-each
(lambda (p-decl)
(begin0
(let ((assoc (car p-decl)))
(for-each
(lambda (term-sym)
(hash-table-put! prec-table term-sym (make-prec counter assoc)))
(cdr p-decl)))
(set! counter (add1 counter))))
precs)
(set! counter 0)
;; Build the terminal structures
(map
(lambda (term-sym)
(begin0
(make-term term-sym
counter
(hash-table-get prec-table term-sym (lambda () #f)))
(set! counter (add1 counter))))
term-list)))
;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.xx)
;; get-terms-from-def: syntax-object -> symbol list
(define (get-terms-from-def term-syn)
(let ((t (syntax-local-value term-syn (lambda () #f))))
(cond
((terminals-def? t) (terminals-def-t t))
(else
(raise-syntax-error
'parser-tokens
"undefined token group"
term-syn)))))
;; parse-input: syntax-object^4 * string -> grammar
(define (parse-input start term-defs prec-decls prods)
(let* ((counter 0)
(start-sym (syntax-object->datum start))
;; Get the list of terminals out of input-terms
(list-of-terms
(syntax-case term-defs ()
((term-def ...)
(andmap identifier? (syntax->list term-defs))
(remove-duplicates
(apply append
(map get-terms-from-def
(syntax->list term-defs)))))
(_
(raise-syntax-error
'parser-tokens
"Token list must be (symbol ...)"
term-defs))))
(list-of-non-terms
(syntax-case prods ()
(((non-term production ...) ...)
(begin
(for-each
(lambda (nts)
(if (memq (syntax-object->datum nts) list-of-terms)
(raise-syntax-error
'parser-non-terminals
(format "~a used as both token and non-terminal"
(syntax-object->datum nts))
nts)))
(syntax->list (syntax (non-term ...))))
(if (not (memq start-sym
(syntax-object->datum (syntax (non-term ...)))))
(raise-syntax-error
'parser-start
(format "Start symbol ~a not defined as a non-terminal"
start-sym)
start))
(let ((dup (duplicate-list? (syntax-object->datum
(syntax (non-term ...))))))
(if dup
(raise-syntax-error
'parser-non-terminals
(format "non-terminal ~a defined multiple times"
dup)
prods)))
(syntax-object->datum (syntax (non-term ...)))))
(_
(raise-syntax-error
'parser-productions
"Productions must be of the form ((non-terminal productions ...) ...)"
prods))))
;; Check the precedence declarations for errors and turn them into data
(precs
(syntax-case prec-decls ()
(((type term ...) ...)
(let ((p-terms
(apply append (syntax-object->datum
(syntax ((term ...) ...))))))
(cond
((duplicate-list? p-terms) =>
(lambda (d)
(raise-syntax-error
'parser-precedences
(format "duplicate precedence declaration for token ~a"
d)
prec-decls)))
(else
(for-each
(lambda (a)
(for-each
(lambda (t)
(if (not (memq (syntax-object->datum t)
list-of-terms))
(raise-syntax-error
'parser-precedences
(format
"Precedence declared for non-token ~a"
(syntax-object->datum t))
t)))
(syntax->list a)))
(syntax->list (syntax ((term ...) ...))))
(for-each
(lambda (type)
(if (not (memq (syntax-object->datum type)
`(left right nonassoc)))
(raise-syntax-error
'parser-precedences
"Associativity must be left, right or nonassoc"
type)))
(syntax->list (syntax (type ...))))
(syntax-object->datum prec-decls)))))
(_
(raise-syntax-error
'parser-precedences
"Precedence declaration must be of the form ((assoc term ...) ...) where assoc is left, right or nonassoc"
prec-decls))))
(terms (build-terms list-of-terms precs))
(non-terms (begin
(set! counter 1)
(map (lambda (non-term)
(begin0
(make-non-term non-term counter)
(set! counter (add1 counter))))
list-of-non-terms)))
(term-table (make-hash-table))
(non-term-table (make-hash-table)))
(for-each (lambda (t)
(hash-table-put! term-table (gram-sym-symbol t) t))
terms)
(for-each (lambda (nt)
(hash-table-put! non-term-table (gram-sym-symbol nt) nt))
non-terms)
(set! counter 1)
(let* (
;; parse-prod: syntax-object -> gram-sym vector
(parse-prod
(lambda (prod-so)
(syntax-case prod-so ()
((prod-rhs-sym ...)
(andmap identifier? (syntax->list prod-so))
(list->vector
(map (lambda (s)
(hash-table-get
term-table
(syntax-object->datum s)
(lambda ()
(hash-table-get
non-term-table
(syntax-object->datum s)
(lambda ()
(raise-syntax-error
'parser-production-rhs
(format
"~a is not declared as a terminal or non-terminal"
(syntax-object->datum s))
s))))))
(syntax->list prod-so))))
(_
(raise-syntax-error
'parser-production-rhs
"production right-hand-side must have form (symbol ...)"
prod-so)))))
;; parse-prod+action: non-term * syntax-object -> production
(parse-prod+action
(lambda (nt prod-so)
(syntax-case prod-so (prec)
((prod-rhs action)
(let ((p (parse-prod (syntax prod-rhs))))
(begin0
(make-prod
nt
p
counter
(let loop ((i (sub1 (vector-length p))))
(if (>= i 0)
(let ((gs (vector-ref p i)))
(if (term? gs)
(term-prec gs)
(loop (sub1 i))))
#f)))
(set! counter (add1 counter)))))
((prod-rhs (prec term) action)
(identifier? (syntax term))
(begin0
(make-prod
nt
(parse-prod (syntax prod-rhs))
counter
(term-prec
(hash-table-get
term-table
(syntax-object->datum (syntax term))
(lambda ()
(raise-syntax-error
'parser-production-rhs
(format
"unrecognized terminal ~a in precedence declaration"
(syntax-object->datum (syntax term)))
(syntax term)))))
(set! counter (add1 counter)))))
(_
(raise-syntax-error
'parser-production-rhs
"production must have form [(symbol ...) expression] or [(symbol ...) (prec symbol) expression]"
prod-so)))))
;; parse-prod-for-nt: syntax-object -> production list
(parse-prods-for-nt
(lambda (prods-so)
(syntax-case prods-so ()
((nt productions ...)
(> (length (syntax->list (syntax (productions ...)))) 0)
(let* ((prods (syntax-e prods-so))
(nt (hash-table-get non-term-table
(syntax-e (car prods)))))
(map (lambda (p) (parse-prod+action nt p))
(cdr prods))))
(_
(raise-syntax-error
'parser-productions
"A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side"
prods-so))))))
(let* ((start (make-non-term 'Start 0))
(prods
(cons
(list (make-prod start
(vector (hash-table-get non-term-table
start-sym))
0
#f))
(map parse-prods-for-nt (syntax->list prods))))
(nulls (nullable (apply append prods)
(add1 (length non-terms)))))
(printf "nullable: {~a}~n~n"
(apply string-append
(let loop ((i 0))
(cond
((>= i (vector-length nulls)) null)
((vector-ref nulls i)
(cons
(format "~a"
(gram-sym-symbol
(list-ref (cons start non-terms) i)))
(loop (add1 i))))
(else (loop (add1 i)))))))
(make-grammar
(list->vector prods)
(apply append prods)
nulls
(cons start non-terms)
terms
counter))))))

@ -0,0 +1,126 @@
#cs
(module lalr mzscheme
;; Compute LALR lookaheads from DeRemer and Pennello 1982
(require "lr0.ss"
"grammar.ss"
"graph.ss"
"array2d.ss"
(lib "list.ss"))
(provide compute-LA)
(define (array2d-add! a i1 i2 v)
(let ((old (array2d-ref a i1 i2)))
(array2d-set! a i1 i2 (cons v old))))
;; compute-DR: LR0-automaton * grammar -> (trans-key -> term list)
;; computes for each state, non-term transition pair, the terminals
;; which can transition out of the resulting state
(define (compute-DR a g)
(lambda (tk)
(let ((r (run-automaton (trans-key-st tk) (trans-key-gs tk) a)))
(filter
(lambda (term)
(run-automaton r term a))
(grammar-terms g)))))
;; compute-reads:
;; LR0-automaton * grammar -> (trans-key -> trans-key list)
(define (compute-reads a g)
(lambda (tk)
(let ((r (run-automaton (trans-key-st tk) (trans-key-gs tk) a)))
(map (lambda (x) (make-trans-key r x))
(filter (lambda (non-term)
(and (nullable? g non-term)
(run-automaton r non-term a)))
(grammar-non-terms g))))))
;; compute-read: LR0-automaton * grammar -> (trans-key -> term list)
(define (compute-read a g)
(let* ((dr (compute-DR a g))
(reads (compute-reads a g)))
(digraph (filter (lambda (x) (non-term? (trans-key-gs x)))
(hash-table-map (lr0-transitions a) (lambda (k v) k)))
reads
dr
(union term<?)
null)))
;; comput-includes-and-lookback:
;; lr0-automaton * grammar -> (value (trans-key -> trans-key list)
;; (kernel * prod -> trans-key list))
(define (compute-includes-and-lookback a g)
(let* ((states (lr0-states a))
(non-terms (grammar-non-terms g))
(num-states (vector-length states))
(num-non-terms (length non-terms))
(includes (make-array2d num-states num-non-terms null))
(lookback (make-array2d num-states
(grammar-num-prods g)
null)))
(let loop ((state 0))
(if (< state num-states)
(begin
(for-each
(lambda (non-term)
(for-each
(lambda (prod)
(let loop ((i (make-item prod 0))
(p (vector-ref states state)))
(if (and p i)
(begin
(if (and (non-term? (sym-at-dot i))
(nullable-after-dot? (move-dot-right i)
g))
(array2d-add! includes
(kernel-index p)
(gram-sym-index
(sym-at-dot i))
(make-trans-key
(vector-ref states state)
non-term)))
(if (not (move-dot-right i))
(array2d-add! lookback
(kernel-index p)
(prod-index prod)
(make-trans-key
(vector-ref states state)
non-term)))
(loop (move-dot-right i)
(run-automaton p (sym-at-dot i) a))))))
(get-nt-prods g non-term)))
non-terms)
(loop (add1 state)))))
(values (lambda (tk)
(array2d-ref includes
(kernel-index (trans-key-st tk))
(gram-sym-index (trans-key-gs tk))))
(lambda (state prod)
(array2d-ref lookback
(kernel-index state)
(prod-index prod))))))
;; compute-follow: LR0-automaton * grammar -> (trans-key -> term list)
(define (compute-follow a g includes)
(let ((read (compute-read a g)))
(digraph (filter (lambda (x) (non-term? (trans-key-gs x)))
(hash-table-map (lr0-transitions a) (lambda (k v) k)))
includes
read
(union term<?)
null)))
;; compute-LA: LR0-automaton * grammar -> (kernel * prod -> term list)
(define (compute-LA a g)
(let-values (((includes lookback) (compute-includes-and-lookback a g)))
(let ((follow (compute-follow a g includes)))
(lambda (k p)
(let* ((l (lookback k p))
(f (map follow l)))
(apply append f))))))
)

@ -0,0 +1,247 @@
#cs
(module lr0 mzscheme
;; Handle the LR0 automaton
(require "grammar.ss"
"graph.ss"
(lib "list.ss"))
(provide union build-lr0-automaton run-automaton (struct trans-key (st gs))
lr0-transitions lr0-states kernel-items kernel-index)
(define (union comp<?)
(letrec ((union
(lambda (l1 l2)
(cond
((null? l1) l2)
((null? l2) l1)
(else (let ((c1 (car l1))
(c2 (car l2)))
(cond
((comp<? c1 c2)
(cons c1 (union (cdr l1) l2)))
((comp<? c2 c1)
(cons c2 (union l1 (cdr l2))))
(else (union (cdr l1) l2)))))))))
union))
;; kernel = (make-kernel (LR1-item list) index)
;; the list must be kept sorted according to item<? so that equal? can
;; be used to compare kernels
;; LR0-automaton = (make-lr0 (trans-key kernel hash-table) (kernel vector))
;; trans-key = (make-trans-key kernel gram-sym)
(define-struct kernel (items index))
(define-struct trans-key (st gs))
(define-struct lr0 (transitions states))
;; The kernels in the automaton are represented cannonically.
;; That is (equal? a b) <=> (eq? a b)
(define (kernel->string k)
(apply string-append
`("{" ,@(map (lambda (i) (string-append (item->string i) ", "))
(kernel-items k))
"}")))
;; run-automaton: kernel * gram-sym * LR0-automaton -> kernel | #f
;; returns the state that the transition trans-key provides or #f
;; if there is no such state
(define (run-automaton k s a)
(hash-table-get (lr0-transitions a) (make-trans-key k s) (lambda () #f)))
;; build-LR0-automaton: grammar -> LR0-automaton
;; Constructs the kernels of the sets of LR(0) items of g
(define (build-lr0-automaton grammar)
; (printf "LR(0) automaton:~n")
(letrec (
(terms (list->vector (grammar-terms grammar)))
(non-terms (list->vector (grammar-non-terms grammar)))
(num-non-terms (vector-length non-terms))
(num-gram-syms (+ num-non-terms (vector-length terms)))
;; first-non-term: non-term -> non-term list
;; given a non-terminal symbol C, return those non-terminal
;; symbols A s.t. C -> An for some string of terminals and
;; non-terminals n where -> means a rightmost derivation in many
;; steps. Assumes that each non-term can be reduces to a string
;; of terms.
(first-non-term
(digraph (grammar-non-terms grammar)
(lambda (nt)
(filter non-term?
(map (lambda (prod)
(sym-at-dot (make-item prod 0)))
(get-nt-prods grammar nt))))
(lambda (nt) (list nt))
(union non-term<?)
null))
;; closure: LR1-item list -> LR1-item list
;; Creates a set of items containing i s.t. if A -> n.Xm is in it,
;; X -> .o is in it too.
(LR0-closure
(lambda (i)
(cond
((null? i) null)
(else
(let ((next-gsym (sym-at-dot (car i))))
(cond
((non-term? next-gsym)
(cons (car i)
(append
(apply append
(map (lambda (non-term)
(map (lambda (x)
(make-item x 0))
(get-nt-prods grammar
non-term)))
(first-non-term next-gsym)))
(LR0-closure (cdr i)))))
(else
(cons (car i) (LR0-closure (cdr i))))))))))
;; maps trans-keys to kernels
(automaton (make-hash-table 'equal))
;; keeps the kernels we have seen, so we can have a unique
;; list for each kernel
(kernels (make-hash-table 'equal))
(counter 1)
;; goto: LR1-item list -> LR1-item list list
;; creates new kernels by moving the dot in each item in the
;; LR0-closure of kernel to the right, and grouping them by
;; the term/non-term moved over. Returns the kernels not
;; yet seen, and places the trans-keys into automaton
(goto
(lambda (kernel)
(let (
;; maps each gram-syms to a list of items
(table (make-vector num-gram-syms null))
;; add-item!:
;; (item list) vector * item ->
;; adds i into the table grouped with the grammar
;; symbol following its dot
(add-item!
(lambda (table i)
(let ((gs (sym-at-dot i)))
(if gs
(let* ((add (if (term? gs)
num-non-terms
0))
(already
(vector-ref table
(+ add
(gram-sym-index gs)))))
(if (not (member i already))
(vector-set! table
(+ add (gram-sym-index gs))
(cons i already)))))))))
;; Group the items of the LR0 closure of the kernel
;; by the character after the dot
(for-each (lambda (item)
(add-item! table item))
(LR0-closure (kernel-items kernel)))
;; each group is a new kernel, with the dot advanced.
;; sorts the items in a kernel so kernels can be compared
;; with equal? for using the table kernels to make sure
;; only one representitive of each kernel is created
(filter
(lambda (x) x)
(map
(lambda (i)
(let* ((gs (car i))
(items (cadr i))
(new #f)
(new-kernel (quicksort
(filter (lambda (x) x)
(map move-dot-right items))
item<?))
(unique-kernel (hash-table-get
kernels
new-kernel
(lambda ()
(let ((k (make-kernel
new-kernel
counter)))
(set! new #t)
(set! counter (add1 counter))
(hash-table-put! kernels
new-kernel
k)
k)))))
(hash-table-put! automaton
(make-trans-key kernel gs)
unique-kernel)
; (printf "~a -> ~a on ~a~n"
; (kernel->string kernel)
; (kernel->string unique-kernel)
; (gram-sym-symbol gs))
(if new
unique-kernel
#f)))
(let loop ((i 0))
(cond
((< i num-non-terms)
(let ((items (vector-ref table i)))
(cond
((null? items) (loop (add1 i)))
(else
(cons (list (vector-ref non-terms i) items)
(loop (add1 i)))))))
((< i num-gram-syms)
(let ((items (vector-ref table i)))
(cond
((null? items) (loop (add1 i)))
(else
(cons (list (vector-ref terms (- i num-non-terms))
items)
(loop (add1 i)))))))
(else null))))))))
(start (list (make-item (get-init-prod grammar) 0)))
(startk (make-kernel start 0))
(new-kernels (make-queue)))
(hash-table-put! kernels start startk)
(let loop ((old-kernels (list startk))
(seen-kernels null))
(cond
((and (empty-queue? new-kernels) (null? old-kernels))
(make-lr0 automaton (list->vector (reverse! seen-kernels))))
((null? old-kernels)
(loop (deq! new-kernels) seen-kernels))
(else
(enq! new-kernels (goto (car old-kernels)))
(loop (cdr old-kernels) (cons (car old-kernels) seen-kernels)))))))
(define-struct q (f l))
(define (empty-queue? q)
(null? (q-f q)))
(define (make-queue)
(make-q null null))
(define (enq! q i)
(if (empty-queue? q)
(let ((i (list i)))
(set-q-l! q i)
(set-q-f! q i))
(begin
(set-cdr! (q-l q) (list i))
(set-q-l! q (cdr (q-l q))))))
(define (deq! q)
(begin0
(car (q-f q))
(set-q-f! q (cdr (q-f q)))))
)

@ -0,0 +1,19 @@
#cs
(module parser-actions mzscheme
;; The entries into the action table
(provide shift? reduce? accept?
shift-state reduce-prod-num reduce-lhs-num reduce-rhs-length
make-shift make-reduce)
;; action = (shift int)
;; | (reduce int int int)
;; | (accept)
;; | int
;; | #f
(define-struct shift (state))
(define-struct reduce (prod-num lhs-num rhs-length))
(define-struct accept ())
)

@ -0,0 +1,77 @@
#cs
(module parser-builder mzscheme
(require "input-file-parser.ss"
"table.ss"
"parser-actions.ss"
"grammar.ss"
(lib "pretty.ss"))
(provide build-parser)
(define (build-parser start input-terms assocs prods filename runtime src)
(let* ((grammar (parse-input start input-terms assocs prods))
(table (build-table grammar ""))
(table-code
(cons 'vector
(map (lambda (action)
(cond
((shift? action)
`(make-shift ,(shift-state action)))
((reduce? action)
`(make-reduce ,(reduce-prod-num action)
,(reduce-lhs-num action)
,(reduce-rhs-length action)))
((accept? action)
`(make-accept))
(else action)))
(vector->list table))))
(num-non-terms (length (grammar-non-terms grammar)))
(token-code
`(let ((ht (make-hash-table)))
(begin
,@(map (lambda (term)
`(hash-table-put! ht
',(gram-sym-symbol term)
,(+ num-non-terms (gram-sym-index term))))
(grammar-terms grammar))
ht)))
(parser-code
`(letrec ((term-sym->index ,token-code)
(table ,table-code)
(pop-2x
(lambda (s n)
(if (> n 0)
(pop-2x (cdr (cdr s)) (sub1 n))
s))))
(lambda (get-token)
(let loop ((stack (list 0)))
(let* ((next (get-token))
(s (car stack))
(a (hash-table-get term-sym->index
(if (token? next)
(token-name next)
next)))
(action (array2d-ref table s a)))
(cond
((shift? action)
(loop (cons (shift-state action) (cons a stack))))
((reduce? action)
(display (reduce-prod-num action))
(newline)
(let* ((A (reduce-lhs-num action))
(new-stack (pop-2x stack (reduce-rhs-length action)))
(goto (array2d-ref table (car new-stack) A)))
(loop (cons goto (cons A new-stack)))))
((accept? action)
(printf "accept~n")))))))))
(pretty-print parser-code)
(newline)
(datum->syntax-object
runtime
parser-code
src))))

@ -0,0 +1,286 @@
#cs
(module table mzscheme
;; Routine to build the LALR table
(require "grammar.ss"
"lr0.ss"
"array2d.ss"
"lalr.ss"
"parser-actions.ss"
(lib "list.ss"))
(provide build-table)
;; print-entry: symbol * action * output-port ->
;; prints the action a for lookahead sym to port
(define (print-entry sym a port)
(let ((s "\t~a\t\t\t\t\t~a\t~a\n"))
(cond
((shift? a)
(fprintf port s sym "shift" (shift-state a)))
((reduce? a)
(fprintf port s sym "reduce" (reduce-prod-num a)))
((accept? a)
(fprintf port s sym "accept" ""))
(a
(fprintf port s sym "goto" a)))))
;; count: ('a -> bool) * 'a list -> num
;; counts the number of elements in list that satisfy pred
(define (count pred list)
(cond
((null? list) 0)
((pred (car list)) (+ 1 (count pred (cdr list))))
(else (count pred (cdr list)))))
;; display-parser:
;; action array2d * term vector * non-term vector * kernel vector *
;; output-port ->
;; Prints out the parser given by table.
(define (display-parser table terms non-terms states port)
(let* ((num-terms (vector-length terms))
(num-non-terms (vector-length non-terms))
(num-gram-syms (+ num-terms num-non-terms))
(num-states (vector-length states))
(SR-conflicts 0)
(RR-conflicts 0))
(let loop ((i 0))
(if (< i num-states)
(begin
(fprintf port "State ~a~n" i)
(for-each (lambda (item)
(fprintf port "\t~a~n" (item->string item)))
(kernel-items (vector-ref states i)))
(newline port)
(let loop ((j 0))
(if (< j num-terms)
(begin
(let ((act (array2d-ref
table
i
(+ j num-non-terms))))
(cond
((list? act)
(fprintf port "begin conflict:~n")
(if (> (count reduce? act) 1)
(set! RR-conflicts (add1 RR-conflicts)))
(if (> (count shift? act) 0)
(set! SR-conflicts (add1 SR-conflicts)))
(map (lambda (x)
(print-entry
(gram-sym-symbol (vector-ref terms j))
x
port))
act)
(fprintf port "end conflict~n"))
(act (print-entry
(gram-sym-symbol (vector-ref terms j))
act
port))))
(loop (add1 j)))))
(newline port)
(let loop ((j 0))
(if (< j num-non-terms)
(begin
(let ((s (array2d-ref table i j)))
(if s
(print-entry
(gram-sym-symbol (vector-ref non-terms j))
s
port)))
(loop (add1 j)))))
(newline port)
(loop (add1 i)))))
(if (> SR-conflicts 0)
(fprintf port "~a shift/reduce conflicts~n" SR-conflicts))
(if (> RR-conflicts 0)
(fprintf port "~a reduce/reduce conflicts~n" RR-conflicts))))
(define (resolve-conflicts table num-states num-terms num-non-terms)
(letrec ((SR-conflicts 0)
(RR-conflicts 0)
(get-action
(lambda (entry)
(cond
((list? entry)
(if (> (count shift? entry) 0)
(set! SR-conflicts (add1 SR-conflicts)))
(if (> (count reduce? entry) 1)
(set! RR-conflicts (add1 RR-conflicts)))
(let loop ((current-guess (make-reduce +inf.0 -1 -1))
(rest entry))
(cond
((null? rest) current-guess)
((shift? (car rest)) (car rest))
((< (reduce-prod-num (car rest))
(reduce-prod-num current-guess))
(loop (car rest) (cdr rest)))
(else (loop current-guess (cdr rest))))))
(else entry)))))
(let loop ((state 0))
(if (< state num-states)
(begin
(let loop ((term 0))
(if (< term num-terms)
(begin
(array2d-set! table state (+ num-non-terms term)
(get-action
(array2d-ref table
state
(+ num-non-terms term))))
(loop (add1 term)))))
(loop (add1 state)))))
(if (> SR-conflicts 0)
(fprintf (current-error-port)
"~a shift/reduce conflicts~n"
SR-conflicts))
(if (> RR-conflicts 0)
(fprintf (current-error-port)
"~a reduce/reduce conflicts~n"
RR-conflicts))))
(define (resolve-prec-conflicts table get-term get-prod
num-states num-terms num-non-terms)
(let loop ((state 0))
(if (< state num-states)
(begin
(let loop ((term 0))
(if (< term num-terms)
(begin
(let ((action (array2d-ref table
state
(+ num-non-terms term))))
(if (and (list? action)
(= 2 (length action))
(or (shift? (car action))
(shift? (cadr action))))
(let* ((shift (if (shift? (car action))
(car action)
(cadr action)))
(reduce (if (shift? (car action))
(cadr action)
(car action)))
(s-prec (term-prec
(vector-ref get-term
term)))
(r-prec (prod-prec
(vector-ref
get-prod
(reduce-prod-num reduce)))))
(if (and s-prec r-prec)
(array2d-set!
table
state
(+ num-non-terms term)
(cond
((< (prec-num s-prec)
(prec-num r-prec))
reduce)
((> (prec-num s-prec)
(prec-num r-prec))
shift)
((eq? 'left (prec-assoc s-prec))
reduce)
((eq? 'right (prec-assoc s-prec))
shift)
(else #f)))))))
(loop (add1 term)))))
(loop (add1 state))))))
;; In the result table the first index is the state and the second is the
;; term/non-term index (with the non-terms coming first)
;; buile-table: grammar * string -> action2d-array
(define (build-table g file)
(let* ((a (build-lr0-automaton g))
(terms (grammar-terms g))
(non-terms (grammar-non-terms g))
(get-state (lr0-states a))
(get-term (list->vector terms))
(get-non-term (list->vector non-terms))
(get-prod (list->vector (grammar-prods g)))
(num-states (vector-length get-state))
(num-terms (vector-length get-term))
(num-non-terms (vector-length get-non-term))
(num-gram-syms (+ num-terms num-non-terms))
(table (make-array2d num-states num-gram-syms #f))
(array2d-add!
(lambda (v i1 i2 a)
(let ((old (array2d-ref v i1 i2)))
(cond
((not old) (array2d-set! v i1 i2 a))
((list? old) (if (not (member a old))
(array2d-set! v i1 i2 (cons a old))))
(else (if (not (equal? a old))
(array2d-set! v i1 i2 (list a old))))))))
(get-lookahead (compute-LA a g)))
(let loop ((state 0))
(if (< state num-states)
(begin
(let loop ((i 0))
(if (< i num-gram-syms)
(begin
(let* ((s (if (< i num-non-terms)
(vector-ref get-non-term i)
(vector-ref get-term (- i num-non-terms))))
(goto
(run-automaton (vector-ref get-state state)
s
a)))
(if goto
(array2d-set! table
state
i
(if (< i num-non-terms)
(kernel-index goto)
(make-shift
(kernel-index goto))))))
(loop (add1 i)))))
(let ((items
(filter (lambda (item)
(not (move-dot-right item)))
(kernel-items
(vector-ref get-state state)))))
(for-each
(lambda (item)
(for-each
(lambda (t)
(array2d-add! table
state
(+ num-non-terms (gram-sym-index t))
(cond
((not (start-item? item))
(make-reduce
(item-prod-index item)
(gram-sym-index (prod-lhs (item-prod item)))
(vector-length (prod-rhs (item-prod item))))))))
(get-lookahead (vector-ref get-state state)
(item-prod item))))
items))
(loop (add1 state)))))
(resolve-prec-conflicts table get-term get-prod num-states num-terms
num-non-terms)
(if (not (string=? file ""))
(with-handlers [(exn:i/o:filesystem?
(lambda (e)
(fprintf
(current-error-port)
"Cannot write debug output to file \"~a\". ~a~n"
(exn:i/o:filesystem-pathname e)
(exn:i/o:filesystem-detail e))))]
(call-with-output-file file
(lambda (port)
(display-parser table get-term get-non-term get-state port)))))
(resolve-conflicts table num-states num-terms num-non-terms)
table))
)

@ -0,0 +1,35 @@
#cs
(module terminal-syntax mzscheme
;; The things needed at compile time to handle definition of tokens
(provide make-terminals-def terminals-def-t terminals-def? define-tokens-helper)
(define-struct terminals-def (t))
(define (define-tokens-helper stx hack empty?)
(syntax-case stx ()
((_ name (terms ...))
(andmap identifier? (syntax->list (syntax (terms ...))))
(datum->syntax-object
hack
`(begin
(define-syntax ,(syntax name)
(make-terminals-def ',(syntax (terms ...))))
,@(map
(lambda (n)
`(define (,(datum->syntax-object
n
(string->symbol
(format "token-~a" (syntax-object->datum n)))
n)
,@(if empty? '() '(x)))
(make-token ',n ,(if empty? #f 'x))))
(syntax->list (syntax (terms ...)))))
stx))
((_ ...)
(raise-syntax-error
#f
"must have the form (define-tokens name (symbol ...))"
stx))))
)

@ -0,0 +1,24 @@
#cs
(module terminal mzscheme
;; Defining tokens
(require-for-syntax "terminal-syntax.ss")
(provide define-tokens define-empty-tokens token-name token-value token?)
(define-struct token (name value))
(define-syntax define-tokens
(lambda (stx)
(syntax-case stx ()
((_ name ...)
(define-tokens-helper stx #'hack #f)))))
(define-syntax define-empty-tokens
(lambda (stx)
(syntax-case stx ()
((_ name ...)
(define-tokens-helper stx #'hack #t)))))
)

@ -0,0 +1,63 @@
#cs
(module yacc-helper mzscheme
;; General helper routines
(provide duplicate-list? remove-duplicates overlap? vector-andmap)
(define (vector-andmap f v)
(let loop ((i 0))
(cond
((= i (vector-length v)) #t)
(else (if (f (vector-ref v i))
(loop (add1 i))
#f)))))
;; duplicate-list?: symbol list -> #f | symbol
;; returns a symbol that exists twice in l, or false if no such symbol
;; exists
(define (duplicate-list? l)
(letrec ((t (make-hash-table))
(dl? (lambda (l)
(cond
((null? l) #f)
((hash-table-get t (car l) (lambda () #f)) =>
(lambda (x) x))
(else
(hash-table-put! t (car l) (car l))
(dl? (cdr l)))))))
(dl? l)))
;; remove-duplicates: symbol list -> symbol list
;; removes the duplicates from the lists
(define (remove-duplicates sl)
(let ((t (make-hash-table)))
(letrec ((x
(lambda (sl)
(cond
((null? sl) sl)
((hash-table-get t (car sl) (lambda () #f))
(x (cdr sl)))
(else
(hash-table-put! t (car sl) #t)
(cons (car sl) (x (cdr sl))))))))
(x sl))))
;; overlap?: symbol list * symbol list -> #f | symbol
;; Returns an symbol in l1 intersect l2, or #f is no such symbol exists
(define (overlap? l1 l2)
(let/ec ret
(let ((t (make-hash-table)))
(for-each (lambda (s1)
(hash-table-put! t s1 s1))
l1)
(for-each (lambda (s2)
(cond
((hash-table-get t s2 (lambda () #f)) =>
(lambda (o) (ret o)))))
l2)
#f)))
)

@ -0,0 +1,40 @@
#cs
(module yacc mzscheme
(require-for-syntax "private-yacc/parser-builder.ss")
(require "private-yacc/terminal.ss"
"private-yacc/parser-actions.ss"
"private-yacc/array2d.ss")
(provide define-tokens define-empty-tokens parser parser-debug)
(define-syntax parser
(lambda (stx)
(syntax-case stx ()
((_ start input-terms assocs prods)
(build-parser (syntax start) (syntax input-terms)
(syntax assocs) (syntax prods)
"" #'here stx))
(_
(raise-syntax-error
#f
"parser must have the form (parser start-symbol tokens precedence/associativity productions)"
stx)))))
(define-syntax parser-debug
(lambda (stx)
(syntax-case stx ()
((_ filename start input-terms assocs prods)
(string? (syntax-object->datum (syntax filename)))
(build-parser (syntax start) (syntax input-terms)
(syntax assocs) (syntax prods)
(syntax-object->datum (syntax filename))
#'here stx))
(_
(raise-syntax-error
#f
"parser must have the form (parser-debug filename start-symbol tokens precedence/associativity productions) where filename is a string"
stx)))))
)
Loading…
Cancel
Save