*** empty log message ***

original commit: a8b8b4c9f8aae2ca3af3f086048be6c115defe1d
tokens
Scott Owens 23 years ago
parent 9b439ae7a6
commit 95300ddd13

@ -1,53 +1,45 @@
#cs
(module grammar mzscheme
(require (lib "class.ss"))
;; Constructs to create and access grammars, the internal
;; representation of the input to the parser generator.
(provide
(rename export-make-item make-item)
make-item
make-term
make-non-term
make-prec
make-prod
(rename make-gram make-grammar)
;; Things that work on items
start-item? item-prod item->string
sym-at-dot move-dot-right move-dot-right! item<? nullable-after-dot?
sym-at-dot move-dot-right item<? item-dot-pos
;; Things that operate on grammar symbols
gram-sym-symbol gram-sym-index term-prec gram-sym->string
non-term? term? nullable? non-term<? term<?
non-term? term? non-term<? term<?
term-list->bit-vector term-index non-term-index
;; Things that work on precs
prec-num prec-assoc
;;Things that work on grammars
get-nt-prods get-init-prod
(rename gram-non-terms grammar-non-terms)
(rename gram-terms grammar-terms)
(rename gram-num-prods grammar-num-prods)
(rename gram-prods grammar-prods)
(rename gram-end-terms grammar-end-terms)
grammar%
;; Things that work on productions
prod-index prod-prec prod-rhs prod-lhs prod-action)
;;---------------------- LR items --------------------------
;; LR-item = (make-item production nat (int | #f))
;; The n field contains the least integer such the item is nullable
;; after the dot if the dot is to the right of the nth position.
(define-struct item (prod dot-pos n) (make-inspector))
;; LR-item = (make-item production nat)
;; The dot-pos field is the index of the element in the rhs
;; of prod that the dot immediately preceeds.
;; Thus 0 <= dot-pos <= (vector-length rhs).
(define-struct item (prod dot-pos) (make-inspector))
(define (export-make-item a b)
(make-item a b #f))
;; item<?: LR-item * LR-item -> bool
;; Lexicographic comparison on two items.
(define (item<? i1 i2)
@ -59,6 +51,8 @@
(d2 (item-dot-pos i2)))
(< d1 d2))))))
;; start-item?: LR-item -> bool
;; The start production always has index 0
(define (start-item? i)
(= 0 (non-term-index (prod-lhs (item-prod i)))))
@ -70,43 +64,18 @@
(cond
((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f)
(else (make-item (item-prod i)
(add1 (item-dot-pos i))
(item-n i)))))
;; move-dot-right!: LR-item -> LR-item | #f
;; moves the dot to the right in the item, unless it is at its
;; rightmost, then it returns false
(define (move-dot-right! i)
(cond
((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f)
(else (set-item-dot-pos! i (add1 (item-dot-pos i)))
i)))
(add1 (item-dot-pos i))))))
;; sym-at-dot: LR-item -> gram-sym | #f
;; returns the symbol after the dot in the item or #f if there is none
(define (sym-at-dot i)
(let ((dp (item-dot-pos i)))
(let ((dp (item-dot-pos i))
(rhs (prod-rhs (item-prod i))))
(cond
((= dp (vector-length (prod-rhs (item-prod i)))) #f)
(else (vector-ref (prod-rhs (item-prod i)) dp)))))
((= dp (vector-length rhs)) #f)
(else (vector-ref rhs dp)))))
;; nullable-after-dot?: LR1-iten * grammar -> bool
;; determines if the string after the dot is nullable
(define (nullable-after-dot? i g)
(let ((i-n (item-n i)))
(cond
(i-n (>= (item-dot-pos i) i-n))
(else
(let ((str (prod-rhs (item-prod i))))
(let loop ((c (sub1 (vector-length str))))
(cond
((= c -1) (set-item-n! i 0))
((term? (vector-ref str c)) (set-item-n! i (add1 c)))
((nullable? g (vector-ref str c)) (loop (sub1 c)))
(else (set-item-n! i (add1 c))))))
(>= (item-dot-pos i) (item-n i))))))
;; print-item: LR-item ->
(define (item->string it)
(let ((print-sym (lambda (i)
@ -130,6 +99,8 @@
;; gram-sym = (make-term symbol int prec)
;; | (make-non-term symbol int)
;; Each term has a unique index 0 <= index < number of terms
;; Each non-term has a unique index 0 <= index < number of non-terms
(define-struct term (sym index prec) (make-inspector))
(define-struct non-term (sym index) (make-inspector))
@ -152,6 +123,9 @@
(define (gram-sym->string gs)
(symbol->string (gram-sym-symbol gs)))
;; term-list->bit-vector: term list -> int
;; Creates a number where the nth bit is 1 if the term with index n is in
;; the list, and whose nth bit is 0 otherwise
(define (term-list->bit-vector terms)
(cond
((null? terms) 0)
@ -166,44 +140,57 @@
(define-struct prec (num assoc) (make-inspector))
;; ------------------------- Grammar ------------------------------
;; grammar = (make-gram (production list vector)
;; (production list)
;; (bool vector)
;; (non-term list)
;; (term list)
;; int
;; (term list))
;;
;; The nt-prods field is indexed by the number assigned to the non-term and
;; contains the list of productions for that non-term
;; The prods field contains a list of all productions
;; The nulls field is indexed by the index for a non-term and is trus iff
;; the non-term is nullable
(define-struct gram
(nt-prods prods nulls non-terms terms num-prods end-terms)
(make-inspector))
;; get-nt-prods: grammar * non-term -> production list
;; returns the productions for the given non-term
(define (get-nt-prods g nt)
(vector-ref (gram-nt-prods g) (non-term-index nt)))
;; get-init-prod: grammar -> production
;; gets the starting production
(define (get-init-prod g)
(car (vector-ref (gram-nt-prods g) 0)))
(define grammar%
(class object%
(super-instantiate ())
;; prods: production list list
;; where the nth element in the outermost list is the list of productions with the nth non-term as lhs
(init prods)
;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable
(init-field terms non-terms nullable-non-terms end-terms)
(define (nullable? g nt)
(vector-ref (gram-nulls g) (non-term-index nt)))
;; indexed by the index of the non-term - contains the list of productions for that non-term
(define nt->prods (list->vector prods))
;; list of all productions
(define all-prods (apply append prods))
(define num-prods (length all-prods))
(define num-terms (length terms))
(define num-non-terms (length non-terms))
(define/public (get-num-terms) num-terms)
(define/public (get-num-non-terms) num-non-terms)
(define/public (get-prods-for-non-term nt)
(vector-ref nt->prods (non-term-index nt)))
(define/public (get-prods) all-prods)
(define/public (get-init-prod)
(car (vector-ref nt->prods 0)))
(define/public (get-terms) terms)
(define/public (get-non-terms) non-terms)
(define/public (get-num-prods) num-prods)
(define/public (get-end-terms) end-terms)
(define/public (nullable-non-term? nt)
(vector-ref nullable-non-terms (non-term-index nt)))
(define/public (nullable-after-dot? item)
(let* ((rhs (prod-rhs (item-prod item)))
(prod-length (vector-length rhs)))
(let loop ((i (item-dot-pos item)))
(cond
((< i prod-length)
(if (and (non-term? (vector-ref rhs i)) (nullable-non-term? (vector-ref rhs i)))
(loop (add1 i))
#f))
((= i prod-length) #t)))))))
;; ------------------------ Productions ---------------------------
;; production = (make-prod non-term (gram-sym vector) int prec syntax-object)
;; Each production has a unique index 0 <= index <= number of productions
(define-struct prod (lhs rhs index prec action) (make-inspector))
)

@ -4,7 +4,11 @@
;; routines for parsing the input to the parser generator and producing a
;; grammar (See grammar.ss)
(require "yacc-helper.ss" "../private-lex/token-syntax.ss" "grammar.ss" (lib "list.ss"))
(require "yacc-helper.ss"
"../private-lex/token-syntax.ss"
"grammar.ss"
(lib "list.ss")
(lib "class.ss"))
(provide parse-input get-term-list)
@ -418,13 +422,11 @@
;; (list-ref (cons start (cons end-non-term non-terms)) i)))
;; (loop (add1 i))))
;; (else (loop (add1 i)))))))
(make-grammar
(list->vector prods)
(apply append prods)
nulls
(cons start (cons end-non-term non-terms))
terms
(add1 counter)
(map (lambda (term-name)
(hash-table-get term-table term-name))
end-terms)))))))
(make-object grammar%
prods
terms
(cons start (cons end-non-term non-terms))
nulls
(map (lambda (term-name)
(hash-table-get term-table term-name))
end-terms)))))))

@ -5,16 +5,22 @@
(require "lr0.ss"
"grammar.ss"
"graph.ss"
"array2d.ss"
(lib "list.ss")
(lib "class.ss"))
(provide compute-LA)
;; compute-DR: LR0-automaton * grammar -> (trans-key -> term list)
(define (list-head l n)
(cond
((= 0 n) null)
(else (cons (car l) (list-head (cdr l) (sub1 n))))))
;; compute-DR: LR0-automaton * grammar -> (trans-key -> term set)
;; computes for each state, non-term transition pair, the terminals
;; which can transition out of the resulting state
;; output term set is represented in bit-vector form
(define (compute-DR a g)
(lambda (tk)
(let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk))))
@ -22,7 +28,7 @@
(filter
(lambda (term)
(send a run-automaton r term))
(grammar-terms g))))))
(send g get-terms))))))
;; compute-reads:
;; LR0-automaton * grammar -> (trans-key -> trans-key list)
@ -31,88 +37,150 @@
(let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk))))
(map (lambda (x) (make-trans-key r x))
(filter (lambda (non-term)
(and (nullable? g non-term)
(and (send g nullable-non-term? non-term)
(send a run-automaton r non-term)))
(grammar-non-terms g))))))
(send g get-non-terms ))))))
;; compute-read: LR0-automaton * grammar -> (trans-key -> term list)
;; compute-read: LR0-automaton * grammar -> (trans-key -> term set)
;; output term set is represented in bit-vector form
(define (compute-read a g)
(let* ((dr (compute-DR a g))
(reads (compute-reads a g)))
(digraph-tk->terml (send a get-mapped-non-term-keys)
reads
dr
(vector-length (send a get-states))
(length (grammar-terms g))
(length (grammar-non-terms g)))))
(send a get-num-states)
(send g get-num-terms)
(send g get-num-non-terms))))
;; run-lr0-backward: lr0-automaton * gram-sym list * kernel * int -> kernel list
;; returns the list of all k such that state k transitions to state start on the
;; transitions in rhs (in order)
(define (run-lr0-backward a rhs start num-states)
(let loop ((states (list start))
(rhs (reverse rhs)))
(cond
((null? rhs) states)
(else (loop (kernel-list-remove-duplicates
(send a run-automaton-back states (car rhs))
num-states)
(cdr rhs))))))
;; prod->items-for-include: grammar * prod * non-term -> lr0-item list
;; returns the list of all (B -> beta . nt gamma) such that prod = (B -> beta nt gamma)
;; and gamma =>* epsilon
(define (prod->items-for-include g prod nt)
(let* ((rhs (prod-rhs prod))
(rhs-l (vector-length rhs)))
(append (if (and (> rhs-l 0) (eq? nt (vector-ref rhs (sub1 rhs-l))))
(list (make-item prod (sub1 rhs-l)))
null)
(let loop ((i (sub1 rhs-l)))
(cond
((and (> i 0)
(non-term? (vector-ref rhs i))
(send g nullable-non-term? (vector-ref rhs i)))
(if (eq? nt (vector-ref rhs (sub1 i)))
(cons (make-item prod (sub1 i))
(loop (sub1 i)))
(loop (sub1 i))))
(else null))))))
;; prod-list->items-for-include: grammar * prod list * non-term -> lr0-item list
;; return the list of all (B -> beta . nt gamma) such that (B -> beta nt gamma) in prod-list
;; and gamma =>* epsilon
(define (prod-list->items-for-include g prod-list nt)
(apply append (map (lambda (prod) (prod->items-for-include g prod nt)) prod-list)))
;; comput-includes-and-lookback:
;; lr0-automaton * grammar -> (value (trans-key -> trans-key list)
;; (kernel * prod -> trans-key list))
(define (compute-includes-and-lookback a g)
(let* ((non-terms (grammar-non-terms g))
(num-states (vector-length (send a get-states)))
(num-non-terms (length non-terms))
(includes (make-array2d num-states num-non-terms null))
(lookback (make-array2d num-states
(grammar-num-prods g)
null)))
(send a for-each-state
(lambda (state)
(for-each
(lambda (non-term)
(for-each
(lambda (prod)
(let loop ((i (make-item prod 0))
(p state))
(if (and p i)
(let* ((next-sym (sym-at-dot i))
(new-i (move-dot-right i)))
(if (and (non-term? next-sym)
(nullable-after-dot? new-i g))
(array2d-add! includes
(kernel-index p)
(gram-sym-index next-sym)
(make-trans-key state non-term)))
(if (not new-i)
(array2d-add! lookback
(kernel-index p)
(prod-index prod)
(make-trans-key state non-term)))
(if next-sym
(loop new-i
(send a run-automaton p next-sym)))))))
(get-nt-prods g non-term)))
non-terms)))
(values (lambda (tk)
(array2d-ref includes
(kernel-index (trans-key-st tk))
(gram-sym-index (trans-key-gs tk))))
(lambda (state prod)
(array2d-ref lookback
(kernel-index state)
(prod-index prod))))))
;; compute-follow: LR0-automaton * grammar -> (trans-key -> term list)
;; comput-includes: lr0-automaton * grammar -> (trans-key -> trans-key list)
(define (compute-includes a g)
(let ((non-terms (send g get-non-terms))
(num-states (send a get-num-states)))
(lambda (tk)
(let ((goal-state (trans-key-st tk))
(non-term (trans-key-gs tk)))
(apply append
(map (lambda (B)
(map (lambda (state)
(make-trans-key state B))
(kernel-list-remove-duplicates
(let ((items (prod-list->items-for-include g (send g get-prods-for-non-term B) non-term)))
(apply append
(map (lambda (item)
(let ((rhs (prod-rhs (item-prod item))))
(run-lr0-backward a
(list-head (vector->list rhs)
(- (vector-length rhs)
(item-dot-pos item)))
goal-state
num-states)))
items)))
num-states)))
non-terms))))))
;; comput-includes: lr0-automaton * grammar -> (trans-key -> trans-key list)
; (define (compute-includes a g)
; (let* ((non-terms (send g get-non-terms))
; (num-states (vector-length (send a get-states)))
; (num-non-terms (length non-terms))
; (includes (make-array2d num-states num-non-terms null)))
; (send a for-each-state
; (lambda (state)
; (for-each
; (lambda (non-term)
; (for-each
; (lambda (prod)
; (let loop ((i (make-item prod 0))
; (p state))
; (if (and p i)
; (let* ((next-sym (sym-at-dot i))
; (new-i (move-dot-right i)))
; (if (and (non-term? next-sym)
; (send g nullable-after-dot? new-i))
; (array2d-add! includes
; (kernel-index p)
; (gram-sym-index next-sym)
; (make-trans-key state non-term)))
; (if next-sym
; (loop new-i
; (send a run-automaton p next-sym)))))))
; (send g get-prods-for-non-term non-term)))
; non-terms)))
;
; (lambda (tk)
; (array2d-ref includes
; (kernel-index (trans-key-st tk))
; (gram-sym-index (trans-key-gs tk))))))
;
; compute-lookback: lr0-automaton * grammar -> (kernel * proc -> trans-key list)
(define (compute-lookback a g)
(let ((num-states (send a get-num-states)))
(lambda (state prod)
(map (lambda (k) (make-trans-key k (prod-lhs prod)))
(run-lr0-backward a (vector->list (prod-rhs prod)) state num-states)))))
;; compute-follow: LR0-automaton * grammar -> (trans-key -> term set)
;; output term set is represented in bit-vector form
(define (compute-follow a g includes)
(let ((read (compute-read a g)))
(digraph-tk->terml (send a get-mapped-non-term-keys)
includes
read
(vector-length (send a get-states))
(length (grammar-terms g))
(length (grammar-non-terms g)))))
;; compute-LA: LR0-automaton * grammar -> (kernel * prod -> term list)
includes
read
(send a get-num-states)
(send g get-num-terms)
(send g get-num-non-terms))))
;; compute-LA: LR0-automaton * grammar -> (kernel * prod -> term set)
;; output term set is represented in bit-vector form
(define (compute-LA a g)
(let-values (((includes lookback) (time (compute-includes-and-lookback a g))))
(let ((follow (time (compute-follow a g includes))))
(lambda (k p)
(let* ((l (lookback k p))
(f (map follow l)))
(apply bitwise-ior (cons 0 f)))))))
(let* ((includes (compute-includes a g))
(lookback (compute-lookback a g))
(follow (compute-follow a g includes)))
(lambda (k p)
(let* ((l (lookback k p))
(f (map follow l)))
(apply bitwise-ior (cons 0 f))))))
(define (print-DR dr a g)
@ -141,7 +209,7 @@
state
(gram-sym-symbol non-term)
(print-output res)))))
(grammar-non-terms g))))
(send g get-non-terms))))
(newline))
(define (print-input-st-prod f name a g print-output)
@ -159,8 +227,8 @@
(kernel-index state)
(prod-index prod)
(print-output res)))))
(get-nt-prods g non-term)))
(grammar-non-terms g)))))
(send g get-prods-for-non-term non-term)))
(send g get-non-terms)))))
(define (print-output-terms r)
(map

@ -16,10 +16,13 @@
;; kernel = (make-kernel (LR1-item list) index)
;; the list must be kept sorted according to item<? so that equal? can
;; be used to compare kernels
;; Each kernel is assigned a uniqui index, 0 <= index < number of states
;; trans-key = (make-trans-key kernel gram-sym)
(define-struct kernel (items index) (make-inspector))
(define-struct trans-key (st gs) (make-inspector))
;; kernel-list-remove-duplicates: kernel list * int -> kernel list
(define (kernel-list-remove-duplicates k num-states)
(let ((v (make-vector num-states #f)))
(for-each
@ -28,19 +31,20 @@
k)
(let loop ((i 0))
(cond
((< i num-states)
(let ((k (vector-ref v i)))
(if k
(cons k (loop (add1 i)))
(loop (add1 i)))))
(else null)))))
((< i num-states)
(let ((k (vector-ref v i)))
(if k
(cons k (loop (add1 i)))
(loop (add1 i)))))
(else null)))))
;; LR0-automaton = object of class lr0%
(define lr0%
(class object%
(super-instantiate ())
;; Hash tables that map a trans-keys to a kernel
(init term-hash non-term-hash)
(init-field states epsilons num-terms num-non-terms)
@ -64,6 +68,9 @@
(define/public (get-states)
states)
(define/public (get-num-states)
(vector-length states))
(define/public (get-epsilon-trans)
epsilons)
@ -157,8 +164,8 @@
(define (build-lr0-automaton grammar)
; (printf "LR(0) automaton:~n")
(letrec (
(terms (list->vector (grammar-terms grammar)))
(non-terms (list->vector (grammar-non-terms grammar)))
(terms (list->vector (send grammar get-terms)))
(non-terms (list->vector (send grammar get-non-terms)))
(num-non-terms (vector-length non-terms))
(num-gram-syms (+ num-non-terms (vector-length terms)))
(epsilons (make-hash-table 'equal))
@ -170,12 +177,12 @@
;; steps. Assumes that each non-term can be reduced to a string
;; of terms.
(first-non-term
(digraph (grammar-non-terms grammar)
(digraph (send grammar get-non-terms)
(lambda (nt)
(filter non-term?
(map (lambda (prod)
(sym-at-dot (make-item prod 0)))
(get-nt-prods grammar nt))))
(send grammar get-prods-for-non-term nt))))
(lambda (nt) (list nt))
(union non-term<?)
(lambda () null)))
@ -197,8 +204,9 @@
(map (lambda (non-term)
(map (lambda (x)
(make-item x 0))
(get-nt-prods grammar
non-term)))
(send grammar
get-prods-for-non-term
non-term)))
(first-non-term next-gsym)))
(LR0-closure (cdr i)))))
(else
@ -321,7 +329,7 @@
(else null))))))))
(start (list (make-item (get-init-prod grammar) 0)))
(start (list (make-item (send grammar get-init-prod) 0)))
(startk (make-kernel start 0))
(new-kernels (make-queue)))

@ -4,7 +4,8 @@
(require "input-file-parser.ss"
"table.ss"
"parser-actions.ss"
"grammar.ss")
"grammar.ss"
(lib "class.ss"))
(provide build-parser)
@ -74,7 +75,7 @@
(else action)))
(vector->list table)))))
(num-non-terms (length (grammar-non-terms grammar)))
(num-non-terms (send grammar get-num-non-terms))
(token-code
`(let ((ht (make-hash-table)))
@ -83,11 +84,11 @@
`(hash-table-put! ht
',(gram-sym-symbol term)
,(+ num-non-terms (gram-sym-index term))))
(grammar-terms grammar))
(send grammar get-terms))
ht)))
(actions-code
`(vector ,@(map prod-action (grammar-prods grammar)))))
`(vector ,@(map prod-action (send grammar get-prods)))))
(values table-code
token-code
actions-code

@ -216,18 +216,18 @@
;; buile-table: grammar * string -> action2d-array
(define (build-table g file suppress)
(let* ((a (time (build-lr0-automaton g)))
(terms (grammar-terms g))
(non-terms (grammar-non-terms g))
(terms (send g get-terms))
(non-terms (send g get-non-terms))
(get-term (list->vector terms))
(get-non-term (list->vector non-terms))
(get-prod (list->vector (grammar-prods g)))
(get-prod (list->vector (send g get-prods)))
(num-terms (vector-length get-term))
(num-non-terms (vector-length get-non-term))
(end-term-indexes
(map
(lambda (term)
(+ num-non-terms (gram-sym-index term)))
(grammar-end-terms g)))
(send g get-end-terms)))
(num-gram-syms (+ num-terms num-non-terms))
(table (make-array2d (vector-length (send a get-states)) num-gram-syms #f))
(array2d-add!
@ -239,7 +239,7 @@
(array2d-set! v i1 i2 (cons a old))))
(else (if (not (equal? a old))
(array2d-set! v i1 i2 (list a old))))))))
(get-lookahead (compute-LA a g)))
(get-lookahead (time (compute-LA a g))))
(time
(send a for-each-state
(lambda (state)
@ -299,7 +299,7 @@
(exn:i/o:filesystem-detail e))))]
(call-with-output-file file
(lambda (port)
(display-parser a table get-term get-non-term (grammar-prods g)
(display-parser a table get-term get-non-term (send g get-prods)
port)))))
(resolve-conflicts a table num-terms num-non-terms suppress)
table))

Loading…
Cancel
Save