You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
brag/parser-tools/private-yacc/lr0.rkt

312 lines
12 KiB
Racket

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

#lang racket/base
(require yaragg/parser-tools/private-yacc/grammar
yaragg/parser-tools/private-yacc/graph
racket/class)
;; Handle the LR0 automaton
(provide build-lr0-automaton lr0%
(struct-out trans-key) trans-key-list-remove-dups
kernel-items kernel-index)
;; kernel = (kernel (LR1-item list) index)
;; the list must be kept sorted according to item<? so that equal? can
;; be used to compare kernels
;; Each kernel is assigned a unique index, 0 <= index < number of states
;; trans-key = (trans-key kernel gram-sym)
(struct kernel (items index) #:transparent)
(struct trans-key (st gs) #:transparent)
(define (trans-key<? a b)
(define kia (kernel-index (trans-key-st a)))
(define kib (kernel-index (trans-key-st b)))
(or (< kia kib)
(and (= kia kib)
(< (non-term-index (trans-key-gs a))
(non-term-index (trans-key-gs b))))))
(define (trans-key-list-remove-dups tkl)
(let loop ([sorted (sort tkl trans-key<?)])
(cond
[(null? sorted) '()]
[(null? (cdr sorted)) sorted]
[(and (= (non-term-index (trans-key-gs (car sorted)))
(non-term-index (trans-key-gs (cadr sorted))))
(= (kernel-index (trans-key-st (car sorted)))
(kernel-index (trans-key-st (cadr sorted)))))
(loop (cdr sorted))]
[else
(cons (car sorted) (loop (cdr sorted)))])))
;; build-transition-table : int (listof (cons/c trans-key X) ->
;; (vectorof (symbol X hashtable))
(define (build-transition-table num-states assoc)
(define transitions (make-vector num-states #f))
(let loop ([i (sub1 (vector-length transitions))])
(when (>= i 0)
(vector-set! transitions i (make-hasheq))
(loop (sub1 i))))
(for ([trans-key/kernel (in-list assoc)])
(define tk (car trans-key/kernel))
(hash-set! (vector-ref transitions (kernel-index (trans-key-st tk)))
(gram-sym-symbol (trans-key-gs tk))
(cdr trans-key/kernel)))
transitions)
;; reverse-assoc : (listof (cons/c trans-key? kernel?)) ->
;; (listof (cons/c trans-key? (listof kernel?)))
(define (reverse-assoc assoc)
(define reverse-hash (make-hash))
(define (hash-table-add! ht k v)
(hash-set! ht k (cons v (hash-ref ht k '()))))
(for ([trans-key/kernel (in-list assoc)])
(define tk (car trans-key/kernel))
(hash-table-add! reverse-hash
(trans-key (cdr trans-key/kernel)
(trans-key-gs tk))
(trans-key-st tk)))
(hash-map reverse-hash cons))
;; kernel-list-remove-duplicates
;; LR0-automaton = object of class lr0%
(define lr0%
(class object%
(super-instantiate ())
;; term-assoc : (listof (cons/c trans-key? kernel?))
;; non-term-assoc : (listof (cons/c trans-key? kernel?))
;; states : (vectorof kernel?)
;; epsilons : ???
(init-field term-assoc non-term-assoc states epsilons)
(define transitions (build-transition-table (vector-length states)
(append term-assoc non-term-assoc)))
(define reverse-term-assoc (reverse-assoc term-assoc))
(define reverse-non-term-assoc (reverse-assoc non-term-assoc))
(define reverse-transitions
(build-transition-table (vector-length states)
(append reverse-term-assoc reverse-non-term-assoc)))
(define mapped-non-terms (map car non-term-assoc))
(define/public (get-mapped-non-term-keys)
mapped-non-terms)
(define/public (get-num-states)
(vector-length states))
(define/public (get-epsilon-trans)
epsilons)
(define/public (get-transitions)
(append term-assoc non-term-assoc))
;; for-each-state : (state ->) ->
;; Iteration over the states in an automaton
(define/public (for-each-state f)
(define num-states (vector-length states))
(let loop ([i 0])
(when (< i num-states)
(f (vector-ref states i))
(loop (add1 i)))))
;; run-automaton: kernel? gram-sym? -> (union kernel #f)
;; returns the state reached from state k on input s, or #f when k
;; has no transition on s
(define/public (run-automaton k s)
(hash-ref (vector-ref transitions (kernel-index k))
(gram-sym-symbol s)
#f))
;; run-automaton-back : (listof kernel?) gram-sym? -> (listof kernel)
;; returns the list of states that can reach k by transitioning on s.
(define/public (run-automaton-back k s)
(for*/list ([k (in-list k)]
[val (in-list (hash-ref (vector-ref reverse-transitions (kernel-index k))
(gram-sym-symbol s)
'()))])
val))))
(define ((union comp<?) l1 l2)
(let loop ([l1 l1] [l2 l2])
(cond
[(null? l1) l2]
[(null? l2) l1]
[else (define c1 (car l1))
(define c2 (car l2))
(cond
[(comp<? c1 c2) (cons c1 (loop (cdr l1) l2))]
[(comp<? c2 c1) (cons c2 (loop l1 (cdr l2)))]
[else (loop (cdr l1) l2)])])))
;; The kernels in the automaton are represented cannonically.
;; That is (equal? a b) <=> (eq? a b)
(define (kernel->string k)
(apply string-append
`("{" ,@(map (λ (i) (string-append (item->string i) ", "))
(kernel-items k))
"}")))
;; build-LR0-automaton: grammar -> LR0-automaton
;; Constructs the kernels of the sets of LR(0) items of g
(define (build-lr0-automaton grammar)
; (printf "LR(0) automaton:\n")
(define epsilons (make-hash))
(define grammar-symbols (append (grammar-non-terms grammar)
(grammar-terms grammar)))
;; first-non-term: non-term -> non-term list
;; given a non-terminal symbol C, return those non-terminal
;; symbols A s.t. C -> An for some string of terminals and
;; non-terminals n where -> means a rightmost derivation in many
;; steps. Assumes that each non-term can be reduced to a string
;; of terms.
(define first-non-term
(digraph (grammar-non-terms grammar)
(λ (nt)
(filter non-term?
(map (λ (prod) (sym-at-dot (item prod 0)))
(grammar-prods-for-non-term grammar nt))))
(λ (nt) (list nt))
(union non-term<?)
(λ () '())))
;; closure: LR1-item list -> LR1-item list
;; Creates a set of items containing i s.t. if A -> n.Xm is in it,
;; X -> .o is in it too.
(define (LR0-closure i)
(cond
[(null? i) '()]
[else
(define next-gsym (sym-at-dot (car i)))
(cond
[(non-term? next-gsym)
(cons (car i)
(append
(for*/list ([non-term (in-list (first-non-term next-gsym))]
[x (in-list (grammar-prods-for-non-term grammar non-term))])
(item x 0))
(LR0-closure (cdr i))))]
[else (cons (car i) (LR0-closure (cdr i)))])]))
;; maps trans-keys to kernels
(define automaton-term '())
(define automaton-non-term '())
;; keeps the kernels we have seen, so we can have a unique
;; list for each kernel
(define kernels (make-hash))
(define counter 0)
;; goto: LR1-item list -> LR1-item list list
;; creates new kernels by moving the dot in each item in the
;; LR0-closure of kernel to the right, and grouping them by
;; the term/non-term moved over. Returns the kernels not
;; yet seen, and places the trans-keys into automaton
(define (goto ker)
;; maps a gram-syms to a list of items
(define table (make-hasheq))
;; add-item!:
;; (symbol (listof item) hashtable) item? ->
;; adds i into the table grouped with the grammar
;; symbol following its dot
(define (add-item! table i)
(define gs (sym-at-dot i))
(cond
[gs (define already (hash-ref table (gram-sym-symbol gs) '()))
(unless (member i already)
(hash-set! table (gram-sym-symbol gs) (cons i already)))]
((zero? (vector-length (prod-rhs (item-prod i))))
(define current (hash-ref epsilons ker '()))
(hash-set! epsilons ker (cons i current)))))
;; Group the items of the LR0 closure of the kernel
;; by the character after the dot
(for ([item (in-list (LR0-closure (kernel-items ker)))])
(add-item! table item))
;; each group is a new kernel, with the dot advanced.
;; sorts the items in a kernel so kernels can be compared
;; with equal? for using the table kernels to make sure
;; only one representitive of each kernel is created
(define is
(let loop ([gsyms grammar-symbols])
(cond
[(null? gsyms) '()]
[else
(define items (hash-ref table (gram-sym-symbol (car gsyms)) '()))
(cond
[(null? items) (loop (cdr gsyms))]
[else (cons (list (car gsyms) items)
(loop (cdr gsyms)))])])))
(filter
values
(for/list ([i (in-list is)])
(define gs (car i))
(define items (cadr i))
(define new #f)
(define new-kernel (sort (filter values (map move-dot-right items)) item<?))
(define unique-kernel (hash-ref kernels new-kernel
(λ ()
(define k (kernel new-kernel counter))
(set! new #t)
(set! counter (add1 counter))
(hash-set! kernels new-kernel k)
k)))
(if (term? gs)
(set! automaton-term (cons (cons (trans-key ker gs)
unique-kernel)
automaton-term))
(set! automaton-non-term (cons (cons (trans-key ker gs)
unique-kernel)
automaton-non-term)))
#;(printf "~a -> ~a on ~a\n"
(kernel->string kernel)
(kernel->string unique-kernel)
(gram-sym-symbol gs))
(and new unique-kernel))))
(define starts (map (λ (init-prod) (list (item init-prod 0)))
(grammar-init-prods grammar)))
(define startk (for/list ([start (in-list starts)])
(define k (kernel start counter))
(hash-set! kernels start k)
(set! counter (add1 counter))
k))
(define new-kernels (make-queue))
(let loop ([old-kernels startk]
[seen-kernels '()])
(cond
[(and (empty-queue? new-kernels) (null? old-kernels))
(make-object lr0% automaton-term automaton-non-term
(list->vector (reverse seen-kernels)) epsilons)]
[(null? old-kernels) (loop (deq! new-kernels) seen-kernels)]
[else
(enq! new-kernels (goto (car old-kernels)))
(loop (cdr old-kernels) (cons (car old-kernels) seen-kernels))])))
(struct q (f l) #:mutable)
(define (empty-queue? q) (null? (q-f q)))
(define (make-queue) (q '() '()))
(define (enq! q i)
(cond
[(empty-queue? q)
(let ([i (mcons i '())])
(set-q-l! q i)
(set-q-f! q i))]
[else
(set-mcdr! (q-l q) (mcons i '()))
(set-q-l! q (mcdr (q-l q)))]))
(define (deq! q)
(begin0
(mcar (q-f q))
(set-q-f! q (mcdr (q-f q)))))