refactor into racket/base

pull/5/head
Matthew Butterick 6 years ago
parent fc1e00bc2a
commit fd446e6013

@ -47,18 +47,16 @@
(define-struct tasks (active active-back waits multi-waits cache progress?))
(define-for-syntax make-token-identifier-mapping make-hasheq)
(define-for-syntax token-identifier-mapping-get
(case-lambda
[(t tok)
(hash-ref t (syntax-e tok))]
[(t tok fail)
(hash-ref t (syntax-e tok) fail)]))
(define-for-syntax token-identifier-mapping-put!
(lambda (t tok v)
(hash-set! t (syntax-e tok) v)))
(define-for-syntax token-identifier-mapping-map
(lambda (t f)
(hash-map t f)))
(define-for-syntax (token-identifier-mapping-get t tok [fail #f])
(if fail
(hash-ref t (syntax-e tok) fail)
(hash-ref t (syntax-e tok))))
(define-for-syntax (token-identifier-mapping-put! t tok v)
(hash-set! t (syntax-e tok) v))
(define-for-syntax (token-identifier-mapping-map t f)
(hash-map t f))
;; Used to calculate information on the grammar, such as whether
;; a particular non-terminal is "simple" instead of recursively defined.
@ -71,7 +69,7 @@
(cdr as) (cdr bs))]))
(let loop ()
(when (ormap-all #f
(lambda (nt pats)
(λ (nt pats)
(let ([old (bound-identifier-mapping-get nts nt)])
(let ([new (proc nt pats old)])
(if (equal? old new)
@ -88,149 +86,120 @@
(define (parse-and simple-a? parse-a parse-b
stream last-consumed-token depth end success-k fail-k
max-depth tasks)
(letrec ([mk-got-k
(lambda (success-k fail-k)
(lambda (val stream last-consumed-token depth max-depth tasks next1-k)
(define ((mk-got-k success-k fail-k) val stream last-consumed-token depth max-depth tasks next1-k)
(if simple-a?
(parse-b val stream last-consumed-token depth end
(mk-got2-k success-k fail-k next1-k)
(mk-fail2-k success-k fail-k next1-k)
max-depth tasks)
(parallel-or
(lambda (success-k fail-k max-depth tasks)
(λ (success-k fail-k max-depth tasks)
(parse-b val stream last-consumed-token depth end
success-k fail-k
max-depth tasks))
(lambda (success-k fail-k max-depth tasks)
(λ (success-k fail-k max-depth tasks)
(next1-k (mk-got-k success-k fail-k)
fail-k max-depth tasks))
success-k fail-k max-depth tasks))))]
[mk-got2-k
(lambda (success-k fail-k next1-k)
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
success-k fail-k max-depth tasks)))
(define ((mk-got2-k success-k fail-k next1-k) val stream last-consumed-token depth max-depth tasks next-k)
(success-k val stream last-consumed-token depth max-depth tasks
(lambda (success-k fail-k max-depth tasks)
(λ (success-k fail-k max-depth tasks)
(next-k (mk-got2-k success-k fail-k next1-k)
(mk-fail2-k success-k fail-k next1-k)
max-depth tasks)))))]
[mk-fail2-k
(lambda (success-k fail-k next1-k)
(lambda (max-depth tasks)
(next1-k (mk-got-k success-k fail-k)
fail-k
max-depth
tasks)))])
max-depth tasks))))
(define ((mk-fail2-k success-k fail-k next1-k) max-depth tasks)
(next1-k (mk-got-k success-k fail-k) fail-k max-depth tasks))
(parse-a stream last-consumed-token depth end
(mk-got-k success-k fail-k)
fail-k
max-depth tasks)))
max-depth tasks))
;; Parallel or for non-terminal alternatives
(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)
(parallel-or (lambda (success-k fail-k max-depth tasks)
(parallel-or (λ (success-k fail-k max-depth tasks)
(parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks))
(lambda (success-k fail-k max-depth tasks)
(λ (success-k fail-k max-depth tasks)
(parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks))
success-k fail-k max-depth tasks))
;; Generic parallel-or
(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks)
(define answer-key (gensym))
(letrec ([gota-k
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
(define (gota-k val stream last-consumed-token depth max-depth tasks next-k)
(report-answer answer-key
max-depth
tasks
(list val stream last-consumed-token depth next-k)))]
[faila-k
(lambda (max-depth tasks)
(list val stream last-consumed-token depth next-k)))
(define (faila-k max-depth tasks)
(report-answer answer-key
max-depth
tasks
null))])
(let* ([tasks (queue-task
tasks
(lambda (max-depth tasks)
(parse-a gota-k
faila-k
max-depth tasks)))]
[tasks (queue-task
tasks
(lambda (max-depth tasks)
(parse-b gota-k
faila-k
max-depth tasks)))]
[queue-next (lambda (next-k tasks)
(queue-task tasks
(lambda (max-depth tasks)
(next-k gota-k
faila-k
max-depth tasks))))])
(letrec ([mk-got-one
(lambda (immediate-next? get-nth success-k)
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
null))
(let* ([tasks (queue-task tasks (λ (max-depth tasks)
(parse-a gota-k faila-k max-depth tasks)))]
[tasks (queue-task tasks (λ (max-depth tasks)
(parse-b gota-k faila-k max-depth tasks)))]
[queue-next (λ (next-k tasks)
(queue-task tasks (λ (max-depth tasks)
(next-k gota-k faila-k max-depth tasks))))])
(define ((mk-got-one immediate-next? get-nth success-k) val stream last-consumed-token depth max-depth tasks next-k)
(let ([tasks (if immediate-next?
(queue-next next-k tasks)
tasks)])
(success-k val stream last-consumed-token depth max-depth
tasks
(lambda (success-k fail-k max-depth tasks)
(λ (success-k fail-k max-depth tasks)
(let ([tasks (if immediate-next?
tasks
(queue-next next-k tasks))])
(get-nth max-depth tasks success-k fail-k)))))))]
[get-first
(lambda (max-depth tasks success-k fail-k)
(get-nth max-depth tasks success-k fail-k))))))
(define (get-first max-depth tasks success-k fail-k)
(wait-for-answer #f max-depth tasks answer-key
(mk-got-one #t get-first success-k)
(lambda (max-depth tasks)
(λ (max-depth tasks)
(get-second max-depth tasks success-k fail-k))
#f))]
[get-second
(lambda (max-depth tasks success-k fail-k)
#f))
(define (get-second max-depth tasks success-k fail-k)
(wait-for-answer #f max-depth tasks answer-key
(mk-got-one #f get-second success-k)
fail-k #f))])
(get-first max-depth tasks success-k fail-k)))))
fail-k #f))
(get-first max-depth tasks success-k fail-k)))
;; Non-terminal alternatives where the first is "simple" can be done
;; sequentially, which is simpler
(define (parse-or parse-a parse-b
stream last-consumed-token depth end success-k fail-k max-depth tasks)
(letrec ([mk-got-k
(lambda (success-k fail-k)
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
(define ((mk-got-k success-k fail-k) val stream last-consumed-token depth max-depth tasks next-k)
(success-k val stream last-consumed-token depth
max-depth tasks
(lambda (success-k fail-k max-depth tasks)
(λ (success-k fail-k max-depth tasks)
(next-k (mk-got-k success-k fail-k)
(mk-fail-k success-k fail-k)
max-depth tasks)))))]
[mk-fail-k
(lambda (success-k fail-k)
(lambda (max-depth tasks)
(parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))])
max-depth tasks))))
(define ((mk-fail-k success-k fail-k) max-depth tasks)
(parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks))
(parse-a stream last-consumed-token depth end
(mk-got-k success-k fail-k)
(mk-fail-k success-k fail-k)
max-depth tasks)))
max-depth tasks))
;; Starts a thread
(define queue-task
(lambda (tasks t [progress? #t])
(define (queue-task tasks t [progress? #t])
(make-tasks (tasks-active tasks)
(cons t (tasks-active-back tasks))
(tasks-waits tasks)
(tasks-multi-waits tasks)
(tasks-cache tasks)
(or progress? (tasks-progress? tasks)))))
(or progress? (tasks-progress? tasks))))
;; Reports an answer to a waiting thread:
(define (report-answer answer-key max-depth tasks val)
(let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))])
(define v (hash-ref (tasks-waits tasks) answer-key (λ () #f)))
(if v
(let ([tasks (make-tasks (cons (v val)
(tasks-active tasks))
(let ([tasks (make-tasks (cons (v val) (tasks-active tasks))
(tasks-active-back tasks)
(tasks-waits tasks)
(tasks-multi-waits tasks)
@ -241,29 +210,29 @@
;; We have an answer ready too fast; wait
(swap-task max-depth
(queue-task tasks
(lambda (max-depth tasks)
(λ (max-depth tasks)
(report-answer answer-key max-depth tasks val))
#f)))))
#f))))
;; Reports an answer to multiple waiting threads:
(define (report-answer-all answer-key max-depth tasks val k)
(let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))])
(define v (hash-ref (tasks-multi-waits tasks) answer-key (λ () null)))
(hash-remove! (tasks-multi-waits tasks) answer-key)
(let ([tasks (make-tasks (append (map (lambda (a) (a val)) v)
(let ([tasks (make-tasks (append (map (λ (a) (a val)) v)
(tasks-active tasks))
(tasks-active-back tasks)
(tasks-waits tasks)
(tasks-multi-waits tasks)
(tasks-cache tasks)
#t)])
(k max-depth tasks))))
(k max-depth tasks)))
;; Waits for an answer; if `multi?' is #f, this is sole waiter, otherwise
;; there might be many. Use wither #t or #f (and `report-answer' or
;; `report-answer-all', resptively) consistently for a particular answer key.
(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k)
(let ([wait (lambda (val)
(lambda (max-depth tasks)
(let ([wait (λ (val)
(λ (max-depth tasks)
(if val
(if (null? val)
(fail-k max-depth tasks)
@ -273,7 +242,7 @@
(if multi?
(hash-set! (tasks-multi-waits tasks) answer-key
(cons wait (hash-ref (tasks-multi-waits tasks) answer-key
(lambda () null))))
(λ () null))))
(hash-set! (tasks-waits tasks) answer-key wait))
(let ([tasks (make-tasks (tasks-active tasks)
(tasks-active-back tasks)
@ -302,8 +271,8 @@
(make-tasks (apply
append
(hash-map (tasks-multi-waits tasks)
(lambda (k l)
(map (lambda (v) (v #f)) l))))
(λ (k l)
(map (λ (v) (v #f)) l))))
(tasks-active-back tasks)
(tasks-waits tasks)
(make-hasheq)
@ -325,11 +294,9 @@
(define no-pos-val (make-position #f #f #f))
(define-for-syntax no-pos
(let ([npv ((syntax-local-certifier) #'no-pos-val)])
(lambda (stx) npv)))
(define-for-syntax at-tok-pos
(lambda (sel expr)
(lambda (stx)
#`(let ([v #,expr]) (if v (#,sel v) no-pos-val)))))
(λ (stx) npv)))
(define-for-syntax ((at-tok-pos sel expr) stx)
#`(let ([v #,expr]) (if v (#,sel v) no-pos-val)))
;; Builds a matcher for a particular alternative
(define-for-syntax (build-match nts toks pat handle $ctx)
@ -337,27 +304,23 @@
[pos 1])
(if (null? pat)
#`(success-k #,handle stream last-consumed-token depth max-depth tasks
(lambda (success-k fail-k max-depth tasks)
(λ (success-k fail-k max-depth tasks)
(fail-k max-depth tasks)))
(let ([id (datum->syntax (car pat)
(string->symbol (format "$~a" pos)))]
[id-start-pos (datum->syntax (car pat)
(string->symbol (format "$~a-start-pos" pos)))]
[id-end-pos (datum->syntax (car pat)
(string->symbol (format "$~a-end-pos" pos)))]
[n-end-pos (and (null? (cdr pat))
(datum->syntax (car pat) '$n-end-pos))])
(let ([id (datum->syntax (car pat) (string->symbol (format "$~a" pos)))]
[id-start-pos (datum->syntax (car pat) (string->symbol (format "$~a-start-pos" pos)))]
[id-end-pos (datum->syntax (car pat) (string->symbol (format "$~a-end-pos" pos)))]
[n-end-pos (and (null? (cdr pat)) (datum->syntax (car pat) '$n-end-pos))])
(cond
[(bound-identifier-mapping-get nts (car pat) (lambda () #f))
[(bound-identifier-mapping-get nts (car pat) (λ () #f))
;; Match non-termimal
#`(parse-and
;; First part is simple? (If so, we don't have to parallelize the `and'.)
#,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))])
#,(let ([l (bound-identifier-mapping-get nts (car pat) (λ () #f))])
(or (not l)
(andmap values (caddr l))))
#,(car pat)
(let ([original-stream stream])
(lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks)
(λ (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks)
(let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream)
tok-end
tok-start)
@ -372,10 +335,10 @@
#,(loop (cdr pat) (add1 pos)))))
stream last-consumed-token depth
#,(let ([cnt (apply +
(map (lambda (item)
(map (λ (item)
(cond
[(bound-identifier-mapping-get nts item (lambda () #f))
=> (lambda (l) (car l))]
[(bound-identifier-mapping-get nts item (λ () #f))
=> (λ (l) (car l))]
[else 1]))
(cdr pat)))])
#`(- end #,cnt))
@ -419,37 +382,36 @@
[max-depth max-depth]
[tasks tasks]
[k k])
(let ([answer-key (gensym)]
[table-key (vector key depth n)]
[old-depth depth]
[old-stream stream])
(define answer-key (gensym))
(define table-key (vector key depth n))
(define old-depth depth)
(define old-stream stream)
#;(printf "Loop ~a\n" table-key)
(cond
[(hash-ref (tasks-cache tasks) table-key (lambda () #f))
=> (lambda (result)
[(hash-ref (tasks-cache tasks) table-key (λ () #f))
=> (λ (result)
#;(printf "Reuse ~a\n" table-key)
(result success-k fail-k max-depth tasks))]
[else
#;(printf "Try ~a ~a\n" table-key (map tok-name stream))
(hash-set! (tasks-cache tasks) table-key
(lambda (success-k fail-k max-depth tasks)
(λ (success-k fail-k max-depth tasks)
#;(printf "Wait ~a ~a\n" table-key answer-key)
(wait-for-answer #t max-depth tasks answer-key success-k fail-k
(lambda (max-depth tasks)
(λ (max-depth tasks)
#;(printf "Deadlock ~a ~a\n" table-key answer-key)
(fail-k max-depth tasks)))))
(let result-loop ([max-depth max-depth][tasks tasks][k k])
(letrec ([orig-stream stream]
[new-got-k
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
(define orig-stream stream)
(define (new-got-k val stream last-consumed-token depth max-depth tasks next-k)
;; Check whether we already have a result that consumed the same amount:
(let ([result-key (vector #f key old-depth depth)])
(define result-key (vector #f key old-depth depth))
(cond
[(hash-ref (tasks-cache tasks) result-key (lambda () #f))
[(hash-ref (tasks-cache tasks) result-key (λ () #f))
;; Go for the next-result
(result-loop max-depth
tasks
(lambda (end max-depth tasks success-k fail-k)
(λ (end max-depth tasks success-k fail-k)
(next-k success-k fail-k max-depth tasks)))]
[else
#;(printf "Success ~a ~a\n" table-key
@ -457,37 +419,36 @@
(if (= d depth)
null
(cons (car s) (loop (add1 d) (cdr s)))))))
(let ([next-k (lambda (success-k fail-k max-depth tasks)
(let ([next-k (λ (success-k fail-k max-depth tasks)
(loop (add1 n)
success-k
fail-k
max-depth
tasks
(lambda (end max-depth tasks success-k fail-k)
(λ (end max-depth tasks success-k fail-k)
(next-k success-k fail-k max-depth tasks))))])
(hash-set! (tasks-cache tasks) result-key #t)
(hash-set! (tasks-cache tasks) table-key
(lambda (success-k fail-k max-depth tasks)
(λ (success-k fail-k max-depth tasks)
(success-k val stream last-consumed-token depth max-depth tasks next-k)))
(report-answer-all answer-key
max-depth
tasks
(list val stream last-consumed-token depth next-k)
(lambda (max-depth tasks)
(success-k val stream last-consumed-token depth max-depth tasks next-k))))])))]
[new-fail-k
(lambda (max-depth tasks)
(λ (max-depth tasks)
(success-k val stream last-consumed-token depth max-depth tasks next-k))))]))
(define (new-fail-k max-depth tasks)
#;(printf "Failure ~a\n" table-key)
(hash-set! (tasks-cache tasks) table-key
(lambda (success-k fail-k max-depth tasks)
(λ (success-k fail-k max-depth tasks)
(fail-k max-depth tasks)))
(report-answer-all answer-key
max-depth
tasks
null
(lambda (max-depth tasks)
(fail-k max-depth tasks))))])
(k end max-depth tasks new-got-k new-fail-k)))])))))
(λ (max-depth tasks)
(fail-k max-depth tasks))))
(k end max-depth tasks new-got-k new-fail-k))]))))
;; These temp identifiers can't be `gensym` or `generate-temporary`
;; because they have to be consistent between module loads
@ -497,39 +458,34 @@
(define-for-syntax atok-id-temp 'atok_wrutdjgecmybyfipiwsgjlvsveryodlgassuzcargiuznzgdghrykfqfbwcjgzdhdoeqxcucmtjkuyucskzethozhqkasphdwbht)
(define-syntax (cfg-parser stx)
(syntax-case stx ()
[(_ clause ...)
(let ([clauses (syntax->list #'(clause ...))])
[(_ CLAUSE ...)
(let ([clauses (syntax->list #'(CLAUSE ...))])
(let-values ([(start grammar cfg-error parser-clauses src-pos?)
(let ([all-toks (apply
append
(map (lambda (clause)
(for/list ([clause (in-list clauses)])
(syntax-case clause (tokens)
[(tokens t ...)
[(tokens T ...)
(apply
append
(map (lambda (t)
(let ([v (syntax-local-value t (lambda () #f))])
(for/list ([t (in-list (syntax->list #'(T ...)))])
(define v (syntax-local-value t (λ () #f)))
(cond
[(terminals-def? v)
(map (lambda (v)
(cons v #f))
(syntax->list (terminals-def-t v)))]
(for/list ([v (in-list (syntax->list (terminals-def-t v)))])
(cons v #f))]
[(e-terminals-def? v)
(map (lambda (v)
(cons v #t))
(syntax->list (e-terminals-def-t v)))]
[else null])))
(syntax->list #'(t ...))))]
[_else null]))
clauses))]
(for/list ([v (in-list (syntax->list (e-terminals-def-t v)))])
(cons v #t))]
[else null])))]
[_else null])))]
[all-end-toks (apply
append
(map (lambda (clause)
(for/list ([clause (in-list clauses)])
(syntax-case clause (end)
[(end t ...)
(syntax->list #'(t ...))]
[_else null]))
clauses))])
[(end T ...)
(syntax->list #'(T ...))]
[_else null])))])
(let loop ([clauses clauses]
[cfg-start #f]
[cfg-grammar #f]
@ -543,47 +499,35 @@
(reverse parser-clauses)
src-pos?)
(syntax-case (car clauses) (start error grammar src-pos)
[(start tok)
(loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)]
[(error expr)
(loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)]
[(grammar [nt [pat handle0 handle ...] ...] ...)
[(start TOK)
(loop (cdr clauses) #'TOK cfg-grammar cfg-error src-pos? parser-clauses)]
[(error EXPR)
(loop (cdr clauses) cfg-start cfg-grammar #'EXPR src-pos? parser-clauses)]
[(grammar [NT [PAT HANDLE0 HANDLE ...] ...] ...)
(let ([nts (make-bound-identifier-mapping)]
[toks (make-token-identifier-mapping)]
[end-toks (make-token-identifier-mapping)]
[nt-ids (syntax->list #'(nt ...))]
[patss (map (lambda (stx)
[nt-ids (syntax->list #'(NT ...))]
[patss (map (λ (stx)
(map syntax->list (syntax->list stx)))
(syntax->list #'((pat ...) ...)))])
(for-each (lambda (nt)
(syntax->list #'((PAT ...) ...)))])
(for ([nt (in-list nt-ids)])
(bound-identifier-mapping-put! nts nt (list 0)))
nt-ids)
(for-each (lambda (t)
(for ([t (in-list all-end-toks)])
(token-identifier-mapping-put! end-toks t #t))
all-end-toks)
(for-each (lambda (t)
(unless (token-identifier-mapping-get end-toks (car t) (lambda () #f))
(let ([id (gensym (syntax-e (car t)))])
(token-identifier-mapping-put! toks (car t)
(cons id (cdr t))))))
all-toks)
(for ([t (in-list all-toks)]
#:unless (token-identifier-mapping-get end-toks (car t) (λ () #f)))
(define id (gensym (syntax-e (car t))))
(token-identifier-mapping-put! toks (car t) (cons id (cdr t))))
;; Compute min max size for each non-term:
(nt-fixpoint
nts
(lambda (nt pats old-list)
(λ (nt pats old-list)
(let ([new-cnt
(apply
min
(map (lambda (pat)
(apply
+
(map (lambda (elem)
(car
(bound-identifier-mapping-get nts
elem
(lambda () (list 1)))))
pat)))
pats))])
(apply min (for/list ([pat (in-list pats)])
(for/sum ([elem (in-list pat)])
(car (bound-identifier-mapping-get
nts elem (λ () (list 1)))))))])
(if (new-cnt . > . (car old-list))
(cons new-cnt (cdr old-list))
old-list)))
@ -592,17 +536,17 @@
;; for a non-terminal
(nt-fixpoint
nts
(lambda (nt pats old-list)
(λ (nt pats old-list)
(let ([new-list
(apply
append
(map (lambda (pat)
(for/list ([pat (in-list pats)])
(let loop ([pat pat])
(if (pair? pat)
(let ([l (bound-identifier-mapping-get
nts
(car pat)
(lambda ()
(λ ()
(list 1 (map-token toks (car pat)))))])
;; If the non-terminal can match 0 things,
;; then it might match something from the
@ -611,10 +555,9 @@
(if (zero? (car l))
(append (cdr l) (loop (cdr pat)))
(cdr l)))
null)))
pats))])
(let ([new (filter (lambda (id)
(andmap (lambda (id2)
null))))])
(let ([new (filter (λ (id)
(andmap (λ (id2)
(not (eq? id id2)))
(cdr old-list)))
new-list)])
@ -623,7 +566,7 @@
(let ([new (let loop ([new new])
(if (null? (cdr new))
new
(if (ormap (lambda (id)
(if (ormap (λ (id)
(eq? (car new) id))
(cdr new))
(loop (cdr new))
@ -632,26 +575,26 @@
old-list))))
nt-ids patss)
;; Determine left-recursive clauses:
(for-each (lambda (nt pats)
(for-each (λ (nt pats)
(let ([l (bound-identifier-mapping-get nts nt)])
(bound-identifier-mapping-put! nts nt (list (car l)
(cdr l)
(map (lambda (x) #f) pats)))))
(map (λ (x) #f) pats)))))
nt-ids patss)
(nt-fixpoint
nts
(lambda (nt pats old-list)
(λ (nt pats old-list)
(list (car old-list)
(cadr old-list)
(map (lambda (pat simple?)
(map (λ (pat simple?)
(or simple?
(let ([l (map (lambda (elem)
(let ([l (map (λ (elem)
(bound-identifier-mapping-get
nts
elem
(lambda () #f)))
(λ () #f)))
pat)])
(andmap (lambda (i)
(andmap (λ (i)
(or (not i)
(andmap values (caddr i))))
l))))
@ -660,16 +603,16 @@
;; Build a definition for each non-term:
(loop (cdr clauses)
cfg-start
(map (lambda (nt pats handles $ctxs)
(map (λ (nt pats handles $ctxs)
(define info (bound-identifier-mapping-get nts nt))
(list nt
#`(let ([key (gensym '#,nt)])
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
(λ (stream last-consumed-token depth end success-k fail-k max-depth tasks)
(parse-nt/share
key #,(car info) '#,(cadr info) stream last-consumed-token depth end
max-depth tasks
success-k fail-k
(lambda (end max-depth tasks success-k fail-k)
(λ (end max-depth tasks success-k fail-k)
#,(let loop ([pats pats]
[handles (syntax->list handles)]
[$ctxs (syntax->list $ctxs)]
@ -680,13 +623,13 @@
(car simple?s))
#'parse-or
#'parse-parallel-or)
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
(λ (stream last-consumed-token depth end success-k fail-k max-depth tasks)
#,(build-match nts
toks
(car pats)
(car handles)
(car $ctxs)))
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
(λ (stream last-consumed-token depth end success-k fail-k max-depth tasks)
#,(loop (cdr pats)
(cdr handles)
(cdr $ctxs)
@ -694,14 +637,14 @@
stream last-consumed-token depth end success-k fail-k max-depth tasks)))))))))
nt-ids
patss
(syntax->list #'(((begin handle0 handle ...) ...) ...))
(syntax->list #'((handle0 ...) ...)))
(syntax->list #'(((begin HANDLE0 HANDLE ...) ...) ...))
(syntax->list #'((HANDLE0 ...) ...)))
cfg-error
src-pos?
(list*
(with-syntax ([((tok tok-id . $e) ...)
(token-identifier-mapping-map toks
(lambda (k v)
(λ (k v)
(list* k
(car v)
(if (cdr v)
@ -743,19 +686,19 @@
src-pos?
(cons (car clauses) parser-clauses))]))))])
#`(let ([orig-parse (parser
[error (lambda (a b c)
[error (λ (a b c)
(error 'cfg-parser "unexpected ~a token: ~a" b c))]
. #,parser-clauses)]
[error-proc #,cfg-error])
(letrec #,grammar
(lambda (get-tok)
(λ (get-tok)
(let ([tok-list (orig-parse get-tok)])
(letrec ([success-k
(lambda (val stream last-consumed-token depth max-depth tasks next)
(λ (val stream last-consumed-token depth max-depth tasks next)
(if (null? stream)
val
(next success-k fail-k max-depth tasks)))]
[fail-k (lambda (max-depth tasks)
[fail-k (λ (max-depth tasks)
(cond
[(null? tok-list)
(if error-proc
@ -847,7 +790,7 @@
(define (parse s)
(define ip (open-input-string s))
(port-count-lines! ip)
(-parse (lambda () (lex ip))))
(-parse (λ () (lex ip))))
(check-equal? (parse "abc")
'(unanchored (lit "abc" 1 4) 1 4))
@ -881,7 +824,7 @@
(tokens non-terminals)
(start <program>)
(end EOF)
(error (lambda (a b stx)
(error (λ (a b stx)
(error 'parse "failed at ~s" stx)))
(grammar [<program> [(PLUS) "plus"]
[(<minus-program> BAR <minus-program>) (list $1 $2 $3)]
@ -903,7 +846,7 @@
-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****"
;; This one fails:
#;"+*")])
(check-equal? (parse (lambda () (lex p)))
(check-equal? (parse (λ () (lex p)))
'((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))

@ -1,4 +1,4 @@
#lang scheme
#lang racket/base
;; An interactive calculator inspired by the calculator example in the bison manual.
@ -22,12 +22,12 @@
;; (:/ 0 9) would not work because the lexer does not understand numbers. (:/ #\0 #\9) is ok too.
(digit (:/ "0" "9")))
(define calcl
(define calc-lex
(lexer
[(eof) 'EOF]
;; recursively call the lexer on the remaining input after a tab or space. Returning the
;; result of that operation. This effectively skips all whitespace.
[(:or #\tab #\space) (calcl input-port)]
[(:or #\tab #\space) (calc-lex input-port)]
;; (token-newline) returns 'newline
[#\newline (token-newline)]
;; Since (token-=) returns '=, just return the symbol directly
@ -40,7 +40,7 @@
[(:: (:+ digit) #\. (:* digit)) (token-NUM (string->number lexeme))]))
(define calcp
(define calc-parse
(parser
(start start)
@ -78,12 +78,15 @@
;; run the calculator on the given input-port
(define (calc ip)
(port-count-lines! ip)
(letrec ((one-line
(lambda ()
(let ((result (calcp (lambda () (calcl ip)))))
(let loop ()
(define result (calc-parse (λ () (calc-lex ip))))
(when result
(printf "~a\n" result)
(one-line))))))
(one-line)))
(calc (open-input-string "x=1\n(x + 2 * 3) - (1+2)*3"))
(loop))))
(module+ test
(require rackunit)
(check-equal? (let ([o (open-output-string)])
(parameterize ([current-output-port o])
(calc (open-input-string "x=1\n(x + 2 * 3) - (1+2)*3")))
(get-output-string o)) "1\n-2\n"))

@ -1,11 +1,11 @@
#lang racket/base
;; This implements the equivalent of racket's read-syntax for R5RS scheme.
;; It has not been thoroughly tested. Also it will read an entire file into a
;; list of syntax objects, instead of returning one syntax object at a time
(module read mzscheme
(require br-parser-tools/lex
(prefix : br-parser-tools/lex-sre)
(require (for-syntax racket/base)
br-parser-tools/lex
(prefix-in : br-parser-tools/lex-sre)
br-parser-tools/yacc
syntax/readerr)
@ -170,21 +170,21 @@
(define-syntax (build-so stx)
(syntax-case stx ()
((_ value start end)
(with-syntax ((start-pos (datum->syntax-object
(syntax end)
(with-syntax ((start-pos (datum->syntax
#'end
(string->symbol
(format "$~a-start-pos"
(syntax-object->datum (syntax start))))))
(end-pos (datum->syntax-object
(syntax end)
(syntax->datum #'start)))))
(end-pos (datum->syntax
#'end
(string->symbol
(format "$~a-end-pos"
(syntax-object->datum (syntax end))))))
(source (datum->syntax-object
(syntax end)
(syntax->datum #'end)))))
(source (datum->syntax
#'end
'source-name)))
(syntax
(datum->syntax-object
(datum->syntax
#f
value
(list source
@ -237,6 +237,4 @@
(case-lambda ((sn) (rs sn (current-input-port)))
((sn ip) (rs sn ip))))
(provide (rename readsyntax read-syntax))
)
(provide (rename-out [readsyntax read-syntax]))

@ -1,24 +1,23 @@
(module lex-plt-v200 mzscheme
(require br-parser-tools/lex
(prefix : br-parser-tools/lex-sre))
#lang racket/base
(require (for-syntax racket/base)
br-parser-tools/lex
(prefix-in : br-parser-tools/lex-sre))
(provide epsilon
~
(rename :* *)
(rename :+ +)
(rename :? ?)
(rename :or :)
(rename :& &)
(rename :: @)
(rename :~ ^)
(rename :/ -))
(provide epsilon ~
(rename-out [:* *]
[:+ +]
[:? ?]
[:or :]
[:& &]
[:: @]
[:~ ^]
[:/ -]))
(define-lex-trans epsilon
(syntax-rules ()
((_) "")))
(define-lex-trans ~
(syntax-rules ()
((_ re) (complement re)))))
(define-lex-trans (epsilon stx)
(syntax-case stx ()
[(_) #'""]))
(define-lex-trans (~ stx)
(syntax-case stx ()
[(_ RE) #'(complement RE)]))

@ -1,119 +1,103 @@
(module lex-sre mzscheme
(require br-parser-tools/lex)
(provide (rename sre-* *)
(rename sre-+ +)
?
(rename sre-= =)
(rename sre->= >=)
**
(rename sre-or or)
:
seq
&
~
(rename sre-- -)
(rename sre-/ /)
/-only-chars)
(define-lex-trans sre-*
(syntax-rules ()
((_ re ...)
(repetition 0 +inf.0 (union re ...)))))
(define-lex-trans sre-+
(syntax-rules ()
((_ re ...)
(repetition 1 +inf.0 (union re ...)))))
(define-lex-trans ?
(syntax-rules ()
((_ re ...)
(repetition 0 1 (union re ...)))))
(define-lex-trans sre-=
(syntax-rules ()
((_ n re ...)
(repetition n n (union re ...)))))
(define-lex-trans sre->=
(syntax-rules ()
((_ n re ...)
(repetition n +inf.0 (union re ...)))))
(define-lex-trans **
(syntax-rules ()
((_ low #f re ...)
(** low +inf.0 re ...))
((_ low high re ...)
(repetition low high (union re ...)))))
(define-lex-trans sre-or
(syntax-rules ()
((_ re ...)
(union re ...))))
(define-lex-trans :
(syntax-rules ()
((_ re ...)
(concatenation re ...))))
(define-lex-trans seq
(syntax-rules ()
((_ re ...)
(concatenation re ...))))
(define-lex-trans &
(syntax-rules ()
((_ re ...)
(intersection re ...))))
(define-lex-trans ~
(syntax-rules ()
((_ re ...)
(char-complement (union re ...)))))
#lang racket/base
(require (for-syntax racket/base)
br-parser-tools/lex)
(provide (rename-out [sre-* *]
[sre-+ +]
[sre-= =]
[sre->= >=]
[sre-or or]
[sre-- -]
[sre-/ /])
? ** : seq & ~ /-only-chars)
(define-lex-trans (sre-* stx)
(syntax-case stx ()
[(_ RE ...)
#'(repetition 0 +inf.0 (union RE ...))]))
(define-lex-trans (sre-+ stx)
(syntax-case stx ()
[(_ RE ...)
#'(repetition 1 +inf.0 (union RE ...))]))
(define-lex-trans (? stx)
(syntax-case stx ()
[(_ RE ...)
#'(repetition 0 1 (union RE ...))]))
(define-lex-trans (sre-= stx)
(syntax-case stx ()
[(_ N RE ...)
#'(repetition N N (union RE ...))]))
(define-lex-trans (sre->= stx)
(syntax-case stx ()
[(_ N RE ...)
#'(repetition N +inf.0 (union RE ...))]))
(define-lex-trans (** stx)
(syntax-case stx ()
[(_ LOW #f RE ...)
#'(** LOW +inf.0 RE ...)]
[(_ LOW HIGH RE ...)
#'(repetition LOW HIGH (union RE ...))]))
(define-lex-trans (sre-or stx)
(syntax-case stx ()
[(_ RE ...)
#'(union RE ...)]))
(define-lex-trans (: stx)
(syntax-case stx ()
[(_ RE ...)
#'(concatenation RE ...)]))
(define-lex-trans (seq stx)
(syntax-case stx ()
[(_ RE ...)
#'(concatenation RE ...)]))
(define-lex-trans (& stx)
(syntax-case stx ()
[(_ RE ...)
#'(intersection RE ...)]))
(define-lex-trans (~ stx)
(syntax-case stx ()
[(_ RE ...)
#'(char-complement (union RE ...))]))
;; set difference
(define-lex-trans (sre-- stx)
(syntax-case stx ()
((_)
[(_)
(raise-syntax-error #f
"must have at least one argument"
stx))
((_ big-re re ...)
(syntax (& big-re (complement (union re ...)))))))
stx)]
[(_ BIG-RE RE ...)
#'(& BIG-RE (complement (union RE ...)))]))
(define-lex-trans (sre-/ stx)
(syntax-case stx ()
((_ range ...)
(let ((chars
(apply append (map (lambda (r)
(let ((x (syntax-e r)))
[(_ RANGE ...)
(let ([chars
(apply append (for/list ([r (in-list (syntax->list #'(RANGE ...)))])
(let ([x (syntax-e r)])
(cond
((char? x) (list x))
((string? x) (string->list x))
(else
(raise-syntax-error
#f
"not a char or string"
stx
r)))))
(syntax->list (syntax (range ...)))))))
[(char? x) (list x)]
[(string? x) (string->list x)]
[else
(raise-syntax-error #f "not a char or string" stx r)]))))])
(unless (even? (length chars))
(raise-syntax-error
#f
"not given an even number of characters"
stx))
#`(/-only-chars #,@chars)))))
(define-lex-trans /-only-chars
(syntax-rules ()
((_ c1 c2)
(char-range c1 c2))
((_ c1 c2 c ...)
(union (char-range c1 c2)
(/-only-chars c ...)))))
)
(raise-syntax-error #f "not given an even number of characters" stx))
#`(/-only-chars #,@chars))]))
(define-lex-trans (/-only-chars stx)
(syntax-case stx ()
[(_ C1 C2)
#'(char-range C1 C2)]
[(_ C1 C2 C ...)
#'(union (char-range C1 C2) (/-only-chars C ...))]))

@ -3,7 +3,7 @@
;; Provides the syntax used to create lexers and the functions needed to
;; create and use the buffer that the lexer reads from. See docs.
(require (for-syntax mzlib/list
(require (for-syntax racket/list
syntax/stx
syntax/define
syntax/boundmap
@ -14,7 +14,7 @@
racket/base
racket/promise))
(require mzlib/stxparam
(require racket/stxparam
syntax/readerr
"private-lex/token.rkt")
@ -30,7 +30,7 @@
file-path
lexer-file-path ;; alternate name
;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4.
;; Lex abbrevs for unicode char sets.
any-char any-string nothing alphabetic lower-case upper-case title-case
numeric symbolic punctuation graphic whitespace blank iso-control
@ -212,12 +212,11 @@
(define (get-next-state char table)
(and table (get-next-state-helper char 0 (vector-length table) table)))
(define (lexer-body start-state trans-table actions no-lookahead special-action
has-special-comment-action? special-comment-action eof-action)
(letrec ([lexer
(λ (ip)
(let ((first-pos (get-position ip))
(first-char (peek-char-or-special ip 0)))
(define ((lexer-body start-state trans-table actions no-lookahead special-action
has-special-comment-action? special-comment-action eof-action) ip)
(define (lexer ip)
(define first-pos (get-position ip))
(define first-char (peek-char-or-special ip 0))
;(printf "(peek-char-or-special port 0) = ~e\n" first-char)
(cond
[(eof-object? first-char)
@ -233,40 +232,40 @@
[else
(let lexer-loop (
;; current-state
(state start-state)
[state start-state]
;; the character to transition on
(char first-char)
[char first-char]
;; action for the longest match seen thus far
;; including a match at the current state
(longest-match-action
(vector-ref actions start-state))
[longest-match-action
(vector-ref actions start-state)]
;; how many bytes precede char
(length-bytes 0)
[length-bytes 0]
;; how many characters have been read
;; including the one just read
(length-chars 1)
[length-chars 1]
;; how many characters are in the longest match
(longest-match-length 0))
(let ([next-state
[longest-match-length 0])
(define next-state
(cond
[(not (char? char)) #f]
[else (get-next-state (char->integer char)
(vector-ref trans-table state))])])
(vector-ref trans-table state))]))
(cond
[(not next-state)
(check-match ip first-pos longest-match-length
length-chars longest-match-action)]
[(vector-ref no-lookahead next-state)
(let ((act (vector-ref actions next-state)))
(define act (vector-ref actions next-state))
(check-match ip
first-pos
(if act length-chars longest-match-length)
length-chars
(if act act longest-match-action)))]
(if act act longest-match-action))]
[else
(let* ([act (vector-ref actions next-state)]
[next-length-bytes (+ (char-utf-8-length char) length-bytes)]
[next-char (peek-char-or-special ip next-length-bytes)])
(define act (vector-ref actions next-state))
(define next-length-bytes (+ (char-utf-8-length char) length-bytes))
(define next-char (peek-char-or-special ip next-length-bytes))
#;(printf "(peek-char-or-special port ~e) = ~e\n"
next-length-bytes next-char)
(lexer-loop next-state
@ -278,26 +277,25 @@
(add1 length-chars)
(if act
length-chars
longest-match-length)))])))])))])
(λ (ip)
longest-match-length))]))]))
(unless (input-port? ip)
(raise-argument-error 'lexer "input-port?" 0 ip))
(lexer ip))))
(lexer ip))
(define (check-match lb first-pos longest-match-length length longest-match-action)
(unless longest-match-action
(let* ([match (read-string length lb)]
[end-pos (get-position lb)])
(define match (read-string length lb))
(define end-pos (get-position lb))
(raise-read-error
(format "lexer: No match found in input starting with: ~a" match)
(file-path)
(position-line first-pos)
(position-col first-pos)
(position-offset first-pos)
(- (position-offset end-pos) (position-offset first-pos)))))
(let ([match (read-string longest-match-length lb)])
(- (position-offset end-pos) (position-offset first-pos))))
(define match (read-string longest-match-length lb))
;(printf "(read-string ~e port) = ~e\n" longest-match-length match)
(do-match lb first-pos longest-match-action match)))
(do-match lb first-pos longest-match-action match))
(define file-path (make-parameter #f))
(define lexer-file-path file-path)

@ -1,5 +1,4 @@
#lang racket/base
(provide (all-defined-out))
(require syntax/stx)
@ -7,10 +6,10 @@
;; Returns the first action from a rule of the form ((which-special) action)
(define (get-special-action rules which-special none)
(cond
((null? rules) none)
(else
[(null? rules) none]
[else
(syntax-case (car rules) ()
[((special) ACT)
(and (identifier? #'special) (module-or-top-identifier=? (syntax special) which-special))
(and (identifier? #'special) (module-or-top-identifier=? #'special which-special))
#'ACT]
[_ (get-special-action (cdr rules) which-special none)]))))
[_ (get-special-action (cdr rules) which-special none)])]))

@ -1,11 +1,10 @@
(module deriv mzscheme
(require mzlib/list
(prefix is: mzlib/integer-set)
#lang racket/base
(require racket/list
(prefix-in is: data/integer-set)
"re.rkt"
"util.rkt")
(provide build-dfa print-dfa (struct dfa (num-states start-state final-states/actions transitions)))
(provide build-dfa print-dfa (struct-out dfa))
(define e (build-epsilon))
(define z (build-zero))
@ -19,23 +18,23 @@
;; taking the derivative of r.
(define (get-char-groups r found-negation)
(cond
((or (eq? r e) (eq? r z)) null)
((char-setR? r) (list r))
((concatR? r)
[(or (eq? r e) (eq? r z)) null]
[(char-setR? r) (list r)]
[(concatR? r)
(if (re-nullable? (concatR-re1 r))
(append (get-char-groups (concatR-re1 r) found-negation)
(get-char-groups (concatR-re2 r) found-negation))
(get-char-groups (concatR-re1 r) found-negation)))
((repeatR? r)
(get-char-groups (repeatR-re r) found-negation))
((orR? r)
(apply append (map (lambda (x) (get-char-groups x found-negation)) (orR-res r))))
((andR? r)
(apply append (map (lambda (x) (get-char-groups x found-negation)) (andR-res r))))
((negR? r)
(get-char-groups (concatR-re1 r) found-negation))]
[(repeatR? r)
(get-char-groups (repeatR-re r) found-negation)]
[(orR? r)
(apply append (map (λ (x) (get-char-groups x found-negation)) (orR-res r)))]
[(andR? r)
(apply append (map (λ (x) (get-char-groups x found-negation)) (andR-res r)))]
[(negR? r)
(if found-negation
(get-char-groups (negR-re r) #t)
(cons all-chars (get-char-groups (negR-re r) #t))))))
(cons all-chars (get-char-groups (negR-re r) #t)))]))
(test-block ((c (make-cache))
(r1 (->re #\1 c))
@ -67,32 +66,32 @@
;; deriveR : re char cache -> re
(define (deriveR r c cache)
(cond
((or (eq? r e) (eq? r z)) z)
((char-setR? r)
(if (loc:member? c (char-setR-chars r)) e z))
((concatR? r)
(let* ((r1 (concatR-re1 r))
(r2 (concatR-re2 r))
(d (build-concat (deriveR r1 c cache) r2 cache)))
[(or (eq? r e) (eq? r z)) z]
[(char-setR? r)
(if (loc:member? c (char-setR-chars r)) e z)]
[(concatR? r)
(define r1 (concatR-re1 r))
(define r2 (concatR-re2 r))
(define d (build-concat (deriveR r1 c cache) r2 cache))
(if (re-nullable? r1)
(build-or (list d (deriveR r2 c cache)) cache)
d)))
((repeatR? r)
d)]
[(repeatR? r)
(build-concat (deriveR (repeatR-re r) c cache)
(build-repeat (sub1 (repeatR-low r))
(sub1 (repeatR-high r))
(repeatR-re r) cache)
cache))
((orR? r)
(build-or (map (lambda (x) (deriveR x c cache))
cache)]
[(orR? r)
(build-or (map (λ (x) (deriveR x c cache))
(orR-res r))
cache))
((andR? r)
(build-and (map (lambda (x) (deriveR x c cache))
cache)]
[(andR? r)
(build-and (map (λ (x) (deriveR x c cache))
(andR-res r))
cache))
((negR? r)
(build-neg (deriveR (negR-re r) c cache) cache))))
cache)]
[(negR? r)
(build-neg (deriveR (negR-re r) c cache) cache)]))
(test-block ((c (make-cache))
(a (char->integer #\a))
@ -135,13 +134,11 @@
;; applies deriveR to all the re-actions's re parts.
;; Returns #f if the derived state is equivalent to z.
(define (derive r c cache)
(let ((new-r (map (lambda (ra)
(cons (deriveR (car ra) c cache) (cdr ra)))
r)))
(if (andmap (lambda (x) (eq? z (car x)))
new-r)
(define new-r (for/list ([ra (in-list r)])
(cons (deriveR (car ra) c cache) (cdr ra))))
(if (andmap (λ (x) (eq? z (car x))) new-r)
#f
new-r)))
new-r))
(test-block ((c (make-cache))
(r1 (->re #\1 c))
@ -157,9 +154,9 @@
;; action from the first final state or #f if there is none.
(define (get-final res)
(cond
((null? res) #f)
((re-nullable? (caar res)) (cdar res))
(else (get-final (cdr res)))))
[(null? res) #f]
[(re-nullable? (caar res)) (cdar res)]
[else (get-final (cdr res))]))
(test-block ((c->i char->integer)
(c (make-cache))
@ -189,7 +186,7 @@
;; get->key : re-action -> (list-of nat)
;; states are indexed by the list of indexes of their res
(define (get-key s)
(map (lambda (x) (re-index (car x))) s))
(map (λ (x) (re-index (car x))) s))
(define loc:partition is:partition)
@ -198,11 +195,11 @@
;; derivative of the car of st. Only one derivative per set need to be taken.
(define (compute-chars st)
(cond
((null? st) null)
(else
[(null? st) null]
[else
(loc:partition (map char-setR-chars
(apply append (map (lambda (x) (get-char-groups (car x) #f))
(state-spec (car st)))))))))
(apply append (map (λ (x) (get-char-groups (car x) #f))
(state-spec (car st))))))]))
(test-block ((c (make-cache))
(c->i char->integer)
@ -222,71 +219,70 @@
;; (list-of (cons int (list-of (cons char-set int)))))
;; Each transitions is a state and a list of chars with the state to transition to.
;; The finals and transitions are sorted by state number, and duplicate free.
(define-struct dfa (num-states start-state final-states/actions transitions) (make-inspector))
(define-struct dfa (num-states start-state final-states/actions transitions) #:inspector (make-inspector))
(define loc:get-integer is:get-integer)
;; build-dfa : (list-of re-action) cache -> dfa
(define (build-dfa rs cache)
(let* ((transitions (make-hash-table))
(get-state-number (make-counter))
(start (make-state rs (get-state-number))))
(cache (cons 'state (get-key rs)) (lambda () start))
(let loop ((old-states (list start))
(new-states null)
(all-states (list start))
(cs (compute-chars (list start))))
(let* ([transitions (make-hash)]
[get-state-number (make-counter)]
[start (make-state rs (get-state-number))])
(cache (cons 'state (get-key rs)) (λ () start))
(let loop ([old-states (list start)]
[new-states null]
[all-states (list start)]
[cs (compute-chars (list start))])
(cond
((and (null? old-states) (null? new-states))
[(and (null? old-states) (null? new-states))
(make-dfa (get-state-number) (state-index start)
(sort (filter (lambda (x) (cdr x))
(map (lambda (state)
(cons (state-index state) (get-final (state-spec state))))
all-states))
(lambda (a b) (< (car a) (car b))))
(sort (hash-table-map transitions
(lambda (state trans)
(sort (for*/list ([state (in-list all-states)]
[val (in-value (cons (state-index state) (get-final (state-spec state))))]
#:when (cdr val))
val)
< #:key car)
(sort (hash-map transitions
(λ (state trans)
(cons (state-index state)
(map (lambda (t)
(for/list ([t (in-list trans)])
(cons (car t)
(state-index (cdr t))))
trans))))
(lambda (a b) (< (car a) (car b))))))
((null? old-states)
(loop new-states null all-states (compute-chars new-states)))
((null? cs)
(loop (cdr old-states) new-states all-states (compute-chars (cdr old-states))))
(else
(let* ((state (car old-states))
(c (car cs))
(new-re (derive (state-spec state) (loc:get-integer c) cache)))
(state-index (cdr t)))))))
< #:key car))]
[(null? old-states)
(loop new-states null all-states (compute-chars new-states))]
[(null? cs)
(loop (cdr old-states) new-states all-states (compute-chars (cdr old-states)))]
[else
(define state (car old-states))
(define c (car cs))
(define new-re (derive (state-spec state) (loc:get-integer c) cache))
(cond
(new-re
(let* ((new-state? #f)
(new-state (cache (cons 'state (get-key new-re))
(lambda ()
[new-re
(let* ([new-state? #f]
[new-state (cache (cons 'state (get-key new-re))
(λ ()
(set! new-state? #t)
(make-state new-re (get-state-number)))))
(new-all-states (if new-state? (cons new-state all-states) all-states)))
(hash-table-put! transitions
(make-state new-re (get-state-number))))]
[new-all-states (if new-state? (cons new-state all-states) all-states)])
(hash-set! transitions
state
(cons (cons c new-state)
(hash-table-get transitions state
(lambda () null))))
(hash-ref transitions state
(λ () null))))
(cond
(new-state?
(loop old-states (cons new-state new-states) new-all-states (cdr cs)))
(else
(loop old-states new-states new-all-states (cdr cs))))))
(else (loop old-states new-states all-states (cdr cs))))))))))
[new-state?
(loop old-states (cons new-state new-states) new-all-states (cdr cs))]
[else
(loop old-states new-states new-all-states (cdr cs))]))]
[else (loop old-states new-states all-states (cdr cs))])]))))
(define (print-dfa x)
(printf "number of states: ~a\n" (dfa-num-states x))
(printf "start state: ~a\n" (dfa-start-state x))
(printf "final states: ~a\n" (map car (dfa-final-states/actions x)))
(for-each (lambda (trans)
(for-each (λ (trans)
(printf "state: ~a\n" (car trans))
(for-each (lambda (rule)
(for-each (λ (rule)
(printf " -~a-> ~a\n"
(is:integer-set-contents (car rule))
(cdr rule)))
@ -294,10 +290,8 @@
(dfa-transitions x)))
(define (build-test-dfa rs)
(let ((c (make-cache)))
(build-dfa (map (lambda (x) (cons (->re x c) 'action))
rs)
c)))
(define c (make-cache))
(build-dfa (map (λ (x) (cons (->re x c) 'action)) rs) c))
#|
@ -334,6 +328,6 @@
(define t13 (build-test-dfa `((intersection (concatenation (intersection) "111" (intersection))
(complement (union (concatenation (intersection) "01")
(repetition 1 +inf.0 "1")))))))
(define t14 (build-test-dfa `((complement "1"))))
(define t14 (build-test-dfa `((complement "1")))))
|#
)

@ -1,5 +1,5 @@
#lang scheme/base
(require (for-syntax scheme/base)
#lang racket/base
(require (for-syntax racket/base)
"../lex.rkt"
rackunit)

@ -1,6 +1,8 @@
(module front mzscheme
(require (prefix is: mzlib/integer-set)
mzlib/list
#lang racket/base
(require racket/base
racket/match
(prefix-in is: data/integer-set)
racket/list
syntax/stx
"util.rkt"
"stx.rkt"
@ -24,84 +26,69 @@
;; dfa->1d-table : dfa -> (same as build-lexer)
(define (dfa->1d-table dfa)
(let ((state-table (make-vector (dfa-num-states dfa) #f))
(transition-cache (make-hash-table 'equal)))
(for-each
(lambda (trans)
(let* ((from-state (car trans))
(all-chars/to (cdr trans))
(flat-all-chars/to
(define state-table (make-vector (dfa-num-states dfa) #f))
(define transition-cache (make-hasheq))
(for ([trans (in-list (dfa-transitions dfa))])
(match-define (cons from-state all-chars/to) trans)
(define flat-all-chars/to
(sort
(apply append
(map (lambda (chars/to)
(let ((char-ranges (loc:integer-set-contents (car chars/to)))
(to (cdr chars/to)))
(map (lambda (char-range)
(let ((entry (vector (car char-range) (cdr char-range) to)))
(hash-table-get transition-cache entry
(lambda ()
(hash-table-put! transition-cache
(for*/list ([chars/to (in-list all-chars/to)]
[char-ranges (in-value (loc:integer-set-contents (car chars/to)))]
[to (in-value (cdr chars/to))]
[char-range (in-list char-ranges)])
(define entry (vector (car char-range) (cdr char-range) to))
(hash-ref transition-cache entry (λ ()
(hash-set! transition-cache
entry
entry)
entry))))
char-ranges)))
all-chars/to))
(lambda (a b)
(< (vector-ref a 0) (vector-ref b 0))))))
(vector-set! state-table from-state (list->vector flat-all-chars/to))))
(dfa-transitions dfa))
state-table))
entry)))
< #:key (λ (v) (vector-ref v 0))))
(vector-set! state-table from-state (list->vector flat-all-chars/to)))
state-table)
(define loc:foldr is:foldr)
;; dfa->2d-table : dfa -> (same as build-lexer)
(define (dfa->2d-table dfa)
(let (
;; char-table : (vector-of (union #f nat))
;; The lexer table, one entry per state per char.
;; Each entry specifies a state to transition to.
;; #f indicates no transition
(char-table (make-vector (* 256 (dfa-num-states dfa)) #f)))
(define char-table (make-vector (* 256 (dfa-num-states dfa)) #f))
;; Fill the char-table vector
(for-each
(lambda (trans)
(let ((from-state (car trans)))
(for-each (lambda (chars/to)
(let ((to-state (cdr chars/to)))
(loc:foldr (lambda (char _)
(for* ([trans (in-list (dfa-transitions dfa))]
[chars/to (in-list (cdr trans))])
(define from-state (car trans))
(define to-state (cdr chars/to))
(loc:foldr (λ (char _)
(vector-set! char-table
(bitwise-ior
char
(arithmetic-shift from-state 8))
to-state))
(void)
(car chars/to))))
(cdr trans))))
(dfa-transitions dfa))
char-table))
(car chars/to)))
char-table)
;; dfa->actions : dfa -> (vector-of (union #f syntax-object))
;; The action for each final state, #f if the state isn't final
(define (dfa->actions dfa)
(let ((actions (make-vector (dfa-num-states dfa) #f)))
(for-each (lambda (state/action)
(define actions (make-vector (dfa-num-states dfa) #f))
(for ([state/action (in-list (dfa-final-states/actions dfa))])
(vector-set! actions (car state/action) (cdr state/action)))
(dfa-final-states/actions dfa))
actions))
actions)
;; dfa->no-look : dfa -> (vector-of bool)
;; For each state whether the lexer can ignore the next input.
;; It can do this only if there are no transitions out of the
;; current state.
(define (dfa->no-look dfa)
(let ((no-look (make-vector (dfa-num-states dfa) #t)))
(for-each (lambda (trans)
(define no-look (make-vector (dfa-num-states dfa) #t))
(for ([trans (in-list (dfa-transitions dfa))])
(vector-set! no-look (car trans) #f))
(dfa-transitions dfa))
no-look))
no-look)
(test-block ((d1 (make-dfa 1 1 (list) (list)))
(d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
@ -138,35 +125,30 @@
;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object))
;; each syntax object has the form (re action)
(define (build-lexer sos)
(let* ((disappeared-uses (box null))
(s-re-acts (map (lambda (so)
(define disappeared-uses (box null))
(define s-re-acts (for/list ([so (in-list sos)])
(cons (parse (stx-car so) disappeared-uses)
(stx-car (stx-cdr so))))
sos))
(cache (make-cache))
(re-acts (map (lambda (s-re-act)
(stx-car (stx-cdr so)))))
(define cache (make-cache))
(define re-acts (for/list ([s-re-act (in-list s-re-acts)])
(cons (->re (car s-re-act) cache)
(cdr s-re-act)))
s-re-acts))
(dfa (build-dfa re-acts cache))
(table (dfa->1d-table dfa)))
(cdr s-re-act))))
(define dfa (build-dfa re-acts cache))
(define table (dfa->1d-table dfa))
;(print-dfa dfa)
#;(let ((num-states (vector-length table))
(num-vectors (length (filter values (vector->list table))))
(num-entries (apply + (map
(lambda (x) (if x (vector-length x) 0))
(λ (x) (if x (vector-length x) 0))
(vector->list table))))
(num-different-entries
(let ((ht (make-hash-table)))
(let ((ht (make-hash)))
(for-each
(lambda (x)
(λ (x)
(when x
(for-each
(lambda (y)
(hash-table-put! ht y #t))
(λ (y)
(hash-set! ht y #t))
(vector->list x))))
(vector->list table))
(length (hash-table-map ht cons)))))
@ -175,5 +157,5 @@
(/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries
(* 5 num-different-entries))) 1024)))
(values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa)
(unbox disappeared-uses))))
)
(unbox disappeared-uses)))

@ -1,7 +1,7 @@
(module re mzscheme
(require mzlib/list
scheme/match
(prefix is: mzlib/integer-set)
#lang racket/base
(require racket/list
racket/match
(prefix-in is: data/integer-set)
"util.rkt")
(provide ->re build-epsilon build-zero build-char-set build-concat
@ -26,15 +26,15 @@
;;
;; Every re must have an index field globally different from all
;; other re index fields.
(define-struct re (nullable? index) (make-inspector))
(define-struct (epsilonR re) () (make-inspector))
(define-struct (zeroR re) () (make-inspector))
(define-struct (char-setR re) (chars) (make-inspector))
(define-struct (concatR re) (re1 re2) (make-inspector))
(define-struct (repeatR re) (low high re) (make-inspector))
(define-struct (orR re) (res) (make-inspector))
(define-struct (andR re) (res) (make-inspector))
(define-struct (negR re) (re) (make-inspector))
(define-struct re (nullable? index) #:inspector (make-inspector))
(define-struct (epsilonR re) () #:inspector (make-inspector))
(define-struct (zeroR re) () #:inspector (make-inspector))
(define-struct (char-setR re) (chars) #:inspector (make-inspector))
(define-struct (concatR re) (re1 re2) #:inspector (make-inspector))
(define-struct (repeatR re) (low high re) #:inspector (make-inspector))
(define-struct (orR re) (res) #:inspector (make-inspector))
(define-struct (andR re) (res) #:inspector (make-inspector))
(define-struct (negR re) (re) #:inspector (make-inspector))
;; e : re
;; The unique epsilon re
@ -70,41 +70,40 @@
;; ->re : s-re cache -> re
(define (->re exp cache)
(match exp
((? char?) (build-char-set (loc:make-range (char->integer exp)) cache))
((? string?) (->re `(concatenation ,@(string->list exp)) cache))
((? re?) exp)
(`(repetition ,low ,high ,r)
(build-repeat low high (->re r cache) cache))
(`(union ,rs ...)
(build-or (flatten-res (map (lambda (r) (->re r cache)) rs)
[(? char?) (build-char-set (loc:make-range (char->integer exp)) cache)]
[(? string?) (->re `(concatenation ,@(string->list exp)) cache)]
[(? re?) exp]
[`(repetition ,low ,high ,r)
(build-repeat low high (->re r cache) cache)]
[`(union ,rs ...)
(build-or (flatten-res (map (λ (r) (->re r cache)) rs)
orR? orR-res loc:union cache)
cache))
(`(intersection ,rs ...)
(build-and (flatten-res (map (lambda (r) (->re r cache)) rs)
andR? andR-res (lambda (a b)
cache)]
[`(intersection ,rs ...)
(build-and (flatten-res (map (λ (r) (->re r cache)) rs)
andR? andR-res (λ (a b)
(let-values (((i _ __) (loc:split a b))) i))
cache)
cache))
(`(complement ,r)
(build-neg (->re r cache) cache))
(`(concatenation ,rs ...)
(foldr (lambda (x y)
cache)]
[`(complement ,r) (build-neg (->re r cache) cache)]
[`(concatenation ,rs ...)
(foldr (λ (x y)
(build-concat (->re x cache) y cache))
e
rs))
(`(char-range ,c1 ,c2)
(let ((i1 (char->integer (if (string? c1) (string-ref c1 0) c1)))
(i2 (char->integer (if (string? c2) (string-ref c2 0) c2))))
rs)]
[`(char-range ,c1 ,c2)
(let ([i1 (char->integer (if (string? c1) (string-ref c1 0) c1))]
[i2 (char->integer (if (string? c2) (string-ref c2 0) c2))])
(if (<= i1 i2)
(build-char-set (loc:make-range i1 i2) cache)
z)))
(`(char-complement ,crs ...)
(let ((cs (->re `(union ,@crs) cache)))
z))]
[`(char-complement ,crs ...)
(let ([cs (->re `(union ,@crs) cache)])
(cond
((zeroR? cs) (build-char-set (loc:make-range 0 max-char-num) cache))
((char-setR? cs)
(build-char-set (loc:complement (char-setR-chars cs) 0 max-char-num) cache))
(else z))))))
[(zeroR? cs) (build-char-set (loc:make-range 0 max-char-num) cache)]
[(char-setR? cs)
(build-char-set (loc:complement (char-setR-chars cs) 0 max-char-num) cache)]
[else z]))]))
@ -115,22 +114,22 @@
;; Flattens out the values of type?. get-res only needs to function on things type? returns
;; true for.
(define (flatten-res l type? get-res combine cache)
(let loop ((res l)
(let loop ([res l]
;; chars : (union #f char-set)
(chars #f)
(no-chars null))
[chars #f]
[no-chars null])
(cond
((null? res)
[(null? res)
(if chars
(cons (build-char-set chars cache) no-chars)
no-chars))
((char-setR? (car res))
no-chars)]
[(char-setR? (car res))
(if chars
(loop (cdr res) (combine (char-setR-chars (car res)) chars) no-chars)
(loop (cdr res) (char-setR-chars (car res)) no-chars)))
((type? (car res))
(loop (append (get-res (car res)) (cdr res)) chars no-chars))
(else (loop (cdr res) chars (cons (car res) no-chars))))))
(loop (cdr res) (char-setR-chars (car res)) no-chars))]
[(type? (car res))
(loop (append (get-res (car res)) (cdr res)) chars no-chars)]
[else (loop (cdr res) chars (cons (car res) no-chars))])))
;; build-epsilon : -> re
(define (build-epsilon) e)
@ -141,85 +140,85 @@
;; build-char-set : char-set cache -> re
(define (build-char-set cs cache)
(let ((l (loc:integer-set-contents cs)))
(define l (loc:integer-set-contents cs))
(cond
((null? l) z)
(else
[(null? l) z]
[else
(cache l
(lambda ()
(make-char-setR #f (get-index) cs)))))))
(λ ()
(make-char-setR #f (get-index) cs)))]))
;; build-concat : re re cache -> re
(define (build-concat r1 r2 cache)
(cond
((eq? e r1) r2)
((eq? e r2) r1)
((or (eq? z r1) (eq? z r2)) z)
(else
[(eq? e r1) r2]
[(eq? e r2) r1]
[(or (eq? z r1) (eq? z r2)) z]
[else
(cache (cons 'concat (cons (re-index r1) (re-index r2)))
(lambda ()
(λ ()
(make-concatR (and (re-nullable? r1) (re-nullable? r2))
(get-index)
r1 r2))))))
r1 r2)))]))
;; build-repeat : nat nat-or-+inf.0 re cache -> re
(define (build-repeat low high r cache)
(let ((low (if (< low 0) 0 low)))
(let ([low (if (< low 0) 0 low)])
(cond
((eq? r e) e)
((and (= 0 low) (or (= 0 high) (eq? z r))) e)
((and (= 1 low) (= 1 high)) r)
((and (repeatR? r)
[(eq? r e) e]
[(and (= 0 low) (or (= 0 high) (eq? z r))) e]
[(and (= 1 low) (= 1 high)) r]
[(and (repeatR? r)
(eqv? (repeatR-high r) +inf.0)
(or (= 0 (repeatR-low r))
(= 1 (repeatR-low r))))
(build-repeat (* low (repeatR-low r))
+inf.0
(repeatR-re r)
cache))
(else
cache)]
[else
(cache (cons 'repeat (cons low (cons high (re-index r))))
(lambda ()
(make-repeatR (or (re-nullable? r) (= 0 low)) (get-index) low high r)))))))
(λ ()
(make-repeatR (or (re-nullable? r) (= 0 low)) (get-index) low high r)))])))
;; build-or : (list-of re) cache -> re
(define (build-or rs cache)
(let ((rs
(let ([rs
(filter
(lambda (x) (not (eq? x z)))
(do-simple-equiv (replace rs orR? orR-res null) re-index))))
(λ (x) (not (eq? x z)))
(do-simple-equiv (replace rs orR? orR-res null) re-index))])
(cond
((null? rs) z)
((null? (cdr rs)) (car rs))
((memq (build-neg z cache) rs) (build-neg z cache))
(else
[(null? rs) z]
[(null? (cdr rs)) (car rs)]
[(memq (build-neg z cache) rs) (build-neg z cache)]
[else
(cache (cons 'or (map re-index rs))
(lambda ()
(make-orR (ormap re-nullable? rs) (get-index) rs)))))))
(λ ()
(make-orR (ormap re-nullable? rs) (get-index) rs)))])))
;; build-and : (list-of re) cache -> re
(define (build-and rs cache)
(let ((rs (do-simple-equiv (replace rs andR? andR-res null) re-index)))
(let ([rs (do-simple-equiv (replace rs andR? andR-res null) re-index)])
(cond
((null? rs) (build-neg z cache))
((null? (cdr rs)) (car rs))
((memq z rs) z)
(else
[(null? rs) (build-neg z cache)]
[(null? (cdr rs)) (car rs)]
[(memq z rs) z]
[else
(cache (cons 'and (map re-index rs))
(lambda ()
(make-andR (andmap re-nullable? rs) (get-index) rs)))))))
(λ ()
(make-andR (andmap re-nullable? rs) (get-index) rs)))])))
;; build-neg : re cache -> re
(define (build-neg r cache)
(cond
((negR? r) (negR-re r))
(else
[(negR? r) (negR-re r)]
[else
(cache (cons 'neg (re-index r))
(lambda ()
(make-negR (not (re-nullable? r)) (get-index) r))))))
(λ ()
(make-negR (not (re-nullable? r)) (get-index) r)))]))
;; Tests for the build-functions
(test-block ((c (make-cache))
@ -317,7 +316,7 @@
((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1)
orR? orR-res is:union c))))
(isc (is:make-range (char->integer #\1) (char->integer #\7))))
((flatten-res `(,r1 ,r2) andR? andR-res (lambda (x y)
((flatten-res `(,r1 ,r2) andR? andR-res (λ (x y)
(let-values (((i _ __)
(is:split x y)))
i))
@ -382,4 +381,4 @@
(isc (is:make-range 0)))
)
)

@ -1,30 +1,25 @@
#lang racket
(require "util.rkt"
syntax/id-table)
#lang racket/base
(require "util.rkt" syntax/id-table)
(provide parse)
(define (bad-args stx num)
(raise-syntax-error
#f
(format "incorrect number of arguments (should have ~a)" num)
stx))
(raise-syntax-error #f (format "incorrect number of arguments (should have ~a)" num) stx))
;; char-range-arg: syntax-object syntax-object -> nat
;; If c contains is a character or length 1 string, returns the integer
;; for the character. Otherwise raises a syntax error.
(define (char-range-arg stx containing-stx)
(let ((c (syntax-e stx)))
(define c (syntax-e stx))
(cond
((char? c) (char->integer c))
((and (string? c) (= (string-length c) 1))
(char->integer (string-ref c 0)))
(else
[(char? c) (char->integer c)]
[(and (string? c) (= (string-length c) 1))
(char->integer (string-ref c 0))]
[else
(raise-syntax-error
#f
"not a char or single-char string"
containing-stx stx)))))
containing-stx stx)]))
(module+ test
(check-equal? (char-range-arg #'#\1 #'here) (char->integer #\1))
(check-equal? (char-range-arg #'"1" #'here) (char->integer #\1)))
@ -42,114 +37,90 @@
[disappeared-uses disappeared-uses]
;; seen-lex-abbrevs: id-table
[seen-lex-abbrevs (make-immutable-free-id-table)])
(let ([recur (lambda (s)
(let ([recur (λ (s)
(loop (syntax-rearm s stx)
disappeared-uses
seen-lex-abbrevs))]
[recur/abbrev (lambda (s id)
[recur/abbrev (λ (s id)
(loop (syntax-rearm s stx)
disappeared-uses
(free-id-table-set seen-lex-abbrevs id id)))])
(syntax-case (disarm stx) (repetition union intersection complement concatenation
char-range char-complement)
(_
[_
(identifier? stx)
(let ((expansion (syntax-local-value stx (lambda () #f))))
(let ([expansion (syntax-local-value stx (λ () #f))])
(unless (lex-abbrev? expansion)
(raise-syntax-error 'regular-expression
"undefined abbreviation"
stx))
;; Check for cycles.
(when (free-id-table-ref seen-lex-abbrevs stx (lambda () #f))
(when (free-id-table-ref seen-lex-abbrevs stx (λ () #f))
(raise-syntax-error 'regular-expression
"illegal lex-abbrev cycle detected"
stx
#f
(list (free-id-table-ref seen-lex-abbrevs stx))))
(set-box! disappeared-uses (cons stx (unbox disappeared-uses)))
(recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx)))
(_
(recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx))]
[_
(or (char? (syntax-e stx)) (string? (syntax-e stx)))
(syntax-e stx))
((repetition arg ...)
(let ((arg-list (syntax->list (syntax (arg ...)))))
(syntax-e stx)]
[(repetition ARG ...)
(let ([arg-list (syntax->list #'(ARG ...))])
(unless (= 3 (length arg-list))
(bad-args stx 2))
(let ((low (syntax-e (car arg-list)))
(high (syntax-e (cadr arg-list)))
(re (caddr arg-list)))
(define low (syntax-e (car arg-list)))
(define high (syntax-e (cadr arg-list)))
(define re (caddr arg-list))
(unless (and (number? low) (exact? low) (integer? low) (>= low 0))
(raise-syntax-error #f
"not a non-negative exact integer"
stx
(car arg-list)))
(raise-syntax-error #f "not a non-negative exact integer" stx (car arg-list)))
(unless (or (and (number? high) (exact? high) (integer? high) (>= high 0))
(eqv? high +inf.0))
(raise-syntax-error #f
"not a non-negative exact integer or +inf.0"
stx
(cadr arg-list)))
(raise-syntax-error #f "not a non-negative exact integer or +inf.0" stx (cadr arg-list)))
(unless (<= low high)
(raise-syntax-error
#f
"the first argument is not less than or equal to the second argument"
stx))
`(repetition ,low ,high ,(recur re)))))
((union re ...)
`(union ,@(map recur (syntax->list (syntax (re ...))))))
((intersection re ...)
`(intersection ,@(map recur (syntax->list (syntax (re ...))))))
((complement re ...)
(let ((re-list (syntax->list (syntax (re ...)))))
(raise-syntax-error #f "the first argument is not less than or equal to the second argument" stx))
`(repetition ,low ,high ,(recur re)))]
[(union RE ...)
`(union ,@(map recur (syntax->list #'(RE ...))))]
[(intersection RE ...)
`(intersection ,@(map recur (syntax->list #'(RE ...))))]
[(complement RE ...)
(let ([re-list (syntax->list #'(RE ...))])
(unless (= 1 (length re-list))
(bad-args stx 1))
`(complement ,(recur (car re-list)))))
((concatenation re ...)
`(concatenation ,@(map recur (syntax->list (syntax (re ...))))))
((char-range arg ...)
(let ((arg-list (syntax->list (syntax (arg ...)))))
`(complement ,(recur (car re-list))))]
[(concatenation RE ...)
`(concatenation ,@(map recur (syntax->list #'(RE ...))))]
[(char-range ARG ...)
(let ((arg-list (syntax->list #'(ARG ...))))
(unless (= 2 (length arg-list))
(bad-args stx 2))
(let ((i1 (char-range-arg (car arg-list) stx))
(i2 (char-range-arg (cadr arg-list) stx)))
(let ([i1 (char-range-arg (car arg-list) stx)]
[i2 (char-range-arg (cadr arg-list) stx)])
(if (<= i1 i2)
`(char-range ,(integer->char i1) ,(integer->char i2))
(raise-syntax-error
#f
"the first argument does not precede or equal second argument"
stx)))))
((char-complement arg ...)
(let ((arg-list (syntax->list (syntax (arg ...)))))
(raise-syntax-error #f "the first argument does not precede or equal second argument" stx))))]
[(char-complement ARG ...)
(let ([arg-list (syntax->list #'(ARG ...))])
(unless (= 1 (length arg-list))
(bad-args stx 1))
(let ((parsed (recur (car arg-list))))
(define parsed (recur (car arg-list)))
(unless (char-set? parsed)
(raise-syntax-error #f
"not a character set"
stx
(car arg-list)))
`(char-complement ,parsed))))
((op form ...)
(identifier? (syntax op))
(let* ((o (syntax op))
(expansion (syntax-local-value o (lambda () #f))))
(set-box! disappeared-uses (cons o (unbox disappeared-uses)))
(raise-syntax-error #f "not a character set" stx (car arg-list)))
`(char-complement ,parsed))]
((OP form ...)
(identifier? #'OP)
(let* ([expansion (syntax-local-value #'OP (λ () #f))])
(set-box! disappeared-uses (cons #'OP (unbox disappeared-uses)))
(cond
((lex-trans? expansion)
(recur ((lex-trans-f expansion) (disarm stx))))
(expansion
(raise-syntax-error 'regular-expression
"not a lex-trans"
stx))
(else
(raise-syntax-error 'regular-expression
"undefined operator"
stx)))))
(_
(raise-syntax-error
'regular-expression
"not a char, string, identifier, or (op args ...)"
stx))))))
[(lex-trans? expansion)
(recur ((lex-trans-f expansion) (disarm stx)))]
[expansion
(raise-syntax-error 'regular-expression "not a lex-trans" stx)]
[else
(raise-syntax-error 'regular-expression "undefined operator" stx)])))
[_ (raise-syntax-error 'regular-expression "not a char, string, identifier, or (op args ...)" stx)]))))
@ -158,23 +129,18 @@
;; char-set? is conservative.
(define (char-set? s-re)
(cond
((char? s-re) #t)
((string? s-re) (= (string-length s-re) 1))
((list? s-re)
(let ((op (car s-re)))
(case op
((union intersection) (andmap char-set? (cdr s-re)))
((char-range char-complement) #t)
((repetition)
(and (= (cadr s-re) (caddr s-re)) (char-set? (cadddr s-re))))
((concatenation)
(and (= 2 (length s-re)) (char-set? (cadr s-re))))
(else #f))))
(else #f)))
[(char? s-re)]
[(string? s-re) (= (string-length s-re) 1)]
[(list? s-re) (case (car s-re)
[(union intersection) (andmap char-set? (cdr s-re))]
[(char-range char-complement) #t]
[(repetition) (and (= (cadr s-re) (caddr s-re)) (char-set? (cadddr s-re)))]
[(concatenation) (and (= 2 (length s-re)) (char-set? (cadr s-re)))]
(else #f))]
[else #f]))
(module+ test
(require rackunit))
(module+ test
(require rackunit)
(check-equal? (char-set? #\a) #t)
(check-equal? (char-set? "12") #f)
(check-equal? (char-set? "1") #t)
@ -217,4 +183,3 @@
(check-equal? (parse #'(char-range "1" "3") null) '(char-range #\1 #\3))
(check-equal? (parse #'(char-complement (union "1" "2")) null)
'(char-complement (union "1" "2"))))
; )

@ -1,9 +1,7 @@
(module token-syntax mzscheme
;; The things needed at compile time to handle definition of tokens
#lang racket/base
(provide make-terminals-def terminals-def-t terminals-def?
make-e-terminals-def e-terminals-def-t e-terminals-def?)
;; The things needed at compile time to handle definition of tokens
(define-struct terminals-def (t))
(define-struct e-terminals-def (t))
)

@ -1,68 +1,57 @@
(module token mzscheme
(require-for-syntax "token-syntax.rkt")
#lang racket/base
(require (for-syntax racket/base "token-syntax.rkt"))
;; Defining tokens
(provide define-tokens define-empty-tokens make-token token?
(protect (rename token-name real-token-name))
(protect (rename token-value real-token-value))
(rename token-name* token-name)
(rename token-value* token-value)
(struct position (offset line col))
(struct position-token (token start-pos end-pos))
(struct srcloc-token (token srcloc)))
(protect-out (rename-out [token-name real-token-name]))
(protect-out (rename-out [token-value real-token-value]))
(rename-out [token-name* token-name][token-value* token-value])
(struct-out position)
(struct-out position-token)
(struct-out srcloc-token))
;; A token is either
;; - symbol
;; - (make-token symbol any)
(define-struct token (name value) (make-inspector))
(define-struct token (name value) #:inspector (make-inspector))
;; token-name*: token -> symbol
(define (token-name* t)
(cond
((symbol? t) t)
((token? t) (token-name t))
(else (raise-type-error
'token-name
"symbol or struct:token"
0
t))))
[(symbol? t) t]
[(token? t) (token-name t)]
[else (raise-type-error 'token-name "symbol or struct:token" 0 t)]))
;; token-value*: token -> any
(define (token-value* t)
(cond
((symbol? t) #f)
((token? t) (token-value t))
(else (raise-type-error
'token-value
"symbol or struct:token"
0
t))))
[(symbol? t) #f]
[(token? t) (token-value t)]
[else (raise-type-error 'token-value "symbol or struct:token" 0 t)]))
(define-for-syntax (make-ctor-name n)
(datum->syntax-object n
(datum->syntax n
(string->symbol (format "token-~a" (syntax-e n)))
n
n))
(define-for-syntax (make-define-tokens empty?)
(lambda (stx)
(define-for-syntax ((make-define-tokens empty?) stx)
(syntax-case stx ()
((_ name (token ...))
(andmap identifier? (syntax->list (syntax (token ...))))
[(_ NAME (TOKEN ...))
(andmap identifier? (syntax->list #'(TOKEN ...)))
(with-syntax (((marked-token ...)
(map values #;(make-syntax-introducer)
(syntax->list (syntax (token ...))))))
(syntax->list #'(TOKEN ...)))))
(quasisyntax/loc stx
(begin
(define-syntax name
(define-syntax NAME
#,(if empty?
#'(make-e-terminals-def (quote-syntax (marked-token ...)))
#'(make-terminals-def (quote-syntax (marked-token ...)))))
#,@(map
(lambda (n)
(λ (n)
(when (eq? (syntax-e n) 'error)
(raise-syntax-error
#f
@ -73,20 +62,19 @@
'#,n)
#`(define (#,(make-ctor-name n) x)
(make-token '#,n x))))
(syntax->list (syntax (token ...))))
#;(define marked-token #f) #;...))))
((_ ...)
(raise-syntax-error
#f
(syntax->list #'(TOKEN ...)))
#;(define marked-token #f) #;...)))]
[(_ ...)
(raise-syntax-error #f
"must have the form (define-tokens name (identifier ...)) or (define-empty-tokens name (identifier ...))"
stx)))))
stx)]))
(define-syntax define-tokens (make-define-tokens #f))
(define-syntax define-empty-tokens (make-define-tokens #t))
(define-struct position (offset line col) #f)
(define-struct position-token (token start-pos end-pos) #f)
(define-struct position (offset line col) #:inspector #f)
(define-struct position-token (token start-pos end-pos) #:inspector #f)
(define-struct srcloc-token (token srcloc) #:inspector #f)
(define-struct srcloc-token (token srcloc) #f)
)

@ -1,6 +1,5 @@
#lang racket
(require "util.rkt")
#lang racket/base
(require racket/promise "util.rkt")
(provide (all-defined-out))
@ -10,36 +9,33 @@
;; get-chars-for-x : (nat -> bool) (listof (list nat nat bool)) -> (listof (cons nat nat))
(define (get-chars-for char-x? mapped-chars)
(cond
((null? mapped-chars) null)
(else
(let* ((range (car mapped-chars))
(low (car range))
(high (cadr range))
(x (char-x? low)))
[(null? mapped-chars) null]
[else
(define range (car mapped-chars))
(define low (car range))
(define high (cadr range))
(define x (char-x? low))
(cond
((caddr range)
[(caddr range)
(if x
(cons (cons low high)
(get-chars-for char-x? (cdr mapped-chars)))
(get-chars-for char-x? (cdr mapped-chars))))
(else
(let loop ((range-start low)
(i (car range))
(parity x))
(cons (cons low high) (get-chars-for char-x? (cdr mapped-chars)))
(get-chars-for char-x? (cdr mapped-chars)))]
[else
(let loop ([range-start low]
[i (car range)]
[parity x])
(cond
((> i high)
[(> i high)
(if parity
(cons (cons range-start high) (get-chars-for char-x? (cdr mapped-chars)))
(get-chars-for char-x? (cdr mapped-chars))))
((eq? parity (char-x? i))
(loop range-start (add1 i) parity))
(parity
(cons (cons range-start (sub1 i)) (loop i (add1 i) #f)))
(else
(loop i (add1 i) #t))))))))))
(get-chars-for char-x? (cdr mapped-chars)))]
[(eq? parity (char-x? i))
(loop range-start (add1 i) parity)]
[parity (cons (cons range-start (sub1 i)) (loop i (add1 i) #f))]
[else (loop i (add1 i) #t)]))])]))
(define (compute-ranges x?)
(delay (get-chars-for (lambda (x) (x? (integer->char x))) mapped-chars)))
(delay (get-chars-for (λ (x) (x? (integer->char x))) mapped-chars)))
(define alphabetic-ranges (compute-ranges char-alphabetic?)) ;; 325
(define lower-case-ranges (compute-ranges char-lower-case?)) ;; 405
@ -61,7 +57,7 @@
(check-equal? (get-chars-for odd? '()) '())
(check-equal? (get-chars-for odd? '((1 4 #f) (8 13 #f)))
'((1 . 1) (3 . 3) (9 . 9) (11 . 11) (13 . 13)))
(check-equal? (get-chars-for (lambda (x)
(check-equal? (get-chars-for (λ (x)
(odd? (quotient x 10)))
'((1 5 #t) (17 19 #t) (21 51 #f)))
'((17 . 19) (30 . 39) (50 . 51))))

@ -1,4 +1,5 @@
#lang racket
#lang racket/base
(require (for-syntax racket/base))
(provide (all-defined-out))
@ -10,16 +11,16 @@
(module+ test
(require rackunit))
#;(define-syntax test-block
(syntax-rules ()
((_ defs (code right-ans) ...)
(define-syntax (test-block stx)
(syntax-case stx ()
[(_ defs (code right-ans) ...)
#'(module+ test
(require rackunit)
(let* defs
(let ((real-ans code))
(unless (equal? real-ans right-ans)
(printf "Test failed: ~e gave ~e. Expected ~e\n"
'code real-ans 'right-ans))) ...))))
(let ([real-ans code])
(check-equal? real-ans right-ans)) ...))]))
(define-syntax test-block
#;(define-syntax test-block
(syntax-rules ()
((_ x ...) (void))))
@ -31,23 +32,22 @@
;; returned.
;; Xs are compared with equal?
(define (make-cache)
(let ((table (make-hash)))
(lambda (key build)
(hash-ref table key
(lambda ()
(let ((new (build)))
(let ([table (make-hash)])
(λ (key build)
(hash-ref table key (λ ()
(let ([new (build)])
(hash-set! table key new)
new))))))
(module+ test
(define cache (make-cache))
(check-equal? (cache '(s 1 2) (lambda () 9)) 9)
(check-equal? (cache '(s 2 1) (lambda () 8)) 8)
(check-equal? (cache '(s 1 2) (lambda () 1)) 9)
(check-equal? (cache '(s 1 2) (λ () 9)) 9)
(check-equal? (cache '(s 2 1) (λ () 8)) 8)
(check-equal? (cache '(s 1 2) (λ () 1)) 9)
(check-equal? (cache (cons 's (cons 0 (cons +inf.0 10)))
(lambda () 22)) 22)
(λ () 22)) 22)
(check-equal? (cache (cons 's (cons 0 (cons +inf.0 10)))
(lambda () 1)) 22))
(λ () 1)) 22))
@ -55,8 +55,8 @@
;; makes a function that returns a higher number by 1, each time
;; it is called.
(define (make-counter)
(let ((counter 0))
(lambda ()
(let ([counter 0])
(λ ()
(begin0
counter
(set! counter (add1 counter))))))
@ -76,12 +76,12 @@
;; previous entry. l must be grouped by indexes.
(define (remove-dups l index acc)
(cond
((null? l) (reverse acc))
((null? acc) (remove-dups (cdr l) index (cons (car l) acc)))
((= (index (car acc)) (index (car l)))
(remove-dups (cdr l) index acc))
(else
(remove-dups (cdr l) index (cons (car l) acc)))))
[(null? l) (reverse acc)]
[(null? acc) (remove-dups (cdr l) index (cons (car l) acc))]
[(= (index (car acc)) (index (car l)))
(remove-dups (cdr l) index acc)]
[else
(remove-dups (cdr l) index (cons (car l) acc))]))
(module+ test
@ -94,8 +94,8 @@
;; Sorts l according to index and removes the entries with duplicate
;; indexes.
(define (do-simple-equiv l index)
(let ((ordered (sort l (lambda (a b) (< (index a) (index b))))))
(remove-dups ordered index null)))
(define ordered (sort l (λ (a b) (< (index a) (index b)))))
(remove-dups ordered index null))
(module+ test
(check-equal? (do-simple-equiv '((2 2) (1 4) (1 2)
@ -110,16 +110,16 @@
;; list.
(define (replace l pred? get acc)
(cond
((null? l) acc)
((pred? (car l)) (replace (cdr l) pred? get (append (get (car l)) acc)))
(else (replace (cdr l) pred? get (cons (car l) acc)))))
[(null? l) acc]
[(pred? (car l)) (replace (cdr l) pred? get (append (get (car l)) acc))]
[else (replace (cdr l) pred? get (cons (car l) acc))]))
(module+ test
(check-equal? (replace null void (lambda () (list 1)) null) null)
(check-equal? (replace null void (λ () (list 1)) null) null)
(check-equal? (replace '(1 2 3 4 3 5)
(lambda (x) (= x 3))
(lambda (x) (list 1 2 3))
(λ (x) (= x 3))
(λ (x) (list 1 2 3))
null)
'(5 1 2 3 4 1 2 3 2 1)))

@ -1,41 +1,38 @@
#lang racket/base
;; Constructs to create and access grammars, the internal
;; representation of the input to the parser generator.
(module grammar mzscheme
(require mzlib/class
mzlib/list
(require racket/class
(except-in racket/list remove-duplicates)
"yacc-helper.rkt"
racket/contract)
;; Each production has a unique index 0 <= index <= number of productions
(define-struct prod (lhs rhs index prec action) (make-inspector))
(define-struct prod (lhs rhs index prec action) #:inspector (make-inspector) #:mutable)
;; The dot-pos field is the index of the element in the rhs
;; of prod that the dot immediately precedes.
;; Thus 0 <= dot-pos <= (vector-length rhs).
(define-struct item (prod dot-pos) (make-inspector))
(define-struct item (prod dot-pos) #:inspector (make-inspector))
;; gram-sym = (union term? non-term?)
;; Each term has a unique index 0 <= index < number of terms
;; Each non-term has a unique index 0 <= index < number of non-terms
(define-struct term (sym index prec) (make-inspector))
(define-struct non-term (sym index) (make-inspector))
(define-struct term (sym index prec) #:inspector (make-inspector) #:mutable)
(define-struct non-term (sym index) #:inspector (make-inspector) #:mutable)
;; a precedence declaration.
(define-struct prec (num assoc) (make-inspector))
(define-struct prec (num assoc) #:inspector (make-inspector))
(provide/contract
(make-item (prod? (or/c #f natural-number/c) . -> . item?))
(make-term (symbol? (or/c #f natural-number/c) (or/c prec? #f) . -> . term?))
(make-non-term (symbol? (or/c #f natural-number/c) . -> . non-term?))
(make-prec (natural-number/c (or/c 'left 'right 'nonassoc) . -> . prec?))
(make-prod (non-term? (vectorof (or/c non-term? term?))
(or/c #f natural-number/c) (or/c #f prec?) syntax? . -> . prod?)))
[make-item (prod? (or/c #f natural-number/c) . -> . item?)]
[make-term (symbol? (or/c #f natural-number/c) (or/c prec? #f) . -> . term?)]
[make-non-term (symbol? (or/c #f natural-number/c) . -> . non-term?)]
[make-prec (natural-number/c (or/c 'left 'right 'nonassoc) . -> . prec?)]
[make-prod (non-term? (vectorof (or/c non-term? term?))
(or/c #f natural-number/c) (or/c #f prec?) syntax? . -> . prod?)])
(provide
;; Things that work on items
start-item? item-prod item->string
sym-at-dot move-dot-right item<? item-dot-pos
@ -59,18 +56,16 @@
;; item<?: LR-item * LR-item -> bool
;; Lexicographic comparison on two items.
(define (item<? i1 i2)
(let ((p1 (prod-index (item-prod i1)))
(p2 (prod-index (item-prod i2))))
(define p1 (prod-index (item-prod i1)))
(define p2 (prod-index (item-prod i2)))
(or (< p1 p2)
(and (= p1 p2)
(let ((d1 (item-dot-pos i1))
(d2 (item-dot-pos i2)))
(< d1 d2))))))
(< (item-dot-pos i1) (item-dot-pos i2)))))
;; start-item?: LR-item -> bool
;; The start production always has index 0
(define (start-item? i)
(= 0 (non-term-index (prod-lhs (item-prod i)))))
(zero? (non-term-index (prod-lhs (item-prod i)))))
;; move-dot-right: LR-item -> LR-item | #f
@ -78,38 +73,38 @@
;; rightmost, then it returns false
(define (move-dot-right i)
(cond
((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f)
(else (make-item (item-prod i)
(add1 (item-dot-pos i))))))
[(= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f]
[else (make-item (item-prod i)
(add1 (item-dot-pos i)))]))
;; sym-at-dot: LR-item -> gram-sym | #f
;; returns the symbol after the dot in the item or #f if there is none
(define (sym-at-dot i)
(let ((dp (item-dot-pos i))
(rhs (prod-rhs (item-prod i))))
(define dp (item-dot-pos i))
(define rhs (prod-rhs (item-prod i)))
(cond
((= dp (vector-length rhs)) #f)
(else (vector-ref rhs dp)))))
[(= dp (vector-length rhs)) #f]
[else (vector-ref rhs dp)]))
;; print-item: LR-item ->
(define (item->string it)
(let ((print-sym (lambda (i)
(define print-sym (λ (i)
(let ((gs (vector-ref (prod-rhs (item-prod it)) i)))
(cond
((term? gs) (format "~a " (term-sym gs)))
(else (format "~a " (non-term-sym gs))))))))
(else (format "~a " (non-term-sym gs)))))))
(string-append
(format "~a -> " (non-term-sym (prod-lhs (item-prod it))))
(let loop ((i 0))
(cond
((= i (vector-length (prod-rhs (item-prod it))))
[(= i (vector-length (prod-rhs (item-prod it))))
(if (= i (item-dot-pos it))
". "
""))
((= i (item-dot-pos it))
(string-append ". " (print-sym i) (loop (add1 i))))
(else (string-append (print-sym i) (loop (add1 i)))))))))
"")]
[(= i (item-dot-pos it))
(string-append ". " (print-sym i) (loop (add1 i)))]
[else (string-append (print-sym i) (loop (add1 i)))]))))
;; --------------------- Grammar Symbols --------------------------
@ -120,14 +115,14 @@
(< (term-index nt1) (term-index nt2)))
(define (gram-sym-index gs)
(cond
((term? gs) (term-index gs))
(else (non-term-index gs))))
(if (term? gs)
(term-index gs)
(non-term-index gs)))
(define (gram-sym-symbol gs)
(cond
((term? gs) (term-sym gs))
(else (non-term-sym gs))))
(if (term? gs)
(term-sym gs)
(non-term-sym gs)))
(define (gram-sym->string gs)
(symbol->string (gram-sym-symbol gs)))
@ -136,10 +131,10 @@
;; Creates a number where the nth bit is 1 if the term with index n is in
;; the list, and whose nth bit is 0 otherwise
(define (term-list->bit-vector terms)
(cond
((null? terms) 0)
(else
(bitwise-ior (arithmetic-shift 1 (term-index (car terms))) (term-list->bit-vector (cdr terms))))))
(if (null? terms)
0
(bitwise-ior (arithmetic-shift 1 (term-index (car terms)))
(term-list->bit-vector (cdr terms)))))
;; ------------------------- Grammar ------------------------------
@ -161,33 +156,20 @@
(define num-terms (length terms))
(define num-non-terms (length non-terms))
(let ((count 0))
(for-each
(lambda (nt)
(set-non-term-index! nt count)
(set! count (add1 count)))
non-terms))
(let ((count 0))
(for-each
(lambda (t)
(set-term-index! t count)
(set! count (add1 count)))
terms))
(let ((count 0))
(for-each
(lambda (prod)
(set-prod-index! prod count)
(set! count (add1 count)))
all-prods))
(for ([(nt count) (in-indexed non-terms)])
(set-non-term-index! nt count))
(for ([(t count) (in-indexed terms)])
(set-term-index! t count))
(for ([(prod count) (in-indexed all-prods)])
(set-prod-index! prod count))
;; indexed by the index of the non-term - contains the list of productions for that non-term
(define nt->prods
(let ((v (make-vector (length prods) #f)))
(for-each (lambda (prods)
(for ([prods (in-list prods)])
(vector-set! v (non-term-index (prod-lhs (car prods))) prods))
prods)
v))
(define nullable-non-terms
@ -211,70 +193,58 @@
(vector-ref nullable-non-terms (non-term-index nt)))
(define/public (nullable-after-dot? item)
(let* ((rhs (prod-rhs (item-prod item)))
(prod-length (vector-length rhs)))
(define rhs (prod-rhs (item-prod item)))
(define prod-length (vector-length rhs))
(let loop ((i (item-dot-pos item)))
(cond
((< i prod-length)
(if (and (non-term? (vector-ref rhs i)) (nullable-non-term? (vector-ref rhs i)))
(loop (add1 i))
#f))
((= i prod-length) #t)))))
[(< i prod-length)
(and (non-term? (vector-ref rhs i))
(nullable-non-term? (vector-ref rhs i))
(loop (add1 i)))]
[(= i prod-length)])))
(define/public (nullable-non-term-thunk)
(lambda (nt)
(nullable-non-term? nt)))
(λ (nt) (nullable-non-term? nt)))
(define/public (nullable-after-dot?-thunk)
(lambda (item)
(nullable-after-dot? item)))))
(λ (item) (nullable-after-dot? item)))))
;; nullable: production list * int -> non-term set
;; determines which non-terminals can derive epsilon
(define (nullable prods num-nts)
(letrec ((nullable (make-vector num-nts #f))
(added #f)
(define nullable (make-vector num-nts #f))
(define added #f)
;; possible-nullable: producion list -> production list
;; Removes all productions that have a terminal
(possible-nullable
(lambda (prods)
(filter (lambda (prod)
(vector-andmap non-term? (prod-rhs prod)))
prods)))
(define (possible-nullable prods)
(for/list ([prod (in-list prods)]
#:when (vector-andmap non-term? (prod-rhs prod)))
prod))
;; set-nullables: production list -> production list
;; makes one pass through the productions, adding the ones
;; known to be nullable now to nullable and returning a list
;; of productions that we don't know about yet.
(set-nullables
(lambda (prods)
(define (set-nullables prods)
(cond
((null? prods) null)
((vector-ref nullable
(gram-sym-index (prod-lhs (car prods))))
(set-nullables (cdr prods)))
((vector-andmap (lambda (nt)
(vector-ref nullable (gram-sym-index nt)))
(prod-rhs (car prods)))
(vector-set! nullable
(gram-sym-index (prod-lhs (car prods)))
#t)
[(null? prods) null]
[(vector-ref nullable (gram-sym-index (prod-lhs (car prods))))
(set-nullables (cdr prods))]
[(vector-andmap (λ (nt) (vector-ref nullable (gram-sym-index nt))) (prod-rhs (car prods)))
(vector-set! nullable (gram-sym-index (prod-lhs (car prods))) #t)
(set! added #t)
(set-nullables (cdr prods)))
(else
(cons (car prods)
(set-nullables (cdr prods))))))))
(set-nullables (cdr prods))]
[else (cons (car prods) (set-nullables (cdr prods)))]))
(let loop ((P (possible-nullable prods)))
(cond
((null? P) nullable)
(else
[(null? P) nullable]
[else
(set! added #f)
(let ((new-P (set-nullables P)))
(define new-P (set-nullables P))
(if added
(loop new-P)
nullable)))))))
nullable)])))
)

@ -1,5 +1,4 @@
(module graph mzscheme
#lang racket/base
(provide digraph)
(define (zero-thunk) 0)
@ -11,51 +10,44 @@
;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)}
;; We use a hash-table to represent the result function 'a -> 'b set, so
;; the values of type 'a must be comparable with eq?.
(define (digraph nodes edges f- union fail)
(letrec [
;; Will map elements of 'a to 'b sets
(results (make-hash-table))
(f (lambda (x) (hash-table-get results x fail)))
(define (digraph nodes edges f- union fail)
(define results (make-hasheq))
(define (f x) (hash-ref results x fail))
;; Maps elements of 'a to integers.
(N (make-hash-table))
(get-N (lambda (x) (hash-table-get N x zero-thunk)))
(set-N (lambda (x d) (hash-table-put! N x d)))
(stack null)
(push (lambda (x)
(set! stack (cons x stack))))
(pop (lambda ()
(begin0
(define N (make-hasheq))
(define (get-N x) (hash-ref N x zero-thunk))
(define (set-N x d) (hash-set! N x d))
(define stack null)
(define (push x) (set! stack (cons x stack)))
(define (pop) (begin0
(car stack)
(set! stack (cdr stack)))))
(depth (lambda () (length stack)))
(set! stack (cdr stack))))
(define (depth) (length stack))
;; traverse: 'a ->
(traverse
(lambda (x)
(define (traverse x)
(push x)
(let ((d (depth)))
(define d (depth))
(set-N x d)
(hash-table-put! results x (f- x))
(for-each (lambda (y)
(if (= 0 (get-N y))
(hash-set! results x (f- x))
(for-each (λ (y)
(when (= 0 (get-N y))
(traverse y))
(hash-table-put! results
(hash-set! results
x
(union (f x) (f y)))
(set-N x (min (get-N x) (get-N y))))
(edges x))
(if (= d (get-N x))
(let loop ((p (pop)))
(when (= d (get-N x))
(let loop ([p (pop)])
(set-N p +inf.0)
(hash-table-put! results p (f x))
(if (not (eq? x p))
(loop (pop))))))))]
(for-each (lambda (x)
(if (= 0 (get-N x))
(traverse x)))
nodes)
f))
(hash-set! results p (f x))
(when (not (eq? x p))
(loop (pop))))))
;; Will map elements of 'a to 'b sets
(for ([x (in-list nodes)]
#:when (zero? (get-N x)))
(traverse x))
f)
)

@ -1,374 +1,297 @@
(module input-file-parser mzscheme
;; routines for parsing the input to the parser generator and producing a
;; grammar (See grammar.rkt)
#lang racket/base
(require "yacc-helper.rkt"
"../private-lex/token-syntax.rkt"
"grammar.rkt"
mzlib/class
racket/contract)
(require-for-template mzscheme)
racket/class
racket/contract
(for-template racket/base))
;; routines for parsing the input to the parser generator and producing a
;; grammar (See grammar.rkt)
(define (is-a-grammar%? x) (is-a? x grammar%))
(provide/contract
(parse-input ((listof identifier?) (listof identifier?) (listof identifier?)
(or/c #f syntax?) syntax? any/c . -> . is-a-grammar%?))
(get-term-list ((listof identifier?) . -> . (listof identifier?))))
[parse-input ((listof identifier?) (listof identifier?) (listof identifier?)
(or/c #f syntax?) syntax? any/c . -> . is-a-grammar%?)]
[get-term-list ((listof identifier?) . -> . (listof identifier?))])
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
;; get-args: ??? -> (values (listof syntax) (or/c #f (cons integer? stx)))
(define (get-args i rhs src-pos term-defs)
(let ((empty-table (make-hash-table))
(biggest-pos #f))
(hash-table-put! empty-table 'error #t)
(for-each (lambda (td)
(let ((v (syntax-local-value td)))
(if (e-terminals-def? v)
(for-each (lambda (s)
(hash-table-put! empty-table (syntax-object->datum s) #t))
(syntax->list (e-terminals-def-t v))))))
term-defs)
(let ([args
(let get-args ((i i)
(rhs rhs))
(define empty-table (make-hasheq))
(define biggest-pos #f)
(hash-set! empty-table 'error #t)
(for* ([td (in-list term-defs)]
[v (in-value (syntax-local-value td))]
#:when (e-terminals-def? v)
[s (in-list (syntax->list (e-terminals-def-t v)))])
(hash-set! empty-table (syntax->datum s) #t))
(define args
(let get-args ([i i][rhs rhs])
(cond
((null? rhs) null)
(else
(let ((b (car rhs))
(name (if (hash-table-get empty-table (syntax-object->datum (car rhs)) (lambda () #f))
[(null? rhs) null]
[else
(define b (car rhs))
(define name (if (hash-ref empty-table (syntax->datum (car rhs)) #f)
(gensym)
(string->symbol (format "$~a" i)))))
(string->symbol (format "$~a" i))))
(cond
(src-pos
(let ([start-pos-id
(datum->syntax-object b (string->symbol (format "$~a-start-pos" i)) b stx-for-original-property)]
[end-pos-id
(datum->syntax-object b (string->symbol (format "$~a-end-pos" i)) b stx-for-original-property)])
[src-pos
(define start-pos-id
(datum->syntax b (string->symbol (format "$~a-start-pos" i)) b stx-for-original-property))
(define end-pos-id
(datum->syntax b (string->symbol (format "$~a-end-pos" i)) b stx-for-original-property))
(set! biggest-pos (cons start-pos-id end-pos-id))
`(,(datum->syntax-object b name b stx-for-original-property)
,start-pos-id
,end-pos-id
,@(get-args (add1 i) (cdr rhs)))))
(else
`(,(datum->syntax-object b name b stx-for-original-property)
,@(get-args (add1 i) (cdr rhs)))))))))])
(values args biggest-pos))))
(list* (datum->syntax b name b stx-for-original-property)
start-pos-id
end-pos-id
(get-args (add1 i) (cdr rhs)))]
[else
(list* (datum->syntax b name b stx-for-original-property)
(get-args (add1 i) (cdr rhs)))])])))
(values args biggest-pos))
;; Given the list of terminal symbols and the precedence/associativity definitions,
;; builds terminal structures (See grammar.rkt)
;; build-terms: symbol list * symbol list list -> term list
(define (build-terms term-list precs)
(let ((counter 0)
(define counter 0)
;;(term-list (cons (gensym) term-list))
;; Will map a terminal symbol to its precedence/associativity
(prec-table (make-hash-table)))
(define prec-table (make-hasheq))
;; Fill the prec table
(for-each
(lambda (p-decl)
(begin0
(let ((assoc (car p-decl)))
(for-each
(lambda (term-sym)
(hash-table-put! prec-table term-sym (make-prec counter assoc)))
(cdr p-decl)))
(set! counter (add1 counter))))
precs)
(for ([p-decl (in-list precs)])
(define assoc (car p-decl))
(for ([term-sym (in-list (cdr p-decl))])
(hash-set! prec-table term-sym (make-prec counter assoc)))
(set! counter (add1 counter)))
;; Build the terminal structures
(map
(lambda (term-sym)
(for/list ([term-sym (in-list term-list)])
(make-term term-sym
#f
(hash-table-get prec-table term-sym (lambda () #f))))
term-list)))
(hash-ref prec-table term-sym (λ () #f)))))
;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.rkt)
;; get-terms-from-def: identifier? -> (listof identifier?)
(define (get-terms-from-def term-syn)
(let ((t (syntax-local-value term-syn (lambda () #f))))
(define t (syntax-local-value term-syn #f))
(cond
((terminals-def? t) (syntax->list (terminals-def-t t)))
((e-terminals-def? t) (syntax->list (e-terminals-def-t t)))
(else
[(terminals-def? t) (syntax->list (terminals-def-t t))]
[(e-terminals-def? t) (syntax->list (e-terminals-def-t t))]
[else
(raise-syntax-error
'parser-tokens
"undefined token group"
term-syn)))))
term-syn)]))
(define (get-term-list term-group-names)
(remove-duplicates
(cons (datum->syntax-object #f 'error)
(apply append
(map get-terms-from-def term-group-names)))))
(cons (datum->syntax #f 'error)
(apply append (map get-terms-from-def term-group-names)))))
(define (parse-input term-defs start ends prec-decls prods src-pos)
(let* ((start-syms (map syntax-e start))
(list-of-terms (map syntax-e (get-term-list term-defs)))
(end-terms
(map
(lambda (end)
(define start-syms (map syntax-e start))
(define list-of-terms (map syntax-e (get-term-list term-defs)))
(define end-terms
(for/list ([end (in-list ends)])
(unless (memq (syntax-e end) list-of-terms)
(raise-syntax-error
'parser-end-tokens
(format "End token ~a not defined as a token"
(syntax-e end))
end))
(syntax-e end))
ends))
(syntax-e end)))
;; Get the list of terminals out of input-terms
(list-of-non-terms
(define list-of-non-terms
(syntax-case prods ()
(((non-term production ...) ...)
[((NON-TERM PRODUCTION ...) ...)
(begin
(for-each
(lambda (nts)
(if (memq (syntax-object->datum nts) list-of-terms)
(for ([nts (in-list (syntax->list #'(NON-TERM ...)))]
#:when (memq (syntax->datum nts) list-of-terms))
(raise-syntax-error
'parser-non-terminals
(format "~a used as both token and non-terminal"
(syntax-object->datum nts))
nts)))
(syntax->list (syntax (non-term ...))))
(let ((dup (duplicate-list? (syntax-object->datum
(syntax (non-term ...))))))
(if dup
(format "~a used as both token and non-terminal" (syntax->datum nts))
nts))
(let ([dup (duplicate-list? (syntax->datum #'(NON-TERM ...)))])
(when dup
(raise-syntax-error
'parser-non-terminals
(format "non-terminal ~a defined multiple times"
dup)
(format "non-terminal ~a defined multiple times" dup)
prods)))
(syntax-object->datum (syntax (non-term ...)))))
(_
(raise-syntax-error
(syntax->datum #'(NON-TERM ...)))]
[_ (raise-syntax-error
'parser-grammar
"Grammar must be of the form (grammar (non-terminal productions ...) ...)"
prods))))
prods)]))
;; Check the precedence declarations for errors and turn them into data
(precs
(define precs
(syntax-case prec-decls ()
(((type term ...) ...)
(let ((p-terms
(syntax-object->datum (syntax (term ... ...)))))
[((TYPE TERM ...) ...)
(let ([p-terms (syntax->datum #'(TERM ... ...))])
(cond
((duplicate-list? p-terms) =>
(lambda (d)
[(duplicate-list? p-terms) =>
(λ (d)
(raise-syntax-error
'parser-precedences
(format "duplicate precedence declaration for token ~a"
d)
prec-decls)))
(else
(for-each
(lambda (a)
(for-each
(lambda (t)
(if (not (memq (syntax-object->datum t)
list-of-terms))
(format "duplicate precedence declaration for token ~a" d)
prec-decls))]
[else (for ([t (in-list (syntax->list #'(TERM ... ...)))]
#:when (not (memq (syntax->datum t) list-of-terms)))
(raise-syntax-error
'parser-precedences
(format
"Precedence declared for non-token ~a"
(syntax-object->datum t))
t)))
(syntax->list a)))
(syntax->list (syntax ((term ...) ...))))
(for-each
(lambda (type)
(if (not (memq (syntax-object->datum type)
`(left right nonassoc)))
(format "Precedence declared for non-token ~a" (syntax->datum t))
t))
(for ([type (in-list (syntax->list #'(TYPE ...)))]
#:unless (memq (syntax->datum type) `(left right nonassoc)))
(raise-syntax-error
'parser-precedences
"Associativity must be left, right or nonassoc"
type)))
(syntax->list (syntax (type ...))))
(syntax-object->datum prec-decls)))))
(#f null)
(_
(raise-syntax-error
type))
(syntax->datum prec-decls)]))]
[#f null]
[_ (raise-syntax-error
'parser-precedences
"Precedence declaration must be of the form (precs (assoc term ...) ...) where assoc is left, right or nonassoc"
prec-decls))))
(terms (build-terms list-of-terms precs))
prec-decls)]))
(non-terms (map (lambda (non-term) (make-non-term non-term #f))
(define terms (build-terms list-of-terms precs))
(define non-terms (map (λ (non-term) (make-non-term non-term #f))
list-of-non-terms))
(term-table (make-hash-table))
(non-term-table (make-hash-table)))
(define term-table (make-hasheq))
(define non-term-table (make-hasheq))
(for-each (lambda (t)
(hash-table-put! term-table (gram-sym-symbol t) t))
terms)
(for ([t (in-list terms)])
(hash-set! term-table (gram-sym-symbol t) t))
(for-each (lambda (nt)
(hash-table-put! non-term-table (gram-sym-symbol nt) nt))
non-terms)
(for ([nt (in-list non-terms)])
(hash-set! non-term-table (gram-sym-symbol nt) nt))
(let* (
;; parse-prod: syntax-object -> gram-sym vector
(parse-prod
(lambda (prod-so)
(define (parse-prod prod-so)
(syntax-case prod-so ()
((prod-rhs-sym ...)
[(PROD-RHS-SYM ...)
(andmap identifier? (syntax->list prod-so))
(begin
(for-each (lambda (t)
(if (memq (syntax-object->datum t) end-terms)
(for ([t (in-list (syntax->list prod-so))]
#:when (memq (syntax->datum t) end-terms))
(raise-syntax-error
'parser-production-rhs
(format "~a is an end token and cannot be used in a production"
(syntax-object->datum t))
t)))
(syntax->list prod-so))
(list->vector
(map (lambda (s)
(hash-table-get
term-table
(syntax-object->datum s)
(lambda ()
(hash-table-get
non-term-table
(syntax-object->datum s)
(lambda ()
(raise-syntax-error
(format "~a is an end token and cannot be used in a production" (syntax->datum t))
t))
(for/vector ([s (in-list (syntax->list prod-so))])
(cond
[(hash-ref term-table (syntax->datum s) #f)]
[(hash-ref non-term-table (syntax->datum s) #f)]
[else (raise-syntax-error
'parser-production-rhs
(format
"~a is not declared as a terminal or non-terminal"
(syntax-object->datum s))
s))))))
(syntax->list prod-so)))))
(_
(raise-syntax-error
(format "~a is not declared as a terminal or non-terminal" (syntax->datum s))
s)])))]
[_ (raise-syntax-error
'parser-production-rhs
"production right-hand-side must have form (symbol ...)"
prod-so)))))
prod-so)]))
;; parse-action: syntax-object * syntax-object -> syntax-object
(parse-action
(lambda (rhs act)
(let-values ([(args biggest) (get-args 1 (syntax->list rhs) src-pos term-defs)])
(let ([act
(define (parse-action rhs act-in)
(define-values (args biggest) (get-args 1 (syntax->list rhs) src-pos term-defs))
(define act
(if biggest
(with-syntax ([$n-start-pos (datum->syntax-object (car biggest) '$n-start-pos)]
[$n-end-pos (datum->syntax-object (cdr biggest) '$n-end-pos)])
#`(let ([$n-start-pos #,(car biggest)]
[$n-end-pos #,(cdr biggest)])
#,act))
act)])
(quasisyntax/loc act
(lambda #,args
#,act))))))
(with-syntax ([(CAR-BIGGEST . CDR-BIGGEST) biggest]
[$N-START-POS (datum->syntax (car biggest) '$n-start-pos)]
[$N-END-POS (datum->syntax (cdr biggest) '$n-end-pos)]
[ACT-IN act-in])
#'(let ([$N-START-POS CAR-BIGGEST]
[$N-END-POS CDR-BIGGEST])
ACT-IN))
act-in))
(with-syntax ([ARGS args][ACT act])
(syntax/loc #'ACT (λ ARGS ACT))))
;; parse-prod+action: non-term * syntax-object -> production
(parse-prod+action
(lambda (nt prod-so)
(define (parse-prod+action nt prod-so)
(syntax-case prod-so ()
((prod-rhs action)
(let ((p (parse-prod (syntax prod-rhs))))
[(PROD-RHS ACTION)
(let ([p (parse-prod #'PROD-RHS)])
(make-prod
nt
p
#f
(let loop ((i (sub1 (vector-length p))))
(let loop ([i (sub1 (vector-length p))])
(if (>= i 0)
(let ((gs (vector-ref p i)))
(let ([gs (vector-ref p i)])
(if (term? gs)
(term-prec gs)
(loop (sub1 i))))
#f))
(parse-action (syntax prod-rhs) (syntax action)))))
((prod-rhs (prec term) action)
(identifier? (syntax term))
(let ((p (parse-prod (syntax prod-rhs))))
(parse-action #'PROD-RHS #'ACTION)))]
[(PROD-RHS (PREC TERM) ACTION)
(identifier? #'TERM)
(let ([p (parse-prod #'PROD-RHS)])
(make-prod
nt
p
#f
(term-prec
(hash-table-get
term-table
(syntax-object->datum (syntax term))
(lambda ()
(raise-syntax-error
(cond
[(hash-ref term-table (syntax->datum #'TERM) #f)]
[else (raise-syntax-error
'parser-production-rhs
(format
"unrecognized terminal ~a in precedence declaration"
(syntax-object->datum (syntax term)))
(syntax term)))))
(parse-action (syntax prod-rhs) (syntax action)))))
(_
(raise-syntax-error
(syntax->datum #'TERM))
#'TERM)]))
(parse-action #'PROD-RHS #'ACTION)))]
[_ (raise-syntax-error
'parser-production-rhs
"production must have form [(symbol ...) expression] or [(symbol ...) (prec symbol) expression]"
prod-so)))))
prod-so)]))
;; parse-prod-for-nt: syntax-object -> production list
(parse-prods-for-nt
(lambda (prods-so)
(define (parse-prods-for-nt prods-so)
(syntax-case prods-so ()
((nt productions ...)
(> (length (syntax->list (syntax (productions ...)))) 0)
(let ((nt (hash-table-get non-term-table
(syntax-object->datum (syntax nt)))))
(map (lambda (p) (parse-prod+action nt p))
(syntax->list (syntax (productions ...))))))
(_
(raise-syntax-error
[(NT PRODUCTIONS ...)
(positive? (length (syntax->list #'(PRODUCTIONS ...))))
(let ([nt (hash-ref non-term-table (syntax->datum #'NT))])
(map (λ (p) (parse-prod+action nt p)) (syntax->list #'(PRODUCTIONS ...))))]
[_ (raise-syntax-error
'parser-productions
"A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side"
prods-so))))))
prods-so)]))
(for-each
(lambda (sstx ssym)
(unless (memq ssym list-of-non-terms)
(for ([sstx (in-list start)]
[ssym (in-list start-syms)]
#:unless (memq ssym list-of-non-terms))
(raise-syntax-error
'parser-start
(format "Start symbol ~a not defined as a non-terminal" ssym)
sstx)))
start start-syms)
sstx))
(let* ((starts (map (lambda (x) (make-non-term (gensym) #f)) start-syms))
(end-non-terms (map (lambda (x) (make-non-term (gensym) #f)) start-syms))
(parsed-prods (map parse-prods-for-nt (syntax->list prods)))
(start-prods
(map (lambda (start end-non-term)
(list (make-prod start (vector end-non-term) #f #f
(syntax (lambda (x) x)))))
starts end-non-terms))
(prods
`(,@start-prods
,@(map
(lambda (end-nt start-sym)
(map
(lambda (end)
(define starts (map (λ (x) (make-non-term (gensym) #f)) start-syms))
(define end-non-terms (map (λ (x) (make-non-term (gensym) #f)) start-syms))
(define parsed-prods (map parse-prods-for-nt (syntax->list prods)))
(define start-prods (for/list ([start (in-list starts)]
[end-non-term (in-list end-non-terms)])
(list (make-prod start (vector end-non-term) #f #f #'values))))
(define new-prods
(append start-prods
(for/list ([end-nt (in-list end-non-terms)]
[start-sym (in-list start-syms)])
(for/list ([end (in-list end-terms)])
(make-prod end-nt
(vector
(hash-table-get non-term-table start-sym)
(hash-table-get term-table end))
(hash-ref non-term-table start-sym)
(hash-ref term-table end))
#f
#f
(syntax (lambda (x) x))))
end-terms))
end-non-terms start-syms)
,@parsed-prods)))
#'values)))
parsed-prods))
(make-object grammar%
prods
new-prods
(map car start-prods)
terms
(append starts (append end-non-terms non-terms))
(map (lambda (term-name)
(hash-table-get term-table term-name))
end-terms)))))))
(map (λ (term-name) (hash-ref term-table term-name)) end-terms)))

@ -1,11 +1,10 @@
(module lalr mzscheme
;; Compute LALR lookaheads from DeRemer and Pennello 1982
#lang racket/base
(require "lr0.rkt"
"grammar.rkt"
mzlib/list
mzlib/class)
racket/list
racket/class)
;; Compute LALR lookaheads from DeRemer and Pennello 1982
(provide compute-LA)
@ -13,126 +12,117 @@
;; computes for each state, non-term transition pair, the terminals
;; which can transition out of the resulting state
;; output term set is represented in bit-vector form
(define (compute-DR a g)
(lambda (tk)
(let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk))))
(define ((compute-DR a g) tk)
(define r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))
(term-list->bit-vector
(filter
(lambda (term)
(send a run-automaton r term))
(send g get-terms))))))
(filter (λ (term) (send a run-automaton r term)) (send g get-terms))))
;; compute-reads:
;; LR0-automaton * grammar -> (trans-key -> trans-key list)
(define (compute-reads a g)
(let ((nullable-non-terms
(filter (lambda (nt) (send g nullable-non-term? nt))
(send g get-non-terms))))
(lambda (tk)
(let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk))))
(map (lambda (x) (make-trans-key r x))
(filter (lambda (non-term) (send a run-automaton r non-term))
nullable-non-terms))))))
(define nullable-non-terms (filter (λ (nt) (send g nullable-non-term? nt)) (send g get-non-terms)))
(λ (tk)
(define r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))
(for/list ([non-term (in-list nullable-non-terms)]
#:when (send a run-automaton r non-term))
(make-trans-key r non-term))))
;; compute-read: LR0-automaton * grammar -> (trans-key -> term set)
;; output term set is represented in bit-vector form
(define (compute-read a g)
(let* ((dr (compute-DR a g))
(reads (compute-reads a g)))
(define dr (compute-DR a g))
(define reads (compute-reads a g))
(digraph-tk->terml (send a get-mapped-non-term-keys)
reads
dr
(send a get-num-states))))
(send a get-num-states)))
;; returns the list of all k such that state k transitions to state start on the
;; transitions in rhs (in order)
(define (run-lr0-backward a rhs dot-pos start num-states)
(let loop ((states (list start))
(i (sub1 dot-pos)))
(let loop ([states (list start)]
[i (sub1 dot-pos)])
(cond
((< i 0) states)
(else (loop (send a run-automaton-back states (vector-ref rhs i))
(sub1 i))))))
[(< i 0) states]
[else (loop (send a run-automaton-back states (vector-ref rhs i))
(sub1 i))])))
;; prod->items-for-include: grammar * prod * non-term -> lr0-item list
;; returns the list of all (B -> beta . nt gamma) such that prod = (B -> beta nt gamma)
;; and gamma =>* epsilon
(define (prod->items-for-include g prod nt)
(let* ((rhs (prod-rhs prod))
(rhs-l (vector-length rhs)))
(define rhs (prod-rhs prod))
(define rhs-l (vector-length rhs))
(append (if (and (> rhs-l 0) (eq? nt (vector-ref rhs (sub1 rhs-l))))
(list (make-item prod (sub1 rhs-l)))
null)
(let loop ((i (sub1 rhs-l)))
(let loop ([i (sub1 rhs-l)])
(cond
((and (> i 0)
[(and (> i 0)
(non-term? (vector-ref rhs i))
(send g nullable-non-term? (vector-ref rhs i)))
(if (eq? nt (vector-ref rhs (sub1 i)))
(cons (make-item prod (sub1 i))
(loop (sub1 i)))
(loop (sub1 i))))
(else null))))))
(loop (sub1 i)))]
[else null]))))
;; prod-list->items-for-include: grammar * prod list * non-term -> lr0-item list
;; return the list of all (B -> beta . nt gamma) such that (B -> beta nt gamma) in prod-list
;; and gamma =>* epsilon
(define (prod-list->items-for-include g prod-list nt)
(apply append (map (lambda (prod) (prod->items-for-include g prod nt)) prod-list)))
(apply append (map (λ (prod) (prod->items-for-include g prod nt)) prod-list)))
;; comput-includes: lr0-automaton * grammar -> (trans-key -> trans-key list)
(define (compute-includes a g)
(let ((num-states (send a get-num-states))
(items-for-input-nt (make-vector (send g get-num-non-terms) null)))
(for-each
(lambda (input-nt)
(define num-states (send a get-num-states))
(define items-for-input-nt (make-vector (send g get-num-non-terms) null))
(for ([input-nt (in-list (send g get-non-terms))])
(vector-set! items-for-input-nt (non-term-index input-nt)
(prod-list->items-for-include g (send g get-prods) input-nt)))
(send g get-non-terms))
(lambda (tk)
(let* ((goal-state (trans-key-st tk))
(non-term (trans-key-gs tk))
(items (vector-ref items-for-input-nt (non-term-index non-term))))
(λ (tk)
(define goal-state (trans-key-st tk))
(define non-term (trans-key-gs tk))
(define items (vector-ref items-for-input-nt (non-term-index non-term)))
(trans-key-list-remove-dups
(apply append
(map (lambda (item)
(let* ((prod (item-prod item))
(rhs (prod-rhs prod))
(lhs (prod-lhs prod)))
(map (lambda (state)
(make-trans-key state lhs))
(for/list ([item (in-list items)])
(define prod (item-prod item))
(define rhs (prod-rhs prod))
(define lhs (prod-lhs prod))
(map (λ (state) (make-trans-key state lhs))
(run-lr0-backward a
rhs
(item-dot-pos item)
goal-state
num-states))))
items)))))))
num-states)))))))
;; compute-lookback: lr0-automaton * grammar -> (kernel * proc -> trans-key list)
(define (compute-lookback a g)
(let ((num-states (send a get-num-states)))
(lambda (state prod)
(map (lambda (k) (make-trans-key k (prod-lhs prod)))
(run-lr0-backward a (prod-rhs prod) (vector-length (prod-rhs prod)) state num-states)))))
(define num-states (send a get-num-states))
(λ (state prod)
(map (λ (k) (make-trans-key k (prod-lhs prod)))
(run-lr0-backward a (prod-rhs prod) (vector-length (prod-rhs prod)) state num-states))))
;; compute-follow: LR0-automaton * grammar -> (trans-key -> term set)
;; output term set is represented in bit-vector form
(define (compute-follow a g includes)
(let ((read (compute-read a g)))
(define read (compute-read a g))
(digraph-tk->terml (send a get-mapped-non-term-keys)
includes
read
(send a get-num-states))))
(send a get-num-states)))
;; compute-LA: LR0-automaton * grammar -> kernel * prod -> term set
;; output term set is represented in bit-vector form
(define (compute-LA a g)
(let* ((includes (compute-includes a g))
(lookback (compute-lookback a g))
(follow (compute-follow a g includes)))
(lambda (k p)
(let* ((l (lookback k p))
(f (map follow l)))
(apply bitwise-ior (cons 0 f))))))
(define includes (compute-includes a g))
(define lookback (compute-lookback a g))
(define follow (compute-follow a g includes))
(λ (k p)
(define l (lookback k p))
(define f (map follow l))
(apply bitwise-ior (cons 0 f))))
(define (print-DR dr a g)
(print-input-st-sym dr "DR" a g print-output-terms))
@ -150,11 +140,11 @@
(define (print-input-st-sym f name a g print-output)
(printf "~a:\n" name)
(send a for-each-state
(lambda (state)
(λ (state)
(for-each
(lambda (non-term)
(let ((res (f (make-trans-key state non-term))))
(if (not (null? res))
(λ (non-term)
(let ([res (f (make-trans-key state non-term))])
(when (not (null? res))
(printf "~a(~a, ~a) = ~a\n"
name
state
@ -166,13 +156,13 @@
(define (print-input-st-prod f name a g print-output)
(printf "~a:\n" name)
(send a for-each-state
(lambda (state)
(λ (state)
(for-each
(lambda (non-term)
(λ (non-term)
(for-each
(lambda (prod)
(let ((res (f state prod)))
(if (not (null? res))
(λ (prod)
(let ([res (f state prod)])
(when (not (null? res))
(printf "~a(~a, ~a) = ~a\n"
name
(kernel-index state)
@ -182,45 +172,35 @@
(send g get-non-terms)))))
(define (print-output-terms r)
(map
(lambda (p)
(gram-sym-symbol p))
r))
(map gram-sym-symbol r))
(define (print-output-st-nt r)
(map
(lambda (p)
(list
(kernel-index (trans-key-st p))
(gram-sym-symbol (trans-key-gs p))))
r))
(map (λ (p) (list (kernel-index (trans-key-st p)) (gram-sym-symbol (trans-key-gs p)))) r))
;; init-tk-map : int -> (vectorof hashtable?)
(define (init-tk-map n)
(let ((v (make-vector n #f)))
(let loop ((i (sub1 (vector-length v))))
(define v (make-vector n #f))
(let loop ([i (sub1 (vector-length v))])
(when (>= i 0)
(vector-set! v i (make-hash-table))
(vector-set! v i (make-hasheq))
(loop (sub1 i))))
v))
v)
;; lookup-tk-map : (vectorof (symbol? int hashtable)) -> trans-key? -> int
(define (lookup-tk-map map)
(lambda (tk)
(let ((st (trans-key-st tk))
(gs (trans-key-gs tk)))
(hash-table-get (vector-ref map (kernel-index st))
(define ((lookup-tk-map map) tk)
(define st (trans-key-st tk))
(define gs (trans-key-gs tk))
(hash-ref (vector-ref map (kernel-index st))
(gram-sym-symbol gs)
(lambda () 0)))))
(λ () 0)))
;; add-tk-map : (vectorof (symbol? int hashtable)) -> trans-key int ->
(define (add-tk-map map)
(lambda (tk v)
(let ((st (trans-key-st tk))
(gs (trans-key-gs tk)))
(hash-table-put! (vector-ref map (kernel-index st))
(define ((add-tk-map map) tk v)
(define st (trans-key-st tk))
(define gs (trans-key-gs tk))
(hash-set! (vector-ref map (kernel-index st))
(gram-sym-symbol gs)
v))))
v))
;; digraph-tk->terml:
;; (trans-key list) * (trans-key -> trans-key list) * (trans-key -> term list) * int * int * int
@ -229,49 +209,44 @@
;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)}
;; A specialization of digraph in the file graph.rkt
(define (digraph-tk->terml nodes edges f- num-states)
(letrec [
;; Will map elements of trans-key to term sets represented as bit vectors
(results (init-tk-map num-states))
(define results (init-tk-map num-states))
;; Maps elements of trans-keys to integers.
(N (init-tk-map num-states))
(define N (init-tk-map num-states))
(get-N (lookup-tk-map N))
(set-N (add-tk-map N))
(get-f (lookup-tk-map results))
(set-f (add-tk-map results))
(define get-N (lookup-tk-map N))
(define set-N (add-tk-map N))
(define get-f (lookup-tk-map results))
(define set-f (add-tk-map results))
(stack null)
(push (lambda (x)
(set! stack (cons x stack))))
(pop (lambda ()
(begin0
(define stack null)
(define (push x) (set! stack (cons x stack)))
(define (pop) (begin0
(car stack)
(set! stack (cdr stack)))))
(depth (lambda () (length stack)))
(set! stack (cdr stack))))
(define (depth) (length stack))
;; traverse: 'a ->
(traverse
(lambda (x)
(define (traverse x)
(push x)
(let ((d (depth)))
(let ([d (depth)])
(set-N x d)
(set-f x (f- x))
(for-each (lambda (y)
(for-each (λ (y)
(when (= 0 (get-N y))
(traverse y))
(set-f x (bitwise-ior (get-f x) (get-f y)))
(set-N x (min (get-N x) (get-N y))))
(edges x))
(when (= d (get-N x))
(let loop ((p (pop)))
(let loop ([p (pop)])
(set-N p +inf.0)
(set-f p (get-f x))
(unless (equal? x p)
(loop (pop))))))))]
(for-each (lambda (x)
(when (= 0 (get-N x))
(traverse x)))
nodes)
get-f))
)
(loop (pop)))))))
(for ([x (in-list nodes)]
#:when (zero? (get-N x)))
(traverse x))
get-f)

@ -1,14 +1,13 @@
(module lr0 mzscheme
;; Handle the LR0 automaton
#lang racket/base
(require "grammar.rkt"
"graph.rkt"
mzlib/list
mzlib/class)
racket/list
racket/class)
;; Handle the LR0 automaton
(provide build-lr0-automaton lr0%
(struct trans-key (st gs)) trans-key-list-remove-dups
(struct-out trans-key) trans-key-list-remove-dups
kernel-items kernel-index)
;; kernel = (make-kernel (LR1-item list) index)
@ -16,64 +15,59 @@
;; be used to compare kernels
;; Each kernel is assigned a unique index, 0 <= index < number of states
;; trans-key = (make-trans-key kernel gram-sym)
(define-struct kernel (items index) (make-inspector))
(define-struct trans-key (st gs) (make-inspector))
(define-struct kernel (items index) #:inspector (make-inspector))
(define-struct trans-key (st gs) #:inspector (make-inspector))
(define (trans-key<? a b)
(let ((kia (kernel-index (trans-key-st a)))
(kib (kernel-index (trans-key-st b))))
(define kia (kernel-index (trans-key-st a)))
(define kib (kernel-index (trans-key-st b)))
(or (< kia kib)
(and (= kia kib)
(< (non-term-index (trans-key-gs a))
(non-term-index (trans-key-gs b)))))))
(non-term-index (trans-key-gs b))))))
(define (trans-key-list-remove-dups tkl)
(let loop ((sorted (sort tkl trans-key<?)))
(let loop ([sorted (sort tkl trans-key<?)])
(cond
((null? sorted) null)
((null? (cdr sorted)) sorted)
(else
[(null? sorted) null]
[(null? (cdr sorted)) sorted]
[else
(if (and (= (non-term-index (trans-key-gs (car sorted)))
(non-term-index (trans-key-gs (cadr sorted))))
(= (kernel-index (trans-key-st (car sorted)))
(kernel-index (trans-key-st (cadr sorted)))))
(loop (cdr sorted))
(cons (car sorted) (loop (cdr sorted))))))))
(cons (car sorted) (loop (cdr sorted))))])))
;; build-transition-table : int (listof (cons/c trans-key X) ->
;; (vectorof (symbol X hashtable))
(define (build-transition-table num-states assoc)
(let ((transitions (make-vector num-states #f)))
(let loop ((i (sub1 (vector-length transitions))))
(define transitions (make-vector num-states #f))
(let loop ([i (sub1 (vector-length transitions))])
(when (>= i 0)
(vector-set! transitions i (make-hash-table))
(vector-set! transitions i (make-hasheq))
(loop (sub1 i))))
(for-each
(lambda (trans-key/kernel)
(let ((tk (car trans-key/kernel)))
(hash-table-put! (vector-ref transitions (kernel-index (trans-key-st tk)))
(for ([trans-key/kernel (in-list assoc)])
(define tk (car trans-key/kernel))
(hash-set! (vector-ref transitions (kernel-index (trans-key-st tk)))
(gram-sym-symbol (trans-key-gs tk))
(cdr trans-key/kernel))))
assoc)
transitions))
(cdr trans-key/kernel)))
transitions)
;; reverse-assoc : (listof (cons/c trans-key? kernel?)) ->
;; (listof (cons/c trans-key? (listof kernel?)))
(define (reverse-assoc assoc)
(let ((reverse-hash (make-hash-table 'equal))
(hash-table-add!
(lambda (ht k v)
(hash-table-put! ht k (cons v (hash-table-get ht k (lambda () null)))))))
(for-each
(lambda (trans-key/kernel)
(let ((tk (car trans-key/kernel)))
(define reverse-hash (make-hash))
(define (hash-table-add! ht k v)
(hash-set! ht k (cons v (hash-ref ht k (λ () null)))))
(for ([trans-key/kernel (in-list assoc)])
(define tk (car trans-key/kernel))
(hash-table-add! reverse-hash
(make-trans-key (cdr trans-key/kernel)
(trans-key-gs tk))
(trans-key-st tk))))
assoc)
(hash-table-map reverse-hash cons)))
(trans-key-st tk)))
(hash-map reverse-hash cons))
;; kernel-list-remove-duplicates
@ -113,54 +107,47 @@
;; for-each-state : (state ->) ->
;; Iteration over the states in an automaton
(define/public (for-each-state f)
(let ((num-states (vector-length states)))
(let loop ((i 0))
(if (< i num-states)
(begin
(define num-states (vector-length states))
(let loop ([i 0])
(when (< i num-states)
(f (vector-ref states i))
(loop (add1 i)))))))
(loop (add1 i)))))
;; run-automaton: kernel? gram-sym? -> (union kernel #f)
;; returns the state reached from state k on input s, or #f when k
;; has no transition on s
(define/public (run-automaton k s)
(hash-table-get (vector-ref transitions (kernel-index k))
(hash-ref (vector-ref transitions (kernel-index k))
(gram-sym-symbol s)
(lambda () #f)))
(λ () #f)))
;; run-automaton-back : (listof kernel?) gram-sym? -> (listof kernel)
;; returns the list of states that can reach k by transitioning on s.
(define/public (run-automaton-back k s)
(apply append
(map
(lambda (k)
(hash-table-get (vector-ref reverse-transitions (kernel-index k))
(for*/list ([k (in-list k)]
[val (in-list (hash-ref (vector-ref reverse-transitions (kernel-index k))
(gram-sym-symbol s)
(lambda () null)))
k)))))
(λ () null)))])
val))))
(define (union comp<?)
(letrec ((union
(lambda (l1 l2)
(define ((union comp<?) l1 l2)
(let loop ([l1 l1] [l2 l2])
(cond
((null? l1) l2)
((null? l2) l1)
(else (let ((c1 (car l1))
(c2 (car l2)))
[(null? l1) l2]
[(null? l2) l1]
[else (define c1 (car l1))
(define c2 (car l2))
(cond
((comp<? c1 c2)
(cons c1 (union (cdr l1) l2)))
((comp<? c2 c1)
(cons c2 (union l1 (cdr l2))))
(else (union (cdr l1) l2)))))))))
union))
[(comp<? c1 c2) (cons c1 (loop (cdr l1) l2))]
[(comp<? c2 c1) (cons c2 (loop l1 (cdr l2)))]
[else (loop (cdr l1) l2)])])))
;; The kernels in the automaton are represented cannonically.
;; That is (equal? a b) <=> (eq? a b)
(define (kernel->string k)
(apply string-append
`("{" ,@(map (lambda (i) (string-append (item->string i) ", "))
`("{" ,@(map (λ (i) (string-append (item->string i) ", "))
(kernel-items k))
"}")))
@ -168,9 +155,8 @@
;; Constructs the kernels of the sets of LR(0) items of g
(define (build-lr0-automaton grammar)
; (printf "LR(0) automaton:\n")
(letrec (
(epsilons (make-hash-table 'equal))
(grammar-symbols (append (send grammar get-non-terms)
(define epsilons (make-hash))
(define grammar-symbols (append (send grammar get-non-terms)
(send grammar get-terms)))
;; first-non-term: non-term -> non-term list
;; given a non-terminal symbol C, return those non-terminal
@ -178,195 +164,151 @@
;; non-terminals n where -> means a rightmost derivation in many
;; steps. Assumes that each non-term can be reduced to a string
;; of terms.
(first-non-term
(define first-non-term
(digraph (send grammar get-non-terms)
(lambda (nt)
(λ (nt)
(filter non-term?
(map (lambda (prod)
(sym-at-dot (make-item prod 0)))
(map (λ (prod) (sym-at-dot (make-item prod 0)))
(send grammar get-prods-for-non-term nt))))
(lambda (nt) (list nt))
(λ (nt) (list nt))
(union non-term<?)
(lambda () null)))
(λ () null)))
;; closure: LR1-item list -> LR1-item list
;; Creates a set of items containing i s.t. if A -> n.Xm is in it,
;; X -> .o is in it too.
(LR0-closure
(lambda (i)
(define (LR0-closure i)
(cond
((null? i) null)
(else
(let ((next-gsym (sym-at-dot (car i))))
[(null? i) null]
[else
(define next-gsym (sym-at-dot (car i)))
(cond
((non-term? next-gsym)
[(non-term? next-gsym)
(cons (car i)
(append
(apply append
(map (lambda (non-term)
(map (lambda (x)
(make-item x 0))
(send grammar
(for*/list ([non-term (in-list (first-non-term next-gsym))]
[x (in-list (send grammar
get-prods-for-non-term
non-term)))
(first-non-term next-gsym)))
(LR0-closure (cdr i)))))
(else
(cons (car i) (LR0-closure (cdr i))))))))))
non-term))])
(make-item x 0))
(LR0-closure (cdr i))))]
[else (cons (car i) (LR0-closure (cdr i)))])]))
;; maps trans-keys to kernels
(automaton-term null)
(automaton-non-term null)
(define automaton-term null)
(define automaton-non-term null)
;; keeps the kernels we have seen, so we can have a unique
;; list for each kernel
(kernels (make-hash-table 'equal))
(counter 0)
(define kernels (make-hash))
(define counter 0)
;; goto: LR1-item list -> LR1-item list list
;; creates new kernels by moving the dot in each item in the
;; LR0-closure of kernel to the right, and grouping them by
;; the term/non-term moved over. Returns the kernels not
;; yet seen, and places the trans-keys into automaton
(goto
(lambda (kernel)
(let (
(define (goto kernel)
;; maps a gram-syms to a list of items
(table (make-hash-table))
(define table (make-hasheq))
;; add-item!:
;; (symbol (listof item) hashtable) item? ->
;; adds i into the table grouped with the grammar
;; symbol following its dot
(add-item!
(lambda (table i)
(let ((gs (sym-at-dot i)))
(define (add-item! table i)
(define gs (sym-at-dot i))
(cond
(gs
(let ((already
(hash-table-get table
(gram-sym-symbol gs)
(lambda () null))))
[gs (define already (hash-ref table (gram-sym-symbol gs) (λ () null)))
(unless (member i already)
(hash-table-put! table
(gram-sym-symbol gs)
(cons i already)))))
((= 0 (vector-length (prod-rhs (item-prod i))))
(let ((current (hash-table-get epsilons
kernel
(lambda () null))))
(hash-table-put! epsilons
kernel
(cons i current)))))))))
(hash-set! table (gram-sym-symbol gs) (cons i already)))]
((zero? (vector-length (prod-rhs (item-prod i))))
(define current (hash-ref epsilons kernel (λ () null)))
(hash-set! epsilons kernel (cons i current)))))
;; Group the items of the LR0 closure of the kernel
;; by the character after the dot
(for-each (lambda (item)
(for ([item (in-list (LR0-closure (kernel-items kernel)))])
(add-item! table item))
(LR0-closure (kernel-items kernel)))
;; each group is a new kernel, with the dot advanced.
;; sorts the items in a kernel so kernels can be compared
;; with equal? for using the table kernels to make sure
;; only one representitive of each kernel is created
(define is
(let loop ([gsyms grammar-symbols])
(cond
[(null? gsyms) null]
[else
(define items (hash-ref table (gram-sym-symbol (car gsyms)) (λ () null)))
(cond
[(null? items) (loop (cdr gsyms))]
[else (cons (list (car gsyms) items)
(loop (cdr gsyms)))])])))
(filter
(lambda (x) x)
(map
(lambda (i)
(let* ((gs (car i))
(items (cadr i))
(new #f)
(new-kernel (sort
(filter (lambda (x) x)
(map move-dot-right items))
item<?))
(unique-kernel (hash-table-get
kernels
new-kernel
(lambda ()
(let ((k (make-kernel
new-kernel
counter)))
values
(for/list ([i (in-list is)])
(define gs (car i))
(define items (cadr i))
(define new #f)
(define new-kernel (sort (filter values (map move-dot-right items)) item<?))
(define unique-kernel (hash-ref kernels new-kernel
(λ ()
(define k (make-kernel new-kernel counter))
(set! new #t)
(set! counter (add1 counter))
(hash-table-put! kernels
new-kernel
k)
k)))))
(cond
((term? gs)
(hash-set! kernels new-kernel k)
k)))
(if (term? gs)
(set! automaton-term (cons (cons (make-trans-key kernel gs)
unique-kernel)
automaton-term)))
(else
automaton-term))
(set! automaton-non-term (cons (cons (make-trans-key kernel gs)
unique-kernel)
automaton-non-term))))
automaton-non-term)))
#;(printf "~a -> ~a on ~a\n"
(kernel->string kernel)
(kernel->string unique-kernel)
(gram-sym-symbol gs))
(if new
unique-kernel
#f)))
(let loop ((gsyms grammar-symbols))
(cond
((null? gsyms) null)
(else
(let ((items (hash-table-get table
(gram-sym-symbol (car gsyms))
(lambda () null))))
(cond
((null? items) (loop (cdr gsyms)))
(else
(cons (list (car gsyms) items)
(loop (cdr gsyms))))))))))))))
(and new unique-kernel))))
(starts
(map (lambda (init-prod) (list (make-item init-prod 0)))
(define starts (map (λ (init-prod) (list (make-item init-prod 0)))
(send grammar get-init-prods)))
(startk
(map (lambda (start)
(let ((k (make-kernel start counter)))
(hash-table-put! kernels start k)
(define startk (for/list ([start (in-list starts)])
(define k (make-kernel start counter))
(hash-set! kernels start k)
(set! counter (add1 counter))
k))
starts))
(new-kernels (make-queue)))
(let loop ((old-kernels startk)
(seen-kernels null))
(define new-kernels (make-queue))
(let loop ([old-kernels startk]
[seen-kernels null])
(cond
((and (empty-queue? new-kernels) (null? old-kernels))
(make-object lr0%
automaton-term
automaton-non-term
(list->vector (reverse seen-kernels))
epsilons))
((null? old-kernels)
(loop (deq! new-kernels) seen-kernels))
(else
[(and (empty-queue? new-kernels) (null? old-kernels))
(make-object lr0% automaton-term automaton-non-term
(list->vector (reverse seen-kernels)) epsilons)]
[(null? old-kernels) (loop (deq! new-kernels) seen-kernels)]
[else
(enq! new-kernels (goto (car old-kernels)))
(loop (cdr old-kernels) (cons (car old-kernels) seen-kernels)))))))
(loop (cdr old-kernels) (cons (car old-kernels) seen-kernels))])))
(define-struct q (f l) #:inspector (make-inspector) #:mutable)
(define (empty-queue? q) (null? (q-f q)))
(define (make-queue) (make-q null null))
(define-struct q (f l) (make-inspector))
(define (empty-queue? q)
(null? (q-f q)))
(define (make-queue)
(make-q null null))
(define (enq! q i)
(if (empty-queue? q)
(let ((i (mcons i null)))
(cond
[(empty-queue? q)
(let ([i (mcons i null)])
(set-q-l! q i)
(set-q-f! q i))
(begin
(set-q-f! q i))]
[else
(set-mcdr! (q-l q) (mcons i null))
(set-q-l! q (mcdr (q-l q))))))
(set-q-l! q (mcdr (q-l q)))]))
(define (deq! q)
(begin0
(mcar (q-f q))
(set-q-f! q (mcdr (q-f q)))))
)

@ -1,7 +1,7 @@
(module parser-actions mzscheme
#lang racket/base
(require "grammar.rkt")
(provide (all-defined-except make-reduce make-reduce*)
(rename make-reduce* make-reduce))
(provide (except-out (all-defined-out) make-reduce make-reduce*)
(rename-out [make-reduce* make-reduce]))
;; An action is
;; - (make-shift int)
@ -12,12 +12,12 @@
;; A reduce contains a runtime-reduce so that sharing of the reduces can
;; be easily transferred to sharing of runtime-reduces.
(define-struct action () (make-inspector))
(define-struct (shift action) (state) (make-inspector))
(define-struct (reduce action) (prod runtime-reduce) (make-inspector))
(define-struct (accept action) () (make-inspector))
(define-struct (goto action) (state) (make-inspector))
(define-struct (no-action action) () (make-inspector))
(define-struct action () #:inspector (make-inspector))
(define-struct (shift action) (state) #:inspector (make-inspector))
(define-struct (reduce action) (prod runtime-reduce) #:inspector (make-inspector))
(define-struct (accept action) () #:inspector (make-inspector))
(define-struct (goto action) (state) #:inspector (make-inspector))
(define-struct (no-action action) () #:inspector (make-inspector))
(define (make-reduce* p)
(make-reduce p
@ -34,11 +34,11 @@
(define (action->runtime-action a)
(cond
((shift? a) (shift-state a))
((reduce? a) (reduce-runtime-reduce a))
((accept? a) 'accept)
((goto? a) (- (+ (goto-state a) 1)))
((no-action? a) #f)))
[(shift? a) (shift-state a)]
[(reduce? a) (reduce-runtime-reduce a)]
[(accept? a) 'accept]
[(goto? a) (- (+ (goto-state a) 1))]
[(no-action? a) #f]))
(define (runtime-shift? x) (and (integer? x) (>= x 0)))
(define runtime-reduce? vector?)
@ -51,4 +51,4 @@
(define (runtime-reduce-rhs-length x) (vector-ref x 2))
(define (runtime-goto-state x) (- (+ x 1)))
)

@ -1,98 +1,89 @@
(module parser-builder mzscheme
#lang racket/base
(require "input-file-parser.rkt"
"grammar.rkt"
"table.rkt"
mzlib/class
racket/class
racket/contract)
(require-for-template mzscheme)
(require (for-template racket/base))
(provide/contract
(build-parser (-> string? any/c any/c
(provide/contract [build-parser (-> string? any/c any/c
(listof identifier?)
(listof identifier?)
(listof identifier?)
(or/c syntax? #f)
syntax?
(values any/c any/c any/c any/c))))
(values any/c any/c any/c any/c))])
;; fix-check-syntax : (listof identifier?) (listof identifier?) (listof identifier?)
;; (union syntax? false/c) syntax?) -> syntax?
(define (fix-check-syntax input-terms start ends assocs prods)
(let* ((term-binders (get-term-list input-terms))
(get-term-binder
(let ((t (make-hash-table)))
(for-each
(lambda (term)
(hash-table-put! t (syntax-e term) term))
term-binders)
(lambda (x)
(let ((r (hash-table-get t (syntax-e x) (lambda () #f))))
(define term-binders (get-term-list input-terms))
(define get-term-binder
(let ([t (make-hasheq)])
(for ([term (in-list term-binders)])
(hash-set! t (syntax-e term) term))
(λ (x)
(define r (hash-ref t (syntax-e x) (λ () #f)))
(if r
(syntax-local-introduce (datum->syntax-object r (syntax-e x) x x))
x)))))
(rhs-list
(syntax-case prods ()
(((_ rhs ...) ...)
(syntax->list (syntax (rhs ... ...)))))))
(with-syntax (((tmp ...) (map syntax-local-introduce term-binders))
((term-group ...)
(map (lambda (tg)
(syntax-local-introduce (datum->syntax r (syntax-e x) x x))
x))))
(define rhs-list (syntax-case prods ()
[((_ RHS ...) ...) (syntax->list #'(RHS ... ...))]))
(with-syntax ([(TMP ...) (map syntax-local-introduce term-binders)]
[(TERM-GROUP ...)
(map (λ (tg)
(syntax-property
(datum->syntax-object tg #f)
(datum->syntax tg #f)
'disappeared-use
tg))
input-terms))
((end ...)
(map get-term-binder ends))
((start ...)
(map get-term-binder start))
((bind ...)
(syntax-case prods ()
(((bind _ ...) ...)
(syntax->list (syntax (bind ...))))))
(((bound ...) ...)
(map
(lambda (rhs)
input-terms)]
[(END ...) (map get-term-binder ends)]
[(START ...) (map get-term-binder start)]
[(BIND ...) (syntax-case prods ()
(((BIND _ ...) ...)
(syntax->list #'(BIND ...))))]
[((BOUND ...) ...)
(map (λ (rhs)
(syntax-case rhs ()
(((bound ...) (_ pbound) __)
[((BOUND ...) (_ PBOUND) __)
(map get-term-binder
(cons (syntax pbound)
(syntax->list (syntax (bound ...))))))
(((bound ...) _)
(cons #'PBOUND (syntax->list #'(BOUND ...))))]
[((BOUND ...) _)
(map get-term-binder
(syntax->list (syntax (bound ...)))))))
rhs-list))
((prec ...)
(syntax->list #'(BOUND ...)))]))
rhs-list)]
[(PREC ...)
(if assocs
(map get-term-binder
(syntax-case assocs ()
(((__ term ...) ...)
(syntax->list (syntax (term ... ...))))))
null)))
(((__ TERM ...) ...)
(syntax->list #'(TERM ... ...)))))
null)])
#`(when #f
(let ((bind void) ... (tmp void) ...)
(void bound ... ... term-group ... start ... end ... prec ...))))))
(require mzlib/list "parser-actions.rkt")
(let ((BIND void) ... (TMP void) ...)
(void BOUND ... ... TERM-GROUP ... START ... END ... PREC ...)))))
(require racket/list "parser-actions.rkt")
(define (build-parser filename src-pos suppress input-terms start end assocs prods)
(let* ((grammar (parse-input input-terms start end assocs prods src-pos))
(table (build-table grammar filename suppress))
(all-tokens (make-hash-table))
(actions-code
`(vector ,@(map prod-action (send grammar get-prods)))))
(for-each (lambda (term)
(hash-table-put! all-tokens (gram-sym-symbol term) #t))
(send grammar get-terms))
(define grammar (parse-input input-terms start end assocs prods src-pos))
(define table (build-table grammar filename suppress))
(define all-tokens (make-hasheq))
(define actions-code `(vector ,@(map prod-action (send grammar get-prods))))
(for ([term (in-list (send grammar get-terms))])
(hash-set! all-tokens (gram-sym-symbol term) #t))
#;(let ((num-states (vector-length table))
(num-gram-syms (+ (send grammar get-num-terms)
(send grammar get-num-non-terms)))
(num-ht-entries (apply + (map length (vector->list table))))
(num-reduces
(let ((ht (make-hash-table)))
(let ((ht (make-hasheq)))
(for-each
(lambda (x)
(λ (x)
(when (reduce? x)
(hash-table-put! ht x #t)))
(hash-set! ht x #t)))
(map cdr (apply append (vector->list table))))
(length (hash-table-map ht void)))))
(printf "~a states, ~a grammar symbols, ~a hash-table entries, ~a reduces\n"
@ -108,6 +99,5 @@
(values table
all-tokens
actions-code
(fix-check-syntax input-terms start end assocs prods))))
(fix-check-syntax input-terms start end assocs prods)))
)

@ -1,14 +1,14 @@
#lang scheme/base
;; Routine to build the LALR table
#lang racket/base
(require "grammar.rkt"
"lr0.rkt"
"lalr.rkt"
"parser-actions.rkt"
racket/contract
mzlib/list
mzlib/class)
racket/list
racket/class)
;; Routine to build the LALR table
(define (is-a-grammar%? x) (is-a? x grammar%))
(provide/contract
@ -30,103 +30,84 @@
;; group-table : parse-table -> grouped-parse-table
(define (group-table table)
(list->vector
(map
(lambda (state-entry)
(let ((ht (make-hash)))
(for-each
(lambda (gs/actions)
(let ((group (hash-ref ht (car gs/actions) (lambda () null))))
(unless (member (cdr gs/actions) group)
(hash-set! ht (car gs/actions) (cons (cdr gs/actions) group)))))
state-entry)
(hash-map ht cons)))
(vector->list table))))
(for/list ([state-entry (in-list (vector->list table))])
(define ht (make-hasheq))
(for* ([gs/actions (in-list state-entry)]
[group (in-value (hash-ref ht (car gs/actions) (λ () null)))]
#:unless (member (cdr gs/actions) group))
(hash-set! ht (car gs/actions) (cons (cdr gs/actions) group)))
(hash-map ht cons))))
;; table-map : (vectorof (listof (cons/c gram-sym? X))) (gram-sym? X -> Y) ->
;; (vectorof (listof (cons/c gram-sym? Y)))
(define (table-map f table)
(list->vector
(map
(lambda (state-entry)
(map
(lambda (gs/X)
(cons (car gs/X) (f (car gs/X) (cdr gs/X))))
state-entry))
(vector->list table))))
(for/list ([state-entry (in-list (vector->list table))])
(for/list ([gs/X (in-list state-entry)])
(cons (car gs/X) (f (car gs/X) (cdr gs/X)))))))
(define (bit-vector-for-each f bv)
(letrec ((for-each
(lambda (bv number)
(let loop ([bv bv] [number 0])
(cond
((= 0 bv) (void))
((= 1 (bitwise-and 1 bv))
[(zero? bv) (void)]
[(= 1 (bitwise-and 1 bv))
(f number)
(for-each (arithmetic-shift bv -1) (add1 number)))
(else (for-each (arithmetic-shift bv -1) (add1 number)))))))
(for-each bv 0)))
(loop (arithmetic-shift bv -1) (add1 number))]
[else (loop (arithmetic-shift bv -1) (add1 number))])))
;; print-entry: symbol action output-port ->
;; prints the action a for lookahead sym to the given port
(define (print-entry sym a port)
(let ((s "\t~a\t\t\t\t\t~a\t~a\n"))
(define s "\t~a\t\t\t\t\t~a\t~a\n")
(cond
((shift? a)
(fprintf port s sym "shift" (shift-state a)))
((reduce? a)
(fprintf port s sym "reduce" (prod-index (reduce-prod a))))
((accept? a)
(fprintf port s sym "accept" ""))
((goto? a)
(fprintf port s sym "goto" (goto-state a))))))
[(shift? a) (fprintf port s sym "shift" (shift-state a))]
[(reduce? a) (fprintf port s sym "reduce" (prod-index (reduce-prod a)))]
[(accept? a) (fprintf port s sym "accept" "")]
[(goto? a) (fprintf port s sym "goto" (goto-state a))]))
;; count: ('a -> bool) * 'a list -> num
;; counts the number of elements in list that satisfy pred
(define (count pred list)
(cond
((null? list) 0)
((pred (car list)) (+ 1 (count pred (cdr list))))
(else (count pred (cdr list)))))
[(null? list) 0]
[(pred (car list)) (+ 1 (count pred (cdr list)))]
[else (count pred (cdr list))]))
;; display-parser: LR0-automaton grouped-parse-table (listof prod?) output-port ->
;; Prints out the parser given by table.
(define (display-parser a grouped-table prods port)
(let* ((SR-conflicts 0)
(RR-conflicts 0))
(for-each
(lambda (prod)
(define SR-conflicts 0)
(define RR-conflicts 0)
(for ([prod (in-list prods)])
(fprintf port
"~a\t~a\t=\t~a\n"
(prod-index prod)
(gram-sym-symbol (prod-lhs prod))
(map gram-sym-symbol (vector->list (prod-rhs prod)))))
prods)
(send a for-each-state
(lambda (state)
(λ (state)
(fprintf port "State ~a\n" (kernel-index state))
(for-each (lambda (item)
(for ([item (in-list (kernel-items state))])
(fprintf port "\t~a\n" (item->string item)))
(kernel-items state))
(newline port)
(for-each
(lambda (gs/action)
(let ((sym (gram-sym-symbol (car gs/action)))
(act (cdr gs/action)))
(for ([gs/action (in-list (vector-ref grouped-table (kernel-index state)))])
(define sym (gram-sym-symbol (car gs/action)))
(define act (cdr gs/action))
(cond
((null? act) (void))
((null? (cdr act))
(print-entry sym (car act) port))
(else
[(null? act) (void)]
[(null? (cdr act))
(print-entry sym (car act) port)]
[else
(fprintf port "begin conflict:\n")
(when (> (count reduce? act) 1)
(set! RR-conflicts (add1 RR-conflicts)))
(when (> (count shift? act) 0)
(set! SR-conflicts (add1 SR-conflicts)))
(map (lambda (x) (print-entry sym x port)) act)
(fprintf port "end conflict\n")))))
(vector-ref grouped-table (kernel-index state)))
(map (λ (x) (print-entry sym x port)) act)
(fprintf port "end conflict\n")]))
(newline port)))
(when (> SR-conflicts 0)
@ -136,47 +117,45 @@
(when (> RR-conflicts 0)
(fprintf port "~a reduce/reduce conflict~a\n"
RR-conflicts
(if (= RR-conflicts 1) "" "s")))))
(if (= RR-conflicts 1) "" "s"))))
;; resolve-conflict : (listof action?) -> action? bool bool
(define (resolve-conflict actions)
(cond
((null? actions) (values (make-no-action) #f #f))
((null? (cdr actions))
(values (car actions) #f #f))
(else
(let ((SR-conflict? (> (count shift? actions) 0))
(RR-conflict? (> (count reduce? actions) 1)))
[(null? actions) (values (make-no-action) #f #f)]
[(null? (cdr actions)) (values (car actions) #f #f)]
[else
(define SR-conflict? (> (count shift? actions) 0))
(define RR-conflict? (> (count reduce? actions) 1))
(let loop ((current-guess #f)
(rest actions))
(cond
((null? rest) (values current-guess SR-conflict? RR-conflict?))
((shift? (car rest)) (values (car rest) SR-conflict? RR-conflict?))
((not current-guess)
(loop (car rest) (cdr rest)))
((and (reduce? (car rest))
[(null? rest) (values current-guess SR-conflict? RR-conflict?)]
[(shift? (car rest)) (values (car rest) SR-conflict? RR-conflict?)]
[(not current-guess) (loop (car rest) (cdr rest))]
[(and (reduce? (car rest))
(< (prod-index (reduce-prod (car rest)))
(prod-index (reduce-prod current-guess))))
(loop (car rest) (cdr rest)))
((accept? (car rest))
(loop (car rest) (cdr rest))]
[(accept? (car rest))
(eprintf "accept/reduce or accept/shift conflicts. Check the grammar for useless cycles of productions\n")
(loop current-guess (cdr rest)))
(else (loop current-guess (cdr rest)))))))))
(loop current-guess (cdr rest))]
[else (loop current-guess (cdr rest))]))]))
;; resolve-conflicts : grouped-parse-table bool -> parse-table
(define (resolve-conflicts grouped-table suppress)
(let* ((SR-conflicts 0)
(RR-conflicts 0)
(table (table-map
(lambda (gs actions)
(let-values (((action SR? RR?)
(resolve-conflict actions)))
(define SR-conflicts 0)
(define RR-conflicts 0)
(define table (table-map
(λ (gs actions)
(let-values ([(action SR? RR?)
(resolve-conflict actions)])
(when SR?
(set! SR-conflicts (add1 SR-conflicts)))
(when RR?
(set! RR-conflicts (add1 RR-conflicts)))
action))
grouped-table)))
grouped-table))
(unless suppress
(when (> SR-conflicts 0)
(eprintf "~a shift/reduce conflict~a\n"
@ -186,61 +165,60 @@
(eprintf "~a reduce/reduce conflict~a\n"
RR-conflicts
(if (= RR-conflicts 1) "" "s"))))
table))
table)
;; resolve-sr-conflict : (listof action) (union int #f) -> (listof action)
;; Resolves a single shift-reduce conflict, if precedences are in place.
(define (resolve-sr-conflict/prec actions shift-prec)
(let* ((shift (if (shift? (car actions))
(define shift (if (shift? (car actions))
(car actions)
(cadr actions)))
(reduce (if (shift? (car actions))
(define reduce (if (shift? (car actions))
(cadr actions)
(car actions)))
(reduce-prec (prod-prec (reduce-prod reduce))))
(define reduce-prec (prod-prec (reduce-prod reduce)))
(cond
((and shift-prec reduce-prec)
[(and shift-prec reduce-prec)
(cond
((< (prec-num shift-prec) (prec-num reduce-prec))
(list reduce))
((> (prec-num shift-prec) (prec-num reduce-prec))
(list shift))
((eq? 'left (prec-assoc shift-prec))
(list reduce))
((eq? 'right (prec-assoc shift-prec))
(list shift))
(else null)))
(else actions))))
[(< (prec-num shift-prec) (prec-num reduce-prec))
(list reduce)]
[(> (prec-num shift-prec) (prec-num reduce-prec))
(list shift)]
[(eq? 'left (prec-assoc shift-prec))
(list reduce)]
[(eq? 'right (prec-assoc shift-prec))
(list shift)]
[else null])]
[else actions]))
;; resolve-prec-conflicts : parse-table -> grouped-parse-table
(define (resolve-prec-conflicts table)
(table-map
(lambda (gs actions)
(λ (gs actions)
(cond
((and (term? gs)
[(and (term? gs)
(= 2 (length actions))
(or (shift? (car actions))
(shift? (cadr actions))))
(resolve-sr-conflict/prec actions (term-prec gs)))
(else actions)))
(resolve-sr-conflict/prec actions (term-prec gs))]
[else actions]))
(group-table table)))
;; build-table: grammar string bool -> parse-table
(define (build-table g file suppress)
(let* ((a (build-lr0-automaton g))
(term-vector (list->vector (send g get-terms)))
(end-terms (send g get-end-terms))
(table (make-parse-table (send a get-num-states)))
(get-lookahead (compute-LA a g))
(reduce-cache (make-hash)))
(define a (build-lr0-automaton g))
(define term-vector (list->vector (send g get-terms)))
(define end-terms (send g get-end-terms))
(define table (make-parse-table (send a get-num-states)))
(define get-lookahead (compute-LA a g))
(define reduce-cache (make-hash))
(for ([trans-key/state (in-list (send a get-transitions))])
(define from-state-index (kernel-index (trans-key-st (car trans-key/state))))
(define gs (trans-key-gs (car trans-key/state)))
(define to-state (cdr trans-key/state))
(for-each
(lambda (trans-key/state)
(let ((from-state-index (kernel-index (trans-key-st (car trans-key/state))))
(gs (trans-key-gs (car trans-key/state)))
(to-state (cdr trans-key/state)))
(table-add! table from-state-index gs
(cond
((non-term? gs)
@ -249,19 +227,19 @@
(make-accept))
(else
(make-shift
(kernel-index to-state)))))))
(send a get-transitions))
(kernel-index to-state))))))
(send a for-each-state
(lambda (state)
(for-each
(lambda (item)
(let ((item-prod (item-prod item)))
(λ (state)
(for ([item (in-list (append (hash-ref (send a get-epsilon-trans) state (λ () null))
(filter (λ (item)
(not (move-dot-right item)))
(kernel-items state))))])
(let ([item-prod (item-prod item)])
(bit-vector-for-each
(lambda (term-index)
(λ (term-index)
(unless (start-item? item)
(let ((r (hash-ref reduce-cache item-prod
(lambda ()
(λ ()
(let ((r (make-reduce item-prod)))
(hash-set! reduce-cache item-prod r)
r)))))
@ -269,22 +247,18 @@
(kernel-index state)
(vector-ref term-vector term-index)
r))))
(get-lookahead state item-prod))))
(append (hash-ref (send a get-epsilon-trans) state (lambda () null))
(filter (lambda (item)
(not (move-dot-right item)))
(kernel-items state))))))
(get-lookahead state item-prod))))))
(let ((grouped-table (resolve-prec-conflicts table)))
(define grouped-table (resolve-prec-conflicts table))
(unless (string=? file "")
(with-handlers [(exn:fail:filesystem?
(lambda (e)
(λ (e)
(eprintf
"Cannot write debug output to file \"~a\": ~a\n"
file
(exn-message e))))]
(call-with-output-file file
(lambda (port)
(λ (port)
(display-parser a grouped-table (send g get-prods) port))
#:exists 'truncate)))
(resolve-conflicts grouped-table suppress))))
(resolve-conflicts grouped-table suppress))

@ -1,118 +1,71 @@
(module yacc-helper mzscheme
(require mzlib/list
#lang racket/base
(require (prefix-in rl: racket/list)
"../private-lex/token-syntax.rkt")
;; General helper routines
(provide duplicate-list? remove-duplicates overlap? vector-andmap display-yacc)
(define (vector-andmap f v)
(let loop ((i 0))
(cond
((= i (vector-length v)) #t)
(else (if (f (vector-ref v i))
(loop (add1 i))
#f)))))
(define (vector-andmap pred vec)
(for/and ([item (in-vector vec)])
(pred vec)))
;; duplicate-list?: symbol list -> #f | symbol
;; returns a symbol that exists twice in l, or false if no such symbol
;; exists
(define (duplicate-list? l)
(letrec ((t (make-hash-table))
(dl? (lambda (l)
(cond
((null? l) #f)
((hash-table-get t (car l) (lambda () #f)) =>
(lambda (x) x))
(else
(hash-table-put! t (car l) (car l))
(dl? (cdr l)))))))
(dl? l)))
(define (duplicate-list? syms)
(rl:check-duplicates syms eq?))
;; remove-duplicates: syntax-object list -> syntax-object list
;; removes the duplicates from the lists
(define (remove-duplicates sl)
(let ((t (make-hash-table)))
(letrec ((x
(lambda (sl)
(cond
((null? sl) sl)
((hash-table-get t (syntax-object->datum (car sl)) (lambda () #f))
(x (cdr sl)))
(else
(hash-table-put! t (syntax-object->datum (car sl)) #t)
(cons (car sl) (x (cdr sl))))))))
(x sl))))
(define (remove-duplicates syms)
(rl:remove-duplicates syms equal? #:key syntax->datum))
;; overlap?: symbol list * symbol list -> #f | symbol
;; Returns an symbol in l1 intersect l2, or #f is no such symbol exists
(define (overlap? l1 l2)
(let/ec ret
(let ((t (make-hash-table)))
(for-each (lambda (s1)
(hash-table-put! t s1 s1))
l1)
(for-each (lambda (s2)
(cond
((hash-table-get t s2 (lambda () #f)) =>
(lambda (o) (ret o)))))
l2)
#f)))
(define (overlap? syms1 syms2)
(for/first ([sym1 (in-list syms1)]
#:when (memq sym1 syms2))
sym1))
(define (display-yacc grammar tokens start precs port)
(let-syntax ((p (syntax-rules ()
((_ args ...) (fprintf port args ...)))))
(let* ((tokens (map syntax-local-value tokens))
(eterms (filter e-terminals-def? tokens))
(terms (filter terminals-def? tokens))
(term-table (make-hash-table))
(display-rhs
(lambda (rhs)
(for-each (lambda (sym) (p "~a " (hash-table-get term-table sym (lambda () sym))))
(car rhs))
(if (= 3 (length rhs))
(let-syntax ([p (syntax-rules ()
((_ args ...) (fprintf port args ...)))])
(let* ([tokens (map syntax-local-value tokens)]
[eterms (filter e-terminals-def? tokens)]
[terms (filter terminals-def? tokens)]
[term-table (make-hasheq)]
[display-rhs
(λ (rhs)
(for ([sym (in-list (car rhs))])
(p "~a " (hash-ref term-table sym (λ () sym))))
(when (= 3 (length rhs))
(p "%prec ~a" (cadadr rhs)))
(p "\n"))))
(for-each
(lambda (t)
(for-each
(lambda (t)
(hash-table-put! term-table t (format "'~a'" t)))
(syntax-object->datum (e-terminals-def-t t))))
eterms)
(for-each
(lambda (t)
(for-each
(lambda (t)
(p "\n"))])
(for* ([t (in-list eterms)]
[t (in-list (syntax->datum (e-terminals-def-t t)))])
(hash-set! term-table t (format "'~a'" t)))
(for* ([t (in-list terms)]
[t (in-list (syntax->datum (terminals-def-t t)))])
(p "%token ~a\n" t)
(hash-table-put! term-table t (format "~a" t)))
(syntax-object->datum (terminals-def-t t))))
terms)
(if precs
(for-each (lambda (prec)
(hash-set! term-table t (format "~a" t)))
(when precs
(for ([prec (in-list precs)])
(p "%~a " (car prec))
(for-each (lambda (tok)
(p " ~a" (hash-table-get term-table tok)))
(cdr prec))
(p "\n"))
precs))
(for ([tok (in-list (cdr prec))])
(p " ~a" (hash-ref term-table tok)))
(p "\n")))
(p "%start ~a\n" start)
(p "%%\n")
(for-each (lambda (prod)
(let ((nt (car prod)))
(for ([prod (in-list grammar)])
(define nt (car prod))
(p "~a: " nt)
(display-rhs (cadr prod))
(for-each (lambda (rhs)
(for ([rhs (in-list (cddr prod))])
(p "| ")
(display-rhs rhs))
(cddr prod))
(p ";\n")))
grammar)
(p ";\n"))
(p "%%\n"))))
)

@ -1,55 +1,53 @@
(module yacc-to-scheme mzscheme
#lang racket/base
(require br-parser-tools/lex
(prefix : br-parser-tools/lex-sre)
(prefix-in : br-parser-tools/lex-sre)
br-parser-tools/yacc
syntax/readerr
mzlib/list)
racket/list)
(provide trans)
(define match-double-string
(lexer
((:+ (:~ #\" #\\)) (append (string->list lexeme)
(match-double-string input-port)))
((:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port)))
(#\" null)))
[(:+ (:~ #\" #\\)) (append (string->list lexeme)
(match-double-string input-port))]
[(:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port))]
[#\" null]))
(define match-single-string
(lexer
((:+ (:~ #\' #\\)) (append (string->list lexeme)
(match-single-string input-port)))
((:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port)))
(#\' null)))
[(:+ (:~ #\' #\\)) (append (string->list lexeme)
(match-single-string input-port))]
[(:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port))]
[#\' null]))
(define-lex-abbrevs
(letter (:or (:/ "a" "z") (:/ "A" "Z")))
(digit (:/ "0" "9"))
(initial (:or letter (char-set "!$%&*/<=>?^_~@")))
(subsequent (:or initial digit (char-set "+-.@")))
(comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/")))
[letter (:or (:/ "a" "z") (:/ "A" "Z"))]
[digit (:/ "0" "9")]
[initial (:or letter (char-set "!$%&*/<=>?^_~@"))]
[subsequent (:or initial digit (char-set "+-.@"))]
[comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/")])
(define-empty-tokens x
(EOF PIPE |:| SEMI |%%| %prec))
(define-tokens y
(SYM STRING))
(define-empty-tokens x (EOF PIPE |:| SEMI |%%| %prec))
(define-tokens y (SYM STRING))
(define get-token-grammar
(lexer-src-pos
("%%" '|%%|)
(":" (string->symbol lexeme))
("%prec" (string->symbol lexeme))
(#\| 'PIPE)
((:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}")))
(return-without-pos (get-token-grammar input-port)))
(#\; 'SEMI)
(#\' (token-STRING (string->symbol (list->string (match-single-string input-port)))))
(#\" (token-STRING (string->symbol (list->string (match-double-string input-port)))))
((:: initial (:* subsequent)) (token-SYM (string->symbol lexeme)))))
["%%" '|%%|]
[":" (string->symbol lexeme)]
["%prec" (string->symbol lexeme)]
[#\| 'PIPE]
[(:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}")))
(return-without-pos (get-token-grammar input-port))]
[#\; 'SEMI]
[#\' (token-STRING (string->symbol (list->string (match-single-string input-port))))]
[#\" (token-STRING (string->symbol (list->string (match-double-string input-port))))]
[(:: initial (:* subsequent)) (token-SYM (string->symbol lexeme))]))
(define (parse-grammar enter-term enter-empty-term enter-non-term)
(parser
(tokens x y)
(src-pos)
(error (lambda (tok-ok tok-name tok-value start-pos end-pos)
(error (λ (tok-ok tok-name tok-value start-pos end-pos)
(raise-read-error
(format "Error Parsing YACC grammar at token: ~a with value: ~a" tok-name tok-value)
(file-path)
@ -98,38 +96,35 @@
(string<? (symbol->string a) (symbol->string b)))
(define (trans filename)
(let* ((i (open-input-file filename))
(terms (make-hash-table))
(eterms (make-hash-table))
(nterms (make-hash-table))
(enter-term
(lambda (s)
(if (not (hash-table-get nterms s (lambda () #f)))
(hash-table-put! terms s #t))))
(enter-empty-term
(lambda (s)
(if (not (hash-table-get nterms s (lambda () #f)))
(hash-table-put! eterms s #t))))
(enter-non-term
(lambda (s)
(hash-table-remove! terms s)
(hash-table-remove! eterms s)
(hash-table-put! nterms s #t))))
(define i (open-input-file filename))
(define terms (make-hasheq))
(define eterms (make-hasheq))
(define nterms (make-hasheq))
(define (enter-term s)
(when (not (hash-ref nterms s (λ () #f)))
(hash-set! terms s #t)))
(define (enter-empty-term s)
(when (not (hash-ref nterms s (λ () #f)))
(hash-set! eterms s #t)))
(define (enter-non-term s)
(hash-remove! terms s)
(hash-remove! eterms s)
(hash-set! nterms s #t))
(port-count-lines! i)
(file-path filename)
(regexp-match "%%" i)
(begin0
(let ((gram ((parse-grammar enter-term enter-empty-term enter-non-term)
(lambda ()
(let ([gram ((parse-grammar enter-term enter-empty-term enter-non-term)
(λ ()
(let ((t (get-token-grammar i)))
t)))))
t)))])
`(begin
(define-tokens t ,(sort (hash-table-map terms (lambda (k v) k)) symbol<?))
(define-empty-tokens et ,(sort (hash-table-map eterms (lambda (k v) k)) symbol<?))
(define-tokens t ,(sort (hash-map terms (λ (k v) k)) symbol<?))
(define-empty-tokens et ,(sort (hash-map eterms (λ (k v) k)) symbol<?))
(parser
(start ___)
(end ___)
(error ___)
(tokens t et)
(grammar ,@gram))))
(close-input-port i)))))
(close-input-port i)))

@ -1,14 +1,13 @@
#lang scheme/base
(require (for-syntax scheme/base
#lang racket/base
(require (for-syntax racket/base
"private-yacc/parser-builder.rkt"
"private-yacc/grammar.rkt"
"private-yacc/yacc-helper.rkt"
"private-yacc/parser-actions.rkt"))
(require "private-lex/token.rkt"
"private-yacc/parser-actions.rkt")
"private-lex/token.rkt"
"private-yacc/parser-actions.rkt"
mzlib/etc
mzlib/pretty
racket/local
racket/pretty
syntax/readerr)
(provide parser)
@ -17,139 +16,93 @@
;; convert-parse-table : (vectorof (listof (cons/c gram-sym? action?))) ->
;; (vectorof (symbol runtime-action hashtable))
(define-for-syntax (convert-parse-table table)
(list->vector
(map
(lambda (state-entry)
(let ((ht (make-hasheq)))
(for-each
(lambda (gs/action)
(for/vector ([state-entry (in-vector table)])
(let ([ht (make-hasheq)])
(for ([gs/action (in-list state-entry)])
(hash-set! ht
(gram-sym-symbol (car gs/action))
(action->runtime-action (cdr gs/action))))
state-entry)
ht))
(vector->list table))))
ht)))
(define-syntax (parser stx)
(syntax-case stx ()
((_ args ...)
(let ((arg-list (syntax->list (syntax (args ...))))
(src-pos #f)
(debug #f)
(error #f)
(tokens #f)
(start #f)
(end #f)
(precs #f)
(suppress #f)
(grammar #f)
(yacc-output #f))
(for-each
(lambda (arg)
[(_ ARGS ...)
(let ([arg-list (syntax->list #'(ARGS ...))]
[src-pos #f]
[debug #f]
[error #f]
[tokens #f]
[start #f]
[end #f]
[precs #f]
[suppress #f]
[grammar #f]
[yacc-output #f])
(for ([arg (in-list (syntax->list #'(ARGS ...)))])
(syntax-case* arg (debug error tokens start end precs grammar
suppress src-pos yacc-output)
(lambda (a b)
(eq? (syntax-e a) (syntax-e b)))
((debug filename)
(λ (a b) (eq? (syntax-e a) (syntax-e b)))
[(debug FILENAME)
(cond
((not (string? (syntax-e (syntax filename))))
(raise-syntax-error
#f
"Debugging filename must be a string"
stx
(syntax filename)))
(debug
(raise-syntax-error #f "Multiple debug declarations" stx))
(else
(set! debug (syntax-e (syntax filename))))))
((suppress)
(set! suppress #t))
((src-pos)
(set! src-pos #t))
((error expression)
[(not (string? (syntax-e #'FILENAME)))
(raise-syntax-error #f "Debugging filename must be a string" stx #'FILENAME)]
[debug (raise-syntax-error #f "Multiple debug declarations" stx)]
[else (set! debug (syntax-e #'FILENAME))])]
[(suppress) (set! suppress #t)]
[(src-pos) (set! src-pos #t)]
[(error EXPRESSION)
(if error
(raise-syntax-error #f "Multiple error declarations" stx)
(set! error (syntax expression))))
((tokens def ...)
(set! error #'EXPRESSION))]
[(tokens DEF ...)
(begin
(when tokens
(raise-syntax-error #f "Multiple tokens declarations" stx))
(let ((defs (syntax->list (syntax (def ...)))))
(for-each
(lambda (d)
(unless (identifier? d)
(raise-syntax-error
#f
"Token-group name must be an identifier"
stx
d)))
defs)
(set! tokens defs))))
((start symbol ...)
(let ((symbols (syntax->list (syntax (symbol ...)))))
(for-each
(lambda (sym)
(unless (identifier? sym)
(raise-syntax-error #f
"Start symbol must be a symbol"
stx
sym)))
symbols)
(let ((defs (syntax->list #'(DEF ...))))
(for ([d (in-list defs)]
#:unless (identifier? d))
(raise-syntax-error #f "Token-group name must be an identifier" stx d))
(set! tokens defs)))]
[(start symbol ...)
(let ([symbols (syntax->list #'(symbol ...))])
(for ([sym (in-list symbols)]
#:unless (identifier? sym))
(raise-syntax-error #f "Start symbol must be a symbol" stx sym))
(when start
(raise-syntax-error #f "Multiple start declarations" stx))
(when (null? symbols)
(raise-syntax-error #f
"Missing start symbol"
stx
arg))
(set! start symbols)))
((end symbols ...)
(let ((symbols (syntax->list (syntax (symbols ...)))))
(for-each
(lambda (sym)
(unless (identifier? sym)
(raise-syntax-error #f
"End token must be a symbol"
stx
sym)))
symbols)
(let ((d (duplicate-list? (map syntax-e symbols))))
(raise-syntax-error #f "Missing start symbol" stx arg))
(set! start symbols))]
[(end SYMBOLS ...)
(let ((symbols (syntax->list #'(SYMBOLS ...))))
(for ([sym (in-list symbols)]
#:unless (identifier? sym))
(raise-syntax-error #f "End token must be a symbol" stx sym))
(let ([d (duplicate-list? (map syntax-e symbols))])
(when d
(raise-syntax-error
#f
(format "Duplicate end token definition for ~a" d)
stx
arg))
(raise-syntax-error #f (format "Duplicate end token definition for ~a" d) stx arg))
(when (null? symbols)
(raise-syntax-error
#f
"end declaration must contain at least 1 token"
stx
arg))
(raise-syntax-error #f "end declaration must contain at least 1 token" stx arg))
(when end
(raise-syntax-error #f "Multiple end declarations" stx))
(set! end symbols))))
((precs decls ...)
(set! end symbols)))]
[(precs DECLS ...)
(if precs
(raise-syntax-error #f "Multiple precs declarations" stx)
(set! precs (syntax/loc arg (decls ...)))))
((grammar prods ...)
(set! precs (syntax/loc arg (DECLS ...))))]
[(grammar PRODS ...)
(if grammar
(raise-syntax-error #f "Multiple grammar declarations" stx)
(set! grammar (syntax/loc arg (prods ...)))))
((yacc-output filename)
(set! grammar (syntax/loc arg (PRODS ...))))]
[(yacc-output FILENAME)
(cond
((not (string? (syntax-e (syntax filename))))
(raise-syntax-error #f
"Yacc-output filename must be a string"
stx
(syntax filename)))
(yacc-output
(raise-syntax-error #f "Multiple yacc-output declarations" stx))
(else
(set! yacc-output (syntax-e (syntax filename))))))
(_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg))))
(syntax->list (syntax (args ...))))
[(not (string? (syntax-e #'FILENAME)))
(raise-syntax-error #f "Yacc-output filename must be a string" stx #'FILENAME)]
[yacc-output
(raise-syntax-error #f "Multiple yacc-output declarations" stx)]
[else
(set! yacc-output (syntax-e #'FILENAME))])]
[_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg)]))
(unless tokens
(raise-syntax-error #f "missing tokens declaration" stx))
(unless error
@ -160,7 +113,7 @@
(raise-syntax-error #f "missing end declaration" stx))
(unless start
(raise-syntax-error #f "missing start declaration" stx))
(let-values (((table all-term-syms actions check-syntax-fix)
(let-values ([(table all-term-syms actions check-syntax-fix)
(build-parser (if debug debug "")
src-pos
suppress
@ -168,66 +121,51 @@
start
end
precs
grammar)))
grammar)])
(when (and yacc-output (not (string=? yacc-output "")))
(with-handlers [(exn:fail:filesystem?
(lambda (e)
(eprintf
"Cannot write yacc-output to file \"~a\"\n"
yacc-output)))]
(λ (e) (eprintf "Cannot write yacc-output to file \"~a\"\n" yacc-output)))]
(call-with-output-file yacc-output
(lambda (port)
(λ (port)
(display-yacc (syntax->datum grammar)
tokens
(map syntax->datum start)
(if precs
(syntax->datum precs)
#f)
(and precs (syntax->datum precs))
port))
#:exists 'truncate)))
(with-syntax ((check-syntax-fix check-syntax-fix)
(err error)
(ends end)
(starts start)
(debug debug)
(table (convert-parse-table table))
(all-term-syms all-term-syms)
(actions actions)
(src-pos src-pos))
(syntax
(begin
(with-syntax ([check-syntax-fix check-syntax-fix]
[err error]
[ends end]
[starts start]
[debug debug]
[table (convert-parse-table table)]
[all-term-syms all-term-syms]
[actions actions]
[src-pos src-pos])
#'(begin
check-syntax-fix
(parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos)))))))
(_
(raise-syntax-error #f
"parser must have the form (parser args ...)"
stx))))
(parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos)))))]
[_ (raise-syntax-error #f "parser must have the form (parser args ...)" stx)]))
(define (reduce-stack stack num ret-vals src-pos)
(cond
((> num 0)
(let* ((top-frame (car stack))
(ret-vals
(if src-pos
[(positive? num)
(define top-frame (car stack))
(let ([ret-vals (if src-pos
(cons (stack-frame-value top-frame)
(cons (stack-frame-start-pos top-frame)
(cons (stack-frame-end-pos top-frame)
ret-vals)))
(cons (stack-frame-value top-frame) ret-vals))))
(reduce-stack (cdr stack) (sub1 num) ret-vals src-pos)))
(else (values stack ret-vals))))
(cons (stack-frame-value top-frame) ret-vals))])
(reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))]
[else (values stack ret-vals)]))
;; extract-helper : (symbol or make-token) any any -> symbol any any any
(define (extract-helper tok v1 v2)
(cond
((symbol? tok)
(values tok #f v1 v2))
((token? tok)
(values (real-token-name tok) (real-token-value tok) v1 v2))
(else (raise-argument-error 'parser
"(or/c symbol? token?)"
0
tok))))
[(symbol? tok) (values tok #f v1 v2)]
[(token? tok) (values (real-token-name tok) (real-token-value tok) v1 v2)]
[else (raise-argument-error 'parser "(or/c symbol? token?)" 0 tok)]))
;; well-formed-position-token?: any -> boolean
;; Returns true if pt is a position token whose position-token-token
@ -236,8 +174,7 @@
;; a tokenizer produces an erroneous position-token wrapped twice.
;; (as often happens when omitting return-without-pos).
(define (well-formed-token-field? t)
(or (symbol? t)
(token? t)))
(or (symbol? t) (token? t)))
(define (well-formed-position-token? pt)
(and (position-token? pt)
@ -250,24 +187,18 @@
;; extract-src-pos : position-token -> symbol any any any
(define (extract-src-pos ip)
(unless (well-formed-position-token? ip)
(raise-argument-error 'parser
"well-formed-position-token?"
0
ip))
(raise-argument-error 'parser "well-formed-position-token?" 0 ip))
(extract-helper (position-token-token ip)
(position-token-start-pos ip)
(position-token-end-pos ip)))
(define (extract-srcloc ip)
(unless (well-formed-srcloc-token? ip)
(raise-argument-error 'parser
"well-formed-srcloc-token?"
0
ip))
(let ([loc (srcloc-token-srcloc ip)])
(raise-argument-error 'parser "well-formed-srcloc-token?" 0 ip))
(define loc (srcloc-token-srcloc ip))
(extract-helper (srcloc-token-token ip)
(position-token (srcloc-position loc) (srcloc-line loc) (srcloc-column loc))
(position-token (+ (srcloc-position loc) (srcloc-span loc)) #f #f))))
(position-token (+ (srcloc-position loc) (srcloc-span loc)) #f #f)))
;; extract-no-src-pos : (symbol or make-token) -> symbol any any any
@ -295,24 +226,24 @@
(if (memq tok ends)
(raise-read-error "parser: Cannot continue after error"
#f #f #f #f #f)
(let ((a (find-action stack tok val start-pos end-pos)))
(let ([a (find-action stack tok val start-pos end-pos)])
(cond
((runtime-shift? a)
[(runtime-shift? a)
;; (printf "shift:~a\n" (runtime-shift-state a))
(cons (make-stack-frame (runtime-shift-state a)
val
start-pos
end-pos)
stack))
(else
stack)]
[else
;; (printf "discard input:~a\n" tok)
(let-values (((tok val start-pos end-pos)
(extract (get-token))))
(remove-input tok val start-pos end-pos))))))))
(let-values ([(tok val start-pos end-pos)
(extract (get-token))])
(remove-input tok val start-pos end-pos))])))))
(let remove-states ()
(let ((a (find-action stack 'error #f start-pos end-pos)))
(let ([a (find-action stack 'error #f start-pos end-pos)])
(cond
((runtime-shift? a)
[(runtime-shift? a)
;; (printf "shift:~a\n" (runtime-shift-state a))
(set! stack
(cons
@ -321,61 +252,55 @@
start-pos
end-pos)
stack))
(remove-input tok val start-pos end-pos))
(else
(remove-input tok val start-pos end-pos)]
[else
;; (printf "discard state:~a\n" (car stack))
(cond
((< (length stack) 2)
[(< (length stack) 2)
(raise-read-error "parser: Cannot continue after error"
#f #f #f #f #f))
(else
#f #f #f #f #f)]
[else
(set! stack (cdr stack))
(remove-states)))))))))
(remove-states)])])))))
(define (find-action stack tok val start-pos end-pos)
(unless (hash-ref all-term-syms
tok
#f)
(unless (hash-ref all-term-syms tok #f)
(if src-pos
(err #f tok val start-pos end-pos)
(err #f tok val))
(raise-read-error (format "parser: got token of unknown type ~a" tok)
#f #f #f #f #f))
(hash-ref (vector-ref table (stack-frame-state (car stack)))
tok
#f))
(hash-ref (vector-ref table (stack-frame-state (car stack))) tok #f))
(define (make-parser start-number)
(lambda (get-token)
(define ((make-parser start-number) get-token)
(unless (and (procedure? get-token)
(procedure-arity-includes? get-token 0))
(error 'get-token "expected a nullary procedure, got ~e" get-token))
(let parsing-loop ((stack (make-empty-stack start-number))
(ip (get-token)))
(let-values (((tok val start-pos end-pos)
(extract ip)))
(let ((action (find-action stack tok val start-pos end-pos)))
(let parsing-loop ([stack (make-empty-stack start-number)]
[ip (get-token)])
(let-values ([(tok val start-pos end-pos) (extract ip)])
(let ([action (find-action stack tok val start-pos end-pos)])
(cond
((runtime-shift? action)
[(runtime-shift? action)
;; (printf "shift:~a\n" (runtime-shift-state action))
(parsing-loop (cons (make-stack-frame (runtime-shift-state action)
val
start-pos
end-pos)
stack)
(get-token)))
((runtime-reduce? action)
(get-token))]
[(runtime-reduce? action)
;; (printf "reduce:~a\n" (runtime-reduce-prod-num action))
(let-values (((new-stack args)
(let-values ([(new-stack args)
(reduce-stack stack
(runtime-reduce-rhs-length action)
null
src-pos)))
(let ((goto
src-pos)])
(let ([goto
(runtime-goto-state
(hash-ref
(vector-ref table (stack-frame-state (car new-stack)))
(runtime-reduce-lhs action)))))
(runtime-reduce-lhs action)))])
(parsing-loop
(cons
(if src-pos
@ -392,21 +317,18 @@
#f
#f))
new-stack)
ip))))
((runtime-accept? action)
ip)))]
[(runtime-accept? action)
;; (printf "accept\n")
(stack-frame-value (car stack)))
(else
(stack-frame-value (car stack))]
[else
(if src-pos
(err #t tok val start-pos end-pos)
(err #t tok val))
(parsing-loop (fix-error stack tok val start-pos end-pos get-token)
(get-token))))))))))
(cond
((null? (cdr starts)) (make-parser 0))
(else
(let loop ((l starts)
(i 0))
(get-token))]))))))
(cond
((null? l) null)
(else (cons (make-parser i) (loop (cdr l) (add1 i))))))))))
[(null? (cdr starts)) (make-parser 0)]
[else
(for/list ([(l i) (in-indexed starts)])
(make-parser i))])))

@ -7,5 +7,3 @@
(define build-deps '("rackunit-lib"))
(define pkg-desc "implementation (no documentation) part of \"br-parser-tools\"")
(define pkg-authors '(mflatt))

Loading…
Cancel
Save