refactor into racket/base

pull/5/head
Matthew Butterick 7 years ago
parent fc1e00bc2a
commit fd446e6013

@ -47,18 +47,16 @@
(define-struct tasks (active active-back waits multi-waits cache progress?)) (define-struct tasks (active active-back waits multi-waits cache progress?))
(define-for-syntax make-token-identifier-mapping make-hasheq) (define-for-syntax make-token-identifier-mapping make-hasheq)
(define-for-syntax token-identifier-mapping-get (define-for-syntax (token-identifier-mapping-get t tok [fail #f])
(case-lambda (if fail
[(t tok) (hash-ref t (syntax-e tok) fail)
(hash-ref t (syntax-e tok))] (hash-ref t (syntax-e tok))))
[(t tok fail)
(hash-ref t (syntax-e tok) fail)])) (define-for-syntax (token-identifier-mapping-put! t tok v)
(define-for-syntax token-identifier-mapping-put! (hash-set! t (syntax-e tok) v))
(lambda (t tok v)
(hash-set! t (syntax-e tok) v))) (define-for-syntax (token-identifier-mapping-map t f)
(define-for-syntax token-identifier-mapping-map (hash-map t f))
(lambda (t f)
(hash-map t f)))
;; Used to calculate information on the grammar, such as whether ;; Used to calculate information on the grammar, such as whether
;; a particular non-terminal is "simple" instead of recursively defined. ;; a particular non-terminal is "simple" instead of recursively defined.
@ -71,7 +69,7 @@
(cdr as) (cdr bs))])) (cdr as) (cdr bs))]))
(let loop () (let loop ()
(when (ormap-all #f (when (ormap-all #f
(lambda (nt pats) (λ (nt pats)
(let ([old (bound-identifier-mapping-get nts nt)]) (let ([old (bound-identifier-mapping-get nts nt)])
(let ([new (proc nt pats old)]) (let ([new (proc nt pats old)])
(if (equal? old new) (if (equal? old new)
@ -88,182 +86,153 @@
(define (parse-and simple-a? parse-a parse-b (define (parse-and simple-a? parse-a parse-b
stream last-consumed-token depth end success-k fail-k stream last-consumed-token depth end success-k fail-k
max-depth tasks) max-depth tasks)
(letrec ([mk-got-k (define ((mk-got-k success-k fail-k) val stream last-consumed-token depth max-depth tasks next1-k)
(lambda (success-k fail-k) (if simple-a?
(lambda (val stream last-consumed-token depth max-depth tasks next1-k) (parse-b val stream last-consumed-token depth end
(if simple-a? (mk-got2-k success-k fail-k next1-k)
(parse-b val stream last-consumed-token depth end (mk-fail2-k success-k fail-k next1-k)
(mk-got2-k success-k fail-k next1-k) max-depth tasks)
(mk-fail2-k success-k fail-k next1-k) (parallel-or
max-depth tasks) (λ (success-k fail-k max-depth tasks)
(parallel-or (parse-b val stream last-consumed-token depth end
(lambda (success-k fail-k max-depth tasks) success-k fail-k
(parse-b val stream last-consumed-token depth end max-depth tasks))
success-k fail-k (λ (success-k fail-k max-depth tasks)
max-depth tasks)) (next1-k (mk-got-k success-k fail-k)
(lambda (success-k fail-k max-depth tasks) fail-k max-depth tasks))
(next1-k (mk-got-k success-k fail-k) success-k fail-k max-depth tasks)))
fail-k max-depth tasks))
success-k fail-k max-depth tasks))))] (define ((mk-got2-k success-k fail-k next1-k) val stream last-consumed-token depth max-depth tasks next-k)
[mk-got2-k (success-k val stream last-consumed-token depth max-depth tasks
(lambda (success-k fail-k next1-k) (λ (success-k fail-k max-depth tasks)
(lambda (val stream last-consumed-token depth max-depth tasks next-k) (next-k (mk-got2-k success-k fail-k next1-k)
(success-k val stream last-consumed-token depth max-depth tasks (mk-fail2-k success-k fail-k next1-k)
(lambda (success-k fail-k max-depth tasks) max-depth tasks))))
(next-k (mk-got2-k success-k fail-k next1-k)
(mk-fail2-k success-k fail-k next1-k) (define ((mk-fail2-k success-k fail-k next1-k) max-depth tasks)
max-depth tasks)))))] (next1-k (mk-got-k success-k fail-k) fail-k max-depth tasks))
[mk-fail2-k
(lambda (success-k fail-k next1-k) (parse-a stream last-consumed-token depth end
(lambda (max-depth tasks) (mk-got-k success-k fail-k)
(next1-k (mk-got-k success-k fail-k) fail-k
fail-k max-depth tasks))
max-depth
tasks)))])
(parse-a stream last-consumed-token depth end
(mk-got-k success-k fail-k)
fail-k
max-depth tasks)))
;; Parallel or for non-terminal alternatives ;; Parallel or for non-terminal alternatives
(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks) (define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)
(parallel-or (lambda (success-k fail-k max-depth tasks) (parallel-or (λ (success-k fail-k max-depth tasks)
(parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks)) (parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks))
(lambda (success-k fail-k max-depth tasks) (λ (success-k fail-k max-depth tasks)
(parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)) (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks))
success-k fail-k max-depth tasks)) success-k fail-k max-depth tasks))
;; Generic parallel-or ;; Generic parallel-or
(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks) (define (parallel-or parse-a parse-b success-k fail-k max-depth tasks)
(define answer-key (gensym)) (define answer-key (gensym))
(letrec ([gota-k (define (gota-k val stream last-consumed-token depth max-depth tasks next-k)
(lambda (val stream last-consumed-token depth max-depth tasks next-k) (report-answer answer-key
(report-answer answer-key max-depth
max-depth tasks
tasks (list val stream last-consumed-token depth next-k)))
(list val stream last-consumed-token depth next-k)))] (define (faila-k max-depth tasks)
[faila-k (report-answer answer-key
(lambda (max-depth tasks) max-depth
(report-answer answer-key
max-depth
tasks
null))])
(let* ([tasks (queue-task
tasks tasks
(lambda (max-depth tasks) null))
(parse-a gota-k (let* ([tasks (queue-task tasks (λ (max-depth tasks)
faila-k (parse-a gota-k faila-k max-depth tasks)))]
max-depth tasks)))] [tasks (queue-task tasks (λ (max-depth tasks)
[tasks (queue-task (parse-b gota-k faila-k max-depth tasks)))]
[queue-next (λ (next-k tasks)
(queue-task tasks (λ (max-depth tasks)
(next-k gota-k faila-k max-depth tasks))))])
(define ((mk-got-one immediate-next? get-nth success-k) val stream last-consumed-token depth max-depth tasks next-k)
(let ([tasks (if immediate-next?
(queue-next next-k tasks)
tasks)])
(success-k val stream last-consumed-token depth max-depth
tasks tasks
(lambda (max-depth tasks) (λ (success-k fail-k max-depth tasks)
(parse-b gota-k (let ([tasks (if immediate-next?
faila-k tasks
max-depth tasks)))] (queue-next next-k tasks))])
[queue-next (lambda (next-k tasks) (get-nth max-depth tasks success-k fail-k))))))
(queue-task tasks (define (get-first max-depth tasks success-k fail-k)
(lambda (max-depth tasks) (wait-for-answer #f max-depth tasks answer-key
(next-k gota-k (mk-got-one #t get-first success-k)
faila-k (λ (max-depth tasks)
max-depth tasks))))]) (get-second max-depth tasks success-k fail-k))
(letrec ([mk-got-one #f))
(lambda (immediate-next? get-nth success-k) (define (get-second max-depth tasks success-k fail-k)
(lambda (val stream last-consumed-token depth max-depth tasks next-k) (wait-for-answer #f max-depth tasks answer-key
(let ([tasks (if immediate-next? (mk-got-one #f get-second success-k)
(queue-next next-k tasks) fail-k #f))
tasks)]) (get-first max-depth tasks success-k fail-k)))
(success-k val stream last-consumed-token depth max-depth
tasks
(lambda (success-k fail-k max-depth tasks)
(let ([tasks (if immediate-next?
tasks
(queue-next next-k tasks))])
(get-nth max-depth tasks success-k fail-k)))))))]
[get-first
(lambda (max-depth tasks success-k fail-k)
(wait-for-answer #f max-depth tasks answer-key
(mk-got-one #t get-first success-k)
(lambda (max-depth tasks)
(get-second max-depth tasks success-k fail-k))
#f))]
[get-second
(lambda (max-depth tasks success-k fail-k)
(wait-for-answer #f max-depth tasks answer-key
(mk-got-one #f get-second success-k)
fail-k #f))])
(get-first max-depth tasks success-k fail-k)))))
;; Non-terminal alternatives where the first is "simple" can be done ;; Non-terminal alternatives where the first is "simple" can be done
;; sequentially, which is simpler ;; sequentially, which is simpler
(define (parse-or parse-a parse-b (define (parse-or parse-a parse-b
stream last-consumed-token depth end success-k fail-k max-depth tasks) stream last-consumed-token depth end success-k fail-k max-depth tasks)
(letrec ([mk-got-k (define ((mk-got-k success-k fail-k) val stream last-consumed-token depth max-depth tasks next-k)
(lambda (success-k fail-k) (success-k val stream last-consumed-token depth
(lambda (val stream last-consumed-token depth max-depth tasks next-k) max-depth tasks
(success-k val stream last-consumed-token depth (λ (success-k fail-k max-depth tasks)
max-depth tasks (next-k (mk-got-k success-k fail-k)
(lambda (success-k fail-k max-depth tasks) (mk-fail-k success-k fail-k)
(next-k (mk-got-k success-k fail-k) max-depth tasks))))
(mk-fail-k success-k fail-k) (define ((mk-fail-k success-k fail-k) max-depth tasks)
max-depth tasks)))))] (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks))
[mk-fail-k (parse-a stream last-consumed-token depth end
(lambda (success-k fail-k) (mk-got-k success-k fail-k)
(lambda (max-depth tasks) (mk-fail-k success-k fail-k)
(parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))]) max-depth tasks))
(parse-a stream last-consumed-token depth end
(mk-got-k success-k fail-k)
(mk-fail-k success-k fail-k)
max-depth tasks)))
;; Starts a thread ;; Starts a thread
(define queue-task (define (queue-task tasks t [progress? #t])
(lambda (tasks t [progress? #t]) (make-tasks (tasks-active tasks)
(make-tasks (tasks-active tasks) (cons t (tasks-active-back tasks))
(cons t (tasks-active-back tasks)) (tasks-waits tasks)
(tasks-waits tasks) (tasks-multi-waits tasks)
(tasks-multi-waits tasks) (tasks-cache tasks)
(tasks-cache tasks) (or progress? (tasks-progress? tasks))))
(or progress? (tasks-progress? tasks)))))
;; Reports an answer to a waiting thread: ;; Reports an answer to a waiting thread:
(define (report-answer answer-key max-depth tasks val) (define (report-answer answer-key max-depth tasks val)
(let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))]) (define v (hash-ref (tasks-waits tasks) answer-key (λ () #f)))
(if v (if v
(let ([tasks (make-tasks (cons (v val) (let ([tasks (make-tasks (cons (v val) (tasks-active tasks))
(tasks-active tasks)) (tasks-active-back tasks)
(tasks-active-back tasks) (tasks-waits tasks)
(tasks-waits tasks) (tasks-multi-waits tasks)
(tasks-multi-waits tasks) (tasks-cache tasks)
(tasks-cache tasks) #t)])
#t)]) (hash-remove! (tasks-waits tasks) answer-key)
(hash-remove! (tasks-waits tasks) answer-key) (swap-task max-depth tasks))
(swap-task max-depth tasks)) ;; We have an answer ready too fast; wait
;; We have an answer ready too fast; wait (swap-task max-depth
(swap-task max-depth (queue-task tasks
(queue-task tasks (λ (max-depth tasks)
(lambda (max-depth tasks) (report-answer answer-key max-depth tasks val))
(report-answer answer-key max-depth tasks val)) #f))))
#f)))))
;; Reports an answer to multiple waiting threads: ;; Reports an answer to multiple waiting threads:
(define (report-answer-all answer-key max-depth tasks val k) (define (report-answer-all answer-key max-depth tasks val k)
(let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))]) (define v (hash-ref (tasks-multi-waits tasks) answer-key (λ () null)))
(hash-remove! (tasks-multi-waits tasks) answer-key) (hash-remove! (tasks-multi-waits tasks) answer-key)
(let ([tasks (make-tasks (append (map (lambda (a) (a val)) v) (let ([tasks (make-tasks (append (map (λ (a) (a val)) v)
(tasks-active tasks)) (tasks-active tasks))
(tasks-active-back tasks) (tasks-active-back tasks)
(tasks-waits tasks) (tasks-waits tasks)
(tasks-multi-waits tasks) (tasks-multi-waits tasks)
(tasks-cache tasks) (tasks-cache tasks)
#t)]) #t)])
(k max-depth tasks)))) (k max-depth tasks)))
;; Waits for an answer; if `multi?' is #f, this is the sole waiter, otherwise ;; Waits for an answer; if `multi?' is #f, this is the sole waiter, otherwise
;; there might be many.  Use either #t or #f (and `report-answer' or ;; there might be many.  Use either #t or #f (and `report-answer' or
;; `report-answer-all', respectively) consistently for a particular answer key. ;; `report-answer-all', respectively) consistently for a particular answer key.
(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k) (define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k)
(let ([wait (lambda (val) (let ([wait (λ (val)
(lambda (max-depth tasks) (λ (max-depth tasks)
(if val (if val
(if (null? val) (if (null? val)
(fail-k max-depth tasks) (fail-k max-depth tasks)
@ -273,7 +242,7 @@
(if multi? (if multi?
(hash-set! (tasks-multi-waits tasks) answer-key (hash-set! (tasks-multi-waits tasks) answer-key
(cons wait (hash-ref (tasks-multi-waits tasks) answer-key (cons wait (hash-ref (tasks-multi-waits tasks) answer-key
(lambda () null)))) (λ () null))))
(hash-set! (tasks-waits tasks) answer-key wait)) (hash-set! (tasks-waits tasks) answer-key wait))
(let ([tasks (make-tasks (tasks-active tasks) (let ([tasks (make-tasks (tasks-active tasks)
(tasks-active-back tasks) (tasks-active-back tasks)
@ -302,8 +271,8 @@
(make-tasks (apply (make-tasks (apply
append append
(hash-map (tasks-multi-waits tasks) (hash-map (tasks-multi-waits tasks)
(lambda (k l) (λ (k l)
(map (lambda (v) (v #f)) l)))) (map (λ (v) (v #f)) l))))
(tasks-active-back tasks) (tasks-active-back tasks)
(tasks-waits tasks) (tasks-waits tasks)
(make-hasheq) (make-hasheq)
@ -325,11 +294,9 @@
(define no-pos-val (make-position #f #f #f)) (define no-pos-val (make-position #f #f #f))
(define-for-syntax no-pos (define-for-syntax no-pos
(let ([npv ((syntax-local-certifier) #'no-pos-val)]) (let ([npv ((syntax-local-certifier) #'no-pos-val)])
(lambda (stx) npv))) (λ (stx) npv)))
(define-for-syntax at-tok-pos (define-for-syntax ((at-tok-pos sel expr) stx)
(lambda (sel expr) #`(let ([v #,expr]) (if v (#,sel v) no-pos-val)))
(lambda (stx)
#`(let ([v #,expr]) (if v (#,sel v) no-pos-val)))))
;; Builds a matcher for a particular alternative ;; Builds a matcher for a particular alternative
(define-for-syntax (build-match nts toks pat handle $ctx) (define-for-syntax (build-match nts toks pat handle $ctx)
@ -337,27 +304,23 @@
[pos 1]) [pos 1])
(if (null? pat) (if (null? pat)
#`(success-k #,handle stream last-consumed-token depth max-depth tasks #`(success-k #,handle stream last-consumed-token depth max-depth tasks
(lambda (success-k fail-k max-depth tasks) (λ (success-k fail-k max-depth tasks)
(fail-k max-depth tasks))) (fail-k max-depth tasks)))
(let ([id (datum->syntax (car pat) (let ([id (datum->syntax (car pat) (string->symbol (format "$~a" pos)))]
(string->symbol (format "$~a" pos)))] [id-start-pos (datum->syntax (car pat) (string->symbol (format "$~a-start-pos" pos)))]
[id-start-pos (datum->syntax (car pat) [id-end-pos (datum->syntax (car pat) (string->symbol (format "$~a-end-pos" pos)))]
(string->symbol (format "$~a-start-pos" pos)))] [n-end-pos (and (null? (cdr pat)) (datum->syntax (car pat) '$n-end-pos))])
[id-end-pos (datum->syntax (car pat)
(string->symbol (format "$~a-end-pos" pos)))]
[n-end-pos (and (null? (cdr pat))
(datum->syntax (car pat) '$n-end-pos))])
(cond (cond
[(bound-identifier-mapping-get nts (car pat) (lambda () #f)) [(bound-identifier-mapping-get nts (car pat) (λ () #f))
;; Match non-terminal ;; Match non-terminal
#`(parse-and #`(parse-and
;; First part is simple? (If so, we don't have to parallelize the `and'.) ;; First part is simple? (If so, we don't have to parallelize the `and'.)
#,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))]) #,(let ([l (bound-identifier-mapping-get nts (car pat) (λ () #f))])
(or (not l) (or (not l)
(andmap values (caddr l)))) (andmap values (caddr l))))
#,(car pat) #,(car pat)
(let ([original-stream stream]) (let ([original-stream stream])
(lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks) (λ (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks)
(let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream) (let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream)
tok-end tok-end
tok-start) tok-start)
@ -372,10 +335,10 @@
#,(loop (cdr pat) (add1 pos))))) #,(loop (cdr pat) (add1 pos)))))
stream last-consumed-token depth stream last-consumed-token depth
#,(let ([cnt (apply + #,(let ([cnt (apply +
(map (lambda (item) (map (λ (item)
(cond (cond
[(bound-identifier-mapping-get nts item (lambda () #f)) [(bound-identifier-mapping-get nts item (λ () #f))
=> (lambda (l) (car l))] => (λ (l) (car l))]
[else 1])) [else 1]))
(cdr pat)))]) (cdr pat)))])
#`(- end #,cnt)) #`(- end #,cnt))
@ -419,75 +382,73 @@
[max-depth max-depth] [max-depth max-depth]
[tasks tasks] [tasks tasks]
[k k]) [k k])
(let ([answer-key (gensym)] (define answer-key (gensym))
[table-key (vector key depth n)] (define table-key (vector key depth n))
[old-depth depth] (define old-depth depth)
[old-stream stream]) (define old-stream stream)
#;(printf "Loop ~a\n" table-key) #;(printf "Loop ~a\n" table-key)
(cond (cond
[(hash-ref (tasks-cache tasks) table-key (lambda () #f)) [(hash-ref (tasks-cache tasks) table-key (λ () #f))
=> (lambda (result) => (λ (result)
#;(printf "Reuse ~a\n" table-key) #;(printf "Reuse ~a\n" table-key)
(result success-k fail-k max-depth tasks))] (result success-k fail-k max-depth tasks))]
[else [else
#;(printf "Try ~a ~a\n" table-key (map tok-name stream)) #;(printf "Try ~a ~a\n" table-key (map tok-name stream))
(hash-set! (tasks-cache tasks) table-key (hash-set! (tasks-cache tasks) table-key
(lambda (success-k fail-k max-depth tasks) (λ (success-k fail-k max-depth tasks)
#;(printf "Wait ~a ~a\n" table-key answer-key) #;(printf "Wait ~a ~a\n" table-key answer-key)
(wait-for-answer #t max-depth tasks answer-key success-k fail-k (wait-for-answer #t max-depth tasks answer-key success-k fail-k
(lambda (max-depth tasks) (λ (max-depth tasks)
#;(printf "Deadlock ~a ~a\n" table-key answer-key) #;(printf "Deadlock ~a ~a\n" table-key answer-key)
(fail-k max-depth tasks))))) (fail-k max-depth tasks)))))
(let result-loop ([max-depth max-depth][tasks tasks][k k]) (let result-loop ([max-depth max-depth][tasks tasks][k k])
(letrec ([orig-stream stream] (define orig-stream stream)
[new-got-k (define (new-got-k val stream last-consumed-token depth max-depth tasks next-k)
(lambda (val stream last-consumed-token depth max-depth tasks next-k) ;; Check whether we already have a result that consumed the same amount:
;; Check whether we already have a result that consumed the same amount: (define result-key (vector #f key old-depth depth))
(let ([result-key (vector #f key old-depth depth)]) (cond
(cond [(hash-ref (tasks-cache tasks) result-key (λ () #f))
[(hash-ref (tasks-cache tasks) result-key (lambda () #f)) ;; Go for the next-result
;; Go for the next-result (result-loop max-depth
(result-loop max-depth tasks
tasks (λ (end max-depth tasks success-k fail-k)
(lambda (end max-depth tasks success-k fail-k) (next-k success-k fail-k max-depth tasks)))]
(next-k success-k fail-k max-depth tasks)))] [else
[else #;(printf "Success ~a ~a\n" table-key
#;(printf "Success ~a ~a\n" table-key (map tok-name (let loop ([d old-depth][s old-stream])
(map tok-name (let loop ([d old-depth][s old-stream]) (if (= d depth)
(if (= d depth) null
null (cons (car s) (loop (add1 d) (cdr s)))))))
(cons (car s) (loop (add1 d) (cdr s))))))) (let ([next-k (λ (success-k fail-k max-depth tasks)
(let ([next-k (lambda (success-k fail-k max-depth tasks) (loop (add1 n)
(loop (add1 n) success-k
success-k fail-k
fail-k max-depth
max-depth tasks
tasks (λ (end max-depth tasks success-k fail-k)
(lambda (end max-depth tasks success-k fail-k) (next-k success-k fail-k max-depth tasks))))])
(next-k success-k fail-k max-depth tasks))))]) (hash-set! (tasks-cache tasks) result-key #t)
(hash-set! (tasks-cache tasks) result-key #t) (hash-set! (tasks-cache tasks) table-key
(hash-set! (tasks-cache tasks) table-key (λ (success-k fail-k max-depth tasks)
(lambda (success-k fail-k max-depth tasks) (success-k val stream last-consumed-token depth max-depth tasks next-k)))
(success-k val stream last-consumed-token depth max-depth tasks next-k))) (report-answer-all answer-key
(report-answer-all answer-key max-depth
max-depth tasks
tasks (list val stream last-consumed-token depth next-k)
(list val stream last-consumed-token depth next-k) (λ (max-depth tasks)
(lambda (max-depth tasks) (success-k val stream last-consumed-token depth max-depth tasks next-k))))]))
(success-k val stream last-consumed-token depth max-depth tasks next-k))))])))] (define (new-fail-k max-depth tasks)
[new-fail-k #;(printf "Failure ~a\n" table-key)
(lambda (max-depth tasks) (hash-set! (tasks-cache tasks) table-key
#;(printf "Failure ~a\n" table-key) (λ (success-k fail-k max-depth tasks)
(hash-set! (tasks-cache tasks) table-key (fail-k max-depth tasks)))
(lambda (success-k fail-k max-depth tasks) (report-answer-all answer-key
(fail-k max-depth tasks))) max-depth
(report-answer-all answer-key tasks
max-depth null
tasks (λ (max-depth tasks)
null (fail-k max-depth tasks))))
(lambda (max-depth tasks) (k end max-depth tasks new-got-k new-fail-k))]))))
(fail-k max-depth tasks))))])
(k end max-depth tasks new-got-k new-fail-k)))])))))
;; These temp identifiers can't be `gensym` or `generate-temporary` ;; These temp identifiers can't be `gensym` or `generate-temporary`
;; because they have to be consistent between module loads ;; because they have to be consistent between module loads
@ -497,39 +458,34 @@
(define-for-syntax atok-id-temp 'atok_wrutdjgecmybyfipiwsgjlvsveryodlgassuzcargiuznzgdghrykfqfbwcjgzdhdoeqxcucmtjkuyucskzethozhqkasphdwbht) (define-for-syntax atok-id-temp 'atok_wrutdjgecmybyfipiwsgjlvsveryodlgassuzcargiuznzgdghrykfqfbwcjgzdhdoeqxcucmtjkuyucskzethozhqkasphdwbht)
(define-syntax (cfg-parser stx) (define-syntax (cfg-parser stx)
(syntax-case stx () (syntax-case stx ()
[(_ clause ...) [(_ CLAUSE ...)
(let ([clauses (syntax->list #'(clause ...))]) (let ([clauses (syntax->list #'(CLAUSE ...))])
(let-values ([(start grammar cfg-error parser-clauses src-pos?) (let-values ([(start grammar cfg-error parser-clauses src-pos?)
(let ([all-toks (apply (let ([all-toks (apply
append append
(map (lambda (clause) (for/list ([clause (in-list clauses)])
(syntax-case clause (tokens) (syntax-case clause (tokens)
[(tokens t ...) [(tokens T ...)
(apply (apply
append append
(map (lambda (t) (for/list ([t (in-list (syntax->list #'(T ...)))])
(let ([v (syntax-local-value t (lambda () #f))]) (define v (syntax-local-value t (λ () #f)))
(cond (cond
[(terminals-def? v) [(terminals-def? v)
(map (lambda (v) (for/list ([v (in-list (syntax->list (terminals-def-t v)))])
(cons v #f)) (cons v #f))]
(syntax->list (terminals-def-t v)))] [(e-terminals-def? v)
[(e-terminals-def? v) (for/list ([v (in-list (syntax->list (e-terminals-def-t v)))])
(map (lambda (v) (cons v #t))]
(cons v #t)) [else null])))]
(syntax->list (e-terminals-def-t v)))] [_else null])))]
[else null])))
(syntax->list #'(t ...))))]
[_else null]))
clauses))]
[all-end-toks (apply [all-end-toks (apply
append append
(map (lambda (clause) (for/list ([clause (in-list clauses)])
(syntax-case clause (end) (syntax-case clause (end)
[(end t ...) [(end T ...)
(syntax->list #'(t ...))] (syntax->list #'(T ...))]
[_else null])) [_else null])))])
clauses))])
(let loop ([clauses clauses] (let loop ([clauses clauses]
[cfg-start #f] [cfg-start #f]
[cfg-grammar #f] [cfg-grammar #f]
@ -543,47 +499,35 @@
(reverse parser-clauses) (reverse parser-clauses)
src-pos?) src-pos?)
(syntax-case (car clauses) (start error grammar src-pos) (syntax-case (car clauses) (start error grammar src-pos)
[(start tok) [(start TOK)
(loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)] (loop (cdr clauses) #'TOK cfg-grammar cfg-error src-pos? parser-clauses)]
[(error expr) [(error EXPR)
(loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)] (loop (cdr clauses) cfg-start cfg-grammar #'EXPR src-pos? parser-clauses)]
[(grammar [nt [pat handle0 handle ...] ...] ...) [(grammar [NT [PAT HANDLE0 HANDLE ...] ...] ...)
(let ([nts (make-bound-identifier-mapping)] (let ([nts (make-bound-identifier-mapping)]
[toks (make-token-identifier-mapping)] [toks (make-token-identifier-mapping)]
[end-toks (make-token-identifier-mapping)] [end-toks (make-token-identifier-mapping)]
[nt-ids (syntax->list #'(nt ...))] [nt-ids (syntax->list #'(NT ...))]
[patss (map (lambda (stx) [patss (map (λ (stx)
(map syntax->list (syntax->list stx))) (map syntax->list (syntax->list stx)))
(syntax->list #'((pat ...) ...)))]) (syntax->list #'((PAT ...) ...)))])
(for-each (lambda (nt) (for ([nt (in-list nt-ids)])
(bound-identifier-mapping-put! nts nt (list 0))) (bound-identifier-mapping-put! nts nt (list 0)))
nt-ids) (for ([t (in-list all-end-toks)])
(for-each (lambda (t) (token-identifier-mapping-put! end-toks t #t))
(token-identifier-mapping-put! end-toks t #t)) (for ([t (in-list all-toks)]
all-end-toks) #:unless (token-identifier-mapping-get end-toks (car t) (λ () #f)))
(for-each (lambda (t) (define id (gensym (syntax-e (car t))))
(unless (token-identifier-mapping-get end-toks (car t) (lambda () #f)) (token-identifier-mapping-put! toks (car t) (cons id (cdr t))))
(let ([id (gensym (syntax-e (car t)))])
(token-identifier-mapping-put! toks (car t)
(cons id (cdr t))))))
all-toks)
;; Compute min max size for each non-term: ;; Compute min max size for each non-term:
(nt-fixpoint (nt-fixpoint
nts nts
(lambda (nt pats old-list) (λ (nt pats old-list)
(let ([new-cnt (let ([new-cnt
(apply (apply min (for/list ([pat (in-list pats)])
min (for/sum ([elem (in-list pat)])
(map (lambda (pat) (car (bound-identifier-mapping-get
(apply nts elem (λ () (list 1)))))))])
+
(map (lambda (elem)
(car
(bound-identifier-mapping-get nts
elem
(lambda () (list 1)))))
pat)))
pats))])
(if (new-cnt . > . (car old-list)) (if (new-cnt . > . (car old-list))
(cons new-cnt (cdr old-list)) (cons new-cnt (cdr old-list))
old-list))) old-list)))
@ -592,29 +536,28 @@
;; for a non-terminal ;; for a non-terminal
(nt-fixpoint (nt-fixpoint
nts nts
(lambda (nt pats old-list) (λ (nt pats old-list)
(let ([new-list (let ([new-list
(apply (apply
append append
(map (lambda (pat) (for/list ([pat (in-list pats)])
(let loop ([pat pat]) (let loop ([pat pat])
(if (pair? pat) (if (pair? pat)
(let ([l (bound-identifier-mapping-get (let ([l (bound-identifier-mapping-get
nts nts
(car pat) (car pat)
(lambda () (λ ()
(list 1 (map-token toks (car pat)))))]) (list 1 (map-token toks (car pat)))))])
;; If the non-terminal can match 0 things, ;; If the non-terminal can match 0 things,
;; then it might match something from the ;; then it might match something from the
;; next pattern element. Otherwise, it must ;; next pattern element. Otherwise, it must
;; match the first element: ;; match the first element:
(if (zero? (car l)) (if (zero? (car l))
(append (cdr l) (loop (cdr pat))) (append (cdr l) (loop (cdr pat)))
(cdr l))) (cdr l)))
null))) null))))])
pats))]) (let ([new (filter (λ (id)
(let ([new (filter (lambda (id) (andmap (λ (id2)
(andmap (lambda (id2)
(not (eq? id id2))) (not (eq? id id2)))
(cdr old-list))) (cdr old-list)))
new-list)]) new-list)])
@ -623,7 +566,7 @@
(let ([new (let loop ([new new]) (let ([new (let loop ([new new])
(if (null? (cdr new)) (if (null? (cdr new))
new new
(if (ormap (lambda (id) (if (ormap (λ (id)
(eq? (car new) id)) (eq? (car new) id))
(cdr new)) (cdr new))
(loop (cdr new)) (loop (cdr new))
@ -632,26 +575,26 @@
old-list)))) old-list))))
nt-ids patss) nt-ids patss)
;; Determine left-recursive clauses: ;; Determine left-recursive clauses:
(for-each (lambda (nt pats) (for-each (λ (nt pats)
(let ([l (bound-identifier-mapping-get nts nt)]) (let ([l (bound-identifier-mapping-get nts nt)])
(bound-identifier-mapping-put! nts nt (list (car l) (bound-identifier-mapping-put! nts nt (list (car l)
(cdr l) (cdr l)
(map (lambda (x) #f) pats))))) (map (λ (x) #f) pats)))))
nt-ids patss) nt-ids patss)
(nt-fixpoint (nt-fixpoint
nts nts
(lambda (nt pats old-list) (λ (nt pats old-list)
(list (car old-list) (list (car old-list)
(cadr old-list) (cadr old-list)
(map (lambda (pat simple?) (map (λ (pat simple?)
(or simple? (or simple?
(let ([l (map (lambda (elem) (let ([l (map (λ (elem)
(bound-identifier-mapping-get (bound-identifier-mapping-get
nts nts
elem elem
(lambda () #f))) (λ () #f)))
pat)]) pat)])
(andmap (lambda (i) (andmap (λ (i)
(or (not i) (or (not i)
(andmap values (caddr i)))) (andmap values (caddr i))))
l)))) l))))
@ -660,16 +603,16 @@
;; Build a definition for each non-term: ;; Build a definition for each non-term:
(loop (cdr clauses) (loop (cdr clauses)
cfg-start cfg-start
(map (lambda (nt pats handles $ctxs) (map (λ (nt pats handles $ctxs)
(define info (bound-identifier-mapping-get nts nt)) (define info (bound-identifier-mapping-get nts nt))
(list nt (list nt
#`(let ([key (gensym '#,nt)]) #`(let ([key (gensym '#,nt)])
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) (λ (stream last-consumed-token depth end success-k fail-k max-depth tasks)
(parse-nt/share (parse-nt/share
key #,(car info) '#,(cadr info) stream last-consumed-token depth end key #,(car info) '#,(cadr info) stream last-consumed-token depth end
max-depth tasks max-depth tasks
success-k fail-k success-k fail-k
(lambda (end max-depth tasks success-k fail-k) (λ (end max-depth tasks success-k fail-k)
#,(let loop ([pats pats] #,(let loop ([pats pats]
[handles (syntax->list handles)] [handles (syntax->list handles)]
[$ctxs (syntax->list $ctxs)] [$ctxs (syntax->list $ctxs)]
@ -680,13 +623,13 @@
(car simple?s)) (car simple?s))
#'parse-or #'parse-or
#'parse-parallel-or) #'parse-parallel-or)
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) (λ (stream last-consumed-token depth end success-k fail-k max-depth tasks)
#,(build-match nts #,(build-match nts
toks toks
(car pats) (car pats)
(car handles) (car handles)
(car $ctxs))) (car $ctxs)))
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) (λ (stream last-consumed-token depth end success-k fail-k max-depth tasks)
#,(loop (cdr pats) #,(loop (cdr pats)
(cdr handles) (cdr handles)
(cdr $ctxs) (cdr $ctxs)
@ -694,14 +637,14 @@
stream last-consumed-token depth end success-k fail-k max-depth tasks))))))))) stream last-consumed-token depth end success-k fail-k max-depth tasks)))))))))
nt-ids nt-ids
patss patss
(syntax->list #'(((begin handle0 handle ...) ...) ...)) (syntax->list #'(((begin HANDLE0 HANDLE ...) ...) ...))
(syntax->list #'((handle0 ...) ...))) (syntax->list #'((HANDLE0 ...) ...)))
cfg-error cfg-error
src-pos? src-pos?
(list* (list*
(with-syntax ([((tok tok-id . $e) ...) (with-syntax ([((tok tok-id . $e) ...)
(token-identifier-mapping-map toks (token-identifier-mapping-map toks
(lambda (k v) (λ (k v)
(list* k (list* k
(car v) (car v)
(if (cdr v) (if (cdr v)
@ -743,19 +686,19 @@
src-pos? src-pos?
(cons (car clauses) parser-clauses))]))))]) (cons (car clauses) parser-clauses))]))))])
#`(let ([orig-parse (parser #`(let ([orig-parse (parser
[error (lambda (a b c) [error (λ (a b c)
(error 'cfg-parser "unexpected ~a token: ~a" b c))] (error 'cfg-parser "unexpected ~a token: ~a" b c))]
. #,parser-clauses)] . #,parser-clauses)]
[error-proc #,cfg-error]) [error-proc #,cfg-error])
(letrec #,grammar (letrec #,grammar
(lambda (get-tok) (λ (get-tok)
(let ([tok-list (orig-parse get-tok)]) (let ([tok-list (orig-parse get-tok)])
(letrec ([success-k (letrec ([success-k
(lambda (val stream last-consumed-token depth max-depth tasks next) (λ (val stream last-consumed-token depth max-depth tasks next)
(if (null? stream) (if (null? stream)
val val
(next success-k fail-k max-depth tasks)))] (next success-k fail-k max-depth tasks)))]
[fail-k (lambda (max-depth tasks) [fail-k (λ (max-depth tasks)
(cond (cond
[(null? tok-list) [(null? tok-list)
(if error-proc (if error-proc
@ -847,7 +790,7 @@
(define (parse s) (define (parse s)
(define ip (open-input-string s)) (define ip (open-input-string s))
(port-count-lines! ip) (port-count-lines! ip)
(-parse (lambda () (lex ip)))) (-parse (λ () (lex ip))))
(check-equal? (parse "abc") (check-equal? (parse "abc")
'(unanchored (lit "abc" 1 4) 1 4)) '(unanchored (lit "abc" 1 4) 1 4))
@ -881,7 +824,7 @@
(tokens non-terminals) (tokens non-terminals)
(start <program>) (start <program>)
(end EOF) (end EOF)
(error (lambda (a b stx) (error (λ (a b stx)
(error 'parse "failed at ~s" stx))) (error 'parse "failed at ~s" stx)))
(grammar [<program> [(PLUS) "plus"] (grammar [<program> [(PLUS) "plus"]
[(<minus-program> BAR <minus-program>) (list $1 $2 $3)] [(<minus-program> BAR <minus-program>) (list $1 $2 $3)]
@ -903,7 +846,7 @@
-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****" -|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****"
;; This one fails: ;; This one fails:
#;"+*")]) #;"+*")])
(check-equal? (parse (lambda () (lex p))) (check-equal? (parse (λ () (lex p)))
'((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) '((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|| ||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))

@ -1,4 +1,4 @@
#lang scheme #lang racket/base
;; An interactive calculator inspired by the calculator example in the bison manual. ;; An interactive calculator inspired by the calculator example in the bison manual.
@ -15,19 +15,19 @@
(define vars (make-hash)) (define vars (make-hash))
(define-lex-abbrevs (define-lex-abbrevs
(lower-letter (:/ "a" "z")) (lower-letter (:/ "a" "z"))
(upper-letter (:/ #\A #\Z)) (upper-letter (:/ #\A #\Z))
;; (:/ 0 9) would not work because the lexer does not understand numbers. (:/ #\0 #\9) is ok too. ;; (:/ 0 9) would not work because the lexer does not understand numbers. (:/ #\0 #\9) is ok too.
(digit (:/ "0" "9"))) (digit (:/ "0" "9")))
(define calcl (define calc-lex
(lexer (lexer
[(eof) 'EOF] [(eof) 'EOF]
;; recursively call the lexer on the remaining input after a tab or space. Returning the ;; recursively call the lexer on the remaining input after a tab or space. Returning the
;; result of that operation. This effectively skips all whitespace. ;; result of that operation. This effectively skips all whitespace.
[(:or #\tab #\space) (calcl input-port)] [(:or #\tab #\space) (calc-lex input-port)]
;; (token-newline) returns 'newline ;; (token-newline) returns 'newline
[#\newline (token-newline)] [#\newline (token-newline)]
;; Since (token-=) returns '=, just return the symbol directly ;; Since (token-=) returns '=, just return the symbol directly
@ -40,7 +40,7 @@
[(:: (:+ digit) #\. (:* digit)) (token-NUM (string->number lexeme))])) [(:: (:+ digit) #\. (:* digit)) (token-NUM (string->number lexeme))]))
(define calcp (define calc-parse
(parser (parser
(start start) (start start)
@ -78,12 +78,15 @@
;; run the calculator on the given input-port ;; run the calculator on the given input-port
(define (calc ip) (define (calc ip)
(port-count-lines! ip) (port-count-lines! ip)
(letrec ((one-line (let loop ()
(lambda () (define result (calc-parse (λ () (calc-lex ip))))
(let ((result (calcp (lambda () (calcl ip))))) (when result
(when result (printf "~a\n" result)
(printf "~a\n" result) (loop))))
(one-line))))))
(one-line))) (module+ test
(require rackunit)
(calc (open-input-string "x=1\n(x + 2 * 3) - (1+2)*3")) (check-equal? (let ([o (open-output-string)])
(parameterize ([current-output-port o])
(calc (open-input-string "x=1\n(x + 2 * 3) - (1+2)*3")))
(get-output-string o)) "1\n-2\n"))

@ -1,242 +1,240 @@
#lang racket/base
;; This implements the equivalent of racket's read-syntax for R5RS scheme. ;; This implements the equivalent of racket's read-syntax for R5RS scheme.
;; It has not been thoroughly tested. Also it will read an entire file into a ;; It has not been thoroughly tested. Also it will read an entire file into a
;; list of syntax objects, instead of returning one syntax object at a time ;; list of syntax objects, instead of returning one syntax object at a time
(module read mzscheme (require (for-syntax racket/base)
br-parser-tools/lex
(require br-parser-tools/lex (prefix-in : br-parser-tools/lex-sre)
(prefix : br-parser-tools/lex-sre) br-parser-tools/yacc
br-parser-tools/yacc syntax/readerr)
syntax/readerr)
(define-tokens data (DATUM))
(define-tokens data (DATUM)) (define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF))
(define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF))
(define scheme-lexer
(define scheme-lexer (lexer-src-pos
(lexer-src-pos
;; Skip comments, without accumulating extra position information
;; Skip comments, without accumulating extra position information [(:or scheme-whitespace comment) (return-without-pos (scheme-lexer input-port))]
[(:or scheme-whitespace comment) (return-without-pos (scheme-lexer input-port))]
["#t" (token-DATUM #t)]
["#t" (token-DATUM #t)] ["#f" (token-DATUM #f)]
["#f" (token-DATUM #f)] [(:: "#\\" any-char) (token-DATUM (caddr (string->list lexeme)))]
[(:: "#\\" any-char) (token-DATUM (caddr (string->list lexeme)))] ["#\\space" (token-DATUM #\space)]
["#\\space" (token-DATUM #\space)] ["#\\newline" (token-DATUM #\newline)]
["#\\newline" (token-DATUM #\newline)] [(:or (:: initial (:* subsequent)) "+" "-" "...") (token-DATUM (string->symbol lexeme))]
[(:or (:: initial (:* subsequent)) "+" "-" "...") (token-DATUM (string->symbol lexeme))] [#\" (token-DATUM (list->string (get-string-token input-port)))]
[#\" (token-DATUM (list->string (get-string-token input-port)))] [#\( 'OP]
[#\( 'OP] [#\) 'CP]
[#\) 'CP] [#\[ 'OP]
[#\[ 'OP] [#\] 'CP]
[#\] 'CP] ["#(" 'HASHOP]
["#(" 'HASHOP] [num2 (token-DATUM (string->number lexeme 2))]
[num2 (token-DATUM (string->number lexeme 2))] [num8 (token-DATUM (string->number lexeme 8))]
[num8 (token-DATUM (string->number lexeme 8))] [num10 (token-DATUM (string->number lexeme 10))]
[num10 (token-DATUM (string->number lexeme 10))] [num16 (token-DATUM (string->number lexeme 16))]
[num16 (token-DATUM (string->number lexeme 16))] ["'" 'QUOTE]
["'" 'QUOTE] ["`" 'QUASIQUOTE]
["`" 'QUASIQUOTE] ["," 'UNQUOTE]
["," 'UNQUOTE] [",@" 'UNQUOTE-SPLICING]
[",@" 'UNQUOTE-SPLICING] ["." 'DOT]
["." 'DOT] [(eof) 'EOF]))
[(eof) 'EOF]))
(define get-string-token
(define get-string-token (lexer
(lexer [(:~ #\" #\\) (cons (car (string->list lexeme))
[(:~ #\" #\\) (cons (car (string->list lexeme)) (get-string-token input-port))]
(get-string-token input-port))] [(:: #\\ #\\) (cons #\\ (get-string-token input-port))]
[(:: #\\ #\\) (cons #\\ (get-string-token input-port))] [(:: #\\ #\") (cons #\" (get-string-token input-port))]
[(:: #\\ #\") (cons #\" (get-string-token input-port))] [#\" null]))
[#\" null]))
(define-lex-abbrevs
(define-lex-abbrevs [letter (:or (:/ "a" "z") (:/ #\A #\Z))]
[letter (:or (:/ "a" "z") (:/ #\A #\Z))] [digit (:/ #\0 #\9)]
[digit (:/ #\0 #\9)] [scheme-whitespace (:or #\newline #\return #\tab #\space #\vtab)]
[scheme-whitespace (:or #\newline #\return #\tab #\space #\vtab)] [initial (:or letter (char-set "!$%&*/:<=>?^_~@"))]
[initial (:or letter (char-set "!$%&*/:<=>?^_~@"))] [subsequent (:or initial digit (char-set "+-.@"))]
[subsequent (:or initial digit (char-set "+-.@"))] [comment (:: #\; (:* (:~ #\newline)) #\newline)]
[comment (:: #\; (:* (:~ #\newline)) #\newline)]
;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of
;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of ;; using regexp macros to avoid the cut and paste.
;; using regexp macros to avoid the cut and paste. ; [numR (:: prefixR complexR)]
; [numR (:: prefixR complexR)] ; [complexR (:or realR
; [complexR (:or realR ; (:: realR "@" realR)
; (:: realR "@" realR) ; (:: realR "+" urealR "i")
; (:: realR "+" urealR "i") ; (:: realR "-" urealR "i")
; (:: realR "-" urealR "i") ; (:: realR "+i")
; (:: realR "+i") ; (:: realR "-i")
; (:: realR "-i") ; (:: "+" urealR "i")
; (:: "+" urealR "i") ; (:: "-" urealR "i")
; (:: "-" urealR "i") ; (:: "+i")
; (:: "+i") ; (:: "-i"))]
; (:: "-i"))] ; [realR (:: sign urealR)]
; [realR (:: sign urealR)] ; [urealR (:or uintegerR (:: uintegerR "/" uintegerR) decimalR)]
; [urealR (:or uintegerR (:: uintegerR "/" uintegerR) decimalR)] ; [uintegerR (:: (:+ digitR) (:* #\#))]
; [uintegerR (:: (:+ digitR) (:* #\#))] ; [prefixR (:or (:: radixR exactness)
; [prefixR (:or (:: radixR exactness) ; (:: exactness radixR))]
; (:: exactness radixR))]
[num2 (:: prefix2 complex2)]
[num2 (:: prefix2 complex2)] [complex2 (:or real2
[complex2 (:or real2 (:: real2 "@" real2)
(:: real2 "@" real2) (:: real2 "+" ureal2 "i")
(:: real2 "+" ureal2 "i") (:: real2 "-" ureal2 "i")
(:: real2 "-" ureal2 "i") (:: real2 "+i")
(:: real2 "+i") (:: real2 "-i")
(:: real2 "-i") (:: "+" ureal2 "i")
(:: "+" ureal2 "i") (:: "-" ureal2 "i")
(:: "-" ureal2 "i") (:: "+i")
(:: "-i"))]
[real2 (:: sign ureal2)]
[ureal2 (:or uinteger2 (:: uinteger2 "/" uinteger2))]
[uinteger2 (:: (:+ digit2) (:* #\#))]
[prefix2 (:or (:: radix2 exactness)
(:: exactness radix2))]
[radix2 "#b"]
[digit2 (:or "0" "1")]
[num8 (:: prefix8 complex8)]
[complex8 (:or real8
(:: real8 "@" real8)
(:: real8 "+" ureal8 "i")
(:: real8 "-" ureal8 "i")
(:: real8 "+i")
(:: real8 "-i")
(:: "+" ureal8 "i")
(:: "-" ureal8 "i")
(:: "+i")
(:: "-i"))]
[real8 (:: sign ureal8)]
[ureal8 (:or uinteger8 (:: uinteger8 "/" uinteger8))]
[uinteger8 (:: (:+ digit8) (:* #\#))]
[prefix8 (:or (:: radix8 exactness)
(:: exactness radix8))]
[radix8 "#o"]
[digit8 (:/ "0" "7")]
[num10 (:: prefix10 complex10)]
[complex10 (:or real10
(:: real10 "@" real10)
(:: real10 "+" ureal10 "i")
(:: real10 "-" ureal10 "i")
(:: real10 "+i")
(:: real10 "-i")
(:: "+" ureal10 "i")
(:: "-" ureal10 "i")
(:: "+i") (:: "+i")
(:: "-i"))] (:: "-i"))]
[real2 (:: sign ureal2)] [real10 (:: sign ureal10)]
[ureal2 (:or uinteger2 (:: uinteger2 "/" uinteger2))] [ureal10 (:or uinteger10 (:: uinteger10 "/" uinteger10) decimal10)]
[uinteger2 (:: (:+ digit2) (:* #\#))] [uinteger10 (:: (:+ digit10) (:* #\#))]
[prefix2 (:or (:: radix2 exactness) [prefix10 (:or (:: radix10 exactness)
(:: exactness radix2))] (:: exactness radix10))]
[radix2 "#b"] [radix10 (:? "#d")]
[digit2 (:or "0" "1")] [digit10 digit]
[num8 (:: prefix8 complex8)] [decimal10 (:or (:: uinteger10 suffix)
[complex8 (:or real8 (:: #\. (:+ digit10) (:* #\#) suffix)
(:: real8 "@" real8) (:: (:+ digit10) #\. (:* digit10) (:* #\#) suffix)
(:: real8 "+" ureal8 "i") (:: (:+ digit10) (:+ #\#) #\. (:* #\#) suffix))]
(:: real8 "-" ureal8 "i")
(:: real8 "+i") [num16 (:: prefix16 complex16)]
(:: real8 "-i") [complex16 (:or real16
(:: "+" ureal8 "i") (:: real16 "@" real16)
(:: "-" ureal8 "i") (:: real16 "+" ureal16 "i")
(:: "+i") (:: real16 "-" ureal16 "i")
(:: "-i"))] (:: real16 "+i")
[real8 (:: sign ureal8)] (:: real16 "-i")
[ureal8 (:or uinteger8 (:: uinteger8 "/" uinteger8))] (:: "+" ureal16 "i")
[uinteger8 (:: (:+ digit8) (:* #\#))] (:: "-" ureal16 "i")
[prefix8 (:or (:: radix8 exactness) "+i"
(:: exactness radix8))] "-i")]
[radix8 "#o"] [real16 (:: sign ureal16)]
[digit8 (:/ "0" "7")] [ureal16 (:or uinteger16 (:: uinteger16 "/" uinteger16))]
[uinteger16 (:: (:+ digit16) (:* #\#))]
[num10 (:: prefix10 complex10)] [prefix16 (:or (:: radix16 exactness)
[complex10 (:or real10 (:: exactness radix16))]
(:: real10 "@" real10) [radix16 "#x"]
(:: real10 "+" ureal10 "i") [digit16 (:or digit (:/ #\a #\f) (:/ #\A #\F))]
(:: real10 "-" ureal10 "i")
(:: real10 "+i")
(:: real10 "-i") [suffix (:or "" (:: exponent-marker sign (:+ digit10)))]
(:: "+" ureal10 "i") [exponent-marker (:or "e" "s" "f" "d" "l")]
(:: "-" ureal10 "i") [sign (:or "" "+" "-")]
(:: "+i") [exactness (:or "" "#i" "#e")])
(:: "-i"))]
[real10 (:: sign ureal10)]
[ureal10 (:or uinteger10 (:: uinteger10 "/" uinteger10) decimal10)] (define stx-for-original-property (read-syntax #f (open-input-string "original")))
[uinteger10 (:: (:+ digit10) (:* #\#))]
[prefix10 (:or (:: radix10 exactness) ;; A macro to build the syntax object
(:: exactness radix10))] (define-syntax (build-so stx)
[radix10 (:? "#d")] (syntax-case stx ()
[digit10 digit] ((_ value start end)
[decimal10 (:or (:: uinteger10 suffix) (with-syntax ((start-pos (datum->syntax
(:: #\. (:+ digit10) (:* #\#) suffix) #'end
(:: (:+ digit10) #\. (:* digit10) (:* #\#) suffix)
(:: (:+ digit10) (:+ #\#) #\. (:* #\#) suffix))]
[num16 (:: prefix16 complex16)]
[complex16 (:or real16
(:: real16 "@" real16)
(:: real16 "+" ureal16 "i")
(:: real16 "-" ureal16 "i")
(:: real16 "+i")
(:: real16 "-i")
(:: "+" ureal16 "i")
(:: "-" ureal16 "i")
"+i"
"-i")]
[real16 (:: sign ureal16)]
[ureal16 (:or uinteger16 (:: uinteger16 "/" uinteger16))]
[uinteger16 (:: (:+ digit16) (:* #\#))]
[prefix16 (:or (:: radix16 exactness)
(:: exactness radix16))]
[radix16 "#x"]
[digit16 (:or digit (:/ #\a #\f) (:/ #\A #\F))]
[suffix (:or "" (:: exponent-marker sign (:+ digit10)))]
[exponent-marker (:or "e" "s" "f" "d" "l")]
[sign (:or "" "+" "-")]
[exactness (:or "" "#i" "#e")])
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
;; A macro to build the syntax object
(define-syntax (build-so stx)
(syntax-case stx ()
((_ value start end)
(with-syntax ((start-pos (datum->syntax-object
(syntax end)
(string->symbol
(format "$~a-start-pos"
(syntax-object->datum (syntax start))))))
(end-pos (datum->syntax-object
(syntax end)
(string->symbol (string->symbol
(format "$~a-end-pos" (format "$~a-start-pos"
(syntax-object->datum (syntax end)))))) (syntax->datum #'start)))))
(source (datum->syntax-object (end-pos (datum->syntax
(syntax end) #'end
'source-name))) (string->symbol
(syntax (format "$~a-end-pos"
(datum->syntax-object (syntax->datum #'end)))))
#f (source (datum->syntax
value #'end
(list source 'source-name)))
(position-line start-pos) (syntax
(position-col start-pos) (datum->syntax
(position-offset start-pos) #f
(- (position-offset end-pos) value
(position-offset start-pos))) (list source
stx-for-original-property)))))) (position-line start-pos)
(position-col start-pos)
(define (scheme-parser source-name) (position-offset start-pos)
(parser (- (position-offset end-pos)
(src-pos) (position-offset start-pos)))
stx-for-original-property))))))
(start s)
(end EOF) (define (scheme-parser source-name)
(error (lambda (a name val start end) (parser
(raise-read-error (src-pos)
"read-error"
source-name (start s)
(position-line start) (end EOF)
(position-col start) (error (lambda (a name val start end)
(position-offset start) (raise-read-error
(- (position-offset end) "read-error"
(position-offset start))))) source-name
(tokens data delim) (position-line start)
(position-col start)
(position-offset start)
(grammar (- (position-offset end)
(position-offset start)))))
(s [(sexp-list) (reverse $1)]) (tokens data delim)
(sexp [(DATUM) (build-so $1 1 1)]
[(OP sexp-list CP) (build-so (reverse $2) 1 3)] (grammar
[(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)]
[(QUOTE sexp) (build-so (list 'quote $2) 1 2)] (s [(sexp-list) (reverse $1)])
[(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)]
[(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)] (sexp [(DATUM) (build-so $1 1 1)]
[(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)] [(OP sexp-list CP) (build-so (reverse $2) 1 3)]
[(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)]) [(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)]
[(QUOTE sexp) (build-so (list 'quote $2) 1 2)]
(sexp-list [() null] [(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)]
[(sexp-list sexp) (cons $2 $1)])))) [(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)]
[(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)]
(define (rs sn ip) [(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)])
(port-count-lines! ip)
((scheme-parser sn) (lambda () (scheme-lexer ip)))) (sexp-list [() null]
[(sexp-list sexp) (cons $2 $1)]))))
(define readsyntax
(case-lambda ((sn) (rs sn (current-input-port))) (define (rs sn ip)
((sn ip) (rs sn ip)))) (port-count-lines! ip)
((scheme-parser sn) (lambda () (scheme-lexer ip))))
(provide (rename readsyntax read-syntax))
(define readsyntax
) (case-lambda ((sn) (rs sn (current-input-port)))
((sn ip) (rs sn ip))))
(provide (rename-out [readsyntax read-syntax]))

@ -1,24 +1,23 @@
(module lex-plt-v200 mzscheme #lang racket/base
(require br-parser-tools/lex (require (for-syntax racket/base)
(prefix : br-parser-tools/lex-sre)) br-parser-tools/lex
(prefix-in : br-parser-tools/lex-sre))
(provide epsilon (provide epsilon ~
~ (rename-out [:* *]
(rename :* *) [:+ +]
(rename :+ +) [:? ?]
(rename :? ?) [:or :]
(rename :or :) [:& &]
(rename :& &) [:: @]
(rename :: @) [:~ ^]
(rename :~ ^) [:/ -]))
(rename :/ -))
(define-lex-trans epsilon (define-lex-trans (epsilon stx)
(syntax-rules () (syntax-case stx ()
((_) ""))) [(_) #'""]))
(define-lex-trans ~
(syntax-rules ()
((_ re) (complement re)))))
(define-lex-trans (~ stx)
(syntax-case stx ()
[(_ RE) #'(complement RE)]))

@ -1,119 +1,103 @@
(module lex-sre mzscheme #lang racket/base
(require br-parser-tools/lex) (require (for-syntax racket/base)
br-parser-tools/lex)
(provide (rename sre-* *)
(rename sre-+ +) (provide (rename-out [sre-* *]
? [sre-+ +]
(rename sre-= =) [sre-= =]
(rename sre->= >=) [sre->= >=]
** [sre-or or]
(rename sre-or or) [sre-- -]
: [sre-/ /])
seq ? ** : seq & ~ /-only-chars)
&
~ (define-lex-trans (sre-* stx)
(rename sre-- -) (syntax-case stx ()
(rename sre-/ /) [(_ RE ...)
/-only-chars) #'(repetition 0 +inf.0 (union RE ...))]))
(define-lex-trans sre-* (define-lex-trans (sre-+ stx)
(syntax-rules () (syntax-case stx ()
((_ re ...) [(_ RE ...)
(repetition 0 +inf.0 (union re ...))))) #'(repetition 1 +inf.0 (union RE ...))]))
(define-lex-trans sre-+ (define-lex-trans (? stx)
(syntax-rules () (syntax-case stx ()
((_ re ...) [(_ RE ...)
(repetition 1 +inf.0 (union re ...))))) #'(repetition 0 1 (union RE ...))]))
(define-lex-trans ? (define-lex-trans (sre-= stx)
(syntax-rules () (syntax-case stx ()
((_ re ...) [(_ N RE ...)
(repetition 0 1 (union re ...))))) #'(repetition N N (union RE ...))]))
(define-lex-trans sre-= (define-lex-trans (sre->= stx)
(syntax-rules () (syntax-case stx ()
((_ n re ...) [(_ N RE ...)
(repetition n n (union re ...))))) #'(repetition N +inf.0 (union RE ...))]))
(define-lex-trans sre->= (define-lex-trans (** stx)
(syntax-rules () (syntax-case stx ()
((_ n re ...) [(_ LOW #f RE ...)
(repetition n +inf.0 (union re ...))))) #'(** LOW +inf.0 RE ...)]
[(_ LOW HIGH RE ...)
(define-lex-trans ** #'(repetition LOW HIGH (union RE ...))]))
(syntax-rules ()
((_ low #f re ...) (define-lex-trans (sre-or stx)
(** low +inf.0 re ...)) (syntax-case stx ()
((_ low high re ...) [(_ RE ...)
(repetition low high (union re ...))))) #'(union RE ...)]))
(define-lex-trans sre-or (define-lex-trans (: stx)
(syntax-rules () (syntax-case stx ()
((_ re ...) [(_ RE ...)
(union re ...)))) #'(concatenation RE ...)]))
(define-lex-trans : (define-lex-trans (seq stx)
(syntax-rules () (syntax-case stx ()
((_ re ...) [(_ RE ...)
(concatenation re ...)))) #'(concatenation RE ...)]))
(define-lex-trans seq (define-lex-trans (& stx)
(syntax-rules () (syntax-case stx ()
((_ re ...) [(_ RE ...)
(concatenation re ...)))) #'(intersection RE ...)]))
(define-lex-trans & (define-lex-trans (~ stx)
(syntax-rules () (syntax-case stx ()
((_ re ...) [(_ RE ...)
(intersection re ...)))) #'(char-complement (union RE ...))]))
(define-lex-trans ~ ;; set difference
(syntax-rules () (define-lex-trans (sre-- stx)
((_ re ...) (syntax-case stx ()
(char-complement (union re ...))))) [(_)
(raise-syntax-error #f
;; set difference "must have at least one argument"
(define-lex-trans (sre-- stx) stx)]
(syntax-case stx () [(_ BIG-RE RE ...)
((_) #'(& BIG-RE (complement (union RE ...)))]))
(raise-syntax-error #f
"must have at least one argument" (define-lex-trans (sre-/ stx)
stx)) (syntax-case stx ()
((_ big-re re ...) [(_ RANGE ...)
(syntax (& big-re (complement (union re ...))))))) (let ([chars
(apply append (for/list ([r (in-list (syntax->list #'(RANGE ...)))])
(define-lex-trans (sre-/ stx) (let ([x (syntax-e r)])
(syntax-case stx () (cond
((_ range ...) [(char? x) (list x)]
(let ((chars [(string? x) (string->list x)]
(apply append (map (lambda (r) [else
(let ((x (syntax-e r))) (raise-syntax-error #f "not a char or string" stx r)]))))])
(cond (unless (even? (length chars))
((char? x) (list x)) (raise-syntax-error #f "not given an even number of characters" stx))
((string? x) (string->list x)) #`(/-only-chars #,@chars))]))
(else
(raise-syntax-error (define-lex-trans (/-only-chars stx)
#f (syntax-case stx ()
"not a char or string" [(_ C1 C2)
stx #'(char-range C1 C2)]
r))))) [(_ C1 C2 C ...)
(syntax->list (syntax (range ...))))))) #'(union (char-range C1 C2) (/-only-chars C ...))]))
(unless (even? (length chars))
(raise-syntax-error
#f
"not given an even number of characters"
stx))
#`(/-only-chars #,@chars)))))
(define-lex-trans /-only-chars
(syntax-rules ()
((_ c1 c2)
(char-range c1 c2))
((_ c1 c2 c ...)
(union (char-range c1 c2)
(/-only-chars c ...)))))
)

@ -3,7 +3,7 @@
;; Provides the syntax used to create lexers and the functions needed to ;; Provides the syntax used to create lexers and the functions needed to
;; create and use the buffer that the lexer reads from. See docs. ;; create and use the buffer that the lexer reads from. See docs.
(require (for-syntax mzlib/list (require (for-syntax racket/list
syntax/stx syntax/stx
syntax/define syntax/define
syntax/boundmap syntax/boundmap
@ -14,7 +14,7 @@
racket/base racket/base
racket/promise)) racket/promise))
(require mzlib/stxparam (require racket/stxparam
syntax/readerr syntax/readerr
"private-lex/token.rkt") "private-lex/token.rkt")
@ -30,7 +30,7 @@
file-path file-path
lexer-file-path ;; alternate name lexer-file-path ;; alternate name
;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4. ;; Lex abbrevs for unicode char sets.
any-char any-string nothing alphabetic lower-case upper-case title-case any-char any-string nothing alphabetic lower-case upper-case title-case
numeric symbolic punctuation graphic whitespace blank iso-control numeric symbolic punctuation graphic whitespace blank iso-control
@ -77,9 +77,9 @@
(let () (let ()
(define spec/re-acts (syntax->list #'RE+ACTS)) (define spec/re-acts (syntax->list #'RE+ACTS))
(for/and ([x (in-list spec/re-acts)]) (for/and ([x (in-list spec/re-acts)])
(syntax-case x () (syntax-case x ()
[(RE ACT) #t] [(RE ACT) #t]
[else (raise-syntax-error caller "not a regular expression / action pair" stx x)])) [else (raise-syntax-error caller "not a regular expression / action pair" stx x)]))
(define eof-act (get-special-action spec/re-acts #'eof (case src-loc-style (define eof-act (get-special-action spec/re-acts #'eof (case src-loc-style
[(lexer-src-pos) #'(return-without-pos eof)] [(lexer-src-pos) #'(return-without-pos eof)]
[(lexer-srcloc) #'(return-without-srcloc eof)] [(lexer-srcloc) #'(return-without-srcloc eof)]
@ -212,92 +212,90 @@
(define (get-next-state char table) (define (get-next-state char table)
(and table (get-next-state-helper char 0 (vector-length table) table))) (and table (get-next-state-helper char 0 (vector-length table) table)))
(define (lexer-body start-state trans-table actions no-lookahead special-action (define ((lexer-body start-state trans-table actions no-lookahead special-action
has-special-comment-action? special-comment-action eof-action) has-special-comment-action? special-comment-action eof-action) ip)
(letrec ([lexer (define (lexer ip)
(λ (ip) (define first-pos (get-position ip))
(let ((first-pos (get-position ip)) (define first-char (peek-char-or-special ip 0))
(first-char (peek-char-or-special ip 0))) ;(printf "(peek-char-or-special port 0) = ~e\n" first-char)
;(printf "(peek-char-or-special port 0) = ~e\n" first-char) (cond
(cond [(eof-object? first-char)
[(eof-object? first-char) (do-match ip first-pos eof-action (read-char-or-special ip))]
(do-match ip first-pos eof-action (read-char-or-special ip))] [(special-comment? first-char)
[(special-comment? first-char) (read-char-or-special ip)
(read-char-or-special ip) (cond
(cond (has-special-comment-action?
(has-special-comment-action? (do-match ip first-pos special-comment-action #f))
(do-match ip first-pos special-comment-action #f)) (else (lexer ip)))]
(else (lexer ip)))] [(not (char? first-char))
[(not (char? first-char)) (do-match ip first-pos special-action (read-char-or-special ip))]
(do-match ip first-pos special-action (read-char-or-special ip))] [else
[else (let lexer-loop (
(let lexer-loop ( ;; current-state
;; current-state [state start-state]
(state start-state) ;; the character to transition on
;; the character to transition on [char first-char]
(char first-char) ;; action for the longest match seen thus far
;; action for the longest match seen thus far ;; including a match at the current state
;; including a match at the current state [longest-match-action
(longest-match-action (vector-ref actions start-state)]
(vector-ref actions start-state)) ;; how many bytes precede char
;; how many bytes precede char [length-bytes 0]
(length-bytes 0) ;; how many characters have been read
;; how many characters have been read ;; including the one just read
;; including the one just read [length-chars 1]
(length-chars 1) ;; how many characters are in the longest match
;; how many characters are in the longest match [longest-match-length 0])
(longest-match-length 0)) (define next-state
(let ([next-state (cond
(cond [(not (char? char)) #f]
[(not (char? char)) #f] [else (get-next-state (char->integer char)
[else (get-next-state (char->integer char) (vector-ref trans-table state))]))
(vector-ref trans-table state))])]) (cond
(cond [(not next-state)
[(not next-state) (check-match ip first-pos longest-match-length
(check-match ip first-pos longest-match-length length-chars longest-match-action)]
length-chars longest-match-action)] [(vector-ref no-lookahead next-state)
[(vector-ref no-lookahead next-state) (define act (vector-ref actions next-state))
(let ((act (vector-ref actions next-state))) (check-match ip
(check-match ip first-pos
first-pos (if act length-chars longest-match-length)
(if act length-chars longest-match-length) length-chars
length-chars (if act act longest-match-action))]
(if act act longest-match-action)))] [else
[else (define act (vector-ref actions next-state))
(let* ([act (vector-ref actions next-state)] (define next-length-bytes (+ (char-utf-8-length char) length-bytes))
[next-length-bytes (+ (char-utf-8-length char) length-bytes)] (define next-char (peek-char-or-special ip next-length-bytes))
[next-char (peek-char-or-special ip next-length-bytes)]) #;(printf "(peek-char-or-special port ~e) = ~e\n"
#;(printf "(peek-char-or-special port ~e) = ~e\n" next-length-bytes next-char)
next-length-bytes next-char) (lexer-loop next-state
(lexer-loop next-state next-char
next-char (if act
(if act act
act longest-match-action)
longest-match-action) next-length-bytes
next-length-bytes (add1 length-chars)
(add1 length-chars) (if act
(if act length-chars
length-chars longest-match-length))]))]))
longest-match-length)))])))])))]) (unless (input-port? ip)
(λ (ip) (raise-argument-error 'lexer "input-port?" 0 ip))
(unless (input-port? ip) (lexer ip))
(raise-argument-error 'lexer "input-port?" 0 ip))
(lexer ip))))
(define (check-match lb first-pos longest-match-length length longest-match-action) (define (check-match lb first-pos longest-match-length length longest-match-action)
(unless longest-match-action (unless longest-match-action
(let* ([match (read-string length lb)] (define match (read-string length lb))
[end-pos (get-position lb)]) (define end-pos (get-position lb))
(raise-read-error (raise-read-error
(format "lexer: No match found in input starting with: ~a" match) (format "lexer: No match found in input starting with: ~a" match)
(file-path) (file-path)
(position-line first-pos) (position-line first-pos)
(position-col first-pos) (position-col first-pos)
(position-offset first-pos) (position-offset first-pos)
(- (position-offset end-pos) (position-offset first-pos))))) (- (position-offset end-pos) (position-offset first-pos))))
(let ([match (read-string longest-match-length lb)]) (define match (read-string longest-match-length lb))
;(printf "(read-string ~e port) = ~e\n" longest-match-length match) ;(printf "(read-string ~e port) = ~e\n" longest-match-length match)
(do-match lb first-pos longest-match-action match))) (do-match lb first-pos longest-match-action match))
(define file-path (make-parameter #f)) (define file-path (make-parameter #f))
(define lexer-file-path file-path) (define lexer-file-path file-path)
@ -325,10 +323,10 @@
(force whitespace-ranges) (force whitespace-ranges)
(force blank-ranges) (force blank-ranges)
(force iso-control-ranges)))]) (force iso-control-ranges)))])
`(union ,@(map (λ (x) `(union ,@(map (λ (x)
`(char-range ,(integer->char (car x)) `(char-range ,(integer->char (car x))
,(integer->char (cdr x)))) ,(integer->char (cdr x))))
range)))] range)))]
[(NAMES ...) (for/list ([sym (in-list '(alphabetic [(NAMES ...) (for/list ([sym (in-list '(alphabetic
lower-case lower-case
upper-case upper-case
@ -340,7 +338,7 @@
whitespace whitespace
blank blank
iso-control))]) iso-control))])
(datum->syntax #'CTXT sym #f))]) (datum->syntax #'CTXT sym #f))])
#'(define-lex-abbrevs (NAMES RANGES) ...))])) #'(define-lex-abbrevs (NAMES RANGES) ...))]))
(define-lex-abbrev any-char (char-complement (union))) (define-lex-abbrev any-char (char-complement (union)))

@ -1,5 +1,4 @@
#lang racket/base #lang racket/base
(provide (all-defined-out)) (provide (all-defined-out))
(require syntax/stx) (require syntax/stx)
@ -7,10 +6,10 @@
;; Returns the first action from a rule of the form ((which-special) action) ;; Returns the first action from a rule of the form ((which-special) action)
(define (get-special-action rules which-special none) (define (get-special-action rules which-special none)
(cond (cond
((null? rules) none) [(null? rules) none]
(else [else
(syntax-case (car rules) () (syntax-case (car rules) ()
[((special) ACT) [((special) ACT)
(and (identifier? #'special) (module-or-top-identifier=? (syntax special) which-special)) (and (identifier? #'special) (module-or-top-identifier=? #'special which-special))
#'ACT] #'ACT]
[_ (get-special-action (cdr rules) which-special none)])))) [_ (get-special-action (cdr rules) which-special none)])]))

@ -1,303 +1,297 @@
(module deriv mzscheme #lang racket/base
(require racket/list
(prefix-in is: data/integer-set)
"re.rkt"
"util.rkt")
(require mzlib/list (provide build-dfa print-dfa (struct-out dfa))
(prefix is: mzlib/integer-set)
"re.rkt"
"util.rkt")
(provide build-dfa print-dfa (struct dfa (num-states start-state final-states/actions transitions))) (define e (build-epsilon))
(define z (build-zero))
(define e (build-epsilon))
(define z (build-zero))
;; Don't do anything with this one but extract the chars
(define all-chars (->re `(char-complement (union)) (make-cache)))
;; Don't do anything with this one but extract the chars ;; get-char-groups : re bool -> (list-of char-setR?)
(define all-chars (->re `(char-complement (union)) (make-cache))) ;; Collects the char-setRs in r that could be used in
;; taking the derivative of r.
(define (get-char-groups r found-negation)
  ;; Recur on a sub-expression, carrying the current negation flag along.
  (define (groups-of sub)
    (get-char-groups sub found-negation))
  (cond
    ;; epsilon and zero contain no char sets
    [(or (eq? r e) (eq? r z)) null]
    [(char-setR? r) (list r)]
    [(concatR? r)
     (define head (concatR-re1 r))
     (define head-groups (groups-of head))
     ;; the second part only matters when the first part can match empty
     (if (re-nullable? head)
         (append head-groups (groups-of (concatR-re2 r)))
         head-groups)]
    [(repeatR? r) (groups-of (repeatR-re r))]
    [(orR? r) (append-map groups-of (orR-res r))]
    [(andR? r) (append-map groups-of (andR-res r))]
    [(negR? r)
     ;; all-chars is added only for the outermost negation encountered
     (define inner (get-char-groups (negR-re r) #t))
     (if found-negation
         inner
         (cons all-chars inner))]))
;; get-char-groups : re bool -> (list-of char-setR?) (test-block ((c (make-cache))
;; Collects the char-setRs in r that could be used in (r1 (->re #\1 c))
;; taking the derivative of r. (r2 (->re #\2 c)))
(define (get-char-groups r found-negation) ((get-char-groups e #f) null)
(cond ((get-char-groups z #f) null)
((or (eq? r e) (eq? r z)) null) ((get-char-groups r1 #f) (list r1))
((char-setR? r) (list r)) ((get-char-groups (->re `(concatenation ,r1 ,r2) c) #f)
((concatR? r) (list r1))
(if (re-nullable? (concatR-re1 r)) ((get-char-groups (->re `(concatenation ,e ,r2) c) #f)
(append (get-char-groups (concatR-re1 r) found-negation) (list r2))
(get-char-groups (concatR-re2 r) found-negation)) ((get-char-groups (->re `(concatenation (repetition 0 +inf.0 ,r1) ,r2) c) #f)
(get-char-groups (concatR-re1 r) found-negation))) (list r1 r2))
((repeatR? r) ((get-char-groups (->re `(repetition 0 +inf.0 ,r1) c) #f)
(get-char-groups (repeatR-re r) found-negation)) (list r1))
((orR? r) ((get-char-groups
(apply append (map (lambda (x) (get-char-groups x found-negation)) (orR-res r)))) (->re `(union (repetition 0 +inf.0 ,r1)
((andR? r) (concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
(apply append (map (lambda (x) (get-char-groups x found-negation)) (andR-res r)))) (list r1 r2 (->re "3" c) (->re "4" c)))
((negR? r) ((get-char-groups (->re `(complement ,r1) c) #f)
(if found-negation (list all-chars r1))
(get-char-groups (negR-re r) #t) ((get-char-groups
(cons all-chars (get-char-groups (negR-re r) #t)))))) (->re `(intersection (repetition 0 +inf.0 ,r1)
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
(list r1 r2 (->re "3" c) (->re "4" c)))
)
(define loc:member? is:member?)
(test-block ((c (make-cache)) ;; deriveR : re char cache -> re
(r1 (->re #\1 c)) (define (deriveR r c cache)
(r2 (->re #\2 c))) (cond
((get-char-groups e #f) null) [(or (eq? r e) (eq? r z)) z]
((get-char-groups z #f) null) [(char-setR? r)
((get-char-groups r1 #f) (list r1)) (if (loc:member? c (char-setR-chars r)) e z)]
((get-char-groups (->re `(concatenation ,r1 ,r2) c) #f) [(concatR? r)
(list r1)) (define r1 (concatR-re1 r))
((get-char-groups (->re `(concatenation ,e ,r2) c) #f) (define r2 (concatR-re2 r))
(list r2)) (define d (build-concat (deriveR r1 c cache) r2 cache))
((get-char-groups (->re `(concatenation (repetition 0 +inf.0 ,r1) ,r2) c) #f) (if (re-nullable? r1)
(list r1 r2)) (build-or (list d (deriveR r2 c cache)) cache)
((get-char-groups (->re `(repetition 0 +inf.0 ,r1) c) #f) d)]
(list r1)) [(repeatR? r)
((get-char-groups (build-concat (deriveR (repeatR-re r) c cache)
(->re `(union (repetition 0 +inf.0 ,r1) (build-repeat (sub1 (repeatR-low r))
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f) (sub1 (repeatR-high r))
(list r1 r2 (->re "3" c) (->re "4" c))) (repeatR-re r) cache)
((get-char-groups (->re `(complement ,r1) c) #f) cache)]
(list all-chars r1)) [(orR? r)
((get-char-groups (build-or (map (λ (x) (deriveR x c cache))
(->re `(intersection (repetition 0 +inf.0 ,r1) (orR-res r))
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f) cache)]
(list r1 r2 (->re "3" c) (->re "4" c))) [(andR? r)
) (build-and (map (λ (x) (deriveR x c cache))
(define loc:member? is:member?) (andR-res r))
cache)]
[(negR? r)
(build-neg (deriveR (negR-re r) c cache) cache)]))
;; deriveR : re char cache -> re (test-block ((c (make-cache))
(define (deriveR r c cache) (a (char->integer #\a))
(cond (b (char->integer #\b))
((or (eq? r e) (eq? r z)) z) (r1 (->re #\a c))
((char-setR? r) (r2 (->re `(repetition 0 +inf.0 #\a) c))
(if (loc:member? c (char-setR-chars r)) e z)) (r3 (->re `(repetition 0 +inf.0 ,r2) c))
((concatR? r) (r4 (->re `(concatenation #\a ,r2) c))
(let* ((r1 (concatR-re1 r)) (r5 (->re `(repetition 0 +inf.0 ,r4) c))
(r2 (concatR-re2 r)) (r6 (->re `(union ,r5 #\a) c))
(d (build-concat (deriveR r1 c cache) r2 cache))) (r7 (->re `(concatenation ,r2 ,r2) c))
(if (re-nullable? r1) (r8 (->re `(complement ,r4) c))
(build-or (list d (deriveR r2 c cache)) cache) (r9 (->re `(intersection ,r2 ,r4) c)))
d))) ((deriveR e a c) z)
((repeatR? r) ((deriveR z a c) z)
(build-concat (deriveR (repeatR-re r) c cache) ((deriveR r1 b c) z)
(build-repeat (sub1 (repeatR-low r)) ((deriveR r1 a c) e)
(sub1 (repeatR-high r)) ((deriveR r2 a c) r2)
(repeatR-re r) cache) ((deriveR r2 b c) z)
cache)) ((deriveR r3 a c) r2)
((orR? r) ((deriveR r3 b c) z)
(build-or (map (lambda (x) (deriveR x c cache)) ((deriveR r4 a c) r2)
(orR-res r)) ((deriveR r4 b c) z)
cache)) ((deriveR r5 a c) (->re `(concatenation ,r2 ,r5) c))
((andR? r) ((deriveR r5 b c) z)
(build-and (map (lambda (x) (deriveR x c cache)) ((deriveR r6 a c) (->re `(union (concatenation ,r2 ,r5) "") c))
(andR-res r)) ((deriveR r6 b c) z)
cache)) ((deriveR r7 a c) (->re `(union (concatenation ,r2 ,r2) ,r2) c))
((negR? r) ((deriveR r7 b c) z)
(build-neg (deriveR (negR-re r) c cache) cache)))) ((deriveR r8 a c) (->re `(complement, r2) c))
((deriveR r8 b c) (->re `(complement ,z) c))
((deriveR r9 a c) r2)
((deriveR r9 b c) z)
((deriveR (->re `(repetition 1 2 "ab") c) a c)
(->re `(concatenation "b" (repetition 0 1 "ab")) c)))
(test-block ((c (make-cache)) ;; An re-action is (cons re action)
(a (char->integer #\a))
(b (char->integer #\b))
(r1 (->re #\a c))
(r2 (->re `(repetition 0 +inf.0 #\a) c))
(r3 (->re `(repetition 0 +inf.0 ,r2) c))
(r4 (->re `(concatenation #\a ,r2) c))
(r5 (->re `(repetition 0 +inf.0 ,r4) c))
(r6 (->re `(union ,r5 #\a) c))
(r7 (->re `(concatenation ,r2 ,r2) c))
(r8 (->re `(complement ,r4) c))
(r9 (->re `(intersection ,r2 ,r4) c)))
((deriveR e a c) z)
((deriveR z a c) z)
((deriveR r1 b c) z)
((deriveR r1 a c) e)
((deriveR r2 a c) r2)
((deriveR r2 b c) z)
((deriveR r3 a c) r2)
((deriveR r3 b c) z)
((deriveR r4 a c) r2)
((deriveR r4 b c) z)
((deriveR r5 a c) (->re `(concatenation ,r2 ,r5) c))
((deriveR r5 b c) z)
((deriveR r6 a c) (->re `(union (concatenation ,r2 ,r5) "") c))
((deriveR r6 b c) z)
((deriveR r7 a c) (->re `(union (concatenation ,r2 ,r2) ,r2) c))
((deriveR r7 b c) z)
((deriveR r8 a c) (->re `(complement, r2) c))
((deriveR r8 b c) (->re `(complement ,z) c))
((deriveR r9 a c) r2)
((deriveR r9 b c) z)
((deriveR (->re `(repetition 1 2 "ab") c) a c)
(->re `(concatenation "b" (repetition 0 1 "ab")) c)))
;; An re-action is (cons re action) ;; derive : (list-of re-action) char cache -> (union (list-of re-action) #f)
;; Applies deriveR to the re part of every re-action, keeping each action.
;; Returns #f when every derived re is z (this includes an empty r).
(define (derive r c cache)
  (define derived
    (map (λ (re-act)
           (cons (deriveR (car re-act) c cache) (cdr re-act)))
         r))
  ;; keep the derived list only if at least one re is not z
  (and (ormap (λ (re-act) (not (eq? z (car re-act)))) derived)
       derived))
;; derive : (list-of re-action) char cache -> (union (list-of re-action) #f) (test-block ((c (make-cache))
;; applies deriveR to all the re-actions's re parts. (r1 (->re #\1 c))
;; Returns #f if the derived state is equivalent to z. (r2 (->re #\2 c)))
(define (derive r c cache) ((derive null (char->integer #\1) c) #f)
(let ((new-r (map (lambda (ra) ((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c)
(cons (deriveR (car ra) c cache) (cdr ra))) (list (cons e 1) (cons z 2)))
r))) ((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f))
(if (andmap (lambda (x) (eq? z (car x)))
new-r)
#f
new-r)))
(test-block ((c (make-cache))
(r1 (->re #\1 c))
(r2 (->re #\2 c)))
((derive null (char->integer #\1) c) #f)
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c)
(list (cons e 1) (cons z 2)))
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f))
;; get-final : (list-of re-action) -> (union #f syntax-object)
;; A nullable re (one that accepts e) marks a final state. Yields the
;; action paired with the first nullable re in res, or #f if there is none.
(define (get-final res)
  (for/first ([re-act (in-list res)]
              #:when (re-nullable? (car re-act)))
    (cdr re-act)))
;; get-final : (list-of re-action) -> (union #f syntax-object) (test-block ((c->i char->integer)
;; An re that accepts e represents a final state. Return the (c (make-cache))
;; action from the first final state or #f if there is none. (r1 (->re #\a c))
(define (get-final res) (r2 (->re #\b c))
(cond (b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5)))
((null? res) #f) (a (list (cons r1 1) (cons r2 2))))
((re-nullable? (caar res)) (cdar res)) ((derive null (c->i #\a) c) #f)
(else (get-final (cdr res))))) ((derive a (c->i #\a) c) (list (cons e 1) (cons z 2)))
((derive a (c->i #\b) c) (list (cons z 1) (cons e 2)))
((derive a (c->i #\c) c) #f)
((derive (list (cons (->re `(union " " "\n" ",") c) 1)
(cons (->re `(concatenation (repetition 0 1 "-")
(repetition 1 +inf.0 (char-range "0" "9"))) c) 2)
(cons (->re `(concatenation "-" (repetition 1 +inf.0 "-")) c) 3)
(cons (->re "[" c) 4)
(cons (->re "]" c) 5)) (c->i #\[) c)
b)
((get-final a) #f)
((get-final (list (cons e 1) (cons e 2))) 1)
((get-final b) 4))
(test-block ((c->i char->integer)
(c (make-cache))
(r1 (->re #\a c))
(r2 (->re #\b c))
(b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5)))
(a (list (cons r1 1) (cons r2 2))))
((derive null (c->i #\a) c) #f)
((derive a (c->i #\a) c) (list (cons e 1) (cons z 2)))
((derive a (c->i #\b) c) (list (cons z 1) (cons e 2)))
((derive a (c->i #\c) c) #f)
((derive (list (cons (->re `(union " " "\n" ",") c) 1)
(cons (->re `(concatenation (repetition 0 1 "-")
(repetition 1 +inf.0 (char-range "0" "9"))) c) 2)
(cons (->re `(concatenation "-" (repetition 1 +inf.0 "-")) c) 3)
(cons (->re "[" c) 4)
(cons (->re "]" c) 5)) (c->i #\[) c)
b)
((get-final a) #f)
((get-final (list (cons e 1) (cons e 2))) 1)
((get-final b) 4))
;; A state is (make-state (list-of re-action) nat)
;; spec  : the re-actions that make up this state
;; index : the state's number (build-dfa takes it from its state counter)
(define-struct state (spec index))
;; A state is (make-state (list-of re-action) nat) ;; get->key : re-action -> (list-of nat)
(define-struct state (spec index)) ;; states are indexed by the list of indexes of their res
;; get-key : (list-of re-action) -> (list-of nat)
;; The key of a state spec is the list of the re-index of each of its res.
(define (get-key s)
  (for/list ([re-act (in-list s)])
    (re-index (car re-act))))
;; get->key : re-action -> (list-of nat) (define loc:partition is:partition)
;; states are indexed by the list of indexes of their res
(define (get-key s)
(map (lambda (x) (re-index (car x))) s))
(define loc:partition is:partition) ;; compute-chars : (list-of state) -> (list-of char-set)
;; Computes the sets of equivalent characters for taking the
;; derivative of the car of st. Only one derivative per set needs to be taken.
(define (compute-chars st)
  (if (null? st)
      null
      ;; gather the char sets of every re in the first state's spec, in order,
      ;; then partition them
      (loc:partition
       (for*/list ([re-act (in-list (state-spec (car st)))]
                   [group (in-list (get-char-groups (car re-act) #f))])
         (char-setR-chars group)))))
;; compute-chars : (list-of state) -> (list-of char-set) (test-block ((c (make-cache))
;; Computed the sets of equivalent characters for taking the (c->i char->integer)
;; derivative of the car of st. Only one derivative per set need to be taken. (r1 (->re `(char-range #\1 #\4) c))
(define (compute-chars st) (r2 (->re `(char-range #\2 #\3) c)))
(cond ((compute-chars null) null)
((null? st) null) ((compute-chars (list (make-state null 1))) null)
(else ((map is:integer-set-contents
(loc:partition (map char-setR-chars (compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2))))
(apply append (map (lambda (x) (get-char-groups (car x) #f)) (list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3)))
(state-spec (car st))))))))) (is:integer-set-contents (is:union (is:make-range (c->i #\1))
(is:make-range (c->i #\4)))))))
(test-block ((c (make-cache))
(c->i char->integer)
(r1 (->re `(char-range #\1 #\4) c))
(r2 (->re `(char-range #\2 #\3) c)))
((compute-chars null) null)
((compute-chars (list (make-state null 1))) null)
((map is:integer-set-contents
(compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2))))
(list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3)))
(is:integer-set-contents (is:union (is:make-range (c->i #\1))
(is:make-range (c->i #\4)))))))
;; A dfa is (make-dfa int int
;; (list-of (cons int syntax-object))
;; (list-of (cons int (list-of (cons char-set int)))))
;; Fields, in order: num-states, start-state, final-states/actions, transitions.
;; Each transition is a state and a list of char sets, each paired with the state to transition to.
;; The finals and transitions are sorted by state number, and duplicate free.
(define-struct dfa (num-states start-state final-states/actions transitions) #:inspector (make-inspector))
;; A dfa is (make-dfa int int (define loc:get-integer is:get-integer)
;; (list-of (cons int syntax-object))
;; (list-of (cons int (list-of (cons char-set int)))))
;; Each transitions is a state and a list of chars with the state to transition to.
;; The finals and transitions are sorted by state number, and duplicate free.
(define-struct dfa (num-states start-state final-states/actions transitions) (make-inspector))
(define loc:get-integer is:get-integer) ;; build-dfa : (list-of re-action) cache -> dfa
;; Builds a dfa from the re-actions rs by repeatedly taking derivatives.
;; rs    : (list-of re-action), the spec of the start state
;; cache : called as (cache key thunk); presumably returns the value already
;;         stored under key and otherwise runs thunk and stores its result
;;         (see make-cache in util.rkt -- TODO confirm)
(define (build-dfa rs cache)
;; transitions : state -> (list-of (cons char-set state))
;; get-state-number : numbers the states; assumes make-counter yields
;; successive naturals -- TODO confirm in util.rkt
(let* ([transitions (make-hash)]
[get-state-number (make-counter)]
[start (make-state rs (get-state-number))])
;; Register the start state under its key so later derivations find it.
(cache (cons 'state (get-key rs)) (λ () start))
;; old-states : states still to be processed in the current round
;; new-states : states discovered during the current round
;; all-states : every state created so far
;; cs         : char sets still to be tried for (car old-states)
(let loop ([old-states (list start)]
[new-states null]
[all-states (list start)]
[cs (compute-chars (list start))])
(cond
;; Nothing left to process: assemble the dfa.
[(and (null? old-states) (null? new-states))
;; One more counter call supplies num-states (relies on the counter
;; assumption noted above).
(make-dfa (get-state-number) (state-index start)
;; Final states: (cons index action) for states whose get-final is not #f.
(sort (for*/list ([state (in-list all-states)]
[val (in-value (cons (state-index state) (get-final (state-spec state))))]
#:when (cdr val))
val)
< #:key car)
;; Transitions with the state structs replaced by their indexes.
(sort (hash-map transitions
(λ (state trans)
(cons (state-index state)
(for/list ([t (in-list trans)])
(cons (car t)
(state-index (cdr t)))))))
< #:key car))]
;; Current round finished: the newly found states become the work list.
[(null? old-states)
(loop new-states null all-states (compute-chars new-states))]
;; All char sets of the current state tried: move on to the next state.
[(null? cs)
(loop (cdr old-states) new-states all-states (compute-chars (cdr old-states)))]
[else
(define state (car old-states))
(define c (car cs))
;; Derive with one integer taken from the char set c.
(define new-re (derive (state-spec state) (loc:get-integer c) cache))
(cond
[new-re
;; new-state? is set to #t only if the thunk runs, i.e. cache did not
;; already hold a state for this key.
(let* ([new-state? #f]
[new-state (cache (cons 'state (get-key new-re))
(λ ()
(set! new-state? #t)
(make-state new-re (get-state-number))))]
[new-all-states (if new-state? (cons new-state all-states) all-states)])
;; Record the transition state --c--> new-state.
(hash-set! transitions
state
(cons (cons c new-state)
(hash-ref transitions state
(λ () null))))
(cond
[new-state?
(loop old-states (cons new-state new-states) new-all-states (cdr cs))]
[else
(loop old-states new-states new-all-states (cdr cs))]))]
;; derive returned #f (every derived re is z): no transition on c.
[else (loop old-states new-states all-states (cdr cs))])]))))
;; build-dfa : (list-of re-action) cache -> dfa (define (print-dfa x)
(define (build-dfa rs cache) (printf "number of states: ~a\n" (dfa-num-states x))
(let* ((transitions (make-hash-table)) (printf "start state: ~a\n" (dfa-start-state x))
(get-state-number (make-counter)) (printf "final states: ~a\n" (map car (dfa-final-states/actions x)))
(start (make-state rs (get-state-number)))) (for-each (λ (trans)
(cache (cons 'state (get-key rs)) (lambda () start)) (printf "state: ~a\n" (car trans))
(let loop ((old-states (list start)) (for-each (λ (rule)
(new-states null) (printf " -~a-> ~a\n"
(all-states (list start)) (is:integer-set-contents (car rule))
(cs (compute-chars (list start)))) (cdr rule)))
(cond (cdr trans)))
((and (null? old-states) (null? new-states)) (dfa-transitions x)))
(make-dfa (get-state-number) (state-index start)
(sort (filter (lambda (x) (cdr x))
(map (lambda (state)
(cons (state-index state) (get-final (state-spec state))))
all-states))
(lambda (a b) (< (car a) (car b))))
(sort (hash-table-map transitions
(lambda (state trans)
(cons (state-index state)
(map (lambda (t)
(cons (car t)
(state-index (cdr t))))
trans))))
(lambda (a b) (< (car a) (car b))))))
((null? old-states)
(loop new-states null all-states (compute-chars new-states)))
((null? cs)
(loop (cdr old-states) new-states all-states (compute-chars (cdr old-states))))
(else
(let* ((state (car old-states))
(c (car cs))
(new-re (derive (state-spec state) (loc:get-integer c) cache)))
(cond
(new-re
(let* ((new-state? #f)
(new-state (cache (cons 'state (get-key new-re))
(lambda ()
(set! new-state? #t)
(make-state new-re (get-state-number)))))
(new-all-states (if new-state? (cons new-state all-states) all-states)))
(hash-table-put! transitions
state
(cons (cons c new-state)
(hash-table-get transitions state
(lambda () null))))
(cond
(new-state?
(loop old-states (cons new-state new-states) new-all-states (cdr cs)))
(else
(loop old-states new-states new-all-states (cdr cs))))))
(else (loop old-states new-states all-states (cdr cs))))))))))
(define (print-dfa x) (define (build-test-dfa rs)
(printf "number of states: ~a\n" (dfa-num-states x)) (define c (make-cache))
(printf "start state: ~a\n" (dfa-start-state x)) (build-dfa (map (λ (x) (cons (->re x c) 'action)) rs) c))
(printf "final states: ~a\n" (map car (dfa-final-states/actions x)))
(for-each (lambda (trans)
(printf "state: ~a\n" (car trans))
(for-each (lambda (rule)
(printf " -~a-> ~a\n"
(is:integer-set-contents (car rule))
(cdr rule)))
(cdr trans)))
(dfa-transitions x)))
(define (build-test-dfa rs)
(let ((c (make-cache)))
(build-dfa (map (lambda (x) (cons (->re x c) 'action))
rs)
c)))
#| #|
@ -316,8 +310,8 @@
(build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\a #\b)) #\a (union #\a #\b) (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\a #\b)) #\a (union #\a #\b)
(union #\a #\b) (union #\a #\b) (union #\a #\b))))) (union #\a #\b) (union #\a #\b) (union #\a #\b)))))
(define t9 (build-test-dfa `((concatenation "/*" (define t9 (build-test-dfa `((concatenation "/*"
(complement (concatenation (intersection) "*/" (intersection))) (complement (concatenation (intersection) "*/" (intersection)))
"*/")))) "*/"))))
(define t11 (build-test-dfa `((complement "1")))) (define t11 (build-test-dfa `((complement "1"))))
(define t12 (build-test-dfa `((concatenation (intersection (concatenation (repetition 0 +inf.0 "a") "b") (define t12 (build-test-dfa `((concatenation (intersection (concatenation (repetition 0 +inf.0 "a") "b")
(concatenation "a" (repetition 0 +inf.0 "b"))) (concatenation "a" (repetition 0 +inf.0 "b")))
@ -329,11 +323,11 @@
"]"))) "]")))
(define y (build-test-dfa (define y (build-test-dfa
`((repetition 1 +inf.0 `((repetition 1 +inf.0
(union (concatenation "|" (repetition 0 +inf.0 (char-complement "|")) "|") (union (concatenation "|" (repetition 0 +inf.0 (char-complement "|")) "|")
(concatenation "|" (repetition 0 +inf.0 (char-complement "|")))))))) (concatenation "|" (repetition 0 +inf.0 (char-complement "|"))))))))
(define t13 (build-test-dfa `((intersection (concatenation (intersection) "111" (intersection)) (define t13 (build-test-dfa `((intersection (concatenation (intersection) "111" (intersection))
(complement (union (concatenation (intersection) "01") (complement (union (concatenation (intersection) "01")
(repetition 1 +inf.0 "1"))))))) (repetition 1 +inf.0 "1")))))))
(define t14 (build-test-dfa `((complement "1")))) (define t14 (build-test-dfa `((complement "1")))))
|#
) |#

@ -1,5 +1,5 @@
#lang scheme/base #lang racket/base
(require (for-syntax scheme/base) (require (for-syntax racket/base)
"../lex.rkt" "../lex.rkt"
rackunit) rackunit)

@ -1,179 +1,161 @@
(module front mzscheme #lang racket/base
(require (prefix is: mzlib/integer-set) (require racket/base
mzlib/list racket/match
syntax/stx (prefix-in is: data/integer-set)
"util.rkt" racket/list
"stx.rkt" syntax/stx
"re.rkt" "util.rkt"
"deriv.rkt") "stx.rkt"
"re.rkt"
(provide build-lexer) "deriv.rkt")
(define-syntax time-label (provide build-lexer)
(syntax-rules ()
((_ l e ...) (define-syntax time-label
(begin (syntax-rules ()
(printf "~a: " l) ((_ l e ...)
(time (begin e ...)))))) (begin
(printf "~a: " l)
;; A table is either (time (begin e ...))))))
;; - (vector-of (union #f nat))
;; - (vector-of (vector-of (vector nat nat nat))) ;; A table is either
;; - (vector-of (union #f nat))
(define loc:integer-set-contents is:integer-set-contents) ;; - (vector-of (vector-of (vector nat nat nat)))
;; dfa->1d-table : dfa -> (same as build-lexer) (define loc:integer-set-contents is:integer-set-contents)
(define (dfa->1d-table dfa)
(let ((state-table (make-vector (dfa-num-states dfa) #f)) ;; dfa->1d-table : dfa -> (same as build-lexer)
(transition-cache (make-hash-table 'equal))) (define (dfa->1d-table dfa)
(for-each (define state-table (make-vector (dfa-num-states dfa) #f))
(lambda (trans) (define transition-cache (make-hasheq))
(let* ((from-state (car trans)) (for ([trans (in-list (dfa-transitions dfa))])
(all-chars/to (cdr trans)) (match-define (cons from-state all-chars/to) trans)
(flat-all-chars/to (define flat-all-chars/to
(sort (sort
(apply append (for*/list ([chars/to (in-list all-chars/to)]
(map (lambda (chars/to) [char-ranges (in-value (loc:integer-set-contents (car chars/to)))]
(let ((char-ranges (loc:integer-set-contents (car chars/to))) [to (in-value (cdr chars/to))]
(to (cdr chars/to))) [char-range (in-list char-ranges)])
(map (lambda (char-range) (define entry (vector (car char-range) (cdr char-range) to))
(let ((entry (vector (car char-range) (cdr char-range) to))) (hash-ref transition-cache entry (λ ()
(hash-table-get transition-cache entry (hash-set! transition-cache
(lambda () entry
(hash-table-put! transition-cache entry)
entry entry)))
entry) < #:key (λ (v) (vector-ref v 0))))
entry)))) (vector-set! state-table from-state (list->vector flat-all-chars/to)))
char-ranges))) state-table)
all-chars/to))
(lambda (a b)
(< (vector-ref a 0) (vector-ref b 0)))))) (define loc:foldr is:foldr)
(vector-set! state-table from-state (list->vector flat-all-chars/to))))
(dfa-transitions dfa)) ;; dfa->2d-table : dfa -> (same as build-lexer)
state-table)) (define (dfa->2d-table dfa)
;; char-table : (vector-of (union #f nat))
;; The lexer table, one entry per state per char.
(define loc:foldr is:foldr) ;; Each entry specifies a state to transition to.
;; #f indicates no transition
;; dfa->2d-table : dfa -> (same as build-lexer) (define char-table (make-vector (* 256 (dfa-num-states dfa)) #f))
(define (dfa->2d-table dfa) ;; Fill the char-table vector
(let ( (for* ([trans (in-list (dfa-transitions dfa))]
;; char-table : (vector-of (union #f nat)) [chars/to (in-list (cdr trans))])
;; The lexer table, one entry per state per char. (define from-state (car trans))
;; Each entry specifies a state to transition to. (define to-state (cdr chars/to))
;; #f indicates no transition (loc:foldr (λ (char _)
(char-table (make-vector (* 256 (dfa-num-states dfa)) #f))) (vector-set! char-table
(bitwise-ior
;; Fill the char-table vector char
(for-each (arithmetic-shift from-state 8))
(lambda (trans) to-state))
(let ((from-state (car trans))) (void)
(for-each (lambda (chars/to) (car chars/to)))
(let ((to-state (cdr chars/to))) char-table)
(loc:foldr (lambda (char _)
(vector-set! char-table
(bitwise-ior ;; dfa->actions : dfa -> (vector-of (union #f syntax-object))
char ;; The action for each final state, #f if the state isn't final
(arithmetic-shift from-state 8)) (define (dfa->actions dfa)
to-state)) (define actions (make-vector (dfa-num-states dfa) #f))
(void) (for ([state/action (in-list (dfa-final-states/actions dfa))])
(car chars/to)))) (vector-set! actions (car state/action) (cdr state/action)))
(cdr trans)))) actions)
(dfa-transitions dfa))
char-table)) ;; dfa->no-look : dfa -> (vector-of bool)
;; For each state whether the lexer can ignore the next input.
;; It can do this only if there are no transitions out of the
;; dfa->actions : dfa -> (vector-of (union #f syntax-object)) ;; current state.
;; The action for each final state, #f if the state isn't final (define (dfa->no-look dfa)
(define (dfa->actions dfa) (define no-look (make-vector (dfa-num-states dfa) #t))
(let ((actions (make-vector (dfa-num-states dfa) #f))) (for ([trans (in-list (dfa-transitions dfa))])
(for-each (lambda (state/action) (vector-set! no-look (car trans) #f))
(vector-set! actions (car state/action) (cdr state/action))) no-look)
(dfa-final-states/actions dfa))
actions)) (test-block ((d1 (make-dfa 1 1 (list) (list)))
(d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
;; dfa->no-look : dfa -> (vector-of bool) (list (cons 1 (list (cons (is:make-range 49 50) 1)
;; For each state whether the lexer can ignore the next input. (cons (is:make-range 51) 2)))
;; It can do this only if there are no transitions out of the (cons 2 (list (cons (is:make-range 49) 3))))))
;; current state. (d3 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
(define (dfa->no-look dfa) (list (cons 1 (list (cons (is:make-range 100 200) 0)
(let ((no-look (make-vector (dfa-num-states dfa) #t))) (cons (is:make-range 49 50) 1)
(for-each (lambda (trans) (cons (is:make-range 51) 2)))
(vector-set! no-look (car trans) #f)) (cons 2 (list (cons (is:make-range 49) 3)))))))
(dfa-transitions dfa)) ((dfa->2d-table d1) (make-vector 256 #f))
no-look)) ((dfa->2d-table d2) (let ((v (make-vector 1024 #f)))
(vector-set! v 305 1)
(test-block ((d1 (make-dfa 1 1 (list) (list))) (vector-set! v 306 1)
(d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) (vector-set! v 307 2)
(list (cons 1 (list (cons (is:make-range 49 50) 1) (vector-set! v 561 3)
(cons (is:make-range 51) 2))) v))
(cons 2 (list (cons (is:make-range 49) 3)))))) ((dfa->1d-table d1) (make-vector 1 #f))
(d3 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) ((dfa->1d-table d2) #(#f
(list (cons 1 (list (cons (is:make-range 100 200) 0)
(cons (is:make-range 49 50) 1)
(cons (is:make-range 51) 2)))
(cons 2 (list (cons (is:make-range 49) 3)))))))
((dfa->2d-table d1) (make-vector 256 #f))
((dfa->2d-table d2) (let ((v (make-vector 1024 #f)))
(vector-set! v 305 1)
(vector-set! v 306 1)
(vector-set! v 307 2)
(vector-set! v 561 3)
v))
((dfa->1d-table d1) (make-vector 1 #f))
((dfa->1d-table d2) #(#f
#(#(49 50 1) #(51 51 2)) #(#(49 50 1) #(51 51 2))
#(#(49 49 3)) #(#(49 49 3))
#f)) #f))
((dfa->1d-table d3) #(#f ((dfa->1d-table d3) #(#f
#(#(49 50 1) #(51 51 2) #(100 200 0)) #(#(49 50 1) #(51 51 2) #(100 200 0))
#(#(49 49 3)) #(#(49 49 3))
#f)) #f))
((dfa->actions d1) (vector #f)) ((dfa->actions d1) (vector #f))
((dfa->actions d2) (vector #f #f 2 3)) ((dfa->actions d2) (vector #f #f 2 3))
((dfa->no-look d1) (vector #t)) ((dfa->no-look d1) (vector #t))
((dfa->no-look d2) (vector #t #f #f #t))) ((dfa->no-look d2) (vector #t #f #f #t)))
;; build-lexer : syntax-object list -> ;; build-lexer : syntax-object list ->
;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object)) ;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object))
;; each syntax object has the form (re action) ;; each syntax object has the form (re action)
(define (build-lexer sos) (define (build-lexer sos)
(let* ((disappeared-uses (box null)) (define disappeared-uses (box null))
(s-re-acts (map (lambda (so) (define s-re-acts (for/list ([so (in-list sos)])
(cons (parse (stx-car so) disappeared-uses) (cons (parse (stx-car so) disappeared-uses)
(stx-car (stx-cdr so)))) (stx-car (stx-cdr so)))))
sos)) (define cache (make-cache))
(define re-acts (for/list ([s-re-act (in-list s-re-acts)])
(cache (make-cache)) (cons (->re (car s-re-act) cache)
(cdr s-re-act))))
(re-acts (map (lambda (s-re-act) (define dfa (build-dfa re-acts cache))
(cons (->re (car s-re-act) cache) (define table (dfa->1d-table dfa))
(cdr s-re-act))) ;(print-dfa dfa)
s-re-acts)) #;(let ((num-states (vector-length table))
(num-vectors (length (filter values (vector->list table))))
(num-entries (apply + (map
(λ (x) (if x (vector-length x) 0))
(vector->list table))))
(num-different-entries
(let ((ht (make-hash)))
(for-each
(λ (x)
(when x
(for-each
(λ (y)
(hash-set! ht y #t))
(vector->list x))))
(vector->list table))
(length (hash-table-map ht cons)))))
(printf "~a states, ~aKB\n"
num-states
(/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries
(* 5 num-different-entries))) 1024)))
(values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa)
(unbox disappeared-uses)))
(dfa (build-dfa re-acts cache))
(table (dfa->1d-table dfa)))
;(print-dfa dfa)
#;(let ((num-states (vector-length table))
(num-vectors (length (filter values (vector->list table))))
(num-entries (apply + (map
(lambda (x) (if x (vector-length x) 0))
(vector->list table))))
(num-different-entries
(let ((ht (make-hash-table)))
(for-each
(lambda (x)
(when x
(for-each
(lambda (y)
(hash-table-put! ht y #t))
(vector->list x))))
(vector->list table))
(length (hash-table-map ht cons)))))
(printf "~a states, ~aKB\n"
num-states
(/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries
(* 5 num-different-entries))) 1024)))
(values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa)
(unbox disappeared-uses))))
)

@ -1,385 +1,384 @@
(module re mzscheme #lang racket/base
(require mzlib/list (require racket/list
scheme/match racket/match
(prefix is: mzlib/integer-set) (prefix-in is: data/integer-set)
"util.rkt") "util.rkt")
(provide ->re build-epsilon build-zero build-char-set build-concat (provide ->re build-epsilon build-zero build-char-set build-concat
build-repeat build-or build-and build-neg build-repeat build-or build-and build-neg
epsilonR? zeroR? char-setR? concatR? repeatR? orR? andR? negR? epsilonR? zeroR? char-setR? concatR? repeatR? orR? andR? negR?
char-setR-chars concatR-re1 concatR-re2 repeatR-re repeatR-low repeatR-high char-setR-chars concatR-re1 concatR-re2 repeatR-re repeatR-low repeatR-high
orR-res andR-res negR-re orR-res andR-res negR-re
re-nullable? re-index) re-nullable? re-index)
;; get-index : -> nat
;; Supplies the index stored in each re that is built below.
(define get-index (make-counter))

;; An re is either
;; - (make-epsilonR bool nat)
;; - (make-zeroR bool nat)
;; - (make-char-setR bool nat char-set)
;; - (make-concatR bool nat re re)
;; - (make-repeatR bool nat nat nat-or-+inf.0 re)
;; - (make-orR bool nat (list-of re))    Must not directly contain any orRs
;; - (make-andR bool nat (list-of re))   Must not directly contain any andRs
;; - (make-negR bool nat re)
;;
;; Every re must have an index field globally different from all
;; other re index fields.
(define-struct re (nullable? index) #:inspector (make-inspector))
(define-struct (epsilonR re) () #:inspector (make-inspector))
(define-struct (zeroR re) () #:inspector (make-inspector))
(define-struct (char-setR re) (chars) #:inspector (make-inspector))
(define-struct (concatR re) (re1 re2) #:inspector (make-inspector))
(define-struct (repeatR re) (low high re) #:inspector (make-inspector))
(define-struct (orR re) (res) #:inspector (make-inspector))
(define-struct (andR re) (res) #:inspector (make-inspector))
(define-struct (negR re) (re) #:inspector (make-inspector))

;; e : re
;; The unique epsilon re (nullable)
(define e (make-epsilonR #t (get-index)))

;; z : re
;; The unique zero re (not nullable)
(define z (make-zeroR #f (get-index)))

;; s-re = char                       constant
;;      | string                     constant (sequence of characters)
;;      | re                         a precompiled re
;;      | (repetition low high s-re) repetition between low and high times (inclusive)
;;      | (union s-re ...)
;;      | (intersection s-re ...)
;;      | (complement s-re)
;;      | (concatenation s-re ...)
;;      | (char-range rng rng)       match any character between two (inclusive)
;;      | (char-complement char-set) match any character not listed
;; low = natural-number
;; high = natural-number or +inf.0
;; rng = char or string with length 1
;; (concatenation), (repetition 0 0 x), and "" match the empty string.
;; (union) matches no strings.
;; (intersection) matches any string.

;; Local names for the integer-set operations used by this module.
(define-values (loc:make-range loc:union loc:split loc:complement)
  (values is:make-range is:union is:split is:complement))
;; ->re : s-re cache -> re
;; Translates the s-expression form of a regular expression into an re,
;; going through the build-* constructors so that the cache is consulted.
;; An s-re that matches none of the shapes below is a `match` failure.
(define (->re exp cache)
  ;; Converts each member of a list of s-res, first to last.
  (define (convert-all s-res)
    (for/list ([s (in-list s-res)])
      (->re s cache)))
  ;; A range endpoint is a char, or a string whose first char is used.
  (define (endpoint->integer c)
    (char->integer (if (string? c) (string-ref c 0) c)))
  ;; Intersection of two char-sets: the first of loc:split's three results.
  (define (intersect a b)
    (let-values ([(common _ __) (loc:split a b)])
      common))
  (match exp
    [(? char? ch)
     (build-char-set (loc:make-range (char->integer ch)) cache)]
    [(? string? str)
     (->re (cons 'concatenation (string->list str)) cache)]
    [(? re? built) built]
    [(list 'repetition low high r)
     (build-repeat low high (->re r cache) cache)]
    [(list 'union rs ...)
     (build-or (flatten-res (convert-all rs) orR? orR-res loc:union cache)
               cache)]
    [(list 'intersection rs ...)
     (build-and (flatten-res (convert-all rs) andR? andR-res intersect cache)
                cache)]
    [(list 'complement r)
     (build-neg (->re r cache) cache)]
    [(list 'concatenation rs ...)
     ;; Right fold: the last element is converted first and e ends the chain.
     (foldr (λ (s-re rest-re)
              (build-concat (->re s-re cache) rest-re cache))
            e
            rs)]
    [(list 'char-range c1 c2)
     (let* ([lo (endpoint->integer c1)]
            [hi (endpoint->integer c2)])
       (if (<= lo hi)
           (build-char-set (loc:make-range lo hi) cache)
           z))]
    [(list 'char-complement crs ...)
     (let ([listed (->re (cons 'union crs) cache)])
       (cond
         [(zeroR? listed)
          (build-char-set (loc:make-range 0 max-char-num) cache)]
         [(char-setR? listed)
          (build-char-set (loc:complement (char-setR-chars listed) 0 max-char-num)
                          cache)]
         [else z]))]))
;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re))
;;              (char-set char-set -> char-set) cache -> (list-of re)
;; Folds every char-set found in l into a single char-set with combine and
;; splices in the members of any element that satisfies type?.
;; get-res is only ever applied to elements for which type? holds.
;; The combined char-set, if there is one, comes first in the result; the
;; remaining elements follow in the reverse of the order they were visited.
(define (flatten-res l type? get-res combine cache)
  (let walk ([pending l]
             ;; merged : (union #f char-set)
             [merged #f]
             [others null])
    (cond
      [(null? pending)
       (if merged
           (cons (build-char-set merged cache) others)
           others)]
      [else
       (define head (car pending))
       (define tail (cdr pending))
       (cond
         [(char-setR? head)
          (walk tail
                (if merged
                    (combine (char-setR-chars head) merged)
                    (char-setR-chars head))
                others)]
         [(type? head)
          ;; Splice the nested members in front so they are visited next.
          (walk (append (get-res head) tail) merged others)]
         [else
          (walk tail merged (cons head others))])])))
;; build-epsilon : -> re
;; Returns the shared epsilon re.
(define (build-epsilon) e)

;; build-zero : -> re
;; Returns the shared zero re.
(define (build-zero) z)

(define loc:integer-set-contents is:integer-set-contents)

;; build-char-set : char-set cache -> re
;; An empty char-set becomes z. Otherwise the cache is asked for the entry
;; keyed by the set's contents, with a thunk that makes a new char-setR.
(define (build-char-set cs cache)
  (let ([contents (loc:integer-set-contents cs)])
    (if (null? contents)
        z
        (cache contents
               (λ ()
                 (make-char-setR #f (get-index) cs))))))
;; build-concat : re re cache -> re
;; e on either side is dropped and z on either side gives z.
;; Otherwise the result is looked up in the cache under the two indices.
(define (build-concat r1 r2 cache)
  (cond
    [(eq? e r1) r2]
    [(eq? e r2) r1]
    [(or (eq? z r1) (eq? z r2)) z]
    [else
     (cache (list* 'concat (re-index r1) (re-index r2))
            (λ ()
              (make-concatR (and (re-nullable? r1) (re-nullable? r2))
                            (get-index)
                            r1
                            r2)))]))

;; build-repeat : nat nat-or-+inf.0 re cache -> re
;; A negative low is treated as 0.
(define (build-repeat low high r cache)
  (define lo (if (negative? low) 0 low))
  (cond
    ;; Repeating epsilon is still epsilon.
    [(eq? r e) e]
    ;; Zero repetitions allowed and either nothing more is, or r is z.
    [(and (zero? lo) (or (zero? high) (eq? z r))) e]
    ;; Exactly once is r itself.
    [(and (= 1 lo) (= 1 high)) r]
    ;; r is already an unbounded repeat with low 0 or 1: collapse the nesting.
    [(and (repeatR? r)
          (eqv? (repeatR-high r) +inf.0)
          (or (= 0 (repeatR-low r))
              (= 1 (repeatR-low r))))
     (build-repeat (* lo (repeatR-low r))
                   +inf.0
                   (repeatR-re r)
                   cache)]
    [else
     (cache (list* 'repeat lo high (re-index r))
            (λ ()
              (make-repeatR (or (re-nullable? r) (zero? lo))
                            (get-index)
                            lo
                            high
                            r)))]))
;; build-or : (list-of re) cache -> re
;; Nested orRs are expanded with replace, the list is passed through
;; do-simple-equiv keyed on re-index, and z is dropped.
(define (build-or rs cache)
  (define alternatives
    (for/list ([candidate (in-list (do-simple-equiv (replace rs orR? orR-res null)
                                                    re-index))]
               #:unless (eq? candidate z))
      candidate))
  (cond
    [(null? alternatives) z]
    [(null? (cdr alternatives)) (car alternatives)]
    ;; The negation of z is among the alternatives: it is the whole result.
    [(memq (build-neg z cache) alternatives) (build-neg z cache)]
    [else
     (cache (cons 'or (map re-index alternatives))
            (λ ()
              (make-orR (ormap re-nullable? alternatives)
                        (get-index)
                        alternatives)))]))

;; build-and : (list-of re) cache -> re
;; Nested andRs are expanded with replace and the list is passed through
;; do-simple-equiv keyed on re-index. An empty list gives the negation of z.
(define (build-and rs cache)
  (define conjuncts
    (do-simple-equiv (replace rs andR? andR-res null) re-index))
  (cond
    [(null? conjuncts) (build-neg z cache)]
    [(null? (cdr conjuncts)) (car conjuncts)]
    [(memq z conjuncts) z]
    [else
     (cache (cons 'and (map re-index conjuncts))
            (λ ()
              (make-andR (andmap re-nullable? conjuncts)
                         (get-index)
                         conjuncts)))]))

;; build-neg : re cache -> re
;; Negating a negR gives back the re inside it.
(define (build-neg r cache)
  (if (negR? r)
      (negR-re r)
      (cache (cons 'neg (re-index r))
             (λ ()
               (make-negR (not (re-nullable? r)) (get-index) r)))))
;; Tests for the build-functions
(test-block ((c (make-cache))
             (isc is:integer-set-contents)
             (r1 (build-char-set (is:make-range (char->integer #\1)) c))
             (r2 (build-char-set (is:make-range (char->integer #\2)) c))
             (r3 (build-char-set (is:make-range (char->integer #\3)) c))
             (rc (build-concat r1 r2 c))
             (rc2 (build-concat r2 r1 c))
             (rr (build-repeat 0 +inf.0 rc c))
             (ro (build-or `(,rr ,rc ,rr) c))
             (ro2 (build-or `(,rc ,rr ,z) c))
             (ro3 (build-or `(,rr ,rc) c))
             (ro4 (build-or `(,(build-or `(,r1 ,r2) c)
                              ,(build-or `(,r2 ,r3) c)) c))
             (ra (build-and `(,rr ,rc ,rr) c))
             (ra2 (build-and `(,rc ,rr) c))
             (ra3 (build-and `(,rr ,rc) c))
             (ra4 (build-and `(,(build-and `(,r3 ,r2) c)
                               ,(build-and `(,r2 ,r1) c)) c))
             (rn (build-neg z c))
             (rn2 (build-neg r1 c)))
            ;; build-char-set
            ((isc (char-setR-chars r1)) (isc (is:make-range (char->integer #\1))))
            ((isc (char-setR-chars r2)) (isc (is:make-range (char->integer #\2))))
            ((isc (char-setR-chars r3)) (isc (is:make-range (char->integer #\3))))
            ((build-char-set (is:make-range) c) z)
            ;; build-concat
            ((build-concat r1 e c) r1)
            ((build-concat e r1 c) r1)
            ((build-concat r1 z c) z)
            ((build-concat z r1 c) z)
            ((build-concat r1 r2 c) rc)
            ((concatR-re1 rc) r1)
            ((concatR-re2 rc) r2)
            ((concatR-re1 rc2) r2)
            ((concatR-re2 rc2) r1)
            ;; build-or
            (ro ro2)
            (ro ro3)
            (ro4 (build-or `(,r1 ,r2 ,r3) c))
            ((orR-res ro) (list rc rr))
            ((orR-res ro4) (list r1 r2 r3))
            ((build-or null c) z)
            ((build-or `(,r1 ,z) c) r1)
            ;; build-repeat
            ((build-repeat 0 +inf.0 rc c) rr)
            ((build-repeat 0 1 z c) e)
            ((build-repeat 0 0 rc c) e)
            ((build-repeat 0 +inf.0 z c) e)
            ((build-repeat -1 +inf.0 z c) e)
            ((build-repeat 0 +inf.0 (build-repeat 0 +inf.0 rc c) c)
             (build-repeat 0 +inf.0 rc c))
            ((build-repeat 20 20 (build-repeat 0 +inf.0 rc c) c)
             (build-repeat 0 +inf.0 rc c))
            ((build-repeat 20 20 (build-repeat 1 +inf.0 rc c) c)
             (build-repeat 20 +inf.0 rc c))
            ((build-repeat 1 1 rc c) rc)
            ((repeatR-re rr) rc)
            ;; build-and
            (ra ra2)
            (ra ra3)
            (ra4 (build-and `(,r1 ,r2 ,r3) c))
            ((andR-res ra) (list rc rr))
            ((andR-res ra4) (list r1 r2 r3))
            ((build-and null c) (build-neg z c))
            ((build-and `(,r1 ,z) c) z)
            ((build-and `(,r1) c) r1)
            ;; build-neg
            ((build-neg r1 c) (build-neg r1 c))
            ((build-neg (build-neg r1 c) c) r1)
            ((negR-re (build-neg r2 c)) r2)
            ;; nullability
            ((re-nullable? r1) #f)
            ((re-nullable? rc) #f)
            ((re-nullable? (build-concat rr rr c)) #t)
            ((re-nullable? rr) #t)
            ((re-nullable? (build-repeat 0 1 rc c)) #t)
            ((re-nullable? (build-repeat 1 2 rc c)) #f)
            ((re-nullable? (build-repeat 1 2 (build-or (list e r1) c) c)) #t)
            ((re-nullable? ro) #t)
            ((re-nullable? (build-or `(,r1 ,r2) c)) #f)
            ((re-nullable? (build-and `(,r1 ,e) c)) #f)
            ((re-nullable? (build-and `(,rr ,e) c)) #t)
            ((re-nullable? (build-neg r1 c)) #t)
            ((re-nullable? (build-neg rr c)) #f))

;; Tests for flatten-res
(test-block ((c (make-cache))
             (isc is:integer-set-contents)
             (r1 (->re #\1 c))
             (r2 (->re #\2 c))
             (r3-5 (->re '(char-range #\3 #\5) c))
             (r4 (build-or `(,r1 ,r2) c))
             (r5 (->re `(union ,r3-5 #\7) c))
             (r6 (->re #\6 c)))
            ((flatten-res null orR? orR-res is:union c) null)
            ((isc (char-setR-chars (car (flatten-res `(,r1) orR? orR-res is:union c))))
             (isc (is:make-range (char->integer #\1))))
            ((isc (char-setR-chars (car (flatten-res `(,r4) orR? orR-res is:union c))))
             (isc (is:make-range (char->integer #\1) (char->integer #\2))))
            ((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1)
                                                     orR? orR-res is:union c))))
             (isc (is:make-range (char->integer #\1) (char->integer #\7))))
            ((flatten-res `(,r1 ,r2) andR? andR-res (λ (x y)
                                                      (let-values (((i _ __)
                                                                    (is:split x y)))
                                                        i))
                          c)
             (list z)))

;; ->re
(test-block ((c (make-cache))
             (isc is:integer-set-contents)
             (r (->re #\a c))
             (rr (->re `(concatenation ,r ,r) c))
             (rrr (->re `(concatenation ,r ,rr) c))
             (rrr* (->re `(repetition 0 +inf.0 ,rrr) c)))
            ((isc (char-setR-chars r)) (isc (is:make-range (char->integer #\a))))
            ((->re "" c) e)
            ((->re "asdf" c) (->re `(concatenation #\a #\s #\d #\f) c))
            ((->re r c) r)
            ((->re `(repetition 0 +inf.0 ,r) c) (build-repeat 0 +inf.0 r c))
            ((->re `(repetition 1 +inf.0 ,r) c) (build-repeat 1 +inf.0 r c))
            ((->re `(repetition 0 1 ,r) c) (build-repeat 0 1 r c))
            ((->re `(repetition 0 1 ,rrr*) c) rrr*)
            ((->re `(union (union (char-range #\a #\c)
                                  (char-complement (char-range #\000 #\110)
                                                   (char-range #\112 ,(integer->char max-char-num))))
                           (union (repetition 0 +inf.0 #\2))) c)
             (build-or (list (build-char-set (is:union (is:make-range 73)
                                                       (is:make-range 97 99))
                                             c)
                             (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c))
                       c))
            ((->re `(union ,rr ,rrr) c) (build-or (list rr rrr) c))
            ((->re `(union ,r) c) r)
            ((->re `(union) c) z)
            ((->re `(intersection (intersection #\111
                                                (char-complement (char-range #\000 #\110)
                                                                 (char-range #\112 ,(integer->char max-char-num))))
                                  (intersection (repetition 0 +inf.0 #\2))) c)
             (build-and (list (build-char-set (is:make-range 73) c)
                              (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c))
                        c))
            ((->re `(intersection (intersection #\000 (char-complement (char-range #\000 #\110)
                                                                       (char-range #\112 ,(integer->char max-char-num))))
                                  (intersection (repetition 0 +inf.0 #\2))) c)
             z)
            ((->re `(intersection ,rr ,rrr) c) (build-and (list rr rrr) c))
            ((->re `(intersection ,r) c) r)
            ((->re `(intersection) c) (build-neg z c))
            ((->re `(complement ,r) c) (build-neg r c))
            ((->re `(concatenation) c) e)
            ((->re `(concatenation ,rrr*) c) rrr*)
            (rr (build-concat r r c))
            ((->re `(concatenation ,r ,rr ,rrr) c)
             (build-concat r (build-concat rr rrr c) c))
            ((isc (char-setR-chars (->re `(char-range #\1 #\1) c))) (isc (is:make-range 49)))
            ((isc (char-setR-chars (->re `(char-range #\1 #\9) c))) (isc (is:make-range 49 57)))
            ((isc (char-setR-chars (->re `(char-range "1" "1") c))) (isc (is:make-range 49)))
            ((isc (char-setR-chars (->re `(char-range "1" "9") c))) (isc (is:make-range 49 57)))
            ((->re `(char-range "9" "1") c) z)
            ((isc (char-setR-chars (->re `(char-complement) c)))
             (isc (char-setR-chars (->re `(char-range #\000 ,(integer->char max-char-num)) c))))
            ((isc (char-setR-chars (->re `(char-complement #\001 (char-range #\002 ,(integer->char max-char-num))) c)))
             (isc (is:make-range 0)))
            )
)

@ -1,30 +1,25 @@
#lang racket #lang racket/base
(require "util.rkt" syntax/id-table)
(require "util.rkt"
syntax/id-table)
(provide parse) (provide parse)
;; bad-args : syntax-object nat -> (does not return)
;; Raises a syntax error on stx saying that num arguments were expected.
(define (bad-args stx num)
  (let ([message (format "incorrect number of arguments (should have ~a)" num)])
    (raise-syntax-error #f message stx)))
;; char-range-arg: syntax-object syntax-object -> nat
;; If stx wraps a character or a string of length 1, returns the integer
;; for that character. Otherwise raises a syntax error that reports
;; containing-stx as the form and stx as the offending sub-form.
(define (char-range-arg stx containing-stx)
  (let ([datum (syntax-e stx)])
    (if (char? datum)
        (char->integer datum)
        (if (and (string? datum) (= (string-length datum) 1))
            (char->integer (string-ref datum 0))
            (raise-syntax-error
             #f
             "not a char or single-char string"
             containing-stx stx)))))
(module+ test
  ;; A char and a one-char string both map to the code of that char.
  (check-equal? (char-range-arg #'#\1 #'here)
                (char->integer #\1))
  (check-equal? (char-range-arg #'"1" #'here)
                (char->integer #\1)))
@ -34,187 +29,157 @@
;; disarm : syntax-object -> syntax-object
;; Disarms stx with orig-insp.
(define (disarm stx)
  (syntax-disarm stx
                 orig-insp))
;; parse : syntax-object (box (list-of syntax-object)) -> s-re (see re.rkt)
;; Checks stx for errors and generates the plain s-exp form for it.
;; Expands lex-abbrevs and applies lex-trans.
;;
;; disappeared-uses is a box; every lex-abbrev identifier and every operator
;; identifier that is looked up is consed onto its content.
;;
;; Raises a syntax error when
;;  - an identifier is not bound to a lex-abbrev, or abbreviations form a cycle
;;  - a built-in operator gets the wrong number of arguments
;;    (repetition: 3, complement: 1, char-range: 2, char-complement: 1)
;;  - the repetition bounds are not valid, or low is greater than high
;;  - a char-range endpoint is not a char or single-char string, or the
;;    endpoints are out of order
;;  - the argument of char-complement does not satisfy char-set?
;;  - an operator identifier is not bound to a lex-trans
;;  - the form is none of the above
(define (parse stx disappeared-uses)
  (let loop ([stx stx]
             [disappeared-uses disappeared-uses]
             ;; seen-lex-abbrevs: id-table
             ;; The abbreviations currently being expanded, for cycle detection.
             [seen-lex-abbrevs (make-immutable-free-id-table)])
    ;; recur parses a sub-form; recur/abbrev additionally records that the
    ;; abbreviation id is being expanded.
    (let ([recur (λ (s)
                   (loop (syntax-rearm s stx)
                         disappeared-uses
                         seen-lex-abbrevs))]
          [recur/abbrev (λ (s id)
                          (loop (syntax-rearm s stx)
                                disappeared-uses
                                (free-id-table-set seen-lex-abbrevs id id)))])
      (syntax-case (disarm stx) (repetition union intersection complement concatenation
                                            char-range char-complement)
        [_
         (identifier? stx)
         (let ([expansion (syntax-local-value stx (λ () #f))])
           (unless (lex-abbrev? expansion)
             (raise-syntax-error 'regular-expression
                                 "undefined abbreviation"
                                 stx))
           ;; Check for cycles.
           (when (free-id-table-ref seen-lex-abbrevs stx (λ () #f))
             (raise-syntax-error 'regular-expression
                                 "illegal lex-abbrev cycle detected"
                                 stx
                                 #f
                                 (list (free-id-table-ref seen-lex-abbrevs stx))))
           (set-box! disappeared-uses (cons stx (unbox disappeared-uses)))
           ;; The abbreviation is stored as a thunk; call it to get the syntax.
           (recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx))]
        [_
         (or (char? (syntax-e stx)) (string? (syntax-e stx)))
         (syntax-e stx)]
        [(repetition ARG ...)
         (let ([arg-list (syntax->list #'(ARG ...))])
           ;; repetition takes low, high and the expression: three arguments.
           (unless (= 3 (length arg-list))
             (bad-args stx 3))
           (define low (syntax-e (car arg-list)))
           (define high (syntax-e (cadr arg-list)))
           (define re (caddr arg-list))
           (unless (and (number? low) (exact? low) (integer? low) (>= low 0))
             (raise-syntax-error #f "not a non-negative exact integer" stx (car arg-list)))
           (unless (or (and (number? high) (exact? high) (integer? high) (>= high 0))
                       (eqv? high +inf.0))
             (raise-syntax-error #f "not a non-negative exact integer or +inf.0" stx (cadr arg-list)))
           (unless (<= low high)
             (raise-syntax-error #f "the first argument is not less than or equal to the second argument" stx))
           `(repetition ,low ,high ,(recur re)))]
        [(union RE ...)
         `(union ,@(map recur (syntax->list #'(RE ...))))]
        [(intersection RE ...)
         `(intersection ,@(map recur (syntax->list #'(RE ...))))]
        [(complement RE ...)
         (let ([re-list (syntax->list #'(RE ...))])
           (unless (= 1 (length re-list))
             (bad-args stx 1))
           `(complement ,(recur (car re-list))))]
        [(concatenation RE ...)
         `(concatenation ,@(map recur (syntax->list #'(RE ...))))]
        [(char-range ARG ...)
         (let ([arg-list (syntax->list #'(ARG ...))])
           (unless (= 2 (length arg-list))
             (bad-args stx 2))
           (let ([i1 (char-range-arg (car arg-list) stx)]
                 [i2 (char-range-arg (cadr arg-list) stx)])
             (if (<= i1 i2)
                 `(char-range ,(integer->char i1) ,(integer->char i2))
                 (raise-syntax-error #f "the first argument does not precede or equal second argument" stx))))]
        [(char-complement ARG ...)
         (let ([arg-list (syntax->list #'(ARG ...))])
           (unless (= 1 (length arg-list))
             (bad-args stx 1))
           (define parsed (recur (car arg-list)))
           (unless (char-set? parsed)
             (raise-syntax-error #f "not a character set" stx (car arg-list)))
           `(char-complement ,parsed))]
        [(OP form ...)
         (identifier? #'OP)
         (let ([expansion (syntax-local-value #'OP (λ () #f))])
           (set-box! disappeared-uses (cons #'OP (unbox disappeared-uses)))
           (cond
             [(lex-trans? expansion)
              ;; Apply the transformer to the disarmed form and parse its result.
              (recur ((lex-trans-f expansion) (disarm stx)))]
             [expansion
              (raise-syntax-error 'regular-expression "not a lex-trans" stx)]
             [else
              (raise-syntax-error 'regular-expression "undefined operator" stx)]))]
        [_ (raise-syntax-error 'regular-expression "not a char, string, identifier, or (op args ...)" stx)]))))
;; char-set? : s-re -> bool ;; char-set? : s-re -> bool
;; A char-set is an re that matches only strings of length 1. ;; A char-set is an re that matches only strings of length 1.
;; char-set? is conservative. ;; char-set? is conservative.
(define (char-set? s-re) (define (char-set? s-re)
(cond (cond
((char? s-re) #t) [(char? s-re)]
((string? s-re) (= (string-length s-re) 1)) [(string? s-re) (= (string-length s-re) 1)]
((list? s-re) [(list? s-re) (case (car s-re)
(let ((op (car s-re))) [(union intersection) (andmap char-set? (cdr s-re))]
(case op [(char-range char-complement) #t]
((union intersection) (andmap char-set? (cdr s-re))) [(repetition) (and (= (cadr s-re) (caddr s-re)) (char-set? (cadddr s-re)))]
((char-range char-complement) #t) [(concatenation) (and (= 2 (length s-re)) (char-set? (cadr s-re)))]
((repetition) (else #f))]
(and (= (cadr s-re) (caddr s-re)) (char-set? (cadddr s-re)))) [else #f]))
((concatenation)
(and (= 2 (length s-re)) (char-set? (cadr s-re))))
(else #f))))
(else #f)))
(module+ test (module+ test
(require rackunit)) (require rackunit)
(module+ test (check-equal? (char-set? #\a) #t)
(check-equal? (char-set? #\a) #t) (check-equal? (char-set? "12") #f)
(check-equal? (char-set? "12") #f) (check-equal? (char-set? "1") #t)
(check-equal? (char-set? "1") #t) (check-equal? (char-set? '(repetition 1 2 #\1)) #f)
(check-equal? (char-set? '(repetition 1 2 #\1)) #f) (check-equal? (char-set? '(repetition 1 1 "12")) #f)
(check-equal? (char-set? '(repetition 1 1 "12")) #f) (check-equal? (char-set? '(repetition 1 1 "1")) #t)
(check-equal? (char-set? '(repetition 1 1 "1")) #t) (check-equal? (char-set? '(union "1" "2" "3")) #t)
(check-equal? (char-set? '(union "1" "2" "3")) #t) (check-equal? (char-set? '(union "1" "" "3")) #f)
(check-equal? (char-set? '(union "1" "" "3")) #f) (check-equal? (char-set? '(intersection "1" "2" (union "3" "4"))) #t)
(check-equal? (char-set? '(intersection "1" "2" (union "3" "4"))) #t) (check-equal? (char-set? '(intersection "1" "")) #f)
(check-equal? (char-set? '(intersection "1" "")) #f) (check-equal? (char-set? '(complement "1")) #f)
(check-equal? (char-set? '(complement "1")) #f) (check-equal? (char-set? '(concatenation "1" "2")) #f)
(check-equal? (char-set? '(concatenation "1" "2")) #f) (check-equal? (char-set? '(concatenation "" "2")) #f)
(check-equal? (char-set? '(concatenation "" "2")) #f) (check-equal? (char-set? '(concatenation "1")) #t)
(check-equal? (char-set? '(concatenation "1")) #t) (check-equal? (char-set? '(concatenation "12")) #f)
(check-equal? (char-set? '(concatenation "12")) #f) (check-equal? (char-set? '(char-range #\1 #\2)) #t)
(check-equal? (char-set? '(char-range #\1 #\2)) #t) (check-equal? (char-set? '(char-complement #\1)) #t))
(check-equal? (char-set? '(char-complement #\1)) #t))
;; yikes... these test cases all have the wrong arity, now. ;; yikes... these test cases all have the wrong arity, now.
;; and by "now", I mean it's been broken since before we ;; and by "now", I mean it's been broken since before we
;; moved to git. ;; moved to git.
(module+ test (module+ test
(check-equal? (parse #'#\a null) #\a) (check-equal? (parse #'#\a null) #\a)
(check-equal? (parse #'"1" null) "1") (check-equal? (parse #'"1" null) "1")
(check-equal? (parse #'(repetition 1 1 #\1) null) (check-equal? (parse #'(repetition 1 1 #\1) null)
'(repetition 1 1 #\1)) '(repetition 1 1 #\1))
(check-equal? (parse #'(repetition 0 +inf.0 #\1) null) '(repetition 0 +inf.0 #\1)) (check-equal? (parse #'(repetition 0 +inf.0 #\1) null) '(repetition 0 +inf.0 #\1))
(check-equal? (parse #'(union #\1 (union "2") (union)) null) (check-equal? (parse #'(union #\1 (union "2") (union)) null)
'(union #\1 (union "2") (union))) '(union #\1 (union "2") (union)))
(check-equal? (parse #'(intersection #\1 (intersection "2") (intersection)) (check-equal? (parse #'(intersection #\1 (intersection "2") (intersection))
null) null)
'(intersection #\1 (intersection "2") (intersection))) '(intersection #\1 (intersection "2") (intersection)))
(check-equal? (parse #'(complement (union #\1 #\2)) (check-equal? (parse #'(complement (union #\1 #\2))
null) null)
'(complement (union #\1 #\2))) '(complement (union #\1 #\2)))
(check-equal? (parse #'(concatenation "1" "2" (concatenation)) null) (check-equal? (parse #'(concatenation "1" "2" (concatenation)) null)
'(concatenation "1" "2" (concatenation))) '(concatenation "1" "2" (concatenation)))
(check-equal? (parse #'(char-range "1" #\1) null) '(char-range #\1 #\1)) (check-equal? (parse #'(char-range "1" #\1) null) '(char-range #\1 #\1))
(check-equal? (parse #'(char-range #\1 "1") null) '(char-range #\1 #\1)) (check-equal? (parse #'(char-range #\1 "1") null) '(char-range #\1 #\1))
(check-equal? (parse #'(char-range "1" "3") null) '(char-range #\1 #\3)) (check-equal? (parse #'(char-range "1" "3") null) '(char-range #\1 #\3))
(check-equal? (parse #'(char-complement (union "1" "2")) null) (check-equal? (parse #'(char-complement (union "1" "2")) null)
'(char-complement (union "1" "2")))) '(char-complement (union "1" "2"))))
; )

@ -1,9 +1,7 @@
(module token-syntax mzscheme #lang racket/base
(provide make-terminals-def terminals-def-t terminals-def?
make-e-terminals-def e-terminals-def-t e-terminals-def?)
;; The things needed at compile time to handle definition of tokens ;; The things needed at compile time to handle definition of tokens
(define-struct terminals-def (t))
(provide make-terminals-def terminals-def-t terminals-def? (define-struct e-terminals-def (t))
make-e-terminals-def e-terminals-def-t e-terminals-def?)
(define-struct terminals-def (t))
(define-struct e-terminals-def (t))
)

@ -1,92 +1,80 @@
(module token mzscheme #lang racket/base
(require (for-syntax racket/base "token-syntax.rkt"))
(require-for-syntax "token-syntax.rkt") ;; Defining tokens
;; Defining tokens (provide define-tokens define-empty-tokens make-token token?
(protect-out (rename-out [token-name real-token-name]))
(protect-out (rename-out [token-value real-token-value]))
(rename-out [token-name* token-name][token-value* token-value])
(struct-out position)
(struct-out position-token)
(struct-out srcloc-token))
(provide define-tokens define-empty-tokens make-token token?
(protect (rename token-name real-token-name))
(protect (rename token-value real-token-value))
(rename token-name* token-name)
(rename token-value* token-value)
(struct position (offset line col))
(struct position-token (token start-pos end-pos))
(struct srcloc-token (token srcloc)))
;; A token is either
;; - symbol
;; - (make-token symbol any)
(define-struct token (name value) #:inspector (make-inspector))
;; A token is either ;; token-name*: token -> symbol
;; - symbol (define (token-name* t)
;; - (make-token symbol any) (cond
(define-struct token (name value) (make-inspector)) [(symbol? t) t]
[(token? t) (token-name t)]
[else (raise-type-error 'token-name "symbol or struct:token" 0 t)]))
;; token-name*: token -> symbol ;; token-value*: token -> any
(define (token-name* t) (define (token-value* t)
(cond (cond
((symbol? t) t) [(symbol? t) #f]
((token? t) (token-name t)) [(token? t) (token-value t)]
(else (raise-type-error [else (raise-type-error 'token-value "symbol or struct:token" 0 t)]))
'token-name
"symbol or struct:token"
0
t))))
;; token-value*: token -> any (define-for-syntax (make-ctor-name n)
(define (token-value* t) (datum->syntax n
(cond (string->symbol (format "token-~a" (syntax-e n)))
((symbol? t) #f) n
((token? t) (token-value t)) n))
(else (raise-type-error
'token-value
"symbol or struct:token"
0
t))))
(define-for-syntax (make-ctor-name n) (define-for-syntax ((make-define-tokens empty?) stx)
(datum->syntax-object n (syntax-case stx ()
(string->symbol (format "token-~a" (syntax-e n))) [(_ NAME (TOKEN ...))
n (andmap identifier? (syntax->list #'(TOKEN ...)))
n)) (with-syntax (((marked-token ...)
(map values #;(make-syntax-introducer)
(syntax->list #'(TOKEN ...)))))
(quasisyntax/loc stx
(begin
(define-syntax NAME
#,(if empty?
#'(make-e-terminals-def (quote-syntax (marked-token ...)))
#'(make-terminals-def (quote-syntax (marked-token ...)))))
#,@(map
(λ (n)
(when (eq? (syntax-e n) 'error)
(raise-syntax-error
#f
"Cannot define a token named error."
stx))
(if empty?
#`(define (#,(make-ctor-name n))
'#,n)
#`(define (#,(make-ctor-name n) x)
(make-token '#,n x))))
(syntax->list #'(TOKEN ...)))
#;(define marked-token #f) #;...)))]
[(_ ...)
(raise-syntax-error #f
"must have the form (define-tokens name (identifier ...)) or (define-empty-tokens name (identifier ...))"
stx)]))
(define-for-syntax (make-define-tokens empty?) (define-syntax define-tokens (make-define-tokens #f))
(lambda (stx) (define-syntax define-empty-tokens (make-define-tokens #t))
(syntax-case stx ()
((_ name (token ...))
(andmap identifier? (syntax->list (syntax (token ...))))
(with-syntax (((marked-token ...)
(map values #;(make-syntax-introducer)
(syntax->list (syntax (token ...))))))
(quasisyntax/loc stx
(begin
(define-syntax name
#,(if empty?
#'(make-e-terminals-def (quote-syntax (marked-token ...)))
#'(make-terminals-def (quote-syntax (marked-token ...)))))
#,@(map
(lambda (n)
(when (eq? (syntax-e n) 'error)
(raise-syntax-error
#f
"Cannot define a token named error."
stx))
(if empty?
#`(define (#,(make-ctor-name n))
'#,n)
#`(define (#,(make-ctor-name n) x)
(make-token '#,n x))))
(syntax->list (syntax (token ...))))
#;(define marked-token #f) #;...))))
((_ ...)
(raise-syntax-error
#f
"must have the form (define-tokens name (identifier ...)) or (define-empty-tokens name (identifier ...))"
stx)))))
(define-syntax define-tokens (make-define-tokens #f)) (define-struct position (offset line col) #:inspector #f)
(define-syntax define-empty-tokens (make-define-tokens #t)) (define-struct position-token (token start-pos end-pos) #:inspector #f)
(define-struct position (offset line col) #f) (define-struct srcloc-token (token srcloc) #:inspector #f)
(define-struct position-token (token start-pos end-pos) #f)
(define-struct srcloc-token (token srcloc) #f)
)

@ -1,6 +1,5 @@
#lang racket #lang racket/base
(require racket/promise "util.rkt")
(require "util.rkt")
(provide (all-defined-out)) (provide (all-defined-out))
@ -10,36 +9,33 @@
;; get-chars-for-x : (nat -> bool) (listof (list nat nat bool)) -> (listof (cons nat nat)) ;; get-chars-for-x : (nat -> bool) (listof (list nat nat bool)) -> (listof (cons nat nat))
(define (get-chars-for char-x? mapped-chars) (define (get-chars-for char-x? mapped-chars)
(cond (cond
((null? mapped-chars) null) [(null? mapped-chars) null]
(else [else
(let* ((range (car mapped-chars)) (define range (car mapped-chars))
(low (car range)) (define low (car range))
(high (cadr range)) (define high (cadr range))
(x (char-x? low))) (define x (char-x? low))
(cond (cond
((caddr range) [(caddr range)
(if x (if x
(cons (cons low high) (cons (cons low high) (get-chars-for char-x? (cdr mapped-chars)))
(get-chars-for char-x? (cdr mapped-chars))) (get-chars-for char-x? (cdr mapped-chars)))]
(get-chars-for char-x? (cdr mapped-chars)))) [else
(else (let loop ([range-start low]
(let loop ((range-start low) [i (car range)]
(i (car range)) [parity x])
(parity x)) (cond
(cond [(> i high)
((> i high) (if parity
(if parity (cons (cons range-start high) (get-chars-for char-x? (cdr mapped-chars)))
(cons (cons range-start high) (get-chars-for char-x? (cdr mapped-chars))) (get-chars-for char-x? (cdr mapped-chars)))]
(get-chars-for char-x? (cdr mapped-chars)))) [(eq? parity (char-x? i))
((eq? parity (char-x? i)) (loop range-start (add1 i) parity)]
(loop range-start (add1 i) parity)) [parity (cons (cons range-start (sub1 i)) (loop i (add1 i) #f))]
(parity [else (loop i (add1 i) #t)]))])]))
(cons (cons range-start (sub1 i)) (loop i (add1 i) #f)))
(else
(loop i (add1 i) #t))))))))))
(define (compute-ranges x?) (define (compute-ranges x?)
(delay (get-chars-for (lambda (x) (x? (integer->char x))) mapped-chars))) (delay (get-chars-for (λ (x) (x? (integer->char x))) mapped-chars)))
(define alphabetic-ranges (compute-ranges char-alphabetic?)) ;; 325 (define alphabetic-ranges (compute-ranges char-alphabetic?)) ;; 325
(define lower-case-ranges (compute-ranges char-lower-case?)) ;; 405 (define lower-case-ranges (compute-ranges char-lower-case?)) ;; 405
@ -61,7 +57,7 @@
(check-equal? (get-chars-for odd? '()) '()) (check-equal? (get-chars-for odd? '()) '())
(check-equal? (get-chars-for odd? '((1 4 #f) (8 13 #f))) (check-equal? (get-chars-for odd? '((1 4 #f) (8 13 #f)))
'((1 . 1) (3 . 3) (9 . 9) (11 . 11) (13 . 13))) '((1 . 1) (3 . 3) (9 . 9) (11 . 11) (13 . 13)))
(check-equal? (get-chars-for (lambda (x) (check-equal? (get-chars-for (λ (x)
(odd? (quotient x 10))) (odd? (quotient x 10)))
'((1 5 #t) (17 19 #t) (21 51 #f))) '((1 5 #t) (17 19 #t) (21 51 #f)))
'((17 . 19) (30 . 39) (50 . 51)))) '((17 . 19) (30 . 39) (50 . 51))))

@ -1,4 +1,5 @@
#lang racket #lang racket/base
(require (for-syntax racket/base))
(provide (all-defined-out)) (provide (all-defined-out))
@ -10,18 +11,18 @@
(module+ test (module+ test
(require rackunit)) (require rackunit))
;; test-block: (test-block defs (code right-ans) ...)
;; Expands into a `test` submodule that requires rackunit, evaluates the
;; `let*`-style bindings in `defs`, and then checks with `check-equal?`
;; that each `code` expression produces its paired `right-ans`.
(define-syntax test-block
  (syntax-rules ()
    [(_ defs (code right-ans) ...)
     (module+ test
       (require rackunit)
       (let* defs
         ;; one check per (code right-ans) pair, in the order written
         (let ([real-ans code])
           (check-equal? real-ans right-ans))
         ...))]))
#;(define-syntax test-block #;(define-syntax test-block
(syntax-rules () (syntax-rules ()
((_ defs (code right-ans) ...) ((_ x ...) (void))))
(let* defs
(let ((real-ans code))
(unless (equal? real-ans right-ans)
(printf "Test failed: ~e gave ~e. Expected ~e\n"
'code real-ans 'right-ans))) ...))))
;; test-block (disabled variant): accepts any number of sub-forms,
;; discards them unevaluated, and expands to (void).
(define-syntax test-block
  (syntax-rules ()
    [(_ ignored ...) (void)]))
;; A cache is (X ( -> Y) -> Y) ;; A cache is (X ( -> Y) -> Y)
@ -31,23 +32,22 @@
;; returned. ;; returned.
;; Xs are compared with equal? ;; Xs are compared with equal?
(define (make-cache) (define (make-cache)
(let ((table (make-hash))) (let ([table (make-hash)])
(lambda (key build) (λ (key build)
(hash-ref table key (hash-ref table key (λ ()
(lambda () (let ([new (build)])
(let ((new (build))) (hash-set! table key new)
(hash-set! table key new) new))))))
new))))))
(module+ test (module+ test
(define cache (make-cache)) (define cache (make-cache))
(check-equal? (cache '(s 1 2) (lambda () 9)) 9) (check-equal? (cache '(s 1 2) (λ () 9)) 9)
(check-equal? (cache '(s 2 1) (lambda () 8)) 8) (check-equal? (cache '(s 2 1) (λ () 8)) 8)
(check-equal? (cache '(s 1 2) (lambda () 1)) 9) (check-equal? (cache '(s 1 2) (λ () 1)) 9)
(check-equal? (cache (cons 's (cons 0 (cons +inf.0 10))) (check-equal? (cache (cons 's (cons 0 (cons +inf.0 10)))
(lambda () 22)) 22) (λ () 22)) 22)
(check-equal? (cache (cons 's (cons 0 (cons +inf.0 10))) (check-equal? (cache (cons 's (cons 0 (cons +inf.0 10)))
(lambda () 1)) 22)) (λ () 1)) 22))
@ -55,8 +55,8 @@
;; makes a function that returns a higher number by 1, each time ;; makes a function that returns a higher number by 1, each time
;; it is called. ;; it is called.
(define (make-counter) (define (make-counter)
(let ((counter 0)) (let ([counter 0])
(lambda () (λ ()
(begin0 (begin0
counter counter
(set! counter (add1 counter)))))) (set! counter (add1 counter))))))
@ -76,33 +76,33 @@
;; previous entry. l must be grouped by indexes. ;; previous entry. l must be grouped by indexes.
(define (remove-dups l index acc) (define (remove-dups l index acc)
(cond (cond
((null? l) (reverse acc)) [(null? l) (reverse acc)]
((null? acc) (remove-dups (cdr l) index (cons (car l) acc))) [(null? acc) (remove-dups (cdr l) index (cons (car l) acc))]
((= (index (car acc)) (index (car l))) [(= (index (car acc)) (index (car l)))
(remove-dups (cdr l) index acc)) (remove-dups (cdr l) index acc)]
(else [else
(remove-dups (cdr l) index (cons (car l) acc))))) (remove-dups (cdr l) index (cons (car l) acc))]))
(module+ test (module+ test
(check-equal? (remove-dups '((1 2) (2 2) (1 3) (1 4) (check-equal? (remove-dups '((1 2) (2 2) (1 3) (1 4)
(100 4) (0 5)) cadr null) (100 4) (0 5)) cadr null)
'((1 2) (1 3) (1 4) (0 5))) '((1 2) (1 3) (1 4) (0 5)))
(check-equal? (remove-dups null error null) null)) (check-equal? (remove-dups null error null) null))
;; do-simple-equiv : (list-of X) (X -> nat) -> (list-of X) ;; do-simple-equiv : (list-of X) (X -> nat) -> (list-of X)
;; Sorts l according to index and removes the entries with duplicate ;; Sorts l according to index and removes the entries with duplicate
;; indexes. ;; indexes.
(define (do-simple-equiv l index) (define (do-simple-equiv l index)
(let ((ordered (sort l (lambda (a b) (< (index a) (index b)))))) (define ordered (sort l (λ (a b) (< (index a) (index b)))))
(remove-dups ordered index null))) (remove-dups ordered index null))
(module+ test (module+ test
(check-equal? (do-simple-equiv '((2 2) (1 4) (1 2) (check-equal? (do-simple-equiv '((2 2) (1 4) (1 2)
(100 4) (1 3) (0 5)) (100 4) (1 3) (0 5))
cadr) cadr)
'((2 2) (1 3) (1 4) (0 5))) '((2 2) (1 3) (1 4) (0 5)))
(check-equal? (do-simple-equiv null error) null)) (check-equal? (do-simple-equiv null error) null))
;; replace : (list-of X) (X -> bool) (X -> (list-of X)) (list-of X) -> ;; replace : (list-of X) (X -> bool) (X -> (list-of X)) (list-of X) ->
;; (list-of X) ;; (list-of X)
@ -110,16 +110,16 @@
;; list. ;; list.
(define (replace l pred? get acc) (define (replace l pred? get acc)
(cond (cond
((null? l) acc) [(null? l) acc]
((pred? (car l)) (replace (cdr l) pred? get (append (get (car l)) acc))) [(pred? (car l)) (replace (cdr l) pred? get (append (get (car l)) acc))]
(else (replace (cdr l) pred? get (cons (car l) acc))))) [else (replace (cdr l) pred? get (cons (car l) acc))]))
(module+ test (module+ test
(check-equal? (replace null void (lambda () (list 1)) null) null) (check-equal? (replace null void (λ () (list 1)) null) null)
(check-equal? (replace '(1 2 3 4 3 5) (check-equal? (replace '(1 2 3 4 3 5)
(lambda (x) (= x 3)) (λ (x) (= x 3))
(lambda (x) (list 1 2 3)) (λ (x) (list 1 2 3))
null) null)
'(5 1 2 3 4 1 2 3 2 1))) '(5 1 2 3 4 1 2 3 2 1)))

@ -1,280 +1,250 @@
#lang racket/base
;; Constructs to create and access grammars, the internal ;; Constructs to create and access grammars, the internal
;; representation of the input to the parser generator. ;; representation of the input to the parser generator.
(module grammar mzscheme (require racket/class
(except-in racket/list remove-duplicates)
(require mzlib/class "yacc-helper.rkt"
mzlib/list racket/contract)
"yacc-helper.rkt"
racket/contract) ;; Each production has a unique index 0 <= index <= number of productions
(define-struct prod (lhs rhs index prec action) #:inspector (make-inspector) #:mutable)
;; Each production has a unique index 0 <= index <= number of productions
(define-struct prod (lhs rhs index prec action) (make-inspector)) ;; The dot-pos field is the index of the element in the rhs
;; of prod that the dot immediately precedes.
;; The dot-pos field is the index of the element in the rhs ;; Thus 0 <= dot-pos <= (vector-length rhs).
;; of prod that the dot immediately precedes. (define-struct item (prod dot-pos) #:inspector (make-inspector))
;; Thus 0 <= dot-pos <= (vector-length rhs).
(define-struct item (prod dot-pos) (make-inspector)) ;; gram-sym = (union term? non-term?)
;; Each term has a unique index 0 <= index < number of terms
;; gram-sym = (union term? non-term?) ;; Each non-term has a unique index 0 <= index < number of non-terms
;; Each term has a unique index 0 <= index < number of terms (define-struct term (sym index prec) #:inspector (make-inspector) #:mutable)
;; Each non-term has a unique index 0 <= index < number of non-terms (define-struct non-term (sym index) #:inspector (make-inspector) #:mutable)
(define-struct term (sym index prec) (make-inspector))
(define-struct non-term (sym index) (make-inspector)) ;; a precedence declaration.
(define-struct prec (num assoc) #:inspector (make-inspector))
;; a precedence declaration.
(define-struct prec (num assoc) (make-inspector)) (provide/contract
[make-item (prod? (or/c #f natural-number/c) . -> . item?)]
(provide/contract [make-term (symbol? (or/c #f natural-number/c) (or/c prec? #f) . -> . term?)]
(make-item (prod? (or/c #f natural-number/c) . -> . item?)) [make-non-term (symbol? (or/c #f natural-number/c) . -> . non-term?)]
(make-term (symbol? (or/c #f natural-number/c) (or/c prec? #f) . -> . term?)) [make-prec (natural-number/c (or/c 'left 'right 'nonassoc) . -> . prec?)]
(make-non-term (symbol? (or/c #f natural-number/c) . -> . non-term?)) [make-prod (non-term? (vectorof (or/c non-term? term?))
(make-prec (natural-number/c (or/c 'left 'right 'nonassoc) . -> . prec?)) (or/c #f natural-number/c) (or/c #f prec?) syntax? . -> . prod?)])
(make-prod (non-term? (vectorof (or/c non-term? term?))
(or/c #f natural-number/c) (or/c #f prec?) syntax? . -> . prod?))) (provide
;; Things that work on items
(provide start-item? item-prod item->string
sym-at-dot move-dot-right item<? item-dot-pos
;; Things that work on items ;; Things that operate on grammar symbols
start-item? item-prod item->string gram-sym-symbol gram-sym-index term-prec gram-sym->string
sym-at-dot move-dot-right item<? item-dot-pos non-term? term? non-term<? term<?
term-list->bit-vector term-index non-term-index
;; Things that operate on grammar symbols
gram-sym-symbol gram-sym-index term-prec gram-sym->string ;; Things that work on precs
non-term? term? non-term<? term<? prec-num prec-assoc
term-list->bit-vector term-index non-term-index
grammar%
;; Things that work on precs
prec-num prec-assoc ;; Things that work on productions
prod-index prod-prec prod-rhs prod-lhs prod-action)
grammar%
;; Things that work on productions ;;---------------------- LR items --------------------------
prod-index prod-prec prod-rhs prod-lhs prod-action)
;; item<?: LR-item * LR-item -> bool
;; Lexicographic comparison on two items.
;;---------------------- LR items -------------------------- (define (item<? i1 i2)
(define p1 (prod-index (item-prod i1)))
;; item<?: LR-item * LR-item -> bool (define p2 (prod-index (item-prod i2)))
;; Lexicographic comparison on two items. (or (< p1 p2)
(define (item<? i1 i2) (and (= p1 p2)
(let ((p1 (prod-index (item-prod i1))) (< (item-dot-pos i1) (item-dot-pos i2)))))
(p2 (prod-index (item-prod i2))))
(or (< p1 p2) ;; start-item?: LR-item -> bool
(and (= p1 p2) ;; The start production always has index 0
(let ((d1 (item-dot-pos i1)) (define (start-item? i)
(d2 (item-dot-pos i2))) (zero? (non-term-index (prod-lhs (item-prod i)))))
(< d1 d2))))))
;; start-item?: LR-item -> bool ;; move-dot-right: LR-item -> LR-item | #f
;; The start production always has index 0 ;; moves the dot to the right in the item, unless it is at its
(define (start-item? i) ;; rightmost, then it returns false
(= 0 (non-term-index (prod-lhs (item-prod i))))) (define (move-dot-right i)
(cond
[(= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f]
;; move-dot-right: LR-item -> LR-item | #f [else (make-item (item-prod i)
;; moves the dot to the right in the item, unless it is at its (add1 (item-dot-pos i)))]))
;; rightmost, then it returns false
(define (move-dot-right i) ;; sym-at-dot: LR-item -> gram-sym | #f
;; returns the symbol after the dot in the item or #f if there is none
(define (sym-at-dot i)
  ;; The dot position doubles as an index into the production's rhs vector.
  (let* ([symbols (prod-rhs (item-prod i))]
         [dot (item-dot-pos i)])
    ;; A dot sitting just past the last rhs element has nothing after it.
    (if (= dot (vector-length symbols))
        #f
        (vector-ref symbols dot))))
;; item->string: LR-item -> string
(define (item->string it)
  ;; Renders an item as "lhs -> sym sym . sym ", where ". " marks the dot
  ;; position and every rhs symbol is followed by a single space.
  (define production (item-prod it))
  (define rhs (prod-rhs production))
  (define dot (item-dot-pos it))
  ;; one rhs symbol plus its trailing space
  (define (sym->string gs)
    (format "~a " (if (term? gs) (term-sym gs) (non-term-sym gs))))
  ;; rhs symbols in order, with ". " inserted in front of the one at the dot
  (define pieces
    (for/list ([gs (in-vector rhs)]
               [i (in-naturals)])
      (if (= i dot)
          (string-append ". " (sym->string gs))
          (sym->string gs))))
  ;; a dot at the very end of the rhs is printed after all the symbols
  (define trailing-dot
    (if (= (vector-length rhs) dot) ". " ""))
  (apply string-append
         (format "~a -> " (non-term-sym (prod-lhs production)))
         (append pieces (list trailing-dot))))
;; --------------------- Grammar Symbols --------------------------
;; non-term<?: non-term * non-term -> bool
;; Orders non-terminals by their index field.
(define (non-term<? left right)
  (let ([left-index (non-term-index left)]
        [right-index (non-term-index right)])
    (< left-index right-index)))
;; term<?: term * term -> bool
;; Orders terminals by their index field.
(define (term<? t1 t2)
  (let ([index1 (term-index t1)]
        [index2 (term-index t2)])
    (< index1 index2)))
;; gram-sym-index: gram-sym -> index
;; Reads the index field of a grammar symbol; anything that is not a term
;; is handed to the non-term accessor.
(define (gram-sym-index gs)
  (cond
    [(term? gs) (term-index gs)]
    [else (non-term-index gs)]))
;; gram-sym-symbol: gram-sym -> symbol
;; Reads the sym field of a grammar symbol; anything that is not a term
;; is handed to the non-term accessor.
(define (gram-sym-symbol gs)
  (cond
    [(term? gs) (term-sym gs)]
    [else (non-term-sym gs)]))
;; gram-sym->string: gram-sym -> string
;; The printed name of a grammar symbol, taken from its sym field.
(define (gram-sym->string gs)
  (let ([name (gram-sym-symbol gs)])
    (symbol->string name)))
;; term-list->bit-vector: term list -> int
;; Creates a number where the nth bit is 1 if the term with index n is in
;; the list, and whose nth bit is 0 otherwise
(define (term-list->bit-vector terms)
  ;; Start from 0 (the empty set) and switch on one bit per listed term;
  ;; duplicates are harmless because the bits are combined with inclusive or.
  (for/fold ([bits 0])
            ([t (in-list terms)])
    (bitwise-ior bits (arithmetic-shift 1 (term-index t)))))
;; ------------------------- Grammar ------------------------------
;; grammar%: class bundling the productions, terminals and non-terminals of
;; a grammar together with a precomputed nullable table.
;; Constructing an instance mutates the index fields of the supplied
;; non-terms, terms and productions (see the three `for` loops below).
(define grammar%
(class object%
(super-instantiate ())
;; prods: production list list
;; where there is one production list per non-term
(init prods)
;; init-prods: production list
;; The productions parsing can start from
;; terms, non-terms, end-terms: stored as given and returned by the getters below
(init-field init-prods terms non-terms end-terms)
;; list of all productions
(define all-prods (apply append prods))
(define num-prods (length all-prods))
(define num-terms (length terms))
(define num-non-terms (length non-terms))
;; Renumber every non-term, term and production with its position in
;; its list, overwriting whatever index it carried before.
(for ([(nt count) (in-indexed non-terms)])
(set-non-term-index! nt count))
(for ([(t count) (in-indexed terms)])
(set-term-index! t count))
(for ([(prod count) (in-indexed all-prods)])
(set-prod-index! prod count))
;; indexed by the index of the non-term - contains the list of productions for that non-term
;; The slot is chosen from the lhs of the first production of each inner
;; list, so every inner list must be non-empty (car is taken).
(define nt->prods
(let ((v (make-vector (length prods) #f)))
(for ([prods (in-list prods)])
(vector-set! v (non-term-index (prod-lhs (car prods))) prods))
v))
;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable
(define nullable-non-terms
(nullable all-prods num-non-terms))
;; Plain accessors for the values computed or stored above.
(define/public (get-num-terms) num-terms)
(define/public (get-num-non-terms) num-non-terms)
(define/public (get-prods-for-non-term nt)
(vector-ref nt->prods (non-term-index nt)))
(define/public (get-prods) all-prods)
(define/public (get-init-prods) init-prods)
(define/public (get-terms) terms)
(define/public (get-non-terms) non-terms)
(define/public (get-num-prods) num-prods)
(define/public (get-end-terms) end-terms)
;; nullable-non-term?: looks nt up in the nullable table by its index.
(define/public (nullable-non-term? nt)
(vector-ref nullable-non-terms (non-term-index nt)))
;; nullable-after-dot?: true iff every rhs symbol from the dot position to
;; the end is a nullable non-term (vacuously true when the dot is at the end).
(define/public (nullable-after-dot? item)
(define rhs (prod-rhs (item-prod item)))
(define prod-length (vector-length rhs))
(let loop ((i (item-dot-pos item)))
(cond
[(< i prod-length)
(and (non-term? (vector-ref rhs i))
(nullable-non-term? (vector-ref rhs i))
(loop (add1 i)))]
;; a test-only clause: yields the test's own value, #t.
;; NOTE(review): no clause covers i > prod-length, so cond would
;; return void for a dot position past the end of the rhs.
[(= i prod-length)])))
;; Despite the "-thunk" names, these two return one-argument procedures
;; that forward to the corresponding method.
(define/public (nullable-non-term-thunk)
(λ (nt) (nullable-non-term? nt)))
(define/public (nullable-after-dot?-thunk)
(λ (item) (nullable-after-dot? item)))))
;; nullable: production list * int -> non-term set
;; determines which non-terminals can derive epsilon
(define (nullable prods num-nts)
(define nullable (make-vector num-nts #f))
(define added #f)
;; possible-nullable: production list -> production list
;; Removes all productions that have a terminal
(define (possible-nullable prods)
(for/list ([prod (in-list prods)]
#:when (vector-andmap non-term? (prod-rhs prod)))
prod))
;; set-nullables: production list -> production list
;; makes one pass through the productions, adding the ones
;; known to be nullable now to nullable and returning a list
;; of productions that we don't know about yet.
(define (set-nullables prods)
(cond (cond
((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f) [(null? prods) null]
(else (make-item (item-prod i) [(vector-ref nullable (gram-sym-index (prod-lhs (car prods))))
(add1 (item-dot-pos i)))))) (set-nullables (cdr prods))]
[(vector-andmap (λ (nt) (vector-ref nullable (gram-sym-index nt))) (prod-rhs (car prods)))
;; sym-at-dot: LR-item -> gram-sym | #f (vector-set! nullable (gram-sym-index (prod-lhs (car prods))) #t)
;; returns the symbol after the dot in the item or #f if there is none (set! added #t)
(define (sym-at-dot i) (set-nullables (cdr prods))]
(let ((dp (item-dot-pos i)) [else (cons (car prods) (set-nullables (cdr prods)))]))
(rhs (prod-rhs (item-prod i)))) (let loop ((P (possible-nullable prods)))
(cond
((= dp (vector-length rhs)) #f)
(else (vector-ref rhs dp)))))
;; item->string: LR-item -> string
(define (item->string it)
  ;; Renders an item as "lhs -> sym sym . sym ", where ". " marks the dot
  ;; position and every rhs symbol is followed by a single space.
  (let* ((production (item-prod it))
         (rhs (prod-rhs production))
         (rhs-length (vector-length rhs))
         (dot (item-dot-pos it))
         ;; one rhs symbol plus its trailing space
         (sym->string
          (lambda (gs)
            (format "~a " (if (term? gs) (term-sym gs) (non-term-sym gs))))))
    ;; Build the right-hand side back to front so each step only prepends.
    ;; The seed is the trailing ". " used when the dot is at the very end.
    (let loop ((i (sub1 rhs-length))
               (acc (if (= rhs-length dot) ". " "")))
      (if (< i 0)
          (string-append (format "~a -> " (non-term-sym (prod-lhs production)))
                         acc)
          (loop (sub1 i)
                (string-append (if (= i dot) ". " "")
                               (sym->string (vector-ref rhs i))
                               acc))))))
;; --------------------- Grammar Symbols --------------------------
;; non-term<?: non-term * non-term -> bool
;; Orders non-terminals by their index field.
(define (non-term<? left right)
  (let ((left-index (non-term-index left))
        (right-index (non-term-index right)))
    (< left-index right-index)))
;; term<?: term * term -> bool
;; Orders terminals by their index field.
(define (term<? t1 t2)
  (let ((index1 (term-index t1))
        (index2 (term-index t2)))
    (< index1 index2)))
(define (gram-sym-index gs)
(cond (cond
((term? gs) (term-index gs)) [(null? P) nullable]
(else (non-term-index gs)))) [else
(set! added #f)
(define new-P (set-nullables P))
(if added
(loop new-P)
nullable)])))
;; gram-sym-symbol: gram-sym -> symbol
;; Reads the sym field of a grammar symbol; anything that is not a term
;; is handed to the non-term accessor.
(define (gram-sym-symbol gs)
  (if (term? gs)
      (term-sym gs)
      (non-term-sym gs)))
;; gram-sym->string: gram-sym -> string
;; The printed name of a grammar symbol, taken from its sym field.
(define (gram-sym->string gs)
  (let ((name (gram-sym-symbol gs)))
    (symbol->string name)))
;; term-list->bit-vector: term list -> int
;; Creates a number where the nth bit is 1 if the term with index n is in
;; the list, and whose nth bit is 0 otherwise
(define (term-list->bit-vector terms)
  ;; Walk the list once, switching on one bit per term; the accumulator
  ;; starts at 0, which is also the result for an empty list.
  (let loop ((remaining terms)
             (bits 0))
    (if (null? remaining)
        bits
        (loop (cdr remaining)
              (bitwise-ior bits
                           (arithmetic-shift 1 (term-index (car remaining))))))))
;; ------------------------- Grammar ------------------------------
;; grammar%: class bundling the productions, terminals and non-terminals of
;; a grammar together with a precomputed nullable table.
;; Constructing an instance mutates the index fields of the supplied
;; non-terms, terms and productions (see the three counting loops below).
(define grammar%
(class object%
(super-instantiate ())
;; prods: production list list
;; where there is one production list per non-term
(init prods)
;; init-prods: production list
;; The productions parsing can start from
;; terms, non-terms, end-terms: stored as given and returned by the getters below
(init-field init-prods terms non-terms end-terms)
;; list of all productions
(define all-prods (apply append prods))
(define num-prods (length all-prods))
(define num-terms (length terms))
(define num-non-terms (length non-terms))
;; Renumber every non-term with its position in non-terms,
;; overwriting whatever index it carried before.
(let ((count 0))
(for-each
(lambda (nt)
(set-non-term-index! nt count)
(set! count (add1 count)))
non-terms))
;; Same renumbering for the terms.
(let ((count 0))
(for-each
(lambda (t)
(set-term-index! t count)
(set! count (add1 count)))
terms))
;; Same renumbering for the flattened production list.
(let ((count 0))
(for-each
(lambda (prod)
(set-prod-index! prod count)
(set! count (add1 count)))
all-prods))
;; indexed by the index of the non-term - contains the list of productions for that non-term
;; The slot is chosen from the lhs of the first production of each inner
;; list, so every inner list must be non-empty (car is taken).
(define nt->prods
(let ((v (make-vector (length prods) #f)))
(for-each (lambda (prods)
(vector-set! v (non-term-index (prod-lhs (car prods))) prods))
prods)
v))
;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable
(define nullable-non-terms
(nullable all-prods num-non-terms))
;; Plain accessors for the values computed or stored above.
(define/public (get-num-terms) num-terms)
(define/public (get-num-non-terms) num-non-terms)
(define/public (get-prods-for-non-term nt)
(vector-ref nt->prods (non-term-index nt)))
(define/public (get-prods) all-prods)
(define/public (get-init-prods) init-prods)
(define/public (get-terms) terms)
(define/public (get-non-terms) non-terms)
(define/public (get-num-prods) num-prods)
(define/public (get-end-terms) end-terms)
;; nullable-non-term?: looks nt up in the nullable table by its index.
(define/public (nullable-non-term? nt)
(vector-ref nullable-non-terms (non-term-index nt)))
;; nullable-after-dot?: true iff every rhs symbol from the dot position to
;; the end is a nullable non-term (vacuously true when the dot is at the end).
(define/public (nullable-after-dot? item)
(let* ((rhs (prod-rhs (item-prod item)))
(prod-length (vector-length rhs)))
(let loop ((i (item-dot-pos item)))
(cond
((< i prod-length)
(if (and (non-term? (vector-ref rhs i)) (nullable-non-term? (vector-ref rhs i)))
(loop (add1 i))
#f))
;; NOTE(review): no clause covers i > prod-length, so cond would
;; return void for a dot position past the end of the rhs.
((= i prod-length) #t)))))
;; Despite the "-thunk" names, these two return one-argument procedures
;; that forward to the corresponding method.
(define/public (nullable-non-term-thunk)
(lambda (nt)
(nullable-non-term? nt)))
(define/public (nullable-after-dot?-thunk)
(lambda (item)
(nullable-after-dot? item)))))
;; nullable: production list * int -> non-term set
;; Determines which non-terminals can derive epsilon.
;; The result is a vector of num-nts entries addressed by gram-sym-index;
;; an entry is #t once its non-terminal has been found nullable and #f
;; otherwise.
(define (nullable prods num-nts)
  ;; The result vector; filled in as non-terminals are found nullable.
  (define flags (make-vector num-nts #f))

  ;; Index of the left-hand side of a production.
  (define (lhs-index prod)
    (gram-sym-index (prod-lhs prod)))

  ;; Has this grammar symbol already been marked nullable?
  (define (marked? sym)
    (vector-ref flags (gram-sym-index sym)))

  ;; sweep: production list -> (values production list boolean)
  ;; Makes one front-to-back pass over the productions.  Productions
  ;; whose lhs is already marked are dropped; a production whose rhs
  ;; symbols are all marked gets its lhs marked (so later productions in
  ;; the same pass can see it) and is dropped too.  Returns the
  ;; productions we still don't know about, in their original order,
  ;; and whether anything was newly marked during the pass.
  (define (sweep candidates)
    (let walk ((todo candidates) (unknown '()) (changed? #f))
      (cond
        ((null? todo)
         (values (reverse unknown) changed?))
        ((vector-ref flags (lhs-index (car todo)))
         (walk (cdr todo) unknown changed?))
        ((vector-andmap marked? (prod-rhs (car todo)))
         (vector-set! flags (lhs-index (car todo)) #t)
         (walk (cdr todo) unknown #t))
        (else
         (walk (cdr todo) (cons (car todo) unknown) changed?)))))

  ;; Start from the productions with no terminal on the right-hand
  ;; side -- only those can possibly derive epsilon -- and keep sweeping
  ;; until nothing is left or a pass marks nothing new.
  (let loop ((pending (filter (lambda (prod)
                                (vector-andmap non-term? (prod-rhs prod)))
                              prods)))
    (if (null? pending)
        flags
        (let-values (((still-unknown changed?) (sweep pending)))
          (if changed?
              (loop still-unknown)
              flags)))))
)

@ -1,61 +1,53 @@
(module graph mzscheme #lang racket/base
(provide digraph)
(provide digraph) (define (zero-thunk) 0)
(define (zero-thunk) 0) ;; digraph:
;; ('a list) * ('a -> 'a list) * ('a -> 'b) * ('b * 'b -> 'b) * (-> 'b)
;; -> ('a -> 'b)
;; DeRemer and Pennello 1982
;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)}
;; We use a hash-table to represent the result function 'a -> 'b set, so
;; the values of type 'a must be comparable with eq?.
;; digraph: (define (digraph nodes edges f- union fail)
;; ('a list) * ('a -> 'a list) * ('a -> 'b) * ('b * 'b -> 'b) * (-> 'b) (define results (make-hasheq))
;; -> ('a -> 'b) (define (f x) (hash-ref results x fail))
;; DeRemer and Pennello 1982 ;; Maps elements of 'a to integers.
;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} (define N (make-hasheq))
;; We use a hash-table to represent the result function 'a -> 'b set, so (define (get-N x) (hash-ref N x zero-thunk))
;; the values of type 'a must be comparable with eq?. (define (set-N x d) (hash-set! N x d))
(define (digraph nodes edges f- union fail) (define stack null)
(letrec [ (define (push x) (set! stack (cons x stack)))
;; Will map elements of 'a to 'b sets (define (pop) (begin0
(results (make-hash-table)) (car stack)
(f (lambda (x) (hash-table-get results x fail))) (set! stack (cdr stack))))
(define (depth) (length stack))
;; Maps elements of 'a to integers. ;; traverse: 'a ->
(N (make-hash-table)) (define (traverse x)
(get-N (lambda (x) (hash-table-get N x zero-thunk))) (push x)
(set-N (lambda (x d) (hash-table-put! N x d))) (define d (depth))
(set-N x d)
(hash-set! results x (f- x))
(for-each (λ (y)
(when (= 0 (get-N y))
(traverse y))
(hash-set! results
x
(union (f x) (f y)))
(set-N x (min (get-N x) (get-N y))))
(edges x))
(when (= d (get-N x))
(let loop ([p (pop)])
(set-N p +inf.0)
(hash-set! results p (f x))
(when (not (eq? x p))
(loop (pop))))))
;; Will map elements of 'a to 'b sets
(for ([x (in-list nodes)]
#:when (zero? (get-N x)))
(traverse x))
f)
(stack null)
(push (lambda (x)
(set! stack (cons x stack))))
(pop (lambda ()
(begin0
(car stack)
(set! stack (cdr stack)))))
(depth (lambda () (length stack)))
;; traverse: 'a ->
(traverse
(lambda (x)
(push x)
(let ((d (depth)))
(set-N x d)
(hash-table-put! results x (f- x))
(for-each (lambda (y)
(if (= 0 (get-N y))
(traverse y))
(hash-table-put! results
x
(union (f x) (f y)))
(set-N x (min (get-N x) (get-N y))))
(edges x))
(if (= d (get-N x))
(let loop ((p (pop)))
(set-N p +inf.0)
(hash-table-put! results p (f x))
(if (not (eq? x p))
(loop (pop))))))))]
(for-each (lambda (x)
(if (= 0 (get-N x))
(traverse x)))
nodes)
f))
)

@ -1,374 +1,297 @@
(module input-file-parser mzscheme #lang racket/base
(require "yacc-helper.rkt"
"../private-lex/token-syntax.rkt"
"grammar.rkt"
racket/class
racket/contract
(for-template racket/base))
;; routines for parsing the input to the parser generator and producing a ;; routines for parsing the input to the parser generator and producing a
;; grammar (See grammar.rkt) ;; grammar (See grammar.rkt)
(require "yacc-helper.rkt"
"../private-lex/token-syntax.rkt"
"grammar.rkt"
mzlib/class
racket/contract)
(require-for-template mzscheme)
(define (is-a-grammar%? x) (is-a? x grammar%)) (define (is-a-grammar%? x) (is-a? x grammar%))
(provide/contract (provide/contract
(parse-input ((listof identifier?) (listof identifier?) (listof identifier?) [parse-input ((listof identifier?) (listof identifier?) (listof identifier?)
(or/c #f syntax?) syntax? any/c . -> . is-a-grammar%?)) (or/c #f syntax?) syntax? any/c . -> . is-a-grammar%?)]
(get-term-list ((listof identifier?) . -> . (listof identifier?)))) [get-term-list ((listof identifier?) . -> . (listof identifier?))])
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
;; get-args: ??? -> (values (listof syntax) (or/c #f (cons integer? stx)))
(define (get-args i rhs src-pos term-defs)
(let ((empty-table (make-hash-table))
(biggest-pos #f))
(hash-table-put! empty-table 'error #t)
(for-each (lambda (td)
(let ((v (syntax-local-value td)))
(if (e-terminals-def? v)
(for-each (lambda (s)
(hash-table-put! empty-table (syntax-object->datum s) #t))
(syntax->list (e-terminals-def-t v))))))
term-defs)
(let ([args
(let get-args ((i i)
(rhs rhs))
(cond
((null? rhs) null)
(else
(let ((b (car rhs))
(name (if (hash-table-get empty-table (syntax-object->datum (car rhs)) (lambda () #f))
(gensym)
(string->symbol (format "$~a" i)))))
(cond
(src-pos
(let ([start-pos-id
(datum->syntax-object b (string->symbol (format "$~a-start-pos" i)) b stx-for-original-property)]
[end-pos-id
(datum->syntax-object b (string->symbol (format "$~a-end-pos" i)) b stx-for-original-property)])
(set! biggest-pos (cons start-pos-id end-pos-id))
`(,(datum->syntax-object b name b stx-for-original-property)
,start-pos-id
,end-pos-id
,@(get-args (add1 i) (cdr rhs)))))
(else
`(,(datum->syntax-object b name b stx-for-original-property)
,@(get-args (add1 i) (cdr rhs)))))))))])
(values args biggest-pos))))
;; Given the list of terminal symbols and the precedence/associativity definitions,
;; builds terminal structures (See grammar.rkt)
;; build-terms: symbol list * symbol list list -> term list
(define (build-terms term-list precs)
(let ((counter 0)
;;(term-list (cons (gensym) term-list)) (define stx-for-original-property (read-syntax #f (open-input-string "original")))
;; Will map a terminal symbol to its precedence/associativity ;; get-args: ??? -> (values (listof syntax) (or/c #f (cons integer? stx)))
(prec-table (make-hash-table))) (define (get-args i rhs src-pos term-defs)
(define empty-table (make-hasheq))
;; Fill the prec table (define biggest-pos #f)
(for-each (hash-set! empty-table 'error #t)
(lambda (p-decl) (for* ([td (in-list term-defs)]
(begin0 [v (in-value (syntax-local-value td))]
(let ((assoc (car p-decl))) #:when (e-terminals-def? v)
(for-each [s (in-list (syntax->list (e-terminals-def-t v)))])
(lambda (term-sym) (hash-set! empty-table (syntax->datum s) #t))
(hash-table-put! prec-table term-sym (make-prec counter assoc))) (define args
(cdr p-decl))) (let get-args ([i i][rhs rhs])
(set! counter (add1 counter))))
precs)
;; Build the terminal structures
(map
(lambda (term-sym)
(make-term term-sym
#f
(hash-table-get prec-table term-sym (lambda () #f))))
term-list)))
;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.rkt)
;; get-terms-from-def: identifier? -> (listof identifier?)
(define (get-terms-from-def term-syn)
(let ((t (syntax-local-value term-syn (lambda () #f))))
(cond (cond
((terminals-def? t) (syntax->list (terminals-def-t t))) [(null? rhs) null]
((e-terminals-def? t) (syntax->list (e-terminals-def-t t))) [else
(else (define b (car rhs))
(raise-syntax-error (define name (if (hash-ref empty-table (syntax->datum (car rhs)) #f)
'parser-tokens (gensym)
"undefined token group" (string->symbol (format "$~a" i))))
term-syn))))) (cond
[src-pos
(define (get-term-list term-group-names) (define start-pos-id
(remove-duplicates (datum->syntax b (string->symbol (format "$~a-start-pos" i)) b stx-for-original-property))
(cons (datum->syntax-object #f 'error) (define end-pos-id
(apply append (datum->syntax b (string->symbol (format "$~a-end-pos" i)) b stx-for-original-property))
(map get-terms-from-def term-group-names))))) (set! biggest-pos (cons start-pos-id end-pos-id))
(list* (datum->syntax b name b stx-for-original-property)
start-pos-id
end-pos-id
(get-args (add1 i) (cdr rhs)))]
[else
(list* (datum->syntax b name b stx-for-original-property)
(get-args (add1 i) (cdr rhs)))])])))
(values args biggest-pos))
(define (parse-input term-defs start ends prec-decls prods src-pos) ;; Given the list of terminal symbols and the precedence/associativity definitions,
(let* ((start-syms (map syntax-e start)) ;; builds terminal structures (See grammar.rkt)
;; build-terms: symbol list * symbol list list -> term list
(define (build-terms term-list precs)
(define counter 0)
;;(term-list (cons (gensym) term-list))
;; Will map a terminal symbol to its precedence/associativity
(define prec-table (make-hasheq))
(list-of-terms (map syntax-e (get-term-list term-defs))) ;; Fill the prec table
(for ([p-decl (in-list precs)])
(define assoc (car p-decl))
(for ([term-sym (in-list (cdr p-decl))])
(hash-set! prec-table term-sym (make-prec counter assoc)))
(set! counter (add1 counter)))
(end-terms ;; Build the terminal structures
(map (for/list ([term-sym (in-list term-list)])
(lambda (end) (make-term term-sym
(unless (memq (syntax-e end) list-of-terms) #f
(raise-syntax-error (hash-ref prec-table term-sym (λ () #f)))))
'parser-end-tokens
(format "End token ~a not defined as a token"
(syntax-e end))
end))
(syntax-e end))
ends))
;; Get the list of terminals out of input-terms ;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.rkt)
;; get-terms-from-def: identifier? -> (listof identifier?)
(define (get-terms-from-def term-syn)
(define t (syntax-local-value term-syn #f))
(cond
[(terminals-def? t) (syntax->list (terminals-def-t t))]
[(e-terminals-def? t) (syntax->list (e-terminals-def-t t))]
[else
(raise-syntax-error
'parser-tokens
"undefined token group"
term-syn)]))
(list-of-non-terms (define (get-term-list term-group-names)
(syntax-case prods () (remove-duplicates
(((non-term production ...) ...) (cons (datum->syntax #f 'error)
(begin (apply append (map get-terms-from-def term-group-names)))))
(for-each
(lambda (nts)
(if (memq (syntax-object->datum nts) list-of-terms)
(raise-syntax-error
'parser-non-terminals
(format "~a used as both token and non-terminal"
(syntax-object->datum nts))
nts)))
(syntax->list (syntax (non-term ...))))
(let ((dup (duplicate-list? (syntax-object->datum (define (parse-input term-defs start ends prec-decls prods src-pos)
(syntax (non-term ...)))))) (define start-syms (map syntax-e start))
(if dup (define list-of-terms (map syntax-e (get-term-list term-defs)))
(raise-syntax-error (define end-terms
'parser-non-terminals (for/list ([end (in-list ends)])
(format "non-terminal ~a defined multiple times" (unless (memq (syntax-e end) list-of-terms)
dup) (raise-syntax-error
prods))) 'parser-end-tokens
(format "End token ~a not defined as a token"
(syntax-object->datum (syntax (non-term ...))))) (syntax-e end))
(_ end))
(raise-syntax-error (syntax-e end)))
'parser-grammar ;; Get the list of terminals out of input-terms
"Grammar must be of the form (grammar (non-terminal productions ...) ...)" (define list-of-non-terms
prods)))) (syntax-case prods ()
[((NON-TERM PRODUCTION ...) ...)
;; Check the precedence declarations for errors and turn them into data (begin
(precs (for ([nts (in-list (syntax->list #'(NON-TERM ...)))]
(syntax-case prec-decls () #:when (memq (syntax->datum nts) list-of-terms))
(((type term ...) ...) (raise-syntax-error
(let ((p-terms 'parser-non-terminals
(syntax-object->datum (syntax (term ... ...))))) (format "~a used as both token and non-terminal" (syntax->datum nts))
(cond nts))
((duplicate-list? p-terms) => (let ([dup (duplicate-list? (syntax->datum #'(NON-TERM ...)))])
(lambda (d) (when dup
(raise-syntax-error
'parser-non-terminals
(format "non-terminal ~a defined multiple times" dup)
prods)))
(syntax->datum #'(NON-TERM ...)))]
[_ (raise-syntax-error
'parser-grammar
"Grammar must be of the form (grammar (non-terminal productions ...) ...)"
prods)]))
;; Check the precedence declarations for errors and turn them into data
(define precs
(syntax-case prec-decls ()
[((TYPE TERM ...) ...)
(let ([p-terms (syntax->datum #'(TERM ... ...))])
(cond
[(duplicate-list? p-terms) =>
(λ (d)
(raise-syntax-error
'parser-precedences
(format "duplicate precedence declaration for token ~a" d)
prec-decls))]
[else (for ([t (in-list (syntax->list #'(TERM ... ...)))]
#:when (not (memq (syntax->datum t) list-of-terms)))
(raise-syntax-error (raise-syntax-error
'parser-precedences 'parser-precedences
(format "duplicate precedence declaration for token ~a" (format "Precedence declared for non-token ~a" (syntax->datum t))
d) t))
prec-decls))) (for ([type (in-list (syntax->list #'(TYPE ...)))]
(else #:unless (memq (syntax->datum type) `(left right nonassoc)))
(for-each (raise-syntax-error
(lambda (a) 'parser-precedences
(for-each "Associativity must be left, right or nonassoc"
(lambda (t) type))
(if (not (memq (syntax-object->datum t) (syntax->datum prec-decls)]))]
list-of-terms)) [#f null]
(raise-syntax-error [_ (raise-syntax-error
'parser-precedences 'parser-precedences
(format "Precedence declaration must be of the form (precs (assoc term ...) ...) where assoc is left, right or nonassoc"
"Precedence declared for non-token ~a" prec-decls)]))
(syntax-object->datum t))
t)))
(syntax->list a)))
(syntax->list (syntax ((term ...) ...))))
(for-each
(lambda (type)
(if (not (memq (syntax-object->datum type)
`(left right nonassoc)))
(raise-syntax-error
'parser-precedences
"Associativity must be left, right or nonassoc"
type)))
(syntax->list (syntax (type ...))))
(syntax-object->datum prec-decls)))))
(#f null)
(_
(raise-syntax-error
'parser-precedences
"Precedence declaration must be of the form (precs (assoc term ...) ...) where assoc is left, right or nonassoc"
prec-decls))))
(terms (build-terms list-of-terms precs))
(non-terms (map (lambda (non-term) (make-non-term non-term #f)) (define terms (build-terms list-of-terms precs))
list-of-non-terms)) (define non-terms (map (λ (non-term) (make-non-term non-term #f))
(term-table (make-hash-table)) list-of-non-terms))
(non-term-table (make-hash-table))) (define term-table (make-hasheq))
(define non-term-table (make-hasheq))
(for-each (lambda (t) (for ([t (in-list terms)])
(hash-table-put! term-table (gram-sym-symbol t) t)) (hash-set! term-table (gram-sym-symbol t) t))
terms)
(for-each (lambda (nt) (for ([nt (in-list non-terms)])
(hash-table-put! non-term-table (gram-sym-symbol nt) nt)) (hash-set! non-term-table (gram-sym-symbol nt) nt))
non-terms)
(let* ( ;; parse-prod: syntax-object -> gram-sym vector
;; parse-prod: syntax-object -> gram-sym vector (define (parse-prod prod-so)
(parse-prod (syntax-case prod-so ()
(lambda (prod-so) [(PROD-RHS-SYM ...)
(syntax-case prod-so () (andmap identifier? (syntax->list prod-so))
((prod-rhs-sym ...) (begin
(andmap identifier? (syntax->list prod-so)) (for ([t (in-list (syntax->list prod-so))]
(begin #:when (memq (syntax->datum t) end-terms))
(for-each (lambda (t) (raise-syntax-error
(if (memq (syntax-object->datum t) end-terms) 'parser-production-rhs
(raise-syntax-error (format "~a is an end token and cannot be used in a production" (syntax->datum t))
'parser-production-rhs t))
(format "~a is an end token and cannot be used in a production" (for/vector ([s (in-list (syntax->list prod-so))])
(syntax-object->datum t)) (cond
t))) [(hash-ref term-table (syntax->datum s) #f)]
(syntax->list prod-so)) [(hash-ref non-term-table (syntax->datum s) #f)]
(list->vector [else (raise-syntax-error
(map (lambda (s) 'parser-production-rhs
(hash-table-get (format "~a is not declared as a terminal or non-terminal" (syntax->datum s))
term-table s)])))]
(syntax-object->datum s) [_ (raise-syntax-error
(lambda () 'parser-production-rhs
(hash-table-get "production right-hand-side must have form (symbol ...)"
non-term-table prod-so)]))
(syntax-object->datum s)
(lambda ()
(raise-syntax-error
'parser-production-rhs
(format
"~a is not declared as a terminal or non-terminal"
(syntax-object->datum s))
s))))))
(syntax->list prod-so)))))
(_
(raise-syntax-error
'parser-production-rhs
"production right-hand-side must have form (symbol ...)"
prod-so)))))
;; parse-action: syntax-object * syntax-object -> syntax-object ;; parse-action: syntax-object * syntax-object -> syntax-object
(parse-action (define (parse-action rhs act-in)
(lambda (rhs act) (define-values (args biggest) (get-args 1 (syntax->list rhs) src-pos term-defs))
(let-values ([(args biggest) (get-args 1 (syntax->list rhs) src-pos term-defs)]) (define act
(let ([act (if biggest
(if biggest (with-syntax ([(CAR-BIGGEST . CDR-BIGGEST) biggest]
(with-syntax ([$n-start-pos (datum->syntax-object (car biggest) '$n-start-pos)] [$N-START-POS (datum->syntax (car biggest) '$n-start-pos)]
[$n-end-pos (datum->syntax-object (cdr biggest) '$n-end-pos)]) [$N-END-POS (datum->syntax (cdr biggest) '$n-end-pos)]
#`(let ([$n-start-pos #,(car biggest)] [ACT-IN act-in])
[$n-end-pos #,(cdr biggest)]) #'(let ([$N-START-POS CAR-BIGGEST]
#,act)) [$N-END-POS CDR-BIGGEST])
act)]) ACT-IN))
(quasisyntax/loc act act-in))
(lambda #,args (with-syntax ([ARGS args][ACT act])
#,act)))))) (syntax/loc #'ACT (λ ARGS ACT))))
;; parse-prod+action: non-term * syntax-object -> production ;; parse-prod+action: non-term * syntax-object -> production
(parse-prod+action (define (parse-prod+action nt prod-so)
(lambda (nt prod-so) (syntax-case prod-so ()
(syntax-case prod-so () [(PROD-RHS ACTION)
((prod-rhs action) (let ([p (parse-prod #'PROD-RHS)])
(let ((p (parse-prod (syntax prod-rhs)))) (make-prod
(make-prod nt
nt p
p #f
#f (let loop ([i (sub1 (vector-length p))])
(let loop ((i (sub1 (vector-length p)))) (if (>= i 0)
(if (>= i 0) (let ([gs (vector-ref p i)])
(let ((gs (vector-ref p i))) (if (term? gs)
(if (term? gs) (term-prec gs)
(term-prec gs) (loop (sub1 i))))
(loop (sub1 i)))) #f))
#f)) (parse-action #'PROD-RHS #'ACTION)))]
(parse-action (syntax prod-rhs) (syntax action))))) [(PROD-RHS (PREC TERM) ACTION)
((prod-rhs (prec term) action) (identifier? #'TERM)
(identifier? (syntax term)) (let ([p (parse-prod #'PROD-RHS)])
(let ((p (parse-prod (syntax prod-rhs)))) (make-prod
(make-prod nt
nt p
p #f
#f (term-prec
(term-prec (cond
(hash-table-get [(hash-ref term-table (syntax->datum #'TERM) #f)]
term-table [else (raise-syntax-error
(syntax-object->datum (syntax term))
(lambda ()
(raise-syntax-error
'parser-production-rhs
(format
"unrecognized terminal ~a in precedence declaration"
(syntax-object->datum (syntax term)))
(syntax term)))))
(parse-action (syntax prod-rhs) (syntax action)))))
(_
(raise-syntax-error
'parser-production-rhs 'parser-production-rhs
"production must have form [(symbol ...) expression] or [(symbol ...) (prec symbol) expression]" (format
prod-so))))) "unrecognized terminal ~a in precedence declaration"
(syntax->datum #'TERM))
#'TERM)]))
(parse-action #'PROD-RHS #'ACTION)))]
[_ (raise-syntax-error
'parser-production-rhs
"production must have form [(symbol ...) expression] or [(symbol ...) (prec symbol) expression]"
prod-so)]))
;; parse-prod-for-nt: syntax-object -> production list ;; parse-prod-for-nt: syntax-object -> production list
(parse-prods-for-nt (define (parse-prods-for-nt prods-so)
(lambda (prods-so) (syntax-case prods-so ()
(syntax-case prods-so () [(NT PRODUCTIONS ...)
((nt productions ...) (positive? (length (syntax->list #'(PRODUCTIONS ...))))
(> (length (syntax->list (syntax (productions ...)))) 0) (let ([nt (hash-ref non-term-table (syntax->datum #'NT))])
(let ((nt (hash-table-get non-term-table (map (λ (p) (parse-prod+action nt p)) (syntax->list #'(PRODUCTIONS ...))))]
(syntax-object->datum (syntax nt))))) [_ (raise-syntax-error
(map (lambda (p) (parse-prod+action nt p)) 'parser-productions
(syntax->list (syntax (productions ...)))))) "A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side"
(_ prods-so)]))
(raise-syntax-error
'parser-productions
"A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side"
prods-so))))))
(for-each (for ([sstx (in-list start)]
(lambda (sstx ssym) [ssym (in-list start-syms)]
(unless (memq ssym list-of-non-terms) #:unless (memq ssym list-of-non-terms))
(raise-syntax-error (raise-syntax-error
'parser-start 'parser-start
(format "Start symbol ~a not defined as a non-terminal" ssym) (format "Start symbol ~a not defined as a non-terminal" ssym)
sstx))) sstx))
start start-syms)
(let* ((starts (map (lambda (x) (make-non-term (gensym) #f)) start-syms)) (define starts (map (λ (x) (make-non-term (gensym) #f)) start-syms))
(end-non-terms (map (lambda (x) (make-non-term (gensym) #f)) start-syms)) (define end-non-terms (map (λ (x) (make-non-term (gensym) #f)) start-syms))
(parsed-prods (map parse-prods-for-nt (syntax->list prods))) (define parsed-prods (map parse-prods-for-nt (syntax->list prods)))
(start-prods (define start-prods (for/list ([start (in-list starts)]
(map (lambda (start end-non-term) [end-non-term (in-list end-non-terms)])
(list (make-prod start (vector end-non-term) #f #f (list (make-prod start (vector end-non-term) #f #f #'values))))
(syntax (lambda (x) x))))) (define new-prods
starts end-non-terms)) (append start-prods
(prods (for/list ([end-nt (in-list end-non-terms)]
`(,@start-prods [start-sym (in-list start-syms)])
,@(map (for/list ([end (in-list end-terms)])
(lambda (end-nt start-sym) (make-prod end-nt
(map (vector
(lambda (end) (hash-ref non-term-table start-sym)
(make-prod end-nt (hash-ref term-table end))
(vector #f
(hash-table-get non-term-table start-sym) #f
(hash-table-get term-table end)) #'values)))
#f parsed-prods))
#f
(syntax (lambda (x) x))))
end-terms))
end-non-terms start-syms)
,@parsed-prods)))
(make-object grammar% (make-object grammar%
prods new-prods
(map car start-prods) (map car start-prods)
terms terms
(append starts (append end-non-terms non-terms)) (append starts (append end-non-terms non-terms))
(map (lambda (term-name) (map (λ (term-name) (hash-ref term-table term-name)) end-terms)))
(hash-table-get term-table term-name))
end-terms)))))))

@ -1,277 +1,252 @@
(module lalr mzscheme #lang racket/base
(require "lr0.rkt"
;; Compute LALR lookaheads from DeRemer and Pennello 1982 "grammar.rkt"
racket/list
(require "lr0.rkt" racket/class)
"grammar.rkt"
mzlib/list ;; Compute LALR lookaheads from DeRemer and Pennello 1982
mzlib/class)
(provide compute-LA)
(provide compute-LA)
;; compute-DR: LR0-automaton * grammar -> (trans-key -> term set)
;; compute-DR: LR0-automaton * grammar -> (trans-key -> term set) ;; computes for each state, non-term transition pair, the terminals
;; computes for each state, non-term transition pair, the terminals ;; which can transition out of the resulting state
;; which can transition out of the resulting state ;; output term set is represented in bit-vector form
;; output term set is represented in bit-vector form (define ((compute-DR a g) tk)
(define (compute-DR a g) (define r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))
(lambda (tk) (term-list->bit-vector
(let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) (filter (λ (term) (send a run-automaton r term)) (send g get-terms))))
(term-list->bit-vector
(filter ;; compute-reads:
(lambda (term) ;; LR0-automaton * grammar -> (trans-key -> trans-key list)
(send a run-automaton r term)) (define (compute-reads a g)
(send g get-terms)))))) (define nullable-non-terms (filter (λ (nt) (send g nullable-non-term? nt)) (send g get-non-terms)))
(λ (tk)
;; compute-reads: (define r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))
;; LR0-automaton * grammar -> (trans-key -> trans-key list) (for/list ([non-term (in-list nullable-non-terms)]
(define (compute-reads a g) #:when (send a run-automaton r non-term))
(let ((nullable-non-terms (make-trans-key r non-term))))
(filter (lambda (nt) (send g nullable-non-term? nt))
(send g get-non-terms)))) ;; compute-read: LR0-automaton * grammar -> (trans-key -> term set)
(lambda (tk) ;; output term set is represented in bit-vector form
(let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) (define (compute-read a g)
(map (lambda (x) (make-trans-key r x)) (define dr (compute-DR a g))
(filter (lambda (non-term) (send a run-automaton r non-term)) (define reads (compute-reads a g))
nullable-non-terms)))))) (digraph-tk->terml (send a get-mapped-non-term-keys)
reads
;; compute-read: LR0-automaton * grammar -> (trans-key -> term set) dr
;; output term set is represented in bit-vector form (send a get-num-states)))
(define (compute-read a g) ;; returns the list of all k such that state k transitions to state start on the
(let* ((dr (compute-DR a g)) ;; transitions in rhs (in order)
(reads (compute-reads a g))) (define (run-lr0-backward a rhs dot-pos start num-states)
(digraph-tk->terml (send a get-mapped-non-term-keys) (let loop ([states (list start)]
reads [i (sub1 dot-pos)])
dr (cond
(send a get-num-states)))) [(< i 0) states]
;; returns the list of all k such that state k transitions to state start on the [else (loop (send a run-automaton-back states (vector-ref rhs i))
;; transitions in rhs (in order) (sub1 i))])))
(define (run-lr0-backward a rhs dot-pos start num-states)
(let loop ((states (list start)) ;; prod->items-for-include: grammar * prod * non-term -> lr0-item list
(i (sub1 dot-pos))) ;; returns the list of all (B -> beta . nt gamma) such that prod = (B -> beta nt gamma)
(cond ;; and gamma =>* epsilon
((< i 0) states) (define (prod->items-for-include g prod nt)
(else (loop (send a run-automaton-back states (vector-ref rhs i)) (define rhs (prod-rhs prod))
(sub1 i)))))) (define rhs-l (vector-length rhs))
(append (if (and (> rhs-l 0) (eq? nt (vector-ref rhs (sub1 rhs-l))))
;; prod->items-for-include: grammar * prod * non-term -> lr0-item list (list (make-item prod (sub1 rhs-l)))
;; returns the list of all (B -> beta . nt gamma) such that prod = (B -> beta nt gamma) null)
;; and gamma =>* epsilon (let loop ([i (sub1 rhs-l)])
(define (prod->items-for-include g prod nt) (cond
(let* ((rhs (prod-rhs prod)) [(and (> i 0)
(rhs-l (vector-length rhs))) (non-term? (vector-ref rhs i))
(append (if (and (> rhs-l 0) (eq? nt (vector-ref rhs (sub1 rhs-l)))) (send g nullable-non-term? (vector-ref rhs i)))
(list (make-item prod (sub1 rhs-l))) (if (eq? nt (vector-ref rhs (sub1 i)))
null) (cons (make-item prod (sub1 i))
(let loop ((i (sub1 rhs-l))) (loop (sub1 i)))
(cond (loop (sub1 i)))]
((and (> i 0) [else null]))))
(non-term? (vector-ref rhs i))
(send g nullable-non-term? (vector-ref rhs i))) ;; prod-list->items-for-include: grammar * prod list * non-term -> lr0-item list
(if (eq? nt (vector-ref rhs (sub1 i))) ;; return the list of all (B -> beta . nt gamma) such that (B -> beta nt gamma) in prod-list
(cons (make-item prod (sub1 i)) ;; and gamma =>* epsilon
(loop (sub1 i))) (define (prod-list->items-for-include g prod-list nt)
(loop (sub1 i)))) (apply append (map (λ (prod) (prod->items-for-include g prod nt)) prod-list)))
(else null))))))
;; compute-includes: lr0-automaton * grammar -> (trans-key -> trans-key list)
;; prod-list->items-for-include: grammar * prod list * non-term -> lr0-item list (define (compute-includes a g)
;; return the list of all (B -> beta . nt gamma) such that (B -> beta nt gamma) in prod-list (define num-states (send a get-num-states))
;; and gamma =>* epsilon (define items-for-input-nt (make-vector (send g get-num-non-terms) null))
(define (prod-list->items-for-include g prod-list nt) (for ([input-nt (in-list (send g get-non-terms))])
(apply append (map (lambda (prod) (prod->items-for-include g prod nt)) prod-list))) (vector-set! items-for-input-nt (non-term-index input-nt)
(prod-list->items-for-include g (send g get-prods) input-nt)))
;; compute-includes: lr0-automaton * grammar -> (trans-key -> trans-key list) (λ (tk)
(define (compute-includes a g) (define goal-state (trans-key-st tk))
(let ((num-states (send a get-num-states)) (define non-term (trans-key-gs tk))
(items-for-input-nt (make-vector (send g get-num-non-terms) null))) (define items (vector-ref items-for-input-nt (non-term-index non-term)))
(for-each (trans-key-list-remove-dups
(lambda (input-nt) (apply append
(vector-set! items-for-input-nt (non-term-index input-nt) (for/list ([item (in-list items)])
(prod-list->items-for-include g (send g get-prods) input-nt))) (define prod (item-prod item))
(send g get-non-terms)) (define rhs (prod-rhs prod))
(lambda (tk) (define lhs (prod-lhs prod))
(let* ((goal-state (trans-key-st tk)) (map (λ (state) (make-trans-key state lhs))
(non-term (trans-key-gs tk)) (run-lr0-backward a
(items (vector-ref items-for-input-nt (non-term-index non-term)))) rhs
(trans-key-list-remove-dups (item-dot-pos item)
(apply append goal-state
(map (lambda (item) num-states)))))))
(let* ((prod (item-prod item))
(rhs (prod-rhs prod)) ;; compute-lookback: lr0-automaton * grammar -> (kernel * prod -> trans-key list)
(lhs (prod-lhs prod))) (define (compute-lookback a g)
(map (lambda (state) (define num-states (send a get-num-states))
(make-trans-key state lhs)) (λ (state prod)
(run-lr0-backward a (map (λ (k) (make-trans-key k (prod-lhs prod)))
rhs (run-lr0-backward a (prod-rhs prod) (vector-length (prod-rhs prod)) state num-states))))
(item-dot-pos item)
goal-state ;; compute-follow: LR0-automaton * grammar -> (trans-key -> term set)
num-states)))) ;; output term set is represented in bit-vector form
items))))))) (define (compute-follow a g includes)
(define read (compute-read a g))
;; compute-lookback: lr0-automaton * grammar -> (kernel * prod -> trans-key list) (define (compute-lookback a g)
(define (compute-lookback a g) includes
(let ((num-states (send a get-num-states))) read
(lambda (state prod) (send a get-num-states)))
(map (lambda (k) (make-trans-key k (prod-lhs prod)))
(run-lr0-backward a (prod-rhs prod) (vector-length (prod-rhs prod)) state num-states))))) ;; compute-LA: LR0-automaton * grammar -> kernel * prod -> term set
;; output term set is represented in bit-vector form
;; compute-follow: LR0-automaton * grammar -> (trans-key -> term set) (define (compute-LA a g)
;; output term set is represented in bit-vector form (define includes (compute-includes a g))
(define (compute-follow a g includes) (define lookback (compute-lookback a g))
(let ((read (compute-read a g))) (define follow (compute-follow a g includes))
(digraph-tk->terml (send a get-mapped-non-term-keys) (λ (k p)
includes (define l (lookback k p))
read (define f (map follow l))
(send a get-num-states)))) (apply bitwise-ior (cons 0 f))))
;; compute-LA: LR0-automaton * grammar -> kernel * prod -> term set
;; output term set is represented in bit-vector form (define (print-DR dr a g)
(define (compute-LA a g) (print-input-st-sym dr "DR" a g print-output-terms))
(let* ((includes (compute-includes a g)) (define (print-Read Read a g)
(lookback (compute-lookback a g)) (print-input-st-sym Read "Read" a g print-output-terms))
(follow (compute-follow a g includes))) (define (print-includes i a g)
(lambda (k p) (print-input-st-sym i "includes" a g print-output-st-nt))
(let* ((l (lookback k p)) (define (print-lookback l a g)
(f (map follow l))) (print-input-st-prod l "lookback" a g print-output-st-nt))
(apply bitwise-ior (cons 0 f)))))) (define (print-follow f a g)
(print-input-st-sym f "follow" a g print-output-terms))
(define (print-DR dr a g) (define (print-LA l a g)
(print-input-st-sym dr "DR" a g print-output-terms)) (print-input-st-prod l "LA" a g print-output-terms))
(define (print-Read Read a g)
(print-input-st-sym Read "Read" a g print-output-terms)) (define (print-input-st-sym f name a g print-output)
(define (print-includes i a g) (printf "~a:\n" name)
(print-input-st-sym i "includes" a g print-output-st-nt)) (send a for-each-state
(define (print-lookback l a g) (λ (state)
(print-input-st-prod l "lookback" a g print-output-st-nt))
(define (print-follow f a g)
(print-input-st-sym f "follow" a g print-output-terms))
(define (print-LA l a g)
(print-input-st-prod l "LA" a g print-output-terms))
(define (print-input-st-sym f name a g print-output)
(printf "~a:\n" name)
(send a for-each-state
(lambda (state)
(for-each
(lambda (non-term)
(let ((res (f (make-trans-key state non-term))))
(if (not (null? res))
(printf "~a(~a, ~a) = ~a\n"
name
state
(gram-sym-symbol non-term)
(print-output res)))))
(send g get-non-terms))))
(newline))
(define (print-input-st-prod f name a g print-output)
(printf "~a:\n" name)
(send a for-each-state
(lambda (state)
(for-each
(lambda (non-term)
(for-each (for-each
(lambda (prod) (λ (non-term)
(let ((res (f state prod))) (let ([res (f (make-trans-key state non-term))])
(if (not (null? res)) (when (not (null? res))
(printf "~a(~a, ~a) = ~a\n" (printf "~a(~a, ~a) = ~a\n"
name name
(kernel-index state) state
(prod-index prod) (gram-sym-symbol non-term)
(print-output res))))) (print-output res)))))
(send g get-prods-for-non-term non-term))) (send g get-non-terms))))
(send g get-non-terms))))) (newline))
(define (print-output-terms r) (define (print-input-st-prod f name a g print-output)
(map (printf "~a:\n" name)
(lambda (p) (send a for-each-state
(gram-sym-symbol p)) (λ (state)
r)) (for-each
(λ (non-term)
(define (print-output-st-nt r) (for-each
(map (λ (prod)
(lambda (p) (let ([res (f state prod)])
(list (when (not (null? res))
(kernel-index (trans-key-st p)) (printf "~a(~a, ~a) = ~a\n"
(gram-sym-symbol (trans-key-gs p)))) name
r)) (kernel-index state)
(prod-index prod)
;; init-tk-map : int -> (vectorof hashtable?) (print-output res)))))
(define (init-tk-map n) (send g get-prods-for-non-term non-term)))
(let ((v (make-vector n #f))) (send g get-non-terms)))))
(let loop ((i (sub1 (vector-length v))))
(when (>= i 0) (define (print-output-terms r)
(vector-set! v i (make-hash-table)) (map gram-sym-symbol r))
(loop (sub1 i))))
v)) (define (print-output-st-nt r)
(map (λ (p) (list (kernel-index (trans-key-st p)) (gram-sym-symbol (trans-key-gs p)))) r))
;; lookup-tk-map : (vectorof (symbol? int hashtable)) -> trans-key? -> int
(define (lookup-tk-map map) ;; init-tk-map : int -> (vectorof hashtable?)
(lambda (tk) (define (init-tk-map n)
(let ((st (trans-key-st tk)) (define v (make-vector n #f))
(gs (trans-key-gs tk))) (let loop ([i (sub1 (vector-length v))])
(hash-table-get (vector-ref map (kernel-index st)) (when (>= i 0)
(gram-sym-symbol gs) (vector-set! v i (make-hasheq))
(lambda () 0))))) (loop (sub1 i))))
v)
;; add-tk-map : (vectorof (symbol? int hashtable)) -> trans-key int ->
(define (add-tk-map map) ;; lookup-tk-map : (vectorof (symbol? int hashtable)) -> trans-key? -> int
(lambda (tk v) (define ((lookup-tk-map map) tk)
(let ((st (trans-key-st tk)) (define st (trans-key-st tk))
(gs (trans-key-gs tk))) (define gs (trans-key-gs tk))
(hash-table-put! (vector-ref map (kernel-index st)) (hash-ref (vector-ref map (kernel-index st))
(gram-sym-symbol gs) (gram-sym-symbol gs)
v)))) (λ () 0)))
;; digraph-tk->terml: ;; add-tk-map : (vectorof (symbol? int hashtable)) -> trans-key int ->
;; (trans-key list) * (trans-key -> trans-key list) * (trans-key -> term list) * int * int * int (define ((add-tk-map map) tk v)
;; -> (trans-key -> term list) (define st (trans-key-st tk))
;; DeRemer and Pennello 1982 (define gs (trans-key-gs tk))
;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} (hash-set! (vector-ref map (kernel-index st))
;; A specialization of digraph in the file graph.rkt (gram-sym-symbol gs)
(define (digraph-tk->terml nodes edges f- num-states) v))
(letrec [
;; Will map elements of trans-key to term sets represented as bit vectors ;; digraph-tk->terml:
(results (init-tk-map num-states)) ;; (trans-key list) * (trans-key -> trans-key list) * (trans-key -> term list) * int * int * int
;; -> (trans-key -> term list)
;; Maps elements of trans-keys to integers. ;; DeRemer and Pennello 1982
(N (init-tk-map num-states)) ;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)}
;; A specialization of digraph in the file graph.rkt
(get-N (lookup-tk-map N)) (define (digraph-tk->terml nodes edges f- num-states)
(set-N (add-tk-map N)) ;; Will map elements of trans-key to term sets represented as bit vectors
(get-f (lookup-tk-map results)) (define results (init-tk-map num-states))
(set-f (add-tk-map results))
;; Maps elements of trans-keys to integers.
(stack null) (define N (init-tk-map num-states))
(push (lambda (x)
(set! stack (cons x stack)))) (define get-N (lookup-tk-map N))
(pop (lambda () (define set-N (add-tk-map N))
(begin0 (define get-f (lookup-tk-map results))
(car stack) (define set-f (add-tk-map results))
(set! stack (cdr stack)))))
(depth (lambda () (length stack))) (define stack null)
(define (push x) (set! stack (cons x stack)))
;; traverse: 'a -> (define (pop) (begin0
(traverse (car stack)
(lambda (x) (set! stack (cdr stack))))
(push x) (define (depth) (length stack))
(let ((d (depth)))
(set-N x d) ;; traverse: 'a ->
(set-f x (f- x)) (define (traverse x)
(for-each (lambda (y) (push x)
(when (= 0 (get-N y)) (let ([d (depth)])
(traverse y)) (set-N x d)
(set-f x (bitwise-ior (get-f x) (get-f y))) (set-f x (f- x))
(set-N x (min (get-N x) (get-N y)))) (for-each (λ (y)
(edges x)) (when (= 0 (get-N y))
(when (= d (get-N x)) (traverse y))
(let loop ((p (pop))) (set-f x (bitwise-ior (get-f x) (get-f y)))
(set-N p +inf.0) (set-N x (min (get-N x) (get-N y))))
(set-f p (get-f x)) (edges x))
(unless (equal? x p) (when (= d (get-N x))
(loop (pop))))))))] (let loop ([p (pop)])
(for-each (lambda (x) (set-N p +inf.0)
(when (= 0 (get-N x)) (set-f p (get-f x))
(traverse x))) (unless (equal? x p)
nodes) (loop (pop)))))))
get-f))
) (for ([x (in-list nodes)]
#:when (zero? (get-N x)))
(traverse x))
get-f)

@ -1,372 +1,314 @@
(module lr0 mzscheme #lang racket/base
(require "grammar.rkt"
;; Handle the LR0 automaton "graph.rkt"
racket/list
(require "grammar.rkt" racket/class)
"graph.rkt"
mzlib/list ;; Handle the LR0 automaton
mzlib/class)
(provide build-lr0-automaton lr0%
(provide build-lr0-automaton lr0% (struct-out trans-key) trans-key-list-remove-dups
(struct trans-key (st gs)) trans-key-list-remove-dups kernel-items kernel-index)
kernel-items kernel-index)
;; kernel = (make-kernel (LR1-item list) index)
;; kernel = (make-kernel (LR1-item list) index) ;; the list must be kept sorted according to item<? so that equal? can
;; the list must be kept sorted according to item<? so that equal? can ;; be used to compare kernels
;; be used to compare kernels ;; Each kernel is assigned a unique index, 0 <= index < number of states
;; Each kernel is assigned a unique index, 0 <= index < number of states ;; trans-key = (make-trans-key kernel gram-sym)
;; trans-key = (make-trans-key kernel gram-sym) (define-struct kernel (items index) #:inspector (make-inspector))
(define-struct kernel (items index) (make-inspector)) (define-struct trans-key (st gs) #:inspector (make-inspector))
(define-struct trans-key (st gs) (make-inspector))
(define (trans-key<? a b)
(define (trans-key<? a b) (define kia (kernel-index (trans-key-st a)))
(let ((kia (kernel-index (trans-key-st a))) (define kib (kernel-index (trans-key-st b)))
(kib (kernel-index (trans-key-st b)))) (or (< kia kib)
(or (< kia kib) (and (= kia kib)
(and (= kia kib) (< (non-term-index (trans-key-gs a))
(< (non-term-index (trans-key-gs a)) (non-term-index (trans-key-gs b))))))
(non-term-index (trans-key-gs b)))))))
(define (trans-key-list-remove-dups tkl)
(define (trans-key-list-remove-dups tkl) (let loop ([sorted (sort tkl trans-key<?)])
(let loop ((sorted (sort tkl trans-key<?))) (cond
[(null? sorted) null]
[(null? (cdr sorted)) sorted]
[else
(if (and (= (non-term-index (trans-key-gs (car sorted)))
(non-term-index (trans-key-gs (cadr sorted))))
(= (kernel-index (trans-key-st (car sorted)))
(kernel-index (trans-key-st (cadr sorted)))))
(loop (cdr sorted))
(cons (car sorted) (loop (cdr sorted))))])))
;; build-transition-table : int (listof (cons/c trans-key X)) ->
;; (vectorof (symbol X hashtable))
(define (build-transition-table num-states assoc)
(define transitions (make-vector num-states #f))
(let loop ([i (sub1 (vector-length transitions))])
(when (>= i 0)
(vector-set! transitions i (make-hasheq))
(loop (sub1 i))))
(for ([trans-key/kernel (in-list assoc)])
(define tk (car trans-key/kernel))
(hash-set! (vector-ref transitions (kernel-index (trans-key-st tk)))
(gram-sym-symbol (trans-key-gs tk))
(cdr trans-key/kernel)))
transitions)
;; reverse-assoc : (listof (cons/c trans-key? kernel?)) ->
;; (listof (cons/c trans-key? (listof kernel?)))
(define (reverse-assoc assoc)
(define reverse-hash (make-hash))
(define (hash-table-add! ht k v)
(hash-set! ht k (cons v (hash-ref ht k (λ () null)))))
(for ([trans-key/kernel (in-list assoc)])
(define tk (car trans-key/kernel))
(hash-table-add! reverse-hash
(make-trans-key (cdr trans-key/kernel)
(trans-key-gs tk))
(trans-key-st tk)))
(hash-map reverse-hash cons))
;; kernel-list-remove-duplicates
;; LR0-automaton = object of class lr0%
(define lr0%
(class object%
(super-instantiate ())
;; term-assoc : (listof (cons/c trans-key? kernel?))
;; non-term-assoc : (listof (cons/c trans-key? kernel?))
;; states : (vectorof kernel?)
;; epsilons : ???
(init-field term-assoc non-term-assoc states epsilons)
(define transitions (build-transition-table (vector-length states)
(append term-assoc non-term-assoc)))
(define reverse-term-assoc (reverse-assoc term-assoc))
(define reverse-non-term-assoc (reverse-assoc non-term-assoc))
(define reverse-transitions
(build-transition-table (vector-length states)
(append reverse-term-assoc reverse-non-term-assoc)))
(define mapped-non-terms (map car non-term-assoc))
(define/public (get-mapped-non-term-keys)
mapped-non-terms)
(define/public (get-num-states)
(vector-length states))
(define/public (get-epsilon-trans)
epsilons)
(define/public (get-transitions)
(append term-assoc non-term-assoc))
;; for-each-state : (state ->) ->
;; Iteration over the states in an automaton
(define/public (for-each-state f)
(define num-states (vector-length states))
(let loop ([i 0])
(when (< i num-states)
(f (vector-ref states i))
(loop (add1 i)))))
;; run-automaton: kernel? gram-sym? -> (union kernel #f)
;; returns the state reached from state k on input s, or #f when k
;; has no transition on s
(define/public (run-automaton k s)
(hash-ref (vector-ref transitions (kernel-index k))
(gram-sym-symbol s)
(λ () #f)))
;; run-automaton-back : (listof kernel?) gram-sym? -> (listof kernel)
;; returns the list of states that can reach k by transitioning on s.
(define/public (run-automaton-back k s)
(for*/list ([k (in-list k)]
[val (in-list (hash-ref (vector-ref reverse-transitions (kernel-index k))
(gram-sym-symbol s)
(λ () null)))])
val))))
(define ((union comp<?) l1 l2)
(let loop ([l1 l1] [l2 l2])
(cond
[(null? l1) l2]
[(null? l2) l1]
[else (define c1 (car l1))
(define c2 (car l2))
(cond
[(comp<? c1 c2) (cons c1 (loop (cdr l1) l2))]
[(comp<? c2 c1) (cons c2 (loop l1 (cdr l2)))]
[else (loop (cdr l1) l2)])])))
;; The kernels in the automaton are represented canonically.
;; That is (equal? a b) <=> (eq? a b)
(define (kernel->string k)
(apply string-append
`("{" ,@(map (λ (i) (string-append (item->string i) ", "))
(kernel-items k))
"}")))
;; build-LR0-automaton: grammar -> LR0-automaton
;; Constructs the kernels of the sets of LR(0) items of g
(define (build-lr0-automaton grammar)
; (printf "LR(0) automaton:\n")
(define epsilons (make-hash))
(define grammar-symbols (append (send grammar get-non-terms)
(send grammar get-terms)))
;; first-non-term: non-term -> non-term list
;; given a non-terminal symbol C, return those non-terminal
;; symbols A s.t. C -> An for some string of terminals and
;; non-terminals n where -> means a rightmost derivation in many
;; steps. Assumes that each non-term can be reduced to a string
;; of terms.
(define first-non-term
(digraph (send grammar get-non-terms)
(λ (nt)
(filter non-term?
(map (λ (prod) (sym-at-dot (make-item prod 0)))
(send grammar get-prods-for-non-term nt))))
(λ (nt) (list nt))
(union non-term<?)
(λ () null)))
;; closure: LR1-item list -> LR1-item list
;; Creates a set of items containing i s.t. if A -> n.Xm is in it,
;; X -> .o is in it too.
(define (LR0-closure i)
(cond
[(null? i) null]
[else
(define next-gsym (sym-at-dot (car i)))
(cond
[(non-term? next-gsym)
(cons (car i)
(append
(for*/list ([non-term (in-list (first-non-term next-gsym))]
[x (in-list (send grammar
get-prods-for-non-term
non-term))])
(make-item x 0))
(LR0-closure (cdr i))))]
[else (cons (car i) (LR0-closure (cdr i)))])]))
;; maps trans-keys to kernels
(define automaton-term null)
(define automaton-non-term null)
;; keeps the kernels we have seen, so we can have a unique
;; list for each kernel
(define kernels (make-hash))
(define counter 0)
;; goto: LR1-item list -> LR1-item list list
;; creates new kernels by moving the dot in each item in the
;; LR0-closure of kernel to the right, and grouping them by
;; the term/non-term moved over. Returns the kernels not
;; yet seen, and places the trans-keys into automaton
(define (goto kernel)
;; maps a gram-syms to a list of items
(define table (make-hasheq))
;; add-item!:
;; (symbol (listof item) hashtable) item? ->
;; adds i into the table grouped with the grammar
;; symbol following its dot
(define (add-item! table i)
(define gs (sym-at-dot i))
(cond (cond
((null? sorted) null) [gs (define already (hash-ref table (gram-sym-symbol gs) (λ () null)))
((null? (cdr sorted)) sorted) (unless (member i already)
(else (hash-set! table (gram-sym-symbol gs) (cons i already)))]
(if (and (= (non-term-index (trans-key-gs (car sorted))) ((zero? (vector-length (prod-rhs (item-prod i))))
(non-term-index (trans-key-gs (cadr sorted)))) (define current (hash-ref epsilons kernel (λ () null)))
(= (kernel-index (trans-key-st (car sorted))) (hash-set! epsilons kernel (cons i current)))))
(kernel-index (trans-key-st (cadr sorted)))))
(loop (cdr sorted)) ;; Group the items of the LR0 closure of the kernel
(cons (car sorted) (loop (cdr sorted)))))))) ;; by the character after the dot
(for ([item (in-list (LR0-closure (kernel-items kernel)))])
(add-item! table item))
;; build-transition-table : int (listof (cons/c trans-key X)) ->
;; (vectorof (symbol X hashtable)) ;; each group is a new kernel, with the dot advanced.
(define (build-transition-table num-states assoc) ;; sorts the items in a kernel so kernels can be compared
(let ((transitions (make-vector num-states #f))) ;; with equal? for using the table kernels to make sure
(let loop ((i (sub1 (vector-length transitions)))) ;; only one representative of each kernel is created
(when (>= i 0) (define is
(vector-set! transitions i (make-hash-table)) (let loop ([gsyms grammar-symbols])
(loop (sub1 i)))) (cond
(for-each [(null? gsyms) null]
(lambda (trans-key/kernel) [else
(let ((tk (car trans-key/kernel))) (define items (hash-ref table (gram-sym-symbol (car gsyms)) (λ () null)))
(hash-table-put! (vector-ref transitions (kernel-index (trans-key-st tk))) (cond
(gram-sym-symbol (trans-key-gs tk)) [(null? items) (loop (cdr gsyms))]
(cdr trans-key/kernel)))) [else (cons (list (car gsyms) items)
assoc) (loop (cdr gsyms)))])])))
transitions)) (filter
values
;; reverse-assoc : (listof (cons/c trans-key? kernel?)) -> (for/list ([i (in-list is)])
;; (listof (cons/c trans-key? (listof kernel?))) (define gs (car i))
(define (reverse-assoc assoc) (define items (cadr i))
(let ((reverse-hash (make-hash-table 'equal)) (define new #f)
(hash-table-add! (define new-kernel (sort (filter values (map move-dot-right items)) item<?))
(lambda (ht k v) (define unique-kernel (hash-ref kernels new-kernel
(hash-table-put! ht k (cons v (hash-table-get ht k (lambda () null))))))) (λ ()
(for-each (define k (make-kernel new-kernel counter))
(lambda (trans-key/kernel)
(let ((tk (car trans-key/kernel)))
(hash-table-add! reverse-hash
(make-trans-key (cdr trans-key/kernel)
(trans-key-gs tk))
(trans-key-st tk))))
assoc)
(hash-table-map reverse-hash cons)))
;; kernel-list-remove-duplicates
;; LR0-automaton = object of class lr0%
(define lr0%
(class object%
(super-instantiate ())
;; term-assoc : (listof (cons/c trans-key? kernel?))
;; non-term-assoc : (listof (cons/c trans-key? kernel?))
;; states : (vectorof kernel?)
;; epsilons : ???
(init-field term-assoc non-term-assoc states epsilons)
(define transitions (build-transition-table (vector-length states)
(append term-assoc non-term-assoc)))
(define reverse-term-assoc (reverse-assoc term-assoc))
(define reverse-non-term-assoc (reverse-assoc non-term-assoc))
(define reverse-transitions
(build-transition-table (vector-length states)
(append reverse-term-assoc reverse-non-term-assoc)))
(define mapped-non-terms (map car non-term-assoc))
(define/public (get-mapped-non-term-keys)
mapped-non-terms)
(define/public (get-num-states)
(vector-length states))
(define/public (get-epsilon-trans)
epsilons)
(define/public (get-transitions)
(append term-assoc non-term-assoc))
;; for-each-state : (state ->) ->
;; Iteration over the states in an automaton
(define/public (for-each-state f)
(let ((num-states (vector-length states)))
(let loop ((i 0))
(if (< i num-states)
(begin
(f (vector-ref states i))
(loop (add1 i)))))))
;; run-automaton: kernel? gram-sym? -> (union kernel #f)
;; returns the state reached from state k on input s, or #f when k
;; has no transition on s
(define/public (run-automaton k s)
(hash-table-get (vector-ref transitions (kernel-index k))
(gram-sym-symbol s)
(lambda () #f)))
;; run-automaton-back : (listof kernel?) gram-sym? -> (listof kernel)
;; returns the list of states that can reach k by transitioning on s.
(define/public (run-automaton-back k s)
(apply append
(map
(lambda (k)
(hash-table-get (vector-ref reverse-transitions (kernel-index k))
(gram-sym-symbol s)
(lambda () null)))
k)))))
(define (union comp<?)
(letrec ((union
(lambda (l1 l2)
(cond
((null? l1) l2)
((null? l2) l1)
(else (let ((c1 (car l1))
(c2 (car l2)))
(cond
((comp<? c1 c2)
(cons c1 (union (cdr l1) l2)))
((comp<? c2 c1)
(cons c2 (union l1 (cdr l2))))
(else (union (cdr l1) l2)))))))))
union))
;; The kernels in the automaton are represented canonically.
;; That is (equal? a b) <=> (eq? a b)
(define (kernel->string k)
(apply string-append
`("{" ,@(map (lambda (i) (string-append (item->string i) ", "))
(kernel-items k))
"}")))
;; build-LR0-automaton: grammar -> LR0-automaton
;; Constructs the kernels of the sets of LR(0) items of g
(define (build-lr0-automaton grammar)
; (printf "LR(0) automaton:\n")
(letrec (
(epsilons (make-hash-table 'equal))
(grammar-symbols (append (send grammar get-non-terms)
(send grammar get-terms)))
;; first-non-term: non-term -> non-term list
;; given a non-terminal symbol C, return those non-terminal
;; symbols A s.t. C -> An for some string of terminals and
;; non-terminals n where -> means a rightmost derivation in many
;; steps. Assumes that each non-term can be reduced to a string
;; of terms.
(first-non-term
(digraph (send grammar get-non-terms)
(lambda (nt)
(filter non-term?
(map (lambda (prod)
(sym-at-dot (make-item prod 0)))
(send grammar get-prods-for-non-term nt))))
(lambda (nt) (list nt))
(union non-term<?)
(lambda () null)))
;; closure: LR1-item list -> LR1-item list
;; Creates a set of items containing i s.t. if A -> n.Xm is in it,
;; X -> .o is in it too.
(LR0-closure
(lambda (i)
(cond
((null? i) null)
(else
(let ((next-gsym (sym-at-dot (car i))))
(cond
((non-term? next-gsym)
(cons (car i)
(append
(apply append
(map (lambda (non-term)
(map (lambda (x)
(make-item x 0))
(send grammar
get-prods-for-non-term
non-term)))
(first-non-term next-gsym)))
(LR0-closure (cdr i)))))
(else
(cons (car i) (LR0-closure (cdr i))))))))))
;; maps trans-keys to kernels
(automaton-term null)
(automaton-non-term null)
;; keeps the kernels we have seen, so we can have a unique
;; list for each kernel
(kernels (make-hash-table 'equal))
(counter 0)
;; goto: LR1-item list -> LR1-item list list
;; creates new kernels by moving the dot in each item in the
;; LR0-closure of kernel to the right, and grouping them by
;; the term/non-term moved over. Returns the kernels not
;; yet seen, and places the trans-keys into automaton
(goto
(lambda (kernel)
(let (
;; maps a gram-syms to a list of items
(table (make-hash-table))
;; add-item!:
;; (symbol (listof item) hashtable) item? ->
;; adds i into the table grouped with the grammar
;; symbol following its dot
(add-item!
(lambda (table i)
(let ((gs (sym-at-dot i)))
(cond
(gs
(let ((already
(hash-table-get table
(gram-sym-symbol gs)
(lambda () null))))
(unless (member i already)
(hash-table-put! table
(gram-sym-symbol gs)
(cons i already)))))
((= 0 (vector-length (prod-rhs (item-prod i))))
(let ((current (hash-table-get epsilons
kernel
(lambda () null))))
(hash-table-put! epsilons
kernel
(cons i current)))))))))
;; Group the items of the LR0 closure of the kernel
;; by the character after the dot
(for-each (lambda (item)
(add-item! table item))
(LR0-closure (kernel-items kernel)))
;; each group is a new kernel, with the dot advanced.
;; sorts the items in a kernel so kernels can be compared
;; with equal? for using the table kernels to make sure
;; only one representative of each kernel is created
(filter
(lambda (x) x)
(map
(lambda (i)
(let* ((gs (car i))
(items (cadr i))
(new #f)
(new-kernel (sort
(filter (lambda (x) x)
(map move-dot-right items))
item<?))
(unique-kernel (hash-table-get
kernels
new-kernel
(lambda ()
(let ((k (make-kernel
new-kernel
counter)))
(set! new #t) (set! new #t)
(set! counter (add1 counter)) (set! counter (add1 counter))
(hash-table-put! kernels (hash-set! kernels new-kernel k)
new-kernel k)))
k) (if (term? gs)
k))))) (set! automaton-term (cons (cons (make-trans-key kernel gs)
(cond unique-kernel)
((term? gs) automaton-term))
(set! automaton-term (cons (cons (make-trans-key kernel gs) (set! automaton-non-term (cons (cons (make-trans-key kernel gs)
unique-kernel) unique-kernel)
automaton-term))) automaton-non-term)))
(else #;(printf "~a -> ~a on ~a\n"
(set! automaton-non-term (cons (cons (make-trans-key kernel gs) (kernel->string kernel)
unique-kernel) (kernel->string unique-kernel)
automaton-non-term)))) (gram-sym-symbol gs))
#;(printf "~a -> ~a on ~a\n" (and new unique-kernel))))
(kernel->string kernel)
(kernel->string unique-kernel) (define starts (map (λ (init-prod) (list (make-item init-prod 0)))
(gram-sym-symbol gs)) (send grammar get-init-prods)))
(if new (define startk (for/list ([start (in-list starts)])
unique-kernel (define k (make-kernel start counter))
#f))) (hash-set! kernels start k)
(let loop ((gsyms grammar-symbols)) (set! counter (add1 counter))
(cond k))
((null? gsyms) null) (define new-kernels (make-queue))
(else (let loop ([old-kernels startk]
(let ((items (hash-table-get table [seen-kernels null])
(gram-sym-symbol (car gsyms)) (cond
(lambda () null)))) [(and (empty-queue? new-kernels) (null? old-kernels))
(cond (make-object lr0% automaton-term automaton-non-term
((null? items) (loop (cdr gsyms))) (list->vector (reverse seen-kernels)) epsilons)]
(else [(null? old-kernels) (loop (deq! new-kernels) seen-kernels)]
(cons (list (car gsyms) items) [else
(loop (cdr gsyms)))))))))))))) (enq! new-kernels (goto (car old-kernels)))
(loop (cdr old-kernels) (cons (car old-kernels) seen-kernels))])))
(starts
(map (lambda (init-prod) (list (make-item init-prod 0))) (define-struct q (f l) #:inspector (make-inspector) #:mutable)
(send grammar get-init-prods))) (define (empty-queue? q) (null? (q-f q)))
(startk (define (make-queue) (make-q null null))
(map (lambda (start)
(let ((k (make-kernel start counter))) (define (enq! q i)
(hash-table-put! kernels start k) (cond
(set! counter (add1 counter)) [(empty-queue? q)
k)) (let ([i (mcons i null)])
starts)) (set-q-l! q i)
(new-kernels (make-queue))) (set-q-f! q i))]
[else
(let loop ((old-kernels startk) (set-mcdr! (q-l q) (mcons i null))
(seen-kernels null)) (set-q-l! q (mcdr (q-l q)))]))
(cond
((and (empty-queue? new-kernels) (null? old-kernels))
(make-object lr0% (define (deq! q)
automaton-term (begin0
automaton-non-term (mcar (q-f q))
(list->vector (reverse seen-kernels)) (set-q-f! q (mcdr (q-f q)))))
epsilons))
((null? old-kernels)
(loop (deq! new-kernels) seen-kernels))
(else
(enq! new-kernels (goto (car old-kernels)))
(loop (cdr old-kernels) (cons (car old-kernels) seen-kernels)))))))
;; A q is a first-in/first-out queue built from a chain of mutable pairs:
;; f is the first cell of the chain (null when the queue is empty),
;; l is the last cell of the chain.
(define-struct q (f l) (make-inspector))
;; empty-queue?: q -> bool
;; The queue is empty exactly when its front field is null.
(define (empty-queue? q)
(null? (q-f q)))
;; make-queue: -> q
;; Creates a queue whose front and back are both null.
(define (make-queue)
(make-q null null))
;; enq!: q any ->
;; Adds i at the back of the queue.
(define (enq! q i)
(if (empty-queue? q)
;; first element: one fresh cell serves as both front and back
(let ((i (mcons i null)))
(set-q-l! q i)
(set-q-f! q i))
(begin
;; link a fresh cell after the current last cell, then advance l onto it
(set-mcdr! (q-l q) (mcons i null))
(set-q-l! q (mcdr (q-l q))))))
;; deq!: q -> any
;; Returns the element at the front and advances f to the next cell.
;; There is no emptiness check here: callers test empty-queue? first.
(define (deq! q)
(begin0
(mcar (q-f q))
(set-q-f! q (mcdr (q-f q)))))
)

@ -1,54 +1,54 @@
(module parser-actions mzscheme #lang racket/base
(require "grammar.rkt") (require "grammar.rkt")
(provide (all-defined-except make-reduce make-reduce*) (provide (except-out (all-defined-out) make-reduce make-reduce*)
(rename make-reduce* make-reduce)) (rename-out [make-reduce* make-reduce]))
;; An action is ;; An action is
;; - (make-shift int) ;; - (make-shift int)
;; - (make-reduce prod runtime-action) ;; - (make-reduce prod runtime-action)
;; - (make-accept) ;; - (make-accept)
;; - (make-goto int) ;; - (make-goto int)
;; - (no-action) ;; - (no-action)
;; A reduce contains a runtime-reduce so that sharing of the reduces can ;; A reduce contains a runtime-reduce so that sharing of the reduces can
;; be easily transferred to sharing of runtime-reduces. ;; be easily transferred to sharing of runtime-reduces.
(define-struct action () (make-inspector)) (define-struct action () #:inspector (make-inspector))
(define-struct (shift action) (state) (make-inspector)) (define-struct (shift action) (state) #:inspector (make-inspector))
(define-struct (reduce action) (prod runtime-reduce) (make-inspector)) (define-struct (reduce action) (prod runtime-reduce) #:inspector (make-inspector))
(define-struct (accept action) () (make-inspector)) (define-struct (accept action) () #:inspector (make-inspector))
(define-struct (goto action) (state) (make-inspector)) (define-struct (goto action) (state) #:inspector (make-inspector))
(define-struct (no-action action) () (make-inspector)) (define-struct (no-action action) () #:inspector (make-inspector))
(define (make-reduce* p) (define (make-reduce* p)
(make-reduce p (make-reduce p
(vector (prod-index p) (vector (prod-index p)
(gram-sym-symbol (prod-lhs p)) (gram-sym-symbol (prod-lhs p))
(vector-length (prod-rhs p))))) (vector-length (prod-rhs p)))))
;; A runtime-action is ;; A runtime-action is
;; non-negative-int (shift) ;; non-negative-int (shift)
;; (vector int symbol int) (reduce) ;; (vector int symbol int) (reduce)
;; 'accept (accept) ;; 'accept (accept)
;; negative-int (goto) ;; negative-int (goto)
;; #f (no-action) ;; #f (no-action)
(define (action->runtime-action a) (define (action->runtime-action a)
(cond (cond
((shift? a) (shift-state a)) [(shift? a) (shift-state a)]
((reduce? a) (reduce-runtime-reduce a)) [(reduce? a) (reduce-runtime-reduce a)]
((accept? a) 'accept) [(accept? a) 'accept]
((goto? a) (- (+ (goto-state a) 1))) [(goto? a) (- (+ (goto-state a) 1))]
((no-action? a) #f))) [(no-action? a) #f]))
(define (runtime-shift? x) (and (integer? x) (>= x 0))) (define (runtime-shift? x) (and (integer? x) (>= x 0)))
(define runtime-reduce? vector?) (define runtime-reduce? vector?)
(define (runtime-accept? x) (eq? x 'accept)) (define (runtime-accept? x) (eq? x 'accept))
(define (runtime-goto? x) (and (integer? x) (< x 0))) (define (runtime-goto? x) (and (integer? x) (< x 0)))
(define runtime-shift-state values) (define runtime-shift-state values)
(define (runtime-reduce-prod-num x) (vector-ref x 0)) (define (runtime-reduce-prod-num x) (vector-ref x 0))
(define (runtime-reduce-lhs x) (vector-ref x 1)) (define (runtime-reduce-lhs x) (vector-ref x 1))
(define (runtime-reduce-rhs-length x) (vector-ref x 2)) (define (runtime-reduce-rhs-length x) (vector-ref x 2))
(define (runtime-goto-state x) (- (+ x 1))) (define (runtime-goto-state x) (- (+ x 1)))
)

@ -1,113 +1,103 @@
(module parser-builder mzscheme #lang racket/base
(require "input-file-parser.rkt"
"grammar.rkt"
"table.rkt"
racket/class
racket/contract)
(require (for-template racket/base))
(require "input-file-parser.rkt" (provide/contract [build-parser (-> string? any/c any/c
"grammar.rkt" (listof identifier?)
"table.rkt" (listof identifier?)
mzlib/class (listof identifier?)
racket/contract) (or/c syntax? #f)
(require-for-template mzscheme) syntax?
(values any/c any/c any/c any/c))])
(provide/contract ;; fix-check-syntax : (listof identifier?) (listof identifier?) (listof identifier?)
(build-parser (-> string? any/c any/c ;; (union syntax? false/c) syntax?) -> syntax?
(listof identifier?) (define (fix-check-syntax input-terms start ends assocs prods)
(listof identifier?) (define term-binders (get-term-list input-terms))
(listof identifier?) (define get-term-binder
(or/c syntax? #f) (let ([t (make-hasheq)])
syntax? (for ([term (in-list term-binders)])
(values any/c any/c any/c any/c)))) (hash-set! t (syntax-e term) term))
(λ (x)
;; fix-check-syntax : (listof identifier?) (listof identifier?) (listof identifier?) (define r (hash-ref t (syntax-e x) (λ () #f)))
;; (union syntax? false/c) syntax?) -> syntax? (if r
(define (fix-check-syntax input-terms start ends assocs prods) (syntax-local-introduce (datum->syntax r (syntax-e x) x x))
(let* ((term-binders (get-term-list input-terms)) x))))
(get-term-binder (define rhs-list (syntax-case prods ()
(let ((t (make-hash-table))) [((_ RHS ...) ...) (syntax->list #'(RHS ... ...))]))
(for-each (with-syntax ([(TMP ...) (map syntax-local-introduce term-binders)]
(lambda (term) [(TERM-GROUP ...)
(hash-table-put! t (syntax-e term) term)) (map (λ (tg)
term-binders) (syntax-property
(lambda (x) (datum->syntax tg #f)
(let ((r (hash-table-get t (syntax-e x) (lambda () #f)))) 'disappeared-use
(if r tg))
(syntax-local-introduce (datum->syntax-object r (syntax-e x) x x)) input-terms)]
x))))) [(END ...) (map get-term-binder ends)]
(rhs-list [(START ...) (map get-term-binder start)]
(syntax-case prods () [(BIND ...) (syntax-case prods ()
(((_ rhs ...) ...) (((BIND _ ...) ...)
(syntax->list (syntax (rhs ... ...))))))) (syntax->list #'(BIND ...))))]
(with-syntax (((tmp ...) (map syntax-local-introduce term-binders)) [((BOUND ...) ...)
((term-group ...) (map (λ (rhs)
(map (lambda (tg)
(syntax-property
(datum->syntax-object tg #f)
'disappeared-use
tg))
input-terms))
((end ...)
(map get-term-binder ends))
((start ...)
(map get-term-binder start))
((bind ...)
(syntax-case prods ()
(((bind _ ...) ...)
(syntax->list (syntax (bind ...))))))
(((bound ...) ...)
(map
(lambda (rhs)
(syntax-case rhs () (syntax-case rhs ()
(((bound ...) (_ pbound) __) [((BOUND ...) (_ PBOUND) __)
(map get-term-binder (map get-term-binder
(cons (syntax pbound) (cons #'PBOUND (syntax->list #'(BOUND ...))))]
(syntax->list (syntax (bound ...)))))) [((BOUND ...) _)
(((bound ...) _)
(map get-term-binder (map get-term-binder
(syntax->list (syntax (bound ...))))))) (syntax->list #'(BOUND ...)))]))
rhs-list)) rhs-list)]
((prec ...) [(PREC ...)
(if assocs (if assocs
(map get-term-binder (map get-term-binder
(syntax-case assocs () (syntax-case assocs ()
(((__ term ...) ...) (((__ TERM ...) ...)
(syntax->list (syntax (term ... ...)))))) (syntax->list #'(TERM ... ...)))))
null))) null)])
#`(when #f #`(when #f
(let ((bind void) ... (tmp void) ...) (let ((BIND void) ... (TMP void) ...)
(void bound ... ... term-group ... start ... end ... prec ...)))))) (void BOUND ... ... TERM-GROUP ... START ... END ... PREC ...)))))
(require mzlib/list "parser-actions.rkt")
(define (build-parser filename src-pos suppress input-terms start end assocs prods) (require racket/list "parser-actions.rkt")
(let* ((grammar (parse-input input-terms start end assocs prods src-pos))
(table (build-table grammar filename suppress)) (define (build-parser filename src-pos suppress input-terms start end assocs prods)
(all-tokens (make-hash-table)) (define grammar (parse-input input-terms start end assocs prods src-pos))
(actions-code (define table (build-table grammar filename suppress))
`(vector ,@(map prod-action (send grammar get-prods))))) (define all-tokens (make-hasheq))
(for-each (lambda (term) (define actions-code `(vector ,@(map prod-action (send grammar get-prods))))
(hash-table-put! all-tokens (gram-sym-symbol term) #t))
(send grammar get-terms)) (for ([term (in-list (send grammar get-terms))])
#;(let ((num-states (vector-length table)) (hash-set! all-tokens (gram-sym-symbol term) #t))
(num-gram-syms (+ (send grammar get-num-terms)
(send grammar get-num-non-terms))) #;(let ((num-states (vector-length table))
(num-ht-entries (apply + (map length (vector->list table)))) (num-gram-syms (+ (send grammar get-num-terms)
(num-reduces (send grammar get-num-non-terms)))
(let ((ht (make-hash-table))) (num-ht-entries (apply + (map length (vector->list table))))
(for-each (num-reduces
(lambda (x) (let ((ht (make-hasheq)))
(when (reduce? x) (for-each
(hash-table-put! ht x #t))) (λ (x)
(map cdr (apply append (vector->list table)))) (when (reduce? x)
(length (hash-table-map ht void))))) (hash-set! ht x #t)))
(printf "~a states, ~a grammar symbols, ~a hash-table entries, ~a reduces\n" (map cdr (apply append (vector->list table))))
num-states num-gram-syms num-ht-entries num-reduces) (length (hash-table-map ht void)))))
(printf "~a -- ~aKB, previously ~aKB\n" (printf "~a states, ~a grammar symbols, ~a hash-table entries, ~a reduces\n"
(/ (+ 2 num-states num-states num-gram-syms num-ht-entries num-reduces)
(* 4 num-states) (* 2 1.5 num-ht-entries) (printf "~a -- ~aKB, previously ~aKB\n"
(* 5 num-reduces)) 256.0) (/ (+ 2 num-states
(/ (+ 2 num-states (* 4 num-states) (* 2 1.5 num-ht-entries)
(* 4 num-states) (* 2 2.3 num-ht-entries) (* 5 num-reduces)) 256.0)
(* 5 num-reduces)) 256.0) (/ (+ 2 num-states
(/ (+ 2 (* num-states num-gram-syms) (* 5 num-reduces)) 256.0))) (* 4 num-states) (* 2 2.3 num-ht-entries)
(values table (* 5 num-reduces)) 256.0)
all-tokens (/ (+ 2 (* num-states num-gram-syms) (* 5 num-reduces)) 256.0)))
actions-code (values table
(fix-check-syntax input-terms start end assocs prods)))) all-tokens
actions-code
(fix-check-syntax input-terms start end assocs prods)))
)

@ -1,290 +1,264 @@
#lang scheme/base #lang racket/base
(require "grammar.rkt"
"lr0.rkt"
"lalr.rkt"
"parser-actions.rkt"
racket/contract
racket/list
racket/class)
;; Routine to build the LALR table ;; Routine to build the LALR table
(require "grammar.rkt"
"lr0.rkt"
"lalr.rkt"
"parser-actions.rkt"
racket/contract
mzlib/list
mzlib/class)
(define (is-a-grammar%? x) (is-a? x grammar%)) (define (is-a-grammar%? x) (is-a? x grammar%))
(provide/contract (provide/contract
(build-table (-> is-a-grammar%? string? any/c (build-table (-> is-a-grammar%? string? any/c
(vectorof (listof (cons/c (or/c term? non-term?) action?)))))) (vectorof (listof (cons/c (or/c term? non-term?) action?))))))
;; A parse-table is (vectorof (listof (cons/c gram-sym? action))) ;; A parse-table is (vectorof (listof (cons/c gram-sym? action)))
;; A grouped-parse-table is (vectorof (listof (cons/c gram-sym? (listof action)))) ;; A grouped-parse-table is (vectorof (listof (cons/c gram-sym? (listof action))))
;; make-parse-table : int -> parse-table ;; make-parse-table : int -> parse-table
(define (make-parse-table num-states) (define (make-parse-table num-states)
(make-vector num-states null)) (make-vector num-states null))
;; table-add!: parse-table nat symbol action -> ;; table-add!: parse-table nat symbol action ->
(define (table-add! table state-index symbol val) (define (table-add! table state-index symbol val)
(vector-set! table state-index (cons (cons symbol val) (vector-set! table state-index (cons (cons symbol val)
(vector-ref table state-index)))) (vector-ref table state-index))))
;; group-table : parse-table -> grouped-parse-table ;; group-table : parse-table -> grouped-parse-table
(define (group-table table) (define (group-table table)
(list->vector (list->vector
(map (for/list ([state-entry (in-list (vector->list table))])
(lambda (state-entry) (define ht (make-hasheq))
(let ((ht (make-hash))) (for* ([gs/actions (in-list state-entry)]
(for-each [group (in-value (hash-ref ht (car gs/actions) (λ () null)))]
(lambda (gs/actions) #:unless (member (cdr gs/actions) group))
(let ((group (hash-ref ht (car gs/actions) (lambda () null)))) (hash-set! ht (car gs/actions) (cons (cdr gs/actions) group)))
(unless (member (cdr gs/actions) group) (hash-map ht cons))))
(hash-set! ht (car gs/actions) (cons (cdr gs/actions) group)))))
state-entry)
(hash-map ht cons)))
(vector->list table))))
;; table-map : (vectorof (listof (cons/c gram-sym? X))) (gram-sym? X -> Y) -> ;; table-map : (vectorof (listof (cons/c gram-sym? X))) (gram-sym? X -> Y) ->
;; (vectorof (listof (cons/c gram-sym? Y))) ;; (vectorof (listof (cons/c gram-sym? Y)))
(define (table-map f table) (define (table-map f table)
(list->vector (list->vector
(map (for/list ([state-entry (in-list (vector->list table))])
(lambda (state-entry) (for/list ([gs/X (in-list state-entry)])
(map (cons (car gs/X) (f (car gs/X) (cdr gs/X)))))))
(lambda (gs/X)
(cons (car gs/X) (f (car gs/X) (cdr gs/X))))
state-entry))
(vector->list table))))
;; bit-vector-for-each : (nat -> any) integer ->
;; Treats the integer bv as a set of bit positions and calls f once for
;; every position whose bit is 1, starting at bit 0 and moving upward.
;; The walk stops as soon as the remaining bits are all zero.
(define (bit-vector-for-each f bv)
  (let walk ([bits bv] [pos 0])
    (unless (zero? bits)
      (when (bitwise-bit-set? bits 0)
        (f pos))
      (walk (arithmetic-shift bits -1) (add1 pos)))))
;; bit-vector-for-each: (nat -> any) integer ->
;; Calls f with the index of every 1 bit in bv, lowest bit first.
(define (bit-vector-for-each f bv)
;; note: the local name for-each shadows the built-in for-each inside this body
(letrec ((for-each
(lambda (bv number)
(cond
;; no bits left: done
((= 0 bv) (void))
;; low bit set: report its index, then continue with the next bit
((= 1 (bitwise-and 1 bv))
(f number)
(for-each (arithmetic-shift bv -1) (add1 number)))
(else (for-each (arithmetic-shift bv -1) (add1 number)))))))
(for-each bv 0)))
;; print-entry: symbol action output-port ->
;; Writes one tab-separated row for lookahead sym to port: the symbol, the
;; kind of action a ("shift", "reduce", "accept" or "goto") and its target.
;; Any other action prints nothing.
(define (print-entry sym a port)
  ;; emit : string any -> ; prints a single row using the shared layout
  (define (emit kind target)
    (fprintf port "\t~a\t\t\t\t\t~a\t~a\n" sym kind target))
  (cond
    [(shift? a) (emit "shift" (shift-state a))]
    [(reduce? a) (emit "reduce" (prod-index (reduce-prod a)))]
    [(accept? a) (emit "accept" "")]
    [(goto? a) (emit "goto" (goto-state a))]))
;; print-entry: symbol action output-port ->
;; prints the action a for lookahead sym to the given port
;; Each row is tab-separated: the symbol, the action kind, and the action's
;; target (state number, production index, or "" for accept).
;; An action matching none of the four predicates prints nothing.
(define (print-entry sym a port)
(let ((s "\t~a\t\t\t\t\t~a\t~a\n"))
(cond
((shift? a)
(fprintf port s sym "shift" (shift-state a)))
((reduce? a)
(fprintf port s sym "reduce" (prod-index (reduce-prod a))))
((accept? a)
(fprintf port s sym "accept" ""))
((goto? a)
(fprintf port s sym "goto" (goto-state a))))))
;; count: ('a -> bool) * 'a list -> num
;; Returns how many elements of list make pred answer true.
;; Elements are tested from first to last.
(define (count pred list)
  (for/fold ([total 0])
            ([elem (in-list list)]
             #:when (pred elem))
    (add1 total)))
;; count: ('a -> bool) * 'a list -> num ;; display-parser: LR0-automaton grouped-parse-table (listof prod?) output-port ->
;; counts the number of elements in list that satisfy pred ;; Prints out the parser given by table.
(define (count pred list) (define (display-parser a grouped-table prods port)
(cond (define SR-conflicts 0)
((null? list) 0) (define RR-conflicts 0)
((pred (car list)) (+ 1 (count pred (cdr list)))) (for ([prod (in-list prods)])
(else (count pred (cdr list))))) (fprintf port
"~a\t~a\t=\t~a\n"
(prod-index prod)
(gram-sym-symbol (prod-lhs prod))
(map gram-sym-symbol (vector->list (prod-rhs prod)))))
;; display-parser: LR0-automaton grouped-parse-table (listof prod?) output-port -> (send a for-each-state
;; Prints out the parser given by table. (λ (state)
(define (display-parser a grouped-table prods port)
(let* ((SR-conflicts 0)
(RR-conflicts 0))
(for-each
(lambda (prod)
(fprintf port
"~a\t~a\t=\t~a\n"
(prod-index prod)
(gram-sym-symbol (prod-lhs prod))
(map gram-sym-symbol (vector->list (prod-rhs prod)))))
prods)
(send a for-each-state
(lambda (state)
(fprintf port "State ~a\n" (kernel-index state)) (fprintf port "State ~a\n" (kernel-index state))
(for-each (lambda (item) (for ([item (in-list (kernel-items state))])
(fprintf port "\t~a\n" (item->string item))) (fprintf port "\t~a\n" (item->string item)))
(kernel-items state))
(newline port) (newline port)
(for-each (for ([gs/action (in-list (vector-ref grouped-table (kernel-index state)))])
(lambda (gs/action) (define sym (gram-sym-symbol (car gs/action)))
(let ((sym (gram-sym-symbol (car gs/action))) (define act (cdr gs/action))
(act (cdr gs/action))) (cond
(cond [(null? act) (void)]
((null? act) (void)) [(null? (cdr act))
((null? (cdr act)) (print-entry sym (car act) port)]
(print-entry sym (car act) port)) [else
(else (fprintf port "begin conflict:\n")
(fprintf port "begin conflict:\n") (when (> (count reduce? act) 1)
(when (> (count reduce? act) 1) (set! RR-conflicts (add1 RR-conflicts)))
(set! RR-conflicts (add1 RR-conflicts))) (when (> (count shift? act) 0)
(when (> (count shift? act) 0) (set! SR-conflicts (add1 SR-conflicts)))
(set! SR-conflicts (add1 SR-conflicts))) (map (λ (x) (print-entry sym x port)) act)
(map (lambda (x) (print-entry sym x port)) act) (fprintf port "end conflict\n")]))
(fprintf port "end conflict\n")))))
(vector-ref grouped-table (kernel-index state)))
(newline port))) (newline port)))
(when (> SR-conflicts 0) (when (> SR-conflicts 0)
(fprintf port "~a shift/reduce conflict~a\n" (fprintf port "~a shift/reduce conflict~a\n"
SR-conflicts SR-conflicts
(if (= SR-conflicts 1) "" "s"))) (if (= SR-conflicts 1) "" "s")))
(when (> RR-conflicts 0) (when (> RR-conflicts 0)
(fprintf port "~a reduce/reduce conflict~a\n" (fprintf port "~a reduce/reduce conflict~a\n"
RR-conflicts RR-conflicts
(if (= RR-conflicts 1) "" "s"))))) (if (= RR-conflicts 1) "" "s"))))
;; resolve-conflict : (listof action?) -> action? bool bool
;; Picks a single action from the candidates for one table cell and returns
;; it together with two flags: whether the cell held a shift/reduce conflict
;; and whether it held a reduce/reduce conflict.
(define (resolve-conflict actions)
(cond
;; empty cell: no action, no conflicts
((null? actions) (values (make-no-action) #f #f))
;; single candidate: nothing to resolve
((null? (cdr actions))
(values (car actions) #f #f))
(else
;; with two or more candidates, any shift counts as an SR conflict and
;; more than one reduce counts as an RR conflict
(let ((SR-conflict? (> (count shift? actions) 0))
(RR-conflict? (> (count reduce? actions) 1)))
(let loop ((current-guess #f)
(rest actions))
(cond
((null? rest) (values current-guess SR-conflict? RR-conflict?))
;; a shift wins immediately over whatever has been seen so far
((shift? (car rest)) (values (car rest) SR-conflict? RR-conflict?))
;; the first non-shift candidate becomes the initial guess
((not current-guess)
(loop (car rest) (cdr rest)))
;; among reduces, prefer the production with the smaller index
((and (reduce? (car rest))
(< (prod-index (reduce-prod (car rest)))
(prod-index (reduce-prod current-guess))))
(loop (car rest) (cdr rest)))
;; an accept in a conflicting cell is reported and then ignored
((accept? (car rest))
(eprintf "accept/reduce or accept/shift conflicts. Check the grammar for useless cycles of productions\n")
(loop current-guess (cdr rest)))
(else (loop current-guess (cdr rest)))))))))
;; resolve-conflicts : grouped-parse-table bool -> parse-table ;; resolve-conflict : (listof action?) -> action? bool bool
(define (resolve-conflicts grouped-table suppress) (define (resolve-conflict actions)
(let* ((SR-conflicts 0) (cond
(RR-conflicts 0) [(null? actions) (values (make-no-action) #f #f)]
(table (table-map [(null? (cdr actions)) (values (car actions) #f #f)]
(lambda (gs actions) [else
(let-values (((action SR? RR?) (define SR-conflict? (> (count shift? actions) 0))
(resolve-conflict actions))) (define RR-conflict? (> (count reduce? actions) 1))
(when SR? (let loop ((current-guess #f)
(set! SR-conflicts (add1 SR-conflicts))) (rest actions))
(when RR? (cond
(set! RR-conflicts (add1 RR-conflicts))) [(null? rest) (values current-guess SR-conflict? RR-conflict?)]
action)) [(shift? (car rest)) (values (car rest) SR-conflict? RR-conflict?)]
grouped-table))) [(not current-guess) (loop (car rest) (cdr rest))]
(unless suppress [(and (reduce? (car rest))
(when (> SR-conflicts 0) (< (prod-index (reduce-prod (car rest)))
(eprintf "~a shift/reduce conflict~a\n" (prod-index (reduce-prod current-guess))))
SR-conflicts (loop (car rest) (cdr rest))]
(if (= SR-conflicts 1) "" "s"))) [(accept? (car rest))
(when (> RR-conflicts 0) (eprintf "accept/reduce or accept/shift conflicts. Check the grammar for useless cycles of productions\n")
(eprintf "~a reduce/reduce conflict~a\n" (loop current-guess (cdr rest))]
RR-conflicts [else (loop current-guess (cdr rest))]))]))
(if (= RR-conflicts 1) "" "s"))))
table))
;; resolve-conflicts : grouped-parse-table bool -> parse-table
;; Collapses every cell of grouped-table to the single action chosen by
;; resolve-conflict, tallying how many cells had shift/reduce and
;; reduce/reduce conflicts. Unless suppress is true, non-zero tallies are
;; reported on the error port. Returns the resolved table.
(define (resolve-conflicts grouped-table suppress)
  (define sr-total 0)
  (define rr-total 0)
  ;; pick : gram-sym (listof action) -> action ; also updates the tallies
  (define (pick gs actions)
    (define-values (chosen sr? rr?) (resolve-conflict actions))
    (when sr? (set! sr-total (add1 sr-total)))
    (when rr? (set! rr-total (add1 rr-total)))
    chosen)
  (define resolved (table-map pick grouped-table))
  ;; report : string nat -> ; prints a tally, pluralizing when it is not 1
  (define (report fmt total)
    (when (> total 0)
      (eprintf fmt total (if (= total 1) "" "s"))))
  (unless suppress
    (report "~a shift/reduce conflict~a\n" sr-total)
    (report "~a reduce/reduce conflict~a\n" rr-total))
  resolved)
;; resolve-sr-conflict : (listof action) (union int #f) -> (listof action)
;; Resolves a single shift-reduce conflict, if precedences are in place.
;; actions is expected to hold exactly two actions, one of them a shift;
;; the other is treated as the reduce. shift-prec is read with prec-num and
;; prec-assoc, so it is a precedence record (or #f), not a bare int.
(define (resolve-sr-conflict/prec actions shift-prec)
(let* ((shift (if (shift? (car actions))
(car actions)
(cadr actions)))
(reduce (if (shift? (car actions))
(cadr actions)
(car actions)))
(reduce-prec (prod-prec (reduce-prod reduce))))
(cond
((and shift-prec reduce-prec)
(cond
;; different precedence levels: the higher level wins
((< (prec-num shift-prec) (prec-num reduce-prec))
(list reduce))
((> (prec-num shift-prec) (prec-num reduce-prec))
(list shift))
;; equal levels: associativity decides; left reduces, right shifts
((eq? 'left (prec-assoc shift-prec))
(list reduce))
((eq? 'right (prec-assoc shift-prec))
(list shift))
;; any other associativity leaves the cell with no action
(else null)))
;; without both precedences the conflict is left untouched
(else actions))))
;; resolve-sr-conflict/prec : (listof action) (union prec #f) -> (listof action)
;; Settles one shift/reduce conflict using precedence information.
;; actions holds two actions, one of which is a shift; the other is taken to
;; be the reduce. When both the shifted terminal and the reduced production
;; have a precedence, the higher level wins; on a tie 'left associativity
;; keeps the reduce, 'right keeps the shift, and anything else keeps neither.
;; If either precedence is missing, actions is returned unchanged.
(define (resolve-sr-conflict/prec actions shift-prec)
  (define-values (shift reduce)
    (if (shift? (car actions))
        (values (car actions) (cadr actions))
        (values (cadr actions) (car actions))))
  (define reduce-prec (prod-prec (reduce-prod reduce)))
  (if (and shift-prec reduce-prec)
      (let ([shift-level (prec-num shift-prec)]
            [reduce-level (prec-num reduce-prec)])
        (cond
          [(< shift-level reduce-level) (list reduce)]
          [(> shift-level reduce-level) (list shift)]
          [else
           (case (prec-assoc shift-prec)
             [(left) (list reduce)]
             [(right) (list shift)]
             [else null])]))
      actions))
;; resolve-prec-conflicts : parse-table -> grouped-parse-table
;; Groups the table's actions per grammar symbol, then applies precedence
;; resolution to every cell that is a two-way conflict on a terminal with at
;; least one shift. All other cells are passed through unchanged.
(define (resolve-prec-conflicts table)
(table-map
(lambda (gs actions)
(cond
;; only terminals carry a precedence (term-prec), and only a pair of
;; actions containing a shift is handled by resolve-sr-conflict/prec
((and (term? gs)
(= 2 (length actions))
(or (shift? (car actions))
(shift? (cadr actions))))
(resolve-sr-conflict/prec actions (term-prec gs)))
(else actions)))
(group-table table)))
;; build-table: grammar string bool -> parse-table ;; resolve-prec-conflicts : parse-table -> grouped-parse-table
(define (build-table g file suppress) (define (resolve-prec-conflicts table)
(let* ((a (build-lr0-automaton g)) (table-map
(term-vector (list->vector (send g get-terms))) (λ (gs actions)
(end-terms (send g get-end-terms)) (cond
(table (make-parse-table (send a get-num-states))) [(and (term? gs)
(get-lookahead (compute-LA a g)) (= 2 (length actions))
(reduce-cache (make-hash))) (or (shift? (car actions))
(shift? (cadr actions))))
(resolve-sr-conflict/prec actions (term-prec gs))]
[else actions]))
(group-table table)))
(for-each ;; build-table: grammar string bool -> parse-table
(lambda (trans-key/state) (define (build-table g file suppress)
(let ((from-state-index (kernel-index (trans-key-st (car trans-key/state)))) (define a (build-lr0-automaton g))
(gs (trans-key-gs (car trans-key/state))) (define term-vector (list->vector (send g get-terms)))
(to-state (cdr trans-key/state))) (define end-terms (send g get-end-terms))
(table-add! table from-state-index gs (define table (make-parse-table (send a get-num-states)))
(cond (define get-lookahead (compute-LA a g))
((non-term? gs) (define reduce-cache (make-hash))
(make-goto (kernel-index to-state))) (for ([trans-key/state (in-list (send a get-transitions))])
((member gs end-terms) (define from-state-index (kernel-index (trans-key-st (car trans-key/state))))
(make-accept)) (define gs (trans-key-gs (car trans-key/state)))
(else (define to-state (cdr trans-key/state))
(make-shift
(kernel-index to-state)))))))
(send a get-transitions))
(send a for-each-state (table-add! table from-state-index gs
(lambda (state) (cond
(for-each ((non-term? gs)
(lambda (item) (make-goto (kernel-index to-state)))
(let ((item-prod (item-prod item))) ((member gs end-terms)
(bit-vector-for-each (make-accept))
(lambda (term-index) (else
(unless (start-item? item) (make-shift
(let ((r (hash-ref reduce-cache item-prod (kernel-index to-state))))))
(lambda () (send a for-each-state
(let ((r (make-reduce item-prod))) (λ (state)
(hash-set! reduce-cache item-prod r) (for ([item (in-list (append (hash-ref (send a get-epsilon-trans) state (λ () null))
r))))) (filter (λ (item)
(table-add! table (not (move-dot-right item)))
(kernel-index state) (kernel-items state))))])
(vector-ref term-vector term-index) (let ([item-prod (item-prod item)])
r)))) (bit-vector-for-each
(get-lookahead state item-prod)))) (λ (term-index)
(append (hash-ref (send a get-epsilon-trans) state (lambda () null)) (unless (start-item? item)
(filter (lambda (item) (let ((r (hash-ref reduce-cache item-prod
(not (move-dot-right item))) (λ ()
(kernel-items state)))))) (let ((r (make-reduce item-prod)))
(hash-set! reduce-cache item-prod r)
r)))))
(table-add! table
(kernel-index state)
(vector-ref term-vector term-index)
r))))
(get-lookahead state item-prod))))))
(let ((grouped-table (resolve-prec-conflicts table))) (define grouped-table (resolve-prec-conflicts table))
(unless (string=? file "") (unless (string=? file "")
(with-handlers [(exn:fail:filesystem? (with-handlers [(exn:fail:filesystem?
(lambda (e) (λ (e)
(eprintf (eprintf
"Cannot write debug output to file \"~a\": ~a\n" "Cannot write debug output to file \"~a\": ~a\n"
file file
(exn-message e))))] (exn-message e))))]
(call-with-output-file file (call-with-output-file file
(lambda (port) (λ (port)
(display-parser a grouped-table (send g get-prods) port)) (display-parser a grouped-table (send g get-prods) port))
#:exists 'truncate))) #:exists 'truncate)))
(resolve-conflicts grouped-table suppress)))) (resolve-conflicts grouped-table suppress))

@ -1,118 +1,71 @@
(module yacc-helper mzscheme #lang racket/base
(require (prefix-in rl: racket/list)
"../private-lex/token-syntax.rkt")
(require mzlib/list ;; General helper routines
"../private-lex/token-syntax.rkt") (provide duplicate-list? remove-duplicates overlap? vector-andmap display-yacc)
;; General helper routines

;; vector-andmap: ('a -> bool) * 'a vector -> bool
;; Returns #t when pred holds for every element of vec (and for an empty
;; vector); otherwise returns #f at the first element that fails.
;; The predicate must be applied to each element, not to the vector itself.
(define (vector-andmap pred vec)
  (for/and ([item (in-vector vec)])
    (pred item)))
(provide duplicate-list? remove-duplicates overlap? vector-andmap display-yacc) ;; duplicate-list?: symbol list -> #f | symbol
;; returns a symbol that exists twice in l, or false if no such symbol
;; exists
(define (duplicate-list? syms)
(rl:check-duplicates syms eq?))
(define (vector-andmap f v) ;; remove-duplicates: syntax-object list -> syntax-object list
(let loop ((i 0)) ;; removes the duplicates from the lists
(cond (define (remove-duplicates syms)
((= i (vector-length v)) #t) (rl:remove-duplicates syms equal? #:key syntax->datum))
(else (if (f (vector-ref v i))
(loop (add1 i))
#f)))))
;; duplicate-list?: symbol list -> #f | symbol ;; overlap?: symbol list * symbol list -> #f | symbol
;; returns a symbol that exists twice in l, or false if no such symbol ;; Returns an symbol in l1 intersect l2, or #f is no such symbol exists
;; exists (define (overlap? syms1 syms2)
(define (duplicate-list? l) (for/first ([sym1 (in-list syms1)]
(letrec ((t (make-hash-table)) #:when (memq sym1 syms2))
(dl? (lambda (l) sym1))
(cond
((null? l) #f)
((hash-table-get t (car l) (lambda () #f)) =>
(lambda (x) x))
(else
(hash-table-put! t (car l) (car l))
(dl? (cdr l)))))))
(dl? l)))
;; remove-duplicates: syntax-object list -> syntax-object list
;; removes the duplicates from the lists
;; Two syntax objects count as duplicates when their datums collide as keys
;; of the table t; the first occurrence is kept and the original order is
;; preserved.
(define (remove-duplicates sl)
(let ((t (make-hash-table)))
(letrec ((x
(lambda (sl)
(cond
((null? sl) sl)
;; datum already recorded: drop this element
((hash-table-get t (syntax-object->datum (car sl)) (lambda () #f))
(x (cdr sl)))
(else
;; first time this datum is seen: record it and keep the element
(hash-table-put! t (syntax-object->datum (car sl)) #t)
(cons (car sl) (x (cdr sl))))))))
(x sl))))
;; overlap?: symbol list * symbol list -> #f | symbol (define (display-yacc grammar tokens start precs port)
;; Returns an symbol in l1 intersect l2, or #f is no such symbol exists (let-syntax ([p (syntax-rules ()
(define (overlap? l1 l2) ((_ args ...) (fprintf port args ...)))])
(let/ec ret (let* ([tokens (map syntax-local-value tokens)]
(let ((t (make-hash-table))) [eterms (filter e-terminals-def? tokens)]
(for-each (lambda (s1) [terms (filter terminals-def? tokens)]
(hash-table-put! t s1 s1)) [term-table (make-hasheq)]
l1) [display-rhs
(for-each (lambda (s2) (λ (rhs)
(cond (for ([sym (in-list (car rhs))])
((hash-table-get t s2 (lambda () #f)) => (p "~a " (hash-ref term-table sym (λ () sym))))
(lambda (o) (ret o))))) (when (= 3 (length rhs))
l2) (p "%prec ~a" (cadadr rhs)))
#f))) (p "\n"))])
(for* ([t (in-list eterms)]
[t (in-list (syntax->datum (e-terminals-def-t t)))])
(hash-set! term-table t (format "'~a'" t)))
(for* ([t (in-list terms)]
[t (in-list (syntax->datum (terminals-def-t t)))])
(p "%token ~a\n" t)
(hash-set! term-table t (format "~a" t)))
(when precs
(for ([prec (in-list precs)])
(p "%~a " (car prec))
(for ([tok (in-list (cdr prec))])
(p " ~a" (hash-ref term-table tok)))
(p "\n")))
(p "%start ~a\n" start)
(p "%%\n")
(for ([prod (in-list grammar)])
(define nt (car prod))
(p "~a: " nt)
(display-rhs (cadr prod))
(for ([rhs (in-list (cddr prod))])
(p "| ")
(display-rhs rhs))
(p ";\n"))
(p "%%\n"))))
;; display-yacc: writes the grammar to port as yacc-style text:
;; %token declarations, optional precedence lines, %start, and the
;; productions enclosed between two %% markers.
;; tokens is a list of identifiers that are resolved with syntax-local-value;
;; precs may be #f, in which case no precedence lines are written.
(define (display-yacc grammar tokens start precs port)
;; p is a local shorthand for fprintf on port
(let-syntax ((p (syntax-rules ()
((_ args ...) (fprintf port args ...)))))
(let* ((tokens (map syntax-local-value tokens))
(eterms (filter e-terminals-def? tokens))
(terms (filter terminals-def? tokens))
;; term-table maps a terminal name to the text printed for it
(term-table (make-hash-table))
;; display-rhs prints one right-hand side: its symbols, then a %prec
;; annotation when the rhs has three parts, then a newline
(display-rhs
(lambda (rhs)
(for-each (lambda (sym) (p "~a " (hash-table-get term-table sym (lambda () sym))))
(car rhs))
(if (= 3 (length rhs))
(p "%prec ~a" (cadadr rhs)))
(p "\n"))))
;; terminals from e-terminals-def groups are printed quoted and get no %token line
(for-each
(lambda (t)
(for-each
(lambda (t)
(hash-table-put! term-table t (format "'~a'" t)))
(syntax-object->datum (e-terminals-def-t t))))
eterms)
;; terminals from terminals-def groups are declared with %token and printed by name
(for-each
(lambda (t)
(for-each
(lambda (t)
(p "%token ~a\n" t)
(hash-table-put! term-table t (format "~a" t)))
(syntax-object->datum (terminals-def-t t))))
terms)
;; one line per precedence declaration: the car of prec, then its tokens
;; (the token lookup has no default, so an unknown token raises an error)
(if precs
(for-each (lambda (prec)
(p "%~a " (car prec))
(for-each (lambda (tok)
(p " ~a" (hash-table-get term-table tok)))
(cdr prec))
(p "\n"))
precs))
(p "%start ~a\n" start)
(p "%%\n")
;; each production: "nt: " and the first alternative, then "| " before each further one
(for-each (lambda (prod)
(let ((nt (car prod)))
(p "~a: " nt)
(display-rhs (cadr prod))
(for-each (lambda (rhs)
(p "| ")
(display-rhs rhs))
(cddr prod))
(p ";\n")))
grammar)
(p "%%\n"))))
)

@ -1,135 +1,130 @@
(module yacc-to-scheme mzscheme #lang racket/base
(require br-parser-tools/lex (require br-parser-tools/lex
(prefix : br-parser-tools/lex-sre) (prefix-in : br-parser-tools/lex-sre)
br-parser-tools/yacc br-parser-tools/yacc
syntax/readerr syntax/readerr
mzlib/list) racket/list)
(provide trans) (provide trans)
(define match-double-string (define match-double-string
(lexer (lexer
((:+ (:~ #\" #\\)) (append (string->list lexeme) [(:+ (:~ #\" #\\)) (append (string->list lexeme)
(match-double-string input-port))) (match-double-string input-port))]
((:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port))) [(:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port))]
(#\" null))) [#\" null]))
(define match-single-string (define match-single-string
(lexer (lexer
((:+ (:~ #\' #\\)) (append (string->list lexeme) [(:+ (:~ #\' #\\)) (append (string->list lexeme)
(match-single-string input-port))) (match-single-string input-port))]
((:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port))) [(:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port))]
(#\' null))) [#\' null]))
(define-lex-abbrevs (define-lex-abbrevs
(letter (:or (:/ "a" "z") (:/ "A" "Z"))) [letter (:or (:/ "a" "z") (:/ "A" "Z"))]
(digit (:/ "0" "9")) [digit (:/ "0" "9")]
(initial (:or letter (char-set "!$%&*/<=>?^_~@"))) [initial (:or letter (char-set "!$%&*/<=>?^_~@"))]
(subsequent (:or initial digit (char-set "+-.@"))) [subsequent (:or initial digit (char-set "+-.@"))]
(comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/"))) [comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/")])
(define-empty-tokens x (define-empty-tokens x (EOF PIPE |:| SEMI |%%| %prec))
(EOF PIPE |:| SEMI |%%| %prec)) (define-tokens y (SYM STRING))
(define-tokens y
(SYM STRING))
(define get-token-grammar (define get-token-grammar
(lexer-src-pos (lexer-src-pos
("%%" '|%%|) ["%%" '|%%|]
(":" (string->symbol lexeme)) [":" (string->symbol lexeme)]
("%prec" (string->symbol lexeme)) ["%prec" (string->symbol lexeme)]
(#\| 'PIPE) [#\| 'PIPE]
((:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}"))) [(:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}")))
(return-without-pos (get-token-grammar input-port))) (return-without-pos (get-token-grammar input-port))]
(#\; 'SEMI) [#\; 'SEMI]
(#\' (token-STRING (string->symbol (list->string (match-single-string input-port))))) [#\' (token-STRING (string->symbol (list->string (match-single-string input-port))))]
(#\" (token-STRING (string->symbol (list->string (match-double-string input-port))))) [#\" (token-STRING (string->symbol (list->string (match-double-string input-port))))]
((:: initial (:* subsequent)) (token-SYM (string->symbol lexeme))))) [(:: initial (:* subsequent)) (token-SYM (string->symbol lexeme))]))
(define (parse-grammar enter-term enter-empty-term enter-non-term) (define (parse-grammar enter-term enter-empty-term enter-non-term)
(parser (parser
(tokens x y) (tokens x y)
(src-pos) (src-pos)
(error (lambda (tok-ok tok-name tok-value start-pos end-pos) (error (λ (tok-ok tok-name tok-value start-pos end-pos)
(raise-read-error (raise-read-error
(format "Error Parsing YACC grammar at token: ~a with value: ~a" tok-name tok-value) (format "Error Parsing YACC grammar at token: ~a with value: ~a" tok-name tok-value)
(file-path) (file-path)
(position-line start-pos) (position-line start-pos)
(position-col start-pos) (position-col start-pos)
(position-offset start-pos) (position-offset start-pos)
(- (position-offset end-pos) (position-offset start-pos))))) (- (position-offset end-pos) (position-offset start-pos)))))
(end |%%|) (end |%%|)
(start gram) (start gram)
(grammar (grammar
(gram (gram
((production) (list $1)) ((production) (list $1))
((production gram) (cons $1 $2))) ((production gram) (cons $1 $2)))
(production (production
((SYM |:| prods SEMI) ((SYM |:| prods SEMI)
(begin (begin
(enter-non-term $1) (enter-non-term $1)
(cons $1 $3)))) (cons $1 $3))))
(prods (prods
((rhs) (list `(,$1 #f))) ((rhs) (list `(,$1 #f)))
((rhs prec) (list `(,$1 ,$2 #f))) ((rhs prec) (list `(,$1 ,$2 #f)))
((rhs PIPE prods) (cons `(,$1 #f) $3)) ((rhs PIPE prods) (cons `(,$1 #f) $3))
((rhs prec PIPE prods) (cons `(,$1 ,$2 #f) $4))) ((rhs prec PIPE prods) (cons `(,$1 ,$2 #f) $4)))
(prec (prec
((%prec SYM) ((%prec SYM)
(begin (begin
(enter-term $2) (enter-term $2)
(list 'prec $2))) (list 'prec $2)))
((%prec STRING) ((%prec STRING)
(begin (begin
(enter-empty-term $2) (enter-empty-term $2)
(list 'prec $2)))) (list 'prec $2))))
(rhs (rhs
(() null) (() null)
((SYM rhs) ((SYM rhs)
(begin (begin
(enter-term $1) (enter-term $1)
(cons $1 $2))) (cons $1 $2)))
((STRING rhs) ((STRING rhs)
(begin (begin
(enter-empty-term $1) (enter-empty-term $1)
(cons $1 $2))))))) (cons $1 $2)))))))
(define (symbol<? a b) (define (symbol<? a b)
(string<? (symbol->string a) (symbol->string b))) (string<? (symbol->string a) (symbol->string b)))
(define (trans filename) (define (trans filename)
(let* ((i (open-input-file filename)) (define i (open-input-file filename))
(terms (make-hash-table)) (define terms (make-hasheq))
(eterms (make-hash-table)) (define eterms (make-hasheq))
(nterms (make-hash-table)) (define nterms (make-hasheq))
(enter-term (define (enter-term s)
(lambda (s) (when (not (hash-ref nterms s (λ () #f)))
(if (not (hash-table-get nterms s (lambda () #f))) (hash-set! terms s #t)))
(hash-table-put! terms s #t)))) (define (enter-empty-term s)
(enter-empty-term (when (not (hash-ref nterms s (λ () #f)))
(lambda (s) (hash-set! eterms s #t)))
(if (not (hash-table-get nterms s (lambda () #f))) (define (enter-non-term s)
(hash-table-put! eterms s #t)))) (hash-remove! terms s)
(enter-non-term (hash-remove! eterms s)
(lambda (s) (hash-set! nterms s #t))
(hash-table-remove! terms s) (port-count-lines! i)
(hash-table-remove! eterms s) (file-path filename)
(hash-table-put! nterms s #t)))) (regexp-match "%%" i)
(port-count-lines! i) (begin0
(file-path filename) (let ([gram ((parse-grammar enter-term enter-empty-term enter-non-term)
(regexp-match "%%" i) (λ ()
(begin0 (let ((t (get-token-grammar i)))
(let ((gram ((parse-grammar enter-term enter-empty-term enter-non-term) t)))])
(lambda () `(begin
(let ((t (get-token-grammar i))) (define-tokens t ,(sort (hash-map terms (λ (k v) k)) symbol<?))
t))))) (define-empty-tokens et ,(sort (hash-map eterms (λ (k v) k)) symbol<?))
`(begin (parser
(define-tokens t ,(sort (hash-table-map terms (lambda (k v) k)) symbol<?)) (start ___)
(define-empty-tokens et ,(sort (hash-table-map eterms (lambda (k v) k)) symbol<?)) (end ___)
(parser (error ___)
(start ___) (tokens t et)
(end ___) (grammar ,@gram))))
(error ___) (close-input-port i)))
(tokens t et)
(grammar ,@gram))))
(close-input-port i)))))

@ -1,14 +1,13 @@
#lang scheme/base #lang racket/base
(require (for-syntax racket/base
(require (for-syntax scheme/base
"private-yacc/parser-builder.rkt" "private-yacc/parser-builder.rkt"
"private-yacc/grammar.rkt" "private-yacc/grammar.rkt"
"private-yacc/yacc-helper.rkt" "private-yacc/yacc-helper.rkt"
"private-yacc/parser-actions.rkt")) "private-yacc/parser-actions.rkt")
(require "private-lex/token.rkt" "private-lex/token.rkt"
"private-yacc/parser-actions.rkt" "private-yacc/parser-actions.rkt"
mzlib/etc racket/local
mzlib/pretty racket/pretty
syntax/readerr) syntax/readerr)
(provide parser) (provide parser)
@ -17,139 +16,93 @@
;; convert-parse-table : (vectorof (listof (cons/c gram-sym? action?))) -> ;; convert-parse-table : (vectorof (listof (cons/c gram-sym? action?))) ->
;; (vectorof (symbol runtime-action hashtable)) ;; (vectorof (symbol runtime-action hashtable))
(define-for-syntax (convert-parse-table table) (define-for-syntax (convert-parse-table table)
(list->vector (for/vector ([state-entry (in-vector table)])
(map (let ([ht (make-hasheq)])
(lambda (state-entry) (for ([gs/action (in-list state-entry)])
(let ((ht (make-hasheq))) (hash-set! ht
(for-each (gram-sym-symbol (car gs/action))
(lambda (gs/action) (action->runtime-action (cdr gs/action))))
(hash-set! ht ht)))
(gram-sym-symbol (car gs/action))
(action->runtime-action (cdr gs/action))))
state-entry)
ht))
(vector->list table))))
(define-syntax (parser stx) (define-syntax (parser stx)
(syntax-case stx () (syntax-case stx ()
((_ args ...) [(_ ARGS ...)
(let ((arg-list (syntax->list (syntax (args ...)))) (let ([arg-list (syntax->list #'(ARGS ...))]
(src-pos #f) [src-pos #f]
(debug #f) [debug #f]
(error #f) [error #f]
(tokens #f) [tokens #f]
(start #f) [start #f]
(end #f) [end #f]
(precs #f) [precs #f]
(suppress #f) [suppress #f]
(grammar #f) [grammar #f]
(yacc-output #f)) [yacc-output #f])
(for-each (for ([arg (in-list (syntax->list #'(ARGS ...)))])
(lambda (arg) (syntax-case* arg (debug error tokens start end precs grammar
(syntax-case* arg (debug error tokens start end precs grammar suppress src-pos yacc-output)
suppress src-pos yacc-output) (λ (a b) (eq? (syntax-e a) (syntax-e b)))
(lambda (a b) [(debug FILENAME)
(eq? (syntax-e a) (syntax-e b))) (cond
((debug filename) [(not (string? (syntax-e #'FILENAME)))
(cond (raise-syntax-error #f "Debugging filename must be a string" stx #'FILENAME)]
((not (string? (syntax-e (syntax filename)))) [debug (raise-syntax-error #f "Multiple debug declarations" stx)]
(raise-syntax-error [else (set! debug (syntax-e #'FILENAME))])]
#f [(suppress) (set! suppress #t)]
"Debugging filename must be a string" [(src-pos) (set! src-pos #t)]
stx [(error EXPRESSION)
(syntax filename))) (if error
(debug (raise-syntax-error #f "Multiple error declarations" stx)
(raise-syntax-error #f "Multiple debug declarations" stx)) (set! error #'EXPRESSION))]
(else [(tokens DEF ...)
(set! debug (syntax-e (syntax filename)))))) (begin
((suppress) (when tokens
(set! suppress #t)) (raise-syntax-error #f "Multiple tokens declarations" stx))
((src-pos) (let ((defs (syntax->list #'(DEF ...))))
(set! src-pos #t)) (for ([d (in-list defs)]
((error expression) #:unless (identifier? d))
(if error (raise-syntax-error #f "Token-group name must be an identifier" stx d))
(raise-syntax-error #f "Multiple error declarations" stx) (set! tokens defs)))]
(set! error (syntax expression)))) [(start symbol ...)
((tokens def ...) (let ([symbols (syntax->list #'(symbol ...))])
(begin (for ([sym (in-list symbols)]
(when tokens #:unless (identifier? sym))
(raise-syntax-error #f "Multiple tokens declarations" stx)) (raise-syntax-error #f "Start symbol must be a symbol" stx sym))
(let ((defs (syntax->list (syntax (def ...))))) (when start
(for-each (raise-syntax-error #f "Multiple start declarations" stx))
(lambda (d) (when (null? symbols)
(unless (identifier? d) (raise-syntax-error #f "Missing start symbol" stx arg))
(raise-syntax-error (set! start symbols))]
#f [(end SYMBOLS ...)
"Token-group name must be an identifier" (let ((symbols (syntax->list #'(SYMBOLS ...))))
stx (for ([sym (in-list symbols)]
d))) #:unless (identifier? sym))
defs) (raise-syntax-error #f "End token must be a symbol" stx sym))
(set! tokens defs)))) (let ([d (duplicate-list? (map syntax-e symbols))])
((start symbol ...) (when d
(let ((symbols (syntax->list (syntax (symbol ...))))) (raise-syntax-error #f (format "Duplicate end token definition for ~a" d) stx arg))
(for-each (when (null? symbols)
(lambda (sym) (raise-syntax-error #f "end declaration must contain at least 1 token" stx arg))
(unless (identifier? sym) (when end
(raise-syntax-error #f (raise-syntax-error #f "Multiple end declarations" stx))
"Start symbol must be a symbol" (set! end symbols)))]
stx [(precs DECLS ...)
sym))) (if precs
symbols) (raise-syntax-error #f "Multiple precs declarations" stx)
(when start (set! precs (syntax/loc arg (DECLS ...))))]
(raise-syntax-error #f "Multiple start declarations" stx)) [(grammar PRODS ...)
(when (null? symbols) (if grammar
(raise-syntax-error #f (raise-syntax-error #f "Multiple grammar declarations" stx)
"Missing start symbol" (set! grammar (syntax/loc arg (PRODS ...))))]
stx [(yacc-output FILENAME)
arg)) (cond
(set! start symbols))) [(not (string? (syntax-e #'FILENAME)))
((end symbols ...) (raise-syntax-error #f "Yacc-output filename must be a string" stx #'FILENAME)]
(let ((symbols (syntax->list (syntax (symbols ...))))) [yacc-output
(for-each (raise-syntax-error #f "Multiple yacc-output declarations" stx)]
(lambda (sym) [else
(unless (identifier? sym) (set! yacc-output (syntax-e #'FILENAME))])]
(raise-syntax-error #f [_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg)]))
"End token must be a symbol"
stx
sym)))
symbols)
(let ((d (duplicate-list? (map syntax-e symbols))))
(when d
(raise-syntax-error
#f
(format "Duplicate end token definition for ~a" d)
stx
arg))
(when (null? symbols)
(raise-syntax-error
#f
"end declaration must contain at least 1 token"
stx
arg))
(when end
(raise-syntax-error #f "Multiple end declarations" stx))
(set! end symbols))))
((precs decls ...)
(if precs
(raise-syntax-error #f "Multiple precs declarations" stx)
(set! precs (syntax/loc arg (decls ...)))))
((grammar prods ...)
(if grammar
(raise-syntax-error #f "Multiple grammar declarations" stx)
(set! grammar (syntax/loc arg (prods ...)))))
((yacc-output filename)
(cond
((not (string? (syntax-e (syntax filename))))
(raise-syntax-error #f
"Yacc-output filename must be a string"
stx
(syntax filename)))
(yacc-output
(raise-syntax-error #f "Multiple yacc-output declarations" stx))
(else
(set! yacc-output (syntax-e (syntax filename))))))
(_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg))))
(syntax->list (syntax (args ...))))
(unless tokens (unless tokens
(raise-syntax-error #f "missing tokens declaration" stx)) (raise-syntax-error #f "missing tokens declaration" stx))
(unless error (unless error
@ -160,7 +113,7 @@
(raise-syntax-error #f "missing end declaration" stx)) (raise-syntax-error #f "missing end declaration" stx))
(unless start (unless start
(raise-syntax-error #f "missing start declaration" stx)) (raise-syntax-error #f "missing start declaration" stx))
(let-values (((table all-term-syms actions check-syntax-fix) (let-values ([(table all-term-syms actions check-syntax-fix)
(build-parser (if debug debug "") (build-parser (if debug debug "")
src-pos src-pos
suppress suppress
@ -168,66 +121,51 @@
start start
end end
precs precs
grammar))) grammar)])
(when (and yacc-output (not (string=? yacc-output ""))) (when (and yacc-output (not (string=? yacc-output "")))
(with-handlers [(exn:fail:filesystem? (with-handlers [(exn:fail:filesystem?
(lambda (e) (λ (e) (eprintf "Cannot write yacc-output to file \"~a\"\n" yacc-output)))]
(eprintf
"Cannot write yacc-output to file \"~a\"\n"
yacc-output)))]
(call-with-output-file yacc-output (call-with-output-file yacc-output
(lambda (port) (λ (port)
(display-yacc (syntax->datum grammar) (display-yacc (syntax->datum grammar)
tokens tokens
(map syntax->datum start) (map syntax->datum start)
(if precs (and precs (syntax->datum precs))
(syntax->datum precs)
#f)
port)) port))
#:exists 'truncate))) #:exists 'truncate)))
(with-syntax ((check-syntax-fix check-syntax-fix) (with-syntax ([check-syntax-fix check-syntax-fix]
(err error) [err error]
(ends end) [ends end]
(starts start) [starts start]
(debug debug) [debug debug]
(table (convert-parse-table table)) [table (convert-parse-table table)]
(all-term-syms all-term-syms) [all-term-syms all-term-syms]
(actions actions) [actions actions]
(src-pos src-pos)) [src-pos src-pos])
(syntax #'(begin
(begin check-syntax-fix
check-syntax-fix (parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos)))))]
(parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos))))))) [_ (raise-syntax-error #f "parser must have the form (parser args ...)" stx)]))
(_
(raise-syntax-error #f
"parser must have the form (parser args ...)"
stx))))
(define (reduce-stack stack num ret-vals src-pos) (define (reduce-stack stack num ret-vals src-pos)
(cond (cond
((> num 0) [(positive? num)
(let* ((top-frame (car stack)) (define top-frame (car stack))
(ret-vals (let ([ret-vals (if src-pos
(if src-pos (cons (stack-frame-value top-frame)
(cons (stack-frame-value top-frame) (cons (stack-frame-start-pos top-frame)
(cons (stack-frame-start-pos top-frame) (cons (stack-frame-end-pos top-frame)
(cons (stack-frame-end-pos top-frame) ret-vals)))
ret-vals))) (cons (stack-frame-value top-frame) ret-vals))])
(cons (stack-frame-value top-frame) ret-vals)))) (reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))]
(reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))) [else (values stack ret-vals)]))
(else (values stack ret-vals))))
;; extract-helper : (symbol or make-token) any any -> symbol any any any ;; extract-helper : (symbol or make-token) any any -> symbol any any any
(define (extract-helper tok v1 v2) (define (extract-helper tok v1 v2)
(cond (cond
((symbol? tok) [(symbol? tok) (values tok #f v1 v2)]
(values tok #f v1 v2)) [(token? tok) (values (real-token-name tok) (real-token-value tok) v1 v2)]
((token? tok) [else (raise-argument-error 'parser "(or/c symbol? token?)" 0 tok)]))
(values (real-token-name tok) (real-token-value tok) v1 v2))
(else (raise-argument-error 'parser
"(or/c symbol? token?)"
0
tok))))
;; well-formed-position-token?: any -> boolean ;; well-formed-position-token?: any -> boolean
;; Returns true if pt is a position token whose position-token-token ;; Returns true if pt is a position token whose position-token-token
@ -236,8 +174,7 @@
;; a tokenizer produces an erroneous position-token wrapped twice. ;; a tokenizer produces an erroneous position-token wrapped twice.
;; (as often happens when omitting return-without-pos). ;; (as often happens when omitting return-without-pos).
(define (well-formed-token-field? t) (define (well-formed-token-field? t)
(or (symbol? t) (or (symbol? t) (token? t)))
(token? t)))
(define (well-formed-position-token? pt) (define (well-formed-position-token? pt)
(and (position-token? pt) (and (position-token? pt)
@ -250,24 +187,18 @@
;; extract-src-pos : position-token -> symbol any any any ;; extract-src-pos : position-token -> symbol any any any
(define (extract-src-pos ip) (define (extract-src-pos ip)
(unless (well-formed-position-token? ip) (unless (well-formed-position-token? ip)
(raise-argument-error 'parser (raise-argument-error 'parser "well-formed-position-token?" 0 ip))
"well-formed-position-token?"
0
ip))
(extract-helper (position-token-token ip) (extract-helper (position-token-token ip)
(position-token-start-pos ip) (position-token-start-pos ip)
(position-token-end-pos ip))) (position-token-end-pos ip)))
(define (extract-srcloc ip) (define (extract-srcloc ip)
(unless (well-formed-srcloc-token? ip) (unless (well-formed-srcloc-token? ip)
(raise-argument-error 'parser (raise-argument-error 'parser "well-formed-srcloc-token?" 0 ip))
"well-formed-srcloc-token?" (define loc (srcloc-token-srcloc ip))
0 (extract-helper (srcloc-token-token ip)
ip)) (position-token (srcloc-position loc) (srcloc-line loc) (srcloc-column loc))
(let ([loc (srcloc-token-srcloc ip)]) (position-token (+ (srcloc-position loc) (srcloc-span loc)) #f #f)))
(extract-helper (srcloc-token-token ip)
(position-token (srcloc-position loc) (srcloc-line loc) (srcloc-column loc))
(position-token (+ (srcloc-position loc) (srcloc-span loc)) #f #f))))
;; extract-no-src-pos : (symbol or make-token) -> symbol any any any ;; extract-no-src-pos : (symbol or make-token) -> symbol any any any
@ -295,24 +226,24 @@
(if (memq tok ends) (if (memq tok ends)
(raise-read-error "parser: Cannot continue after error" (raise-read-error "parser: Cannot continue after error"
#f #f #f #f #f) #f #f #f #f #f)
(let ((a (find-action stack tok val start-pos end-pos))) (let ([a (find-action stack tok val start-pos end-pos)])
(cond (cond
((runtime-shift? a) [(runtime-shift? a)
;; (printf "shift:~a\n" (runtime-shift-state a)) ;; (printf "shift:~a\n" (runtime-shift-state a))
(cons (make-stack-frame (runtime-shift-state a) (cons (make-stack-frame (runtime-shift-state a)
val val
start-pos start-pos
end-pos) end-pos)
stack)) stack)]
(else [else
;; (printf "discard input:~a\n" tok) ;; (printf "discard input:~a\n" tok)
(let-values (((tok val start-pos end-pos) (let-values ([(tok val start-pos end-pos)
(extract (get-token)))) (extract (get-token))])
(remove-input tok val start-pos end-pos)))))))) (remove-input tok val start-pos end-pos))])))))
(let remove-states () (let remove-states ()
(let ((a (find-action stack 'error #f start-pos end-pos))) (let ([a (find-action stack 'error #f start-pos end-pos)])
(cond (cond
((runtime-shift? a) [(runtime-shift? a)
;; (printf "shift:~a\n" (runtime-shift-state a)) ;; (printf "shift:~a\n" (runtime-shift-state a))
(set! stack (set! stack
(cons (cons
@ -321,92 +252,83 @@
start-pos start-pos
end-pos) end-pos)
stack)) stack))
(remove-input tok val start-pos end-pos)) (remove-input tok val start-pos end-pos)]
(else [else
;; (printf "discard state:~a\n" (car stack)) ;; (printf "discard state:~a\n" (car stack))
(cond (cond
((< (length stack) 2) [(< (length stack) 2)
(raise-read-error "parser: Cannot continue after error" (raise-read-error "parser: Cannot continue after error"
#f #f #f #f #f)) #f #f #f #f #f)]
(else [else
(set! stack (cdr stack)) (set! stack (cdr stack))
(remove-states))))))))) (remove-states)])])))))
(define (find-action stack tok val start-pos end-pos) (define (find-action stack tok val start-pos end-pos)
(unless (hash-ref all-term-syms (unless (hash-ref all-term-syms tok #f)
tok
#f)
(if src-pos (if src-pos
(err #f tok val start-pos end-pos) (err #f tok val start-pos end-pos)
(err #f tok val)) (err #f tok val))
(raise-read-error (format "parser: got token of unknown type ~a" tok) (raise-read-error (format "parser: got token of unknown type ~a" tok)
#f #f #f #f #f)) #f #f #f #f #f))
(hash-ref (vector-ref table (stack-frame-state (car stack))) (hash-ref (vector-ref table (stack-frame-state (car stack))) tok #f))
tok
#f))
(define (make-parser start-number) (define ((make-parser start-number) get-token)
(lambda (get-token) (unless (and (procedure? get-token)
(unless (and (procedure? get-token) (procedure-arity-includes? get-token 0))
(procedure-arity-includes? get-token 0)) (error 'get-token "expected a nullary procedure, got ~e" get-token))
(error 'get-token "expected a nullary procedure, got ~e" get-token)) (let parsing-loop ([stack (make-empty-stack start-number)]
(let parsing-loop ((stack (make-empty-stack start-number)) [ip (get-token)])
(ip (get-token))) (let-values ([(tok val start-pos end-pos) (extract ip)])
(let-values (((tok val start-pos end-pos) (let ([action (find-action stack tok val start-pos end-pos)])
(extract ip))) (cond
(let ((action (find-action stack tok val start-pos end-pos))) [(runtime-shift? action)
(cond ;; (printf "shift:~a\n" (runtime-shift-state action))
((runtime-shift? action) (parsing-loop (cons (make-stack-frame (runtime-shift-state action)
;; (printf "shift:~a\n" (runtime-shift-state action)) val
(parsing-loop (cons (make-stack-frame (runtime-shift-state action) start-pos
val end-pos)
start-pos stack)
end-pos) (get-token))]
stack) [(runtime-reduce? action)
(get-token))) ;; (printf "reduce:~a\n" (runtime-reduce-prod-num action))
((runtime-reduce? action) (let-values ([(new-stack args)
;; (printf "reduce:~a\n" (runtime-reduce-prod-num action)) (reduce-stack stack
(let-values (((new-stack args) (runtime-reduce-rhs-length action)
(reduce-stack stack null
(runtime-reduce-rhs-length action) src-pos)])
null (let ([goto
src-pos))) (runtime-goto-state
(let ((goto (hash-ref
(runtime-goto-state (vector-ref table (stack-frame-state (car new-stack)))
(hash-ref (runtime-reduce-lhs action)))])
(vector-ref table (stack-frame-state (car new-stack))) (parsing-loop
(runtime-reduce-lhs action))))) (cons
(parsing-loop (if src-pos
(cons (make-stack-frame
(if src-pos goto
(make-stack-frame (apply (vector-ref actions (runtime-reduce-prod-num action)) args)
goto (if (null? args) start-pos (cadr args))
(apply (vector-ref actions (runtime-reduce-prod-num action)) args) (if (null? args)
(if (null? args) start-pos (cadr args)) end-pos
(if (null? args) (list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1))))
end-pos (make-stack-frame
(list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1)))) goto
(make-stack-frame (apply (vector-ref actions (runtime-reduce-prod-num action)) args)
goto #f
(apply (vector-ref actions (runtime-reduce-prod-num action)) args) #f))
#f new-stack)
#f)) ip)))]
new-stack) [(runtime-accept? action)
ip)))) ;; (printf "accept\n")
((runtime-accept? action) (stack-frame-value (car stack))]
;; (printf "accept\n") [else
(stack-frame-value (car stack))) (if src-pos
(else (err #t tok val start-pos end-pos)
(if src-pos (err #t tok val))
(err #t tok val start-pos end-pos) (parsing-loop (fix-error stack tok val start-pos end-pos get-token)
(err #t tok val)) (get-token))]))))))
(parsing-loop (fix-error stack tok val start-pos end-pos get-token)
(get-token))))))))))
(cond (cond
((null? (cdr starts)) (make-parser 0)) [(null? (cdr starts)) (make-parser 0)]
(else [else
(let loop ((l starts) (for/list ([(l i) (in-indexed starts)])
(i 0)) (make-parser i))])))
(cond
((null? l) null)
(else (cons (make-parser i) (loop (cdr l) (add1 i))))))))))

@ -7,5 +7,3 @@
(define build-deps '("rackunit-lib")) (define build-deps '("rackunit-lib"))
(define pkg-desc "implementation (no documentation) part of \"br-parser-tools\"") (define pkg-desc "implementation (no documentation) part of \"br-parser-tools\"")
(define pkg-authors '(mflatt))

Loading…
Cancel
Save