diff --git a/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt b/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt index 26692a7..250b019 100755 --- a/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt +++ b/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt @@ -47,18 +47,16 @@ (define-struct tasks (active active-back waits multi-waits cache progress?)) (define-for-syntax make-token-identifier-mapping make-hasheq) -(define-for-syntax token-identifier-mapping-get - (case-lambda - [(t tok) - (hash-ref t (syntax-e tok))] - [(t tok fail) - (hash-ref t (syntax-e tok) fail)])) -(define-for-syntax token-identifier-mapping-put! - (lambda (t tok v) - (hash-set! t (syntax-e tok) v))) -(define-for-syntax token-identifier-mapping-map - (lambda (t f) - (hash-map t f))) +(define-for-syntax (token-identifier-mapping-get t tok [fail #f]) + (if fail + (hash-ref t (syntax-e tok) fail) + (hash-ref t (syntax-e tok)))) + +(define-for-syntax (token-identifier-mapping-put! t tok v) + (hash-set! t (syntax-e tok) v)) + +(define-for-syntax (token-identifier-mapping-map t f) + (hash-map t f)) ;; Used to calculate information on the grammar, such as whether ;; a particular non-terminal is "simple" instead of recursively defined. @@ -71,7 +69,7 @@ (cdr as) (cdr bs))])) (let loop () (when (ormap-all #f - (lambda (nt pats) + (λ (nt pats) (let ([old (bound-identifier-mapping-get nts nt)]) (let ([new (proc nt pats old)]) (if (equal? old new) @@ -88,182 +86,153 @@ (define (parse-and simple-a? parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks) - (letrec ([mk-got-k - (lambda (success-k fail-k) - (lambda (val stream last-consumed-token depth max-depth tasks next1-k) - (if simple-a? - (parse-b val stream last-consumed-token depth end - (mk-got2-k success-k fail-k next1-k) - (mk-fail2-k success-k fail-k next1-k) - max-depth tasks) - (parallel-or - (lambda (success-k fail-k max-depth tasks) - (parse-b val stream last-consumed-token depth end - success-k fail-k - max-depth tasks)) - (lambda (success-k fail-k max-depth tasks) - (next1-k (mk-got-k success-k fail-k) - fail-k max-depth tasks)) - success-k fail-k max-depth tasks))))] - [mk-got2-k - (lambda (success-k fail-k next1-k) - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (success-k val stream last-consumed-token depth max-depth tasks - (lambda (success-k fail-k max-depth tasks) - (next-k (mk-got2-k success-k fail-k next1-k) - (mk-fail2-k success-k fail-k next1-k) - max-depth tasks)))))] - [mk-fail2-k - (lambda (success-k fail-k next1-k) - (lambda (max-depth tasks) - (next1-k (mk-got-k success-k fail-k) - fail-k - max-depth - tasks)))]) - (parse-a stream last-consumed-token depth end - (mk-got-k success-k fail-k) - fail-k - max-depth tasks))) + (define ((mk-got-k success-k fail-k) val stream last-consumed-token depth max-depth tasks next1-k) + (if simple-a? + (parse-b val stream last-consumed-token depth end + (mk-got2-k success-k fail-k next1-k) + (mk-fail2-k success-k fail-k next1-k) + max-depth tasks) + (parallel-or + (λ (success-k fail-k max-depth tasks) + (parse-b val stream last-consumed-token depth end + success-k fail-k + max-depth tasks)) + (λ (success-k fail-k max-depth tasks) + (next1-k (mk-got-k success-k fail-k) + fail-k max-depth tasks)) + success-k fail-k max-depth tasks))) + + (define ((mk-got2-k success-k fail-k next1-k) val stream last-consumed-token depth max-depth tasks next-k) + (success-k val stream last-consumed-token depth max-depth tasks + (λ (success-k fail-k max-depth tasks) + (next-k (mk-got2-k success-k fail-k next1-k) + (mk-fail2-k success-k fail-k next1-k) + max-depth tasks)))) + + (define ((mk-fail2-k success-k fail-k next1-k) max-depth tasks) + (next1-k (mk-got-k success-k fail-k) fail-k max-depth tasks)) + + (parse-a stream last-consumed-token depth end + (mk-got-k success-k fail-k) + fail-k + max-depth tasks)) ;; Parallel or for non-terminal alternatives (define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks) - (parallel-or (lambda (success-k fail-k max-depth tasks) + (parallel-or (λ (success-k fail-k max-depth tasks) (parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks)) - (lambda (success-k fail-k max-depth tasks) + (λ (success-k fail-k max-depth tasks) (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)) success-k fail-k max-depth tasks)) ;; Generic parallel-or (define (parallel-or parse-a parse-b success-k fail-k max-depth tasks) (define answer-key (gensym)) - (letrec ([gota-k - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (report-answer answer-key - max-depth - tasks - (list val stream last-consumed-token depth next-k)))] - [faila-k - (lambda (max-depth tasks) - (report-answer answer-key - max-depth - tasks - null))]) - (let* ([tasks (queue-task + (define (gota-k val stream last-consumed-token depth max-depth tasks next-k) + (report-answer answer-key + max-depth + tasks + (list val stream last-consumed-token depth next-k))) + (define (faila-k max-depth tasks) + (report-answer answer-key + max-depth tasks - (lambda (max-depth tasks) - (parse-a gota-k - faila-k - max-depth tasks)))] - [tasks (queue-task + null)) + (let* ([tasks (queue-task tasks (λ (max-depth tasks) + (parse-a gota-k faila-k max-depth tasks)))] + [tasks (queue-task tasks (λ (max-depth tasks) + (parse-b gota-k faila-k max-depth tasks)))] + [queue-next (λ (next-k tasks) + (queue-task tasks (λ (max-depth tasks) + (next-k gota-k faila-k max-depth tasks))))]) + (define ((mk-got-one immediate-next? get-nth success-k) val stream last-consumed-token depth max-depth tasks next-k) + (let ([tasks (if immediate-next? + (queue-next next-k tasks) + tasks)]) + (success-k val stream last-consumed-token depth max-depth tasks - (lambda (max-depth tasks) - (parse-b gota-k - faila-k - max-depth tasks)))] - [queue-next (lambda (next-k tasks) - (queue-task tasks - (lambda (max-depth tasks) - (next-k gota-k - faila-k - max-depth tasks))))]) - (letrec ([mk-got-one - (lambda (immediate-next? get-nth success-k) - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (let ([tasks (if immediate-next? - (queue-next next-k tasks) - tasks)]) - (success-k val stream last-consumed-token depth max-depth - tasks - (lambda (success-k fail-k max-depth tasks) - (let ([tasks (if immediate-next? - tasks - (queue-next next-k tasks))]) - (get-nth max-depth tasks success-k fail-k)))))))] - [get-first - (lambda (max-depth tasks success-k fail-k) - (wait-for-answer #f max-depth tasks answer-key - (mk-got-one #t get-first success-k) - (lambda (max-depth tasks) - (get-second max-depth tasks success-k fail-k)) - #f))] - [get-second - (lambda (max-depth tasks success-k fail-k) - (wait-for-answer #f max-depth tasks answer-key - (mk-got-one #f get-second success-k) - fail-k #f))]) - (get-first max-depth tasks success-k fail-k))))) + (λ (success-k fail-k max-depth tasks) + (let ([tasks (if immediate-next? + tasks + (queue-next next-k tasks))]) + (get-nth max-depth tasks success-k fail-k)))))) + (define (get-first max-depth tasks success-k fail-k) + (wait-for-answer #f max-depth tasks answer-key + (mk-got-one #t get-first success-k) + (λ (max-depth tasks) + (get-second max-depth tasks success-k fail-k)) + #f)) + (define (get-second max-depth tasks success-k fail-k) + (wait-for-answer #f max-depth tasks answer-key + (mk-got-one #f get-second success-k) + fail-k #f)) + (get-first max-depth tasks success-k fail-k))) ;; Non-terminal alternatives where the first is "simple" can be done ;; sequentially, which is simpler (define (parse-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks) - (letrec ([mk-got-k - (lambda (success-k fail-k) - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (success-k val stream last-consumed-token depth - max-depth tasks - (lambda (success-k fail-k max-depth tasks) - (next-k (mk-got-k success-k fail-k) - (mk-fail-k success-k fail-k) - max-depth tasks)))))] - [mk-fail-k - (lambda (success-k fail-k) - (lambda (max-depth tasks) - (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))]) - (parse-a stream last-consumed-token depth end - (mk-got-k success-k fail-k) - (mk-fail-k success-k fail-k) - max-depth tasks))) + (define ((mk-got-k success-k fail-k) val stream last-consumed-token depth max-depth tasks next-k) + (success-k val stream last-consumed-token depth + max-depth tasks + (λ (success-k fail-k max-depth tasks) + (next-k (mk-got-k success-k fail-k) + (mk-fail-k success-k fail-k) + max-depth tasks)))) + (define ((mk-fail-k success-k fail-k) max-depth tasks) + (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)) + (parse-a stream last-consumed-token depth end + (mk-got-k success-k fail-k) + (mk-fail-k success-k fail-k) + max-depth tasks)) ;; Starts a thread -(define queue-task - (lambda (tasks t [progress? #t]) - (make-tasks (tasks-active tasks) - (cons t (tasks-active-back tasks)) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - (or progress? (tasks-progress? tasks))))) +(define (queue-task tasks t [progress? #t]) + (make-tasks (tasks-active tasks) + (cons t (tasks-active-back tasks)) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + (or progress? (tasks-progress? tasks)))) ;; Reports an answer to a waiting thread: (define (report-answer answer-key max-depth tasks val) - (let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))]) - (if v - (let ([tasks (make-tasks (cons (v val) - (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (hash-remove! (tasks-waits tasks) answer-key) - (swap-task max-depth tasks)) - ;; We have an answer ready too fast; wait - (swap-task max-depth - (queue-task tasks - (lambda (max-depth tasks) - (report-answer answer-key max-depth tasks val)) - #f))))) + (define v (hash-ref (tasks-waits tasks) answer-key (λ () #f))) + (if v + (let ([tasks (make-tasks (cons (v val) (tasks-active tasks)) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #t)]) + (hash-remove! (tasks-waits tasks) answer-key) + (swap-task max-depth tasks)) + ;; We have an answer ready too fast; wait + (swap-task max-depth + (queue-task tasks + (λ (max-depth tasks) + (report-answer answer-key max-depth tasks val)) + #f)))) ;; Reports an answer to multiple waiting threads: (define (report-answer-all answer-key max-depth tasks val k) - (let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))]) - (hash-remove! (tasks-multi-waits tasks) answer-key) - (let ([tasks (make-tasks (append (map (lambda (a) (a val)) v) - (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (k max-depth tasks)))) + (define v (hash-ref (tasks-multi-waits tasks) answer-key (λ () null))) + (hash-remove! (tasks-multi-waits tasks) answer-key) + (let ([tasks (make-tasks (append (map (λ (a) (a val)) v) + (tasks-active tasks)) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #t)]) + (k max-depth tasks))) ;; Waits for an answer; if `multi?' is #f, this is sole waiter, otherwise ;; there might be many. Use wither #t or #f (and `report-answer' or ;; `report-answer-all', resptively) consistently for a particular answer key. (define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k) - (let ([wait (lambda (val) - (lambda (max-depth tasks) + (let ([wait (λ (val) + (λ (max-depth tasks) (if val (if (null? val) (fail-k max-depth tasks) @@ -273,7 +242,7 @@ (if multi? (hash-set! (tasks-multi-waits tasks) answer-key (cons wait (hash-ref (tasks-multi-waits tasks) answer-key - (lambda () null)))) + (λ () null)))) (hash-set! (tasks-waits tasks) answer-key wait)) (let ([tasks (make-tasks (tasks-active tasks) (tasks-active-back tasks) @@ -302,8 +271,8 @@ (make-tasks (apply append (hash-map (tasks-multi-waits tasks) - (lambda (k l) - (map (lambda (v) (v #f)) l)))) + (λ (k l) + (map (λ (v) (v #f)) l)))) (tasks-active-back tasks) (tasks-waits tasks) (make-hasheq) @@ -325,11 +294,9 @@ (define no-pos-val (make-position #f #f #f)) (define-for-syntax no-pos (let ([npv ((syntax-local-certifier) #'no-pos-val)]) - (lambda (stx) npv))) -(define-for-syntax at-tok-pos - (lambda (sel expr) - (lambda (stx) - #`(let ([v #,expr]) (if v (#,sel v) no-pos-val))))) + (λ (stx) npv))) +(define-for-syntax ((at-tok-pos sel expr) stx) + #`(let ([v #,expr]) (if v (#,sel v) no-pos-val))) ;; Builds a matcher for a particular alternative (define-for-syntax (build-match nts toks pat handle $ctx) @@ -337,27 +304,23 @@ [pos 1]) (if (null? pat) #`(success-k #,handle stream last-consumed-token depth max-depth tasks - (lambda (success-k fail-k max-depth tasks) + (λ (success-k fail-k max-depth tasks) (fail-k max-depth tasks))) - (let ([id (datum->syntax (car pat) - (string->symbol (format "$~a" pos)))] - [id-start-pos (datum->syntax (car pat) - (string->symbol (format "$~a-start-pos" pos)))] - [id-end-pos (datum->syntax (car pat) - (string->symbol (format "$~a-end-pos" pos)))] - [n-end-pos (and (null? (cdr pat)) - (datum->syntax (car pat) '$n-end-pos))]) + (let ([id (datum->syntax (car pat) (string->symbol (format "$~a" pos)))] + [id-start-pos (datum->syntax (car pat) (string->symbol (format "$~a-start-pos" pos)))] + [id-end-pos (datum->syntax (car pat) (string->symbol (format "$~a-end-pos" pos)))] + [n-end-pos (and (null? (cdr pat)) (datum->syntax (car pat) '$n-end-pos))]) (cond - [(bound-identifier-mapping-get nts (car pat) (lambda () #f)) + [(bound-identifier-mapping-get nts (car pat) (λ () #f)) ;; Match non-termimal #`(parse-and ;; First part is simple? (If so, we don't have to parallelize the `and'.) - #,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))]) + #,(let ([l (bound-identifier-mapping-get nts (car pat) (λ () #f))]) (or (not l) (andmap values (caddr l)))) #,(car pat) (let ([original-stream stream]) - (lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks) + (λ (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks) (let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream) tok-end tok-start) @@ -372,10 +335,10 @@ #,(loop (cdr pat) (add1 pos))))) stream last-consumed-token depth #,(let ([cnt (apply + - (map (lambda (item) + (map (λ (item) (cond - [(bound-identifier-mapping-get nts item (lambda () #f)) - => (lambda (l) (car l))] + [(bound-identifier-mapping-get nts item (λ () #f)) + => (λ (l) (car l))] [else 1])) (cdr pat)))]) #`(- end #,cnt)) @@ -419,75 +382,73 @@ [max-depth max-depth] [tasks tasks] [k k]) - (let ([answer-key (gensym)] - [table-key (vector key depth n)] - [old-depth depth] - [old-stream stream]) - #;(printf "Loop ~a\n" table-key) - (cond - [(hash-ref (tasks-cache tasks) table-key (lambda () #f)) - => (lambda (result) - #;(printf "Reuse ~a\n" table-key) - (result success-k fail-k max-depth tasks))] - [else - #;(printf "Try ~a ~a\n" table-key (map tok-name stream)) - (hash-set! (tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - #;(printf "Wait ~a ~a\n" table-key answer-key) - (wait-for-answer #t max-depth tasks answer-key success-k fail-k - (lambda (max-depth tasks) - #;(printf "Deadlock ~a ~a\n" table-key answer-key) - (fail-k max-depth tasks))))) - (let result-loop ([max-depth max-depth][tasks tasks][k k]) - (letrec ([orig-stream stream] - [new-got-k - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - ;; Check whether we already have a result that consumed the same amount: - (let ([result-key (vector #f key old-depth depth)]) - (cond - [(hash-ref (tasks-cache tasks) result-key (lambda () #f)) - ;; Go for the next-result - (result-loop max-depth - tasks - (lambda (end max-depth tasks success-k fail-k) - (next-k success-k fail-k max-depth tasks)))] - [else - #;(printf "Success ~a ~a\n" table-key - (map tok-name (let loop ([d old-depth][s old-stream]) - (if (= d depth) - null - (cons (car s) (loop (add1 d) (cdr s))))))) - (let ([next-k (lambda (success-k fail-k max-depth tasks) - (loop (add1 n) - success-k - fail-k - max-depth - tasks - (lambda (end max-depth tasks success-k fail-k) - (next-k success-k fail-k max-depth tasks))))]) - (hash-set! (tasks-cache tasks) result-key #t) - (hash-set! (tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - (success-k val stream last-consumed-token depth max-depth tasks next-k))) - (report-answer-all answer-key - max-depth - tasks - (list val stream last-consumed-token depth next-k) - (lambda (max-depth tasks) - (success-k val stream last-consumed-token depth max-depth tasks next-k))))])))] - [new-fail-k - (lambda (max-depth tasks) - #;(printf "Failure ~a\n" table-key) - (hash-set! (tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - (fail-k max-depth tasks))) - (report-answer-all answer-key - max-depth - tasks - null - (lambda (max-depth tasks) - (fail-k max-depth tasks))))]) - (k end max-depth tasks new-got-k new-fail-k)))]))))) + (define answer-key (gensym)) + (define table-key (vector key depth n)) + (define old-depth depth) + (define old-stream stream) + #;(printf "Loop ~a\n" table-key) + (cond + [(hash-ref (tasks-cache tasks) table-key (λ () #f)) + => (λ (result) + #;(printf "Reuse ~a\n" table-key) + (result success-k fail-k max-depth tasks))] + [else + #;(printf "Try ~a ~a\n" table-key (map tok-name stream)) + (hash-set! (tasks-cache tasks) table-key + (λ (success-k fail-k max-depth tasks) + #;(printf "Wait ~a ~a\n" table-key answer-key) + (wait-for-answer #t max-depth tasks answer-key success-k fail-k + (λ (max-depth tasks) + #;(printf "Deadlock ~a ~a\n" table-key answer-key) + (fail-k max-depth tasks))))) + (let result-loop ([max-depth max-depth][tasks tasks][k k]) + (define orig-stream stream) + (define (new-got-k val stream last-consumed-token depth max-depth tasks next-k) + ;; Check whether we already have a result that consumed the same amount: + (define result-key (vector #f key old-depth depth)) + (cond + [(hash-ref (tasks-cache tasks) result-key (λ () #f)) + ;; Go for the next-result + (result-loop max-depth + tasks + (λ (end max-depth tasks success-k fail-k) + (next-k success-k fail-k max-depth tasks)))] + [else + #;(printf "Success ~a ~a\n" table-key + (map tok-name (let loop ([d old-depth][s old-stream]) + (if (= d depth) + null + (cons (car s) (loop (add1 d) (cdr s))))))) + (let ([next-k (λ (success-k fail-k max-depth tasks) + (loop (add1 n) + success-k + fail-k + max-depth + tasks + (λ (end max-depth tasks success-k fail-k) + (next-k success-k fail-k max-depth tasks))))]) + (hash-set! (tasks-cache tasks) result-key #t) + (hash-set! (tasks-cache tasks) table-key + (λ (success-k fail-k max-depth tasks) + (success-k val stream last-consumed-token depth max-depth tasks next-k))) + (report-answer-all answer-key + max-depth + tasks + (list val stream last-consumed-token depth next-k) + (λ (max-depth tasks) + (success-k val stream last-consumed-token depth max-depth tasks next-k))))])) + (define (new-fail-k max-depth tasks) + #;(printf "Failure ~a\n" table-key) + (hash-set! (tasks-cache tasks) table-key + (λ (success-k fail-k max-depth tasks) + (fail-k max-depth tasks))) + (report-answer-all answer-key + max-depth + tasks + null + (λ (max-depth tasks) + (fail-k max-depth tasks)))) + (k end max-depth tasks new-got-k new-fail-k))])))) ;; These temp identifiers can't be `gensym` or `generate-temporary` ;; because they have to be consistent between module loads @@ -497,39 +458,34 @@ (define-for-syntax atok-id-temp 'atok_wrutdjgecmybyfipiwsgjlvsveryodlgassuzcargiuznzgdghrykfqfbwcjgzdhdoeqxcucmtjkuyucskzethozhqkasphdwbht) (define-syntax (cfg-parser stx) (syntax-case stx () - [(_ clause ...) - (let ([clauses (syntax->list #'(clause ...))]) + [(_ CLAUSE ...) + (let ([clauses (syntax->list #'(CLAUSE ...))]) (let-values ([(start grammar cfg-error parser-clauses src-pos?) (let ([all-toks (apply append - (map (lambda (clause) - (syntax-case clause (tokens) - [(tokens t ...) - (apply - append - (map (lambda (t) - (let ([v (syntax-local-value t (lambda () #f))]) - (cond - [(terminals-def? v) - (map (lambda (v) - (cons v #f)) - (syntax->list (terminals-def-t v)))] - [(e-terminals-def? v) - (map (lambda (v) - (cons v #t)) - (syntax->list (e-terminals-def-t v)))] - [else null]))) - (syntax->list #'(t ...))))] - [_else null])) - clauses))] + (for/list ([clause (in-list clauses)]) + (syntax-case clause (tokens) + [(tokens T ...) + (apply + append + (for/list ([t (in-list (syntax->list #'(T ...)))]) + (define v (syntax-local-value t (λ () #f))) + (cond + [(terminals-def? v) + (for/list ([v (in-list (syntax->list (terminals-def-t v)))]) + (cons v #f))] + [(e-terminals-def? v) + (for/list ([v (in-list (syntax->list (e-terminals-def-t v)))]) + (cons v #t))] + [else null])))] + [_else null])))] [all-end-toks (apply append - (map (lambda (clause) - (syntax-case clause (end) - [(end t ...) - (syntax->list #'(t ...))] - [_else null])) - clauses))]) + (for/list ([clause (in-list clauses)]) + (syntax-case clause (end) + [(end T ...) + (syntax->list #'(T ...))] + [_else null])))]) (let loop ([clauses clauses] [cfg-start #f] [cfg-grammar #f] @@ -543,47 +499,35 @@ (reverse parser-clauses) src-pos?) (syntax-case (car clauses) (start error grammar src-pos) - [(start tok) - (loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)] - [(error expr) - (loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)] - [(grammar [nt [pat handle0 handle ...] ...] ...) + [(start TOK) + (loop (cdr clauses) #'TOK cfg-grammar cfg-error src-pos? parser-clauses)] + [(error EXPR) + (loop (cdr clauses) cfg-start cfg-grammar #'EXPR src-pos? parser-clauses)] + [(grammar [NT [PAT HANDLE0 HANDLE ...] ...] ...) (let ([nts (make-bound-identifier-mapping)] [toks (make-token-identifier-mapping)] [end-toks (make-token-identifier-mapping)] - [nt-ids (syntax->list #'(nt ...))] - [patss (map (lambda (stx) + [nt-ids (syntax->list #'(NT ...))] + [patss (map (λ (stx) (map syntax->list (syntax->list stx))) - (syntax->list #'((pat ...) ...)))]) - (for-each (lambda (nt) - (bound-identifier-mapping-put! nts nt (list 0))) - nt-ids) - (for-each (lambda (t) - (token-identifier-mapping-put! end-toks t #t)) - all-end-toks) - (for-each (lambda (t) - (unless (token-identifier-mapping-get end-toks (car t) (lambda () #f)) - (let ([id (gensym (syntax-e (car t)))]) - (token-identifier-mapping-put! toks (car t) - (cons id (cdr t)))))) - all-toks) + (syntax->list #'((PAT ...) ...)))]) + (for ([nt (in-list nt-ids)]) + (bound-identifier-mapping-put! nts nt (list 0))) + (for ([t (in-list all-end-toks)]) + (token-identifier-mapping-put! end-toks t #t)) + (for ([t (in-list all-toks)] + #:unless (token-identifier-mapping-get end-toks (car t) (λ () #f))) + (define id (gensym (syntax-e (car t)))) + (token-identifier-mapping-put! toks (car t) (cons id (cdr t)))) ;; Compute min max size for each non-term: (nt-fixpoint nts - (lambda (nt pats old-list) + (λ (nt pats old-list) (let ([new-cnt - (apply - min - (map (lambda (pat) - (apply - + - (map (lambda (elem) - (car - (bound-identifier-mapping-get nts - elem - (lambda () (list 1))))) - pat))) - pats))]) + (apply min (for/list ([pat (in-list pats)]) + (for/sum ([elem (in-list pat)]) + (car (bound-identifier-mapping-get + nts elem (λ () (list 1)))))))]) (if (new-cnt . > . (car old-list)) (cons new-cnt (cdr old-list)) old-list))) @@ -592,29 +536,28 @@ ;; for a non-terminal (nt-fixpoint nts - (lambda (nt pats old-list) + (λ (nt pats old-list) (let ([new-list (apply append - (map (lambda (pat) - (let loop ([pat pat]) - (if (pair? pat) - (let ([l (bound-identifier-mapping-get - nts - (car pat) - (lambda () - (list 1 (map-token toks (car pat)))))]) - ;; If the non-terminal can match 0 things, - ;; then it might match something from the - ;; next pattern element. Otherwise, it must - ;; match the first element: - (if (zero? (car l)) - (append (cdr l) (loop (cdr pat))) - (cdr l))) - null))) - pats))]) - (let ([new (filter (lambda (id) - (andmap (lambda (id2) + (for/list ([pat (in-list pats)]) + (let loop ([pat pat]) + (if (pair? pat) + (let ([l (bound-identifier-mapping-get + nts + (car pat) + (λ () + (list 1 (map-token toks (car pat)))))]) + ;; If the non-terminal can match 0 things, + ;; then it might match something from the + ;; next pattern element. Otherwise, it must + ;; match the first element: + (if (zero? (car l)) + (append (cdr l) (loop (cdr pat))) + (cdr l))) + null))))]) + (let ([new (filter (λ (id) + (andmap (λ (id2) (not (eq? id id2))) (cdr old-list))) new-list)]) @@ -623,7 +566,7 @@ (let ([new (let loop ([new new]) (if (null? (cdr new)) new - (if (ormap (lambda (id) + (if (ormap (λ (id) (eq? (car new) id)) (cdr new)) (loop (cdr new)) @@ -632,26 +575,26 @@ old-list)))) nt-ids patss) ;; Determine left-recursive clauses: - (for-each (lambda (nt pats) + (for-each (λ (nt pats) (let ([l (bound-identifier-mapping-get nts nt)]) (bound-identifier-mapping-put! nts nt (list (car l) (cdr l) - (map (lambda (x) #f) pats))))) + (map (λ (x) #f) pats))))) nt-ids patss) (nt-fixpoint nts - (lambda (nt pats old-list) + (λ (nt pats old-list) (list (car old-list) (cadr old-list) - (map (lambda (pat simple?) + (map (λ (pat simple?) (or simple? - (let ([l (map (lambda (elem) + (let ([l (map (λ (elem) (bound-identifier-mapping-get nts elem - (lambda () #f))) + (λ () #f))) pat)]) - (andmap (lambda (i) + (andmap (λ (i) (or (not i) (andmap values (caddr i)))) l)))) @@ -660,16 +603,16 @@ ;; Build a definition for each non-term: (loop (cdr clauses) cfg-start - (map (lambda (nt pats handles $ctxs) + (map (λ (nt pats handles $ctxs) (define info (bound-identifier-mapping-get nts nt)) (list nt #`(let ([key (gensym '#,nt)]) - (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) + (λ (stream last-consumed-token depth end success-k fail-k max-depth tasks) (parse-nt/share key #,(car info) '#,(cadr info) stream last-consumed-token depth end max-depth tasks success-k fail-k - (lambda (end max-depth tasks success-k fail-k) + (λ (end max-depth tasks success-k fail-k) #,(let loop ([pats pats] [handles (syntax->list handles)] [$ctxs (syntax->list $ctxs)] @@ -680,13 +623,13 @@ (car simple?s)) #'parse-or #'parse-parallel-or) - (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) + (λ (stream last-consumed-token depth end success-k fail-k max-depth tasks) #,(build-match nts toks (car pats) (car handles) (car $ctxs))) - (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) + (λ (stream last-consumed-token depth end success-k fail-k max-depth tasks) #,(loop (cdr pats) (cdr handles) (cdr $ctxs) @@ -694,14 +637,14 @@ stream last-consumed-token depth end success-k fail-k max-depth tasks))))))))) nt-ids patss - (syntax->list #'(((begin handle0 handle ...) ...) ...)) - (syntax->list #'((handle0 ...) ...))) + (syntax->list #'(((begin HANDLE0 HANDLE ...) ...) ...)) + (syntax->list #'((HANDLE0 ...) ...))) cfg-error src-pos? (list* (with-syntax ([((tok tok-id . $e) ...) (token-identifier-mapping-map toks - (lambda (k v) + (λ (k v) (list* k (car v) (if (cdr v) @@ -743,19 +686,19 @@ src-pos? (cons (car clauses) parser-clauses))]))))]) #`(let ([orig-parse (parser - [error (lambda (a b c) + [error (λ (a b c) (error 'cfg-parser "unexpected ~a token: ~a" b c))] . #,parser-clauses)] [error-proc #,cfg-error]) (letrec #,grammar - (lambda (get-tok) + (λ (get-tok) (let ([tok-list (orig-parse get-tok)]) (letrec ([success-k - (lambda (val stream last-consumed-token depth max-depth tasks next) + (λ (val stream last-consumed-token depth max-depth tasks next) (if (null? stream) val (next success-k fail-k max-depth tasks)))] - [fail-k (lambda (max-depth tasks) + [fail-k (λ (max-depth tasks) (cond [(null? tok-list) (if error-proc @@ -847,7 +790,7 @@ (define (parse s) (define ip (open-input-string s)) (port-count-lines! ip) - (-parse (lambda () (lex ip)))) + (-parse (λ () (lex ip)))) (check-equal? (parse "abc") '(unanchored (lit "abc" 1 4) 1 4)) @@ -881,7 +824,7 @@ (tokens non-terminals) (start ) (end EOF) - (error (lambda (a b stx) + (error (λ (a b stx) (error 'parse "failed at ~s" stx))) (grammar [ [(PLUS) "plus"] [( BAR ) (list $1 $2 $3)] @@ -903,7 +846,7 @@ -|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****" ;; This one fails: #;"+*")]) - (check-equal? (parse (lambda () (lex p))) + (check-equal? (parse (λ () (lex p))) '((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) || (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) diff --git a/br-parser-tools-lib/br-parser-tools/examples/calc.rkt b/br-parser-tools-lib/br-parser-tools/examples/calc.rkt index 9ad1218..0d351bd 100644 --- a/br-parser-tools-lib/br-parser-tools/examples/calc.rkt +++ b/br-parser-tools-lib/br-parser-tools/examples/calc.rkt @@ -1,4 +1,4 @@ -#lang scheme +#lang racket/base ;; An interactive calculator inspired by the calculator example in the bison manual. @@ -15,19 +15,19 @@ (define vars (make-hash)) (define-lex-abbrevs - (lower-letter (:/ "a" "z")) + (lower-letter (:/ "a" "z")) - (upper-letter (:/ #\A #\Z)) + (upper-letter (:/ #\A #\Z)) - ;; (:/ 0 9) would not work because the lexer does not understand numbers. (:/ #\0 #\9) is ok too. - (digit (:/ "0" "9"))) + ;; (:/ 0 9) would not work because the lexer does not understand numbers. (:/ #\0 #\9) is ok too. + (digit (:/ "0" "9"))) -(define calcl +(define calc-lex (lexer [(eof) 'EOF] ;; recursively call the lexer on the remaining input after a tab or space. Returning the ;; result of that operation. This effectively skips all whitespace. - [(:or #\tab #\space) (calcl input-port)] + [(:or #\tab #\space) (calc-lex input-port)] ;; (token-newline) returns 'newline [#\newline (token-newline)] ;; Since (token-=) returns '=, just return the symbol directly @@ -40,7 +40,7 @@ [(:: (:+ digit) #\. (:* digit)) (token-NUM (string->number lexeme))])) -(define calcp +(define calc-parse (parser (start start) @@ -78,12 +78,15 @@ ;; run the calculator on the given input-port (define (calc ip) (port-count-lines! ip) - (letrec ((one-line - (lambda () - (let ((result (calcp (lambda () (calcl ip))))) - (when result - (printf "~a\n" result) - (one-line)))))) - (one-line))) + (let loop () + (define result (calc-parse (λ () (calc-lex ip)))) + (when result + (printf "~a\n" result) + (loop)))) -(calc (open-input-string "x=1\n(x + 2 * 3) - (1+2)*3")) +(module+ test + (require rackunit) + (check-equal? (let ([o (open-output-string)]) + (parameterize ([current-output-port o]) + (calc (open-input-string "x=1\n(x + 2 * 3) - (1+2)*3"))) + (get-output-string o)) "1\n-2\n")) diff --git a/br-parser-tools-lib/br-parser-tools/examples/read.rkt b/br-parser-tools-lib/br-parser-tools/examples/read.rkt index a10b2c1..b01f77a 100644 --- a/br-parser-tools-lib/br-parser-tools/examples/read.rkt +++ b/br-parser-tools-lib/br-parser-tools/examples/read.rkt @@ -1,242 +1,240 @@ +#lang racket/base ;; This implements the equivalent of racket's read-syntax for R5RS scheme. ;; It has not been thoroughly tested. Also it will read an entire file into a ;; list of syntax objects, instead of returning one syntax object at a time - -(module read mzscheme - (require br-parser-tools/lex - (prefix : br-parser-tools/lex-sre) - br-parser-tools/yacc - syntax/readerr) +(require (for-syntax racket/base) + br-parser-tools/lex + (prefix-in : br-parser-tools/lex-sre) + br-parser-tools/yacc + syntax/readerr) - (define-tokens data (DATUM)) - (define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF)) +(define-tokens data (DATUM)) +(define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF)) - (define scheme-lexer - (lexer-src-pos +(define scheme-lexer + (lexer-src-pos - ;; Skip comments, without accumulating extra position information - [(:or scheme-whitespace comment) (return-without-pos (scheme-lexer input-port))] + ;; Skip comments, without accumulating extra position information + [(:or scheme-whitespace comment) (return-without-pos (scheme-lexer input-port))] - ["#t" (token-DATUM #t)] - ["#f" (token-DATUM #f)] - [(:: "#\\" any-char) (token-DATUM (caddr (string->list lexeme)))] - ["#\\space" (token-DATUM #\space)] - ["#\\newline" (token-DATUM #\newline)] - [(:or (:: initial (:* subsequent)) "+" "-" "...") (token-DATUM (string->symbol lexeme))] - [#\" (token-DATUM (list->string (get-string-token input-port)))] - [#\( 'OP] - [#\) 'CP] - [#\[ 'OP] - [#\] 'CP] - ["#(" 'HASHOP] - [num2 (token-DATUM (string->number lexeme 2))] - [num8 (token-DATUM (string->number lexeme 8))] - [num10 (token-DATUM (string->number lexeme 10))] - [num16 (token-DATUM (string->number lexeme 16))] - ["'" 'QUOTE] - ["`" 'QUASIQUOTE] - ["," 'UNQUOTE] - [",@" 'UNQUOTE-SPLICING] - ["." 'DOT] - [(eof) 'EOF])) + ["#t" (token-DATUM #t)] + ["#f" (token-DATUM #f)] + [(:: "#\\" any-char) (token-DATUM (caddr (string->list lexeme)))] + ["#\\space" (token-DATUM #\space)] + ["#\\newline" (token-DATUM #\newline)] + [(:or (:: initial (:* subsequent)) "+" "-" "...") (token-DATUM (string->symbol lexeme))] + [#\" (token-DATUM (list->string (get-string-token input-port)))] + [#\( 'OP] + [#\) 'CP] + [#\[ 'OP] + [#\] 'CP] + ["#(" 'HASHOP] + [num2 (token-DATUM (string->number lexeme 2))] + [num8 (token-DATUM (string->number lexeme 8))] + [num10 (token-DATUM (string->number lexeme 10))] + [num16 (token-DATUM (string->number lexeme 16))] + ["'" 'QUOTE] + ["`" 'QUASIQUOTE] + ["," 'UNQUOTE] + [",@" 'UNQUOTE-SPLICING] + ["." 'DOT] + [(eof) 'EOF])) - (define get-string-token - (lexer - [(:~ #\" #\\) (cons (car (string->list lexeme)) - (get-string-token input-port))] - [(:: #\\ #\\) (cons #\\ (get-string-token input-port))] - [(:: #\\ #\") (cons #\" (get-string-token input-port))] - [#\" null])) +(define get-string-token + (lexer + [(:~ #\" #\\) (cons (car (string->list lexeme)) + (get-string-token input-port))] + [(:: #\\ #\\) (cons #\\ (get-string-token input-port))] + [(:: #\\ #\") (cons #\" (get-string-token input-port))] + [#\" null])) - (define-lex-abbrevs - [letter (:or (:/ "a" "z") (:/ #\A #\Z))] - [digit (:/ #\0 #\9)] - [scheme-whitespace (:or #\newline #\return #\tab #\space #\vtab)] - [initial (:or letter (char-set "!$%&*/:<=>?^_~@"))] - [subsequent (:or initial digit (char-set "+-.@"))] - [comment (:: #\; (:* (:~ #\newline)) #\newline)] +(define-lex-abbrevs + [letter (:or (:/ "a" "z") (:/ #\A #\Z))] + [digit (:/ #\0 #\9)] + [scheme-whitespace (:or #\newline #\return #\tab #\space #\vtab)] + [initial (:or letter (char-set "!$%&*/:<=>?^_~@"))] + [subsequent (:or initial digit (char-set "+-.@"))] + [comment (:: #\; (:* (:~ #\newline)) #\newline)] - ;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of - ;; using regexp macros to avoid the cut and paste. - ; [numR (:: prefixR complexR)] - ; [complexR (:or realR - ; (:: realR "@" realR) - ; (:: realR "+" urealR "i") - ; (:: realR "-" urealR "i") - ; (:: realR "+i") - ; (:: realR "-i") - ; (:: "+" urealR "i") - ; (:: "-" urealR "i") - ; (:: "+i") - ; (:: "-i"))] - ; [realR (:: sign urealR)] - ; [urealR (:or uintegerR (:: uintegerR "/" uintegerR) decimalR)] - ; [uintegerR (:: (:+ digitR) (:* #\#))] - ; [prefixR (:or (:: radixR exactness) - ; (:: exactness radixR))] + ;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of + ;; using regexp macros to avoid the cut and paste. + ; [numR (:: prefixR complexR)] + ; [complexR (:or realR + ; (:: realR "@" realR) + ; (:: realR "+" urealR "i") + ; (:: realR "-" urealR "i") + ; (:: realR "+i") + ; (:: realR "-i") + ; (:: "+" urealR "i") + ; (:: "-" urealR "i") + ; (:: "+i") + ; (:: "-i"))] + ; [realR (:: sign urealR)] + ; [urealR (:or uintegerR (:: uintegerR "/" uintegerR) decimalR)] + ; [uintegerR (:: (:+ digitR) (:* #\#))] + ; [prefixR (:or (:: radixR exactness) + ; (:: exactness radixR))] - [num2 (:: prefix2 complex2)] - [complex2 (:or real2 - (:: real2 "@" real2) - (:: real2 "+" ureal2 "i") - (:: real2 "-" ureal2 "i") - (:: real2 "+i") - (:: real2 "-i") - (:: "+" ureal2 "i") - (:: "-" ureal2 "i") - (:: "+i") - (:: "-i"))] - [real2 (:: sign ureal2)] - [ureal2 (:or uinteger2 (:: uinteger2 "/" uinteger2))] - [uinteger2 (:: (:+ digit2) (:* #\#))] - [prefix2 (:or (:: radix2 exactness) - (:: exactness radix2))] - [radix2 "#b"] - [digit2 (:or "0" "1")] - [num8 (:: prefix8 complex8)] - [complex8 (:or real8 - (:: real8 "@" real8) - (:: real8 "+" ureal8 "i") - (:: real8 "-" ureal8 "i") - (:: real8 "+i") - (:: real8 "-i") - (:: "+" ureal8 "i") - (:: "-" ureal8 "i") + [num2 (:: prefix2 complex2)] + [complex2 (:or real2 + (:: real2 "@" real2) + (:: real2 "+" ureal2 "i") + (:: real2 "-" ureal2 "i") + (:: real2 "+i") + (:: real2 "-i") + (:: "+" ureal2 "i") + (:: "-" ureal2 "i") + (:: "+i") + (:: "-i"))] + [real2 (:: sign ureal2)] + [ureal2 (:or uinteger2 (:: uinteger2 "/" uinteger2))] + [uinteger2 (:: (:+ digit2) (:* #\#))] + [prefix2 (:or (:: radix2 exactness) + (:: exactness radix2))] + [radix2 "#b"] + [digit2 (:or "0" "1")] + [num8 (:: prefix8 complex8)] + [complex8 (:or real8 + (:: real8 "@" real8) + (:: real8 "+" ureal8 "i") + (:: real8 "-" ureal8 "i") + (:: real8 "+i") + (:: real8 "-i") + (:: "+" ureal8 "i") + (:: "-" ureal8 "i") + (:: "+i") + (:: "-i"))] + [real8 (:: sign ureal8)] + [ureal8 (:or uinteger8 (:: uinteger8 "/" uinteger8))] + [uinteger8 (:: (:+ digit8) (:* #\#))] + [prefix8 (:or (:: radix8 exactness) + (:: exactness radix8))] + [radix8 "#o"] + [digit8 (:/ "0" "7")] + + [num10 (:: prefix10 complex10)] + [complex10 (:or real10 + (:: real10 "@" real10) + (:: real10 "+" ureal10 "i") + (:: real10 "-" ureal10 "i") + (:: real10 "+i") + (:: real10 "-i") + (:: "+" ureal10 "i") + (:: "-" ureal10 "i") (:: "+i") (:: "-i"))] - [real8 (:: sign ureal8)] - [ureal8 (:or uinteger8 (:: uinteger8 "/" uinteger8))] - [uinteger8 (:: (:+ digit8) (:* #\#))] - [prefix8 (:or (:: radix8 exactness) - (:: exactness radix8))] - [radix8 "#o"] - [digit8 (:/ "0" "7")] + [real10 (:: sign ureal10)] + [ureal10 (:or uinteger10 (:: uinteger10 "/" uinteger10) decimal10)] + [uinteger10 (:: (:+ digit10) (:* #\#))] + [prefix10 (:or (:: radix10 exactness) + (:: exactness radix10))] + [radix10 (:? "#d")] + [digit10 digit] + [decimal10 (:or (:: uinteger10 suffix) + (:: #\. (:+ digit10) (:* #\#) suffix) + (:: (:+ digit10) #\. (:* digit10) (:* #\#) suffix) + (:: (:+ digit10) (:+ #\#) #\. (:* #\#) suffix))] - [num10 (:: prefix10 complex10)] - [complex10 (:or real10 - (:: real10 "@" real10) - (:: real10 "+" ureal10 "i") - (:: real10 "-" ureal10 "i") - (:: real10 "+i") - (:: real10 "-i") - (:: "+" ureal10 "i") - (:: "-" ureal10 "i") - (:: "+i") - (:: "-i"))] - [real10 (:: sign ureal10)] - [ureal10 (:or uinteger10 (:: uinteger10 "/" uinteger10) decimal10)] - [uinteger10 (:: (:+ digit10) (:* #\#))] - [prefix10 (:or (:: radix10 exactness) - (:: exactness radix10))] - [radix10 (:? "#d")] - [digit10 digit] - [decimal10 (:or (:: uinteger10 suffix) - (:: #\. (:+ digit10) (:* #\#) suffix) - (:: (:+ digit10) #\. (:* digit10) (:* #\#) suffix) - (:: (:+ digit10) (:+ #\#) #\. (:* #\#) suffix))] + [num16 (:: prefix16 complex16)] + [complex16 (:or real16 + (:: real16 "@" real16) + (:: real16 "+" ureal16 "i") + (:: real16 "-" ureal16 "i") + (:: real16 "+i") + (:: real16 "-i") + (:: "+" ureal16 "i") + (:: "-" ureal16 "i") + "+i" + "-i")] + [real16 (:: sign ureal16)] + [ureal16 (:or uinteger16 (:: uinteger16 "/" uinteger16))] + [uinteger16 (:: (:+ digit16) (:* #\#))] + [prefix16 (:or (:: radix16 exactness) + (:: exactness radix16))] + [radix16 "#x"] + [digit16 (:or digit (:/ #\a #\f) (:/ #\A #\F))] - [num16 (:: prefix16 complex16)] - [complex16 (:or real16 - (:: real16 "@" real16) - (:: real16 "+" ureal16 "i") - (:: real16 "-" ureal16 "i") - (:: real16 "+i") - (:: real16 "-i") - (:: "+" ureal16 "i") - (:: "-" ureal16 "i") - "+i" - "-i")] - [real16 (:: sign ureal16)] - [ureal16 (:or uinteger16 (:: uinteger16 "/" uinteger16))] - [uinteger16 (:: (:+ digit16) (:* #\#))] - [prefix16 (:or (:: radix16 exactness) - (:: exactness radix16))] - [radix16 "#x"] - [digit16 (:or digit (:/ #\a #\f) (:/ #\A #\F))] - - [suffix (:or "" (:: exponent-marker sign (:+ digit10)))] - [exponent-marker (:or "e" "s" "f" "d" "l")] - [sign (:or "" "+" "-")] - [exactness (:or "" "#i" "#e")]) + [suffix (:or "" (:: exponent-marker sign (:+ digit10)))] + [exponent-marker (:or "e" "s" "f" "d" "l")] + [sign (:or "" "+" "-")] + [exactness (:or "" "#i" "#e")]) - (define stx-for-original-property (read-syntax #f (open-input-string "original"))) +(define stx-for-original-property (read-syntax #f (open-input-string "original"))) - ;; A macro to build the syntax object - (define-syntax (build-so stx) - (syntax-case stx () - ((_ value start end) - (with-syntax ((start-pos (datum->syntax-object - (syntax end) - (string->symbol - (format "$~a-start-pos" - (syntax-object->datum (syntax start)))))) - (end-pos (datum->syntax-object - (syntax end) +;; A macro to build the syntax object +(define-syntax (build-so stx) + (syntax-case stx () + ((_ value start end) + (with-syntax ((start-pos (datum->syntax + #'end (string->symbol - (format "$~a-end-pos" - (syntax-object->datum (syntax end)))))) - (source (datum->syntax-object - (syntax end) - 'source-name))) - (syntax - (datum->syntax-object - #f - value - (list source - (position-line start-pos) - (position-col start-pos) - (position-offset start-pos) - (- (position-offset end-pos) - (position-offset start-pos))) - stx-for-original-property)))))) + (format "$~a-start-pos" + (syntax->datum #'start))))) + (end-pos (datum->syntax + #'end + (string->symbol + (format "$~a-end-pos" + (syntax->datum #'end))))) + (source (datum->syntax + #'end + 'source-name))) + (syntax + (datum->syntax + #f + value + (list source + (position-line start-pos) + (position-col start-pos) + (position-offset start-pos) + (- (position-offset end-pos) + (position-offset start-pos))) + stx-for-original-property)))))) - (define (scheme-parser source-name) - (parser - (src-pos) +(define (scheme-parser source-name) + (parser + (src-pos) - (start s) - (end EOF) - (error (lambda (a name val start end) - (raise-read-error - "read-error" - source-name - (position-line start) - (position-col start) - (position-offset start) - (- (position-offset end) - (position-offset start))))) - (tokens data delim) + (start s) + (end EOF) + (error (lambda (a name val start end) + (raise-read-error + "read-error" + source-name + (position-line start) + (position-col start) + (position-offset start) + (- (position-offset end) + (position-offset start))))) + (tokens data delim) - (grammar + (grammar - (s [(sexp-list) (reverse $1)]) + (s [(sexp-list) (reverse $1)]) - (sexp [(DATUM) (build-so $1 1 1)] - [(OP sexp-list CP) (build-so (reverse $2) 1 3)] - [(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)] - [(QUOTE sexp) (build-so (list 'quote $2) 1 2)] - [(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)] - [(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)] - [(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)] - [(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)]) + (sexp [(DATUM) (build-so $1 1 1)] + [(OP sexp-list CP) (build-so (reverse $2) 1 3)] + [(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)] + [(QUOTE sexp) (build-so (list 'quote $2) 1 2)] + [(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)] + [(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)] + [(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)] + [(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)]) - (sexp-list [() null] - [(sexp-list sexp) (cons $2 $1)])))) - - (define (rs sn ip) - (port-count-lines! ip) - ((scheme-parser sn) (lambda () (scheme-lexer ip)))) + (sexp-list [() null] + [(sexp-list sexp) (cons $2 $1)])))) - (define readsyntax - (case-lambda ((sn) (rs sn (current-input-port))) - ((sn ip) (rs sn ip)))) +(define (rs sn ip) + (port-count-lines! ip) + ((scheme-parser sn) (lambda () (scheme-lexer ip)))) - (provide (rename readsyntax read-syntax)) +(define readsyntax + (case-lambda ((sn) (rs sn (current-input-port))) + ((sn ip) (rs sn ip)))) - ) +(provide (rename-out [readsyntax read-syntax])) diff --git a/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt b/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt index 0cbb175..ce11f2d 100644 --- a/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt +++ b/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt @@ -1,24 +1,23 @@ -(module lex-plt-v200 mzscheme - (require br-parser-tools/lex - (prefix : br-parser-tools/lex-sre)) +#lang racket/base +(require (for-syntax racket/base) + br-parser-tools/lex + (prefix-in : br-parser-tools/lex-sre)) - (provide epsilon - ~ - (rename :* *) - (rename :+ +) - (rename :? ?) - (rename :or :) - (rename :& &) - (rename :: @) - (rename :~ ^) - (rename :/ -)) +(provide epsilon ~ + (rename-out [:* *] + [:+ +] + [:? ?] + [:or :] + [:& &] + [:: @] + [:~ ^] + [:/ -])) - (define-lex-trans epsilon - (syntax-rules () - ((_) ""))) - - (define-lex-trans ~ - (syntax-rules () - ((_ re) (complement re))))) +(define-lex-trans (epsilon stx) + (syntax-case stx () + [(_) #'""])) +(define-lex-trans (~ stx) + (syntax-case stx () + [(_ RE) #'(complement RE)])) diff --git a/br-parser-tools-lib/br-parser-tools/lex-sre.rkt b/br-parser-tools-lib/br-parser-tools/lex-sre.rkt index 820d090..40f2b16 100644 --- a/br-parser-tools-lib/br-parser-tools/lex-sre.rkt +++ b/br-parser-tools-lib/br-parser-tools/lex-sre.rkt @@ -1,119 +1,103 @@ -(module lex-sre mzscheme - (require br-parser-tools/lex) +#lang racket/base +(require (for-syntax racket/base) + br-parser-tools/lex) - (provide (rename sre-* *) - (rename sre-+ +) - ? - (rename sre-= =) - (rename sre->= >=) - ** - (rename sre-or or) - : - seq - & - ~ - (rename sre-- -) - (rename sre-/ /) - /-only-chars) +(provide (rename-out [sre-* *] + [sre-+ +] + [sre-= =] + [sre->= >=] + [sre-or or] + [sre-- -] + [sre-/ /]) + ? ** : seq & ~ /-only-chars) - (define-lex-trans sre-* - (syntax-rules () - ((_ re ...) - (repetition 0 +inf.0 (union re ...))))) +(define-lex-trans (sre-* stx) + (syntax-case stx () + [(_ RE ...) + #'(repetition 0 +inf.0 (union RE ...))])) - (define-lex-trans sre-+ - (syntax-rules () - ((_ re ...) - (repetition 1 +inf.0 (union re ...))))) +(define-lex-trans (sre-+ stx) + (syntax-case stx () + [(_ RE ...) + #'(repetition 1 +inf.0 (union RE ...))])) - (define-lex-trans ? - (syntax-rules () - ((_ re ...) - (repetition 0 1 (union re ...))))) +(define-lex-trans (? stx) + (syntax-case stx () + [(_ RE ...) + #'(repetition 0 1 (union RE ...))])) - (define-lex-trans sre-= - (syntax-rules () - ((_ n re ...) - (repetition n n (union re ...))))) +(define-lex-trans (sre-= stx) + (syntax-case stx () + [(_ N RE ...) + #'(repetition N N (union RE ...))])) - (define-lex-trans sre->= - (syntax-rules () - ((_ n re ...) - (repetition n +inf.0 (union re ...))))) +(define-lex-trans (sre->= stx) + (syntax-case stx () + [(_ N RE ...) + #'(repetition N +inf.0 (union RE ...))])) - (define-lex-trans ** - (syntax-rules () - ((_ low #f re ...) - (** low +inf.0 re ...)) - ((_ low high re ...) - (repetition low high (union re ...))))) +(define-lex-trans (** stx) + (syntax-case stx () + [(_ LOW #f RE ...) + #'(** LOW +inf.0 RE ...)] + [(_ LOW HIGH RE ...) + #'(repetition LOW HIGH (union RE ...))])) - (define-lex-trans sre-or - (syntax-rules () - ((_ re ...) - (union re ...)))) +(define-lex-trans (sre-or stx) + (syntax-case stx () + [(_ RE ...) + #'(union RE ...)])) - (define-lex-trans : - (syntax-rules () - ((_ re ...) - (concatenation re ...)))) +(define-lex-trans (: stx) + (syntax-case stx () + [(_ RE ...) + #'(concatenation RE ...)])) - (define-lex-trans seq - (syntax-rules () - ((_ re ...) - (concatenation re ...)))) +(define-lex-trans (seq stx) + (syntax-case stx () + [(_ RE ...) + #'(concatenation RE ...)])) - (define-lex-trans & - (syntax-rules () - ((_ re ...) - (intersection re ...)))) +(define-lex-trans (& stx) + (syntax-case stx () + [(_ RE ...) + #'(intersection RE ...)])) - (define-lex-trans ~ - (syntax-rules () - ((_ re ...) - (char-complement (union re ...))))) +(define-lex-trans (~ stx) + (syntax-case stx () + [(_ RE ...) + #'(char-complement (union RE ...))])) - ;; set difference - (define-lex-trans (sre-- stx) - (syntax-case stx () - ((_) - (raise-syntax-error #f - "must have at least one argument" - stx)) - ((_ big-re re ...) - (syntax (& big-re (complement (union re ...))))))) +;; set difference +(define-lex-trans (sre-- stx) + (syntax-case stx () + [(_) + (raise-syntax-error #f + "must have at least one argument" + stx)] + [(_ BIG-RE RE ...) + #'(& BIG-RE (complement (union RE ...)))])) - (define-lex-trans (sre-/ stx) - (syntax-case stx () - ((_ range ...) - (let ((chars - (apply append (map (lambda (r) - (let ((x (syntax-e r))) - (cond - ((char? x) (list x)) - ((string? x) (string->list x)) - (else - (raise-syntax-error - #f - "not a char or string" - stx - r))))) - (syntax->list (syntax (range ...))))))) - (unless (even? (length chars)) - (raise-syntax-error - #f - "not given an even number of characters" - stx)) - #`(/-only-chars #,@chars))))) +(define-lex-trans (sre-/ stx) + (syntax-case stx () + [(_ RANGE ...) + (let ([chars + (apply append (for/list ([r (in-list (syntax->list #'(RANGE ...)))]) + (let ([x (syntax-e r)]) + (cond + [(char? x) (list x)] + [(string? x) (string->list x)] + [else + (raise-syntax-error #f "not a char or string" stx r)]))))]) + (unless (even? (length chars)) + (raise-syntax-error #f "not given an even number of characters" stx)) + #`(/-only-chars #,@chars))])) - (define-lex-trans /-only-chars - (syntax-rules () - ((_ c1 c2) - (char-range c1 c2)) - ((_ c1 c2 c ...) - (union (char-range c1 c2) - (/-only-chars c ...))))) +(define-lex-trans (/-only-chars stx) + (syntax-case stx () + [(_ C1 C2) + #'(char-range C1 C2)] + [(_ C1 C2 C ...) + #'(union (char-range C1 C2) (/-only-chars C ...))])) - ) - diff --git a/br-parser-tools-lib/br-parser-tools/lex.rkt b/br-parser-tools-lib/br-parser-tools/lex.rkt index 7c0c743..d4ee03c 100644 --- a/br-parser-tools-lib/br-parser-tools/lex.rkt +++ b/br-parser-tools-lib/br-parser-tools/lex.rkt @@ -3,7 +3,7 @@ ;; Provides the syntax used to create lexers and the functions needed to ;; create and use the buffer that the lexer reads from. See docs. -(require (for-syntax mzlib/list +(require (for-syntax racket/list syntax/stx syntax/define syntax/boundmap @@ -14,7 +14,7 @@ racket/base racket/promise)) -(require mzlib/stxparam +(require racket/stxparam syntax/readerr "private-lex/token.rkt") @@ -30,7 +30,7 @@ file-path lexer-file-path ;; alternate name - ;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4. + ;; Lex abbrevs for unicode char sets. any-char any-string nothing alphabetic lower-case upper-case title-case numeric symbolic punctuation graphic whitespace blank iso-control @@ -77,9 +77,9 @@ (let () (define spec/re-acts (syntax->list #'RE+ACTS)) (for/and ([x (in-list spec/re-acts)]) - (syntax-case x () - [(RE ACT) #t] - [else (raise-syntax-error caller "not a regular expression / action pair" stx x)])) + (syntax-case x () + [(RE ACT) #t] + [else (raise-syntax-error caller "not a regular expression / action pair" stx x)])) (define eof-act (get-special-action spec/re-acts #'eof (case src-loc-style [(lexer-src-pos) #'(return-without-pos eof)] [(lexer-srcloc) #'(return-without-srcloc eof)] @@ -212,92 +212,90 @@ (define (get-next-state char table) (and table (get-next-state-helper char 0 (vector-length table) table))) -(define (lexer-body start-state trans-table actions no-lookahead special-action - has-special-comment-action? special-comment-action eof-action) - (letrec ([lexer - (λ (ip) - (let ((first-pos (get-position ip)) - (first-char (peek-char-or-special ip 0))) - ;(printf "(peek-char-or-special port 0) = ~e\n" first-char) - (cond - [(eof-object? first-char) - (do-match ip first-pos eof-action (read-char-or-special ip))] - [(special-comment? first-char) - (read-char-or-special ip) - (cond - (has-special-comment-action? - (do-match ip first-pos special-comment-action #f)) - (else (lexer ip)))] - [(not (char? first-char)) - (do-match ip first-pos special-action (read-char-or-special ip))] - [else - (let lexer-loop ( - ;; current-state - (state start-state) - ;; the character to transition on - (char first-char) - ;; action for the longest match seen thus far - ;; including a match at the current state - (longest-match-action - (vector-ref actions start-state)) - ;; how many bytes precede char - (length-bytes 0) - ;; how many characters have been read - ;; including the one just read - (length-chars 1) - ;; how many characters are in the longest match - (longest-match-length 0)) - (let ([next-state - (cond - [(not (char? char)) #f] - [else (get-next-state (char->integer char) - (vector-ref trans-table state))])]) - (cond - [(not next-state) - (check-match ip first-pos longest-match-length - length-chars longest-match-action)] - [(vector-ref no-lookahead next-state) - (let ((act (vector-ref actions next-state))) - (check-match ip - first-pos - (if act length-chars longest-match-length) - length-chars - (if act act longest-match-action)))] - [else - (let* ([act (vector-ref actions next-state)] - [next-length-bytes (+ (char-utf-8-length char) length-bytes)] - [next-char (peek-char-or-special ip next-length-bytes)]) - #;(printf "(peek-char-or-special port ~e) = ~e\n" - next-length-bytes next-char) - (lexer-loop next-state - next-char - (if act - act - longest-match-action) - next-length-bytes - (add1 length-chars) - (if act - length-chars - longest-match-length)))])))])))]) - (λ (ip) - (unless (input-port? ip) - (raise-argument-error 'lexer "input-port?" 0 ip)) - (lexer ip)))) +(define ((lexer-body start-state trans-table actions no-lookahead special-action + has-special-comment-action? special-comment-action eof-action) ip) + (define (lexer ip) + (define first-pos (get-position ip)) + (define first-char (peek-char-or-special ip 0)) + ;(printf "(peek-char-or-special port 0) = ~e\n" first-char) + (cond + [(eof-object? first-char) + (do-match ip first-pos eof-action (read-char-or-special ip))] + [(special-comment? first-char) + (read-char-or-special ip) + (cond + (has-special-comment-action? + (do-match ip first-pos special-comment-action #f)) + (else (lexer ip)))] + [(not (char? first-char)) + (do-match ip first-pos special-action (read-char-or-special ip))] + [else + (let lexer-loop ( + ;; current-state + [state start-state] + ;; the character to transition on + [char first-char] + ;; action for the longest match seen thus far + ;; including a match at the current state + [longest-match-action + (vector-ref actions start-state)] + ;; how many bytes precede char + [length-bytes 0] + ;; how many characters have been read + ;; including the one just read + [length-chars 1] + ;; how many characters are in the longest match + [longest-match-length 0]) + (define next-state + (cond + [(not (char? char)) #f] + [else (get-next-state (char->integer char) + (vector-ref trans-table state))])) + (cond + [(not next-state) + (check-match ip first-pos longest-match-length + length-chars longest-match-action)] + [(vector-ref no-lookahead next-state) + (define act (vector-ref actions next-state)) + (check-match ip + first-pos + (if act length-chars longest-match-length) + length-chars + (if act act longest-match-action))] + [else + (define act (vector-ref actions next-state)) + (define next-length-bytes (+ (char-utf-8-length char) length-bytes)) + (define next-char (peek-char-or-special ip next-length-bytes)) + #;(printf "(peek-char-or-special port ~e) = ~e\n" + next-length-bytes next-char) + (lexer-loop next-state + next-char + (if act + act + longest-match-action) + next-length-bytes + (add1 length-chars) + (if act + length-chars + longest-match-length))]))])) + (unless (input-port? ip) + (raise-argument-error 'lexer "input-port?" 0 ip)) + (lexer ip)) (define (check-match lb first-pos longest-match-length length longest-match-action) (unless longest-match-action - (let* ([match (read-string length lb)] - [end-pos (get-position lb)]) - (raise-read-error - (format "lexer: No match found in input starting with: ~a" match) - (file-path) - (position-line first-pos) - (position-col first-pos) - (position-offset first-pos) - (- (position-offset end-pos) (position-offset first-pos))))) - (let ([match (read-string longest-match-length lb)]) - ;(printf "(read-string ~e port) = ~e\n" longest-match-length match) - (do-match lb first-pos longest-match-action match))) + (define match (read-string length lb)) + (define end-pos (get-position lb)) + (raise-read-error + (format "lexer: No match found in input starting with: ~a" match) + (file-path) + (position-line first-pos) + (position-col first-pos) + (position-offset first-pos) + (- (position-offset end-pos) (position-offset first-pos)))) + (define match (read-string longest-match-length lb)) + ;(printf "(read-string ~e port) = ~e\n" longest-match-length match) + (do-match lb first-pos longest-match-action match)) (define file-path (make-parameter #f)) (define lexer-file-path file-path) @@ -325,10 +323,10 @@ (force whitespace-ranges) (force blank-ranges) (force iso-control-ranges)))]) - `(union ,@(map (λ (x) - `(char-range ,(integer->char (car x)) - ,(integer->char (cdr x)))) - range)))] + `(union ,@(map (λ (x) + `(char-range ,(integer->char (car x)) + ,(integer->char (cdr x)))) + range)))] [(NAMES ...) (for/list ([sym (in-list '(alphabetic lower-case upper-case @@ -340,7 +338,7 @@ whitespace blank iso-control))]) - (datum->syntax #'CTXT sym #f))]) + (datum->syntax #'CTXT sym #f))]) #'(define-lex-abbrevs (NAMES RANGES) ...))])) (define-lex-abbrev any-char (char-complement (union))) diff --git a/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt index 13f982c..34b7812 100644 --- a/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt @@ -1,5 +1,4 @@ #lang racket/base - (provide (all-defined-out)) (require syntax/stx) @@ -7,10 +6,10 @@ ;; Returns the first action from a rule of the form ((which-special) action) (define (get-special-action rules which-special none) (cond - ((null? rules) none) - (else + [(null? rules) none] + [else (syntax-case (car rules) () [((special) ACT) - (and (identifier? #'special) (module-or-top-identifier=? (syntax special) which-special)) + (and (identifier? #'special) (module-or-top-identifier=? #'special which-special)) #'ACT] - [_ (get-special-action (cdr rules) which-special none)])))) + [_ (get-special-action (cdr rules) which-special none)])])) diff --git a/br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt index 28919a3..d87e846 100644 --- a/br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt @@ -1,303 +1,297 @@ -(module deriv mzscheme - - (require mzlib/list - (prefix is: mzlib/integer-set) - "re.rkt" - "util.rkt") +#lang racket/base +(require racket/list + (prefix-in is: data/integer-set) + "re.rkt" + "util.rkt") - (provide build-dfa print-dfa (struct dfa (num-states start-state final-states/actions transitions))) +(provide build-dfa print-dfa (struct-out dfa)) - (define e (build-epsilon)) - (define z (build-zero)) +(define e (build-epsilon)) +(define z (build-zero)) - ;; Don't do anything with this one but extract the chars - (define all-chars (->re `(char-complement (union)) (make-cache))) +;; Don't do anything with this one but extract the chars +(define all-chars (->re `(char-complement (union)) (make-cache))) - ;; get-char-groups : re bool -> (list-of char-setR?) - ;; Collects the char-setRs in r that could be used in - ;; taking the derivative of r. - (define (get-char-groups r found-negation) - (cond - ((or (eq? r e) (eq? r z)) null) - ((char-setR? r) (list r)) - ((concatR? r) - (if (re-nullable? (concatR-re1 r)) - (append (get-char-groups (concatR-re1 r) found-negation) - (get-char-groups (concatR-re2 r) found-negation)) - (get-char-groups (concatR-re1 r) found-negation))) - ((repeatR? r) - (get-char-groups (repeatR-re r) found-negation)) - ((orR? r) - (apply append (map (lambda (x) (get-char-groups x found-negation)) (orR-res r)))) - ((andR? r) - (apply append (map (lambda (x) (get-char-groups x found-negation)) (andR-res r)))) - ((negR? r) - (if found-negation - (get-char-groups (negR-re r) #t) - (cons all-chars (get-char-groups (negR-re r) #t)))))) +;; get-char-groups : re bool -> (list-of char-setR?) +;; Collects the char-setRs in r that could be used in +;; taking the derivative of r. +(define (get-char-groups r found-negation) + (cond + [(or (eq? r e) (eq? r z)) null] + [(char-setR? r) (list r)] + [(concatR? r) + (if (re-nullable? (concatR-re1 r)) + (append (get-char-groups (concatR-re1 r) found-negation) + (get-char-groups (concatR-re2 r) found-negation)) + (get-char-groups (concatR-re1 r) found-negation))] + [(repeatR? r) + (get-char-groups (repeatR-re r) found-negation)] + [(orR? r) + (apply append (map (λ (x) (get-char-groups x found-negation)) (orR-res r)))] + [(andR? r) + (apply append (map (λ (x) (get-char-groups x found-negation)) (andR-res r)))] + [(negR? r) + (if found-negation + (get-char-groups (negR-re r) #t) + (cons all-chars (get-char-groups (negR-re r) #t)))])) - (test-block ((c (make-cache)) - (r1 (->re #\1 c)) - (r2 (->re #\2 c))) - ((get-char-groups e #f) null) - ((get-char-groups z #f) null) - ((get-char-groups r1 #f) (list r1)) - ((get-char-groups (->re `(concatenation ,r1 ,r2) c) #f) - (list r1)) - ((get-char-groups (->re `(concatenation ,e ,r2) c) #f) - (list r2)) - ((get-char-groups (->re `(concatenation (repetition 0 +inf.0 ,r1) ,r2) c) #f) - (list r1 r2)) - ((get-char-groups (->re `(repetition 0 +inf.0 ,r1) c) #f) - (list r1)) - ((get-char-groups - (->re `(union (repetition 0 +inf.0 ,r1) - (concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f) - (list r1 r2 (->re "3" c) (->re "4" c))) - ((get-char-groups (->re `(complement ,r1) c) #f) - (list all-chars r1)) - ((get-char-groups - (->re `(intersection (repetition 0 +inf.0 ,r1) - (concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f) - (list r1 r2 (->re "3" c) (->re "4" c))) - ) - (define loc:member? is:member?) +(test-block ((c (make-cache)) + (r1 (->re #\1 c)) + (r2 (->re #\2 c))) + ((get-char-groups e #f) null) + ((get-char-groups z #f) null) + ((get-char-groups r1 #f) (list r1)) + ((get-char-groups (->re `(concatenation ,r1 ,r2) c) #f) + (list r1)) + ((get-char-groups (->re `(concatenation ,e ,r2) c) #f) + (list r2)) + ((get-char-groups (->re `(concatenation (repetition 0 +inf.0 ,r1) ,r2) c) #f) + (list r1 r2)) + ((get-char-groups (->re `(repetition 0 +inf.0 ,r1) c) #f) + (list r1)) + ((get-char-groups + (->re `(union (repetition 0 +inf.0 ,r1) + (concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f) + (list r1 r2 (->re "3" c) (->re "4" c))) + ((get-char-groups (->re `(complement ,r1) c) #f) + (list all-chars r1)) + ((get-char-groups + (->re `(intersection (repetition 0 +inf.0 ,r1) + (concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f) + (list r1 r2 (->re "3" c) (->re "4" c))) + ) +(define loc:member? is:member?) - ;; deriveR : re char cache -> re - (define (deriveR r c cache) - (cond - ((or (eq? r e) (eq? r z)) z) - ((char-setR? r) - (if (loc:member? c (char-setR-chars r)) e z)) - ((concatR? r) - (let* ((r1 (concatR-re1 r)) - (r2 (concatR-re2 r)) - (d (build-concat (deriveR r1 c cache) r2 cache))) - (if (re-nullable? r1) - (build-or (list d (deriveR r2 c cache)) cache) - d))) - ((repeatR? r) - (build-concat (deriveR (repeatR-re r) c cache) - (build-repeat (sub1 (repeatR-low r)) - (sub1 (repeatR-high r)) - (repeatR-re r) cache) - cache)) - ((orR? r) - (build-or (map (lambda (x) (deriveR x c cache)) - (orR-res r)) - cache)) - ((andR? r) - (build-and (map (lambda (x) (deriveR x c cache)) - (andR-res r)) - cache)) - ((negR? r) - (build-neg (deriveR (negR-re r) c cache) cache)))) +;; deriveR : re char cache -> re +(define (deriveR r c cache) + (cond + [(or (eq? r e) (eq? r z)) z] + [(char-setR? r) + (if (loc:member? c (char-setR-chars r)) e z)] + [(concatR? r) + (define r1 (concatR-re1 r)) + (define r2 (concatR-re2 r)) + (define d (build-concat (deriveR r1 c cache) r2 cache)) + (if (re-nullable? r1) + (build-or (list d (deriveR r2 c cache)) cache) + d)] + [(repeatR? r) + (build-concat (deriveR (repeatR-re r) c cache) + (build-repeat (sub1 (repeatR-low r)) + (sub1 (repeatR-high r)) + (repeatR-re r) cache) + cache)] + [(orR? r) + (build-or (map (λ (x) (deriveR x c cache)) + (orR-res r)) + cache)] + [(andR? r) + (build-and (map (λ (x) (deriveR x c cache)) + (andR-res r)) + cache)] + [(negR? r) + (build-neg (deriveR (negR-re r) c cache) cache)])) - (test-block ((c (make-cache)) - (a (char->integer #\a)) - (b (char->integer #\b)) - (r1 (->re #\a c)) - (r2 (->re `(repetition 0 +inf.0 #\a) c)) - (r3 (->re `(repetition 0 +inf.0 ,r2) c)) - (r4 (->re `(concatenation #\a ,r2) c)) - (r5 (->re `(repetition 0 +inf.0 ,r4) c)) - (r6 (->re `(union ,r5 #\a) c)) - (r7 (->re `(concatenation ,r2 ,r2) c)) - (r8 (->re `(complement ,r4) c)) - (r9 (->re `(intersection ,r2 ,r4) c))) - ((deriveR e a c) z) - ((deriveR z a c) z) - ((deriveR r1 b c) z) - ((deriveR r1 a c) e) - ((deriveR r2 a c) r2) - ((deriveR r2 b c) z) - ((deriveR r3 a c) r2) - ((deriveR r3 b c) z) - ((deriveR r4 a c) r2) - ((deriveR r4 b c) z) - ((deriveR r5 a c) (->re `(concatenation ,r2 ,r5) c)) - ((deriveR r5 b c) z) - ((deriveR r6 a c) (->re `(union (concatenation ,r2 ,r5) "") c)) - ((deriveR r6 b c) z) - ((deriveR r7 a c) (->re `(union (concatenation ,r2 ,r2) ,r2) c)) - ((deriveR r7 b c) z) - ((deriveR r8 a c) (->re `(complement, r2) c)) - ((deriveR r8 b c) (->re `(complement ,z) c)) - ((deriveR r9 a c) r2) - ((deriveR r9 b c) z) - ((deriveR (->re `(repetition 1 2 "ab") c) a c) - (->re `(concatenation "b" (repetition 0 1 "ab")) c))) +(test-block ((c (make-cache)) + (a (char->integer #\a)) + (b (char->integer #\b)) + (r1 (->re #\a c)) + (r2 (->re `(repetition 0 +inf.0 #\a) c)) + (r3 (->re `(repetition 0 +inf.0 ,r2) c)) + (r4 (->re `(concatenation #\a ,r2) c)) + (r5 (->re `(repetition 0 +inf.0 ,r4) c)) + (r6 (->re `(union ,r5 #\a) c)) + (r7 (->re `(concatenation ,r2 ,r2) c)) + (r8 (->re `(complement ,r4) c)) + (r9 (->re `(intersection ,r2 ,r4) c))) + ((deriveR e a c) z) + ((deriveR z a c) z) + ((deriveR r1 b c) z) + ((deriveR r1 a c) e) + ((deriveR r2 a c) r2) + ((deriveR r2 b c) z) + ((deriveR r3 a c) r2) + ((deriveR r3 b c) z) + ((deriveR r4 a c) r2) + ((deriveR r4 b c) z) + ((deriveR r5 a c) (->re `(concatenation ,r2 ,r5) c)) + ((deriveR r5 b c) z) + ((deriveR r6 a c) (->re `(union (concatenation ,r2 ,r5) "") c)) + ((deriveR r6 b c) z) + ((deriveR r7 a c) (->re `(union (concatenation ,r2 ,r2) ,r2) c)) + ((deriveR r7 b c) z) + ((deriveR r8 a c) (->re `(complement, r2) c)) + ((deriveR r8 b c) (->re `(complement ,z) c)) + ((deriveR r9 a c) r2) + ((deriveR r9 b c) z) + ((deriveR (->re `(repetition 1 2 "ab") c) a c) + (->re `(concatenation "b" (repetition 0 1 "ab")) c))) - ;; An re-action is (cons re action) +;; An re-action is (cons re action) - ;; derive : (list-of re-action) char cache -> (union (list-of re-action) #f) - ;; applies deriveR to all the re-actions's re parts. - ;; Returns #f if the derived state is equivalent to z. - (define (derive r c cache) - (let ((new-r (map (lambda (ra) - (cons (deriveR (car ra) c cache) (cdr ra))) - r))) - (if (andmap (lambda (x) (eq? z (car x))) - new-r) - #f - new-r))) +;; derive : (list-of re-action) char cache -> (union (list-of re-action) #f) +;; applies deriveR to all the re-actions's re parts. +;; Returns #f if the derived state is equivalent to z. +(define (derive r c cache) + (define new-r (for/list ([ra (in-list r)]) + (cons (deriveR (car ra) c cache) (cdr ra)))) + (if (andmap (λ (x) (eq? z (car x))) new-r) + #f + new-r)) - (test-block ((c (make-cache)) - (r1 (->re #\1 c)) - (r2 (->re #\2 c))) - ((derive null (char->integer #\1) c) #f) - ((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c) - (list (cons e 1) (cons z 2))) - ((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f)) +(test-block ((c (make-cache)) + (r1 (->re #\1 c)) + (r2 (->re #\2 c))) + ((derive null (char->integer #\1) c) #f) + ((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c) + (list (cons e 1) (cons z 2))) + ((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f)) - ;; get-final : (list-of re-action) -> (union #f syntax-object) - ;; An re that accepts e represents a final state. Return the - ;; action from the first final state or #f if there is none. - (define (get-final res) - (cond - ((null? res) #f) - ((re-nullable? (caar res)) (cdar res)) - (else (get-final (cdr res))))) +;; get-final : (list-of re-action) -> (union #f syntax-object) +;; An re that accepts e represents a final state. Return the +;; action from the first final state or #f if there is none. +(define (get-final res) + (cond + [(null? res) #f] + [(re-nullable? (caar res)) (cdar res)] + [else (get-final (cdr res))])) - (test-block ((c->i char->integer) - (c (make-cache)) - (r1 (->re #\a c)) - (r2 (->re #\b c)) - (b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5))) - (a (list (cons r1 1) (cons r2 2)))) - ((derive null (c->i #\a) c) #f) - ((derive a (c->i #\a) c) (list (cons e 1) (cons z 2))) - ((derive a (c->i #\b) c) (list (cons z 1) (cons e 2))) - ((derive a (c->i #\c) c) #f) - ((derive (list (cons (->re `(union " " "\n" ",") c) 1) - (cons (->re `(concatenation (repetition 0 1 "-") - (repetition 1 +inf.0 (char-range "0" "9"))) c) 2) - (cons (->re `(concatenation "-" (repetition 1 +inf.0 "-")) c) 3) - (cons (->re "[" c) 4) - (cons (->re "]" c) 5)) (c->i #\[) c) - b) - ((get-final a) #f) - ((get-final (list (cons e 1) (cons e 2))) 1) - ((get-final b) 4)) +(test-block ((c->i char->integer) + (c (make-cache)) + (r1 (->re #\a c)) + (r2 (->re #\b c)) + (b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5))) + (a (list (cons r1 1) (cons r2 2)))) + ((derive null (c->i #\a) c) #f) + ((derive a (c->i #\a) c) (list (cons e 1) (cons z 2))) + ((derive a (c->i #\b) c) (list (cons z 1) (cons e 2))) + ((derive a (c->i #\c) c) #f) + ((derive (list (cons (->re `(union " " "\n" ",") c) 1) + (cons (->re `(concatenation (repetition 0 1 "-") + (repetition 1 +inf.0 (char-range "0" "9"))) c) 2) + (cons (->re `(concatenation "-" (repetition 1 +inf.0 "-")) c) 3) + (cons (->re "[" c) 4) + (cons (->re "]" c) 5)) (c->i #\[) c) + b) + ((get-final a) #f) + ((get-final (list (cons e 1) (cons e 2))) 1) + ((get-final b) 4)) - ;; A state is (make-state (list-of re-action) nat) - (define-struct state (spec index)) +;; A state is (make-state (list-of re-action) nat) +(define-struct state (spec index)) - ;; get->key : re-action -> (list-of nat) - ;; states are indexed by the list of indexes of their res - (define (get-key s) - (map (lambda (x) (re-index (car x))) s)) +;; get->key : re-action -> (list-of nat) +;; states are indexed by the list of indexes of their res +(define (get-key s) + (map (λ (x) (re-index (car x))) s)) - (define loc:partition is:partition) +(define loc:partition is:partition) - ;; compute-chars : (list-of state) -> (list-of char-set) - ;; Computed the sets of equivalent characters for taking the - ;; derivative of the car of st. Only one derivative per set need to be taken. - (define (compute-chars st) - (cond - ((null? st) null) - (else - (loc:partition (map char-setR-chars - (apply append (map (lambda (x) (get-char-groups (car x) #f)) - (state-spec (car st))))))))) +;; compute-chars : (list-of state) -> (list-of char-set) +;; Computed the sets of equivalent characters for taking the +;; derivative of the car of st. Only one derivative per set need to be taken. +(define (compute-chars st) + (cond + [(null? st) null] + [else + (loc:partition (map char-setR-chars + (apply append (map (λ (x) (get-char-groups (car x) #f)) + (state-spec (car st))))))])) - (test-block ((c (make-cache)) - (c->i char->integer) - (r1 (->re `(char-range #\1 #\4) c)) - (r2 (->re `(char-range #\2 #\3) c))) - ((compute-chars null) null) - ((compute-chars (list (make-state null 1))) null) - ((map is:integer-set-contents - (compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2)))) - (list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3))) - (is:integer-set-contents (is:union (is:make-range (c->i #\1)) - (is:make-range (c->i #\4))))))) +(test-block ((c (make-cache)) + (c->i char->integer) + (r1 (->re `(char-range #\1 #\4) c)) + (r2 (->re `(char-range #\2 #\3) c))) + ((compute-chars null) null) + ((compute-chars (list (make-state null 1))) null) + ((map is:integer-set-contents + (compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2)))) + (list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3))) + (is:integer-set-contents (is:union (is:make-range (c->i #\1)) + (is:make-range (c->i #\4))))))) - ;; A dfa is (make-dfa int int - ;; (list-of (cons int syntax-object)) - ;; (list-of (cons int (list-of (cons char-set int))))) - ;; Each transitions is a state and a list of chars with the state to transition to. - ;; The finals and transitions are sorted by state number, and duplicate free. - (define-struct dfa (num-states start-state final-states/actions transitions) (make-inspector)) +;; A dfa is (make-dfa int int +;; (list-of (cons int syntax-object)) +;; (list-of (cons int (list-of (cons char-set int))))) +;; Each transitions is a state and a list of chars with the state to transition to. +;; The finals and transitions are sorted by state number, and duplicate free. +(define-struct dfa (num-states start-state final-states/actions transitions) #:inspector (make-inspector)) - (define loc:get-integer is:get-integer) +(define loc:get-integer is:get-integer) - ;; build-dfa : (list-of re-action) cache -> dfa - (define (build-dfa rs cache) - (let* ((transitions (make-hash-table)) - (get-state-number (make-counter)) - (start (make-state rs (get-state-number)))) - (cache (cons 'state (get-key rs)) (lambda () start)) - (let loop ((old-states (list start)) - (new-states null) - (all-states (list start)) - (cs (compute-chars (list start)))) - (cond - ((and (null? old-states) (null? new-states)) - (make-dfa (get-state-number) (state-index start) - (sort (filter (lambda (x) (cdr x)) - (map (lambda (state) - (cons (state-index state) (get-final (state-spec state)))) - all-states)) - (lambda (a b) (< (car a) (car b)))) - (sort (hash-table-map transitions - (lambda (state trans) - (cons (state-index state) - (map (lambda (t) - (cons (car t) - (state-index (cdr t)))) - trans)))) - (lambda (a b) (< (car a) (car b)))))) - ((null? old-states) - (loop new-states null all-states (compute-chars new-states))) - ((null? cs) - (loop (cdr old-states) new-states all-states (compute-chars (cdr old-states)))) - (else - (let* ((state (car old-states)) - (c (car cs)) - (new-re (derive (state-spec state) (loc:get-integer c) cache))) - (cond - (new-re - (let* ((new-state? #f) - (new-state (cache (cons 'state (get-key new-re)) - (lambda () - (set! new-state? #t) - (make-state new-re (get-state-number))))) - (new-all-states (if new-state? (cons new-state all-states) all-states))) - (hash-table-put! transitions - state - (cons (cons c new-state) - (hash-table-get transitions state - (lambda () null)))) - (cond - (new-state? - (loop old-states (cons new-state new-states) new-all-states (cdr cs))) - (else - (loop old-states new-states new-all-states (cdr cs)))))) - (else (loop old-states new-states all-states (cdr cs)))))))))) +;; build-dfa : (list-of re-action) cache -> dfa +(define (build-dfa rs cache) + (let* ([transitions (make-hash)] + [get-state-number (make-counter)] + [start (make-state rs (get-state-number))]) + (cache (cons 'state (get-key rs)) (λ () start)) + (let loop ([old-states (list start)] + [new-states null] + [all-states (list start)] + [cs (compute-chars (list start))]) + (cond + [(and (null? old-states) (null? new-states)) + (make-dfa (get-state-number) (state-index start) + (sort (for*/list ([state (in-list all-states)] + [val (in-value (cons (state-index state) (get-final (state-spec state))))] + #:when (cdr val)) + val) + < #:key car) + (sort (hash-map transitions + (λ (state trans) + (cons (state-index state) + (for/list ([t (in-list trans)]) + (cons (car t) + (state-index (cdr t))))))) + < #:key car))] + [(null? old-states) + (loop new-states null all-states (compute-chars new-states))] + [(null? cs) + (loop (cdr old-states) new-states all-states (compute-chars (cdr old-states)))] + [else + (define state (car old-states)) + (define c (car cs)) + (define new-re (derive (state-spec state) (loc:get-integer c) cache)) + (cond + [new-re + (let* ([new-state? #f] + [new-state (cache (cons 'state (get-key new-re)) + (λ () + (set! new-state? #t) + (make-state new-re (get-state-number))))] + [new-all-states (if new-state? (cons new-state all-states) all-states)]) + (hash-set! transitions + state + (cons (cons c new-state) + (hash-ref transitions state + (λ () null)))) + (cond + [new-state? + (loop old-states (cons new-state new-states) new-all-states (cdr cs))] + [else + (loop old-states new-states new-all-states (cdr cs))]))] + [else (loop old-states new-states all-states (cdr cs))])])))) - (define (print-dfa x) - (printf "number of states: ~a\n" (dfa-num-states x)) - (printf "start state: ~a\n" (dfa-start-state x)) - (printf "final states: ~a\n" (map car (dfa-final-states/actions x))) - (for-each (lambda (trans) - (printf "state: ~a\n" (car trans)) - (for-each (lambda (rule) - (printf " -~a-> ~a\n" - (is:integer-set-contents (car rule)) - (cdr rule))) - (cdr trans))) - (dfa-transitions x))) +(define (print-dfa x) + (printf "number of states: ~a\n" (dfa-num-states x)) + (printf "start state: ~a\n" (dfa-start-state x)) + (printf "final states: ~a\n" (map car (dfa-final-states/actions x))) + (for-each (λ (trans) + (printf "state: ~a\n" (car trans)) + (for-each (λ (rule) + (printf " -~a-> ~a\n" + (is:integer-set-contents (car rule)) + (cdr rule))) + (cdr trans))) + (dfa-transitions x))) - (define (build-test-dfa rs) - (let ((c (make-cache))) - (build-dfa (map (lambda (x) (cons (->re x c) 'action)) - rs) - c))) +(define (build-test-dfa rs) + (define c (make-cache)) + (build-dfa (map (λ (x) (cons (->re x c) 'action)) rs) c)) #| @@ -316,8 +310,8 @@ (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\a #\b)) #\a (union #\a #\b) (union #\a #\b) (union #\a #\b) (union #\a #\b))))) (define t9 (build-test-dfa `((concatenation "/*" - (complement (concatenation (intersection) "*/" (intersection))) - "*/")))) + (complement (concatenation (intersection) "*/" (intersection))) + "*/")))) (define t11 (build-test-dfa `((complement "1")))) (define t12 (build-test-dfa `((concatenation (intersection (concatenation (repetition 0 +inf.0 "a") "b") (concatenation "a" (repetition 0 +inf.0 "b"))) @@ -329,11 +323,11 @@ "]"))) (define y (build-test-dfa `((repetition 1 +inf.0 - (union (concatenation "|" (repetition 0 +inf.0 (char-complement "|")) "|") - (concatenation "|" (repetition 0 +inf.0 (char-complement "|")))))))) + (union (concatenation "|" (repetition 0 +inf.0 (char-complement "|")) "|") + (concatenation "|" (repetition 0 +inf.0 (char-complement "|")))))))) (define t13 (build-test-dfa `((intersection (concatenation (intersection) "111" (intersection)) (complement (union (concatenation (intersection) "01") - (repetition 1 +inf.0 "1"))))))) - (define t14 (build-test-dfa `((complement "1")))) -|# - ) + (repetition 1 +inf.0 "1"))))))) + (define t14 (build-test-dfa `((complement "1"))))) + + |# diff --git a/br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt index bbccbe0..7ee2adf 100644 --- a/br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt @@ -1,5 +1,5 @@ -#lang scheme/base -(require (for-syntax scheme/base) +#lang racket/base +(require (for-syntax racket/base) "../lex.rkt" rackunit) diff --git a/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt index f74c003..603b32b 100644 --- a/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt @@ -1,179 +1,161 @@ -(module front mzscheme - (require (prefix is: mzlib/integer-set) - mzlib/list - syntax/stx - "util.rkt" - "stx.rkt" - "re.rkt" - "deriv.rkt") +#lang racket/base +(require racket/base + racket/match + (prefix-in is: data/integer-set) + racket/list + syntax/stx + "util.rkt" + "stx.rkt" + "re.rkt" + "deriv.rkt") - (provide build-lexer) +(provide build-lexer) - (define-syntax time-label - (syntax-rules () - ((_ l e ...) - (begin - (printf "~a: " l) - (time (begin e ...)))))) +(define-syntax time-label + (syntax-rules () + ((_ l e ...) + (begin + (printf "~a: " l) + (time (begin e ...)))))) - ;; A table is either - ;; - (vector-of (union #f nat)) - ;; - (vector-of (vector-of (vector nat nat nat))) +;; A table is either +;; - (vector-of (union #f nat)) +;; - (vector-of (vector-of (vector nat nat nat))) - (define loc:integer-set-contents is:integer-set-contents) +(define loc:integer-set-contents is:integer-set-contents) - ;; dfa->1d-table : dfa -> (same as build-lexer) - (define (dfa->1d-table dfa) - (let ((state-table (make-vector (dfa-num-states dfa) #f)) - (transition-cache (make-hash-table 'equal))) - (for-each - (lambda (trans) - (let* ((from-state (car trans)) - (all-chars/to (cdr trans)) - (flat-all-chars/to - (sort - (apply append - (map (lambda (chars/to) - (let ((char-ranges (loc:integer-set-contents (car chars/to))) - (to (cdr chars/to))) - (map (lambda (char-range) - (let ((entry (vector (car char-range) (cdr char-range) to))) - (hash-table-get transition-cache entry - (lambda () - (hash-table-put! transition-cache - entry - entry) - entry)))) - char-ranges))) - all-chars/to)) - (lambda (a b) - (< (vector-ref a 0) (vector-ref b 0)))))) - (vector-set! state-table from-state (list->vector flat-all-chars/to)))) - (dfa-transitions dfa)) - state-table)) +;; dfa->1d-table : dfa -> (same as build-lexer) +(define (dfa->1d-table dfa) + (define state-table (make-vector (dfa-num-states dfa) #f)) + (define transition-cache (make-hasheq)) + (for ([trans (in-list (dfa-transitions dfa))]) + (match-define (cons from-state all-chars/to) trans) + (define flat-all-chars/to + (sort + (for*/list ([chars/to (in-list all-chars/to)] + [char-ranges (in-value (loc:integer-set-contents (car chars/to)))] + [to (in-value (cdr chars/to))] + [char-range (in-list char-ranges)]) + (define entry (vector (car char-range) (cdr char-range) to)) + (hash-ref transition-cache entry (λ () + (hash-set! transition-cache + entry + entry) + entry))) + < #:key (λ (v) (vector-ref v 0)))) + (vector-set! state-table from-state (list->vector flat-all-chars/to))) + state-table) - (define loc:foldr is:foldr) +(define loc:foldr is:foldr) - ;; dfa->2d-table : dfa -> (same as build-lexer) - (define (dfa->2d-table dfa) - (let ( - ;; char-table : (vector-of (union #f nat)) - ;; The lexer table, one entry per state per char. - ;; Each entry specifies a state to transition to. - ;; #f indicates no transition - (char-table (make-vector (* 256 (dfa-num-states dfa)) #f))) - - ;; Fill the char-table vector - (for-each - (lambda (trans) - (let ((from-state (car trans))) - (for-each (lambda (chars/to) - (let ((to-state (cdr chars/to))) - (loc:foldr (lambda (char _) - (vector-set! char-table - (bitwise-ior - char - (arithmetic-shift from-state 8)) - to-state)) - (void) - (car chars/to)))) - (cdr trans)))) - (dfa-transitions dfa)) - char-table)) +;; dfa->2d-table : dfa -> (same as build-lexer) +(define (dfa->2d-table dfa) + ;; char-table : (vector-of (union #f nat)) + ;; The lexer table, one entry per state per char. + ;; Each entry specifies a state to transition to. + ;; #f indicates no transition + (define char-table (make-vector (* 256 (dfa-num-states dfa)) #f)) + ;; Fill the char-table vector + (for* ([trans (in-list (dfa-transitions dfa))] + [chars/to (in-list (cdr trans))]) + (define from-state (car trans)) + (define to-state (cdr chars/to)) + (loc:foldr (λ (char _) + (vector-set! char-table + (bitwise-ior + char + (arithmetic-shift from-state 8)) + to-state)) + (void) + (car chars/to))) + char-table) - ;; dfa->actions : dfa -> (vector-of (union #f syntax-object)) - ;; The action for each final state, #f if the state isn't final - (define (dfa->actions dfa) - (let ((actions (make-vector (dfa-num-states dfa) #f))) - (for-each (lambda (state/action) - (vector-set! actions (car state/action) (cdr state/action))) - (dfa-final-states/actions dfa)) - actions)) +;; dfa->actions : dfa -> (vector-of (union #f syntax-object)) +;; The action for each final state, #f if the state isn't final +(define (dfa->actions dfa) + (define actions (make-vector (dfa-num-states dfa) #f)) + (for ([state/action (in-list (dfa-final-states/actions dfa))]) + (vector-set! actions (car state/action) (cdr state/action))) + actions) - ;; dfa->no-look : dfa -> (vector-of bool) - ;; For each state whether the lexer can ignore the next input. - ;; It can do this only if there are no transitions out of the - ;; current state. - (define (dfa->no-look dfa) - (let ((no-look (make-vector (dfa-num-states dfa) #t))) - (for-each (lambda (trans) - (vector-set! no-look (car trans) #f)) - (dfa-transitions dfa)) - no-look)) +;; dfa->no-look : dfa -> (vector-of bool) +;; For each state whether the lexer can ignore the next input. +;; It can do this only if there are no transitions out of the +;; current state. +(define (dfa->no-look dfa) + (define no-look (make-vector (dfa-num-states dfa) #t)) + (for ([trans (in-list (dfa-transitions dfa))]) + (vector-set! no-look (car trans) #f)) + no-look) - (test-block ((d1 (make-dfa 1 1 (list) (list))) - (d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) - (list (cons 1 (list (cons (is:make-range 49 50) 1) - (cons (is:make-range 51) 2))) - (cons 2 (list (cons (is:make-range 49) 3)))))) - (d3 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) - (list (cons 1 (list (cons (is:make-range 100 200) 0) - (cons (is:make-range 49 50) 1) - (cons (is:make-range 51) 2))) - (cons 2 (list (cons (is:make-range 49) 3))))))) - ((dfa->2d-table d1) (make-vector 256 #f)) - ((dfa->2d-table d2) (let ((v (make-vector 1024 #f))) - (vector-set! v 305 1) - (vector-set! v 306 1) - (vector-set! v 307 2) - (vector-set! v 561 3) - v)) - ((dfa->1d-table d1) (make-vector 1 #f)) - ((dfa->1d-table d2) #(#f +(test-block ((d1 (make-dfa 1 1 (list) (list))) + (d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) + (list (cons 1 (list (cons (is:make-range 49 50) 1) + (cons (is:make-range 51) 2))) + (cons 2 (list (cons (is:make-range 49) 3)))))) + (d3 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) + (list (cons 1 (list (cons (is:make-range 100 200) 0) + (cons (is:make-range 49 50) 1) + (cons (is:make-range 51) 2))) + (cons 2 (list (cons (is:make-range 49) 3))))))) + ((dfa->2d-table d1) (make-vector 256 #f)) + ((dfa->2d-table d2) (let ((v (make-vector 1024 #f))) + (vector-set! v 305 1) + (vector-set! v 306 1) + (vector-set! v 307 2) + (vector-set! v 561 3) + v)) + ((dfa->1d-table d1) (make-vector 1 #f)) + ((dfa->1d-table d2) #(#f #(#(49 50 1) #(51 51 2)) #(#(49 49 3)) #f)) - ((dfa->1d-table d3) #(#f + ((dfa->1d-table d3) #(#f #(#(49 50 1) #(51 51 2) #(100 200 0)) #(#(49 49 3)) #f)) - ((dfa->actions d1) (vector #f)) - ((dfa->actions d2) (vector #f #f 2 3)) - ((dfa->no-look d1) (vector #t)) - ((dfa->no-look d2) (vector #t #f #f #t))) - - ;; build-lexer : syntax-object list -> - ;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object)) - ;; each syntax object has the form (re action) - (define (build-lexer sos) - (let* ((disappeared-uses (box null)) - (s-re-acts (map (lambda (so) - (cons (parse (stx-car so) disappeared-uses) - (stx-car (stx-cdr so)))) - sos)) + ((dfa->actions d1) (vector #f)) + ((dfa->actions d2) (vector #f #f 2 3)) + ((dfa->no-look d1) (vector #t)) + ((dfa->no-look d2) (vector #t #f #f #t))) - (cache (make-cache)) - - (re-acts (map (lambda (s-re-act) - (cons (->re (car s-re-act) cache) - (cdr s-re-act))) - s-re-acts)) - - (dfa (build-dfa re-acts cache)) - (table (dfa->1d-table dfa))) - ;(print-dfa dfa) - #;(let ((num-states (vector-length table)) - (num-vectors (length (filter values (vector->list table)))) - (num-entries (apply + (map - (lambda (x) (if x (vector-length x) 0)) - (vector->list table)))) - (num-different-entries - (let ((ht (make-hash-table))) - (for-each - (lambda (x) - (when x - (for-each - (lambda (y) - (hash-table-put! ht y #t)) - (vector->list x)))) - (vector->list table)) - (length (hash-table-map ht cons))))) - (printf "~a states, ~aKB\n" - num-states - (/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries - (* 5 num-different-entries))) 1024))) - (values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa) - (unbox disappeared-uses)))) - ) +;; build-lexer : syntax-object list -> +;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object)) +;; each syntax object has the form (re action) +(define (build-lexer sos) + (define disappeared-uses (box null)) + (define s-re-acts (for/list ([so (in-list sos)]) + (cons (parse (stx-car so) disappeared-uses) + (stx-car (stx-cdr so))))) + (define cache (make-cache)) + (define re-acts (for/list ([s-re-act (in-list s-re-acts)]) + (cons (->re (car s-re-act) cache) + (cdr s-re-act)))) + (define dfa (build-dfa re-acts cache)) + (define table (dfa->1d-table dfa)) + ;(print-dfa dfa) + #;(let ((num-states (vector-length table)) + (num-vectors (length (filter values (vector->list table)))) + (num-entries (apply + (map + (λ (x) (if x (vector-length x) 0)) + (vector->list table)))) + (num-different-entries + (let ((ht (make-hash))) + (for-each + (λ (x) + (when x + (for-each + (λ (y) + (hash-set! ht y #t)) + (vector->list x)))) + (vector->list table)) + (length (hash-table-map ht cons))))) + (printf "~a states, ~aKB\n" + num-states + (/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries + (* 5 num-different-entries))) 1024))) + (values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa) + (unbox disappeared-uses))) + diff --git a/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt index b06c3eb..35b2ced 100644 --- a/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt @@ -1,385 +1,384 @@ -(module re mzscheme - (require mzlib/list - scheme/match - (prefix is: mzlib/integer-set) - "util.rkt") +#lang racket/base +(require racket/list + racket/match + (prefix-in is: data/integer-set) + "util.rkt") - (provide ->re build-epsilon build-zero build-char-set build-concat - build-repeat build-or build-and build-neg - epsilonR? zeroR? char-setR? concatR? repeatR? orR? andR? negR? - char-setR-chars concatR-re1 concatR-re2 repeatR-re repeatR-low repeatR-high - orR-res andR-res negR-re - re-nullable? re-index) +(provide ->re build-epsilon build-zero build-char-set build-concat + build-repeat build-or build-and build-neg + epsilonR? zeroR? char-setR? concatR? repeatR? orR? andR? negR? + char-setR-chars concatR-re1 concatR-re2 repeatR-re repeatR-low repeatR-high + orR-res andR-res negR-re + re-nullable? re-index) - ;; get-index : -> nat - (define get-index (make-counter)) +;; get-index : -> nat +(define get-index (make-counter)) - ;; An re is either - ;; - (make-epsilonR bool nat) - ;; - (make-zeroR bool nat) - ;; - (make-char-setR bool nat char-set) - ;; - (make-concatR bool nat re re) - ;; - (make-repeatR bool nat nat nat-or-+inf.0 re) - ;; - (make-orR bool nat (list-of re)) Must not directly contain any orRs - ;; - (make-andR bool nat (list-of re)) Must not directly contain any andRs - ;; - (make-negR bool nat re) - ;; - ;; Every re must have an index field globally different from all - ;; other re index fields. - (define-struct re (nullable? index) (make-inspector)) - (define-struct (epsilonR re) () (make-inspector)) - (define-struct (zeroR re) () (make-inspector)) - (define-struct (char-setR re) (chars) (make-inspector)) - (define-struct (concatR re) (re1 re2) (make-inspector)) - (define-struct (repeatR re) (low high re) (make-inspector)) - (define-struct (orR re) (res) (make-inspector)) - (define-struct (andR re) (res) (make-inspector)) - (define-struct (negR re) (re) (make-inspector)) +;; An re is either +;; - (make-epsilonR bool nat) +;; - (make-zeroR bool nat) +;; - (make-char-setR bool nat char-set) +;; - (make-concatR bool nat re re) +;; - (make-repeatR bool nat nat nat-or-+inf.0 re) +;; - (make-orR bool nat (list-of re)) Must not directly contain any orRs +;; - (make-andR bool nat (list-of re)) Must not directly contain any andRs +;; - (make-negR bool nat re) +;; +;; Every re must have an index field globally different from all +;; other re index fields. +(define-struct re (nullable? index) #:inspector (make-inspector)) +(define-struct (epsilonR re) () #:inspector (make-inspector)) +(define-struct (zeroR re) () #:inspector (make-inspector)) +(define-struct (char-setR re) (chars) #:inspector (make-inspector)) +(define-struct (concatR re) (re1 re2) #:inspector (make-inspector)) +(define-struct (repeatR re) (low high re) #:inspector (make-inspector)) +(define-struct (orR re) (res) #:inspector (make-inspector)) +(define-struct (andR re) (res) #:inspector (make-inspector)) +(define-struct (negR re) (re) #:inspector (make-inspector)) - ;; e : re - ;; The unique epsilon re - (define e (make-epsilonR #t (get-index))) +;; e : re +;; The unique epsilon re +(define e (make-epsilonR #t (get-index))) - ;; z : re - ;; The unique zero re - (define z (make-zeroR #f (get-index))) +;; z : re +;; The unique zero re +(define z (make-zeroR #f (get-index))) - ;; s-re = char constant - ;; | string constant (sequence of characters) - ;; | re a precompiled re - ;; | (repetition low high s-re) repetition between low and high times (inclusive) - ;; | (union s-re ...) - ;; | (intersection s-re ...) - ;; | (complement s-re) - ;; | (concatenation s-re ...) - ;; | (char-range rng rng) match any character between two (inclusive) - ;; | (char-complement char-set) match any character not listed - ;; low = natural-number - ;; high = natural-number or +inf.0 - ;; rng = char or string with length 1 - ;; (concatenation) (repetition 0 0 x), and "" match the empty string. - ;; (union) matches no strings. - ;; (intersection) matches any string. +;; s-re = char constant +;; | string constant (sequence of characters) +;; | re a precompiled re +;; | (repetition low high s-re) repetition between low and high times (inclusive) +;; | (union s-re ...) +;; | (intersection s-re ...) +;; | (complement s-re) +;; | (concatenation s-re ...) +;; | (char-range rng rng) match any character between two (inclusive) +;; | (char-complement char-set) match any character not listed +;; low = natural-number +;; high = natural-number or +inf.0 +;; rng = char or string with length 1 +;; (concatenation) (repetition 0 0 x), and "" match the empty string. +;; (union) matches no strings. +;; (intersection) matches any string. - (define loc:make-range is:make-range) - (define loc:union is:union) - (define loc:split is:split) - (define loc:complement is:complement) +(define loc:make-range is:make-range) +(define loc:union is:union) +(define loc:split is:split) +(define loc:complement is:complement) - ;; ->re : s-re cache -> re - (define (->re exp cache) - (match exp - ((? char?) (build-char-set (loc:make-range (char->integer exp)) cache)) - ((? string?) (->re `(concatenation ,@(string->list exp)) cache)) - ((? re?) exp) - (`(repetition ,low ,high ,r) - (build-repeat low high (->re r cache) cache)) - (`(union ,rs ...) - (build-or (flatten-res (map (lambda (r) (->re r cache)) rs) - orR? orR-res loc:union cache) - cache)) - (`(intersection ,rs ...) - (build-and (flatten-res (map (lambda (r) (->re r cache)) rs) - andR? andR-res (lambda (a b) - (let-values (((i _ __) (loc:split a b))) i)) - cache) - cache)) - (`(complement ,r) - (build-neg (->re r cache) cache)) - (`(concatenation ,rs ...) - (foldr (lambda (x y) - (build-concat (->re x cache) y cache)) - e - rs)) - (`(char-range ,c1 ,c2) - (let ((i1 (char->integer (if (string? c1) (string-ref c1 0) c1))) - (i2 (char->integer (if (string? c2) (string-ref c2 0) c2)))) - (if (<= i1 i2) - (build-char-set (loc:make-range i1 i2) cache) - z))) - (`(char-complement ,crs ...) - (let ((cs (->re `(union ,@crs) cache))) - (cond - ((zeroR? cs) (build-char-set (loc:make-range 0 max-char-num) cache)) - ((char-setR? cs) - (build-char-set (loc:complement (char-setR-chars cs) 0 max-char-num) cache)) - (else z)))))) +;; ->re : s-re cache -> re +(define (->re exp cache) + (match exp + [(? char?) (build-char-set (loc:make-range (char->integer exp)) cache)] + [(? string?) (->re `(concatenation ,@(string->list exp)) cache)] + [(? re?) exp] + [`(repetition ,low ,high ,r) + (build-repeat low high (->re r cache) cache)] + [`(union ,rs ...) + (build-or (flatten-res (map (λ (r) (->re r cache)) rs) + orR? orR-res loc:union cache) + cache)] + [`(intersection ,rs ...) + (build-and (flatten-res (map (λ (r) (->re r cache)) rs) + andR? andR-res (λ (a b) + (let-values (((i _ __) (loc:split a b))) i)) + cache) + cache)] + [`(complement ,r) (build-neg (->re r cache) cache)] + [`(concatenation ,rs ...) + (foldr (λ (x y) + (build-concat (->re x cache) y cache)) + e + rs)] + [`(char-range ,c1 ,c2) + (let ([i1 (char->integer (if (string? c1) (string-ref c1 0) c1))] + [i2 (char->integer (if (string? c2) (string-ref c2 0) c2))]) + (if (<= i1 i2) + (build-char-set (loc:make-range i1 i2) cache) + z))] + [`(char-complement ,crs ...) + (let ([cs (->re `(union ,@crs) cache)]) + (cond + [(zeroR? cs) (build-char-set (loc:make-range 0 max-char-num) cache)] + [(char-setR? cs) + (build-char-set (loc:complement (char-setR-chars cs) 0 max-char-num) cache)] + [else z]))])) - ;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re)) - ;; (char-set char-set -> char-set) cache -> (list-of re) - ;; Takes all the char-sets in l and combines them into one char-set using the combine function. - ;; Flattens out the values of type?. get-res only needs to function on things type? returns - ;; true for. - (define (flatten-res l type? get-res combine cache) - (let loop ((res l) - ;; chars : (union #f char-set) - (chars #f) - (no-chars null)) - (cond - ((null? res) - (if chars - (cons (build-char-set chars cache) no-chars) - no-chars)) - ((char-setR? (car res)) - (if chars - (loop (cdr res) (combine (char-setR-chars (car res)) chars) no-chars) - (loop (cdr res) (char-setR-chars (car res)) no-chars))) - ((type? (car res)) - (loop (append (get-res (car res)) (cdr res)) chars no-chars)) - (else (loop (cdr res) chars (cons (car res) no-chars)))))) +;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re)) +;; (char-set char-set -> char-set) cache -> (list-of re) +;; Takes all the char-sets in l and combines them into one char-set using the combine function. +;; Flattens out the values of type?. get-res only needs to function on things type? returns +;; true for. +(define (flatten-res l type? get-res combine cache) + (let loop ([res l] + ;; chars : (union #f char-set) + [chars #f] + [no-chars null]) + (cond + [(null? res) + (if chars + (cons (build-char-set chars cache) no-chars) + no-chars)] + [(char-setR? (car res)) + (if chars + (loop (cdr res) (combine (char-setR-chars (car res)) chars) no-chars) + (loop (cdr res) (char-setR-chars (car res)) no-chars))] + [(type? (car res)) + (loop (append (get-res (car res)) (cdr res)) chars no-chars)] + [else (loop (cdr res) chars (cons (car res) no-chars))]))) - ;; build-epsilon : -> re - (define (build-epsilon) e) +;; build-epsilon : -> re +(define (build-epsilon) e) - (define (build-zero) z) +(define (build-zero) z) - (define loc:integer-set-contents is:integer-set-contents) +(define loc:integer-set-contents is:integer-set-contents) - ;; build-char-set : char-set cache -> re - (define (build-char-set cs cache) - (let ((l (loc:integer-set-contents cs))) - (cond - ((null? l) z) - (else - (cache l - (lambda () - (make-char-setR #f (get-index) cs))))))) +;; build-char-set : char-set cache -> re +(define (build-char-set cs cache) + (define l (loc:integer-set-contents cs)) + (cond + [(null? l) z] + [else + (cache l + (λ () + (make-char-setR #f (get-index) cs)))])) - ;; build-concat : re re cache -> re - (define (build-concat r1 r2 cache) - (cond - ((eq? e r1) r2) - ((eq? e r2) r1) - ((or (eq? z r1) (eq? z r2)) z) - (else - (cache (cons 'concat (cons (re-index r1) (re-index r2))) - (lambda () - (make-concatR (and (re-nullable? r1) (re-nullable? r2)) - (get-index) - r1 r2)))))) +;; build-concat : re re cache -> re +(define (build-concat r1 r2 cache) + (cond + [(eq? e r1) r2] + [(eq? e r2) r1] + [(or (eq? z r1) (eq? z r2)) z] + [else + (cache (cons 'concat (cons (re-index r1) (re-index r2))) + (λ () + (make-concatR (and (re-nullable? r1) (re-nullable? r2)) + (get-index) + r1 r2)))])) - ;; build-repeat : nat nat-or-+inf.0 re cache -> re - (define (build-repeat low high r cache) - (let ((low (if (< low 0) 0 low))) - (cond - ((eq? r e) e) - ((and (= 0 low) (or (= 0 high) (eq? z r))) e) - ((and (= 1 low) (= 1 high)) r) - ((and (repeatR? r) - (eqv? (repeatR-high r) +inf.0) - (or (= 0 (repeatR-low r)) - (= 1 (repeatR-low r)))) - (build-repeat (* low (repeatR-low r)) - +inf.0 - (repeatR-re r) - cache)) - (else - (cache (cons 'repeat (cons low (cons high (re-index r)))) - (lambda () - (make-repeatR (or (re-nullable? r) (= 0 low)) (get-index) low high r))))))) +;; build-repeat : nat nat-or-+inf.0 re cache -> re +(define (build-repeat low high r cache) + (let ([low (if (< low 0) 0 low)]) + (cond + [(eq? r e) e] + [(and (= 0 low) (or (= 0 high) (eq? z r))) e] + [(and (= 1 low) (= 1 high)) r] + [(and (repeatR? r) + (eqv? (repeatR-high r) +inf.0) + (or (= 0 (repeatR-low r)) + (= 1 (repeatR-low r)))) + (build-repeat (* low (repeatR-low r)) + +inf.0 + (repeatR-re r) + cache)] + [else + (cache (cons 'repeat (cons low (cons high (re-index r)))) + (λ () + (make-repeatR (or (re-nullable? r) (= 0 low)) (get-index) low high r)))]))) - ;; build-or : (list-of re) cache -> re - (define (build-or rs cache) - (let ((rs - (filter - (lambda (x) (not (eq? x z))) - (do-simple-equiv (replace rs orR? orR-res null) re-index)))) - (cond - ((null? rs) z) - ((null? (cdr rs)) (car rs)) - ((memq (build-neg z cache) rs) (build-neg z cache)) - (else - (cache (cons 'or (map re-index rs)) - (lambda () - (make-orR (ormap re-nullable? rs) (get-index) rs))))))) +;; build-or : (list-of re) cache -> re +(define (build-or rs cache) + (let ([rs + (filter + (λ (x) (not (eq? x z))) + (do-simple-equiv (replace rs orR? orR-res null) re-index))]) + (cond + [(null? rs) z] + [(null? (cdr rs)) (car rs)] + [(memq (build-neg z cache) rs) (build-neg z cache)] + [else + (cache (cons 'or (map re-index rs)) + (λ () + (make-orR (ormap re-nullable? rs) (get-index) rs)))]))) - ;; build-and : (list-of re) cache -> re - (define (build-and rs cache) - (let ((rs (do-simple-equiv (replace rs andR? andR-res null) re-index))) - (cond - ((null? rs) (build-neg z cache)) - ((null? (cdr rs)) (car rs)) - ((memq z rs) z) - (else - (cache (cons 'and (map re-index rs)) - (lambda () - (make-andR (andmap re-nullable? rs) (get-index) rs))))))) - - ;; build-neg : re cache -> re - (define (build-neg r cache) +;; build-and : (list-of re) cache -> re +(define (build-and rs cache) + (let ([rs (do-simple-equiv (replace rs andR? andR-res null) re-index)]) (cond - ((negR? r) (negR-re r)) - (else - (cache (cons 'neg (re-index r)) - (lambda () - (make-negR (not (re-nullable? r)) (get-index) r)))))) + [(null? rs) (build-neg z cache)] + [(null? (cdr rs)) (car rs)] + [(memq z rs) z] + [else + (cache (cons 'and (map re-index rs)) + (λ () + (make-andR (andmap re-nullable? rs) (get-index) rs)))]))) + +;; build-neg : re cache -> re +(define (build-neg r cache) + (cond + [(negR? r) (negR-re r)] + [else + (cache (cons 'neg (re-index r)) + (λ () + (make-negR (not (re-nullable? r)) (get-index) r)))])) - ;; Tests for the build-functions - (test-block ((c (make-cache)) - (isc is:integer-set-contents) - (r1 (build-char-set (is:make-range (char->integer #\1)) c)) - (r2 (build-char-set (is:make-range (char->integer #\2)) c)) - (r3 (build-char-set (is:make-range (char->integer #\3)) c)) - (rc (build-concat r1 r2 c)) - (rc2 (build-concat r2 r1 c)) - (rr (build-repeat 0 +inf.0 rc c)) - (ro (build-or `(,rr ,rc ,rr) c)) - (ro2 (build-or `(,rc ,rr ,z) c)) - (ro3 (build-or `(,rr ,rc) c)) - (ro4 (build-or `(,(build-or `(,r1 ,r2) c) - ,(build-or `(,r2 ,r3) c)) c)) - (ra (build-and `(,rr ,rc ,rr) c)) - (ra2 (build-and `(,rc ,rr) c)) - (ra3 (build-and `(,rr ,rc) c)) - (ra4 (build-and `(,(build-and `(,r3 ,r2) c) - ,(build-and `(,r2 ,r1) c)) c)) - (rn (build-neg z c)) - (rn2 (build-neg r1 c))) +;; Tests for the build-functions +(test-block ((c (make-cache)) + (isc is:integer-set-contents) + (r1 (build-char-set (is:make-range (char->integer #\1)) c)) + (r2 (build-char-set (is:make-range (char->integer #\2)) c)) + (r3 (build-char-set (is:make-range (char->integer #\3)) c)) + (rc (build-concat r1 r2 c)) + (rc2 (build-concat r2 r1 c)) + (rr (build-repeat 0 +inf.0 rc c)) + (ro (build-or `(,rr ,rc ,rr) c)) + (ro2 (build-or `(,rc ,rr ,z) c)) + (ro3 (build-or `(,rr ,rc) c)) + (ro4 (build-or `(,(build-or `(,r1 ,r2) c) + ,(build-or `(,r2 ,r3) c)) c)) + (ra (build-and `(,rr ,rc ,rr) c)) + (ra2 (build-and `(,rc ,rr) c)) + (ra3 (build-and `(,rr ,rc) c)) + (ra4 (build-and `(,(build-and `(,r3 ,r2) c) + ,(build-and `(,r2 ,r1) c)) c)) + (rn (build-neg z c)) + (rn2 (build-neg r1 c))) - ((isc (char-setR-chars r1)) (isc (is:make-range (char->integer #\1)))) - ((isc (char-setR-chars r2)) (isc (is:make-range (char->integer #\2)))) - ((isc (char-setR-chars r3)) (isc (is:make-range (char->integer #\3)))) - ((build-char-set (is:make-range) c) z) - ((build-concat r1 e c) r1) - ((build-concat e r1 c) r1) - ((build-concat r1 z c) z) - ((build-concat z r1 c) z) - ((build-concat r1 r2 c) rc) - ((concatR-re1 rc) r1) - ((concatR-re2 rc) r2) - ((concatR-re1 rc2) r2) - ((concatR-re2 rc2) r1) - (ro ro2) - (ro ro3) - (ro4 (build-or `(,r1 ,r2 ,r3) c)) - ((orR-res ro) (list rc rr)) - ((orR-res ro4) (list r1 r2 r3)) - ((build-or null c) z) - ((build-or `(,r1 ,z) c) r1) - ((build-repeat 0 +inf.0 rc c) rr) - ((build-repeat 0 1 z c) e) - ((build-repeat 0 0 rc c) e) - ((build-repeat 0 +inf.0 z c) e) - ((build-repeat -1 +inf.0 z c) e) - ((build-repeat 0 +inf.0 (build-repeat 0 +inf.0 rc c) c) - (build-repeat 0 +inf.0 rc c)) - ((build-repeat 20 20 (build-repeat 0 +inf.0 rc c) c) - (build-repeat 0 +inf.0 rc c)) - ((build-repeat 20 20 (build-repeat 1 +inf.0 rc c) c) - (build-repeat 20 +inf.0 rc c)) - ((build-repeat 1 1 rc c) rc) - ((repeatR-re rr) rc) - (ra ra2) - (ra ra3) - (ra4 (build-and `(,r1 ,r2 ,r3) c)) - ((andR-res ra) (list rc rr)) - ((andR-res ra4) (list r1 r2 r3)) - ((build-and null c) (build-neg z c)) - ((build-and `(,r1 ,z) c) z) - ((build-and `(,r1) c) r1) - ((build-neg r1 c) (build-neg r1 c)) - ((build-neg (build-neg r1 c) c) r1) - ((negR-re (build-neg r2 c)) r2) - ((re-nullable? r1) #f) - ((re-nullable? rc) #f) - ((re-nullable? (build-concat rr rr c)) #t) - ((re-nullable? rr) #t) - ((re-nullable? (build-repeat 0 1 rc c)) #t) - ((re-nullable? (build-repeat 1 2 rc c)) #f) - ((re-nullable? (build-repeat 1 2 (build-or (list e r1) c) c)) #t) - ((re-nullable? ro) #t) - ((re-nullable? (build-or `(,r1 ,r2) c)) #f) - ((re-nullable? (build-and `(,r1 ,e) c)) #f) - ((re-nullable? (build-and `(,rr ,e) c)) #t) - ((re-nullable? (build-neg r1 c)) #t) - ((re-nullable? (build-neg rr c)) #f)) + ((isc (char-setR-chars r1)) (isc (is:make-range (char->integer #\1)))) + ((isc (char-setR-chars r2)) (isc (is:make-range (char->integer #\2)))) + ((isc (char-setR-chars r3)) (isc (is:make-range (char->integer #\3)))) + ((build-char-set (is:make-range) c) z) + ((build-concat r1 e c) r1) + ((build-concat e r1 c) r1) + ((build-concat r1 z c) z) + ((build-concat z r1 c) z) + ((build-concat r1 r2 c) rc) + ((concatR-re1 rc) r1) + ((concatR-re2 rc) r2) + ((concatR-re1 rc2) r2) + ((concatR-re2 rc2) r1) + (ro ro2) + (ro ro3) + (ro4 (build-or `(,r1 ,r2 ,r3) c)) + ((orR-res ro) (list rc rr)) + ((orR-res ro4) (list r1 r2 r3)) + ((build-or null c) z) + ((build-or `(,r1 ,z) c) r1) + ((build-repeat 0 +inf.0 rc c) rr) + ((build-repeat 0 1 z c) e) + ((build-repeat 0 0 rc c) e) + ((build-repeat 0 +inf.0 z c) e) + ((build-repeat -1 +inf.0 z c) e) + ((build-repeat 0 +inf.0 (build-repeat 0 +inf.0 rc c) c) + (build-repeat 0 +inf.0 rc c)) + ((build-repeat 20 20 (build-repeat 0 +inf.0 rc c) c) + (build-repeat 0 +inf.0 rc c)) + ((build-repeat 20 20 (build-repeat 1 +inf.0 rc c) c) + (build-repeat 20 +inf.0 rc c)) + ((build-repeat 1 1 rc c) rc) + ((repeatR-re rr) rc) + (ra ra2) + (ra ra3) + (ra4 (build-and `(,r1 ,r2 ,r3) c)) + ((andR-res ra) (list rc rr)) + ((andR-res ra4) (list r1 r2 r3)) + ((build-and null c) (build-neg z c)) + ((build-and `(,r1 ,z) c) z) + ((build-and `(,r1) c) r1) + ((build-neg r1 c) (build-neg r1 c)) + ((build-neg (build-neg r1 c) c) r1) + ((negR-re (build-neg r2 c)) r2) + ((re-nullable? r1) #f) + ((re-nullable? rc) #f) + ((re-nullable? (build-concat rr rr c)) #t) + ((re-nullable? rr) #t) + ((re-nullable? (build-repeat 0 1 rc c)) #t) + ((re-nullable? (build-repeat 1 2 rc c)) #f) + ((re-nullable? (build-repeat 1 2 (build-or (list e r1) c) c)) #t) + ((re-nullable? ro) #t) + ((re-nullable? (build-or `(,r1 ,r2) c)) #f) + ((re-nullable? (build-and `(,r1 ,e) c)) #f) + ((re-nullable? (build-and `(,rr ,e) c)) #t) + ((re-nullable? (build-neg r1 c)) #t) + ((re-nullable? (build-neg rr c)) #f)) - (test-block ((c (make-cache)) - (isc is:integer-set-contents) - (r1 (->re #\1 c)) - (r2 (->re #\2 c)) - (r3-5 (->re '(char-range #\3 #\5) c)) - (r4 (build-or `(,r1 ,r2) c)) - (r5 (->re `(union ,r3-5 #\7) c)) - (r6 (->re #\6 c))) - ((flatten-res null orR? orR-res is:union c) null) - ((isc (char-setR-chars (car (flatten-res `(,r1) orR? orR-res is:union c)))) - (isc (is:make-range (char->integer #\1)))) - ((isc (char-setR-chars (car (flatten-res `(,r4) orR? orR-res is:union c)))) - (isc (is:make-range (char->integer #\1) (char->integer #\2)))) - ((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1) - orR? orR-res is:union c)))) - (isc (is:make-range (char->integer #\1) (char->integer #\7)))) - ((flatten-res `(,r1 ,r2) andR? andR-res (lambda (x y) - (let-values (((i _ __) - (is:split x y))) - i)) - c) - (list z))) +(test-block ((c (make-cache)) + (isc is:integer-set-contents) + (r1 (->re #\1 c)) + (r2 (->re #\2 c)) + (r3-5 (->re '(char-range #\3 #\5) c)) + (r4 (build-or `(,r1 ,r2) c)) + (r5 (->re `(union ,r3-5 #\7) c)) + (r6 (->re #\6 c))) + ((flatten-res null orR? orR-res is:union c) null) + ((isc (char-setR-chars (car (flatten-res `(,r1) orR? orR-res is:union c)))) + (isc (is:make-range (char->integer #\1)))) + ((isc (char-setR-chars (car (flatten-res `(,r4) orR? orR-res is:union c)))) + (isc (is:make-range (char->integer #\1) (char->integer #\2)))) + ((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1) + orR? orR-res is:union c)))) + (isc (is:make-range (char->integer #\1) (char->integer #\7)))) + ((flatten-res `(,r1 ,r2) andR? andR-res (λ (x y) + (let-values (((i _ __) + (is:split x y))) + i)) + c) + (list z))) - ;; ->re - (test-block ((c (make-cache)) - (isc is:integer-set-contents) - (r (->re #\a c)) - (rr (->re `(concatenation ,r ,r) c)) - (rrr (->re `(concatenation ,r ,rr) c)) - (rrr* (->re `(repetition 0 +inf.0 ,rrr) c))) - ((isc (char-setR-chars r)) (isc (is:make-range (char->integer #\a)))) - ((->re "" c) e) - ((->re "asdf" c) (->re `(concatenation #\a #\s #\d #\f) c)) - ((->re r c) r) - ((->re `(repetition 0 +inf.0 ,r) c) (build-repeat 0 +inf.0 r c)) - ((->re `(repetition 1 +inf.0 ,r) c) (build-repeat 1 +inf.0 r c)) - ((->re `(repetition 0 1 ,r) c) (build-repeat 0 1 r c)) - ((->re `(repetition 0 1 ,rrr*) c) rrr*) - ((->re `(union (union (char-range #\a #\c) - (char-complement (char-range #\000 #\110) - (char-range #\112 ,(integer->char max-char-num)))) - (union (repetition 0 +inf.0 #\2))) c) - (build-or (list (build-char-set (is:union (is:make-range 73) - (is:make-range 97 99)) - c) - (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c)) - c)) - ((->re `(union ,rr ,rrr) c) (build-or (list rr rrr) c)) - ((->re `(union ,r) c) r) - ((->re `(union) c) z) - ((->re `(intersection (intersection #\111 - (char-complement (char-range #\000 #\110) - (char-range #\112 ,(integer->char max-char-num)))) - (intersection (repetition 0 +inf.0 #\2))) c) - (build-and (list (build-char-set (is:make-range 73) c) - (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c)) - c)) - ((->re `(intersection (intersection #\000 (char-complement (char-range #\000 #\110) - (char-range #\112 ,(integer->char max-char-num)))) - (intersection (repetition 0 +inf.0 #\2))) c) - z) - ((->re `(intersection ,rr ,rrr) c) (build-and (list rr rrr) c)) - ((->re `(intersection ,r) c) r) - ((->re `(intersection) c) (build-neg z c)) - ((->re `(complement ,r) c) (build-neg r c)) - ((->re `(concatenation) c) e) - ((->re `(concatenation ,rrr*) c) rrr*) - (rr (build-concat r r c)) - ((->re `(concatenation ,r ,rr ,rrr) c) - (build-concat r (build-concat rr rrr c) c)) - ((isc (char-setR-chars (->re `(char-range #\1 #\1) c))) (isc (is:make-range 49))) - ((isc (char-setR-chars (->re `(char-range #\1 #\9) c))) (isc (is:make-range 49 57))) - ((isc (char-setR-chars (->re `(char-range "1" "1") c))) (isc (is:make-range 49))) - ((isc (char-setR-chars (->re `(char-range "1" "9") c))) (isc (is:make-range 49 57))) - ((->re `(char-range "9" "1") c) z) - ((isc (char-setR-chars (->re `(char-complement) c))) - (isc (char-setR-chars (->re `(char-range #\000 ,(integer->char max-char-num)) c)))) - ((isc (char-setR-chars (->re `(char-complement #\001 (char-range #\002 ,(integer->char max-char-num))) c))) - (isc (is:make-range 0))) - ) +;; ->re +(test-block ((c (make-cache)) + (isc is:integer-set-contents) + (r (->re #\a c)) + (rr (->re `(concatenation ,r ,r) c)) + (rrr (->re `(concatenation ,r ,rr) c)) + (rrr* (->re `(repetition 0 +inf.0 ,rrr) c))) + ((isc (char-setR-chars r)) (isc (is:make-range (char->integer #\a)))) + ((->re "" c) e) + ((->re "asdf" c) (->re `(concatenation #\a #\s #\d #\f) c)) + ((->re r c) r) + ((->re `(repetition 0 +inf.0 ,r) c) (build-repeat 0 +inf.0 r c)) + ((->re `(repetition 1 +inf.0 ,r) c) (build-repeat 1 +inf.0 r c)) + ((->re `(repetition 0 1 ,r) c) (build-repeat 0 1 r c)) + ((->re `(repetition 0 1 ,rrr*) c) rrr*) + ((->re `(union (union (char-range #\a #\c) + (char-complement (char-range #\000 #\110) + (char-range #\112 ,(integer->char max-char-num)))) + (union (repetition 0 +inf.0 #\2))) c) + (build-or (list (build-char-set (is:union (is:make-range 73) + (is:make-range 97 99)) + c) + (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c)) + c)) + ((->re `(union ,rr ,rrr) c) (build-or (list rr rrr) c)) + ((->re `(union ,r) c) r) + ((->re `(union) c) z) + ((->re `(intersection (intersection #\111 + (char-complement (char-range #\000 #\110) + (char-range #\112 ,(integer->char max-char-num)))) + (intersection (repetition 0 +inf.0 #\2))) c) + (build-and (list (build-char-set (is:make-range 73) c) + (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c)) + c)) + ((->re `(intersection (intersection #\000 (char-complement (char-range #\000 #\110) + (char-range #\112 ,(integer->char max-char-num)))) + (intersection (repetition 0 +inf.0 #\2))) c) + z) + ((->re `(intersection ,rr ,rrr) c) (build-and (list rr rrr) c)) + ((->re `(intersection ,r) c) r) + ((->re `(intersection) c) (build-neg z c)) + ((->re `(complement ,r) c) (build-neg r c)) + ((->re `(concatenation) c) e) + ((->re `(concatenation ,rrr*) c) rrr*) + (rr (build-concat r r c)) + ((->re `(concatenation ,r ,rr ,rrr) c) + (build-concat r (build-concat rr rrr c) c)) + ((isc (char-setR-chars (->re `(char-range #\1 #\1) c))) (isc (is:make-range 49))) + ((isc (char-setR-chars (->re `(char-range #\1 #\9) c))) (isc (is:make-range 49 57))) + ((isc (char-setR-chars (->re `(char-range "1" "1") c))) (isc (is:make-range 49))) + ((isc (char-setR-chars (->re `(char-range "1" "9") c))) (isc (is:make-range 49 57))) + ((->re `(char-range "9" "1") c) z) + ((isc (char-setR-chars (->re `(char-complement) c))) + (isc (char-setR-chars (->re `(char-range #\000 ,(integer->char max-char-num)) c)))) + ((isc (char-setR-chars (->re `(char-complement #\001 (char-range #\002 ,(integer->char max-char-num))) c))) + (isc (is:make-range 0))) + ) - ) + diff --git a/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt index 86f7a70..7d887b1 100644 --- a/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt @@ -1,30 +1,25 @@ -#lang racket - -(require "util.rkt" - syntax/id-table) - +#lang racket/base +(require "util.rkt" syntax/id-table) (provide parse) (define (bad-args stx num) - (raise-syntax-error - #f - (format "incorrect number of arguments (should have ~a)" num) - stx)) + (raise-syntax-error #f (format "incorrect number of arguments (should have ~a)" num) stx)) ;; char-range-arg: syntax-object syntax-object -> nat ;; If c contains is a character or length 1 string, returns the integer ;; for the character. Otherwise raises a syntax error. (define (char-range-arg stx containing-stx) - (let ((c (syntax-e stx))) - (cond - ((char? c) (char->integer c)) - ((and (string? c) (= (string-length c) 1)) - (char->integer (string-ref c 0))) - (else - (raise-syntax-error - #f - "not a char or single-char string" - containing-stx stx))))) + (define c (syntax-e stx)) + (cond + [(char? c) (char->integer c)] + [(and (string? c) (= (string-length c) 1)) + (char->integer (string-ref c 0))] + [else + (raise-syntax-error + #f + "not a char or single-char string" + containing-stx stx)])) + (module+ test (check-equal? (char-range-arg #'#\1 #'here) (char->integer #\1)) (check-equal? (char-range-arg #'"1" #'here) (char->integer #\1))) @@ -34,187 +29,157 @@ (define (disarm stx) (syntax-disarm stx orig-insp)) - ;; parse : syntax-object (box (list-of syntax-object)) -> s-re (see re.rkt) - ;; checks for errors and generates the plain s-exp form for s - ;; Expands lex-abbrevs and applies lex-trans. - (define (parse stx disappeared-uses) - (let loop ([stx stx] - [disappeared-uses disappeared-uses] - ;; seen-lex-abbrevs: id-table - [seen-lex-abbrevs (make-immutable-free-id-table)]) - (let ([recur (lambda (s) - (loop (syntax-rearm s stx) - disappeared-uses - seen-lex-abbrevs))] - [recur/abbrev (lambda (s id) - (loop (syntax-rearm s stx) - disappeared-uses - (free-id-table-set seen-lex-abbrevs id id)))]) - (syntax-case (disarm stx) (repetition union intersection complement concatenation - char-range char-complement) - (_ - (identifier? stx) - (let ((expansion (syntax-local-value stx (lambda () #f)))) - (unless (lex-abbrev? expansion) - (raise-syntax-error 'regular-expression - "undefined abbreviation" - stx)) - ;; Check for cycles. - (when (free-id-table-ref seen-lex-abbrevs stx (lambda () #f)) - (raise-syntax-error 'regular-expression - "illegal lex-abbrev cycle detected" - stx - #f - (list (free-id-table-ref seen-lex-abbrevs stx)))) - (set-box! disappeared-uses (cons stx (unbox disappeared-uses))) - (recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx))) - (_ - (or (char? (syntax-e stx)) (string? (syntax-e stx))) - (syntax-e stx)) - ((repetition arg ...) - (let ((arg-list (syntax->list (syntax (arg ...))))) - (unless (= 3 (length arg-list)) - (bad-args stx 2)) - (let ((low (syntax-e (car arg-list))) - (high (syntax-e (cadr arg-list))) - (re (caddr arg-list))) - (unless (and (number? low) (exact? low) (integer? low) (>= low 0)) - (raise-syntax-error #f - "not a non-negative exact integer" - stx - (car arg-list))) - (unless (or (and (number? high) (exact? high) (integer? high) (>= high 0)) - (eqv? high +inf.0)) - (raise-syntax-error #f - "not a non-negative exact integer or +inf.0" - stx - (cadr arg-list))) - (unless (<= low high) - (raise-syntax-error - #f - "the first argument is not less than or equal to the second argument" - stx)) - `(repetition ,low ,high ,(recur re))))) - ((union re ...) - `(union ,@(map recur (syntax->list (syntax (re ...)))))) - ((intersection re ...) - `(intersection ,@(map recur (syntax->list (syntax (re ...)))))) - ((complement re ...) - (let ((re-list (syntax->list (syntax (re ...))))) - (unless (= 1 (length re-list)) - (bad-args stx 1)) - `(complement ,(recur (car re-list))))) - ((concatenation re ...) - `(concatenation ,@(map recur (syntax->list (syntax (re ...)))))) - ((char-range arg ...) - (let ((arg-list (syntax->list (syntax (arg ...))))) - (unless (= 2 (length arg-list)) - (bad-args stx 2)) - (let ((i1 (char-range-arg (car arg-list) stx)) - (i2 (char-range-arg (cadr arg-list) stx))) - (if (<= i1 i2) - `(char-range ,(integer->char i1) ,(integer->char i2)) - (raise-syntax-error - #f - "the first argument does not precede or equal second argument" - stx))))) - ((char-complement arg ...) - (let ((arg-list (syntax->list (syntax (arg ...))))) - (unless (= 1 (length arg-list)) - (bad-args stx 1)) - (let ((parsed (recur (car arg-list)))) - (unless (char-set? parsed) - (raise-syntax-error #f - "not a character set" - stx - (car arg-list))) - `(char-complement ,parsed)))) - ((op form ...) - (identifier? (syntax op)) - (let* ((o (syntax op)) - (expansion (syntax-local-value o (lambda () #f)))) - (set-box! disappeared-uses (cons o (unbox disappeared-uses))) - (cond - ((lex-trans? expansion) - (recur ((lex-trans-f expansion) (disarm stx)))) - (expansion - (raise-syntax-error 'regular-expression - "not a lex-trans" - stx)) - (else - (raise-syntax-error 'regular-expression - "undefined operator" - stx))))) - (_ - (raise-syntax-error - 'regular-expression - "not a char, string, identifier, or (op args ...)" - stx)))))) +;; parse : syntax-object (box (list-of syntax-object)) -> s-re (see re.rkt) +;; checks for errors and generates the plain s-exp form for s +;; Expands lex-abbrevs and applies lex-trans. +(define (parse stx disappeared-uses) + (let loop ([stx stx] + [disappeared-uses disappeared-uses] + ;; seen-lex-abbrevs: id-table + [seen-lex-abbrevs (make-immutable-free-id-table)]) + (let ([recur (λ (s) + (loop (syntax-rearm s stx) + disappeared-uses + seen-lex-abbrevs))] + [recur/abbrev (λ (s id) + (loop (syntax-rearm s stx) + disappeared-uses + (free-id-table-set seen-lex-abbrevs id id)))]) + (syntax-case (disarm stx) (repetition union intersection complement concatenation + char-range char-complement) + [_ + (identifier? stx) + (let ([expansion (syntax-local-value stx (λ () #f))]) + (unless (lex-abbrev? expansion) + (raise-syntax-error 'regular-expression + "undefined abbreviation" + stx)) + ;; Check for cycles. + (when (free-id-table-ref seen-lex-abbrevs stx (λ () #f)) + (raise-syntax-error 'regular-expression + "illegal lex-abbrev cycle detected" + stx + #f + (list (free-id-table-ref seen-lex-abbrevs stx)))) + (set-box! disappeared-uses (cons stx (unbox disappeared-uses))) + (recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx))] + [_ + (or (char? (syntax-e stx)) (string? (syntax-e stx))) + (syntax-e stx)] + [(repetition ARG ...) + (let ([arg-list (syntax->list #'(ARG ...))]) + (unless (= 3 (length arg-list)) + (bad-args stx 2)) + (define low (syntax-e (car arg-list))) + (define high (syntax-e (cadr arg-list))) + (define re (caddr arg-list)) + (unless (and (number? low) (exact? low) (integer? low) (>= low 0)) + (raise-syntax-error #f "not a non-negative exact integer" stx (car arg-list))) + (unless (or (and (number? high) (exact? high) (integer? high) (>= high 0)) + (eqv? high +inf.0)) + (raise-syntax-error #f "not a non-negative exact integer or +inf.0" stx (cadr arg-list))) + (unless (<= low high) + (raise-syntax-error #f "the first argument is not less than or equal to the second argument" stx)) + `(repetition ,low ,high ,(recur re)))] + [(union RE ...) + `(union ,@(map recur (syntax->list #'(RE ...))))] + [(intersection RE ...) + `(intersection ,@(map recur (syntax->list #'(RE ...))))] + [(complement RE ...) + (let ([re-list (syntax->list #'(RE ...))]) + (unless (= 1 (length re-list)) + (bad-args stx 1)) + `(complement ,(recur (car re-list))))] + [(concatenation RE ...) + `(concatenation ,@(map recur (syntax->list #'(RE ...))))] + [(char-range ARG ...) + (let ((arg-list (syntax->list #'(ARG ...)))) + (unless (= 2 (length arg-list)) + (bad-args stx 2)) + (let ([i1 (char-range-arg (car arg-list) stx)] + [i2 (char-range-arg (cadr arg-list) stx)]) + (if (<= i1 i2) + `(char-range ,(integer->char i1) ,(integer->char i2)) + (raise-syntax-error #f "the first argument does not precede or equal second argument" stx))))] + [(char-complement ARG ...) + (let ([arg-list (syntax->list #'(ARG ...))]) + (unless (= 1 (length arg-list)) + (bad-args stx 1)) + (define parsed (recur (car arg-list))) + (unless (char-set? parsed) + (raise-syntax-error #f "not a character set" stx (car arg-list))) + `(char-complement ,parsed))] + ((OP form ...) + (identifier? #'OP) + (let* ([expansion (syntax-local-value #'OP (λ () #f))]) + (set-box! disappeared-uses (cons #'OP (unbox disappeared-uses))) + (cond + [(lex-trans? expansion) + (recur ((lex-trans-f expansion) (disarm stx)))] + [expansion + (raise-syntax-error 'regular-expression "not a lex-trans" stx)] + [else + (raise-syntax-error 'regular-expression "undefined operator" stx)]))) + [_ (raise-syntax-error 'regular-expression "not a char, string, identifier, or (op args ...)" stx)])))) - ;; char-set? : s-re -> bool - ;; A char-set is an re that matches only strings of length 1. - ;; char-set? is conservative. - (define (char-set? s-re) - (cond - ((char? s-re) #t) - ((string? s-re) (= (string-length s-re) 1)) - ((list? s-re) - (let ((op (car s-re))) - (case op - ((union intersection) (andmap char-set? (cdr s-re))) - ((char-range char-complement) #t) - ((repetition) - (and (= (cadr s-re) (caddr s-re)) (char-set? (cadddr s-re)))) - ((concatenation) - (and (= 2 (length s-re)) (char-set? (cadr s-re)))) - (else #f)))) - (else #f))) +;; char-set? : s-re -> bool +;; A char-set is an re that matches only strings of length 1. +;; char-set? is conservative. +(define (char-set? s-re) + (cond + [(char? s-re)] + [(string? s-re) (= (string-length s-re) 1)] + [(list? s-re) (case (car s-re) + [(union intersection) (andmap char-set? (cdr s-re))] + [(char-range char-complement) #t] + [(repetition) (and (= (cadr s-re) (caddr s-re)) (char-set? (cadddr s-re)))] + [(concatenation) (and (= 2 (length s-re)) (char-set? (cadr s-re)))] + (else #f))] + [else #f])) - (module+ test - (require rackunit)) - (module+ test - (check-equal? (char-set? #\a) #t) - (check-equal? (char-set? "12") #f) - (check-equal? (char-set? "1") #t) - (check-equal? (char-set? '(repetition 1 2 #\1)) #f) - (check-equal? (char-set? '(repetition 1 1 "12")) #f) - (check-equal? (char-set? '(repetition 1 1 "1")) #t) - (check-equal? (char-set? '(union "1" "2" "3")) #t) - (check-equal? (char-set? '(union "1" "" "3")) #f) - (check-equal? (char-set? '(intersection "1" "2" (union "3" "4"))) #t) - (check-equal? (char-set? '(intersection "1" "")) #f) - (check-equal? (char-set? '(complement "1")) #f) - (check-equal? (char-set? '(concatenation "1" "2")) #f) - (check-equal? (char-set? '(concatenation "" "2")) #f) - (check-equal? (char-set? '(concatenation "1")) #t) - (check-equal? (char-set? '(concatenation "12")) #f) - (check-equal? (char-set? '(char-range #\1 #\2)) #t) - (check-equal? (char-set? '(char-complement #\1)) #t)) +(module+ test + (require rackunit) + (check-equal? (char-set? #\a) #t) + (check-equal? (char-set? "12") #f) + (check-equal? (char-set? "1") #t) + (check-equal? (char-set? '(repetition 1 2 #\1)) #f) + (check-equal? (char-set? '(repetition 1 1 "12")) #f) + (check-equal? (char-set? '(repetition 1 1 "1")) #t) + (check-equal? (char-set? '(union "1" "2" "3")) #t) + (check-equal? (char-set? '(union "1" "" "3")) #f) + (check-equal? (char-set? '(intersection "1" "2" (union "3" "4"))) #t) + (check-equal? (char-set? '(intersection "1" "")) #f) + (check-equal? (char-set? '(complement "1")) #f) + (check-equal? (char-set? '(concatenation "1" "2")) #f) + (check-equal? (char-set? '(concatenation "" "2")) #f) + (check-equal? (char-set? '(concatenation "1")) #t) + (check-equal? (char-set? '(concatenation "12")) #f) + (check-equal? (char-set? '(char-range #\1 #\2)) #t) + (check-equal? (char-set? '(char-complement #\1)) #t)) - ;; yikes... these test cases all have the wrong arity, now. - ;; and by "now", I mean it's been broken since before we - ;; moved to git. - (module+ test - (check-equal? (parse #'#\a null) #\a) - (check-equal? (parse #'"1" null) "1") - (check-equal? (parse #'(repetition 1 1 #\1) null) - '(repetition 1 1 #\1)) - (check-equal? (parse #'(repetition 0 +inf.0 #\1) null) '(repetition 0 +inf.0 #\1)) - (check-equal? (parse #'(union #\1 (union "2") (union)) null) - '(union #\1 (union "2") (union))) - (check-equal? (parse #'(intersection #\1 (intersection "2") (intersection)) - null) - '(intersection #\1 (intersection "2") (intersection))) - (check-equal? (parse #'(complement (union #\1 #\2)) - null) - '(complement (union #\1 #\2))) - (check-equal? (parse #'(concatenation "1" "2" (concatenation)) null) - '(concatenation "1" "2" (concatenation))) - (check-equal? (parse #'(char-range "1" #\1) null) '(char-range #\1 #\1)) - (check-equal? (parse #'(char-range #\1 "1") null) '(char-range #\1 #\1)) - (check-equal? (parse #'(char-range "1" "3") null) '(char-range #\1 #\3)) - (check-equal? (parse #'(char-complement (union "1" "2")) null) - '(char-complement (union "1" "2")))) -; ) +;; yikes... these test cases all have the wrong arity, now. +;; and by "now", I mean it's been broken since before we +;; moved to git. +(module+ test + (check-equal? (parse #'#\a null) #\a) + (check-equal? (parse #'"1" null) "1") + (check-equal? (parse #'(repetition 1 1 #\1) null) + '(repetition 1 1 #\1)) + (check-equal? (parse #'(repetition 0 +inf.0 #\1) null) '(repetition 0 +inf.0 #\1)) + (check-equal? (parse #'(union #\1 (union "2") (union)) null) + '(union #\1 (union "2") (union))) + (check-equal? (parse #'(intersection #\1 (intersection "2") (intersection)) + null) + '(intersection #\1 (intersection "2") (intersection))) + (check-equal? (parse #'(complement (union #\1 #\2)) + null) + '(complement (union #\1 #\2))) + (check-equal? (parse #'(concatenation "1" "2" (concatenation)) null) + '(concatenation "1" "2" (concatenation))) + (check-equal? (parse #'(char-range "1" #\1) null) '(char-range #\1 #\1)) + (check-equal? (parse #'(char-range #\1 "1") null) '(char-range #\1 #\1)) + (check-equal? (parse #'(char-range "1" "3") null) '(char-range #\1 #\3)) + (check-equal? (parse #'(char-complement (union "1" "2")) null) + '(char-complement (union "1" "2")))) diff --git a/br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt index c1f1492..ccb55cf 100644 --- a/br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt @@ -1,9 +1,7 @@ -(module token-syntax mzscheme +#lang racket/base +(provide make-terminals-def terminals-def-t terminals-def? + make-e-terminals-def e-terminals-def-t e-terminals-def?) - ;; The things needed at compile time to handle definition of tokens - - (provide make-terminals-def terminals-def-t terminals-def? - make-e-terminals-def e-terminals-def-t e-terminals-def?) - (define-struct terminals-def (t)) - (define-struct e-terminals-def (t)) - ) +;; The things needed at compile time to handle definition of tokens +(define-struct terminals-def (t)) +(define-struct e-terminals-def (t)) diff --git a/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt index 27b3458..07e1fa5 100644 --- a/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt @@ -1,92 +1,80 @@ -(module token mzscheme - - (require-for-syntax "token-syntax.rkt") +#lang racket/base +(require (for-syntax racket/base "token-syntax.rkt")) - ;; Defining tokens +;; Defining tokens - (provide define-tokens define-empty-tokens make-token token? - (protect (rename token-name real-token-name)) - (protect (rename token-value real-token-value)) - (rename token-name* token-name) - (rename token-value* token-value) - (struct position (offset line col)) - (struct position-token (token start-pos end-pos)) - (struct srcloc-token (token srcloc))) +(provide define-tokens define-empty-tokens make-token token? + (protect-out (rename-out [token-name real-token-name])) + (protect-out (rename-out [token-value real-token-value])) + (rename-out [token-name* token-name][token-value* token-value]) + (struct-out position) + (struct-out position-token) + (struct-out srcloc-token)) - ;; A token is either - ;; - symbol - ;; - (make-token symbol any) - (define-struct token (name value) (make-inspector)) +;; A token is either +;; - symbol +;; - (make-token symbol any) +(define-struct token (name value) #:inspector (make-inspector)) - ;; token-name*: token -> symbol - (define (token-name* t) - (cond - ((symbol? t) t) - ((token? t) (token-name t)) - (else (raise-type-error - 'token-name - "symbol or struct:token" - 0 - t)))) +;; token-name*: token -> symbol +(define (token-name* t) + (cond + [(symbol? t) t] + [(token? t) (token-name t)] + [else (raise-type-error 'token-name "symbol or struct:token" 0 t)])) - ;; token-value*: token -> any - (define (token-value* t) - (cond - ((symbol? t) #f) - ((token? t) (token-value t)) - (else (raise-type-error - 'token-value - "symbol or struct:token" - 0 - t)))) +;; token-value*: token -> any +(define (token-value* t) + (cond + [(symbol? t) #f] + [(token? t) (token-value t)] + [else (raise-type-error 'token-value "symbol or struct:token" 0 t)])) - (define-for-syntax (make-ctor-name n) - (datum->syntax-object n - (string->symbol (format "token-~a" (syntax-e n))) - n - n)) +(define-for-syntax (make-ctor-name n) + (datum->syntax n + (string->symbol (format "token-~a" (syntax-e n))) + n + n)) - (define-for-syntax (make-define-tokens empty?) - (lambda (stx) - (syntax-case stx () - ((_ name (token ...)) - (andmap identifier? (syntax->list (syntax (token ...)))) - (with-syntax (((marked-token ...) - (map values #;(make-syntax-introducer) - (syntax->list (syntax (token ...)))))) - (quasisyntax/loc stx - (begin - (define-syntax name - #,(if empty? - #'(make-e-terminals-def (quote-syntax (marked-token ...))) - #'(make-terminals-def (quote-syntax (marked-token ...))))) - #,@(map - (lambda (n) - (when (eq? (syntax-e n) 'error) - (raise-syntax-error - #f - "Cannot define a token named error." - stx)) - (if empty? - #`(define (#,(make-ctor-name n)) - '#,n) - #`(define (#,(make-ctor-name n) x) - (make-token '#,n x)))) - (syntax->list (syntax (token ...)))) - #;(define marked-token #f) #;...)))) - ((_ ...) - (raise-syntax-error - #f - "must have the form (define-tokens name (identifier ...)) or (define-empty-tokens name (identifier ...))" - stx))))) +(define-for-syntax ((make-define-tokens empty?) stx) + (syntax-case stx () + [(_ NAME (TOKEN ...)) + (andmap identifier? (syntax->list #'(TOKEN ...))) + (with-syntax (((marked-token ...) + (map values #;(make-syntax-introducer) + (syntax->list #'(TOKEN ...))))) + (quasisyntax/loc stx + (begin + (define-syntax NAME + #,(if empty? + #'(make-e-terminals-def (quote-syntax (marked-token ...))) + #'(make-terminals-def (quote-syntax (marked-token ...))))) + #,@(map + (λ (n) + (when (eq? (syntax-e n) 'error) + (raise-syntax-error + #f + "Cannot define a token named error." + stx)) + (if empty? + #`(define (#,(make-ctor-name n)) + '#,n) + #`(define (#,(make-ctor-name n) x) + (make-token '#,n x)))) + (syntax->list #'(TOKEN ...))) + #;(define marked-token #f) #;...)))] + [(_ ...) + (raise-syntax-error #f + "must have the form (define-tokens name (identifier ...)) or (define-empty-tokens name (identifier ...))" + stx)])) - (define-syntax define-tokens (make-define-tokens #f)) - (define-syntax define-empty-tokens (make-define-tokens #t)) +(define-syntax define-tokens (make-define-tokens #f)) +(define-syntax define-empty-tokens (make-define-tokens #t)) - (define-struct position (offset line col) #f) - (define-struct position-token (token start-pos end-pos) #f) +(define-struct position (offset line col) #:inspector #f) +(define-struct position-token (token start-pos end-pos) #:inspector #f) - (define-struct srcloc-token (token srcloc) #f) - ) +(define-struct srcloc-token (token srcloc) #:inspector #f) + diff --git a/br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt index c21e88c..d8580b6 100644 --- a/br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt @@ -1,6 +1,5 @@ -#lang racket - -(require "util.rkt") +#lang racket/base +(require racket/promise "util.rkt") (provide (all-defined-out)) @@ -10,36 +9,33 @@ ;; get-chars-for-x : (nat -> bool) (listof (list nat nat bool)) -> (listof (cons nat nat)) (define (get-chars-for char-x? mapped-chars) (cond - ((null? mapped-chars) null) - (else - (let* ((range (car mapped-chars)) - (low (car range)) - (high (cadr range)) - (x (char-x? low))) - (cond - ((caddr range) - (if x - (cons (cons low high) - (get-chars-for char-x? (cdr mapped-chars))) - (get-chars-for char-x? (cdr mapped-chars)))) - (else - (let loop ((range-start low) - (i (car range)) - (parity x)) - (cond - ((> i high) - (if parity - (cons (cons range-start high) (get-chars-for char-x? (cdr mapped-chars))) - (get-chars-for char-x? (cdr mapped-chars)))) - ((eq? parity (char-x? i)) - (loop range-start (add1 i) parity)) - (parity - (cons (cons range-start (sub1 i)) (loop i (add1 i) #f))) - (else - (loop i (add1 i) #t)))))))))) + [(null? mapped-chars) null] + [else + (define range (car mapped-chars)) + (define low (car range)) + (define high (cadr range)) + (define x (char-x? low)) + (cond + [(caddr range) + (if x + (cons (cons low high) (get-chars-for char-x? (cdr mapped-chars))) + (get-chars-for char-x? (cdr mapped-chars)))] + [else + (let loop ([range-start low] + [i (car range)] + [parity x]) + (cond + [(> i high) + (if parity + (cons (cons range-start high) (get-chars-for char-x? (cdr mapped-chars))) + (get-chars-for char-x? (cdr mapped-chars)))] + [(eq? parity (char-x? i)) + (loop range-start (add1 i) parity)] + [parity (cons (cons range-start (sub1 i)) (loop i (add1 i) #f))] + [else (loop i (add1 i) #t)]))])])) (define (compute-ranges x?) - (delay (get-chars-for (lambda (x) (x? (integer->char x))) mapped-chars))) + (delay (get-chars-for (λ (x) (x? (integer->char x))) mapped-chars))) (define alphabetic-ranges (compute-ranges char-alphabetic?)) ;; 325 (define lower-case-ranges (compute-ranges char-lower-case?)) ;; 405 @@ -61,7 +57,7 @@ (check-equal? (get-chars-for odd? '()) '()) (check-equal? (get-chars-for odd? '((1 4 #f) (8 13 #f))) '((1 . 1) (3 . 3) (9 . 9) (11 . 11) (13 . 13))) - (check-equal? (get-chars-for (lambda (x) + (check-equal? (get-chars-for (λ (x) (odd? (quotient x 10))) '((1 5 #t) (17 19 #t) (21 51 #f))) '((17 . 19) (30 . 39) (50 . 51)))) diff --git a/br-parser-tools-lib/br-parser-tools/private-lex/util.rkt b/br-parser-tools-lib/br-parser-tools/private-lex/util.rkt index a7afc54..bbcb447 100644 --- a/br-parser-tools-lib/br-parser-tools/private-lex/util.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-lex/util.rkt @@ -1,4 +1,5 @@ -#lang racket +#lang racket/base +(require (for-syntax racket/base)) (provide (all-defined-out)) @@ -10,18 +11,18 @@ (module+ test (require rackunit)) +(define-syntax (test-block stx) + (syntax-case stx () + [(_ defs (code right-ans) ...) + #'(module+ test + (require rackunit) + (let* defs + (let ([real-ans code]) + (check-equal? real-ans right-ans)) ...))])) + #;(define-syntax test-block (syntax-rules () - ((_ defs (code right-ans) ...) - (let* defs - (let ((real-ans code)) - (unless (equal? real-ans right-ans) - (printf "Test failed: ~e gave ~e. Expected ~e\n" - 'code real-ans 'right-ans))) ...)))) - -(define-syntax test-block - (syntax-rules () - ((_ x ...) (void)))) + ((_ x ...) (void)))) ;; A cache is (X ( -> Y) -> Y) @@ -31,23 +32,22 @@ ;; returned. ;; Xs are compared with equal? (define (make-cache) - (let ((table (make-hash))) - (lambda (key build) - (hash-ref table key - (lambda () - (let ((new (build))) - (hash-set! table key new) - new)))))) + (let ([table (make-hash)]) + (λ (key build) + (hash-ref table key (λ () + (let ([new (build)]) + (hash-set! table key new) + new)))))) (module+ test (define cache (make-cache)) - (check-equal? (cache '(s 1 2) (lambda () 9)) 9) - (check-equal? (cache '(s 2 1) (lambda () 8)) 8) - (check-equal? (cache '(s 1 2) (lambda () 1)) 9) + (check-equal? (cache '(s 1 2) (λ () 9)) 9) + (check-equal? (cache '(s 2 1) (λ () 8)) 8) + (check-equal? (cache '(s 1 2) (λ () 1)) 9) (check-equal? (cache (cons 's (cons 0 (cons +inf.0 10))) - (lambda () 22)) 22) + (λ () 22)) 22) (check-equal? (cache (cons 's (cons 0 (cons +inf.0 10))) - (lambda () 1)) 22)) + (λ () 1)) 22)) @@ -55,8 +55,8 @@ ;; makes a function that returns a higher number by 1, each time ;; it is called. (define (make-counter) - (let ((counter 0)) - (lambda () + (let ([counter 0]) + (λ () (begin0 counter (set! counter (add1 counter)))))) @@ -76,33 +76,33 @@ ;; previous entry. l must be grouped by indexes. (define (remove-dups l index acc) (cond - ((null? l) (reverse acc)) - ((null? acc) (remove-dups (cdr l) index (cons (car l) acc))) - ((= (index (car acc)) (index (car l))) - (remove-dups (cdr l) index acc)) - (else - (remove-dups (cdr l) index (cons (car l) acc))))) + [(null? l) (reverse acc)] + [(null? acc) (remove-dups (cdr l) index (cons (car l) acc))] + [(= (index (car acc)) (index (car l))) + (remove-dups (cdr l) index acc)] + [else + (remove-dups (cdr l) index (cons (car l) acc))])) (module+ test (check-equal? (remove-dups '((1 2) (2 2) (1 3) (1 4) (100 4) (0 5)) cadr null) - '((1 2) (1 3) (1 4) (0 5))) + '((1 2) (1 3) (1 4) (0 5))) (check-equal? (remove-dups null error null) null)) ;; do-simple-equiv : (list-of X) (X -> nat) -> (list-of X) ;; Sorts l according to index and removes the entries with duplicate ;; indexes. (define (do-simple-equiv l index) - (let ((ordered (sort l (lambda (a b) (< (index a) (index b)))))) - (remove-dups ordered index null))) + (define ordered (sort l (λ (a b) (< (index a) (index b))))) + (remove-dups ordered index null)) (module+ test - (check-equal? (do-simple-equiv '((2 2) (1 4) (1 2) - (100 4) (1 3) (0 5)) - cadr) - '((2 2) (1 3) (1 4) (0 5))) - (check-equal? (do-simple-equiv null error) null)) + (check-equal? (do-simple-equiv '((2 2) (1 4) (1 2) + (100 4) (1 3) (0 5)) + cadr) + '((2 2) (1 3) (1 4) (0 5))) + (check-equal? (do-simple-equiv null error) null)) ;; replace : (list-of X) (X -> bool) (X -> (list-of X)) (list-of X) -> ;; (list-of X) @@ -110,16 +110,16 @@ ;; list. (define (replace l pred? get acc) (cond - ((null? l) acc) - ((pred? (car l)) (replace (cdr l) pred? get (append (get (car l)) acc))) - (else (replace (cdr l) pred? get (cons (car l) acc))))) + [(null? l) acc] + [(pred? (car l)) (replace (cdr l) pred? get (append (get (car l)) acc))] + [else (replace (cdr l) pred? get (cons (car l) acc))])) (module+ test - (check-equal? (replace null void (lambda () (list 1)) null) null) + (check-equal? (replace null void (λ () (list 1)) null) null) (check-equal? (replace '(1 2 3 4 3 5) - (lambda (x) (= x 3)) - (lambda (x) (list 1 2 3)) + (λ (x) (= x 3)) + (λ (x) (list 1 2 3)) null) '(5 1 2 3 4 1 2 3 2 1))) diff --git a/br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt index ebff00d..07aea77 100644 --- a/br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt @@ -1,280 +1,250 @@ +#lang racket/base ;; Constructs to create and access grammars, the internal ;; representation of the input to the parser generator. - -(module grammar mzscheme - (require mzlib/class - mzlib/list - "yacc-helper.rkt" - racket/contract) +(require racket/class + (except-in racket/list remove-duplicates) + "yacc-helper.rkt" + racket/contract) - ;; Each production has a unique index 0 <= index <= number of productions - (define-struct prod (lhs rhs index prec action) (make-inspector)) +;; Each production has a unique index 0 <= index <= number of productions +(define-struct prod (lhs rhs index prec action) #:inspector (make-inspector) #:mutable) - ;; The dot-pos field is the index of the element in the rhs - ;; of prod that the dot immediately precedes. - ;; Thus 0 <= dot-pos <= (vector-length rhs). - (define-struct item (prod dot-pos) (make-inspector)) +;; The dot-pos field is the index of the element in the rhs +;; of prod that the dot immediately precedes. +;; Thus 0 <= dot-pos <= (vector-length rhs). +(define-struct item (prod dot-pos) #:inspector (make-inspector)) - ;; gram-sym = (union term? non-term?) - ;; Each term has a unique index 0 <= index < number of terms - ;; Each non-term has a unique index 0 <= index < number of non-terms - (define-struct term (sym index prec) (make-inspector)) - (define-struct non-term (sym index) (make-inspector)) - - ;; a precedence declaration. - (define-struct prec (num assoc) (make-inspector)) +;; gram-sym = (union term? non-term?) +;; Each term has a unique index 0 <= index < number of terms +;; Each non-term has a unique index 0 <= index < number of non-terms +(define-struct term (sym index prec) #:inspector (make-inspector) #:mutable) +(define-struct non-term (sym index) #:inspector (make-inspector) #:mutable) + +;; a precedence declaration. +(define-struct prec (num assoc) #:inspector (make-inspector)) - (provide/contract - (make-item (prod? (or/c #f natural-number/c) . -> . item?)) - (make-term (symbol? (or/c #f natural-number/c) (or/c prec? #f) . -> . term?)) - (make-non-term (symbol? (or/c #f natural-number/c) . -> . non-term?)) - (make-prec (natural-number/c (or/c 'left 'right 'nonassoc) . -> . prec?)) - (make-prod (non-term? (vectorof (or/c non-term? term?)) - (or/c #f natural-number/c) (or/c #f prec?) syntax? . -> . prod?))) +(provide/contract + [make-item (prod? (or/c #f natural-number/c) . -> . item?)] + [make-term (symbol? (or/c #f natural-number/c) (or/c prec? #f) . -> . term?)] + [make-non-term (symbol? (or/c #f natural-number/c) . -> . non-term?)] + [make-prec (natural-number/c (or/c 'left 'right 'nonassoc) . -> . prec?)] + [make-prod (non-term? (vectorof (or/c non-term? term?)) + (or/c #f natural-number/c) (or/c #f prec?) syntax? . -> . prod?)]) - (provide - - - ;; Things that work on items - start-item? item-prod item->string - sym-at-dot move-dot-right itemstring + sym-at-dot move-dot-right itemstring - non-term? term? non-termbit-vector term-index non-term-index + ;; Things that operate on grammar symbols + gram-sym-symbol gram-sym-index term-prec gram-sym->string + non-term? term? non-termbit-vector term-index non-term-index - ;; Things that work on precs - prec-num prec-assoc + ;; Things that work on precs + prec-num prec-assoc - grammar% + grammar% - ;; Things that work on productions - prod-index prod-prec prod-rhs prod-lhs prod-action) + ;; Things that work on productions + prod-index prod-prec prod-rhs prod-lhs prod-action) - ;;---------------------- LR items -------------------------- +;;---------------------- LR items -------------------------- - ;; item bool - ;; Lexicographic comparison on two items. - (define (item bool - ;; The start production always has index 0 - (define (start-item? i) - (= 0 (non-term-index (prod-lhs (item-prod i))))) +;; item bool +;; Lexicographic comparison on two items. +(define (item bool +;; The start production always has index 0 +(define (start-item? i) + (zero? (non-term-index (prod-lhs (item-prod i))))) - ;; move-dot-right: LR-item -> LR-item | #f - ;; moves the dot to the right in the item, unless it is at its - ;; rightmost, then it returns false - (define (move-dot-right i) - (cond - ((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f) - (else (make-item (item-prod i) - (add1 (item-dot-pos i)))))) - - ;; sym-at-dot: LR-item -> gram-sym | #f - ;; returns the symbol after the dot in the item or #f if there is none - (define (sym-at-dot i) - (let ((dp (item-dot-pos i)) - (rhs (prod-rhs (item-prod i)))) - (cond - ((= dp (vector-length rhs)) #f) - (else (vector-ref rhs dp))))) +;; move-dot-right: LR-item -> LR-item | #f +;; moves the dot to the right in the item, unless it is at its +;; rightmost, then it returns false +(define (move-dot-right i) + (cond + [(= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f] + [else (make-item (item-prod i) + (add1 (item-dot-pos i)))])) + +;; sym-at-dot: LR-item -> gram-sym | #f +;; returns the symbol after the dot in the item or #f if there is none +(define (sym-at-dot i) + (define dp (item-dot-pos i)) + (define rhs (prod-rhs (item-prod i))) + (cond + [(= dp (vector-length rhs)) #f] + [else (vector-ref rhs dp)])) - ;; print-item: LR-item -> - (define (item->string it) - (let ((print-sym (lambda (i) - (let ((gs (vector-ref (prod-rhs (item-prod it)) i))) - (cond - ((term? gs) (format "~a " (term-sym gs))) - (else (format "~a " (non-term-sym gs)))))))) - (string-append - (format "~a -> " (non-term-sym (prod-lhs (item-prod it)))) - (let loop ((i 0)) - (cond - ((= i (vector-length (prod-rhs (item-prod it)))) - (if (= i (item-dot-pos it)) - ". " - "")) - ((= i (item-dot-pos it)) - (string-append ". " (print-sym i) (loop (add1 i)))) - (else (string-append (print-sym i) (loop (add1 i))))))))) - - ;; --------------------- Grammar Symbols -------------------------- - - (define (non-term +(define (item->string it) + (define print-sym (λ (i) + (let ((gs (vector-ref (prod-rhs (item-prod it)) i))) + (cond + ((term? gs) (format "~a " (term-sym gs))) + (else (format "~a " (non-term-sym gs))))))) + (string-append + (format "~a -> " (non-term-sym (prod-lhs (item-prod it)))) + (let loop ((i 0)) + (cond + [(= i (vector-length (prod-rhs (item-prod it)))) + (if (= i (item-dot-pos it)) + ". " + "")] + [(= i (item-dot-pos it)) + (string-append ". " (print-sym i) (loop (add1 i)))] + [else (string-append (print-sym i) (loop (add1 i)))])))) + +;; --------------------- Grammar Symbols -------------------------- + +(define (non-termstring gs) + (symbol->string (gram-sym-symbol gs))) + +;; term-list->bit-vector: term list -> int +;; Creates a number where the nth bit is 1 if the term with index n is in +;; the list, and whose nth bit is 0 otherwise +(define (term-list->bit-vector terms) + (if (null? terms) + 0 + (bitwise-ior (arithmetic-shift 1 (term-index (car terms))) + (term-list->bit-vector (cdr terms))))) - (define (termstring gs) - (symbol->string (gram-sym-symbol gs))) - ;; term-list->bit-vector: term list -> int - ;; Creates a number where the nth bit is 1 if the term with index n is in - ;; the list, and whose nth bit is 0 otherwise - (define (term-list->bit-vector terms) - (cond - ((null? terms) 0) - (else - (bitwise-ior (arithmetic-shift 1 (term-index (car terms))) (term-list->bit-vector (cdr terms)))))) +;; ------------------------- Grammar ------------------------------ + +(define grammar% + (class object% + (super-instantiate ()) + ;; prods: production list list + ;; where there is one production list per non-term + (init prods) + ;; init-prods: production list + ;; The productions parsing can start from + ;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable + (init-field init-prods terms non-terms end-terms) + ;; list of all productions + (define all-prods (apply append prods)) + (define num-prods (length all-prods)) + (define num-terms (length terms)) + (define num-non-terms (length non-terms)) - ;; ------------------------- Grammar ------------------------------ + (for ([(nt count) (in-indexed non-terms)]) + (set-non-term-index! nt count)) - (define grammar% - (class object% - (super-instantiate ()) - ;; prods: production list list - ;; where there is one production list per non-term - (init prods) - ;; init-prods: production list - ;; The productions parsing can start from - ;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable - (init-field init-prods terms non-terms end-terms) - - ;; list of all productions - (define all-prods (apply append prods)) - (define num-prods (length all-prods)) - (define num-terms (length terms)) - (define num-non-terms (length non-terms)) - - (let ((count 0)) - (for-each - (lambda (nt) - (set-non-term-index! nt count) - (set! count (add1 count))) - non-terms)) - - (let ((count 0)) - (for-each - (lambda (t) - (set-term-index! t count) - (set! count (add1 count))) - terms)) + (for ([(t count) (in-indexed terms)]) + (set-term-index! t count)) - (let ((count 0)) - (for-each - (lambda (prod) - (set-prod-index! prod count) - (set! count (add1 count))) - all-prods)) + (for ([(prod count) (in-indexed all-prods)]) + (set-prod-index! prod count)) - ;; indexed by the index of the non-term - contains the list of productions for that non-term - (define nt->prods - (let ((v (make-vector (length prods) #f))) - (for-each (lambda (prods) - (vector-set! v (non-term-index (prod-lhs (car prods))) prods)) - prods) - v)) + ;; indexed by the index of the non-term - contains the list of productions for that non-term + (define nt->prods + (let ((v (make-vector (length prods) #f))) + (for ([prods (in-list prods)]) + (vector-set! v (non-term-index (prod-lhs (car prods))) prods)) + v)) - (define nullable-non-terms - (nullable all-prods num-non-terms)) + (define nullable-non-terms + (nullable all-prods num-non-terms)) - (define/public (get-num-terms) num-terms) - (define/public (get-num-non-terms) num-non-terms) + (define/public (get-num-terms) num-terms) + (define/public (get-num-non-terms) num-non-terms) - (define/public (get-prods-for-non-term nt) - (vector-ref nt->prods (non-term-index nt))) - (define/public (get-prods) all-prods) - (define/public (get-init-prods) init-prods) + (define/public (get-prods-for-non-term nt) + (vector-ref nt->prods (non-term-index nt))) + (define/public (get-prods) all-prods) + (define/public (get-init-prods) init-prods) - (define/public (get-terms) terms) - (define/public (get-non-terms) non-terms) + (define/public (get-terms) terms) + (define/public (get-non-terms) non-terms) - (define/public (get-num-prods) num-prods) - (define/public (get-end-terms) end-terms) + (define/public (get-num-prods) num-prods) + (define/public (get-end-terms) end-terms) - (define/public (nullable-non-term? nt) - (vector-ref nullable-non-terms (non-term-index nt))) - - (define/public (nullable-after-dot? item) - (let* ((rhs (prod-rhs (item-prod item))) - (prod-length (vector-length rhs))) - (let loop ((i (item-dot-pos item))) - (cond - ((< i prod-length) - (if (and (non-term? (vector-ref rhs i)) (nullable-non-term? (vector-ref rhs i))) - (loop (add1 i)) - #f)) - ((= i prod-length) #t))))) + (define/public (nullable-non-term? nt) + (vector-ref nullable-non-terms (non-term-index nt))) + + (define/public (nullable-after-dot? item) + (define rhs (prod-rhs (item-prod item))) + (define prod-length (vector-length rhs)) + (let loop ((i (item-dot-pos item))) + (cond + [(< i prod-length) + (and (non-term? (vector-ref rhs i)) + (nullable-non-term? (vector-ref rhs i)) + (loop (add1 i)))] + [(= i prod-length)]))) - (define/public (nullable-non-term-thunk) - (lambda (nt) - (nullable-non-term? nt))) - (define/public (nullable-after-dot?-thunk) - (lambda (item) - (nullable-after-dot? item))))) + (define/public (nullable-non-term-thunk) + (λ (nt) (nullable-non-term? nt))) + (define/public (nullable-after-dot?-thunk) + (λ (item) (nullable-after-dot? item))))) - ;; nullable: production list * int -> non-term set - ;; determines which non-terminals can derive epsilon - (define (nullable prods num-nts) - (letrec ((nullable (make-vector num-nts #f)) - (added #f) +;; nullable: production list * int -> non-term set +;; determines which non-terminals can derive epsilon +(define (nullable prods num-nts) + (define nullable (make-vector num-nts #f)) + (define added #f) - ;; possible-nullable: producion list -> production list - ;; Removes all productions that have a terminal - (possible-nullable - (lambda (prods) - (filter (lambda (prod) - (vector-andmap non-term? (prod-rhs prod))) - prods))) + ;; possible-nullable: producion list -> production list + ;; Removes all productions that have a terminal + (define (possible-nullable prods) + (for/list ([prod (in-list prods)] + #:when (vector-andmap non-term? (prod-rhs prod))) + prod)) - ;; set-nullables: production list -> production list - ;; makes one pass through the productions, adding the ones - ;; known to be nullable now to nullable and returning a list - ;; of productions that we don't know about yet. - (set-nullables - (lambda (prods) - (cond - ((null? prods) null) - ((vector-ref nullable - (gram-sym-index (prod-lhs (car prods)))) - (set-nullables (cdr prods))) - ((vector-andmap (lambda (nt) - (vector-ref nullable (gram-sym-index nt))) - (prod-rhs (car prods))) - (vector-set! nullable - (gram-sym-index (prod-lhs (car prods))) - #t) - (set! added #t) - (set-nullables (cdr prods))) - (else - (cons (car prods) - (set-nullables (cdr prods)))))))) - - (let loop ((P (possible-nullable prods))) - (cond - ((null? P) nullable) - (else - (set! added #f) - (let ((new-P (set-nullables P))) - (if added - (loop new-P) - nullable))))))) + ;; set-nullables: production list -> production list + ;; makes one pass through the productions, adding the ones + ;; known to be nullable now to nullable and returning a list + ;; of productions that we don't know about yet. + (define (set-nullables prods) + (cond + [(null? prods) null] + [(vector-ref nullable (gram-sym-index (prod-lhs (car prods)))) + (set-nullables (cdr prods))] + [(vector-andmap (λ (nt) (vector-ref nullable (gram-sym-index nt))) (prod-rhs (car prods))) + (vector-set! nullable (gram-sym-index (prod-lhs (car prods))) #t) + (set! added #t) + (set-nullables (cdr prods))] + [else (cons (car prods) (set-nullables (cdr prods)))])) + (let loop ((P (possible-nullable prods))) + (cond + [(null? P) nullable] + [else + (set! added #f) + (define new-P (set-nullables P)) + (if added + (loop new-P) + nullable)]))) -) + diff --git a/br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt index 958acc1..bac6736 100644 --- a/br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt @@ -1,61 +1,53 @@ -(module graph mzscheme +#lang racket/base +(provide digraph) - (provide digraph) - - (define (zero-thunk) 0) +(define (zero-thunk) 0) - ;; digraph: - ;; ('a list) * ('a -> 'a list) * ('a -> 'b) * ('b * 'b -> 'b) * (-> 'b) - ;; -> ('a -> 'b) - ;; DeRemer and Pennello 1982 - ;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} - ;; We use a hash-table to represent the result function 'a -> 'b set, so - ;; the values of type 'a must be comparable with eq?. - (define (digraph nodes edges f- union fail) - (letrec [ - ;; Will map elements of 'a to 'b sets - (results (make-hash-table)) - (f (lambda (x) (hash-table-get results x fail))) - - ;; Maps elements of 'a to integers. - (N (make-hash-table)) - (get-N (lambda (x) (hash-table-get N x zero-thunk))) - (set-N (lambda (x d) (hash-table-put! N x d))) - - (stack null) - (push (lambda (x) - (set! stack (cons x stack)))) - (pop (lambda () - (begin0 - (car stack) - (set! stack (cdr stack))))) - (depth (lambda () (length stack))) +;; digraph: +;; ('a list) * ('a -> 'a list) * ('a -> 'b) * ('b * 'b -> 'b) * (-> 'b) +;; -> ('a -> 'b) +;; DeRemer and Pennello 1982 +;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} +;; We use a hash-table to represent the result function 'a -> 'b set, so +;; the values of type 'a must be comparable with eq?. + +(define (digraph nodes edges f- union fail) + (define results (make-hasheq)) + (define (f x) (hash-ref results x fail)) + ;; Maps elements of 'a to integers. + (define N (make-hasheq)) + (define (get-N x) (hash-ref N x zero-thunk)) + (define (set-N x d) (hash-set! N x d)) + (define stack null) + (define (push x) (set! stack (cons x stack))) + (define (pop) (begin0 + (car stack) + (set! stack (cdr stack)))) + (define (depth) (length stack)) - ;; traverse: 'a -> - (traverse - (lambda (x) - (push x) - (let ((d (depth))) - (set-N x d) - (hash-table-put! results x (f- x)) - (for-each (lambda (y) - (if (= 0 (get-N y)) - (traverse y)) - (hash-table-put! results - x - (union (f x) (f y))) - (set-N x (min (get-N x) (get-N y)))) - (edges x)) - (if (= d (get-N x)) - (let loop ((p (pop))) - (set-N p +inf.0) - (hash-table-put! results p (f x)) - (if (not (eq? x p)) - (loop (pop))))))))] - (for-each (lambda (x) - (if (= 0 (get-N x)) - (traverse x))) - nodes) - f)) + ;; traverse: 'a -> + (define (traverse x) + (push x) + (define d (depth)) + (set-N x d) + (hash-set! results x (f- x)) + (for-each (λ (y) + (when (= 0 (get-N y)) + (traverse y)) + (hash-set! results + x + (union (f x) (f y))) + (set-N x (min (get-N x) (get-N y)))) + (edges x)) + (when (= d (get-N x)) + (let loop ([p (pop)]) + (set-N p +inf.0) + (hash-set! results p (f x)) + (when (not (eq? x p)) + (loop (pop)))))) + ;; Will map elements of 'a to 'b sets + (for ([x (in-list nodes)] + #:when (zero? (get-N x))) + (traverse x)) + f) -) diff --git a/br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt index 7309f51..fa99240 100644 --- a/br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt @@ -1,374 +1,297 @@ -(module input-file-parser mzscheme +#lang racket/base +(require "yacc-helper.rkt" + "../private-lex/token-syntax.rkt" + "grammar.rkt" + racket/class + racket/contract + (for-template racket/base)) + +;; routines for parsing the input to the parser generator and producing a +;; grammar (See grammar.rkt) - ;; routines for parsing the input to the parser generator and producing a - ;; grammar (See grammar.rkt) - - (require "yacc-helper.rkt" - "../private-lex/token-syntax.rkt" - "grammar.rkt" - mzlib/class - racket/contract) - (require-for-template mzscheme) - (define (is-a-grammar%? x) (is-a? x grammar%)) - (provide/contract - (parse-input ((listof identifier?) (listof identifier?) (listof identifier?) - (or/c #f syntax?) syntax? any/c . -> . is-a-grammar%?)) - (get-term-list ((listof identifier?) . -> . (listof identifier?)))) +(provide/contract + [parse-input ((listof identifier?) (listof identifier?) (listof identifier?) + (or/c #f syntax?) syntax? any/c . -> . is-a-grammar%?)] + [get-term-list ((listof identifier?) . -> . (listof identifier?))]) - (define stx-for-original-property (read-syntax #f (open-input-string "original"))) +(define stx-for-original-property (read-syntax #f (open-input-string "original"))) - ;; get-args: ??? -> (values (listof syntax) (or/c #f (cons integer? stx))) - (define (get-args i rhs src-pos term-defs) - (let ((empty-table (make-hash-table)) - (biggest-pos #f)) - (hash-table-put! empty-table 'error #t) - (for-each (lambda (td) - (let ((v (syntax-local-value td))) - (if (e-terminals-def? v) - (for-each (lambda (s) - (hash-table-put! empty-table (syntax-object->datum s) #t)) - (syntax->list (e-terminals-def-t v)))))) - term-defs) - (let ([args - (let get-args ((i i) - (rhs rhs)) - (cond - ((null? rhs) null) - (else - (let ((b (car rhs)) - (name (if (hash-table-get empty-table (syntax-object->datum (car rhs)) (lambda () #f)) - (gensym) - (string->symbol (format "$~a" i))))) - (cond - (src-pos - (let ([start-pos-id - (datum->syntax-object b (string->symbol (format "$~a-start-pos" i)) b stx-for-original-property)] - [end-pos-id - (datum->syntax-object b (string->symbol (format "$~a-end-pos" i)) b stx-for-original-property)]) - (set! biggest-pos (cons start-pos-id end-pos-id)) - `(,(datum->syntax-object b name b stx-for-original-property) - ,start-pos-id - ,end-pos-id - ,@(get-args (add1 i) (cdr rhs))))) - (else - `(,(datum->syntax-object b name b stx-for-original-property) - ,@(get-args (add1 i) (cdr rhs)))))))))]) - (values args biggest-pos)))) +;; get-args: ??? -> (values (listof syntax) (or/c #f (cons integer? stx))) +(define (get-args i rhs src-pos term-defs) + (define empty-table (make-hasheq)) + (define biggest-pos #f) + (hash-set! empty-table 'error #t) + (for* ([td (in-list term-defs)] + [v (in-value (syntax-local-value td))] + #:when (e-terminals-def? v) + [s (in-list (syntax->list (e-terminals-def-t v)))]) + (hash-set! empty-table (syntax->datum s) #t)) + (define args + (let get-args ([i i][rhs rhs]) + (cond + [(null? rhs) null] + [else + (define b (car rhs)) + (define name (if (hash-ref empty-table (syntax->datum (car rhs)) #f) + (gensym) + (string->symbol (format "$~a" i)))) + (cond + [src-pos + (define start-pos-id + (datum->syntax b (string->symbol (format "$~a-start-pos" i)) b stx-for-original-property)) + (define end-pos-id + (datum->syntax b (string->symbol (format "$~a-end-pos" i)) b stx-for-original-property)) + (set! biggest-pos (cons start-pos-id end-pos-id)) + (list* (datum->syntax b name b stx-for-original-property) + start-pos-id + end-pos-id + (get-args (add1 i) (cdr rhs)))] + [else + (list* (datum->syntax b name b stx-for-original-property) + (get-args (add1 i) (cdr rhs)))])]))) + (values args biggest-pos)) - ;; Given the list of terminal symbols and the precedence/associativity definitions, - ;; builds terminal structures (See grammar.rkt) - ;; build-terms: symbol list * symbol list list -> term list - (define (build-terms term-list precs) - (let ((counter 0) - - ;;(term-list (cons (gensym) term-list)) - - ;; Will map a terminal symbol to its precedence/associativity - (prec-table (make-hash-table))) - - ;; Fill the prec table - (for-each - (lambda (p-decl) - (begin0 - (let ((assoc (car p-decl))) - (for-each - (lambda (term-sym) - (hash-table-put! prec-table term-sym (make-prec counter assoc))) - (cdr p-decl))) - (set! counter (add1 counter)))) - precs) +;; Given the list of terminal symbols and the precedence/associativity definitions, +;; builds terminal structures (See grammar.rkt) +;; build-terms: symbol list * symbol list list -> term list +(define (build-terms term-list precs) + (define counter 0) + ;;(term-list (cons (gensym) term-list)) + ;; Will map a terminal symbol to its precedence/associativity + (define prec-table (make-hasheq)) + + ;; Fill the prec table + (for ([p-decl (in-list precs)]) + (define assoc (car p-decl)) + (for ([term-sym (in-list (cdr p-decl))]) + (hash-set! prec-table term-sym (make-prec counter assoc))) + (set! counter (add1 counter))) - ;; Build the terminal structures - (map - (lambda (term-sym) - (make-term term-sym - #f - (hash-table-get prec-table term-sym (lambda () #f)))) - term-list))) + ;; Build the terminal structures + (for/list ([term-sym (in-list term-list)]) + (make-term term-sym + #f + (hash-ref prec-table term-sym (λ () #f))))) - ;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.rkt) - ;; get-terms-from-def: identifier? -> (listof identifier?) - (define (get-terms-from-def term-syn) - (let ((t (syntax-local-value term-syn (lambda () #f)))) - (cond - ((terminals-def? t) (syntax->list (terminals-def-t t))) - ((e-terminals-def? t) (syntax->list (e-terminals-def-t t))) - (else - (raise-syntax-error - 'parser-tokens - "undefined token group" - term-syn))))) +;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.rkt) +;; get-terms-from-def: identifier? -> (listof identifier?) +(define (get-terms-from-def term-syn) + (define t (syntax-local-value term-syn #f)) + (cond + [(terminals-def? t) (syntax->list (terminals-def-t t))] + [(e-terminals-def? t) (syntax->list (e-terminals-def-t t))] + [else + (raise-syntax-error + 'parser-tokens + "undefined token group" + term-syn)])) - (define (get-term-list term-group-names) - (remove-duplicates - (cons (datum->syntax-object #f 'error) - (apply append - (map get-terms-from-def term-group-names))))) +(define (get-term-list term-group-names) + (remove-duplicates + (cons (datum->syntax #f 'error) + (apply append (map get-terms-from-def term-group-names))))) - (define (parse-input term-defs start ends prec-decls prods src-pos) - (let* ((start-syms (map syntax-e start)) - - (list-of-terms (map syntax-e (get-term-list term-defs))) - - (end-terms - (map - (lambda (end) - (unless (memq (syntax-e end) list-of-terms) - (raise-syntax-error - 'parser-end-tokens - (format "End token ~a not defined as a token" - (syntax-e end)) - end)) - (syntax-e end)) - ends)) - - ;; Get the list of terminals out of input-terms - - (list-of-non-terms - (syntax-case prods () - (((non-term production ...) ...) - (begin - (for-each - (lambda (nts) - (if (memq (syntax-object->datum nts) list-of-terms) - (raise-syntax-error - 'parser-non-terminals - (format "~a used as both token and non-terminal" - (syntax-object->datum nts)) - nts))) - (syntax->list (syntax (non-term ...)))) - - (let ((dup (duplicate-list? (syntax-object->datum - (syntax (non-term ...)))))) - (if dup - (raise-syntax-error - 'parser-non-terminals - (format "non-terminal ~a defined multiple times" - dup) - prods))) - - (syntax-object->datum (syntax (non-term ...))))) - (_ - (raise-syntax-error - 'parser-grammar - "Grammar must be of the form (grammar (non-terminal productions ...) ...)" - prods)))) - - ;; Check the precedence declarations for errors and turn them into data - (precs - (syntax-case prec-decls () - (((type term ...) ...) - (let ((p-terms - (syntax-object->datum (syntax (term ... ...))))) - (cond - ((duplicate-list? p-terms) => - (lambda (d) +(define (parse-input term-defs start ends prec-decls prods src-pos) + (define start-syms (map syntax-e start)) + (define list-of-terms (map syntax-e (get-term-list term-defs))) + (define end-terms + (for/list ([end (in-list ends)]) + (unless (memq (syntax-e end) list-of-terms) + (raise-syntax-error + 'parser-end-tokens + (format "End token ~a not defined as a token" + (syntax-e end)) + end)) + (syntax-e end))) + ;; Get the list of terminals out of input-terms + (define list-of-non-terms + (syntax-case prods () + [((NON-TERM PRODUCTION ...) ...) + (begin + (for ([nts (in-list (syntax->list #'(NON-TERM ...)))] + #:when (memq (syntax->datum nts) list-of-terms)) + (raise-syntax-error + 'parser-non-terminals + (format "~a used as both token and non-terminal" (syntax->datum nts)) + nts)) + (let ([dup (duplicate-list? (syntax->datum #'(NON-TERM ...)))]) + (when dup + (raise-syntax-error + 'parser-non-terminals + (format "non-terminal ~a defined multiple times" dup) + prods))) + (syntax->datum #'(NON-TERM ...)))] + [_ (raise-syntax-error + 'parser-grammar + "Grammar must be of the form (grammar (non-terminal productions ...) ...)" + prods)])) + ;; Check the precedence declarations for errors and turn them into data + (define precs + (syntax-case prec-decls () + [((TYPE TERM ...) ...) + (let ([p-terms (syntax->datum #'(TERM ... ...))]) + (cond + [(duplicate-list? p-terms) => + (λ (d) + (raise-syntax-error + 'parser-precedences + (format "duplicate precedence declaration for token ~a" d) + prec-decls))] + [else (for ([t (in-list (syntax->list #'(TERM ... ...)))] + #:when (not (memq (syntax->datum t) list-of-terms))) (raise-syntax-error 'parser-precedences - (format "duplicate precedence declaration for token ~a" - d) - prec-decls))) - (else - (for-each - (lambda (a) - (for-each - (lambda (t) - (if (not (memq (syntax-object->datum t) - list-of-terms)) - (raise-syntax-error - 'parser-precedences - (format - "Precedence declared for non-token ~a" - (syntax-object->datum t)) - t))) - (syntax->list a))) - (syntax->list (syntax ((term ...) ...)))) - (for-each - (lambda (type) - (if (not (memq (syntax-object->datum type) - `(left right nonassoc))) - (raise-syntax-error - 'parser-precedences - "Associativity must be left, right or nonassoc" - type))) - (syntax->list (syntax (type ...)))) - (syntax-object->datum prec-decls))))) - (#f null) - (_ - (raise-syntax-error - 'parser-precedences - "Precedence declaration must be of the form (precs (assoc term ...) ...) where assoc is left, right or nonassoc" - prec-decls)))) - - (terms (build-terms list-of-terms precs)) - - (non-terms (map (lambda (non-term) (make-non-term non-term #f)) - list-of-non-terms)) - (term-table (make-hash-table)) - (non-term-table (make-hash-table))) - - (for-each (lambda (t) - (hash-table-put! term-table (gram-sym-symbol t) t)) - terms) - - (for-each (lambda (nt) - (hash-table-put! non-term-table (gram-sym-symbol nt) nt)) - non-terms) - - (let* ( - ;; parse-prod: syntax-object -> gram-sym vector - (parse-prod - (lambda (prod-so) - (syntax-case prod-so () - ((prod-rhs-sym ...) - (andmap identifier? (syntax->list prod-so)) - (begin - (for-each (lambda (t) - (if (memq (syntax-object->datum t) end-terms) - (raise-syntax-error - 'parser-production-rhs - (format "~a is an end token and cannot be used in a production" - (syntax-object->datum t)) - t))) - (syntax->list prod-so)) - (list->vector - (map (lambda (s) - (hash-table-get - term-table - (syntax-object->datum s) - (lambda () - (hash-table-get - non-term-table - (syntax-object->datum s) - (lambda () - (raise-syntax-error - 'parser-production-rhs - (format - "~a is not declared as a terminal or non-terminal" - (syntax-object->datum s)) - s)))))) - (syntax->list prod-so))))) - (_ - (raise-syntax-error - 'parser-production-rhs - "production right-hand-side must have form (symbol ...)" - prod-so))))) - - ;; parse-action: syntax-object * syntax-object -> syntax-object - (parse-action - (lambda (rhs act) - (let-values ([(args biggest) (get-args 1 (syntax->list rhs) src-pos term-defs)]) - (let ([act - (if biggest - (with-syntax ([$n-start-pos (datum->syntax-object (car biggest) '$n-start-pos)] - [$n-end-pos (datum->syntax-object (cdr biggest) '$n-end-pos)]) - #`(let ([$n-start-pos #,(car biggest)] - [$n-end-pos #,(cdr biggest)]) - #,act)) - act)]) - (quasisyntax/loc act - (lambda #,args - #,act)))))) - - ;; parse-prod+action: non-term * syntax-object -> production - (parse-prod+action - (lambda (nt prod-so) - (syntax-case prod-so () - ((prod-rhs action) - (let ((p (parse-prod (syntax prod-rhs)))) - (make-prod - nt - p - #f - (let loop ((i (sub1 (vector-length p)))) - (if (>= i 0) - (let ((gs (vector-ref p i))) - (if (term? gs) - (term-prec gs) - (loop (sub1 i)))) - #f)) - (parse-action (syntax prod-rhs) (syntax action))))) - ((prod-rhs (prec term) action) - (identifier? (syntax term)) - (let ((p (parse-prod (syntax prod-rhs)))) - (make-prod - nt - p - #f - (term-prec - (hash-table-get - term-table - (syntax-object->datum (syntax term)) - (lambda () - (raise-syntax-error - 'parser-production-rhs - (format - "unrecognized terminal ~a in precedence declaration" - (syntax-object->datum (syntax term))) - (syntax term))))) - (parse-action (syntax prod-rhs) (syntax action))))) - (_ - (raise-syntax-error + (format "Precedence declared for non-token ~a" (syntax->datum t)) + t)) + (for ([type (in-list (syntax->list #'(TYPE ...)))] + #:unless (memq (syntax->datum type) `(left right nonassoc))) + (raise-syntax-error + 'parser-precedences + "Associativity must be left, right or nonassoc" + type)) + (syntax->datum prec-decls)]))] + [#f null] + [_ (raise-syntax-error + 'parser-precedences + "Precedence declaration must be of the form (precs (assoc term ...) ...) where assoc is left, right or nonassoc" + prec-decls)])) + + (define terms (build-terms list-of-terms precs)) + (define non-terms (map (λ (non-term) (make-non-term non-term #f)) + list-of-non-terms)) + (define term-table (make-hasheq)) + (define non-term-table (make-hasheq)) + + (for ([t (in-list terms)]) + (hash-set! term-table (gram-sym-symbol t) t)) + + (for ([nt (in-list non-terms)]) + (hash-set! non-term-table (gram-sym-symbol nt) nt)) + + ;; parse-prod: syntax-object -> gram-sym vector + (define (parse-prod prod-so) + (syntax-case prod-so () + [(PROD-RHS-SYM ...) + (andmap identifier? (syntax->list prod-so)) + (begin + (for ([t (in-list (syntax->list prod-so))] + #:when (memq (syntax->datum t) end-terms)) + (raise-syntax-error + 'parser-production-rhs + (format "~a is an end token and cannot be used in a production" (syntax->datum t)) + t)) + (for/vector ([s (in-list (syntax->list prod-so))]) + (cond + [(hash-ref term-table (syntax->datum s) #f)] + [(hash-ref non-term-table (syntax->datum s) #f)] + [else (raise-syntax-error + 'parser-production-rhs + (format "~a is not declared as a terminal or non-terminal" (syntax->datum s)) + s)])))] + [_ (raise-syntax-error + 'parser-production-rhs + "production right-hand-side must have form (symbol ...)" + prod-so)])) + + ;; parse-action: syntax-object * syntax-object -> syntax-object + (define (parse-action rhs act-in) + (define-values (args biggest) (get-args 1 (syntax->list rhs) src-pos term-defs)) + (define act + (if biggest + (with-syntax ([(CAR-BIGGEST . CDR-BIGGEST) biggest] + [$N-START-POS (datum->syntax (car biggest) '$n-start-pos)] + [$N-END-POS (datum->syntax (cdr biggest) '$n-end-pos)] + [ACT-IN act-in]) + #'(let ([$N-START-POS CAR-BIGGEST] + [$N-END-POS CDR-BIGGEST]) + ACT-IN)) + act-in)) + (with-syntax ([ARGS args][ACT act]) + (syntax/loc #'ACT (λ ARGS ACT)))) + + ;; parse-prod+action: non-term * syntax-object -> production + (define (parse-prod+action nt prod-so) + (syntax-case prod-so () + [(PROD-RHS ACTION) + (let ([p (parse-prod #'PROD-RHS)]) + (make-prod + nt + p + #f + (let loop ([i (sub1 (vector-length p))]) + (if (>= i 0) + (let ([gs (vector-ref p i)]) + (if (term? gs) + (term-prec gs) + (loop (sub1 i)))) + #f)) + (parse-action #'PROD-RHS #'ACTION)))] + [(PROD-RHS (PREC TERM) ACTION) + (identifier? #'TERM) + (let ([p (parse-prod #'PROD-RHS)]) + (make-prod + nt + p + #f + (term-prec + (cond + [(hash-ref term-table (syntax->datum #'TERM) #f)] + [else (raise-syntax-error 'parser-production-rhs - "production must have form [(symbol ...) expression] or [(symbol ...) (prec symbol) expression]" - prod-so))))) - - ;; parse-prod-for-nt: syntax-object -> production list - (parse-prods-for-nt - (lambda (prods-so) - (syntax-case prods-so () - ((nt productions ...) - (> (length (syntax->list (syntax (productions ...)))) 0) - (let ((nt (hash-table-get non-term-table - (syntax-object->datum (syntax nt))))) - (map (lambda (p) (parse-prod+action nt p)) - (syntax->list (syntax (productions ...)))))) - (_ - (raise-syntax-error - 'parser-productions - "A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side" - prods-so)))))) - - (for-each - (lambda (sstx ssym) - (unless (memq ssym list-of-non-terms) - (raise-syntax-error - 'parser-start - (format "Start symbol ~a not defined as a non-terminal" ssym) - sstx))) - start start-syms) - - (let* ((starts (map (lambda (x) (make-non-term (gensym) #f)) start-syms)) - (end-non-terms (map (lambda (x) (make-non-term (gensym) #f)) start-syms)) - (parsed-prods (map parse-prods-for-nt (syntax->list prods))) - (start-prods - (map (lambda (start end-non-term) - (list (make-prod start (vector end-non-term) #f #f - (syntax (lambda (x) x))))) - starts end-non-terms)) - (prods - `(,@start-prods - ,@(map - (lambda (end-nt start-sym) - (map - (lambda (end) - (make-prod end-nt - (vector - (hash-table-get non-term-table start-sym) - (hash-table-get term-table end)) - #f - #f - (syntax (lambda (x) x)))) - end-terms)) - end-non-terms start-syms) - ,@parsed-prods))) + (format + "unrecognized terminal ~a in precedence declaration" + (syntax->datum #'TERM)) + #'TERM)])) + (parse-action #'PROD-RHS #'ACTION)))] + [_ (raise-syntax-error + 'parser-production-rhs + "production must have form [(symbol ...) expression] or [(symbol ...) (prec symbol) expression]" + prod-so)])) + + ;; parse-prod-for-nt: syntax-object -> production list + (define (parse-prods-for-nt prods-so) + (syntax-case prods-so () + [(NT PRODUCTIONS ...) + (positive? (length (syntax->list #'(PRODUCTIONS ...)))) + (let ([nt (hash-ref non-term-table (syntax->datum #'NT))]) + (map (λ (p) (parse-prod+action nt p)) (syntax->list #'(PRODUCTIONS ...))))] + [_ (raise-syntax-error + 'parser-productions + "A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side" + prods-so)])) + + (for ([sstx (in-list start)] + [ssym (in-list start-syms)] + #:unless (memq ssym list-of-non-terms)) + (raise-syntax-error + 'parser-start + (format "Start symbol ~a not defined as a non-terminal" ssym) + sstx)) + + (define starts (map (λ (x) (make-non-term (gensym) #f)) start-syms)) + (define end-non-terms (map (λ (x) (make-non-term (gensym) #f)) start-syms)) + (define parsed-prods (map parse-prods-for-nt (syntax->list prods))) + (define start-prods (for/list ([start (in-list starts)] + [end-non-term (in-list end-non-terms)]) + (list (make-prod start (vector end-non-term) #f #f #'values)))) + (define new-prods + (append start-prods + (for/list ([end-nt (in-list end-non-terms)] + [start-sym (in-list start-syms)]) + (for/list ([end (in-list end-terms)]) + (make-prod end-nt + (vector + (hash-ref non-term-table start-sym) + (hash-ref term-table end)) + #f + #f + #'values))) + parsed-prods)) - (make-object grammar% - prods - (map car start-prods) - terms - (append starts (append end-non-terms non-terms)) - (map (lambda (term-name) - (hash-table-get term-table term-name)) - end-terms))))))) + (make-object grammar% + new-prods + (map car start-prods) + terms + (append starts (append end-non-terms non-terms)) + (map (λ (term-name) (hash-ref term-table term-name)) end-terms))) diff --git a/br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt index e9b4d3b..ed21c37 100644 --- a/br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt @@ -1,277 +1,252 @@ -(module lalr mzscheme +#lang racket/base +(require "lr0.rkt" + "grammar.rkt" + racket/list + racket/class) - ;; Compute LALR lookaheads from DeRemer and Pennello 1982 +;; Compute LALR lookaheads from DeRemer and Pennello 1982 - (require "lr0.rkt" - "grammar.rkt" - mzlib/list - mzlib/class) - - (provide compute-LA) +(provide compute-LA) - ;; compute-DR: LR0-automaton * grammar -> (trans-key -> term set) - ;; computes for each state, non-term transition pair, the terminals - ;; which can transition out of the resulting state - ;; output term set is represented in bit-vector form - (define (compute-DR a g) - (lambda (tk) - (let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) - (term-list->bit-vector - (filter - (lambda (term) - (send a run-automaton r term)) - (send g get-terms)))))) +;; compute-DR: LR0-automaton * grammar -> (trans-key -> term set) +;; computes for each state, non-term transition pair, the terminals +;; which can transition out of the resulting state +;; output term set is represented in bit-vector form +(define ((compute-DR a g) tk) + (define r (send a run-automaton (trans-key-st tk) (trans-key-gs tk))) + (term-list->bit-vector + (filter (λ (term) (send a run-automaton r term)) (send g get-terms)))) - ;; compute-reads: - ;; LR0-automaton * grammar -> (trans-key -> trans-key list) - (define (compute-reads a g) - (let ((nullable-non-terms - (filter (lambda (nt) (send g nullable-non-term? nt)) - (send g get-non-terms)))) - (lambda (tk) - (let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) - (map (lambda (x) (make-trans-key r x)) - (filter (lambda (non-term) (send a run-automaton r non-term)) - nullable-non-terms)))))) +;; compute-reads: +;; LR0-automaton * grammar -> (trans-key -> trans-key list) +(define (compute-reads a g) + (define nullable-non-terms (filter (λ (nt) (send g nullable-non-term? nt)) (send g get-non-terms))) + (λ (tk) + (define r (send a run-automaton (trans-key-st tk) (trans-key-gs tk))) + (for/list ([non-term (in-list nullable-non-terms)] + #:when (send a run-automaton r non-term)) + (make-trans-key r non-term)))) - ;; compute-read: LR0-automaton * grammar -> (trans-key -> term set) - ;; output term set is represented in bit-vector form - (define (compute-read a g) - (let* ((dr (compute-DR a g)) - (reads (compute-reads a g))) - (digraph-tk->terml (send a get-mapped-non-term-keys) - reads - dr - (send a get-num-states)))) - ;; returns the list of all k such that state k transitions to state start on the - ;; transitions in rhs (in order) - (define (run-lr0-backward a rhs dot-pos start num-states) - (let loop ((states (list start)) - (i (sub1 dot-pos))) - (cond - ((< i 0) states) - (else (loop (send a run-automaton-back states (vector-ref rhs i)) - (sub1 i)))))) +;; compute-read: LR0-automaton * grammar -> (trans-key -> term set) +;; output term set is represented in bit-vector form +(define (compute-read a g) + (define dr (compute-DR a g)) + (define reads (compute-reads a g)) + (digraph-tk->terml (send a get-mapped-non-term-keys) + reads + dr + (send a get-num-states))) +;; returns the list of all k such that state k transitions to state start on the +;; transitions in rhs (in order) +(define (run-lr0-backward a rhs dot-pos start num-states) + (let loop ([states (list start)] + [i (sub1 dot-pos)]) + (cond + [(< i 0) states] + [else (loop (send a run-automaton-back states (vector-ref rhs i)) + (sub1 i))]))) - ;; prod->items-for-include: grammar * prod * non-term -> lr0-item list - ;; returns the list of all (B -> beta . nt gamma) such that prod = (B -> beta nt gamma) - ;; and gamma =>* epsilon - (define (prod->items-for-include g prod nt) - (let* ((rhs (prod-rhs prod)) - (rhs-l (vector-length rhs))) - (append (if (and (> rhs-l 0) (eq? nt (vector-ref rhs (sub1 rhs-l)))) - (list (make-item prod (sub1 rhs-l))) - null) - (let loop ((i (sub1 rhs-l))) - (cond - ((and (> i 0) - (non-term? (vector-ref rhs i)) - (send g nullable-non-term? (vector-ref rhs i))) - (if (eq? nt (vector-ref rhs (sub1 i))) - (cons (make-item prod (sub1 i)) - (loop (sub1 i))) - (loop (sub1 i)))) - (else null)))))) +;; prod->items-for-include: grammar * prod * non-term -> lr0-item list +;; returns the list of all (B -> beta . nt gamma) such that prod = (B -> beta nt gamma) +;; and gamma =>* epsilon +(define (prod->items-for-include g prod nt) + (define rhs (prod-rhs prod)) + (define rhs-l (vector-length rhs)) + (append (if (and (> rhs-l 0) (eq? nt (vector-ref rhs (sub1 rhs-l)))) + (list (make-item prod (sub1 rhs-l))) + null) + (let loop ([i (sub1 rhs-l)]) + (cond + [(and (> i 0) + (non-term? (vector-ref rhs i)) + (send g nullable-non-term? (vector-ref rhs i))) + (if (eq? nt (vector-ref rhs (sub1 i))) + (cons (make-item prod (sub1 i)) + (loop (sub1 i))) + (loop (sub1 i)))] + [else null])))) - ;; prod-list->items-for-include: grammar * prod list * non-term -> lr0-item list - ;; return the list of all (B -> beta . nt gamma) such that (B -> beta nt gamma) in prod-list - ;; and gamma =>* epsilon - (define (prod-list->items-for-include g prod-list nt) - (apply append (map (lambda (prod) (prod->items-for-include g prod nt)) prod-list))) +;; prod-list->items-for-include: grammar * prod list * non-term -> lr0-item list +;; return the list of all (B -> beta . nt gamma) such that (B -> beta nt gamma) in prod-list +;; and gamma =>* epsilon +(define (prod-list->items-for-include g prod-list nt) + (apply append (map (λ (prod) (prod->items-for-include g prod nt)) prod-list))) - ;; comput-includes: lr0-automaton * grammar -> (trans-key -> trans-key list) - (define (compute-includes a g) - (let ((num-states (send a get-num-states)) - (items-for-input-nt (make-vector (send g get-num-non-terms) null))) - (for-each - (lambda (input-nt) - (vector-set! items-for-input-nt (non-term-index input-nt) - (prod-list->items-for-include g (send g get-prods) input-nt))) - (send g get-non-terms)) - (lambda (tk) - (let* ((goal-state (trans-key-st tk)) - (non-term (trans-key-gs tk)) - (items (vector-ref items-for-input-nt (non-term-index non-term)))) - (trans-key-list-remove-dups - (apply append - (map (lambda (item) - (let* ((prod (item-prod item)) - (rhs (prod-rhs prod)) - (lhs (prod-lhs prod))) - (map (lambda (state) - (make-trans-key state lhs)) - (run-lr0-backward a - rhs - (item-dot-pos item) - goal-state - num-states)))) - items))))))) +;; comput-includes: lr0-automaton * grammar -> (trans-key -> trans-key list) +(define (compute-includes a g) + (define num-states (send a get-num-states)) + (define items-for-input-nt (make-vector (send g get-num-non-terms) null)) + (for ([input-nt (in-list (send g get-non-terms))]) + (vector-set! items-for-input-nt (non-term-index input-nt) + (prod-list->items-for-include g (send g get-prods) input-nt))) + (λ (tk) + (define goal-state (trans-key-st tk)) + (define non-term (trans-key-gs tk)) + (define items (vector-ref items-for-input-nt (non-term-index non-term))) + (trans-key-list-remove-dups + (apply append + (for/list ([item (in-list items)]) + (define prod (item-prod item)) + (define rhs (prod-rhs prod)) + (define lhs (prod-lhs prod)) + (map (λ (state) (make-trans-key state lhs)) + (run-lr0-backward a + rhs + (item-dot-pos item) + goal-state + num-states))))))) - ;; compute-lookback: lr0-automaton * grammar -> (kernel * proc -> trans-key list) - (define (compute-lookback a g) - (let ((num-states (send a get-num-states))) - (lambda (state prod) - (map (lambda (k) (make-trans-key k (prod-lhs prod))) - (run-lr0-backward a (prod-rhs prod) (vector-length (prod-rhs prod)) state num-states))))) +;; compute-lookback: lr0-automaton * grammar -> (kernel * proc -> trans-key list) +(define (compute-lookback a g) + (define num-states (send a get-num-states)) + (λ (state prod) + (map (λ (k) (make-trans-key k (prod-lhs prod))) + (run-lr0-backward a (prod-rhs prod) (vector-length (prod-rhs prod)) state num-states)))) - ;; compute-follow: LR0-automaton * grammar -> (trans-key -> term set) - ;; output term set is represented in bit-vector form - (define (compute-follow a g includes) - (let ((read (compute-read a g))) - (digraph-tk->terml (send a get-mapped-non-term-keys) - includes - read - (send a get-num-states)))) +;; compute-follow: LR0-automaton * grammar -> (trans-key -> term set) +;; output term set is represented in bit-vector form +(define (compute-follow a g includes) + (define read (compute-read a g)) + (digraph-tk->terml (send a get-mapped-non-term-keys) + includes + read + (send a get-num-states))) - ;; compute-LA: LR0-automaton * grammar -> kernel * prod -> term set - ;; output term set is represented in bit-vector form - (define (compute-LA a g) - (let* ((includes (compute-includes a g)) - (lookback (compute-lookback a g)) - (follow (compute-follow a g includes))) - (lambda (k p) - (let* ((l (lookback k p)) - (f (map follow l))) - (apply bitwise-ior (cons 0 f)))))) +;; compute-LA: LR0-automaton * grammar -> kernel * prod -> term set +;; output term set is represented in bit-vector form +(define (compute-LA a g) + (define includes (compute-includes a g)) + (define lookback (compute-lookback a g)) + (define follow (compute-follow a g includes)) + (λ (k p) + (define l (lookback k p)) + (define f (map follow l)) + (apply bitwise-ior (cons 0 f)))) + - (define (print-DR dr a g) - (print-input-st-sym dr "DR" a g print-output-terms)) - (define (print-Read Read a g) - (print-input-st-sym Read "Read" a g print-output-terms)) - (define (print-includes i a g) - (print-input-st-sym i "includes" a g print-output-st-nt)) - (define (print-lookback l a g) - (print-input-st-prod l "lookback" a g print-output-st-nt)) - (define (print-follow f a g) - (print-input-st-sym f "follow" a g print-output-terms)) - (define (print-LA l a g) - (print-input-st-prod l "LA" a g print-output-terms)) +(define (print-DR dr a g) + (print-input-st-sym dr "DR" a g print-output-terms)) +(define (print-Read Read a g) + (print-input-st-sym Read "Read" a g print-output-terms)) +(define (print-includes i a g) + (print-input-st-sym i "includes" a g print-output-st-nt)) +(define (print-lookback l a g) + (print-input-st-prod l "lookback" a g print-output-st-nt)) +(define (print-follow f a g) + (print-input-st-sym f "follow" a g print-output-terms)) +(define (print-LA l a g) + (print-input-st-prod l "LA" a g print-output-terms)) - (define (print-input-st-sym f name a g print-output) - (printf "~a:\n" name) - (send a for-each-state - (lambda (state) - (for-each - (lambda (non-term) - (let ((res (f (make-trans-key state non-term)))) - (if (not (null? res)) - (printf "~a(~a, ~a) = ~a\n" - name - state - (gram-sym-symbol non-term) - (print-output res))))) - (send g get-non-terms)))) - (newline)) +(define (print-input-st-sym f name a g print-output) + (printf "~a:\n" name) + (send a for-each-state + (λ (state) + (for-each + (λ (non-term) + (let ([res (f (make-trans-key state non-term))]) + (when (not (null? res)) + (printf "~a(~a, ~a) = ~a\n" + name + state + (gram-sym-symbol non-term) + (print-output res))))) + (send g get-non-terms)))) + (newline)) - (define (print-input-st-prod f name a g print-output) - (printf "~a:\n" name) - (send a for-each-state - (lambda (state) - (for-each - (lambda (non-term) +(define (print-input-st-prod f name a g print-output) + (printf "~a:\n" name) + (send a for-each-state + (λ (state) (for-each - (lambda (prod) - (let ((res (f state prod))) - (if (not (null? res)) - (printf "~a(~a, ~a) = ~a\n" - name - (kernel-index state) - (prod-index prod) - (print-output res))))) - (send g get-prods-for-non-term non-term))) - (send g get-non-terms))))) + (λ (non-term) + (for-each + (λ (prod) + (let ([res (f state prod)]) + (when (not (null? res)) + (printf "~a(~a, ~a) = ~a\n" + name + (kernel-index state) + (prod-index prod) + (print-output res))))) + (send g get-prods-for-non-term non-term))) + (send g get-non-terms))))) - (define (print-output-terms r) - (map - (lambda (p) - (gram-sym-symbol p)) - r)) +(define (print-output-terms r) + (map gram-sym-symbol r)) - (define (print-output-st-nt r) - (map - (lambda (p) - (list - (kernel-index (trans-key-st p)) - (gram-sym-symbol (trans-key-gs p)))) - r)) +(define (print-output-st-nt r) + (map (λ (p) (list (kernel-index (trans-key-st p)) (gram-sym-symbol (trans-key-gs p)))) r)) - ;; init-tk-map : int -> (vectorof hashtable?) - (define (init-tk-map n) - (let ((v (make-vector n #f))) - (let loop ((i (sub1 (vector-length v)))) - (when (>= i 0) - (vector-set! v i (make-hash-table)) - (loop (sub1 i)))) - v)) +;; init-tk-map : int -> (vectorof hashtable?) +(define (init-tk-map n) + (define v (make-vector n #f)) + (let loop ([i (sub1 (vector-length v))]) + (when (>= i 0) + (vector-set! v i (make-hasheq)) + (loop (sub1 i)))) + v) - ;; lookup-tk-map : (vectorof (symbol? int hashtable)) -> trans-key? -> int - (define (lookup-tk-map map) - (lambda (tk) - (let ((st (trans-key-st tk)) - (gs (trans-key-gs tk))) - (hash-table-get (vector-ref map (kernel-index st)) - (gram-sym-symbol gs) - (lambda () 0))))) +;; lookup-tk-map : (vectorof (symbol? int hashtable)) -> trans-key? -> int +(define ((lookup-tk-map map) tk) + (define st (trans-key-st tk)) + (define gs (trans-key-gs tk)) + (hash-ref (vector-ref map (kernel-index st)) + (gram-sym-symbol gs) + (λ () 0))) - ;; add-tk-map : (vectorof (symbol? int hashtable)) -> trans-key int -> - (define (add-tk-map map) - (lambda (tk v) - (let ((st (trans-key-st tk)) - (gs (trans-key-gs tk))) - (hash-table-put! (vector-ref map (kernel-index st)) - (gram-sym-symbol gs) - v)))) +;; add-tk-map : (vectorof (symbol? int hashtable)) -> trans-key int -> +(define ((add-tk-map map) tk v) + (define st (trans-key-st tk)) + (define gs (trans-key-gs tk)) + (hash-set! (vector-ref map (kernel-index st)) + (gram-sym-symbol gs) + v)) - ;; digraph-tk->terml: - ;; (trans-key list) * (trans-key -> trans-key list) * (trans-key -> term list) * int * int * int - ;; -> (trans-key -> term list) - ;; DeRemer and Pennello 1982 - ;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} - ;; A specialization of digraph in the file graph.rkt - (define (digraph-tk->terml nodes edges f- num-states) - (letrec [ - ;; Will map elements of trans-key to term sets represented as bit vectors - (results (init-tk-map num-states)) +;; digraph-tk->terml: +;; (trans-key list) * (trans-key -> trans-key list) * (trans-key -> term list) * int * int * int +;; -> (trans-key -> term list) +;; DeRemer and Pennello 1982 +;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} +;; A specialization of digraph in the file graph.rkt +(define (digraph-tk->terml nodes edges f- num-states) + ;; Will map elements of trans-key to term sets represented as bit vectors + (define results (init-tk-map num-states)) - ;; Maps elements of trans-keys to integers. - (N (init-tk-map num-states)) + ;; Maps elements of trans-keys to integers. + (define N (init-tk-map num-states)) - (get-N (lookup-tk-map N)) - (set-N (add-tk-map N)) - (get-f (lookup-tk-map results)) - (set-f (add-tk-map results)) + (define get-N (lookup-tk-map N)) + (define set-N (add-tk-map N)) + (define get-f (lookup-tk-map results)) + (define set-f (add-tk-map results)) - (stack null) - (push (lambda (x) - (set! stack (cons x stack)))) - (pop (lambda () - (begin0 - (car stack) - (set! stack (cdr stack))))) - (depth (lambda () (length stack))) + (define stack null) + (define (push x) (set! stack (cons x stack))) + (define (pop) (begin0 + (car stack) + (set! stack (cdr stack)))) + (define (depth) (length stack)) + + ;; traverse: 'a -> + (define (traverse x) + (push x) + (let ([d (depth)]) + (set-N x d) + (set-f x (f- x)) + (for-each (λ (y) + (when (= 0 (get-N y)) + (traverse y)) + (set-f x (bitwise-ior (get-f x) (get-f y))) + (set-N x (min (get-N x) (get-N y)))) + (edges x)) + (when (= d (get-N x)) + (let loop ([p (pop)]) + (set-N p +inf.0) + (set-f p (get-f x)) + (unless (equal? x p) + (loop (pop))))))) - ;; traverse: 'a -> - (traverse - (lambda (x) - (push x) - (let ((d (depth))) - (set-N x d) - (set-f x (f- x)) - (for-each (lambda (y) - (when (= 0 (get-N y)) - (traverse y)) - (set-f x (bitwise-ior (get-f x) (get-f y))) - (set-N x (min (get-N x) (get-N y)))) - (edges x)) - (when (= d (get-N x)) - (let loop ((p (pop))) - (set-N p +inf.0) - (set-f p (get-f x)) - (unless (equal? x p) - (loop (pop))))))))] - (for-each (lambda (x) - (when (= 0 (get-N x)) - (traverse x))) - nodes) - get-f)) -) + (for ([x (in-list nodes)] + #:when (zero? (get-N x))) + (traverse x)) + get-f) diff --git a/br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt index f237735..a3b1fcc 100644 --- a/br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt @@ -1,372 +1,314 @@ -(module lr0 mzscheme +#lang racket/base +(require "grammar.rkt" + "graph.rkt" + racket/list + racket/class) - ;; Handle the LR0 automaton +;; Handle the LR0 automaton - (require "grammar.rkt" - "graph.rkt" - mzlib/list - mzlib/class) - - (provide build-lr0-automaton lr0% - (struct trans-key (st gs)) trans-key-list-remove-dups - kernel-items kernel-index) +(provide build-lr0-automaton lr0% + (struct-out trans-key) trans-key-list-remove-dups + kernel-items kernel-index) - ;; kernel = (make-kernel (LR1-item list) index) - ;; the list must be kept sorted according to item - ;; (vectorof (symbol X hashtable)) - (define (build-transition-table num-states assoc) - (let ((transitions (make-vector num-states #f))) - (let loop ((i (sub1 (vector-length transitions)))) - (when (>= i 0) - (vector-set! transitions i (make-hash-table)) - (loop (sub1 i)))) - (for-each - (lambda (trans-key/kernel) - (let ((tk (car trans-key/kernel))) - (hash-table-put! (vector-ref transitions (kernel-index (trans-key-st tk))) - (gram-sym-symbol (trans-key-gs tk)) - (cdr trans-key/kernel)))) - assoc) - transitions)) +;; build-transition-table : int (listof (cons/c trans-key X) -> +;; (vectorof (symbol X hashtable)) +(define (build-transition-table num-states assoc) + (define transitions (make-vector num-states #f)) + (let loop ([i (sub1 (vector-length transitions))]) + (when (>= i 0) + (vector-set! transitions i (make-hasheq)) + (loop (sub1 i)))) + (for ([trans-key/kernel (in-list assoc)]) + (define tk (car trans-key/kernel)) + (hash-set! (vector-ref transitions (kernel-index (trans-key-st tk))) + (gram-sym-symbol (trans-key-gs tk)) + (cdr trans-key/kernel))) + transitions) - ;; reverse-assoc : (listof (cons/c trans-key? kernel?)) -> - ;; (listof (cons/c trans-key? (listof kernel?))) - (define (reverse-assoc assoc) - (let ((reverse-hash (make-hash-table 'equal)) - (hash-table-add! - (lambda (ht k v) - (hash-table-put! ht k (cons v (hash-table-get ht k (lambda () null))))))) - (for-each - (lambda (trans-key/kernel) - (let ((tk (car trans-key/kernel))) - (hash-table-add! reverse-hash - (make-trans-key (cdr trans-key/kernel) - (trans-key-gs tk)) - (trans-key-st tk)))) - assoc) - (hash-table-map reverse-hash cons))) +;; reverse-assoc : (listof (cons/c trans-key? kernel?)) -> +;; (listof (cons/c trans-key? (listof kernel?))) +(define (reverse-assoc assoc) + (define reverse-hash (make-hash)) + (define (hash-table-add! ht k v) + (hash-set! ht k (cons v (hash-ref ht k (λ () null))))) + (for ([trans-key/kernel (in-list assoc)]) + (define tk (car trans-key/kernel)) + (hash-table-add! reverse-hash + (make-trans-key (cdr trans-key/kernel) + (trans-key-gs tk)) + (trans-key-st tk))) + (hash-map reverse-hash cons)) - ;; kernel-list-remove-duplicates - ;; LR0-automaton = object of class lr0% - (define lr0% - (class object% - (super-instantiate ()) - ;; term-assoc : (listof (cons/c trans-key? kernel?)) - ;; non-term-assoc : (listof (cons/c trans-key? kernel?)) - ;; states : (vectorof kernel?) - ;; epsilons : ??? - (init-field term-assoc non-term-assoc states epsilons) +;; kernel-list-remove-duplicates +;; LR0-automaton = object of class lr0% +(define lr0% + (class object% + (super-instantiate ()) + ;; term-assoc : (listof (cons/c trans-key? kernel?)) + ;; non-term-assoc : (listof (cons/c trans-key? kernel?)) + ;; states : (vectorof kernel?) + ;; epsilons : ??? + (init-field term-assoc non-term-assoc states epsilons) - (define transitions (build-transition-table (vector-length states) - (append term-assoc non-term-assoc))) + (define transitions (build-transition-table (vector-length states) + (append term-assoc non-term-assoc))) - (define reverse-term-assoc (reverse-assoc term-assoc)) - (define reverse-non-term-assoc (reverse-assoc non-term-assoc)) - (define reverse-transitions - (build-transition-table (vector-length states) - (append reverse-term-assoc reverse-non-term-assoc))) + (define reverse-term-assoc (reverse-assoc term-assoc)) + (define reverse-non-term-assoc (reverse-assoc non-term-assoc)) + (define reverse-transitions + (build-transition-table (vector-length states) + (append reverse-term-assoc reverse-non-term-assoc))) - (define mapped-non-terms (map car non-term-assoc)) + (define mapped-non-terms (map car non-term-assoc)) - (define/public (get-mapped-non-term-keys) - mapped-non-terms) + (define/public (get-mapped-non-term-keys) + mapped-non-terms) - (define/public (get-num-states) - (vector-length states)) + (define/public (get-num-states) + (vector-length states)) - (define/public (get-epsilon-trans) - epsilons) + (define/public (get-epsilon-trans) + epsilons) - (define/public (get-transitions) - (append term-assoc non-term-assoc)) + (define/public (get-transitions) + (append term-assoc non-term-assoc)) - ;; for-each-state : (state ->) -> - ;; Iteration over the states in an automaton - (define/public (for-each-state f) - (let ((num-states (vector-length states))) - (let loop ((i 0)) - (if (< i num-states) - (begin - (f (vector-ref states i)) - (loop (add1 i))))))) + ;; for-each-state : (state ->) -> + ;; Iteration over the states in an automaton + (define/public (for-each-state f) + (define num-states (vector-length states)) + (let loop ([i 0]) + (when (< i num-states) + (f (vector-ref states i)) + (loop (add1 i))))) - ;; run-automaton: kernel? gram-sym? -> (union kernel #f) - ;; returns the state reached from state k on input s, or #f when k - ;; has no transition on s - (define/public (run-automaton k s) - (hash-table-get (vector-ref transitions (kernel-index k)) - (gram-sym-symbol s) - (lambda () #f))) + ;; run-automaton: kernel? gram-sym? -> (union kernel #f) + ;; returns the state reached from state k on input s, or #f when k + ;; has no transition on s + (define/public (run-automaton k s) + (hash-ref (vector-ref transitions (kernel-index k)) + (gram-sym-symbol s) + (λ () #f))) - ;; run-automaton-back : (listof kernel?) gram-sym? -> (listof kernel) - ;; returns the list of states that can reach k by transitioning on s. - (define/public (run-automaton-back k s) - (apply append - (map - (lambda (k) - (hash-table-get (vector-ref reverse-transitions (kernel-index k)) - (gram-sym-symbol s) - (lambda () null))) - k))))) + ;; run-automaton-back : (listof kernel?) gram-sym? -> (listof kernel) + ;; returns the list of states that can reach k by transitioning on s. + (define/public (run-automaton-back k s) + (for*/list ([k (in-list k)] + [val (in-list (hash-ref (vector-ref reverse-transitions (kernel-index k)) + (gram-sym-symbol s) + (λ () null)))]) + val)))) - (define (union comp (eq? a b) - (define (kernel->string k) - (apply string-append - `("{" ,@(map (lambda (i) (string-append (item->string i) ", ")) - (kernel-items k)) - "}"))) +;; The kernels in the automaton are represented cannonically. +;; That is (equal? a b) <=> (eq? a b) +(define (kernel->string k) + (apply string-append + `("{" ,@(map (λ (i) (string-append (item->string i) ", ")) + (kernel-items k)) + "}"))) - ;; build-LR0-automaton: grammar -> LR0-automaton - ;; Constructs the kernels of the sets of LR(0) items of g - (define (build-lr0-automaton grammar) -; (printf "LR(0) automaton:\n") - (letrec ( - (epsilons (make-hash-table 'equal)) - (grammar-symbols (append (send grammar get-non-terms) - (send grammar get-terms))) - ;; first-non-term: non-term -> non-term list - ;; given a non-terminal symbol C, return those non-terminal - ;; symbols A s.t. C -> An for some string of terminals and - ;; non-terminals n where -> means a rightmost derivation in many - ;; steps. Assumes that each non-term can be reduced to a string - ;; of terms. - (first-non-term - (digraph (send grammar get-non-terms) - (lambda (nt) - (filter non-term? - (map (lambda (prod) - (sym-at-dot (make-item prod 0))) - (send grammar get-prods-for-non-term nt)))) - (lambda (nt) (list nt)) - (union non-term LR0-automaton +;; Constructs the kernels of the sets of LR(0) items of g +(define (build-lr0-automaton grammar) + ; (printf "LR(0) automaton:\n") + (define epsilons (make-hash)) + (define grammar-symbols (append (send grammar get-non-terms) + (send grammar get-terms))) + ;; first-non-term: non-term -> non-term list + ;; given a non-terminal symbol C, return those non-terminal + ;; symbols A s.t. C -> An for some string of terminals and + ;; non-terminals n where -> means a rightmost derivation in many + ;; steps. Assumes that each non-term can be reduced to a string + ;; of terms. + (define first-non-term + (digraph (send grammar get-non-terms) + (λ (nt) + (filter non-term? + (map (λ (prod) (sym-at-dot (make-item prod 0))) + (send grammar get-prods-for-non-term nt)))) + (λ (nt) (list nt)) + (union non-term LR1-item list - ;; Creates a set of items containing i s.t. if A -> n.Xm is in it, - ;; X -> .o is in it too. - (LR0-closure - (lambda (i) - (cond - ((null? i) null) - (else - (let ((next-gsym (sym-at-dot (car i)))) - (cond - ((non-term? next-gsym) - (cons (car i) - (append - (apply append - (map (lambda (non-term) - (map (lambda (x) - (make-item x 0)) - (send grammar - get-prods-for-non-term - non-term))) - (first-non-term next-gsym))) - (LR0-closure (cdr i))))) - (else - (cons (car i) (LR0-closure (cdr i)))))))))) - - - ;; maps trans-keys to kernels - (automaton-term null) - (automaton-non-term null) + ;; closure: LR1-item list -> LR1-item list + ;; Creates a set of items containing i s.t. if A -> n.Xm is in it, + ;; X -> .o is in it too. + (define (LR0-closure i) + (cond + [(null? i) null] + [else + (define next-gsym (sym-at-dot (car i))) + (cond + [(non-term? next-gsym) + (cons (car i) + (append + (for*/list ([non-term (in-list (first-non-term next-gsym))] + [x (in-list (send grammar + get-prods-for-non-term + non-term))]) + (make-item x 0)) + (LR0-closure (cdr i))))] + [else (cons (car i) (LR0-closure (cdr i)))])])) + + ;; maps trans-keys to kernels + (define automaton-term null) + (define automaton-non-term null) - ;; keeps the kernels we have seen, so we can have a unique - ;; list for each kernel - (kernels (make-hash-table 'equal)) - - (counter 0) + ;; keeps the kernels we have seen, so we can have a unique + ;; list for each kernel + (define kernels (make-hash)) + (define counter 0) - ;; goto: LR1-item list -> LR1-item list list - ;; creates new kernels by moving the dot in each item in the - ;; LR0-closure of kernel to the right, and grouping them by - ;; the term/non-term moved over. Returns the kernels not - ;; yet seen, and places the trans-keys into automaton - (goto - (lambda (kernel) - (let ( - ;; maps a gram-syms to a list of items - (table (make-hash-table)) + ;; goto: LR1-item list -> LR1-item list list + ;; creates new kernels by moving the dot in each item in the + ;; LR0-closure of kernel to the right, and grouping them by + ;; the term/non-term moved over. Returns the kernels not + ;; yet seen, and places the trans-keys into automaton + (define (goto kernel) + ;; maps a gram-syms to a list of items + (define table (make-hasheq)) - ;; add-item!: - ;; (symbol (listof item) hashtable) item? -> - ;; adds i into the table grouped with the grammar - ;; symbol following its dot - (add-item! - (lambda (table i) - (let ((gs (sym-at-dot i))) - (cond - (gs - (let ((already - (hash-table-get table - (gram-sym-symbol gs) - (lambda () null)))) - (unless (member i already) - (hash-table-put! table - (gram-sym-symbol gs) - (cons i already))))) - ((= 0 (vector-length (prod-rhs (item-prod i)))) - (let ((current (hash-table-get epsilons - kernel - (lambda () null)))) - (hash-table-put! epsilons - kernel - (cons i current))))))))) - - ;; Group the items of the LR0 closure of the kernel - ;; by the character after the dot - (for-each (lambda (item) - (add-item! table item)) - (LR0-closure (kernel-items kernel))) + ;; add-item!: + ;; (symbol (listof item) hashtable) item? -> + ;; adds i into the table grouped with the grammar + ;; symbol following its dot + (define (add-item! table i) + (define gs (sym-at-dot i)) + (cond + [gs (define already (hash-ref table (gram-sym-symbol gs) (λ () null))) + (unless (member i already) + (hash-set! table (gram-sym-symbol gs) (cons i already)))] + ((zero? (vector-length (prod-rhs (item-prod i)))) + (define current (hash-ref epsilons kernel (λ () null))) + (hash-set! epsilons kernel (cons i current))))) + + ;; Group the items of the LR0 closure of the kernel + ;; by the character after the dot + (for ([item (in-list (LR0-closure (kernel-items kernel)))]) + (add-item! table item)) - ;; each group is a new kernel, with the dot advanced. - ;; sorts the items in a kernel so kernels can be compared - ;; with equal? for using the table kernels to make sure - ;; only one representitive of each kernel is created - (filter - (lambda (x) x) - (map - (lambda (i) - (let* ((gs (car i)) - (items (cadr i)) - (new #f) - (new-kernel (sort - (filter (lambda (x) x) - (map move-dot-right items)) - item ~a on ~a\n" - (kernel->string kernel) - (kernel->string unique-kernel) - (gram-sym-symbol gs)) - (if new - unique-kernel - #f))) - (let loop ((gsyms grammar-symbols)) - (cond - ((null? gsyms) null) - (else - (let ((items (hash-table-get table - (gram-sym-symbol (car gsyms)) - (lambda () null)))) - (cond - ((null? items) (loop (cdr gsyms))) - (else - (cons (list (car gsyms) items) - (loop (cdr gsyms)))))))))))))) + (hash-set! kernels new-kernel k) + k))) + (if (term? gs) + (set! automaton-term (cons (cons (make-trans-key kernel gs) + unique-kernel) + automaton-term)) + (set! automaton-non-term (cons (cons (make-trans-key kernel gs) + unique-kernel) + automaton-non-term))) + #;(printf "~a -> ~a on ~a\n" + (kernel->string kernel) + (kernel->string unique-kernel) + (gram-sym-symbol gs)) + (and new unique-kernel)))) - (starts - (map (lambda (init-prod) (list (make-item init-prod 0))) - (send grammar get-init-prods))) - (startk - (map (lambda (start) - (let ((k (make-kernel start counter))) - (hash-table-put! kernels start k) - (set! counter (add1 counter)) - k)) - starts)) - (new-kernels (make-queue))) + (define starts (map (λ (init-prod) (list (make-item init-prod 0))) + (send grammar get-init-prods))) + (define startk (for/list ([start (in-list starts)]) + (define k (make-kernel start counter)) + (hash-set! kernels start k) + (set! counter (add1 counter)) + k)) + (define new-kernels (make-queue)) + (let loop ([old-kernels startk] + [seen-kernels null]) + (cond + [(and (empty-queue? new-kernels) (null? old-kernels)) + (make-object lr0% automaton-term automaton-non-term + (list->vector (reverse seen-kernels)) epsilons)] + [(null? old-kernels) (loop (deq! new-kernels) seen-kernels)] + [else + (enq! new-kernels (goto (car old-kernels))) + (loop (cdr old-kernels) (cons (car old-kernels) seen-kernels))]))) - (let loop ((old-kernels startk) - (seen-kernels null)) - (cond - ((and (empty-queue? new-kernels) (null? old-kernels)) - (make-object lr0% - automaton-term - automaton-non-term - (list->vector (reverse seen-kernels)) - epsilons)) - ((null? old-kernels) - (loop (deq! new-kernels) seen-kernels)) - (else - (enq! new-kernels (goto (car old-kernels))) - (loop (cdr old-kernels) (cons (car old-kernels) seen-kernels))))))) +(define-struct q (f l) #:inspector (make-inspector) #:mutable) +(define (empty-queue? q) (null? (q-f q))) +(define (make-queue) (make-q null null)) + +(define (enq! q i) + (cond + [(empty-queue? q) + (let ([i (mcons i null)]) + (set-q-l! q i) + (set-q-f! q i))] + [else + (set-mcdr! (q-l q) (mcons i null)) + (set-q-l! q (mcdr (q-l q)))])) + + +(define (deq! q) + (begin0 + (mcar (q-f q)) + (set-q-f! q (mcdr (q-f q))))) - (define-struct q (f l) (make-inspector)) - (define (empty-queue? q) - (null? (q-f q))) - (define (make-queue) - (make-q null null)) - (define (enq! q i) - (if (empty-queue? q) - (let ((i (mcons i null))) - (set-q-l! q i) - (set-q-f! q i)) - (begin - (set-mcdr! (q-l q) (mcons i null)) - (set-q-l! q (mcdr (q-l q)))))) - (define (deq! q) - (begin0 - (mcar (q-f q)) - (set-q-f! q (mcdr (q-f q))))) -) diff --git a/br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt index 2a39b36..11e4557 100644 --- a/br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt @@ -1,54 +1,54 @@ -(module parser-actions mzscheme - (require "grammar.rkt") - (provide (all-defined-except make-reduce make-reduce*) - (rename make-reduce* make-reduce)) +#lang racket/base +(require "grammar.rkt") +(provide (except-out (all-defined-out) make-reduce make-reduce*) + (rename-out [make-reduce* make-reduce])) - ;; An action is - ;; - (make-shift int) - ;; - (make-reduce prod runtime-action) - ;; - (make-accept) - ;; - (make-goto int) - ;; - (no-action) - ;; A reduce contains a runtime-reduce so that sharing of the reduces can - ;; be easily transferred to sharing of runtime-reduces. +;; An action is +;; - (make-shift int) +;; - (make-reduce prod runtime-action) +;; - (make-accept) +;; - (make-goto int) +;; - (no-action) +;; A reduce contains a runtime-reduce so that sharing of the reduces can +;; be easily transferred to sharing of runtime-reduces. - (define-struct action () (make-inspector)) - (define-struct (shift action) (state) (make-inspector)) - (define-struct (reduce action) (prod runtime-reduce) (make-inspector)) - (define-struct (accept action) () (make-inspector)) - (define-struct (goto action) (state) (make-inspector)) - (define-struct (no-action action) () (make-inspector)) +(define-struct action () #:inspector (make-inspector)) +(define-struct (shift action) (state) #:inspector (make-inspector)) +(define-struct (reduce action) (prod runtime-reduce) #:inspector (make-inspector)) +(define-struct (accept action) () #:inspector (make-inspector)) +(define-struct (goto action) (state) #:inspector (make-inspector)) +(define-struct (no-action action) () #:inspector (make-inspector)) - (define (make-reduce* p) - (make-reduce p - (vector (prod-index p) - (gram-sym-symbol (prod-lhs p)) - (vector-length (prod-rhs p))))) +(define (make-reduce* p) + (make-reduce p + (vector (prod-index p) + (gram-sym-symbol (prod-lhs p)) + (vector-length (prod-rhs p))))) - ;; A runtime-action is - ;; non-negative-int (shift) - ;; (vector int symbol int) (reduce) - ;; 'accept (accept) - ;; negative-int (goto) - ;; #f (no-action) +;; A runtime-action is +;; non-negative-int (shift) +;; (vector int symbol int) (reduce) +;; 'accept (accept) +;; negative-int (goto) +;; #f (no-action) - (define (action->runtime-action a) - (cond - ((shift? a) (shift-state a)) - ((reduce? a) (reduce-runtime-reduce a)) - ((accept? a) 'accept) - ((goto? a) (- (+ (goto-state a) 1))) - ((no-action? a) #f))) +(define (action->runtime-action a) + (cond + [(shift? a) (shift-state a)] + [(reduce? a) (reduce-runtime-reduce a)] + [(accept? a) 'accept] + [(goto? a) (- (+ (goto-state a) 1))] + [(no-action? a) #f])) - (define (runtime-shift? x) (and (integer? x) (>= x 0))) - (define runtime-reduce? vector?) - (define (runtime-accept? x) (eq? x 'accept)) - (define (runtime-goto? x) (and (integer? x) (< x 0))) +(define (runtime-shift? x) (and (integer? x) (>= x 0))) +(define runtime-reduce? vector?) +(define (runtime-accept? x) (eq? x 'accept)) +(define (runtime-goto? x) (and (integer? x) (< x 0))) + +(define runtime-shift-state values) +(define (runtime-reduce-prod-num x) (vector-ref x 0)) +(define (runtime-reduce-lhs x) (vector-ref x 1)) +(define (runtime-reduce-rhs-length x) (vector-ref x 2)) +(define (runtime-goto-state x) (- (+ x 1))) - (define runtime-shift-state values) - (define (runtime-reduce-prod-num x) (vector-ref x 0)) - (define (runtime-reduce-lhs x) (vector-ref x 1)) - (define (runtime-reduce-rhs-length x) (vector-ref x 2)) - (define (runtime-goto-state x) (- (+ x 1))) - ) diff --git a/br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt index 1be421c..87638e3 100644 --- a/br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt @@ -1,113 +1,103 @@ -(module parser-builder mzscheme +#lang racket/base +(require "input-file-parser.rkt" + "grammar.rkt" + "table.rkt" + racket/class + racket/contract) +(require (for-template racket/base)) - (require "input-file-parser.rkt" - "grammar.rkt" - "table.rkt" - mzlib/class - racket/contract) - (require-for-template mzscheme) +(provide/contract [build-parser (-> string? any/c any/c + (listof identifier?) + (listof identifier?) + (listof identifier?) + (or/c syntax? #f) + syntax? + (values any/c any/c any/c any/c))]) - (provide/contract - (build-parser (-> string? any/c any/c - (listof identifier?) - (listof identifier?) - (listof identifier?) - (or/c syntax? #f) - syntax? - (values any/c any/c any/c any/c)))) - - ;; fix-check-syntax : (listof identifier?) (listof identifier?) (listof identifier?) - ;; (union syntax? false/c) syntax?) -> syntax? - (define (fix-check-syntax input-terms start ends assocs prods) - (let* ((term-binders (get-term-list input-terms)) - (get-term-binder - (let ((t (make-hash-table))) - (for-each - (lambda (term) - (hash-table-put! t (syntax-e term) term)) - term-binders) - (lambda (x) - (let ((r (hash-table-get t (syntax-e x) (lambda () #f)))) - (if r - (syntax-local-introduce (datum->syntax-object r (syntax-e x) x x)) - x))))) - (rhs-list - (syntax-case prods () - (((_ rhs ...) ...) - (syntax->list (syntax (rhs ... ...))))))) - (with-syntax (((tmp ...) (map syntax-local-introduce term-binders)) - ((term-group ...) - (map (lambda (tg) - (syntax-property - (datum->syntax-object tg #f) - 'disappeared-use - tg)) - input-terms)) - ((end ...) - (map get-term-binder ends)) - ((start ...) - (map get-term-binder start)) - ((bind ...) - (syntax-case prods () - (((bind _ ...) ...) - (syntax->list (syntax (bind ...)))))) - (((bound ...) ...) - (map - (lambda (rhs) +;; fix-check-syntax : (listof identifier?) (listof identifier?) (listof identifier?) +;; (union syntax? false/c) syntax?) -> syntax? +(define (fix-check-syntax input-terms start ends assocs prods) + (define term-binders (get-term-list input-terms)) + (define get-term-binder + (let ([t (make-hasheq)]) + (for ([term (in-list term-binders)]) + (hash-set! t (syntax-e term) term)) + (λ (x) + (define r (hash-ref t (syntax-e x) (λ () #f))) + (if r + (syntax-local-introduce (datum->syntax r (syntax-e x) x x)) + x)))) + (define rhs-list (syntax-case prods () + [((_ RHS ...) ...) (syntax->list #'(RHS ... ...))])) + (with-syntax ([(TMP ...) (map syntax-local-introduce term-binders)] + [(TERM-GROUP ...) + (map (λ (tg) + (syntax-property + (datum->syntax tg #f) + 'disappeared-use + tg)) + input-terms)] + [(END ...) (map get-term-binder ends)] + [(START ...) (map get-term-binder start)] + [(BIND ...) (syntax-case prods () + (((BIND _ ...) ...) + (syntax->list #'(BIND ...))))] + [((BOUND ...) ...) + (map (λ (rhs) (syntax-case rhs () - (((bound ...) (_ pbound) __) + [((BOUND ...) (_ PBOUND) __) (map get-term-binder - (cons (syntax pbound) - (syntax->list (syntax (bound ...)))))) - (((bound ...) _) + (cons #'PBOUND (syntax->list #'(BOUND ...))))] + [((BOUND ...) _) (map get-term-binder - (syntax->list (syntax (bound ...))))))) - rhs-list)) - ((prec ...) - (if assocs - (map get-term-binder - (syntax-case assocs () - (((__ term ...) ...) - (syntax->list (syntax (term ... ...)))))) - null))) - #`(when #f - (let ((bind void) ... (tmp void) ...) - (void bound ... ... term-group ... start ... end ... prec ...)))))) - (require mzlib/list "parser-actions.rkt") - (define (build-parser filename src-pos suppress input-terms start end assocs prods) - (let* ((grammar (parse-input input-terms start end assocs prods src-pos)) - (table (build-table grammar filename suppress)) - (all-tokens (make-hash-table)) - (actions-code - `(vector ,@(map prod-action (send grammar get-prods))))) - (for-each (lambda (term) - (hash-table-put! all-tokens (gram-sym-symbol term) #t)) - (send grammar get-terms)) - #;(let ((num-states (vector-length table)) - (num-gram-syms (+ (send grammar get-num-terms) - (send grammar get-num-non-terms))) - (num-ht-entries (apply + (map length (vector->list table)))) - (num-reduces - (let ((ht (make-hash-table))) - (for-each - (lambda (x) - (when (reduce? x) - (hash-table-put! ht x #t))) - (map cdr (apply append (vector->list table)))) - (length (hash-table-map ht void))))) - (printf "~a states, ~a grammar symbols, ~a hash-table entries, ~a reduces\n" - num-states num-gram-syms num-ht-entries num-reduces) - (printf "~a -- ~aKB, previously ~aKB\n" - (/ (+ 2 num-states - (* 4 num-states) (* 2 1.5 num-ht-entries) - (* 5 num-reduces)) 256.0) - (/ (+ 2 num-states - (* 4 num-states) (* 2 2.3 num-ht-entries) - (* 5 num-reduces)) 256.0) - (/ (+ 2 (* num-states num-gram-syms) (* 5 num-reduces)) 256.0))) - (values table - all-tokens - actions-code - (fix-check-syntax input-terms start end assocs prods)))) - - ) + (syntax->list #'(BOUND ...)))])) + rhs-list)] + [(PREC ...) + (if assocs + (map get-term-binder + (syntax-case assocs () + (((__ TERM ...) ...) + (syntax->list #'(TERM ... ...))))) + null)]) + #`(when #f + (let ((BIND void) ... (TMP void) ...) + (void BOUND ... ... TERM-GROUP ... START ... END ... PREC ...))))) + +(require racket/list "parser-actions.rkt") + +(define (build-parser filename src-pos suppress input-terms start end assocs prods) + (define grammar (parse-input input-terms start end assocs prods src-pos)) + (define table (build-table grammar filename suppress)) + (define all-tokens (make-hasheq)) + (define actions-code `(vector ,@(map prod-action (send grammar get-prods)))) + + (for ([term (in-list (send grammar get-terms))]) + (hash-set! all-tokens (gram-sym-symbol term) #t)) + + #;(let ((num-states (vector-length table)) + (num-gram-syms (+ (send grammar get-num-terms) + (send grammar get-num-non-terms))) + (num-ht-entries (apply + (map length (vector->list table)))) + (num-reduces + (let ((ht (make-hasheq))) + (for-each + (λ (x) + (when (reduce? x) + (hash-set! ht x #t))) + (map cdr (apply append (vector->list table)))) + (length (hash-table-map ht void))))) + (printf "~a states, ~a grammar symbols, ~a hash-table entries, ~a reduces\n" + num-states num-gram-syms num-ht-entries num-reduces) + (printf "~a -- ~aKB, previously ~aKB\n" + (/ (+ 2 num-states + (* 4 num-states) (* 2 1.5 num-ht-entries) + (* 5 num-reduces)) 256.0) + (/ (+ 2 num-states + (* 4 num-states) (* 2 2.3 num-ht-entries) + (* 5 num-reduces)) 256.0) + (/ (+ 2 (* num-states num-gram-syms) (* 5 num-reduces)) 256.0))) + (values table + all-tokens + actions-code + (fix-check-syntax input-terms start end assocs prods))) + diff --git a/br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt index f97e4d2..81c9a8d 100644 --- a/br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt @@ -1,290 +1,264 @@ -#lang scheme/base +#lang racket/base +(require "grammar.rkt" + "lr0.rkt" + "lalr.rkt" + "parser-actions.rkt" + racket/contract + racket/list + racket/class) - ;; Routine to build the LALR table +;; Routine to build the LALR table - (require "grammar.rkt" - "lr0.rkt" - "lalr.rkt" - "parser-actions.rkt" - racket/contract - mzlib/list - mzlib/class) - (define (is-a-grammar%? x) (is-a? x grammar%)) - (provide/contract - (build-table (-> is-a-grammar%? string? any/c - (vectorof (listof (cons/c (or/c term? non-term?) action?)))))) +(define (is-a-grammar%? x) (is-a? x grammar%)) +(provide/contract + (build-table (-> is-a-grammar%? string? any/c + (vectorof (listof (cons/c (or/c term? non-term?) action?)))))) - ;; A parse-table is (vectorof (listof (cons/c gram-sym? action))) - ;; A grouped-parse-table is (vectorof (listof (cons/c gram-sym? (listof action)))) +;; A parse-table is (vectorof (listof (cons/c gram-sym? action))) +;; A grouped-parse-table is (vectorof (listof (cons/c gram-sym? (listof action)))) - ;; make-parse-table : int -> parse-table - (define (make-parse-table num-states) - (make-vector num-states null)) +;; make-parse-table : int -> parse-table +(define (make-parse-table num-states) + (make-vector num-states null)) - ;; table-add!: parse-table nat symbol action -> - (define (table-add! table state-index symbol val) - (vector-set! table state-index (cons (cons symbol val) - (vector-ref table state-index)))) +;; table-add!: parse-table nat symbol action -> +(define (table-add! table state-index symbol val) + (vector-set! table state-index (cons (cons symbol val) + (vector-ref table state-index)))) - ;; group-table : parse-table -> grouped-parse-table - (define (group-table table) - (list->vector - (map - (lambda (state-entry) - (let ((ht (make-hash))) - (for-each - (lambda (gs/actions) - (let ((group (hash-ref ht (car gs/actions) (lambda () null)))) - (unless (member (cdr gs/actions) group) - (hash-set! ht (car gs/actions) (cons (cdr gs/actions) group))))) - state-entry) - (hash-map ht cons))) - (vector->list table)))) +;; group-table : parse-table -> grouped-parse-table +(define (group-table table) + (list->vector + (for/list ([state-entry (in-list (vector->list table))]) + (define ht (make-hasheq)) + (for* ([gs/actions (in-list state-entry)] + [group (in-value (hash-ref ht (car gs/actions) (λ () null)))] + #:unless (member (cdr gs/actions) group)) + (hash-set! ht (car gs/actions) (cons (cdr gs/actions) group))) + (hash-map ht cons)))) - ;; table-map : (vectorof (listof (cons/c gram-sym? X))) (gram-sym? X -> Y) -> - ;; (vectorof (listof (cons/c gram-sym? Y))) - (define (table-map f table) - (list->vector - (map - (lambda (state-entry) - (map - (lambda (gs/X) - (cons (car gs/X) (f (car gs/X) (cdr gs/X)))) - state-entry)) - (vector->list table)))) +;; table-map : (vectorof (listof (cons/c gram-sym? X))) (gram-sym? X -> Y) -> +;; (vectorof (listof (cons/c gram-sym? Y))) +(define (table-map f table) + (list->vector + (for/list ([state-entry (in-list (vector->list table))]) + (for/list ([gs/X (in-list state-entry)]) + (cons (car gs/X) (f (car gs/X) (cdr gs/X))))))) - - (define (bit-vector-for-each f bv) - (letrec ((for-each - (lambda (bv number) - (cond - ((= 0 bv) (void)) - ((= 1 (bitwise-and 1 bv)) - (f number) - (for-each (arithmetic-shift bv -1) (add1 number))) - (else (for-each (arithmetic-shift bv -1) (add1 number))))))) - (for-each bv 0))) +(define (bit-vector-for-each f bv) + (let loop ([bv bv] [number 0]) + (cond + [(zero? bv) (void)] + [(= 1 (bitwise-and 1 bv)) + (f number) + (loop (arithmetic-shift bv -1) (add1 number))] + [else (loop (arithmetic-shift bv -1) (add1 number))]))) - ;; print-entry: symbol action output-port -> - ;; prints the action a for lookahead sym to the given port - (define (print-entry sym a port) - (let ((s "\t~a\t\t\t\t\t~a\t~a\n")) - (cond - ((shift? a) - (fprintf port s sym "shift" (shift-state a))) - ((reduce? a) - (fprintf port s sym "reduce" (prod-index (reduce-prod a)))) - ((accept? a) - (fprintf port s sym "accept" "")) - ((goto? a) - (fprintf port s sym "goto" (goto-state a)))))) +;; print-entry: symbol action output-port -> +;; prints the action a for lookahead sym to the given port +(define (print-entry sym a port) + (define s "\t~a\t\t\t\t\t~a\t~a\n") + (cond + [(shift? a) (fprintf port s sym "shift" (shift-state a))] + [(reduce? a) (fprintf port s sym "reduce" (prod-index (reduce-prod a)))] + [(accept? a) (fprintf port s sym "accept" "")] + [(goto? a) (fprintf port s sym "goto" (goto-state a))])) - ;; count: ('a -> bool) * 'a list -> num - ;; counts the number of elements in list that satisfy pred - (define (count pred list) - (cond - ((null? list) 0) - ((pred (car list)) (+ 1 (count pred (cdr list)))) - (else (count pred (cdr list))))) +;; count: ('a -> bool) * 'a list -> num +;; counts the number of elements in list that satisfy pred +(define (count pred list) + (cond + [(null? list) 0] + [(pred (car list)) (+ 1 (count pred (cdr list)))] + [else (count pred (cdr list))])) - ;; display-parser: LR0-automaton grouped-parse-table (listof prod?) output-port -> - ;; Prints out the parser given by table. - (define (display-parser a grouped-table prods port) - (let* ((SR-conflicts 0) - (RR-conflicts 0)) - (for-each - (lambda (prod) - (fprintf port - "~a\t~a\t=\t~a\n" - (prod-index prod) - (gram-sym-symbol (prod-lhs prod)) - (map gram-sym-symbol (vector->list (prod-rhs prod))))) - prods) - (send a for-each-state - (lambda (state) +;; display-parser: LR0-automaton grouped-parse-table (listof prod?) output-port -> +;; Prints out the parser given by table. +(define (display-parser a grouped-table prods port) + (define SR-conflicts 0) + (define RR-conflicts 0) + (for ([prod (in-list prods)]) + (fprintf port + "~a\t~a\t=\t~a\n" + (prod-index prod) + (gram-sym-symbol (prod-lhs prod)) + (map gram-sym-symbol (vector->list (prod-rhs prod))))) + + (send a for-each-state + (λ (state) (fprintf port "State ~a\n" (kernel-index state)) - (for-each (lambda (item) - (fprintf port "\t~a\n" (item->string item))) - (kernel-items state)) + (for ([item (in-list (kernel-items state))]) + (fprintf port "\t~a\n" (item->string item))) (newline port) - (for-each - (lambda (gs/action) - (let ((sym (gram-sym-symbol (car gs/action))) - (act (cdr gs/action))) - (cond - ((null? act) (void)) - ((null? (cdr act)) - (print-entry sym (car act) port)) - (else - (fprintf port "begin conflict:\n") - (when (> (count reduce? act) 1) - (set! RR-conflicts (add1 RR-conflicts))) - (when (> (count shift? act) 0) - (set! SR-conflicts (add1 SR-conflicts))) - (map (lambda (x) (print-entry sym x port)) act) - (fprintf port "end conflict\n"))))) - (vector-ref grouped-table (kernel-index state))) + (for ([gs/action (in-list (vector-ref grouped-table (kernel-index state)))]) + (define sym (gram-sym-symbol (car gs/action))) + (define act (cdr gs/action)) + (cond + [(null? act) (void)] + [(null? (cdr act)) + (print-entry sym (car act) port)] + [else + (fprintf port "begin conflict:\n") + (when (> (count reduce? act) 1) + (set! RR-conflicts (add1 RR-conflicts))) + (when (> (count shift? act) 0) + (set! SR-conflicts (add1 SR-conflicts))) + (map (λ (x) (print-entry sym x port)) act) + (fprintf port "end conflict\n")])) (newline port))) - (when (> SR-conflicts 0) - (fprintf port "~a shift/reduce conflict~a\n" - SR-conflicts - (if (= SR-conflicts 1) "" "s"))) - (when (> RR-conflicts 0) - (fprintf port "~a reduce/reduce conflict~a\n" - RR-conflicts - (if (= RR-conflicts 1) "" "s"))))) + (when (> SR-conflicts 0) + (fprintf port "~a shift/reduce conflict~a\n" + SR-conflicts + (if (= SR-conflicts 1) "" "s"))) + (when (> RR-conflicts 0) + (fprintf port "~a reduce/reduce conflict~a\n" + RR-conflicts + (if (= RR-conflicts 1) "" "s")))) - ;; resolve-conflict : (listof action?) -> action? bool bool - (define (resolve-conflict actions) - (cond - ((null? actions) (values (make-no-action) #f #f)) - ((null? (cdr actions)) - (values (car actions) #f #f)) - (else - (let ((SR-conflict? (> (count shift? actions) 0)) - (RR-conflict? (> (count reduce? actions) 1))) - (let loop ((current-guess #f) - (rest actions)) - (cond - ((null? rest) (values current-guess SR-conflict? RR-conflict?)) - ((shift? (car rest)) (values (car rest) SR-conflict? RR-conflict?)) - ((not current-guess) - (loop (car rest) (cdr rest))) - ((and (reduce? (car rest)) - (< (prod-index (reduce-prod (car rest))) - (prod-index (reduce-prod current-guess)))) - (loop (car rest) (cdr rest))) - ((accept? (car rest)) - (eprintf "accept/reduce or accept/shift conflicts. Check the grammar for useless cycles of productions\n") - (loop current-guess (cdr rest))) - (else (loop current-guess (cdr rest))))))))) +;; resolve-conflict : (listof action?) -> action? bool bool +(define (resolve-conflict actions) + (cond + [(null? actions) (values (make-no-action) #f #f)] + [(null? (cdr actions)) (values (car actions) #f #f)] + [else + (define SR-conflict? (> (count shift? actions) 0)) + (define RR-conflict? (> (count reduce? actions) 1)) + (let loop ((current-guess #f) + (rest actions)) + (cond + [(null? rest) (values current-guess SR-conflict? RR-conflict?)] + [(shift? (car rest)) (values (car rest) SR-conflict? RR-conflict?)] + [(not current-guess) (loop (car rest) (cdr rest))] + [(and (reduce? (car rest)) + (< (prod-index (reduce-prod (car rest))) + (prod-index (reduce-prod current-guess)))) + (loop (car rest) (cdr rest))] + [(accept? (car rest)) + (eprintf "accept/reduce or accept/shift conflicts. Check the grammar for useless cycles of productions\n") + (loop current-guess (cdr rest))] + [else (loop current-guess (cdr rest))]))])) - ;; resolve-conflicts : grouped-parse-table bool -> parse-table - (define (resolve-conflicts grouped-table suppress) - (let* ((SR-conflicts 0) - (RR-conflicts 0) - (table (table-map - (lambda (gs actions) - (let-values (((action SR? RR?) - (resolve-conflict actions))) - (when SR? - (set! SR-conflicts (add1 SR-conflicts))) - (when RR? - (set! RR-conflicts (add1 RR-conflicts))) - action)) - grouped-table))) - (unless suppress - (when (> SR-conflicts 0) - (eprintf "~a shift/reduce conflict~a\n" - SR-conflicts - (if (= SR-conflicts 1) "" "s"))) - (when (> RR-conflicts 0) - (eprintf "~a reduce/reduce conflict~a\n" - RR-conflicts - (if (= RR-conflicts 1) "" "s")))) - table)) +;; resolve-conflicts : grouped-parse-table bool -> parse-table +(define (resolve-conflicts grouped-table suppress) + (define SR-conflicts 0) + (define RR-conflicts 0) + (define table (table-map + (λ (gs actions) + (let-values ([(action SR? RR?) + (resolve-conflict actions)]) + (when SR? + (set! SR-conflicts (add1 SR-conflicts))) + (when RR? + (set! RR-conflicts (add1 RR-conflicts))) + action)) + grouped-table)) + (unless suppress + (when (> SR-conflicts 0) + (eprintf "~a shift/reduce conflict~a\n" + SR-conflicts + (if (= SR-conflicts 1) "" "s"))) + (when (> RR-conflicts 0) + (eprintf "~a reduce/reduce conflict~a\n" + RR-conflicts + (if (= RR-conflicts 1) "" "s")))) + table) - ;; resolve-sr-conflict : (listof action) (union int #f) -> (listof action) - ;; Resolves a single shift-reduce conflict, if precedences are in place. - (define (resolve-sr-conflict/prec actions shift-prec) - (let* ((shift (if (shift? (car actions)) - (car actions) - (cadr actions))) - (reduce (if (shift? (car actions)) - (cadr actions) - (car actions))) - (reduce-prec (prod-prec (reduce-prod reduce)))) - (cond - ((and shift-prec reduce-prec) - (cond - ((< (prec-num shift-prec) (prec-num reduce-prec)) - (list reduce)) - ((> (prec-num shift-prec) (prec-num reduce-prec)) - (list shift)) - ((eq? 'left (prec-assoc shift-prec)) - (list reduce)) - ((eq? 'right (prec-assoc shift-prec)) - (list shift)) - (else null))) - (else actions)))) +;; resolve-sr-conflict : (listof action) (union int #f) -> (listof action) +;; Resolves a single shift-reduce conflict, if precedences are in place. +(define (resolve-sr-conflict/prec actions shift-prec) + (define shift (if (shift? (car actions)) + (car actions) + (cadr actions))) + (define reduce (if (shift? (car actions)) + (cadr actions) + (car actions))) + (define reduce-prec (prod-prec (reduce-prod reduce))) + (cond + [(and shift-prec reduce-prec) + (cond + [(< (prec-num shift-prec) (prec-num reduce-prec)) + (list reduce)] + [(> (prec-num shift-prec) (prec-num reduce-prec)) + (list shift)] + [(eq? 'left (prec-assoc shift-prec)) + (list reduce)] + [(eq? 'right (prec-assoc shift-prec)) + (list shift)] + [else null])] + [else actions])) - ;; resolve-prec-conflicts : parse-table -> grouped-parse-table - (define (resolve-prec-conflicts table) - (table-map - (lambda (gs actions) - (cond - ((and (term? gs) - (= 2 (length actions)) - (or (shift? (car actions)) - (shift? (cadr actions)))) - (resolve-sr-conflict/prec actions (term-prec gs))) - (else actions))) - (group-table table))) +;; resolve-prec-conflicts : parse-table -> grouped-parse-table +(define (resolve-prec-conflicts table) + (table-map + (λ (gs actions) + (cond + [(and (term? gs) + (= 2 (length actions)) + (or (shift? (car actions)) + (shift? (cadr actions)))) + (resolve-sr-conflict/prec actions (term-prec gs))] + [else actions])) + (group-table table))) - ;; build-table: grammar string bool -> parse-table - (define (build-table g file suppress) - (let* ((a (build-lr0-automaton g)) - (term-vector (list->vector (send g get-terms))) - (end-terms (send g get-end-terms)) - (table (make-parse-table (send a get-num-states))) - (get-lookahead (compute-LA a g)) - (reduce-cache (make-hash))) +;; build-table: grammar string bool -> parse-table +(define (build-table g file suppress) + (define a (build-lr0-automaton g)) + (define term-vector (list->vector (send g get-terms))) + (define end-terms (send g get-end-terms)) + (define table (make-parse-table (send a get-num-states))) + (define get-lookahead (compute-LA a g)) + (define reduce-cache (make-hash)) + (for ([trans-key/state (in-list (send a get-transitions))]) + (define from-state-index (kernel-index (trans-key-st (car trans-key/state)))) + (define gs (trans-key-gs (car trans-key/state))) + (define to-state (cdr trans-key/state)) + + (table-add! table from-state-index gs + (cond + ((non-term? gs) + (make-goto (kernel-index to-state))) + ((member gs end-terms) + (make-accept)) + (else + (make-shift + (kernel-index to-state)))))) + (send a for-each-state + (λ (state) + (for ([item (in-list (append (hash-ref (send a get-epsilon-trans) state (λ () null)) + (filter (λ (item) + (not (move-dot-right item))) + (kernel-items state))))]) + (let ([item-prod (item-prod item)]) + (bit-vector-for-each + (λ (term-index) + (unless (start-item? item) + (let ((r (hash-ref reduce-cache item-prod + (λ () + (let ((r (make-reduce item-prod))) + (hash-set! reduce-cache item-prod r) + r))))) + (table-add! table + (kernel-index state) + (vector-ref term-vector term-index) + r)))) + (get-lookahead state item-prod)))))) - (for-each - (lambda (trans-key/state) - (let ((from-state-index (kernel-index (trans-key-st (car trans-key/state)))) - (gs (trans-key-gs (car trans-key/state))) - (to-state (cdr trans-key/state))) - (table-add! table from-state-index gs - (cond - ((non-term? gs) - (make-goto (kernel-index to-state))) - ((member gs end-terms) - (make-accept)) - (else - (make-shift - (kernel-index to-state))))))) - (send a get-transitions)) - - (send a for-each-state - (lambda (state) - (for-each - (lambda (item) - (let ((item-prod (item-prod item))) - (bit-vector-for-each - (lambda (term-index) - (unless (start-item? item) - (let ((r (hash-ref reduce-cache item-prod - (lambda () - (let ((r (make-reduce item-prod))) - (hash-set! reduce-cache item-prod r) - r))))) - (table-add! table - (kernel-index state) - (vector-ref term-vector term-index) - r)))) - (get-lookahead state item-prod)))) - (append (hash-ref (send a get-epsilon-trans) state (lambda () null)) - (filter (lambda (item) - (not (move-dot-right item))) - (kernel-items state)))))) - - (let ((grouped-table (resolve-prec-conflicts table))) - (unless (string=? file "") - (with-handlers [(exn:fail:filesystem? - (lambda (e) - (eprintf - "Cannot write debug output to file \"~a\": ~a\n" - file - (exn-message e))))] - (call-with-output-file file - (lambda (port) - (display-parser a grouped-table (send g get-prods) port)) - #:exists 'truncate))) - (resolve-conflicts grouped-table suppress)))) + (define grouped-table (resolve-prec-conflicts table)) + (unless (string=? file "") + (with-handlers [(exn:fail:filesystem? + (λ (e) + (eprintf + "Cannot write debug output to file \"~a\": ~a\n" + file + (exn-message e))))] + (call-with-output-file file + (λ (port) + (display-parser a grouped-table (send g get-prods) port)) + #:exists 'truncate))) + (resolve-conflicts grouped-table suppress)) diff --git a/br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt b/br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt index 31b3cc6..5f63471 100644 --- a/br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt +++ b/br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt @@ -1,118 +1,71 @@ -(module yacc-helper mzscheme +#lang racket/base +(require (prefix-in rl: racket/list) + "../private-lex/token-syntax.rkt") - (require mzlib/list - "../private-lex/token-syntax.rkt") +;; General helper routines +(provide duplicate-list? remove-duplicates overlap? vector-andmap display-yacc) + +(define (vector-andmap pred vec) + (for/and ([item (in-vector vec)]) + (pred vec))) - ;; General helper routines - - (provide duplicate-list? remove-duplicates overlap? vector-andmap display-yacc) - - (define (vector-andmap f v) - (let loop ((i 0)) - (cond - ((= i (vector-length v)) #t) - (else (if (f (vector-ref v i)) - (loop (add1 i)) - #f))))) +;; duplicate-list?: symbol list -> #f | symbol +;; returns a symbol that exists twice in l, or false if no such symbol +;; exists +(define (duplicate-list? syms) + (rl:check-duplicates syms eq?)) - ;; duplicate-list?: symbol list -> #f | symbol - ;; returns a symbol that exists twice in l, or false if no such symbol - ;; exists - (define (duplicate-list? l) - (letrec ((t (make-hash-table)) - (dl? (lambda (l) - (cond - ((null? l) #f) - ((hash-table-get t (car l) (lambda () #f)) => - (lambda (x) x)) - (else - (hash-table-put! t (car l) (car l)) - (dl? (cdr l))))))) - (dl? l))) +;; remove-duplicates: syntax-object list -> syntax-object list +;; removes the duplicates from the lists +(define (remove-duplicates syms) + (rl:remove-duplicates syms equal? #:key syntax->datum)) - ;; remove-duplicates: syntax-object list -> syntax-object list - ;; removes the duplicates from the lists - (define (remove-duplicates sl) - (let ((t (make-hash-table))) - (letrec ((x - (lambda (sl) - (cond - ((null? sl) sl) - ((hash-table-get t (syntax-object->datum (car sl)) (lambda () #f)) - (x (cdr sl))) - (else - (hash-table-put! t (syntax-object->datum (car sl)) #t) - (cons (car sl) (x (cdr sl)))))))) - (x sl)))) - - ;; overlap?: symbol list * symbol list -> #f | symbol - ;; Returns an symbol in l1 intersect l2, or #f is no such symbol exists - (define (overlap? l1 l2) - (let/ec ret - (let ((t (make-hash-table))) - (for-each (lambda (s1) - (hash-table-put! t s1 s1)) - l1) - (for-each (lambda (s2) - (cond - ((hash-table-get t s2 (lambda () #f)) => - (lambda (o) (ret o))))) - l2) - #f))) +;; overlap?: symbol list * symbol list -> #f | symbol +;; Returns an symbol in l1 intersect l2, or #f is no such symbol exists +(define (overlap? syms1 syms2) + (for/first ([sym1 (in-list syms1)] + #:when (memq sym1 syms2)) + sym1)) - (define (display-yacc grammar tokens start precs port) - (let-syntax ((p (syntax-rules () - ((_ args ...) (fprintf port args ...))))) - (let* ((tokens (map syntax-local-value tokens)) - (eterms (filter e-terminals-def? tokens)) - (terms (filter terminals-def? tokens)) - (term-table (make-hash-table)) - (display-rhs - (lambda (rhs) - (for-each (lambda (sym) (p "~a " (hash-table-get term-table sym (lambda () sym)))) - (car rhs)) - (if (= 3 (length rhs)) - (p "%prec ~a" (cadadr rhs))) - (p "\n")))) - (for-each - (lambda (t) - (for-each - (lambda (t) - (hash-table-put! term-table t (format "'~a'" t))) - (syntax-object->datum (e-terminals-def-t t)))) - eterms) - (for-each - (lambda (t) - (for-each - (lambda (t) - (p "%token ~a\n" t) - (hash-table-put! term-table t (format "~a" t))) - (syntax-object->datum (terminals-def-t t)))) - terms) - (if precs - (for-each (lambda (prec) - (p "%~a " (car prec)) - (for-each (lambda (tok) - (p " ~a" (hash-table-get term-table tok))) - (cdr prec)) - (p "\n")) - precs)) - (p "%start ~a\n" start) - (p "%%\n") +(define (display-yacc grammar tokens start precs port) + (let-syntax ([p (syntax-rules () + ((_ args ...) (fprintf port args ...)))]) + (let* ([tokens (map syntax-local-value tokens)] + [eterms (filter e-terminals-def? tokens)] + [terms (filter terminals-def? tokens)] + [term-table (make-hasheq)] + [display-rhs + (λ (rhs) + (for ([sym (in-list (car rhs))]) + (p "~a " (hash-ref term-table sym (λ () sym)))) + (when (= 3 (length rhs)) + (p "%prec ~a" (cadadr rhs))) + (p "\n"))]) + (for* ([t (in-list eterms)] + [t (in-list (syntax->datum (e-terminals-def-t t)))]) + (hash-set! term-table t (format "'~a'" t))) + (for* ([t (in-list terms)] + [t (in-list (syntax->datum (terminals-def-t t)))]) + (p "%token ~a\n" t) + (hash-set! term-table t (format "~a" t))) + (when precs + (for ([prec (in-list precs)]) + (p "%~a " (car prec)) + (for ([tok (in-list (cdr prec))]) + (p " ~a" (hash-ref term-table tok))) + (p "\n"))) + (p "%start ~a\n" start) + (p "%%\n") + (for ([prod (in-list grammar)]) + (define nt (car prod)) + (p "~a: " nt) + (display-rhs (cadr prod)) + (for ([rhs (in-list (cddr prod))]) + (p "| ") + (display-rhs rhs)) + (p ";\n")) + (p "%%\n")))) - (for-each (lambda (prod) - (let ((nt (car prod))) - (p "~a: " nt) - (display-rhs (cadr prod)) - (for-each (lambda (rhs) - (p "| ") - (display-rhs rhs)) - (cddr prod)) - (p ";\n"))) - grammar) - (p "%%\n")))) - -) diff --git a/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt b/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt index 7f766eb..d0e97fe 100644 --- a/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt +++ b/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt @@ -1,135 +1,130 @@ -(module yacc-to-scheme mzscheme - (require br-parser-tools/lex - (prefix : br-parser-tools/lex-sre) - br-parser-tools/yacc - syntax/readerr - mzlib/list) - (provide trans) +#lang racket/base +(require br-parser-tools/lex + (prefix-in : br-parser-tools/lex-sre) + br-parser-tools/yacc + syntax/readerr + racket/list) +(provide trans) - (define match-double-string - (lexer - ((:+ (:~ #\" #\\)) (append (string->list lexeme) - (match-double-string input-port))) - ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port))) - (#\" null))) +(define match-double-string + (lexer + [(:+ (:~ #\" #\\)) (append (string->list lexeme) + (match-double-string input-port))] + [(:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port))] + [#\" null])) - (define match-single-string - (lexer - ((:+ (:~ #\' #\\)) (append (string->list lexeme) - (match-single-string input-port))) - ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port))) - (#\' null))) +(define match-single-string + (lexer + [(:+ (:~ #\' #\\)) (append (string->list lexeme) + (match-single-string input-port))] + [(:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port))] + [#\' null])) - (define-lex-abbrevs - (letter (:or (:/ "a" "z") (:/ "A" "Z"))) - (digit (:/ "0" "9")) - (initial (:or letter (char-set "!$%&*/<=>?^_~@"))) - (subsequent (:or initial digit (char-set "+-.@"))) - (comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/"))) +(define-lex-abbrevs + [letter (:or (:/ "a" "z") (:/ "A" "Z"))] + [digit (:/ "0" "9")] + [initial (:or letter (char-set "!$%&*/<=>?^_~@"))] + [subsequent (:or initial digit (char-set "+-.@"))] + [comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/")]) - (define-empty-tokens x - (EOF PIPE |:| SEMI |%%| %prec)) - (define-tokens y - (SYM STRING)) +(define-empty-tokens x (EOF PIPE |:| SEMI |%%| %prec)) +(define-tokens y (SYM STRING)) - (define get-token-grammar - (lexer-src-pos - ("%%" '|%%|) - (":" (string->symbol lexeme)) - ("%prec" (string->symbol lexeme)) - (#\| 'PIPE) - ((:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}"))) - (return-without-pos (get-token-grammar input-port))) - (#\; 'SEMI) - (#\' (token-STRING (string->symbol (list->string (match-single-string input-port))))) - (#\" (token-STRING (string->symbol (list->string (match-double-string input-port))))) - ((:: initial (:* subsequent)) (token-SYM (string->symbol lexeme))))) +(define get-token-grammar + (lexer-src-pos + ["%%" '|%%|] + [":" (string->symbol lexeme)] + ["%prec" (string->symbol lexeme)] + [#\| 'PIPE] + [(:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}"))) + (return-without-pos (get-token-grammar input-port))] + [#\; 'SEMI] + [#\' (token-STRING (string->symbol (list->string (match-single-string input-port))))] + [#\" (token-STRING (string->symbol (list->string (match-double-string input-port))))] + [(:: initial (:* subsequent)) (token-SYM (string->symbol lexeme))])) - (define (parse-grammar enter-term enter-empty-term enter-non-term) - (parser - (tokens x y) - (src-pos) - (error (lambda (tok-ok tok-name tok-value start-pos end-pos) - (raise-read-error - (format "Error Parsing YACC grammar at token: ~a with value: ~a" tok-name tok-value) - (file-path) - (position-line start-pos) - (position-col start-pos) - (position-offset start-pos) - (- (position-offset end-pos) (position-offset start-pos))))) +(define (parse-grammar enter-term enter-empty-term enter-non-term) + (parser + (tokens x y) + (src-pos) + (error (λ (tok-ok tok-name tok-value start-pos end-pos) + (raise-read-error + (format "Error Parsing YACC grammar at token: ~a with value: ~a" tok-name tok-value) + (file-path) + (position-line start-pos) + (position-col start-pos) + (position-offset start-pos) + (- (position-offset end-pos) (position-offset start-pos))))) - (end |%%|) - (start gram) - (grammar - (gram - ((production) (list $1)) - ((production gram) (cons $1 $2))) - (production - ((SYM |:| prods SEMI) - (begin - (enter-non-term $1) - (cons $1 $3)))) - (prods - ((rhs) (list `(,$1 #f))) - ((rhs prec) (list `(,$1 ,$2 #f))) - ((rhs PIPE prods) (cons `(,$1 #f) $3)) - ((rhs prec PIPE prods) (cons `(,$1 ,$2 #f) $4))) - (prec - ((%prec SYM) - (begin - (enter-term $2) - (list 'prec $2))) - ((%prec STRING) - (begin - (enter-empty-term $2) - (list 'prec $2)))) - (rhs - (() null) - ((SYM rhs) - (begin - (enter-term $1) - (cons $1 $2))) - ((STRING rhs) - (begin - (enter-empty-term $1) - (cons $1 $2))))))) + (end |%%|) + (start gram) + (grammar + (gram + ((production) (list $1)) + ((production gram) (cons $1 $2))) + (production + ((SYM |:| prods SEMI) + (begin + (enter-non-term $1) + (cons $1 $3)))) + (prods + ((rhs) (list `(,$1 #f))) + ((rhs prec) (list `(,$1 ,$2 #f))) + ((rhs PIPE prods) (cons `(,$1 #f) $3)) + ((rhs prec PIPE prods) (cons `(,$1 ,$2 #f) $4))) + (prec + ((%prec SYM) + (begin + (enter-term $2) + (list 'prec $2))) + ((%prec STRING) + (begin + (enter-empty-term $2) + (list 'prec $2)))) + (rhs + (() null) + ((SYM rhs) + (begin + (enter-term $1) + (cons $1 $2))) + ((STRING rhs) + (begin + (enter-empty-term $1) + (cons $1 $2))))))) - (define (symbolstring a) (symbol->string b))) +(define (symbolstring a) (symbol->string b))) - (define (trans filename) - (let* ((i (open-input-file filename)) - (terms (make-hash-table)) - (eterms (make-hash-table)) - (nterms (make-hash-table)) - (enter-term - (lambda (s) - (if (not (hash-table-get nterms s (lambda () #f))) - (hash-table-put! terms s #t)))) - (enter-empty-term - (lambda (s) - (if (not (hash-table-get nterms s (lambda () #f))) - (hash-table-put! eterms s #t)))) - (enter-non-term - (lambda (s) - (hash-table-remove! terms s) - (hash-table-remove! eterms s) - (hash-table-put! nterms s #t)))) - (port-count-lines! i) - (file-path filename) - (regexp-match "%%" i) - (begin0 - (let ((gram ((parse-grammar enter-term enter-empty-term enter-non-term) - (lambda () - (let ((t (get-token-grammar i))) - t))))) - `(begin - (define-tokens t ,(sort (hash-table-map terms (lambda (k v) k)) symbol ;; (vectorof (symbol runtime-action hashtable)) (define-for-syntax (convert-parse-table table) - (list->vector - (map - (lambda (state-entry) - (let ((ht (make-hasheq))) - (for-each - (lambda (gs/action) - (hash-set! ht - (gram-sym-symbol (car gs/action)) - (action->runtime-action (cdr gs/action)))) - state-entry) - ht)) - (vector->list table)))) + (for/vector ([state-entry (in-vector table)]) + (let ([ht (make-hasheq)]) + (for ([gs/action (in-list state-entry)]) + (hash-set! ht + (gram-sym-symbol (car gs/action)) + (action->runtime-action (cdr gs/action)))) + ht))) (define-syntax (parser stx) (syntax-case stx () - ((_ args ...) - (let ((arg-list (syntax->list (syntax (args ...)))) - (src-pos #f) - (debug #f) - (error #f) - (tokens #f) - (start #f) - (end #f) - (precs #f) - (suppress #f) - (grammar #f) - (yacc-output #f)) - (for-each - (lambda (arg) - (syntax-case* arg (debug error tokens start end precs grammar - suppress src-pos yacc-output) - (lambda (a b) - (eq? (syntax-e a) (syntax-e b))) - ((debug filename) - (cond - ((not (string? (syntax-e (syntax filename)))) - (raise-syntax-error - #f - "Debugging filename must be a string" - stx - (syntax filename))) - (debug - (raise-syntax-error #f "Multiple debug declarations" stx)) - (else - (set! debug (syntax-e (syntax filename)))))) - ((suppress) - (set! suppress #t)) - ((src-pos) - (set! src-pos #t)) - ((error expression) - (if error - (raise-syntax-error #f "Multiple error declarations" stx) - (set! error (syntax expression)))) - ((tokens def ...) - (begin - (when tokens - (raise-syntax-error #f "Multiple tokens declarations" stx)) - (let ((defs (syntax->list (syntax (def ...))))) - (for-each - (lambda (d) - (unless (identifier? d) - (raise-syntax-error - #f - "Token-group name must be an identifier" - stx - d))) - defs) - (set! tokens defs)))) - ((start symbol ...) - (let ((symbols (syntax->list (syntax (symbol ...))))) - (for-each - (lambda (sym) - (unless (identifier? sym) - (raise-syntax-error #f - "Start symbol must be a symbol" - stx - sym))) - symbols) - (when start - (raise-syntax-error #f "Multiple start declarations" stx)) - (when (null? symbols) - (raise-syntax-error #f - "Missing start symbol" - stx - arg)) - (set! start symbols))) - ((end symbols ...) - (let ((symbols (syntax->list (syntax (symbols ...))))) - (for-each - (lambda (sym) - (unless (identifier? sym) - (raise-syntax-error #f - "End token must be a symbol" - stx - sym))) - symbols) - (let ((d (duplicate-list? (map syntax-e symbols)))) - (when d - (raise-syntax-error - #f - (format "Duplicate end token definition for ~a" d) - stx - arg)) - (when (null? symbols) - (raise-syntax-error - #f - "end declaration must contain at least 1 token" - stx - arg)) - (when end - (raise-syntax-error #f "Multiple end declarations" stx)) - (set! end symbols)))) - ((precs decls ...) - (if precs - (raise-syntax-error #f "Multiple precs declarations" stx) - (set! precs (syntax/loc arg (decls ...))))) - ((grammar prods ...) - (if grammar - (raise-syntax-error #f "Multiple grammar declarations" stx) - (set! grammar (syntax/loc arg (prods ...))))) - ((yacc-output filename) - (cond - ((not (string? (syntax-e (syntax filename)))) - (raise-syntax-error #f - "Yacc-output filename must be a string" - stx - (syntax filename))) - (yacc-output - (raise-syntax-error #f "Multiple yacc-output declarations" stx)) - (else - (set! yacc-output (syntax-e (syntax filename)))))) - (_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg)))) - (syntax->list (syntax (args ...)))) + [(_ ARGS ...) + (let ([arg-list (syntax->list #'(ARGS ...))] + [src-pos #f] + [debug #f] + [error #f] + [tokens #f] + [start #f] + [end #f] + [precs #f] + [suppress #f] + [grammar #f] + [yacc-output #f]) + (for ([arg (in-list (syntax->list #'(ARGS ...)))]) + (syntax-case* arg (debug error tokens start end precs grammar + suppress src-pos yacc-output) + (λ (a b) (eq? (syntax-e a) (syntax-e b))) + [(debug FILENAME) + (cond + [(not (string? (syntax-e #'FILENAME))) + (raise-syntax-error #f "Debugging filename must be a string" stx #'FILENAME)] + [debug (raise-syntax-error #f "Multiple debug declarations" stx)] + [else (set! debug (syntax-e #'FILENAME))])] + [(suppress) (set! suppress #t)] + [(src-pos) (set! src-pos #t)] + [(error EXPRESSION) + (if error + (raise-syntax-error #f "Multiple error declarations" stx) + (set! error #'EXPRESSION))] + [(tokens DEF ...) + (begin + (when tokens + (raise-syntax-error #f "Multiple tokens declarations" stx)) + (let ((defs (syntax->list #'(DEF ...)))) + (for ([d (in-list defs)] + #:unless (identifier? d)) + (raise-syntax-error #f "Token-group name must be an identifier" stx d)) + (set! tokens defs)))] + [(start symbol ...) + (let ([symbols (syntax->list #'(symbol ...))]) + (for ([sym (in-list symbols)] + #:unless (identifier? sym)) + (raise-syntax-error #f "Start symbol must be a symbol" stx sym)) + (when start + (raise-syntax-error #f "Multiple start declarations" stx)) + (when (null? symbols) + (raise-syntax-error #f "Missing start symbol" stx arg)) + (set! start symbols))] + [(end SYMBOLS ...) + (let ((symbols (syntax->list #'(SYMBOLS ...)))) + (for ([sym (in-list symbols)] + #:unless (identifier? sym)) + (raise-syntax-error #f "End token must be a symbol" stx sym)) + (let ([d (duplicate-list? (map syntax-e symbols))]) + (when d + (raise-syntax-error #f (format "Duplicate end token definition for ~a" d) stx arg)) + (when (null? symbols) + (raise-syntax-error #f "end declaration must contain at least 1 token" stx arg)) + (when end + (raise-syntax-error #f "Multiple end declarations" stx)) + (set! end symbols)))] + [(precs DECLS ...) + (if precs + (raise-syntax-error #f "Multiple precs declarations" stx) + (set! precs (syntax/loc arg (DECLS ...))))] + [(grammar PRODS ...) + (if grammar + (raise-syntax-error #f "Multiple grammar declarations" stx) + (set! grammar (syntax/loc arg (PRODS ...))))] + [(yacc-output FILENAME) + (cond + [(not (string? (syntax-e #'FILENAME))) + (raise-syntax-error #f "Yacc-output filename must be a string" stx #'FILENAME)] + [yacc-output + (raise-syntax-error #f "Multiple yacc-output declarations" stx)] + [else + (set! yacc-output (syntax-e #'FILENAME))])] + [_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg)])) (unless tokens (raise-syntax-error #f "missing tokens declaration" stx)) (unless error @@ -160,7 +113,7 @@ (raise-syntax-error #f "missing end declaration" stx)) (unless start (raise-syntax-error #f "missing start declaration" stx)) - (let-values (((table all-term-syms actions check-syntax-fix) + (let-values ([(table all-term-syms actions check-syntax-fix) (build-parser (if debug debug "") src-pos suppress @@ -168,66 +121,51 @@ start end precs - grammar))) + grammar)]) (when (and yacc-output (not (string=? yacc-output ""))) (with-handlers [(exn:fail:filesystem? - (lambda (e) - (eprintf - "Cannot write yacc-output to file \"~a\"\n" - yacc-output)))] + (λ (e) (eprintf "Cannot write yacc-output to file \"~a\"\n" yacc-output)))] (call-with-output-file yacc-output - (lambda (port) + (λ (port) (display-yacc (syntax->datum grammar) tokens (map syntax->datum start) - (if precs - (syntax->datum precs) - #f) + (and precs (syntax->datum precs)) port)) #:exists 'truncate))) - (with-syntax ((check-syntax-fix check-syntax-fix) - (err error) - (ends end) - (starts start) - (debug debug) - (table (convert-parse-table table)) - (all-term-syms all-term-syms) - (actions actions) - (src-pos src-pos)) - (syntax - (begin - check-syntax-fix - (parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos))))))) - (_ - (raise-syntax-error #f - "parser must have the form (parser args ...)" - stx)))) + (with-syntax ([check-syntax-fix check-syntax-fix] + [err error] + [ends end] + [starts start] + [debug debug] + [table (convert-parse-table table)] + [all-term-syms all-term-syms] + [actions actions] + [src-pos src-pos]) + #'(begin + check-syntax-fix + (parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos)))))] + [_ (raise-syntax-error #f "parser must have the form (parser args ...)" stx)])) (define (reduce-stack stack num ret-vals src-pos) (cond - ((> num 0) - (let* ((top-frame (car stack)) - (ret-vals - (if src-pos - (cons (stack-frame-value top-frame) - (cons (stack-frame-start-pos top-frame) - (cons (stack-frame-end-pos top-frame) - ret-vals))) - (cons (stack-frame-value top-frame) ret-vals)))) - (reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))) - (else (values stack ret-vals)))) + [(positive? num) + (define top-frame (car stack)) + (let ([ret-vals (if src-pos + (cons (stack-frame-value top-frame) + (cons (stack-frame-start-pos top-frame) + (cons (stack-frame-end-pos top-frame) + ret-vals))) + (cons (stack-frame-value top-frame) ret-vals))]) + (reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))] + [else (values stack ret-vals)])) ;; extract-helper : (symbol or make-token) any any -> symbol any any any (define (extract-helper tok v1 v2) (cond - ((symbol? tok) - (values tok #f v1 v2)) - ((token? tok) - (values (real-token-name tok) (real-token-value tok) v1 v2)) - (else (raise-argument-error 'parser - "(or/c symbol? token?)" - 0 - tok)))) + [(symbol? tok) (values tok #f v1 v2)] + [(token? tok) (values (real-token-name tok) (real-token-value tok) v1 v2)] + [else (raise-argument-error 'parser "(or/c symbol? token?)" 0 tok)])) ;; well-formed-position-token?: any -> boolean ;; Returns true if pt is a position token whose position-token-token @@ -236,8 +174,7 @@ ;; a tokenizer produces an erroneous position-token wrapped twice. ;; (as often happens when omitting return-without-pos). (define (well-formed-token-field? t) - (or (symbol? t) - (token? t))) + (or (symbol? t) (token? t))) (define (well-formed-position-token? pt) (and (position-token? pt) @@ -250,24 +187,18 @@ ;; extract-src-pos : position-token -> symbol any any any (define (extract-src-pos ip) (unless (well-formed-position-token? ip) - (raise-argument-error 'parser - "well-formed-position-token?" - 0 - ip)) + (raise-argument-error 'parser "well-formed-position-token?" 0 ip)) (extract-helper (position-token-token ip) (position-token-start-pos ip) (position-token-end-pos ip))) (define (extract-srcloc ip) (unless (well-formed-srcloc-token? ip) - (raise-argument-error 'parser - "well-formed-srcloc-token?" - 0 - ip)) - (let ([loc (srcloc-token-srcloc ip)]) - (extract-helper (srcloc-token-token ip) - (position-token (srcloc-position loc) (srcloc-line loc) (srcloc-column loc)) - (position-token (+ (srcloc-position loc) (srcloc-span loc)) #f #f)))) + (raise-argument-error 'parser "well-formed-srcloc-token?" 0 ip)) + (define loc (srcloc-token-srcloc ip)) + (extract-helper (srcloc-token-token ip) + (position-token (srcloc-position loc) (srcloc-line loc) (srcloc-column loc)) + (position-token (+ (srcloc-position loc) (srcloc-span loc)) #f #f))) ;; extract-no-src-pos : (symbol or make-token) -> symbol any any any @@ -295,24 +226,24 @@ (if (memq tok ends) (raise-read-error "parser: Cannot continue after error" #f #f #f #f #f) - (let ((a (find-action stack tok val start-pos end-pos))) + (let ([a (find-action stack tok val start-pos end-pos)]) (cond - ((runtime-shift? a) + [(runtime-shift? a) ;; (printf "shift:~a\n" (runtime-shift-state a)) (cons (make-stack-frame (runtime-shift-state a) val start-pos end-pos) - stack)) - (else + stack)] + [else ;; (printf "discard input:~a\n" tok) - (let-values (((tok val start-pos end-pos) - (extract (get-token)))) - (remove-input tok val start-pos end-pos)))))))) + (let-values ([(tok val start-pos end-pos) + (extract (get-token))]) + (remove-input tok val start-pos end-pos))]))))) (let remove-states () - (let ((a (find-action stack 'error #f start-pos end-pos))) + (let ([a (find-action stack 'error #f start-pos end-pos)]) (cond - ((runtime-shift? a) + [(runtime-shift? a) ;; (printf "shift:~a\n" (runtime-shift-state a)) (set! stack (cons @@ -321,92 +252,83 @@ start-pos end-pos) stack)) - (remove-input tok val start-pos end-pos)) - (else + (remove-input tok val start-pos end-pos)] + [else ;; (printf "discard state:~a\n" (car stack)) (cond - ((< (length stack) 2) + [(< (length stack) 2) (raise-read-error "parser: Cannot continue after error" - #f #f #f #f #f)) - (else + #f #f #f #f #f)] + [else (set! stack (cdr stack)) - (remove-states))))))))) + (remove-states)])]))))) (define (find-action stack tok val start-pos end-pos) - (unless (hash-ref all-term-syms - tok - #f) + (unless (hash-ref all-term-syms tok #f) (if src-pos (err #f tok val start-pos end-pos) (err #f tok val)) (raise-read-error (format "parser: got token of unknown type ~a" tok) #f #f #f #f #f)) - (hash-ref (vector-ref table (stack-frame-state (car stack))) - tok - #f)) + (hash-ref (vector-ref table (stack-frame-state (car stack))) tok #f)) - (define (make-parser start-number) - (lambda (get-token) - (unless (and (procedure? get-token) - (procedure-arity-includes? get-token 0)) - (error 'get-token "expected a nullary procedure, got ~e" get-token)) - (let parsing-loop ((stack (make-empty-stack start-number)) - (ip (get-token))) - (let-values (((tok val start-pos end-pos) - (extract ip))) - (let ((action (find-action stack tok val start-pos end-pos))) - (cond - ((runtime-shift? action) - ;; (printf "shift:~a\n" (runtime-shift-state action)) - (parsing-loop (cons (make-stack-frame (runtime-shift-state action) - val - start-pos - end-pos) - stack) - (get-token))) - ((runtime-reduce? action) - ;; (printf "reduce:~a\n" (runtime-reduce-prod-num action)) - (let-values (((new-stack args) - (reduce-stack stack - (runtime-reduce-rhs-length action) - null - src-pos))) - (let ((goto - (runtime-goto-state - (hash-ref - (vector-ref table (stack-frame-state (car new-stack))) - (runtime-reduce-lhs action))))) - (parsing-loop - (cons - (if src-pos - (make-stack-frame - goto - (apply (vector-ref actions (runtime-reduce-prod-num action)) args) - (if (null? args) start-pos (cadr args)) - (if (null? args) - end-pos - (list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1)))) - (make-stack-frame - goto - (apply (vector-ref actions (runtime-reduce-prod-num action)) args) - #f - #f)) - new-stack) - ip)))) - ((runtime-accept? action) - ;; (printf "accept\n") - (stack-frame-value (car stack))) - (else - (if src-pos - (err #t tok val start-pos end-pos) - (err #t tok val)) - (parsing-loop (fix-error stack tok val start-pos end-pos get-token) - (get-token)))))))))) + (define ((make-parser start-number) get-token) + (unless (and (procedure? get-token) + (procedure-arity-includes? get-token 0)) + (error 'get-token "expected a nullary procedure, got ~e" get-token)) + (let parsing-loop ([stack (make-empty-stack start-number)] + [ip (get-token)]) + (let-values ([(tok val start-pos end-pos) (extract ip)]) + (let ([action (find-action stack tok val start-pos end-pos)]) + (cond + [(runtime-shift? action) + ;; (printf "shift:~a\n" (runtime-shift-state action)) + (parsing-loop (cons (make-stack-frame (runtime-shift-state action) + val + start-pos + end-pos) + stack) + (get-token))] + [(runtime-reduce? action) + ;; (printf "reduce:~a\n" (runtime-reduce-prod-num action)) + (let-values ([(new-stack args) + (reduce-stack stack + (runtime-reduce-rhs-length action) + null + src-pos)]) + (let ([goto + (runtime-goto-state + (hash-ref + (vector-ref table (stack-frame-state (car new-stack))) + (runtime-reduce-lhs action)))]) + (parsing-loop + (cons + (if src-pos + (make-stack-frame + goto + (apply (vector-ref actions (runtime-reduce-prod-num action)) args) + (if (null? args) start-pos (cadr args)) + (if (null? args) + end-pos + (list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1)))) + (make-stack-frame + goto + (apply (vector-ref actions (runtime-reduce-prod-num action)) args) + #f + #f)) + new-stack) + ip)))] + [(runtime-accept? action) + ;; (printf "accept\n") + (stack-frame-value (car stack))] + [else + (if src-pos + (err #t tok val start-pos end-pos) + (err #t tok val)) + (parsing-loop (fix-error stack tok val start-pos end-pos get-token) + (get-token))])))))) (cond - ((null? (cdr starts)) (make-parser 0)) - (else - (let loop ((l starts) - (i 0)) - (cond - ((null? l) null) - (else (cons (make-parser i) (loop (cdr l) (add1 i)))))))))) + [(null? (cdr starts)) (make-parser 0)] + [else + (for/list ([(l i) (in-indexed starts)]) + (make-parser i))]))) diff --git a/br-parser-tools-lib/info.rkt b/br-parser-tools-lib/info.rkt index f9f9e11..1e24a13 100644 --- a/br-parser-tools-lib/info.rkt +++ b/br-parser-tools-lib/info.rkt @@ -7,5 +7,3 @@ (define build-deps '("rackunit-lib")) (define pkg-desc "implementation (no documentation) part of \"br-parser-tools\"") - -(define pkg-authors '(mflatt))