diff --git a/collects/algol60/cfg-parser.rkt b/collects/algol60/cfg-parser.rkt index 8309fb8..71d69b7 100644 --- a/collects/algol60/cfg-parser.rkt +++ b/collects/algol60/cfg-parser.rkt @@ -1,4 +1,4 @@ - +#lang racket/base ;; This module implements a parser form like the parser-tools's ;; `parser', except that it works on an arbitrary CFG (returning ;; the first sucecssful parse). @@ -28,736 +28,738 @@ ;; parser uses `parser' so that it doesn't have to know anything about ;; tokens. -(module cfg-parser mzscheme - (require parser-tools/yacc - parser-tools/lex - mzlib/list - mzlib/etc) - (require-for-syntax syntax/boundmap - mzlib/list - parser-tools/private-lex/token-syntax) - (provide cfg-parser) - - ;; A raw token, wrapped so that we can recognize it: - (define-struct tok (name orig-name val start end)) +(require parser-tools/yacc + parser-tools/lex) - ;; Represents the thread scheduler: - (define-struct tasks (active active-back waits multi-waits cache progress?)) +(require (for-syntax racket/base + syntax/boundmap + parser-tools/private-lex/token-syntax)) - (define-for-syntax make-token-identifier-mapping make-hash-table) - (define-for-syntax token-identifier-mapping-get - (case-lambda - [(t tok) - (hash-table-get t (syntax-e tok))] - [(t tok fail) - (hash-table-get t (syntax-e tok) fail)])) - (define-for-syntax token-identifier-mapping-put! - (lambda (t tok v) - (hash-table-put! t (syntax-e tok) v))) - (define-for-syntax token-identifier-mapping-map - (lambda (t f) - (hash-table-map t f))) - - ;; Used to calculate information on the grammar, such as whether - ;; a particular non-terminal is "simple" instead of recursively defined. - (define-for-syntax (nt-fixpoint nts proc nt-ids patss) - (define (ormap-all val f as bs) - (cond - [(null? 
as) val] - [else (ormap-all (or (f (car as) (car bs)) val) - f - (cdr as) (cdr bs))])) - (let loop () - (when (ormap-all #f - (lambda (nt pats) - (let ([old (bound-identifier-mapping-get nts nt)]) - (let ([new (proc nt pats old)]) - (if (equal? old new) - #f - (begin - (bound-identifier-mapping-put! nts nt new) - #t))))) - nt-ids patss) - (loop)))) - - ;; Tries parse-a followed by parse-b. If parse-a is not simple, - ;; then after parse-a succeeds once, we parallelize parse-b - ;; and trying a second result for parse-a. - (define (parse-and simple-a? parse-a parse-b - stream depth end success-k fail-k - max-depth tasks) - (letrec ([mk-got-k - (lambda (success-k fail-k) - (lambda (val stream depth max-depth tasks next1-k) - (if simple-a? - (parse-b val stream depth end - (mk-got2-k success-k fail-k next1-k) - (mk-fail2-k success-k fail-k next1-k) - max-depth tasks) - (parallel-or - (lambda (success-k fail-k max-depth tasks) - (parse-b val stream depth end - success-k fail-k - max-depth tasks)) - (lambda (success-k fail-k max-depth tasks) - (next1-k (mk-got-k success-k fail-k) - fail-k max-depth tasks)) - success-k fail-k max-depth tasks))))] - [mk-got2-k - (lambda (success-k fail-k next1-k) - (lambda (val stream depth max-depth tasks next-k) - (success-k val stream depth max-depth tasks - (lambda (success-k fail-k max-depth tasks) - (next-k (mk-got2-k success-k fail-k next1-k) - (mk-fail2-k success-k fail-k next1-k) - max-depth tasks)))))] - [mk-fail2-k - (lambda (success-k fail-k next1-k) - (lambda (max-depth tasks) - (next1-k (mk-got-k success-k fail-k) - fail-k - max-depth - tasks)))]) - (parse-a stream depth end - (mk-got-k success-k fail-k) - fail-k - max-depth tasks))) +(provide cfg-parser) - ;; Parallel or for non-terminal alternatives - (define (parse-parallel-or parse-a parse-b stream depth end success-k fail-k max-depth tasks) - (parallel-or (lambda (success-k fail-k max-depth tasks) - (parse-a stream depth end success-k fail-k max-depth tasks)) - (lambda 
;; A raw token, wrapped so that we can recognize it.  `name` is the
;; gensym'd internal token class, `orig-name` the user's token name,
;; `val` the token's payload, `start`/`end` its source positions.
(define-struct tok (name orig-name val start end))

;; Represents the thread scheduler: a deque of runnable thunks
;; (`active` + reversed `active-back`), single-waiter and multi-waiter
;; tables keyed by answer key, a memoization cache, and a flag that
;; records whether any progress happened in the current sweep.
(define-struct tasks (active active-back waits multi-waits cache progress?))

;; Compile-time mapping from token identifiers to data, keyed by the
;; identifier's symbol (an eq?-based mutable hash).
(define-for-syntax make-token-identifier-mapping make-hasheq)
(define-for-syntax token-identifier-mapping-get
  (case-lambda
    [(t tok)
     (hash-ref t (syntax-e tok))]
    [(t tok fail)
     (hash-ref t (syntax-e tok) fail)]))
(define-for-syntax token-identifier-mapping-put!
  (lambda (t tok v)
    (hash-set! t (syntax-e tok) v)))
(define-for-syntax token-identifier-mapping-map
  (lambda (t f)
    (hash-map t f)))

;; Used to calculate information on the grammar, such as whether
;; a particular non-terminal is "simple" instead of recursively defined.
;; Repeatedly applies `proc` to every (nt, pats) pair, storing the result
;; in `nts`, until no stored value changes (a fixed point).
(define-for-syntax (nt-fixpoint nts proc nt-ids patss)
  ;; Like `ormap` over two lists, but always traverses every pair so
  ;; each non-terminal is updated on each pass; returns #t if any call
  ;; to `f` returned a true value.
  (define (ormap-all val f as bs)
    (cond
      [(null? as) val]
      [else (ormap-all (or (f (car as) (car bs)) val)
                       f
                       (cdr as) (cdr bs))]))
  (let loop ()
    (when (ormap-all #f
                     (lambda (nt pats)
                       (let ([old (bound-identifier-mapping-get nts nt)])
                         (let ([new (proc nt pats old)])
                           (if (equal? old new)
                               #f
                               (begin
                                 (bound-identifier-mapping-put! nts nt new)
                                 #t)))))
                     nt-ids patss)
      (loop))))

;; Tries parse-a followed by parse-b. If parse-a is not simple,
;; then after parse-a succeeds once, we parallelize parse-b
;; and trying a second result for parse-a.
;;
;; Continuation protocol used throughout this file:
;;   success-k receives (val stream depth max-depth tasks next-k),
;;     where next-k can be applied to (success-k fail-k max-depth tasks)
;;     to ask for the next alternative result;
;;   fail-k receives (max-depth tasks).
(define (parse-and simple-a? parse-a parse-b
                   stream depth end success-k fail-k
                   max-depth tasks)
  (letrec ([mk-got-k
            ;; Success continuation for parse-a: either run parse-b
            ;; sequentially (simple case) or race parse-b against
            ;; asking parse-a for another result.
            (lambda (success-k fail-k)
              (lambda (val stream depth max-depth tasks next1-k)
                (if simple-a?
                    (parse-b val stream depth end
                             (mk-got2-k success-k fail-k next1-k)
                             (mk-fail2-k success-k fail-k next1-k)
                             max-depth tasks)
                    (parallel-or
                     (lambda (success-k fail-k max-depth tasks)
                       (parse-b val stream depth end
                                success-k fail-k
                                max-depth tasks))
                     (lambda (success-k fail-k max-depth tasks)
                       (next1-k (mk-got-k success-k fail-k)
                                fail-k max-depth tasks))
                     success-k fail-k max-depth tasks))))]
           [mk-got2-k
            ;; Success continuation for parse-b in the simple case:
            ;; report the combined result and arrange for re-entry on
            ;; a request for the next result.
            (lambda (success-k fail-k next1-k)
              (lambda (val stream depth max-depth tasks next-k)
                (success-k val stream depth max-depth tasks
                           (lambda (success-k fail-k max-depth tasks)
                             (next-k (mk-got2-k success-k fail-k next1-k)
                                     (mk-fail2-k success-k fail-k next1-k)
                                     max-depth tasks)))))]
           [mk-fail2-k
            ;; parse-b failed for this parse-a result: ask parse-a
            ;; (via next1-k) for its next result and retry.
            (lambda (success-k fail-k next1-k)
              (lambda (max-depth tasks)
                (next1-k (mk-got-k success-k fail-k)
                         fail-k
                         max-depth
                         tasks)))])
    (parse-a stream depth end
             (mk-got-k success-k fail-k)
             fail-k
             max-depth tasks)))
;; Parallel or for non-terminal alternatives: adapts two parsers of
;; the standard (stream depth end success-k fail-k max-depth tasks)
;; shape to the generic `parallel-or` below.
(define (parse-parallel-or parse-a parse-b stream depth end success-k fail-k max-depth tasks)
  (parallel-or (lambda (success-k fail-k max-depth tasks)
                 (parse-a stream depth end success-k fail-k max-depth tasks))
               (lambda (success-k fail-k max-depth tasks)
                 (parse-b stream depth end success-k fail-k max-depth tasks))
               success-k fail-k max-depth tasks))

;; Generic parallel-or: schedules parse-a and parse-b as separate
;; tasks, collecting their answers under a fresh `answer-key` so that
;; whichever succeeds first wins; further results are produced on
;; demand via the next-k protocol.
(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks)
  (define answer-key (gensym))
  (letrec ([gota-k
            ;; Success continuation handed to both branches: deliver
            ;; the result to whichever waiter registered `answer-key`.
            (lambda (val stream depth max-depth tasks next-k)
              (report-answer answer-key
                             max-depth
                             tasks
                             (list val stream depth next-k)))]
           [faila-k
            ;; Failure is reported as the empty list.
            (lambda (max-depth tasks)
              (report-answer answer-key
                             max-depth
                             tasks
                             null))])
    (let* ([tasks (queue-task
                   tasks
                   (lambda (max-depth tasks)
                     (parse-a gota-k
                              faila-k
                              max-depth tasks)))]
           [tasks (queue-task
                   tasks
                   (lambda (max-depth tasks)
                     (parse-b gota-k
                              faila-k
                              max-depth tasks)))]
           ;; Re-queue a branch's next-k so it reports through the
           ;; same answer key.
           [queue-next (lambda (next-k tasks)
                         (queue-task tasks
                                     (lambda (max-depth tasks)
                                       (next-k gota-k
                                               faila-k
                                               max-depth tasks))))])
      (letrec ([mk-got-one
                ;; Wrap a success so that asking for the next result
                ;; re-enters get-first/get-second; `immediate-next?`
                ;; controls whether the branch's next-k is queued now
                ;; (first answer, keep both branches running) or only
                ;; when more results are requested.
                (lambda (immediate-next? get-nth success-k)
                  (lambda (val stream depth max-depth tasks next-k)
                    (let ([tasks (if immediate-next?
                                     (queue-next next-k tasks)
                                     tasks)])
                      (success-k val stream depth max-depth
                                 tasks
                                 (lambda (success-k fail-k max-depth tasks)
                                   (let ([tasks (if immediate-next?
                                                    tasks
                                                    (queue-next next-k tasks))])
                                     (get-nth max-depth tasks success-k fail-k)))))))]
               [get-first
                ;; Wait for the first branch to answer; if it failed,
                ;; fall through to waiting for the second branch.
                (lambda (max-depth tasks success-k fail-k)
                  (wait-for-answer #f max-depth tasks answer-key
                                   (mk-got-one #t get-first success-k)
                                   (lambda (max-depth tasks)
                                     (get-second max-depth tasks success-k fail-k))
                                   #f))]
               [get-second
                ;; Only one branch remains; its failure is final.
                (lambda (max-depth tasks success-k fail-k)
                  (wait-for-answer #f max-depth tasks answer-key
                                   (mk-got-one #f get-second success-k)
                                   fail-k #f))])
        (get-first max-depth tasks success-k fail-k)))))
tasks))))) +;; Non-terminal alternatives where the first is "simple" can be done +;; sequentially, which is simpler +(define (parse-or parse-a parse-b + stream depth end success-k fail-k max-depth tasks) + (letrec ([mk-got-k + (lambda (success-k fail-k) + (lambda (val stream depth max-depth tasks next-k) + (success-k val stream depth + max-depth tasks + (lambda (success-k fail-k max-depth tasks) + (next-k (mk-got-k success-k fail-k) + (mk-fail-k success-k fail-k) + max-depth tasks)))))] + [mk-fail-k + (lambda (success-k fail-k) + (lambda (max-depth tasks) + (parse-b stream depth end success-k fail-k max-depth tasks)))]) + (parse-a stream depth end + (mk-got-k success-k fail-k) + (mk-fail-k success-k fail-k) + max-depth tasks))) - ;; Reports an answer to a waiting thread: - (define (report-answer answer-key max-depth tasks val) - (let ([v (hash-table-get (tasks-waits tasks) answer-key (lambda () #f))]) - (if v - (let ([tasks (make-tasks (cons (v val) - (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (hash-table-remove! (tasks-waits tasks) answer-key) - (swap-task max-depth tasks)) - ;; We have an answer ready too fast; wait - (swap-task max-depth - (queue-task tasks - (lambda (max-depth tasks) - (report-answer answer-key max-depth tasks val)) - #f))))) +;; Starts a thread +(define queue-task + (lambda (tasks t [progress? #t]) + (make-tasks (tasks-active tasks) + (cons t (tasks-active-back tasks)) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + (or progress? (tasks-progress? tasks))))) - ;; Reports an answer to multiple waiting threads: - (define (report-answer-all answer-key max-depth tasks val k) - (let ([v (hash-table-get (tasks-multi-waits tasks) answer-key (lambda () null))]) - (hash-table-remove! 
(tasks-multi-waits tasks) answer-key) - (let ([tasks (make-tasks (append (map (lambda (a) (a val)) v) +;; Reports an answer to a waiting thread: +(define (report-answer answer-key max-depth tasks val) + (let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))]) + (if v + (let ([tasks (make-tasks (cons (v val) (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (k max-depth tasks)))) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #t)]) + (hash-remove! (tasks-waits tasks) answer-key) + (swap-task max-depth tasks)) + ;; We have an answer ready too fast; wait + (swap-task max-depth + (queue-task tasks + (lambda (max-depth tasks) + (report-answer answer-key max-depth tasks val)) + #f))))) + +;; Reports an answer to multiple waiting threads: +(define (report-answer-all answer-key max-depth tasks val k) + (let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))]) + (hash-remove! (tasks-multi-waits tasks) answer-key) + (let ([tasks (make-tasks (append (map (lambda (a) (a val)) v) + (tasks-active tasks)) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #t)]) + (k max-depth tasks)))) - ;; Waits for an answer; if `multi?' is #f, this is sole waiter, otherwise - ;; there might be many. Use wither #t or #f (and `report-answer' or - ;; `report-answer-all', resptively) consistently for a particular answer key. - (define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k) - (let ([wait (lambda (val) - (lambda (max-depth tasks) - (if val - (if (null? val) - (fail-k max-depth tasks) - (let-values ([(val stream depth next-k) (apply values val)]) - (success-k val stream depth max-depth tasks next-k))) - (deadlock-k max-depth tasks))))]) - (if multi? - (hash-table-put! 
;; Waits for an answer; if `multi?' is #f, this is the sole waiter, otherwise
;; there might be many. Use either #t or #f (and `report-answer' or
;; `report-answer-all', respectively) consistently for a particular answer key.
;; The registered waiter decodes the reported value: #f means deadlock,
;; null means failure, and a 4-element list means success.
(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k)
  (let ([wait (lambda (val)
                (lambda (max-depth tasks)
                  (if val
                      (if (null? val)
                          (fail-k max-depth tasks)
                          (let-values ([(val stream depth next-k) (apply values val)])
                            (success-k val stream depth max-depth tasks next-k)))
                      (deadlock-k max-depth tasks))))])
    (if multi?
        (hash-set! (tasks-multi-waits tasks) answer-key
                   (cons wait (hash-ref (tasks-multi-waits tasks) answer-key
                                        (lambda () null))))
        (hash-set! (tasks-waits tasks) answer-key wait))
    ;; Registering a waiter counts as progress.
    (let ([tasks (make-tasks (tasks-active tasks)
                             (tasks-active-back tasks)
                             (tasks-waits tasks)
                             (tasks-multi-waits tasks)
                             (tasks-cache tasks)
                             #t)])
      (swap-task max-depth tasks))))

;; Swap thread: run the next active task.  When the active queue is
;; empty, either recycle the back queue (if progress was made) or, on
;; no progress, wake every multi-waiter with #f (deadlock signal);
;; with no multi-waiters left either, the scheduler is truly stuck.
(define (swap-task max-depth tasks)
  ;; Swap in first active:
  (if (null? (tasks-active tasks))
      (if (tasks-progress? tasks)
          (swap-task max-depth
                     (make-tasks (reverse (tasks-active-back tasks))
                                 null
                                 (tasks-waits tasks)
                                 (tasks-multi-waits tasks)
                                 (tasks-cache tasks)
                                 #f))
          ;; No progress, so issue failure for all multi-waits
          (if (zero? (hash-count (tasks-multi-waits tasks)))
              (error 'swap-task "Deadlock")
              (swap-task max-depth
                         (make-tasks (apply
                                      append
                                      (hash-map (tasks-multi-waits tasks)
                                                (lambda (k l)
                                                  (map (lambda (v) (v #f)) l))))
                                     (tasks-active-back tasks)
                                     (tasks-waits tasks)
                                     (make-hasheq)
                                     (tasks-cache tasks)
                                     #t))))
      (let ([t (car (tasks-active tasks))]
            [tasks (make-tasks (cdr (tasks-active tasks))
                               (tasks-active-back tasks)
                               (tasks-waits tasks)
                               (tasks-multi-waits tasks)
                               (tasks-cache tasks)
                               (tasks-progress? tasks))])
        (t max-depth tasks))))

;; Finds the symbolic representative of a token class.
(define-for-syntax (map-token toks tok)
  (car (token-identifier-mapping-get toks tok)))
;; A shared "no position" value, and a compile-time helper that always
;; expands to it (certified so it can be used in macro-introduced code).
(define no-pos-val (make-position #f #f #f))
(define-for-syntax no-pos
  (let ([npv ((syntax-local-certifier) #'no-pos-val)])
    (lambda (stx) npv)))
;; Compile-time helper: produce a macro transformer that extracts a
;; position (via selector `sel`) from the token expression `expr`,
;; falling back to `no-pos-val` when there is no token.
(define-for-syntax at-tok-pos
  (lambda (sel expr)
    (lambda (stx)
      #`(let ([v #,expr]) (if v (#,sel v) no-pos-val)))))

;; Builds a matcher for a particular alternative: generates code that
;; matches each pattern element in sequence (non-terminal elements via
;; `parse-and`, token elements inline), binding $1, $1-start-pos,
;; $1-end-pos, ... and $n-end-pos for the handler `handle`.
(define-for-syntax (build-match nts toks pat handle $ctx)
  (let loop ([pat pat]
             [pos 1])
    (if (null? pat)
        ;; Whole pattern matched: deliver the handler's value; the
        ;; next-result continuation immediately fails (no more results).
        #`(success-k #,handle stream depth max-depth tasks
                     (lambda (success-k fail-k max-depth tasks)
                       (fail-k max-depth tasks)))
        (let ([id (datum->syntax (car pat)
                                 (string->symbol (format "$~a" pos)))]
              [id-start-pos (datum->syntax (car pat)
                                           (string->symbol (format "$~a-start-pos" pos)))]
              [id-end-pos (datum->syntax (car pat)
                                         (string->symbol (format "$~a-end-pos" pos)))]
              ;; $n-end-pos is only bound at the last pattern element.
              [n-end-pos (and (null? (cdr pat))
                              (datum->syntax (car pat) '$n-end-pos))])
          (cond
            [(bound-identifier-mapping-get nts (car pat) (lambda () #f))
             ;; Match non-terminal
             #`(parse-and
                ;; First part is simple? (If so, we don't have to parallelize the `and'.)
                #,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))])
                    (or (not l)
                        (andmap values (caddr l))))
                #,(car pat)
                (lambda (#,id stream depth end success-k fail-k max-depth tasks)
                  (let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'(and (pair? stream) (car stream)))]
                               [#,id-end-pos (at-tok-pos #'tok-end #'(and (pair? stream) (car stream)))]
                               #,@(if n-end-pos
                                      #`([#,n-end-pos (at-tok-pos #'tok-end #'(and (pair? stream) (car stream)))])
                                      null))
                    #,(loop (cdr pat) (add1 pos))))
                stream depth
                ;; Tighten `end` by the minimum number of tokens the
                ;; rest of the pattern must consume.
                #,(let ([cnt (apply +
                                    (map (lambda (item)
                                           (cond
                                             [(bound-identifier-mapping-get nts item (lambda () #f))
                                              => (lambda (l) (car l))]
                                             [else 1]))
                                         (cdr pat)))])
                    #`(- end #,cnt))
                success-k fail-k max-depth tasks)]
            [else
             ;; Match token
             (let ([tok-id (map-token toks (car pat))])
               #`(if (and (pair? stream)
                          (eq? '#,tok-id (tok-name (car stream))))
                     (let* ([stream-a (car stream)]
                            [#,id (tok-val stream-a)]
                            [stream (cdr stream)]
                            [depth (add1 depth)])
                       (let ([max-depth (max max-depth depth)])
                         (let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'stream-a)]
                                      [#,id-end-pos (at-tok-pos #'tok-end #'stream-a)]
                                      #,@(if n-end-pos
                                             #`([#,n-end-pos (at-tok-pos #'tok-end #'stream-a)])
                                             null))
                           #,(loop (cdr pat) (add1 pos)))))
                     (fail-k max-depth tasks)))])))))
;; Starts parsing to match a non-terminal. There's a minor
;; optimization that checks for known starting tokens. Otherwise,
;; use the cache, block if someone else is already trying the match,
;; and cache the result if it's computed.
;; The cache maps nonterminal+startingpos+iteration to a result, where
;; the iteration is 0 for the first match attempt, 1 for the second,
;; etc.
(define (parse-nt/share key min-cnt init-tokens stream depth end max-depth tasks success-k fail-k k)
  (if (and (positive? min-cnt)
           (pair? stream)
           (not (memq (tok-name (car stream)) init-tokens)))
      ;; No such leading token; give up
      (fail-k max-depth tasks)
      ;; Run pattern
      (let loop ([n 0]
                 [success-k success-k]
                 [fail-k fail-k]
                 [max-depth max-depth]
                 [tasks tasks]
                 [k k])
        (let ([answer-key (gensym)]
              [table-key (vector key depth n)]
              [old-depth depth]
              [old-stream stream])
          #;(printf "Loop ~a\n" table-key)
          (cond
            ;; A cached entry is a procedure taking the standard
            ;; (success-k fail-k max-depth tasks) and replaying the
            ;; memoized outcome (or registering a multi-wait while the
            ;; outcome is still being computed).
            [(hash-ref (tasks-cache tasks) table-key (lambda () #f))
             => (lambda (result)
                  #;(printf "Reuse ~a\n" table-key)
                  (result success-k fail-k max-depth tasks))]
            [else
             #;(printf "Try ~a ~a\n" table-key (map tok-name stream))
             ;; Install a placeholder so concurrent attempts at the
             ;; same (key, depth, n) block until we report an answer.
             (hash-set! (tasks-cache tasks) table-key
                        (lambda (success-k fail-k max-depth tasks)
                          #;(printf "Wait ~a ~a\n" table-key answer-key)
                          (wait-for-answer #t max-depth tasks answer-key success-k fail-k
                                           (lambda (max-depth tasks)
                                             #;(printf "Deadlock ~a ~a\n" table-key answer-key)
                                             (fail-k max-depth tasks)))))
             (let result-loop ([max-depth max-depth][tasks tasks][k k])
               (letrec ([orig-stream stream]
                        [new-got-k
                         (lambda (val stream depth max-depth tasks next-k)
                           ;; Check whether we already have a result that consumed the same amount:
                           (let ([result-key (vector #f key old-depth depth)])
                             (cond
                               [(hash-ref (tasks-cache tasks) result-key (lambda () #f))
                                ;; Go for the next-result
                                (result-loop max-depth
                                             tasks
                                             (lambda (end max-depth tasks success-k fail-k)
                                               (next-k success-k fail-k max-depth tasks)))]
                               [else
                                ;; New distinct result: record it, replace the
                                ;; placeholder with a replaying success entry,
                                ;; and wake everyone waiting on this attempt.
                                (let ([next-k (lambda (success-k fail-k max-depth tasks)
                                                (loop (add1 n)
                                                      success-k
                                                      fail-k
                                                      max-depth
                                                      tasks
                                                      (lambda (end max-depth tasks success-k fail-k)
                                                        (next-k success-k fail-k max-depth tasks))))])
                                  (hash-set! (tasks-cache tasks) result-key #t)
                                  (hash-set! (tasks-cache tasks) table-key
                                             (lambda (success-k fail-k max-depth tasks)
                                               (success-k val stream depth max-depth tasks next-k)))
                                  (report-answer-all answer-key
                                                     max-depth
                                                     tasks
                                                     (list val stream depth next-k)
                                                     (lambda (max-depth tasks)
                                                       (success-k val stream depth max-depth tasks next-k))))])))]
                        [new-fail-k
                         (lambda (max-depth tasks)
                           #;(printf "Failure ~a\n" table-key)
                           ;; Cache the failure and wake all waiters with it.
                           (hash-set! (tasks-cache tasks) table-key
                                      (lambda (success-k fail-k max-depth tasks)
                                        (fail-k max-depth tasks)))
                           (report-answer-all answer-key
                                              max-depth
                                              tasks
                                              null
                                              (lambda (max-depth tasks)
                                                (fail-k max-depth tasks))))])
                 (k end max-depth tasks new-got-k new-fail-k)))])))))
(tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - (fail-k max-depth tasks))) - (report-answer-all answer-key - max-depth - tasks - null - (lambda (max-depth tasks) - (fail-k max-depth tasks))))]) - (k end max-depth tasks new-got-k new-fail-k)))]))))) - - (define-syntax (cfg-parser stx) - (syntax-case stx () - [(_ clause ...) - (let ([clauses (syntax->list #'(clause ...))]) - (let-values ([(start grammar cfg-error parser-clauses) - (let ([all-toks (apply - append - (map (lambda (clause) - (syntax-case clause (tokens) - [(tokens t ...) - (apply - append - (map (lambda (t) - (let ([v (syntax-local-value t (lambda () #f))]) - (cond - [(terminals-def? v) - (map (lambda (v) - (cons v #f)) - (syntax->list (terminals-def-t v)))] - [(e-terminals-def? v) - (map (lambda (v) - (cons v #t)) - (syntax->list (e-terminals-def-t v)))] - [else null]))) - (syntax->list #'(t ...))))] - [_else null])) - clauses))] - [all-end-toks (apply - append - (map (lambda (clause) - (syntax-case clause (end) - [(end t ...) - (syntax->list #'(t ...))] - [_else null])) - clauses))]) - (let loop ([clauses clauses] - [cfg-start #f] - [cfg-grammar #f] - [cfg-error #f] - [src-pos? #f] - [parser-clauses null]) - (if (null? clauses) - (values cfg-start - cfg-grammar - cfg-error - (reverse parser-clauses)) - (syntax-case (car clauses) (start error grammar src-pos) - [(start tok) - (loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)] - [(error expr) - (loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)] - [(grammar [nt [pat handle0 handle ...] ...] ...) - (let ([nts (make-bound-identifier-mapping)] - [toks (make-token-identifier-mapping)] - [end-toks (make-token-identifier-mapping)] - [nt-ids (syntax->list #'(nt ...))] - [patss (map (lambda (stx) - (map syntax->list (syntax->list stx))) - (syntax->list #'((pat ...) ...)))]) - (for-each (lambda (nt) - (bound-identifier-mapping-put! 
nts nt (list 0))) - nt-ids) - (for-each (lambda (t) - (token-identifier-mapping-put! end-toks t #t)) - all-end-toks) - (for-each (lambda (t) - (unless (token-identifier-mapping-get end-toks (car t) (lambda () #f)) - (let ([id (gensym (syntax-e (car t)))]) - (token-identifier-mapping-put! toks (car t) - (cons id (cdr t)))))) - all-toks) - ;; Compute min max size for each non-term: - (nt-fixpoint - nts - (lambda (nt pats old-list) - (let ([new-cnt - (apply - min - (map (lambda (pat) - (apply - + - (map (lambda (elem) - (car - (bound-identifier-mapping-get nts - elem - (lambda () (list 1))))) - pat))) - pats))]) - (if (new-cnt . > . (car old-list)) - (cons new-cnt (cdr old-list)) - old-list))) - nt-ids patss) - ;; Compute set of toks that must appear at the beginning - ;; for a non-terminal - (nt-fixpoint - nts - (lambda (nt pats old-list) - (let ([new-list - (apply - append - (map (lambda (pat) - (let loop ([pat pat]) - (if (pair? pat) - (let ([l (bound-identifier-mapping-get - nts - (car pat) - (lambda () - (list 1 (map-token toks (car pat)))))]) - ;; If the non-terminal can match 0 things, - ;; then it might match something from the - ;; next pattern element. Otherwise, it must - ;; match the first element: - (if (zero? (car l)) - (append (cdr l) (loop (cdr pat))) - (cdr l))) - null))) - pats))]) - (let ([new (filter (lambda (id) - (andmap (lambda (id2) - (not (eq? id id2))) - (cdr old-list))) - new-list)]) - (if (pair? new) - ;; Drop dups in new list: - (let ([new (let loop ([new new]) - (if (null? (cdr new)) - new - (if (ormap (lambda (id) - (eq? (car new) id)) - (cdr new)) - (loop (cdr new)) - (cons (car new) (loop (cdr new))))))]) - (cons (car old-list) (append new (cdr old-list)))) - old-list)))) - nt-ids patss) - ;; Determine left-recursive clauses: - (for-each (lambda (nt pats) - (let ([l (bound-identifier-mapping-get nts nt)]) - (bound-identifier-mapping-put! 
nts nt (list (car l) - (cdr l) - (map (lambda (x) #f) pats))))) - nt-ids patss) - (nt-fixpoint - nts - (lambda (nt pats old-list) - (list (car old-list) - (cadr old-list) - (map (lambda (pat simple?) - (or simple? - (let ([l (map (lambda (elem) - (bound-identifier-mapping-get - nts - elem - (lambda () #f))) - pat)]) - (andmap (lambda (i) - (or (not i) - (andmap values (caddr i)))) - l)))) - pats (caddr old-list)))) - nt-ids patss) - ;; Build a definition for each non-term: - (loop (cdr clauses) - cfg-start - (map (lambda (nt pats handles $ctxs) - (define info (bound-identifier-mapping-get nts nt)) - (list nt - #`(let ([key (gensym '#,nt)]) - (lambda (stream depth end success-k fail-k max-depth tasks) - (parse-nt/share - key #,(car info) '#,(cadr info) stream depth end - max-depth tasks - success-k fail-k - (lambda (end max-depth tasks success-k fail-k) - #,(let loop ([pats pats] - [handles (syntax->list handles)] - [$ctxs (syntax->list $ctxs)] - [simple?s (caddr info)]) - (if (null? pats) - #'(fail-k max-depth tasks) - #`(#,(if (or (null? (cdr pats)) - (car simple?s)) - #'parse-or - #'parse-parallel-or) - (lambda (stream depth end success-k fail-k max-depth tasks) - #,(build-match nts - toks - (car pats) - (car handles) - (car $ctxs))) - (lambda (stream depth end success-k fail-k max-depth tasks) - #,(loop (cdr pats) - (cdr handles) - (cdr $ctxs) - (cdr simple?s))) - stream depth end success-k fail-k max-depth tasks))))))))) - nt-ids - patss - (syntax->list #'(((begin handle0 handle ...) ...) ...)) - (syntax->list #'((handle0 ...) ...))) - cfg-error - src-pos? - (list* - (with-syntax ([((tok tok-id . $e) ...) - (token-identifier-mapping-map toks - (lambda (k v) - (list* k - (car v) - (if (cdr v) - #f - '$1))))] - [(pos ...) - (if src-pos? - #'($1-start-pos $1-end-pos) - #'(#f #f))]) - #`(grammar (start [() null] - [(atok start) (cons $1 $2)]) - (atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...))) - #`(start start) - parser-clauses)))] - [(grammar . 
_) - (raise-syntax-error - #f - "bad grammar clause" - stx - (car clauses))] - [(src-pos) + (success-k val stream depth max-depth tasks next-k))))])))] + [new-fail-k + (lambda (max-depth tasks) + #;(printf "Failure ~a\n" table-key) + (hash-set! (tasks-cache tasks) table-key + (lambda (success-k fail-k max-depth tasks) + (fail-k max-depth tasks))) + (report-answer-all answer-key + max-depth + tasks + null + (lambda (max-depth tasks) + (fail-k max-depth tasks))))]) + (k end max-depth tasks new-got-k new-fail-k)))]))))) + +(define-syntax (cfg-parser stx) + (syntax-case stx () + [(_ clause ...) + (let ([clauses (syntax->list #'(clause ...))]) + (let-values ([(start grammar cfg-error parser-clauses) + (let ([all-toks (apply + append + (map (lambda (clause) + (syntax-case clause (tokens) + [(tokens t ...) + (apply + append + (map (lambda (t) + (let ([v (syntax-local-value t (lambda () #f))]) + (cond + [(terminals-def? v) + (map (lambda (v) + (cons v #f)) + (syntax->list (terminals-def-t v)))] + [(e-terminals-def? v) + (map (lambda (v) + (cons v #t)) + (syntax->list (e-terminals-def-t v)))] + [else null]))) + (syntax->list #'(t ...))))] + [_else null])) + clauses))] + [all-end-toks (apply + append + (map (lambda (clause) + (syntax-case clause (end) + [(end t ...) + (syntax->list #'(t ...))] + [_else null])) + clauses))]) + (let loop ([clauses clauses] + [cfg-start #f] + [cfg-grammar #f] + [cfg-error #f] + [src-pos? #f] + [parser-clauses null]) + (if (null? clauses) + (values cfg-start + cfg-grammar + cfg-error + (reverse parser-clauses)) + (syntax-case (car clauses) (start error grammar src-pos) + [(start tok) + (loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)] + [(error expr) + (loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)] + [(grammar [nt [pat handle0 handle ...] ...] ...) 
+ (let ([nts (make-bound-identifier-mapping)] + [toks (make-token-identifier-mapping)] + [end-toks (make-token-identifier-mapping)] + [nt-ids (syntax->list #'(nt ...))] + [patss (map (lambda (stx) + (map syntax->list (syntax->list stx))) + (syntax->list #'((pat ...) ...)))]) + (for-each (lambda (nt) + (bound-identifier-mapping-put! nts nt (list 0))) + nt-ids) + (for-each (lambda (t) + (token-identifier-mapping-put! end-toks t #t)) + all-end-toks) + (for-each (lambda (t) + (unless (token-identifier-mapping-get end-toks (car t) (lambda () #f)) + (let ([id (gensym (syntax-e (car t)))]) + (token-identifier-mapping-put! toks (car t) + (cons id (cdr t)))))) + all-toks) + ;; Compute min max size for each non-term: + (nt-fixpoint + nts + (lambda (nt pats old-list) + (let ([new-cnt + (apply + min + (map (lambda (pat) + (apply + + + (map (lambda (elem) + (car + (bound-identifier-mapping-get nts + elem + (lambda () (list 1))))) + pat))) + pats))]) + (if (new-cnt . > . (car old-list)) + (cons new-cnt (cdr old-list)) + old-list))) + nt-ids patss) + ;; Compute set of toks that must appear at the beginning + ;; for a non-terminal + (nt-fixpoint + nts + (lambda (nt pats old-list) + (let ([new-list + (apply + append + (map (lambda (pat) + (let loop ([pat pat]) + (if (pair? pat) + (let ([l (bound-identifier-mapping-get + nts + (car pat) + (lambda () + (list 1 (map-token toks (car pat)))))]) + ;; If the non-terminal can match 0 things, + ;; then it might match something from the + ;; next pattern element. Otherwise, it must + ;; match the first element: + (if (zero? (car l)) + (append (cdr l) (loop (cdr pat))) + (cdr l))) + null))) + pats))]) + (let ([new (filter (lambda (id) + (andmap (lambda (id2) + (not (eq? id id2))) + (cdr old-list))) + new-list)]) + (if (pair? new) + ;; Drop dups in new list: + (let ([new (let loop ([new new]) + (if (null? (cdr new)) + new + (if (ormap (lambda (id) + (eq? 
(car new) id)) + (cdr new)) + (loop (cdr new)) + (cons (car new) (loop (cdr new))))))]) + (cons (car old-list) (append new (cdr old-list)))) + old-list)))) + nt-ids patss) + ;; Determine left-recursive clauses: + (for-each (lambda (nt pats) + (let ([l (bound-identifier-mapping-get nts nt)]) + (bound-identifier-mapping-put! nts nt (list (car l) + (cdr l) + (map (lambda (x) #f) pats))))) + nt-ids patss) + (nt-fixpoint + nts + (lambda (nt pats old-list) + (list (car old-list) + (cadr old-list) + (map (lambda (pat simple?) + (or simple? + (let ([l (map (lambda (elem) + (bound-identifier-mapping-get + nts + elem + (lambda () #f))) + pat)]) + (andmap (lambda (i) + (or (not i) + (andmap values (caddr i)))) + l)))) + pats (caddr old-list)))) + nt-ids patss) + ;; Build a definition for each non-term: (loop (cdr clauses) - cfg-start - cfg-grammar - cfg-error - #t - (cons (car clauses) parser-clauses))] - [_else - (loop (cdr clauses) - cfg-start - cfg-grammar - cfg-error + cfg-start + (map (lambda (nt pats handles $ctxs) + (define info (bound-identifier-mapping-get nts nt)) + (list nt + #`(let ([key (gensym '#,nt)]) + (lambda (stream depth end success-k fail-k max-depth tasks) + (parse-nt/share + key #,(car info) '#,(cadr info) stream depth end + max-depth tasks + success-k fail-k + (lambda (end max-depth tasks success-k fail-k) + #,(let loop ([pats pats] + [handles (syntax->list handles)] + [$ctxs (syntax->list $ctxs)] + [simple?s (caddr info)]) + (if (null? pats) + #'(fail-k max-depth tasks) + #`(#,(if (or (null? (cdr pats)) + (car simple?s)) + #'parse-or + #'parse-parallel-or) + (lambda (stream depth end success-k fail-k max-depth tasks) + #,(build-match nts + toks + (car pats) + (car handles) + (car $ctxs))) + (lambda (stream depth end success-k fail-k max-depth tasks) + #,(loop (cdr pats) + (cdr handles) + (cdr $ctxs) + (cdr simple?s))) + stream depth end success-k fail-k max-depth tasks))))))))) + nt-ids + patss + (syntax->list #'(((begin handle0 handle ...) ...) 
...)) + (syntax->list #'((handle0 ...) ...))) + cfg-error src-pos? - (cons (car clauses) parser-clauses))]))))]) - #`(let ([orig-parse (parser - [error (lambda (a b c) - (error 'cfg-parser "unexpected ~a token: ~a" b c))] - . #,parser-clauses)] - [error-proc #,cfg-error]) - (letrec #,grammar - (lambda (get-tok) - (let ([tok-list (orig-parse get-tok)]) - (letrec ([success-k - (lambda (val stream depth max-depth tasks next) - (if (null? stream) - val - (next success-k fail-k max-depth tasks)))] - [fail-k (lambda (max-depth tasks) - (let ([bad-tok (list-ref tok-list - (min (sub1 (length tok-list)) - max-depth))]) - (if error-proc - (error-proc #t - (tok-orig-name bad-tok) - (tok-val bad-tok) - (tok-start bad-tok) - (tok-end bad-tok)) - (error - 'cfg-parse - "failed at ~a" - (tok-val bad-tok)))))]) - (#,start tok-list 0 - (length tok-list) - success-k - fail-k - 0 (make-tasks null null - (make-hash-table) (make-hash-table) - (make-hash-table 'equal) #t)))))))))])) + (list* + (with-syntax ([((tok tok-id . $e) ...) + (token-identifier-mapping-map toks + (lambda (k v) + (list* k + (car v) + (if (cdr v) + #f + '$1))))] + [(pos ...) + (if src-pos? + #'($1-start-pos $1-end-pos) + #'(#f #f))]) + #`(grammar (start [() null] + [(atok start) (cons $1 $2)]) + (atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...))) + #`(start start) + parser-clauses)))] + [(grammar . _) + (raise-syntax-error + #f + "bad grammar clause" + stx + (car clauses))] + [(src-pos) + (loop (cdr clauses) + cfg-start + cfg-grammar + cfg-error + #t + (cons (car clauses) parser-clauses))] + [_else + (loop (cdr clauses) + cfg-start + cfg-grammar + cfg-error + src-pos? + (cons (car clauses) parser-clauses))]))))]) + #`(let ([orig-parse (parser + [error (lambda (a b c) + (error 'cfg-parser "unexpected ~a token: ~a" b c))] + . 
#,parser-clauses)] + [error-proc #,cfg-error]) + (letrec #,grammar + (lambda (get-tok) + (let ([tok-list (orig-parse get-tok)]) + (letrec ([success-k + (lambda (val stream depth max-depth tasks next) + (if (null? stream) + val + (next success-k fail-k max-depth tasks)))] + [fail-k (lambda (max-depth tasks) + (let ([bad-tok (list-ref tok-list + (min (sub1 (length tok-list)) + max-depth))]) + (if error-proc + (error-proc #t + (tok-orig-name bad-tok) + (tok-val bad-tok) + (tok-start bad-tok) + (tok-end bad-tok)) + (error + 'cfg-parse + "failed at ~a" + (tok-val bad-tok)))))]) + (#,start tok-list 0 + (length tok-list) + success-k + fail-k + 0 (make-tasks null null + (make-hasheq) (make-hasheq) + (make-hash) #t)))))))))])) -#| - ;; Tests used during development +(module* test racket/base + (require (submod "..") + parser-tools/lex) + + ;; Tests used during development (define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF)) (define lex @@ -767,8 +769,10 @@ ["*" (token-STAR '*)] ["|" (token-BAR '||)] [":" (token-COLON '|:|)] + [whitespace (lex input-port)] [(eof) (token-EOF 'eof)])) + (define parse (cfg-parser (tokens non-terminals) @@ -790,13 +794,12 @@ (define (result) (let ([p (open-input-string #;"+*|-|-*|+**" #;"-|+*|+**" - #;"+*|+**|-" #;"-|-*|-|-*" - #;"-|-*|-|-**|-|-*|-|-**" - "-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|-|-*|-|-**|-|-*|-|-***\ - |-|-*|-|-**|-|-*|-|-*****|-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|\ + #;"+*|+**|-" #;"-|-*|-|-*" + #;"-|-*|-|-**|-|-*|-|-**" + "-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|-|-*|-|-**|-|-*|-|-*** + |-|-*|-|-**|-|-*|-|-*****|-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****| -|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****" - ;; This one fails: - #;"+*")]) + ;; This one fails: + #;"+*")]) (time (parse (lambda () (lex p)))))) -|# - ) + (result)) diff --git a/collects/parser-tools/private-lex/token.rkt b/collects/parser-tools/private-lex/token.rkt index 1d2dc0d..6618a57 100644 --- 
a/collects/parser-tools/private-lex/token.rkt +++ b/collects/parser-tools/private-lex/token.rkt @@ -83,7 +83,7 @@ (define-syntax define-tokens (make-define-tokens #f)) (define-syntax define-empty-tokens (make-define-tokens #t)) - (define-struct position (offset line col)) - (define-struct position-token (token start-pos end-pos)) + (define-struct position (offset line col) #f) + (define-struct position-token (token start-pos end-pos) #f) ) diff --git a/collects/parser-tools/private-yacc/graph.rkt b/collects/parser-tools/private-yacc/graph.rkt index 02e28df..958acc1 100644 --- a/collects/parser-tools/private-yacc/graph.rkt +++ b/collects/parser-tools/private-yacc/graph.rkt @@ -12,55 +12,50 @@ ;; We use a hash-table to represent the result function 'a -> 'b set, so ;; the values of type 'a must be comparable with eq?. (define (digraph nodes edges f- union fail) - (letrec ( - ;; Will map elements of 'a to 'b sets - (results (make-hash-table)) - (f (lambda (x) (hash-table-get results x fail))) - - ;; Maps elements of 'a to integers. - (N (make-hash-table)) - (get-N (lambda (x) (hash-table-get N x zero-thunk))) - (set-N (lambda (x d) (hash-table-put! N x d))) - - (stack null) - (push (lambda (x) - (set! stack (cons x stack)))) - (pop (lambda () + (letrec [ + ;; Will map elements of 'a to 'b sets + (results (make-hash-table)) + (f (lambda (x) (hash-table-get results x fail))) + + ;; Maps elements of 'a to integers. + (N (make-hash-table)) + (get-N (lambda (x) (hash-table-get N x zero-thunk))) + (set-N (lambda (x d) (hash-table-put! N x d))) + + (stack null) + (push (lambda (x) + (set! stack (cons x stack)))) + (pop (lambda () (begin0 - (car stack) - (set! stack (cdr stack))))) - (depth (lambda () (length stack))) - - ;; traverse: 'a -> - (traverse - (lambda (x) - (push x) - (let ((d (depth))) - (set-N x d) - (hash-table-put! results x (f- x)) - (for-each (lambda (y) - (if (= 0 (get-N y)) - (traverse y)) - (hash-table-put! 
results - x - (union (f x) (f y))) - (set-N x (min (get-N x) (get-N y)))) - (edges x)) - (if (= d (get-N x)) - (let loop ((p (pop))) - (set-N p +inf.0) - (hash-table-put! results p (f x)) - (if (not (eq? x p)) - (loop (pop))))))))) + (car stack) + (set! stack (cdr stack))))) + (depth (lambda () (length stack))) + + ;; traverse: 'a -> + (traverse + (lambda (x) + (push x) + (let ((d (depth))) + (set-N x d) + (hash-table-put! results x (f- x)) + (for-each (lambda (y) + (if (= 0 (get-N y)) + (traverse y)) + (hash-table-put! results + x + (union (f x) (f y))) + (set-N x (min (get-N x) (get-N y)))) + (edges x)) + (if (= d (get-N x)) + (let loop ((p (pop))) + (set-N p +inf.0) + (hash-table-put! results p (f x)) + (if (not (eq? x p)) + (loop (pop))))))))] (for-each (lambda (x) - (if (= 0 (get-N x)) - (traverse x))) - nodes) + (if (= 0 (get-N x)) + (traverse x))) + nodes) f)) ) - - - - - diff --git a/collects/parser-tools/private-yacc/lalr.rkt b/collects/parser-tools/private-yacc/lalr.rkt index 3fb1953..e9b4d3b 100644 --- a/collects/parser-tools/private-yacc/lalr.rkt +++ b/collects/parser-tools/private-yacc/lalr.rkt @@ -38,7 +38,7 @@ ;; output term set is represented in bit-vector form (define (compute-read a g) (let* ((dr (compute-DR a g)) - (reads (compute-reads a g))) + (reads (compute-reads a g))) (digraph-tk->terml (send a get-mapped-non-term-keys) reads dr @@ -127,13 +127,12 @@ ;; output term set is represented in bit-vector form (define (compute-LA a g) (let* ((includes (compute-includes a g)) - (lookback (compute-lookback a g)) - (follow (compute-follow a g includes))) + (lookback (compute-lookback a g)) + (follow (compute-follow a g includes))) (lambda (k p) - (let* ((l (lookback k p)) - (f (map follow l))) - (apply bitwise-ior (cons 0 f)))))) - + (let* ((l (lookback k p)) + (f (map follow l))) + (apply bitwise-ior (cons 0 f)))))) (define (print-DR dr a g) (print-input-st-sym dr "DR" a g print-output-terms)) @@ -192,8 +191,8 @@ (map (lambda (p) (list - 
(kernel-index (trans-key-st p)) - (gram-sym-symbol (trans-key-gs p)))) + (kernel-index (trans-key-st p)) + (gram-sym-symbol (trans-key-gs p)))) r)) ;; init-tk-map : int -> (vectorof hashtable?) @@ -230,52 +229,49 @@ ;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} ;; A specialization of digraph in the file graph.rkt (define (digraph-tk->terml nodes edges f- num-states) - (letrec ( - ;; Will map elements of trans-key to term sets represented as bit vectors - (results (init-tk-map num-states)) + (letrec [ + ;; Will map elements of trans-key to term sets represented as bit vectors + (results (init-tk-map num-states)) - ;; Maps elements of trans-keys to integers. - (N (init-tk-map num-states)) + ;; Maps elements of trans-keys to integers. + (N (init-tk-map num-states)) (get-N (lookup-tk-map N)) - (set-N (add-tk-map N)) - (get-f (lookup-tk-map results)) - (set-f (add-tk-map results)) - - (stack null) - (push (lambda (x) - (set! stack (cons x stack)))) - (pop (lambda () + (set-N (add-tk-map N)) + (get-f (lookup-tk-map results)) + (set-f (add-tk-map results)) + + (stack null) + (push (lambda (x) + (set! stack (cons x stack)))) + (pop (lambda () (begin0 - (car stack) - (set! stack (cdr stack))))) - (depth (lambda () (length stack))) + (car stack) + (set! stack (cdr stack))))) + (depth (lambda () (length stack))) - ;; traverse: 'a -> - (traverse - (lambda (x) - (push x) - (let ((d (depth))) - (set-N x d) - (set-f x (f- x)) - (for-each (lambda (y) - (when (= 0 (get-N y)) + ;; traverse: 'a -> + (traverse + (lambda (x) + (push x) + (let ((d (depth))) + (set-N x d) + (set-f x (f- x)) + (for-each (lambda (y) + (when (= 0 (get-N y)) (traverse y)) - (set-f x (bitwise-ior (get-f x) (get-f y))) - (set-N x (min (get-N x) (get-N y)))) - (edges x)) - (when (= d (get-N x)) + (set-f x (bitwise-ior (get-f x) (get-f y))) + (set-N x (min (get-N x) (get-N y)))) + (edges x)) + (when (= d (get-N x)) (let loop ((p (pop))) (set-N p +inf.0) (set-f p (get-f x)) (unless (equal? 
x p) - (loop (pop))))))))) + (loop (pop))))))))] (for-each (lambda (x) - (when (= 0 (get-N x)) + (when (= 0 (get-N x)) (traverse x))) - nodes) + nodes) get-f)) ) - - - diff --git a/collects/parser-tools/private-yacc/lr0.rkt b/collects/parser-tools/private-yacc/lr0.rkt index ac359ac..eb0b2da 100644 --- a/collects/parser-tools/private-yacc/lr0.rkt +++ b/collects/parser-tools/private-yacc/lr0.rkt @@ -62,9 +62,9 @@ ;; (listof (cons/c trans-key? (listof kernel?))) (define (reverse-assoc assoc) (let ((reverse-hash (make-hash-table 'equal)) - (hash-table-add! - (lambda (ht k v) - (hash-table-put! ht k (cons v (hash-table-get ht k (lambda () null))))))) + (hash-table-add! + (lambda (ht k v) + (hash-table-put! ht k (cons v (hash-table-get ht k (lambda () null))))))) (for-each (lambda (trans-key/kernel) (let ((tk (car trans-key/kernel))) @@ -99,13 +99,13 @@ (define mapped-non-terms (map car non-term-assoc)) (define/public (get-mapped-non-term-keys) - mapped-non-terms) + mapped-non-terms) (define/public (get-num-states) (vector-length states)) (define/public (get-epsilon-trans) - epsilons) + epsilons) (define/public (get-transitions) (append term-assoc non-term-assoc)) @@ -113,12 +113,12 @@ ;; for-each-state : (state ->) -> ;; Iteration over the states in an automaton (define/public (for-each-state f) - (let ((num-states (vector-length states))) - (let loop ((i 0)) - (if (< i num-states) - (begin - (f (vector-ref states i)) - (loop (add1 i))))))) + (let ((num-states (vector-length states))) + (let loop ((i 0)) + (if (< i num-states) + (begin + (f (vector-ref states i)) + (loop (add1 i))))))) ;; run-automaton: kernel? gram-sym? -> (union kernel #f) ;; returns the state reached from state k on input s, or #f when k @@ -131,28 +131,28 @@ ;; run-automaton-back : (listof kernel?) gram-sym? -> (listof kernel) ;; returns the list of states that can reach k by transitioning on s. 
(define/public (run-automaton-back k s) - (apply append + (apply append (map (lambda (k) (hash-table-get (vector-ref reverse-transitions (kernel-index k)) (gram-sym-symbol s) (lambda () null))) k))))) - + (define (union comp (eq? a b) (define (kernel->string k) (apply string-append - `("{" ,@(map (lambda (i) (string-append (item->string i) ", ")) - (kernel-items k)) - "}"))) + `("{" ,@(map (lambda (i) (string-append (item->string i) ", ")) + (kernel-items k)) + "}"))) ;; build-LR0-automaton: grammar -> LR0-automaton ;; Constructs the kernels of the sets of LR(0) items of g (define (build-lr0-automaton grammar) ; (printf "LR(0) automaton:\n") (letrec ( - (epsilons (make-hash-table 'equal)) - (grammar-symbols (append (send grammar get-non-terms) + (epsilons (make-hash-table 'equal)) + (grammar-symbols (append (send grammar get-non-terms) (send grammar get-terms))) - ;; first-non-term: non-term -> non-term list - ;; given a non-terminal symbol C, return those non-terminal - ;; symbols A s.t. C -> An for some string of terminals and - ;; non-terminals n where -> means a rightmost derivation in many - ;; steps. Assumes that each non-term can be reduced to a string - ;; of terms. - (first-non-term - (digraph (send grammar get-non-terms) - (lambda (nt) - (filter non-term? - (map (lambda (prod) - (sym-at-dot (make-item prod 0))) - (send grammar get-prods-for-non-term nt)))) - (lambda (nt) (list nt)) - (union non-term non-term list + ;; given a non-terminal symbol C, return those non-terminal + ;; symbols A s.t. C -> An for some string of terminals and + ;; non-terminals n where -> means a rightmost derivation in many + ;; steps. Assumes that each non-term can be reduced to a string + ;; of terms. + (first-non-term + (digraph (send grammar get-non-terms) + (lambda (nt) + (filter non-term? 
+ (map (lambda (prod) + (sym-at-dot (make-item prod 0))) + (send grammar get-prods-for-non-term nt)))) + (lambda (nt) (list nt)) + (union non-term LR1-item list - ;; Creates a set of items containing i s.t. if A -> n.Xm is in it, - ;; X -> .o is in it too. - (LR0-closure - (lambda (i) - (cond - ((null? i) null) - (else - (let ((next-gsym (sym-at-dot (car i)))) - (cond - ((non-term? next-gsym) - (cons (car i) - (append - (apply append - (map (lambda (non-term) - (map (lambda (x) - (make-item x 0)) - (send grammar + ;; closure: LR1-item list -> LR1-item list + ;; Creates a set of items containing i s.t. if A -> n.Xm is in it, + ;; X -> .o is in it too. + (LR0-closure + (lambda (i) + (cond + ((null? i) null) + (else + (let ((next-gsym (sym-at-dot (car i)))) + (cond + ((non-term? next-gsym) + (cons (car i) + (append + (apply append + (map (lambda (non-term) + (map (lambda (x) + (make-item x 0)) + (send grammar get-prods-for-non-term non-term))) - (first-non-term next-gsym))) - (LR0-closure (cdr i))))) - (else - (cons (car i) (LR0-closure (cdr i)))))))))) + (first-non-term next-gsym))) + (LR0-closure (cdr i))))) + (else + (cons (car i) (LR0-closure (cdr i)))))))))) - ;; maps trans-keys to kernels - (automaton-term null) + ;; maps trans-keys to kernels + (automaton-term null) (automaton-non-term null) - - ;; keeps the kernels we have seen, so we can have a unique - ;; list for each kernel - (kernels (make-hash-table 'equal)) + + ;; keeps the kernels we have seen, so we can have a unique + ;; list for each kernel + (kernels (make-hash-table 'equal)) - (counter 0) - - ;; goto: LR1-item list -> LR1-item list list - ;; creates new kernels by moving the dot in each item in the - ;; LR0-closure of kernel to the right, and grouping them by - ;; the term/non-term moved over. 
Returns the kernels not - ;; yet seen, and places the trans-keys into automaton - (goto - (lambda (kernel) - (let ( - ;; maps a gram-syms to a list of items - (table (make-hash-table)) + (counter 0) + + ;; goto: LR1-item list -> LR1-item list list + ;; creates new kernels by moving the dot in each item in the + ;; LR0-closure of kernel to the right, and grouping them by + ;; the term/non-term moved over. Returns the kernels not + ;; yet seen, and places the trans-keys into automaton + (goto + (lambda (kernel) + (let ( + ;; maps a gram-syms to a list of items + (table (make-hash-table)) - ;; add-item!: - ;; (symbol (listof item) hashtable) item? -> - ;; adds i into the table grouped with the grammar - ;; symbol following its dot - (add-item! - (lambda (table i) - (let ((gs (sym-at-dot i))) - (cond - (gs - (let ((already + ;; add-item!: + ;; (symbol (listof item) hashtable) item? -> + ;; adds i into the table grouped with the grammar + ;; symbol following its dot + (add-item! + (lambda (table i) + (let ((gs (sym-at-dot i))) + (cond + (gs + (let ((already (hash-table-get table (gram-sym-symbol gs) (lambda () null)))) - (unless (member i already) + (unless (member i already) (hash-table-put! table (gram-sym-symbol gs) (cons i already))))) - ((= 0 (vector-length (prod-rhs (item-prod i)))) - (let ((current (hash-table-get epsilons - kernel - (lambda () null)))) - (hash-table-put! epsilons - kernel - (cons i current))))))))) - - ;; Group the items of the LR0 closure of the kernel - ;; by the character after the dot - (for-each (lambda (item) - (add-item! table item)) - (LR0-closure (kernel-items kernel))) - - ;; each group is a new kernel, with the dot advanced. - ;; sorts the items in a kernel so kernels can be compared - ;; with equal? 
for using the table kernels to make sure - ;; only one representitive of each kernel is created - (filter - (lambda (x) x) - (map - (lambda (i) - (let* ((gs (car i)) - (items (cadr i)) - (new #f) - (new-kernel (sort - (filter (lambda (x) x) - (map move-dot-right items)) - item ~a on ~a\n" - (kernel->string kernel) - (kernel->string unique-kernel) - (gram-sym-symbol gs)) - (if new - unique-kernel - #f))) - (let loop ((gsyms grammar-symbols)) - (cond + (kernel->string kernel) + (kernel->string unique-kernel) + (gram-sym-symbol gs)) + (if new + unique-kernel + #f))) + (let loop ((gsyms grammar-symbols)) + (cond ((null? gsyms) null) (else (let ((items (hash-table-get table @@ -323,33 +323,33 @@ (else (cons (list (car gsyms) items) (loop (cdr gsyms)))))))))))))) - + (starts (map (lambda (init-prod) (list (make-item init-prod 0))) (send grammar get-init-prods))) - (startk + (startk (map (lambda (start) (let ((k (make-kernel start counter))) (hash-table-put! kernels start k) (set! counter (add1 counter)) k)) starts)) - (new-kernels (make-queue))) + (new-kernels (make-queue))) (let loop ((old-kernels startk) - (seen-kernels null)) - (cond - ((and (empty-queue? new-kernels) (null? old-kernels)) - (make-object lr0% - automaton-term - automaton-non-term - (list->vector (reverse seen-kernels)) - epsilons)) - ((null? old-kernels) - (loop (deq! new-kernels) seen-kernels)) - (else - (enq! new-kernels (goto (car old-kernels))) - (loop (cdr old-kernels) (cons (car old-kernels) seen-kernels))))))) + (seen-kernels null)) + (cond + ((and (empty-queue? new-kernels) (null? old-kernels)) + (make-object lr0% + automaton-term + automaton-non-term + (list->vector (reverse seen-kernels)) + epsilons)) + ((null? old-kernels) + (loop (deq! new-kernels) seen-kernels)) + (else + (enq! new-kernels (goto (car old-kernels))) + (loop (cdr old-kernels) (cons (car old-kernels) seen-kernels))))))) (define-struct q (f l) (make-inspector)) (define (empty-queue? 
q) @@ -358,12 +358,12 @@ (make-q null null)) (define (enq! q i) (if (empty-queue? q) - (let ((i (mcons i null))) - (set-q-l! q i) - (set-q-f! q i)) - (begin - (set-mcdr! (q-l q) (mcons i null)) - (set-q-l! q (mcdr (q-l q)))))) + (let ((i (mcons i null))) + (set-q-l! q i) + (set-q-f! q i)) + (begin + (set-mcdr! (q-l q) (mcons i null)) + (set-q-l! q (mcdr (q-l q)))))) (define (deq! q) (begin0 (mcar (q-f q))