diff --git a/collects/algol60/cfg-parser.rkt b/collects/algol60/cfg-parser.rkt
index 0d56f8e..6473583 100644
--- a/collects/algol60/cfg-parser.rkt
+++ b/collects/algol60/cfg-parser.rkt
@@ -1,921 +1,7 @@
 #lang racket/base
-;; This module implements a parser form like the parser-tools's
-;; `parser', except that it works on an arbitrary CFG (returning
-;; the first sucecssful parse).
+;; Legacy module. cfg-parser used to live in the algol60 collection.
+;; This module re-exports its contents for backwards compatibility.
-;; I'm pretty sure that this is an implementation of Earley's
-;; algorithm.
-
-;; To a first approximation, it's a backtracking parser. Alternative
-;; for a non-terminal are computed in parallel, and multiple attempts
-;; to compute the same result block until the first one completes. If
-;; you get into deadlock, such as when trying to match
-;;    <foo> := <foo>
-;; then it means that there's no successful parse, so everything
-;; that's blocked fails.
-
-;; A cache holds the series of results for a particular non-terminal
-;; at a particular starting location. (A series is used, instead of a
-;; sinlge result, for backtracking.) Otherwise, the parser uses
-;; backtracking search. Backtracking is implemented through explicit
-;; success and failure continuations. Multiple results for a
-;; particular nonterminal and location are kept only when they have
-;; different lengths. (Otherwise, in the spirit of finding one
-;; successful parse, only the first result is kept.)
-
-;; The parser-tools's `parse' is used to transform tokens in the
-;; grammar to tokens specific to this parser. In other words, this
-;; parser uses `parser' so that it doesn't have to know anything about
-;; tokens.
-;;
-
-
-
-(require parser-tools/yacc
-         parser-tools/lex)
-
-(require (for-syntax racket/base
-                     syntax/boundmap
-                     parser-tools/private-lex/token-syntax))
-
-(provide cfg-parser)
-
-;; A raw token, wrapped so that we can recognize it:
-(define-struct tok (name orig-name val start end))
-
-;; Represents the thread scheduler:
-(define-struct tasks (active active-back waits multi-waits cache progress?))
-
-(define-for-syntax make-token-identifier-mapping make-hasheq)
-(define-for-syntax token-identifier-mapping-get
-  (case-lambda
-   [(t tok)
-    (hash-ref t (syntax-e tok))]
-   [(t tok fail)
-    (hash-ref t (syntax-e tok) fail)]))
-(define-for-syntax token-identifier-mapping-put!
-  (lambda (t tok v)
-    (hash-set! t (syntax-e tok) v)))
-(define-for-syntax token-identifier-mapping-map
-  (lambda (t f)
-    (hash-map t f)))
-
-;; Used to calculate information on the grammar, such as whether
-;; a particular non-terminal is "simple" instead of recursively defined.
-(define-for-syntax (nt-fixpoint nts proc nt-ids patss)
-  (define (ormap-all val f as bs)
-    (cond
-     [(null? as) val]
-     [else (ormap-all (or (f (car as) (car bs)) val)
-                      f
-                      (cdr as) (cdr bs))]))
-  (let loop ()
-    (when (ormap-all #f
-                     (lambda (nt pats)
-                       (let ([old (bound-identifier-mapping-get nts nt)])
-                         (let ([new (proc nt pats old)])
-                           (if (equal? old new)
-                               #f
-                               (begin
-                                 (bound-identifier-mapping-put! nts nt new)
-                                 #t)))))
-                     nt-ids patss)
-      (loop))))
-
-;; Tries parse-a followed by parse-b. If parse-a is not simple,
-;; then after parse-a succeeds once, we parallelize parse-b
-;; and trying a second result for parse-a.
-(define (parse-and simple-a?
parse-a parse-b - stream last-consumed-token depth end success-k fail-k - max-depth tasks) - (letrec ([mk-got-k - (lambda (success-k fail-k) - (lambda (val stream last-consumed-token depth max-depth tasks next1-k) - (if simple-a? - (parse-b val stream last-consumed-token depth end - (mk-got2-k success-k fail-k next1-k) - (mk-fail2-k success-k fail-k next1-k) - max-depth tasks) - (parallel-or - (lambda (success-k fail-k max-depth tasks) - (parse-b val stream last-consumed-token depth end - success-k fail-k - max-depth tasks)) - (lambda (success-k fail-k max-depth tasks) - (next1-k (mk-got-k success-k fail-k) - fail-k max-depth tasks)) - success-k fail-k max-depth tasks))))] - [mk-got2-k - (lambda (success-k fail-k next1-k) - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (success-k val stream last-consumed-token depth max-depth tasks - (lambda (success-k fail-k max-depth tasks) - (next-k (mk-got2-k success-k fail-k next1-k) - (mk-fail2-k success-k fail-k next1-k) - max-depth tasks)))))] - [mk-fail2-k - (lambda (success-k fail-k next1-k) - (lambda (max-depth tasks) - (next1-k (mk-got-k success-k fail-k) - fail-k - max-depth - tasks)))]) - (parse-a stream last-consumed-token depth end - (mk-got-k success-k fail-k) - fail-k - max-depth tasks))) - -;; Parallel or for non-terminal alternatives -(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks) - (parallel-or (lambda (success-k fail-k max-depth tasks) - (parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks)) - (lambda (success-k fail-k max-depth tasks) - (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)) - success-k fail-k max-depth tasks)) - -;; Generic parallel-or -(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks) - (define answer-key (gensym)) - (letrec ([gota-k - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (report-answer answer-key - max-depth - tasks - (list val stream last-consumed-token depth next-k)))] - [faila-k - (lambda (max-depth tasks) - (report-answer answer-key - max-depth - tasks - null))]) - (let* ([tasks (queue-task - tasks - (lambda (max-depth tasks) - (parse-a gota-k - faila-k - max-depth tasks)))] - [tasks (queue-task - tasks - (lambda (max-depth tasks) - (parse-b gota-k - faila-k - max-depth tasks)))] - [queue-next (lambda (next-k tasks) - (queue-task tasks - (lambda (max-depth tasks) - (next-k gota-k - faila-k - max-depth tasks))))]) - (letrec ([mk-got-one - (lambda (immediate-next? get-nth success-k) - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (let ([tasks (if immediate-next? - (queue-next next-k tasks) - tasks)]) - (success-k val stream last-consumed-token depth max-depth - tasks - (lambda (success-k fail-k max-depth tasks) - (let ([tasks (if immediate-next? 
- tasks - (queue-next next-k tasks))]) - (get-nth max-depth tasks success-k fail-k)))))))] - [get-first - (lambda (max-depth tasks success-k fail-k) - (wait-for-answer #f max-depth tasks answer-key - (mk-got-one #t get-first success-k) - (lambda (max-depth tasks) - (get-second max-depth tasks success-k fail-k)) - #f))] - [get-second - (lambda (max-depth tasks success-k fail-k) - (wait-for-answer #f max-depth tasks answer-key - (mk-got-one #f get-second success-k) - fail-k #f))]) - (get-first max-depth tasks success-k fail-k))))) - -;; Non-terminal alternatives where the first is "simple" can be done -;; sequentially, which is simpler -(define (parse-or parse-a parse-b - stream last-consumed-token depth end success-k fail-k max-depth tasks) - (letrec ([mk-got-k - (lambda (success-k fail-k) - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (success-k val stream last-consumed-token depth - max-depth tasks - (lambda (success-k fail-k max-depth tasks) - (next-k (mk-got-k success-k fail-k) - (mk-fail-k success-k fail-k) - max-depth tasks)))))] - [mk-fail-k - (lambda (success-k fail-k) - (lambda (max-depth tasks) - (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))]) - (parse-a stream last-consumed-token depth end - (mk-got-k success-k fail-k) - (mk-fail-k success-k fail-k) - max-depth tasks))) - -;; Starts a thread -(define queue-task - (lambda (tasks t [progress? #t]) - (make-tasks (tasks-active tasks) - (cons t (tasks-active-back tasks)) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - (or progress? (tasks-progress? tasks))))) - -;; Reports an answer to a waiting thread: -(define (report-answer answer-key max-depth tasks val) - (let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))]) - (if v - (let ([tasks (make-tasks (cons (v val) - (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (hash-remove! (tasks-waits tasks) answer-key) - (swap-task max-depth tasks)) - ;; We have an answer ready too fast; wait - (swap-task max-depth - (queue-task tasks - (lambda (max-depth tasks) - (report-answer answer-key max-depth tasks val)) - #f))))) - -;; Reports an answer to multiple waiting threads: -(define (report-answer-all answer-key max-depth tasks val k) - (let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))]) - (hash-remove! (tasks-multi-waits tasks) answer-key) - (let ([tasks (make-tasks (append (map (lambda (a) (a val)) v) - (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (k max-depth tasks)))) - -;; Waits for an answer; if `multi?' is #f, this is sole waiter, otherwise -;; there might be many. Use wither #t or #f (and `report-answer' or -;; `report-answer-all', resptively) consistently for a particular answer key. -(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k) - (let ([wait (lambda (val) - (lambda (max-depth tasks) - (if val - (if (null? val) - (fail-k max-depth tasks) - (let-values ([(val stream last-consumed-token depth next-k) (apply values val)]) - (success-k val stream last-consumed-token depth max-depth tasks next-k))) - (deadlock-k max-depth tasks))))]) - (if multi? - (hash-set! (tasks-multi-waits tasks) answer-key - (cons wait (hash-ref (tasks-multi-waits tasks) answer-key - (lambda () null)))) - (hash-set! 
(tasks-waits tasks) answer-key wait)) - (let ([tasks (make-tasks (tasks-active tasks) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (swap-task max-depth tasks)))) - -;; Swap thread -(define (swap-task max-depth tasks) - ;; Swap in first active: - (if (null? (tasks-active tasks)) - (if (tasks-progress? tasks) - (swap-task max-depth - (make-tasks (reverse (tasks-active-back tasks)) - null - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #f)) - ;; No progress, so issue failure for all multi-waits - (if (zero? (hash-count (tasks-multi-waits tasks))) - (error 'swap-task "Deadlock") - (swap-task max-depth - (make-tasks (apply - append - (hash-map (tasks-multi-waits tasks) - (lambda (k l) - (map (lambda (v) (v #f)) l)))) - (tasks-active-back tasks) - (tasks-waits tasks) - (make-hasheq) - (tasks-cache tasks) - #t)))) - (let ([t (car (tasks-active tasks))] - [tasks (make-tasks (cdr (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - (tasks-progress? tasks))]) - (t max-depth tasks)))) - -;; Finds the symbolic representative of a token class -(define-for-syntax (map-token toks tok) - (car (token-identifier-mapping-get toks tok))) - -(define no-pos-val (make-position #f #f #f)) -(define-for-syntax no-pos - (let ([npv ((syntax-local-certifier) #'no-pos-val)]) - (lambda (stx) npv))) -(define-for-syntax at-tok-pos - (lambda (sel expr) - (lambda (stx) - #`(let ([v #,expr]) (if v (#,sel v) no-pos-val))))) - -;; Builds a matcher for a particular alternative -(define-for-syntax (build-match nts toks pat handle $ctx) - (let loop ([pat pat] - [pos 1]) - (if (null? pat) - #`(success-k #,handle stream last-consumed-token depth max-depth tasks - (lambda (success-k fail-k max-depth tasks) - (fail-k max-depth tasks))) - (let ([id (datum->syntax (car pat) - (string->symbol (format "$~a" pos)))] - [id-start-pos (datum->syntax (car pat) - (string->symbol (format "$~a-start-pos" pos)))] - [id-end-pos (datum->syntax (car pat) - (string->symbol (format "$~a-end-pos" pos)))] - [n-end-pos (and (null? (cdr pat)) - (datum->syntax (car pat) '$n-end-pos))]) - (cond - [(bound-identifier-mapping-get nts (car pat) (lambda () #f)) - ;; Match non-termimal - #`(parse-and - ;; First part is simple? (If so, we don't have to parallelize the `and'.) - #,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))]) - (or (not l) - (andmap values (caddr l)))) - #,(car pat) - (let ([original-stream stream]) - (lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks) - (let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream) - tok-end - tok-start) - #'(if (eq? original-stream stream) - last-consumed-token - (and (pair? original-stream) - (car original-stream))))] - [#,id-end-pos (at-tok-pos #'tok-end #'last-consumed-token)] - #,@(if n-end-pos - #`([#,n-end-pos (at-tok-pos #'tok-end #'last-consumed-token)]) - null)) - #,(loop (cdr pat) (add1 pos))))) - stream last-consumed-token depth - #,(let ([cnt (apply + - (map (lambda (item) - (cond - [(bound-identifier-mapping-get nts item (lambda () #f)) - => (lambda (l) (car l))] - [else 1])) - (cdr pat)))]) - #`(- end #,cnt)) - success-k fail-k max-depth tasks)] - [else - ;; Match token - (let ([tok-id (map-token toks (car pat))]) - #`(if (and (pair? stream) - (eq? 
'#,tok-id (tok-name (car stream)))) - (let* ([stream-a (car stream)] - [#,id (tok-val stream-a)] - [last-consumed-token (car stream)] - [stream (cdr stream)] - [depth (add1 depth)]) - (let ([max-depth (max max-depth depth)]) - (let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'stream-a)] - [#,id-end-pos (at-tok-pos #'tok-end #'stream-a)] - #,@(if n-end-pos - #`([#,n-end-pos (at-tok-pos #'tok-end #'stream-a)]) - null)) - #,(loop (cdr pat) (add1 pos))))) - (fail-k max-depth tasks)))]))))) - -;; Starts parsing to match a non-terminal. There's a minor -;; optimization that checks for known starting tokens. Otherwise, -;; use the cache, block if someone else is already trying the match, -;; and cache the result if it's computed. -;; The cache maps nontermial+startingpos+iteration to a result, where -;; the iteration is 0 for the first match attempt, 1 for the second, -;; etc. -(define (parse-nt/share key min-cnt init-tokens stream last-consumed-token depth end max-depth tasks success-k fail-k k) - (if (and (positive? min-cnt) - (pair? stream) - (not (memq (tok-name (car stream)) init-tokens))) - ;; No such leading token; give up - (fail-k max-depth tasks) - ;; Run pattern - (let loop ([n 0] - [success-k success-k] - [fail-k fail-k] - [max-depth max-depth] - [tasks tasks] - [k k]) - (let ([answer-key (gensym)] - [table-key (vector key depth n)] - [old-depth depth] - [old-stream stream]) - #;(printf "Loop ~a\n" table-key) - (cond - [(hash-ref (tasks-cache tasks) table-key (lambda () #f)) - => (lambda (result) - #;(printf "Reuse ~a\n" table-key) - (result success-k fail-k max-depth tasks))] - [else - #;(printf "Try ~a ~a\n" table-key (map tok-name stream)) - (hash-set! (tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - #;(printf "Wait ~a ~a\n" table-key answer-key) - (wait-for-answer #t max-depth tasks answer-key success-k fail-k - (lambda (max-depth tasks) - #;(printf "Deadlock ~a ~a\n" table-key answer-key) - (fail-k max-depth tasks))))) - (let result-loop ([max-depth max-depth][tasks tasks][k k]) - (letrec ([orig-stream stream] - [new-got-k - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - ;; Check whether we already have a result that consumed the same amount: - (let ([result-key (vector #f key old-depth depth)]) - (cond - [(hash-ref (tasks-cache tasks) result-key (lambda () #f)) - ;; Go for the next-result - (result-loop max-depth - tasks - (lambda (end max-depth tasks success-k fail-k) - (next-k success-k fail-k max-depth tasks)))] - [else - #;(printf "Success ~a ~a\n" table-key - (map tok-name (let loop ([d old-depth][s old-stream]) - (if (= d depth) - null - (cons (car s) (loop (add1 d) (cdr s))))))) - (let ([next-k (lambda (success-k fail-k max-depth tasks) - (loop (add1 n) - success-k - fail-k - max-depth - tasks - (lambda (end max-depth tasks success-k fail-k) - (next-k success-k fail-k max-depth tasks))))]) - (hash-set! (tasks-cache tasks) result-key #t) - (hash-set! (tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - (success-k val stream last-consumed-token depth max-depth tasks next-k))) - (report-answer-all answer-key - max-depth - tasks - (list val stream last-consumed-token depth next-k) - (lambda (max-depth tasks) - (success-k val stream last-consumed-token depth max-depth tasks next-k))))])))] - [new-fail-k - (lambda (max-depth tasks) - #;(printf "Failure ~a\n" table-key) - (hash-set! 
(tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - (fail-k max-depth tasks))) - (report-answer-all answer-key - max-depth - tasks - null - (lambda (max-depth tasks) - (fail-k max-depth tasks))))]) - (k end max-depth tasks new-got-k new-fail-k)))]))))) - -(define-syntax (cfg-parser stx) - (syntax-case stx () - [(_ clause ...) - (let ([clauses (syntax->list #'(clause ...))]) - (let-values ([(start grammar cfg-error parser-clauses src-pos?) - (let ([all-toks (apply - append - (map (lambda (clause) - (syntax-case clause (tokens) - [(tokens t ...) - (apply - append - (map (lambda (t) - (let ([v (syntax-local-value t (lambda () #f))]) - (cond - [(terminals-def? v) - (map (lambda (v) - (cons v #f)) - (syntax->list (terminals-def-t v)))] - [(e-terminals-def? v) - (map (lambda (v) - (cons v #t)) - (syntax->list (e-terminals-def-t v)))] - [else null]))) - (syntax->list #'(t ...))))] - [_else null])) - clauses))] - [all-end-toks (apply - append - (map (lambda (clause) - (syntax-case clause (end) - [(end t ...) - (syntax->list #'(t ...))] - [_else null])) - clauses))]) - (let loop ([clauses clauses] - [cfg-start #f] - [cfg-grammar #f] - [cfg-error #f] - [src-pos? #f] - [parser-clauses null]) - (if (null? clauses) - (values cfg-start - cfg-grammar - cfg-error - (reverse parser-clauses) - src-pos?) - (syntax-case (car clauses) (start error grammar src-pos) - [(start tok) - (loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)] - [(error expr) - (loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)] - [(grammar [nt [pat handle0 handle ...] ...] ...) - (let ([nts (make-bound-identifier-mapping)] - [toks (make-token-identifier-mapping)] - [end-toks (make-token-identifier-mapping)] - [nt-ids (syntax->list #'(nt ...))] - [patss (map (lambda (stx) - (map syntax->list (syntax->list stx))) - (syntax->list #'((pat ...) ...)))]) - (for-each (lambda (nt) - (bound-identifier-mapping-put! nts nt (list 0))) - nt-ids) - (for-each (lambda (t) - (token-identifier-mapping-put! end-toks t #t)) - all-end-toks) - (for-each (lambda (t) - (unless (token-identifier-mapping-get end-toks (car t) (lambda () #f)) - (let ([id (gensym (syntax-e (car t)))]) - (token-identifier-mapping-put! toks (car t) - (cons id (cdr t)))))) - all-toks) - ;; Compute min max size for each non-term: - (nt-fixpoint - nts - (lambda (nt pats old-list) - (let ([new-cnt - (apply - min - (map (lambda (pat) - (apply - + - (map (lambda (elem) - (car - (bound-identifier-mapping-get nts - elem - (lambda () (list 1))))) - pat))) - pats))]) - (if (new-cnt . > . (car old-list)) - (cons new-cnt (cdr old-list)) - old-list))) - nt-ids patss) - ;; Compute set of toks that must appear at the beginning - ;; for a non-terminal - (nt-fixpoint - nts - (lambda (nt pats old-list) - (let ([new-list - (apply - append - (map (lambda (pat) - (let loop ([pat pat]) - (if (pair? pat) - (let ([l (bound-identifier-mapping-get - nts - (car pat) - (lambda () - (list 1 (map-token toks (car pat)))))]) - ;; If the non-terminal can match 0 things, - ;; then it might match something from the - ;; next pattern element. Otherwise, it must - ;; match the first element: - (if (zero? (car l)) - (append (cdr l) (loop (cdr pat))) - (cdr l))) - null))) - pats))]) - (let ([new (filter (lambda (id) - (andmap (lambda (id2) - (not (eq? id id2))) - (cdr old-list))) - new-list)]) - (if (pair? new) - ;; Drop dups in new list: - (let ([new (let loop ([new new]) - (if (null? (cdr new)) - new - (if (ormap (lambda (id) - (eq? 
(car new) id)) - (cdr new)) - (loop (cdr new)) - (cons (car new) (loop (cdr new))))))]) - (cons (car old-list) (append new (cdr old-list)))) - old-list)))) - nt-ids patss) - ;; Determine left-recursive clauses: - (for-each (lambda (nt pats) - (let ([l (bound-identifier-mapping-get nts nt)]) - (bound-identifier-mapping-put! nts nt (list (car l) - (cdr l) - (map (lambda (x) #f) pats))))) - nt-ids patss) - (nt-fixpoint - nts - (lambda (nt pats old-list) - (list (car old-list) - (cadr old-list) - (map (lambda (pat simple?) - (or simple? - (let ([l (map (lambda (elem) - (bound-identifier-mapping-get - nts - elem - (lambda () #f))) - pat)]) - (andmap (lambda (i) - (or (not i) - (andmap values (caddr i)))) - l)))) - pats (caddr old-list)))) - nt-ids patss) - ;; Build a definition for each non-term: - (loop (cdr clauses) - cfg-start - (map (lambda (nt pats handles $ctxs) - (define info (bound-identifier-mapping-get nts nt)) - (list nt - #`(let ([key (gensym '#,nt)]) - (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) - (parse-nt/share - key #,(car info) '#,(cadr info) stream last-consumed-token depth end - max-depth tasks - success-k fail-k - (lambda (end max-depth tasks success-k fail-k) - #,(let loop ([pats pats] - [handles (syntax->list handles)] - [$ctxs (syntax->list $ctxs)] - [simple?s (caddr info)]) - (if (null? pats) - #'(fail-k max-depth tasks) - #`(#,(if (or (null? (cdr pats)) - (car simple?s)) - #'parse-or - #'parse-parallel-or) - (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) - #,(build-match nts - toks - (car pats) - (car handles) - (car $ctxs))) - (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) - #,(loop (cdr pats) - (cdr handles) - (cdr $ctxs) - (cdr simple?s))) - stream last-consumed-token depth end success-k fail-k max-depth tasks))))))))) - nt-ids - patss - (syntax->list #'(((begin handle0 handle ...) ...) ...)) - (syntax->list #'((handle0 ...) ...))) - cfg-error - src-pos? - (list* - (with-syntax ([((tok tok-id . $e) ...) - (token-identifier-mapping-map toks - (lambda (k v) - (list* k - (car v) - (if (cdr v) - #f - '$1))))] - [(pos ...) - (if src-pos? - #'($1-start-pos $1-end-pos) - #'(#f #f))]) - #`(grammar (start [() null] - [(atok start) (cons $1 $2)]) - (atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...))) - #`(start start) - parser-clauses)))] - [(grammar . _) - (raise-syntax-error - #f - "bad grammar clause" - stx - (car clauses))] - [(src-pos) - (loop (cdr clauses) - cfg-start - cfg-grammar - cfg-error - #t - (cons (car clauses) parser-clauses))] - [_else - (loop (cdr clauses) - cfg-start - cfg-grammar - cfg-error - src-pos? - (cons (car clauses) parser-clauses))]))))]) - #`(let ([orig-parse (parser - [error (lambda (a b c) - (error 'cfg-parser "unexpected ~a token: ~a" b c))] - . #,parser-clauses)] - [error-proc #,cfg-error]) - (letrec #,grammar - (lambda (get-tok) - (let ([tok-list (orig-parse get-tok)]) - (letrec ([success-k - (lambda (val stream last-consumed-token depth max-depth tasks next) - (if (null? stream) - val - (next success-k fail-k max-depth tasks)))] - [fail-k (lambda (max-depth tasks) - (cond - [(null? 
tok-list) - (if error-proc - (error-proc #t - 'no-tokens - #f - (make-position #f #f #f) - (make-position #f #f #f)) - (error - 'cfg-parse - "no tokens"))] - [else - (let ([bad-tok (list-ref tok-list - (min (sub1 (length tok-list)) - max-depth))]) - (if error-proc - (error-proc #t - (tok-orig-name bad-tok) - (tok-val bad-tok) - (tok-start bad-tok) - (tok-end bad-tok)) - (error - 'cfg-parse - "failed at ~a" - (tok-val bad-tok))))]))]) - (#,start tok-list - ;; we simulate a token at the very beginning with zero width - ;; for use with the position-generating code (*-start-pos, *-end-pos). - (if (null? tok-list) - (tok #f #f #f - (position 1 - #,(if src-pos? #'1 #'#f) - #,(if src-pos? #'0 #'#f)) - (position 1 - #,(if src-pos? #'1 #'#f) - #,(if src-pos? #'0 #'#f))) - (tok (tok-name (car tok-list)) - (tok-orig-name (car tok-list)) - (tok-val (car tok-list)) - (tok-start (car tok-list)) - (tok-start (car tok-list)))) - 0 - (length tok-list) - success-k - fail-k - 0 - (make-tasks null null - (make-hasheq) (make-hasheq) - (make-hash) #t)))))))))])) - - -(module* test racket/base - (require (submod "..") - parser-tools/lex - racket/block - rackunit) - - ;; Test: parsing regular expressions. - ;; Here is a test case on locations: - (block - (define-tokens regexp-tokens (ANCHOR STAR OR LIT LPAREN RPAREN EOF)) - (define lex (lexer-src-pos ["|" (token-OR lexeme)] - ["^" (token-ANCHOR lexeme)] - ["*" (token-STAR lexeme)] - [(repetition 1 +inf.0 alphabetic) (token-LIT lexeme)] - ["(" (token-LPAREN lexeme)] - [")" (token-RPAREN lexeme)] - [whitespace (return-without-pos (lex input-port))] - [(eof) (token-EOF 'eof)])) - (define -parse (cfg-parser - (tokens regexp-tokens) - (start top) - (end EOF) - (src-pos) - (grammar [top [(maybe-anchor regexp) - (cond [$1 - `(anchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] - [else - `(unanchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))])]] - [maybe-anchor [(ANCHOR) #t] - [() #f]] - [regexp [(regexp STAR) `(star ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] - [(regexp OR regexp) `(or ,$1 ,$3 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] - [(LPAREN regexp RPAREN) `(group ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] - [(LIT) `(lit ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $1-end-pos))]]))) - (define (pos->sexp pos) - (position-offset pos)) - - (define (parse s) - (define ip (open-input-string s)) - (port-count-lines! ip) - (-parse (lambda () (lex ip)))) - - (check-equal? (parse "abc") - '(unanchored (lit "abc" 1 4) 1 4)) - (check-equal? 
(parse "a | (b*) | c")
-                '(unanchored (or (or (lit "a" 1 2)
-                                     (group (star (lit "b" 6 7) 6 8) 5 9)
-                                     1 9)
-                                 (lit "c" 12 13)
-                                 1 13)
-                             1 13)))
-
-
-
-
-
-  ;; Tests used during development
-  (define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF))
-
-  (define lex
-    (lexer
-     ["+" (token-PLUS '+)]
-     ["-" (token-MINUS '-)]
-     ["*" (token-STAR '*)]
-     ["|" (token-BAR '||)]
-     [":" (token-COLON '|:|)]
-     [whitespace (lex input-port)]
-     [(eof) (token-EOF 'eof)]))
-
-  (define parse
-    (cfg-parser
-     (tokens non-terminals)
-     (start <program>)
-     (end EOF)
-     (error (lambda (a b stx)
-              (error 'parse "failed at ~s" stx)))
-     (grammar [<program> [(PLUS) "plus"]
-                         [(<expr> BAR <expr>) (list $1 $2 $3)]
-                         [(<expr> COLON) (list $1)]]
-              [<expr> [(MINUS) "minus"]
-                      [(<program> STAR) (cons $1 $2)]]
-              [<random> [(<sign> MINUS) "yes"]]
-              [<sign> [(PLUS) 'plus]
-                      [(MINUS) 'minus]]
-              [<count> [() '0]
-                       [(<count> PLUS) (add1 $1)]
-                       [(<sign> PLUS) (add1 $1)]])))
-
-  (let ([p (open-input-string #;"+*|-|-*|+**" #;"-|+*|+**"
-                              #;"+*|+**|-" #;"-|-*|-|-*"
-                              #;"-|-*|-|-**|-|-*|-|-**"
-                              "-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|-|-*|-|-**|-|-*|-|-***
-   |-|-*|-|-**|-|-*|-|-*****|-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|
-   -|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****"
-                              ;; This one fails:
-                              #;"+*")])
-    (check-equal? (parse (lambda () (lex p)))
-                  '((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
-                      ||
-                      (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
-                     .
-                     *)
-                    ||
-                    (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
-                     ||
-                     (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
-                    .
-                    *))
-                   .
-                   *)
-                  ||
-                  (((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
-                     ||
-                     (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
-                    .
-                    *)
-                   ||
-                   (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
-                    ||
-                    (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
-                   .
-                   *))
-                  .
-                  *))))
+(require parser-tools/cfg-parser)
+(provide (all-from-out parser-tools/cfg-parser))
\ No newline at end of file
diff --git a/collects/parser-tools/cfg-parser.rkt b/collects/parser-tools/cfg-parser.rkt
new file mode 100644
index 0000000..0d56f8e
--- /dev/null
+++ b/collects/parser-tools/cfg-parser.rkt
@@ -0,0 +1,921 @@
+#lang racket/base
+;; This module implements a parser form like the parser-tools's
+;; `parser', except that it works on an arbitrary CFG (returning
+;; the first successful parse).
+
+;; I'm pretty sure that this is an implementation of Earley's
+;; algorithm.
+
+;; To a first approximation, it's a backtracking parser. Alternatives
+;; for a non-terminal are computed in parallel, and multiple attempts
+;; to compute the same result block until the first one completes. If
+;; you get into deadlock, such as when trying to match
+;;    <foo> := <foo>
+;; then it means that there's no successful parse, so everything
+;; that's blocked fails.
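+;;
+;; (An illustrative sketch, not from the original sources: a grammar
+;; whose only rule for <foo> is the self-reference
+;;    [<foo> [(<foo>) $1]]
+;; can never consume a token, so every attempt to parse <foo> blocks
+;; waiting on another attempt to parse <foo>; the scheduler then sees
+;; that no task can make progress and fails all of the blocked
+;; attempts.)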
+
+;; A cache holds the series of results for a particular non-terminal
+;; at a particular starting location. (A series is used, instead of a
+;; single result, for backtracking.) Otherwise, the parser uses
+;; backtracking search. Backtracking is implemented through explicit
+;; success and failure continuations. Multiple results for a
+;; particular nonterminal and location are kept only when they have
+;; different lengths. (Otherwise, in the spirit of finding one
+;; successful parse, only the first result is kept.)
+
+;; The parser-tools's `parser' is used to transform tokens in the
+;; grammar to tokens specific to this parser. In other words, this
+;; parser uses `parser' so that it doesn't have to know anything about
+;; tokens.
+;;
+
+
+
+(require parser-tools/yacc
+         parser-tools/lex)
+
+(require (for-syntax racket/base
+                     syntax/boundmap
+                     parser-tools/private-lex/token-syntax))
+
+(provide cfg-parser)
+
+;; A raw token, wrapped so that we can recognize it:
+(define-struct tok (name orig-name val start end))
+
+;; Represents the thread scheduler:
+(define-struct tasks (active active-back waits multi-waits cache progress?))
+
+(define-for-syntax make-token-identifier-mapping make-hasheq)
+(define-for-syntax token-identifier-mapping-get
+  (case-lambda
+   [(t tok)
+    (hash-ref t (syntax-e tok))]
+   [(t tok fail)
+    (hash-ref t (syntax-e tok) fail)]))
+(define-for-syntax token-identifier-mapping-put!
+  (lambda (t tok v)
+    (hash-set! t (syntax-e tok) v)))
+(define-for-syntax token-identifier-mapping-map
+  (lambda (t f)
+    (hash-map t f)))
+
+;; Used to calculate information on the grammar, such as whether
+;; a particular non-terminal is "simple" instead of recursively defined.
+(define-for-syntax (nt-fixpoint nts proc nt-ids patss)
+  (define (ormap-all val f as bs)
+    (cond
+     [(null? as) val]
+     [else (ormap-all (or (f (car as) (car bs)) val)
+                      f
+                      (cdr as) (cdr bs))]))
+  (let loop ()
+    (when (ormap-all #f
+                     (lambda (nt pats)
+                       (let ([old (bound-identifier-mapping-get nts nt)])
+                         (let ([new (proc nt pats old)])
+                           (if (equal? old new)
+                               #f
+                               (begin
+                                 (bound-identifier-mapping-put! nts nt new)
+                                 #t)))))
+                     nt-ids patss)
+      (loop))))
+
+;; Tries parse-a followed by parse-b. If parse-a is not simple,
+;; then after parse-a succeeds once, we parallelize parse-b
+;; with trying a second result for parse-a.
+(define (parse-and simple-a? parse-a parse-b
+                   stream last-consumed-token depth end success-k fail-k
+                   max-depth tasks)
+  (letrec ([mk-got-k
+            (lambda (success-k fail-k)
+              (lambda (val stream last-consumed-token depth max-depth tasks next1-k)
+                (if simple-a?
+ (parse-b val stream last-consumed-token depth end + (mk-got2-k success-k fail-k next1-k) + (mk-fail2-k success-k fail-k next1-k) + max-depth tasks) + (parallel-or + (lambda (success-k fail-k max-depth tasks) + (parse-b val stream last-consumed-token depth end + success-k fail-k + max-depth tasks)) + (lambda (success-k fail-k max-depth tasks) + (next1-k (mk-got-k success-k fail-k) + fail-k max-depth tasks)) + success-k fail-k max-depth tasks))))] + [mk-got2-k + (lambda (success-k fail-k next1-k) + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + (success-k val stream last-consumed-token depth max-depth tasks + (lambda (success-k fail-k max-depth tasks) + (next-k (mk-got2-k success-k fail-k next1-k) + (mk-fail2-k success-k fail-k next1-k) + max-depth tasks)))))] + [mk-fail2-k + (lambda (success-k fail-k next1-k) + (lambda (max-depth tasks) + (next1-k (mk-got-k success-k fail-k) + fail-k + max-depth + tasks)))]) + (parse-a stream last-consumed-token depth end + (mk-got-k success-k fail-k) + fail-k + max-depth tasks))) + +;; Parallel or for non-terminal alternatives +(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks) + (parallel-or (lambda (success-k fail-k max-depth tasks) + (parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks)) + (lambda (success-k fail-k max-depth tasks) + (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)) + success-k fail-k max-depth tasks)) + +;; Generic parallel-or +(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks) + (define answer-key (gensym)) + (letrec ([gota-k + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + (report-answer answer-key + max-depth + tasks + (list val stream last-consumed-token depth next-k)))] + [faila-k + (lambda (max-depth tasks) + (report-answer answer-key + max-depth + tasks + null))]) + (let* ([tasks (queue-task + tasks + (lambda (max-depth tasks) + (parse-a gota-k + faila-k + max-depth tasks)))] + [tasks (queue-task + tasks + (lambda (max-depth tasks) + (parse-b gota-k + faila-k + max-depth tasks)))] + [queue-next (lambda (next-k tasks) + (queue-task tasks + (lambda (max-depth tasks) + (next-k gota-k + faila-k + max-depth tasks))))]) + (letrec ([mk-got-one + (lambda (immediate-next? get-nth success-k) + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + (let ([tasks (if immediate-next? + (queue-next next-k tasks) + tasks)]) + (success-k val stream last-consumed-token depth max-depth + tasks + (lambda (success-k fail-k max-depth tasks) + (let ([tasks (if immediate-next? 
+                                         tasks
+                                         (queue-next next-k tasks))])
+                              (get-nth max-depth tasks success-k fail-k)))))))]
+           [get-first
+            (lambda (max-depth tasks success-k fail-k)
+              (wait-for-answer #f max-depth tasks answer-key
+                               (mk-got-one #t get-first success-k)
+                               (lambda (max-depth tasks)
+                                 (get-second max-depth tasks success-k fail-k))
+                               #f))]
+           [get-second
+            (lambda (max-depth tasks success-k fail-k)
+              (wait-for-answer #f max-depth tasks answer-key
+                               (mk-got-one #f get-second success-k)
+                               fail-k #f))])
+    (get-first max-depth tasks success-k fail-k)))))
+
+;; Non-terminal alternatives where the first is "simple" can be done
+;; sequentially, which is simpler
+(define (parse-or parse-a parse-b
+                  stream last-consumed-token depth end success-k fail-k max-depth tasks)
+  (letrec ([mk-got-k
+            (lambda (success-k fail-k)
+              (lambda (val stream last-consumed-token depth max-depth tasks next-k)
+                (success-k val stream last-consumed-token depth
+                           max-depth tasks
+                           (lambda (success-k fail-k max-depth tasks)
+                             (next-k (mk-got-k success-k fail-k)
+                                     (mk-fail-k success-k fail-k)
+                                     max-depth tasks)))))]
+           [mk-fail-k
+            (lambda (success-k fail-k)
+              (lambda (max-depth tasks)
+                (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))])
+    (parse-a stream last-consumed-token depth end
+             (mk-got-k success-k fail-k)
+             (mk-fail-k success-k fail-k)
+             max-depth tasks)))
+
+;; Starts a thread
+(define queue-task
+  (lambda (tasks t [progress? #t])
+    (make-tasks (tasks-active tasks)
+                (cons t (tasks-active-back tasks))
+                (tasks-waits tasks)
+                (tasks-multi-waits tasks)
+                (tasks-cache tasks)
+                (or progress? (tasks-progress? tasks)))))
+
+;; Reports an answer to a waiting thread:
+(define (report-answer answer-key max-depth tasks val)
+  (let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))])
+    (if v
+        (let ([tasks (make-tasks (cons (v val)
+                                       (tasks-active tasks))
+                                 (tasks-active-back tasks)
+                                 (tasks-waits tasks)
+                                 (tasks-multi-waits tasks)
+                                 (tasks-cache tasks)
+                                 #t)])
+          (hash-remove! (tasks-waits tasks) answer-key)
+          (swap-task max-depth tasks))
+        ;; We have an answer ready too fast; wait
+        (swap-task max-depth
+                   (queue-task tasks
+                               (lambda (max-depth tasks)
+                                 (report-answer answer-key max-depth tasks val))
+                               #f)))))
+
+;; Reports an answer to multiple waiting threads:
+(define (report-answer-all answer-key max-depth tasks val k)
+  (let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))])
+    (hash-remove! (tasks-multi-waits tasks) answer-key)
+    (let ([tasks (make-tasks (append (map (lambda (a) (a val)) v)
+                                     (tasks-active tasks))
+                             (tasks-active-back tasks)
+                             (tasks-waits tasks)
+                             (tasks-multi-waits tasks)
+                             (tasks-cache tasks)
+                             #t)])
+      (k max-depth tasks))))
+
+;; Waits for an answer; if `multi?' is #f, this is the sole waiter, otherwise
+;; there might be many. Use either #t or #f (and `report-answer' or
+;; `report-answer-all', respectively) consistently for a particular answer key.
+(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k)
+  (let ([wait (lambda (val)
+                (lambda (max-depth tasks)
+                  (if val
+                      (if (null? val)
+                          (fail-k max-depth tasks)
+                          (let-values ([(val stream last-consumed-token depth next-k) (apply values val)])
+                            (success-k val stream last-consumed-token depth max-depth tasks next-k)))
+                      (deadlock-k max-depth tasks))))])
+    (if multi?
+        (hash-set! (tasks-multi-waits tasks) answer-key
+                   (cons wait (hash-ref (tasks-multi-waits tasks) answer-key
+                                        (lambda () null))))
+        (hash-set!
(tasks-waits tasks) answer-key wait))
+    (let ([tasks (make-tasks (tasks-active tasks)
+                             (tasks-active-back tasks)
+                             (tasks-waits tasks)
+                             (tasks-multi-waits tasks)
+                             (tasks-cache tasks)
+                             #t)])
+      (swap-task max-depth tasks))))
+
+;; Swap thread
+(define (swap-task max-depth tasks)
+  ;; Swap in first active:
+  (if (null? (tasks-active tasks))
+      (if (tasks-progress? tasks)
+          (swap-task max-depth
+                     (make-tasks (reverse (tasks-active-back tasks))
+                                 null
+                                 (tasks-waits tasks)
+                                 (tasks-multi-waits tasks)
+                                 (tasks-cache tasks)
+                                 #f))
+          ;; No progress, so issue failure for all multi-waits
+          (if (zero? (hash-count (tasks-multi-waits tasks)))
+              (error 'swap-task "Deadlock")
+              (swap-task max-depth
+                         (make-tasks (apply
+                                      append
+                                      (hash-map (tasks-multi-waits tasks)
+                                                (lambda (k l)
+                                                  (map (lambda (v) (v #f)) l))))
+                                     (tasks-active-back tasks)
+                                     (tasks-waits tasks)
+                                     (make-hasheq)
+                                     (tasks-cache tasks)
+                                     #t))))
+      (let ([t (car (tasks-active tasks))]
+            [tasks (make-tasks (cdr (tasks-active tasks))
+                               (tasks-active-back tasks)
+                               (tasks-waits tasks)
+                               (tasks-multi-waits tasks)
+                               (tasks-cache tasks)
+                               (tasks-progress? tasks))])
+        (t max-depth tasks))))
+
+;; Finds the symbolic representative of a token class
+(define-for-syntax (map-token toks tok)
+  (car (token-identifier-mapping-get toks tok)))
+
+(define no-pos-val (make-position #f #f #f))
+(define-for-syntax no-pos
+  (let ([npv ((syntax-local-certifier) #'no-pos-val)])
+    (lambda (stx) npv)))
+(define-for-syntax at-tok-pos
+  (lambda (sel expr)
+    (lambda (stx)
+      #`(let ([v #,expr]) (if v (#,sel v) no-pos-val)))))
+
+;; Builds a matcher for a particular alternative
+(define-for-syntax (build-match nts toks pat handle $ctx)
+  (let loop ([pat pat]
+             [pos 1])
+    (if (null? pat)
+        #`(success-k #,handle stream last-consumed-token depth max-depth tasks
+                     (lambda (success-k fail-k max-depth tasks)
+                       (fail-k max-depth tasks)))
+        (let ([id (datum->syntax (car pat)
+                                 (string->symbol (format "$~a" pos)))]
+              [id-start-pos (datum->syntax (car pat)
+                                           (string->symbol (format "$~a-start-pos" pos)))]
+              [id-end-pos (datum->syntax (car pat)
+                                         (string->symbol (format "$~a-end-pos" pos)))]
+              [n-end-pos (and (null? (cdr pat))
+                              (datum->syntax (car pat) '$n-end-pos))])
+          (cond
+           [(bound-identifier-mapping-get nts (car pat) (lambda () #f))
+            ;; Match non-terminal
+            #`(parse-and
+               ;; First part is simple? (If so, we don't have to parallelize the `and'.)
+               #,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))])
+                   (or (not l)
+                       (andmap values (caddr l))))
+               #,(car pat)
+               (let ([original-stream stream])
+                 (lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks)
+                   (let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream)
+                                                                  tok-end
+                                                                  tok-start)
+                                                            #'(if (eq? original-stream stream)
+                                                                  last-consumed-token
+                                                                  (and (pair? original-stream)
+                                                                       (car original-stream))))]
+                                [#,id-end-pos (at-tok-pos #'tok-end #'last-consumed-token)]
+                                #,@(if n-end-pos
+                                       #`([#,n-end-pos (at-tok-pos #'tok-end #'last-consumed-token)])
+                                       null))
+                     #,(loop (cdr pat) (add1 pos)))))
+               stream last-consumed-token depth
+               #,(let ([cnt (apply +
+                                   (map (lambda (item)
+                                          (cond
+                                           [(bound-identifier-mapping-get nts item (lambda () #f))
+                                            => (lambda (l) (car l))]
+                                           [else 1]))
+                                        (cdr pat)))])
+                   #`(- end #,cnt))
+               success-k fail-k max-depth tasks)]
+           [else
+            ;; Match token
+            (let ([tok-id (map-token toks (car pat))])
+              #`(if (and (pair? stream)
+                         (eq?
'#,tok-id (tok-name (car stream))))
+                    (let* ([stream-a (car stream)]
+                           [#,id (tok-val stream-a)]
+                           [last-consumed-token (car stream)]
+                           [stream (cdr stream)]
+                           [depth (add1 depth)])
+                      (let ([max-depth (max max-depth depth)])
+                        (let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'stream-a)]
+                                     [#,id-end-pos (at-tok-pos #'tok-end #'stream-a)]
+                                     #,@(if n-end-pos
+                                            #`([#,n-end-pos (at-tok-pos #'tok-end #'stream-a)])
+                                            null))
+                          #,(loop (cdr pat) (add1 pos)))))
+                    (fail-k max-depth tasks)))])))))
+
+;; Starts parsing to match a non-terminal. There's a minor
+;; optimization that checks for known starting tokens. Otherwise,
+;; use the cache, block if someone else is already trying the match,
+;; and cache the result if it's computed.
+;; The cache maps nonterminal+startingpos+iteration to a result, where
+;; the iteration is 0 for the first match attempt, 1 for the second,
+;; etc.
+(define (parse-nt/share key min-cnt init-tokens stream last-consumed-token depth end max-depth tasks success-k fail-k k)
+  (if (and (positive? min-cnt)
+           (pair? stream)
+           (not (memq (tok-name (car stream)) init-tokens)))
+      ;; No such leading token; give up
+      (fail-k max-depth tasks)
+      ;; Run pattern
+      (let loop ([n 0]
+                 [success-k success-k]
+                 [fail-k fail-k]
+                 [max-depth max-depth]
+                 [tasks tasks]
+                 [k k])
+        (let ([answer-key (gensym)]
+              [table-key (vector key depth n)]
+              [old-depth depth]
+              [old-stream stream])
+          #;(printf "Loop ~a\n" table-key)
+          (cond
+           [(hash-ref (tasks-cache tasks) table-key (lambda () #f))
+            => (lambda (result)
+                 #;(printf "Reuse ~a\n" table-key)
+                 (result success-k fail-k max-depth tasks))]
+           [else
+            #;(printf "Try ~a ~a\n" table-key (map tok-name stream))
+            (hash-set! (tasks-cache tasks) table-key
+                       (lambda (success-k fail-k max-depth tasks)
+                         #;(printf "Wait ~a ~a\n" table-key answer-key)
+                         (wait-for-answer #t max-depth tasks answer-key success-k fail-k
+                                          (lambda (max-depth tasks)
+                                            #;(printf "Deadlock ~a ~a\n" table-key answer-key)
+                                            (fail-k max-depth tasks)))))
+            (let result-loop ([max-depth max-depth][tasks tasks][k k])
+              (letrec ([orig-stream stream]
+                       [new-got-k
+                        (lambda (val stream last-consumed-token depth max-depth tasks next-k)
+                          ;; Check whether we already have a result that consumed the same amount:
+                          (let ([result-key (vector #f key old-depth depth)])
+                            (cond
+                             [(hash-ref (tasks-cache tasks) result-key (lambda () #f))
+                              ;; Go for the next-result
+                              (result-loop max-depth
+                                           tasks
+                                           (lambda (end max-depth tasks success-k fail-k)
+                                             (next-k success-k fail-k max-depth tasks)))]
+                             [else
+                              #;(printf "Success ~a ~a\n" table-key
+                                        (map tok-name (let loop ([d old-depth][s old-stream])
+                                                        (if (= d depth)
+                                                            null
+                                                            (cons (car s) (loop (add1 d) (cdr s)))))))
+                              (let ([next-k (lambda (success-k fail-k max-depth tasks)
+                                              (loop (add1 n)
+                                                    success-k
+                                                    fail-k
+                                                    max-depth
+                                                    tasks
+                                                    (lambda (end max-depth tasks success-k fail-k)
+                                                      (next-k success-k fail-k max-depth tasks))))])
+                                (hash-set! (tasks-cache tasks) result-key #t)
+                                (hash-set! (tasks-cache tasks) table-key
+                                           (lambda (success-k fail-k max-depth tasks)
+                                             (success-k val stream last-consumed-token depth max-depth tasks next-k)))
+                                (report-answer-all answer-key
+                                                   max-depth
+                                                   tasks
+                                                   (list val stream last-consumed-token depth next-k)
+                                                   (lambda (max-depth tasks)
+                                                     (success-k val stream last-consumed-token depth max-depth tasks next-k))))])))]
+                       [new-fail-k
+                        (lambda (max-depth tasks)
+                          #;(printf "Failure ~a\n" table-key)
+                          (hash-set!
(tasks-cache tasks) table-key + (lambda (success-k fail-k max-depth tasks) + (fail-k max-depth tasks))) + (report-answer-all answer-key + max-depth + tasks + null + (lambda (max-depth tasks) + (fail-k max-depth tasks))))]) + (k end max-depth tasks new-got-k new-fail-k)))]))))) + +(define-syntax (cfg-parser stx) + (syntax-case stx () + [(_ clause ...) + (let ([clauses (syntax->list #'(clause ...))]) + (let-values ([(start grammar cfg-error parser-clauses src-pos?) + (let ([all-toks (apply + append + (map (lambda (clause) + (syntax-case clause (tokens) + [(tokens t ...) + (apply + append + (map (lambda (t) + (let ([v (syntax-local-value t (lambda () #f))]) + (cond + [(terminals-def? v) + (map (lambda (v) + (cons v #f)) + (syntax->list (terminals-def-t v)))] + [(e-terminals-def? v) + (map (lambda (v) + (cons v #t)) + (syntax->list (e-terminals-def-t v)))] + [else null]))) + (syntax->list #'(t ...))))] + [_else null])) + clauses))] + [all-end-toks (apply + append + (map (lambda (clause) + (syntax-case clause (end) + [(end t ...) + (syntax->list #'(t ...))] + [_else null])) + clauses))]) + (let loop ([clauses clauses] + [cfg-start #f] + [cfg-grammar #f] + [cfg-error #f] + [src-pos? #f] + [parser-clauses null]) + (if (null? clauses) + (values cfg-start + cfg-grammar + cfg-error + (reverse parser-clauses) + src-pos?) + (syntax-case (car clauses) (start error grammar src-pos) + [(start tok) + (loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)] + [(error expr) + (loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)] + [(grammar [nt [pat handle0 handle ...] ...] ...) + (let ([nts (make-bound-identifier-mapping)] + [toks (make-token-identifier-mapping)] + [end-toks (make-token-identifier-mapping)] + [nt-ids (syntax->list #'(nt ...))] + [patss (map (lambda (stx) + (map syntax->list (syntax->list stx))) + (syntax->list #'((pat ...) ...)))]) + (for-each (lambda (nt) + (bound-identifier-mapping-put! nts nt (list 0))) + nt-ids) + (for-each (lambda (t) + (token-identifier-mapping-put! end-toks t #t)) + all-end-toks) + (for-each (lambda (t) + (unless (token-identifier-mapping-get end-toks (car t) (lambda () #f)) + (let ([id (gensym (syntax-e (car t)))]) + (token-identifier-mapping-put! toks (car t) + (cons id (cdr t)))))) + all-toks) + ;; Compute min max size for each non-term: + (nt-fixpoint + nts + (lambda (nt pats old-list) + (let ([new-cnt + (apply + min + (map (lambda (pat) + (apply + + + (map (lambda (elem) + (car + (bound-identifier-mapping-get nts + elem + (lambda () (list 1))))) + pat))) + pats))]) + (if (new-cnt . > . (car old-list)) + (cons new-cnt (cdr old-list)) + old-list))) + nt-ids patss) + ;; Compute set of toks that must appear at the beginning + ;; for a non-terminal + (nt-fixpoint + nts + (lambda (nt pats old-list) + (let ([new-list + (apply + append + (map (lambda (pat) + (let loop ([pat pat]) + (if (pair? pat) + (let ([l (bound-identifier-mapping-get + nts + (car pat) + (lambda () + (list 1 (map-token toks (car pat)))))]) + ;; If the non-terminal can match 0 things, + ;; then it might match something from the + ;; next pattern element. Otherwise, it must + ;; match the first element: + (if (zero? (car l)) + (append (cdr l) (loop (cdr pat))) + (cdr l))) + null))) + pats))]) + (let ([new (filter (lambda (id) + (andmap (lambda (id2) + (not (eq? id id2))) + (cdr old-list))) + new-list)]) + (if (pair? new) + ;; Drop dups in new list: + (let ([new (let loop ([new new]) + (if (null? (cdr new)) + new + (if (ormap (lambda (id) + (eq? 
(car new) id)) + (cdr new)) + (loop (cdr new)) + (cons (car new) (loop (cdr new))))))]) + (cons (car old-list) (append new (cdr old-list)))) + old-list)))) + nt-ids patss) + ;; Determine left-recursive clauses: + (for-each (lambda (nt pats) + (let ([l (bound-identifier-mapping-get nts nt)]) + (bound-identifier-mapping-put! nts nt (list (car l) + (cdr l) + (map (lambda (x) #f) pats))))) + nt-ids patss) + (nt-fixpoint + nts + (lambda (nt pats old-list) + (list (car old-list) + (cadr old-list) + (map (lambda (pat simple?) + (or simple? + (let ([l (map (lambda (elem) + (bound-identifier-mapping-get + nts + elem + (lambda () #f))) + pat)]) + (andmap (lambda (i) + (or (not i) + (andmap values (caddr i)))) + l)))) + pats (caddr old-list)))) + nt-ids patss) + ;; Build a definition for each non-term: + (loop (cdr clauses) + cfg-start + (map (lambda (nt pats handles $ctxs) + (define info (bound-identifier-mapping-get nts nt)) + (list nt + #`(let ([key (gensym '#,nt)]) + (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) + (parse-nt/share + key #,(car info) '#,(cadr info) stream last-consumed-token depth end + max-depth tasks + success-k fail-k + (lambda (end max-depth tasks success-k fail-k) + #,(let loop ([pats pats] + [handles (syntax->list handles)] + [$ctxs (syntax->list $ctxs)] + [simple?s (caddr info)]) + (if (null? pats) + #'(fail-k max-depth tasks) + #`(#,(if (or (null? (cdr pats)) + (car simple?s)) + #'parse-or + #'parse-parallel-or) + (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) + #,(build-match nts + toks + (car pats) + (car handles) + (car $ctxs))) + (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) + #,(loop (cdr pats) + (cdr handles) + (cdr $ctxs) + (cdr simple?s))) + stream last-consumed-token depth end success-k fail-k max-depth tasks))))))))) + nt-ids + patss + (syntax->list #'(((begin handle0 handle ...) ...) ...)) + (syntax->list #'((handle0 ...) ...))) + cfg-error + src-pos? + (list* + (with-syntax ([((tok tok-id . $e) ...) + (token-identifier-mapping-map toks + (lambda (k v) + (list* k + (car v) + (if (cdr v) + #f + '$1))))] + [(pos ...) + (if src-pos? + #'($1-start-pos $1-end-pos) + #'(#f #f))]) + #`(grammar (start [() null] + [(atok start) (cons $1 $2)]) + (atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...))) + #`(start start) + parser-clauses)))] + [(grammar . _) + (raise-syntax-error + #f + "bad grammar clause" + stx + (car clauses))] + [(src-pos) + (loop (cdr clauses) + cfg-start + cfg-grammar + cfg-error + #t + (cons (car clauses) parser-clauses))] + [_else + (loop (cdr clauses) + cfg-start + cfg-grammar + cfg-error + src-pos? + (cons (car clauses) parser-clauses))]))))]) + #`(let ([orig-parse (parser + [error (lambda (a b c) + (error 'cfg-parser "unexpected ~a token: ~a" b c))] + . #,parser-clauses)] + [error-proc #,cfg-error]) + (letrec #,grammar + (lambda (get-tok) + (let ([tok-list (orig-parse get-tok)]) + (letrec ([success-k + (lambda (val stream last-consumed-token depth max-depth tasks next) + (if (null? stream) + val + (next success-k fail-k max-depth tasks)))] + [fail-k (lambda (max-depth tasks) + (cond + [(null? 
tok-list) + (if error-proc + (error-proc #t + 'no-tokens + #f + (make-position #f #f #f) + (make-position #f #f #f)) + (error + 'cfg-parse + "no tokens"))] + [else + (let ([bad-tok (list-ref tok-list + (min (sub1 (length tok-list)) + max-depth))]) + (if error-proc + (error-proc #t + (tok-orig-name bad-tok) + (tok-val bad-tok) + (tok-start bad-tok) + (tok-end bad-tok)) + (error + 'cfg-parse + "failed at ~a" + (tok-val bad-tok))))]))]) + (#,start tok-list + ;; we simulate a token at the very beginning with zero width + ;; for use with the position-generating code (*-start-pos, *-end-pos). + (if (null? tok-list) + (tok #f #f #f + (position 1 + #,(if src-pos? #'1 #'#f) + #,(if src-pos? #'0 #'#f)) + (position 1 + #,(if src-pos? #'1 #'#f) + #,(if src-pos? #'0 #'#f))) + (tok (tok-name (car tok-list)) + (tok-orig-name (car tok-list)) + (tok-val (car tok-list)) + (tok-start (car tok-list)) + (tok-start (car tok-list)))) + 0 + (length tok-list) + success-k + fail-k + 0 + (make-tasks null null + (make-hasheq) (make-hasheq) + (make-hash) #t)))))))))])) + + +(module* test racket/base + (require (submod "..") + parser-tools/lex + racket/block + rackunit) + + ;; Test: parsing regular expressions. + ;; Here is a test case on locations: + (block + (define-tokens regexp-tokens (ANCHOR STAR OR LIT LPAREN RPAREN EOF)) + (define lex (lexer-src-pos ["|" (token-OR lexeme)] + ["^" (token-ANCHOR lexeme)] + ["*" (token-STAR lexeme)] + [(repetition 1 +inf.0 alphabetic) (token-LIT lexeme)] + ["(" (token-LPAREN lexeme)] + [")" (token-RPAREN lexeme)] + [whitespace (return-without-pos (lex input-port))] + [(eof) (token-EOF 'eof)])) + (define -parse (cfg-parser + (tokens regexp-tokens) + (start top) + (end EOF) + (src-pos) + (grammar [top [(maybe-anchor regexp) + (cond [$1 + `(anchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] + [else + `(unanchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))])]] + [maybe-anchor [(ANCHOR) #t] + [() #f]] + [regexp [(regexp STAR) `(star ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] + [(regexp OR regexp) `(or ,$1 ,$3 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] + [(LPAREN regexp RPAREN) `(group ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] + [(LIT) `(lit ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $1-end-pos))]]))) + (define (pos->sexp pos) + (position-offset pos)) + + (define (parse s) + (define ip (open-input-string s)) + (port-count-lines! ip) + (-parse (lambda () (lex ip)))) + + (check-equal? (parse "abc") + '(unanchored (lit "abc" 1 4) 1 4)) + (check-equal? 
(parse "a | (b*) | c")
+                '(unanchored (or (or (lit "a" 1 2)
+                                     (group (star (lit "b" 6 7) 6 8) 5 9)
+                                     1 9)
+                                 (lit "c" 12 13)
+                                 1 13)
+                             1 13)))
+
+
+
+
+
+  ;; Tests used during development
+  (define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF))
+
+  (define lex
+    (lexer
+     ["+" (token-PLUS '+)]
+     ["-" (token-MINUS '-)]
+     ["*" (token-STAR '*)]
+     ["|" (token-BAR '||)]
+     [":" (token-COLON '|:|)]
+     [whitespace (lex input-port)]
+     [(eof) (token-EOF 'eof)]))
+
+  (define parse
+    (cfg-parser
+     (tokens non-terminals)
+     (start <program>)
+     (end EOF)
+     (error (lambda (a b stx)
+              (error 'parse "failed at ~s" stx)))
+     (grammar [<program> [(PLUS) "plus"]
+                         [(<expr> BAR <expr>) (list $1 $2 $3)]
+                         [(<expr> COLON) (list $1)]]
+              [<expr> [(MINUS) "minus"]
+                      [(<program> STAR) (cons $1 $2)]]
+              [<random> [(<sign> MINUS) "yes"]]
+              [<sign> [(PLUS) 'plus]
+                      [(MINUS) 'minus]]
+              [<count> [() '0]
+                       [(<count> PLUS) (add1 $1)]
+                       [(<sign> PLUS) (add1 $1)]])))
+
+  (let ([p (open-input-string #;"+*|-|-*|+**" #;"-|+*|+**"
+                              #;"+*|+**|-" #;"-|-*|-|-*"
+                              #;"-|-*|-|-**|-|-*|-|-**"
+                              "-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|-|-*|-|-**|-|-*|-|-***
+   |-|-*|-|-**|-|-*|-|-*****|-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|
+   -|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****"
+                              ;; This one fails:
+                              #;"+*")])
+    (check-equal? (parse (lambda () (lex p)))
+                  '((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
+                      ||
+                      (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
+                     .
+                     *)
+                    ||
+                    (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
+                     ||
+                     (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
+                    .
+                    *))
+                   .
+                   *)
+                  ||
+                  (((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
+                     ||
+                     (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
+                    .
+                    *)
+                   ||
+                   (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
+                    ||
+                    (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
+                   .
+                   *))
+                  .
+                  *)))))
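+
+;; A minimal usage sketch (illustrative only; `calc-tokens', <expr>, NUM,
+;; PLUS, and `lex-calc' are hypothetical names, not part of this file):
+;;
+;;   (define parse-calc
+;;     (cfg-parser (tokens calc-tokens)
+;;                 (start <expr>)
+;;                 (end EOF)
+;;                 (grammar [<expr> [(NUM) $1]
+;;                                  [(<expr> PLUS <expr>) (+ $1 $3)]])))
+;;   (parse-calc (lambda () (lex-calc input-port)))
+;;
+;; i.e., the result of `cfg-parser' is a function that repeatedly calls a
+;; token thunk and returns the first successful parse of the whole stream.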