|
|
|
@ -30,10 +30,12 @@
|
|
|
|
|
|
|
|
|
|
(module cfg-parser mzscheme
|
|
|
|
|
(require (lib "yacc.ss" "parser-tools")
|
|
|
|
|
(lib "lex.ss" "parser-tools")
|
|
|
|
|
(lib "list.ss")
|
|
|
|
|
(lib "etc.ss"))
|
|
|
|
|
(require-for-syntax (lib "boundmap.ss" "syntax")
|
|
|
|
|
(lib "list.ss"))
|
|
|
|
|
(lib "list.ss")
|
|
|
|
|
(lib "token-syntax.ss" "parser-tools" "private-lex"))
|
|
|
|
|
|
|
|
|
|
(provide cfg-parser)
|
|
|
|
|
|
|
|
|
@ -43,6 +45,20 @@
|
|
|
|
|
;; Represents the thread scheduler:
|
|
|
|
|
(define-struct tasks (active active-back waits multi-waits cache progress?))
|
|
|
|
|
|
|
|
|
|
(define-for-syntax make-token-identifier-mapping make-hash-table)
|
|
|
|
|
(define-for-syntax token-identifier-mapping-get
|
|
|
|
|
(case-lambda
|
|
|
|
|
[(t tok)
|
|
|
|
|
(hash-table-get t (syntax-e tok))]
|
|
|
|
|
[(t tok fail)
|
|
|
|
|
(hash-table-get t (syntax-e tok) fail)]))
|
|
|
|
|
(define-for-syntax token-identifier-mapping-put!
|
|
|
|
|
(lambda (t tok v)
|
|
|
|
|
(hash-table-put! t (syntax-e tok) v)))
|
|
|
|
|
(define-for-syntax token-identifier-mapping-map
|
|
|
|
|
(lambda (t f)
|
|
|
|
|
(hash-table-map t f)))
|
|
|
|
|
|
|
|
|
|
;; Used to calculate information on the grammar, such as whether
|
|
|
|
|
;; a particular non-terminal is "simple" instead of recursively defined.
|
|
|
|
|
(define-for-syntax (nt-fixpoint nts proc nt-ids patss)
|
|
|
|
@ -303,14 +319,15 @@
|
|
|
|
|
|
|
|
|
|
;; Finds the symbolic representative of a token class
|
|
|
|
|
(define-for-syntax (map-token toks tok)
|
|
|
|
|
(bound-identifier-mapping-get toks tok
|
|
|
|
|
(lambda ()
|
|
|
|
|
(let ([id (gensym (syntax-e tok))])
|
|
|
|
|
(bound-identifier-mapping-put! toks tok id)
|
|
|
|
|
id))))
|
|
|
|
|
(car (token-identifier-mapping-get toks tok)))
|
|
|
|
|
|
|
|
|
|
(define no-pos-val (make-position 0 0 0))
|
|
|
|
|
(define-for-syntax no-pos
|
|
|
|
|
(let ([npv ((syntax-local-certifier) #'no-pos-val)])
|
|
|
|
|
(lambda (stx) npv)))
|
|
|
|
|
|
|
|
|
|
;; Builds a matcher for a particular alternative
|
|
|
|
|
(define-for-syntax (build-match nts toks pat handle)
|
|
|
|
|
(define-for-syntax (build-match nts toks pat handle $ctx)
|
|
|
|
|
(let loop ([pat pat]
|
|
|
|
|
[pos 1])
|
|
|
|
|
(if (null? pat)
|
|
|
|
@ -318,7 +335,11 @@
|
|
|
|
|
(lambda (success-k fail-k max-depth tasks)
|
|
|
|
|
(fail-k max-depth tasks)))
|
|
|
|
|
(let ([id (datum->syntax-object (car pat)
|
|
|
|
|
(string->symbol (format "$~a" pos)))])
|
|
|
|
|
(string->symbol (format "$~a" pos)))]
|
|
|
|
|
[id-start-pos (datum->syntax-object (car pat)
|
|
|
|
|
(string->symbol (format "$~a-start-pos" pos)))]
|
|
|
|
|
[id-end-pos (datum->syntax-object (car pat)
|
|
|
|
|
(string->symbol (format "$~a-end-pos" pos)))])
|
|
|
|
|
(cond
|
|
|
|
|
[(bound-identifier-mapping-get nts (car pat) (lambda () #f))
|
|
|
|
|
;; Match non-termimal
|
|
|
|
@ -329,7 +350,9 @@
|
|
|
|
|
(andmap values (caddr l))))
|
|
|
|
|
#,(car pat)
|
|
|
|
|
(lambda (#,id stream depth end success-k fail-k max-depth tasks)
|
|
|
|
|
#,(loop (cdr pat) (add1 pos)))
|
|
|
|
|
(let-syntax ([#,id-start-pos no-pos]
|
|
|
|
|
[#,id-end-pos no-pos])
|
|
|
|
|
#,(loop (cdr pat) (add1 pos))))
|
|
|
|
|
stream depth
|
|
|
|
|
#,(let ([cnt (apply +
|
|
|
|
|
(map (lambda (item)
|
|
|
|
@ -349,7 +372,9 @@
|
|
|
|
|
[stream (cdr stream)]
|
|
|
|
|
[depth (add1 depth)])
|
|
|
|
|
(let ([max-depth (max max-depth depth)])
|
|
|
|
|
#,(loop (cdr pat) (add1 pos))))
|
|
|
|
|
(let-syntax ([#,id-start-pos no-pos]
|
|
|
|
|
[#,id-end-pos no-pos])
|
|
|
|
|
#,(loop (cdr pat) (add1 pos)))))
|
|
|
|
|
(fail-k max-depth tasks)))])))))
|
|
|
|
|
|
|
|
|
|
;; Starts parsing to match a non-terminal. There's a minor
|
|
|
|
@ -447,6 +472,36 @@
|
|
|
|
|
[(_ clause ...)
|
|
|
|
|
(let ([clauses (syntax->list #'(clause ...))])
|
|
|
|
|
(let-values ([(start grammar cfg-error parser-clauses)
|
|
|
|
|
(let ([all-toks (apply
|
|
|
|
|
append
|
|
|
|
|
(map (lambda (clause)
|
|
|
|
|
(syntax-case clause (tokens)
|
|
|
|
|
[(tokens t ...)
|
|
|
|
|
(apply
|
|
|
|
|
append
|
|
|
|
|
(map (lambda (t)
|
|
|
|
|
(let ([v (syntax-local-value t (lambda () #f))])
|
|
|
|
|
(cond
|
|
|
|
|
[(terminals-def? v)
|
|
|
|
|
(map (lambda (v)
|
|
|
|
|
(cons v #f))
|
|
|
|
|
(syntax->list (terminals-def-t v)))]
|
|
|
|
|
[(e-terminals-def? v)
|
|
|
|
|
(map (lambda (v)
|
|
|
|
|
(cons v #t))
|
|
|
|
|
(syntax->list (e-terminals-def-t v)))]
|
|
|
|
|
[else null])))
|
|
|
|
|
(syntax->list #'(t ...))))]
|
|
|
|
|
[_else null]))
|
|
|
|
|
clauses))]
|
|
|
|
|
[all-end-toks (apply
|
|
|
|
|
append
|
|
|
|
|
(map (lambda (clause)
|
|
|
|
|
(syntax-case clause (end)
|
|
|
|
|
[(end t ...)
|
|
|
|
|
(syntax->list #'(t ...))]
|
|
|
|
|
[_else null]))
|
|
|
|
|
clauses))])
|
|
|
|
|
(let loop ([clauses clauses]
|
|
|
|
|
[cfg-start #f]
|
|
|
|
|
[cfg-grammar #f]
|
|
|
|
@ -464,7 +519,8 @@
|
|
|
|
|
(loop (cdr clauses) cfg-start cfg-grammar #'expr parser-clauses)]
|
|
|
|
|
[(grammar [nt [pat handle0 handle ...] ...] ...)
|
|
|
|
|
(let ([nts (make-bound-identifier-mapping)]
|
|
|
|
|
[toks (make-bound-identifier-mapping)]
|
|
|
|
|
[toks (make-token-identifier-mapping)]
|
|
|
|
|
[end-toks (make-token-identifier-mapping)]
|
|
|
|
|
[nt-ids (syntax->list #'(nt ...))]
|
|
|
|
|
[patss (map (lambda (stx)
|
|
|
|
|
(map syntax->list (syntax->list stx)))
|
|
|
|
@ -472,6 +528,15 @@
|
|
|
|
|
(for-each (lambda (nt)
|
|
|
|
|
(bound-identifier-mapping-put! nts nt (list 0)))
|
|
|
|
|
nt-ids)
|
|
|
|
|
(for-each (lambda (t)
|
|
|
|
|
(token-identifier-mapping-put! end-toks t #t))
|
|
|
|
|
all-end-toks)
|
|
|
|
|
(for-each (lambda (t)
|
|
|
|
|
(unless (token-identifier-mapping-get end-toks (car t) (lambda () #f))
|
|
|
|
|
(let ([id (gensym (syntax-e (car t)))])
|
|
|
|
|
(token-identifier-mapping-put! toks (car t)
|
|
|
|
|
(cons id (cdr t))))))
|
|
|
|
|
all-toks)
|
|
|
|
|
;; Compute min max size for each non-term:
|
|
|
|
|
(nt-fixpoint
|
|
|
|
|
nts
|
|
|
|
@ -565,7 +630,7 @@
|
|
|
|
|
;; Build a definiton for each non-term:
|
|
|
|
|
(loop (cdr clauses)
|
|
|
|
|
cfg-start
|
|
|
|
|
(map (lambda (nt pats handles)
|
|
|
|
|
(map (lambda (nt pats handles $ctxs)
|
|
|
|
|
(define info (bound-identifier-mapping-get nts nt))
|
|
|
|
|
(list nt
|
|
|
|
|
#`(let ([key (gensym '#,nt)])
|
|
|
|
@ -577,6 +642,7 @@
|
|
|
|
|
(lambda (end max-depth tasks success-k fail-k)
|
|
|
|
|
#,(let loop ([pats pats]
|
|
|
|
|
[handles (syntax->list handles)]
|
|
|
|
|
[$ctxs (syntax->list $ctxs)]
|
|
|
|
|
[simple?s (caddr info)])
|
|
|
|
|
(if (null? pats)
|
|
|
|
|
#'(fail-k max-depth tasks)
|
|
|
|
@ -588,26 +654,31 @@
|
|
|
|
|
#,(build-match nts
|
|
|
|
|
toks
|
|
|
|
|
(car pats)
|
|
|
|
|
(car handles)))
|
|
|
|
|
(car handles)
|
|
|
|
|
(car $ctxs)))
|
|
|
|
|
(lambda (stream depth end success-k fail-k max-depth tasks)
|
|
|
|
|
#,(loop (cdr pats)
|
|
|
|
|
(cdr handles)
|
|
|
|
|
(cdr $ctxs)
|
|
|
|
|
(cdr simple?s)))
|
|
|
|
|
stream depth end success-k fail-k max-depth tasks)))))))))
|
|
|
|
|
nt-ids
|
|
|
|
|
patss
|
|
|
|
|
(syntax->list #'(((begin handle0 handle ...) ...) ...)))
|
|
|
|
|
(syntax->list #'(((begin handle0 handle ...) ...) ...))
|
|
|
|
|
(syntax->list #'((handle0 ...) ...)))
|
|
|
|
|
cfg-error
|
|
|
|
|
(list*
|
|
|
|
|
(with-syntax ([((tok . tok-id) ...)
|
|
|
|
|
(bound-identifier-mapping-map toks cons)])
|
|
|
|
|
(with-syntax ([($tok ...)
|
|
|
|
|
(map (lambda (id)
|
|
|
|
|
(datum->syntax-object id '$1))
|
|
|
|
|
(syntax->list #'(tok ...)))])
|
|
|
|
|
(with-syntax ([((tok tok-id . $e) ...)
|
|
|
|
|
(token-identifier-mapping-map toks
|
|
|
|
|
(lambda (k v)
|
|
|
|
|
(list* k
|
|
|
|
|
(car v)
|
|
|
|
|
(if (cdr v)
|
|
|
|
|
#f
|
|
|
|
|
'$1))))])
|
|
|
|
|
#`(grammar (start [() null]
|
|
|
|
|
[(atok start) (cons $1 $2)])
|
|
|
|
|
(atok [(tok) (make-tok 'tok-id 'tok $tok)] ...))))
|
|
|
|
|
(atok [(tok) (make-tok 'tok-id 'tok $e)] ...)))
|
|
|
|
|
#`(start start)
|
|
|
|
|
parser-clauses)))]
|
|
|
|
|
[(grammar . _)
|
|
|
|
@ -621,7 +692,7 @@
|
|
|
|
|
cfg-start
|
|
|
|
|
cfg-grammar
|
|
|
|
|
cfg-error
|
|
|
|
|
(cons (car clauses) parser-clauses))])))])
|
|
|
|
|
(cons (car clauses) parser-clauses))]))))])
|
|
|
|
|
#`(let ([orig-parse (parser
|
|
|
|
|
[error (lambda (a b c)
|
|
|
|
|
(error 'cfg-parser "unexpected ~a token: ~a" b c))]
|
|
|
|
@ -658,8 +729,6 @@
|
|
|
|
|
#|
|
|
|
|
|
;; Tests used during development
|
|
|
|
|
|
|
|
|
|
(require (lib "lex.ss" "parser-tools"))
|
|
|
|
|
|
|
|
|
|
(define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF))
|
|
|
|
|
|
|
|
|
|
(define lex
|
|
|
|
@ -700,7 +769,5 @@
|
|
|
|
|
;; This one fails:
|
|
|
|
|
#;"+*")])
|
|
|
|
|
(time (parse (lambda () (lex p))))))
|
|
|
|
|
|
|
|
|
|
|#
|
|
|
|
|
|
|
|
|
|
)
|
|
|
|
|