fixed handling of empty tokens

svn: r1350

original commit: d37d4e1d24c68ad7e3f40c3c20e3f59487456fb0
tokens
Matthew Flatt 19 years ago
parent 79465df15c
commit 75b8182044

@ -30,10 +30,12 @@
(module cfg-parser mzscheme
(require (lib "yacc.ss" "parser-tools")
(lib "lex.ss" "parser-tools")
(lib "list.ss")
(lib "etc.ss"))
(require-for-syntax (lib "boundmap.ss" "syntax")
(lib "list.ss"))
(lib "list.ss")
(lib "token-syntax.ss" "parser-tools" "private-lex"))
(provide cfg-parser)
@ -43,6 +45,20 @@
;; Represents the thread scheduler:
(define-struct tasks (active active-back waits multi-waits cache progress?))
(define-for-syntax make-token-identifier-mapping make-hash-table)
(define-for-syntax token-identifier-mapping-get
(case-lambda
[(t tok)
(hash-table-get t (syntax-e tok))]
[(t tok fail)
(hash-table-get t (syntax-e tok) fail)]))
(define-for-syntax token-identifier-mapping-put!
(lambda (t tok v)
(hash-table-put! t (syntax-e tok) v)))
(define-for-syntax token-identifier-mapping-map
(lambda (t f)
(hash-table-map t f)))
;; Used to calculate information on the grammar, such as whether
;; a particular non-terminal is "simple" instead of recursively defined.
(define-for-syntax (nt-fixpoint nts proc nt-ids patss)
@ -303,14 +319,15 @@
;; Finds the symbolic representative of a token class
(define-for-syntax (map-token toks tok)
(bound-identifier-mapping-get toks tok
(lambda ()
(let ([id (gensym (syntax-e tok))])
(bound-identifier-mapping-put! toks tok id)
id))))
(car (token-identifier-mapping-get toks tok)))
(define no-pos-val (make-position 0 0 0))
(define-for-syntax no-pos
(let ([npv ((syntax-local-certifier) #'no-pos-val)])
(lambda (stx) npv)))
;; Builds a matcher for a particular alternative
(define-for-syntax (build-match nts toks pat handle)
(define-for-syntax (build-match nts toks pat handle $ctx)
(let loop ([pat pat]
[pos 1])
(if (null? pat)
@ -318,7 +335,11 @@
(lambda (success-k fail-k max-depth tasks)
(fail-k max-depth tasks)))
(let ([id (datum->syntax-object (car pat)
(string->symbol (format "$~a" pos)))])
(string->symbol (format "$~a" pos)))]
[id-start-pos (datum->syntax-object (car pat)
(string->symbol (format "$~a-start-pos" pos)))]
[id-end-pos (datum->syntax-object (car pat)
(string->symbol (format "$~a-end-pos" pos)))])
(cond
[(bound-identifier-mapping-get nts (car pat) (lambda () #f))
;; Match non-termimal
@ -329,7 +350,9 @@
(andmap values (caddr l))))
#,(car pat)
(lambda (#,id stream depth end success-k fail-k max-depth tasks)
#,(loop (cdr pat) (add1 pos)))
(let-syntax ([#,id-start-pos no-pos]
[#,id-end-pos no-pos])
#,(loop (cdr pat) (add1 pos))))
stream depth
#,(let ([cnt (apply +
(map (lambda (item)
@ -349,7 +372,9 @@
[stream (cdr stream)]
[depth (add1 depth)])
(let ([max-depth (max max-depth depth)])
#,(loop (cdr pat) (add1 pos))))
(let-syntax ([#,id-start-pos no-pos]
[#,id-end-pos no-pos])
#,(loop (cdr pat) (add1 pos)))))
(fail-k max-depth tasks)))])))))
;; Starts parsing to match a non-terminal. There's a minor
@ -447,6 +472,36 @@
[(_ clause ...)
(let ([clauses (syntax->list #'(clause ...))])
(let-values ([(start grammar cfg-error parser-clauses)
(let ([all-toks (apply
append
(map (lambda (clause)
(syntax-case clause (tokens)
[(tokens t ...)
(apply
append
(map (lambda (t)
(let ([v (syntax-local-value t (lambda () #f))])
(cond
[(terminals-def? v)
(map (lambda (v)
(cons v #f))
(syntax->list (terminals-def-t v)))]
[(e-terminals-def? v)
(map (lambda (v)
(cons v #t))
(syntax->list (e-terminals-def-t v)))]
[else null])))
(syntax->list #'(t ...))))]
[_else null]))
clauses))]
[all-end-toks (apply
append
(map (lambda (clause)
(syntax-case clause (end)
[(end t ...)
(syntax->list #'(t ...))]
[_else null]))
clauses))])
(let loop ([clauses clauses]
[cfg-start #f]
[cfg-grammar #f]
@ -464,7 +519,8 @@
(loop (cdr clauses) cfg-start cfg-grammar #'expr parser-clauses)]
[(grammar [nt [pat handle0 handle ...] ...] ...)
(let ([nts (make-bound-identifier-mapping)]
[toks (make-bound-identifier-mapping)]
[toks (make-token-identifier-mapping)]
[end-toks (make-token-identifier-mapping)]
[nt-ids (syntax->list #'(nt ...))]
[patss (map (lambda (stx)
(map syntax->list (syntax->list stx)))
@ -472,6 +528,15 @@
(for-each (lambda (nt)
(bound-identifier-mapping-put! nts nt (list 0)))
nt-ids)
(for-each (lambda (t)
(token-identifier-mapping-put! end-toks t #t))
all-end-toks)
(for-each (lambda (t)
(unless (token-identifier-mapping-get end-toks (car t) (lambda () #f))
(let ([id (gensym (syntax-e (car t)))])
(token-identifier-mapping-put! toks (car t)
(cons id (cdr t))))))
all-toks)
;; Compute min max size for each non-term:
(nt-fixpoint
nts
@ -565,7 +630,7 @@
;; Build a definiton for each non-term:
(loop (cdr clauses)
cfg-start
(map (lambda (nt pats handles)
(map (lambda (nt pats handles $ctxs)
(define info (bound-identifier-mapping-get nts nt))
(list nt
#`(let ([key (gensym '#,nt)])
@ -577,6 +642,7 @@
(lambda (end max-depth tasks success-k fail-k)
#,(let loop ([pats pats]
[handles (syntax->list handles)]
[$ctxs (syntax->list $ctxs)]
[simple?s (caddr info)])
(if (null? pats)
#'(fail-k max-depth tasks)
@ -588,26 +654,31 @@
#,(build-match nts
toks
(car pats)
(car handles)))
(car handles)
(car $ctxs)))
(lambda (stream depth end success-k fail-k max-depth tasks)
#,(loop (cdr pats)
(cdr handles)
(cdr $ctxs)
(cdr simple?s)))
stream depth end success-k fail-k max-depth tasks)))))))))
nt-ids
patss
(syntax->list #'(((begin handle0 handle ...) ...) ...)))
(syntax->list #'(((begin handle0 handle ...) ...) ...))
(syntax->list #'((handle0 ...) ...)))
cfg-error
(list*
(with-syntax ([((tok . tok-id) ...)
(bound-identifier-mapping-map toks cons)])
(with-syntax ([($tok ...)
(map (lambda (id)
(datum->syntax-object id '$1))
(syntax->list #'(tok ...)))])
(with-syntax ([((tok tok-id . $e) ...)
(token-identifier-mapping-map toks
(lambda (k v)
(list* k
(car v)
(if (cdr v)
#f
'$1))))])
#`(grammar (start [() null]
[(atok start) (cons $1 $2)])
(atok [(tok) (make-tok 'tok-id 'tok $tok)] ...))))
(atok [(tok) (make-tok 'tok-id 'tok $e)] ...)))
#`(start start)
parser-clauses)))]
[(grammar . _)
@ -621,7 +692,7 @@
cfg-start
cfg-grammar
cfg-error
(cons (car clauses) parser-clauses))])))])
(cons (car clauses) parser-clauses))]))))])
#`(let ([orig-parse (parser
[error (lambda (a b c)
(error 'cfg-parser "unexpected ~a token: ~a" b c))]
@ -658,8 +729,6 @@
#|
;; Tests used during development
(require (lib "lex.ss" "parser-tools"))
(define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF))
(define lex
@ -700,7 +769,5 @@
;; This one fails:
#;"+*")])
(time (parse (lambda () (lex p))))))
|#
)

Loading…
Cancel
Save