fixed handling of empty tokens

svn: r1350

original commit: d37d4e1d24c68ad7e3f40c3c20e3f59487456fb0
tokens
Matthew Flatt 19 years ago
parent 79465df15c
commit 75b8182044

@ -30,10 +30,12 @@
(module cfg-parser mzscheme (module cfg-parser mzscheme
(require (lib "yacc.ss" "parser-tools") (require (lib "yacc.ss" "parser-tools")
(lib "lex.ss" "parser-tools")
(lib "list.ss") (lib "list.ss")
(lib "etc.ss")) (lib "etc.ss"))
(require-for-syntax (lib "boundmap.ss" "syntax") (require-for-syntax (lib "boundmap.ss" "syntax")
(lib "list.ss")) (lib "list.ss")
(lib "token-syntax.ss" "parser-tools" "private-lex"))
(provide cfg-parser) (provide cfg-parser)
@ -43,6 +45,20 @@
;; Represents the thread scheduler: ;; Represents the thread scheduler:
(define-struct tasks (active active-back waits multi-waits cache progress?)) (define-struct tasks (active active-back waits multi-waits cache progress?))
(define-for-syntax make-token-identifier-mapping make-hash-table)
(define-for-syntax token-identifier-mapping-get
(case-lambda
[(t tok)
(hash-table-get t (syntax-e tok))]
[(t tok fail)
(hash-table-get t (syntax-e tok) fail)]))
(define-for-syntax token-identifier-mapping-put!
(lambda (t tok v)
(hash-table-put! t (syntax-e tok) v)))
(define-for-syntax token-identifier-mapping-map
(lambda (t f)
(hash-table-map t f)))
;; Used to calculate information on the grammar, such as whether ;; Used to calculate information on the grammar, such as whether
;; a particular non-terminal is "simple" instead of recursively defined. ;; a particular non-terminal is "simple" instead of recursively defined.
(define-for-syntax (nt-fixpoint nts proc nt-ids patss) (define-for-syntax (nt-fixpoint nts proc nt-ids patss)
@ -303,14 +319,15 @@
;; Finds the symbolic representative of a token class ;; Finds the symbolic representative of a token class
(define-for-syntax (map-token toks tok) (define-for-syntax (map-token toks tok)
(bound-identifier-mapping-get toks tok (car (token-identifier-mapping-get toks tok)))
(lambda ()
(let ([id (gensym (syntax-e tok))]) (define no-pos-val (make-position 0 0 0))
(bound-identifier-mapping-put! toks tok id) (define-for-syntax no-pos
id)))) (let ([npv ((syntax-local-certifier) #'no-pos-val)])
(lambda (stx) npv)))
;; Builds a matcher for a particular alternative ;; Builds a matcher for a particular alternative
(define-for-syntax (build-match nts toks pat handle) (define-for-syntax (build-match nts toks pat handle $ctx)
(let loop ([pat pat] (let loop ([pat pat]
[pos 1]) [pos 1])
(if (null? pat) (if (null? pat)
@ -318,7 +335,11 @@
(lambda (success-k fail-k max-depth tasks) (lambda (success-k fail-k max-depth tasks)
(fail-k max-depth tasks))) (fail-k max-depth tasks)))
(let ([id (datum->syntax-object (car pat) (let ([id (datum->syntax-object (car pat)
(string->symbol (format "$~a" pos)))]) (string->symbol (format "$~a" pos)))]
[id-start-pos (datum->syntax-object (car pat)
(string->symbol (format "$~a-start-pos" pos)))]
[id-end-pos (datum->syntax-object (car pat)
(string->symbol (format "$~a-end-pos" pos)))])
(cond (cond
[(bound-identifier-mapping-get nts (car pat) (lambda () #f)) [(bound-identifier-mapping-get nts (car pat) (lambda () #f))
;; Match non-termimal ;; Match non-termimal
@ -329,7 +350,9 @@
(andmap values (caddr l)))) (andmap values (caddr l))))
#,(car pat) #,(car pat)
(lambda (#,id stream depth end success-k fail-k max-depth tasks) (lambda (#,id stream depth end success-k fail-k max-depth tasks)
#,(loop (cdr pat) (add1 pos))) (let-syntax ([#,id-start-pos no-pos]
[#,id-end-pos no-pos])
#,(loop (cdr pat) (add1 pos))))
stream depth stream depth
#,(let ([cnt (apply + #,(let ([cnt (apply +
(map (lambda (item) (map (lambda (item)
@ -349,7 +372,9 @@
[stream (cdr stream)] [stream (cdr stream)]
[depth (add1 depth)]) [depth (add1 depth)])
(let ([max-depth (max max-depth depth)]) (let ([max-depth (max max-depth depth)])
#,(loop (cdr pat) (add1 pos)))) (let-syntax ([#,id-start-pos no-pos]
[#,id-end-pos no-pos])
#,(loop (cdr pat) (add1 pos)))))
(fail-k max-depth tasks)))]))))) (fail-k max-depth tasks)))])))))
;; Starts parsing to match a non-terminal. There's a minor ;; Starts parsing to match a non-terminal. There's a minor
@ -447,181 +472,227 @@
[(_ clause ...) [(_ clause ...)
(let ([clauses (syntax->list #'(clause ...))]) (let ([clauses (syntax->list #'(clause ...))])
(let-values ([(start grammar cfg-error parser-clauses) (let-values ([(start grammar cfg-error parser-clauses)
(let loop ([clauses clauses] (let ([all-toks (apply
[cfg-start #f] append
[cfg-grammar #f] (map (lambda (clause)
[cfg-error #f] (syntax-case clause (tokens)
[parser-clauses null]) [(tokens t ...)
(if (null? clauses) (apply
(values cfg-start append
cfg-grammar (map (lambda (t)
cfg-error (let ([v (syntax-local-value t (lambda () #f))])
(reverse parser-clauses)) (cond
(syntax-case (car clauses) (start error grammar) [(terminals-def? v)
[(start tok) (map (lambda (v)
(loop (cdr clauses) #'tok cfg-grammar cfg-error parser-clauses)] (cons v #f))
[(error expr) (syntax->list (terminals-def-t v)))]
(loop (cdr clauses) cfg-start cfg-grammar #'expr parser-clauses)] [(e-terminals-def? v)
[(grammar [nt [pat handle0 handle ...] ...] ...) (map (lambda (v)
(let ([nts (make-bound-identifier-mapping)] (cons v #t))
[toks (make-bound-identifier-mapping)] (syntax->list (e-terminals-def-t v)))]
[nt-ids (syntax->list #'(nt ...))] [else null])))
[patss (map (lambda (stx) (syntax->list #'(t ...))))]
(map syntax->list (syntax->list stx))) [_else null]))
(syntax->list #'((pat ...) ...)))]) clauses))]
(for-each (lambda (nt) [all-end-toks (apply
(bound-identifier-mapping-put! nts nt (list 0))) append
nt-ids) (map (lambda (clause)
;; Compute min max size for each non-term: (syntax-case clause (end)
(nt-fixpoint [(end t ...)
nts (syntax->list #'(t ...))]
(lambda (nt pats old-list) [_else null]))
(let ([new-cnt clauses))])
(apply (let loop ([clauses clauses]
min [cfg-start #f]
(map (lambda (pat) [cfg-grammar #f]
(apply [cfg-error #f]
+ [parser-clauses null])
(map (lambda (elem) (if (null? clauses)
(car (values cfg-start
(bound-identifier-mapping-get nts cfg-grammar
elem cfg-error
(lambda () (list 1))))) (reverse parser-clauses))
pat))) (syntax-case (car clauses) (start error grammar)
pats))]) [(start tok)
(if (new-cnt . > . (car old-list)) (loop (cdr clauses) #'tok cfg-grammar cfg-error parser-clauses)]
(cons new-cnt (cdr old-list)) [(error expr)
old-list))) (loop (cdr clauses) cfg-start cfg-grammar #'expr parser-clauses)]
nt-ids patss) [(grammar [nt [pat handle0 handle ...] ...] ...)
;; Compute set of toks that must appear at the beginning (let ([nts (make-bound-identifier-mapping)]
;; for a non-terminal [toks (make-token-identifier-mapping)]
(nt-fixpoint [end-toks (make-token-identifier-mapping)]
nts [nt-ids (syntax->list #'(nt ...))]
(lambda (nt pats old-list) [patss (map (lambda (stx)
(let ([new-list (map syntax->list (syntax->list stx)))
(apply (syntax->list #'((pat ...) ...)))])
append (for-each (lambda (nt)
(map (lambda (pat) (bound-identifier-mapping-put! nts nt (list 0)))
(let loop ([pat pat]) nt-ids)
(if (pair? pat) (for-each (lambda (t)
(let ([l (bound-identifier-mapping-get (token-identifier-mapping-put! end-toks t #t))
nts all-end-toks)
(car pat) (for-each (lambda (t)
(lambda () (unless (token-identifier-mapping-get end-toks (car t) (lambda () #f))
(list 1 (map-token toks (car pat)))))]) (let ([id (gensym (syntax-e (car t)))])
;; If the non-terminal can match 0 things, (token-identifier-mapping-put! toks (car t)
;; then it might match something from the (cons id (cdr t))))))
;; next pattern element. Otherwise, it must all-toks)
;; match the first element: ;; Compute min max size for each non-term:
(if (zero? (car l)) (nt-fixpoint
(append (cdr l) (loop (cdr pat))) nts
(cdr l))) (lambda (nt pats old-list)
null))) (let ([new-cnt
pats))]) (apply
(let ([new (filter (lambda (id) min
(andmap (lambda (id2) (map (lambda (pat)
(not (eq? id id2))) (apply
(cdr old-list))) +
new-list)]) (map (lambda (elem)
(if (pair? new) (car
;; Drop dups in new list: (bound-identifier-mapping-get nts
(let ([new (let loop ([new new]) elem
(if (null? (cdr new)) (lambda () (list 1)))))
new pat)))
(if (ormap (lambda (id) pats))])
(eq? (car new) id)) (if (new-cnt . > . (car old-list))
(cdr new)) (cons new-cnt (cdr old-list))
(loop (cdr new)) old-list)))
(cons (car new) (loop (cdr new))))))]) nt-ids patss)
(cons (car old-list) (append new (cdr old-list)))) ;; Compute set of toks that must appear at the beginning
old-list)))) ;; for a non-terminal
nt-ids patss) (nt-fixpoint
;; Determine left-recursive clauses: nts
(for-each (lambda (nt pats) (lambda (nt pats old-list)
(let ([l (bound-identifier-mapping-get nts nt)]) (let ([new-list
(bound-identifier-mapping-put! nts nt (list (car l) (apply
(cdr l) append
(map (lambda (x) #f) pats))))) (map (lambda (pat)
nt-ids patss) (let loop ([pat pat])
(nt-fixpoint (if (pair? pat)
nts (let ([l (bound-identifier-mapping-get
(lambda (nt pats old-list) nts
(list (car old-list) (car pat)
(cadr old-list) (lambda ()
(map (lambda (pat simple?) (list 1 (map-token toks (car pat)))))])
(or simple? ;; If the non-terminal can match 0 things,
(let ([l (map (lambda (elem) ;; then it might match something from the
(bound-identifier-mapping-get ;; next pattern element. Otherwise, it must
nts ;; match the first element:
elem (if (zero? (car l))
(lambda () #f))) (append (cdr l) (loop (cdr pat)))
pat)]) (cdr l)))
(andmap (lambda (i) null)))
(or (not i) pats))])
(andmap values (caddr i)))) (let ([new (filter (lambda (id)
l)))) (andmap (lambda (id2)
pats (caddr old-list)))) (not (eq? id id2)))
nt-ids patss) (cdr old-list)))
;; Build a definiton for each non-term: new-list)])
(loop (cdr clauses) (if (pair? new)
cfg-start ;; Drop dups in new list:
(map (lambda (nt pats handles) (let ([new (let loop ([new new])
(define info (bound-identifier-mapping-get nts nt)) (if (null? (cdr new))
(list nt new
#`(let ([key (gensym '#,nt)]) (if (ormap (lambda (id)
(lambda (stream depth end success-k fail-k max-depth tasks) (eq? (car new) id))
(parse-nt/share (cdr new))
key #,(car info) '#,(cadr info) stream depth end (loop (cdr new))
max-depth tasks (cons (car new) (loop (cdr new))))))])
success-k fail-k (cons (car old-list) (append new (cdr old-list))))
(lambda (end max-depth tasks success-k fail-k) old-list))))
#,(let loop ([pats pats] nt-ids patss)
[handles (syntax->list handles)] ;; Determine left-recursive clauses:
[simple?s (caddr info)]) (for-each (lambda (nt pats)
(if (null? pats) (let ([l (bound-identifier-mapping-get nts nt)])
#'(fail-k max-depth tasks) (bound-identifier-mapping-put! nts nt (list (car l)
#`(#,(if (or (null? (cdr pats)) (cdr l)
(car simple?s)) (map (lambda (x) #f) pats)))))
#'parse-or nt-ids patss)
#'parse-parallel-or) (nt-fixpoint
(lambda (stream depth end success-k fail-k max-depth tasks) nts
#,(build-match nts (lambda (nt pats old-list)
toks (list (car old-list)
(car pats) (cadr old-list)
(car handles))) (map (lambda (pat simple?)
(lambda (stream depth end success-k fail-k max-depth tasks) (or simple?
#,(loop (cdr pats) (let ([l (map (lambda (elem)
(cdr handles) (bound-identifier-mapping-get
(cdr simple?s))) nts
stream depth end success-k fail-k max-depth tasks))))))))) elem
nt-ids (lambda () #f)))
patss pat)])
(syntax->list #'(((begin handle0 handle ...) ...) ...))) (andmap (lambda (i)
cfg-error (or (not i)
(list* (andmap values (caddr i))))
(with-syntax ([((tok . tok-id) ...) l))))
(bound-identifier-mapping-map toks cons)]) pats (caddr old-list))))
(with-syntax ([($tok ...) nt-ids patss)
(map (lambda (id) ;; Build a definiton for each non-term:
(datum->syntax-object id '$1)) (loop (cdr clauses)
(syntax->list #'(tok ...)))]) cfg-start
#`(grammar (start [() null] (map (lambda (nt pats handles $ctxs)
[(atok start) (cons $1 $2)]) (define info (bound-identifier-mapping-get nts nt))
(atok [(tok) (make-tok 'tok-id 'tok $tok)] ...)))) (list nt
#`(start start) #`(let ([key (gensym '#,nt)])
parser-clauses)))] (lambda (stream depth end success-k fail-k max-depth tasks)
[(grammar . _) (parse-nt/share
(raise-syntax-error key #,(car info) '#,(cadr info) stream depth end
#f max-depth tasks
"bad grammar clause" success-k fail-k
stx (lambda (end max-depth tasks success-k fail-k)
(car #f))] #,(let loop ([pats pats]
[_else [handles (syntax->list handles)]
(loop (cdr clauses) [$ctxs (syntax->list $ctxs)]
cfg-start [simple?s (caddr info)])
cfg-grammar (if (null? pats)
cfg-error #'(fail-k max-depth tasks)
(cons (car clauses) parser-clauses))])))]) #`(#,(if (or (null? (cdr pats))
(car simple?s))
#'parse-or
#'parse-parallel-or)
(lambda (stream depth end success-k fail-k max-depth tasks)
#,(build-match nts
toks
(car pats)
(car handles)
(car $ctxs)))
(lambda (stream depth end success-k fail-k max-depth tasks)
#,(loop (cdr pats)
(cdr handles)
(cdr $ctxs)
(cdr simple?s)))
stream depth end success-k fail-k max-depth tasks)))))))))
nt-ids
patss
(syntax->list #'(((begin handle0 handle ...) ...) ...))
(syntax->list #'((handle0 ...) ...)))
cfg-error
(list*
(with-syntax ([((tok tok-id . $e) ...)
(token-identifier-mapping-map toks
(lambda (k v)
(list* k
(car v)
(if (cdr v)
#f
'$1))))])
#`(grammar (start [() null]
[(atok start) (cons $1 $2)])
(atok [(tok) (make-tok 'tok-id 'tok $e)] ...)))
#`(start start)
parser-clauses)))]
[(grammar . _)
(raise-syntax-error
#f
"bad grammar clause"
stx
(car #f))]
[_else
(loop (cdr clauses)
cfg-start
cfg-grammar
cfg-error
(cons (car clauses) parser-clauses))]))))])
#`(let ([orig-parse (parser #`(let ([orig-parse (parser
[error (lambda (a b c) [error (lambda (a b c)
(error 'cfg-parser "unexpected ~a token: ~a" b c))] (error 'cfg-parser "unexpected ~a token: ~a" b c))]
@ -658,8 +729,6 @@
#| #|
;; Tests used during development ;; Tests used during development
(require (lib "lex.ss" "parser-tools"))
(define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF)) (define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF))
(define lex (define lex
@ -700,7 +769,5 @@
;; This one fails: ;; This one fails:
#;"+*")]) #;"+*")])
(time (parse (lambda () (lex p)))))) (time (parse (lambda () (lex p))))))
|# |#
) )

Loading…
Cancel
Save