original commit: 5564d30b2666489fc516a7791e488d537e6287fd
tokens
Scott Owens 22 years ago
parent 0a5d3bd5f1
commit 01364cc5f3

@ -1,7 +1,9 @@
#cs
(module grammar mzscheme
(require (lib "class.ss"))
(require (lib "class.ss")
(lib "list.ss")
"yacc-helper.ss")
;; Constructs to create and access grammars, the internal
;; representation of the input to the parser generator.
@ -148,16 +150,46 @@
;; where the nth element in the outermost list is the list of productions with the nth non-term as lhs
(init prods)
;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable
(init-field terms non-terms nullable-non-terms end-terms)
(init-field terms non-terms end-terms)
;; indexed by the index of the non-term - contains the list of productions for that non-term
(define nt->prods (list->vector prods))
;; list of all productions
(define all-prods (apply append prods))
(define num-prods (length all-prods))
(define num-terms (length terms))
(define num-non-terms (length non-terms))
(let ((count 0))
(for-each
(lambda (nt)
(set-non-term-index! nt count)
(set! count (add1 count)))
non-terms))
(let ((count 0))
(for-each
(lambda (t)
(set-term-index! t count)
(set! count (add1 count)))
terms))
(let ((count 0))
(for-each
(lambda (prod)
(set-prod-index! prod count)
(set! count (add1 count)))
all-prods))
;; indexed by the index of the non-term - contains the list of productions for that non-term
(define nt->prods
(let ((v (make-vector (length prods) #f)))
(for-each (lambda (prods)
(vector-set! v (non-term-index (prod-lhs (car prods))) prods))
prods)
v))
(define nullable-non-terms
(nullable all-prods num-non-terms))
(define/public (get-num-terms) num-terms)
(define/public (get-num-non-terms) num-non-terms)
@ -195,6 +227,54 @@
(nullable-after-dot? item)))))
;; nullable: production list * int -> non-term set
;; determines which non-terminals can derive epsilon
(define (nullable prods num-nts)
(letrec ((nullable (make-vector num-nts #f))
(added #f)
;; possible-nullable: producion list -> production list
;; Removes all productions that have a terminal
(possible-nullable
(lambda (prods)
(filter (lambda (prod)
(vector-andmap non-term? (prod-rhs prod)))
prods)))
;; set-nullables: production list -> production list
;; makes one pass through the productions, adding the ones
;; known to be nullable now to nullable and returning a list
;; of productions that we don't know about yet.
(set-nullables
(lambda (prods)
(cond
((null? prods) null)
((vector-ref nullable
(gram-sym-index (prod-lhs (car prods))))
(set-nullables (cdr prods)))
((vector-andmap (lambda (nt)
(vector-ref nullable (gram-sym-index nt)))
(prod-rhs (car prods)))
(vector-set! nullable
(gram-sym-index (prod-lhs (car prods)))
#t)
(set! added #t)
(set-nullables (cdr prods)))
(else
(cons (car prods)
(set-nullables (cdr prods))))))))
(let loop ((P (possible-nullable prods)))
(cond
((null? P) nullable)
(else
(set! added #f)
(let ((new-P (set-nullables P)))
(if added
(loop new-P)
nullable)))))))
;; ------------------------ Productions ---------------------------
;; production = (make-prod non-term (gram-sym vector) int prec syntax-object)

@ -7,10 +7,13 @@
(require "yacc-helper.ss"
"../private-lex/token-syntax.ss"
"grammar.ss"
(lib "list.ss")
(lib "class.ss"))
(lib "class.ss")
(lib "contracts.ss"))
(provide parse-input get-term-list)
(provide/contract
(parse-input ((listof syntax?) (listof syntax?) syntax? (or/f false? syntax?) syntax? syntax? any? . -> . (is-a?/c grammar%)))
(get-term-list (syntax? . -> . (listof syntax?))))
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
@ -44,54 +47,6 @@
`(,(datum->syntax-object act name b stx-for-original-property)
,@(get-args (add1 i) (cdr rhs)))))))))))
;; nullable: production list * int -> non-term set
;; determines which non-terminals can derive epsilon
(define (nullable prods num-nts)
(letrec ((nullable (make-vector num-nts #f))
(added #f)
;; possible-nullable: producion list -> production list
;; Removes all productions that have a terminal
(possible-nullable
(lambda (prods)
(filter (lambda (prod)
(vector-andmap non-term? (prod-rhs prod)))
prods)))
;; set-nullables: production list -> production list
;; makes one pass through the productions, adding the ones
;; known to be nullable now to nullable and returning a list
;; of productions that we don't know about yet.
(set-nullables
(lambda (prods)
(cond
((null? prods) null)
((vector-ref nullable
(gram-sym-index (prod-lhs (car prods))))
(set-nullables (cdr prods)))
((vector-andmap (lambda (nt)
(vector-ref nullable (gram-sym-index nt)))
(prod-rhs (car prods)))
(vector-set! nullable
(gram-sym-index (prod-lhs (car prods)))
#t)
(set! added #t)
(set-nullables (cdr prods)))
(else
(cons (car prods)
(set-nullables (cdr prods))))))))
(let loop ((P (possible-nullable prods)))
(cond
((null? P) nullable)
(else
(set! added #f)
(let ((new-P (set-nullables P)))
(if added
(loop new-P)
nullable)))))))
;; Given the list of terminal symbols and the precedence/associativity definitions,
;; builds terminal structures (See grammar.ss)
;; build-terms: symbol list * symbol list list -> term list
@ -115,16 +70,12 @@
(set! counter (add1 counter))))
precs)
(set! counter 0)
;; Build the terminal structures
(map
(lambda (term-sym)
(begin0
(make-term term-sym
counter
(hash-table-get prec-table term-sym (lambda () #f)))
(set! counter (add1 counter))))
(make-term term-sym
#f
(hash-table-get prec-table term-sym (lambda () #f))))
term-list)))
;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.ss)
@ -158,11 +109,8 @@
"Token declaration must be (tokens symbol ...)"
so))))
;; parse-input: syntax-object * syntax-object list * syntax-object^4 * boolean-> grammar
(define (parse-input start ends term-defs prec-decls prods runtime src-pos)
(let* ((counter 0)
(start-sym (syntax-object->datum start))
(let* ((start-syms (map syntax-object->datum start))
(list-of-terms (map syntax-object->datum (get-term-list term-defs)))
@ -263,13 +211,8 @@
(terms (build-terms list-of-terms precs))
(non-terms (begin
(set! counter 2)
(map (lambda (non-term)
(begin0
(make-non-term non-term counter)
(set! counter (add1 counter))))
list-of-non-terms)))
(non-terms (map (lambda (non-term) (make-non-term non-term #f))
list-of-non-terms))
(term-table (make-hash-table))
(non-term-table (make-hash-table)))
@ -337,11 +280,10 @@
(eq? (syntax-object->datum a) (syntax-object->datum b)))
((prod-rhs action)
(let ((p (parse-prod (syntax prod-rhs))))
(set! counter (add1 counter))
(make-prod
nt
p
counter
#f
(let loop ((i (sub1 (vector-length p))))
(if (>= i 0)
(let ((gs (vector-ref p i)))
@ -353,11 +295,10 @@
((prod-rhs (prec term) action)
(identifier? (syntax term))
(let ((p (parse-prod (syntax prod-rhs))))
(set! counter (add1 counter))
(make-prod
nt
p
counter
#f
(term-prec
(hash-table-get
term-table
@ -382,9 +323,8 @@
(syntax-case prods-so ()
((nt productions ...)
(> (length (syntax->list (syntax (productions ...)))) 0)
(let ((nt (hash-table-get
non-term-table
(syntax-object->datum (syntax nt)))))
(let ((nt (hash-table-get non-term-table
(syntax-object->datum (syntax nt)))))
(map (lambda (p) (parse-prod+action nt p))
(syntax->list (syntax (productions ...))))))
(_
@ -393,55 +333,43 @@
"A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side"
prods-so))))))
(if (not (memq start-sym list-of-non-terms))
(raise-syntax-error
'parser-start
(format "Start symbol ~a not defined as a non-terminal"
start-sym)
start))
(set! counter (length end-terms))
(let* ((start (make-non-term (gensym) 0))
(end-non-term (make-non-term (gensym) 1))
(for-each
(lambda (sstx ssym)
(unless (memq ssym list-of-non-terms)
(raise-syntax-error
'parser-start
(format "Start symbol ~a not defined as a non-terminal" ssym)
sstx)))
start start-syms)
(let* ((starts (map (lambda (x) (make-non-term (gensym) #f)) start-syms))
(end-non-terms (map (lambda (x) (make-non-term (gensym) #f)) start-syms))
(parsed-prods (map parse-prods-for-nt (cdr (syntax->list prods))))
(counter2 0)
(prods
`((,(make-prod start (vector end-non-term) 0 #f #f))
,(map
(lambda (end)
(set! counter2 (add1 counter2))
(make-prod end-non-term
(vector
(hash-table-get non-term-table start-sym)
(hash-table-get term-table end))
counter2
#f
(datum->syntax-object
runtime
`(lambda (x) x))))
end-terms)
,@parsed-prods))
(nulls (nullable (apply append prods)
(+ 2 (length non-terms)))))
;; (printf "nullable: {~a}~n~n"
;; (apply string-append
;; (let loop ((i 0))
;; (cond
;; ((>= i (vector-length nulls)) null)
;; ((vector-ref nulls i)
;; (cons
;; (format "~a "
;; (gram-sym-symbol
;; (list-ref (cons start (cons end-non-term non-terms)) i)))
;; (loop (add1 i))))
;; (else (loop (add1 i)))))))
`(,@(map (lambda (start end-non-term)
(list (make-prod start (vector end-non-term) #f #f #f)))
starts end-non-terms)
,@(map
(lambda (end-nt start-sym)
(map
(lambda (end)
(make-prod end-nt
(vector
(hash-table-get non-term-table start-sym)
(hash-table-get term-table end))
#f
#f
(datum->syntax-object
runtime
`(lambda (x) x))))
end-terms))
end-non-terms start-syms)
,@parsed-prods)))
(make-object grammar%
prods
terms
(cons start (cons end-non-term non-terms))
nulls
(append starts (append end-non-terms non-terms))
(map (lambda (term-name)
(hash-table-get term-table term-name))
end-terms)))))))

@ -4,9 +4,12 @@
(require "input-file-parser.ss"
"grammar.ss"
"table.ss"
(lib "class.ss"))
(lib "class.ss")
(lib "contracts.ss"))
(provide build-parser)
(provide/contract
(build-parser ((string? any? any? syntax? (listof syntax?) (listof syntax?)
(or/f syntax? false?) syntax? syntax?) . ->* . (any? any? any? any?))))
(define (strip so)
(syntax-local-introduce
@ -20,14 +23,14 @@
(syntax-case prods ()
((_ (bind ((bound ...) x ...) ...) ...)
(let ((binds (syntax->list (syntax (bind ...))))
(bounds (cons start
(apply
append
(map syntax->list
(apply
append
(map syntax->list
(syntax->list (syntax (((bound ...) ...) ...)))))))))
(bounds (append start
(apply
append
(map syntax->list
(apply
append
(map syntax->list
(syntax->list (syntax (((bound ...) ...) ...)))))))))
(terms (get-term-list terms))
(precs (if precs
(syntax-case precs ()

@ -53,42 +53,43 @@
(if tokens
(raise-syntax-error #f "Multiple tokens declarations" stx)
(set! tokens arg)))
((start symbol)
(cond
((not (identifier? (syntax symbol)))
(raise-syntax-error
'parser-start
"Start non-terminal must be a symbol"
(syntax symbol)))
(start
(raise-syntax-error #f "Multiple start declarations" stx))
(else
(set! start (syntax symbol)))))
((start symbol ...)
(let ((symbols (syntax->list (syntax (symbol ...)))))
(for-each
(lambda (sym)
(unless (identifier? sym)
(raise-syntax-error 'parser-start
"Start symbol must be a symbol"
sym)))
symbols)
(when start
(raise-syntax-error #f "Multiple start declarations" stx))
(when (null? symbols)
(raise-syntax-error 'parser-start
"Missing start symbol"
stx))
(set! start symbols)))
((end symbols ...)
(begin
(let ((symbols (syntax->list (syntax (symbols ...)))))
(for-each
(lambda (sym)
(if (not (identifier? sym))
(raise-syntax-error
'parser-end
"End token must be a symbol"
sym)))
(syntax->list (syntax (symbols ...))))
(let ((d (duplicate-list? (syntax-object->datum
(syntax (symbols ...))))))
(if d
(raise-syntax-error
'parser-end
(format "Duplicate end token definition for ~a" d)
arg)))
(if (= 0 (length (syntax->list (syntax (symbols ...)))))
(raise-syntax-error
'parser-end
"end declaration must contain at least 1 token"
arg))
(if end
(unless (identifier? sym)
(raise-syntax-error 'parser-end
"End token must be a symbol"
sym)))
symbols)
(let ((d (duplicate-list? (map syntax-object->datum symbols))))
(when d
(raise-syntax-error 'parser-end
(format "Duplicate end token definition for ~a" d)
arg))
(when (null? symbols)
(raise-syntax-error 'parser-end
"end declaration must contain at least 1 token"
arg))
(when end
(raise-syntax-error #f "Multiple end declarations" stx))
(set! end (syntax->list (syntax (symbols ...))))))
(set! end symbols))))
((precs decls ...)
(if precs
(raise-syntax-error #f "Multiple precs declarations" stx)
@ -100,26 +101,25 @@
((yacc-output filename)
(cond
((not (string? (syntax-object->datum (syntax filename))))
(raise-syntax-error
'parser-yacc-output
"Yacc-output filename must be a string"
(syntax filename)))
(raise-syntax-error 'parser-yacc-output
"Yacc-output filename must be a string"
(syntax filename)))
(yacc-output
(raise-syntax-error #f "Multiple yacc-output declarations" stx))
(else
(set! yacc-output (syntax-object->datum (syntax filename))))))
(_ (raise-syntax-error 'parser-args "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" arg))))
(syntax->list (syntax (args ...))))
(if (not tokens)
(raise-syntax-error #f "missing tokens declaration" stx))
(if (not error)
(raise-syntax-error #f "missing error declaration" stx))
(if (not grammar)
(raise-syntax-error #f "missing grammar declaration" stx))
(if (not end)
(raise-syntax-error #f "missing end declaration" stx))
(if (not start)
(raise-syntax-error #f "missing start declaration" stx))
(unless tokens
(raise-syntax-error #f "missing tokens declaration" stx))
(unless error
(raise-syntax-error #f "missing error declaration" stx))
(unless grammar
(raise-syntax-error #f "missing grammar declaration" stx))
(unless end
(raise-syntax-error #f "missing end declaration" stx))
(unless start
(raise-syntax-error #f "missing start declaration" stx))
(let-values (((table term-sym->index actions check-syntax-fix)
(build-parser (if debug debug "")
src-pos
@ -130,23 +130,23 @@
precs
grammar
stx)))
(if (and yacc-output (not (string=? yacc-output "")))
(with-handlers [(exn:i/o:filesystem?
(lambda (e)
(fprintf
(current-error-port)
"Cannot write yacc-output to file \"~a\". ~a~n"
(exn:i/o:filesystem-pathname e)
(exn:i/o:filesystem-detail e))))]
(call-with-output-file yacc-output
(lambda (port)
(display-yacc (syntax-object->datum grammar)
tokens
(syntax-object->datum start)
(if precs
(syntax-object->datum precs)
#f)
port)))))
(when (and yacc-output (not (string=? yacc-output "")))
(with-handlers [(exn:i/o:filesystem?
(lambda (e)
(fprintf
(current-error-port)
"Cannot write yacc-output to file \"~a\". ~a~n"
(exn:i/o:filesystem-pathname e)
(exn:i/o:filesystem-detail e))))]
(call-with-output-file yacc-output
(lambda (port)
(display-yacc (syntax-object->datum grammar)
tokens
(syntax-object->datum start)
(if precs
(syntax-object->datum precs)
#f)
port)))))
(with-syntax ((check-syntax-fix check-syntax-fix)
(err error)
(ends end)
@ -160,10 +160,9 @@
check-syntax-fix
(parser-body debug err (quote ends) table term-sym->index actions src-pos)))))))
(_
(raise-syntax-error
#f
"parser must have the form (parser args ...)"
stx))))
(raise-syntax-error #f
"parser must have the form (parser args ...)"
stx))))
(define (reduce-stack stack num ret-vals src-pos)
(cond
@ -181,7 +180,7 @@
(define-struct stack-frame (state value start-pos end-pos) (make-inspector))
(define empty-stack (list (make-stack-frame 0 #f #f #f)))
(define (make-empty-stack i) (list (make-stack-frame i #f #f #f)))
(define (false-thunk) #f)
@ -285,59 +284,62 @@
(err #f (token-name tok) (token-value tok) (cadr ip) (caddr ip))
(err #f (token-name tok) (token-value tok)))
(raise-read-error (format "parser: got token of unknown type ~a" (token-name tok))
#f #f #f #f #f)))))))
(lambda (get-token)
(let parsing-loop ((stack empty-stack)
(ip (get-token)))
(let* ((tok (input->token ip))
(action (find-action stack tok ip)))
(cond
((shift? action)
;; (printf "shift:~a~n" (shift-state action))
(let ((val (token-value tok)))
(parsing-loop (cons (if src-pos
(make-stack-frame (shift-state action)
val
(cadr ip)
(caddr ip))
(make-stack-frame (shift-state action)
val
#f
#f))
stack)
(get-token))))
((reduce? action)
;; (printf "reduce:~a~n" (reduce-prod-num action))
(let-values (((new-stack args)
(reduce-stack stack
(reduce-rhs-length action)
null
src-pos)))
(let* ((A (reduce-lhs-num action))
(goto (array2d-ref table (stack-frame-state (car new-stack)) A)))
(parsing-loop (cons
(if src-pos
(make-stack-frame goto
(apply (vector-ref actions (reduce-prod-num action)) args)
(if (null? args) (cadr ip) (cadr args))
(if (null? args)
(caddr ip)
(list-ref args (- (* (reduce-rhs-length action) 3) 1))))
(make-stack-frame goto
(apply (vector-ref actions (reduce-prod-num action)) args)
#f
#f))
new-stack)
ip))))
((accept? action)
;; (printf "accept~n")
(stack-frame-value (car stack)))
(else
(if src-pos
(err #t (token-name tok) (token-value tok) (cadr ip) (caddr ip))
(err #t (token-name tok) (token-value tok)))
(parsing-loop (fix-error stack tok ip get-token) (get-token)))))))))
#f #f #f #f #f))))))
(make-parser
(lambda (start-number)
(lambda (get-token)
(let parsing-loop ((stack (make-empty-stack start-number))
(ip (get-token)))
(let* ((tok (input->token ip))
(action (find-action stack tok ip)))
(cond
((shift? action)
;; (printf "shift:~a~n" (shift-state action))
(let ((val (token-value tok)))
(parsing-loop (cons (if src-pos
(make-stack-frame (shift-state action)
val
(cadr ip)
(caddr ip))
(make-stack-frame (shift-state action)
val
#f
#f))
stack)
(get-token))))
((reduce? action)
;; (printf "reduce:~a~n" (reduce-prod-num action))
(let-values (((new-stack args)
(reduce-stack stack
(reduce-rhs-length action)
null
src-pos)))
(let* ((A (reduce-lhs-num action))
(goto (array2d-ref table (stack-frame-state (car new-stack)) A)))
(parsing-loop
(cons
(if src-pos
(make-stack-frame
goto
(apply (vector-ref actions (reduce-prod-num action)) args)
(if (null? args) (cadr ip) (cadr args))
(if (null? args)
(caddr ip)
(list-ref args (- (* (reduce-rhs-length action) 3) 1))))
(make-stack-frame
goto
(apply (vector-ref actions (reduce-prod-num action)) args)
#f
#f))
new-stack)
ip))))
((accept? action)
;; (printf "accept~n")
(stack-frame-value (car stack)))
(else
(if src-pos
(err #t (token-name tok) (token-value tok) (cadr ip) (caddr ip))
(err #t (token-name tok) (token-value tok)))
(parsing-loop (fix-error stack tok ip get-token) (get-token))))))))))
(make-parser 0)))
)
Loading…
Cancel
Save