original commit: 5564d30b2666489fc516a7791e488d537e6287fd
tokens
Scott Owens 22 years ago
parent 0a5d3bd5f1
commit 01364cc5f3

@ -1,7 +1,9 @@
#cs #cs
(module grammar mzscheme (module grammar mzscheme
(require (lib "class.ss")) (require (lib "class.ss")
(lib "list.ss")
"yacc-helper.ss")
;; Constructs to create and access grammars, the internal ;; Constructs to create and access grammars, the internal
;; representation of the input to the parser generator. ;; representation of the input to the parser generator.
@ -148,16 +150,46 @@
;; where the nth element in the outermost list is the list of productions with the nth non-term as lhs ;; where the nth element in the outermost list is the list of productions with the nth non-term as lhs
(init prods) (init prods)
;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable ;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable
(init-field terms non-terms nullable-non-terms end-terms) (init-field terms non-terms end-terms)
;; indexed by the index of the non-term - contains the list of productions for that non-term
(define nt->prods (list->vector prods))
;; list of all productions ;; list of all productions
(define all-prods (apply append prods)) (define all-prods (apply append prods))
(define num-prods (length all-prods)) (define num-prods (length all-prods))
(define num-terms (length terms)) (define num-terms (length terms))
(define num-non-terms (length non-terms)) (define num-non-terms (length non-terms))
(let ((count 0))
(for-each
(lambda (nt)
(set-non-term-index! nt count)
(set! count (add1 count)))
non-terms))
(let ((count 0))
(for-each
(lambda (t)
(set-term-index! t count)
(set! count (add1 count)))
terms))
(let ((count 0))
(for-each
(lambda (prod)
(set-prod-index! prod count)
(set! count (add1 count)))
all-prods))
;; indexed by the index of the non-term - contains the list of productions for that non-term
(define nt->prods
(let ((v (make-vector (length prods) #f)))
(for-each (lambda (prods)
(vector-set! v (non-term-index (prod-lhs (car prods))) prods))
prods)
v))
(define nullable-non-terms
(nullable all-prods num-non-terms))
(define/public (get-num-terms) num-terms) (define/public (get-num-terms) num-terms)
(define/public (get-num-non-terms) num-non-terms) (define/public (get-num-non-terms) num-non-terms)
@ -195,6 +227,54 @@
(nullable-after-dot? item))))) (nullable-after-dot? item)))))
;; nullable: production list * int -> non-term set
;; determines which non-terminals can derive epsilon
(define (nullable prods num-nts)
(letrec ((nullable (make-vector num-nts #f))
(added #f)
;; possible-nullable: producion list -> production list
;; Removes all productions that have a terminal
(possible-nullable
(lambda (prods)
(filter (lambda (prod)
(vector-andmap non-term? (prod-rhs prod)))
prods)))
;; set-nullables: production list -> production list
;; makes one pass through the productions, adding the ones
;; known to be nullable now to nullable and returning a list
;; of productions that we don't know about yet.
(set-nullables
(lambda (prods)
(cond
((null? prods) null)
((vector-ref nullable
(gram-sym-index (prod-lhs (car prods))))
(set-nullables (cdr prods)))
((vector-andmap (lambda (nt)
(vector-ref nullable (gram-sym-index nt)))
(prod-rhs (car prods)))
(vector-set! nullable
(gram-sym-index (prod-lhs (car prods)))
#t)
(set! added #t)
(set-nullables (cdr prods)))
(else
(cons (car prods)
(set-nullables (cdr prods))))))))
(let loop ((P (possible-nullable prods)))
(cond
((null? P) nullable)
(else
(set! added #f)
(let ((new-P (set-nullables P)))
(if added
(loop new-P)
nullable)))))))
;; ------------------------ Productions --------------------------- ;; ------------------------ Productions ---------------------------
;; production = (make-prod non-term (gram-sym vector) int prec syntax-object) ;; production = (make-prod non-term (gram-sym vector) int prec syntax-object)

@ -7,10 +7,13 @@
(require "yacc-helper.ss" (require "yacc-helper.ss"
"../private-lex/token-syntax.ss" "../private-lex/token-syntax.ss"
"grammar.ss" "grammar.ss"
(lib "list.ss") (lib "class.ss")
(lib "class.ss")) (lib "contracts.ss"))
(provide parse-input get-term-list)
(provide/contract
(parse-input ((listof syntax?) (listof syntax?) syntax? (or/f false? syntax?) syntax? syntax? any? . -> . (is-a?/c grammar%)))
(get-term-list (syntax? . -> . (listof syntax?))))
(define stx-for-original-property (read-syntax #f (open-input-string "original"))) (define stx-for-original-property (read-syntax #f (open-input-string "original")))
@ -44,54 +47,6 @@
`(,(datum->syntax-object act name b stx-for-original-property) `(,(datum->syntax-object act name b stx-for-original-property)
,@(get-args (add1 i) (cdr rhs))))))))))) ,@(get-args (add1 i) (cdr rhs)))))))))))
;; nullable: production list * int -> non-term set
;; determines which non-terminals can derive epsilon
(define (nullable prods num-nts)
(letrec ((nullable (make-vector num-nts #f))
(added #f)
;; possible-nullable: producion list -> production list
;; Removes all productions that have a terminal
(possible-nullable
(lambda (prods)
(filter (lambda (prod)
(vector-andmap non-term? (prod-rhs prod)))
prods)))
;; set-nullables: production list -> production list
;; makes one pass through the productions, adding the ones
;; known to be nullable now to nullable and returning a list
;; of productions that we don't know about yet.
(set-nullables
(lambda (prods)
(cond
((null? prods) null)
((vector-ref nullable
(gram-sym-index (prod-lhs (car prods))))
(set-nullables (cdr prods)))
((vector-andmap (lambda (nt)
(vector-ref nullable (gram-sym-index nt)))
(prod-rhs (car prods)))
(vector-set! nullable
(gram-sym-index (prod-lhs (car prods)))
#t)
(set! added #t)
(set-nullables (cdr prods)))
(else
(cons (car prods)
(set-nullables (cdr prods))))))))
(let loop ((P (possible-nullable prods)))
(cond
((null? P) nullable)
(else
(set! added #f)
(let ((new-P (set-nullables P)))
(if added
(loop new-P)
nullable)))))))
;; Given the list of terminal symbols and the precedence/associativity definitions, ;; Given the list of terminal symbols and the precedence/associativity definitions,
;; builds terminal structures (See grammar.ss) ;; builds terminal structures (See grammar.ss)
;; build-terms: symbol list * symbol list list -> term list ;; build-terms: symbol list * symbol list list -> term list
@ -115,16 +70,12 @@
(set! counter (add1 counter)))) (set! counter (add1 counter))))
precs) precs)
(set! counter 0)
;; Build the terminal structures ;; Build the terminal structures
(map (map
(lambda (term-sym) (lambda (term-sym)
(begin0
(make-term term-sym (make-term term-sym
counter #f
(hash-table-get prec-table term-sym (lambda () #f))) (hash-table-get prec-table term-sym (lambda () #f))))
(set! counter (add1 counter))))
term-list))) term-list)))
;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.ss) ;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.ss)
@ -158,11 +109,8 @@
"Token declaration must be (tokens symbol ...)" "Token declaration must be (tokens symbol ...)"
so)))) so))))
;; parse-input: syntax-object * syntax-object list * syntax-object^4 * boolean-> grammar
(define (parse-input start ends term-defs prec-decls prods runtime src-pos) (define (parse-input start ends term-defs prec-decls prods runtime src-pos)
(let* ((counter 0) (let* ((start-syms (map syntax-object->datum start))
(start-sym (syntax-object->datum start))
(list-of-terms (map syntax-object->datum (get-term-list term-defs))) (list-of-terms (map syntax-object->datum (get-term-list term-defs)))
@ -263,13 +211,8 @@
(terms (build-terms list-of-terms precs)) (terms (build-terms list-of-terms precs))
(non-terms (begin (non-terms (map (lambda (non-term) (make-non-term non-term #f))
(set! counter 2) list-of-non-terms))
(map (lambda (non-term)
(begin0
(make-non-term non-term counter)
(set! counter (add1 counter))))
list-of-non-terms)))
(term-table (make-hash-table)) (term-table (make-hash-table))
(non-term-table (make-hash-table))) (non-term-table (make-hash-table)))
@ -337,11 +280,10 @@
(eq? (syntax-object->datum a) (syntax-object->datum b))) (eq? (syntax-object->datum a) (syntax-object->datum b)))
((prod-rhs action) ((prod-rhs action)
(let ((p (parse-prod (syntax prod-rhs)))) (let ((p (parse-prod (syntax prod-rhs))))
(set! counter (add1 counter))
(make-prod (make-prod
nt nt
p p
counter #f
(let loop ((i (sub1 (vector-length p)))) (let loop ((i (sub1 (vector-length p))))
(if (>= i 0) (if (>= i 0)
(let ((gs (vector-ref p i))) (let ((gs (vector-ref p i)))
@ -353,11 +295,10 @@
((prod-rhs (prec term) action) ((prod-rhs (prec term) action)
(identifier? (syntax term)) (identifier? (syntax term))
(let ((p (parse-prod (syntax prod-rhs)))) (let ((p (parse-prod (syntax prod-rhs))))
(set! counter (add1 counter))
(make-prod (make-prod
nt nt
p p
counter #f
(term-prec (term-prec
(hash-table-get (hash-table-get
term-table term-table
@ -382,8 +323,7 @@
(syntax-case prods-so () (syntax-case prods-so ()
((nt productions ...) ((nt productions ...)
(> (length (syntax->list (syntax (productions ...)))) 0) (> (length (syntax->list (syntax (productions ...)))) 0)
(let ((nt (hash-table-get (let ((nt (hash-table-get non-term-table
non-term-table
(syntax-object->datum (syntax nt))))) (syntax-object->datum (syntax nt)))))
(map (lambda (p) (parse-prod+action nt p)) (map (lambda (p) (parse-prod+action nt p))
(syntax->list (syntax (productions ...)))))) (syntax->list (syntax (productions ...))))))
@ -393,55 +333,43 @@
"A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side" "A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side"
prods-so)))))) prods-so))))))
(if (not (memq start-sym list-of-non-terms)) (for-each
(lambda (sstx ssym)
(unless (memq ssym list-of-non-terms)
(raise-syntax-error (raise-syntax-error
'parser-start 'parser-start
(format "Start symbol ~a not defined as a non-terminal" (format "Start symbol ~a not defined as a non-terminal" ssym)
start-sym) sstx)))
start)) start start-syms)
(set! counter (length end-terms)) (let* ((starts (map (lambda (x) (make-non-term (gensym) #f)) start-syms))
(let* ((start (make-non-term (gensym) 0)) (end-non-terms (map (lambda (x) (make-non-term (gensym) #f)) start-syms))
(end-non-term (make-non-term (gensym) 1))
(parsed-prods (map parse-prods-for-nt (cdr (syntax->list prods)))) (parsed-prods (map parse-prods-for-nt (cdr (syntax->list prods))))
(counter2 0)
(prods (prods
`((,(make-prod start (vector end-non-term) 0 #f #f)) `(,@(map (lambda (start end-non-term)
,(map (list (make-prod start (vector end-non-term) #f #f #f)))
starts end-non-terms)
,@(map
(lambda (end-nt start-sym)
(map
(lambda (end) (lambda (end)
(set! counter2 (add1 counter2)) (make-prod end-nt
(make-prod end-non-term
(vector (vector
(hash-table-get non-term-table start-sym) (hash-table-get non-term-table start-sym)
(hash-table-get term-table end)) (hash-table-get term-table end))
counter2 #f
#f #f
(datum->syntax-object (datum->syntax-object
runtime runtime
`(lambda (x) x)))) `(lambda (x) x))))
end-terms) end-terms))
,@parsed-prods)) end-non-terms start-syms)
(nulls (nullable (apply append prods) ,@parsed-prods)))
(+ 2 (length non-terms)))))
;; (printf "nullable: {~a}~n~n"
;; (apply string-append
;; (let loop ((i 0))
;; (cond
;; ((>= i (vector-length nulls)) null)
;; ((vector-ref nulls i)
;; (cons
;; (format "~a "
;; (gram-sym-symbol
;; (list-ref (cons start (cons end-non-term non-terms)) i)))
;; (loop (add1 i))))
;; (else (loop (add1 i)))))))
(make-object grammar% (make-object grammar%
prods prods
terms terms
(cons start (cons end-non-term non-terms)) (append starts (append end-non-terms non-terms))
nulls
(map (lambda (term-name) (map (lambda (term-name)
(hash-table-get term-table term-name)) (hash-table-get term-table term-name))
end-terms))))))) end-terms)))))))

@ -4,9 +4,12 @@
(require "input-file-parser.ss" (require "input-file-parser.ss"
"grammar.ss" "grammar.ss"
"table.ss" "table.ss"
(lib "class.ss")) (lib "class.ss")
(lib "contracts.ss"))
(provide build-parser) (provide/contract
(build-parser ((string? any? any? syntax? (listof syntax?) (listof syntax?)
(or/f syntax? false?) syntax? syntax?) . ->* . (any? any? any? any?))))
(define (strip so) (define (strip so)
(syntax-local-introduce (syntax-local-introduce
@ -20,7 +23,7 @@
(syntax-case prods () (syntax-case prods ()
((_ (bind ((bound ...) x ...) ...) ...) ((_ (bind ((bound ...) x ...) ...) ...)
(let ((binds (syntax->list (syntax (bind ...)))) (let ((binds (syntax->list (syntax (bind ...))))
(bounds (cons start (bounds (append start
(apply (apply
append append
(map syntax->list (map syntax->list

@ -53,42 +53,43 @@
(if tokens (if tokens
(raise-syntax-error #f "Multiple tokens declarations" stx) (raise-syntax-error #f "Multiple tokens declarations" stx)
(set! tokens arg))) (set! tokens arg)))
((start symbol) ((start symbol ...)
(cond (let ((symbols (syntax->list (syntax (symbol ...)))))
((not (identifier? (syntax symbol))) (for-each
(raise-syntax-error (lambda (sym)
'parser-start (unless (identifier? sym)
"Start non-terminal must be a symbol" (raise-syntax-error 'parser-start
(syntax symbol))) "Start symbol must be a symbol"
(start sym)))
symbols)
(when start
(raise-syntax-error #f "Multiple start declarations" stx)) (raise-syntax-error #f "Multiple start declarations" stx))
(else (when (null? symbols)
(set! start (syntax symbol))))) (raise-syntax-error 'parser-start
"Missing start symbol"
stx))
(set! start symbols)))
((end symbols ...) ((end symbols ...)
(begin (let ((symbols (syntax->list (syntax (symbols ...)))))
(for-each (for-each
(lambda (sym) (lambda (sym)
(if (not (identifier? sym)) (unless (identifier? sym)
(raise-syntax-error (raise-syntax-error 'parser-end
'parser-end
"End token must be a symbol" "End token must be a symbol"
sym))) sym)))
(syntax->list (syntax (symbols ...)))) symbols)
(let ((d (duplicate-list? (syntax-object->datum (let ((d (duplicate-list? (map syntax-object->datum symbols))))
(syntax (symbols ...)))))) (when d
(if d (raise-syntax-error 'parser-end
(raise-syntax-error
'parser-end
(format "Duplicate end token definition for ~a" d) (format "Duplicate end token definition for ~a" d)
arg))) arg))
(if (= 0 (length (syntax->list (syntax (symbols ...))))) (when (null? symbols)
(raise-syntax-error (raise-syntax-error 'parser-end
'parser-end
"end declaration must contain at least 1 token" "end declaration must contain at least 1 token"
arg)) arg))
(if end (when end
(raise-syntax-error #f "Multiple end declarations" stx)) (raise-syntax-error #f "Multiple end declarations" stx))
(set! end (syntax->list (syntax (symbols ...)))))) (set! end symbols))))
((precs decls ...) ((precs decls ...)
(if precs (if precs
(raise-syntax-error #f "Multiple precs declarations" stx) (raise-syntax-error #f "Multiple precs declarations" stx)
@ -100,8 +101,7 @@
((yacc-output filename) ((yacc-output filename)
(cond (cond
((not (string? (syntax-object->datum (syntax filename)))) ((not (string? (syntax-object->datum (syntax filename))))
(raise-syntax-error (raise-syntax-error 'parser-yacc-output
'parser-yacc-output
"Yacc-output filename must be a string" "Yacc-output filename must be a string"
(syntax filename))) (syntax filename)))
(yacc-output (yacc-output
@ -110,15 +110,15 @@
(set! yacc-output (syntax-object->datum (syntax filename)))))) (set! yacc-output (syntax-object->datum (syntax filename))))))
(_ (raise-syntax-error 'parser-args "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" arg)))) (_ (raise-syntax-error 'parser-args "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" arg))))
(syntax->list (syntax (args ...)))) (syntax->list (syntax (args ...))))
(if (not tokens) (unless tokens
(raise-syntax-error #f "missing tokens declaration" stx)) (raise-syntax-error #f "missing tokens declaration" stx))
(if (not error) (unless error
(raise-syntax-error #f "missing error declaration" stx)) (raise-syntax-error #f "missing error declaration" stx))
(if (not grammar) (unless grammar
(raise-syntax-error #f "missing grammar declaration" stx)) (raise-syntax-error #f "missing grammar declaration" stx))
(if (not end) (unless end
(raise-syntax-error #f "missing end declaration" stx)) (raise-syntax-error #f "missing end declaration" stx))
(if (not start) (unless start
(raise-syntax-error #f "missing start declaration" stx)) (raise-syntax-error #f "missing start declaration" stx))
(let-values (((table term-sym->index actions check-syntax-fix) (let-values (((table term-sym->index actions check-syntax-fix)
(build-parser (if debug debug "") (build-parser (if debug debug "")
@ -130,7 +130,7 @@
precs precs
grammar grammar
stx))) stx)))
(if (and yacc-output (not (string=? yacc-output ""))) (when (and yacc-output (not (string=? yacc-output "")))
(with-handlers [(exn:i/o:filesystem? (with-handlers [(exn:i/o:filesystem?
(lambda (e) (lambda (e)
(fprintf (fprintf
@ -160,8 +160,7 @@
check-syntax-fix check-syntax-fix
(parser-body debug err (quote ends) table term-sym->index actions src-pos))))))) (parser-body debug err (quote ends) table term-sym->index actions src-pos)))))))
(_ (_
(raise-syntax-error (raise-syntax-error #f
#f
"parser must have the form (parser args ...)" "parser must have the form (parser args ...)"
stx)))) stx))))
@ -181,7 +180,7 @@
(define-struct stack-frame (state value start-pos end-pos) (make-inspector)) (define-struct stack-frame (state value start-pos end-pos) (make-inspector))
(define empty-stack (list (make-stack-frame 0 #f #f #f))) (define (make-empty-stack i) (list (make-stack-frame i #f #f #f)))
(define (false-thunk) #f) (define (false-thunk) #f)
@ -285,9 +284,11 @@
(err #f (token-name tok) (token-value tok) (cadr ip) (caddr ip)) (err #f (token-name tok) (token-value tok) (cadr ip) (caddr ip))
(err #f (token-name tok) (token-value tok))) (err #f (token-name tok) (token-value tok)))
(raise-read-error (format "parser: got token of unknown type ~a" (token-name tok)) (raise-read-error (format "parser: got token of unknown type ~a" (token-name tok))
#f #f #f #f #f))))))) #f #f #f #f #f))))))
(make-parser
(lambda (start-number)
(lambda (get-token) (lambda (get-token)
(let parsing-loop ((stack empty-stack) (let parsing-loop ((stack (make-empty-stack start-number))
(ip (get-token))) (ip (get-token)))
(let* ((tok (input->token ip)) (let* ((tok (input->token ip))
(action (find-action stack tok ip))) (action (find-action stack tok ip)))
@ -315,15 +316,18 @@
src-pos))) src-pos)))
(let* ((A (reduce-lhs-num action)) (let* ((A (reduce-lhs-num action))
(goto (array2d-ref table (stack-frame-state (car new-stack)) A))) (goto (array2d-ref table (stack-frame-state (car new-stack)) A)))
(parsing-loop (cons (parsing-loop
(cons
(if src-pos (if src-pos
(make-stack-frame goto (make-stack-frame
goto
(apply (vector-ref actions (reduce-prod-num action)) args) (apply (vector-ref actions (reduce-prod-num action)) args)
(if (null? args) (cadr ip) (cadr args)) (if (null? args) (cadr ip) (cadr args))
(if (null? args) (if (null? args)
(caddr ip) (caddr ip)
(list-ref args (- (* (reduce-rhs-length action) 3) 1)))) (list-ref args (- (* (reduce-rhs-length action) 3) 1))))
(make-stack-frame goto (make-stack-frame
goto
(apply (vector-ref actions (reduce-prod-num action)) args) (apply (vector-ref actions (reduce-prod-num action)) args)
#f #f
#f)) #f))
@ -336,8 +340,6 @@
(if src-pos (if src-pos
(err #t (token-name tok) (token-value tok) (cadr ip) (caddr ip)) (err #t (token-name tok) (token-value tok) (cadr ip) (caddr ip))
(err #t (token-name tok) (token-value tok))) (err #t (token-name tok) (token-value tok)))
(parsing-loop (fix-error stack tok ip get-token) (get-token))))))))) (parsing-loop (fix-error stack tok ip get-token) (get-token))))))))))
(make-parser 0)))
) )
Loading…
Cancel
Save