You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
br-parser-tools/br-parser-tools-lib/br-parser-tools/yacc.rkt

335 lines
16 KiB
Racket

#lang racket/base
(require (for-syntax racket/base
"private-yacc/parser-builder.rkt"
"private-yacc/grammar.rkt"
"private-yacc/yacc-helper.rkt"
"private-yacc/parser-actions.rkt")
"private-lex/token.rkt"
"private-yacc/parser-actions.rkt"
racket/local
racket/pretty
syntax/readerr)
(provide parser)
;; convert-parse-table : (vectorof (listof (cons/c gram-sym? action?))) ->
;; (vectorof (symbol runtime-action hashtable))
(define-for-syntax (convert-parse-table table)
(for/vector ([state-entry (in-vector table)])
(let ([ht (make-hasheq)])
(for ([gs/action (in-list state-entry)])
(hash-set! ht
(gram-sym-symbol (car gs/action))
(action->runtime-action (cdr gs/action))))
ht)))
(define-syntax (parser stx)
(syntax-case stx ()
[(_ ARGS ...)
(let ([arg-list (syntax->list #'(ARGS ...))]
[src-pos #f]
[debug #f]
[error #f]
[tokens #f]
[start #f]
[end #f]
[precs #f]
[suppress #f]
[grammar #f]
[yacc-output #f])
(for ([arg (in-list (syntax->list #'(ARGS ...)))])
(syntax-case* arg (debug error tokens start end precs grammar
suppress src-pos yacc-output)
(λ (a b) (eq? (syntax-e a) (syntax-e b)))
[(debug FILENAME)
(cond
[(not (string? (syntax-e #'FILENAME)))
(raise-syntax-error #f "Debugging filename must be a string" stx #'FILENAME)]
[debug (raise-syntax-error #f "Multiple debug declarations" stx)]
[else (set! debug (syntax-e #'FILENAME))])]
[(suppress) (set! suppress #t)]
[(src-pos) (set! src-pos #t)]
[(error EXPRESSION)
(if error
(raise-syntax-error #f "Multiple error declarations" stx)
(set! error #'EXPRESSION))]
[(tokens DEF ...)
(begin
(when tokens
(raise-syntax-error #f "Multiple tokens declarations" stx))
(let ((defs (syntax->list #'(DEF ...))))
(for ([d (in-list defs)]
#:unless (identifier? d))
(raise-syntax-error #f "Token-group name must be an identifier" stx d))
(set! tokens defs)))]
[(start symbol ...)
(let ([symbols (syntax->list #'(symbol ...))])
(for ([sym (in-list symbols)]
#:unless (identifier? sym))
(raise-syntax-error #f "Start symbol must be a symbol" stx sym))
(when start
(raise-syntax-error #f "Multiple start declarations" stx))
(when (null? symbols)
(raise-syntax-error #f "Missing start symbol" stx arg))
(set! start symbols))]
[(end SYMBOLS ...)
(let ((symbols (syntax->list #'(SYMBOLS ...))))
(for ([sym (in-list symbols)]
#:unless (identifier? sym))
(raise-syntax-error #f "End token must be a symbol" stx sym))
(let ([d (duplicate-list? (map syntax-e symbols))])
(when d
(raise-syntax-error #f (format "Duplicate end token definition for ~a" d) stx arg))
(when (null? symbols)
(raise-syntax-error #f "end declaration must contain at least 1 token" stx arg))
(when end
(raise-syntax-error #f "Multiple end declarations" stx))
(set! end symbols)))]
[(precs DECLS ...)
(if precs
(raise-syntax-error #f "Multiple precs declarations" stx)
(set! precs (syntax/loc arg (DECLS ...))))]
[(grammar PRODS ...)
(if grammar
(raise-syntax-error #f "Multiple grammar declarations" stx)
(set! grammar (syntax/loc arg (PRODS ...))))]
[(yacc-output FILENAME)
(cond
[(not (string? (syntax-e #'FILENAME)))
(raise-syntax-error #f "Yacc-output filename must be a string" stx #'FILENAME)]
[yacc-output
(raise-syntax-error #f "Multiple yacc-output declarations" stx)]
[else
(set! yacc-output (syntax-e #'FILENAME))])]
[_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg)]))
(unless tokens
(raise-syntax-error #f "missing tokens declaration" stx))
(unless error
(raise-syntax-error #f "missing error declaration" stx))
(unless grammar
(raise-syntax-error #f "missing grammar declaration" stx))
(unless end
(raise-syntax-error #f "missing end declaration" stx))
(unless start
(raise-syntax-error #f "missing start declaration" stx))
(let-values ([(table all-term-syms actions check-syntax-fix)
(build-parser (if debug debug "")
src-pos
suppress
tokens
start
end
precs
grammar)])
(when (and yacc-output (not (string=? yacc-output "")))
(with-handlers [(exn:fail:filesystem?
(λ (e) (eprintf "Cannot write yacc-output to file \"~a\"\n" yacc-output)))]
(call-with-output-file yacc-output
(λ (port)
(display-yacc (syntax->datum grammar)
tokens
(map syntax->datum start)
(and precs (syntax->datum precs))
port))
#:exists 'truncate)))
(with-syntax ([check-syntax-fix check-syntax-fix]
[err error]
[ends end]
[starts start]
[debug debug]
[table (convert-parse-table table)]
[all-term-syms all-term-syms]
[actions actions]
[src-pos src-pos])
#'(begin
check-syntax-fix
(parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos)))))]
[_ (raise-syntax-error #f "parser must have the form (parser args ...)" stx)]))
(define (reduce-stack stack num ret-vals src-pos)
(cond
[(positive? num)
(define top-frame (car stack))
(let ([ret-vals (if src-pos
(cons (stack-frame-value top-frame)
(cons (stack-frame-start-pos top-frame)
(cons (stack-frame-end-pos top-frame)
ret-vals)))
(cons (stack-frame-value top-frame) ret-vals))])
(reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))]
[else (values stack ret-vals)]))
;; extract-helper : (symbol or make-token) any any -> symbol any any any
(define (extract-helper tok v1 v2)
(cond
[(symbol? tok) (values tok #f v1 v2)]
[(token? tok) (values (real-token-name tok) (real-token-value tok) v1 v2)]
[else (raise-argument-error 'parser "(or/c symbol? token?)" 0 tok)]))
;; well-formed-position-token?: any -> boolean
;; Returns true if pt is a position token whose position-token-token
;; is itself a token or a symbol.
;; This is meant to help raise more precise error messages when
;; a tokenizer produces an erroneous position-token wrapped twice.
;; (as often happens when omitting return-without-pos).
(define (well-formed-token-field? t)
(or (symbol? t) (token? t)))
(define (well-formed-position-token? pt)
(and (position-token? pt)
(well-formed-token-field? (position-token-token pt))))
(define (well-formed-srcloc-token? st)
(and (srcloc-token? st)
(well-formed-token-field? (srcloc-token-token st))))
;; extract-src-pos : position-token -> symbol any any any
(define (extract-src-pos ip)
(unless (well-formed-position-token? ip)
(raise-argument-error 'parser "well-formed-position-token?" 0 ip))
(extract-helper (position-token-token ip)
(position-token-start-pos ip)
(position-token-end-pos ip)))
(define (extract-srcloc ip)
(unless (well-formed-srcloc-token? ip)
(raise-argument-error 'parser "well-formed-srcloc-token?" 0 ip))
(define loc (srcloc-token-srcloc ip))
(extract-helper (srcloc-token-token ip)
(position-token (srcloc-position loc) (srcloc-line loc) (srcloc-column loc))
(position-token (+ (srcloc-position loc) (srcloc-span loc)) #f #f)))
;; extract-no-src-pos : (symbol or make-token) -> symbol any any any
(define (extract-no-src-pos ip)
(extract-helper ip #f #f))
(define-struct stack-frame (state value start-pos end-pos) #:inspector (make-inspector))
(define (make-empty-stack i) (list (make-stack-frame i #f #f #f)))
;; The table is a vector that maps each state to a hash-table that maps a
;; terminal symbol to either an accept, shift, reduce, or goto structure.
; We encode the structures according to the runtime-action data definition in
;; parser-actions.rkt
(define (parser-body debug? err starts ends table all-term-syms actions src-pos)
(local ((define extract
(if src-pos
extract-src-pos
extract-no-src-pos))
(define (fix-error stack tok val start-pos end-pos get-token)
(when debug? (pretty-print stack))
(local ((define (remove-input tok val start-pos end-pos)
(if (memq tok ends)
(raise-read-error "parser: Cannot continue after error"
#f #f #f #f #f)
(let ([a (find-action stack tok val start-pos end-pos)])
(cond
[(runtime-shift? a)
;; (printf "shift:~a\n" (runtime-shift-state a))
(cons (make-stack-frame (runtime-shift-state a)
val
start-pos
end-pos)
stack)]
[else
;; (printf "discard input:~a\n" tok)
(let-values ([(tok val start-pos end-pos)
(extract (get-token))])
(remove-input tok val start-pos end-pos))])))))
(let remove-states ()
(let ([a (find-action stack 'error #f start-pos end-pos)])
(cond
[(runtime-shift? a)
;; (printf "shift:~a\n" (runtime-shift-state a))
(set! stack
(cons
(make-stack-frame (runtime-shift-state a)
#f
start-pos
end-pos)
stack))
(remove-input tok val start-pos end-pos)]
[else
;; (printf "discard state:~a\n" (car stack))
(cond
[(< (length stack) 2)
(raise-read-error "parser: Cannot continue after error"
#f #f #f #f #f)]
[else
(set! stack (cdr stack))
(remove-states)])])))))
(define (find-action stack tok val start-pos end-pos)
(unless (hash-ref all-term-syms tok #f)
(if src-pos
(err #f tok val start-pos end-pos)
(err #f tok val))
(raise-read-error (format "parser: got token of unknown type ~a" tok)
#f #f #f #f #f))
(hash-ref (vector-ref table (stack-frame-state (car stack))) tok #f))
(define ((make-parser start-number) get-token)
(unless (and (procedure? get-token)
(procedure-arity-includes? get-token 0))
(error 'get-token "expected a nullary procedure, got ~e" get-token))
(let parsing-loop ([stack (make-empty-stack start-number)]
[ip (get-token)])
(let-values ([(tok val start-pos end-pos) (extract ip)])
(let ([action (find-action stack tok val start-pos end-pos)])
(cond
[(runtime-shift? action)
;; (printf "shift:~a\n" (runtime-shift-state action))
(parsing-loop (cons (make-stack-frame (runtime-shift-state action)
val
start-pos
end-pos)
stack)
(get-token))]
[(runtime-reduce? action)
;; (printf "reduce:~a\n" (runtime-reduce-prod-num action))
(let-values ([(new-stack args)
(reduce-stack stack
(runtime-reduce-rhs-length action)
null
src-pos)])
(let ([goto
(runtime-goto-state
(hash-ref
(vector-ref table (stack-frame-state (car new-stack)))
(runtime-reduce-lhs action)))])
(parsing-loop
(cons
(if src-pos
(make-stack-frame
goto
(apply (vector-ref actions (runtime-reduce-prod-num action)) args)
(if (null? args) start-pos (cadr args))
(if (null? args)
end-pos
(list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1))))
(make-stack-frame
goto
(apply (vector-ref actions (runtime-reduce-prod-num action)) args)
#f
#f))
new-stack)
ip)))]
[(runtime-accept? action)
;; (printf "accept\n")
(stack-frame-value (car stack))]
[else
(if src-pos
(err #t tok val start-pos end-pos)
(err #t tok val))
(parsing-loop (fix-error stack tok val start-pos end-pos get-token)
(get-token))]))))))
(cond
[(null? (cdr starts)) (make-parser 0)]
[else
(for/list ([(l i) (in-indexed starts)])
(make-parser i))])))