#lang racket/base (require (for-syntax racket/base yaragg/parser-tools/private-yacc/parser-builder yaragg/parser-tools/private-yacc/grammar yaragg/parser-tools/private-yacc/yacc-helper yaragg/parser-tools/private-yacc/parser-actions) yaragg/parser-tools/private-lex/token yaragg/parser-tools/private-yacc/parser-actions racket/local racket/pretty syntax/readerr) (provide parser) ;; convert-parse-table : (vectorof (listof (cons/c gram-sym? action?))) -> ;; (vectorof (symbol runtime-action hashtable)) (define-for-syntax (convert-parse-table table) (for/vector ([state-entry (in-vector table)]) (let ([ht (make-hasheq)]) (for ([gs/action (in-list state-entry)]) (hash-set! ht (gram-sym-symbol (car gs/action)) (action->runtime-action (cdr gs/action)))) ht))) (define-syntax (parser stx) (syntax-case stx () [(_ ARGS ...) (let ([arg-list (syntax->list #'(ARGS ...))] [src-pos #f] [debug #f] [error #f] [tokens #f] [start #f] [end #f] [precs #f] [suppress #f] [grammar #f]) (for ([arg (in-list (syntax->list #'(ARGS ...)))]) (syntax-case* arg (debug error tokens start end precs grammar suppress src-pos) (λ (a b) (eq? (syntax-e a) (syntax-e b))) [(debug FILENAME) (cond [(not (string? (syntax-e #'FILENAME))) (raise-syntax-error #f "Debugging filename must be a string" stx #'FILENAME)] [debug (raise-syntax-error #f "Multiple debug declarations" stx)] [else (set! debug (syntax-e #'FILENAME))])] [(suppress) (set! suppress #t)] [(src-pos) (set! src-pos #t)] [(error EXPRESSION) (if error (raise-syntax-error #f "Multiple error declarations" stx) (set! error #'EXPRESSION))] [(tokens DEF ...) (begin (when tokens (raise-syntax-error #f "Multiple tokens declarations" stx)) (let ((defs (syntax->list #'(DEF ...)))) (for ([d (in-list defs)] #:unless (identifier? d)) (raise-syntax-error #f "Token-group name must be an identifier" stx d)) (set! tokens defs)))] [(start symbol ...) (let ([symbols (syntax->list #'(symbol ...))]) (for ([sym (in-list symbols)] #:unless (identifier? sym)) (raise-syntax-error #f "Start symbol must be a symbol" stx sym)) (when start (raise-syntax-error #f "Multiple start declarations" stx)) (when (null? symbols) (raise-syntax-error #f "Missing start symbol" stx arg)) (set! start symbols))] [(end SYMBOLS ...) (let ((symbols (syntax->list #'(SYMBOLS ...)))) (for ([sym (in-list symbols)] #:unless (identifier? sym)) (raise-syntax-error #f "End token must be a symbol" stx sym)) (define d (duplicate-list? (map syntax-e symbols))) (when d (raise-syntax-error #f (format "Duplicate end token definition for ~a" d) stx arg)) (when (null? symbols) (raise-syntax-error #f "end declaration must contain at least 1 token" stx arg)) (when end (raise-syntax-error #f "Multiple end declarations" stx)) (set! end symbols))] [(precs DECLS ...) (if precs (raise-syntax-error #f "Multiple precs declarations" stx) (set! precs (syntax/loc arg (DECLS ...))))] [(grammar PRODS ...) (if grammar (raise-syntax-error #f "Multiple grammar declarations" stx) (set! grammar (syntax/loc arg (PRODS ...))))] [_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg)])) (unless tokens (raise-syntax-error #f "missing tokens declaration" stx)) (unless error (raise-syntax-error #f "missing error declaration" stx)) (unless grammar (raise-syntax-error #f "missing grammar declaration" stx)) (unless end (raise-syntax-error #f "missing end declaration" stx)) (unless start (raise-syntax-error #f "missing start declaration" stx)) (define-values (table all-term-syms actions check-syntax-fix) (build-parser (if debug debug "") src-pos suppress tokens start end precs grammar)) (with-syntax ([check-syntax-fix check-syntax-fix] [err error] [ends end] [starts start] [debug debug] [table (convert-parse-table table)] [all-term-syms all-term-syms] [actions actions] [src-pos src-pos]) #'(begin check-syntax-fix (parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos))))] [_ (raise-syntax-error #f "parser must have the form (parser args ...)" stx)])) (define (reduce-stack stack num ret-vals src-pos) (cond [(positive? num) (define top-frame (car stack)) (let ([ret-vals (if src-pos (cons (stack-frame-value top-frame) (cons (stack-frame-start-pos top-frame) (cons (stack-frame-end-pos top-frame) ret-vals))) (cons (stack-frame-value top-frame) ret-vals))]) (reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))] [else (values stack ret-vals)])) ;; extract-helper : (symbol or make-token) any any -> symbol any any any (define (extract-helper tok v1 v2) (cond [(symbol? tok) (values tok #f v1 v2)] [(token? tok) (values (real-token-name tok) (real-token-value tok) v1 v2)] [else (raise-argument-error 'parser "(or/c symbol? token?)" 0 tok)])) ;; well-formed-position-token?: any -> boolean ;; Returns true if pt is a position token whose position-token-token ;; is itself a token or a symbol. ;; This is meant to help raise more precise error messages when ;; a tokenizer produces an erroneous position-token wrapped twice. ;; (as often happens when omitting return-without-pos). (define (well-formed-token-field? t) (or (symbol? t) (token? t))) (define (well-formed-position-token? pt) (and (position-token? pt) (well-formed-token-field? (position-token-token pt)))) (define (well-formed-srcloc-token? st) (and (srcloc-token? st) (well-formed-token-field? (srcloc-token-token st)))) ;; extract-src-pos : position-token -> symbol any any any (define (extract-src-pos ip) (unless (well-formed-position-token? ip) (raise-argument-error 'parser "well-formed-position-token?" 0 ip)) (extract-helper (position-token-token ip) (position-token-start-pos ip) (position-token-end-pos ip))) (define (extract-srcloc ip) (unless (well-formed-srcloc-token? ip) (raise-argument-error 'parser "well-formed-srcloc-token?" 0 ip)) (define loc (srcloc-token-srcloc ip)) (extract-helper (srcloc-token-token ip) (position-token (srcloc-position loc) (srcloc-line loc) (srcloc-column loc)) (position-token (+ (srcloc-position loc) (srcloc-span loc)) #f #f))) ;; extract-no-src-pos : (symbol or make-token) -> symbol any any any (define (extract-no-src-pos ip) (extract-helper ip #f #f)) (struct stack-frame (state value start-pos end-pos) #:transparent) (define (make-empty-stack i) (list (stack-frame i #f #f #f))) ;; The table is a vector that maps each state to a hash-table that maps a ;; terminal symbol to either an accept, shift, reduce, or goto structure. ; We encode the structures according to the runtime-action data definition in ;; parser-actions.rkt (define (parser-body debug? err starts ends table all-term-syms actions src-pos) (local ((define extract (if src-pos extract-src-pos extract-no-src-pos)) (define (fix-error stack tok val start-pos end-pos get-token) (when debug? (pretty-print stack)) (local ((define (remove-input tok val start-pos end-pos) (if (memq tok ends) (raise-read-error "parser: Cannot continue after error" #f #f #f #f #f) (let ([a (find-action stack tok val start-pos end-pos)]) (cond [(runtime-shift? a) ;; (printf "shift:~a\n" (runtime-shift-state a)) (cons (stack-frame (runtime-shift-state a) val start-pos end-pos) stack)] [else ;; (printf "discard input:~a\n" tok) (call-with-values (λ () (extract (get-token))) remove-input)]))))) (let remove-states () (define a (find-action stack 'error #f start-pos end-pos)) (cond [(runtime-shift? a) ;; (printf "shift:~a\n" (runtime-shift-state a)) (set! stack (cons (stack-frame (runtime-shift-state a) #f start-pos end-pos) stack)) (remove-input tok val start-pos end-pos)] [else ;; (printf "discard state:~a\n" (car stack)) (cond [(< (length stack) 2) (raise-read-error "parser: Cannot continue after error" #f #f #f #f #f)] [else (set! stack (cdr stack)) (remove-states)])])))) (define (find-action stack tok val start-pos end-pos) (unless (hash-ref all-term-syms tok #f) (if src-pos (err #f tok val start-pos end-pos) (err #f tok val)) (raise-read-error (format "parser: got token of unknown type ~a" tok) #f #f #f #f #f)) (hash-ref (vector-ref table (stack-frame-state (car stack))) tok #f)) (define ((make-parser start-number) get-token) (unless (and (procedure? get-token) (procedure-arity-includes? get-token 0)) (error 'get-token "expected a nullary procedure, got ~e" get-token)) (let parsing-loop ([stack (make-empty-stack start-number)] [ip (get-token)]) (define-values (tok val start-pos end-pos) (extract ip)) (define action (find-action stack tok val start-pos end-pos)) (cond [(runtime-shift? action) ;; (printf "shift:~a\n" (runtime-shift-state action)) (parsing-loop (cons (stack-frame (runtime-shift-state action) val start-pos end-pos) stack) (get-token))] [(runtime-reduce? action) ;; (printf "reduce:~a\n" (runtime-reduce-prod-num action)) (let-values ([(new-stack args) (reduce-stack stack (runtime-reduce-rhs-length action) '() src-pos)]) (define goto (runtime-goto-state (hash-ref (vector-ref table (stack-frame-state (car new-stack))) (runtime-reduce-lhs action)))) (parsing-loop (cons (if src-pos (stack-frame goto (apply (vector-ref actions (runtime-reduce-prod-num action)) args) (if (null? args) start-pos (cadr args)) (if (null? args) end-pos (list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1)))) (stack-frame goto (apply (vector-ref actions (runtime-reduce-prod-num action)) args) #f #f)) new-stack) ip))] [(runtime-accept? action) ;; (printf "accept\n") (stack-frame-value (car stack))] [else (if src-pos (err #t tok val start-pos end-pos) (err #t tok val)) (parsing-loop (fix-error stack tok val start-pos end-pos get-token) (get-token))])))) (cond [(null? (cdr starts)) (make-parser 0)] [else (for/list ([(l i) (in-indexed starts)]) (make-parser i))])))