#lang racket/base (require (for-template racket/base) racket/list racket/syntax syntax/srcloc brag/rules/stx-types "flatten.rkt" syntax/id-table (prefix-in sat: "satisfaction.rkt") (prefix-in support: brag/support) (prefix-in stxparse: syntax/parse)) (provide rules-codegen) ;; Generates the body of the module. ;; FIXME: abstract this so we can just call (rules ...) without ;; generating the whole module body. (define (rules-codegen rules-stx #:parser-provider-module [parser-provider-module 'br-parser-tools/cfg-parser] #:parser-provider-form [parser-provider-form 'cfg-parser]) (syntax-case rules-stx () [(_) (raise-syntax-error 'brag (format "The grammar does not appear to have any rules") rules-stx)] [(_ . RULES) (let ([rules (syntax->list #'RULES)]) ;; (listof stx) (check-all-rules-defined! rules) (check-all-rules-no-duplicates! rules) (check-all-rules-satisfiable! rules) ;; We flatten the rules so we can use ;; the yacc-style ruleset that br-parser-tools supports. (define generated-rule-codes (map flat-rule->yacc-rule (flatten-rules rules))) (define rule-ids (map rule-id rules)) (define token-types ;; (listof symbol) (let-values ([(implicit-tokens explicit-tokens) (rules-collect-token-types rules)]) (remove-duplicates (append (map string->symbol (map syntax-e implicit-tokens)) (map syntax-e explicit-tokens)) eq?))) (define (rules-stx-id sym) (datum->syntax rules-stx sym)) (with-syntax ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule. [(TOKEN-TYPE ...) token-types] [(TOKEN-TYPE-CONSTRUCTOR ...) (for/list ([tt (in-list token-types)]) (string->symbol (format "token-~a" tt)))] [GENERATED-GRAMMAR `(grammar ,@generated-rule-codes)] [PARSER-MODULE parser-provider-module] [PARSER-FORM parser-provider-form] [PARSE (rules-stx-id 'parse)] [PARSE-TO-DATUM (rules-stx-id 'parse-to-datum)] [PARSE-TREE (rules-stx-id 'parse-tree)] [MAKE-RULE-PARSER (rules-stx-id 'make-rule-parser)] [ALL-TOKEN-TYPES (rules-stx-id 'all-token-types)] [TOKEN (rules-stx-id 'token)] [APPLY-LEXER (rules-stx-id 'apply-lexer)] [APPLY-TOKENIZER-MAKER (rules-stx-id 'apply-tokenizer-maker)]) ;; this stx object represents the top level of a #lang brag module. ;; so any `define`s are automatically available at the repl. ;; and only identifiers explicitly `provide`d are visible on import. (quasisyntax/loc rules-stx (begin (require br-parser-tools/lex PARSER-MODULE brag/codegen/runtime brag/support brag/private/internal-support racket/set (for-syntax racket/base)) (provide PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES) ;; helpers from brag/support (define TOKEN token) (define APPLY-LEXER apply-lexer) (define APPLY-TOKENIZER-MAKER apply-tokenizer-maker) (define-tokens enumerated-tokens (TOKEN-TYPE ...)) ;; all-token-types lists all the tokens (except for EOF) (define ALL-TOKEN-TYPES (set-remove (set 'TOKEN-TYPE ...) 'EOF)) ;; For internal use by the permissive tokenizer only: (define all-tokens-hash/mutable (make-hash (list ;; Note: we also allow the eof object here, to make ;; the permissive tokenizer even nicer to work with. (cons eof token-EOF) (cons 'TOKEN-TYPE TOKEN-TYPE-CONSTRUCTOR) ...))) (define-syntax (MAKE-RULE-PARSER rule-id-stx) (syntax-case rule-id-stx () [(_ start-rule) (and (identifier? #'start-rule) (member (syntax-e #'start-rule) '#,(map syntax-e rule-ids))) ;; HACK HACK HACK ;; The cfg-parser depends on the start-rule provided in (start ...) to have the same ;; context as the rest of this body, so I need to hack this. I don't like this, but ;; I don't know what else to do. Hence recolored-start-rule. (with-syntax ([RECOLORED-START-RULE (datum->syntax #'#,rules-stx (syntax-e #'start-rule))]) #'(let ([THE-GRAMMAR (PARSER-FORM (tokens enumerated-tokens) (src-pos) (start RECOLORED-START-RULE) (end EOF) (error THE-ERROR-HANDLER) GENERATED-GRAMMAR)]) (procedure-rename (case-lambda [(tokenizer) (define next-token (make-permissive-tokenizer tokenizer all-tokens-hash/mutable)) (THE-GRAMMAR next-token)] [(source tokenizer) (parameterize ([current-source source]) (PARSE tokenizer))]) (string->symbol (format "~a-rule-parser" 'start-rule)))))] [(_ not-a-rule-id) (raise-syntax-error #f (format "Rule ~a is not defined in the grammar" (syntax-e #'not-a-rule-id)) rule-id-stx)])) (define PARSE (procedure-rename (MAKE-RULE-PARSER START-ID) 'PARSE)) (define (PARSE-TO-DATUM x) (let loop ([x (syntax->datum (PARSE x))]) (cond [(list? x) (map loop x)] [else x]))) (define PARSE-TREE PARSE-TO-DATUM)))))])) ;; Given a flattened rule, returns a syntax for the code that ;; preserves as much source location as possible. ;; ;; Each rule is defined to return a list with the following structure: ;; ;; stx :== (name (U tokens rule-stx) ...) ;; (define (flat-rule->yacc-rule a-flat-rule) (syntax-case a-flat-rule () [(rule-type origin name . clauses) (with-syntax ([translated-clauses (for/list ([clause-stx (in-list (syntax->list #'clauses))]) (translate-clause clause-stx #'name #'origin))]) #'[name . translated-clauses])])) ;; translates a single primitive rule clause. ;; A clause is a simple list of ids, lit, vals, and inferred-id elements. ;; The action taken depends on the pattern type. (define (translate-clause a-clause rule-name/false origin) (define translated-patterns (let loop ([primitive-patterns (syntax->list a-clause)]) (cond [(empty? primitive-patterns) null] [else (cons (syntax-case (first primitive-patterns) (id lit token inferred-id) [(id val) #'val] [(lit val) (datum->syntax #f (string->symbol (syntax-e #'val)) #'val)] [(token val) #'val] [(inferred-id val reason) #'val]) (loop (rest primitive-patterns)))]))) (define translated-actions (for/list ([translated-pattern (in-list translated-patterns)] [primitive-pattern (in-list (syntax->list a-clause))] [pos (in-naturals 1)]) (if (eq? (syntax-property primitive-pattern 'hide) 'hide) #'null (with-syntax ([$X (format-id translated-pattern "$~a" pos)] [$X-start-pos (format-id translated-pattern "$~a-start-pos" pos)] [$X-end-pos (format-id translated-pattern "$~a-end-pos" pos)]) (syntax-case primitive-pattern (id lit token inferred-id) ;; When a rule usage is inferred, the value of $X is a syntax object ;; whose head is the name of the inferred rule. We strip that out, ;; leaving the residue to be absorbed. [(inferred-id val reason) #'(syntax-case $X () [(inferred-rule-name . rest) (syntax->list #'rest)])] [(id val) ;; at this point, the 'hide property is either #f or "splice" ;; ('hide value is handled at the top of this conditional) ;; we need to use boolean because a symbol is treated as an identifier. ;; also we'll separate it into its own property for clarity and test for it in "runtime.rkt" #`(list (syntax-property $X 'splice-rh-id #,(and (syntax-property primitive-pattern 'hide) #t)))] [(lit val) #'(list (atomic-datum->syntax $X $X-start-pos $X-end-pos))] [(token val) #'(list (atomic-datum->syntax $X $X-start-pos $X-end-pos))]))))) (define whole-rule-loc (if (empty? translated-patterns) #'(list (current-source) #f #f #f #f) (with-syntax ([$1-start-pos (datum->syntax (first translated-patterns) '$1-start-pos)] [$n-end-pos (format-id (last translated-patterns) "$~a-end-pos" (length translated-patterns))]) #`(positions->srcloc $1-start-pos $n-end-pos)))) ;; move 'hide-or-splice-lhs-id property into function because name is datum-ized (with-syntax ([(translated-pattern ...) translated-patterns] [(translated-action ...) translated-actions]) #`[(translated-pattern ...) (rule-components->syntax '#,rule-name/false translated-action ... #:srcloc #,whole-rule-loc #:hide-or-splice? #,(syntax-property rule-name/false 'hide-or-splice-lhs-id))])) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; collect-token-types: (listof rule-syntax) -> (values (listof identifier) (listof identifier)) ;; ;; Given a rule, automatically derive the list of implicit and ;; explicit token types we need to generate. ;; ;; Note: EOF is reserved, and will always be included in the list ;; of explicit token types, though the user is not allow to express it themselves. (define (rules-collect-token-types rules) (define-values (implicit explicit) (for/fold ([implicit null] [explicit (list (datum->syntax (first rules) 'EOF))]) ([a-rule (in-list rules)]) (syntax-case a-rule (rule) [(rule _ a-pattern) (let loop ([a-pattern #'a-pattern] [implicit implicit] [explicit explicit]) (syntax-case a-pattern (id lit token choice repeat maybe seq EOF) [(id val) (values implicit explicit)] [(lit val) (values (cons #'val implicit) explicit)] [(token EOF) (raise-syntax-error #f "Token EOF is reserved and can not be used in a grammar" #'val)] [(token val) (values implicit (cons #'val explicit))] [(choice . vals) (for/fold ([implicit implicit] [explicit explicit]) ([v (in-list (syntax->list #'vals))]) (loop v implicit explicit))] [(repeat min max val) (loop #'val implicit explicit)] [(maybe val) (loop #'val implicit explicit)] [(seq . vals) (for/fold ([implicit implicit] [explicit explicit]) ([v (in-list (syntax->list #'vals))]) (loop v implicit explicit))]))]))) (values (reverse implicit) (reverse explicit))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; rule-id: rule -> identifier-stx ;; Get the binding id of a rule. (define (rule-id a-rule) (syntax-case a-rule (rule) [(rule id a-pattern) #'id])) (define (rule-pattern a-rule) (syntax-case a-rule (rule) [(rule id a-pattern) #'a-pattern])) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; check-all-rules-defined!: (listof rule-stx) -> void (define (check-all-rules-defined! rules) (define table (make-free-id-table)) ;; Pass one: collect all the defined rule names. (for ([a-rule (in-list rules)]) (free-id-table-set! table (rule-id a-rule) #t)) ;; Pass two: check each referenced id, and make sure it's been defined. (for* ([a-rule (in-list rules)] [referenced-id (in-list (rule-collect-used-ids a-rule))] #:unless (free-id-table-ref table referenced-id (λ () #f))) (raise-syntax-error #f (format "Rule ~a has no definition" (syntax-e referenced-id)) referenced-id))) ;; check-all-rules-no-duplicates!: (listof rule-stx) -> void (define (check-all-rules-no-duplicates! rules) (define table (make-free-id-table)) ;; Pass one: collect all the defined rule names. (for ([a-rule (in-list rules)]) (define maybe-other-rule-id (free-id-table-ref table (rule-id a-rule) (λ () #f))) (when maybe-other-rule-id (raise-syntax-error #f (format "Rule ~a has a duplicate definition" (syntax-e (rule-id a-rule))) (rule-id a-rule) #f (list (rule-id a-rule) maybe-other-rule-id))) (free-id-table-set! table (rule-id a-rule) (rule-id a-rule)))) ;; rule-collect-used-ids: rule-stx -> (listof identifier) ;; Given a rule, extracts a list of identifiers (define (rule-collect-used-ids a-rule) (syntax-case a-rule (rule) [(rule id a-pattern) (pattern-collect-used-ids #'a-pattern null)])) ;; pattern-collect-used-ids: pattern-stx (listof identifier) -> (listof identifier) ;; Returns a flat list of rule identifiers referenced in the pattern. (define (pattern-collect-used-ids a-pattern acc) (let loop ([a-pattern a-pattern] [acc acc]) (syntax-case a-pattern (id lit token choice repeat maybe seq) [(id val) (cons #'val acc)] [(lit val) acc] [(token val) acc] [(choice . vals) (for/fold ([acc acc]) ([v (in-list (syntax->list #'vals))]) (loop v acc))] [(repeat min max val) (loop #'val acc)] [(maybe val) (loop #'val acc)] [(seq . vals) (for/fold ([acc acc]) ([v (in-list (syntax->list #'vals))]) (loop v acc))]))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; check-all-rules-satisfiable: (listof rule-stx) -> void ;; Does a simple graph traversal / topological sort-like thing to make sure that, for ;; any rule, there's some finite sequence of tokens that ;; satisfies it. If this is not the case, then something horrible ;; has happened, and we need to tell the user about it. ;; ;; NOTE: Assumes all referenced rules have definitions. (define (check-all-rules-satisfiable! rules) (define toplevel-rule-table (make-free-id-table (for/list ([a-rule (in-list rules)]) (cons (rule-id a-rule) (sat:make-and))))) (define leaves null) (define (make-leaf) (define a-leaf (sat:make-and)) (set! leaves (cons a-leaf leaves)) a-leaf) (define (process-pattern a-pattern) (syntax-case a-pattern (id lit token choice repeat maybe seq) [(id val) (free-id-table-ref toplevel-rule-table #'val)] [(lit val) (make-leaf)] [(token val) (make-leaf)] [(choice . vals) (let ([an-or-node (sat:make-or)]) (for* ([v (in-list (syntax->list #'vals))] [a-child (in-value (process-pattern v))]) (sat:add-child! an-or-node a-child)) an-or-node)] [(repeat min max val) (syntax-case #'min () [0 (make-leaf)] [_ (process-pattern #'val)])] [(maybe val) (make-leaf)] [(seq . vals) (let ([an-and-node (sat:make-and)]) (for* ([v (in-list (syntax->list #'vals))] [a-child (in-value (process-pattern v))]) (sat:add-child! an-and-node a-child)) an-and-node)])) (for* ([a-rule (in-list rules)] [rule-node (in-value (free-id-table-ref toplevel-rule-table (rule-id a-rule)))]) (sat:add-child! rule-node (process-pattern (rule-pattern a-rule)))) (for-each sat:visit! leaves) (for* ([a-rule (in-list rules)] [rule-node (in-value (free-id-table-ref toplevel-rule-table (rule-id a-rule)))] #:unless (sat:node-yes? rule-node)) (raise-syntax-error #f (format "Rule ~a has no finite derivation" (syntax-e (rule-id a-rule))) (rule-id a-rule))))