From 40f5cfa640b3b0f9d91ce4131d429e6925aebe95 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Sun, 17 Jun 2018 19:29:47 -0700 Subject: [PATCH] use the force --- brag/codegen/codegen.rkt | 139 ++----------------------------- brag/codegen/reader.rkt | 6 +- brag/codegen/sexp-based-lang.rkt | 113 +++++++++++++++++++++++-- brag/rules/stx.rkt | 8 +- 4 files changed, 121 insertions(+), 145 deletions(-) diff --git a/brag/codegen/codegen.rkt b/brag/codegen/codegen.rkt index fe0b4f0..029eb64 100755 --- a/brag/codegen/codegen.rkt +++ b/brag/codegen/codegen.rkt @@ -1,139 +1,16 @@ #lang racket/base - -(require (for-template racket/base) - racket/list +(require racket/list racket/syntax - syntax/srcloc brag/rules/stx-types - "flatten.rkt" syntax/id-table (prefix-in sat: "satisfaction.rkt") - (prefix-in support: brag/support) - (prefix-in stxparse: syntax/parse)) - -(provide rules-codegen) - - -;; Generates the body of the module. -;; FIXME: abstract this so we can just call (rules ...) without -;; generating the whole module body. -(define (rules-codegen rules-stx - #:parser-provider-module [parser-provider-module 'br-parser-tools/cfg-parser] - #:parser-provider-form [parser-provider-form 'cfg-parser]) - (syntax-case rules-stx () - [(_) (raise-syntax-error 'brag - (format "The grammar does not appear to have any rules") - rules-stx)] - [(_ . RULES) - (let ([rules (syntax->list #'RULES)]) ;; (listof stx) - - (check-all-rules-defined! rules) - (check-all-rules-no-duplicates! rules) - (check-all-rules-satisfiable! rules) - - ;; We flatten the rules so we can use - ;; the yacc-style ruleset that br-parser-tools supports. - (define generated-rule-codes (map flat-rule->yacc-rule (flatten-rules rules))) - (define rule-ids (map rule-id rules)) - - (define token-types ;; (listof symbol) - (let-values ([(implicit-tokens explicit-tokens) (rules-collect-token-types rules)]) - (remove-duplicates (append (map string->symbol (map syntax-e implicit-tokens)) - (map syntax-e explicit-tokens)) eq?))) - - (define (rules-stx-id sym) (datum->syntax rules-stx sym)) - - (with-syntax ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule. - [(TOKEN-TYPE ...) token-types] - [(TOKEN-TYPE-CONSTRUCTOR ...) (for/list ([tt (in-list token-types)]) - (string->symbol (format "token-~a" tt)))] - [GENERATED-GRAMMAR `(grammar ,@generated-rule-codes)] - [PARSER-MODULE parser-provider-module] - [PARSER-FORM parser-provider-form] - [PARSE (rules-stx-id 'parse)] - [PARSE-TO-DATUM (rules-stx-id 'parse-to-datum)] - [PARSE-TREE (rules-stx-id 'parse-tree)] - [MAKE-RULE-PARSER (rules-stx-id 'make-rule-parser)] - [ALL-TOKEN-TYPES (rules-stx-id 'all-token-types)] - [TOKEN (rules-stx-id 'token)] - [APPLY-LEXER (rules-stx-id 'apply-lexer)] - [APPLY-TOKENIZER-MAKER (rules-stx-id 'apply-tokenizer-maker)]) - ;; this stx object represents the top level of a #lang brag module. - ;; so any `define`s are automatically available at the repl. - ;; and only identifiers explicitly `provide`d are visible on import. - (quasisyntax/loc rules-stx - (begin - (require br-parser-tools/lex - PARSER-MODULE - brag/codegen/runtime - brag/support - brag/private/internal-support - racket/set - (for-syntax racket/base)) - - (provide PARSE - PARSE-TO-DATUM - PARSE-TREE - MAKE-RULE-PARSER - ALL-TOKEN-TYPES) - - ;; helpers from brag/support - (define TOKEN token) - (define APPLY-LEXER apply-lexer) - (define APPLY-TOKENIZER-MAKER apply-tokenizer-maker) - - (define-tokens enumerated-tokens (TOKEN-TYPE ...)) - - ;; all-token-types lists all the tokens (except for EOF) - (define ALL-TOKEN-TYPES (set-remove (set 'TOKEN-TYPE ...) 'EOF)) - - ;; For internal use by the permissive tokenizer only: - (define all-tokens-hash/mutable - (make-hash (list ;; Note: we also allow the eof object here, to make - ;; the permissive tokenizer even nicer to work with. - (cons eof token-EOF) - (cons 'TOKEN-TYPE TOKEN-TYPE-CONSTRUCTOR) ...))) - - (define-syntax (MAKE-RULE-PARSER rule-id-stx) - (syntax-case rule-id-stx () - [(_ start-rule) - (and (identifier? #'start-rule) - (member (syntax-e #'start-rule) '#,(map syntax-e rule-ids))) - ;; HACK HACK HACK - ;; The cfg-parser depends on the start-rule provided in (start ...) to have the same - ;; context as the rest of this body, so I need to hack this. I don't like this, but - ;; I don't know what else to do. Hence recolored-start-rule. - (with-syntax ([RECOLORED-START-RULE (datum->syntax #'#,rules-stx (syntax-e #'start-rule))]) - #'(let ([THE-GRAMMAR (PARSER-FORM (tokens enumerated-tokens) - (src-pos) - (start RECOLORED-START-RULE) - (end EOF) - (error THE-ERROR-HANDLER) - GENERATED-GRAMMAR)]) - (procedure-rename - (case-lambda [(tokenizer) - (define next-token - (make-permissive-tokenizer tokenizer all-tokens-hash/mutable)) - (THE-GRAMMAR next-token)] - [(source tokenizer) - (parameterize ([current-source source]) - (PARSE tokenizer))]) - (string->symbol (format "~a-rule-parser" 'start-rule)))))] - [(_ not-a-rule-id) - (raise-syntax-error #f - (format "Rule ~a is not defined in the grammar" (syntax-e #'not-a-rule-id)) - rule-id-stx)])) - - (define PARSE (procedure-rename (MAKE-RULE-PARSER START-ID) 'PARSE)) - - (define (PARSE-TO-DATUM x) - (let loop ([x (syntax->datum (PARSE x))]) - (cond - [(list? x) (map loop x)] - [else x]))) - - (define PARSE-TREE PARSE-TO-DATUM)))))])) + (for-template racket/base + brag/codegen/runtime + brag/private/internal-support)) +(provide (all-defined-out) + (for-template (all-from-out brag/codegen/runtime + brag/private/internal-support))) ;; Given a flattened rule, returns a syntax for the code that ;; preserves as much source location as possible. @@ -395,4 +272,4 @@ #:unless (sat:node-yes? rule-node)) (raise-syntax-error #f (format "Rule ~a has no finite derivation" (syntax-e (rule-id a-rule))) - (rule-id a-rule)))) + (rule-id a-rule)))) \ No newline at end of file diff --git a/brag/codegen/reader.rkt b/brag/codegen/reader.rkt index 5aac6a4..7ff3ae0 100755 --- a/brag/codegen/reader.rkt +++ b/brag/codegen/reader.rkt @@ -1,5 +1,5 @@ #lang s-exp syntax/module-reader -brag/codegen/sexp-based-lang +brag/codegen/expander #:read my-read #:read-syntax my-read-syntax #:info my-get-info @@ -39,7 +39,7 @@ brag/codegen/sexp-based-lang #f)))))]) (grammar-parser tokenizer))) (define-values (last-line last-column last-position) (port-next-location in)) - (list (rules->stx src rules + (rules->stx src rules #:original-stx (datum->syntax #f 'original-stx (list src first-line @@ -48,7 +48,7 @@ brag/codegen/sexp-based-lang (if (and (number? last-position) (number? first-position)) (- last-position first-position) - #f)))))) + #f))))) (define (my-get-info key default default-filter) (case key diff --git a/brag/codegen/sexp-based-lang.rkt b/brag/codegen/sexp-based-lang.rkt index 0437340..c12c226 100755 --- a/brag/codegen/sexp-based-lang.rkt +++ b/brag/codegen/sexp-based-lang.rkt @@ -1,7 +1,110 @@ #lang racket/base -(require (for-syntax racket/base "codegen.rkt")) -(provide (all-from-out racket/base)) ; borrow #%module-begin from racket/base +(require (for-syntax racket/base + racket/list + "codegen.rkt" + "flatten.rkt") + br-parser-tools/lex + br-parser-tools/cfg-parser + (prefix-in bs: brag/support) + racket/set) + +(provide (except-out (all-from-out racket/base) #%module-begin) + (rename-out [brag-module-begin #%module-begin] + [bs:apply-lexer apply-lexer] ; for repl + [bs:apply-tokenizer-maker apply-tokenizer-maker])) ; for repl + +;; -> (listof symbol) +(define-for-syntax (rules->token-types rules) + (let-values ([(implicit-tokens explicit-tokens) (rules-collect-token-types rules)]) + (remove-duplicates (append (for/list ([it (in-list implicit-tokens)]) + (string->symbol (syntax-e it))) + (map syntax-e explicit-tokens)) eq?))) + +(define-syntax (brag-module-begin rules-stx) + (syntax-case rules-stx () + [(_) (raise-syntax-error 'brag + (format "The grammar does not appear to have any rules") + 'brag-module)] + [(_ . RULES) + (let ([rules (syntax->list #'RULES)]) ;; (listof stx) + + (check-all-rules-defined! rules) + (check-all-rules-no-duplicates! rules) + (check-all-rules-satisfiable! rules) + + (define rule-ids (map rule-id rules)) + + (with-syntax ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule. + [((TOKEN-TYPE . TOKEN-TYPE-CONSTRUCTOR) ...) + (for/list ([tt (in-list (rules->token-types rules))]) + (cons tt (string->symbol (format "token-~a" tt))))] + ;; Flatten rules to use the yacc-style ruleset that br-parser-tools supports + [GENERATED-RULE-CODES (map flat-rule->yacc-rule (flatten-rules rules))] + ;; main exports. Break hygiene so they're also available at top-level / repl + [(PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES) + (map (λ (sym) (datum->syntax rules-stx sym)) + '(parse parse-to-datum parse-tree make-rule-parser all-token-types))] + [TOKEN (datum->syntax rules-stx 'token)] ; for repl + [RULE-IDS (map syntax-e rule-ids)] + [RULES-STX rules-stx]) + ;; this stx object represents the top level of a #lang brag module. + ;; so any `define`s are automatically available at the repl. + ;; and only identifiers explicitly `provide`d are visible on import. + #'(#%module-begin + (provide PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES) + + ;; handle brag/support `token` with special identifier + ;; so it doesn't conflict with brag's internal `token` macro + (define TOKEN bs:token) + + (define-tokens enumerated-tokens (TOKEN-TYPE ...)) + + ;; all-token-types lists all the tokens (except for EOF) + (define ALL-TOKEN-TYPES (set-remove (set 'TOKEN-TYPE ...) 'EOF)) + + ;; For internal use by the permissive tokenizer only: + (define all-tokens-hash/mutable + (make-hash (list ;; Note: we also allow the eof object here, to make + ;; the permissive tokenizer even nicer to work with. + (cons eof token-EOF) + (cons 'TOKEN-TYPE TOKEN-TYPE-CONSTRUCTOR) ...))) + + (define-syntax (MAKE-RULE-PARSER rule-id-stx) + (syntax-case rule-id-stx () + [(_ start-rule) + (and (identifier? #'start-rule) + (member (syntax-e #'start-rule) 'RULE-IDS)) + ;; The cfg-parser depends on the start-rule provided in (start ...) to have the same + ;; context as the rest of this body. Hence RECOLORED-START-RULE + (with-syntax ([RECOLORED-START-RULE (datum->syntax #'RULES-STX (syntax-e #'start-rule))]) + #'(let ([THE-GRAMMAR (cfg-parser (tokens enumerated-tokens) + (src-pos) + (start RECOLORED-START-RULE) + (end EOF) + (error THE-ERROR-HANDLER) + (grammar . GENERATED-RULE-CODES))]) + (procedure-rename + (case-lambda [(tokenizer) + (define next-token + (make-permissive-tokenizer tokenizer all-tokens-hash/mutable)) + (THE-GRAMMAR next-token)] + [(source tokenizer) + (parameterize ([current-source source]) + (PARSE tokenizer))]) + (string->symbol (format "~a-rule-parser" 'start-rule)))))] + [(_ not-a-rule-id) + (raise-syntax-error #f + (format "Rule ~a is not defined in the grammar" (syntax-e #'not-a-rule-id)) + rule-id-stx)])) + + ;; start-id has to be a value, not an expr, because make-rule-parser is a macro + (define PARSE (procedure-rename (MAKE-RULE-PARSER START-ID) 'PARSE)) + + (define (PARSE-TO-DATUM x) + (let loop ([x (syntax->datum (PARSE x))]) + (cond + [(list? x) (map loop x)] + [else x]))) + + (define PARSE-TREE PARSE-TO-DATUM))))])) -;; body of module invokes `rules` -(provide rules) -(define-syntax (rules rules-stx) (rules-codegen rules-stx)) diff --git a/brag/rules/stx.rkt b/brag/rules/stx.rkt index da57217..141c453 100755 --- a/brag/rules/stx.rkt +++ b/brag/rules/stx.rkt @@ -11,12 +11,8 @@ ;; rules->stx: (listof rule) -> syntax (define (rules->stx source rules #:original-stx [original-stx #f]) - (define rule-stxs - (map (lambda (stx) (rule->stx source stx)) - rules)) - (datum->syntax #f - `(rules ,@rule-stxs) - original-stx)) + (datum->syntax #f (for/list ([stx (in-list rules)]) + (rule->stx source stx)) original-stx)) (define (rule->stx source a-rule)