From 11539991ecf6dcb37ae54560e3d452b49a073032 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Mon, 24 Jun 2019 12:18:36 -0700 Subject: [PATCH] subparsers --- brag-lib/brag/codegen/expander.rkt | 58 ++++++++++++++++++--------- brag-lib/brag/examples/subparser.rkt | 5 +++ brag-lib/brag/test/test-all.rkt | 1 + brag-lib/brag/test/test-subparser.rkt | 9 +++++ brag/brag/brag.scrbl | 6 ++- 5 files changed, 58 insertions(+), 21 deletions(-) create mode 100755 brag-lib/brag/examples/subparser.rkt create mode 100755 brag-lib/brag/test/test-subparser.rkt diff --git a/brag-lib/brag/codegen/expander.rkt b/brag-lib/brag/codegen/expander.rkt index 39690d7..a6a759e 100755 --- a/brag-lib/brag/codegen/expander.rkt +++ b/brag-lib/brag/codegen/expander.rkt @@ -1,11 +1,13 @@ #lang racket/base (require (for-syntax racket/base racket/list + racket/syntax "codegen.rkt" "runtime.rkt" "flatten.rkt") br-parser-tools/lex br-parser-tools/cfg-parser + racket/promise (prefix-in bs: brag/support) racket/set) @@ -34,25 +36,33 @@ (check-all-rules-satisfiable! rules) (define rule-ids (map rule-id rules)) + (define rule-id-datums (map syntax-e rule-ids)) - (with-syntax ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule. - [((TOKEN-TYPE . TOKEN-TYPE-CONSTRUCTOR) ...) - (for/list ([tt (in-list (rules->token-types rules))]) - (cons tt (string->symbol (format "token-~a" tt))))] - ;; Flatten rules to use the yacc-style ruleset that br-parser-tools supports - [GENERATED-RULE-CODES (map flat-rule->yacc-rule (flatten-rules rules))] - ;; main exports. Break hygiene so they're also available at top-level / repl - [(PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES) - (map (λ (sym) (datum->syntax rules-stx sym)) - '(parse parse-to-datum parse-tree make-rule-parser all-token-types))] - [TOKEN (datum->syntax rules-stx 'token)] ; for repl - [RULE-IDS (map syntax-e rule-ids)] - [RULES-STX rules-stx]) + (with-syntax* ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule. + [((TOKEN-TYPE . TOKEN-TYPE-CONSTRUCTOR) ...) + (for/list ([tt (in-list (rules->token-types rules))]) + (cons tt (string->symbol (format "token-~a" tt))))] + ;; Flatten rules to use the yacc-style ruleset that br-parser-tools supports + [GENERATED-RULE-CODES (map flat-rule->yacc-rule (flatten-rules rules))] + ;; main exports. Break hygiene so they're also available at top-level / repl + [(PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES) + (map (λ (sym) (datum->syntax rules-stx sym)) + '(parse parse-to-datum parse-tree make-rule-parser all-token-types))] + [TOKEN (datum->syntax rules-stx 'token)] ; for repl + [(RULE-ID ...) rule-id-datums] + [(PARSE-RULE-ID ...) + (map (λ (dat) (format-id rules-stx "parse-~a" dat)) rule-id-datums)] + [(PARSE-RULE-ID-TO-DATUM ...) + (map (λ (dat) (format-id rules-stx "parse-~a-to-datum" dat)) rule-id-datums)] + [(PARSE-START-ID . _) #'(PARSE-RULE-ID ...)] + [(PARSE-START-ID-TO-DATUM . _) #'(PARSE-RULE-ID-TO-DATUM ...)] + [RULES-STX rules-stx]) ;; this stx object represents the top level of a #lang brag module. ;; so any `define`s are automatically available at the repl. ;; and only identifiers explicitly `provide`d are visible on import. #'(#%module-begin - (provide PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES) + (provide PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES + PARSE-RULE-ID ... PARSE-RULE-ID-TO-DATUM ...) ;; handle brag/support `token` with special identifier ;; so it doesn't conflict with brag's internal `token` macro @@ -75,7 +85,7 @@ (syntax-case rule-id-stx () [(_ start-rule) (and (identifier? #'start-rule) - (member (syntax-e #'start-rule) 'RULE-IDS)) + (member (syntax-e #'start-rule) '(RULE-ID ...))) ;; The cfg-parser depends on the start-rule provided in (start ...) to have the same ;; context as the rest of this body. Hence RECOLORED-START-RULE (with-syntax ([RECOLORED-START-RULE (datum->syntax #'RULES-STX (syntax-e #'start-rule))]) @@ -107,10 +117,18 @@ (format "Rule ~a is not defined in the grammar" (syntax-e #'not-a-rule-id)) rule-id-stx)])) - ;; start-id has to be a value, not an expr, because make-rule-parser is a macro - (define PARSE (procedure-rename (MAKE-RULE-PARSER START-ID) 'PARSE)) - - (define (PARSE-TO-DATUM x) (syntax->datum (PARSE x))) + (define PARSE-RULE-ID + (procedure-rename + (let ([func-p (delay (MAKE-RULE-PARSER RULE-ID))]) + (λ args (apply (force func-p) args))) + 'PARSE-RULE-ID)) ... + (define (PARSE-RULE-ID-TO-DATUM x) + (syntax->datum (PARSE-RULE-ID x))) ... + + + ;; start-id has to be a value, not an expr, because make-rule-parser is a macro + (define PARSE (procedure-rename PARSE-START-ID 'PARSE)) + (define (PARSE-TO-DATUM x) (syntax->datum (PARSE x))) - (define PARSE-TREE PARSE-TO-DATUM))))])) + (define PARSE-TREE PARSE-TO-DATUM))))])) diff --git a/brag-lib/brag/examples/subparser.rkt b/brag-lib/brag/examples/subparser.rkt new file mode 100755 index 0000000..09e3cfb --- /dev/null +++ b/brag-lib/brag/examples/subparser.rkt @@ -0,0 +1,5 @@ +#lang brag + +top : foo +foo : bar +bar : "x" \ No newline at end of file diff --git a/brag-lib/brag/test/test-all.rkt b/brag-lib/brag/test/test-all.rkt index a101de0..7412ca1 100755 --- a/brag-lib/brag/test/test-all.rkt +++ b/brag-lib/brag/test/test-all.rkt @@ -19,6 +19,7 @@ "test-simple-arithmetic-grammar.rkt" "test-simple-line-drawing.rkt" "test-start-and-atok.rkt" + "test-subparser.rkt" "test-top-level-cut.rkt" "test-weird-grammar.rkt" "test-whitespace.rkt" diff --git a/brag-lib/brag/test/test-subparser.rkt b/brag-lib/brag/test/test-subparser.rkt new file mode 100755 index 0000000..02e4714 --- /dev/null +++ b/brag-lib/brag/test/test-subparser.rkt @@ -0,0 +1,9 @@ +#lang racket/base +(require brag/examples/subparser + brag/support + rackunit) + +(check-equal? (parse-top-to-datum "x") (parse-to-datum "x")) +(check-equal? (parse-top-to-datum "x") '(top (foo (bar "x")))) +(check-equal? (parse-foo-to-datum "x") '(foo (bar "x"))) +(check-equal? (parse-bar-to-datum "x") '(bar "x")) \ No newline at end of file diff --git a/brag/brag/brag.scrbl b/brag/brag/brag.scrbl index fa9b0b9..a9ab147 100755 --- a/brag/brag/brag.scrbl +++ b/brag/brag/brag.scrbl @@ -923,7 +923,9 @@ bindings. The most important of these is @racket[parse]: @racket[token-source]. The optional @racket[source-path] argument is used to enrich the syntax-location fields. - The @deftech{token source} can either be a sequence, or a 0-arity function that + @tt{brag} also exports a function called @racket[parse-rule-id] for each @racket[_rule-id] in the grammar. So a grammar with rules called @tt{top}, @tt{foo}, and @tt{bar} would export @racket[parse-top], @racket[parse-foo], and @racket[parse-bar]. These take the same arguments as @racket[parse], but use the corresponding rule for the initial production, instead of the first rule. + + The @deftech{token source} can either be a sequence, or a zero-arity function that produces @tech{tokens}. A @deftech{token} in @tt{brag} can be any of the following values: @@ -974,6 +976,8 @@ Thus, it's only the presence of @tech{rule identifier}s in a rule's (-> token))]) list?]{ Same as @racket[parse], but the result is converted into a plain datum. Useful for testing or debugging a parser. + + @tt{brag} also exports a function called @racket[parse-rule-id-to-datum] for each @racket[_rule-id] in the grammar. So a grammar with rules called @tt{top}, @tt{foo}, and @tt{bar} would export @racket[parse-top-to-datum], @racket[parse-foo-to-datum], and @racket[parse-bar-to-datum]. These take the same arguments as @racket[parse-to-datum], but use the corresponding rule for the initial production, instead of the first rule. }