From 11539991ecf6dcb37ae54560e3d452b49a073032 Mon Sep 17 00:00:00 2001
From: Matthew Butterick <mb@mbtype.com>
Date: Mon, 24 Jun 2019 12:18:36 -0700
Subject: [PATCH] subparsers

---
 brag-lib/brag/codegen/expander.rkt    | 58 ++++++++++++++++++---------
 brag-lib/brag/examples/subparser.rkt  |  5 +++
 brag-lib/brag/test/test-all.rkt       |  1 +
 brag-lib/brag/test/test-subparser.rkt |  9 +++++
 brag/brag/brag.scrbl                  |  6 ++-
 5 files changed, 58 insertions(+), 21 deletions(-)
 create mode 100755 brag-lib/brag/examples/subparser.rkt
 create mode 100755 brag-lib/brag/test/test-subparser.rkt

diff --git a/brag-lib/brag/codegen/expander.rkt b/brag-lib/brag/codegen/expander.rkt
index 39690d7..a6a759e 100755
--- a/brag-lib/brag/codegen/expander.rkt
+++ b/brag-lib/brag/codegen/expander.rkt
@@ -1,11 +1,13 @@
 #lang racket/base
 (require (for-syntax racket/base
                      racket/list
+                     racket/syntax
                      "codegen.rkt"
                      "runtime.rkt"
                      "flatten.rkt")
          br-parser-tools/lex
          br-parser-tools/cfg-parser
+         racket/promise
          (prefix-in bs: brag/support)
          racket/set)
 
@@ -34,25 +36,33 @@
        (check-all-rules-satisfiable! rules)
        
        (define rule-ids (map rule-id rules))
+       (define rule-id-datums (map syntax-e rule-ids))
        
-       (with-syntax ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule.
-                     [((TOKEN-TYPE . TOKEN-TYPE-CONSTRUCTOR) ...)
-                      (for/list ([tt (in-list (rules->token-types rules))])
-                        (cons tt (string->symbol (format "token-~a" tt))))]
-                     ;; Flatten rules to use the yacc-style ruleset that br-parser-tools supports       
-                     [GENERATED-RULE-CODES (map flat-rule->yacc-rule (flatten-rules rules))]
-                     ;; main exports. Break hygiene so they're also available at top-level / repl
-                     [(PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES)
-                      (map (λ (sym) (datum->syntax rules-stx sym))
-                           '(parse parse-to-datum parse-tree make-rule-parser all-token-types))]
-                     [TOKEN (datum->syntax rules-stx 'token)] ; for repl
-                     [RULE-IDS (map syntax-e rule-ids)]
-                     [RULES-STX rules-stx])
+       (with-syntax* ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule.
+                      [((TOKEN-TYPE . TOKEN-TYPE-CONSTRUCTOR) ...)
+                       (for/list ([tt (in-list (rules->token-types rules))])
+                         (cons tt (string->symbol (format "token-~a" tt))))]
+                      ;; Flatten rules to use the yacc-style ruleset that br-parser-tools supports       
+                      [GENERATED-RULE-CODES (map flat-rule->yacc-rule (flatten-rules rules))]
+                      ;; main exports. Break hygiene so they're also available at top-level / repl
+                      [(PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES)
+                       (map (λ (sym) (datum->syntax rules-stx sym))
+                            '(parse parse-to-datum parse-tree make-rule-parser all-token-types))]
+                      [TOKEN (datum->syntax rules-stx 'token)] ; for repl
+                      [(RULE-ID ...) rule-id-datums]
+                      [(PARSE-RULE-ID ...)
+                       (map (λ (dat) (format-id rules-stx "parse-~a" dat)) rule-id-datums)]
+                      [(PARSE-RULE-ID-TO-DATUM ...)
+                       (map (λ (dat) (format-id rules-stx "parse-~a-to-datum" dat)) rule-id-datums)]
+                      [(PARSE-START-ID . _) #'(PARSE-RULE-ID ...)]
+                      [(PARSE-START-ID-TO-DATUM . _) #'(PARSE-RULE-ID-TO-DATUM ...)]
+                      [RULES-STX rules-stx])
          ;; this stx object represents the top level of a #lang brag module.
          ;; so any `define`s are automatically available at the repl.
          ;; and only identifiers explicitly `provide`d are visible on import.
          #'(#%module-begin             
-            (provide PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES)
+            (provide PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES
+                     PARSE-RULE-ID ... PARSE-RULE-ID-TO-DATUM ...)
 
             ;; handle brag/support `token` with special identifier
             ;; so it doesn't conflict with brag's internal `token` macro
@@ -75,7 +85,7 @@
               (syntax-case rule-id-stx ()
                 [(_ start-rule)
                  (and (identifier? #'start-rule)
-                      (member (syntax-e #'start-rule) 'RULE-IDS))
+                      (member (syntax-e #'start-rule) '(RULE-ID ...)))
                  ;; The cfg-parser depends on the start-rule provided in (start ...) to have the same
                  ;; context as the rest of this body. Hence RECOLORED-START-RULE
                  (with-syntax ([RECOLORED-START-RULE (datum->syntax #'RULES-STX (syntax-e #'start-rule))])
@@ -107,10 +117,18 @@
                                      (format "Rule ~a is not defined in the grammar" (syntax-e #'not-a-rule-id))
                                      rule-id-stx)]))
 
-            ;; start-id has to be a value, not an expr, because make-rule-parser is a macro
-            (define PARSE (procedure-rename (MAKE-RULE-PARSER START-ID) 'PARSE))
-             
-            (define (PARSE-TO-DATUM x) (syntax->datum (PARSE x)))
+            (define PARSE-RULE-ID
+              (procedure-rename
+               (let ([func-p (delay (MAKE-RULE-PARSER RULE-ID))])
+                 (λ args (apply (force func-p) args)))
+               'PARSE-RULE-ID)) ...
+                                (define (PARSE-RULE-ID-TO-DATUM x)
+                                  (syntax->datum (PARSE-RULE-ID x))) ...
+
+                                                                     
+                                                                     ;; start-id has to be a value, not an expr, because make-rule-parser is a macro
+                                                                     (define PARSE (procedure-rename PARSE-START-ID 'PARSE))
+                                                                     (define (PARSE-TO-DATUM x) (syntax->datum (PARSE x)))
 
-            (define PARSE-TREE PARSE-TO-DATUM))))]))
+                                                                     (define PARSE-TREE PARSE-TO-DATUM))))]))
 
diff --git a/brag-lib/brag/examples/subparser.rkt b/brag-lib/brag/examples/subparser.rkt
new file mode 100755
index 0000000..09e3cfb
--- /dev/null
+++ b/brag-lib/brag/examples/subparser.rkt
@@ -0,0 +1,5 @@
+#lang brag
+
+top : foo
+foo : bar
+bar : "x"
\ No newline at end of file
diff --git a/brag-lib/brag/test/test-all.rkt b/brag-lib/brag/test/test-all.rkt
index a101de0..7412ca1 100755
--- a/brag-lib/brag/test/test-all.rkt
+++ b/brag-lib/brag/test/test-all.rkt
@@ -19,6 +19,7 @@
          "test-simple-arithmetic-grammar.rkt"
          "test-simple-line-drawing.rkt"
          "test-start-and-atok.rkt"
+         "test-subparser.rkt"
          "test-top-level-cut.rkt"
          "test-weird-grammar.rkt"
          "test-whitespace.rkt"
diff --git a/brag-lib/brag/test/test-subparser.rkt b/brag-lib/brag/test/test-subparser.rkt
new file mode 100755
index 0000000..02e4714
--- /dev/null
+++ b/brag-lib/brag/test/test-subparser.rkt
@@ -0,0 +1,9 @@
+#lang racket/base
+(require brag/examples/subparser
+         brag/support
+         rackunit)
+
+(check-equal? (parse-top-to-datum "x") (parse-to-datum "x"))
+(check-equal? (parse-top-to-datum "x") '(top (foo (bar "x"))))
+(check-equal? (parse-foo-to-datum "x") '(foo (bar "x")))
+(check-equal? (parse-bar-to-datum "x") '(bar "x"))
\ No newline at end of file
diff --git a/brag/brag/brag.scrbl b/brag/brag/brag.scrbl
index fa9b0b9..a9ab147 100755
--- a/brag/brag/brag.scrbl
+++ b/brag/brag/brag.scrbl
@@ -923,7 +923,9 @@ bindings. The most important of these is @racket[parse]:
  @racket[token-source]. The optional @racket[source-path] argument is used to enrich the 
  syntax-location fields.
 
- The @deftech{token source} can either be a sequence, or a 0-arity function that
+ @tt{brag} also exports a function called @racket[parse-rule-id] for each @racket[_rule-id] in the grammar. So a grammar with rules called @tt{top}, @tt{foo}, and @tt{bar} would export @racket[parse-top], @racket[parse-foo], and @racket[parse-bar]. These take the same arguments as @racket[parse], but use the corresponding rule for the initial production, instead of the first rule.
+
+ The @deftech{token source} can either be a sequence, or a zero-arity function that
  produces @tech{tokens}.
 
  A @deftech{token} in @tt{brag} can be any of the following values:
@@ -974,6 +976,8 @@ Thus, it's only the presence of @tech{rule identifier}s in a rule's
                                              (-> token))])
          list?]{
  Same as @racket[parse], but the result is converted into a plain datum. Useful for testing or debugging a parser.
+
+  @tt{brag} also exports a function called @racket[parse-rule-id-to-datum] for each @racket[_rule-id] in the grammar. So a grammar with rules called @tt{top}, @tt{foo}, and @tt{bar} would export @racket[parse-top-to-datum], @racket[parse-foo-to-datum], and @racket[parse-bar-to-datum]. These take the same arguments as @racket[parse-to-datum], but use the corresponding rule for the initial production, instead of the first rule.
 }