subparsers

dev-subparser
Matthew Butterick 5 years ago
parent 4732784dee
commit 11539991ec

@ -1,11 +1,13 @@
#lang racket/base
(require (for-syntax racket/base
racket/list
racket/syntax
"codegen.rkt"
"runtime.rkt"
"flatten.rkt")
br-parser-tools/lex
br-parser-tools/cfg-parser
racket/promise
(prefix-in bs: brag/support)
racket/set)
@ -34,25 +36,33 @@
(check-all-rules-satisfiable! rules)
(define rule-ids (map rule-id rules))
(define rule-id-datums (map syntax-e rule-ids))
(with-syntax ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule.
[((TOKEN-TYPE . TOKEN-TYPE-CONSTRUCTOR) ...)
(for/list ([tt (in-list (rules->token-types rules))])
(cons tt (string->symbol (format "token-~a" tt))))]
;; Flatten rules to use the yacc-style ruleset that br-parser-tools supports
[GENERATED-RULE-CODES (map flat-rule->yacc-rule (flatten-rules rules))]
;; main exports. Break hygiene so they're also available at top-level / repl
[(PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES)
(map (λ (sym) (datum->syntax rules-stx sym))
'(parse parse-to-datum parse-tree make-rule-parser all-token-types))]
[TOKEN (datum->syntax rules-stx 'token)] ; for repl
[RULE-IDS (map syntax-e rule-ids)]
[RULES-STX rules-stx])
(with-syntax* ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule.
[((TOKEN-TYPE . TOKEN-TYPE-CONSTRUCTOR) ...)
(for/list ([tt (in-list (rules->token-types rules))])
(cons tt (string->symbol (format "token-~a" tt))))]
;; Flatten rules to use the yacc-style ruleset that br-parser-tools supports
[GENERATED-RULE-CODES (map flat-rule->yacc-rule (flatten-rules rules))]
;; main exports. Break hygiene so they're also available at top-level / repl
[(PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES)
(map (λ (sym) (datum->syntax rules-stx sym))
'(parse parse-to-datum parse-tree make-rule-parser all-token-types))]
[TOKEN (datum->syntax rules-stx 'token)] ; for repl
[(RULE-ID ...) rule-id-datums]
[(PARSE-RULE-ID ...)
(map (λ (dat) (format-id rules-stx "parse-~a" dat)) rule-id-datums)]
[(PARSE-RULE-ID-TO-DATUM ...)
(map (λ (dat) (format-id rules-stx "parse-~a-to-datum" dat)) rule-id-datums)]
[(PARSE-START-ID . _) #'(PARSE-RULE-ID ...)]
[(PARSE-START-ID-TO-DATUM . _) #'(PARSE-RULE-ID-TO-DATUM ...)]
[RULES-STX rules-stx])
;; this stx object represents the top level of a #lang brag module.
;; so any `define`s are automatically available at the repl.
;; and only identifiers explicitly `provide`d are visible on import.
#'(#%module-begin
(provide PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES)
(provide PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES
PARSE-RULE-ID ... PARSE-RULE-ID-TO-DATUM ...)
;; handle brag/support `token` with special identifier
;; so it doesn't conflict with brag's internal `token` macro
@ -75,7 +85,7 @@
(syntax-case rule-id-stx ()
[(_ start-rule)
(and (identifier? #'start-rule)
(member (syntax-e #'start-rule) 'RULE-IDS))
(member (syntax-e #'start-rule) '(RULE-ID ...)))
;; The cfg-parser depends on the start-rule provided in (start ...) to have the same
;; context as the rest of this body. Hence RECOLORED-START-RULE
(with-syntax ([RECOLORED-START-RULE (datum->syntax #'RULES-STX (syntax-e #'start-rule))])
@ -107,10 +117,18 @@
(format "Rule ~a is not defined in the grammar" (syntax-e #'not-a-rule-id))
rule-id-stx)]))
;; start-id has to be a value, not an expr, because make-rule-parser is a macro
(define PARSE (procedure-rename (MAKE-RULE-PARSER START-ID) 'PARSE))
(define (PARSE-TO-DATUM x) (syntax->datum (PARSE x)))
(define PARSE-RULE-ID
(procedure-rename
(let ([func-p (delay (MAKE-RULE-PARSER RULE-ID))])
(λ args (apply (force func-p) args)))
'PARSE-RULE-ID)) ...
(define (PARSE-RULE-ID-TO-DATUM x)
(syntax->datum (PARSE-RULE-ID x))) ...
;; start-id has to be a value, not an expr, because make-rule-parser is a macro
(define PARSE (procedure-rename PARSE-START-ID 'PARSE))
(define (PARSE-TO-DATUM x) (syntax->datum (PARSE x)))
(define PARSE-TREE PARSE-TO-DATUM))))]))
(define PARSE-TREE PARSE-TO-DATUM))))]))

@ -0,0 +1,5 @@
#lang brag
top : foo
foo : bar
bar : "x"

@ -19,6 +19,7 @@
"test-simple-arithmetic-grammar.rkt"
"test-simple-line-drawing.rkt"
"test-start-and-atok.rkt"
"test-subparser.rkt"
"test-top-level-cut.rkt"
"test-weird-grammar.rkt"
"test-whitespace.rkt"

@ -0,0 +1,9 @@
#lang racket/base
(require brag/examples/subparser
brag/support
rackunit)
(check-equal? (parse-top-to-datum "x") (parse-to-datum "x"))
(check-equal? (parse-top-to-datum "x") '(top (foo (bar "x"))))
(check-equal? (parse-foo-to-datum "x") '(foo (bar "x")))
(check-equal? (parse-bar-to-datum "x") '(bar "x"))

@ -923,7 +923,9 @@ bindings. The most important of these is @racket[parse]:
@racket[token-source]. The optional @racket[source-path] argument is used to enrich the
syntax-location fields.
The @deftech{token source} can either be a sequence, or a 0-arity function that
@tt{brag} also exports a function called @racket[parse-rule-id] for each @racket[_rule-id] in the grammar. So a grammar with rules called @tt{top}, @tt{foo}, and @tt{bar} would export @racket[parse-top], @racket[parse-foo], and @racket[parse-bar]. These take the same arguments as @racket[parse], but use the corresponding rule for the initial production, instead of the first rule.
The @deftech{token source} can either be a sequence, or a zero-arity function that
produces @tech{tokens}.
A @deftech{token} in @tt{brag} can be any of the following values:
@ -974,6 +976,8 @@ Thus, it's only the presence of @tech{rule identifier}s in a rule's
(-> token))])
list?]{
Same as @racket[parse], but the result is converted into a plain datum. Useful for testing or debugging a parser.
@tt{brag} also exports a function called @racket[parse-rule-id-to-datum] for each @racket[_rule-id] in the grammar. So a grammar with rules called @tt{top}, @tt{foo}, and @tt{bar} would export @racket[parse-top-to-datum], @racket[parse-foo-to-datum], and @racket[parse-bar-to-datum]. These take the same arguments as @racket[parse-to-datum], but use the corresponding rule for the initial production, instead of the first rule.
}

Loading…
Cancel
Save