use the force

hide-top-rule-name
Matthew Butterick 6 years ago
parent 52f1f4bce2
commit 40f5cfa640

@ -1,139 +1,16 @@
#lang racket/base
(require (for-template racket/base)
racket/list
(require racket/list
racket/syntax
syntax/srcloc
brag/rules/stx-types
"flatten.rkt"
syntax/id-table
(prefix-in sat: "satisfaction.rkt")
(prefix-in support: brag/support)
(prefix-in stxparse: syntax/parse))
(provide rules-codegen)
;; Generates the body of the module.
;; FIXME: abstract this so we can just call (rules ...) without
;; generating the whole module body.
(define (rules-codegen rules-stx
#:parser-provider-module [parser-provider-module 'br-parser-tools/cfg-parser]
#:parser-provider-form [parser-provider-form 'cfg-parser])
(syntax-case rules-stx ()
[(_) (raise-syntax-error 'brag
(format "The grammar does not appear to have any rules")
rules-stx)]
[(_ . RULES)
(let ([rules (syntax->list #'RULES)]) ;; (listof stx)
(check-all-rules-defined! rules)
(check-all-rules-no-duplicates! rules)
(check-all-rules-satisfiable! rules)
;; We flatten the rules so we can use
;; the yacc-style ruleset that br-parser-tools supports.
(define generated-rule-codes (map flat-rule->yacc-rule (flatten-rules rules)))
(define rule-ids (map rule-id rules))
(define token-types ;; (listof symbol)
(let-values ([(implicit-tokens explicit-tokens) (rules-collect-token-types rules)])
(remove-duplicates (append (map string->symbol (map syntax-e implicit-tokens))
(map syntax-e explicit-tokens)) eq?)))
(define (rules-stx-id sym) (datum->syntax rules-stx sym))
(with-syntax ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule.
[(TOKEN-TYPE ...) token-types]
[(TOKEN-TYPE-CONSTRUCTOR ...) (for/list ([tt (in-list token-types)])
(string->symbol (format "token-~a" tt)))]
[GENERATED-GRAMMAR `(grammar ,@generated-rule-codes)]
[PARSER-MODULE parser-provider-module]
[PARSER-FORM parser-provider-form]
[PARSE (rules-stx-id 'parse)]
[PARSE-TO-DATUM (rules-stx-id 'parse-to-datum)]
[PARSE-TREE (rules-stx-id 'parse-tree)]
[MAKE-RULE-PARSER (rules-stx-id 'make-rule-parser)]
[ALL-TOKEN-TYPES (rules-stx-id 'all-token-types)]
[TOKEN (rules-stx-id 'token)]
[APPLY-LEXER (rules-stx-id 'apply-lexer)]
[APPLY-TOKENIZER-MAKER (rules-stx-id 'apply-tokenizer-maker)])
;; this stx object represents the top level of a #lang brag module.
;; so any `define`s are automatically available at the repl.
;; and only identifiers explicitly `provide`d are visible on import.
(quasisyntax/loc rules-stx
(begin
(require br-parser-tools/lex
PARSER-MODULE
brag/codegen/runtime
brag/support
brag/private/internal-support
racket/set
(for-syntax racket/base))
(provide PARSE
PARSE-TO-DATUM
PARSE-TREE
MAKE-RULE-PARSER
ALL-TOKEN-TYPES)
;; helpers from brag/support
(define TOKEN token)
(define APPLY-LEXER apply-lexer)
(define APPLY-TOKENIZER-MAKER apply-tokenizer-maker)
(define-tokens enumerated-tokens (TOKEN-TYPE ...))
;; all-token-types lists all the tokens (except for EOF)
(define ALL-TOKEN-TYPES (set-remove (set 'TOKEN-TYPE ...) 'EOF))
;; For internal use by the permissive tokenizer only:
(define all-tokens-hash/mutable
(make-hash (list ;; Note: we also allow the eof object here, to make
;; the permissive tokenizer even nicer to work with.
(cons eof token-EOF)
(cons 'TOKEN-TYPE TOKEN-TYPE-CONSTRUCTOR) ...)))
(define-syntax (MAKE-RULE-PARSER rule-id-stx)
(syntax-case rule-id-stx ()
[(_ start-rule)
(and (identifier? #'start-rule)
(member (syntax-e #'start-rule) '#,(map syntax-e rule-ids)))
;; HACK HACK HACK
;; The cfg-parser depends on the start-rule provided in (start ...) to have the same
;; context as the rest of this body, so I need to hack this. I don't like this, but
;; I don't know what else to do. Hence recolored-start-rule.
(with-syntax ([RECOLORED-START-RULE (datum->syntax #'#,rules-stx (syntax-e #'start-rule))])
#'(let ([THE-GRAMMAR (PARSER-FORM (tokens enumerated-tokens)
(src-pos)
(start RECOLORED-START-RULE)
(end EOF)
(error THE-ERROR-HANDLER)
GENERATED-GRAMMAR)])
(procedure-rename
(case-lambda [(tokenizer)
(define next-token
(make-permissive-tokenizer tokenizer all-tokens-hash/mutable))
(THE-GRAMMAR next-token)]
[(source tokenizer)
(parameterize ([current-source source])
(PARSE tokenizer))])
(string->symbol (format "~a-rule-parser" 'start-rule)))))]
[(_ not-a-rule-id)
(raise-syntax-error #f
(format "Rule ~a is not defined in the grammar" (syntax-e #'not-a-rule-id))
rule-id-stx)]))
(define PARSE (procedure-rename (MAKE-RULE-PARSER START-ID) 'PARSE))
(define (PARSE-TO-DATUM x)
(let loop ([x (syntax->datum (PARSE x))])
(cond
[(list? x) (map loop x)]
[else x])))
(define PARSE-TREE PARSE-TO-DATUM)))))]))
(for-template racket/base
brag/codegen/runtime
brag/private/internal-support))
(provide (all-defined-out)
(for-template (all-from-out brag/codegen/runtime
brag/private/internal-support)))
;; Given a flattened rule, returns a syntax for the code that
;; preserves as much source location as possible.
@ -395,4 +272,4 @@
#:unless (sat:node-yes? rule-node))
(raise-syntax-error #f
(format "Rule ~a has no finite derivation" (syntax-e (rule-id a-rule)))
(rule-id a-rule))))
(rule-id a-rule))))

@ -1,5 +1,5 @@
#lang s-exp syntax/module-reader
brag/codegen/sexp-based-lang
brag/codegen/expander
#:read my-read
#:read-syntax my-read-syntax
#:info my-get-info
@ -39,7 +39,7 @@ brag/codegen/sexp-based-lang
#f)))))])
(grammar-parser tokenizer)))
(define-values (last-line last-column last-position) (port-next-location in))
(list (rules->stx src rules
(rules->stx src rules
#:original-stx (datum->syntax #f 'original-stx
(list src
first-line
@ -48,7 +48,7 @@ brag/codegen/sexp-based-lang
(if (and (number? last-position)
(number? first-position))
(- last-position first-position)
#f))))))
#f)))))
(define (my-get-info key default default-filter)
(case key

@ -1,7 +1,110 @@
#lang racket/base
(require (for-syntax racket/base "codegen.rkt"))
(provide (all-from-out racket/base)) ; borrow #%module-begin from racket/base
(require (for-syntax racket/base
racket/list
"codegen.rkt"
"flatten.rkt")
br-parser-tools/lex
br-parser-tools/cfg-parser
(prefix-in bs: brag/support)
racket/set)
(provide (except-out (all-from-out racket/base) #%module-begin)
(rename-out [brag-module-begin #%module-begin]
[bs:apply-lexer apply-lexer] ; for repl
[bs:apply-tokenizer-maker apply-tokenizer-maker])) ; for repl
;; -> (listof symbol)
(define-for-syntax (rules->token-types rules)
(let-values ([(implicit-tokens explicit-tokens) (rules-collect-token-types rules)])
(remove-duplicates (append (for/list ([it (in-list implicit-tokens)])
(string->symbol (syntax-e it)))
(map syntax-e explicit-tokens)) eq?)))
(define-syntax (brag-module-begin rules-stx)
(syntax-case rules-stx ()
[(_) (raise-syntax-error 'brag
(format "The grammar does not appear to have any rules")
'brag-module)]
[(_ . RULES)
(let ([rules (syntax->list #'RULES)]) ;; (listof stx)
(check-all-rules-defined! rules)
(check-all-rules-no-duplicates! rules)
(check-all-rules-satisfiable! rules)
(define rule-ids (map rule-id rules))
(with-syntax ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule.
[((TOKEN-TYPE . TOKEN-TYPE-CONSTRUCTOR) ...)
(for/list ([tt (in-list (rules->token-types rules))])
(cons tt (string->symbol (format "token-~a" tt))))]
;; Flatten rules to use the yacc-style ruleset that br-parser-tools supports
[GENERATED-RULE-CODES (map flat-rule->yacc-rule (flatten-rules rules))]
;; main exports. Break hygiene so they're also available at top-level / repl
[(PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES)
(map (λ (sym) (datum->syntax rules-stx sym))
'(parse parse-to-datum parse-tree make-rule-parser all-token-types))]
[TOKEN (datum->syntax rules-stx 'token)] ; for repl
[RULE-IDS (map syntax-e rule-ids)]
[RULES-STX rules-stx])
;; this stx object represents the top level of a #lang brag module.
;; so any `define`s are automatically available at the repl.
;; and only identifiers explicitly `provide`d are visible on import.
#'(#%module-begin
(provide PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES)
;; handle brag/support `token` with special identifier
;; so it doesn't conflict with brag's internal `token` macro
(define TOKEN bs:token)
(define-tokens enumerated-tokens (TOKEN-TYPE ...))
;; all-token-types lists all the tokens (except for EOF)
(define ALL-TOKEN-TYPES (set-remove (set 'TOKEN-TYPE ...) 'EOF))
;; For internal use by the permissive tokenizer only:
(define all-tokens-hash/mutable
(make-hash (list ;; Note: we also allow the eof object here, to make
;; the permissive tokenizer even nicer to work with.
(cons eof token-EOF)
(cons 'TOKEN-TYPE TOKEN-TYPE-CONSTRUCTOR) ...)))
(define-syntax (MAKE-RULE-PARSER rule-id-stx)
(syntax-case rule-id-stx ()
[(_ start-rule)
(and (identifier? #'start-rule)
(member (syntax-e #'start-rule) 'RULE-IDS))
;; The cfg-parser depends on the start-rule provided in (start ...) to have the same
;; context as the rest of this body. Hence RECOLORED-START-RULE
(with-syntax ([RECOLORED-START-RULE (datum->syntax #'RULES-STX (syntax-e #'start-rule))])
#'(let ([THE-GRAMMAR (cfg-parser (tokens enumerated-tokens)
(src-pos)
(start RECOLORED-START-RULE)
(end EOF)
(error THE-ERROR-HANDLER)
(grammar . GENERATED-RULE-CODES))])
(procedure-rename
(case-lambda [(tokenizer)
(define next-token
(make-permissive-tokenizer tokenizer all-tokens-hash/mutable))
(THE-GRAMMAR next-token)]
[(source tokenizer)
(parameterize ([current-source source])
(PARSE tokenizer))])
(string->symbol (format "~a-rule-parser" 'start-rule)))))]
[(_ not-a-rule-id)
(raise-syntax-error #f
(format "Rule ~a is not defined in the grammar" (syntax-e #'not-a-rule-id))
rule-id-stx)]))
;; start-id has to be a value, not an expr, because make-rule-parser is a macro
(define PARSE (procedure-rename (MAKE-RULE-PARSER START-ID) 'PARSE))
(define (PARSE-TO-DATUM x)
(let loop ([x (syntax->datum (PARSE x))])
(cond
[(list? x) (map loop x)]
[else x])))
(define PARSE-TREE PARSE-TO-DATUM))))]))
;; body of module invokes `rules`
(provide rules)
(define-syntax (rules rules-stx) (rules-codegen rules-stx))

@ -11,12 +11,8 @@
;; rules->stx: (listof rule) -> syntax
(define (rules->stx source rules #:original-stx [original-stx #f])
(define rule-stxs
(map (lambda (stx) (rule->stx source stx))
rules))
(datum->syntax #f
`(rules ,@rule-stxs)
original-stx))
(datum->syntax #f (for/list ([stx (in-list rules)])
(rule->stx source stx)) original-stx))
(define (rule->stx source a-rule)

Loading…
Cancel
Save