red refaction

hide-top-rule-name
Matthew Butterick 7 years ago
parent b5c75eee64
commit 2ad5296567

@ -1,3 +0,0 @@
#lang racket/base
(require br-parser-tools/cfg-parser)
(provide cfg-parser)

@ -18,8 +18,8 @@
;; FIXME: abstract this so we can just call (rules ...) without
;; generating the whole module body.
(define (rules-codegen rules-stx
#:parser-provider-module [parser-provider-module 'br-parser-tools/yacc]
#:parser-provider-form [parser-provider-form 'parser])
#:parser-provider-module [parser-provider-module 'br-parser-tools/cfg-parser]
#:parser-provider-form [parser-provider-form 'cfg-parser])
(syntax-case rules-stx ()
[(_) (raise-syntax-error 'brag
(format "The grammar does not appear to have any rules")
@ -37,12 +37,12 @@
(define rule-ids (map rule-id rules))
(define token-types ;; (listof symbol)
(let-values ([(implicit-tokens ;; (listof identifier)
explicit-tokens) ;; (listof identifier)
(rules-collect-token-types rules)])
(let-values ([(implicit-tokens explicit-tokens) (rules-collect-token-types rules)])
(remove-duplicates (append (map string->symbol (map syntax-e implicit-tokens))
(map syntax-e explicit-tokens)) eq?)))
(define (rules-stx-id sym) (datum->syntax rules-stx sym))
(with-syntax ([START-ID (first rule-ids)] ; The first rule, by default, is the start rule.
[(TOKEN-TYPE ...) token-types]
[(TOKEN-TYPE-CONSTRUCTOR ...) (for/list ([tt (in-list token-types)])
@ -50,14 +50,14 @@
[GENERATED-GRAMMAR `(grammar ,@generated-rule-codes)]
[PARSER-MODULE parser-provider-module]
[PARSER-FORM parser-provider-form]
[PARSE (syntax-local-introduce (or (syntax-property rules-stx 'parse) (error 'no-parse-id-prop)))]
[PARSE-TO-DATUM (syntax-local-introduce (or (syntax-property rules-stx 'parse-to-datum) (error 'no-parse-to-datum-id-prop)))]
[PARSE-TREE (syntax-local-introduce (or (syntax-property rules-stx 'parse-tree) (error 'no-parse-tree-id-prop)))]
[MAKE-RULE-PARSER (syntax-local-introduce (or (syntax-property rules-stx 'make-rule-parser) (error 'no-make-rule-parser-id-prop)))]
[ALL-TOKEN-TYPES (syntax-local-introduce (or (syntax-property rules-stx 'all-token-types) (error 'no-all-token-types-id-prop)))]
[TOKEN (syntax-local-introduce (or (syntax-property rules-stx 'token) (error 'no-token-id-prop)))]
[APPLY-LEXER (syntax-local-introduce (or (syntax-property rules-stx 'apply-lexer) (error 'no-apply-lexer-id-prop)))]
[APPLY-TOKENIZER-MAKER (syntax-local-introduce (or (syntax-property rules-stx 'apply-tokenizer-maker) (error 'no-apply-tokenizer-maker-id-prop)))])
[PARSE (rules-stx-id 'parse)]
[PARSE-TO-DATUM (rules-stx-id 'parse-to-datum)]
[PARSE-TREE (rules-stx-id 'parse-tree)]
[MAKE-RULE-PARSER (rules-stx-id 'make-rule-parser)]
[ALL-TOKEN-TYPES (rules-stx-id 'all-token-types)]
[TOKEN (rules-stx-id 'token)]
[APPLY-LEXER (rules-stx-id 'apply-lexer)]
[APPLY-TOKENIZER-MAKER (rules-stx-id 'apply-tokenizer-maker)])
;; this stx object represents the top level of a #lang brag module.
;; so any `define`s are automatically available at the repl.
;; and only identifiers explicitly `provide`d are visible on import.

@ -50,18 +50,9 @@ brag/codegen/sexp-based-lang
(- last-position first-position)
#f))))))
;; Extension: we'd like to cooperate with DrRacket and tell
;; it to use the default, textual lexer and color scheme when
;; editing bf programs.
;;
;; See: http://docs.racket-lang.org/guide/language-get-info.html
;; for more details, as well as the documentation in
;; syntax/module-reader.
(define (my-get-info key default default-filter)
(case key
[(color-lexer) (dynamic-require 'brag/private/colorer 'color-brag (λ () #f))]
[(drracket:indentation) (dynamic-require 'brag/private/indenter 'indent-brag (λ () #f))]
[else
(default-filter key default)]))
[else (default-filter key default)]))

@ -1,107 +1,7 @@
#lang racket/base
(require (for-syntax racket/base "codegen.rkt"))
(provide (all-from-out racket/base)) ; borrow #%module-begin from racket/base
;; A language level for automatically generating parsers out of BNF grammars.
;;
;; Danny Yoo (dyoo@hashcollision.org)
;;
;; Intent: make it trivial to generate languages for Racket. At the
;; moment, I find it painful to use br-parser-tools. This library is
;; meant to make it less agonizing.
;;
;; The intended use of this language is as follows:
;;
;;;;; s-exp-grammar.rkt ;;;;;;;;;
;; #lang brag
;; s-exp : "(" s-exp* ")" | ATOM
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; What this generates is:
;;
;; * parse: a function that consumes a source and a
;; position-aware lexer, and produces a syntax object.
;;
;; * make-rule-parser: a custom parser given a provided start rule.
;;
;; You'll still need to do a little work, by providing a lexer that
;; defines what the uppercased tokens mean. For example, you can
;; use the br-parser-tools/lex lexer tools:
;;
;; (require brag/support
;; br-parser-tools/lex
;; br-parser-tools/lex-sre)
;;
;; (define tokenize
;; (lexer-src-pos
;; [(:+ alphabetic)
;; (token 'ATOM lexeme)]
;; [whitespace
;; (return-without-pos (tokenize/1 input-port))]
;; [(:or "(" ")")
;; (token lexeme lexeme)]))
;;
;; However, that should be all you need. The output of an
;; generated grammar is an honest-to-goodness syntax
;; object with source locations, fully-labeled by the rules.
;;
;; (parse (tokenize an-input-port))
;;
;;
;; The first rule is treated as the start rule; any successful parse
;; must finish with end-of-file.
;; Terminology:
;;
;; A rule is a rule identifier, followed by a colon ":", followed by a
;; pattern.
;; A rule identifier is an identifier that is not in upper case.
;; A rule identifier should follow the Racket rules for identifiers,
;; except that it can't contain * or +.
;;
;; A token is a rule identifier that is all in upper case.
;; A pattern may either be
;;
;; * an implicit sequence of patterns,
;;
;; * a literal string,
;;
;; * a rule identifier,
;;
;; * a quanitifed pattern, either with "*" or "+",
;;
;; * an optional pattern: a pattern surrounded by "[" and "]", or
;;
;; * a grouped sequence: a pattern surrounded by "(" and ")".
(require (for-syntax racket/base
"codegen.rkt"
syntax/strip-context))
(provide rules
(except-out (all-from-out racket/base) #%module-begin)
(rename-out [my-module-begin #%module-begin]))
(define-syntax (my-module-begin module-stx)
(syntax-case module-stx ()
[(_ RULES-STX)
(with-syntax ([RULES-STX (for/fold ([stx #'RULES-STX])
([sym (in-list '(parse parse-to-datum parse-tree make-rule-parser all-token-types token apply-lexer apply-tokenizer-maker))])
(syntax-property stx sym (syntax-local-introduce (replace-context module-stx (datum->syntax #f sym)))))])
#'(#%module-begin RULES-STX))]))
(define-syntax (rules rules-stx)
(rules-codegen #:parser-provider-module 'brag/cfg-parser/cfg-parser ;; 'br-parser-tools/yacc
#:parser-provider-form 'cfg-parser ;; 'parser
rules-stx))
;; body of module invokes `rules`
(provide rules)
(define-syntax (rules rules-stx) (rules-codegen rules-stx))

@ -5,12 +5,7 @@
;; These are just here to provide bindings for Check Syntax.
;; Otherwise, we should never hit these, as the toplevel rules-codegen
;; should eliminate all uses of these if it does the right thing.
(define (rules stx) (raise-syntax-error #f "Used out of context of rules" stx))
(define (rule stx) (raise-syntax-error #f "Used out of context of rules" stx))
(define (id stx) (raise-syntax-error #f "Used out of context of rules" stx))
(define (lit stx) (raise-syntax-error #f "Used out of context of rules" stx))
(define (token stx) (raise-syntax-error #f "Used out of context of rules" stx))
(define (choice stx) (raise-syntax-error #f "Used out of context of rules" stx))
(define (repeat stx) (raise-syntax-error #f "Used out of context of rules" stx))
(define (maybe stx) (raise-syntax-error #f "Used out of context of rules" stx))
(define (seq stx) (raise-syntax-error #f "Used out of context of rules" stx))
(define-syntax-rule (define-errors ID ...)
(begin (define (ID stx) (raise-syntax-error 'ID "Used out of context of rules" stx)) ...))
(define-errors rules rule id lit token choice repeat maybe seq)

@ -1,6 +1,5 @@
#lang racket/base
(require "test-0n1.rkt"
"test-0n1n.rkt"
"test-01-equal.rkt"

Loading…
Cancel
Save