diff --git a/beautiful-racket-demo/basic-demo-2/test-parser.rkt b/beautiful-racket-demo/basic-demo-2/test-parser.rkt index e6e19fe..766dc5c 100644 --- a/beautiful-racket-demo/basic-demo-2/test-parser.rkt +++ b/beautiful-racket-demo/basic-demo-2/test-parser.rkt @@ -8,4 +8,4 @@ here ) -(parse-tree (apply-tokenizer-maker make-tokenizer str)) \ No newline at end of file +(parse-to-datum (apply-tokenizer-maker make-tokenizer str)) \ No newline at end of file diff --git a/beautiful-racket-demo/basic-demo/test-parser.rkt b/beautiful-racket-demo/basic-demo/test-parser.rkt index e6e19fe..766dc5c 100644 --- a/beautiful-racket-demo/basic-demo/test-parser.rkt +++ b/beautiful-racket-demo/basic-demo/test-parser.rkt @@ -8,4 +8,4 @@ here ) -(parse-tree (apply-tokenizer-maker make-tokenizer str)) \ No newline at end of file +(parse-to-datum (apply-tokenizer-maker make-tokenizer str)) \ No newline at end of file diff --git a/beautiful-racket-demo/bf-demo/parser-tester.rkt b/beautiful-racket-demo/bf-demo/parser-tester.rkt index 37d77d9..b2ef591 100644 --- a/beautiful-racket-demo/bf-demo/parser-tester.rkt +++ b/beautiful-racket-demo/bf-demo/parser-tester.rkt @@ -1,3 +1,3 @@ #lang br (require "parser.rkt") -(parse-tree "++++-+++-++-++[>++++-+++-++-++<-]>.") \ No newline at end of file +(parse-to-datum "++++-+++-++-++[>++++-+++-++-++<-]>.") \ No newline at end of file diff --git a/beautiful-racket-demo/jsonic-demo-2/parser-test.rkt b/beautiful-racket-demo/jsonic-demo-2/parser-test.rkt index 794d965..4a5aa33 100644 --- a/beautiful-racket-demo/jsonic-demo-2/parser-test.rkt +++ b/beautiful-racket-demo/jsonic-demo-2/parser-test.rkt @@ -2,21 +2,21 @@ (require "parser.rkt" "tokenizer.rkt" brag/support rackunit) (check-equal? - (parse-tree + (parse-to-datum (apply-tokenizer-maker make-tokenizer "// line commment\n")) '(jsonic-program)) (check-equal? - (parse-tree + (parse-to-datum (apply-tokenizer-maker make-tokenizer "@$ 42 $@")) '(jsonic-program (jsonic-sexp " 42 "))) (check-equal? 
- (parse-tree + (parse-to-datum (apply-tokenizer-maker make-tokenizer "hi")) '(jsonic-program (jsonic-char "h") (jsonic-char "i"))) (check-equal? - (parse-tree + (parse-to-datum (apply-tokenizer-maker make-tokenizer "hi\n// comment\n@$ 42 $@")) '(jsonic-program diff --git a/beautiful-racket-demo/jsonic-demo-3/parser-test.rkt b/beautiful-racket-demo/jsonic-demo-3/parser-test.rkt index 794d965..4a5aa33 100644 --- a/beautiful-racket-demo/jsonic-demo-3/parser-test.rkt +++ b/beautiful-racket-demo/jsonic-demo-3/parser-test.rkt @@ -2,21 +2,21 @@ (require "parser.rkt" "tokenizer.rkt" brag/support rackunit) (check-equal? - (parse-tree + (parse-to-datum (apply-tokenizer-maker make-tokenizer "// line commment\n")) '(jsonic-program)) (check-equal? - (parse-tree + (parse-to-datum (apply-tokenizer-maker make-tokenizer "@$ 42 $@")) '(jsonic-program (jsonic-sexp " 42 "))) (check-equal? - (parse-tree + (parse-to-datum (apply-tokenizer-maker make-tokenizer "hi")) '(jsonic-program (jsonic-char "h") (jsonic-char "i"))) (check-equal? - (parse-tree + (parse-to-datum (apply-tokenizer-maker make-tokenizer "hi\n// comment\n@$ 42 $@")) '(jsonic-program diff --git a/brag/brag/brag.scrbl b/brag/brag/brag.scrbl index 523feb6..9e5f665 100755 --- a/brag/brag/brag.scrbl +++ b/brag/brag/brag.scrbl @@ -35,8 +35,8 @@ @(define my-eval (make-base-eval)) @(my-eval '(require brag/examples/nested-word-list - racket/list - racket/match)) + racket/list + racket/match)) Suppose we're given the following string: @@ -51,19 +51,19 @@ string above looks like a nested list of words. Good start. Second, how might we describe this formally — meaning, in a way that a computer could understand? A common notation to describe the structure of these things is @link["http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form"]{Backus-Naur Form} (BNF). So let's try to notate the structure of nested word lists in BNF. 
@nested[#:style 'code-inset]{ -@verbatim{ -nested-word-list: WORD - | LEFT-PAREN nested-word-list* RIGHT-PAREN + @verbatim{ + nested-word-list: WORD + | LEFT-PAREN nested-word-list* RIGHT-PAREN }} What we intend by this notation is this: @racket[nested-word-list] is either a @racket[WORD], or a parenthesized list of @racket[nested-word-list]s. We use the character @litchar{*} to represent zero or more repetitions of the previous thing. We treat the uppercased @racket[LEFT-PAREN], @racket[RIGHT-PAREN], and @racket[WORD] as placeholders for @emph{tokens} (a @tech{token} being the smallest meaningful item in the parsed string): Here are a few examples of tokens: @interaction[#:eval my-eval -(require brag/support) -(token 'LEFT-PAREN) -(token 'WORD "crunchy" #:span 7) -(token 'RIGHT-PAREN)] + (require brag/support) + (token 'LEFT-PAREN) + (token 'WORD "crunchy" #:span 7) + (token 'RIGHT-PAREN)] This BNF description is also known as a @deftech{grammar}. Just as it does in a natural language like English or French, a grammar describes something in terms of what elements can fit where. @@ -72,18 +72,18 @@ Have we made progress? We have a valid grammar. But we're still missing a @emph Meanwhile, it's clear that we don't yet have a valid program because there's no @litchar{#lang} line. Let's add one: put @litchar{#lang brag} at the top of the grammar, and save it as a file called @filepath{nested-word-list.rkt}. @filebox["nested-word-list.rkt"]{ -@verbatim{ -#lang brag -nested-word-list: WORD - | LEFT-PAREN nested-word-list* RIGHT-PAREN + @verbatim{ + #lang brag + nested-word-list: WORD + | LEFT-PAREN nested-word-list* RIGHT-PAREN }} Now it's a proper program. But what does it do? @interaction[#:eval my-eval -@eval:alts[(require "nested-word-list.rkt") (void)] -parse -] + @eval:alts[(require "nested-word-list.rkt") (void)] + parse + ] It gives us a @racket[parse] function. Let's investigate what @racket[parse] does. What happens if we pass it a sequence of tokens? 
@@ -101,31 +101,31 @@ does. What happens if we pass it a sequence of tokens? Those who have messed around with macros will recognize this as a @seclink["stx-obj" #:doc '(lib "scribblings/guide/guide.scrbl")]{syntax object}. @interaction[#:eval my-eval -(syntax->datum a-parsed-value) -] + (syntax->datum a-parsed-value) + ] That's @racket[(some [pig])], essentially. What happens if we pass our @racket[parse] function a bigger source of tokens? @interaction[#:eval my-eval -@code:comment{tokenize: string -> (sequenceof token-struct?)} -@code:comment{Generate tokens from a string:} -(define (tokenize s) - (for/list ([str (regexp-match* #px"\\(|\\)|\\w+" s)]) - (match str - ["(" - (token 'LEFT-PAREN str)] - [")" - (token 'RIGHT-PAREN str)] - [else - (token 'WORD str)]))) - -@code:comment{For example:} -(define token-source (tokenize "(welcome (to (((brag)) ())))")) -(define v (parse token-source)) -(syntax->datum v) -] + @code:comment{tokenize: string -> (sequenceof token-struct?)} + @code:comment{Generate tokens from a string:} + (define (tokenize s) + (for/list ([str (regexp-match* #px"\\(|\\)|\\w+" s)]) + (match str + ["(" + (token 'LEFT-PAREN str)] + [")" + (token 'RIGHT-PAREN str)] + [else + (token 'WORD str)]))) + + @code:comment{For example:} + (define token-source (tokenize "(welcome (to (((brag)) ())))")) + (define v (parse token-source)) + (syntax->datum v) + ] Welcome to @tt{brag}. @@ -142,25 +142,25 @@ to use: @itemize[ -@item{It provides a @litchar{#lang} for writing BNF grammars. -A module written in @litchar{#lang brag} automatically generates a -parser. The grammar controls the structure of the @tech{syntax objects} it generates.} + @item{It provides a @litchar{#lang} for writing BNF grammars. + A module written in @litchar{#lang brag} automatically generates a + parser. The grammar controls the structure of the @tech{syntax objects} it generates.} -@item{The language uses a few conventions to simplify the expression of -grammars. 
The first rule in the grammar is assumed to be the -starting production. Identifiers in @tt{UPPERCASE} are treated as -terminal tokens. All other identifiers are treated as nonterminals.} + @item{The language uses a few conventions to simplify the expression of + grammars. The first rule in the grammar is assumed to be the + starting production. Identifiers in @tt{UPPERCASE} are treated as + terminal tokens. All other identifiers are treated as nonterminals.} -@item{Tokenizers can be developed independently of parsers. -@tt{brag} takes a liberal view on tokens: they can be strings, -symbols, or instances constructed with @racket[token]. Tokens can optionally provide source location, in which case a syntax object generated by the parser will too.} + @item{Tokenizers can be developed independently of parsers. + @tt{brag} takes a liberal view on tokens: they can be strings, + symbols, or instances constructed with @racket[token]. Tokens can optionally provide source location, in which case a syntax object generated by the parser will too.} -@item{The parser can usually handle ambiguous grammars.} + @item{The parser can usually handle ambiguous grammars.} -@item{It integrates with the rest of the Racket -@link["http://docs.racket-lang.org/guide/languages.html"]{language toolchain}.} + @item{It integrates with the rest of the Racket + @link["http://docs.racket-lang.org/guide/languages.html"]{language toolchain}.} -] + ] @@ -170,29 +170,29 @@ Suppose we'd like to define a language for drawing simple ASCII diagrams. 
So if we write something like this: @nested[#:style 'inset]{ -@verbatim|{ -3 9 X; -6 3 b 3 X 3 b; -3 9 X; -}|} + @verbatim|{ + 3 9 X; + 6 3 b 3 X 3 b; + 3 9 X; + }|} It should generate the following picture: @nested[#:style 'inset]{ -@verbatim|{ -XXXXXXXXX -XXXXXXXXX -XXXXXXXXX - XXX - XXX - XXX - XXX - XXX - XXX -XXXXXXXXX -XXXXXXXXX -XXXXXXXXX -}|} + @verbatim|{ + XXXXXXXXX + XXXXXXXXX + XXXXXXXXX + XXX + XXX + XXX + XXX + XXX + XXX + XXXXXXXXX + XXXXXXXXX + XXXXXXXXX + }|} This makes sense in a casual way. But let's be more precise about how the language works. @@ -200,12 +200,12 @@ This makes sense in a casual way. But let's be more precise about how the langua Each line of the program has a semicolon at the end, and describes the output of several @emph{rows} of the line drawing. Let's look at two of the lines in the example: @itemize[ -@item{@litchar{3 9 X;}: ``Repeat the following 3 times: print @racket["X"] nine times, followed by -a newline.''} + @item{@litchar{3 9 X;}: ``Repeat the following 3 times: print @racket["X"] nine times, followed by + a newline.''} -@item{@litchar{6 3 b 3 X 3 b;}: ``Repeat the following 6 times: print @racket[" "] three times, -followed by @racket["X"] three times, followed by @racket[" "] three times, followed by a newline.''} -] + @item{@litchar{6 3 b 3 X 3 b;}: ``Repeat the following 6 times: print @racket[" "] three times, + followed by @racket["X"] three times, followed by @racket[" "] three times, followed by a newline.''} + ] Then each line consists of a @emph{repeat} number, followed by pairs of (number, character) @emph{chunks}. We'll assume here that the intent of the lowercased character @litchar{b} is to represent the printing of a 1-character whitespace @racket[" "], and for other uppercase letters to represent the printing of themselves. @@ -217,40 +217,40 @@ Here's a first pass at expressing the structure of these line-drawing programs. 
@subsection{Parsing the concrete syntax} @filebox["simple-line-drawing.rkt"]{ -@verbatim|{ -#lang brag -drawing: rows* -rows: repeat chunk+ ";" -repeat: INTEGER -chunk: INTEGER STRING -}| + @verbatim|{ + #lang brag + drawing: rows* + rows: repeat chunk+ ";" + repeat: INTEGER + chunk: INTEGER STRING + }| } @margin-note{@secref{brag-syntax} describes @tt{brag}'s syntax in more detail.} We write a @tt{brag} program as an BNF grammar, where patterns can be: @itemize[ -@item{the names of other rules (e.g. @racket[chunk])} -@item{literal and symbolic token names (e.g. @racket[";"], @racket[INTEGER])} -@item{quantified patterns (e.g. @litchar{+} to represent one-or-more repetitions)} -] + @item{the names of other rules (e.g. @racket[chunk])} + @item{literal and symbolic token names (e.g. @racket[";"], @racket[INTEGER])} + @item{quantified patterns (e.g. @litchar{+} to represent one-or-more repetitions)} + ] The result of a @tt{brag} program is a module with a @racket[parse] function that can parse tokens and produce a syntax object as a result. Let's try this function: @interaction[#:eval my-eval -(require brag/support) -@eval:alts[(require "simple-line-drawing.rkt") - (require brag/examples/simple-line-drawing)] -(define stx - (parse (list (token 'INTEGER 6) - (token 'INTEGER 2) - (token 'STRING " ") - (token 'INTEGER 3) - (token 'STRING "X") - ";"))) -(syntax->datum stx) -] + (require brag/support) + @eval:alts[(require "simple-line-drawing.rkt") + (require brag/examples/simple-line-drawing)] + (define stx + (parse (list (token 'INTEGER 6) + (token 'INTEGER 2) + (token 'STRING " ") + (token 'INTEGER 3) + (token 'STRING "X") + ";"))) + (syntax->datum stx) + ] A @emph{token} is the smallest meaningful element of a source program. Tokens can be strings, symbols, or instances of the @racket[token] data structure. (Plus a few other special cases, which we'll discuss later.) Usually, a token holds a single character from the source program. 
But sometimes it makes sense to package a sequence of characters into a single token, if the sequence has an indivisible meaning. @@ -260,53 +260,53 @@ A parser often works in conjunction with a helper function called a @emph{lexer} tokenizer: @interaction[#:eval my-eval -(require br-parser-tools/lex) -(define (tokenize ip) - (port-count-lines! ip) - (define my-lexer - (lexer-src-pos - [(repetition 1 +inf.0 numeric) - (token 'INTEGER (string->number lexeme))] - [upper-case - (token 'STRING lexeme)] - ["b" - (token 'STRING " ")] - [";" - (token ";" lexeme)] - [whitespace - (token 'WHITESPACE lexeme #:skip? #t)] - [(eof) - (void)])) - (define (next-token) (my-lexer ip)) - next-token) - -(define a-sample-input-port (open-input-string "6 2 b 3 X;")) -(define token-thunk (tokenize a-sample-input-port)) -@code:comment{Now we can pass token-thunk to the parser:} -(define another-stx (parse token-thunk)) -(syntax->datum another-stx) -@code:comment{The syntax object has location information:} -(syntax-line another-stx) -(syntax-column another-stx) -(syntax-span another-stx) -] + (require br-parser-tools/lex) + (define (tokenize ip) + (port-count-lines! ip) + (define my-lexer + (lexer-src-pos + [(repetition 1 +inf.0 numeric) + (token 'INTEGER (string->number lexeme))] + [upper-case + (token 'STRING lexeme)] + ["b" + (token 'STRING " ")] + [";" + (token ";" lexeme)] + [whitespace + (token 'WHITESPACE lexeme #:skip? 
#t)] + [(eof) + (void)])) + (define (next-token) (my-lexer ip)) + next-token) + + (define a-sample-input-port (open-input-string "6 2 b 3 X;")) + (define token-thunk (tokenize a-sample-input-port)) + @code:comment{Now we can pass token-thunk to the parser:} + (define another-stx (parse token-thunk)) + (syntax->datum another-stx) + @code:comment{The syntax object has location information:} + (syntax-line another-stx) + (syntax-column another-stx) + (syntax-span another-stx) + ] Note also from this lexer example: @itemize[ -@item{@racket[parse] accepts as input either a sequence of tokens, or a -function that produces tokens (which @racket[parse] will call repeatedly to get the next token).} + @item{@racket[parse] accepts as input either a sequence of tokens, or a + function that produces tokens (which @racket[parse] will call repeatedly to get the next token).} -@item{As an alternative to the basic @racket[token] structure, a token can also be an instance of the @racket[position-token] structure (also found in @racketmodname[br-parser-tools/lex]). In that case, the token will try to derive its position from that of the position-token.} + @item{As an alternative to the basic @racket[token] structure, a token can also be an instance of the @racket[position-token] structure (also found in @racketmodname[br-parser-tools/lex]). In that case, the token will try to derive its position from that of the position-token.} -@item{@racket[parse] will stop if it gets @racket[void] (or @racket['eof]) as a token.} + @item{@racket[parse] will stop if it gets @racket[void] (or @racket['eof]) as a token.} -@item{@racket[parse] will skip any token that has -@racket[#:skip?] attribute set to @racket[#t]. For instance, tokens representing comments often use @racket[#:skip?].} + @item{@racket[parse] will skip any token that has + @racket[#:skip?] attribute set to @racket[#t]. 
For instance, tokens representing comments often use @racket[#:skip?].} -] + ] @subsection{From parsing to interpretation} @@ -315,19 +315,19 @@ We now have a parser for programs written in this simple-line-drawing language. Our parser will return syntax objects: @interaction[#:eval my-eval -(define parsed-program - (parse (tokenize (open-input-string "3 9 X; 6 3 b 3 X 3 b; 3 9 X;")))) -(syntax->datum parsed-program) -] + (define parsed-program + (parse (tokenize (open-input-string "3 9 X; 6 3 b 3 X 3 b; 3 9 X;")))) + (syntax->datum parsed-program) + ] Better still, these syntax objects will have a predictable structure that follows the grammar: @racketblock[ - (drawing (rows (repeat ) - (chunk ) ... ";") - ...) -] + (drawing (rows (repeat ) + (chunk ) ... ";") + ...) + ] where @racket[drawing], @racket[rows], @racket[repeat], and @racket[chunk] should be treated literally, and everything else will be numbers or strings. @@ -338,8 +338,8 @@ interpret them, and make them @emph{print}? We claimed at the beginning of this section that these syntax objects should be easy to interpret. So let's do it. @margin-note{This is a very quick-and-dirty treatment of @racket[syntax-parse]. -See the @racketmodname[syntax/parse] documentation for a gentler guide to its -features.} Racket provides a special form called @racket[syntax-parse] in the + See the @racketmodname[syntax/parse] documentation for a gentler guide to its + features.} Racket provides a special form called @racket[syntax-parse] in the @racketmodname[syntax/parse] library. @racket[syntax-parse] lets us do a structural case-analysis on syntax objects: we provide it a set of patterns to parse and actions to perform when those patterns match. 
@@ -349,18 +349,18 @@ As a simple example, we can write a function that looks at a syntax object and says @racket[#t] if it's the literal @racket[yes], and @racket[#f] otherwise: @interaction[#:eval my-eval -(require syntax/parse) -@code:comment{yes-syntax-object?: syntax-object -> boolean} -@code:comment{Returns true if the syntax-object is yes.} -(define (yes-syntax-object? stx) - (syntax-parse stx - [(~literal yes) - #t] - [else - #f])) -(yes-syntax-object? #'yes) -(yes-syntax-object? #'nooooooooooo) -] + (require syntax/parse) + @code:comment{yes-syntax-object?: syntax-object -> boolean} + @code:comment{Returns true if the syntax-object is yes.} + (define (yes-syntax-object? stx) + (syntax-parse stx + [(~literal yes) + #t] + [else + #f])) + (yes-syntax-object? #'yes) + (yes-syntax-object? #'nooooooooooo) + ] Here, we use @racket[~literal] to let @racket[syntax-parse] know that @racket[yes] should show up literally in the syntax object. The patterns can @@ -374,17 +374,17 @@ Now that we know a little bit more about @racket[syntax-parse], we can use it to do a case analysis on the syntax objects that our @racket[parse] function gives us. We start by defining a function on syntax objects of the form @racket[(drawing -rows-stx ...)]. + rows-stx ...)]. @interaction[#:eval my-eval -(define (interpret-drawing drawing-stx) - (syntax-parse drawing-stx - [({~literal drawing} rows-stxs ...) + (define (interpret-drawing drawing-stx) + (syntax-parse drawing-stx + [({~literal drawing} rows-stxs ...) - (for ([rows-stx (syntax->list #'(rows-stxs ...))]) - (interpret-rows rows-stx))]))] + (for ([rows-stx (syntax->list #'(rows-stxs ...))]) + (interpret-rows rows-stx))]))] When we encounter a syntax object with @racket[(drawing rows-stx -...)], then @racket[interpret-rows] each @racket[rows-stx]. + ...)], then @racket[interpret-rows] each @racket[rows-stx]. 
@;The pattern we @;express in @racket[syntax-parse] above marks what things should be treated @@ -395,16 +395,16 @@ When we encounter a syntax object with @racket[(drawing rows-stx Let's define @racket[interpret-rows] now: @interaction[#:eval my-eval -(define (interpret-rows rows-stx) - (syntax-parse rows-stx - [({~literal rows} - ({~literal repeat} repeat-number) - chunks ... ";") + (define (interpret-rows rows-stx) + (syntax-parse rows-stx + [({~literal rows} + ({~literal repeat} repeat-number) + chunks ... ";") - (for ([i (syntax-e #'repeat-number)]) - (for ([chunk-stx (syntax->list #'(chunks ...))]) - (interpret-chunk chunk-stx)) - (newline))]))] + (for ([i (syntax-e #'repeat-number)]) + (for ([chunk-stx (syntax->list #'(chunks ...))]) + (interpret-chunk chunk-stx)) + (newline))]))] For a @racket[rows], we extract out the @racket[repeat-number] out of the syntax object and use it as the range of the @racket[for] loop. The inner loop @@ -416,30 +416,30 @@ it to extract out the @racket[chunk-size] and @racket[chunk-string] portions, and print to standard output: @interaction[#:eval my-eval -(define (interpret-chunk chunk-stx) - (syntax-parse chunk-stx - [({~literal chunk} chunk-size chunk-string) + (define (interpret-chunk chunk-stx) + (syntax-parse chunk-stx + [({~literal chunk} chunk-size chunk-string) - (for ([k (syntax-e #'chunk-size)]) - (display (syntax-e #'chunk-string)))])) -] + (for ([k (syntax-e #'chunk-size)]) + (display (syntax-e #'chunk-string)))])) + ] @margin-note{Here are the definitions in a single file: -@link["examples/simple-line-drawing/interpret.rkt"]{interpret.rkt}.} + @link["examples/simple-line-drawing/interpret.rkt"]{interpret.rkt}.} With these definitions in hand, now we can pass it syntax objects that we construct directly by hand: @interaction[#:eval my-eval -(interpret-chunk #'(chunk 3 "X")) -(interpret-drawing #'(drawing (rows (repeat 5) (chunk 3 "X") ";"))) -] + (interpret-chunk #'(chunk 3 "X")) + (interpret-drawing #'(drawing 
(rows (repeat 5) (chunk 3 "X") ";"))) + ] or we can pass it the result generated by our parser: @interaction[#:eval my-eval -(define parsed-program - (parse (tokenize (open-input-string "3 9 X; 6 3 b 3 X 3 b; 3 9 X;")))) -(interpret-drawing parsed-program)] + (define parsed-program + (parse (tokenize (open-input-string "3 9 X; 6 3 b 3 X 3 b; 3 9 X;")))) + (interpret-drawing parsed-program)] And now we've got an interpreter! @@ -447,7 +447,7 @@ And now we've got an interpreter! @subsection{From interpretation to compilation} @margin-note{For a gentler tutorial on writing @litchar{#lang}-based languages, see -@link["http://beautifulracket.com"]{Beautiful Racket}.} (Just as a + @link["http://beautifulracket.com"]{Beautiful Racket}.} (Just as a warning: the following material is slightly more advanced, but shows how writing a compiler for the line-drawing language reuses the ideas for the interpreter.) @@ -455,40 +455,40 @@ interpreter.) Wouldn't it be nice to be able to write something like: @nested[#:style 'inset]{ -@verbatim|{ -3 9 X; -6 3 b 3 X 3 b; -3 9 X; -}|} + @verbatim|{ + 3 9 X; + 6 3 b 3 X 3 b; + 3 9 X; + }|} and have Racket automatically compile this down to something like this? @racketblock[ -(for ([i 3]) - (for ([k 9]) (displayln "X")) - (newline)) + (for ([i 3]) + (for ([k 9]) (display "X")) + (newline)) -(for ([i 6]) - (for ([k 3]) (displayln " ")) - (for ([k 3]) (displayln "X")) - (for ([k 3]) (displayln " ")) - (newline)) + (for ([i 6]) + (for ([k 3]) (display " ")) + (for ([k 3]) (display "X")) + (for ([k 3]) (display " ")) + (newline)) -(for ([i 3]) - (for ([k 9]) (displayln "X")) - (newline)) -] + (for ([i 3]) + (for ([k 9]) (display "X")) + (newline)) + ] Well, of course it won't work: we don't have a @litchar{#lang} line. Let's add one. 
@filebox["letter-i.rkt"]{ -@verbatim|{ -#lang brag/examples/simple-line-drawing -3 9 X; -6 3 b 3 X 3 b; -3 9 X; -}| + @verbatim|{ + #lang brag/examples/simple-line-drawing + 3 9 X; + 6 3 b 3 X 3 b; + 3 9 X; + }| } Now @filepath{letter-i.rkt} is a program. @@ -500,13 +500,13 @@ how to compile programs labeled with this @litchar{#lang} line. We'll do two things: @itemize[ -@item{Tell Racket to use the @tt{brag}-generated parser and lexer we defined -earlier whenever it sees a program written with -@litchar{#lang brag/examples/simple-line-drawing}.} + @item{Tell Racket to use the @tt{brag}-generated parser and lexer we defined + earlier whenever it sees a program written with + @litchar{#lang brag/examples/simple-line-drawing}.} -@item{Define transformation rules for @racket[drawing], @racket[rows], and - @racket[chunk] to rewrite these into standard Racket forms.} -] + @item{Define transformation rules for @racket[drawing], @racket[rows], and + @racket[chunk] to rewrite these into standard Racket forms.} + ] The second part, the writing of the transformation rules, will look very similar to the definitions we wrote for the interpreter, but the transformation @@ -517,7 +517,7 @@ compilation is also viable.) We do the first part by defining a @emph{module reader}: a @link["http://docs.racket-lang.org/guide/syntax_module-reader.html"]{module -reader} tells Racket how to parse and compile a file. Whenever Racket sees a + reader} tells Racket how to parse and compile a file. Whenever Racket sees a @litchar{#lang }, it looks for a corresponding module reader in @filepath{/lang/reader}. @@ -525,22 +525,22 @@ Here's the definition for @filepath{brag/examples/simple-line-drawing/lang/reader.rkt}: @filebox["brag/examples/simple-line-drawing/lang/reader.rkt"]{ -@codeblock|{ -#lang s-exp syntax/module-reader -brag/examples/simple-line-drawing/semantics -#:read my-read -#:read-syntax my-read-syntax -#:whole-body-readers? 
#t + @codeblock|{ + #lang s-exp syntax/module-reader + brag/examples/simple-line-drawing/semantics + #:read my-read + #:read-syntax my-read-syntax + #:whole-body-readers? #t -(require brag/examples/simple-line-drawing/lexer - brag/examples/simple-line-drawing/grammar) + (require brag/examples/simple-line-drawing/lexer + brag/examples/simple-line-drawing/grammar) -(define (my-read in) + (define (my-read in) (syntax->datum (my-read-syntax #f in))) -(define (my-read-syntax src ip) + (define (my-read-syntax src ip) (list (parse src (tokenize ip)))) -}| + }| } We use a helper module @racketmodname[syntax/module-reader], which provides @@ -551,56 +551,56 @@ object using a module called @filepath{semantics.rkt}. Let's look into @filepath{semantics.rkt} and see what's involved in compilation: @filebox["brag/examples/simple-line-drawing/semantics.rkt"]{ -@codeblock|{ -#lang racket/base -(require (for-syntax racket/base syntax/parse)) - -(provide #%module-begin - ;; We reuse Racket's treatment of raw datums, specifically - ;; for strings and numbers: - #%datum + @codeblock|{ + #lang racket/base + (require (for-syntax racket/base syntax/parse)) + + (provide #%module-begin + ;; We reuse Racket's treatment of raw datums, specifically + ;; for strings and numbers: + #%datum - ;; And otherwise, we provide definitions of these three forms. - ;; During compiliation, Racket uses these definitions to - ;; rewrite into for loops, displays, and newlines. - drawing rows chunk) + ;; And otherwise, we provide definitions of these three forms. + ;; During compilation, Racket uses these definitions to + ;; rewrite into for loops, displays, and newlines. + drawing rows chunk) -;; Define a few compile-time functions to do the syntax rewriting: -(begin-for-syntax + ;; Define a few compile-time functions to do the syntax rewriting: + (begin-for-syntax (define (compile-drawing drawing-stx) - (syntax-parse drawing-stx - [({~literal drawing} rows-stxs ...) 
+ (syntax-parse drawing-stx + [({~literal drawing} rows-stxs ...) - (syntax/loc drawing-stx - (begin rows-stxs ...))])) + (syntax/loc drawing-stx + (begin rows-stxs ...))])) (define (compile-rows rows-stx) - (syntax-parse rows-stx - [({~literal rows} - ({~literal repeat} repeat-number) - chunks ... - ";") + (syntax-parse rows-stx + [({~literal rows} + ({~literal repeat} repeat-number) + chunks ... + ";") - (syntax/loc rows-stx - (for ([i repeat-number]) - chunks ... - (newline)))])) + (syntax/loc rows-stx + (for ([i repeat-number]) + chunks ... + (newline)))])) (define (compile-chunk chunk-stx) - (syntax-parse chunk-stx - [({~literal chunk} chunk-size chunk-string) + (syntax-parse chunk-stx + [({~literal chunk} chunk-size chunk-string) - (syntax/loc chunk-stx - (for ([k chunk-size]) - (display chunk-string)))]))) + (syntax/loc chunk-stx + (for ([k chunk-size]) + (display chunk-string)))]))) -;; Wire up the use of "drawing", "rows", and "chunk" to these -;; transformers: -(define-syntax drawing compile-drawing) -(define-syntax rows compile-rows) -(define-syntax chunk compile-chunk) -}| + ;; Wire up the use of "drawing", "rows", and "chunk" to these + ;; transformers: + (define-syntax drawing compile-drawing) + (define-syntax rows compile-rows) + (define-syntax chunk compile-chunk) + }| } The semantics hold definitions for @racket[compile-drawing], @@ -614,32 +614,32 @@ work. There are a few things to note: @margin-note{By the way, we can just as easily rewrite the semantics so that -@racket[compile-rows] does explicitly call @racket[compile-chunk]. Often, -though, it's easier to write the transformation functions in this piecemeal way -and depend on the Racket macro expansion system to do the rewriting as it -encounters each of the forms.} + @racket[compile-rows] does explicitly call @racket[compile-chunk]. 
Often, + though, it's easier to write the transformation functions in this piecemeal way + and depend on the Racket macro expansion system to do the rewriting as it + encounters each of the forms.} @itemize[ -@item{@tt{brag}'s native data structure is the syntax object because the -majority of Racket's language-processing infrastructure knows how to read and -write this structured value.} + @item{@tt{brag}'s native data structure is the syntax object because the + majority of Racket's language-processing infrastructure knows how to read and + write this structured value.} -@item{Unlike in interpretation, @racket[compile-rows] doesn't -compile each chunk by directly calling @racket[compile-chunk]. Rather, it -depends on the Racket macro expander to call each @racket[compile-XXX] function -as it encounters a @racket[drawing], @racket[rows], or @racket[chunk] in the -parsed value. The three statements at the bottom of @filepath{semantics.rkt} inform -the macro expansion system to do this: + @item{Unlike in interpretation, @racket[compile-rows] doesn't + compile each chunk by directly calling @racket[compile-chunk]. Rather, it + depends on the Racket macro expander to call each @racket[compile-XXX] function + as it encounters a @racket[drawing], @racket[rows], or @racket[chunk] in the + parsed value. The three statements at the bottom of @filepath{semantics.rkt} inform + the macro expansion system to do this: -@racketblock[ -(define-syntax drawing compile-drawing) -(define-syntax rows compile-rows) -(define-syntax chunk compile-chunk) -]} -] + @racketblock[ + (define-syntax drawing compile-drawing) + (define-syntax rows compile-rows) + (define-syntax chunk compile-chunk) + ]} + ] Altogether, @tt{brag}'s intent is to be a parser generator for Racket @@ -676,18 +676,18 @@ quantification. 
A @deftech{pattern} is one of the following: @itemize[ -@item{an implicit sequence of @tech{pattern}s separated by whitespace} -@item{a terminal: either a literal string or a @tech{symbolic token identifier}. + @item{an implicit sequence of @tech{pattern}s separated by whitespace} + @item{a terminal: either a literal string or a @tech{symbolic token identifier}. -When used in a pattern, both these terminals will match the same set of inputs. A literal string can match the string itself, or a @racket[token] whose type field contains that string (or its symbol form). So @racket["FOO"] would match @racket["FOO"], @racket[(token "FOO" "bar")], or @racket[(token 'FOO "bar")]. A symbolic token identifier can also match the string version of the identifier, or a @racket[token] whose type field is the symbol or string form of the identifier. So @racket[FOO] would also match @racket["FOO"], @racket[(token 'FOO "bar")], or @racket[(token "FOO" "bar")]. (In every case, the value of a token, like @racket["bar"], can be anything, and may or may not be the same as its type.) + When used in a pattern, both these terminals will match the same set of inputs. A literal string can match the string itself, or a @racket[token] whose type field contains that string (or its symbol form). So @racket["FOO"] would match @racket["FOO"], @racket[(token "FOO" "bar")], or @racket[(token 'FOO "bar")]. A symbolic token identifier can also match the string version of the identifier, or a @racket[token] whose type field is the symbol or string form of the identifier. So @racket[FOO] would also match @racket["FOO"], @racket[(token 'FOO "bar")], or @racket[(token "FOO" "bar")]. (In every case, the value of a token, like @racket["bar"], can be anything, and may or may not be the same as its type.) -Because their underlying meanings are the same, the symbolic token identifier ends up being a notational convenience for readability inside a grammar pattern. 
Typically, the literal string @racket["FOO"] is used to connote ``match the string @racket["FOO"] exactly'' and the symbolic token identifier @racket[FOO] specially connotes ``match any token of type @racket['FOO]''.} + Because their underlying meanings are the same, the symbolic token identifier ends up being a notational convenience for readability inside a grammar pattern. Typically, the literal string @racket["FOO"] is used to connote ``match the string @racket["FOO"] exactly'' and the symbolic token identifier @racket[FOO] specially connotes ``match any token of type @racket['FOO]''.} -@item{a @tech{rule identifier}} -@item{a @deftech{choice pattern}: a sequence of @tech{pattern}s delimited with @litchar{|} characters.} -@item{a @deftech{quantifed pattern}: a @tech{pattern} followed by either @litchar{*} (``zero or more'') or @litchar{+} (``one or more'')} -@item{an @deftech{optional pattern}: a @tech{pattern} surrounded by @litchar{[} and @litchar{]}} -@item{an explicit sequence: a @tech{pattern} surrounded by @litchar{(} and @litchar{)}}] + @item{a @tech{rule identifier}} + @item{a @deftech{choice pattern}: a sequence of @tech{pattern}s delimited with @litchar{|} characters.} + @item{a @deftech{quantifed pattern}: a @tech{pattern} followed by either @litchar{*} (``zero or more'') or @litchar{+} (``one or more'')} + @item{an @deftech{optional pattern}: a @tech{pattern} surrounded by @litchar{[} and @litchar{]}} + @item{an explicit sequence: a @tech{pattern} surrounded by @litchar{(} and @litchar{)}}] A @deftech{line comment} begins with either @litchar{#} or @litchar{;} and continues till the end of the line. @@ -695,19 +695,19 @@ continues till the end of the line. 
For example, in the following program: @nested[#:style 'inset -@verbatim|{ -#lang brag -;; A parser for a silly language -sentence: verb optional-adjective object -verb: greeting -optional-adjective: ["happy" | "frumpy"] -greeting: "hello" | "hola" | "aloha" -object: "world" | WORLD -}|] + @verbatim|{ + #lang brag + ;; A parser for a silly language + sentence: verb optional-adjective object + verb: greeting + optional-adjective: ["happy" | "frumpy"] + greeting: "hello" | "hola" | "aloha" + object: "world" | WORLD + }|] the elements @tt{sentence}, @tt{verb}, @tt{greeting}, and @tt{object} are rule identifiers. The first rule, @litchar{sentence: verb optional-adjective -object}, is a rule whose right side is an implicit pattern sequence of three + object}, is a rule whose right side is an implicit pattern sequence of three sub-patterns. The uppercased @tt{WORLD} is a symbolic token identifier. The fourth rule in the program associates @tt{greeting} with a @tech{choice pattern}. @@ -715,31 +715,31 @@ sub-patterns. The uppercased @tt{WORLD} is a symbolic token identifier. The four More examples: @itemize[ -@item{A -BNF for binary -strings that contain an equal number of zeros and ones. -@verbatim|{ -#lang brag -equal: [zero one | one zero] ;; equal number of "0"s and "1"s. -zero: "0" equal | equal "0" ;; has an extra "0" in it. -one: "1" equal | equal "1" ;; has an extra "1" in it. -}| -} - -@item{A BNF for -@link["http://www.json.org/"]{JSON}-like structures. -@verbatim|{ -#lang brag -json: number | string + @item{A + BNF for binary + strings that contain an equal number of zeros and ones. + @verbatim|{ + #lang brag + equal: [zero one | one zero] ;; equal number of "0"s and "1"s. + zero: "0" equal | equal "0" ;; has an extra "0" in it. + one: "1" equal | equal "1" ;; has an extra "1" in it. + }| + } + + @item{A BNF for + @link["http://www.json.org/"]{JSON}-like structures. 
+ @verbatim|{ + #lang brag + json: number | string | array | object -number: NUMBER -string: STRING -array: "[" [json ("," json)*] "]" -object: "{" [kvpair ("," kvpair)*] "}" -kvpair: ID ":" json -}| -} -] + number: NUMBER + string: STRING + array: "[" [json ("," json)*] "]" + object: "{" [kvpair ("," kvpair)*] "}" + kvpair: ID ":" json + }| + } + ] @@ -752,38 +752,38 @@ as syntax errors. @tt{brag} will raise a syntax error if the grammar: @itemize[ -@item{doesn't have any rules.} + @item{doesn't have any rules.} -@item{has a rule with the same left hand side as any other rule.} + @item{has a rule with the same left hand side as any other rule.} -@item{refers to rules that have not been defined. e.g. the -following program: -@nested[#:style 'code-inset -@verbatim|{ -#lang brag -foo: [bar] -}| -] -should raise an error because @tt{bar} has not been defined, even though -@tt{foo} refers to it in an @tech{optional pattern}.} + @item{refers to rules that have not been defined. e.g. the + following program: + @nested[#:style 'code-inset + @verbatim|{ + #lang brag + foo: [bar] + }| + ] + should raise an error because @tt{bar} has not been defined, even though + @tt{foo} refers to it in an @tech{optional pattern}.} -@item{uses the token name @racket[EOF]; the end-of-file token type is reserved -for internal use by @tt{brag}.} + @item{uses the token name @racket[EOF]; the end-of-file token type is reserved + for internal use by @tt{brag}.} -@item{contains a rule that has no finite derivation. e.g. the following -program: -@nested[#:style 'code-inset -@verbatim|{ -#lang brag -infinite-a: "a" infinite-a -}| -] -should raise an error because no finite sequence of tokens will satisfy -@tt{infinite-a}.} + @item{contains a rule that has no finite derivation. e.g. 
the following + program: + @nested[#:style 'code-inset + @verbatim|{ + #lang brag + infinite-a: "a" infinite-a + }| + ] + should raise an error because no finite sequence of tokens will satisfy + @tt{infinite-a}.} -] + ] Otherwise, @tt{brag} should be fairly tolerant and permit even ambiguous grammars. @@ -799,111 +799,111 @@ bindings. The most important of these is @racket[parse]: (-> token))]) syntax?]{ -Parses the sequence of @tech{tokens} according to the rules in the grammar, using the -first rule as the start production. The parse must completely consume -@racket[token-source]. + Parses the sequence of @tech{tokens} according to the rules in the grammar, using the + first rule as the start production. The parse must completely consume + @racket[token-source]. -The @deftech{token source} can either be a sequence, or a 0-arity function that -produces @tech{tokens}. + The @deftech{token source} can either be a sequence, or a 0-arity function that + produces @tech{tokens}. -A @deftech{token} in @tt{brag} can be any of the following values: -@itemize[ -@item{a string} -@item{a symbol} -@item{an instance produced by @racket[token]} -@item{an instance produced by the token constructors of @racketmodname[br-parser-tools/lex]} -@item{an instance of @racketmodname[br-parser-tools/lex]'s @racket[position-token] whose - @racket[position-token-token] is a @tech{token}.} -] + A @deftech{token} in @tt{brag} can be any of the following values: + @itemize[ + @item{a string} + @item{a symbol} + @item{an instance produced by @racket[token]} + @item{an instance produced by the token constructors of @racketmodname[br-parser-tools/lex]} + @item{an instance of @racketmodname[br-parser-tools/lex]'s @racket[position-token] whose + @racket[position-token-token] is a @tech{token}.} + ] -A token whose type is either @racket[void] or @racket['EOF] terminates the -source. + A token whose type is either @racket[void] or @racket['EOF] terminates the + source. 
-If @racket[parse] succeeds, it will return a structured syntax object. The -structure of the syntax object follows the overall structure of the rules in -the BNF grammar. For each rule @racket[r] and its associated pattern @racket[p], -@racket[parse] generates a syntax object @racket[#'(r p-value)] where -@racket[p-value]'s structure follows a case analysis on @racket[p]: + If @racket[parse] succeeds, it will return a structured syntax object. The + structure of the syntax object follows the overall structure of the rules in + the BNF grammar. For each rule @racket[r] and its associated pattern @racket[p], + @racket[parse] generates a syntax object @racket[#'(r p-value)] where + @racket[p-value]'s structure follows a case analysis on @racket[p]: -@itemize[ -@item{For implicit and explicit sequences of @tech{pattern}s @racket[p1], - @racket[p2], ..., the corresponding values, spliced into the - structure.} -@item{For terminals, the value of the token.} -@item{For @tech{rule identifier}s: the associated parse value for the rule.} -@item{For @tech{choice pattern}s: the associated parse value for one of the matching subpatterns.} -@item{For @tech{quantifed pattern}s and @tech{optional pattern}s: the corresponding values, spliced into the structure.} -] + @itemize[ + @item{For implicit and explicit sequences of @tech{pattern}s @racket[p1], + @racket[p2], ..., the corresponding values, spliced into the + structure.} + @item{For terminals, the value of the token.} + @item{For @tech{rule identifier}s: the associated parse value for the rule.} + @item{For @tech{choice pattern}s: the associated parse value for one of the matching subpatterns.} + @item{For @tech{quantifed pattern}s and @tech{optional pattern}s: the corresponding values, spliced into the structure.} + ] -Consequently, it's only the presence of @tech{rule identifier}s in a rule's -pattern that informs the parser to introduces nested structure into the syntax -object. 
+ Consequently, it's only the presence of @tech{rule identifier}s in a rule's + pattern that informs the parser to introduce nested structure into the syntax + object. -If the grammar is ambiguous, @tt{brag} will choose one of the possible parse results, though it doesn't guarantee which. + If the grammar is ambiguous, @tt{brag} will choose one of the possible parse results, though it doesn't guarantee which. -If the parse cannot be performed successfully, or if a token in the -@racket[token-source] uses a type that isn't mentioned in the grammar, then -@racket[parse] raises an instance of @racket[exn:fail:parsing].} + If the parse cannot be performed successfully, or if a token in the + @racket[token-source] uses a type that isn't mentioned in the grammar, then + @racket[parse] raises an instance of @racket[exn:fail:parsing].} -@defproc[(parse-tree [source any/c #f] - [token-source (or/c (sequenceof token) - (-> token))]) +@defproc[(parse-to-datum [source any/c #f] + [token-source (or/c (sequenceof token) + (-> token))]) list?]{ -Same as @racket[parse], but the result is converted into a visible parse tree. Useful for testing or debugging a parser. + Same as @racket[parse], but the result is converted into a plain datum. Useful for testing or debugging a parser. } @defform[#:id make-rule-parser (make-rule-parser name)]{ -Constructs a parser for the @racket[name] of one of the non-terminals -in the grammar. - -For example, given the @tt{brag} program -@filepath{simple-arithmetic-grammar.rkt}: -@filebox["simple-arithmetic-grammar.rkt"]{ -@verbatim|{ -#lang brag -expr : term ('+' term)* -term : factor ('*' factor)* -factor : INT -}| -} -the following interaction shows how to extract a parser for @racket[term]s.
-@interaction[#:eval my-eval -@eval:alts[(require "simple-arithmetic-grammar.rkt") - (require brag/examples/simple-arithmetic-grammar)] -(define term-parse (make-rule-parser term)) -(define tokens (list (token 'INT 3) - "*" - (token 'INT 4))) -(syntax->datum (parse tokens)) -(syntax->datum (term-parse tokens)) - -(define another-token-sequence - (list (token 'INT 1) "+" (token 'INT 2) - "*" (token 'INT 3))) -(syntax->datum (parse another-token-sequence)) -@code:comment{Note that term-parse will break on another-token-sequence} -@code:comment{as it does not know what to do with the "+"} -(term-parse another-token-sequence) -] + Constructs a parser for the @racket[name] of one of the non-terminals + in the grammar. + + For example, given the @tt{brag} program + @filepath{simple-arithmetic-grammar.rkt}: + @filebox["simple-arithmetic-grammar.rkt"]{ + @verbatim|{ + #lang brag + expr : term ('+' term)* + term : factor ('*' factor)* + factor : INT + }| + } + the following interaction shows how to extract a parser for @racket[term]s. + @interaction[#:eval my-eval + @eval:alts[(require "simple-arithmetic-grammar.rkt") + (require brag/examples/simple-arithmetic-grammar)] + (define term-parse (make-rule-parser term)) + (define tokens (list (token 'INT 3) + "*" + (token 'INT 4))) + (syntax->datum (parse tokens)) + (syntax->datum (term-parse tokens)) + + (define another-token-sequence + (list (token 'INT 1) "+" (token 'INT 2) + "*" (token 'INT 3))) + (syntax->datum (parse another-token-sequence)) + @code:comment{Note that term-parse will break on another-token-sequence} + @code:comment{as it does not know what to do with the "+"} + (term-parse another-token-sequence) + ] } @defthing[all-token-types (setof symbol?)]{ -A set of all the token types used in a grammar. + A set of all the token types used in a grammar. 
-For example: -@interaction[#:eval my-eval -@eval:alts[(require "simple-arithmetic-grammar.rkt") - (require brag/examples/simple-arithmetic-grammar)] -all-token-types -] + For example: + @interaction[#:eval my-eval + @eval:alts[(require "simple-arithmetic-grammar.rkt") + (require brag/examples/simple-arithmetic-grammar)] + all-token-types + ] } @@ -929,13 +929,13 @@ In addition to the exports shown below, the @racketmodname[brag/support] module [#:skip? skip? boolean? #f] ) token-struct?]{ -Creates instances of @racket[token-struct]s. + Creates instances of @racket[token-struct]s. -The syntax objects produced by a parse will inject the value @racket[val] in -place of the token name in the grammar. + The syntax objects produced by a parse will inject the value @racket[val] in + place of the token name in the grammar. -If @racket[#:skip?] is true, then the parser will skip over it during a -parse.} + If @racket[#:skip?] is true, then the parser will skip over it during a + parse.} @defstruct[token-struct ([type symbol?] @@ -945,11 +945,11 @@ parse.} [column (or/c positive-integer? #f)] [span (or/c natural-number? #f)] [skip? boolean?]) - #:transparent]{ -The token structure type. + #:transparent]{ + The token structure type. -Rather than directly using the @racket[token-struct] constructor, please use -the helper function @racket[token] to construct instances. + Rather than directly using the @racket[token-struct] constructor, please use + the helper function @racket[token] to construct instances. } @@ -959,103 +959,103 @@ the helper function @racket[token] to construct instances. ([message string?] [continuation-marks continuation-mark-set?] [srclocs (listof srcloc?)])]{ -The exception raised when parsing fails. + The exception raised when parsing fails. 
-@racket[exn:fail:parsing] implements Racket's @racket[prop:exn:srcloc] -property, so if this exception reaches DrRacket's default error handler, -DrRacket should highlight the offending locations in the source.} + @racket[exn:fail:parsing] implements Racket's @racket[prop:exn:srcloc] + property, so if this exception reaches DrRacket's default error handler, + DrRacket should highlight the offending locations in the source.} @defproc[(apply-tokenizer-maker [tokenizer-maker procedure?] - [source (or/c string? - input-port?)]) + [source (or/c string? + input-port?)]) list?]{ -Repeatedly apply @racket[tokenizer-maker] to @racket[source], gathering the resulting tokens into a list. @racket[source] can be a string or an input port. Useful for testing or debugging a tokenizer. + Repeatedly apply @racket[tokenizer-maker] to @racket[source], gathering the resulting tokens into a list. @racket[source] can be a string or an input port. Useful for testing or debugging a tokenizer. } @defproc[(apply-lexer [lexer procedure?] - [source (or/c string? + [source (or/c string? input-port?)]) list?]{ -Repeatedly apply @racket[lexer] to @racket[source], gathering the resulting tokens into a list. @racket[source] can be a string or an input port. Useful for testing or debugging a lexer. + Repeatedly apply @racket[lexer] to @racket[source], gathering the resulting tokens into a list. @racket[source] can be a string or an input port. Useful for testing or debugging a lexer. } @defproc[(trim-ends [left-str string?] -[str string?] -[right-str string?]) + [str string?] + [right-str string?]) string?]{ -Remove @racket[left-str] from the left side of @racket[str], and @racket[right-str] from its right side. Intended as a helper function for @racket[from/to]. + Remove @racket[left-str] from the left side of @racket[str], and @racket[right-str] from its right side. Intended as a helper function for @racket[from/to]. 
} @defform[(:* re ...)]{ -Repetition of @racket[re] sequence 0 or more times.} + Repetition of @racket[re] sequence 0 or more times.} @defform[(:+ re ...)]{ -Repetition of @racket[re] sequence 1 or more times.} + Repetition of @racket[re] sequence 1 or more times.} @defform[(:? re ...)]{ -Zero or one occurrence of @racket[re] sequence.} + Zero or one occurrence of @racket[re] sequence.} @defform[(:= n re ...)]{ -Exactly @racket[n] occurrences of @racket[re] sequence, where -@racket[n] must be a literal exact, non-negative number.} + Exactly @racket[n] occurrences of @racket[re] sequence, where + @racket[n] must be a literal exact, non-negative number.} @defform[(:>= n re ...)]{ -At least @racket[n] occurrences of @racket[re] sequence, where -@racket[n] must be a literal exact, non-negative number.} + At least @racket[n] occurrences of @racket[re] sequence, where + @racket[n] must be a literal exact, non-negative number.} @defform[(:** n m re ...)]{ -Between @racket[n] and @racket[m] (inclusive) occurrences of -@racket[re] sequence, where @racket[n] must be a literal exact, -non-negative number, and @racket[m] must be literally either -@racket[#f], @racket[+inf.0], or an exact, non-negative number; a -@racket[#f] value for @racket[m] is the same as @racket[+inf.0].} + Between @racket[n] and @racket[m] (inclusive) occurrences of + @racket[re] sequence, where @racket[n] must be a literal exact, + non-negative number, and @racket[m] must be literally either + @racket[#f], @racket[+inf.0], or an exact, non-negative number; a + @racket[#f] value for @racket[m] is the same as @racket[+inf.0].} @defform[(:or re ...)]{ -Same as @racket[(union re ...)].} + Same as @racket[(union re ...)].} @deftogether[( -@defform[(:: re ...)] -@defform[(:seq re ...)] -)]{ + @defform[(:: re ...)] + @defform[(:seq re ...)] + )]{ -Both forms concatenate the @racket[re]s.} + Both forms concatenate the @racket[re]s.} @defform[(:& re ...)]{ -Intersects the @racket[re]s.} + Intersects the 
@racket[re]s.} @defform[(:- re ...)]{ -The set difference of the @racket[re]s.} + The set difference of the @racket[re]s.} @defform[(:~ re ...)]{ -Character-set complement, which each @racket[re] must match exactly -one character.} + Character-set complement, where each @racket[re] must match exactly + one character.} @defform[(:/ char-or-string ...)]{ -Character ranges, matching characters between successive pairs of -characters.} + Character ranges, matching characters between successive pairs of + characters.} @defform[(from/to open close)]{ -A string that is bounded by @racket[open] and @racket[close]. Matching is non-greedy (meaning, it stops at the first occurence of @racket[close]). The resulting lexeme includes @racket[open] and @racket[close]. To remove them, see @racket[trim-ends].} + A string that is bounded by @racket[open] and @racket[close]. Matching is non-greedy (meaning, it stops at the first occurrence of @racket[close]). The resulting lexeme includes @racket[open] and @racket[close]. To remove them, see @racket[trim-ends].} @defform[(from/stop-before open close)]{ -Like @racket[from/to], a string that is bounded by @racket[open] and @racket[close], except that @racket[close] is not included in the resulting lexeme. Matching is non-greedy (meaning, it stops at the first occurence of @racket[close]).} + Like @racket[from/to], a string that is bounded by @racket[open] and @racket[close], except that @racket[close] is not included in the resulting lexeme.
Matching is non-greedy (meaning, it stops at the first occurrence of @racket[close]).} @close-eval[my-eval] diff --git a/brag/brag/codegen/codegen.rkt b/brag/brag/codegen/codegen.rkt index 206b0b2..f8f76cd 100755 --- a/brag/brag/codegen/codegen.rkt +++ b/brag/brag/codegen/codegen.rkt @@ -153,13 +153,16 @@ (parse tokenizer))])))])) (define parse (make-rule-parser start-id)) - (provide parse-tree) - (define (parse-tree x) + (provide parse-to-datum parse-tree) + + (define (parse-to-datum x) (let loop ([x (syntax->datum (parse x))]) (cond [(list? x) (map loop x)] [(char? x) (string x)] - [else x])))))))])) + [else x]))) + + (define parse-tree parse-to-datum)))))])) ;; Given a flattened rule, returns a syntax for the code that diff --git a/brag/brag/test/test-whitespace.rkt b/brag/brag/test/test-whitespace.rkt index c895ca8..455ee3f 100755 --- a/brag/brag/test/test-whitespace.rkt +++ b/brag/brag/test/test-whitespace.rkt @@ -4,9 +4,9 @@ rackunit) (check-equal? - (parse-tree "\ty\n x\tz") + (parse-to-datum "\ty\n x\tz") '(start (tab "\t") (letter "y") (newline "\n") (space " ") (letter "x") (tab "\t") (letter "z"))) (check-equal? - (parse-tree "\t\n \t") + (parse-to-datum "\t\n \t") '(start (tab "\t") (newline "\n") (space " ") (tab "\t")))