diff --git a/brag/brag/brag.scrbl b/brag/brag/brag.scrbl index e0db8cf..59059f3 100755 --- a/brag/brag/brag.scrbl +++ b/brag/brag/brag.scrbl @@ -679,9 +679,12 @@ A @deftech{pattern} is one of the following: @item{an implicit sequence of @tech{pattern}s separated by whitespace} @item{a terminal: either a literal string or a @tech{symbolic token identifier}. - When used in a pattern, both these terminals will match the same set of inputs. A literal string can match the string itself, or a @racket[token] whose type field contains that string (or its symbol form). So @racket["FOO"] would match @racket["FOO"], @racket[(token "FOO" "bar")], or @racket[(token 'FOO "bar")]. A symbolic token identifier can also match the string version of the identifier, or a @racket[token] whose type field is the symbol or string form of the identifier. So @racket[FOO] would also match @racket["FOO"], @racket[(token 'FOO "bar")], or @racket[(token "FOO" "bar")]. (In every case, the value of a token, like @racket["bar"], can be anything, and may or may not be the same as its type.) + When used in a pattern, both these terminals will match the same set of inputs. A literal string can match the string itself, or a @racket[token] structure whose type field contains that string (or its symbol form). So @racket["FOO"] would match @racket["FOO"], @racket[(token "FOO" "bar")], or @racket[(token 'FOO "bar")]. A symbolic token identifier can also match the string version of the identifier, or a @racket[token] whose type field is the symbol or string form of the identifier. So @racket[FOO] would also match @racket["FOO"], @racket[(token 'FOO "bar")], or @racket[(token "FOO" "bar")]. (In every case, the value of a token, like @racket["bar"], can be anything, and may or may not be the same as its type.) - Because their underlying meanings are the same, the symbolic token identifier ends up being a notational convenience for readability inside a grammar pattern. 
Typically, the literal string @racket["FOO"] is used to connote ``match the string @racket["FOO"] exactly'' and the symbolic token identifier @racket[FOO] specially connotes ``match any token of type @racket['FOO]''.} + Because their underlying meanings are the same, the symbolic token identifier ends up being a notational convenience for readability inside a grammar pattern. Typically, the literal string @racket["FOO"] is used to connote ``match the string @racket["FOO"] exactly'' and the symbolic token identifier @racket[FOO] specially connotes ``match any token of type @racket['FOO]''. + + You @bold{cannot} use the literal string @racket["error"] as a terminal in a grammar, because it's reserved for @tt{brag}. You can, however, adjust your lexer to package it inside a token structure — say, @racket[(token 'ERROR "error")] — and then use the symbolic token identifier @racket[ERROR] in the grammar to match this token structure. +} @item{a @tech{rule identifier}} @item{a @deftech{choice pattern}: a sequence of @tech{pattern}s delimited with @litchar{|} characters.} diff --git a/brag/brag/cfg-parser/cfg-parser.rkt b/brag/brag/cfg-parser/cfg-parser.rkt index 1bc717b..aafff8a 100755 --- a/brag/brag/cfg-parser/cfg-parser.rkt +++ b/brag/brag/cfg-parser/cfg-parser.rkt @@ -705,10 +705,13 @@ (if src-pos? #'($1-start-pos $1-end-pos) #'(#f #f))]) - #`(grammar (start [() null] - [(atok start) (cons $1 $2)]) - (atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...))) - #`(start start) + ;; rename `start` and `atok` to `%start` and `%atok` + ;; so that "start" and "atok" can be used as literal string tokens in a grammar. + ;; not sure why this works, but it passes all tests. + #`(grammar (%start [() null] + [(%atok %start) (cons $1 $2)]) + (%atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...))) + #`(start %start) parser-clauses)))] [(grammar . _) (raise-syntax-error @@ -745,30 +748,30 @@ (next success-k fail-k max-depth tasks)))] [fail-k (lambda (max-depth tasks) (cond - [(null? 
tok-list) - (if error-proc - (error-proc #t - 'no-tokens - #f - (make-position #f #f #f) - (make-position #f #f #f)) - (error - 'cfg-parse - "no tokens"))] - [else - (let ([bad-tok (list-ref tok-list - (min (sub1 (length tok-list)) - max-depth))]) - (if error-proc - (error-proc #t - (tok-orig-name bad-tok) - (tok-val bad-tok) - (tok-start bad-tok) - (tok-end bad-tok)) - (error - 'cfg-parse - "failed at ~a" - (tok-val bad-tok))))]))]) + [(null? tok-list) + (if error-proc + (error-proc #t + 'no-tokens + #f + (make-position #f #f #f) + (make-position #f #f #f)) + (error + 'cfg-parse + "no tokens"))] + [else + (let ([bad-tok (list-ref tok-list + (min (sub1 (length tok-list)) + max-depth))]) + (if error-proc + (error-proc #t + (tok-orig-name bad-tok) + (tok-val bad-tok) + (tok-start bad-tok) + (tok-end bad-tok)) + (error + 'cfg-parse + "failed at ~a" + (tok-val bad-tok))))]))]) (#,start tok-list ;; we simulate a token at the very beginning with zero width ;; for use with the position-generating code (*-start-pos, *-end-pos).