diff --git a/brag/brag.scrbl b/brag/brag.scrbl index f8af72f..ec8e722 100755 --- a/brag/brag.scrbl +++ b/brag/brag.scrbl @@ -673,6 +673,9 @@ A @deftech{rule identifier} is an @tech{identifier} that is not in upper case. A @deftech{symbolic token identifier} is an @tech{identifier} that is in upper case. +A @deftech{line comment} begins with either @litchar{#} or @litchar{;} and +continues till the end of the line. + An @deftech{identifier} is a sequence of letters, numbers, or characters in the set @racket["-.!$%&/<=>?^_~@"]. It must not contain @litchar{*}, @litchar{+}, or @litchar|{{}| and @litchar|{}}|, as those characters are used to denote quantification. @@ -680,7 +683,8 @@ characters in the set @racket["-.!$%&/<=>?^_~@"]. It must not contain A @deftech{pattern} is one of the following: @itemize[ - @item{an implicit sequence of @tech{pattern}s separated by whitespace} + @item{an implicit sequence of @tech{pattern}s separated by whitespace.} + @item{a @deftech{terminal}: either a literal string or a @tech{symbolic token identifier}. When used in a pattern, both kinds of terminals will match the same set of inputs. @@ -694,7 +698,7 @@ A @deftech{pattern} is one of the following: You @bold{cannot} use the literal string @racket["error"] as a terminal in a grammar, because it's reserved for @tt{brag}. You can, however, adjust your lexer to package it inside a token structure — say, @racket[(token 'ERROR "error")] — and then use the symbolic token identifier @racket[ERROR] in the grammar to match this token structure. } - @item{a @tech{rule identifier}} + @item{a @tech{rule identifier}.} @item{a @deftech{choice pattern}: a sequence of @tech{pattern}s delimited with @litchar{|} characters.} @@ -702,10 +706,10 @@ A @deftech{pattern} is one of the following: @item{an @deftech{optional pattern}: a @tech{pattern} surrounded by @litchar{[} and @litchar{]}. 
(The @litchar{?} zero-or-one quantifier means the same thing.)} - @item{an explicit sequence: a @tech{pattern} surrounded by @litchar{(} and @litchar{)}}] + @item{an explicit @deftech{sequence}: a @tech{pattern} surrounded by @litchar{(} and @litchar{)}.} -A @deftech{line comment} begins with either @litchar{#} or @litchar{;} and -continues till the end of the line. + @item{the @deftech{empty set}: a special @tech{pattern} that matches a list of zero tokens. When it appears on the right side of a rule, the empty set matches not only empty input (which obviously contains zero tokens) but also the ``gap'' between two existing tokens (which less obviously also contains zero tokens). The empty set can be denoted by @litchar{[]} (empty square brackets), @litchar{∅} (the Unicode empty-set character), or @litchar{Ø} (the slashed O).} + ] For example, in the following program: diff --git a/brag/examples/empty-symbol.rkt b/brag/examples/empty-symbol.rkt new file mode 100755 index 0000000..20ade75 --- /dev/null +++ b/brag/examples/empty-symbol.rkt @@ -0,0 +1,6 @@ +#lang brag + +top : xs | ys | zs +xs : [] | "x" xs +ys : Ø | "y" /ys +zs : ∅ | "z" @zs \ No newline at end of file diff --git a/brag/rules/lexer.rkt b/brag/rules/lexer.rkt index 93c4a4f..68cfe15 100755 --- a/brag/rules/lexer.rkt +++ b/brag/rules/lexer.rkt @@ -56,6 +56,7 @@ (:or (:* (:or "\\\"" esc-chars (:~ "\"" "\\"))) "\\\\") "\"") (token-LIT (unescape-lexeme lexeme #\"))] + [(:or "[]" "Ø" "∅") (token-EMPTY lexeme)] ["(" (token-LPAREN lexeme)] ["[" diff --git a/brag/rules/parser.rkt b/brag/rules/parser.rkt index 5acc05b..18b8e44 100755 --- a/brag/rules/parser.rkt +++ b/brag/rules/parser.rkt @@ -24,6 +24,7 @@ token-ID token-LIT token-EOF + token-EMPTY grammar-parser current-source @@ -53,7 +54,8 @@ RULE_HEAD_SPLICED ID LIT - EOF)) + EOF + EMPTY)) (define hide-char #\/) (define splice-char #\@) @@ -198,6 +200,14 @@ (position->pos $1-end-pos) $1 #f))] + + [(EMPTY) + (pattern-repeat (position->pos $1-start-pos)
(position->pos $1-end-pos) + 0 0 (pattern-lit (position->pos $1-start-pos) + (position->pos $1-end-pos) + "" #f) + #f)] [(LBRACKET pattern RBRACKET) (pattern-repeat (position->pos $1-start-pos) diff --git a/brag/test/test-all.rkt b/brag/test/test-all.rkt index 4eb18f8..578eefb 100755 --- a/brag/test/test-all.rkt +++ b/brag/test/test-all.rkt @@ -7,6 +7,7 @@ "test-baby-json-hider.rkt" "test-curly-quantifier.rkt" "test-cutter.rkt" + "test-empty-symbol.rkt" "test-errors.rkt" "test-flatten.rkt" "test-lexer.rkt" diff --git a/brag/test/test-empty-symbol.rkt b/brag/test/test-empty-symbol.rkt new file mode 100755 index 0000000..523c0ae --- /dev/null +++ b/brag/test/test-empty-symbol.rkt @@ -0,0 +1,21 @@ +#lang racket/base +(require brag/examples/empty-symbol + brag/support + rackunit) + +(check-true (and (member (parse-to-datum "") (list '(top (xs)) '(top (ys)) '(top (zs)))) #t)) + +;; x is normal +(check-equal? (parse-to-datum "x") '(top (xs "x" (xs)))) +(check-equal? (parse-to-datum "xx") '(top (xs "x" (xs "x" (xs))))) +(check-equal? (parse-to-datum "xxx") '(top (xs "x" (xs "x" (xs "x" (xs)))))) + +;; y cuts +(check-equal? (parse-to-datum "y") '(top (ys "y"))) +(check-equal? (parse-to-datum "yy") '(top (ys "y"))) +(check-equal? (parse-to-datum "yyy") '(top (ys "y"))) + +;; z splices +(check-equal? (parse-to-datum "z") '(top (zs "z"))) +(check-equal? (parse-to-datum "zz") '(top (zs "z" "z"))) +(check-equal? (parse-to-datum "zzz") '(top (zs "z" "z" "z"))) \ No newline at end of file