From bfcdbbce58b6e84ac9d8585f21b9b991cc5e5ad9 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Wed, 13 Jun 2018 15:43:41 -0700 Subject: [PATCH] add ? quantifier --- brag/brag.scrbl | 4 +-- brag/examples/baby-json-alt2.rkt | 17 +++++++++++++ brag/private/colorer.rkt | 2 +- brag/rules/lexer.rkt | 4 +-- brag/rules/parser.rkt | 43 ++++++++++++++------------------ 5 files changed, 41 insertions(+), 29 deletions(-) create mode 100755 brag/examples/baby-json-alt2.rkt diff --git a/brag/brag.scrbl b/brag/brag.scrbl index fa36f68..f4a783c 100755 --- a/brag/brag.scrbl +++ b/brag/brag.scrbl @@ -698,9 +698,9 @@ A @deftech{pattern} is one of the following: @item{a @deftech{choice pattern}: a sequence of @tech{pattern}s delimited with @litchar{|} characters.} - @item{a @deftech{quantified pattern}: a @tech{pattern} followed by either @litchar{*} (``zero or more'') or @litchar{+} (``one or more''). Quantification can also be denoted by integers within curly brackets. So @litchar|{{2}}| means ``exactly 2''; @litchar|{{2,5}}| means ``between 2 and 5, inclusive''; @litchar|{{2,}}| means ``2 or more''; and @litchar|{{,5}}| means ``up to 5''.} + @item{a @deftech{quantified pattern}: a @tech{pattern} followed by either @litchar{*} (``zero or more''), @litchar{?} (``zero or one''), or @litchar{+} (``one or more''). Quantification can also be denoted by integers within curly brackets. So @litchar|{{2}}| means ``exactly 2''; @litchar|{{2,5}}| means ``between 2 and 5, inclusive''; @litchar|{{2,}}| means ``2 or more''; and @litchar|{{,5}}| means ``up to 5''.} - @item{an @deftech{optional pattern}: a @tech{pattern} surrounded by @litchar{[} and @litchar{]}} + @item{an @deftech{optional pattern}: a @tech{pattern} surrounded by @litchar{[} and @litchar{]}. (The @litchar{?} zero-or-one quantifier means the same thing.)}
@item{an explicit sequence: a @tech{pattern} surrounded by @litchar{(} and @litchar{)}}] diff --git a/brag/examples/baby-json-alt2.rkt b/brag/examples/baby-json-alt2.rkt new file mode 100755 index 0000000..15d75b2 --- /dev/null +++ b/brag/examples/baby-json-alt2.rkt @@ -0,0 +1,17 @@ +#lang brag + +;; Simple baby example of JSON structure, using the ? (zero-or-one) quantifier for the optional contents of arrays and objects +json: number + | string + | array + | object + +number: NUMBER + +string: STRING + +array: "[" (json ("," json)*)? "]" + +object: "{" (kvpair ("," kvpair)*)? "}" + +kvpair: ID ":" json diff --git a/brag/private/colorer.rkt b/brag/private/colorer.rkt index eb2449a..f59edc6 100644 --- a/brag/private/colorer.rkt +++ b/brag/private/colorer.rkt @@ -9,7 +9,7 @@ ;; (for DrRacket selections etc) [whitespace (token 'WHITE lexeme)] [(:or (from/to "'" "'") (from/to "\"" "\"")) (token 'LIT lexeme)] - [(:or (char-set "()[]{}|+*:") hide-char splice-char) (token 'MISC lexeme)] + [(:or (char-set "()[]{}|+*:?") hide-char splice-char) (token 'MISC lexeme)] [(:seq (:or "#" ";") (complement (:seq (:* any-char) NL (:* any-char))) (:or NL "")) (token 'COMMENT lexeme)] [id (token 'ID lexeme)] [any-char (token 'OTHER lexeme)])) diff --git a/brag/rules/lexer.rkt b/brag/rules/lexer.rkt index 9aaec3d..107159c 100755 --- a/brag/rules/lexer.rkt +++ b/brag/rules/lexer.rkt @@ -14,7 +14,7 @@ (define-lex-abbrev NL (:or "\r\n" "\r" "\n")) ;; reserved-chars = chars used for quantifiers & parse-tree filtering -(define-for-syntax quantifiers "+:*{}") ; colon is reserved to separate rules and productions +(define-for-syntax quantifiers "+:*?{}") ; colon is reserved to separate rules and productions (define-lex-trans reserved-chars (λ(stx) #`(char-set #,(format "~a~a~a" quantifiers hide-char splice-char)))) @@ -64,7 +64,7 @@ (token-SPLICE lexeme)] ["|" (token-PIPE lexeme)] - [(:or "+" "*" + [(:or "+" "*" "?" (:: "{" (:* digit) (:? 
(:: "," (:* digit))) "}"))
     (token-REPEAT lexeme)]
    [whitespace
diff --git a/brag/rules/parser.rkt b/brag/rules/parser.rkt
index f51f9f6..45facf1 100755
--- a/brag/rules/parser.rkt
+++ b/brag/rules/parser.rkt
@@ -154,30 +154,25 @@
 
     [repeatable-pattern
      [(atomic-pattern REPEAT)
-      (cond [(string=? $2 "*")
-             (pattern-repeat (position->pos $1-start-pos)
-                             (position->pos $2-end-pos)
-                             0 #f $1)]
-            [(string=? $2 "+")
-             (pattern-repeat (position->pos $1-start-pos)
-                             (position->pos $2-end-pos)
-                             1 #f $1)]
-            [(regexp-match #px"^\\{(\\d+)?(,)?(\\d+)?\\}$" $2) ; "{min,max}" with both min & max optional
-             => (λ (m)
-                  (match-define (list min-repeat max-repeat)
-                    (match m
-                      [(list _ min range? max) (let ([min (if min (string->number min) 0)])
-                                                 (list
-                                                  min
-                                                  (cond
-                                                    [(and range? max) (string->number max)]
-                                                    [(and (not range?) (not max)) min] ; {3} -> {3,3}
-                                                    [(not max) #f])))]))
-                  (pattern-repeat (position->pos $1-start-pos)
-                                  (position->pos $2-end-pos)
-                                  min-repeat max-repeat $1))]
-            [else
-             (error 'grammar-parse "unknown repetition operator ~e" $2)])]
+      (let ()
+        (match-define (cons min-repeat max-repeat) ; max-repeat of #f = no upper bound
+          (cond [(string=? $2 "*") (cons 0 #f)]
+                [(string=? $2 "+") (cons 1 #f)]
+                [(string=? $2 "?") (cons 0 1)] ; zero or one
+                [(regexp-match #px"^\\{(\\d+)?(,)?(\\d+)?\\}$" $2) ; "{min,max}" with both min & max optional
+                 => (λ (m)
+                      (match m
+                        [(list _ min-str range? max-str) (let () ; each capture is a string, or #f when absent
+                           (define min-count (if min-str (string->number min-str) 0)) ; no minimum -> 0
+                           (define max-count (cond
+                                               [(and range? max-str) (string->number max-str)]
+                                               [(and (not range?) (not max-str)) min-count] ; {3} -> {3,3}
+                                               [else #f])) ; {2,} -> open-ended
+                           (cons min-count max-count))]))]
+                [else (error 'grammar-parse "unknown repetition operator ~e" $2)]))
+        (pattern-repeat (position->pos $1-start-pos)
+                        (position->pos $2-end-pos)
+                        min-repeat max-repeat $1))]
     [(atomic-pattern)
      $1]]
 