add ? quantifier

hide-top-rule-name
Matthew Butterick 6 years ago
parent 9171af7930
commit bfcdbbce58

@ -698,9 +698,9 @@ A @deftech{pattern} is one of the following:
@item{a @deftech{choice pattern}: a sequence of @tech{pattern}s delimited with @litchar{|} characters.} @item{a @deftech{choice pattern}: a sequence of @tech{pattern}s delimited with @litchar{|} characters.}
@item{a @deftech{quantified pattern}: a @tech{pattern} followed by either @litchar{*} (``zero or more'') or @litchar{+} (``one or more''). Quantification can also be denoted by integers within curly brackets. So @litchar|{{2}}| means ``exactly 2''; @litchar|{{2,5}}| means ``between 2 and 5, inclusive''; @litchar|{{2,}}| means ``2 or more''; and @litchar|{{,5}}| means ``up to 5''.} @item{a @deftech{quantified pattern}: a @tech{pattern} followed by either @litchar{*} (``zero or more''), @litchar{?} (``zero or one''), or @litchar{+} (``one or more''). Quantification can also be denoted by integers within curly brackets. So @litchar|{{2}}| means ``exactly 2''; @litchar|{{2,5}}| means ``between 2 and 5, inclusive''; @litchar|{{2,}}| means ``2 or more''; and @litchar|{{,5}}| means ``up to 5''.}
@item{an @deftech{optional pattern}: a @tech{pattern} surrounded by @litchar{[} and @litchar{]}} @item{an @deftech{optional pattern}: a @tech{pattern} surrounded by @litchar{[} and @litchar{]}. (The @litchar{?} zero-or-one quantifier means the same thing.)}
@item{an explicit sequence: a @tech{pattern} surrounded by @litchar{(} and @litchar{)}}] @item{an explicit sequence: a @tech{pattern} surrounded by @litchar{(} and @litchar{)}}]

@ -0,0 +1,17 @@
#lang brag
;; Simple baby example of JSON structure.
;; The array and object rules apply the ? (zero-or-one) quantifier to a
;; parenthesized group, so both empty and populated containers are accepted.
;; A json value is exactly one of the four alternatives below.
json: number
| string
| array
| object
;; NUMBER and STRING are not defined in this grammar; presumably they are
;; token types supplied by the tokenizer (TODO confirm against the lexer used).
number: NUMBER
string: STRING
;; "[" then, optionally, one json value followed by zero or more
;; ","-prefixed json values, then "]".
array: "[" (json ("," json)*)? "]"
;; Same shape as array, but delimited by braces and holding kvpairs.
object: "{" (kvpair ("," kvpair)*)? "}"
;; A key/value pair: an ID token, a literal ":", then any json value.
kvpair: ID ":" json

@ -9,7 +9,7 @@
;; (for DrRacket selections etc) ;; (for DrRacket selections etc)
[whitespace (token 'WHITE lexeme)] [whitespace (token 'WHITE lexeme)]
[(:or (from/to "'" "'") (from/to "\"" "\"")) (token 'LIT lexeme)] [(:or (from/to "'" "'") (from/to "\"" "\"")) (token 'LIT lexeme)]
[(:or (char-set "()[]{}|+*:") hide-char splice-char) (token 'MISC lexeme)] [(:or (char-set "()[]{}|+*:?") hide-char splice-char) (token 'MISC lexeme)]
[(:seq (:or "#" ";") (complement (:seq (:* any-char) NL (:* any-char))) (:or NL "")) (token 'COMMENT lexeme)] [(:seq (:or "#" ";") (complement (:seq (:* any-char) NL (:* any-char))) (:or NL "")) (token 'COMMENT lexeme)]
[id (token 'ID lexeme)] [id (token 'ID lexeme)]
[any-char (token 'OTHER lexeme)])) [any-char (token 'OTHER lexeme)]))

@ -14,7 +14,7 @@
(define-lex-abbrev NL (:or "\r\n" "\r" "\n")) (define-lex-abbrev NL (:or "\r\n" "\r" "\n"))
;; reserved-chars = chars used for quantifiers & parse-tree filtering ;; reserved-chars = chars used for quantifiers & parse-tree filtering
(define-for-syntax quantifiers "+:*{}") ; colon is reserved to separate rules and productions (define-for-syntax quantifiers "+:*?{}") ; colon is reserved to separate rules and productions
(define-lex-trans reserved-chars (define-lex-trans reserved-chars
(λ(stx) #`(char-set #,(format "~a~a~a" quantifiers hide-char splice-char)))) (λ(stx) #`(char-set #,(format "~a~a~a" quantifiers hide-char splice-char))))
@ -64,7 +64,7 @@
(token-SPLICE lexeme)] (token-SPLICE lexeme)]
["|" ["|"
(token-PIPE lexeme)] (token-PIPE lexeme)]
[(:or "+" "*" [(:or "+" "*" "?"
(:: "{" (:* digit) (:? (:: "," (:* digit))) "}")) (:: "{" (:* digit) (:? (:: "," (:* digit))) "}"))
(token-REPEAT lexeme)] (token-REPEAT lexeme)]
[whitespace [whitespace

@ -154,30 +154,25 @@
[repeatable-pattern [repeatable-pattern
[(atomic-pattern REPEAT) [(atomic-pattern REPEAT)
(cond [(string=? $2 "*") (let ()
(pattern-repeat (position->pos $1-start-pos) (match-define (cons min-repeat max-repeat)
(position->pos $2-end-pos) (cond [(string=? $2 "*") (cons 0 #f)]
0 #f $1)] [(string=? $2 "+") (cons 1 #f)]
[(string=? $2 "+") [(string=? $2 "?") (cons 0 1)]
(pattern-repeat (position->pos $1-start-pos) [(regexp-match #px"^\\{(\\d+)?(,)?(\\d+)?\\}$" $2) ; "{min,max}" with both min & max optional
(position->pos $2-end-pos) => (λ (m)
1 #f $1)] (match m
[(regexp-match #px"^\\{(\\d+)?(,)?(\\d+)?\\}$" $2) ; "{min,max}" with both min & max optional [(list all min range? max) (let ()
=> (λ (m) (define min (or (string->number min) 0))
(match-define (list min-repeat max-repeat) (define max (cond
(match m [(and range? max) (string->number max)]
[(list _ min range? max) (let ([min (if min (string->number min) 0)]) [(and (not range?) (not max)) min] ; {3} -> {3,3}
(list [else #f]))
min (cons min max))]))]
(cond [else (raise-argument-error 'grammar-parse "unknown repetition operator ~e" $2)]))
[(and range? max) (string->number max)] (pattern-repeat (position->pos $1-start-pos)
[(and (not range?) (not max)) min] ; {3} -> {3,3} (position->pos $2-end-pos)
[(not max) #f])))])) min-repeat max-repeat $1))]
(pattern-repeat (position->pos $1-start-pos)
(position->pos $2-end-pos)
min-repeat max-repeat $1))]
[else
(error 'grammar-parse "unknown repetition operator ~e" $2)])]
[(atomic-pattern) [(atomic-pattern)
$1]] $1]]

Loading…
Cancel
Save