From a5dce2c55e866c662c18e196f721417c57d101f9 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Sun, 10 Jun 2018 18:45:33 -0700 Subject: [PATCH 01/11] lex & parse the operator --- brag/rules/lexer.rkt | 7 +++++-- brag/rules/parser.rkt | 6 ++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/brag/rules/lexer.rkt b/brag/rules/lexer.rkt index 8665142..b5a485e 100755 --- a/brag/rules/lexer.rkt +++ b/brag/rules/lexer.rkt @@ -64,8 +64,11 @@ (token-SPLICE lexeme)] ["|" (token-PIPE lexeme)] - [(:or "+" "*") - (token-REPEAT lexeme)] + [(:or "+" "*" + ;; todo: consolidate next two patterns? + (:: "{" (:* digit) "}") + (:: "{" (:* digit) "," (:* digit) "}")) + (token-REPEAT (let () (println lexeme) lexeme))] [whitespace ;; Skip whitespace (return-without-pos (lex/1 input-port))] diff --git a/brag/rules/parser.rkt b/brag/rules/parser.rkt index 9d25117..05cfa9a 100755 --- a/brag/rules/parser.rkt +++ b/brag/rules/parser.rkt @@ -162,6 +162,12 @@ (pattern-repeat (position->pos $1-start-pos) (position->pos $2-end-pos) 1 $1)] + [(regexp-match #px"^\\{(\\d+)?,?(\\d+)?\\}$" $2) + => (λ (m) + (define pr (match m + [(list _ min max) (cons (and min (string->number min)) + (and max (string->number max)))])) + (error (format "~a" pr)))] [else (error 'grammar-parse "unknown repetition operator ~e" $2)])] [(atomic-pattern) -- 2.25.1 From 3985aee8eb9726d7acfce6511b30dbe4b5e9fbf3 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Sun, 10 Jun 2018 19:20:42 -0700 Subject: [PATCH 02/11] clarify docs --- brag/brag.scrbl | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/brag/brag.scrbl b/brag/brag.scrbl index ee720a1..fb143b2 100755 --- a/brag/brag.scrbl +++ b/brag/brag.scrbl @@ -1090,44 +1090,45 @@ In addition to the exports shown below, the @racketmodname[brag/support] module @defform[(:* re ...)]{ - Repetition of @racket[re] sequence 0 or more times.} +0 or more occurrences of any @racket[re] pattern.} @defform[(:+ re ...)]{ - Repetition of @racket[re] sequence 1 or more times.} +1 or more occurrences of any @racket[re] pattern.} @defform[(:? re ...)]{ - Zero or one occurrence of @racket[re] sequence.} +0 or 1 occurrence of any @racket[re] pattern.} @defform[(:= n re ...)]{ - Exactly @racket[n] occurrences of @racket[re] sequence, where - @racket[n] must be a literal exact, non-negative number.} +Exactly @racket[n] occurrences of any @racket[re] pattern, where +@racket[n] must be a literal exact, non-negative number.} @defform[(:>= n re ...)]{ - At least @racket[n] occurrences of @racket[re] sequence, where - @racket[n] must be a literal exact, non-negative number.} +At least @racket[n] occurrences of any @racket[re] pattern, where +@racket[n] must be a literal exact, non-negative number.} @defform[(:** n m re ...)]{ - Between @racket[n] and @racket[m] (inclusive) occurrences of - @racket[re] sequence, where @racket[n] must be a literal exact, - non-negative number, and @racket[m] must be literally either - @racket[#f], @racket[+inf.0], or an exact, non-negative number; a - @racket[#f] value for @racket[m] is the same as @racket[+inf.0].} +Between @racket[n] and @racket[m] (inclusive) occurrences of +any @racket[re] pattern, where @racket[n] must be a literal exact, +non-negative number, and @racket[m] must be literally either +@racket[#f], @racket[+inf.0], or an exact, non-negative number; a +@racket[#f] value for @racket[m] is the same as @racket[+inf.0].} @defform[(:or re ...)]{ - Same as @racket[(union re ...)].} +Same as @racket[(:union re ...)].} @deftogether[( - @defform[(:: re ...)] - @defform[(:seq re ...)] - )]{ +@defform[(:: re ...)] +@defform[(:seq re ...)] +)]{ - Both forms concatenate the @racket[re]s.} +Both forms concatenate the @racket[re]s into a single, indivisible pattern. +In other words, this matches @emph{all} the @racket[re]s in order, whereas @racket[(:union re ...)] matches @emph{any} of the @racket[re]s.} @defform[(:& re ...)]{ -- 2.25.1 From 99f97b399c520a91dfa71fe1f49ac54720f99a8a Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Sun, 10 Jun 2018 20:34:22 -0700 Subject: [PATCH 03/11] forward --- brag/codegen/flatten.rkt | 30 +++++++++++++++++++++--------- brag/rules/lexer.rkt | 6 ++---- brag/rules/parser.rkt | 15 ++++++++++----- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/brag/codegen/flatten.rkt b/brag/codegen/flatten.rkt index ad3ecfc..aeb1902 100755 --- a/brag/codegen/flatten.rkt +++ b/brag/codegen/flatten.rkt @@ -1,7 +1,8 @@ #lang racket/base (require brag/rules/stx-types + racket/list (for-syntax racket/base)) - +(require (except-in sugar/debug repeat)) (provide flatten-rule flatten-rules prim-rule) @@ -102,21 +103,32 @@ (begin (define-values (inferred-rules new-sub-pats) (lift-nonprimitive-pattern #'sub-pat)) - (with-syntax ([(sub-pat ...) new-sub-pats]) + (report/file (syntax-e #'min)) + (with-syntax ([(sub-pat ...) new-sub-pats] + [MIN-REPEAT-SUB-PATS (apply append (make-list (syntax-e #'min) new-sub-pats))]) (cons (cond [(= (syntax-e #'min) 0) - #`(head origin name - [(inferred-id name repeat) sub-pat ...] - [])] - [(= (syntax-e #'min) 1) - #`(head origin name - [(inferred-id name repeat) sub-pat ...] - [sub-pat ...])]) + #`(head origin name + [(inferred-id name repeat) sub-pat ...] + [])] + [(= (syntax-e #'min) 1) + #`(head origin name + [(inferred-id name repeat) sub-pat ...] + [sub-pat ...])] + [(= (syntax-e #'min) 2) + #`(head origin name + [(inferred-id name repeat) sub-pat ...] + [sub-pat ... sub-pat ...])]) + inferred-rules) + #;(cons (report/file #`(head origin name + [(inferred-id name repeat) sub-pat ...] + MIN-REPEAT-SUB-PATS)) inferred-rules)))] [(maybe sub-pat) (begin (define-values (inferred-rules new-sub-pats) (lift-nonprimitive-pattern #'sub-pat)) + (report*/file #'pat #'sub-pat) (with-syntax ([(sub-pat ...) new-sub-pats]) (cons #'(head origin name [sub-pat ...] diff --git a/brag/rules/lexer.rkt b/brag/rules/lexer.rkt index b5a485e..44e8d13 100755 --- a/brag/rules/lexer.rkt +++ b/brag/rules/lexer.rkt @@ -65,10 +65,8 @@ ["|" (token-PIPE lexeme)] [(:or "+" "*" - ;; todo: consolidate next two patterns? - (:: "{" (:* digit) "}") - (:: "{" (:* digit) "," (:* digit) "}")) - (token-REPEAT (let () (println lexeme) lexeme))] + (:: "{" (:* digit) (:? (:: "," (:* digit))) "}")) + (token-REPEAT lexeme)] [whitespace ;; Skip whitespace (return-without-pos (lex/1 input-port))] diff --git a/brag/rules/parser.rkt b/brag/rules/parser.rkt index 05cfa9a..93c794e 100755 --- a/brag/rules/parser.rkt +++ b/brag/rules/parser.rkt @@ -4,6 +4,7 @@ racket/list racket/match "rule-structs.rkt") +(require sugar/debug) ;; A parser for grammars. @@ -162,12 +163,16 @@ (pattern-repeat (position->pos $1-start-pos) (position->pos $2-end-pos) 1 $1)] - [(regexp-match #px"^\\{(\\d+)?,?(\\d+)?\\}$" $2) + [(regexp-match #px"^\\{(\\d+)?,?(\\d+)?\\}$" $2) ; "{min,max}" with both min & max optional => (λ (m) - (define pr (match m - [(list _ min max) (cons (and min (string->number min)) - (and max (string->number max)))])) - (error (format "~a" pr)))] + (match-define (cons min-repeat max-repeat) + (match m + [(list _ min max) (cons (if min (string->number min) 0) + (and max (string->number max)))])) + (report/file min-repeat) + (pattern-repeat (position->pos $1-start-pos) + (position->pos $2-end-pos) + min-repeat $1))] [else (error 'grammar-parse "unknown repetition operator ~e" $2)])] [(atomic-pattern) -- 2.25.1 From 6909f926b77a071a93e35c79dabfe8ba359283dd Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Sun, 10 Jun 2018 22:35:55 -0700 Subject: [PATCH 04/11] curly quantifier --- brag/codegen/codegen.rkt | 6 +-- brag/codegen/flatten.rkt | 86 +++++++++++++++++-------------------- brag/rules/parser.rkt | 12 +++--- brag/rules/rule-structs.rkt | 3 +- brag/rules/stx.rkt | 4 +- 5 files changed, 52 insertions(+), 59 deletions(-) diff --git a/brag/codegen/codegen.rkt b/brag/codegen/codegen.rkt index 14d6697..e2381f8 100755 --- a/brag/codegen/codegen.rkt +++ b/brag/codegen/codegen.rkt @@ -301,7 +301,7 @@ [explicit explicit]) ([v (in-list (syntax->list #'(vals ...)))]) (loop v implicit explicit))] - [(repeat min val) + [(repeat min max val) (loop #'val implicit explicit)] [(maybe val) (loop #'val implicit explicit)] @@ -379,7 +379,7 @@ (for/fold ([acc acc]) ([v (in-list (syntax->list #'(vals ...)))]) (loop v acc))] - [(repeat min val) + [(repeat min max val) (loop #'val acc)] [(maybe val) (loop #'val acc)] @@ -425,7 +425,7 @@ (define a-child (process-pattern v)) (sat:add-child! an-or-node a-child)) an-or-node)] - [(repeat min val) + [(repeat min max val) (syntax-case #'min () [0 (make-leaf)] diff --git a/brag/codegen/flatten.rkt b/brag/codegen/flatten.rkt index aeb1902..89ab7b2 100755 --- a/brag/codegen/flatten.rkt +++ b/brag/codegen/flatten.rkt @@ -2,7 +2,6 @@ (require brag/rules/stx-types racket/list (for-syntax racket/base)) -(require (except-in sugar/debug repeat)) (provide flatten-rule flatten-rules prim-rule) @@ -69,78 +68,73 @@ (values (apply append (reverse rules)) (apply append (reverse patterns)))) - (with-syntax ([head (if inferred? #'inferred-prim-rule #'prim-rule)] - [origin (syntax-case a-rule (rule) [(rule name (pat-head rest ...)) #'pat-head])]) + (with-syntax ([HEAD (if inferred? #'inferred-prim-rule #'prim-rule)] + [ORIGIN (syntax-case a-rule (rule) [(rule name (pat-head rest ...)) #'pat-head])]) (syntax-case a-rule (rule) - [(rule name pat) - (syntax-case #'pat (id inferred-id lit token choice repeat maybe seq) + [(rule NAME PAT) + (syntax-case #'PAT (id inferred-id lit token choice repeat maybe seq) ;; The primitive types stay as they are: [(id val) - (list #'(head origin name [pat]))] + (list #'(HEAD ORIGIN NAME [PAT]))] [(inferred-id val reason) - (list #'(head origin name [pat]))] + (list #'(HEAD ORIGIN NAME [PAT]))] [(lit val) - (list #'(head origin name [pat]))] + (list #'(HEAD ORIGIN NAME [PAT]))] [(token val) - (list #'(head origin name [pat]))] + (list #'(HEAD ORIGIN NAME [PAT]))] ;; Everything else might need lifting: - [(choice sub-pat ...) + [(choice SUB-PAT ...) (begin (define-values (inferred-ruless/rev new-sub-patss/rev) (for/fold ([rs '()] [ps '()]) - ([p (syntax->list #'(sub-pat ...))]) + ([p (syntax->list #'(SUB-PAT ...))]) (let-values ([(new-r new-p) (lift-nonprimitive-pattern p)]) (values (cons new-r rs) (cons new-p ps))))) - (with-syntax ([((sub-pat ...) ...) (reverse new-sub-patss/rev)]) - (append (list #'(head origin name [sub-pat ...] ...)) + (with-syntax ([((SUB-PAT ...) ...) (reverse new-sub-patss/rev)]) + (append (list #'(HEAD ORIGIN NAME [SUB-PAT ...] ...)) (apply append (reverse inferred-ruless/rev)))))] - [(repeat min sub-pat) + [(repeat MIN #f SUB-PAT) + ;; indefinite repeat (begin (define-values (inferred-rules new-sub-pats) - (lift-nonprimitive-pattern #'sub-pat)) - (report/file (syntax-e #'min)) - (with-syntax ([(sub-pat ...) new-sub-pats] - [MIN-REPEAT-SUB-PATS (apply append (make-list (syntax-e #'min) new-sub-pats))]) - (cons (cond [(= (syntax-e #'min) 0) - #`(head origin name - [(inferred-id name repeat) sub-pat ...] - [])] - [(= (syntax-e #'min) 1) - #`(head origin name - [(inferred-id name repeat) sub-pat ...] - [sub-pat ...])] - [(= (syntax-e #'min) 2) - #`(head origin name - [(inferred-id name repeat) sub-pat ...] - [sub-pat ... sub-pat ...])]) - inferred-rules) - #;(cons (report/file #`(head origin name - [(inferred-id name repeat) sub-pat ...] - MIN-REPEAT-SUB-PATS)) + (lift-nonprimitive-pattern #'SUB-PAT)) + (with-syntax ([(SUB-PAT ...) new-sub-pats] + [MIN-REPEAT-SUB-PATS (apply append (make-list (syntax-e #'MIN) new-sub-pats))]) + (cons #`(HEAD ORIGIN NAME + [(inferred-id NAME repeat) SUB-PAT ...] + MIN-REPEAT-SUB-PATS) inferred-rules)))] - - [(maybe sub-pat) + [(repeat MIN MAX SUB-PAT) + ;; finite repeat + (let ([min (syntax-e #'MIN)] + [max (syntax-e #'MAX)]) + (recur + (with-syntax ([(MIN-SUBPAT ...) (make-list min #'SUB-PAT)] + [(EXTRA-SUBPAT ...) (make-list (- max min) #'SUB-PAT)]) + #'(rule NAME (seq MIN-SUBPAT ... (maybe EXTRA-SUBPAT) ...))) + #f))] + + [(maybe SUB-PAT) (begin (define-values (inferred-rules new-sub-pats) - (lift-nonprimitive-pattern #'sub-pat)) - (report*/file #'pat #'sub-pat) - (with-syntax ([(sub-pat ...) new-sub-pats]) - (cons #'(head origin name - [sub-pat ...] + (lift-nonprimitive-pattern #'SUB-PAT)) + (with-syntax ([(SUB-PAT ...) new-sub-pats]) + (cons #'(HEAD ORIGIN NAME + [SUB-PAT ...] []) inferred-rules)))] - [(seq sub-pat ...) + [(seq SUB-PAT ...) (begin (define-values (inferred-rules new-sub-pats) - (lift-nonprimitive-patterns (syntax->list #'(sub-pat ...)))) - (with-syntax ([(sub-pat ...) new-sub-pats]) - (cons #'(head origin name [sub-pat ...]) + (lift-nonprimitive-patterns (syntax->list #'(SUB-PAT ...)))) + (with-syntax ([(SUB-PAT ...) new-sub-pats]) + (cons #'(HEAD ORIGIN NAME [SUB-PAT ...]) inferred-rules)))])])))) @@ -174,7 +168,7 @@ #t] [(choice sub-pat ...) #f] - [(repeat min val) + [(repeat min max val) #f] [(maybe sub-pat) #f] diff --git a/brag/rules/parser.rkt b/brag/rules/parser.rkt index 93c794e..75f33ad 100755 --- a/brag/rules/parser.rkt +++ b/brag/rules/parser.rkt @@ -4,7 +4,6 @@ racket/list racket/match "rule-structs.rkt") -(require sugar/debug) ;; A parser for grammars. @@ -158,21 +157,20 @@ (cond [(string=? $2 "*") (pattern-repeat (position->pos $1-start-pos) (position->pos $2-end-pos) - 0 $1)] + 0 #f $1)] [(string=? $2 "+") (pattern-repeat (position->pos $1-start-pos) (position->pos $2-end-pos) - 1 $1)] + 1 #f $1)] [(regexp-match #px"^\\{(\\d+)?,?(\\d+)?\\}$" $2) ; "{min,max}" with both min & max optional => (λ (m) (match-define (cons min-repeat max-repeat) (match m [(list _ min max) (cons (if min (string->number min) 0) (and max (string->number max)))])) - (report/file min-repeat) (pattern-repeat (position->pos $1-start-pos) (position->pos $2-end-pos) - min-repeat $1))] + min-repeat max-repeat $1))] [else (error 'grammar-parse "unknown repetition operator ~e" $2)])] [(atomic-pattern) @@ -233,8 +231,8 @@ (pattern-lit start-pos end-pos v (or hide? h))] [(pattern-choice _ _ vs) (pattern-choice start-pos end-pos vs)] - [(pattern-repeat _ _ m v) - (pattern-repeat start-pos end-pos m v)] + [(pattern-repeat _ _ min max v) + (pattern-repeat start-pos end-pos min max v)] [(pattern-maybe _ _ v) (pattern-maybe start-pos end-pos v)] [(pattern-seq _ _ vs) diff --git a/brag/rules/rule-structs.rkt b/brag/rules/rule-structs.rkt index 5b5968e..262de1b 100755 --- a/brag/rules/rule-structs.rkt +++ b/brag/rules/rule-structs.rkt @@ -31,7 +31,8 @@ (struct pattern-choice pattern (vals) #:transparent) -(struct pattern-repeat pattern (min ;; either 0 or 1 +(struct pattern-repeat pattern (min + max val) #:transparent) diff --git a/brag/rules/stx.rkt b/brag/rules/stx.rkt index 9c66685..9d5f5dd 100755 --- a/brag/rules/stx.rkt +++ b/brag/rules/stx.rkt @@ -80,8 +80,8 @@ 'hide hide)] [(struct pattern-choice (start end vals)) (datum->syntax #f`(choice ,@(map recur vals)) source-location)] - [(struct pattern-repeat (start end min val)) - (datum->syntax #f`(repeat ,min ,(recur val)) source-location)] + [(struct pattern-repeat (start end min max val)) + (datum->syntax #f`(repeat ,min ,max ,(recur val)) source-location)] [(struct pattern-maybe (start end val)) (datum->syntax #f`(maybe ,(recur val)) source-location)] [(struct pattern-seq (start end vals)) -- 2.25.1 From 98cb84bdcc0fa787ad253460a154a2df818ea0b6 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Sun, 10 Jun 2018 22:36:52 -0700 Subject: [PATCH 05/11] curly colors --- brag/private/colorer.rkt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/brag/private/colorer.rkt b/brag/private/colorer.rkt index 8b71244..eb2449a 100644 --- a/brag/private/colorer.rkt +++ b/brag/private/colorer.rkt @@ -9,7 +9,7 @@ ;; (for DrRacket selections etc) [whitespace (token 'WHITE lexeme)] [(:or (from/to "'" "'") (from/to "\"" "\"")) (token 'LIT lexeme)] - [(:or (char-set "()[]|+*:") hide-char splice-char) (token 'MISC lexeme)] + [(:or (char-set "()[]{}|+*:") hide-char splice-char) (token 'MISC lexeme)] [(:seq (:or "#" ";") (complement (:seq (:* any-char) NL (:* any-char))) (:or NL "")) (token 'COMMENT lexeme)] [id (token 'ID lexeme)] [any-char (token 'OTHER lexeme)])) -- 2.25.1 From 46ddd5cfd44f5e6a356991dd48bc919fafa6e723 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Sun, 10 Jun 2018 22:53:53 -0700 Subject: [PATCH 06/11] refac --- brag/codegen/flatten.rkt | 16 +++++++++------- brag/rules/parser.rkt | 4 ++-- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/brag/codegen/flatten.rkt b/brag/codegen/flatten.rkt index 89ab7b2..74f8a0a 100755 --- a/brag/codegen/flatten.rkt +++ b/brag/codegen/flatten.rkt @@ -109,15 +109,17 @@ [(inferred-id NAME repeat) SUB-PAT ...] MIN-REPEAT-SUB-PATS) inferred-rules)))] + [(repeat MIN MAX SUB-PAT) ;; finite repeat - (let ([min (syntax-e #'MIN)] - [max (syntax-e #'MAX)]) - (recur - (with-syntax ([(MIN-SUBPAT ...) (make-list min #'SUB-PAT)] - [(EXTRA-SUBPAT ...) (make-list (- max min) #'SUB-PAT)]) - #'(rule NAME (seq MIN-SUBPAT ... (maybe EXTRA-SUBPAT) ...))) - #f))] + (begin + (define min (syntax-e #'MIN)) + (define max (syntax-e #'MAX)) + (define new-rule-stx (with-syntax ([(MIN-SUBPAT ...) (make-list min #'SUB-PAT)] + [(EXTRA-SUBPAT ...) (make-list (- max min) #'SUB-PAT)]) + ;; has to keep the same name to work correctly + #'(rule NAME (seq MIN-SUBPAT ... (maybe EXTRA-SUBPAT) ...)))) + (recur new-rule-stx #f))] [(maybe SUB-PAT) (begin diff --git a/brag/rules/parser.rkt b/brag/rules/parser.rkt index 75f33ad..a239eb0 100755 --- a/brag/rules/parser.rkt +++ b/brag/rules/parser.rkt @@ -164,9 +164,9 @@ 1 #f $1)] [(regexp-match #px"^\\{(\\d+)?,?(\\d+)?\\}$" $2) ; "{min,max}" with both min & max optional => (λ (m) - (match-define (cons min-repeat max-repeat) + (match-define (list min-repeat max-repeat) (match m - [(list _ min max) (cons (if min (string->number min) 0) + [(list _ min max) (list (if min (string->number min) 0) (and max (string->number max)))])) (pattern-repeat (position->pos $1-start-pos) (position->pos $2-end-pos) -- 2.25.1 From 83f16034b079ac7fc33c23b3cca283b359973478 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Sun, 10 Jun 2018 23:19:34 -0700 Subject: [PATCH 07/11] handle exact quantifier --- brag/rules/parser.rkt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/brag/rules/parser.rkt b/brag/rules/parser.rkt index a239eb0..f51f9f6 100755 --- a/brag/rules/parser.rkt +++ b/brag/rules/parser.rkt @@ -162,12 +162,17 @@ (pattern-repeat (position->pos $1-start-pos) (position->pos $2-end-pos) 1 #f $1)] - [(regexp-match #px"^\\{(\\d+)?,?(\\d+)?\\}$" $2) ; "{min,max}" with both min & max optional + [(regexp-match #px"^\\{(\\d+)?(,)?(\\d+)?\\}$" $2) ; "{min,max}" with both min & max optional => (λ (m) (match-define (list min-repeat max-repeat) (match m - [(list _ min max) (list (if min (string->number min) 0) - (and max (string->number max)))])) + [(list _ min range? max) (let ([min (if min (string->number min) 0)]) + (list + min + (cond + [(and range? max) (string->number max)] + [(and (not range?) (not max)) min] ; {3} -> {3,3} + [(not max) #f])))])) (pattern-repeat (position->pos $1-start-pos) (position->pos $2-end-pos) min-repeat max-repeat $1))] -- 2.25.1 From 57f896cd187f28890661810cc573c10f31bd4c29 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Mon, 11 Jun 2018 10:32:14 -0700 Subject: [PATCH 08/11] update-implies --- info.rkt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/info.rkt b/info.rkt index 000d84a..0b408bf 100755 --- a/info.rkt +++ b/info.rkt @@ -1,8 +1,13 @@ #lang info (define version "1.0") -(define deps '("base" "br-parser-tools-lib" "rackunit-lib")) -(define build-deps '("at-exp-lib" "br-parser-tools-doc" "racket-doc" - "scribble-lib")) (define collection 'multi) +(define deps '("base" + "br-parser-tools-lib" + "rackunit-lib")) +(define build-deps '("at-exp-lib" + "br-parser-tools-doc" + "racket-doc" + "scribble-lib")) +(define update-implies '("br-parser-tools-lib")) -- 2.25.1 From 0b6973b8309ebcd251fef25a9af51971d153860f Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Mon, 11 Jun 2018 10:54:17 -0700 Subject: [PATCH 09/11] add quantifier to docs --- brag/brag.scrbl | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/brag/brag.scrbl b/brag/brag.scrbl index fb143b2..1ca918e 100755 --- a/brag/brag.scrbl +++ b/brag/brag.scrbl @@ -231,7 +231,7 @@ We write a @tt{brag} program as an BNF grammar, where patterns can be: @itemize[ @item{the names of other rules (e.g. @racket[chunk])} @item{literal and symbolic token names (e.g. @racket[";"], @racket[INTEGER])} - @item{quantified patterns (e.g. @litchar{+} to represent one-or-more repetitions)} + @item{quantified patterns (e.g. @litchar{+} to represent one or more repetitions)} ] The result of a @tt{brag} program is a module with a @racket[parse] function that can parse tokens and produce a syntax object as a result. @@ -661,35 +661,41 @@ A program in the @tt{brag} language consists of the language line @litchar{#lang brag}, followed by a collection of @tech{rule}s and @tech{line comment}s. -A @deftech{rule} is a sequence consisting of: a @tech{rule identifier}, a colon -@litchar{":"}, and a @tech{pattern}. +A @deftech{rule} is a sequence consisting of: a @tech{rule identifier}, a separator (either @litchar{":"} or @litchar{"::="}), and a @tech{pattern}. A @deftech{rule identifier} is an @tech{identifier} that is not in upper case. A @deftech{symbolic token identifier} is an @tech{identifier} that is in upper case. -An @deftech{identifier} is a character sequence of letters, numbers, and -characters in @racket["-.!$%&/<=>?^_~@"]. It must not contain -@litchar{*} or @litchar{+}, as those characters are used to denote -quantification. +An @deftech{identifier} is a sequence of letters, numbers, or +characters in the set @racket["-.!$%&/<=>?^_~@"]. It must not contain +@litchar{*}, @litchar{+}, or @litchar|{{}| and @litchar|{}}|, as those characters are used to denote quantification. A @deftech{pattern} is one of the following: @itemize[ @item{an implicit sequence of @tech{pattern}s separated by whitespace} - @item{a terminal: either a literal string or a @tech{symbolic token identifier}. + @item{a @deftech{terminal}: either a literal string or a @tech{symbolic token identifier}. - When used in a pattern, both these terminals will match the same set of inputs. A literal string can match the string itself, or a @racket[token] structure whose type field contains that string (or its symbol form). So @racket["FOO"] would match @racket["FOO"], @racket[(token "FOO" "bar")], or @racket[(token 'FOO "bar")]. A symbolic token identifier can also match the string version of the identifier, or a @racket[token] whose type field is the symbol or string form of the identifier. So @racket[FOO] would also match @racket["FOO"], @racket[(token 'FOO "bar")], or @racket[(token "FOO" "bar")]. (In every case, the value of a token, like @racket["bar"], can be anything, and may or may not be the same as its type.) + When used in a pattern, both kinds of terminals will match the same set of inputs. - Because their underlying meanings are the same, the symbolic token identifier ends up being a notational convenience for readability inside a grammar pattern. Typically, the literal string @racket["FOO"] is used to connote ``match the string @racket["FOO"] exactly'' and the symbolic token identifier @racket[FOO] specially connotes ``match any token of type @racket['FOO]''. + A literal string can match the string itself, or a @racket[token] structure whose type field contains that string (or its symbol form). So @racket["FOO"] in a rule pattern would match the tokens @racket["FOO"], @racket[(token "FOO" "bar")], or @racket[(token 'FOO "bar")]. + + A symbolic token identifier can also match the string version of the identifier, or a @racket[token] whose type field is the symbol or string form of the identifier. So @racket[FOO] in a rule pattern would @emph{also} match the tokens @racket["FOO"], @racket[(token 'FOO "bar")], or @racket[(token "FOO" "bar")]. (In every case, the value of a token, like @racket["bar"], can be anything, and may or may not be the same as its type.) + + Because their underlying meanings are the same, the symbolic token identifier ends up being a notational convenience for readability inside a rule pattern. Typically, the literal string @racket["FOO"] is used to connote ``match the string @racket["FOO"] exactly'' and the symbolic token identifier @racket[FOO] specially connotes ``match a token of type @racket['FOO]''. You @bold{cannot} use the literal string @racket["error"] as a terminal in a grammar, because it's reserved for @tt{brag}. You can, however, adjust your lexer to package it inside a token structure — say, @racket[(token 'ERROR "error")] — and then use the symbolic token identifier @racket[ERROR] in the grammar to match this token structure. } @item{a @tech{rule identifier}} + @item{a @deftech{choice pattern}: a sequence of @tech{pattern}s delimited with @litchar{|} characters.} - @item{a @deftech{quantifed pattern}: a @tech{pattern} followed by either @litchar{*} (``zero or more'') or @litchar{+} (``one or more'')} + + @item{a @deftech{quantified pattern}: a @tech{pattern} followed by either @litchar{*} (``zero or more'') or @litchar{+} (``one or more''). Quantification can also be denoted by integers within curly brackets. So @litchar|{{2}}| means ``exactly 2''; @litchar|{{2,5}}| means ``between 2 and 5, inclusive''; @litchar|{{2,}}| means ``2 or more''; and @litchar|{{,5}}| means ``up to 5''.} + @item{an @deftech{optional pattern}: a @tech{pattern} surrounded by @litchar{[} and @litchar{]}} + @item{an explicit sequence: a @tech{pattern} surrounded by @litchar{(} and @litchar{)}}] A @deftech{line comment} begins with either @litchar{#} or @litchar{;} and @@ -931,7 +937,7 @@ bindings. The most important of these is @racket[parse]: @item{For terminals, the value of the token.} @item{For @tech{rule identifier}s: the associated parse value for the rule.} @item{For @tech{choice pattern}s: the associated parse value for one of the matching subpatterns.} - @item{For @tech{quantifed pattern}s and @tech{optional pattern}s: the corresponding values, spliced into the structure.} + @item{For @tech{quantified pattern}s and @tech{optional pattern}s: the corresponding values, spliced into the structure.} ] Consequently, it's only the presence of @tech{rule identifier}s in a rule's -- 2.25.1 From eb98d107f3b474e792bf9825916c2ee6363b8469 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Mon, 11 Jun 2018 10:54:33 -0700 Subject: [PATCH 10/11] put curlies into reserved chars --- brag/rules/lexer.rkt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/brag/rules/lexer.rkt b/brag/rules/lexer.rkt index 44e8d13..9aaec3d 100755 --- a/brag/rules/lexer.rkt +++ b/brag/rules/lexer.rkt @@ -13,8 +13,8 @@ ;; A newline can be any one of the following. (define-lex-abbrev NL (:or "\r\n" "\r" "\n")) -;; chars used for quantifiers & parse-tree filtering -(define-for-syntax quantifiers "+:*") ; colon is reserved to separate rules and productions +;; reserved-chars = chars used for quantifiers & parse-tree filtering +(define-for-syntax quantifiers "+:*{}") ; colon is reserved to separate rules and productions (define-lex-trans reserved-chars (λ(stx) #`(char-set #,(format "~a~a~a" quantifiers hide-char splice-char)))) -- 2.25.1 From 48226fa7c9749a1a62532251ade401aace4b7d33 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Mon, 11 Jun 2018 10:59:09 -0700 Subject: [PATCH 11/11] tests --- brag/examples/curly-quantifier.rkt | 7 +++++++ brag/test/test-curly-quantifier.rkt | 23 +++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 brag/examples/curly-quantifier.rkt create mode 100755 brag/test/test-curly-quantifier.rkt diff --git a/brag/examples/curly-quantifier.rkt b/brag/examples/curly-quantifier.rkt new file mode 100644 index 0000000..7f0d961 --- /dev/null +++ b/brag/examples/curly-quantifier.rkt @@ -0,0 +1,7 @@ +#lang brag +;; test the curly quantifier +start : a-rule | b-rule | c-rule | d-rule +a-rule : "a"{2} ; exactly 2 +b-rule : "b"{,2} ; up to 2 +c-rule : "c"{2,} ; 2 or more +d-rule : "d"{2,3} ; 2 or 3 \ No newline at end of file diff --git a/brag/test/test-curly-quantifier.rkt b/brag/test/test-curly-quantifier.rkt new file mode 100755 index 0000000..c531368 --- /dev/null +++ b/brag/test/test-curly-quantifier.rkt @@ -0,0 +1,23 @@ +#lang racket/base +(require brag/examples/curly-quantifier + brag/support + rackunit) + +(check-exn exn:fail:parsing? (λ () (parse-to-datum "a"))) +(check-equal? (parse-to-datum "aa") '(start (a-rule "a" "a"))) +(check-exn exn:fail:parsing? (λ () (parse-to-datum "aaa"))) + +(check-equal? (parse-to-datum "") '(start (b-rule))) +(check-equal? (parse-to-datum "b") '(start (b-rule "b"))) +(check-equal? (parse-to-datum "bb") '(start (b-rule "b" "b"))) +(check-exn exn:fail:parsing? (λ () (parse-to-datum "bbb"))) + +(check-exn exn:fail:parsing? (λ () (parse-to-datum "c"))) +(check-equal? (parse-to-datum "cc") '(start (c-rule "c" "c"))) +(check-equal? (parse-to-datum "ccc") '(start (c-rule "c" "c" "c"))) +(check-equal? (parse-to-datum "cccc") '(start (c-rule "c" "c" "c" "c"))) + +(check-exn exn:fail:parsing? (λ () (parse-to-datum "d"))) +(check-equal? (parse-to-datum "dd") '(start (d-rule "d" "d"))) +(check-equal? (parse-to-datum "ddd") '(start (d-rule "d" "d" "d"))) +(check-exn exn:fail:parsing? (λ () (parse-to-datum "dddd"))) -- 2.25.1