From c1b9497b3374d85f860763c3a15c4a4c15265592 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Sat, 4 Jun 2016 12:35:28 -0700 Subject: [PATCH] start adding keyword arg support to brag (broken) --- brag/brag/examples/baby-json-hider.rkt | 2 +- brag/brag/rules/lexer.rkt | 46 +++++++++++++++---------- brag/brag/rules/parser.rkt | 3 ++ brag/brag/test/test-baby-json-hider.rkt | 6 ++-- brag/brag/test/test-lexer.rkt | 15 ++++++++ 5 files changed, 50 insertions(+), 22 deletions(-) diff --git a/brag/brag/examples/baby-json-hider.rkt b/brag/brag/examples/baby-json-hider.rkt index b1bdfc8..769ecfd 100755 --- a/brag/brag/examples/baby-json-hider.rkt +++ b/brag/brag/examples/baby-json-hider.rkt @@ -1,10 +1,10 @@ #lang brag +#:prefix-out my: ;; Simple baby example of JSON structure json: number | string | array | @object - number: NUMBER string: STRING diff --git a/brag/brag/rules/lexer.rkt b/brag/brag/rules/lexer.rkt index df4c9e8..b83cf25 100755 --- a/brag/brag/rules/lexer.rkt +++ b/brag/brag/rules/lexer.rkt @@ -3,7 +3,8 @@ (require parser-tools/lex (prefix-in : parser-tools/lex-sre) "parser.rkt" - "rule-structs.rkt") + "rule-structs.rkt" + racket/string) (provide lex/1 tokenize) @@ -11,20 +12,22 @@ (define-lex-abbrev NL (:or "\r\n" "\r" "\n")) ;; chars used for quantifiers & parse-tree filtering -(define-for-syntax quantifiers "+:*") +(define-for-syntax quantifiers "+:*") ; colon is reserved to separate rules and productions (define-lex-trans reserved-chars (λ(stx) #`(char-set #,(format "~a~a~a" quantifiers hide-char splice-char)))) -(define-lex-abbrevs - [letter (:or (:/ "a" "z") (:/ #\A #\Z))] - [digit (:/ #\0 #\9)] - [id-char (:or letter digit (:& (char-set "+:*@!-.$%&/=?^_~<>") (char-complement (reserved-chars))))] - ) +(define-lex-trans hide-char-trans (λ(stx) #`(char-set #,(format "~a" hide-char)))) +(define-lex-trans splice-char-trans (λ(stx) #`(char-set #,(format "~a" splice-char)))) -(define-lex-abbrev id - (:& (complement (:+ digit)) - (:+ id-char))) +(define-lex-abbrevs + [letter (:or (:/ "a" "z") (:/ #\A #\Z))] + [digit (:/ #\0 #\9)] + [id-char (:or letter digit (:& (char-set "+:*@!-.$%&/=?^_~<>") (char-complement (reserved-chars))))] + [hide-char (hide-char-trans)] + [splice-char (splice-char-trans)] + ) +(define-lex-abbrev id (:& (complement (:+ digit)) (:+ id-char))) (define lex/1 (lexer-src-pos @@ -44,9 +47,9 @@ (token-RPAREN lexeme)] ["]" (token-RBRACKET lexeme)] - ["/" + [hide-char (token-HIDE lexeme)] - ["@" + [splice-char (token-SPLICE lexeme)] ["|" (token-PIPE lexeme)] @@ -56,18 +59,25 @@ ;; Skip whitespace (return-without-pos (lex/1 input-port))] ;; Skip comments up to end of line - [(:: (:or "#" ";") + ;; but detect possble kwargs. + [(:: (:or "#" ";") ; remove # as comment char (complement (:: (:* any-char) NL (:* any-char))) (:or NL "")) - ;; Skip comments up to end of line. - (return-without-pos (lex/1 input-port))] + (let ([maybe-kwarg-match (regexp-match #px"^#:(.*?)\\s*(.*?)$" lexeme)]) + (when maybe-kwarg-match + (let* ([parts (map string->symbol (string-split (string-trim lexeme "#:" #:right? #f)))] + [kw (car parts)][val (cadr parts)]) + (case kw + [(prefix-out) (current-prefix-out val)] + [else (error 'lexer (format "got unknown keyword ~a" kw))]))) + (return-without-pos (lex/1 input-port)))] [(eof) (token-EOF lexeme)] [(:: id (:* whitespace) ":") (token-RULE_HEAD lexeme)] - [(:: "/" id (:* whitespace) ":") + [(:: hide-char id (:* whitespace) ":") (token-RULE_HEAD_HIDDEN lexeme)] - [(:: "@" id (:* whitespace) ":") + [(:: splice-char id (:* whitespace) ":") (token-RULE_HEAD_SPLICED lexeme)] [id (token-ID lexeme)] @@ -75,7 +85,7 @@ ;; We call the error handler for everything else: [(:: any-char) (let-values ([(rest-of-text end-pos-2) - (lex-nonwhitespace input-port)]) + (lex-nonwhitespace input-port)]) ((current-parser-error-handler) #f 'error diff --git a/brag/brag/rules/parser.rkt b/brag/brag/rules/parser.rkt index b04983c..3c47886 100755 --- a/brag/brag/rules/parser.rkt +++ b/brag/brag/rules/parser.rkt @@ -28,6 +28,7 @@ current-source current-parser-error-handler + current-prefix-out [struct-out rule] [struct-out lhs-id] @@ -250,6 +251,8 @@ ;; During parsing, we should define the source of the input. (define current-source (make-parameter #f)) +(define current-prefix-out (make-parameter #f)) + ;; When bad things happen, we need to emit errors with source location. (struct exn:fail:parse-grammar exn:fail (srclocs) diff --git a/brag/brag/test/test-baby-json-hider.rkt b/brag/brag/test/test-baby-json-hider.rkt index b7580d6..8f12259 100755 --- a/brag/brag/test/test-baby-json-hider.rkt +++ b/brag/brag/test/test-baby-json-hider.rkt @@ -8,12 +8,12 @@ ":" (token 'STRING "'hello world'") "}"))) -(check-equal? (syntax->datum parse-result) '(json (":"))) +(check-equal? (syntax->datum parse-result) '(my:json (":"))) (define syntaxed-colon-parens (cadr (syntax->list parse-result))) -(check-equal? (syntax->datum (syntax-property syntaxed-colon-parens 'kvpair)) 'kvpair) +(check-equal? (syntax->datum (syntax-property syntaxed-colon-parens 'my:kvpair)) 'my:kvpair) (check-equal? (syntax->datum (parse "[[[{}]],[],[[{}]]]")) - '(json (array #\[ (json (array #\[ (json (array #\[ (json) #\])) #\])) #\, (json (array #\[ #\])) #\, (json (array #\[ (json (array #\[ (json) #\])) #\])) #\]))) + '(my:json (my:array #\[ (my:json (my:array #\[ (my:json (my:array #\[ (my:json) #\])) #\])) #\, (my:json (my:array #\[ #\])) #\, (my:json (my:array #\[ (my:json (my:array #\[ (my:json) #\])) #\])) #\]))) diff --git a/brag/brag/test/test-lexer.rkt b/brag/brag/test/test-lexer.rkt index a79f1f9..6698b65 100755 --- a/brag/brag/test/test-lexer.rkt +++ b/brag/brag/test/test-lexer.rkt @@ -56,3 +56,18 @@ (check-equal? (l "'he\\'llo'") '(LIT "'he\\'llo'" 1 10)) + +(check-equal? (l "/") + '(HIDE "/" 1 2)) + +(check-equal? (l " /") + '(HIDE "/" 2 3)) + +(check-equal? (l "@") + '(SPLICE "@" 1 2)) + +(check-equal? (l " @") + '(SPLICE "@" 2 3)) + +(check-equal? (l "#:prefix-out val:") + (list 'EOF eof 18 18)) ; lexer skips kwarg \ No newline at end of file