From c1b9497b3374d85f860763c3a15c4a4c15265592 Mon Sep 17 00:00:00 2001
From: Matthew Butterick <mb@mbtype.com>
Date: Sat, 4 Jun 2016 12:35:28 -0700
Subject: [PATCH] start adding keyword arg support to brag (broken)

---
 brag/brag/examples/baby-json-hider.rkt  |  2 +-
 brag/brag/rules/lexer.rkt               | 46 +++++++++++++++----------
 brag/brag/rules/parser.rkt              |  3 ++
 brag/brag/test/test-baby-json-hider.rkt |  6 ++--
 brag/brag/test/test-lexer.rkt           | 15 ++++++++
 5 files changed, 50 insertions(+), 22 deletions(-)

diff --git a/brag/brag/examples/baby-json-hider.rkt b/brag/brag/examples/baby-json-hider.rkt
index b1bdfc8..769ecfd 100755
--- a/brag/brag/examples/baby-json-hider.rkt
+++ b/brag/brag/examples/baby-json-hider.rkt
@@ -1,10 +1,10 @@
 #lang brag
+#:prefix-out my:
 
 ;; Simple baby example of JSON structure
 json: number | string
     | array
     | @object
-
 number: NUMBER
 
 string: STRING
diff --git a/brag/brag/rules/lexer.rkt b/brag/brag/rules/lexer.rkt
index df4c9e8..b83cf25 100755
--- a/brag/brag/rules/lexer.rkt
+++ b/brag/brag/rules/lexer.rkt
@@ -3,7 +3,8 @@
 (require parser-tools/lex
          (prefix-in : parser-tools/lex-sre)
          "parser.rkt"
-         "rule-structs.rkt")
+         "rule-structs.rkt"
+         racket/string)
 
 (provide lex/1 tokenize)
 
@@ -11,20 +12,22 @@
 (define-lex-abbrev NL (:or "\r\n" "\r" "\n"))
 
 ;; chars used for quantifiers & parse-tree filtering
-(define-for-syntax quantifiers "+:*")
+(define-for-syntax quantifiers "+:*") ; colon is reserved to separate rules and productions
 (define-lex-trans reserved-chars
   (λ(stx) #`(char-set #,(format "~a~a~a" quantifiers hide-char splice-char))))
 
-(define-lex-abbrevs
-   [letter (:or (:/ "a" "z") (:/ #\A #\Z))]
-   [digit (:/ #\0 #\9)]
-   [id-char (:or letter digit (:& (char-set "+:*@!-.$%&/=?^_~<>") (char-complement (reserved-chars))))]
- )
+(define-lex-trans hide-char-trans (λ(stx) #`(char-set #,(format "~a" hide-char))))
+(define-lex-trans splice-char-trans (λ(stx) #`(char-set #,(format "~a" splice-char))))
 
-(define-lex-abbrev id
-  (:& (complement (:+ digit))
-      (:+ id-char)))
+(define-lex-abbrevs
+  [letter (:or (:/ "a" "z") (:/ #\A #\Z))]
+  [digit (:/ #\0 #\9)]
+  [id-char (:or letter digit (:& (char-set "+:*@!-.$%&/=?^_~<>") (char-complement (reserved-chars))))]
+  [hide-char (hide-char-trans)]
+  [splice-char (splice-char-trans)]
+  )
 
+(define-lex-abbrev id (:& (complement (:+ digit)) (:+ id-char)))
 
 (define lex/1
   (lexer-src-pos
@@ -44,9 +47,9 @@
     (token-RPAREN lexeme)]
    ["]"
     (token-RBRACKET lexeme)]
-   ["/"
+   [hide-char
     (token-HIDE lexeme)]
-   ["@"
+   [splice-char
     (token-SPLICE lexeme)]
    ["|"
     (token-PIPE lexeme)]
@@ -56,18 +59,25 @@
     ;; Skip whitespace
     (return-without-pos (lex/1 input-port))]
    ;; Skip comments up to end of line
-   [(:: (:or "#" ";")
+   ;; but detect possble kwargs.
+   [(:: (:or "#" ";") ; remove # as comment char
         (complement (:: (:* any-char) NL (:* any-char)))
         (:or NL ""))
-    ;; Skip comments up to end of line.
-    (return-without-pos (lex/1 input-port))]
+    (let ([maybe-kwarg-match (regexp-match #px"^#:(.*?)\\s*(.*?)$" lexeme)])
+      (when maybe-kwarg-match
+        (let* ([parts (map string->symbol (string-split (string-trim lexeme "#:" #:right? #f)))]
+               [kw (car parts)][val (cadr parts)])
+          (case kw
+            [(prefix-out) (current-prefix-out val)]
+            [else (error 'lexer (format "got unknown keyword ~a" kw))])))
+      (return-without-pos (lex/1 input-port)))]
    [(eof)
     (token-EOF lexeme)]
    [(:: id (:* whitespace) ":")
     (token-RULE_HEAD lexeme)]
-   [(:: "/" id (:* whitespace) ":")
+   [(:: hide-char id (:* whitespace) ":")
     (token-RULE_HEAD_HIDDEN lexeme)]
-   [(:: "@" id (:* whitespace) ":")
+   [(:: splice-char id (:* whitespace) ":")
     (token-RULE_HEAD_SPLICED lexeme)]
    [id
     (token-ID lexeme)]
@@ -75,7 +85,7 @@
    ;; We call the error handler for everything else:
    [(:: any-char)
     (let-values ([(rest-of-text end-pos-2)
-                 (lex-nonwhitespace input-port)])
+                  (lex-nonwhitespace input-port)])
       ((current-parser-error-handler)
        #f
        'error
diff --git a/brag/brag/rules/parser.rkt b/brag/brag/rules/parser.rkt
index b04983c..3c47886 100755
--- a/brag/brag/rules/parser.rkt
+++ b/brag/brag/rules/parser.rkt
@@ -28,6 +28,7 @@
          
          current-source
          current-parser-error-handler
+         current-prefix-out
          
          [struct-out rule]
          [struct-out lhs-id]
@@ -250,6 +251,8 @@
 ;; During parsing, we should define the source of the input.
 (define current-source (make-parameter #f))
 
+(define current-prefix-out (make-parameter #f))
+
 
 ;; When bad things happen, we need to emit errors with source location.
 (struct exn:fail:parse-grammar exn:fail (srclocs)
diff --git a/brag/brag/test/test-baby-json-hider.rkt b/brag/brag/test/test-baby-json-hider.rkt
index b7580d6..8f12259 100755
--- a/brag/brag/test/test-baby-json-hider.rkt
+++ b/brag/brag/test/test-baby-json-hider.rkt
@@ -8,12 +8,12 @@
                                   ":"
                                   (token 'STRING "'hello world'")
                                   "}")))
-(check-equal? (syntax->datum parse-result) '(json (":")))
+(check-equal? (syntax->datum parse-result) '(my:json (":")))
 
 (define syntaxed-colon-parens (cadr (syntax->list parse-result)))
-(check-equal? (syntax->datum (syntax-property syntaxed-colon-parens 'kvpair)) 'kvpair)
+(check-equal? (syntax->datum (syntax-property syntaxed-colon-parens 'my:kvpair)) 'my:kvpair)
 
 (check-equal? 
  (syntax->datum
   (parse "[[[{}]],[],[[{}]]]"))
- '(json (array #\[ (json (array #\[ (json (array #\[ (json) #\])) #\])) #\, (json (array #\[ #\])) #\, (json (array #\[ (json (array #\[ (json) #\])) #\])) #\])))
+ '(my:json (my:array #\[ (my:json (my:array #\[ (my:json (my:array #\[ (my:json) #\])) #\])) #\, (my:json (my:array #\[ #\])) #\, (my:json (my:array #\[ (my:json (my:array #\[ (my:json) #\])) #\])) #\])))
diff --git a/brag/brag/test/test-lexer.rkt b/brag/brag/test/test-lexer.rkt
index a79f1f9..6698b65 100755
--- a/brag/brag/test/test-lexer.rkt
+++ b/brag/brag/test/test-lexer.rkt
@@ -56,3 +56,18 @@
 
 (check-equal? (l "'he\\'llo'")
               '(LIT "'he\\'llo'" 1 10))
+
+(check-equal? (l "/")
+              '(HIDE "/" 1 2))
+
+(check-equal? (l " /")
+              '(HIDE "/" 2 3))
+
+(check-equal? (l "@")
+              '(SPLICE "@" 1 2))
+
+(check-equal? (l " @")
+              '(SPLICE "@" 2 3))
+
+(check-equal? (l "#:prefix-out val:")
+              (list 'EOF eof 18 18)) ; lexer skips kwarg
\ No newline at end of file