handle escape chars better

hide-top-rule-name
Matthew Butterick 6 years ago
parent 78183544c1
commit d952611293

@ -1,6 +1,8 @@
#lang brag
start: (tab | space | newline | letter)*
start: (tab | space | newline | letter | return | all)*
tab: '\t'
space: " "
newline: "\n"
return : "\r"
all : "\a" "\b" "\t" "\n" "\v" "\f" "\r" "\e"
letter: "x" | "y" | "z"

@ -32,26 +32,31 @@
;; `id`: one or more `id-char`s, intersected with the complement of
;; "one or more `digit`s" -- i.e. an all-digit run is not an id.
(define-lex-abbrev id (:& (complement (:+ digit)) (:+ id-char)))
;; `id-separator`: either ":" or "::=".
(define-lex-abbrev id-separator (:or ":" "::="))
;; `esc-chars`: used inside quoted-literal patterns to admit backslash escapes.
;; NOTE(review): the string below is the 16 characters \a\b\t\n\v\f\r\e, and
;; `char-set` matches any ONE character drawn from its string. So this looks
;; like it matches a lone backslash (or one of a b t n v f r e), not a
;; two-character escape sequence -- confirm whether an `:or` of the
;; two-character strings was intended.
(define-lex-abbrev esc-chars (char-set "\\a\\b\\t\\n\\v\\f\\r\\e"))
(define (escape-lexeme lexeme quote-char)
  ;; Convert a quoted literal that holds exactly one backslash escape
  ;; (for example the four characters '\n') into the same quoted literal
  ;; holding the actual character, using a lookup table.
  ;;   lexeme     : string, the raw lexeme including its surrounding quotes
  ;;   quote-char : char, the delimiter that surrounds the lexeme (#\' or #\")
  ;; Returns a string. Any lexeme that is not a single, known escape is
  ;; returned unchanged.
  ;; maybe use `read` instead?
  (define escapes (hash "a" 7 "b" 8 "t" 9 "n" 10 "v" 11 "f" 12 "r" 13 "e" 27 "\"" 34 "'" 39))
  ;; Matches the one character sitting between <quote><backslash> and the
  ;; closing <quote>; the lookarounds keep the delimiters out of the match.
  (define pat (regexp (format "(?<=^~a\\\\).(?=~a$)" quote-char quote-char)))
  (define m (regexp-match pat lexeme))
  ;; Look up with a #f default: an escape letter that is not in the table
  ;; (e.g. \x or \\) must fall through to the pass-through result rather than
  ;; raise a "no value found for key" error from `hash-ref`.
  (define code (and m (hash-ref escapes (car m) #f)))
  (if code
      (string quote-char (integer->char code) quote-char)
      lexeme))
(define lex/1
(lexer-src-pos
;; handle whitespace chars within quotes as literal tokens: "\n" "\t" '\n' '\t'
;; by matching the escaped version, and then unescaping them before they become token-LITs
[(:: "'"
(:* (:or "\\'" "\\n" "\\t" (:~ "'" "\\")))
(:* (:or "\\'" esc-chars (:~ "'" "\\")))
"'")
(token-LIT (case lexeme
[("'\\''") "\"'\""]
[("'\\n'") "'\n'"]
[("'\\t'") "'\t'"]
[else lexeme]))]
(token-LIT (escape-lexeme lexeme #\'))]
[(:: "\""
(:* (:or "\\\"" "\\n" "\\t" (:~ "\"" "\\")))
(:* (:or "\\\"" esc-chars (:~ "\"" "\\")))
"\"")
(token-LIT (case lexeme
[("\"\\\"\"") "\"\"\""]
[("\"\\n\"") "\"\n\""]
[("\"\\t\"") "\"\t\""]
[else lexeme]))]
(token-LIT (escape-lexeme lexeme #\"))]
["("
(token-LPAREN lexeme)]
["["

@ -4,9 +4,13 @@
rackunit)
(check-equal?
(parse-to-datum "\ty\n x\tz")
'(start (tab "\t") (letter "y") (newline "\n") (space " ") (letter "x") (tab "\t") (letter "z")))
(parse-to-datum "\ty\n x\tz\r")
'(start (tab "\t") (letter "y") (newline "\n") (space " ") (letter "x") (tab "\t") (letter "z") (return "\r")))
(check-equal?
(parse-to-datum "\t\n \t")
'(start (tab "\t") (newline "\n") (space " ") (tab "\t")))
(parse-to-datum "\t\n \t\r")
'(start (tab "\t") (newline "\n") (space " ") (tab "\t") (return "\r")))
(check-equal?
(parse-to-datum "\a\b\t\n\v\f\r\e")
'(start (all "\a" "\b" "\t" "\n" "\v" "\f" "\r" "\e")))

Loading…
Cancel
Save