From 2eb0b5c920d7d7f78338cbf1c26e54c23b68ad10 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Thu, 13 Jan 2022 04:51:47 -0800 Subject: [PATCH] support codepoints & pass current tests --- brag-lib/brag/examples/codepoints.rkt | 5 +++++ brag-lib/brag/rules/lexer.rkt | 22 ++++++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 brag-lib/brag/examples/codepoints.rkt diff --git a/brag-lib/brag/examples/codepoints.rkt b/brag-lib/brag/examples/codepoints.rkt new file mode 100644 index 0000000..c8be6f9 --- /dev/null +++ b/brag-lib/brag/examples/codepoints.rkt @@ -0,0 +1,5 @@ +#lang brag +start: "\101" ; A + | "\U0063" ; c + | "\u64" ; d + | "\x65" ; e \ No newline at end of file diff --git a/brag-lib/brag/rules/lexer.rkt b/brag-lib/brag/rules/lexer.rkt index 7d19c95..8ab5493 100755 --- a/brag-lib/brag/rules/lexer.rkt +++ b/brag-lib/brag/rules/lexer.rkt @@ -47,15 +47,29 @@ (define double-quotes-on-ends (string-append "\"" double-quotes-escaped "\"")) double-quotes-on-ends) +(define-lex-abbrev escaped-single-quote "\\'") +(define-lex-abbrev single-quote "'") +(define-lex-abbrev escaped-double-quote "\\\"") +(define-lex-abbrev double-quote "\"") +(define-lex-abbrev escaped-backslash "\\\\") + (define lex/1 (lexer-src-pos ;; handle whitespace & escape chars within quotes as literal tokens: "\n" "\t" '\n' '\t' ;; match the escaped version, and then unescape them before they become token-LITs - [(:or (:: "'\\\\'") ; aka '\\' - (:: "'" (:* (:or "\\'" esc-chars (:~ "'" "\\"))) "'")) + [(:: single-quote + (:or + (:+ escaped-backslash) ; aka '\\' + (intersection (:* (:or escaped-single-quote (:~ single-quote))) + (complement (:: escaped-backslash any-string)))) + single-quote) (token-LIT (unescape-double-quoted-lexeme (convert-to-double-quoted lexeme)))] - [(:or (:: "\"\\\\\"") ; aka "\\" - (:: "\"" (:* (:or "\\\"" esc-chars (:~ "\"" "\\"))) "\"")) + [(:: double-quote + (:or + (:+ escaped-backslash) ; aka "\\" + (intersection (:* (:or escaped-double-quote (:~ double-quote))) + (complement (:: escaped-backslash any-string)))) + double-quote) (token-LIT (unescape-double-quoted-lexeme lexeme))] [(:or "()" "Ø" "∅") (token-EMPTY lexeme)] ["("