diff --git a/brag-lib/brag/rules/lexer.rkt b/brag-lib/brag/rules/lexer.rkt index 8ab5493..874bce3 100755 --- a/brag-lib/brag/rules/lexer.rkt +++ b/brag-lib/brag/rules/lexer.rkt @@ -38,39 +38,54 @@ (require syntax-color/racket-lexer) (define (unescape-double-quoted-lexeme lexeme) + ;; use `read` so brag strings have all the notational semantics of Racket strings (list->string `(#\" ,@(string->list (read (open-input-string lexeme))) #\"))) (define (convert-to-double-quoted lexeme) + ;; brag supports single-quoted strings, for some reason + ;; (Racket does not. A single quote denotes a datum) + ;; let's convert a single-quoted string into standard double-quoted style + ;; so we can use Racket's `read` function on it. + ;; and thereby support all the standard Racket string elements: + ;; https://docs.racket-lang.org/reference/reader.html#%28part._parse-string%29 (define outside-quotes-removed (string-trim lexeme "'")) (define single-quotes-unescaped (string-replace outside-quotes-removed "\\'" "'")) (define double-quotes-escaped (string-replace single-quotes-unescaped "\"" "\\\"")) (define double-quotes-on-ends (string-append "\"" double-quotes-escaped "\"")) double-quotes-on-ends) -(define-lex-abbrev escaped-single-quote "\\'") +(define-lex-abbrev backslash "\\") (define-lex-abbrev single-quote "'") -(define-lex-abbrev escaped-double-quote "\\\"") +(define-lex-abbrev escaped-single-quote (:: backslash single-quote)) (define-lex-abbrev double-quote "\"") -(define-lex-abbrev escaped-backslash "\\\\") +(define-lex-abbrev escaped-double-quote (:: backslash double-quote)) +(define-lex-abbrev escaped-backslash (:: backslash backslash)) (define lex/1 (lexer-src-pos - ;; handle whitespace & escape chars within quotes as literal tokens: "\n" "\t" '\n' '\t' - ;; match the escaped version, and then unescape them before they become token-LITs + [(:: double-quote ;; start with double quote + (intersection ;; two conditions need to be true inside the quotes: + ;; we can have anything except + ;; a plain double-quote (which would close the quote) + ;; plus we specially allow escaped double quotes and backslashes + (:* (:or escaped-double-quote escaped-backslash (:~ double-quote))) + ;; we must forbid one situation with the string \\" + ;; the problem is that it's ambiguous: + ;; it can be lexed as (:: escaped-backlash double-quote) = \\ + " + ;; or (:: backlash escaped-double-quote) = \ + \" + ;; because escapes should be "left associative", + ;; we forbid the second possibility + (complement (:: any-string backslash escaped-double-quote any-string))) + double-quote) ;; end with double quote + (token-LIT (unescape-double-quoted-lexeme lexeme))] + ;; single-quoted string follows the same pattern, + ;; but with escaped-single-quote instead of escaped-double-quote [(:: single-quote - (:or - (:+ escaped-backslash) ; aka '\\' - (intersection (:* (:or escaped-single-quote (:~ single-quote))) - (complement (:: escaped-backslash any-string)))) + (intersection + (:* (:or escaped-single-quote escaped-backslash (:~ single-quote))) + (complement (:: any-string backslash escaped-single-quote any-string))) single-quote) (token-LIT (unescape-double-quoted-lexeme (convert-to-double-quoted lexeme)))] - [(:: double-quote - (:or - (:+ escaped-backslash) ; aka "\\" - (intersection (:* (:or escaped-double-quote (:~ double-quote))) - (complement (:: escaped-backslash any-string)))) - double-quote) - (token-LIT (unescape-double-quoted-lexeme lexeme))] [(:or "()" "Ø" "∅") (token-EMPTY lexeme)] ["(" (token-LPAREN lexeme)]