From 1fdd63cd1768b15df8d802dc1fb2a3dd0350db38 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Tue, 11 Jan 2022 19:47:20 -0800 Subject: [PATCH] refactor to make things clearer --- brag-lib/brag/rules/lexer.rkt | 34 +++++++++++++++---------------- brag-lib/brag/test/test-lexer.rkt | 6 ++++-- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/brag-lib/brag/rules/lexer.rkt b/brag-lib/brag/rules/lexer.rkt index 925e6c9..7d19c95 100755 --- a/brag-lib/brag/rules/lexer.rkt +++ b/brag-lib/brag/rules/lexer.rkt @@ -1,4 +1,4 @@ -#lang racket/base +#lang debug at-exp racket/base (require (for-syntax racket/base "parser.rkt")) (require br-parser-tools/lex (prefix-in : br-parser-tools/lex-sre) @@ -35,28 +35,28 @@ (define-lex-abbrev esc-chars (union "\\a" "\\b" "\\t" "\\n" "\\v" "\\f" "\\r" "\\e")) -(define (unescape-lexeme lexeme quote-char) - ;; convert the literal string representation back into an escape char with lookup table - (define unescapes (hash "a" 7 "b" 8 "t" 9 "n" 10 "v" 11 "f" 12 "r" 13 "e" 27 "\"" 34 "'" 39 "\\" 92)) - (define pat (regexp (format "(?<=^~a\\\\).(?=~a$)" quote-char quote-char))) - (cond - [(regexp-match pat lexeme) - => (λ (m) (string quote-char (integer->char (hash-ref unescapes (car m))) quote-char))] - [else lexeme])) +(require syntax-color/racket-lexer) +(define (unescape-double-quoted-lexeme lexeme) + (list->string `(#\" ,@(string->list (read (open-input-string lexeme))) #\"))) + +(define (convert-to-double-quoted lexeme) + (define outside-quotes-removed (string-trim lexeme "'")) + (define single-quotes-unescaped (string-replace outside-quotes-removed "\\'" "'")) + (define double-quotes-escaped (string-replace single-quotes-unescaped "\"" "\\\"")) + (define double-quotes-on-ends (string-append "\"" double-quotes-escaped "\"")) + double-quotes-on-ends) (define lex/1 (lexer-src-pos ;; handle whitespace & escape chars within quotes as literal tokens: "\n" "\t" '\n' '\t' ;; match the escaped version, and then unescape them before they become token-LITs - [(:: "'" - (:or (:* (:or "\\'" esc-chars (:~ "'" "\\"))) "\\\\") - "'") - (token-LIT (unescape-lexeme lexeme #\'))] - [(:: "\"" - (:or (:* (:or "\\\"" esc-chars (:~ "\"" "\\"))) "\\\\") - "\"") - (token-LIT (unescape-lexeme lexeme #\"))] + [(:or (:: "'\\\\'") ; aka '\\' + (:: "'" (:* (:or "\\'" esc-chars (:~ "'" "\\"))) "'")) + (token-LIT (unescape-double-quoted-lexeme (convert-to-double-quoted lexeme)))] + [(:or (:: "\"\\\\\"") ; aka "\\" + (:: "\"" (:* (:or "\\\"" esc-chars (:~ "\"" "\\"))) "\"")) + (token-LIT (unescape-double-quoted-lexeme lexeme))] [(:or "()" "Ø" "∅") (token-EMPTY lexeme)] ["(" (token-LPAREN lexeme)] diff --git a/brag-lib/brag/test/test-lexer.rkt b/brag-lib/brag/test/test-lexer.rkt index 49f3774..8e7830d 100755 --- a/brag-lib/brag/test/test-lexer.rkt +++ b/brag-lib/brag/test/test-lexer.rkt @@ -51,11 +51,13 @@ (check-equal? (l "]") '(RBRACKET "]" 1 2)) +;; 220111: lexer now converts single-quoted lexemes +;; to standard Racket-style double-quoted string literal (check-equal? (l "'hello'") - '(LIT "'hello'" 1 8)) + '(LIT "\"hello\"" 1 8)) (check-equal? (l "'he\\'llo'") - '(LIT "'he\\'llo'" 1 10)) + '(LIT "\"he'llo\"" 1 10)) (check-equal? (l "/") '(HIDE "/" 1 2))