From b7bb0aa50ae5707e81102edaba1680ef59fdbb6c Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Sat, 15 Jan 2022 12:40:15 -0800 Subject: [PATCH] use Racket lexer for double-quoted strings --- brag-lib/brag/examples/codepoints.rkt | 4 ++-- brag-lib/brag/rules/lexer.rkt | 27 ++++++++++++++++++++------ brag-lib/brag/test/test-codepoints.rkt | 4 ++-- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/brag-lib/brag/examples/codepoints.rkt b/brag-lib/brag/examples/codepoints.rkt index 2a618f8..1196aef 100644 --- a/brag-lib/brag/examples/codepoints.rkt +++ b/brag-lib/brag/examples/codepoints.rkt @@ -2,5 +2,5 @@ start: A c def hello-world A : "\"\101\\" ; A c : '\'\U0063\\' ; c -def : "*\u64\\\x65f\"" ; de -hello-world : "\150\145\154\154\157\40\167\157\162\154\144" \ No newline at end of file +def : "*\u64\\\"\\\x65f\"" ; de +hello-world : "\150\145\154\154\157\40\167\157\162\154\144" diff --git a/brag-lib/brag/rules/lexer.rkt b/brag-lib/brag/rules/lexer.rkt index 020e8f9..8f9abad 100755 --- a/brag-lib/brag/rules/lexer.rkt +++ b/brag-lib/brag/rules/lexer.rkt @@ -1,11 +1,12 @@ -#lang at-exp racket/base +#lang debug racket/base (require (for-syntax racket/base "parser.rkt")) (require br-parser-tools/lex (prefix-in : br-parser-tools/lex-sre) "parser.rkt" "rule-structs.rkt" (only-in brag/support from/to) - racket/string) + racket/string + syntax-color/racket-lexer) (provide lex/1 tokenize) (module+ lex-abbrevs @@ -66,9 +67,10 @@ (define-lex-abbrev escaped-double-quote (:: backslash double-quote)) (define-lex-abbrev escaped-backslash (:: backslash backslash)) -(define lex/1 +(define brag-lex (lexer-src-pos - [(:: double-quote ;; start with double quote + ;; we are delegating lexing of double-quoted strings to the Racket lexer (see below) + #;[(:: double-quote ;; start with double quote (intersection ;; two conditions need to be true inside the quotes: ;; we can have anything except ;; a plain double-quote (which would close the quote) @@ -146,6 +148,21 @@ (position->pos start-pos) (position->pos end-pos-2)))])) +(define (lex/1 ip) + (cond + [(equal? (peek-bytes 1 0 ip) #"\"") + ;; delegate lexing of strings to the default Racket lexer + ;; this doesn't yet work with single-quoted strings + ;; but maybe there's a way to do that by messing with the readtable + (define-values (line-start col-start pos-start) (port-next-location ip)) + (define-values (str type paren also-pos-start also-pos-end) (racket-lexer ip)) + (define-values (line-end col-end pos-end) (port-next-location ip)) + (make-position-token (token-LIT (string-append "\"" + (read (open-input-string str)) + "\"")) + (make-position pos-start line-start col-start) + (make-position pos-end line-end col-end))] + [else (brag-lex ip)])) ;; This is the helper for the error production. (define lex-nonwhitespace @@ -157,8 +174,6 @@ [(eof) (values "" end-pos)])) - - ;; position->pos: position -> pos ;; Converts position structures from br-parser-tools/lex to our own pos structures. (define (position->pos a-pos) diff --git a/brag-lib/brag/test/test-codepoints.rkt b/brag-lib/brag/test/test-codepoints.rkt index bb3b3b7..13544f8 100755 --- a/brag-lib/brag/test/test-codepoints.rkt +++ b/brag-lib/brag/test/test-codepoints.rkt @@ -3,8 +3,8 @@ (require brag/examples/codepoints rackunit) -(check-equal? (parse-to-datum '("\"A\\" "'c\\" "*d\\ef\"" "hello world")) +(check-equal? (parse-to-datum '("\"A\\" "'c\\" "*d\\\"\\ef\"" "hello world")) '(start (A "\"A\\") (c "'c\\") - (def "*d\\ef\"") + (def "*d\\\"\\ef\"") (hello-world "hello world")))