From 65fc31cf971706392ce19771550a2cad907b627a Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Tue, 21 Mar 2017 21:46:57 -0700 Subject: [PATCH] syntax colorer --- brag/brag/codegen/reader.rkt | 4 +-- brag/brag/private/colorer.rkt | 56 +++++++++++++++++++++++++++++++++++ brag/brag/rules/lexer.rkt | 15 ++++------ 3 files changed, 62 insertions(+), 13 deletions(-) create mode 100644 brag/brag/private/colorer.rkt diff --git a/brag/brag/codegen/reader.rkt b/brag/brag/codegen/reader.rkt index 6413225..c4f35f4 100755 --- a/brag/brag/codegen/reader.rkt +++ b/brag/brag/codegen/reader.rkt @@ -60,9 +60,7 @@ brag/codegen/sexp-based-lang ;; syntax/module-reader. (define (my-get-info key default default-filter) (case key - [(color-lexer) - (dynamic-require 'syntax-color/default-lexer - 'default-lexer)] + [(color-lexer) (dynamic-require 'brag/private/colorer 'color-brag (λ () #f))] [else (default-filter key default)])) diff --git a/brag/brag/private/colorer.rkt b/brag/brag/private/colorer.rkt new file mode 100644 index 0000000..5f4de49 --- /dev/null +++ b/brag/brag/private/colorer.rkt @@ -0,0 +1,56 @@ +#lang racket/base +(require brag/support (submod brag/rules/lexer lex-abbrevs) brag/support racket/match) +(provide color-brag) + +(define brag-syntax-lexer + (lexer-srcloc + [(eof) (return-without-srcloc eof)] + [whitespace (return-without-srcloc (brag-syntax-lexer input-port))] + [(:or (from/to "'" "'") (from/to "\"" "\"")) (token 'LIT lexeme)] + [(:or (char-set "()[]|+*:") hide-char splice-char) (token 'MISC lexeme)] + [(:seq (:or "#" ";") + (complement (:seq (:* any-char) NL (:* any-char))) + (:or NL "")) + (token 'COMMENT lexeme)] + [id (token 'ID lexeme)] + [any-char (token 'OTHER lexeme)])) + + +(define (color-brag port) + (define srcloc-tok + (with-handlers + ([exn:fail:read? + (λ (exn) (srcloc-token (token 'ERROR) (car (exn:fail:read-srclocs exn))))]) + (brag-syntax-lexer port))) + (if (eof-object? srcloc-tok) + (values srcloc-tok 'eof #f #f #f) + (match-let* ([(srcloc-token + (token-struct type val _ _ _ _ _) + (srcloc _ _ _ posn span)) srcloc-tok] + [(cons start end) (cons posn (+ posn span))] + [(cons _ cat) (or (assq type + '((ID . symbol) + (LIT . string) + (MISC . parenthesis) + (COMMENT . comment) + (ERROR . error))) + (cons 'OTHER 'no-color))]) + (values val cat #f start end)))) + +(module+ test + (require rackunit sugar/list) + (define (apply-colorer str) + (for/list ([annotation (in-port (λ (p) + (let ([xs (values->list (color-brag p))]) + (if (eof-object? (car xs)) eof xs))) + (open-input-string str))]) + annotation)) + + (check-equal? (apply-colorer "foo") '(("foo" symbol #f 1 4))) + (check-equal? (apply-colorer "'str'") '(("'str'" string #f 1 6))) + (check-equal? (apply-colorer "(foo)+") '(("(" parenthesis #f 1 2) + ("foo" symbol #f 2 5) + (")" parenthesis #f 5 6) + ("+" parenthesis #f 6 7))) + (check-equal? (apply-colorer "; rem") '(("; rem" comment #f 1 6))) + (check-equal? (apply-colorer "◊") '(("◊" no-color #f 1 4)))) diff --git a/brag/brag/rules/lexer.rkt b/brag/brag/rules/lexer.rkt index e55991b..948c358 100755 --- a/brag/brag/rules/lexer.rkt +++ b/brag/brag/rules/lexer.rkt @@ -7,6 +7,8 @@ racket/string) (provide lex/1 tokenize) +(module+ lex-abbrevs + (provide hide-char splice-char id-char letter digit NL id)) ;; A newline can be any one of the following. (define-lex-abbrev NL (:or "\r\n" "\r" "\n")) @@ -67,18 +69,11 @@ ;; Skip whitespace (return-without-pos (lex/1 input-port))] ;; Skip comments up to end of line - ;; but detect possble kwargs. - [(:: (:or "#" ";") ; remove # as comment char + [(:: (:or "#" ";") (complement (:: (:* any-char) NL (:* any-char))) (:or NL "")) - (let ([maybe-kwarg-match (regexp-match #px"^#:(.*?)\\s*(.*?)$" lexeme)]) - (when maybe-kwarg-match - (let* ([parts (map string->symbol (string-split (string-trim lexeme "#:" #:right? #f)))] - [kw (car parts)][val (cadr parts)]) - (case kw - [(prefix-out) (current-prefix-out val)] - [else (error 'lexer (format "got unknown keyword ~a" kw))]))) - (return-without-pos (lex/1 input-port)))] + ;; Skip comments up to end of line. + (return-without-pos (lex/1 input-port))] [(eof) (token-EOF lexeme)] [(:: id (:* whitespace) ":")