diff --git a/brag/brag.scrbl b/brag/brag.scrbl index 11fa8d3..006ac3d 100755 --- a/brag/brag.scrbl +++ b/brag/brag.scrbl @@ -665,7 +665,7 @@ generates. @subsection[#:tag "brag-syntax"]{Syntax and terminology} A program in the @tt{brag} language consists of the language line @litchar{#lang brag}, followed by a collection of @tech{rule}s and -@tech{line comment}s. +possibly @tech{line comment}s or @tech{multiline comment}s. A @deftech{rule} is a sequence consisting of: a @tech{rule identifier}, a separator (either @litchar{":"} or @litchar{"::="}), and a @tech{pattern}. @@ -676,14 +676,15 @@ A @deftech{symbolic token identifier} is an @tech{identifier} that is in upper c A @deftech{line comment} begins with either @litchar{#} or @litchar{;} and continues till the end of the line. +A @deftech{multiline comment} begins with @litchar{(*} and ends with @litchar{*)}. + An @deftech{identifier} is a sequence of letters, numbers, or characters in the set @racket["-.!$%&/<=>?^_~@"]. It must not contain @litchar{*}, @litchar{+}, or @litchar|{{}| and @litchar|{}}|, as those characters are used to denote quantification. - A @deftech{pattern} is one of the following: @itemize[ - @item{an implicit sequence of @tech{pattern}s separated by whitespace.} + @item{an implicit sequence of @tech{pattern}s separated by whitespace or commas.} @item{a @deftech{terminal}: either a literal string or a @tech{symbolic token identifier}. diff --git a/brag/codegen/expander.rkt b/brag/codegen/expander.rkt index 8ddf73f..39690d7 100755 --- a/brag/codegen/expander.rkt +++ b/brag/codegen/expander.rkt @@ -25,7 +25,7 @@ (syntax-case rules-stx () [(_) (raise-syntax-error 'brag (format "The grammar does not appear to have any rules") - 'brag-module)] + (syntax-source rules-stx))] [(_ . 
RULES) (let ([rules (syntax->list #'RULES)]) ;; (listof stx) diff --git a/brag/private/colorer.rkt b/brag/private/colorer.rkt index 736a9b3..a931887 100644 --- a/brag/private/colorer.rkt +++ b/brag/private/colorer.rkt @@ -12,7 +12,8 @@ (from/to "'" "'") (from/to "\"" "\"")) (token 'LIT lexeme)] [(:or "()" "Ø" "∅") (token 'NO-COLOR lexeme)] ; empty set symbols - [(:or (char-set "()[]{}|+*:?") hide-char splice-char) (token 'MISC lexeme)] + [(:or (char-set "()[]{}|+*:?") hide-char splice-char "::=") (token 'MISC lexeme)] + [(from/to "(*" "*)") (token 'COMMENT lexeme)] [(:seq (:or "#" ";") (complement (:seq (:* any-char) NL (:* any-char))) (:or NL "")) (token 'COMMENT lexeme)] [id (token 'ID lexeme)] [any-char (token 'OTHER lexeme)])) diff --git a/brag/rules/lexer.rkt b/brag/rules/lexer.rkt index 5185dbe..925e6c9 100755 --- a/brag/rules/lexer.rkt +++ b/brag/rules/lexer.rkt @@ -4,6 +4,7 @@ (prefix-in : br-parser-tools/lex-sre) "parser.rkt" "rule-structs.rkt" + (only-in brag/support from/to) racket/string) (provide lex/1 tokenize) @@ -74,14 +75,18 @@ [(:or "+" "*" "?" (:: "{" (:* digit) (:? (:: "," (:* digit))) "}")) (token-REPEAT lexeme)] + ;; Skip whitespace [whitespace - ;; Skip whitespace (return-without-pos (lex/1 input-port))] + ;; skip multiline comments + [(from/to "(*" "*)") (return-without-pos (lex/1 input-port))] ;; Skip comments up to end of line [(:: (:or "#" ";") (complement (:: (:* any-char) NL (:* any-char))) (:or NL "")) (return-without-pos (lex/1 input-port))] + ;; skip commas (concatenation is implied) + ["," (return-without-pos (lex/1 input-port))] [(eof) (token-EOF lexeme)] [(:: id (:* whitespace) id-separator)