add multiline comments; ignore commas

pull/14/head
Matthew Butterick 6 years ago
parent dd37900690
commit 6cf947b8af

@ -665,7 +665,7 @@ generates.
@subsection[#:tag "brag-syntax"]{Syntax and terminology}
A program in the @tt{brag} language consists of the language line
@litchar{#lang brag}, followed by a collection of @tech{rule}s and
@tech{line comment}s.
possibly @tech{line comment}s or @tech{multiline comment}s.
A @deftech{rule} is a sequence consisting of: a @tech{rule identifier}, a separator (either @litchar{":"} or @litchar{"::="}), and a @tech{pattern}.
@ -676,14 +676,15 @@ A @deftech{symbolic token identifier} is an @tech{identifier} that is in upper c
A @deftech{line comment} begins with either @litchar{#} or @litchar{;} and
continues until the end of the line.
A @deftech{multiline comment} begins with @litchar{(*} and ends with @litchar{*)}.
An @deftech{identifier} is a sequence of letters, numbers, or
characters in the set @racket["-.!$%&/<=>?^_~@"]. It must not contain
@litchar{*}, @litchar{+}, or @litchar|{{}| and @litchar|{}}|, as those characters are used to denote quantification.
A @deftech{pattern} is one of the following:
@itemize[
@item{an implicit sequence of @tech{pattern}s separated by whitespace.}
@item{an implicit sequence of @tech{pattern}s separated by whitespace or commas.}
@item{a @deftech{terminal}: either a literal string or a @tech{symbolic token identifier}.

@ -25,7 +25,7 @@
(syntax-case rules-stx ()
[(_) (raise-syntax-error 'brag
(format "The grammar does not appear to have any rules")
'brag-module)]
(syntax-source rules-stx))]
[(_ . RULES)
(let ([rules (syntax->list #'RULES)]) ;; (listof stx)

@ -12,7 +12,8 @@
(from/to "'" "'")
(from/to "\"" "\"")) (token 'LIT lexeme)]
[(:or "()" "Ø" "") (token 'NO-COLOR lexeme)] ; empty set symbols
[(:or (char-set "()[]{}|+*:?") hide-char splice-char) (token 'MISC lexeme)]
[(:or (char-set "()[]{}|+*:?") hide-char splice-char "::=") (token 'MISC lexeme)]
[(from/to "(*" "*)") (token 'COMMENT lexeme)]
[(:seq (:or "#" ";") (complement (:seq (:* any-char) NL (:* any-char))) (:or NL "")) (token 'COMMENT lexeme)]
[id (token 'ID lexeme)]
[any-char (token 'OTHER lexeme)]))

@ -4,6 +4,7 @@
(prefix-in : br-parser-tools/lex-sre)
"parser.rkt"
"rule-structs.rkt"
(only-in brag/support from/to)
racket/string)
(provide lex/1 tokenize)
@ -74,14 +75,18 @@
[(:or "+" "*" "?"
(:: "{" (:* digit) (:? (:: "," (:* digit))) "}"))
(token-REPEAT lexeme)]
;; Skip whitespace
[whitespace
;; Skip whitespace
(return-without-pos (lex/1 input-port))]
;; skip multiline comments
[(from/to "(*" "*)") (return-without-pos (lex/1 input-port))]
;; Skip comments up to end of line
[(:: (:or "#" ";")
(complement (:: (:* any-char) NL (:* any-char)))
(:or NL ""))
(return-without-pos (lex/1 input-port))]
;; skip commas (concatenation is implied)
["," (return-without-pos (lex/1 input-port))]
[(eof)
(token-EOF lexeme)]
[(:: id (:* whitespace) id-separator)

Loading…
Cancel
Save