diff --git a/br-parser-tools/br-parser-tools-doc/LICENSE.txt b/br-parser-tools/br-parser-tools-doc/LICENSE.txt new file mode 100644 index 0000000..c424668 --- /dev/null +++ b/br-parser-tools/br-parser-tools-doc/LICENSE.txt @@ -0,0 +1,11 @@ +parser-tools-doc +Copyright (c) 2010-2014 PLT Design Inc. + +This package is distributed under the GNU Lesser General Public +License (LGPL). This means that you can link this package into proprietary +applications, provided you follow the rules stated in the LGPL. You +can also modify this package; if you distribute a modified version, +you must distribute it under the terms of the LGPL, which in +particular means that you must release the source code for the +modified software. See http://www.gnu.org/copyleft/lesser.html +for more information. diff --git a/br-parser-tools/br-parser-tools-doc/br-parser-tools/br-parser-tools.scrbl b/br-parser-tools/br-parser-tools-doc/br-parser-tools/br-parser-tools.scrbl new file mode 100644 index 0000000..7e0684c --- /dev/null +++ b/br-parser-tools/br-parser-tools-doc/br-parser-tools/br-parser-tools.scrbl @@ -0,0 +1,769 @@ +#lang scribble/doc +@(require scribble/manual scribble/struct scribble/xref scribble/bnf + (for-label scheme/base + scheme/contract + br-parser-tools/lex + (prefix-in : br-parser-tools/lex-sre) + br-parser-tools/yacc + br-parser-tools/cfg-parser)) + +@title{Parser Tools: @exec{lex} and @exec{yacc}-style Parsing (Beautiful Racket edition)} + +@author["Scott Owens (99%)" "Matthew Butterick (1%)"] + +This documentation assumes familiarity with @exec{lex} and @exec{yacc} +style lexer and parser generators. + +@table-of-contents[] + +@; ---------------------------------------------------------------------- + +@section{Lexers} + +@section-index["lex"] +@section-index["scanning"] +@section-index["scanner"] + +@defmodule[br-parser-tools/lex] + +@; ---------------------------------------- + +@subsection{Creating a Lexer} + +@defform/subs[#:literals (repetition union intersection complement concatenation + char-range char-complement + eof special special-comment) + (lexer [trigger action-expr] ...) + ([trigger re + (eof) + (special) + (special-comment)] + [re id + string + character + (repetition lo hi re) + (union re ...) + (intersection re ...) + (complement re) + (concatenation re ...) + (char-range char char) + (char-complement re) + (id datum ...)])]{ + + Produces a function that takes an input-port, matches the + @racket[re] patterns against the buffer, and returns the result of + executing the corresponding @racket[action-expr]. When multiple + patterns match, a lexer will choose the longest match, breaking + ties in favor of the rule appearing first. + + @margin-note{The implementation of @racketmodname[syntax-color/racket-lexer] + contains a lexer for the @racketmodname[racket] language. 
+ In addition, files in the @filepath{examples} sub-directory + of the @filepath{br-parser-tools} collection contain + simpler example lexers.} + + An @racket[re] is matched as follows: + + @itemize[ + @item{@racket[id] --- expands to the named @deftech{lexer abbreviation}; + abbreviations are defined via @racket[define-lex-abbrev] or supplied by modules + like @racketmodname[br-parser-tools/lex-sre].} + @item{@racket[string] --- matches the sequence of characters in @racket[string].} + @item{@racket[character] --- matches a literal @racket[character].} + @item{@racket[(repetition lo hi re)] --- matches @racket[re] repeated between @racket[lo] + and @racket[hi] times, inclusive; @racket[hi] can be @racket[+inf.0] for unbounded repetitions.} + @item{@racket[(union re ...)] --- matches if any of the sub-expressions match} + @item{@racket[(intersection re ...)] --- matches if all of the @racket[re]s match.} + @item{@racket[(complement re)] --- matches anything that @racket[re] does not.} + @item{@racket[(concatenation re ...)] --- matches each @racket[re] in succession.} + @item{@racket[(char-range char char)] --- matches any character between the two (inclusive); + a single character string can be used as a @racket[char].} + @item{@racket[(char-complement re)] --- matches any character not matched by @racket[re]. + The sub-expression must be a set of characters @racket[re].} + @item{@racket[(id datum ...)] --- expands the @deftech{lexer macro} named @racket[id]; macros + are defined via @racket[define-lex-trans].} + ] + +Note that both @racket[(concatenation)] and @racket[""] match the +empty string, @racket[(union)] matches nothing, +@racket[(intersection)] matches any string, and +@racket[(char-complement (union))] matches any single character. + +The regular expression language is not designed to be used directly, +but rather as a basis for a user-friendly notation written with +regular expression macros. For example, +@racketmodname[br-parser-tools/lex-sre] supplies operators from Olin +Shivers's SREs, and @racketmodname[br-parser-tools/lex-plt-v200] supplies +(deprecated) operators from the previous version of this library. +Since those libraries provide operators whose names match other Racket +bindings, such as @racket[*] and @racket[+], they normally must be +imported using a prefix: + +@racketblock[ +(require (prefix-in : br-parser-tools/lex-sre)) +] + +The suggested prefix is @racket[:], so that @racket[:*] and +@racket[:+] are imported. Of course, a prefix other than @racket[:] +(such as @racket[re-]) will work too. + +Since negation is not a common operator on regular expressions, here +are a few examples, using @racket[:] prefixed SRE syntax: + +@itemize[ + +@item{@racketblock0[(complement "1")] + + Matches all strings except the string @racket["1"], including + @racket["11"], @racket["111"], @racket["0"], @racket["01"], + @racket[""], and so on.} + +@item{@racketblock0[(complement (:* "1"))] + + Matches all strings that are not sequences of @racket["1"], + including @racket["0"], @racket["00"], @racket["11110"], + @racket["0111"], @racket["11001010"] and so on.} + +@item{@racketblock0[(:& (:: any-string "111" any-string) + (complement (:or (:: any-string "01") (:+ "1"))))] + + Matches all strings that have 3 consecutive ones, but not those that + end in @racket["01"] and not those that are ones only. 
These + include @racket["1110"], @racket["0001000111"] and @racket["0111"] + but not @racket[""], @racket["11"], @racket["11101"], @racket["111"] + and @racket["11111"].} + +@item{@racketblock0[(:: "/*" (complement (:: any-string "*/" any-string)) "*/")] + + Matches Java/C block comments. @racket["/**/"], + @racket["/******/"], @racket["/*////*/"], @racket["/*asg4*/"] and so + on. It does not match @racket["/**/*/"], @racket["/* */ */"] and so + on. @racket[(:: any-string "*/" any-string)] matches any string + that has a @racket["*/"] in is, so @racket[(complement (:: any-string "*/" + any-string))] matches any string without a @racket["*/"] in it.} + +@item{@racketblock0[(:: "/*" (:* (complement "*/")) "*/")] + + Matches any string that starts with @racket["/*"] and ends with + @racket["*/"], including @racket["/* */ */ */"]. + @racket[(complement "*/")] matches any string except @racket["*/"]. + This includes @racket["*"] and @racket["/"] separately. Thus + @racket[(:* (complement "*/"))] matches @racket["*/"] by first + matching @racket["*"] and then matching @racket["/"]. Any other + string is matched directly by @racket[(complement "*/")]. In other + words, @racket[(:* (complement "xx"))] = @racket[any-string]. It is + usually not correct to place a @racket[:*] around a + @racket[complement].} +] + + + The following binding have special meaning inside of a lexer + action: + + @itemize[ + @item{@racket[start-pos] --- a @racket[position] struct for the first character matched.} + @item{@racket[end-pos] --- a @racket[position] struct for the character after the last character in the match.} + @item{@racket[lexeme] --- the matched string.} + @item{@racket[input-port] --- the input-port being + processed (this is useful for matching input with multiple + lexers).} + @item{@racket[(return-without-pos x)] and @racket[(return-without-srcloc x)] are functions (continuations) that + immediately return the value of @racket[x] from the lexer. This useful + in a src-pos or src-loc lexer to prevent the lexer from adding source + information. For example: + + @racketblock[ + (define get-token + (lexer-srcloc + ... + ((comment) (get-token input-port)) + ...)) + ] + + would wrap the source location information for the comment around + the value of the recursive call. Using + @racket[((comment) (return-without-srcloc (get-token input-port)))] + will cause the value of the recursive call to be returned without + wrapping position around it.} + ] + + The lexer raises an @racket[exn:fail:read] exception if none of the + regular expressions match the input. Hint: If @racket[(any-char + _custom-error-behavior)] is the last rule, then there will always + be a match, and @racket[_custom-error-behavior] is executed to + handle the error situation as desired, only consuming the first + character from the input buffer. + + In addition to returning characters, input + ports can return @racket[eof-object]s. Custom input ports can + also return a @racket[special-comment] value to indicate a + non-textual comment, or return another arbitrary value (a + special). The non-@racket[re] @racket[trigger] forms handle these + cases: + + @itemize[ + + @item{The @racket[(eof)] rule is matched when the input port + returns an @racket[eof-object] value. If no @racket[(eof)] + rule is present, the lexer returns the symbol @racket['eof] + when the port returns an @racket[eof-object] value.} + + @item{The @racket[(special-comment)] rule is matched when the + input port returns a @racket[special-comment] structure. 
If no + @racket[special-comment] rule is present, the lexer + automatically tries to return the next token from the input + port.} + + @item{The @racket[(special)] rule is matched when the input + port returns a value other than a character, + @racket[eof-object], or @racket[special-comment] structure. If + no @racket[(special)] rule is present, the lexer returns + @racket[(void)].}] + + End-of-files, specials, special-comments and special-errors cannot + be parsed via a rule using an ordinary regular expression + (but dropping down and manipulating the port to handle them + is possible in some situations). + + Since the lexer gets its source information from the port, use + @racket[port-count-lines!] to enable the tracking of line and + column information. Otherwise, the line and column information + will return @racket[#f]. + + When peeking from the input port raises an exception (such as by + an embedded XML editor with malformed syntax), the exception can + be raised before all tokens preceding the exception have been + returned. + + Each time the racket code for a lexer is compiled (e.g. when a + @filepath{.rkt} file containing a @racket[lexer] form is loaded), + the lexer generator is run. To avoid this overhead place the + lexer into a module and compile the module to a @filepath{.zo} + bytecode file.} + +@defform[(lexer-src-pos (trigger action-expr) ...)]{ + +Like @racket[lexer], but for each @racket[_action-result] produced by +an @racket[action-expr], returns @racket[(make-position-token +_action-result start-pos end-pos)] instead of simply +@racket[_action-result].} + +@defform[(lexer-srcloc (trigger action-expr) ...)]{ + +Like @racket[lexer], but for each @racket[_action-result] produced by +an @racket[action-expr], returns @racket[(make-srcloc-token +_action-result lexeme-srcloc)] instead of simply +@racket[_action-result].} + +@deftogether[( +@defidform[start-pos] +@defidform[end-pos] +@defidform[lexeme] +@defidform[lexeme-srcloc] +@defidform[input-port] +@defidform[return-without-pos] +@defidform[return-without-srcloc] +)]{ + +Use of these names outside of a @racket[lexer] action is a syntax +error.} + +@defstruct[position ([offset exact-positive-integer?] + [line exact-positive-integer?] + [col exact-nonnegative-integer?])]{ + + Instances of @racket[position] are bound to @racket[start-pos] and + @racket[end-pos]. The @racket[offset] field contains the offset of + the character in the input. The @racket[line] field contains the + line number of the character. The @racket[col] field contains the + offset in the current line.} + +@defstruct[position-token ([token any/c] + [start-pos position?] + [end-pos position?])]{ + + Lexers created with @racket[lexer-src-pos] return instances of @racket[position-token].} + + +@defstruct[srcloc-token ([token any/c] + [srcloc srcloc?])]{ + + Lexers created with @racket[lexer-srcloc] return instances of @racket[srcloc-token].} + +@defparam[file-path source any/c]{ + + A parameter that the lexer uses as the source location if it + raises a @racket[exn:fail:read] error. 
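+ For example, here is a minimal sketch (the lexer @racket[my-lexer] and the
+ file name @filepath{config.txt} are hypothetical) that lexes a string
+ port while reporting any lexing error against a file name:
+
+ @racketblock[
+ (define ip (open-input-string "some input"))
+ (port-count-lines! ip)
+ (parameterize ([file-path "config.txt"])
+   (my-lexer ip))
+ ]
+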
Setting this parameter allows + DrRacket, for example, to open the file containing the error.} + +@defparam[lexer-file-path source any/c]{ + +Alias for @racket[file-path].} + + +@; ---------------------------------------- + +@subsection{Lexer Abbreviations and Macros} + +@defform[(char-set string)]{ + +A @tech{lexer macro} that matches any character in @racket[string].} + +@defidform[any-char]{A @tech{lexer abbreviation} that matches any character.} + +@defidform[any-string]{A @tech{lexer abbreviation} that matches any string.} + +@defidform[nothing]{A @tech{lexer abbreviation} that matches no string.} + +@deftogether[( +@defidform[alphabetic] +@defidform[lower-case] +@defidform[upper-case] +@defidform[title-case] +@defidform[numeric] +@defidform[symbolic] +@defidform[punctuation] +@defidform[graphic] +@defidform[whitespace] +@defidform[blank] +@defidform[iso-control] +)]{ + +@tech{Lexer abbreviations} that match @racket[char-alphabetic?] +characters, @racket[char-lower-case?] characters, etc.} + +@defform[(define-lex-abbrev id re)]{ + + Defines a @tech{lexer abbreviation} by associating a regular + expression to be used in place of the @racket[id] in other + regular expression. The definition of name has the same scoping + properties as a other syntactic binding (e.g., it can be exported + from a module).} + +@defform[(define-lex-abbrevs (id re) ...)]{ + + Like @racket[define-lex-abbrev], but defines several @tech{lexer + abbreviations}.} + + +@defform[(define-lex-trans id trans-expr)]{ + + Defines a @tech{lexer macro}, where @racket[trans-expr] produces a + transformer procedure that takes one argument. When @racket[(id + _datum ...)] appears as a regular expression, it is replaced with + the result of applying the transformer to the expression.} + + +@; ---------------------------------------- + +@subsection{Lexer SRE Operators} + +@defmodule[br-parser-tools/lex-sre] + +@; Put the docs in a macro, so that we can bound the scope of +@; the import of `*', etc.: +@(define-syntax-rule (lex-sre-doc) + (... + (begin + (require (for-label br-parser-tools/lex-sre)) + +@defform[(* re ...)]{ + +Repetition of @racket[re] sequence 0 or more times.} + +@defform[(+ re ...)]{ + +Repetition of @racket[re] sequence 1 or more times.} + +@defform[(? 
re ...)]{ + +Zero or one occurrence of @racket[re] sequence.} + +@defform[(= n re ...)]{ + +Exactly @racket[n] occurrences of @racket[re] sequence, where +@racket[n] must be a literal exact, non-negative number.} + +@defform[(>= n re ...)]{ + +At least @racket[n] occurrences of @racket[re] sequence, where +@racket[n] must be a literal exact, non-negative number.} + +@defform[(** n m re ...)]{ + +Between @racket[n] and @racket[m] (inclusive) occurrences of +@racket[re] sequence, where @racket[n] must be a literal exact, +non-negative number, and @racket[m] must be literally either +@racket[#f], @racket[+inf.0], or an exact, non-negative number; a +@racket[#f] value for @racket[m] is the same as @racket[+inf.0].} + +@defform[(or re ...)]{ + +Same as @racket[(union re ...)].} + +@deftogether[( +@defform[(: re ...)] +@defform[(seq re ...)] +)]{ + +Both forms concatenate the @racket[re]s.} + +@defform[(& re ...)]{ + +Intersects the @racket[re]s.} + +@defform[(- re ...)]{ + +The set difference of the @racket[re]s.} + +@defform[(~ re ...)]{ + +Character-set complement, which each @racket[re] must match exactly +one character.} + +@defform[(/ char-or-string ...)]{ + +Character ranges, matching characters between successive pairs of +characters.} + +))) + +@(lex-sre-doc) + +@; ---------------------------------------- + +@subsection{Lexer Legacy Operators} + +@defmodule[br-parser-tools/lex-plt-v200] + +@(define-syntax-rule (lex-v200-doc) + (... + (begin + (require (for-label br-parser-tools/lex-plt-v200)) + +@t{The @racketmodname[br-parser-tools/lex-plt-v200] module re-exports + @racket[*], @racket[+], @racket[?], and @racket[&] from + @racketmodname[br-parser-tools/lex-sre]. It also re-exports + @racket[:or] as @racket[:], @racket[::] as @racket[|@|], @racket[:~] + as @racket[^], and @racket[:/] as @racket[-].} + +@defform[(epsilon)]{ + +A @tech{lexer macro} that matches an empty sequence.} + +@defform[(~ re ...)]{ + +The same as @racket[(complement re ...)].}))) + +@(lex-v200-doc) + +@; ---------------------------------------- + +@subsection{Tokens} + +Each @racket[_action-expr] in a @racket[lexer] form can produce any +kind of value, but for many purposes, producing a @deftech{token} +value is useful. Tokens are usually necessary for inter-operating with +a parser generated by @racket[br-parser-tools/parser], but tokens may not +be the right choice when using @racket[lexer] in other situations. + +@defform[(define-tokens group-id (token-id ...))]{ + + Binds @racket[group-id] to the group of tokens being defined. For + each @racket[token-id], a function + @racketidfont{token-}@racket[token-id] is created that takes any + value and puts it in a token record specific to @racket[token-id]. + The token value is inspected using @racket[token-id] and + @racket[token-value]. + + A token cannot be named @racketidfont{error}, since + @racketidfont{error} it has special use in the parser.} + +@defform[(define-empty-tokens group-id (token-id ...) )]{ + + + Like @racket[define-tokens], except a each token constructor + @racketidfont{token-}@racket[token-id] takes no arguments and returns + @racket[(@#,racket[quote] token-id)].} + + +@defproc[(token-name [t (or/c token? symbol?)]) symbol?]{ + + Returns the name of a token that is represented either by a symbol + or a token structure.} + + +@defproc[(token-value [t (or/c token? symbol?)]) any/c]{ + + Returns the value of a token that is represented either by a symbol + or a token structure, returning @racket[#f] for a symbol token.} + + +@defproc[(token? 
[v any/c]) boolean?]{ + + Returns @racket[#t] if @racket[val] is a + token structure, @racket[#f] otherwise.} + +@; ---------------------------------------------------------------------- + +@section{LALR(1) Parsers} + +@section-index["yacc"] + +@defmodule[br-parser-tools/yacc] + +@defform/subs[#:literals (grammar tokens start end precs src-pos + suppress debug yacc-output prec) + (parser clause ...) + ([clause (grammar (non-terminal-id + ((grammar-id ...) maybe-prec expr) + ...) + ...) + (tokens group-id ...) + (start non-terminal-id ...) + (end token-id ...) + (@#,racketidfont{error} expr) + (precs (assoc token-id ...) ...) + (src-pos) + (suppress) + (debug filename) + (yacc-output filename)] + [maybe-prec code:blank + (prec token-id)] + [assoc left right nonassoc])]{ + + Creates a parser. The clauses may be in any order, as long as there + are no duplicates and all non-@italic{OPTIONAL} declarations are + present: + + @itemize[ + + @item{@racketblock0[(grammar (non-terminal-id + ((grammar-id ...) maybe-prec expr) + ...) + ...)] + + Declares the grammar to be parsed. Each @racket[grammar-id] can + be a @racket[token-id] from a @racket[group-id] named in a + @racket[tokens] declaration, or it can be a + @racket[non-terminal-id] declared in the @racket[grammar] + declaration. The optional @racket[prec] declaration works with + the @racket[precs] declaration. The @racket[expr] is a + ``semantic action,'' which is evaluated when the input is found + to match its corresponding production. + + Each action is Racket code that has the same scope as its + parser's definition, except that the variables @racket[$1], ..., + @racketidfont{$}@math{i} are bound, where @math{i} is the number + of @racket[grammar-id]s in the corresponding production. Each + @racketidfont{$}@math{k} is bound to the result of the action + for the @math{k}@superscript{th} grammar symbol on the right of + the production, if that grammar symbol is a non-terminal, or the + value stored in the token if the grammar symbol is a terminal. + If the @racket[src-pos] option is present in the parser, then + variables @racket[$1-start-pos], ..., + @racketidfont{$}@math{i}@racketidfont{-start-pos} and + @racket[$1-end-pos], ..., + @racketidfont{$}@math{i}@racketidfont{-end-pos} and are also + available, and they refer to the position structures + corresponding to the start and end of the corresponding + @racket[grammar-symbol]. Grammar symbols defined as empty-tokens + have no @racketidfont{$}@math{k} associated, but do have + @racketidfont{$}@math{k}@racketidfont{-start-pos} and + @racketidfont{$}@math{k}@racketidfont{-end-pos}. + Also @racketidfont{$n-start-pos} and @racketidfont{$n-end-pos} + are bound to the largest start and end positions, (i.e., + @racketidfont{$}@math{i}@racketidfont{-start-pos} and + @racketidfont{$}@math{i}@racketidfont{-end-pos}). + + An @deftech{error production} can be defined by providing + a production of the form @racket[(error α)], where α is a + string of grammar symbols, possibly empty. + + All of the productions for a given non-terminal must be grouped + with it. 
That is, no @racket[non-terminal-id] may appear twice + on the left hand side in a parser.} + + + @item{@racket[(tokens group-id ...)] + + Declares that all of the tokens defined in each + @racket[group-id]---as bound by @racket[define-tokens] or + @racket[define-empty-tokens]---can be used by the parser in the + @racket[grammar] declaration.} + + + @item{@racket[(start non-terminal-id ...)] + + Declares a list of starting non-terminals for the grammar.} + + + @item{@racket[(end token-id ...)] + + Specifies a set of tokens from which some member must follow any + valid parse. For example, an EOF token would be specified for a + parser that parses entire files and a newline token for a parser + that parses entire lines individually.} + + + @item{@racket[(@#,racketidfont{error} expr)] + + The @racket[expr] should evaluate to a function which will be + executed for its side-effect whenever the parser encounters an + error. + + If the @racket[src-pos] declaration is present, the function + should accept 5 arguments,: + + @racketblock[(lambda (tok-ok? tok-name tok-value _start-pos _end-pos) + ....)] + + Otherwise it should accept 3: + + @racketblock[(lambda (tok-ok? tok-name tok-value) + ....)] + + The first argument will be @racket[#f] if and only if the error + is that an invalid token was received. The second and third + arguments will be the name and the value of the token at which + the error was detected. The fourth and fifth arguments, if + present, provide the source positions of that token.} + + + @item{@racket[(precs (assoc token-id ...) ...)] + @italic{OPTIONAL} + + Precedence declarations to resolve shift/reduce and + reduce/reduce conflicts as in @exec{yacc}/@exec{bison}. An + @racket[assoc] must be one of @racket[left], @racket[right] or + @racket[nonassoc]. States with multiple shift/reduce or + reduce/reduce conflicts (or some combination thereof) are not + resolved with precedence.} + + @item{@racket[(src-pos)] @italic{OPTIONAL} + + Causes the generated parser to expect input in the form + @racket[(make-position-token _token _start-pos _end-pos)] instead + of simply @racket[_token]. Include this option when using the + parser with a lexer generated with @racket[lexer-src-pos].} + + + @item{@racket[(debug filename)] @italic{OPTIONAL} + + Causes the parser generator to write the LALR table to the file + named @racket[filename] (unless the file exists), where + @racket[filename] is a literal string. Additionally, if a debug + file is specified, when a running generated parser encounters a + parse error on some input file, after the user specified error + expression returns, the complete parse stack is printed to + assist in debugging the grammar of that particular parser. The + numbers in the stack printout correspond to the state numbers in + the LALR table file.} + + + @item{@racket[(yacc-output filename)] @italic{OPTIONAL} + + Causes the parser generator to write a grammar file in + approximately the syntax of @exec{yacc}/@exec{bison}. The file + might not be a valid @exec{yacc} file, because the Racket + grammar can use symbols that are invalid in C.} + + + @item{@racket[(suppress)] @italic{OPTIONAL} + + Causes the parser generator not to report shift/reduce or + reduce/reduce conflicts.} + + ] + + The result of a @racket[parser] expression with one @racket[start] + non-terminal is a function, @racket[_parse], that takes one + argument. This argument must be a zero argument function, + @racket[_gen], that produces successive tokens of the input each + time it is called. 
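+ For example, a hypothetical driver that feeds such a parser from a lexer
+ over a string port (@racket[_my-parse] and @racket[_my-lexer] are assumed
+ to be defined elsewhere); each call to the supplied thunk produces the
+ next token:
+
+ @racketblock[
+ (define ip (open-input-string "1 + 2 * 3"))
+ (port-count-lines! ip)
+ (my-parse (lambda () (my-lexer ip)))
+ ]
+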
If desired, the @racket[_gen] may return + symbols instead of tokens, and the parser will treat symbols as + tokens of the corresponding name (with @racket[#f] as a value, so + it is usual to return symbols only in the case of empty tokens). + The @racket[_parse] function returns the value associated with the + parse tree by the semantic actions. If the parser encounters an + error, after invoking the supplied error function, it will try to + use @tech{error production}s to continue parsing. If it cannot, it + raises @racket[exn:fail:read]. + + If multiple non-terminals are provided in @racket[start], the + @racket[parser] expression produces a list of parsing functions, + one for each non-terminal in the same order. Each parsing function + is like the result of a parser expression with only one + @racket[start] non-terminal, + + Each time the Racket code for a @racket[parser] is compiled + (e.g. when a @filepath{.rkt} file containing a @racket[parser] form + is loaded), the parser generator is run. To avoid this overhead + place the parser into a module and compile the module to a + @filepath{.zo} bytecode file.} + + + + +@section{Context-Free Parsers} + +@section-index["cfg-parser"] + +@defmodule[br-parser-tools/cfg-parser]{The @racketmodname[br-parser-tools/cfg-parser] +library provides a parser generator that is an alternative to that of +@racketmodname[br-parser-tools/yacc].} + +@defform/subs[#:literals (grammar tokens start end precs src-pos + suppress debug yacc-output prec) + (cfg-parser clause ...) + ([clause (grammar (non-terminal-id + ((grammar-id ...) maybe-prec expr) + ...) + ...) + (tokens group-id ...) + (start non-terminal-id ...) + (end token-id ...) + (@#,racketidfont{error} expr) + (src-pos)])]{ + + Creates a parser similar to that of @racket[parser]. Unlike @racket[parser], + @racket[cfg-parser], can consume arbitrary and potentially ambiguous context-free + grammars. Its interface is a subset of @racketmodname[br-parser-tools/yacc], with + the following differences: + + @itemize[ + + @item{@racket[(start non-terminal-id)] + + Unlike @racket[parser], @racket[cfg-parser] only allows for + a single non-terminal-id.} + + @item{The @racket[cfg-parser] form does not support the @racket[precs], + @racket[suppress], @racket[debug], or @racket[yacc-output] + options of @racket[parser].} + ] +} + +@; ---------------------------------------------------------------------- + +@section{Converting @exec{yacc} or @exec{bison} Grammars} + +@defmodule[br-parser-tools/yacc-to-scheme] + +@defproc[(trans [file path-string?]) any/c]{ + +Reads a C @exec{yacc}/@exec{bison} grammar from @racket[file] and +produces an s-expression that represents a Racket parser for use with +@racket[parser]. + +This function is intended to assist in the manual conversion of +grammars for use with @racket[parser], and not as a fully automatic +conversion tool. It is not entirely robust. For example, if the C +actions in the original grammar have nested blocks, the tool will fail. 
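+ For example, a hypothetical one-off conversion (the grammar file
+ @filepath{calc.y} and the output file name are assumptions for
+ illustration) writes the generated s-expression to a file so it can be
+ pasted into a Racket module and edited by hand:
+
+ @racketblock[
+ (require br-parser-tools/yacc-to-scheme
+          racket/pretty)
+ (with-output-to-file "calc-parser.rktd"
+   (lambda () (pretty-write (trans "calc.y"))))
+ ]
+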
+ +Annotated examples are in the @filepath{examples} subdirectory of the +@filepath{br-parser-tools} collection.} + +@; ---------------------------------------------------------------------- + +@index-section[] diff --git a/br-parser-tools/br-parser-tools-doc/br-parser-tools/compiled/br-parser-tools_scrbl.dep b/br-parser-tools/br-parser-tools-doc/br-parser-tools/compiled/br-parser-tools_scrbl.dep new file mode 100644 index 0000000..9a39731 --- /dev/null +++ b/br-parser-tools/br-parser-tools-doc/br-parser-tools/compiled/br-parser-tools_scrbl.dep @@ -0,0 +1 @@ +("6.8.0.2" ("e40ef6f4ad8e94b16dd696e0e56aff8797e08366" . "20628650cd070c4f9b3a47399bfc46ffabd56006") (collects #"br-parser-tools" #"cfg-parser.rkt") (collects #"br-parser-tools" #"lex-plt-v200.rkt") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"br-parser-tools" #"yacc.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"scheme" #"base.rkt") (collects #"scheme" #"contract.rkt") (collects #"scribble" #"bnf.rkt") (collects #"scribble" #"doc" #"lang" #"reader.rkt") (collects #"scribble" #"doclang.rkt") (collects #"scribble" #"manual.rkt") (collects #"scribble" #"struct.rkt") (collects #"scribble" #"xref.rkt")) diff --git a/br-parser-tools/br-parser-tools-doc/br-parser-tools/compiled/br-parser-tools_scrbl.zo b/br-parser-tools/br-parser-tools-doc/br-parser-tools/compiled/br-parser-tools_scrbl.zo new file mode 100644 index 0000000..9dbe4a4 Binary files /dev/null and b/br-parser-tools/br-parser-tools-doc/br-parser-tools/compiled/br-parser-tools_scrbl.zo differ diff --git a/br-parser-tools/br-parser-tools-doc/br-parser-tools/compiled/info_rkt.dep b/br-parser-tools/br-parser-tools-doc/br-parser-tools/compiled/info_rkt.dep new file mode 100644 index 0000000..2bfa718 --- /dev/null +++ b/br-parser-tools/br-parser-tools-doc/br-parser-tools/compiled/info_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("4594481ac3bf7a1dfb75324c86a180c4a121ab41" . 
"64293529315a4e5ca67bc9da08b943f520e0d704") (collects #"info" #"main.rkt") (collects #"setup" #"infotab.rkt")) diff --git a/br-parser-tools/br-parser-tools-doc/br-parser-tools/compiled/info_rkt.zo b/br-parser-tools/br-parser-tools-doc/br-parser-tools/compiled/info_rkt.zo new file mode 100644 index 0000000..c925759 Binary files /dev/null and b/br-parser-tools/br-parser-tools-doc/br-parser-tools/compiled/info_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-doc/br-parser-tools/info.rkt b/br-parser-tools/br-parser-tools-doc/br-parser-tools/info.rkt new file mode 100644 index 0000000..f219d03 --- /dev/null +++ b/br-parser-tools/br-parser-tools-doc/br-parser-tools/info.rkt @@ -0,0 +1,3 @@ +#lang info + +(define scribblings '(("br-parser-tools.scrbl" (multi-page) (parsing-library)))) diff --git a/br-parser-tools/br-parser-tools-doc/info.rkt b/br-parser-tools/br-parser-tools-doc/info.rkt new file mode 100644 index 0000000..8760588 --- /dev/null +++ b/br-parser-tools/br-parser-tools-doc/info.rkt @@ -0,0 +1,14 @@ +#lang info + +(define collection 'multi) +(define deps '("base")) +(define build-deps '("scheme-lib" + "racket-doc" + "syntax-color-doc" + "br-parser-tools-lib" + "scribble-lib")) +(define update-implies '("br-parser-tools-lib")) + +(define pkg-desc "documentation part of \"br-parser-tools\"") + +(define pkg-authors '(mflatt)) diff --git a/br-parser-tools/br-parser-tools-lib/LICENSE.txt b/br-parser-tools/br-parser-tools-lib/LICENSE.txt new file mode 100644 index 0000000..f6889f1 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/LICENSE.txt @@ -0,0 +1,11 @@ +parser-tools-lib +Copyright (c) 2010-2014 PLT Design Inc. + +This package is distributed under the GNU Lesser General Public +License (LGPL). This means that you can link this package into proprietary +applications, provided you follow the rules stated in the LGPL. You +can also modify this package; if you distribute a modified version, +you must distribute it under the terms of the LGPL, which in +particular means that you must release the source code for the +modified software. See http://www.gnu.org/copyleft/lesser.html +for more information. diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt new file mode 100644 index 0000000..7b8b839 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt @@ -0,0 +1,982 @@ +#lang racket/base +;; This module implements a parser form like the br-parser-tools's +;; `parser', except that it works on an arbitrary CFG (returning +;; the first sucecssful parse). + +;; I'm pretty sure that this is an implementation of Earley's +;; algorithm. + +;; To a first approximation, it's a backtracking parser. Alternative +;; for a non-terminal are computed in parallel, and multiple attempts +;; to compute the same result block until the first one completes. If +;; you get into deadlock, such as when trying to match +;; := +;; then it means that there's no successful parse, so everything +;; that's blocked fails. + +;; A cache holds the series of results for a particular non-terminal +;; at a particular starting location. (A series is used, instead of a +;; sinlge result, for backtracking.) Otherwise, the parser uses +;; backtracking search. Backtracking is implemented through explicit +;; success and failure continuations. Multiple results for a +;; particular nonterminal and location are kept only when they have +;; different lengths. 
(Otherwise, in the spirit of finding one +;; successful parse, only the first result is kept.) + +;; The br-parser-tools's `parse' is used to transform tokens in the +;; grammar to tokens specific to this parser. In other words, this +;; parser uses `parser' so that it doesn't have to know anything about +;; tokens. +;; + + + +(require br-parser-tools/yacc + br-parser-tools/lex) + +(require (for-syntax racket/base + syntax/boundmap + br-parser-tools/private-lex/token-syntax)) + +(provide cfg-parser) + +;; A raw token, wrapped so that we can recognize it: +(define-struct tok (name orig-name val start end)) + +;; Represents the thread scheduler: +(define-struct tasks (active active-back waits multi-waits cache progress?)) + +(define-for-syntax make-token-identifier-mapping make-hasheq) +(define-for-syntax token-identifier-mapping-get + (case-lambda + [(t tok) + (hash-ref t (syntax-e tok))] + [(t tok fail) + (hash-ref t (syntax-e tok) fail)])) +(define-for-syntax token-identifier-mapping-put! + (lambda (t tok v) + (hash-set! t (syntax-e tok) v))) +(define-for-syntax token-identifier-mapping-map + (lambda (t f) + (hash-map t f))) + +;; Used to calculate information on the grammar, such as whether +;; a particular non-terminal is "simple" instead of recursively defined. +(define-for-syntax (nt-fixpoint nts proc nt-ids patss) + (define (ormap-all val f as bs) + (cond + [(null? as) val] + [else (ormap-all (or (f (car as) (car bs)) val) + f + (cdr as) (cdr bs))])) + (let loop () + (when (ormap-all #f + (lambda (nt pats) + (let ([old (bound-identifier-mapping-get nts nt)]) + (let ([new (proc nt pats old)]) + (if (equal? old new) + #f + (begin + (bound-identifier-mapping-put! nts nt new) + #t))))) + nt-ids patss) + (loop)))) + +;; Tries parse-a followed by parse-b. If parse-a is not simple, +;; then after parse-a succeeds once, we parallelize parse-b +;; and trying a second result for parse-a. +(define (parse-and simple-a? parse-a parse-b + stream last-consumed-token depth end success-k fail-k + max-depth tasks) + (letrec ([mk-got-k + (lambda (success-k fail-k) + (lambda (val stream last-consumed-token depth max-depth tasks next1-k) + (if simple-a? 
+ (parse-b val stream last-consumed-token depth end + (mk-got2-k success-k fail-k next1-k) + (mk-fail2-k success-k fail-k next1-k) + max-depth tasks) + (parallel-or + (lambda (success-k fail-k max-depth tasks) + (parse-b val stream last-consumed-token depth end + success-k fail-k + max-depth tasks)) + (lambda (success-k fail-k max-depth tasks) + (next1-k (mk-got-k success-k fail-k) + fail-k max-depth tasks)) + success-k fail-k max-depth tasks))))] + [mk-got2-k + (lambda (success-k fail-k next1-k) + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + (success-k val stream last-consumed-token depth max-depth tasks + (lambda (success-k fail-k max-depth tasks) + (next-k (mk-got2-k success-k fail-k next1-k) + (mk-fail2-k success-k fail-k next1-k) + max-depth tasks)))))] + [mk-fail2-k + (lambda (success-k fail-k next1-k) + (lambda (max-depth tasks) + (next1-k (mk-got-k success-k fail-k) + fail-k + max-depth + tasks)))]) + (parse-a stream last-consumed-token depth end + (mk-got-k success-k fail-k) + fail-k + max-depth tasks))) + +;; Parallel or for non-terminal alternatives +(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks) + (parallel-or (lambda (success-k fail-k max-depth tasks) + (parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks)) + (lambda (success-k fail-k max-depth tasks) + (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)) + success-k fail-k max-depth tasks)) + +;; Generic parallel-or +(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks) + (define answer-key (gensym)) + (letrec ([gota-k + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + (report-answer answer-key + max-depth + tasks + (list val stream last-consumed-token depth next-k)))] + [faila-k + (lambda (max-depth tasks) + (report-answer answer-key + max-depth + tasks + null))]) + (let* ([tasks (queue-task + tasks + (lambda (max-depth tasks) + (parse-a gota-k + faila-k + max-depth tasks)))] + [tasks (queue-task + tasks + (lambda (max-depth tasks) + (parse-b gota-k + faila-k + max-depth tasks)))] + [queue-next (lambda (next-k tasks) + (queue-task tasks + (lambda (max-depth tasks) + (next-k gota-k + faila-k + max-depth tasks))))]) + (letrec ([mk-got-one + (lambda (immediate-next? get-nth success-k) + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + (let ([tasks (if immediate-next? + (queue-next next-k tasks) + tasks)]) + (success-k val stream last-consumed-token depth max-depth + tasks + (lambda (success-k fail-k max-depth tasks) + (let ([tasks (if immediate-next? 
+ tasks + (queue-next next-k tasks))]) + (get-nth max-depth tasks success-k fail-k)))))))] + [get-first + (lambda (max-depth tasks success-k fail-k) + (wait-for-answer #f max-depth tasks answer-key + (mk-got-one #t get-first success-k) + (lambda (max-depth tasks) + (get-second max-depth tasks success-k fail-k)) + #f))] + [get-second + (lambda (max-depth tasks success-k fail-k) + (wait-for-answer #f max-depth tasks answer-key + (mk-got-one #f get-second success-k) + fail-k #f))]) + (get-first max-depth tasks success-k fail-k))))) + +;; Non-terminal alternatives where the first is "simple" can be done +;; sequentially, which is simpler +(define (parse-or parse-a parse-b + stream last-consumed-token depth end success-k fail-k max-depth tasks) + (letrec ([mk-got-k + (lambda (success-k fail-k) + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + (success-k val stream last-consumed-token depth + max-depth tasks + (lambda (success-k fail-k max-depth tasks) + (next-k (mk-got-k success-k fail-k) + (mk-fail-k success-k fail-k) + max-depth tasks)))))] + [mk-fail-k + (lambda (success-k fail-k) + (lambda (max-depth tasks) + (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))]) + (parse-a stream last-consumed-token depth end + (mk-got-k success-k fail-k) + (mk-fail-k success-k fail-k) + max-depth tasks))) + +;; Starts a thread +(define queue-task + (lambda (tasks t [progress? #t]) + (make-tasks (tasks-active tasks) + (cons t (tasks-active-back tasks)) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + (or progress? (tasks-progress? tasks))))) + +;; Reports an answer to a waiting thread: +(define (report-answer answer-key max-depth tasks val) + (let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))]) + (if v + (let ([tasks (make-tasks (cons (v val) + (tasks-active tasks)) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #t)]) + (hash-remove! (tasks-waits tasks) answer-key) + (swap-task max-depth tasks)) + ;; We have an answer ready too fast; wait + (swap-task max-depth + (queue-task tasks + (lambda (max-depth tasks) + (report-answer answer-key max-depth tasks val)) + #f))))) + +;; Reports an answer to multiple waiting threads: +(define (report-answer-all answer-key max-depth tasks val k) + (let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))]) + (hash-remove! (tasks-multi-waits tasks) answer-key) + (let ([tasks (make-tasks (append (map (lambda (a) (a val)) v) + (tasks-active tasks)) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #t)]) + (k max-depth tasks)))) + +;; Waits for an answer; if `multi?' is #f, this is sole waiter, otherwise +;; there might be many. Use wither #t or #f (and `report-answer' or +;; `report-answer-all', resptively) consistently for a particular answer key. +(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k) + (let ([wait (lambda (val) + (lambda (max-depth tasks) + (if val + (if (null? val) + (fail-k max-depth tasks) + (let-values ([(val stream last-consumed-token depth next-k) (apply values val)]) + (success-k val stream last-consumed-token depth max-depth tasks next-k))) + (deadlock-k max-depth tasks))))]) + (if multi? + (hash-set! (tasks-multi-waits tasks) answer-key + (cons wait (hash-ref (tasks-multi-waits tasks) answer-key + (lambda () null)))) + (hash-set! 
(tasks-waits tasks) answer-key wait)) + (let ([tasks (make-tasks (tasks-active tasks) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #t)]) + (swap-task max-depth tasks)))) + +;; Swap thread +(define (swap-task max-depth tasks) + ;; Swap in first active: + (if (null? (tasks-active tasks)) + (if (tasks-progress? tasks) + (swap-task max-depth + (make-tasks (reverse (tasks-active-back tasks)) + null + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #f)) + ;; No progress, so issue failure for all multi-waits + (if (zero? (hash-count (tasks-multi-waits tasks))) + (error 'swap-task "Deadlock") + (swap-task max-depth + (make-tasks (apply + append + (hash-map (tasks-multi-waits tasks) + (lambda (k l) + (map (lambda (v) (v #f)) l)))) + (tasks-active-back tasks) + (tasks-waits tasks) + (make-hasheq) + (tasks-cache tasks) + #t)))) + (let ([t (car (tasks-active tasks))] + [tasks (make-tasks (cdr (tasks-active tasks)) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + (tasks-progress? tasks))]) + (t max-depth tasks)))) + +;; Finds the symbolic representative of a token class +(define-for-syntax (map-token toks tok) + (car (token-identifier-mapping-get toks tok))) + +(define no-pos-val (make-position #f #f #f)) +(define-for-syntax no-pos + (let ([npv ((syntax-local-certifier) #'no-pos-val)]) + (lambda (stx) npv))) +(define-for-syntax at-tok-pos + (lambda (sel expr) + (lambda (stx) + #`(let ([v #,expr]) (if v (#,sel v) no-pos-val))))) + +;; Builds a matcher for a particular alternative +(define-for-syntax (build-match nts toks pat handle $ctx) + (let loop ([pat pat] + [pos 1]) + (if (null? pat) + #`(success-k #,handle stream last-consumed-token depth max-depth tasks + (lambda (success-k fail-k max-depth tasks) + (fail-k max-depth tasks))) + (let ([id (datum->syntax (car pat) + (string->symbol (format "$~a" pos)))] + [id-start-pos (datum->syntax (car pat) + (string->symbol (format "$~a-start-pos" pos)))] + [id-end-pos (datum->syntax (car pat) + (string->symbol (format "$~a-end-pos" pos)))] + [n-end-pos (and (null? (cdr pat)) + (datum->syntax (car pat) '$n-end-pos))]) + (cond + [(bound-identifier-mapping-get nts (car pat) (lambda () #f)) + ;; Match non-termimal + #`(parse-and + ;; First part is simple? (If so, we don't have to parallelize the `and'.) + #,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))]) + (or (not l) + (andmap values (caddr l)))) + #,(car pat) + (let ([original-stream stream]) + (lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks) + (let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream) + tok-end + tok-start) + #'(if (eq? original-stream stream) + last-consumed-token + (and (pair? original-stream) + (car original-stream))))] + [#,id-end-pos (at-tok-pos #'tok-end #'last-consumed-token)] + #,@(if n-end-pos + #`([#,n-end-pos (at-tok-pos #'tok-end #'last-consumed-token)]) + null)) + #,(loop (cdr pat) (add1 pos))))) + stream last-consumed-token depth + #,(let ([cnt (apply + + (map (lambda (item) + (cond + [(bound-identifier-mapping-get nts item (lambda () #f)) + => (lambda (l) (car l))] + [else 1])) + (cdr pat)))]) + #`(- end #,cnt)) + success-k fail-k max-depth tasks)] + [else + ;; Match token + (let ([tok-id (map-token toks (car pat))]) + #`(if (and (pair? stream) + (eq? 
'#,tok-id (tok-name (car stream)))) + (let* ([stream-a (car stream)] + [#,id (tok-val stream-a)] + [last-consumed-token (car stream)] + [stream (cdr stream)] + [depth (add1 depth)]) + (let ([max-depth (max max-depth depth)]) + (let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'stream-a)] + [#,id-end-pos (at-tok-pos #'tok-end #'stream-a)] + #,@(if n-end-pos + #`([#,n-end-pos (at-tok-pos #'tok-end #'stream-a)]) + null)) + #,(loop (cdr pat) (add1 pos))))) + (fail-k max-depth tasks)))]))))) + +;; Starts parsing to match a non-terminal. There's a minor +;; optimization that checks for known starting tokens. Otherwise, +;; use the cache, block if someone else is already trying the match, +;; and cache the result if it's computed. +;; The cache maps nontermial+startingpos+iteration to a result, where +;; the iteration is 0 for the first match attempt, 1 for the second, +;; etc. +(define (parse-nt/share key min-cnt init-tokens stream last-consumed-token depth end max-depth tasks success-k fail-k k) + (if (and (positive? min-cnt) + (pair? stream) + (not (memq (tok-name (car stream)) init-tokens))) + ;; No such leading token; give up + (fail-k max-depth tasks) + ;; Run pattern + (let loop ([n 0] + [success-k success-k] + [fail-k fail-k] + [max-depth max-depth] + [tasks tasks] + [k k]) + (let ([answer-key (gensym)] + [table-key (vector key depth n)] + [old-depth depth] + [old-stream stream]) + #;(printf "Loop ~a\n" table-key) + (cond + [(hash-ref (tasks-cache tasks) table-key (lambda () #f)) + => (lambda (result) + #;(printf "Reuse ~a\n" table-key) + (result success-k fail-k max-depth tasks))] + [else + #;(printf "Try ~a ~a\n" table-key (map tok-name stream)) + (hash-set! (tasks-cache tasks) table-key + (lambda (success-k fail-k max-depth tasks) + #;(printf "Wait ~a ~a\n" table-key answer-key) + (wait-for-answer #t max-depth tasks answer-key success-k fail-k + (lambda (max-depth tasks) + #;(printf "Deadlock ~a ~a\n" table-key answer-key) + (fail-k max-depth tasks))))) + (let result-loop ([max-depth max-depth][tasks tasks][k k]) + (letrec ([orig-stream stream] + [new-got-k + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + ;; Check whether we already have a result that consumed the same amount: + (let ([result-key (vector #f key old-depth depth)]) + (cond + [(hash-ref (tasks-cache tasks) result-key (lambda () #f)) + ;; Go for the next-result + (result-loop max-depth + tasks + (lambda (end max-depth tasks success-k fail-k) + (next-k success-k fail-k max-depth tasks)))] + [else + #;(printf "Success ~a ~a\n" table-key + (map tok-name (let loop ([d old-depth][s old-stream]) + (if (= d depth) + null + (cons (car s) (loop (add1 d) (cdr s))))))) + (let ([next-k (lambda (success-k fail-k max-depth tasks) + (loop (add1 n) + success-k + fail-k + max-depth + tasks + (lambda (end max-depth tasks success-k fail-k) + (next-k success-k fail-k max-depth tasks))))]) + (hash-set! (tasks-cache tasks) result-key #t) + (hash-set! (tasks-cache tasks) table-key + (lambda (success-k fail-k max-depth tasks) + (success-k val stream last-consumed-token depth max-depth tasks next-k))) + (report-answer-all answer-key + max-depth + tasks + (list val stream last-consumed-token depth next-k) + (lambda (max-depth tasks) + (success-k val stream last-consumed-token depth max-depth tasks next-k))))])))] + [new-fail-k + (lambda (max-depth tasks) + #;(printf "Failure ~a\n" table-key) + (hash-set! 
(tasks-cache tasks) table-key + (lambda (success-k fail-k max-depth tasks) + (fail-k max-depth tasks))) + (report-answer-all answer-key + max-depth + tasks + null + (lambda (max-depth tasks) + (fail-k max-depth tasks))))]) + (k end max-depth tasks new-got-k new-fail-k)))]))))) + +(define-syntax (cfg-parser stx) + (syntax-case stx () + [(_ clause ...) + (let ([clauses (syntax->list #'(clause ...))]) + (let-values ([(start grammar cfg-error parser-clauses src-pos?) + (let ([all-toks (apply + append + (map (lambda (clause) + (syntax-case clause (tokens) + [(tokens t ...) + (apply + append + (map (lambda (t) + (let ([v (syntax-local-value t (lambda () #f))]) + (cond + [(terminals-def? v) + (map (lambda (v) + (cons v #f)) + (syntax->list (terminals-def-t v)))] + [(e-terminals-def? v) + (map (lambda (v) + (cons v #t)) + (syntax->list (e-terminals-def-t v)))] + [else null]))) + (syntax->list #'(t ...))))] + [_else null])) + clauses))] + [all-end-toks (apply + append + (map (lambda (clause) + (syntax-case clause (end) + [(end t ...) + (syntax->list #'(t ...))] + [_else null])) + clauses))]) + (let loop ([clauses clauses] + [cfg-start #f] + [cfg-grammar #f] + [cfg-error #f] + [src-pos? #f] + [parser-clauses null]) + (if (null? clauses) + (values cfg-start + cfg-grammar + cfg-error + (reverse parser-clauses) + src-pos?) + (syntax-case (car clauses) (start error grammar src-pos) + [(start tok) + (loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)] + [(error expr) + (loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)] + [(grammar [nt [pat handle0 handle ...] ...] ...) + (let ([nts (make-bound-identifier-mapping)] + [toks (make-token-identifier-mapping)] + [end-toks (make-token-identifier-mapping)] + [nt-ids (syntax->list #'(nt ...))] + [patss (map (lambda (stx) + (map syntax->list (syntax->list stx))) + (syntax->list #'((pat ...) ...)))]) + (for-each (lambda (nt) + (bound-identifier-mapping-put! nts nt (list 0))) + nt-ids) + (for-each (lambda (t) + (token-identifier-mapping-put! end-toks t #t)) + all-end-toks) + (for-each (lambda (t) + (unless (token-identifier-mapping-get end-toks (car t) (lambda () #f)) + (let ([id (gensym (syntax-e (car t)))]) + (token-identifier-mapping-put! toks (car t) + (cons id (cdr t)))))) + all-toks) + ;; Compute min max size for each non-term: + (nt-fixpoint + nts + (lambda (nt pats old-list) + (let ([new-cnt + (apply + min + (map (lambda (pat) + (apply + + + (map (lambda (elem) + (car + (bound-identifier-mapping-get nts + elem + (lambda () (list 1))))) + pat))) + pats))]) + (if (new-cnt . > . (car old-list)) + (cons new-cnt (cdr old-list)) + old-list))) + nt-ids patss) + ;; Compute set of toks that must appear at the beginning + ;; for a non-terminal + (nt-fixpoint + nts + (lambda (nt pats old-list) + (let ([new-list + (apply + append + (map (lambda (pat) + (let loop ([pat pat]) + (if (pair? pat) + (let ([l (bound-identifier-mapping-get + nts + (car pat) + (lambda () + (list 1 (map-token toks (car pat)))))]) + ;; If the non-terminal can match 0 things, + ;; then it might match something from the + ;; next pattern element. Otherwise, it must + ;; match the first element: + (if (zero? (car l)) + (append (cdr l) (loop (cdr pat))) + (cdr l))) + null))) + pats))]) + (let ([new (filter (lambda (id) + (andmap (lambda (id2) + (not (eq? id id2))) + (cdr old-list))) + new-list)]) + (if (pair? new) + ;; Drop dups in new list: + (let ([new (let loop ([new new]) + (if (null? (cdr new)) + new + (if (ormap (lambda (id) + (eq? 
(car new) id)) + (cdr new)) + (loop (cdr new)) + (cons (car new) (loop (cdr new))))))]) + (cons (car old-list) (append new (cdr old-list)))) + old-list)))) + nt-ids patss) + ;; Determine left-recursive clauses: + (for-each (lambda (nt pats) + (let ([l (bound-identifier-mapping-get nts nt)]) + (bound-identifier-mapping-put! nts nt (list (car l) + (cdr l) + (map (lambda (x) #f) pats))))) + nt-ids patss) + (nt-fixpoint + nts + (lambda (nt pats old-list) + (list (car old-list) + (cadr old-list) + (map (lambda (pat simple?) + (or simple? + (let ([l (map (lambda (elem) + (bound-identifier-mapping-get + nts + elem + (lambda () #f))) + pat)]) + (andmap (lambda (i) + (or (not i) + (andmap values (caddr i)))) + l)))) + pats (caddr old-list)))) + nt-ids patss) + ;; Build a definition for each non-term: + (loop (cdr clauses) + cfg-start + (map (lambda (nt pats handles $ctxs) + (define info (bound-identifier-mapping-get nts nt)) + (list nt + #`(let ([key (gensym '#,nt)]) + (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) + (parse-nt/share + key #,(car info) '#,(cadr info) stream last-consumed-token depth end + max-depth tasks + success-k fail-k + (lambda (end max-depth tasks success-k fail-k) + #,(let loop ([pats pats] + [handles (syntax->list handles)] + [$ctxs (syntax->list $ctxs)] + [simple?s (caddr info)]) + (if (null? pats) + #'(fail-k max-depth tasks) + #`(#,(if (or (null? (cdr pats)) + (car simple?s)) + #'parse-or + #'parse-parallel-or) + (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) + #,(build-match nts + toks + (car pats) + (car handles) + (car $ctxs))) + (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) + #,(loop (cdr pats) + (cdr handles) + (cdr $ctxs) + (cdr simple?s))) + stream last-consumed-token depth end success-k fail-k max-depth tasks))))))))) + nt-ids + patss + (syntax->list #'(((begin handle0 handle ...) ...) ...)) + (syntax->list #'((handle0 ...) ...))) + cfg-error + src-pos? + (list* + (with-syntax ([((tok tok-id . $e) ...) + (token-identifier-mapping-map toks + (lambda (k v) + (list* k + (car v) + (if (cdr v) + #f + '$1))))] + [(pos ...) + (if src-pos? + #'($1-start-pos $1-end-pos) + #'(#f #f))]) + #`(grammar (start [() null] + [(atok start) (cons $1 $2)]) + (atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...))) + #`(start start) + parser-clauses)))] + [(grammar . _) + (raise-syntax-error + #f + "bad grammar clause" + stx + (car clauses))] + [(src-pos) + (loop (cdr clauses) + cfg-start + cfg-grammar + cfg-error + #t + (cons (car clauses) parser-clauses))] + [_else + (loop (cdr clauses) + cfg-start + cfg-grammar + cfg-error + src-pos? + (cons (car clauses) parser-clauses))]))))]) + #`(let ([orig-parse (parser + [error (lambda (a b c) + (error 'cfg-parser "unexpected ~a token: ~a" b c))] + . #,parser-clauses)] + [error-proc #,cfg-error]) + (letrec #,grammar + (lambda (get-tok) + (let ([tok-list (orig-parse get-tok)]) + (letrec ([success-k + (lambda (val stream last-consumed-token depth max-depth tasks next) + (if (null? stream) + val + (next success-k fail-k max-depth tasks)))] + [fail-k (lambda (max-depth tasks) + (define (call-error-proc tok-ok? tok-name tok-value start-pos end-pos) + (cond + [(procedure-arity-includes? error-proc 5) + (error-proc tok-ok? tok-name tok-value start-pos end-pos)] + [else + (error-proc tok-ok? tok-name tok-value)])) + (cond + [(null? 
tok-list) + (if error-proc + (call-error-proc #t + 'no-tokens + #f + (make-position #f #f #f) + (make-position #f #f #f)) + (error + 'cfg-parse + "no tokens"))] + [else + (let ([bad-tok (list-ref tok-list + (min (sub1 (length tok-list)) + max-depth))]) + (if error-proc + (call-error-proc #t + (tok-orig-name bad-tok) + (tok-val bad-tok) + (tok-start bad-tok) + (tok-end bad-tok)) + (error + 'cfg-parse + "failed at ~a" + (tok-val bad-tok))))]))]) + (#,start tok-list + ;; we simulate a token at the very beginning with zero width + ;; for use with the position-generating code (*-start-pos, *-end-pos). + (if (null? tok-list) + (tok #f #f #f + (position 1 + #,(if src-pos? #'1 #'#f) + #,(if src-pos? #'0 #'#f)) + (position 1 + #,(if src-pos? #'1 #'#f) + #,(if src-pos? #'0 #'#f))) + (tok (tok-name (car tok-list)) + (tok-orig-name (car tok-list)) + (tok-val (car tok-list)) + (tok-start (car tok-list)) + (tok-start (car tok-list)))) + 0 + (length tok-list) + success-k + fail-k + 0 + (make-tasks null null + (make-hasheq) (make-hasheq) + (make-hash) #t)))))))))])) + + +(module* test racket/base + (require (submod "..") + br-parser-tools/lex + racket/block + racket/generator + rackunit) + + ;; Test: parsing regular expressions. + ;; Here is a test case on locations: + (block + (define-tokens regexp-tokens (ANCHOR STAR OR LIT LPAREN RPAREN EOF)) + (define lex (lexer-src-pos ["|" (token-OR lexeme)] + ["^" (token-ANCHOR lexeme)] + ["*" (token-STAR lexeme)] + [(repetition 1 +inf.0 alphabetic) (token-LIT lexeme)] + ["(" (token-LPAREN lexeme)] + [")" (token-RPAREN lexeme)] + [whitespace (return-without-pos (lex input-port))] + [(eof) (token-EOF 'eof)])) + (define -parse (cfg-parser + (tokens regexp-tokens) + (start top) + (end EOF) + (src-pos) + (grammar [top [(maybe-anchor regexp) + (cond [$1 + `(anchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] + [else + `(unanchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))])]] + [maybe-anchor [(ANCHOR) #t] + [() #f]] + [regexp [(regexp STAR) `(star ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] + [(regexp OR regexp) `(or ,$1 ,$3 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] + [(LPAREN regexp RPAREN) `(group ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] + [(LIT) `(lit ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $1-end-pos))]]))) + (define (pos->sexp pos) + (position-offset pos)) + + (define (parse s) + (define ip (open-input-string s)) + (port-count-lines! ip) + (-parse (lambda () (lex ip)))) + + (check-equal? (parse "abc") + '(unanchored (lit "abc" 1 4) 1 4)) + (check-equal? (parse "a | (b*) | c") + '(unanchored (or (or (lit "a" 1 2) + (group (star (lit "b" 6 7) 6 8) 5 9) + 1 9) + (lit "c" 12 13) + 1 13) + 1 13))) + + + ;; Check that cfg-parser can accept error functions of 3 arguments: + (block + (define-tokens non-terminals (ONE ZERO EOF)) + (define parse + (cfg-parser (tokens non-terminals) + (start ones) + (end EOF) + (error (lambda (tok-ok tok-name tok-val) + (error (format "~a ~a ~a" tok-ok tok-name tok-val)))) + (grammar [ones [() null] + [(ONE ones) (cons $1 $2)]]))) + (define (sequence->tokenizer s) + (define-values (more? next) (sequence-generate s)) + (lambda () + (cond [(more?) 
(next)] + [else (token-EOF 'eof)]))) + (check-exn #rx"#t ZERO zero" + (lambda () (parse (sequence->tokenizer (list (token-ZERO "zero"))))))) + + + + + ;; Check that cfg-parser can accept error functions of 5 arguments: + (block + (define-tokens non-terminals (ONE ZERO EOF)) + (define parse + (cfg-parser (tokens non-terminals) + (start ones) + (src-pos) + (end EOF) + (error (lambda (tok-ok tok-name tok-val start-pos end-pos) + (error (format "~a ~a ~a ~a ~a" + tok-ok tok-name tok-val + (position-offset start-pos) + (position-offset end-pos))))) + (grammar [ones [() null] + [(ONE ones) (cons $1 $2)]]))) + (define (sequence->tokenizer s) + (define-values (more? next) (sequence-generate s)) + (lambda () + (cond [(more?) (next)] + [else (position-token (token-EOF 'eof) + (position #f #f #f) + (position #f #f #f))]))) + (check-exn #rx"#t ZERO zero 2 3" + (lambda () + (parse + (sequence->tokenizer + (list (position-token + (token-ZERO "zero") + (position 2 2 5) + (position 3 2 6)))))))) + + + + + + ;; Tests used during development + (define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF)) + + (define lex + (lexer + ["+" (token-PLUS '+)] + ["-" (token-MINUS '-)] + ["*" (token-STAR '*)] + ["|" (token-BAR '||)] + [":" (token-COLON '|:|)] + [whitespace (lex input-port)] + [(eof) (token-EOF 'eof)])) + + (define parse + (cfg-parser + (tokens non-terminals) + (start ) + (end EOF) + (error (lambda (a b stx) + (error 'parse "failed at ~s" stx))) + (grammar [ [(PLUS) "plus"] + [( BAR ) (list $1 $2 $3)] + [( COLON) (list $1)]] + [ [(MINUS) "minus"] + [( STAR) (cons $1 $2)]] + [ [( MINUS) "yes"]] + [ [(PLUS) 'plus] + [(MINUS) 'minus]] + [ [() '0] + [( PLUS) (add1 $1)] + [( PLUS) (add1 $1)]]))) + + (let ([p (open-input-string #;"+*|-|-*|+**" #;"-|+*|+**" + #;"+*|+**|-" #;"-|-*|-|-*" + #;"-|-*|-|-**|-|-*|-|-**" + "-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|-|-*|-|-**|-|-*|-|-*** + |-|-*|-|-**|-|-*|-|-*****|-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****| + -|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****" + ;; This one fails: + #;"+*")]) + (check-equal? (parse (lambda () (lex p))) + '((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) + || + (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) + . + *) + || + (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) + || + (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) + . + *)) + . + *) + || + (((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) + || + (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) + . + *) + || + (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) + || + (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) + . + *)) + . 
+ *))))) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/cfg-parser_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/cfg-parser_rkt.dep new file mode 100644 index 0000000..783942b --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/cfg-parser_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("1da87ebbdbd287c3141d81b344c83a22fdcaead1" . "913322440977cfa44185506acee3ea9ca2d4426d") (collects #"br-parser-tools" #"lex.rkt") (collects #"br-parser-tools" #"private-lex" #"token-syntax.rkt") (collects #"br-parser-tools" #"yacc.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"block.rkt") (collects #"racket" #"generator.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt") (collects #"syntax" #"boundmap.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/cfg-parser_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/cfg-parser_rkt.zo new file mode 100644 index 0000000..d7e51c3 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/cfg-parser_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/lex-sre_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/lex-sre_rkt.dep new file mode 100644 index 0000000..1443c49 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/lex-sre_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("31789e05e19e6122c7619af755b6438bc1872e9b" . "b19638aea3046717541136642402ab336892a3aa") (collects #"br-parser-tools" #"lex.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/lex-sre_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/lex-sre_rkt.zo new file mode 100644 index 0000000..ba37398 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/lex-sre_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/lex_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/lex_rkt.dep new file mode 100644 index 0000000..22f558e --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/lex_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("fa9d915c4fb46de94fa89c9eac68c0a0fe32cd40" . 
"6eb29578f87766fcd1ee8209b3edc21c1081b8e4") (collects #"br-parser-tools" #"private-lex" #"actions.rkt") (collects #"br-parser-tools" #"private-lex" #"front.rkt") (collects #"br-parser-tools" #"private-lex" #"token.rkt") (collects #"br-parser-tools" #"private-lex" #"unicode-chars.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzlib" #"stxparam.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"boundmap.rkt") (collects #"syntax" #"define.rkt") (collects #"syntax" #"readerr.rkt") (collects #"syntax" #"stx.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/lex_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/lex_rkt.zo new file mode 100644 index 0000000..6b45383 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/lex_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/yacc_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/yacc_rkt.dep new file mode 100644 index 0000000..f51c124 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/yacc_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("100e497a060ad40147465b34990a741797ddb6c0" . "4a109ffd564a7614c177351282958ab6cc95da13") (collects #"br-parser-tools" #"private-lex" #"token.rkt") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"br-parser-tools" #"private-yacc" #"parser-actions.rkt") (collects #"br-parser-tools" #"private-yacc" #"parser-builder.rkt") (collects #"br-parser-tools" #"private-yacc" #"yacc-helper.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"etc.rkt") (collects #"mzlib" #"pretty.rkt") (collects #"scheme" #"base.rkt") (collects #"scheme" #"runtime-config.rkt") (collects #"syntax" #"readerr.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/yacc_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/yacc_rkt.zo new file mode 100644 index 0000000..3b7cf31 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/errortrace/yacc_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/lex-sre_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/lex-sre_rkt.dep new file mode 100644 index 0000000..48ef386 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/lex-sre_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("31789e05e19e6122c7619af755b6438bc1872e9b" . 
"0a9e246cdda8f6239b7422f687b6513aa57dfb7f") (collects #"br-parser-tools" #"lex.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/lex-sre_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/lex-sre_rkt.zo new file mode 100644 index 0000000..7440dc1 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/lex-sre_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/lex_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/lex_rkt.dep new file mode 100644 index 0000000..9cdedc6 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/lex_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("340fcc1fd30e6afc450a6027068d0e71ff42234e" . "57d5de7788049c0521682559da14c2475e4e08b5") (collects #"br-parser-tools" #"private-lex" #"actions.rkt") (collects #"br-parser-tools" #"private-lex" #"front.rkt") (collects #"br-parser-tools" #"private-lex" #"token.rkt") (collects #"br-parser-tools" #"private-lex" #"unicode-chars.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzlib" #"stxparam.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"boundmap.rkt") (collects #"syntax" #"define.rkt") (collects #"syntax" #"readerr.rkt") (collects #"syntax" #"stx.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/lex_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/lex_rkt.zo new file mode 100644 index 0000000..7df50d6 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/drracket/lex_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/info_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/info_rkt.dep new file mode 100644 index 0000000..5643975 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/info_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("0160a40a20d9e0d2a9dc08e0d3b6407cd43b669f" . "64293529315a4e5ca67bc9da08b943f520e0d704") (collects #"info" #"main.rkt") (collects #"setup" #"infotab.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/info_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/info_rkt.zo new file mode 100644 index 0000000..5e36c79 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/info_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex-plt-v200_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex-plt-v200_rkt.dep new file mode 100644 index 0000000..1319688 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex-plt-v200_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("1c73d195a19fdaccf7ef3b12cf2e3b5d7fa49f8f" . 
"38b5833add35ba09a9b03d9a3edef53637cc159c") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex-plt-v200_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex-plt-v200_rkt.zo new file mode 100644 index 0000000..7ee7c48 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex-plt-v200_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex-sre_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex-sre_rkt.dep new file mode 100644 index 0000000..a168d2e --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex-sre_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("31789e05e19e6122c7619af755b6438bc1872e9b" . "6c5a05919183e8d1de083eff2db1966f6ae5ccb6") (collects #"br-parser-tools" #"lex.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex-sre_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex-sre_rkt.zo new file mode 100644 index 0000000..88a6dfb Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex-sre_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex_rkt.dep new file mode 100644 index 0000000..052725f --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("340fcc1fd30e6afc450a6027068d0e71ff42234e" . "8cc42f47b45f7984d90feb68df53180d93bdefeb") (collects #"br-parser-tools" #"private-lex" #"actions.rkt") (collects #"br-parser-tools" #"private-lex" #"front.rkt") (collects #"br-parser-tools" #"private-lex" #"token.rkt") (collects #"br-parser-tools" #"private-lex" #"unicode-chars.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzlib" #"stxparam.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"boundmap.rkt") (collects #"syntax" #"define.rkt") (collects #"syntax" #"readerr.rkt") (collects #"syntax" #"stx.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex_rkt.zo new file mode 100644 index 0000000..9d9ac1b Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/lex_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/yacc-to-scheme_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/yacc-to-scheme_rkt.dep new file mode 100644 index 0000000..61e3adf --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/yacc-to-scheme_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("d5abd58a5a7bfc4bc558dd51bd60ad27bf7d5be9" . 
"1ff2a8b025cffabf443820bc20e3a0c286570369") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"br-parser-tools" #"yacc.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"readerr.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/yacc-to-scheme_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/yacc-to-scheme_rkt.zo new file mode 100644 index 0000000..3b5966e Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/yacc-to-scheme_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/yacc_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/yacc_rkt.dep new file mode 100644 index 0000000..32c4f24 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/yacc_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("100e497a060ad40147465b34990a741797ddb6c0" . "35e63de458cf673b37751cb8bdcb77a583578019") (collects #"br-parser-tools" #"private-lex" #"token.rkt") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"br-parser-tools" #"private-yacc" #"parser-actions.rkt") (collects #"br-parser-tools" #"private-yacc" #"parser-builder.rkt") (collects #"br-parser-tools" #"private-yacc" #"yacc-helper.rkt") (collects #"mzlib" #"etc.rkt") (collects #"mzlib" #"pretty.rkt") (collects #"scheme" #"base.rkt") (collects #"scheme" #"runtime-config.rkt") (collects #"syntax" #"readerr.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/yacc_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/yacc_rkt.zo new file mode 100644 index 0000000..a7f86ae Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/compiled/yacc_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/calc.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/calc.rkt new file mode 100644 index 0000000..9ad1218 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/calc.rkt @@ -0,0 +1,89 @@ +#lang scheme + +;; An interactive calculator inspired by the calculator example in the bison manual. + + +;; Import the parser and lexer generators. +(require br-parser-tools/yacc + br-parser-tools/lex + (prefix-in : br-parser-tools/lex-sre)) + +(define-tokens value-tokens (NUM VAR FNCT)) +(define-empty-tokens op-tokens (newline = OP CP + - * / ^ EOF NEG)) + +;; A hash table to store variable values in for the calculator +(define vars (make-hash)) + +(define-lex-abbrevs + (lower-letter (:/ "a" "z")) + + (upper-letter (:/ #\A #\Z)) + + ;; (:/ 0 9) would not work because the lexer does not understand numbers. (:/ #\0 #\9) is ok too. + (digit (:/ "0" "9"))) + +(define calcl + (lexer + [(eof) 'EOF] + ;; recursively call the lexer on the remaining input after a tab or space. Returning the + ;; result of that operation. This effectively skips all whitespace. + [(:or #\tab #\space) (calcl input-port)] + ;; (token-newline) returns 'newline + [#\newline (token-newline)] + ;; Since (token-=) returns '=, just return the symbol directly + [(:or "=" "+" "-" "*" "/" "^") (string->symbol lexeme)] + ["(" 'OP] + [")" 'CP] + ["sin" (token-FNCT sin)] + [(:+ (:or lower-letter upper-letter)) (token-VAR (string->symbol lexeme))] + [(:+ digit) (token-NUM (string->number lexeme))] + [(:: (:+ digit) #\. 
(:* digit)) (token-NUM (string->number lexeme))])) + + +(define calcp + (parser + + (start start) + (end newline EOF) + (tokens value-tokens op-tokens) + (error (lambda (a b c) (void))) + + (precs (right =) + (left - +) + (left * /) + (left NEG) + (right ^)) + + (grammar + + (start [() #f] + ;; If there is an error, ignore everything before the error + ;; and try to start over right after the error + [(error start) $2] + [(exp) $1]) + + (exp [(NUM) $1] + [(VAR) (hash-ref vars $1 (lambda () 0))] + [(VAR = exp) (begin (hash-set! vars $1 $3) + $3)] + [(FNCT OP exp CP) ($1 $3)] + [(exp + exp) (+ $1 $3)] + [(exp - exp) (- $1 $3)] + [(exp * exp) (* $1 $3)] + [(exp / exp) (/ $1 $3)] + [(- exp) (prec NEG) (- $2)] + [(exp ^ exp) (expt $1 $3)] + [(OP exp CP) $2])))) + +;; run the calculator on the given input-port +(define (calc ip) + (port-count-lines! ip) + (letrec ((one-line + (lambda () + (let ((result (calcp (lambda () (calcl ip))))) + (when result + (printf "~a\n" result) + (one-line)))))) + (one-line))) + +(calc (open-input-string "x=1\n(x + 2 * 3) - (1+2)*3")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/compiled/calc_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/compiled/calc_rkt.dep new file mode 100644 index 0000000..175fda2 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/compiled/calc_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("e3352df1b72626dc220a94ee0bd16f165519bade" . "3c46fc3eda107e037940fbfb68032106839316da") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"br-parser-tools" #"yacc.rkt") (collects #"scheme" #"main.rkt") (collects #"scheme" #"runtime-config.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/compiled/calc_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/compiled/calc_rkt.zo new file mode 100644 index 0000000..63ed2de Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/compiled/calc_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/compiled/read_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/compiled/read_rkt.dep new file mode 100644 index 0000000..990e503 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/compiled/read_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("05c4bd3fd622dd1b33ebd5eec53f3018b9d64055" . "9a6409107f8f3a3566e2c5a71cb5bbf5b38f014e") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"br-parser-tools" #"yacc.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"readerr.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/compiled/read_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/compiled/read_rkt.zo new file mode 100644 index 0000000..f369ee8 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/compiled/read_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/read.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/read.rkt new file mode 100644 index 0000000..a10b2c1 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/read.rkt @@ -0,0 +1,242 @@ +;; This implements the equivalent of racket's read-syntax for R5RS scheme. +;; It has not been thoroughly tested. 
Also it will read an entire file into a +;; list of syntax objects, instead of returning one syntax object at a time + +(module read mzscheme + + (require br-parser-tools/lex + (prefix : br-parser-tools/lex-sre) + br-parser-tools/yacc + syntax/readerr) + + (define-tokens data (DATUM)) + (define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF)) + + (define scheme-lexer + (lexer-src-pos + + ;; Skip comments, without accumulating extra position information + [(:or scheme-whitespace comment) (return-without-pos (scheme-lexer input-port))] + + ["#t" (token-DATUM #t)] + ["#f" (token-DATUM #f)] + [(:: "#\\" any-char) (token-DATUM (caddr (string->list lexeme)))] + ["#\\space" (token-DATUM #\space)] + ["#\\newline" (token-DATUM #\newline)] + [(:or (:: initial (:* subsequent)) "+" "-" "...") (token-DATUM (string->symbol lexeme))] + [#\" (token-DATUM (list->string (get-string-token input-port)))] + [#\( 'OP] + [#\) 'CP] + [#\[ 'OP] + [#\] 'CP] + ["#(" 'HASHOP] + [num2 (token-DATUM (string->number lexeme 2))] + [num8 (token-DATUM (string->number lexeme 8))] + [num10 (token-DATUM (string->number lexeme 10))] + [num16 (token-DATUM (string->number lexeme 16))] + ["'" 'QUOTE] + ["`" 'QUASIQUOTE] + ["," 'UNQUOTE] + [",@" 'UNQUOTE-SPLICING] + ["." 'DOT] + [(eof) 'EOF])) + + (define get-string-token + (lexer + [(:~ #\" #\\) (cons (car (string->list lexeme)) + (get-string-token input-port))] + [(:: #\\ #\\) (cons #\\ (get-string-token input-port))] + [(:: #\\ #\") (cons #\" (get-string-token input-port))] + [#\" null])) + + + (define-lex-abbrevs + [letter (:or (:/ "a" "z") (:/ #\A #\Z))] + [digit (:/ #\0 #\9)] + [scheme-whitespace (:or #\newline #\return #\tab #\space #\vtab)] + [initial (:or letter (char-set "!$%&*/:<=>?^_~@"))] + [subsequent (:or initial digit (char-set "+-.@"))] + [comment (:: #\; (:* (:~ #\newline)) #\newline)] + + + ;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of + ;; using regexp macros to avoid the cut and paste. 
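+ ;; As a rough illustration of that idea (a sketch only -- the `uinteger'
+ ;; macro below is hypothetical and is not used in this file), a lexer macro
+ ;; defined with define-lex-trans could generate the per-radix pieces instead
+ ;; of repeating them for each base:
+ ;;
+ ;;   (define-lex-trans uinteger
+ ;;     (syntax-rules ()
+ ;;       [(_ digit) (:: (:+ digit) (:* #\#))]))
+ ;;
+ ;;   ;; e.g. [uinteger2 (uinteger digit2)] expands to (:: (:+ digit2) (:* #\#))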
+ ; [numR (:: prefixR complexR)] + ; [complexR (:or realR + ; (:: realR "@" realR) + ; (:: realR "+" urealR "i") + ; (:: realR "-" urealR "i") + ; (:: realR "+i") + ; (:: realR "-i") + ; (:: "+" urealR "i") + ; (:: "-" urealR "i") + ; (:: "+i") + ; (:: "-i"))] + ; [realR (:: sign urealR)] + ; [urealR (:or uintegerR (:: uintegerR "/" uintegerR) decimalR)] + ; [uintegerR (:: (:+ digitR) (:* #\#))] + ; [prefixR (:or (:: radixR exactness) + ; (:: exactness radixR))] + + [num2 (:: prefix2 complex2)] + [complex2 (:or real2 + (:: real2 "@" real2) + (:: real2 "+" ureal2 "i") + (:: real2 "-" ureal2 "i") + (:: real2 "+i") + (:: real2 "-i") + (:: "+" ureal2 "i") + (:: "-" ureal2 "i") + (:: "+i") + (:: "-i"))] + [real2 (:: sign ureal2)] + [ureal2 (:or uinteger2 (:: uinteger2 "/" uinteger2))] + [uinteger2 (:: (:+ digit2) (:* #\#))] + [prefix2 (:or (:: radix2 exactness) + (:: exactness radix2))] + [radix2 "#b"] + [digit2 (:or "0" "1")] + [num8 (:: prefix8 complex8)] + [complex8 (:or real8 + (:: real8 "@" real8) + (:: real8 "+" ureal8 "i") + (:: real8 "-" ureal8 "i") + (:: real8 "+i") + (:: real8 "-i") + (:: "+" ureal8 "i") + (:: "-" ureal8 "i") + (:: "+i") + (:: "-i"))] + [real8 (:: sign ureal8)] + [ureal8 (:or uinteger8 (:: uinteger8 "/" uinteger8))] + [uinteger8 (:: (:+ digit8) (:* #\#))] + [prefix8 (:or (:: radix8 exactness) + (:: exactness radix8))] + [radix8 "#o"] + [digit8 (:/ "0" "7")] + + [num10 (:: prefix10 complex10)] + [complex10 (:or real10 + (:: real10 "@" real10) + (:: real10 "+" ureal10 "i") + (:: real10 "-" ureal10 "i") + (:: real10 "+i") + (:: real10 "-i") + (:: "+" ureal10 "i") + (:: "-" ureal10 "i") + (:: "+i") + (:: "-i"))] + [real10 (:: sign ureal10)] + [ureal10 (:or uinteger10 (:: uinteger10 "/" uinteger10) decimal10)] + [uinteger10 (:: (:+ digit10) (:* #\#))] + [prefix10 (:or (:: radix10 exactness) + (:: exactness radix10))] + [radix10 (:? "#d")] + [digit10 digit] + [decimal10 (:or (:: uinteger10 suffix) + (:: #\. (:+ digit10) (:* #\#) suffix) + (:: (:+ digit10) #\. (:* digit10) (:* #\#) suffix) + (:: (:+ digit10) (:+ #\#) #\. 
(:* #\#) suffix))] + + [num16 (:: prefix16 complex16)] + [complex16 (:or real16 + (:: real16 "@" real16) + (:: real16 "+" ureal16 "i") + (:: real16 "-" ureal16 "i") + (:: real16 "+i") + (:: real16 "-i") + (:: "+" ureal16 "i") + (:: "-" ureal16 "i") + "+i" + "-i")] + [real16 (:: sign ureal16)] + [ureal16 (:or uinteger16 (:: uinteger16 "/" uinteger16))] + [uinteger16 (:: (:+ digit16) (:* #\#))] + [prefix16 (:or (:: radix16 exactness) + (:: exactness radix16))] + [radix16 "#x"] + [digit16 (:or digit (:/ #\a #\f) (:/ #\A #\F))] + + + [suffix (:or "" (:: exponent-marker sign (:+ digit10)))] + [exponent-marker (:or "e" "s" "f" "d" "l")] + [sign (:or "" "+" "-")] + [exactness (:or "" "#i" "#e")]) + + + (define stx-for-original-property (read-syntax #f (open-input-string "original"))) + + ;; A macro to build the syntax object + (define-syntax (build-so stx) + (syntax-case stx () + ((_ value start end) + (with-syntax ((start-pos (datum->syntax-object + (syntax end) + (string->symbol + (format "$~a-start-pos" + (syntax-object->datum (syntax start)))))) + (end-pos (datum->syntax-object + (syntax end) + (string->symbol + (format "$~a-end-pos" + (syntax-object->datum (syntax end)))))) + (source (datum->syntax-object + (syntax end) + 'source-name))) + (syntax + (datum->syntax-object + #f + value + (list source + (position-line start-pos) + (position-col start-pos) + (position-offset start-pos) + (- (position-offset end-pos) + (position-offset start-pos))) + stx-for-original-property)))))) + + (define (scheme-parser source-name) + (parser + (src-pos) + + (start s) + (end EOF) + (error (lambda (a name val start end) + (raise-read-error + "read-error" + source-name + (position-line start) + (position-col start) + (position-offset start) + (- (position-offset end) + (position-offset start))))) + (tokens data delim) + + + (grammar + + (s [(sexp-list) (reverse $1)]) + + (sexp [(DATUM) (build-so $1 1 1)] + [(OP sexp-list CP) (build-so (reverse $2) 1 3)] + [(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)] + [(QUOTE sexp) (build-so (list 'quote $2) 1 2)] + [(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)] + [(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)] + [(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)] + [(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)]) + + (sexp-list [() null] + [(sexp-list sexp) (cons $2 $1)])))) + + (define (rs sn ip) + (port-count-lines! ip) + ((scheme-parser sn) (lambda () (scheme-lexer ip)))) + + (define readsyntax + (case-lambda ((sn) (rs sn (current-input-port))) + ((sn ip) (rs sn ip)))) + + (provide (rename readsyntax read-syntax)) + + ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/info.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/info.rkt new file mode 100644 index 0000000..ae66609 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/info.rkt @@ -0,0 +1,3 @@ +#lang info + +(define compile-omit-paths '("private-lex/error-tests.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt new file mode 100644 index 0000000..0cbb175 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt @@ -0,0 +1,24 @@ +(module lex-plt-v200 mzscheme + (require br-parser-tools/lex + (prefix : br-parser-tools/lex-sre)) + + (provide epsilon + ~ + (rename :* *) + (rename :+ +) + (rename :? ?) 
+ (rename :or :) + (rename :& &) + (rename :: @) + (rename :~ ^) + (rename :/ -)) + + (define-lex-trans epsilon + (syntax-rules () + ((_) ""))) + + (define-lex-trans ~ + (syntax-rules () + ((_ re) (complement re))))) + + diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-sre.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-sre.rkt new file mode 100644 index 0000000..820d090 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-sre.rkt @@ -0,0 +1,119 @@ +(module lex-sre mzscheme + (require br-parser-tools/lex) + + (provide (rename sre-* *) + (rename sre-+ +) + ? + (rename sre-= =) + (rename sre->= >=) + ** + (rename sre-or or) + : + seq + & + ~ + (rename sre-- -) + (rename sre-/ /) + /-only-chars) + + (define-lex-trans sre-* + (syntax-rules () + ((_ re ...) + (repetition 0 +inf.0 (union re ...))))) + + (define-lex-trans sre-+ + (syntax-rules () + ((_ re ...) + (repetition 1 +inf.0 (union re ...))))) + + (define-lex-trans ? + (syntax-rules () + ((_ re ...) + (repetition 0 1 (union re ...))))) + + (define-lex-trans sre-= + (syntax-rules () + ((_ n re ...) + (repetition n n (union re ...))))) + + (define-lex-trans sre->= + (syntax-rules () + ((_ n re ...) + (repetition n +inf.0 (union re ...))))) + + (define-lex-trans ** + (syntax-rules () + ((_ low #f re ...) + (** low +inf.0 re ...)) + ((_ low high re ...) + (repetition low high (union re ...))))) + + (define-lex-trans sre-or + (syntax-rules () + ((_ re ...) + (union re ...)))) + + (define-lex-trans : + (syntax-rules () + ((_ re ...) + (concatenation re ...)))) + + (define-lex-trans seq + (syntax-rules () + ((_ re ...) + (concatenation re ...)))) + + (define-lex-trans & + (syntax-rules () + ((_ re ...) + (intersection re ...)))) + + (define-lex-trans ~ + (syntax-rules () + ((_ re ...) + (char-complement (union re ...))))) + + ;; set difference + (define-lex-trans (sre-- stx) + (syntax-case stx () + ((_) + (raise-syntax-error #f + "must have at least one argument" + stx)) + ((_ big-re re ...) + (syntax (& big-re (complement (union re ...))))))) + + (define-lex-trans (sre-/ stx) + (syntax-case stx () + ((_ range ...) + (let ((chars + (apply append (map (lambda (r) + (let ((x (syntax-e r))) + (cond + ((char? x) (list x)) + ((string? x) (string->list x)) + (else + (raise-syntax-error + #f + "not a char or string" + stx + r))))) + (syntax->list (syntax (range ...))))))) + (unless (even? (length chars)) + (raise-syntax-error + #f + "not given an even number of characters" + stx)) + #`(/-only-chars #,@chars))))) + + (define-lex-trans /-only-chars + (syntax-rules () + ((_ c1 c2) + (char-range c1 c2)) + ((_ c1 c2 c ...) + (union (char-range c1 c2) + (/-only-chars c ...))))) + + ) + + diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex.rkt new file mode 100644 index 0000000..c7202ab --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex.rkt @@ -0,0 +1,412 @@ +(module lex mzscheme + + ;; Provides the syntax used to create lexers and the functions needed to + ;; create and use the buffer that the lexer reads from. See docs. 
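+ ;;
+ ;; A minimal usage sketch (illustrative only; `word-lexer' is a hypothetical
+ ;; name, not defined or exported here). It relies on the `alphabetic' and
+ ;; `whitespace' abbreviations provided below:
+ ;;
+ ;;   (define word-lexer
+ ;;     (lexer
+ ;;      [(repetition 1 +inf.0 alphabetic) (cons lexeme (word-lexer input-port))]
+ ;;      [whitespace (word-lexer input-port)]
+ ;;      [(eof) null]))
+ ;;
+ ;;   (word-lexer (open-input-string "see the docs")) ; => '("see" "the" "docs")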
+ + (require-for-syntax mzlib/list + syntax/stx + syntax/define + syntax/boundmap + "private-lex/util.rkt" + "private-lex/actions.rkt" + "private-lex/front.rkt" + "private-lex/unicode-chars.rkt") + + (require mzlib/stxparam + syntax/readerr + "private-lex/token.rkt") + + (provide lexer lexer-src-pos lexer-srcloc define-lex-abbrev define-lex-abbrevs define-lex-trans + + ;; Dealing with tokens and related structures + define-tokens define-empty-tokens token-name token-value token? + (struct position (offset line col)) + (struct position-token (token start-pos end-pos)) + (struct srcloc-token (token srcloc)) + + ;; File path for highlighting errors while lexing + file-path + lexer-file-path ;; alternate name + + ;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4. + any-char any-string nothing alphabetic lower-case upper-case title-case + numeric symbolic punctuation graphic whitespace blank iso-control + + ;; A regular expression operator + char-set) + + ;; wrap-action: syntax-object src-pos? -> syntax-object + (define-for-syntax (wrap-action action src-loc-style) + (with-syntax ((action-stx + (cond + [(eq? src-loc-style 'lexer-src-pos) + #`(let/ec ret + (syntax-parameterize + ([return-without-pos (make-rename-transformer #'ret)]) + (make-position-token #,action start-pos end-pos)))] + [(eq? src-loc-style 'lexer-srcloc) + #`(let/ec ret + (syntax-parameterize + ([return-without-srcloc (make-rename-transformer #'ret)]) + (make-srcloc-token #,action lexeme-srcloc)))] + [else action]))) + (syntax/loc action + (lambda (start-pos-p end-pos-p lexeme-p input-port-p) + (define lexeme-srcloc-p (make-srcloc (object-name input-port-p) + (position-line start-pos-p) + (position-col start-pos-p) + (position-offset start-pos-p) + (and (number? (position-offset end-pos-p)) + (number? (position-offset start-pos-p)) + (- (position-offset end-pos-p) + (position-offset start-pos-p))))) + (syntax-parameterize + ([start-pos (make-rename-transformer #'start-pos-p)] + [end-pos (make-rename-transformer #'end-pos-p)] + [lexeme (make-rename-transformer #'lexeme-p)] + [input-port (make-rename-transformer #'input-port-p)] + [lexeme-srcloc (make-rename-transformer #'lexeme-srcloc-p)]) + action-stx))))) + + (define-for-syntax (make-lexer-trans src-loc-style) + (lambda (stx) + (syntax-case stx () + ((_ re-act ...) + (begin + (for-each + (lambda (x) + (syntax-case x () + ((re act) (void)) + (_ (raise-syntax-error #f + "not a regular expression / action pair" + stx + x)))) + (syntax->list (syntax (re-act ...)))) + (let* ((spec/re-act-lst + (syntax->list (syntax (re-act ...)))) + (eof-act + (get-special-action spec/re-act-lst #'eof #''eof)) + (spec-act + (get-special-action spec/re-act-lst #'special #'(void))) + (spec-comment-act + (get-special-action spec/re-act-lst #'special-comment #'#f)) + (ids (list #'special #'special-comment #'eof)) + (re-act-lst + (filter + (lambda (spec/re-act) + (syntax-case spec/re-act () + (((special) act) + (not (ormap + (lambda (x) + (and (identifier? #'special) + (module-or-top-identifier=? (syntax special) x))) + ids))) + (_ #t))) + spec/re-act-lst)) + (name-lst (map (lambda (x) (datum->syntax-object #f (gensym))) re-act-lst)) + (act-lst (map (lambda (x) (stx-car (stx-cdr x))) re-act-lst)) + (re-actname-lst (map (lambda (re-act name) + (list (stx-car re-act) + name)) + re-act-lst + name-lst))) + (when (null? 
spec/re-act-lst) + (raise-syntax-error (or src-loc-style 'lexer) "expected at least one action" stx)) + (let-values (((trans start action-names no-look disappeared-uses) + (build-lexer re-actname-lst))) + (when (vector-ref action-names start) ;; Start state is final + (unless (and + ;; All the successor states are final + (andmap (lambda (x) (vector-ref action-names (vector-ref x 2))) + (vector->list (vector-ref trans start))) + ;; Each character has a successor state + (let loop ((check 0) + (nexts (vector->list (vector-ref trans start)))) + (cond + ((null? nexts) #f) + (else + (let ((next (car nexts))) + (and (= (vector-ref next 0) check) + (let ((next-check (vector-ref next 1))) + (or (>= next-check max-char-num) + (loop (add1 next-check) (cdr nexts)))))))))) + (eprintf "Warning: lexer at ~a can accept the empty string.\n" stx))) + (with-syntax ((start-state-stx start) + (trans-table-stx trans) + (no-lookahead-stx no-look) + ((name ...) name-lst) + ((act ...) (map (lambda (a) + (wrap-action a src-loc-style)) + act-lst)) + ((act-name ...) (vector->list action-names)) + (spec-act-stx + (wrap-action spec-act src-loc-style)) + (has-comment-act?-stx + (if (syntax-e spec-comment-act) #t #f)) + (spec-comment-act-stx + (wrap-action spec-comment-act src-loc-style)) + (eof-act-stx (wrap-action eof-act src-loc-style))) + (syntax-property + (syntax/loc stx + (let ([name act] ...) + (let ([proc + (lexer-body start-state-stx + trans-table-stx + (vector act-name ...) + no-lookahead-stx + spec-act-stx + has-comment-act?-stx + spec-comment-act-stx + eof-act-stx)]) + ;; reverse eta to get named procedures: + (lambda (port) (proc port))))) + 'disappeared-use + disappeared-uses))))))))) + + (define-syntax lexer (make-lexer-trans #f)) + (define-syntax lexer-src-pos (make-lexer-trans 'lexer-src-pos)) + (define-syntax lexer-srcloc (make-lexer-trans 'lexer-srcloc)) + + (define-syntax (define-lex-abbrev stx) + (syntax-case stx () + ((_ name re) + (identifier? (syntax name)) + (syntax/loc stx + (define-syntax name + (make-lex-abbrev (lambda () (quote-syntax re)))))) + (_ + (raise-syntax-error + #f + "form should be (define-lex-abbrev name re)" + stx)))) + + (define-syntax (define-lex-abbrevs stx) + (syntax-case stx () + ((_ x ...) + (with-syntax (((abbrev ...) + (map + (lambda (a) + (syntax-case a () + ((name re) + (identifier? (syntax name)) + (syntax/loc a (define-lex-abbrev name re))) + (_ (raise-syntax-error + #f + "form should be (define-lex-abbrevs (name re) ...)" + stx + a)))) + (syntax->list (syntax (x ...)))))) + (syntax/loc stx (begin abbrev ...)))) + (_ + (raise-syntax-error + #f + "form should be (define-lex-abbrevs (name re) ...)" + stx)))) + + (define-syntax (define-lex-trans stx) + (syntax-case stx () + ((_ name-form body-form) + (let-values (((name body) + (normalize-definition (syntax (define-syntax name-form body-form)) #'lambda))) + + #`(define-syntax #,name + (let ((func #,body)) + (unless (procedure? func) + (raise-syntax-error 'define-lex-trans "expected a procedure as the transformer, got ~e" func)) + (unless (procedure-arity-includes? 
func 1) + (raise-syntax-error 'define-lex-trans "expected a procedure that accepts 1 argument as the transformer, got ~e" func)) + (make-lex-trans func))))) + (_ + (raise-syntax-error + #f + "form should be (define-lex-trans name transformer)" + stx)))) + + + (define (get-next-state-helper char min max table) + (if (>= min max) + #f + (let* ((try (quotient (+ min max) 2)) + (el (vector-ref table try)) + (r1 (vector-ref el 0)) + (r2 (vector-ref el 1))) + (cond + ((and (>= char r1) (<= char r2)) (vector-ref el 2)) + ((< char r1) (get-next-state-helper char min try table)) + (else (get-next-state-helper char (add1 try) max table)))))) + + + + + (define (get-next-state char table) + (if table + (get-next-state-helper char 0 (vector-length table) table) + #f)) + + (define (lexer-body start-state trans-table actions no-lookahead special-action + has-special-comment-action? special-comment-action eof-action) + (letrec ((lexer + (lambda (ip) + (let ((first-pos (get-position ip)) + (first-char (peek-char-or-special ip 0))) + ;(printf "(peek-char-or-special port 0) = ~e\n" first-char) + (cond + ((eof-object? first-char) + (do-match ip first-pos eof-action (read-char-or-special ip))) + ((special-comment? first-char) + (read-char-or-special ip) + (cond + (has-special-comment-action? + (do-match ip first-pos special-comment-action #f)) + (else (lexer ip)))) + ((not (char? first-char)) + (do-match ip first-pos special-action (read-char-or-special ip))) + (else + (let lexer-loop ( + ;; current-state + (state start-state) + ;; the character to transition on + (char first-char) + ;; action for the longest match seen thus far + ;; including a match at the current state + (longest-match-action + (vector-ref actions start-state)) + ;; how many bytes precede char + (length-bytes 0) + ;; how many characters have been read + ;; including the one just read + (length-chars 1) + ;; how many characters are in the longest match + (longest-match-length 0)) + (let ((next-state + (cond + ((not (char? char)) #f) + (else (get-next-state (char->integer char) + (vector-ref trans-table state)))))) + (cond + ((not next-state) + (check-match ip first-pos longest-match-length + length-chars longest-match-action)) + ((vector-ref no-lookahead next-state) + (let ((act (vector-ref actions next-state))) + (check-match ip + first-pos + (if act length-chars longest-match-length) + length-chars + (if act act longest-match-action)))) + (else + (let* ((act (vector-ref actions next-state)) + (next-length-bytes (+ (char-utf-8-length char) length-bytes)) + (next-char (peek-char-or-special ip next-length-bytes))) + #;(printf "(peek-char-or-special port ~e) = ~e\n" + next-length-bytes next-char) + (lexer-loop next-state + next-char + (if act + act + longest-match-action) + next-length-bytes + (add1 length-chars) + (if act + length-chars + longest-match-length))))))))))))) + (lambda (ip) + (unless (input-port? ip) + (raise-argument-error + 'lexer + "input-port?" 
+ 0 + ip)) + (lexer ip)))) + + (define (check-match lb first-pos longest-match-length length longest-match-action) + (unless longest-match-action + (let* ((match (read-string length lb)) + (end-pos (get-position lb))) + (raise-read-error + (format "lexer: No match found in input starting with: ~a" match) + (file-path) + (position-line first-pos) + (position-col first-pos) + (position-offset first-pos) + (- (position-offset end-pos) (position-offset first-pos))))) + (let ((match (read-string longest-match-length lb))) + ;(printf "(read-string ~e port) = ~e\n" longest-match-length match) + (do-match lb first-pos longest-match-action match))) + + (define file-path (make-parameter #f)) + (define lexer-file-path file-path) + + (define (do-match ip first-pos action value) + #;(printf "(action ~a ~a ~a ~a)\n" + (position-offset first-pos) (position-offset (get-position ip)) value ip) + (action first-pos (get-position ip) value ip)) + + (define (get-position ip) + (let-values (((line col off) (port-next-location ip))) + (make-position off line col))) + + (define-syntax (create-unicode-abbrevs stx) + (syntax-case stx () + ((_ ctxt) + (with-syntax (((ranges ...) (map (lambda (range) + `(union ,@(map (lambda (x) + `(char-range ,(integer->char (car x)) + ,(integer->char (cdr x)))) + range))) + (list (force alphabetic-ranges) + (force lower-case-ranges) + (force upper-case-ranges) + (force title-case-ranges) + (force numeric-ranges) + (force symbolic-ranges) + (force punctuation-ranges) + (force graphic-ranges) + (force whitespace-ranges) + (force blank-ranges) + (force iso-control-ranges)))) + ((names ...) (map (lambda (sym) + (datum->syntax-object (syntax ctxt) sym #f)) + '(alphabetic + lower-case + upper-case + title-case + numeric + symbolic + punctuation + graphic + whitespace + blank + iso-control)))) + (syntax (define-lex-abbrevs (names ranges) ...)))))) + + (define-lex-abbrev any-char (char-complement (union))) + (define-lex-abbrev any-string (intersection)) + (define-lex-abbrev nothing (union)) + (create-unicode-abbrevs #'here) + + (define-lex-trans (char-set stx) + (syntax-case stx () + ((_ str) + (string? (syntax-e (syntax str))) + (with-syntax (((char ...) (string->list (syntax-e (syntax str))))) + (syntax (union char ...)))))) + + (define-syntax provide-lex-keyword + (syntax-rules () + [(_ id ...) + (begin + (define-syntax-parameter id + (make-set!-transformer + (lambda (stx) + (raise-syntax-error + #f + (format "use of a lexer keyword (~a) is not in an appropriate lexer action" + 'id) + stx)))) + ... + (provide id ...))])) + + (provide-lex-keyword start-pos end-pos lexeme lexeme-srcloc input-port return-without-pos return-without-srcloc) + + ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt new file mode 100644 index 0000000..6ec0c7f --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt @@ -0,0 +1,16 @@ +#lang scheme/base + +(provide (all-defined-out)) +(require syntax/stx) + +;; get-special-action: (syntax-object list) syntax-object syntax-object -> syntax-object +;; Returns the first action from a rule of the form ((which-special) action) +(define (get-special-action rules which-special none) + (cond + ((null? rules) none) + (else + (syntax-case (car rules) () + (((special) act) + (and (identifier? #'special) (module-or-top-identifier=? 
(syntax special) which-special)) + (syntax act)) + (_ (get-special-action (cdr rules) which-special none)))))) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/actions_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/actions_rkt.dep new file mode 100644 index 0000000..367d875 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/actions_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("f095138d7dfc29ae18c12d7daf44f1c9b74ab56d" . "f0c7dd306804eb5e8da06235651b07296b23b36d") (collects #"scheme" #"base.rkt") (collects #"scheme" #"runtime-config.rkt") (collects #"syntax" #"stx.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/actions_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/actions_rkt.zo new file mode 100644 index 0000000..5ab0ff7 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/actions_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/deriv_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/deriv_rkt.dep new file mode 100644 index 0000000..04c00b6 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/deriv_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("1edbe8de6053f0e8735c4d7777b345672d547c86" . "980e8a36193a9253ed01c61fc421729123f6b314") (collects #"br-parser-tools" #"private-lex" #"re.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/deriv_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/deriv_rkt.zo new file mode 100644 index 0000000..4c6615b Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/deriv_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/actions_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/actions_rkt.dep new file mode 100644 index 0000000..367d875 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/actions_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("f095138d7dfc29ae18c12d7daf44f1c9b74ab56d" . "f0c7dd306804eb5e8da06235651b07296b23b36d") (collects #"scheme" #"base.rkt") (collects #"scheme" #"runtime-config.rkt") (collects #"syntax" #"stx.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/actions_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/actions_rkt.zo new file mode 100644 index 0000000..cfb5947 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/actions_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/deriv_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/deriv_rkt.dep new file mode 100644 index 0000000..b10b81a --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/deriv_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("1edbe8de6053f0e8735c4d7777b345672d547c86" . 
"44623f82f80a88e2fe5683fec412332baf6a8ed3") (collects #"br-parser-tools" #"private-lex" #"re.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/deriv_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/deriv_rkt.zo new file mode 100644 index 0000000..0630f6a Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/deriv_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/actions_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/actions_rkt.dep new file mode 100644 index 0000000..4ac48c3 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/actions_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("f095138d7dfc29ae18c12d7daf44f1c9b74ab56d" . "4c8c1a7dddb28b104057c25eaf86b1a95e60048c") (collects #"errortrace" #"errortrace-key.rkt") (collects #"scheme" #"base.rkt") (collects #"scheme" #"runtime-config.rkt") (collects #"syntax" #"stx.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/actions_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/actions_rkt.zo new file mode 100644 index 0000000..88699d5 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/actions_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/deriv_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/deriv_rkt.dep new file mode 100644 index 0000000..b64bcd7 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/deriv_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("1edbe8de6053f0e8735c4d7777b345672d547c86" . "02238d6deec4e15d91f05091ef67f522127eb47b") (collects #"br-parser-tools" #"private-lex" #"re.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/deriv_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/deriv_rkt.zo new file mode 100644 index 0000000..f781864 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/deriv_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/front_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/front_rkt.dep new file mode 100644 index 0000000..bf19f0d --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/front_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("560290907963aeb1a4cc1e997b45b3113174dcc2" . 
"8b3fb56340e1383a464e9e09b878c80c8588c8db") (collects #"br-parser-tools" #"private-lex" #"deriv.rkt") (collects #"br-parser-tools" #"private-lex" #"re.rkt") (collects #"br-parser-tools" #"private-lex" #"stx.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"stx.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/front_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/front_rkt.zo new file mode 100644 index 0000000..4faf4fc Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/front_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/re_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/re_rkt.dep new file mode 100644 index 0000000..53dec6e --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/re_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c35e7c257a2700b22560bd1ef8fa4ec0d85440cf" . "b1551c6be564899f5570915abe65bd6754d6ee02") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"scheme" #"match.rkt") (indirect collects #"racket" #"match" #"gen-match.rkt") (indirect collects #"racket" #"match" #"parse.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/re_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/re_rkt.zo new file mode 100644 index 0000000..5ec67ce Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/re_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/stx_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/stx_rkt.dep new file mode 100644 index 0000000..6340545 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/stx_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c716d0316e7115d4b50b04c3f923b22c43068fb2" . 
"de91880596de9c821e9e2c4828b024d42f6a199c") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt") (collects #"syntax" #"id-table.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/stx_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/stx_rkt.zo new file mode 100644 index 0000000..7101f6f Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/stx_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/token-syntax_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/token-syntax_rkt.dep new file mode 100644 index 0000000..e4e71b9 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/token-syntax_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("15ddc4ae61da4856338c06fe3f3aca89043495ec" . "1bd4c8735758355b04c6e172aa61084639448a7c") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/token-syntax_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/token-syntax_rkt.zo new file mode 100644 index 0000000..7a7215d Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/token-syntax_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/token_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/token_rkt.dep new file mode 100644 index 0000000..7760015 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/token_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c47b01af4f27383fafb3cea5d0e14b2b92da094b" . "9988c5c353bec4edf7eafa42e08d630b02a9328a") (collects #"br-parser-tools" #"private-lex" #"token-syntax.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/token_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/token_rkt.zo new file mode 100644 index 0000000..b43bcb8 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/token_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/unicode-chars_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/unicode-chars_rkt.dep new file mode 100644 index 0000000..9757e03 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/unicode-chars_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("11fb7e214dfb048796bfcbca53932424123137a2" . 
"b3606fa90704340e1a962ec5048ec3b01f440e06") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/unicode-chars_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/unicode-chars_rkt.zo new file mode 100644 index 0000000..66780eb Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/unicode-chars_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/util_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/util_rkt.dep new file mode 100644 index 0000000..110e76d --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/util_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("54744a008c8ffd6704fc954ca59351a6f19774a4" . "5b3e3294fee47f7377adc85735ab90b85321865e") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/util_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/util_rkt.zo new file mode 100644 index 0000000..43ec79d Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/errortrace/util_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/front_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/front_rkt.dep new file mode 100644 index 0000000..061ef75 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/front_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("560290907963aeb1a4cc1e997b45b3113174dcc2" . "53517572cdb75487afeb2fff3000f8f73fc998aa") (collects #"br-parser-tools" #"private-lex" #"deriv.rkt") (collects #"br-parser-tools" #"private-lex" #"re.rkt") (collects #"br-parser-tools" #"private-lex" #"stx.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"stx.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/front_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/front_rkt.zo new file mode 100644 index 0000000..381dd99 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/front_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/re_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/re_rkt.dep new file mode 100644 index 0000000..188b6a8 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/re_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c35e7c257a2700b22560bd1ef8fa4ec0d85440cf" . 
"b2bb34be5f7b01ff900aa10ee415738b2a19b0a0") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"scheme" #"match.rkt") (indirect collects #"racket" #"match" #"gen-match.rkt") (indirect collects #"racket" #"match" #"parse.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/re_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/re_rkt.zo new file mode 100644 index 0000000..6a38ffb Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/re_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/stx_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/stx_rkt.dep new file mode 100644 index 0000000..60063db --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/stx_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c716d0316e7115d4b50b04c3f923b22c43068fb2" . "b47d873be742873e79b0099de8845e32143ab4d5") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt") (collects #"syntax" #"id-table.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/stx_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/stx_rkt.zo new file mode 100644 index 0000000..ddab7d1 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/stx_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/token-syntax_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/token-syntax_rkt.dep new file mode 100644 index 0000000..f44cce8 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/token-syntax_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("15ddc4ae61da4856338c06fe3f3aca89043495ec" . "46a0cffdba2f82aa32f0fd32a5783621a6a78323") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/token-syntax_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/token-syntax_rkt.zo new file mode 100644 index 0000000..2d3be5b Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/token-syntax_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/token_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/token_rkt.dep new file mode 100644 index 0000000..ad043d4 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/token_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c47b01af4f27383fafb3cea5d0e14b2b92da094b" . 
"5a9e9888caac8c6df5e32eab1eb77a9522bb1097") (collects #"br-parser-tools" #"private-lex" #"token-syntax.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/token_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/token_rkt.zo new file mode 100644 index 0000000..334c9f3 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/token_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/unicode-chars_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/unicode-chars_rkt.dep new file mode 100644 index 0000000..295fe56 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/unicode-chars_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("11fb7e214dfb048796bfcbca53932424123137a2" . "d1e63a1cc4d040c9bbc9481d0d8c6d890f9d2952") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/unicode-chars_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/unicode-chars_rkt.zo new file mode 100644 index 0000000..5b8e8af Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/unicode-chars_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/util_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/util_rkt.dep new file mode 100644 index 0000000..b713025 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/util_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("54744a008c8ffd6704fc954ca59351a6f19774a4" . "04b200a056cd736ad4bd3d7221f381755cf94607") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/util_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/util_rkt.zo new file mode 100644 index 0000000..0ec59f0 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/drracket/util_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/front_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/front_rkt.dep new file mode 100644 index 0000000..9138969 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/front_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("560290907963aeb1a4cc1e997b45b3113174dcc2" . 
"11a0fde414e54b53f7c8a690972c091191515617") (collects #"br-parser-tools" #"private-lex" #"deriv.rkt") (collects #"br-parser-tools" #"private-lex" #"re.rkt") (collects #"br-parser-tools" #"private-lex" #"stx.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"stx.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/front_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/front_rkt.zo new file mode 100644 index 0000000..0d2fcac Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/front_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/re_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/re_rkt.dep new file mode 100644 index 0000000..d84f1fb --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/re_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c35e7c257a2700b22560bd1ef8fa4ec0d85440cf" . "8309136e8195b423615deebdf156b1cf77168dfe") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"scheme" #"match.rkt") (indirect collects #"racket" #"match" #"gen-match.rkt") (indirect collects #"racket" #"match" #"parse.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/re_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/re_rkt.zo new file mode 100644 index 0000000..14b5cd7 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/re_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/stx_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/stx_rkt.dep new file mode 100644 index 0000000..44a4093 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/stx_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c716d0316e7115d4b50b04c3f923b22c43068fb2" . "01788f46755b7f7a3e2f5a04be22246525bde728") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt") (collects #"syntax" #"id-table.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/stx_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/stx_rkt.zo new file mode 100644 index 0000000..8995e2e Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/stx_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/token-syntax_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/token-syntax_rkt.dep new file mode 100644 index 0000000..f44cce8 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/token-syntax_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("15ddc4ae61da4856338c06fe3f3aca89043495ec" . 
"46a0cffdba2f82aa32f0fd32a5783621a6a78323") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/token-syntax_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/token-syntax_rkt.zo new file mode 100644 index 0000000..2d3be5b Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/token-syntax_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/token_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/token_rkt.dep new file mode 100644 index 0000000..ad043d4 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/token_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c47b01af4f27383fafb3cea5d0e14b2b92da094b" . "5a9e9888caac8c6df5e32eab1eb77a9522bb1097") (collects #"br-parser-tools" #"private-lex" #"token-syntax.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/token_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/token_rkt.zo new file mode 100644 index 0000000..e9ba2d5 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/token_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/unicode-chars_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/unicode-chars_rkt.dep new file mode 100644 index 0000000..ec11321 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/unicode-chars_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("11fb7e214dfb048796bfcbca53932424123137a2" . "c85260e8066b06a6c7e3ad05f21848a935912ffc") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/unicode-chars_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/unicode-chars_rkt.zo new file mode 100644 index 0000000..c73316f Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/unicode-chars_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/util_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/util_rkt.dep new file mode 100644 index 0000000..b713025 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/util_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("54744a008c8ffd6704fc954ca59351a6f19774a4" . 
"04b200a056cd736ad4bd3d7221f381755cf94607") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/util_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/util_rkt.zo new file mode 100644 index 0000000..ffc684d Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/compiled/util_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt new file mode 100644 index 0000000..28919a3 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt @@ -0,0 +1,339 @@ +(module deriv mzscheme + + (require mzlib/list + (prefix is: mzlib/integer-set) + "re.rkt" + "util.rkt") + + (provide build-dfa print-dfa (struct dfa (num-states start-state final-states/actions transitions))) + + (define e (build-epsilon)) + (define z (build-zero)) + + + ;; Don't do anything with this one but extract the chars + (define all-chars (->re `(char-complement (union)) (make-cache))) + + ;; get-char-groups : re bool -> (list-of char-setR?) + ;; Collects the char-setRs in r that could be used in + ;; taking the derivative of r. + (define (get-char-groups r found-negation) + (cond + ((or (eq? r e) (eq? r z)) null) + ((char-setR? r) (list r)) + ((concatR? r) + (if (re-nullable? (concatR-re1 r)) + (append (get-char-groups (concatR-re1 r) found-negation) + (get-char-groups (concatR-re2 r) found-negation)) + (get-char-groups (concatR-re1 r) found-negation))) + ((repeatR? r) + (get-char-groups (repeatR-re r) found-negation)) + ((orR? r) + (apply append (map (lambda (x) (get-char-groups x found-negation)) (orR-res r)))) + ((andR? r) + (apply append (map (lambda (x) (get-char-groups x found-negation)) (andR-res r)))) + ((negR? r) + (if found-negation + (get-char-groups (negR-re r) #t) + (cons all-chars (get-char-groups (negR-re r) #t)))))) + + (test-block ((c (make-cache)) + (r1 (->re #\1 c)) + (r2 (->re #\2 c))) + ((get-char-groups e #f) null) + ((get-char-groups z #f) null) + ((get-char-groups r1 #f) (list r1)) + ((get-char-groups (->re `(concatenation ,r1 ,r2) c) #f) + (list r1)) + ((get-char-groups (->re `(concatenation ,e ,r2) c) #f) + (list r2)) + ((get-char-groups (->re `(concatenation (repetition 0 +inf.0 ,r1) ,r2) c) #f) + (list r1 r2)) + ((get-char-groups (->re `(repetition 0 +inf.0 ,r1) c) #f) + (list r1)) + ((get-char-groups + (->re `(union (repetition 0 +inf.0 ,r1) + (concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f) + (list r1 r2 (->re "3" c) (->re "4" c))) + ((get-char-groups (->re `(complement ,r1) c) #f) + (list all-chars r1)) + ((get-char-groups + (->re `(intersection (repetition 0 +inf.0 ,r1) + (concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f) + (list r1 r2 (->re "3" c) (->re "4" c))) + ) + (define loc:member? is:member?) + + ;; deriveR : re char cache -> re + (define (deriveR r c cache) + (cond + ((or (eq? r e) (eq? r z)) z) + ((char-setR? r) + (if (loc:member? c (char-setR-chars r)) e z)) + ((concatR? r) + (let* ((r1 (concatR-re1 r)) + (r2 (concatR-re2 r)) + (d (build-concat (deriveR r1 c cache) r2 cache))) + (if (re-nullable? r1) + (build-or (list d (deriveR r2 c cache)) cache) + d))) + ((repeatR? 
r) + (build-concat (deriveR (repeatR-re r) c cache) + (build-repeat (sub1 (repeatR-low r)) + (sub1 (repeatR-high r)) + (repeatR-re r) cache) + cache)) + ((orR? r) + (build-or (map (lambda (x) (deriveR x c cache)) + (orR-res r)) + cache)) + ((andR? r) + (build-and (map (lambda (x) (deriveR x c cache)) + (andR-res r)) + cache)) + ((negR? r) + (build-neg (deriveR (negR-re r) c cache) cache)))) + + (test-block ((c (make-cache)) + (a (char->integer #\a)) + (b (char->integer #\b)) + (r1 (->re #\a c)) + (r2 (->re `(repetition 0 +inf.0 #\a) c)) + (r3 (->re `(repetition 0 +inf.0 ,r2) c)) + (r4 (->re `(concatenation #\a ,r2) c)) + (r5 (->re `(repetition 0 +inf.0 ,r4) c)) + (r6 (->re `(union ,r5 #\a) c)) + (r7 (->re `(concatenation ,r2 ,r2) c)) + (r8 (->re `(complement ,r4) c)) + (r9 (->re `(intersection ,r2 ,r4) c))) + ((deriveR e a c) z) + ((deriveR z a c) z) + ((deriveR r1 b c) z) + ((deriveR r1 a c) e) + ((deriveR r2 a c) r2) + ((deriveR r2 b c) z) + ((deriveR r3 a c) r2) + ((deriveR r3 b c) z) + ((deriveR r4 a c) r2) + ((deriveR r4 b c) z) + ((deriveR r5 a c) (->re `(concatenation ,r2 ,r5) c)) + ((deriveR r5 b c) z) + ((deriveR r6 a c) (->re `(union (concatenation ,r2 ,r5) "") c)) + ((deriveR r6 b c) z) + ((deriveR r7 a c) (->re `(union (concatenation ,r2 ,r2) ,r2) c)) + ((deriveR r7 b c) z) + ((deriveR r8 a c) (->re `(complement, r2) c)) + ((deriveR r8 b c) (->re `(complement ,z) c)) + ((deriveR r9 a c) r2) + ((deriveR r9 b c) z) + ((deriveR (->re `(repetition 1 2 "ab") c) a c) + (->re `(concatenation "b" (repetition 0 1 "ab")) c))) + + ;; An re-action is (cons re action) + + ;; derive : (list-of re-action) char cache -> (union (list-of re-action) #f) + ;; applies deriveR to all the re-actions's re parts. + ;; Returns #f if the derived state is equivalent to z. + (define (derive r c cache) + (let ((new-r (map (lambda (ra) + (cons (deriveR (car ra) c cache) (cdr ra))) + r))) + (if (andmap (lambda (x) (eq? z (car x))) + new-r) + #f + new-r))) + + (test-block ((c (make-cache)) + (r1 (->re #\1 c)) + (r2 (->re #\2 c))) + ((derive null (char->integer #\1) c) #f) + ((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c) + (list (cons e 1) (cons z 2))) + ((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f)) + + + ;; get-final : (list-of re-action) -> (union #f syntax-object) + ;; An re that accepts e represents a final state. Return the + ;; action from the first final state or #f if there is none. + (define (get-final res) + (cond + ((null? res) #f) + ((re-nullable? 
(caar res)) (cdar res)) + (else (get-final (cdr res))))) + + (test-block ((c->i char->integer) + (c (make-cache)) + (r1 (->re #\a c)) + (r2 (->re #\b c)) + (b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5))) + (a (list (cons r1 1) (cons r2 2)))) + ((derive null (c->i #\a) c) #f) + ((derive a (c->i #\a) c) (list (cons e 1) (cons z 2))) + ((derive a (c->i #\b) c) (list (cons z 1) (cons e 2))) + ((derive a (c->i #\c) c) #f) + ((derive (list (cons (->re `(union " " "\n" ",") c) 1) + (cons (->re `(concatenation (repetition 0 1 "-") + (repetition 1 +inf.0 (char-range "0" "9"))) c) 2) + (cons (->re `(concatenation "-" (repetition 1 +inf.0 "-")) c) 3) + (cons (->re "[" c) 4) + (cons (->re "]" c) 5)) (c->i #\[) c) + b) + ((get-final a) #f) + ((get-final (list (cons e 1) (cons e 2))) 1) + ((get-final b) 4)) + + + ;; A state is (make-state (list-of re-action) nat) + (define-struct state (spec index)) + + ;; get-key : (list-of re-action) -> (list-of nat) + ;; states are indexed by the list of indexes of their res + (define (get-key s) + (map (lambda (x) (re-index (car x))) s)) + + (define loc:partition is:partition) + + ;; compute-chars : (list-of state) -> (list-of char-set) + ;; Computes the sets of equivalent characters for taking the + ;; derivative of the car of st. Only one derivative per set needs to be taken. + (define (compute-chars st) + (cond + ((null? st) null) + (else + (loc:partition (map char-setR-chars + (apply append (map (lambda (x) (get-char-groups (car x) #f)) + (state-spec (car st))))))))) + + (test-block ((c (make-cache)) + (c->i char->integer) + (r1 (->re `(char-range #\1 #\4) c)) + (r2 (->re `(char-range #\2 #\3) c))) + ((compute-chars null) null) + ((compute-chars (list (make-state null 1))) null) + ((map is:integer-set-contents + (compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2)))) + (list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3))) + (is:integer-set-contents (is:union (is:make-range (c->i #\1)) + (is:make-range (c->i #\4))))))) + + + ;; A dfa is (make-dfa int int + ;; (list-of (cons int syntax-object)) + ;; (list-of (cons int (list-of (cons char-set int))))) + ;; Each transition entry is a state and a list of char-sets, each paired with the state to transition to. + ;; The finals and transitions are sorted by state number, and duplicate-free. + (define-struct dfa (num-states start-state final-states/actions transitions) (make-inspector)) + + (define loc:get-integer is:get-integer) + + ;; build-dfa : (list-of re-action) cache -> dfa + (define (build-dfa rs cache) + (let* ((transitions (make-hash-table)) + (get-state-number (make-counter)) + (start (make-state rs (get-state-number)))) + (cache (cons 'state (get-key rs)) (lambda () start)) + (let loop ((old-states (list start)) + (new-states null) + (all-states (list start)) + (cs (compute-chars (list start)))) + (cond + ((and (null? old-states) (null? new-states)) + (make-dfa (get-state-number) (state-index start) + (sort (filter (lambda (x) (cdr x)) + (map (lambda (state) + (cons (state-index state) (get-final (state-spec state)))) + all-states)) + (lambda (a b) (< (car a) (car b)))) + (sort (hash-table-map transitions + (lambda (state trans) + (cons (state-index state) + (map (lambda (t) + (cons (car t) + (state-index (cdr t)))) + trans)))) + (lambda (a b) (< (car a) (car b)))))) + ((null? old-states) + (loop new-states null all-states (compute-chars new-states))) + ((null? 
cs) + (loop (cdr old-states) new-states all-states (compute-chars (cdr old-states)))) + (else + (let* ((state (car old-states)) + (c (car cs)) + (new-re (derive (state-spec state) (loc:get-integer c) cache))) + (cond + (new-re + (let* ((new-state? #f) + (new-state (cache (cons 'state (get-key new-re)) + (lambda () + (set! new-state? #t) + (make-state new-re (get-state-number))))) + (new-all-states (if new-state? (cons new-state all-states) all-states))) + (hash-table-put! transitions + state + (cons (cons c new-state) + (hash-table-get transitions state + (lambda () null)))) + (cond + (new-state? + (loop old-states (cons new-state new-states) new-all-states (cdr cs))) + (else + (loop old-states new-states new-all-states (cdr cs)))))) + (else (loop old-states new-states all-states (cdr cs)))))))))) + + (define (print-dfa x) + (printf "number of states: ~a\n" (dfa-num-states x)) + (printf "start state: ~a\n" (dfa-start-state x)) + (printf "final states: ~a\n" (map car (dfa-final-states/actions x))) + (for-each (lambda (trans) + (printf "state: ~a\n" (car trans)) + (for-each (lambda (rule) + (printf " -~a-> ~a\n" + (is:integer-set-contents (car rule)) + (cdr rule))) + (cdr trans))) + (dfa-transitions x))) + + (define (build-test-dfa rs) + (let ((c (make-cache))) + (build-dfa (map (lambda (x) (cons (->re x c) 'action)) + rs) + c))) + + +#| + (define t1 (build-test-dfa null)) + (define t2 (build-test-dfa `(#\a))) + (define t3 (build-test-dfa `(#\a #\b))) + (define t4 (build-test-dfa `((repetition 0 +inf.0 #\a) + (repetition 0 +inf.0 (concatenation #\a #\b))))) + (define t5 (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\0 #\1)) #\1)))) + (define t6 (build-test-dfa `((repetition 0 +inf.0 (repetition 0 +inf.0 #\a)) + (repetition 0 +inf.0 (concatenation #\b (repetition 1 +inf.0 #\b)))))) + (define t7 (build-test-dfa `((concatenation (repetition 0 +inf.0 #\a) (repetition 0 +inf.0 #\b) + (repetition 0 +inf.0 #\c) (repetition 0 +inf.0 #\d) + (repetition 0 +inf.0 #\e))))) + (define t8 + (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\a #\b)) #\a (union #\a #\b) + (union #\a #\b) (union #\a #\b) (union #\a #\b))))) + (define t9 (build-test-dfa `((concatenation "/*" + (complement (concatenation (intersection) "*/" (intersection))) + "*/")))) + (define t11 (build-test-dfa `((complement "1")))) + (define t12 (build-test-dfa `((concatenation (intersection (concatenation (repetition 0 +inf.0 "a") "b") + (concatenation "a" (repetition 0 +inf.0 "b"))) + "ab")))) + (define x (build-test-dfa `((union " " "\n" ",") + (concatenation (repetition 0 1 "-") (repetition 1 +inf.0 (char-range "0" "9"))) + (concatenation "-" (repetition 1 +inf.0 "-")) + "[" + "]"))) + (define y (build-test-dfa + `((repetition 1 +inf.0 + (union (concatenation "|" (repetition 0 +inf.0 (char-complement "|")) "|") + (concatenation "|" (repetition 0 +inf.0 (char-complement "|")))))))) + (define t13 (build-test-dfa `((intersection (concatenation (intersection) "111" (intersection)) + (complement (union (concatenation (intersection) "01") + (repetition 1 +inf.0 "1"))))))) + (define t14 (build-test-dfa `((complement "1")))) +|# + ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt new file mode 100644 index 0000000..bbccbe0 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt @@ -0,0 +1,81 @@ +#lang scheme/base +(require (for-syntax scheme/base) + 
"../lex.rkt" + rackunit) + +(define-syntax (catch-syn-error stx) + (syntax-case stx () + [(_ arg) + (datum->syntax + #'here + (with-handlers ((exn:fail:syntax? exn-message)) + (syntax-local-expand-expression #'arg) + "not-an-error"))])) + +(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev))) +(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev a))) +(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev (a b) v))) +(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev 1 1))) +(check-regexp-match #rx"lex-abbrevs" (catch-syn-error (define-lex-abbrevs ()))) + +(check-regexp-match #rx"lex-trans" (catch-syn-error (define-lex-trans))) + +(check-regexp-match #rx"lexer" (catch-syn-error (lexer))) +(check-regexp-match #rx"lexer" (catch-syn-error (lexer ("a" "b" "c")))) +(check-regexp-match #rx"lexer" (catch-syn-error (lexer ()))) +(check-regexp-match #rx"lexer" (catch-syn-error (lexer ("")))) + +(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (a 1)))) +(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer ((a) 1)))) +(check-regexp-match #rx"regular-expression" (catch-syn-error (let ((a 1)) (lexer ((a) 1))))) + +(check-regexp-match #rx"regular-expression" + (catch-syn-error (let-syntax ((a 1)) + (lexer ((a) 1))))) + +(check-regexp-match #rx"define-lex-trans" + (catch-syn-error + (let () + (define-lex-trans a 1) + (let () + (lexer ((a) 1)))))) + +;; Detecting mutual recursion cycle: +(check-regexp-match #rx"regular-expression" + (catch-syn-error + (let () + (define-lex-abbrev a b) + (define-lex-abbrev b a) + (let () + (lexer (a 1)))))) + +(check-regexp-match #rx"regular-expression" + (catch-syn-error + (let () + (define-lex-abbrev a (repetition 0 1 b)) + (define-lex-abbrev b (repetition 0 1 a)) + (let () + (lexer (a 1)))))) + +;; Detecting cycle within same abbreviation: +(check-regexp-match #rx"regular-expression" + (catch-syn-error + (let () + (define-lex-abbrev balanced + (union (concatenation "(" balanced ")" balanced) + any-char)) + (lexer + [balanced (string-append lexeme (balanced input-port))] + [(eof) ""])))) + + +(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (1 1)))) +(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition) 1)))) +(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition #\1 #\1 "3") 1)))) +(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition 1 #\1 "3") 1)))) +(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition 1 0 "3") 1)))) +(check-regexp-match #rx"complement" (catch-syn-error (lexer ((complement) 1)))) +(check-regexp-match #rx"char-range" (catch-syn-error (lexer ((char-range) 1)))) +(check-regexp-match #rx"char-range" (catch-syn-error (lexer ((char-range #\9 #\0) 1)))) +(check-regexp-match #rx"char-complement" (catch-syn-error (lexer ((char-complement) 1)))) +(check-regexp-match #rx"char-complement" (catch-syn-error (lexer ((char-complement (concatenation "1" "2")) 1)))) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt new file mode 100644 index 0000000..f74c003 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt @@ -0,0 +1,179 @@ +(module front mzscheme + (require (prefix is: mzlib/integer-set) + mzlib/list + syntax/stx + "util.rkt" + "stx.rkt" + "re.rkt" + "deriv.rkt") + + (provide build-lexer) + + (define-syntax 
time-label + (syntax-rules () + ((_ l e ...) + (begin + (printf "~a: " l) + (time (begin e ...)))))) + + ;; A table is either + ;; - (vector-of (union #f nat)) + ;; - (vector-of (vector-of (vector nat nat nat))) + + (define loc:integer-set-contents is:integer-set-contents) + + ;; dfa->1d-table : dfa -> (same as build-lexer) + (define (dfa->1d-table dfa) + (let ((state-table (make-vector (dfa-num-states dfa) #f)) + (transition-cache (make-hash-table 'equal))) + (for-each + (lambda (trans) + (let* ((from-state (car trans)) + (all-chars/to (cdr trans)) + (flat-all-chars/to + (sort + (apply append + (map (lambda (chars/to) + (let ((char-ranges (loc:integer-set-contents (car chars/to))) + (to (cdr chars/to))) + (map (lambda (char-range) + (let ((entry (vector (car char-range) (cdr char-range) to))) + (hash-table-get transition-cache entry + (lambda () + (hash-table-put! transition-cache + entry + entry) + entry)))) + char-ranges))) + all-chars/to)) + (lambda (a b) + (< (vector-ref a 0) (vector-ref b 0)))))) + (vector-set! state-table from-state (list->vector flat-all-chars/to)))) + (dfa-transitions dfa)) + state-table)) + + + (define loc:foldr is:foldr) + + ;; dfa->2d-table : dfa -> (same as build-lexer) + (define (dfa->2d-table dfa) + (let ( + ;; char-table : (vector-of (union #f nat)) + ;; The lexer table, one entry per state per char. + ;; Each entry specifies a state to transition to. + ;; #f indicates no transition + (char-table (make-vector (* 256 (dfa-num-states dfa)) #f))) + + ;; Fill the char-table vector + (for-each + (lambda (trans) + (let ((from-state (car trans))) + (for-each (lambda (chars/to) + (let ((to-state (cdr chars/to))) + (loc:foldr (lambda (char _) + (vector-set! char-table + (bitwise-ior + char + (arithmetic-shift from-state 8)) + to-state)) + (void) + (car chars/to)))) + (cdr trans)))) + (dfa-transitions dfa)) + char-table)) + + + ;; dfa->actions : dfa -> (vector-of (union #f syntax-object)) + ;; The action for each final state, #f if the state isn't final + (define (dfa->actions dfa) + (let ((actions (make-vector (dfa-num-states dfa) #f))) + (for-each (lambda (state/action) + (vector-set! actions (car state/action) (cdr state/action))) + (dfa-final-states/actions dfa)) + actions)) + + ;; dfa->no-look : dfa -> (vector-of bool) + ;; For each state whether the lexer can ignore the next input. + ;; It can do this only if there are no transitions out of the + ;; current state. + (define (dfa->no-look dfa) + (let ((no-look (make-vector (dfa-num-states dfa) #t))) + (for-each (lambda (trans) + (vector-set! no-look (car trans) #f)) + (dfa-transitions dfa)) + no-look)) + + (test-block ((d1 (make-dfa 1 1 (list) (list))) + (d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) + (list (cons 1 (list (cons (is:make-range 49 50) 1) + (cons (is:make-range 51) 2))) + (cons 2 (list (cons (is:make-range 49) 3)))))) + (d3 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) + (list (cons 1 (list (cons (is:make-range 100 200) 0) + (cons (is:make-range 49 50) 1) + (cons (is:make-range 51) 2))) + (cons 2 (list (cons (is:make-range 49) 3))))))) + ((dfa->2d-table d1) (make-vector 256 #f)) + ((dfa->2d-table d2) (let ((v (make-vector 1024 #f))) + (vector-set! v 305 1) + (vector-set! v 306 1) + (vector-set! v 307 2) + (vector-set! 
v 561 3) + v)) + ((dfa->1d-table d1) (make-vector 1 #f)) + ((dfa->1d-table d2) #(#f + #(#(49 50 1) #(51 51 2)) + #(#(49 49 3)) + #f)) + ((dfa->1d-table d3) #(#f + #(#(49 50 1) #(51 51 2) #(100 200 0)) + #(#(49 49 3)) + #f)) + ((dfa->actions d1) (vector #f)) + ((dfa->actions d2) (vector #f #f 2 3)) + ((dfa->no-look d1) (vector #t)) + ((dfa->no-look d2) (vector #t #f #f #t))) + + ;; build-lexer : syntax-object list -> + ;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object)) + ;; each syntax object has the form (re action) + (define (build-lexer sos) + (let* ((disappeared-uses (box null)) + (s-re-acts (map (lambda (so) + (cons (parse (stx-car so) disappeared-uses) + (stx-car (stx-cdr so)))) + sos)) + + (cache (make-cache)) + + (re-acts (map (lambda (s-re-act) + (cons (->re (car s-re-act) cache) + (cdr s-re-act))) + s-re-acts)) + + (dfa (build-dfa re-acts cache)) + (table (dfa->1d-table dfa))) + ;(print-dfa dfa) + #;(let ((num-states (vector-length table)) + (num-vectors (length (filter values (vector->list table)))) + (num-entries (apply + (map + (lambda (x) (if x (vector-length x) 0)) + (vector->list table)))) + (num-different-entries + (let ((ht (make-hash-table))) + (for-each + (lambda (x) + (when x + (for-each + (lambda (y) + (hash-table-put! ht y #t)) + (vector->list x)))) + (vector->list table)) + (length (hash-table-map ht cons))))) + (printf "~a states, ~aKB\n" + num-states + (/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries + (* 5 num-different-entries))) 1024))) + (values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa) + (unbox disappeared-uses)))) + ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt new file mode 100644 index 0000000..9da62de --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt @@ -0,0 +1,385 @@ +(module re mzscheme + (require mzlib/list + scheme/match + (prefix is: mzlib/integer-set) + "util.rkt") + + (provide ->re build-epsilon build-zero build-char-set build-concat + build-repeat build-or build-and build-neg + epsilonR? zeroR? char-setR? concatR? repeatR? orR? andR? negR? + char-setR-chars concatR-re1 concatR-re2 repeatR-re repeatR-low repeatR-high + orR-res andR-res negR-re + re-nullable? re-index) + + ;; get-index : -> nat + (define get-index (make-counter)) + + ;; An re is either + ;; - (make-epsilonR bool nat) + ;; - (make-zeroR bool nat) + ;; - (make-char-setR bool nat char-set) + ;; - (make-concatR bool nat re re) + ;; - (make-repeatR bool nat nat nat-or-+inf.0 re) + ;; - (make-orR bool nat (list-of re)) Must not directly contain any orRs + ;; - (make-andR bool nat (list-of re)) Must not directly contain any andRs + ;; - (make-negR bool nat re) + ;; + ;; Every re must have an index field globally different from all + ;; other re index fields. + (define-struct re (nullable? 
index) (make-inspector)) + (define-struct (epsilonR re) () (make-inspector)) + (define-struct (zeroR re) () (make-inspector)) + (define-struct (char-setR re) (chars) (make-inspector)) + (define-struct (concatR re) (re1 re2) (make-inspector)) + (define-struct (repeatR re) (low high re) (make-inspector)) + (define-struct (orR re) (res) (make-inspector)) + (define-struct (andR re) (res) (make-inspector)) + (define-struct (negR re) (re) (make-inspector)) + + ;; e : re + ;; The unique epsilon re + (define e (make-epsilonR #t (get-index))) + + ;; z : re + ;; The unique zero re + (define z (make-zeroR #f (get-index))) + + + ;; s-re = char constant + ;; | string constant (sequence of characters) + ;; | re a precompiled re + ;; | (repetition low high s-re) repetition between low and high times (inclusive) + ;; | (union s-re ...) + ;; | (intersection s-re ...) + ;; | (complement s-re) + ;; | (concatenation s-re ...) + ;; | (char-range rng rng) match any character between two (inclusive) + ;; | (char-complement char-set) match any character not listed + ;; low = natural-number + ;; high = natural-number or +inf.0 + ;; rng = char or string with length 1 + ;; (concatenation) (repetition 0 0 x), and "" match the empty string. + ;; (union) matches no strings. + ;; (intersection) matches any string. + + (define loc:make-range is:make-range) + (define loc:union is:union) + (define loc:split is:split) + (define loc:complement is:complement) + + ;; ->re : s-re cache -> re + (define (->re exp cache) + (match exp + ((? char?) (build-char-set (loc:make-range (char->integer exp)) cache)) + ((? string?) (->re `(concatenation ,@(string->list exp)) cache)) + ((? re?) exp) + (`(repetition ,low ,high ,r) + (build-repeat low high (->re r cache) cache)) + (`(union ,rs ...) + (build-or (flatten-res (map (lambda (r) (->re r cache)) rs) + orR? orR-res loc:union cache) + cache)) + (`(intersection ,rs ...) + (build-and (flatten-res (map (lambda (r) (->re r cache)) rs) + andR? andR-res (lambda (a b) + (let-values (((i _ __) (loc:split a b))) i)) + cache) + cache)) + (`(complement ,r) + (build-neg (->re r cache) cache)) + (`(concatenation ,rs ...) + (foldr (lambda (x y) + (build-concat (->re x cache) y cache)) + e + rs)) + (`(char-range ,c1 ,c2) + (let ((i1 (char->integer (if (string? c1) (string-ref c1 0) c1))) + (i2 (char->integer (if (string? c2) (string-ref c2 0) c2)))) + (if (<= i1 i2) + (build-char-set (loc:make-range i1 i2) cache) + z))) + (`(char-complement ,crs ...) + (let ((cs (->re `(union ,@crs) cache))) + (cond + ((zeroR? cs) (build-char-set (loc:make-range 0 max-char-num) cache)) + ((char-setR? cs) + (build-char-set (loc:complement (char-setR-chars cs) 0 max-char-num) cache)) + (else z)))))) + + + + + ;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re)) + ;; (char-set char-set -> char-set) cache -> (list-of re) + ;; Takes all the char-sets in l and combines them into one char-set using the combine function. + ;; Flattens out the values of type?. get-res only needs to function on things type? returns + ;; true for. + (define (flatten-res l type? get-res combine cache) + (let loop ((res l) + ;; chars : (union #f char-set) + (chars #f) + (no-chars null)) + (cond + ((null? res) + (if chars + (cons (build-char-set chars cache) no-chars) + no-chars)) + ((char-setR? (car res)) + (if chars + (loop (cdr res) (combine (char-setR-chars (car res)) chars) no-chars) + (loop (cdr res) (char-setR-chars (car res)) no-chars))) + ((type? 
(car res)) + (loop (append (get-res (car res)) (cdr res)) chars no-chars)) + (else (loop (cdr res) chars (cons (car res) no-chars)))))) + + ;; build-epsilon : -> re + (define (build-epsilon) e) + + (define (build-zero) z) + + (define loc:integer-set-contents is:integer-set-contents) + + ;; build-char-set : char-set cache -> re + (define (build-char-set cs cache) + (let ((l (loc:integer-set-contents cs))) + (cond + ((null? l) z) + (else + (cache l + (lambda () + (make-char-setR #f (get-index) cs))))))) + + + + ;; build-concat : re re cache -> re + (define (build-concat r1 r2 cache) + (cond + ((eq? e r1) r2) + ((eq? e r2) r1) + ((or (eq? z r1) (eq? z r2)) z) + (else + (cache (cons 'concat (cons (re-index r1) (re-index r2))) + (lambda () + (make-concatR (and (re-nullable? r1) (re-nullable? r2)) + (get-index) + r1 r2)))))) + + ;; build-repeat : nat nat-or-+inf.0 re cache -> re + (define (build-repeat low high r cache) + (let ((low (if (< low 0) 0 low))) + (cond + ((eq? r e) e) + ((and (= 0 low) (or (= 0 high) (eq? z r))) e) + ((and (= 1 low) (= 1 high)) r) + ((and (repeatR? r) + (eq? (repeatR-high r) +inf.0) + (or (= 0 (repeatR-low r)) + (= 1 (repeatR-low r)))) + (build-repeat (* low (repeatR-low r)) + +inf.0 + (repeatR-re r) + cache)) + (else + (cache (cons 'repeat (cons low (cons high (re-index r)))) + (lambda () + (make-repeatR (or (re-nullable? r) (= 0 low)) (get-index) low high r))))))) + + + ;; build-or : (list-of re) cache -> re + (define (build-or rs cache) + (let ((rs + (filter + (lambda (x) (not (eq? x z))) + (do-simple-equiv (replace rs orR? orR-res null) re-index)))) + (cond + ((null? rs) z) + ((null? (cdr rs)) (car rs)) + ((memq (build-neg z cache) rs) (build-neg z cache)) + (else + (cache (cons 'or (map re-index rs)) + (lambda () + (make-orR (ormap re-nullable? rs) (get-index) rs))))))) + + ;; build-and : (list-of re) cache -> re + (define (build-and rs cache) + (let ((rs (do-simple-equiv (replace rs andR? andR-res null) re-index))) + (cond + ((null? rs) (build-neg z cache)) + ((null? (cdr rs)) (car rs)) + ((memq z rs) z) + (else + (cache (cons 'and (map re-index rs)) + (lambda () + (make-andR (andmap re-nullable? rs) (get-index) rs))))))) + + ;; build-neg : re cache -> re + (define (build-neg r cache) + (cond + ((negR? r) (negR-re r)) + (else + (cache (cons 'neg (re-index r)) + (lambda () + (make-negR (not (re-nullable? 
r)) (get-index) r)))))) + + ;; Tests for the build-functions + (test-block ((c (make-cache)) + (isc is:integer-set-contents) + (r1 (build-char-set (is:make-range (char->integer #\1)) c)) + (r2 (build-char-set (is:make-range (char->integer #\2)) c)) + (r3 (build-char-set (is:make-range (char->integer #\3)) c)) + (rc (build-concat r1 r2 c)) + (rc2 (build-concat r2 r1 c)) + (rr (build-repeat 0 +inf.0 rc c)) + (ro (build-or `(,rr ,rc ,rr) c)) + (ro2 (build-or `(,rc ,rr ,z) c)) + (ro3 (build-or `(,rr ,rc) c)) + (ro4 (build-or `(,(build-or `(,r1 ,r2) c) + ,(build-or `(,r2 ,r3) c)) c)) + (ra (build-and `(,rr ,rc ,rr) c)) + (ra2 (build-and `(,rc ,rr) c)) + (ra3 (build-and `(,rr ,rc) c)) + (ra4 (build-and `(,(build-and `(,r3 ,r2) c) + ,(build-and `(,r2 ,r1) c)) c)) + (rn (build-neg z c)) + (rn2 (build-neg r1 c))) + + ((isc (char-setR-chars r1)) (isc (is:make-range (char->integer #\1)))) + ((isc (char-setR-chars r2)) (isc (is:make-range (char->integer #\2)))) + ((isc (char-setR-chars r3)) (isc (is:make-range (char->integer #\3)))) + ((build-char-set (is:make-range) c) z) + ((build-concat r1 e c) r1) + ((build-concat e r1 c) r1) + ((build-concat r1 z c) z) + ((build-concat z r1 c) z) + ((build-concat r1 r2 c) rc) + ((concatR-re1 rc) r1) + ((concatR-re2 rc) r2) + ((concatR-re1 rc2) r2) + ((concatR-re2 rc2) r1) + (ro ro2) + (ro ro3) + (ro4 (build-or `(,r1 ,r2 ,r3) c)) + ((orR-res ro) (list rc rr)) + ((orR-res ro4) (list r1 r2 r3)) + ((build-or null c) z) + ((build-or `(,r1 ,z) c) r1) + ((build-repeat 0 +inf.0 rc c) rr) + ((build-repeat 0 1 z c) e) + ((build-repeat 0 0 rc c) e) + ((build-repeat 0 +inf.0 z c) e) + ((build-repeat -1 +inf.0 z c) e) + ((build-repeat 0 +inf.0 (build-repeat 0 +inf.0 rc c) c) + (build-repeat 0 +inf.0 rc c)) + ((build-repeat 20 20 (build-repeat 0 +inf.0 rc c) c) + (build-repeat 0 +inf.0 rc c)) + ((build-repeat 20 20 (build-repeat 1 +inf.0 rc c) c) + (build-repeat 20 +inf.0 rc c)) + ((build-repeat 1 1 rc c) rc) + ((repeatR-re rr) rc) + (ra ra2) + (ra ra3) + (ra4 (build-and `(,r1 ,r2 ,r3) c)) + ((andR-res ra) (list rc rr)) + ((andR-res ra4) (list r1 r2 r3)) + ((build-and null c) (build-neg z c)) + ((build-and `(,r1 ,z) c) z) + ((build-and `(,r1) c) r1) + ((build-neg r1 c) (build-neg r1 c)) + ((build-neg (build-neg r1 c) c) r1) + ((negR-re (build-neg r2 c)) r2) + ((re-nullable? r1) #f) + ((re-nullable? rc) #f) + ((re-nullable? (build-concat rr rr c)) #t) + ((re-nullable? rr) #t) + ((re-nullable? (build-repeat 0 1 rc c)) #t) + ((re-nullable? (build-repeat 1 2 rc c)) #f) + ((re-nullable? (build-repeat 1 2 (build-or (list e r1) c) c)) #t) + ((re-nullable? ro) #t) + ((re-nullable? (build-or `(,r1 ,r2) c)) #f) + ((re-nullable? (build-and `(,r1 ,e) c)) #f) + ((re-nullable? (build-and `(,rr ,e) c)) #t) + ((re-nullable? (build-neg r1 c)) #t) + ((re-nullable? (build-neg rr c)) #f)) + + (test-block ((c (make-cache)) + (isc is:integer-set-contents) + (r1 (->re #\1 c)) + (r2 (->re #\2 c)) + (r3-5 (->re '(char-range #\3 #\5) c)) + (r4 (build-or `(,r1 ,r2) c)) + (r5 (->re `(union ,r3-5 #\7) c)) + (r6 (->re #\6 c))) + ((flatten-res null orR? orR-res is:union c) null) + ((isc (char-setR-chars (car (flatten-res `(,r1) orR? orR-res is:union c)))) + (isc (is:make-range (char->integer #\1)))) + ((isc (char-setR-chars (car (flatten-res `(,r4) orR? orR-res is:union c)))) + (isc (is:make-range (char->integer #\1) (char->integer #\2)))) + ((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1) + orR? 
orR-res is:union c)))) + (isc (is:make-range (char->integer #\1) (char->integer #\7)))) + ((flatten-res `(,r1 ,r2) andR? andR-res (lambda (x y) + (let-values (((i _ __) + (is:split x y))) + i)) + c) + (list z))) + + ;; ->re + (test-block ((c (make-cache)) + (isc is:integer-set-contents) + (r (->re #\a c)) + (rr (->re `(concatenation ,r ,r) c)) + (rrr (->re `(concatenation ,r ,rr) c)) + (rrr* (->re `(repetition 0 +inf.0 ,rrr) c))) + ((isc (char-setR-chars r)) (isc (is:make-range (char->integer #\a)))) + ((->re "" c) e) + ((->re "asdf" c) (->re `(concatenation #\a #\s #\d #\f) c)) + ((->re r c) r) + ((->re `(repetition 0 +inf.0 ,r) c) (build-repeat 0 +inf.0 r c)) + ((->re `(repetition 1 +inf.0 ,r) c) (build-repeat 1 +inf.0 r c)) + ((->re `(repetition 0 1 ,r) c) (build-repeat 0 1 r c)) + ((->re `(repetition 0 1 ,rrr*) c) rrr*) + ((->re `(union (union (char-range #\a #\c) + (char-complement (char-range #\000 #\110) + (char-range #\112 ,(integer->char max-char-num)))) + (union (repetition 0 +inf.0 #\2))) c) + (build-or (list (build-char-set (is:union (is:make-range 73) + (is:make-range 97 99)) + c) + (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c)) + c)) + ((->re `(union ,rr ,rrr) c) (build-or (list rr rrr) c)) + ((->re `(union ,r) c) r) + ((->re `(union) c) z) + ((->re `(intersection (intersection #\111 + (char-complement (char-range #\000 #\110) + (char-range #\112 ,(integer->char max-char-num)))) + (intersection (repetition 0 +inf.0 #\2))) c) + (build-and (list (build-char-set (is:make-range 73) c) + (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c)) + c)) + ((->re `(intersection (intersection #\000 (char-complement (char-range #\000 #\110) + (char-range #\112 ,(integer->char max-char-num)))) + (intersection (repetition 0 +inf.0 #\2))) c) + z) + ((->re `(intersection ,rr ,rrr) c) (build-and (list rr rrr) c)) + ((->re `(intersection ,r) c) r) + ((->re `(intersection) c) (build-neg z c)) + ((->re `(complement ,r) c) (build-neg r c)) + ((->re `(concatenation) c) e) + ((->re `(concatenation ,rrr*) c) rrr*) + (rr (build-concat r r c)) + ((->re `(concatenation ,r ,rr ,rrr) c) + (build-concat r (build-concat rr rrr c) c)) + ((isc (char-setR-chars (->re `(char-range #\1 #\1) c))) (isc (is:make-range 49))) + ((isc (char-setR-chars (->re `(char-range #\1 #\9) c))) (isc (is:make-range 49 57))) + ((isc (char-setR-chars (->re `(char-range "1" "1") c))) (isc (is:make-range 49))) + ((isc (char-setR-chars (->re `(char-range "1" "9") c))) (isc (is:make-range 49 57))) + ((->re `(char-range "9" "1") c) z) + ((isc (char-setR-chars (->re `(char-complement) c))) + (isc (char-setR-chars (->re `(char-range #\000 ,(integer->char max-char-num)) c)))) + ((isc (char-setR-chars (->re `(char-complement #\001 (char-range #\002 ,(integer->char max-char-num))) c))) + (isc (is:make-range 0))) + ) + + ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt new file mode 100644 index 0000000..1104a87 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt @@ -0,0 +1,220 @@ +#lang racket + +(require "util.rkt" + syntax/id-table) + +(provide parse) + +(define (bad-args stx num) + (raise-syntax-error + #f + (format "incorrect number of arguments (should have ~a)" num) + stx)) + +;; char-range-arg: syntax-object syntax-object -> nat +;; If c is a character or a string of length 1, returns the integer +;; for the character. Otherwise raises a syntax error. 
+(define (char-range-arg stx containing-stx) + (let ((c (syntax-e stx))) + (cond + ((char? c) (char->integer c)) + ((and (string? c) (= (string-length c) 1)) + (char->integer (string-ref c 0))) + (else + (raise-syntax-error + #f + "not a char or single-char string" + containing-stx stx))))) +(module+ test + (check-equal? (char-range-arg #'#\1 #'here) (char->integer #\1)) + (check-equal? (char-range-arg #'"1" #'here) (char->integer #\1))) + +(define orig-insp (variable-reference->module-declaration-inspector + (#%variable-reference))) +(define (disarm stx) + (syntax-disarm stx orig-insp)) + + ;; parse : syntax-object (box (list-of syntax-object)) -> s-re (see re.rkt) + ;; checks for errors and generates the plain s-exp form for s + ;; Expands lex-abbrevs and applies lex-trans. + (define (parse stx disappeared-uses) + (let loop ([stx stx] + [disappeared-uses disappeared-uses] + ;; seen-lex-abbrevs: id-table + [seen-lex-abbrevs (make-immutable-free-id-table)]) + (let ([recur (lambda (s) + (loop (syntax-rearm s stx) + disappeared-uses + seen-lex-abbrevs))] + [recur/abbrev (lambda (s id) + (loop (syntax-rearm s stx) + disappeared-uses + (free-id-table-set seen-lex-abbrevs id id)))]) + (syntax-case (disarm stx) (repetition union intersection complement concatenation + char-range char-complement) + (_ + (identifier? stx) + (let ((expansion (syntax-local-value stx (lambda () #f)))) + (unless (lex-abbrev? expansion) + (raise-syntax-error 'regular-expression + "undefined abbreviation" + stx)) + ;; Check for cycles. + (when (free-id-table-ref seen-lex-abbrevs stx (lambda () #f)) + (raise-syntax-error 'regular-expression + "illegal lex-abbrev cycle detected" + stx + #f + (list (free-id-table-ref seen-lex-abbrevs stx)))) + (set-box! disappeared-uses (cons stx (unbox disappeared-uses))) + (recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx))) + (_ + (or (char? (syntax-e stx)) (string? (syntax-e stx))) + (syntax-e stx)) + ((repetition arg ...) + (let ((arg-list (syntax->list (syntax (arg ...))))) + (unless (= 3 (length arg-list)) + (bad-args stx 2)) + (let ((low (syntax-e (car arg-list))) + (high (syntax-e (cadr arg-list))) + (re (caddr arg-list))) + (unless (and (number? low) (exact? low) (integer? low) (>= low 0)) + (raise-syntax-error #f + "not a non-negative exact integer" + stx + (car arg-list))) + (unless (or (and (number? high) (exact? high) (integer? high) (>= high 0)) + (eq? high +inf.0)) + (raise-syntax-error #f + "not a non-negative exact integer or +inf.0" + stx + (cadr arg-list))) + (unless (<= low high) + (raise-syntax-error + #f + "the first argument is not less than or equal to the second argument" + stx)) + `(repetition ,low ,high ,(recur re))))) + ((union re ...) + `(union ,@(map recur (syntax->list (syntax (re ...)))))) + ((intersection re ...) + `(intersection ,@(map recur (syntax->list (syntax (re ...)))))) + ((complement re ...) + (let ((re-list (syntax->list (syntax (re ...))))) + (unless (= 1 (length re-list)) + (bad-args stx 1)) + `(complement ,(recur (car re-list))))) + ((concatenation re ...) + `(concatenation ,@(map recur (syntax->list (syntax (re ...)))))) + ((char-range arg ...) 
+ (let ((arg-list (syntax->list (syntax (arg ...))))) + (unless (= 2 (length arg-list)) + (bad-args stx 2)) + (let ((i1 (char-range-arg (car arg-list) stx)) + (i2 (char-range-arg (cadr arg-list) stx))) + (if (<= i1 i2) + `(char-range ,(integer->char i1) ,(integer->char i2)) + (raise-syntax-error + #f + "the first argument does not precede or equal second argument" + stx))))) + ((char-complement arg ...) + (let ((arg-list (syntax->list (syntax (arg ...))))) + (unless (= 1 (length arg-list)) + (bad-args stx 1)) + (let ((parsed (recur (car arg-list)))) + (unless (char-set? parsed) + (raise-syntax-error #f + "not a character set" + stx + (car arg-list))) + `(char-complement ,parsed)))) + ((op form ...) + (identifier? (syntax op)) + (let* ((o (syntax op)) + (expansion (syntax-local-value o (lambda () #f)))) + (set-box! disappeared-uses (cons o (unbox disappeared-uses))) + (cond + ((lex-trans? expansion) + (recur ((lex-trans-f expansion) (disarm stx)))) + (expansion + (raise-syntax-error 'regular-expression + "not a lex-trans" + stx)) + (else + (raise-syntax-error 'regular-expression + "undefined operator" + stx))))) + (_ + (raise-syntax-error + 'regular-expression + "not a char, string, identifier, or (op args ...)" + stx)))))) + + + + ;; char-set? : s-re -> bool + ;; A char-set is an re that matches only strings of length 1. + ;; char-set? is conservative. + (define (char-set? s-re) + (cond + ((char? s-re) #t) + ((string? s-re) (= (string-length s-re) 1)) + ((list? s-re) + (let ((op (car s-re))) + (case op + ((union intersection) (andmap char-set? (cdr s-re))) + ((char-range char-complement) #t) + ((repetition) + (and (= (cadr s-re) (caddr s-re)) (char-set? (cadddr s-re)))) + ((concatenation) + (and (= 2 (length s-re)) (char-set? (cadr s-re)))) + (else #f)))) + (else #f))) + + (module+ test + (require rackunit)) + (module+ test + (check-equal? (char-set? #\a) #t) + (check-equal? (char-set? "12") #f) + (check-equal? (char-set? "1") #t) + (check-equal? (char-set? '(repetition 1 2 #\1)) #f) + (check-equal? (char-set? '(repetition 1 1 "12")) #f) + (check-equal? (char-set? '(repetition 1 1 "1")) #t) + (check-equal? (char-set? '(union "1" "2" "3")) #t) + (check-equal? (char-set? '(union "1" "" "3")) #f) + (check-equal? (char-set? '(intersection "1" "2" (union "3" "4"))) #t) + (check-equal? (char-set? '(intersection "1" "")) #f) + (check-equal? (char-set? '(complement "1")) #f) + (check-equal? (char-set? '(concatenation "1" "2")) #f) + (check-equal? (char-set? '(concatenation "" "2")) #f) + (check-equal? (char-set? '(concatenation "1")) #t) + (check-equal? (char-set? '(concatenation "12")) #f) + (check-equal? (char-set? '(char-range #\1 #\2)) #t) + (check-equal? (char-set? '(char-complement #\1)) #t)) + + ;; yikes... these test cases all have the wrong arity, now. + ;; and by "now", I mean it's been broken since before we + ;; moved to git. + (module+ test + (check-equal? (parse #'#\a null) #\a) + (check-equal? (parse #'"1" null) "1") + (check-equal? (parse #'(repetition 1 1 #\1) null) + '(repetition 1 1 #\1)) + (check-equal? (parse #'(repetition 0 +inf.0 #\1) null) '(repetition 0 +inf.0 #\1)) + (check-equal? (parse #'(union #\1 (union "2") (union)) null) + '(union #\1 (union "2") (union))) + (check-equal? (parse #'(intersection #\1 (intersection "2") (intersection)) + null) + '(intersection #\1 (intersection "2") (intersection))) + (check-equal? (parse #'(complement (union #\1 #\2)) + null) + '(complement (union #\1 #\2))) + (check-equal? 
(parse #'(concatenation "1" "2" (concatenation)) null) + '(concatenation "1" "2" (concatenation))) + (check-equal? (parse #'(char-range "1" #\1) null) '(char-range #\1 #\1)) + (check-equal? (parse #'(char-range #\1 "1") null) '(char-range #\1 #\1)) + (check-equal? (parse #'(char-range "1" "3") null) '(char-range #\1 #\3)) + (check-equal? (parse #'(char-complement (union "1" "2")) null) + '(char-complement (union "1" "2")))) +; ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt new file mode 100644 index 0000000..c1f1492 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt @@ -0,0 +1,9 @@ +(module token-syntax mzscheme + + ;; The things needed at compile time to handle definition of tokens + + (provide make-terminals-def terminals-def-t terminals-def? + make-e-terminals-def e-terminals-def-t e-terminals-def?) + (define-struct terminals-def (t)) + (define-struct e-terminals-def (t)) + ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt new file mode 100644 index 0000000..27b3458 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt @@ -0,0 +1,92 @@ +(module token mzscheme + + (require-for-syntax "token-syntax.rkt") + + ;; Defining tokens + + (provide define-tokens define-empty-tokens make-token token? + (protect (rename token-name real-token-name)) + (protect (rename token-value real-token-value)) + (rename token-name* token-name) + (rename token-value* token-value) + (struct position (offset line col)) + (struct position-token (token start-pos end-pos)) + (struct srcloc-token (token srcloc))) + + + ;; A token is either + ;; - symbol + ;; - (make-token symbol any) + (define-struct token (name value) (make-inspector)) + + ;; token-name*: token -> symbol + (define (token-name* t) + (cond + ((symbol? t) t) + ((token? t) (token-name t)) + (else (raise-type-error + 'token-name + "symbol or struct:token" + 0 + t)))) + + ;; token-value*: token -> any + (define (token-value* t) + (cond + ((symbol? t) #f) + ((token? t) (token-value t)) + (else (raise-type-error + 'token-value + "symbol or struct:token" + 0 + t)))) + + (define-for-syntax (make-ctor-name n) + (datum->syntax-object n + (string->symbol (format "token-~a" (syntax-e n))) + n + n)) + + (define-for-syntax (make-define-tokens empty?) + (lambda (stx) + (syntax-case stx () + ((_ name (token ...)) + (andmap identifier? (syntax->list (syntax (token ...)))) + (with-syntax (((marked-token ...) + (map values #;(make-syntax-introducer) + (syntax->list (syntax (token ...)))))) + (quasisyntax/loc stx + (begin + (define-syntax name + #,(if empty? + #'(make-e-terminals-def (quote-syntax (marked-token ...))) + #'(make-terminals-def (quote-syntax (marked-token ...))))) + #,@(map + (lambda (n) + (when (eq? (syntax-e n) 'error) + (raise-syntax-error + #f + "Cannot define a token named error." + stx)) + (if empty? + #`(define (#,(make-ctor-name n)) + '#,n) + #`(define (#,(make-ctor-name n) x) + (make-token '#,n x)))) + (syntax->list (syntax (token ...)))) + #;(define marked-token #f) #;...)))) + ((_ ...) 
+ (raise-syntax-error + #f + "must have the form (define-tokens name (identifier ...)) or (define-empty-tokens name (identifier ...))" + stx))))) + + (define-syntax define-tokens (make-define-tokens #f)) + (define-syntax define-empty-tokens (make-define-tokens #t)) + + (define-struct position (offset line col) #f) + (define-struct position-token (token start-pos end-pos) #f) + + (define-struct srcloc-token (token srcloc) #f) + ) + diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt new file mode 100644 index 0000000..c21e88c --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt @@ -0,0 +1,69 @@ +#lang racket + +(require "util.rkt") + +(provide (all-defined-out)) + +;; mapped-chars : (listof (list nat nat bool)) +(define mapped-chars (make-known-char-range-list)) + +;; get-chars-for-x : (nat -> bool) (listof (list nat nat bool)) -> (listof (cons nat nat)) +(define (get-chars-for char-x? mapped-chars) + (cond + ((null? mapped-chars) null) + (else + (let* ((range (car mapped-chars)) + (low (car range)) + (high (cadr range)) + (x (char-x? low))) + (cond + ((caddr range) + (if x + (cons (cons low high) + (get-chars-for char-x? (cdr mapped-chars))) + (get-chars-for char-x? (cdr mapped-chars)))) + (else + (let loop ((range-start low) + (i (car range)) + (parity x)) + (cond + ((> i high) + (if parity + (cons (cons range-start high) (get-chars-for char-x? (cdr mapped-chars))) + (get-chars-for char-x? (cdr mapped-chars)))) + ((eq? parity (char-x? i)) + (loop range-start (add1 i) parity)) + (parity + (cons (cons range-start (sub1 i)) (loop i (add1 i) #f))) + (else + (loop i (add1 i) #t)))))))))) + +(define (compute-ranges x?) + (delay (get-chars-for (lambda (x) (x? (integer->char x))) mapped-chars))) + +(define alphabetic-ranges (compute-ranges char-alphabetic?)) ;; 325 +(define lower-case-ranges (compute-ranges char-lower-case?)) ;; 405 +(define upper-case-ranges (compute-ranges char-upper-case?)) ;; 380 +(define title-case-ranges (compute-ranges char-title-case?)) ;; 10 +(define numeric-ranges (compute-ranges char-numeric?)) ;; 47 +(define symbolic-ranges (compute-ranges char-symbolic?)) ;; 153 +(define punctuation-ranges (compute-ranges char-punctuation?)) ;; 86 +(define graphic-ranges (compute-ranges char-graphic?)) ;; 401 +(define whitespace-ranges (compute-ranges char-whitespace?)) ;; 10 +(define blank-ranges (compute-ranges char-blank?)) ;; 9 +#;(define hexadecimal-ranges (compute-ranges char-hexadecimal?)) +(define iso-control-ranges (compute-ranges char-iso-control?)) ;; 2 + + + +(module+ test + (require rackunit) + (check-equal? (get-chars-for odd? '()) '()) + (check-equal? (get-chars-for odd? '((1 4 #f) (8 13 #f))) + '((1 . 1) (3 . 3) (9 . 9) (11 . 11) (13 . 13))) + (check-equal? (get-chars-for (lambda (x) + (odd? (quotient x 10))) + '((1 5 #t) (17 19 #t) (21 51 #f))) + '((17 . 19) (30 . 39) (50 . 
51)))) + + diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/util.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/util.rkt new file mode 100644 index 0000000..a7afc54 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/util.rkt @@ -0,0 +1,127 @@ +#lang racket + +(provide (all-defined-out)) + +(define max-char-num #x10FFFF) + +(define-struct lex-abbrev (get-abbrev)) +(define-struct lex-trans (f)) + +(module+ test + (require rackunit)) + +#;(define-syntax test-block + (syntax-rules () + ((_ defs (code right-ans) ...) + (let* defs + (let ((real-ans code)) + (unless (equal? real-ans right-ans) + (printf "Test failed: ~e gave ~e. Expected ~e\n" + 'code real-ans 'right-ans))) ...)))) + +(define-syntax test-block + (syntax-rules () + ((_ x ...) (void)))) + + +;; A cache is (X ( -> Y) -> Y) +;; make-cache : -> cache +;; table map Xs to Ys. If key is mapped, its value is returned. +;; Otherwise, build is invoked and its result is placed in the table and +;; returned. +;; Xs are compared with equal? +(define (make-cache) + (let ((table (make-hash))) + (lambda (key build) + (hash-ref table key + (lambda () + (let ((new (build))) + (hash-set! table key new) + new)))))) + +(module+ test + (define cache (make-cache)) + (check-equal? (cache '(s 1 2) (lambda () 9)) 9) + (check-equal? (cache '(s 2 1) (lambda () 8)) 8) + (check-equal? (cache '(s 1 2) (lambda () 1)) 9) + (check-equal? (cache (cons 's (cons 0 (cons +inf.0 10))) + (lambda () 22)) 22) + (check-equal? (cache (cons 's (cons 0 (cons +inf.0 10))) + (lambda () 1)) 22)) + + + +;; make-counter : -> -> nat +;; makes a function that returns a higher number by 1, each time +;; it is called. +(define (make-counter) + (let ((counter 0)) + (lambda () + (begin0 + counter + (set! counter (add1 counter)))))) + +(module+ test + (define c (make-counter)) + (define d (make-counter)) + (check-equal? (c) 0) + (check-equal? (d) 0) + (check-equal? (c) 1) + (check-equal? (d) 1) + (check-equal? (c) 2)) + + +;; remove-dups : (list-of X) (X -> number) -> (list-of X) +;; removes the entries from l that have the same index as a +;; previous entry. l must be grouped by indexes. +(define (remove-dups l index acc) + (cond + ((null? l) (reverse acc)) + ((null? acc) (remove-dups (cdr l) index (cons (car l) acc))) + ((= (index (car acc)) (index (car l))) + (remove-dups (cdr l) index acc)) + (else + (remove-dups (cdr l) index (cons (car l) acc))))) + + +(module+ test + (check-equal? (remove-dups '((1 2) (2 2) (1 3) (1 4) + (100 4) (0 5)) cadr null) + '((1 2) (1 3) (1 4) (0 5))) + (check-equal? (remove-dups null error null) null)) + +;; do-simple-equiv : (list-of X) (X -> nat) -> (list-of X) +;; Sorts l according to index and removes the entries with duplicate +;; indexes. +(define (do-simple-equiv l index) + (let ((ordered (sort l (lambda (a b) (< (index a) (index b)))))) + (remove-dups ordered index null))) + +(module+ test + (check-equal? (do-simple-equiv '((2 2) (1 4) (1 2) + (100 4) (1 3) (0 5)) + cadr) + '((2 2) (1 3) (1 4) (0 5))) + (check-equal? (do-simple-equiv null error) null)) + +;; replace : (list-of X) (X -> bool) (X -> (list-of X)) (list-of X) -> +;; (list-of X) +;; If (pred? r) for some r in l, splice (get r) in place of r in the resulting +;; list. +(define (replace l pred? get acc) + (cond + ((null? l) acc) + ((pred? (car l)) (replace (cdr l) pred? get (append (get (car l)) acc))) + (else (replace (cdr l) pred? get (cons (car l) acc))))) + + +(module+ test + (check-equal? 
(replace null void (lambda () (list 1)) null) null) + (check-equal? (replace '(1 2 3 4 3 5) + (lambda (x) (= x 3)) + (lambda (x) (list 1 2 3)) + null) + '(5 1 2 3 4 1 2 3 2 1))) + + + diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/grammar_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/grammar_rkt.dep new file mode 100644 index 0000000..ee73688 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/grammar_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("468f21e5aa316cce76913f10fb16179eb072183c" . "c0c07e543a2da6a5a900016b86916e67cfc9a89f") (collects #"br-parser-tools" #"private-yacc" #"yacc-helper.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"class.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"racket" #"contract.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/grammar_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/grammar_rkt.zo new file mode 100644 index 0000000..247a41c Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/grammar_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/graph_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/graph_rkt.dep new file mode 100644 index 0000000..3b4d740 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/graph_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("efcbacc6a2841b29dbaedbc655290598a196a721" . "1bd4c8735758355b04c6e172aa61084639448a7c") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/graph_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/graph_rkt.zo new file mode 100644 index 0000000..26afbaa Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/graph_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/input-file-parser_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/input-file-parser_rkt.dep new file mode 100644 index 0000000..5be0677 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/input-file-parser_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("4cb386976883e8cd0d891950a469f3b7c8c6d957" . 
"75215a2b37f9360fd02597d45cf2f423f7dd5bef") (collects #"br-parser-tools" #"private-lex" #"token-syntax.rkt") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"br-parser-tools" #"private-yacc" #"yacc-helper.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"class.rkt") (collects #"mzscheme" #"main.rkt") (collects #"racket" #"contract.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/input-file-parser_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/input-file-parser_rkt.zo new file mode 100644 index 0000000..1d9026e Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/input-file-parser_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/lalr_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/lalr_rkt.dep new file mode 100644 index 0000000..914517e --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/lalr_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("15ac433d879371638d04ae3ecedf9be56c4c2ae9" . "0b2297c9131f2231ecaa39cf14cfddf70bf7b7e9") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"br-parser-tools" #"private-yacc" #"lr0.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"class.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/lalr_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/lalr_rkt.zo new file mode 100644 index 0000000..8b2ad4b Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/lalr_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/lr0_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/lr0_rkt.dep new file mode 100644 index 0000000..0c39c8d --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/lr0_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("24f4ec91205bfbab013165350eb9375fea53884c" . 
"0c6241a969ffc362c3ad9fa66badebcfe2b66f84") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"br-parser-tools" #"private-yacc" #"graph.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"class.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/lr0_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/lr0_rkt.zo new file mode 100644 index 0000000..0229a6f Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/lr0_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/parser-actions_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/parser-actions_rkt.dep new file mode 100644 index 0000000..3418778 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/parser-actions_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("db7e9899c4b17d8ad8283dec50e0e9090f4acb19" . "4a9497eb11d109799b943d433fff01d2c1eb3338") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/parser-actions_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/parser-actions_rkt.zo new file mode 100644 index 0000000..9de2408 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/parser-actions_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/parser-builder_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/parser-builder_rkt.dep new file mode 100644 index 0000000..787c2f7 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/parser-builder_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("4d49b2e9c74b94ec0d402da7ac7ff55643f762cc" . 
"0ad13697a02d3576409cd6687e363fb6a35897a6") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"br-parser-tools" #"private-yacc" #"input-file-parser.rkt") (collects #"br-parser-tools" #"private-yacc" #"parser-actions.rkt") (collects #"br-parser-tools" #"private-yacc" #"table.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"class.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"racket" #"contract.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/parser-builder_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/parser-builder_rkt.zo new file mode 100644 index 0000000..db4fa13 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/parser-builder_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/table_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/table_rkt.dep new file mode 100644 index 0000000..33db78e --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/table_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("3e315d6d491b6d88856829733bbe08e6fc2343dc" . "237931ee716cef28ca1dc1284b5405c271c9c9dc") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"br-parser-tools" #"private-yacc" #"lalr.rkt") (collects #"br-parser-tools" #"private-yacc" #"lr0.rkt") (collects #"br-parser-tools" #"private-yacc" #"parser-actions.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"class.rkt") (collects #"mzlib" #"list.rkt") (collects #"racket" #"contract.rkt") (collects #"scheme" #"base.rkt") (collects #"scheme" #"runtime-config.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/table_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/table_rkt.zo new file mode 100644 index 0000000..0d7a299 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/table_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/yacc-helper_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/yacc-helper_rkt.dep new file mode 100644 index 0000000..73fda78 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/yacc-helper_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("6cd5cc09c96354b229671df88858952e2a9ab7de" . 
"e2ae7af582413c6b68316362386d9ab6a73dc9ab") (collects #"br-parser-tools" #"private-lex" #"token-syntax.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/yacc-helper_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/yacc-helper_rkt.zo new file mode 100644 index 0000000..16d24ad Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/errortrace/yacc-helper_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/grammar_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/grammar_rkt.dep new file mode 100644 index 0000000..b1e7b6f --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/grammar_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("468f21e5aa316cce76913f10fb16179eb072183c" . "76e6bdf2fead5f128bc517845b6f316fd21520b3") (collects #"br-parser-tools" #"private-yacc" #"yacc-helper.rkt") (collects #"mzlib" #"class.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"racket" #"contract.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/grammar_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/grammar_rkt.zo new file mode 100644 index 0000000..9dbad9e Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/grammar_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/yacc-helper_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/yacc-helper_rkt.dep new file mode 100644 index 0000000..f9328a6 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/yacc-helper_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("6cd5cc09c96354b229671df88858952e2a9ab7de" . "6aab85a3c56c4a8b434cf6a4421336a3a8a0d39b") (collects #"br-parser-tools" #"private-lex" #"token-syntax.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/yacc-helper_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/yacc-helper_rkt.zo new file mode 100644 index 0000000..1994757 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/drracket/yacc-helper_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/grammar_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/grammar_rkt.dep new file mode 100644 index 0000000..e9764b8 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/grammar_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("468f21e5aa316cce76913f10fb16179eb072183c" . 
"fcd96d2745778793f4c428009decea4031f01b6b") (collects #"br-parser-tools" #"private-yacc" #"yacc-helper.rkt") (collects #"mzlib" #"class.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"racket" #"contract.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/grammar_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/grammar_rkt.zo new file mode 100644 index 0000000..f21a910 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/grammar_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/graph_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/graph_rkt.dep new file mode 100644 index 0000000..d7e5be9 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/graph_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("efcbacc6a2841b29dbaedbc655290598a196a721" . "46a0cffdba2f82aa32f0fd32a5783621a6a78323") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/graph_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/graph_rkt.zo new file mode 100644 index 0000000..bb5f7ca Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/graph_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/input-file-parser_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/input-file-parser_rkt.dep new file mode 100644 index 0000000..edd4787 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/input-file-parser_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("4cb386976883e8cd0d891950a469f3b7c8c6d957" . "6d4317182d4a6351759b559a48720b97b01088ae") (collects #"br-parser-tools" #"private-lex" #"token-syntax.rkt") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"br-parser-tools" #"private-yacc" #"yacc-helper.rkt") (collects #"mzlib" #"class.rkt") (collects #"mzscheme" #"main.rkt") (collects #"racket" #"contract.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/input-file-parser_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/input-file-parser_rkt.zo new file mode 100644 index 0000000..3478930 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/input-file-parser_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/lalr_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/lalr_rkt.dep new file mode 100644 index 0000000..b788d39 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/lalr_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("15ac433d879371638d04ae3ecedf9be56c4c2ae9" . 
"ee6522ae77c23f3d6ccaed6a45dfaf0915efe251") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"br-parser-tools" #"private-yacc" #"lr0.rkt") (collects #"mzlib" #"class.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/lalr_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/lalr_rkt.zo new file mode 100644 index 0000000..095291a Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/lalr_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/lr0_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/lr0_rkt.dep new file mode 100644 index 0000000..f56295b --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/lr0_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("24f4ec91205bfbab013165350eb9375fea53884c" . "d97ba6be97a513c7169fb71a846459a962705f1c") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"br-parser-tools" #"private-yacc" #"graph.rkt") (collects #"mzlib" #"class.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/lr0_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/lr0_rkt.zo new file mode 100644 index 0000000..113fc36 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/lr0_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/parser-actions_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/parser-actions_rkt.dep new file mode 100644 index 0000000..7979f77 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/parser-actions_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("db7e9899c4b17d8ad8283dec50e0e9090f4acb19" . "96b9fd723bd354b9e93281793a161f7b0285e0da") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/parser-actions_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/parser-actions_rkt.zo new file mode 100644 index 0000000..52a0323 Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/parser-actions_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/parser-builder_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/parser-builder_rkt.dep new file mode 100644 index 0000000..d689301 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/parser-builder_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("4d49b2e9c74b94ec0d402da7ac7ff55643f762cc" . 
"873212f5ff8fe2056832ca8c60822e3629397aaf") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"br-parser-tools" #"private-yacc" #"input-file-parser.rkt") (collects #"br-parser-tools" #"private-yacc" #"parser-actions.rkt") (collects #"br-parser-tools" #"private-yacc" #"table.rkt") (collects #"mzlib" #"class.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"racket" #"contract.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/parser-builder_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/parser-builder_rkt.zo new file mode 100644 index 0000000..402846a Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/parser-builder_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/table_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/table_rkt.dep new file mode 100644 index 0000000..b323791 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/table_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("3e315d6d491b6d88856829733bbe08e6fc2343dc" . "a276a09cfaba1c0260fe8aaa42fe2679cfb8d1f1") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"br-parser-tools" #"private-yacc" #"lalr.rkt") (collects #"br-parser-tools" #"private-yacc" #"lr0.rkt") (collects #"br-parser-tools" #"private-yacc" #"parser-actions.rkt") (collects #"mzlib" #"class.rkt") (collects #"mzlib" #"list.rkt") (collects #"racket" #"contract.rkt") (collects #"scheme" #"base.rkt") (collects #"scheme" #"runtime-config.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/table_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/table_rkt.zo new file mode 100644 index 0000000..f61e88d Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/table_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/yacc-helper_rkt.dep b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/yacc-helper_rkt.dep new file mode 100644 index 0000000..f9328a6 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/yacc-helper_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("6cd5cc09c96354b229671df88858952e2a9ab7de" . "6aab85a3c56c4a8b434cf6a4421336a3a8a0d39b") (collects #"br-parser-tools" #"private-lex" #"token-syntax.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/yacc-helper_rkt.zo b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/yacc-helper_rkt.zo new file mode 100644 index 0000000..7e9c11c Binary files /dev/null and b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/compiled/yacc-helper_rkt.zo differ diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt new file mode 100644 index 0000000..ebff00d --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt @@ -0,0 +1,280 @@ +;; Constructs to create and access grammars, the internal +;; representation of the input to the parser generator. 
+ +(module grammar mzscheme + + (require mzlib/class + mzlib/list + "yacc-helper.rkt" + racket/contract) + + ;; Each production has a unique index 0 <= index <= number of productions + (define-struct prod (lhs rhs index prec action) (make-inspector)) + + ;; The dot-pos field is the index of the element in the rhs + ;; of prod that the dot immediately precedes. + ;; Thus 0 <= dot-pos <= (vector-length rhs). + (define-struct item (prod dot-pos) (make-inspector)) + + ;; gram-sym = (union term? non-term?) + ;; Each term has a unique index 0 <= index < number of terms + ;; Each non-term has a unique index 0 <= index < number of non-terms + (define-struct term (sym index prec) (make-inspector)) + (define-struct non-term (sym index) (make-inspector)) + + ;; a precedence declaration. + (define-struct prec (num assoc) (make-inspector)) + + (provide/contract + (make-item (prod? (or/c #f natural-number/c) . -> . item?)) + (make-term (symbol? (or/c #f natural-number/c) (or/c prec? #f) . -> . term?)) + (make-non-term (symbol? (or/c #f natural-number/c) . -> . non-term?)) + (make-prec (natural-number/c (or/c 'left 'right 'nonassoc) . -> . prec?)) + (make-prod (non-term? (vectorof (or/c non-term? term?)) + (or/c #f natural-number/c) (or/c #f prec?) syntax? . -> . prod?))) + + (provide + + + ;; Things that work on items + start-item? item-prod item->string + sym-at-dot move-dot-right itemstring + non-term? term? non-termbit-vector term-index non-term-index + + ;; Things that work on precs + prec-num prec-assoc + + grammar% + + ;; Things that work on productions + prod-index prod-prec prod-rhs prod-lhs prod-action) + + + ;;---------------------- LR items -------------------------- + + ;; item bool + ;; Lexicographic comparison on two items. + (define (item bool + ;; The start production always has index 0 + (define (start-item? i) + (= 0 (non-term-index (prod-lhs (item-prod i))))) + + + ;; move-dot-right: LR-item -> LR-item | #f + ;; moves the dot to the right in the item, unless it is at its + ;; rightmost, then it returns false + (define (move-dot-right i) + (cond + ((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f) + (else (make-item (item-prod i) + (add1 (item-dot-pos i)))))) + + ;; sym-at-dot: LR-item -> gram-sym | #f + ;; returns the symbol after the dot in the item or #f if there is none + (define (sym-at-dot i) + (let ((dp (item-dot-pos i)) + (rhs (prod-rhs (item-prod i)))) + (cond + ((= dp (vector-length rhs)) #f) + (else (vector-ref rhs dp))))) + + + ;; print-item: LR-item -> + (define (item->string it) + (let ((print-sym (lambda (i) + (let ((gs (vector-ref (prod-rhs (item-prod it)) i))) + (cond + ((term? gs) (format "~a " (term-sym gs))) + (else (format "~a " (non-term-sym gs)))))))) + (string-append + (format "~a -> " (non-term-sym (prod-lhs (item-prod it)))) + (let loop ((i 0)) + (cond + ((= i (vector-length (prod-rhs (item-prod it)))) + (if (= i (item-dot-pos it)) + ". " + "")) + ((= i (item-dot-pos it)) + (string-append ". " (print-sym i) (loop (add1 i)))) + (else (string-append (print-sym i) (loop (add1 i))))))))) + + ;; --------------------- Grammar Symbols -------------------------- + + (define (non-termstring gs) + (symbol->string (gram-sym-symbol gs))) + + ;; term-list->bit-vector: term list -> int + ;; Creates a number where the nth bit is 1 if the term with index n is in + ;; the list, and whose nth bit is 0 otherwise + (define (term-list->bit-vector terms) + (cond + ((null? 
terms) 0) + (else + (bitwise-ior (arithmetic-shift 1 (term-index (car terms))) (term-list->bit-vector (cdr terms)))))) + + + ;; ------------------------- Grammar ------------------------------ + + (define grammar% + (class object% + (super-instantiate ()) + ;; prods: production list list + ;; where there is one production list per non-term + (init prods) + ;; init-prods: production list + ;; The productions parsing can start from + ;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable + (init-field init-prods terms non-terms end-terms) + + ;; list of all productions + (define all-prods (apply append prods)) + (define num-prods (length all-prods)) + (define num-terms (length terms)) + (define num-non-terms (length non-terms)) + + (let ((count 0)) + (for-each + (lambda (nt) + (set-non-term-index! nt count) + (set! count (add1 count))) + non-terms)) + + (let ((count 0)) + (for-each + (lambda (t) + (set-term-index! t count) + (set! count (add1 count))) + terms)) + + (let ((count 0)) + (for-each + (lambda (prod) + (set-prod-index! prod count) + (set! count (add1 count))) + all-prods)) + + ;; indexed by the index of the non-term - contains the list of productions for that non-term + (define nt->prods + (let ((v (make-vector (length prods) #f))) + (for-each (lambda (prods) + (vector-set! v (non-term-index (prod-lhs (car prods))) prods)) + prods) + v)) + + (define nullable-non-terms + (nullable all-prods num-non-terms)) + + (define/public (get-num-terms) num-terms) + (define/public (get-num-non-terms) num-non-terms) + + (define/public (get-prods-for-non-term nt) + (vector-ref nt->prods (non-term-index nt))) + (define/public (get-prods) all-prods) + (define/public (get-init-prods) init-prods) + + (define/public (get-terms) terms) + (define/public (get-non-terms) non-terms) + + (define/public (get-num-prods) num-prods) + (define/public (get-end-terms) end-terms) + + (define/public (nullable-non-term? nt) + (vector-ref nullable-non-terms (non-term-index nt))) + + (define/public (nullable-after-dot? item) + (let* ((rhs (prod-rhs (item-prod item))) + (prod-length (vector-length rhs))) + (let loop ((i (item-dot-pos item))) + (cond + ((< i prod-length) + (if (and (non-term? (vector-ref rhs i)) (nullable-non-term? (vector-ref rhs i))) + (loop (add1 i)) + #f)) + ((= i prod-length) #t))))) + + (define/public (nullable-non-term-thunk) + (lambda (nt) + (nullable-non-term? nt))) + (define/public (nullable-after-dot?-thunk) + (lambda (item) + (nullable-after-dot? item))))) + + + ;; nullable: production list * int -> non-term set + ;; determines which non-terminals can derive epsilon + (define (nullable prods num-nts) + (letrec ((nullable (make-vector num-nts #f)) + (added #f) + + ;; possible-nullable: producion list -> production list + ;; Removes all productions that have a terminal + (possible-nullable + (lambda (prods) + (filter (lambda (prod) + (vector-andmap non-term? (prod-rhs prod))) + prods))) + + ;; set-nullables: production list -> production list + ;; makes one pass through the productions, adding the ones + ;; known to be nullable now to nullable and returning a list + ;; of productions that we don't know about yet. + (set-nullables + (lambda (prods) + (cond + ((null? prods) null) + ((vector-ref nullable + (gram-sym-index (prod-lhs (car prods)))) + (set-nullables (cdr prods))) + ((vector-andmap (lambda (nt) + (vector-ref nullable (gram-sym-index nt))) + (prod-rhs (car prods))) + (vector-set! nullable + (gram-sym-index (prod-lhs (car prods))) + #t) + (set! 
added #t) + (set-nullables (cdr prods))) + (else + (cons (car prods) + (set-nullables (cdr prods)))))))) + + (let loop ((P (possible-nullable prods))) + (cond + ((null? P) nullable) + (else + (set! added #f) + (let ((new-P (set-nullables P))) + (if added + (loop new-P) + nullable))))))) + + +) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt new file mode 100644 index 0000000..958acc1 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt @@ -0,0 +1,61 @@ +(module graph mzscheme + + (provide digraph) + + (define (zero-thunk) 0) + + ;; digraph: + ;; ('a list) * ('a -> 'a list) * ('a -> 'b) * ('b * 'b -> 'b) * (-> 'b) + ;; -> ('a -> 'b) + ;; DeRemer and Pennello 1982 + ;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} + ;; We use a hash-table to represent the result function 'a -> 'b set, so + ;; the values of type 'a must be comparable with eq?. + (define (digraph nodes edges f- union fail) + (letrec [ + ;; Will map elements of 'a to 'b sets + (results (make-hash-table)) + (f (lambda (x) (hash-table-get results x fail))) + + ;; Maps elements of 'a to integers. + (N (make-hash-table)) + (get-N (lambda (x) (hash-table-get N x zero-thunk))) + (set-N (lambda (x d) (hash-table-put! N x d))) + + (stack null) + (push (lambda (x) + (set! stack (cons x stack)))) + (pop (lambda () + (begin0 + (car stack) + (set! stack (cdr stack))))) + (depth (lambda () (length stack))) + + ;; traverse: 'a -> + (traverse + (lambda (x) + (push x) + (let ((d (depth))) + (set-N x d) + (hash-table-put! results x (f- x)) + (for-each (lambda (y) + (if (= 0 (get-N y)) + (traverse y)) + (hash-table-put! results + x + (union (f x) (f y))) + (set-N x (min (get-N x) (get-N y)))) + (edges x)) + (if (= d (get-N x)) + (let loop ((p (pop))) + (set-N p +inf.0) + (hash-table-put! results p (f x)) + (if (not (eq? x p)) + (loop (pop))))))))] + (for-each (lambda (x) + (if (= 0 (get-N x)) + (traverse x))) + nodes) + f)) + +) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt new file mode 100644 index 0000000..7309f51 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt @@ -0,0 +1,374 @@ +(module input-file-parser mzscheme + + ;; routines for parsing the input to the parser generator and producing a + ;; grammar (See grammar.rkt) + + (require "yacc-helper.rkt" + "../private-lex/token-syntax.rkt" + "grammar.rkt" + mzlib/class + racket/contract) + (require-for-template mzscheme) + +(define (is-a-grammar%? x) (is-a? x grammar%)) + (provide/contract + (parse-input ((listof identifier?) (listof identifier?) (listof identifier?) + (or/c #f syntax?) syntax? any/c . -> . is-a-grammar%?)) + (get-term-list ((listof identifier?) . -> . (listof identifier?)))) + + (define stx-for-original-property (read-syntax #f (open-input-string "original"))) + + ;; get-args: ??? -> (values (listof syntax) (or/c #f (cons integer? stx))) + (define (get-args i rhs src-pos term-defs) + (let ((empty-table (make-hash-table)) + (biggest-pos #f)) + (hash-table-put! empty-table 'error #t) + (for-each (lambda (td) + (let ((v (syntax-local-value td))) + (if (e-terminals-def? v) + (for-each (lambda (s) + (hash-table-put! 
empty-table (syntax-object->datum s) #t)) + (syntax->list (e-terminals-def-t v)))))) + term-defs) + (let ([args + (let get-args ((i i) + (rhs rhs)) + (cond + ((null? rhs) null) + (else + (let ((b (car rhs)) + (name (if (hash-table-get empty-table (syntax-object->datum (car rhs)) (lambda () #f)) + (gensym) + (string->symbol (format "$~a" i))))) + (cond + (src-pos + (let ([start-pos-id + (datum->syntax-object b (string->symbol (format "$~a-start-pos" i)) b stx-for-original-property)] + [end-pos-id + (datum->syntax-object b (string->symbol (format "$~a-end-pos" i)) b stx-for-original-property)]) + (set! biggest-pos (cons start-pos-id end-pos-id)) + `(,(datum->syntax-object b name b stx-for-original-property) + ,start-pos-id + ,end-pos-id + ,@(get-args (add1 i) (cdr rhs))))) + (else + `(,(datum->syntax-object b name b stx-for-original-property) + ,@(get-args (add1 i) (cdr rhs)))))))))]) + (values args biggest-pos)))) + + ;; Given the list of terminal symbols and the precedence/associativity definitions, + ;; builds terminal structures (See grammar.rkt) + ;; build-terms: symbol list * symbol list list -> term list + (define (build-terms term-list precs) + (let ((counter 0) + + ;;(term-list (cons (gensym) term-list)) + + ;; Will map a terminal symbol to its precedence/associativity + (prec-table (make-hash-table))) + + ;; Fill the prec table + (for-each + (lambda (p-decl) + (begin0 + (let ((assoc (car p-decl))) + (for-each + (lambda (term-sym) + (hash-table-put! prec-table term-sym (make-prec counter assoc))) + (cdr p-decl))) + (set! counter (add1 counter)))) + precs) + + ;; Build the terminal structures + (map + (lambda (term-sym) + (make-term term-sym + #f + (hash-table-get prec-table term-sym (lambda () #f)))) + term-list))) + + ;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.rkt) + ;; get-terms-from-def: identifier? -> (listof identifier?) + (define (get-terms-from-def term-syn) + (let ((t (syntax-local-value term-syn (lambda () #f)))) + (cond + ((terminals-def? t) (syntax->list (terminals-def-t t))) + ((e-terminals-def? t) (syntax->list (e-terminals-def-t t))) + (else + (raise-syntax-error + 'parser-tokens + "undefined token group" + term-syn))))) + + (define (get-term-list term-group-names) + (remove-duplicates + (cons (datum->syntax-object #f 'error) + (apply append + (map get-terms-from-def term-group-names))))) + + (define (parse-input term-defs start ends prec-decls prods src-pos) + (let* ((start-syms (map syntax-e start)) + + (list-of-terms (map syntax-e (get-term-list term-defs))) + + (end-terms + (map + (lambda (end) + (unless (memq (syntax-e end) list-of-terms) + (raise-syntax-error + 'parser-end-tokens + (format "End token ~a not defined as a token" + (syntax-e end)) + end)) + (syntax-e end)) + ends)) + + ;; Get the list of terminals out of input-terms + + (list-of-non-terms + (syntax-case prods () + (((non-term production ...) ...) + (begin + (for-each + (lambda (nts) + (if (memq (syntax-object->datum nts) list-of-terms) + (raise-syntax-error + 'parser-non-terminals + (format "~a used as both token and non-terminal" + (syntax-object->datum nts)) + nts))) + (syntax->list (syntax (non-term ...)))) + + (let ((dup (duplicate-list? 
(syntax-object->datum + (syntax (non-term ...)))))) + (if dup + (raise-syntax-error + 'parser-non-terminals + (format "non-terminal ~a defined multiple times" + dup) + prods))) + + (syntax-object->datum (syntax (non-term ...))))) + (_ + (raise-syntax-error + 'parser-grammar + "Grammar must be of the form (grammar (non-terminal productions ...) ...)" + prods)))) + + ;; Check the precedence declarations for errors and turn them into data + (precs + (syntax-case prec-decls () + (((type term ...) ...) + (let ((p-terms + (syntax-object->datum (syntax (term ... ...))))) + (cond + ((duplicate-list? p-terms) => + (lambda (d) + (raise-syntax-error + 'parser-precedences + (format "duplicate precedence declaration for token ~a" + d) + prec-decls))) + (else + (for-each + (lambda (a) + (for-each + (lambda (t) + (if (not (memq (syntax-object->datum t) + list-of-terms)) + (raise-syntax-error + 'parser-precedences + (format + "Precedence declared for non-token ~a" + (syntax-object->datum t)) + t))) + (syntax->list a))) + (syntax->list (syntax ((term ...) ...)))) + (for-each + (lambda (type) + (if (not (memq (syntax-object->datum type) + `(left right nonassoc))) + (raise-syntax-error + 'parser-precedences + "Associativity must be left, right or nonassoc" + type))) + (syntax->list (syntax (type ...)))) + (syntax-object->datum prec-decls))))) + (#f null) + (_ + (raise-syntax-error + 'parser-precedences + "Precedence declaration must be of the form (precs (assoc term ...) ...) where assoc is left, right or nonassoc" + prec-decls)))) + + (terms (build-terms list-of-terms precs)) + + (non-terms (map (lambda (non-term) (make-non-term non-term #f)) + list-of-non-terms)) + (term-table (make-hash-table)) + (non-term-table (make-hash-table))) + + (for-each (lambda (t) + (hash-table-put! term-table (gram-sym-symbol t) t)) + terms) + + (for-each (lambda (nt) + (hash-table-put! non-term-table (gram-sym-symbol nt) nt)) + non-terms) + + (let* ( + ;; parse-prod: syntax-object -> gram-sym vector + (parse-prod + (lambda (prod-so) + (syntax-case prod-so () + ((prod-rhs-sym ...) + (andmap identifier? 
(syntax->list prod-so)) + (begin + (for-each (lambda (t) + (if (memq (syntax-object->datum t) end-terms) + (raise-syntax-error + 'parser-production-rhs + (format "~a is an end token and cannot be used in a production" + (syntax-object->datum t)) + t))) + (syntax->list prod-so)) + (list->vector + (map (lambda (s) + (hash-table-get + term-table + (syntax-object->datum s) + (lambda () + (hash-table-get + non-term-table + (syntax-object->datum s) + (lambda () + (raise-syntax-error + 'parser-production-rhs + (format + "~a is not declared as a terminal or non-terminal" + (syntax-object->datum s)) + s)))))) + (syntax->list prod-so))))) + (_ + (raise-syntax-error + 'parser-production-rhs + "production right-hand-side must have form (symbol ...)" + prod-so))))) + + ;; parse-action: syntax-object * syntax-object -> syntax-object + (parse-action + (lambda (rhs act) + (let-values ([(args biggest) (get-args 1 (syntax->list rhs) src-pos term-defs)]) + (let ([act + (if biggest + (with-syntax ([$n-start-pos (datum->syntax-object (car biggest) '$n-start-pos)] + [$n-end-pos (datum->syntax-object (cdr biggest) '$n-end-pos)]) + #`(let ([$n-start-pos #,(car biggest)] + [$n-end-pos #,(cdr biggest)]) + #,act)) + act)]) + (quasisyntax/loc act + (lambda #,args + #,act)))))) + + ;; parse-prod+action: non-term * syntax-object -> production + (parse-prod+action + (lambda (nt prod-so) + (syntax-case prod-so () + ((prod-rhs action) + (let ((p (parse-prod (syntax prod-rhs)))) + (make-prod + nt + p + #f + (let loop ((i (sub1 (vector-length p)))) + (if (>= i 0) + (let ((gs (vector-ref p i))) + (if (term? gs) + (term-prec gs) + (loop (sub1 i)))) + #f)) + (parse-action (syntax prod-rhs) (syntax action))))) + ((prod-rhs (prec term) action) + (identifier? (syntax term)) + (let ((p (parse-prod (syntax prod-rhs)))) + (make-prod + nt + p + #f + (term-prec + (hash-table-get + term-table + (syntax-object->datum (syntax term)) + (lambda () + (raise-syntax-error + 'parser-production-rhs + (format + "unrecognized terminal ~a in precedence declaration" + (syntax-object->datum (syntax term))) + (syntax term))))) + (parse-action (syntax prod-rhs) (syntax action))))) + (_ + (raise-syntax-error + 'parser-production-rhs + "production must have form [(symbol ...) expression] or [(symbol ...) (prec symbol) expression]" + prod-so))))) + + ;; parse-prod-for-nt: syntax-object -> production list + (parse-prods-for-nt + (lambda (prods-so) + (syntax-case prods-so () + ((nt productions ...) + (> (length (syntax->list (syntax (productions ...)))) 0) + (let ((nt (hash-table-get non-term-table + (syntax-object->datum (syntax nt))))) + (map (lambda (p) (parse-prod+action nt p)) + (syntax->list (syntax (productions ...)))))) + (_ + (raise-syntax-error + 'parser-productions + "A production for a non-terminal must be (non-term right-hand-side ...) 
with at least 1 right hand side" + prods-so)))))) + + (for-each + (lambda (sstx ssym) + (unless (memq ssym list-of-non-terms) + (raise-syntax-error + 'parser-start + (format "Start symbol ~a not defined as a non-terminal" ssym) + sstx))) + start start-syms) + + (let* ((starts (map (lambda (x) (make-non-term (gensym) #f)) start-syms)) + (end-non-terms (map (lambda (x) (make-non-term (gensym) #f)) start-syms)) + (parsed-prods (map parse-prods-for-nt (syntax->list prods))) + (start-prods + (map (lambda (start end-non-term) + (list (make-prod start (vector end-non-term) #f #f + (syntax (lambda (x) x))))) + starts end-non-terms)) + (prods + `(,@start-prods + ,@(map + (lambda (end-nt start-sym) + (map + (lambda (end) + (make-prod end-nt + (vector + (hash-table-get non-term-table start-sym) + (hash-table-get term-table end)) + #f + #f + (syntax (lambda (x) x)))) + end-terms)) + end-non-terms start-syms) + ,@parsed-prods))) + + (make-object grammar% + prods + (map car start-prods) + terms + (append starts (append end-non-terms non-terms)) + (map (lambda (term-name) + (hash-table-get term-table term-name)) + end-terms))))))) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt new file mode 100644 index 0000000..e9b4d3b --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt @@ -0,0 +1,277 @@ +(module lalr mzscheme + + ;; Compute LALR lookaheads from DeRemer and Pennello 1982 + + (require "lr0.rkt" + "grammar.rkt" + mzlib/list + mzlib/class) + + (provide compute-LA) + + ;; compute-DR: LR0-automaton * grammar -> (trans-key -> term set) + ;; computes for each state, non-term transition pair, the terminals + ;; which can transition out of the resulting state + ;; output term set is represented in bit-vector form + (define (compute-DR a g) + (lambda (tk) + (let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) + (term-list->bit-vector + (filter + (lambda (term) + (send a run-automaton r term)) + (send g get-terms)))))) + + ;; compute-reads: + ;; LR0-automaton * grammar -> (trans-key -> trans-key list) + (define (compute-reads a g) + (let ((nullable-non-terms + (filter (lambda (nt) (send g nullable-non-term? nt)) + (send g get-non-terms)))) + (lambda (tk) + (let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) + (map (lambda (x) (make-trans-key r x)) + (filter (lambda (non-term) (send a run-automaton r non-term)) + nullable-non-terms)))))) + + ;; compute-read: LR0-automaton * grammar -> (trans-key -> term set) + ;; output term set is represented in bit-vector form + (define (compute-read a g) + (let* ((dr (compute-DR a g)) + (reads (compute-reads a g))) + (digraph-tk->terml (send a get-mapped-non-term-keys) + reads + dr + (send a get-num-states)))) + ;; returns the list of all k such that state k transitions to state start on the + ;; transitions in rhs (in order) + (define (run-lr0-backward a rhs dot-pos start num-states) + (let loop ((states (list start)) + (i (sub1 dot-pos))) + (cond + ((< i 0) states) + (else (loop (send a run-automaton-back states (vector-ref rhs i)) + (sub1 i)))))) + + ;; prod->items-for-include: grammar * prod * non-term -> lr0-item list + ;; returns the list of all (B -> beta . nt gamma) such that prod = (B -> beta nt gamma) + ;; and gamma =>* epsilon + (define (prod->items-for-include g prod nt) + (let* ((rhs (prod-rhs prod)) + (rhs-l (vector-length rhs))) + (append (if (and (> rhs-l 0) (eq? 
nt (vector-ref rhs (sub1 rhs-l)))) + (list (make-item prod (sub1 rhs-l))) + null) + (let loop ((i (sub1 rhs-l))) + (cond + ((and (> i 0) + (non-term? (vector-ref rhs i)) + (send g nullable-non-term? (vector-ref rhs i))) + (if (eq? nt (vector-ref rhs (sub1 i))) + (cons (make-item prod (sub1 i)) + (loop (sub1 i))) + (loop (sub1 i)))) + (else null)))))) + + ;; prod-list->items-for-include: grammar * prod list * non-term -> lr0-item list + ;; return the list of all (B -> beta . nt gamma) such that (B -> beta nt gamma) in prod-list + ;; and gamma =>* epsilon + (define (prod-list->items-for-include g prod-list nt) + (apply append (map (lambda (prod) (prod->items-for-include g prod nt)) prod-list))) + + ;; comput-includes: lr0-automaton * grammar -> (trans-key -> trans-key list) + (define (compute-includes a g) + (let ((num-states (send a get-num-states)) + (items-for-input-nt (make-vector (send g get-num-non-terms) null))) + (for-each + (lambda (input-nt) + (vector-set! items-for-input-nt (non-term-index input-nt) + (prod-list->items-for-include g (send g get-prods) input-nt))) + (send g get-non-terms)) + (lambda (tk) + (let* ((goal-state (trans-key-st tk)) + (non-term (trans-key-gs tk)) + (items (vector-ref items-for-input-nt (non-term-index non-term)))) + (trans-key-list-remove-dups + (apply append + (map (lambda (item) + (let* ((prod (item-prod item)) + (rhs (prod-rhs prod)) + (lhs (prod-lhs prod))) + (map (lambda (state) + (make-trans-key state lhs)) + (run-lr0-backward a + rhs + (item-dot-pos item) + goal-state + num-states)))) + items))))))) + + ;; compute-lookback: lr0-automaton * grammar -> (kernel * proc -> trans-key list) + (define (compute-lookback a g) + (let ((num-states (send a get-num-states))) + (lambda (state prod) + (map (lambda (k) (make-trans-key k (prod-lhs prod))) + (run-lr0-backward a (prod-rhs prod) (vector-length (prod-rhs prod)) state num-states))))) + + ;; compute-follow: LR0-automaton * grammar -> (trans-key -> term set) + ;; output term set is represented in bit-vector form + (define (compute-follow a g includes) + (let ((read (compute-read a g))) + (digraph-tk->terml (send a get-mapped-non-term-keys) + includes + read + (send a get-num-states)))) + + ;; compute-LA: LR0-automaton * grammar -> kernel * prod -> term set + ;; output term set is represented in bit-vector form + (define (compute-LA a g) + (let* ((includes (compute-includes a g)) + (lookback (compute-lookback a g)) + (follow (compute-follow a g includes))) + (lambda (k p) + (let* ((l (lookback k p)) + (f (map follow l))) + (apply bitwise-ior (cons 0 f)))))) + + (define (print-DR dr a g) + (print-input-st-sym dr "DR" a g print-output-terms)) + (define (print-Read Read a g) + (print-input-st-sym Read "Read" a g print-output-terms)) + (define (print-includes i a g) + (print-input-st-sym i "includes" a g print-output-st-nt)) + (define (print-lookback l a g) + (print-input-st-prod l "lookback" a g print-output-st-nt)) + (define (print-follow f a g) + (print-input-st-sym f "follow" a g print-output-terms)) + (define (print-LA l a g) + (print-input-st-prod l "LA" a g print-output-terms)) + + (define (print-input-st-sym f name a g print-output) + (printf "~a:\n" name) + (send a for-each-state + (lambda (state) + (for-each + (lambda (non-term) + (let ((res (f (make-trans-key state non-term)))) + (if (not (null? 
res)) + (printf "~a(~a, ~a) = ~a\n" + name + state + (gram-sym-symbol non-term) + (print-output res))))) + (send g get-non-terms)))) + (newline)) + + (define (print-input-st-prod f name a g print-output) + (printf "~a:\n" name) + (send a for-each-state + (lambda (state) + (for-each + (lambda (non-term) + (for-each + (lambda (prod) + (let ((res (f state prod))) + (if (not (null? res)) + (printf "~a(~a, ~a) = ~a\n" + name + (kernel-index state) + (prod-index prod) + (print-output res))))) + (send g get-prods-for-non-term non-term))) + (send g get-non-terms))))) + + (define (print-output-terms r) + (map + (lambda (p) + (gram-sym-symbol p)) + r)) + + (define (print-output-st-nt r) + (map + (lambda (p) + (list + (kernel-index (trans-key-st p)) + (gram-sym-symbol (trans-key-gs p)))) + r)) + + ;; init-tk-map : int -> (vectorof hashtable?) + (define (init-tk-map n) + (let ((v (make-vector n #f))) + (let loop ((i (sub1 (vector-length v)))) + (when (>= i 0) + (vector-set! v i (make-hash-table)) + (loop (sub1 i)))) + v)) + + ;; lookup-tk-map : (vectorof (symbol? int hashtable)) -> trans-key? -> int + (define (lookup-tk-map map) + (lambda (tk) + (let ((st (trans-key-st tk)) + (gs (trans-key-gs tk))) + (hash-table-get (vector-ref map (kernel-index st)) + (gram-sym-symbol gs) + (lambda () 0))))) + + ;; add-tk-map : (vectorof (symbol? int hashtable)) -> trans-key int -> + (define (add-tk-map map) + (lambda (tk v) + (let ((st (trans-key-st tk)) + (gs (trans-key-gs tk))) + (hash-table-put! (vector-ref map (kernel-index st)) + (gram-sym-symbol gs) + v)))) + + ;; digraph-tk->terml: + ;; (trans-key list) * (trans-key -> trans-key list) * (trans-key -> term list) * int * int * int + ;; -> (trans-key -> term list) + ;; DeRemer and Pennello 1982 + ;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} + ;; A specialization of digraph in the file graph.rkt + (define (digraph-tk->terml nodes edges f- num-states) + (letrec [ + ;; Will map elements of trans-key to term sets represented as bit vectors + (results (init-tk-map num-states)) + + ;; Maps elements of trans-keys to integers. + (N (init-tk-map num-states)) + + (get-N (lookup-tk-map N)) + (set-N (add-tk-map N)) + (get-f (lookup-tk-map results)) + (set-f (add-tk-map results)) + + (stack null) + (push (lambda (x) + (set! stack (cons x stack)))) + (pop (lambda () + (begin0 + (car stack) + (set! stack (cdr stack))))) + (depth (lambda () (length stack))) + + ;; traverse: 'a -> + (traverse + (lambda (x) + (push x) + (let ((d (depth))) + (set-N x d) + (set-f x (f- x)) + (for-each (lambda (y) + (when (= 0 (get-N y)) + (traverse y)) + (set-f x (bitwise-ior (get-f x) (get-f y))) + (set-N x (min (get-N x) (get-N y)))) + (edges x)) + (when (= d (get-N x)) + (let loop ((p (pop))) + (set-N p +inf.0) + (set-f p (get-f x)) + (unless (equal? 
x p) + (loop (pop))))))))] + (for-each (lambda (x) + (when (= 0 (get-N x)) + (traverse x))) + nodes) + get-f)) +) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt new file mode 100644 index 0000000..f237735 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt @@ -0,0 +1,372 @@ +(module lr0 mzscheme + + ;; Handle the LR0 automaton + + (require "grammar.rkt" + "graph.rkt" + mzlib/list + mzlib/class) + + (provide build-lr0-automaton lr0% + (struct trans-key (st gs)) trans-key-list-remove-dups + kernel-items kernel-index) + + ;; kernel = (make-kernel (LR1-item list) index) + ;; the list must be kept sorted according to item + ;; (vectorof (symbol X hashtable)) + (define (build-transition-table num-states assoc) + (let ((transitions (make-vector num-states #f))) + (let loop ((i (sub1 (vector-length transitions)))) + (when (>= i 0) + (vector-set! transitions i (make-hash-table)) + (loop (sub1 i)))) + (for-each + (lambda (trans-key/kernel) + (let ((tk (car trans-key/kernel))) + (hash-table-put! (vector-ref transitions (kernel-index (trans-key-st tk))) + (gram-sym-symbol (trans-key-gs tk)) + (cdr trans-key/kernel)))) + assoc) + transitions)) + + ;; reverse-assoc : (listof (cons/c trans-key? kernel?)) -> + ;; (listof (cons/c trans-key? (listof kernel?))) + (define (reverse-assoc assoc) + (let ((reverse-hash (make-hash-table 'equal)) + (hash-table-add! + (lambda (ht k v) + (hash-table-put! ht k (cons v (hash-table-get ht k (lambda () null))))))) + (for-each + (lambda (trans-key/kernel) + (let ((tk (car trans-key/kernel))) + (hash-table-add! reverse-hash + (make-trans-key (cdr trans-key/kernel) + (trans-key-gs tk)) + (trans-key-st tk)))) + assoc) + (hash-table-map reverse-hash cons))) + + + ;; kernel-list-remove-duplicates + ;; LR0-automaton = object of class lr0% + (define lr0% + (class object% + (super-instantiate ()) + ;; term-assoc : (listof (cons/c trans-key? kernel?)) + ;; non-term-assoc : (listof (cons/c trans-key? kernel?)) + ;; states : (vectorof kernel?) + ;; epsilons : ??? + (init-field term-assoc non-term-assoc states epsilons) + + (define transitions (build-transition-table (vector-length states) + (append term-assoc non-term-assoc))) + + (define reverse-term-assoc (reverse-assoc term-assoc)) + (define reverse-non-term-assoc (reverse-assoc non-term-assoc)) + (define reverse-transitions + (build-transition-table (vector-length states) + (append reverse-term-assoc reverse-non-term-assoc))) + + (define mapped-non-terms (map car non-term-assoc)) + + (define/public (get-mapped-non-term-keys) + mapped-non-terms) + + (define/public (get-num-states) + (vector-length states)) + + (define/public (get-epsilon-trans) + epsilons) + + (define/public (get-transitions) + (append term-assoc non-term-assoc)) + + ;; for-each-state : (state ->) -> + ;; Iteration over the states in an automaton + (define/public (for-each-state f) + (let ((num-states (vector-length states))) + (let loop ((i 0)) + (if (< i num-states) + (begin + (f (vector-ref states i)) + (loop (add1 i))))))) + + ;; run-automaton: kernel? gram-sym? -> (union kernel #f) + ;; returns the state reached from state k on input s, or #f when k + ;; has no transition on s + (define/public (run-automaton k s) + (hash-table-get (vector-ref transitions (kernel-index k)) + (gram-sym-symbol s) + (lambda () #f))) + + ;; run-automaton-back : (listof kernel?) gram-sym? 
-> (listof kernel) + ;; returns the list of states that can reach k by transitioning on s. + (define/public (run-automaton-back k s) + (apply append + (map + (lambda (k) + (hash-table-get (vector-ref reverse-transitions (kernel-index k)) + (gram-sym-symbol s) + (lambda () null))) + k))))) + + (define (union comp (eq? a b) + (define (kernel->string k) + (apply string-append + `("{" ,@(map (lambda (i) (string-append (item->string i) ", ")) + (kernel-items k)) + "}"))) + + ;; build-LR0-automaton: grammar -> LR0-automaton + ;; Constructs the kernels of the sets of LR(0) items of g + (define (build-lr0-automaton grammar) +; (printf "LR(0) automaton:\n") + (letrec ( + (epsilons (make-hash-table 'equal)) + (grammar-symbols (append (send grammar get-non-terms) + (send grammar get-terms))) + ;; first-non-term: non-term -> non-term list + ;; given a non-terminal symbol C, return those non-terminal + ;; symbols A s.t. C -> An for some string of terminals and + ;; non-terminals n where -> means a rightmost derivation in many + ;; steps. Assumes that each non-term can be reduced to a string + ;; of terms. + (first-non-term + (digraph (send grammar get-non-terms) + (lambda (nt) + (filter non-term? + (map (lambda (prod) + (sym-at-dot (make-item prod 0))) + (send grammar get-prods-for-non-term nt)))) + (lambda (nt) (list nt)) + (union non-term LR1-item list + ;; Creates a set of items containing i s.t. if A -> n.Xm is in it, + ;; X -> .o is in it too. + (LR0-closure + (lambda (i) + (cond + ((null? i) null) + (else + (let ((next-gsym (sym-at-dot (car i)))) + (cond + ((non-term? next-gsym) + (cons (car i) + (append + (apply append + (map (lambda (non-term) + (map (lambda (x) + (make-item x 0)) + (send grammar + get-prods-for-non-term + non-term))) + (first-non-term next-gsym))) + (LR0-closure (cdr i))))) + (else + (cons (car i) (LR0-closure (cdr i)))))))))) + + + ;; maps trans-keys to kernels + (automaton-term null) + (automaton-non-term null) + + ;; keeps the kernels we have seen, so we can have a unique + ;; list for each kernel + (kernels (make-hash-table 'equal)) + + (counter 0) + + ;; goto: LR1-item list -> LR1-item list list + ;; creates new kernels by moving the dot in each item in the + ;; LR0-closure of kernel to the right, and grouping them by + ;; the term/non-term moved over. Returns the kernels not + ;; yet seen, and places the trans-keys into automaton + (goto + (lambda (kernel) + (let ( + ;; maps a gram-syms to a list of items + (table (make-hash-table)) + + ;; add-item!: + ;; (symbol (listof item) hashtable) item? -> + ;; adds i into the table grouped with the grammar + ;; symbol following its dot + (add-item! + (lambda (table i) + (let ((gs (sym-at-dot i))) + (cond + (gs + (let ((already + (hash-table-get table + (gram-sym-symbol gs) + (lambda () null)))) + (unless (member i already) + (hash-table-put! table + (gram-sym-symbol gs) + (cons i already))))) + ((= 0 (vector-length (prod-rhs (item-prod i)))) + (let ((current (hash-table-get epsilons + kernel + (lambda () null)))) + (hash-table-put! epsilons + kernel + (cons i current))))))))) + + ;; Group the items of the LR0 closure of the kernel + ;; by the character after the dot + (for-each (lambda (item) + (add-item! table item)) + (LR0-closure (kernel-items kernel))) + + ;; each group is a new kernel, with the dot advanced. + ;; sorts the items in a kernel so kernels can be compared + ;; with equal? 
for using the table kernels to make sure + ;; only one representitive of each kernel is created + (filter + (lambda (x) x) + (map + (lambda (i) + (let* ((gs (car i)) + (items (cadr i)) + (new #f) + (new-kernel (sort + (filter (lambda (x) x) + (map move-dot-right items)) + item ~a on ~a\n" + (kernel->string kernel) + (kernel->string unique-kernel) + (gram-sym-symbol gs)) + (if new + unique-kernel + #f))) + (let loop ((gsyms grammar-symbols)) + (cond + ((null? gsyms) null) + (else + (let ((items (hash-table-get table + (gram-sym-symbol (car gsyms)) + (lambda () null)))) + (cond + ((null? items) (loop (cdr gsyms))) + (else + (cons (list (car gsyms) items) + (loop (cdr gsyms)))))))))))))) + + (starts + (map (lambda (init-prod) (list (make-item init-prod 0))) + (send grammar get-init-prods))) + (startk + (map (lambda (start) + (let ((k (make-kernel start counter))) + (hash-table-put! kernels start k) + (set! counter (add1 counter)) + k)) + starts)) + (new-kernels (make-queue))) + + (let loop ((old-kernels startk) + (seen-kernels null)) + (cond + ((and (empty-queue? new-kernels) (null? old-kernels)) + (make-object lr0% + automaton-term + automaton-non-term + (list->vector (reverse seen-kernels)) + epsilons)) + ((null? old-kernels) + (loop (deq! new-kernels) seen-kernels)) + (else + (enq! new-kernels (goto (car old-kernels))) + (loop (cdr old-kernels) (cons (car old-kernels) seen-kernels))))))) + + (define-struct q (f l) (make-inspector)) + (define (empty-queue? q) + (null? (q-f q))) + (define (make-queue) + (make-q null null)) + (define (enq! q i) + (if (empty-queue? q) + (let ((i (mcons i null))) + (set-q-l! q i) + (set-q-f! q i)) + (begin + (set-mcdr! (q-l q) (mcons i null)) + (set-q-l! q (mcdr (q-l q)))))) + (define (deq! q) + (begin0 + (mcar (q-f q)) + (set-q-f! q (mcdr (q-f q))))) + +) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt new file mode 100644 index 0000000..2a39b36 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt @@ -0,0 +1,54 @@ +(module parser-actions mzscheme + (require "grammar.rkt") + (provide (all-defined-except make-reduce make-reduce*) + (rename make-reduce* make-reduce)) + + ;; An action is + ;; - (make-shift int) + ;; - (make-reduce prod runtime-action) + ;; - (make-accept) + ;; - (make-goto int) + ;; - (no-action) + ;; A reduce contains a runtime-reduce so that sharing of the reduces can + ;; be easily transferred to sharing of runtime-reduces. + + (define-struct action () (make-inspector)) + (define-struct (shift action) (state) (make-inspector)) + (define-struct (reduce action) (prod runtime-reduce) (make-inspector)) + (define-struct (accept action) () (make-inspector)) + (define-struct (goto action) (state) (make-inspector)) + (define-struct (no-action action) () (make-inspector)) + + (define (make-reduce* p) + (make-reduce p + (vector (prod-index p) + (gram-sym-symbol (prod-lhs p)) + (vector-length (prod-rhs p))))) + + ;; A runtime-action is + ;; non-negative-int (shift) + ;; (vector int symbol int) (reduce) + ;; 'accept (accept) + ;; negative-int (goto) + ;; #f (no-action) + + (define (action->runtime-action a) + (cond + ((shift? a) (shift-state a)) + ((reduce? a) (reduce-runtime-reduce a)) + ((accept? a) 'accept) + ((goto? a) (- (+ (goto-state a) 1))) + ((no-action? a) #f))) + + (define (runtime-shift? x) (and (integer? x) (>= x 0))) + (define runtime-reduce? vector?) 
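+  ;; For illustration: given a hypothetical production p with (prod-index p) = 4,
+  ;; left-hand side non-terminal expr, and a two-symbol right-hand side, the
+  ;; encoding above behaves roughly as follows:
+  ;;   (action->runtime-action (make-shift 7))    ;=> 7
+  ;;   (action->runtime-action (make-reduce* p))  ;=> #(4 expr 2)
+  ;;   (action->runtime-action (make-accept))     ;=> 'accept
+  ;;   (action->runtime-action (make-goto 3))     ;=> -4, i.e. (- (+ 3 1))
+  ;;   (action->runtime-action (make-no-action))  ;=> #f
+  ;; so the runtime-* predicates can discriminate an action purely by its
+  ;; representation: non-negative integer, vector, 'accept, negative integer, or #f.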
+ (define (runtime-accept? x) (eq? x 'accept)) + (define (runtime-goto? x) (and (integer? x) (< x 0))) + + (define runtime-shift-state values) + (define (runtime-reduce-prod-num x) (vector-ref x 0)) + (define (runtime-reduce-lhs x) (vector-ref x 1)) + (define (runtime-reduce-rhs-length x) (vector-ref x 2)) + (define (runtime-goto-state x) (- (+ x 1))) + + ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt new file mode 100644 index 0000000..1be421c --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt @@ -0,0 +1,113 @@ +(module parser-builder mzscheme + + (require "input-file-parser.rkt" + "grammar.rkt" + "table.rkt" + mzlib/class + racket/contract) + (require-for-template mzscheme) + + (provide/contract + (build-parser (-> string? any/c any/c + (listof identifier?) + (listof identifier?) + (listof identifier?) + (or/c syntax? #f) + syntax? + (values any/c any/c any/c any/c)))) + + ;; fix-check-syntax : (listof identifier?) (listof identifier?) (listof identifier?) + ;; (union syntax? false/c) syntax?) -> syntax? + (define (fix-check-syntax input-terms start ends assocs prods) + (let* ((term-binders (get-term-list input-terms)) + (get-term-binder + (let ((t (make-hash-table))) + (for-each + (lambda (term) + (hash-table-put! t (syntax-e term) term)) + term-binders) + (lambda (x) + (let ((r (hash-table-get t (syntax-e x) (lambda () #f)))) + (if r + (syntax-local-introduce (datum->syntax-object r (syntax-e x) x x)) + x))))) + (rhs-list + (syntax-case prods () + (((_ rhs ...) ...) + (syntax->list (syntax (rhs ... ...))))))) + (with-syntax (((tmp ...) (map syntax-local-introduce term-binders)) + ((term-group ...) + (map (lambda (tg) + (syntax-property + (datum->syntax-object tg #f) + 'disappeared-use + tg)) + input-terms)) + ((end ...) + (map get-term-binder ends)) + ((start ...) + (map get-term-binder start)) + ((bind ...) + (syntax-case prods () + (((bind _ ...) ...) + (syntax->list (syntax (bind ...)))))) + (((bound ...) ...) + (map + (lambda (rhs) + (syntax-case rhs () + (((bound ...) (_ pbound) __) + (map get-term-binder + (cons (syntax pbound) + (syntax->list (syntax (bound ...)))))) + (((bound ...) _) + (map get-term-binder + (syntax->list (syntax (bound ...))))))) + rhs-list)) + ((prec ...) + (if assocs + (map get-term-binder + (syntax-case assocs () + (((__ term ...) ...) + (syntax->list (syntax (term ... ...)))))) + null))) + #`(when #f + (let ((bind void) ... (tmp void) ...) + (void bound ... ... term-group ... start ... end ... prec ...)))))) + (require mzlib/list "parser-actions.rkt") + (define (build-parser filename src-pos suppress input-terms start end assocs prods) + (let* ((grammar (parse-input input-terms start end assocs prods src-pos)) + (table (build-table grammar filename suppress)) + (all-tokens (make-hash-table)) + (actions-code + `(vector ,@(map prod-action (send grammar get-prods))))) + (for-each (lambda (term) + (hash-table-put! all-tokens (gram-sym-symbol term) #t)) + (send grammar get-terms)) + #;(let ((num-states (vector-length table)) + (num-gram-syms (+ (send grammar get-num-terms) + (send grammar get-num-non-terms))) + (num-ht-entries (apply + (map length (vector->list table)))) + (num-reduces + (let ((ht (make-hash-table))) + (for-each + (lambda (x) + (when (reduce? x) + (hash-table-put! 
ht x #t))) + (map cdr (apply append (vector->list table)))) + (length (hash-table-map ht void))))) + (printf "~a states, ~a grammar symbols, ~a hash-table entries, ~a reduces\n" + num-states num-gram-syms num-ht-entries num-reduces) + (printf "~a -- ~aKB, previously ~aKB\n" + (/ (+ 2 num-states + (* 4 num-states) (* 2 1.5 num-ht-entries) + (* 5 num-reduces)) 256.0) + (/ (+ 2 num-states + (* 4 num-states) (* 2 2.3 num-ht-entries) + (* 5 num-reduces)) 256.0) + (/ (+ 2 (* num-states num-gram-syms) (* 5 num-reduces)) 256.0))) + (values table + all-tokens + actions-code + (fix-check-syntax input-terms start end assocs prods)))) + + ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt new file mode 100644 index 0000000..f97e4d2 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt @@ -0,0 +1,290 @@ +#lang scheme/base + + ;; Routine to build the LALR table + + (require "grammar.rkt" + "lr0.rkt" + "lalr.rkt" + "parser-actions.rkt" + racket/contract + mzlib/list + mzlib/class) + + (define (is-a-grammar%? x) (is-a? x grammar%)) + (provide/contract + (build-table (-> is-a-grammar%? string? any/c + (vectorof (listof (cons/c (or/c term? non-term?) action?)))))) + + ;; A parse-table is (vectorof (listof (cons/c gram-sym? action))) + ;; A grouped-parse-table is (vectorof (listof (cons/c gram-sym? (listof action)))) + + ;; make-parse-table : int -> parse-table + (define (make-parse-table num-states) + (make-vector num-states null)) + + ;; table-add!: parse-table nat symbol action -> + (define (table-add! table state-index symbol val) + (vector-set! table state-index (cons (cons symbol val) + (vector-ref table state-index)))) + + ;; group-table : parse-table -> grouped-parse-table + (define (group-table table) + (list->vector + (map + (lambda (state-entry) + (let ((ht (make-hash))) + (for-each + (lambda (gs/actions) + (let ((group (hash-ref ht (car gs/actions) (lambda () null)))) + (unless (member (cdr gs/actions) group) + (hash-set! ht (car gs/actions) (cons (cdr gs/actions) group))))) + state-entry) + (hash-map ht cons))) + (vector->list table)))) + + ;; table-map : (vectorof (listof (cons/c gram-sym? X))) (gram-sym? X -> Y) -> + ;; (vectorof (listof (cons/c gram-sym? Y))) + (define (table-map f table) + (list->vector + (map + (lambda (state-entry) + (map + (lambda (gs/X) + (cons (car gs/X) (f (car gs/X) (cdr gs/X)))) + state-entry)) + (vector->list table)))) + + + (define (bit-vector-for-each f bv) + (letrec ((for-each + (lambda (bv number) + (cond + ((= 0 bv) (void)) + ((= 1 (bitwise-and 1 bv)) + (f number) + (for-each (arithmetic-shift bv -1) (add1 number))) + (else (for-each (arithmetic-shift bv -1) (add1 number))))))) + (for-each bv 0))) + + + ;; print-entry: symbol action output-port -> + ;; prints the action a for lookahead sym to the given port + (define (print-entry sym a port) + (let ((s "\t~a\t\t\t\t\t~a\t~a\n")) + (cond + ((shift? a) + (fprintf port s sym "shift" (shift-state a))) + ((reduce? a) + (fprintf port s sym "reduce" (prod-index (reduce-prod a)))) + ((accept? a) + (fprintf port s sym "accept" "")) + ((goto? a) + (fprintf port s sym "goto" (goto-state a)))))) + + + ;; count: ('a -> bool) * 'a list -> num + ;; counts the number of elements in list that satisfy pred + (define (count pred list) + (cond + ((null? 
list) 0) + ((pred (car list)) (+ 1 (count pred (cdr list)))) + (else (count pred (cdr list))))) + + ;; display-parser: LR0-automaton grouped-parse-table (listof prod?) output-port -> + ;; Prints out the parser given by table. + (define (display-parser a grouped-table prods port) + (let* ((SR-conflicts 0) + (RR-conflicts 0)) + (for-each + (lambda (prod) + (fprintf port + "~a\t~a\t=\t~a\n" + (prod-index prod) + (gram-sym-symbol (prod-lhs prod)) + (map gram-sym-symbol (vector->list (prod-rhs prod))))) + prods) + (send a for-each-state + (lambda (state) + (fprintf port "State ~a\n" (kernel-index state)) + (for-each (lambda (item) + (fprintf port "\t~a\n" (item->string item))) + (kernel-items state)) + (newline port) + (for-each + (lambda (gs/action) + (let ((sym (gram-sym-symbol (car gs/action))) + (act (cdr gs/action))) + (cond + ((null? act) (void)) + ((null? (cdr act)) + (print-entry sym (car act) port)) + (else + (fprintf port "begin conflict:\n") + (when (> (count reduce? act) 1) + (set! RR-conflicts (add1 RR-conflicts))) + (when (> (count shift? act) 0) + (set! SR-conflicts (add1 SR-conflicts))) + (map (lambda (x) (print-entry sym x port)) act) + (fprintf port "end conflict\n"))))) + (vector-ref grouped-table (kernel-index state))) + (newline port))) + + (when (> SR-conflicts 0) + (fprintf port "~a shift/reduce conflict~a\n" + SR-conflicts + (if (= SR-conflicts 1) "" "s"))) + (when (> RR-conflicts 0) + (fprintf port "~a reduce/reduce conflict~a\n" + RR-conflicts + (if (= RR-conflicts 1) "" "s"))))) + + ;; resolve-conflict : (listof action?) -> action? bool bool + (define (resolve-conflict actions) + (cond + ((null? actions) (values (make-no-action) #f #f)) + ((null? (cdr actions)) + (values (car actions) #f #f)) + (else + (let ((SR-conflict? (> (count shift? actions) 0)) + (RR-conflict? (> (count reduce? actions) 1))) + (let loop ((current-guess #f) + (rest actions)) + (cond + ((null? rest) (values current-guess SR-conflict? RR-conflict?)) + ((shift? (car rest)) (values (car rest) SR-conflict? RR-conflict?)) + ((not current-guess) + (loop (car rest) (cdr rest))) + ((and (reduce? (car rest)) + (< (prod-index (reduce-prod (car rest))) + (prod-index (reduce-prod current-guess)))) + (loop (car rest) (cdr rest))) + ((accept? (car rest)) + (eprintf "accept/reduce or accept/shift conflicts. Check the grammar for useless cycles of productions\n") + (loop current-guess (cdr rest))) + (else (loop current-guess (cdr rest))))))))) + + ;; resolve-conflicts : grouped-parse-table bool -> parse-table + (define (resolve-conflicts grouped-table suppress) + (let* ((SR-conflicts 0) + (RR-conflicts 0) + (table (table-map + (lambda (gs actions) + (let-values (((action SR? RR?) + (resolve-conflict actions))) + (when SR? + (set! SR-conflicts (add1 SR-conflicts))) + (when RR? + (set! RR-conflicts (add1 RR-conflicts))) + action)) + grouped-table))) + (unless suppress + (when (> SR-conflicts 0) + (eprintf "~a shift/reduce conflict~a\n" + SR-conflicts + (if (= SR-conflicts 1) "" "s"))) + (when (> RR-conflicts 0) + (eprintf "~a reduce/reduce conflict~a\n" + RR-conflicts + (if (= RR-conflicts 1) "" "s")))) + table)) + + + ;; resolve-sr-conflict : (listof action) (union int #f) -> (listof action) + ;; Resolves a single shift-reduce conflict, if precedences are in place. + (define (resolve-sr-conflict/prec actions shift-prec) + (let* ((shift (if (shift? (car actions)) + (car actions) + (cadr actions))) + (reduce (if (shift? 
(car actions)) + (cadr actions) + (car actions))) + (reduce-prec (prod-prec (reduce-prod reduce)))) + (cond + ((and shift-prec reduce-prec) + (cond + ((< (prec-num shift-prec) (prec-num reduce-prec)) + (list reduce)) + ((> (prec-num shift-prec) (prec-num reduce-prec)) + (list shift)) + ((eq? 'left (prec-assoc shift-prec)) + (list reduce)) + ((eq? 'right (prec-assoc shift-prec)) + (list shift)) + (else null))) + (else actions)))) + + + ;; resolve-prec-conflicts : parse-table -> grouped-parse-table + (define (resolve-prec-conflicts table) + (table-map + (lambda (gs actions) + (cond + ((and (term? gs) + (= 2 (length actions)) + (or (shift? (car actions)) + (shift? (cadr actions)))) + (resolve-sr-conflict/prec actions (term-prec gs))) + (else actions))) + (group-table table))) + + ;; build-table: grammar string bool -> parse-table + (define (build-table g file suppress) + (let* ((a (build-lr0-automaton g)) + (term-vector (list->vector (send g get-terms))) + (end-terms (send g get-end-terms)) + (table (make-parse-table (send a get-num-states))) + (get-lookahead (compute-LA a g)) + (reduce-cache (make-hash))) + + (for-each + (lambda (trans-key/state) + (let ((from-state-index (kernel-index (trans-key-st (car trans-key/state)))) + (gs (trans-key-gs (car trans-key/state))) + (to-state (cdr trans-key/state))) + (table-add! table from-state-index gs + (cond + ((non-term? gs) + (make-goto (kernel-index to-state))) + ((member gs end-terms) + (make-accept)) + (else + (make-shift + (kernel-index to-state))))))) + (send a get-transitions)) + + (send a for-each-state + (lambda (state) + (for-each + (lambda (item) + (let ((item-prod (item-prod item))) + (bit-vector-for-each + (lambda (term-index) + (unless (start-item? item) + (let ((r (hash-ref reduce-cache item-prod + (lambda () + (let ((r (make-reduce item-prod))) + (hash-set! reduce-cache item-prod r) + r))))) + (table-add! table + (kernel-index state) + (vector-ref term-vector term-index) + r)))) + (get-lookahead state item-prod)))) + (append (hash-ref (send a get-epsilon-trans) state (lambda () null)) + (filter (lambda (item) + (not (move-dot-right item))) + (kernel-items state)))))) + + (let ((grouped-table (resolve-prec-conflicts table))) + (unless (string=? file "") + (with-handlers [(exn:fail:filesystem? + (lambda (e) + (eprintf + "Cannot write debug output to file \"~a\": ~a\n" + file + (exn-message e))))] + (call-with-output-file file + (lambda (port) + (display-parser a grouped-table (send g get-prods) port)) + #:exists 'truncate))) + (resolve-conflicts grouped-table suppress)))) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt new file mode 100644 index 0000000..31b3cc6 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt @@ -0,0 +1,118 @@ +(module yacc-helper mzscheme + + (require mzlib/list + "../private-lex/token-syntax.rkt") + + ;; General helper routines + + (provide duplicate-list? remove-duplicates overlap? vector-andmap display-yacc) + + (define (vector-andmap f v) + (let loop ((i 0)) + (cond + ((= i (vector-length v)) #t) + (else (if (f (vector-ref v i)) + (loop (add1 i)) + #f))))) + + ;; duplicate-list?: symbol list -> #f | symbol + ;; returns a symbol that exists twice in l, or false if no such symbol + ;; exists + (define (duplicate-list? l) + (letrec ((t (make-hash-table)) + (dl? (lambda (l) + (cond + ((null? 
l) #f) + ((hash-table-get t (car l) (lambda () #f)) => + (lambda (x) x)) + (else + (hash-table-put! t (car l) (car l)) + (dl? (cdr l))))))) + (dl? l))) + + ;; remove-duplicates: syntax-object list -> syntax-object list + ;; removes the duplicates from the lists + (define (remove-duplicates sl) + (let ((t (make-hash-table))) + (letrec ((x + (lambda (sl) + (cond + ((null? sl) sl) + ((hash-table-get t (syntax-object->datum (car sl)) (lambda () #f)) + (x (cdr sl))) + (else + (hash-table-put! t (syntax-object->datum (car sl)) #t) + (cons (car sl) (x (cdr sl)))))))) + (x sl)))) + + ;; overlap?: symbol list * symbol list -> #f | symbol + ;; Returns an symbol in l1 intersect l2, or #f is no such symbol exists + (define (overlap? l1 l2) + (let/ec ret + (let ((t (make-hash-table))) + (for-each (lambda (s1) + (hash-table-put! t s1 s1)) + l1) + (for-each (lambda (s2) + (cond + ((hash-table-get t s2 (lambda () #f)) => + (lambda (o) (ret o))))) + l2) + #f))) + + + (define (display-yacc grammar tokens start precs port) + (let-syntax ((p (syntax-rules () + ((_ args ...) (fprintf port args ...))))) + (let* ((tokens (map syntax-local-value tokens)) + (eterms (filter e-terminals-def? tokens)) + (terms (filter terminals-def? tokens)) + (term-table (make-hash-table)) + (display-rhs + (lambda (rhs) + (for-each (lambda (sym) (p "~a " (hash-table-get term-table sym (lambda () sym)))) + (car rhs)) + (if (= 3 (length rhs)) + (p "%prec ~a" (cadadr rhs))) + (p "\n")))) + (for-each + (lambda (t) + (for-each + (lambda (t) + (hash-table-put! term-table t (format "'~a'" t))) + (syntax-object->datum (e-terminals-def-t t)))) + eterms) + (for-each + (lambda (t) + (for-each + (lambda (t) + (p "%token ~a\n" t) + (hash-table-put! term-table t (format "~a" t))) + (syntax-object->datum (terminals-def-t t)))) + terms) + (if precs + (for-each (lambda (prec) + (p "%~a " (car prec)) + (for-each (lambda (tok) + (p " ~a" (hash-table-get term-table tok))) + (cdr prec)) + (p "\n")) + precs)) + (p "%start ~a\n" start) + (p "%%\n") + + (for-each (lambda (prod) + (let ((nt (car prod))) + (p "~a: " nt) + (display-rhs (cadr prod)) + (for-each (lambda (rhs) + (p "| ") + (display-rhs rhs)) + (cddr prod)) + (p ";\n"))) + grammar) + (p "%%\n")))) + + +) + diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt new file mode 100644 index 0000000..7f766eb --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt @@ -0,0 +1,135 @@ +(module yacc-to-scheme mzscheme + (require br-parser-tools/lex + (prefix : br-parser-tools/lex-sre) + br-parser-tools/yacc + syntax/readerr + mzlib/list) + (provide trans) + + (define match-double-string + (lexer + ((:+ (:~ #\" #\\)) (append (string->list lexeme) + (match-double-string input-port))) + ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port))) + (#\" null))) + + (define match-single-string + (lexer + ((:+ (:~ #\' #\\)) (append (string->list lexeme) + (match-single-string input-port))) + ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port))) + (#\' null))) + + (define-lex-abbrevs + (letter (:or (:/ "a" "z") (:/ "A" "Z"))) + (digit (:/ "0" "9")) + (initial (:or letter (char-set "!$%&*/<=>?^_~@"))) + (subsequent (:or initial digit (char-set "+-.@"))) + (comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/"))) + + (define-empty-tokens x + (EOF PIPE |:| SEMI |%%| %prec)) + (define-tokens y + (SYM STRING)) + + (define 
get-token-grammar + (lexer-src-pos + ("%%" '|%%|) + (":" (string->symbol lexeme)) + ("%prec" (string->symbol lexeme)) + (#\| 'PIPE) + ((:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}"))) + (return-without-pos (get-token-grammar input-port))) + (#\; 'SEMI) + (#\' (token-STRING (string->symbol (list->string (match-single-string input-port))))) + (#\" (token-STRING (string->symbol (list->string (match-double-string input-port))))) + ((:: initial (:* subsequent)) (token-SYM (string->symbol lexeme))))) + + (define (parse-grammar enter-term enter-empty-term enter-non-term) + (parser + (tokens x y) + (src-pos) + (error (lambda (tok-ok tok-name tok-value start-pos end-pos) + (raise-read-error + (format "Error Parsing YACC grammar at token: ~a with value: ~a" tok-name tok-value) + (file-path) + (position-line start-pos) + (position-col start-pos) + (position-offset start-pos) + (- (position-offset end-pos) (position-offset start-pos))))) + + (end |%%|) + (start gram) + (grammar + (gram + ((production) (list $1)) + ((production gram) (cons $1 $2))) + (production + ((SYM |:| prods SEMI) + (begin + (enter-non-term $1) + (cons $1 $3)))) + (prods + ((rhs) (list `(,$1 #f))) + ((rhs prec) (list `(,$1 ,$2 #f))) + ((rhs PIPE prods) (cons `(,$1 #f) $3)) + ((rhs prec PIPE prods) (cons `(,$1 ,$2 #f) $4))) + (prec + ((%prec SYM) + (begin + (enter-term $2) + (list 'prec $2))) + ((%prec STRING) + (begin + (enter-empty-term $2) + (list 'prec $2)))) + (rhs + (() null) + ((SYM rhs) + (begin + (enter-term $1) + (cons $1 $2))) + ((STRING rhs) + (begin + (enter-empty-term $1) + (cons $1 $2))))))) + + (define (symbolstring a) (symbol->string b))) + + (define (trans filename) + (let* ((i (open-input-file filename)) + (terms (make-hash-table)) + (eterms (make-hash-table)) + (nterms (make-hash-table)) + (enter-term + (lambda (s) + (if (not (hash-table-get nterms s (lambda () #f))) + (hash-table-put! terms s #t)))) + (enter-empty-term + (lambda (s) + (if (not (hash-table-get nterms s (lambda () #f))) + (hash-table-put! eterms s #t)))) + (enter-non-term + (lambda (s) + (hash-table-remove! terms s) + (hash-table-remove! eterms s) + (hash-table-put! nterms s #t)))) + (port-count-lines! i) + (file-path filename) + (regexp-match "%%" i) + (begin0 + (let ((gram ((parse-grammar enter-term enter-empty-term enter-non-term) + (lambda () + (let ((t (get-token-grammar i))) + t))))) + `(begin + (define-tokens t ,(sort (hash-table-map terms (lambda (k v) k)) symbol +;; (vectorof (symbol runtime-action hashtable)) +(define-for-syntax (convert-parse-table table) + (list->vector + (map + (lambda (state-entry) + (let ((ht (make-hasheq))) + (for-each + (lambda (gs/action) + (hash-set! ht + (gram-sym-symbol (car gs/action)) + (action->runtime-action (cdr gs/action)))) + state-entry) + ht)) + (vector->list table)))) + +(define-syntax (parser stx) + (syntax-case stx () + ((_ args ...) + (let ((arg-list (syntax->list (syntax (args ...)))) + (src-pos #f) + (debug #f) + (error #f) + (tokens #f) + (start #f) + (end #f) + (precs #f) + (suppress #f) + (grammar #f) + (yacc-output #f)) + (for-each + (lambda (arg) + (syntax-case* arg (debug error tokens start end precs grammar + suppress src-pos yacc-output) + (lambda (a b) + (eq? (syntax-e a) (syntax-e b))) + ((debug filename) + (cond + ((not (string? (syntax-e (syntax filename)))) + (raise-syntax-error + #f + "Debugging filename must be a string" + stx + (syntax filename))) + (debug + (raise-syntax-error #f "Multiple debug declarations" stx)) + (else + (set! 
debug (syntax-e (syntax filename)))))) + ((suppress) + (set! suppress #t)) + ((src-pos) + (set! src-pos #t)) + ((error expression) + (if error + (raise-syntax-error #f "Multiple error declarations" stx) + (set! error (syntax expression)))) + ((tokens def ...) + (begin + (when tokens + (raise-syntax-error #f "Multiple tokens declarations" stx)) + (let ((defs (syntax->list (syntax (def ...))))) + (for-each + (lambda (d) + (unless (identifier? d) + (raise-syntax-error + #f + "Token-group name must be an identifier" + stx + d))) + defs) + (set! tokens defs)))) + ((start symbol ...) + (let ((symbols (syntax->list (syntax (symbol ...))))) + (for-each + (lambda (sym) + (unless (identifier? sym) + (raise-syntax-error #f + "Start symbol must be a symbol" + stx + sym))) + symbols) + (when start + (raise-syntax-error #f "Multiple start declarations" stx)) + (when (null? symbols) + (raise-syntax-error #f + "Missing start symbol" + stx + arg)) + (set! start symbols))) + ((end symbols ...) + (let ((symbols (syntax->list (syntax (symbols ...))))) + (for-each + (lambda (sym) + (unless (identifier? sym) + (raise-syntax-error #f + "End token must be a symbol" + stx + sym))) + symbols) + (let ((d (duplicate-list? (map syntax-e symbols)))) + (when d + (raise-syntax-error + #f + (format "Duplicate end token definition for ~a" d) + stx + arg)) + (when (null? symbols) + (raise-syntax-error + #f + "end declaration must contain at least 1 token" + stx + arg)) + (when end + (raise-syntax-error #f "Multiple end declarations" stx)) + (set! end symbols)))) + ((precs decls ...) + (if precs + (raise-syntax-error #f "Multiple precs declarations" stx) + (set! precs (syntax/loc arg (decls ...))))) + ((grammar prods ...) + (if grammar + (raise-syntax-error #f "Multiple grammar declarations" stx) + (set! grammar (syntax/loc arg (prods ...))))) + ((yacc-output filename) + (cond + ((not (string? (syntax-e (syntax filename)))) + (raise-syntax-error #f + "Yacc-output filename must be a string" + stx + (syntax filename))) + (yacc-output + (raise-syntax-error #f "Multiple yacc-output declarations" stx)) + (else + (set! yacc-output (syntax-e (syntax filename)))))) + (_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg)))) + (syntax->list (syntax (args ...)))) + (unless tokens + (raise-syntax-error #f "missing tokens declaration" stx)) + (unless error + (raise-syntax-error #f "missing error declaration" stx)) + (unless grammar + (raise-syntax-error #f "missing grammar declaration" stx)) + (unless end + (raise-syntax-error #f "missing end declaration" stx)) + (unless start + (raise-syntax-error #f "missing start declaration" stx)) + (let-values (((table all-term-syms actions check-syntax-fix) + (build-parser (if debug debug "") + src-pos + suppress + tokens + start + end + precs + grammar))) + (when (and yacc-output (not (string=? yacc-output ""))) + (with-handlers [(exn:fail:filesystem? 
+ (lambda (e) + (eprintf + "Cannot write yacc-output to file \"~a\"\n" + yacc-output)))] + (call-with-output-file yacc-output + (lambda (port) + (display-yacc (syntax->datum grammar) + tokens + (map syntax->datum start) + (if precs + (syntax->datum precs) + #f) + port)) + #:exists 'truncate))) + (with-syntax ((check-syntax-fix check-syntax-fix) + (err error) + (ends end) + (starts start) + (debug debug) + (table (convert-parse-table table)) + (all-term-syms all-term-syms) + (actions actions) + (src-pos src-pos)) + (syntax + (begin + check-syntax-fix + (parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos))))))) + (_ + (raise-syntax-error #f + "parser must have the form (parser args ...)" + stx)))) + +(define (reduce-stack stack num ret-vals src-pos) + (cond + ((> num 0) + (let* ((top-frame (car stack)) + (ret-vals + (if src-pos + (cons (stack-frame-value top-frame) + (cons (stack-frame-start-pos top-frame) + (cons (stack-frame-end-pos top-frame) + ret-vals))) + (cons (stack-frame-value top-frame) ret-vals)))) + (reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))) + (else (values stack ret-vals)))) + +;; extract-helper : (symbol or make-token) any any -> symbol any any any +(define (extract-helper tok v1 v2) + (cond + ((symbol? tok) + (values tok #f v1 v2)) + ((token? tok) + (values (real-token-name tok) (real-token-value tok) v1 v2)) + (else (raise-argument-error 'parser + "(or/c symbol? token?)" + 0 + tok)))) + +;; well-formed-position-token?: any -> boolean +;; Returns true if pt is a position token whose position-token-token +;; is itself a token or a symbol. +;; This is meant to help raise more precise error messages when +;; a tokenizer produces an erroneous position-token wrapped twice. +;; (as often happens when omitting return-without-pos). +(define (well-formed-token-field? t) + (or (symbol? t) + (token? t))) + +(define (well-formed-position-token? pt) + (and (position-token? pt) + (well-formed-token-field? (position-token-token pt)))) + +(define (well-formed-srcloc-token? st) + (and (srcloc-token? st) + (well-formed-token-field? (srcloc-token-token st)))) + +;; extract-src-pos : position-token -> symbol any any any +(define (extract-src-pos ip) + (unless (well-formed-position-token? ip) + (raise-argument-error 'parser + "well-formed-position-token?" + 0 + ip)) + (extract-helper (position-token-token ip) + (position-token-start-pos ip) + (position-token-end-pos ip))) + +(define (extract-srcloc ip) + (unless (well-formed-srcloc-token? ip) + (raise-argument-error 'parser + "well-formed-srcloc-token?" + 0 + ip)) + (let ([loc (srcloc-token-srcloc ip)]) + (extract-helper (srcloc-token-token ip) + (position-token (srcloc-position loc) (srcloc-line loc) (srcloc-column loc)) + (position-token (+ (srcloc-position loc) (srcloc-span loc)) #f #f)))) + + +;; extract-no-src-pos : (symbol or make-token) -> symbol any any any +(define (extract-no-src-pos ip) + (extract-helper ip #f #f)) + +(define-struct stack-frame (state value start-pos end-pos) #:inspector (make-inspector)) + +(define (make-empty-stack i) (list (make-stack-frame i #f #f #f))) + + +;; The table is a vector that maps each state to a hash-table that maps a +;; terminal symbol to either an accept, shift, reduce, or goto structure. +; We encode the structures according to the runtime-action data definition in +;; parser-actions.rkt +(define (parser-body debug? 
err starts ends table all-term-syms actions src-pos) + (local ((define extract + (if src-pos + extract-src-pos + extract-no-src-pos)) + + (define (fix-error stack tok val start-pos end-pos get-token) + (when debug? (pretty-print stack)) + (local ((define (remove-input tok val start-pos end-pos) + (if (memq tok ends) + (raise-read-error "parser: Cannot continue after error" + #f #f #f #f #f) + (let ((a (find-action stack tok val start-pos end-pos))) + (cond + ((runtime-shift? a) + ;; (printf "shift:~a\n" (runtime-shift-state a)) + (cons (make-stack-frame (runtime-shift-state a) + val + start-pos + end-pos) + stack)) + (else + ;; (printf "discard input:~a\n" tok) + (let-values (((tok val start-pos end-pos) + (extract (get-token)))) + (remove-input tok val start-pos end-pos)))))))) + (let remove-states () + (let ((a (find-action stack 'error #f start-pos end-pos))) + (cond + ((runtime-shift? a) + ;; (printf "shift:~a\n" (runtime-shift-state a)) + (set! stack + (cons + (make-stack-frame (runtime-shift-state a) + #f + start-pos + end-pos) + stack)) + (remove-input tok val start-pos end-pos)) + (else + ;; (printf "discard state:~a\n" (car stack)) + (cond + ((< (length stack) 2) + (raise-read-error "parser: Cannot continue after error" + #f #f #f #f #f)) + (else + (set! stack (cdr stack)) + (remove-states))))))))) + + (define (find-action stack tok val start-pos end-pos) + (unless (hash-ref all-term-syms + tok + #f) + (if src-pos + (err #f tok val start-pos end-pos) + (err #f tok val)) + (raise-read-error (format "parser: got token of unknown type ~a" tok) + #f #f #f #f #f)) + (hash-ref (vector-ref table (stack-frame-state (car stack))) + tok + #f)) + + (define (make-parser start-number) + (lambda (get-token) + (unless (and (procedure? get-token) + (procedure-arity-includes? get-token 0)) + (error 'get-token "expected a nullary procedure, got ~e" get-token)) + (let parsing-loop ((stack (make-empty-stack start-number)) + (ip (get-token))) + (let-values (((tok val start-pos end-pos) + (extract ip))) + (let ((action (find-action stack tok val start-pos end-pos))) + (cond + ((runtime-shift? action) + ;; (printf "shift:~a\n" (runtime-shift-state action)) + (parsing-loop (cons (make-stack-frame (runtime-shift-state action) + val + start-pos + end-pos) + stack) + (get-token))) + ((runtime-reduce? action) + ;; (printf "reduce:~a\n" (runtime-reduce-prod-num action)) + (let-values (((new-stack args) + (reduce-stack stack + (runtime-reduce-rhs-length action) + null + src-pos))) + (let ((goto + (runtime-goto-state + (hash-ref + (vector-ref table (stack-frame-state (car new-stack))) + (runtime-reduce-lhs action))))) + (parsing-loop + (cons + (if src-pos + (make-stack-frame + goto + (apply (vector-ref actions (runtime-reduce-prod-num action)) args) + (if (null? args) start-pos (cadr args)) + (if (null? args) + end-pos + (list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1)))) + (make-stack-frame + goto + (apply (vector-ref actions (runtime-reduce-prod-num action)) args) + #f + #f)) + new-stack) + ip)))) + ((runtime-accept? action) + ;; (printf "accept\n") + (stack-frame-value (car stack))) + (else + (if src-pos + (err #t tok val start-pos end-pos) + (err #t tok val)) + (parsing-loop (fix-error stack tok val start-pos end-pos get-token) + (get-token)))))))))) + (cond + ((null? (cdr starts)) (make-parser 0)) + (else + (let loop ((l starts) + (i 0)) + (cond + ((null? 
l) null) + (else (cons (make-parser i) (loop (cdr l) (add1 i)))))))))) diff --git a/br-parser-tools/br-parser-tools-lib/info.rkt b/br-parser-tools/br-parser-tools-lib/info.rkt new file mode 100644 index 0000000..f9f9e11 --- /dev/null +++ b/br-parser-tools/br-parser-tools-lib/info.rkt @@ -0,0 +1,11 @@ +#lang info + +(define collection 'multi) +(define deps '("scheme-lib" + "base" + "compatibility-lib")) +(define build-deps '("rackunit-lib")) + +(define pkg-desc "implementation (no documentation) part of \"br-parser-tools\"") + +(define pkg-authors '(mflatt)) diff --git a/br-parser-tools/br-parser-tools/LICENSE.txt b/br-parser-tools/br-parser-tools/LICENSE.txt new file mode 100644 index 0000000..d012f58 --- /dev/null +++ b/br-parser-tools/br-parser-tools/LICENSE.txt @@ -0,0 +1,11 @@ +parser-tools +Copyright (c) 2010-2014 PLT Design Inc. + +This package is distributed under the GNU Lesser General Public +License (LGPL). This means that you can link this package into proprietary +applications, provided you follow the rules stated in the LGPL. You +can also modify this package; if you distribute a modified version, +you must distribute it under the terms of the LGPL, which in +particular means that you must release the source code for the +modified software. See http://www.gnu.org/copyleft/lesser.html +for more information. diff --git a/br-parser-tools/br-parser-tools/info.rkt b/br-parser-tools/br-parser-tools/info.rkt new file mode 100644 index 0000000..6a692a8 --- /dev/null +++ b/br-parser-tools/br-parser-tools/info.rkt @@ -0,0 +1,12 @@ +#lang info + +(define collection 'multi) + +(define deps '("br-parser-tools-lib" + "br-parser-tools-doc")) +(define implies '("br-parser-tools-lib" + "br-parser-tools-doc")) + +(define pkg-desc "Lex- and Yacc-style parsing tools") + +(define pkg-authors '(mflatt)) diff --git a/brag/LICENSE b/brag/LICENSE new file mode 100755 index 0000000..65c5ca8 --- /dev/null +++ b/brag/LICENSE @@ -0,0 +1,165 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + + This version of the GNU Lesser General Public License incorporates +the terms and conditions of version 3 of the GNU General Public +License, supplemented by the additional permissions listed below. + + 0. Additional Definitions. + + As used herein, "this License" refers to version 3 of the GNU Lesser +General Public License, and the "GNU GPL" refers to version 3 of the GNU +General Public License. + + "The Library" refers to a covered work governed by this License, +other than an Application or a Combined Work as defined below. + + An "Application" is any work that makes use of an interface provided +by the Library, but which is not otherwise based on the Library. +Defining a subclass of a class defined by the Library is deemed a mode +of using an interface provided by the Library. + + A "Combined Work" is a work produced by combining or linking an +Application with the Library. The particular version of the Library +with which the Combined Work was made is also called the "Linked +Version". + + The "Minimal Corresponding Source" for a Combined Work means the +Corresponding Source for the Combined Work, excluding any source code +for portions of the Combined Work that, considered in isolation, are +based on the Application, and not on the Linked Version. 
+ + The "Corresponding Application Code" for a Combined Work means the +object code and/or source code for the Application, including any data +and utility programs needed for reproducing the Combined Work from the +Application, but excluding the System Libraries of the Combined Work. + + 1. Exception to Section 3 of the GNU GPL. + + You may convey a covered work under sections 3 and 4 of this License +without being bound by section 3 of the GNU GPL. + + 2. Conveying Modified Versions. + + If you modify a copy of the Library, and, in your modifications, a +facility refers to a function or data to be supplied by an Application +that uses the facility (other than as an argument passed when the +facility is invoked), then you may convey a copy of the modified +version: + + a) under this License, provided that you make a good faith effort to + ensure that, in the event an Application does not supply the + function or data, the facility still operates, and performs + whatever part of its purpose remains meaningful, or + + b) under the GNU GPL, with none of the additional permissions of + this License applicable to that copy. + + 3. Object Code Incorporating Material from Library Header Files. + + The object code form of an Application may incorporate material from +a header file that is part of the Library. You may convey such object +code under terms of your choice, provided that, if the incorporated +material is not limited to numerical parameters, data structure +layouts and accessors, or small macros, inline functions and templates +(ten or fewer lines in length), you do both of the following: + + a) Give prominent notice with each copy of the object code that the + Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the object code with a copy of the GNU GPL and this license + document. + + 4. Combined Works. + + You may convey a Combined Work under terms of your choice that, +taken together, effectively do not restrict modification of the +portions of the Library contained in the Combined Work and reverse +engineering for debugging such modifications, if you also do each of +the following: + + a) Give prominent notice with each copy of the Combined Work that + the Library is used in it and that the Library and its use are + covered by this License. + + b) Accompany the Combined Work with a copy of the GNU GPL and this license + document. + + c) For a Combined Work that displays copyright notices during + execution, include the copyright notice for the Library among + these notices, as well as a reference directing the user to the + copies of the GNU GPL and this license document. + + d) Do one of the following: + + 0) Convey the Minimal Corresponding Source under the terms of this + License, and the Corresponding Application Code in a form + suitable for, and under terms that permit, the user to + recombine or relink the Application with a modified version of + the Linked Version to produce a modified Combined Work, in the + manner specified by section 6 of the GNU GPL for conveying + Corresponding Source. + + 1) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (a) uses at run time + a copy of the Library already present on the user's computer + system, and (b) will operate properly with a modified version + of the Library that is interface-compatible with the Linked + Version. 
+ + e) Provide Installation Information, but only if you would otherwise + be required to provide such information under section 6 of the + GNU GPL, and only to the extent that such information is + necessary to install and execute a modified version of the + Combined Work produced by recombining or relinking the + Application with a modified version of the Linked Version. (If + you use option 4d0, the Installation Information must accompany + the Minimal Corresponding Source and Corresponding Application + Code. If you use option 4d1, you must provide the Installation + Information in the manner specified by section 6 of the GNU GPL + for conveying Corresponding Source.) + + 5. Combined Libraries. + + You may place library facilities that are a work based on the +Library side by side in a single library together with other library +facilities that are not Applications and are not covered by this +License, and convey such a combined library under terms of your +choice, if you do both of the following: + + a) Accompany the combined library with a copy of the same work based + on the Library, uncombined with any other library facilities, + conveyed under the terms of this License. + + b) Give prominent notice with the combined library that part of it + is a work based on the Library, and explaining where to find the + accompanying uncombined form of the same work. + + 6. Revised Versions of the GNU Lesser General Public License. + + The Free Software Foundation may publish revised and/or new versions +of the GNU Lesser General Public License from time to time. Such new +versions will be similar in spirit to the present version, but may +differ in detail to address new problems or concerns. + + Each version is given a distinguishing version number. If the +Library as you received it specifies that a certain numbered version +of the GNU Lesser General Public License "or any later version" +applies to it, you have the option of following the terms and +conditions either of that published version or of any later version +published by the Free Software Foundation. If the Library as you +received it does not specify a version number of the GNU Lesser +General Public License, you may choose any version of the GNU Lesser +General Public License ever published by the Free Software Foundation. + + If the Library as you received it specifies that a proxy can decide +whether future versions of the GNU Lesser General Public License shall +apply, that proxy's public statement of acceptance of any version is +permanent authorization for you to choose that version for the +Library. diff --git a/brag/README.md b/brag/README.md new file mode 100755 index 0000000..623c425 --- /dev/null +++ b/brag/README.md @@ -0,0 +1,4 @@ +This repo contains a fork of Danny Yoo's RAGG, a Racket AST Generator Generator, +also known as a parser generator. + +Licensed under the LGPL. diff --git a/brag/brag/Makefile b/brag/brag/Makefile new file mode 100755 index 0000000..579d424 --- /dev/null +++ b/brag/brag/Makefile @@ -0,0 +1,12 @@ +doc: + scribble ++xref-in setup/xref load-collections-xref --redirect-main http://docs.racket-lang.org/ --dest-name index.html manual.scrbl + +clean: + git clean -fdx . 
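+# "web" uploads the working directory to hashcollision.org after a clean
+# rebuild of the docs and the .plt package; "plt" builds that package with
+# `raco pack`.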
+ +web: clean plt doc + scp -r * hashcollision.org:webapps/htdocs/ragg/ + + +plt: + raco pack --collect ragg.plt ragg diff --git a/brag/brag/brag.scrbl b/brag/brag/brag.scrbl new file mode 100755 index 0000000..9017342 --- /dev/null +++ b/brag/brag/brag.scrbl @@ -0,0 +1,1157 @@ +#lang scribble/manual +@(require scribble/eval + racket/date + file/md5 + (for-label racket + brag/support + brag/examples/nested-word-list + (only-in br-parser-tools/lex lexer-src-pos) + (only-in syntax/parse syntax-parse ~literal))) + + +@(define (lookup-date filename [default ""]) + (cond + [(file-exists? filename) + (define modify-seconds (file-or-directory-modify-seconds filename)) + (define a-date (seconds->date modify-seconds)) + (date->string a-date)] + [else + default])) + +@(define (compute-md5sum filename [default ""]) + (cond [(file-exists? filename) + (bytes->string/utf-8 (call-with-input-file filename md5 #:mode 'binary))] + [else + default])) + + + +@title{brag: the Beautiful Racket AST Generator} +@author["Danny Yoo (95%)" "Matthew Butterick (5%)"] + +@defmodulelang[brag] + +@section{Quick start} + +@(define my-eval (make-base-eval)) +@(my-eval '(require brag/examples/nested-word-list + racket/list + racket/match)) + +Suppose we're given the +following string: +@racketblock["(radiant (humble))"] + + +How would we turn this string into a structured value? That is, how would we @emph{parse} it? (Let's also suppose we've never heard of @racket[read].) + +First, we need to consider the structure of the things we'd like to parse. The +string above looks like a nested list of words. Good start. + +Second, how might we describe this formally — meaning, in a way that a computer could understand? A common notation to describe the structure of these things is @link["http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form"]{Backus-Naur Form} (BNF). So let's try to notate the structure of nested word lists in BNF. + +@nested[#:style 'code-inset]{ + @verbatim{ + nested-word-list: WORD + | LEFT-PAREN nested-word-list* RIGHT-PAREN +}} + +What we intend by this notation is this: @racket[nested-word-list] is either a @racket[WORD], or a parenthesized list of @racket[nested-word-list]s. We use the character @litchar{*} to represent zero or more repetitions of the previous thing. We treat the uppercased @racket[LEFT-PAREN], @racket[RIGHT-PAREN], and @racket[WORD] as placeholders for @emph{tokens} (a @tech{token} being the smallest meaningful item in the parsed string): + +Here are a few examples of tokens: +@interaction[#:eval my-eval + (require brag/support) + (token 'LEFT-PAREN) + (token 'WORD "crunchy" #:span 7) + (token 'RIGHT-PAREN)] + +This BNF description is also known as a @deftech{grammar}. Just as it does in a natural language like English or French, a grammar describes something in terms of what elements can fit where. + +Have we made progress? We have a valid grammar. But we're still missing a @emph{parser}: a function that can use that description to make structures out of a sequence of tokens. + +Meanwhile, it's clear that we don't yet have a valid program because there's no @litchar{#lang} line. Let's add one: put @litchar{#lang brag} at the top of the grammar, and save it as a file called @filepath{nested-word-list.rkt}. + +@filebox["nested-word-list.rkt"]{ + @verbatim{ + #lang brag + nested-word-list: WORD + | LEFT-PAREN nested-word-list* RIGHT-PAREN +}} + +Now it's a proper program. But what does it do? 
+ +@interaction[#:eval my-eval + @eval:alts[(require "nested-word-list.rkt") (void)] + parse + ] + +It gives us a @racket[parse] function. Let's investigate what @racket[parse] +does. What happens if we pass it a sequence of tokens? + +@interaction[#:eval my-eval + (define a-parsed-value + (parse (list (token 'LEFT-PAREN "(") + (token 'WORD "some") + (token 'LEFT-PAREN "[") + (token 'WORD "pig") + (token 'RIGHT-PAREN "]") + (token 'RIGHT-PAREN ")")))) + a-parsed-value] + +Those who have messed around with macros will recognize this as a @seclink["stx-obj" #:doc '(lib "scribblings/guide/guide.scrbl")]{syntax object}. + +@interaction[#:eval my-eval + (syntax->datum a-parsed-value) + ] + +That's @racket[(some [pig])], essentially. + +What happens if we pass our @racket[parse] function a bigger source of tokens? + +@interaction[#:eval my-eval + @code:comment{tokenize: string -> (sequenceof token-struct?)} + @code:comment{Generate tokens from a string:} + (define (tokenize s) + (for/list ([str (regexp-match* #px"\\(|\\)|\\w+" s)]) + (match str + ["(" + (token 'LEFT-PAREN str)] + [")" + (token 'RIGHT-PAREN str)] + [else + (token 'WORD str)]))) + + @code:comment{For example:} + (define token-source (tokenize "(welcome (to (((brag)) ())))")) + (define v (parse token-source)) + (syntax->datum v) + ] + +Welcome to @tt{brag}. + + + + +@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +@section{Introduction} + +@tt{brag} is a parser generator designed to be easy +to use: + +@itemize[ + + @item{It provides a @litchar{#lang} for writing BNF grammars. + A module written in @litchar{#lang brag} automatically generates a + parser. The grammar controls the structure of the @tech{syntax objects} it generates.} + + @item{The language uses a few conventions to simplify the expression of + grammars. The first rule in the grammar is assumed to be the + starting production. Identifiers in @tt{UPPERCASE} are treated as + terminal tokens. All other identifiers are treated as nonterminals.} + + @item{Tokenizers can be developed independently of parsers. + @tt{brag} takes a liberal view on tokens: they can be strings, + symbols, or instances constructed with @racket[token]. Tokens can optionally provide source location, in which case a syntax object generated by the parser will too.} + + @item{The parser can usually handle ambiguous grammars.} + + @item{It integrates with the rest of the Racket + @link["http://docs.racket-lang.org/guide/languages.html"]{language toolchain}.} + + ] + + + +@subsection{Example: a small DSL for ASCII diagrams} + +Suppose we'd like to define a language for +drawing simple ASCII diagrams. So if we write something like this: + +@nested[#:style 'inset]{ + @verbatim|{ + 3 9 X; + 6 3 b 3 X 3 b; + 3 9 X; + }|} + +It should generate the following picture: + +@nested[#:style 'inset]{ +@verbatim|{ +XXXXXXXXX +XXXXXXXXX +XXXXXXXXX + XXX + XXX + XXX + XXX + XXX + XXX +XXXXXXXXX +XXXXXXXXX +XXXXXXXXX +}|} + + +This makes sense in a casual way. But let's be more precise about how the language works. + +Each line of the program has a semicolon at the end, and describes the output of several @emph{rows} of the line drawing. 
Let's look at two of the lines in the example:
+
+@itemize[
+ @item{@litchar{3 9 X;}: ``Repeat the following 3 times: print @racket["X"] nine times, followed by
+ a newline.''}
+
+ @item{@litchar{6 3 b 3 X 3 b;}: ``Repeat the following 6 times: print @racket[" "] three times,
+ followed by @racket["X"] three times, followed by @racket[" "] three times, followed by a newline.''}
+ ]
+
+Then each line consists of a @emph{repeat} number, followed by pairs of
+(number, character) @emph{chunks}. We'll assume here that the lowercased character @litchar{b} represents the printing of a 1-character whitespace @racket[" "], and that each uppercase letter represents the printing of itself.
+
+By understanding the pieces of each line, we can more easily capture that meaning in a grammar. Once we have each instruction of our ASCII DSL in a structured format, we should be able to parse it.
+
+Here's a first pass at expressing the structure of these line-drawing programs.
+
+@subsection{Parsing the concrete syntax}
+
+@filebox["simple-line-drawing.rkt"]{
+ @verbatim|{
+ #lang brag
+ drawing: rows*
+ rows: repeat chunk+ ";"
+ repeat: INTEGER
+ chunk: INTEGER STRING
+ }|
+}
+
+@margin-note{@secref{brag-syntax} describes @tt{brag}'s syntax in more detail.}
+We write a @tt{brag} program as a BNF grammar, where patterns can be:
+@itemize[
+ @item{the names of other rules (e.g. @racket[chunk])}
+ @item{literal and symbolic token names (e.g. @racket[";"], @racket[INTEGER])}
+ @item{quantified patterns (e.g. @litchar{+} to represent one-or-more repetitions)}
+ ]
+The result of a @tt{brag} program is a module with a @racket[parse] function
+that can parse tokens and produce a syntax object as a result.
+
+Let's try this function:
+
+@interaction[#:eval my-eval
+ (require brag/support)
+ @eval:alts[(require "simple-line-drawing.rkt")
+            (require brag/examples/simple-line-drawing)]
+ (define stx
+   (parse (list (token 'INTEGER 6)
+                (token 'INTEGER 2)
+                (token 'STRING " ")
+                (token 'INTEGER 3)
+                (token 'STRING "X")
+                ";")))
+ (syntax->datum stx)
+ ]
+
+A @emph{token} is the smallest meaningful element of a source program. Tokens can be strings, symbols, or instances of the @racket[token] data structure. (Plus a few other special cases, which we'll discuss later.) Usually, a token holds a single character from the source program. But sometimes it makes sense to package a sequence of characters into a single token, if the sequence has an indivisible meaning.
+
+If possible, we also want to attach source location information to each token. Why? Because this information will be incorporated into the syntax objects produced by @racket[parse].
+
+A parser often works in conjunction with a helper function called a @emph{lexer} that converts the raw code of the source program into tokens. The @racketmodname[br-parser-tools/lex] library can help us write a position-sensitive
+tokenizer:
+
+@interaction[#:eval my-eval
+ (require br-parser-tools/lex)
+ (define (tokenize ip)
+   (port-count-lines! ip)
+   (define my-lexer
+     (lexer-src-pos
+      [(repetition 1 +inf.0 numeric)
+       (token 'INTEGER (string->number lexeme))]
+      [upper-case
+       (token 'STRING lexeme)]
+      ["b"
+       (token 'STRING " ")]
+      [";"
+       (token ";" lexeme)]
+      [whitespace
+       (token 'WHITESPACE lexeme #:skip?
#t)] + [(eof) + (void)])) + (define (next-token) (my-lexer ip)) + next-token) + + (define a-sample-input-port (open-input-string "6 2 b 3 X;")) + (define token-thunk (tokenize a-sample-input-port)) + @code:comment{Now we can pass token-thunk to the parser:} + (define another-stx (parse token-thunk)) + (syntax->datum another-stx) + @code:comment{The syntax object has location information:} + (syntax-line another-stx) + (syntax-column another-stx) + (syntax-span another-stx) + ] + + +Note also from this lexer example: + +@itemize[ + + @item{@racket[parse] accepts as input either a sequence of tokens, or a + function that produces tokens (which @racket[parse] will call repeatedly to get the next token).} + + @item{As an alternative to the basic @racket[token] structure, a token can also be an instance of the @racket[position-token] structure (also found in @racketmodname[br-parser-tools/lex]). In that case, the token will try to derive its position from that of the position-token.} + + @item{@racket[parse] will stop if it gets @racket[void] (or @racket['eof]) as a token.} + + @item{@racket[parse] will skip any token that has + @racket[#:skip?] attribute set to @racket[#t]. For instance, tokens representing comments often use @racket[#:skip?].} + + ] + + +@subsection{From parsing to interpretation} + +We now have a parser for programs written in this simple-line-drawing language. +Our parser will return syntax objects: + +@interaction[#:eval my-eval + (define parsed-program + (parse (tokenize (open-input-string "3 9 X; 6 3 b 3 X 3 b; 3 9 X;")))) + (syntax->datum parsed-program) + ] + +Better still, these syntax objects will have a predictable +structure that follows the grammar: + +@racketblock[ + (drawing (rows (repeat ) + (chunk ) ... ";") + ...) + ] + +where @racket[drawing], @racket[rows], @racket[repeat], and @racket[chunk] +should be treated literally, and everything else will be numbers or strings. + + +Still, these syntax-object values are just inert structures. How do we +interpret them, and make them @emph{print}? We claimed at the beginning of +this section that these syntax objects should be easy to interpret. So let's do it. + +@margin-note{This is a very quick-and-dirty treatment of @racket[syntax-parse]. + See the @racketmodname[syntax/parse] documentation for a gentler guide to its + features.} Racket provides a special form called @racket[syntax-parse] in the +@racketmodname[syntax/parse] library. @racket[syntax-parse] lets us do a +structural case-analysis on syntax objects: we provide it a set of patterns to +parse and actions to perform when those patterns match. + + +As a simple example, we can write a function that looks at a syntax object and +says @racket[#t] if it's the literal @racket[yes], and @racket[#f] otherwise: + +@interaction[#:eval my-eval + (require syntax/parse) + @code:comment{yes-syntax-object?: syntax-object -> boolean} + @code:comment{Returns true if the syntax-object is yes.} + (define (yes-syntax-object? stx) + (syntax-parse stx + [(~literal yes) + #t] + [else + #f])) + (yes-syntax-object? #'yes) + (yes-syntax-object? #'nooooooooooo) + ] + +Here, we use @racket[~literal] to let @racket[syntax-parse] know that +@racket[yes] should show up literally in the syntax object. The patterns can +also have some structure to them, such as: +@racketblock[({~literal drawing} rows-stxs ...)] +which matches on syntax objects that begin, literally, with @racket[drawing], +followed by any number of rows (which are syntax objects too). 
+ + +Now that we know a little bit more about @racket[syntax-parse], +we can use it to do a case analysis on the syntax +objects that our @racket[parse] function gives us. +We start by defining a function on syntax objects of the form @racket[(drawing + rows-stx ...)]. +@interaction[#:eval my-eval + (define (interpret-drawing drawing-stx) + (syntax-parse drawing-stx + [({~literal drawing} rows-stxs ...) + + (for ([rows-stx (syntax->list #'(rows-stxs ...))]) + (interpret-rows rows-stx))]))] + +When we encounter a syntax object with @racket[(drawing rows-stx + ...)], then @racket[interpret-rows] each @racket[rows-stx]. + +@;The pattern we +@;express in @racket[syntax-parse] above marks what things should be treated +@;literally, and the @racket[...] is a a part of the pattern matching language +@;known by @racket[syntax-parse] that lets us match multiple instances of the +@;last pattern. + + +Let's define @racket[interpret-rows] now: +@interaction[#:eval my-eval + (define (interpret-rows rows-stx) + (syntax-parse rows-stx + [({~literal rows} + ({~literal repeat} repeat-number) + chunks ... ";") + + (for ([i (syntax-e #'repeat-number)]) + (for ([chunk-stx (syntax->list #'(chunks ...))]) + (interpret-chunk chunk-stx)) + (newline))]))] + +For a @racket[rows], we extract out the @racket[repeat-number] out of the +syntax object and use it as the range of the @racket[for] loop. The inner loop +walks across each @racket[chunk-stx] and calls @racket[interpret-chunk] on it. + + +Finally, we need to write a definition for @racket[interpret-chunk]. We want +it to extract out the @racket[chunk-size] and @racket[chunk-string] portions, +and print to standard output: + +@interaction[#:eval my-eval + (define (interpret-chunk chunk-stx) + (syntax-parse chunk-stx + [({~literal chunk} chunk-size chunk-string) + + (for ([k (syntax-e #'chunk-size)]) + (display (syntax-e #'chunk-string)))])) + ] + + +@margin-note{Here are the definitions in a single file: + @link["examples/simple-line-drawing/interpret.rkt"]{interpret.rkt}.} +With these definitions in hand, now we can pass it syntax objects +that we construct directly by hand: + +@interaction[#:eval my-eval + (interpret-chunk #'(chunk 3 "X")) + (interpret-drawing #'(drawing (rows (repeat 5) (chunk 3 "X") ";"))) + ] + +or we can pass it the result generated by our parser: +@interaction[#:eval my-eval + (define parsed-program + (parse (tokenize (open-input-string "3 9 X; 6 3 b 3 X 3 b; 3 9 X;")))) + (interpret-drawing parsed-program)] + +And now we've got an interpreter! + + +@subsection{From interpretation to compilation} + +@margin-note{For a gentler tutorial on writing @litchar{#lang}-based languages, see + @link["http://beautifulracket.com"]{Beautiful Racket}.} (Just as a +warning: the following material is slightly more advanced, but shows how +writing a compiler for the line-drawing language reuses the ideas for the +interpreter.) + +Wouldn't it be nice to be able to write something like: + +@nested[#:style 'inset]{ + @verbatim|{ + 3 9 X; + 6 3 b 3 X 3 b; + 3 9 X; + }|} + +and have Racket automatically compile this down to something like this? +@racketblock[ + (for ([i 3]) + (for ([k 9]) (displayln "X")) + (newline)) + + (for ([i 6]) + (for ([k 3]) (displayln " ")) + (for ([k 3]) (displayln "X")) + (for ([k 3]) (displayln " ")) + (newline)) + + (for ([i 3]) + (for ([k 9]) (displayln "X")) + (newline)) + ] + +Well, of course it won't work: we don't have a @litchar{#lang} line. + +Let's add one. 
+ +@filebox["letter-i.rkt"]{ + @verbatim|{ + #lang brag/examples/simple-line-drawing + 3 9 X; + 6 3 b 3 X 3 b; + 3 9 X; + }| +} + +Now @filepath{letter-i.rkt} is a program. + + +How does this work? From the previous sections, we've seen how to take the +contents of a file and interpret it. What we want to do now is teach Racket +how to compile programs labeled with this @litchar{#lang} line. We'll do two +things: + +@itemize[ + @item{Tell Racket to use the @tt{brag}-generated parser and lexer we defined + earlier whenever it sees a program written with + @litchar{#lang brag/examples/simple-line-drawing}.} + + @item{Define transformation rules for @racket[drawing], @racket[rows], and + @racket[chunk] to rewrite these into standard Racket forms.} + ] + +The second part, the writing of the transformation rules, will look very +similar to the definitions we wrote for the interpreter, but the transformation +will happen at compile-time. (We @emph{could} just resort to simply calling +into the interpreter we just wrote up, but this section is meant to show that +compilation is also viable.) + + +We do the first part by defining a @emph{module reader}: a +@link["http://docs.racket-lang.org/guide/syntax_module-reader.html"]{module + reader} tells Racket how to parse and compile a file. Whenever Racket sees a +@litchar{#lang }, it looks for a corresponding module reader in +@filepath{/lang/reader}. + +Here's the definition for +@filepath{brag/examples/simple-line-drawing/lang/reader.rkt}: + +@filebox["brag/examples/simple-line-drawing/lang/reader.rkt"]{ + @codeblock|{ + #lang s-exp syntax/module-reader + brag/examples/simple-line-drawing/semantics + #:read my-read + #:read-syntax my-read-syntax + #:whole-body-readers? #t + + (require brag/examples/simple-line-drawing/lexer + brag/examples/simple-line-drawing/grammar) + + (define (my-read in) + (syntax->datum (my-read-syntax #f in))) + + (define (my-read-syntax src ip) + (list (parse src (tokenize ip)))) + }| +} + +We use a helper module @racketmodname[syntax/module-reader], which provides +utilities for creating a module reader. It uses the lexer and +@tt{brag}-generated parser we defined earlier, and also tells Racket that it should compile the forms in the syntax +object using a module called @filepath{semantics.rkt}. + +Let's look into @filepath{semantics.rkt} and see what's involved in +compilation: +@filebox["brag/examples/simple-line-drawing/semantics.rkt"]{ + @codeblock|{ + #lang racket/base + (require (for-syntax racket/base syntax/parse)) + + (provide #%module-begin + ;; We reuse Racket's treatment of raw datums, specifically + ;; for strings and numbers: + #%datum + + ;; And otherwise, we provide definitions of these three forms. + ;; During compiliation, Racket uses these definitions to + ;; rewrite into for loops, displays, and newlines. + drawing rows chunk) + + ;; Define a few compile-time functions to do the syntax rewriting: + (begin-for-syntax + (define (compile-drawing drawing-stx) + (syntax-parse drawing-stx + [({~literal drawing} rows-stxs ...) + + (syntax/loc drawing-stx + (begin rows-stxs ...))])) + + (define (compile-rows rows-stx) + (syntax-parse rows-stx + [({~literal rows} + ({~literal repeat} repeat-number) + chunks ... + ";") + + (syntax/loc rows-stx + (for ([i repeat-number]) + chunks ... 
+ (newline)))])) + + (define (compile-chunk chunk-stx) + (syntax-parse chunk-stx + [({~literal chunk} chunk-size chunk-string) + + (syntax/loc chunk-stx + (for ([k chunk-size]) + (display chunk-string)))]))) + + + ;; Wire up the use of "drawing", "rows", and "chunk" to these + ;; transformers: + (define-syntax drawing compile-drawing) + (define-syntax rows compile-rows) + (define-syntax chunk compile-chunk) + }| +} + +The semantics hold definitions for @racket[compile-drawing], +@racket[compile-rows], and @racket[compile-chunk], similar to what we had for +interpretation with @racket[interpret-drawing], @racket[interpret-rows], and +@racket[interpret-chunk]. However, compilation is not the same as +interpretation: each definition does not immediately execute the act of +drawing, but rather returns a syntax object whose evaluation will do the actual +work. + +There are a few things to note: + +@margin-note{By the way, we can just as easily rewrite the semantics so that + @racket[compile-rows] does explicitly call @racket[compile-chunk]. Often, + though, it's easier to write the transformation functions in this piecemeal way + and depend on the Racket macro expansion system to do the rewriting as it + encounters each of the forms.} + + +@itemize[ + + @item{@tt{brag}'s native data structure is the syntax object because the + majority of Racket's language-processing infrastructure knows how to read and + write this structured value.} + + + @item{Unlike in interpretation, @racket[compile-rows] doesn't + compile each chunk by directly calling @racket[compile-chunk]. Rather, it + depends on the Racket macro expander to call each @racket[compile-XXX] function + as it encounters a @racket[drawing], @racket[rows], or @racket[chunk] in the + parsed value. The three statements at the bottom of @filepath{semantics.rkt} inform + the macro expansion system to do this: + + @racketblock[ + (define-syntax drawing compile-drawing) + (define-syntax rows compile-rows) + (define-syntax chunk compile-chunk) + ]} + ] + + +Altogether, @tt{brag}'s intent is to be a parser generator for Racket +that's easy and fun to use. It's meant to fit naturally with the other tools +in the Racket language toolchain. Hopefully, it will reduce the friction in +making new languages with alternative concrete syntaxes. + +The rest of this document describes the @tt{brag} language and the parsers it +generates. + + +@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +@section{The language} + +@subsection[#:tag "brag-syntax"]{Syntax and terminology} +A program in the @tt{brag} language consists of the language line +@litchar{#lang brag}, followed by a collection of @tech{rule}s and +@tech{line comment}s. + +A @deftech{rule} is a sequence consisting of: a @tech{rule identifier}, a colon +@litchar{":"}, and a @tech{pattern}. + +A @deftech{rule identifier} is an @tech{identifier} that is not in upper case. + +A @deftech{symbolic token identifier} is an @tech{identifier} that is in upper case. + +An @deftech{identifier} is a character sequence of letters, numbers, and +characters in @racket["-.!$%&/<=>?^_~@"]. It must not contain +@litchar{*} or @litchar{+}, as those characters are used to denote +quantification. + + +A @deftech{pattern} is one of the following: +@itemize[ + @item{an implicit sequence of @tech{pattern}s separated by whitespace} + @item{a terminal: either a literal string or a @tech{symbolic token identifier}. 
+ + When used in a pattern, both these terminals will match the same set of inputs. A literal string can match the string itself, or a @racket[token] whose type field contains that string (or its symbol form). So @racket["FOO"] would match @racket["FOO"], @racket[(token "FOO" "bar")], or @racket[(token 'FOO "bar")]. A symbolic token identifier can also match the string version of the identifier, or a @racket[token] whose type field is the symbol or string form of the identifier. So @racket[FOO] would also match @racket["FOO"], @racket[(token 'FOO "bar")], or @racket[(token "FOO" "bar")]. (In every case, the value of a token, like @racket["bar"], can be anything, and may or may not be the same as its type.) + + Because their underlying meanings are the same, the symbolic token identifier ends up being a notational convenience for readability inside a grammar pattern. Typically, the literal string @racket["FOO"] is used to connote ``match the string @racket["FOO"] exactly'' and the symbolic token identifier @racket[FOO] specially connotes ``match any token of type @racket['FOO]''.} + + @item{a @tech{rule identifier}} + @item{a @deftech{choice pattern}: a sequence of @tech{pattern}s delimited with @litchar{|} characters.} + @item{a @deftech{quantifed pattern}: a @tech{pattern} followed by either @litchar{*} (``zero or more'') or @litchar{+} (``one or more'')} + @item{an @deftech{optional pattern}: a @tech{pattern} surrounded by @litchar{[} and @litchar{]}} + @item{an explicit sequence: a @tech{pattern} surrounded by @litchar{(} and @litchar{)}}] + +A @deftech{line comment} begins with either @litchar{#} or @litchar{;} and +continues till the end of the line. + + +For example, in the following program: +@nested[#:style 'inset + @verbatim|{ + #lang brag + ;; A parser for a silly language + sentence: verb optional-adjective object + verb: greeting + optional-adjective: ["happy" | "frumpy"] + greeting: "hello" | "hola" | "aloha" + object: "world" | WORLD + }|] + +the elements @tt{sentence}, @tt{verb}, @tt{greeting}, and @tt{object} are rule +identifiers. The first rule, @litchar{sentence: verb optional-adjective + object}, is a rule whose right side is an implicit pattern sequence of three +sub-patterns. The uppercased @tt{WORLD} is a symbolic token identifier. The fourth rule in the program associates @tt{greeting} with a @tech{choice pattern}. + + + +More examples: +@itemize[ + + @item{A + BNF for binary + strings that contain an equal number of zeros and ones. + @verbatim|{ + #lang brag + equal: [zero one | one zero] ;; equal number of "0"s and "1"s. + zero: "0" equal | equal "0" ;; has an extra "0" in it. + one: "1" equal | equal "1" ;; has an extra "1" in it. + }| + } + + @item{A BNF for + @link["http://www.json.org/"]{JSON}-like structures. + @verbatim|{ + #lang brag + json: number | string + | array | object + number: NUMBER + string: STRING + array: "[" [json ("," json)*] "]" + object: "{" [kvpair ("," kvpair)*] "}" + kvpair: ID ":" json + }| + } + ] + +@subsection{Cuts & splices} + +By default, every matched token shows up in the parse tree. But sometimes that means that the parse tree ends up holding a bunch of tokens that were only needed to complete the parsing. Once they've served their purpose, it's sometimes useful to filter them out (for instance, to simplify the implementation of a language expander). To help with this kind of housekeeping, @racket[brag] supports @emph{cuts} and @emph{splices}. + +A @deftech{cut} in a grammar will delete an item from the parse tree. 
A cut is notated by prefixing either the left-hand rule name or a right-hand pattern element with a slash @litchar{/}. + +If the cut is applied to a left-hand rule name, the rule name is omitted from the parse tree, but its node and its matched elements remain. + +If the cut is applied to a right-hand pattern element, then that element is omitted from every node matching that rule. + +For instance, consider this simple grammar for arithmetic expressions: + +@verbatim|{ +#lang brag +expr : term ('+' term)* +term : factor ('*' factor)* +factor : ("0" | "1" | "2" | "3" + | "4" | "5" | "6" | "7" + | "8" | "9")+ +}| + +If we use it to parse this string: + +@verbatim|{1+2*3}| + +We get this parse tree: + +@racketblock['(expr (term (factor "1")) "+" (term (factor "2") "*" (factor "3")))] + +Suppose we felt the @litchar{+} and @litchar{*} characters were superfluous. We can add cuts to the grammar by prefixing these pattern elements with @litchar{/}: + +@verbatim|{ +#lang brag +expr : term (/'+' term)* +term : factor (/'*' factor)* +factor : ("0" | "1" | "2" | "3" + | "4" | "5" | "6" | "7" + | "8" | "9")+ +}| + +Our parse tree changes accordingly: + +@racketblock['(expr (term (factor "1")) (term (factor "2") (factor "3")))] + +Now suppose we apply a cut on the rule name, @racket[factor]: + +@verbatim|{ +#lang brag +expr : term (/'+' term)* +term : factor (/'*' factor)* +/factor : ("0" | "1" | "2" | "3" + | "4" | "5" | "6" | "7" + | "8" | "9")+ +}| + +This time, the rule name disppears from the parse tree, but its nodes and elements remain: + +@racketblock['(expr (term ("1")) (term ("2") ("3")))] + +A @deftech{splice} in a grammar will merge the elements of a node into the surrounding node. A splice is notated by prefixing either the left-hand rule name or a right-hand pattern element with an at sign @litchar|{@}|. + +If the splice is applied to a left-hand rule name, then the splice is applied every time the rule is used in the parse tree. + +If the splice is applied to a right-hand pattern element, that element is spliced only when it appears as part of the production for that rule. + +Suppose we remove the cut from the @racket[factor] rule name and instead splice the second appearance of @racket[factor] in the pattern for the @racket[term] rule: + +@verbatim|{ +#lang brag +expr : term (/'+' term)* +term : factor (/'*' @factor)* +factor : ("0" | "1" | "2" | "3" + | "4" | "5" | "6" | "7" + | "8" | "9")+ +}| + +The @racket[factor] elements matching the first position of the @racket[term] pattern remain as they were, but the @racket[factor] element matching the second position is spliced into the surrounding node: + +@racketblock['(expr (term (factor "1")) (term (factor "2") "3"))] + +Finally, suppose we add a splice to the @racket[term] rule name: + +@verbatim|{ +#lang brag +expr : term (/'+' term)* +@term : factor (/'*' @factor)* +factor : ("0" | "1" | "2" | "3" + | "4" | "5" | "6" | "7" + | "8" | "9")+ +}| + +This time, all the appearances of @racket[term] nodes in the parse tree will have their elements spliced into the surrounding nodes: + +@racketblock['(expr (factor "1") (factor "2") "3")] + + +As a convenience, when a grammar element is spliced, or a rule name is cut, @racket[brag] preserves the rule name by adding it as a syntax property to the residual elements, using the rule name as a key, and the original syntax object representing the rule name as the value. 
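+
+For instance, here is a minimal sketch of reading that property back. It assumes the fully spliced grammar above is saved as a module that provides @racket[parse] (the filename @filepath{spliced-arithmetic.rkt} is hypothetical), and that the property key is the rule-name symbol:
+
+@racketblock[
+  @code:comment{"spliced-arithmetic.rkt" is a hypothetical module holding the spliced grammar}
+  (require "spliced-arithmetic.rkt")
+  (define stx (parse (list "1" "+" "2" "*" "3")))
+  @code:comment{=> '(expr (factor "1") (factor "2") "3")}
+  (syntax->datum stx)
+  @code:comment{the second element was spliced out of a term node, so it should}
+  @code:comment{carry the original rule-name syntax object under the key 'term:}
+  (syntax-property (cadr (syntax->list stx)) 'term)
+  ]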
+ + +@subsection{Syntax errors} + +Besides the basic syntax errors that can occur with a malformed grammar, there +are a few other classes of situations that @litchar{#lang brag} will consider +as syntax errors. + +@tt{brag} will raise a syntax error if the grammar: +@itemize[ + @item{doesn't have any rules.} + + @item{has a rule with the same left hand side as any other rule.} + + @item{refers to rules that have not been defined. e.g. the + following program: + @nested[#:style 'code-inset + @verbatim|{ + #lang brag + foo: [bar] + }| + ] + should raise an error because @tt{bar} has not been defined, even though + @tt{foo} refers to it in an @tech{optional pattern}.} + + + @item{uses the token name @racket[EOF]; the end-of-file token type is reserved + for internal use by @tt{brag}.} + + + @item{contains a rule that has no finite derivation. e.g. the following + program: + @nested[#:style 'code-inset + @verbatim|{ + #lang brag + infinite-a: "a" infinite-a + }| + ] + should raise an error because no finite sequence of tokens will satisfy + @tt{infinite-a}.} + + ] + +Otherwise, @tt{brag} should be fairly tolerant and permit even ambiguous +grammars. + +@subsection{Semantics} +@declare-exporting[brag/examples/nested-word-list] + +A program written in @litchar{#lang brag} produces a module that provides a few +bindings. The most important of these is @racket[parse]: + +@defproc[(parse [source any/c #f] + [token-source (or/c (sequenceof token) + (-> token))]) + syntax?]{ + + Parses the sequence of @tech{tokens} according to the rules in the grammar, using the + first rule as the start production. The parse must completely consume + @racket[token-source]. + + The @deftech{token source} can either be a sequence, or a 0-arity function that + produces @tech{tokens}. + + A @deftech{token} in @tt{brag} can be any of the following values: + @itemize[ + @item{a string} + @item{a symbol} + @item{an instance produced by @racket[token]} + @item{an instance produced by the token constructors of @racketmodname[br-parser-tools/lex]} + @item{an instance of @racketmodname[br-parser-tools/lex]'s @racket[position-token] whose + @racket[position-token-token] is a @tech{token}.} + ] + + A token whose type is either @racket[void] or @racket['EOF] terminates the + source. + + + If @racket[parse] succeeds, it will return a structured syntax object. The + structure of the syntax object follows the overall structure of the rules in + the BNF grammar. For each rule @racket[r] and its associated pattern @racket[p], + @racket[parse] generates a syntax object @racket[#'(r p-value)] where + @racket[p-value]'s structure follows a case analysis on @racket[p]: + + @itemize[ + @item{For implicit and explicit sequences of @tech{pattern}s @racket[p1], + @racket[p2], ..., the corresponding values, spliced into the + structure.} + @item{For terminals, the value of the token.} + @item{For @tech{rule identifier}s: the associated parse value for the rule.} + @item{For @tech{choice pattern}s: the associated parse value for one of the matching subpatterns.} + @item{For @tech{quantifed pattern}s and @tech{optional pattern}s: the corresponding values, spliced into the structure.} + ] + + Consequently, it's only the presence of @tech{rule identifier}s in a rule's + pattern that informs the parser to introduces nested structure into the syntax + object. + + + If the grammar is ambiguous, @tt{brag} will choose one of the possible parse results, though it doesn't guarantee which. 
+ + + If the parse cannot be performed successfully, or if a token in the + @racket[token-source] uses a type that isn't mentioned in the grammar, then + @racket[parse] raises an instance of @racket[exn:fail:parsing].} + + +@defproc[(parse-to-datum [source any/c #f] + [token-source (or/c (sequenceof token) + (-> token))]) + list?]{ + Same as @racket[parse], but the result is converted into a plain datum. Useful for testing or debugging a parser. +} + + +@defform[#:id make-rule-parser + (make-rule-parser name)]{ + Constructs a parser for the @racket[name] of one of the non-terminals + in the grammar. + + For example, given the @tt{brag} program + @filepath{simple-arithmetic-grammar.rkt}: + @filebox["simple-arithmetic-grammar.rkt"]{ + @verbatim|{ + #lang brag + expr : term ('+' term)* + term : factor ('*' factor)* + factor : INT + }| + } + the following interaction shows how to extract a parser for @racket[term]s. + @interaction[#:eval my-eval + @eval:alts[(require "simple-arithmetic-grammar.rkt") + (require brag/examples/simple-arithmetic-grammar)] + (define term-parse (make-rule-parser term)) + (define tokens (list (token 'INT 3) + "*" + (token 'INT 4))) + (syntax->datum (parse tokens)) + (syntax->datum (term-parse tokens)) + + (define another-token-sequence + (list (token 'INT 1) "+" (token 'INT 2) + "*" (token 'INT 3))) + (syntax->datum (parse another-token-sequence)) + @code:comment{Note that term-parse will break on another-token-sequence} + @code:comment{as it does not know what to do with the "+"} + (term-parse another-token-sequence) + ] + +} + + +@defthing[all-token-types (setof symbol?)]{ + A set of all the token types used in a grammar. + + For example: + @interaction[#:eval my-eval + @eval:alts[(require "simple-arithmetic-grammar.rkt") + (require brag/examples/simple-arithmetic-grammar)] + all-token-types + ] + +} + + + +@section{Support API} + +@defmodule[brag/support] + +The @racketmodname[brag/support] module provides functions to interact with +@tt{brag} programs. The most useful is the @racket[token] function, which +produces tokens to be parsed. + +In addition to the exports shown below, the @racketmodname[brag/support] module also provides everything from @racketmodname[brag/support], and everything from @racketmodname[br-parser-tools/lex]. + + +@defproc[(token [type (or/c string? symbol?)] + [val any/c #f] + [#:line line (or/c positive-integer? #f) #f] + [#:column column (or/c natural-number? #f) #f] + [#:position position (or/c positive-integer? #f) #f] + [#:span span (or/c natural-number? #f) #f] + [#:skip? skip? boolean? #f] + ) + token-struct?]{ + Creates instances of @racket[token-struct]s. + + The syntax objects produced by a parse will inject the value @racket[val] in + place of the token name in the grammar. + + If @racket[#:skip?] is true, then the parser will skip over it during a + parse.} + + +@defstruct[token-struct ([type symbol?] + [val any/c] + [position (or/c positive-integer? #f)] + [line (or/c natural-number? #f)] + [column (or/c positive-integer? #f)] + [span (or/c natural-number? #f)] + [skip? boolean?]) + #:transparent]{ + The token structure type. + + Rather than directly using the @racket[token-struct] constructor, please use + the helper function @racket[token] to construct instances. +} + + + + +@defstruct[(exn:fail:parsing exn:fail) + ([message string?] + [continuation-marks continuation-mark-set?] + [srclocs (listof srcloc?)])]{ + The exception raised when parsing fails. 
+
+ @racket[exn:fail:parsing] implements Racket's @racket[prop:exn:srcloc]
+ property, so if this exception reaches DrRacket's default error handler,
+ DrRacket should highlight the offending locations in the source.}
+
+
+
+@defproc[(apply-tokenizer-maker [tokenizer-maker procedure?]
+                                [source (or/c string?
+                                              input-port?)])
+         list?]{
+ Repeatedly apply @racket[tokenizer-maker] to @racket[source], gathering the resulting tokens into a list. @racket[source] can be a string or an input port. Useful for testing or debugging a tokenizer.
+}
+
+@defproc[(apply-lexer [lexer procedure?]
+                      [source (or/c string?
+                                    input-port?)])
+         list?]{
+ Repeatedly apply @racket[lexer] to @racket[source], gathering the resulting tokens into a list. @racket[source] can be a string or an input port. Useful for testing or debugging a lexer.
+}
+
+
+@defproc[(trim-ends [left-str string?]
+                    [str string?]
+                    [right-str string?])
+         string?]{
+ Remove @racket[left-str] from the left side of @racket[str], and @racket[right-str] from its right side. Intended as a helper function for @racket[from/to].
+}
+
+
+@defform[(:* re ...)]{
+
+ Repetition of @racket[re] sequence 0 or more times.}
+
+@defform[(:+ re ...)]{
+
+ Repetition of @racket[re] sequence 1 or more times.}
+
+@defform[(:? re ...)]{
+
+ Zero or one occurrence of @racket[re] sequence.}
+
+@defform[(:= n re ...)]{
+
+ Exactly @racket[n] occurrences of @racket[re] sequence, where
+ @racket[n] must be a literal exact, non-negative number.}
+
+@defform[(:>= n re ...)]{
+
+ At least @racket[n] occurrences of @racket[re] sequence, where
+ @racket[n] must be a literal exact, non-negative number.}
+
+@defform[(:** n m re ...)]{
+
+ Between @racket[n] and @racket[m] (inclusive) occurrences of
+ @racket[re] sequence, where @racket[n] must be a literal exact,
+ non-negative number, and @racket[m] must be literally either
+ @racket[#f], @racket[+inf.0], or an exact, non-negative number; a
+ @racket[#f] value for @racket[m] is the same as @racket[+inf.0].}
+
+@defform[(:or re ...)]{
+
+ Same as @racket[(union re ...)].}
+
+@deftogether[(
+  @defform[(:: re ...)]
+  @defform[(:seq re ...)]
+  )]{
+
+ Both forms concatenate the @racket[re]s.}
+
+@defform[(:& re ...)]{
+
+ Intersects the @racket[re]s.}
+
+@defform[(:- re ...)]{
+
+ The set difference of the @racket[re]s.}
+
+@defform[(:~ re ...)]{
+
+ Character-set complement, where each @racket[re] must match exactly
+ one character.}
+
+@defform[(:/ char-or-string ...)]{
+
+ Character ranges, matching characters between successive pairs of
+ characters.}
+
+@defform[(from/to open close)]{
+
+ A string that is bounded by @racket[open] and @racket[close]. Matching is non-greedy (meaning, it stops at the first occurrence of @racket[close]). The resulting lexeme includes @racket[open] and @racket[close]. To remove them, see @racket[trim-ends].}
+
+@defform[(from/stop-before open close)]{
+
+ Like @racket[from/to], a string that is bounded by @racket[open] and @racket[close], except that @racket[close] is not included in the resulting lexeme. Matching is non-greedy (meaning, it stops at the first occurrence of @racket[close]).}
+
+
+@close-eval[my-eval]
diff --git a/brag/brag/cfg-parser/cfg-parser.rkt b/brag/brag/cfg-parser/cfg-parser.rkt
new file mode 100755
index 0000000..1bc717b
--- /dev/null
+++ b/brag/brag/cfg-parser/cfg-parser.rkt
@@ -0,0 +1,921 @@
+#lang racket/base
+;; This module implements a parser form like the br-parser-tools's
+;; `parser', except that it works on an arbitrary CFG (returning
+;; the first successful parse).
+ +;; I'm pretty sure that this is an implementation of Earley's +;; algorithm. + +;; To a first approximation, it's a backtracking parser. Alternative +;; for a non-terminal are computed in parallel, and multiple attempts +;; to compute the same result block until the first one completes. If +;; you get into deadlock, such as when trying to match +;; := +;; then it means that there's no successful parse, so everything +;; that's blocked fails. + +;; A cache holds the series of results for a particular non-terminal +;; at a particular starting location. (A series is used, instead of a +;; sinlge result, for backtracking.) Otherwise, the parser uses +;; backtracking search. Backtracking is implemented through explicit +;; success and failure continuations. Multiple results for a +;; particular nonterminal and location are kept only when they have +;; different lengths. (Otherwise, in the spirit of finding one +;; successful parse, only the first result is kept.) + +;; The br-parser-tools's `parse' is used to transform tokens in the +;; grammar to tokens specific to this parser. In other words, this +;; parser uses `parser' so that it doesn't have to know anything about +;; tokens. +;; + + + +(require br-parser-tools/yacc + br-parser-tools/lex) + +(require (for-syntax racket/base + syntax/boundmap + br-parser-tools/private-lex/token-syntax)) + +(provide cfg-parser) + +;; A raw token, wrapped so that we can recognize it: +(define-struct tok (name orig-name val start end)) + +;; Represents the thread scheduler: +(define-struct tasks (active active-back waits multi-waits cache progress?)) + +(define-for-syntax make-token-identifier-mapping make-hasheq) +(define-for-syntax token-identifier-mapping-get + (case-lambda + [(t tok) + (hash-ref t (syntax-e tok))] + [(t tok fail) + (hash-ref t (syntax-e tok) fail)])) +(define-for-syntax token-identifier-mapping-put! + (lambda (t tok v) + (hash-set! t (syntax-e tok) v))) +(define-for-syntax token-identifier-mapping-map + (lambda (t f) + (hash-map t f))) + +;; Used to calculate information on the grammar, such as whether +;; a particular non-terminal is "simple" instead of recursively defined. +(define-for-syntax (nt-fixpoint nts proc nt-ids patss) + (define (ormap-all val f as bs) + (cond + [(null? as) val] + [else (ormap-all (or (f (car as) (car bs)) val) + f + (cdr as) (cdr bs))])) + (let loop () + (when (ormap-all #f + (lambda (nt pats) + (let ([old (bound-identifier-mapping-get nts nt)]) + (let ([new (proc nt pats old)]) + (if (equal? old new) + #f + (begin + (bound-identifier-mapping-put! nts nt new) + #t))))) + nt-ids patss) + (loop)))) + +;; Tries parse-a followed by parse-b. If parse-a is not simple, +;; then after parse-a succeeds once, we parallelize parse-b +;; and trying a second result for parse-a. +(define (parse-and simple-a? parse-a parse-b + stream last-consumed-token depth end success-k fail-k + max-depth tasks) + (letrec ([mk-got-k + (lambda (success-k fail-k) + (lambda (val stream last-consumed-token depth max-depth tasks next1-k) + (if simple-a? 
+ (parse-b val stream last-consumed-token depth end + (mk-got2-k success-k fail-k next1-k) + (mk-fail2-k success-k fail-k next1-k) + max-depth tasks) + (parallel-or + (lambda (success-k fail-k max-depth tasks) + (parse-b val stream last-consumed-token depth end + success-k fail-k + max-depth tasks)) + (lambda (success-k fail-k max-depth tasks) + (next1-k (mk-got-k success-k fail-k) + fail-k max-depth tasks)) + success-k fail-k max-depth tasks))))] + [mk-got2-k + (lambda (success-k fail-k next1-k) + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + (success-k val stream last-consumed-token depth max-depth tasks + (lambda (success-k fail-k max-depth tasks) + (next-k (mk-got2-k success-k fail-k next1-k) + (mk-fail2-k success-k fail-k next1-k) + max-depth tasks)))))] + [mk-fail2-k + (lambda (success-k fail-k next1-k) + (lambda (max-depth tasks) + (next1-k (mk-got-k success-k fail-k) + fail-k + max-depth + tasks)))]) + (parse-a stream last-consumed-token depth end + (mk-got-k success-k fail-k) + fail-k + max-depth tasks))) + +;; Parallel or for non-terminal alternatives +(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks) + (parallel-or (lambda (success-k fail-k max-depth tasks) + (parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks)) + (lambda (success-k fail-k max-depth tasks) + (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)) + success-k fail-k max-depth tasks)) + +;; Generic parallel-or +(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks) + (define answer-key (gensym)) + (letrec ([gota-k + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + (report-answer answer-key + max-depth + tasks + (list val stream last-consumed-token depth next-k)))] + [faila-k + (lambda (max-depth tasks) + (report-answer answer-key + max-depth + tasks + null))]) + (let* ([tasks (queue-task + tasks + (lambda (max-depth tasks) + (parse-a gota-k + faila-k + max-depth tasks)))] + [tasks (queue-task + tasks + (lambda (max-depth tasks) + (parse-b gota-k + faila-k + max-depth tasks)))] + [queue-next (lambda (next-k tasks) + (queue-task tasks + (lambda (max-depth tasks) + (next-k gota-k + faila-k + max-depth tasks))))]) + (letrec ([mk-got-one + (lambda (immediate-next? get-nth success-k) + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + (let ([tasks (if immediate-next? + (queue-next next-k tasks) + tasks)]) + (success-k val stream last-consumed-token depth max-depth + tasks + (lambda (success-k fail-k max-depth tasks) + (let ([tasks (if immediate-next? 
+ tasks + (queue-next next-k tasks))]) + (get-nth max-depth tasks success-k fail-k)))))))] + [get-first + (lambda (max-depth tasks success-k fail-k) + (wait-for-answer #f max-depth tasks answer-key + (mk-got-one #t get-first success-k) + (lambda (max-depth tasks) + (get-second max-depth tasks success-k fail-k)) + #f))] + [get-second + (lambda (max-depth tasks success-k fail-k) + (wait-for-answer #f max-depth tasks answer-key + (mk-got-one #f get-second success-k) + fail-k #f))]) + (get-first max-depth tasks success-k fail-k))))) + +;; Non-terminal alternatives where the first is "simple" can be done +;; sequentially, which is simpler +(define (parse-or parse-a parse-b + stream last-consumed-token depth end success-k fail-k max-depth tasks) + (letrec ([mk-got-k + (lambda (success-k fail-k) + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + (success-k val stream last-consumed-token depth + max-depth tasks + (lambda (success-k fail-k max-depth tasks) + (next-k (mk-got-k success-k fail-k) + (mk-fail-k success-k fail-k) + max-depth tasks)))))] + [mk-fail-k + (lambda (success-k fail-k) + (lambda (max-depth tasks) + (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))]) + (parse-a stream last-consumed-token depth end + (mk-got-k success-k fail-k) + (mk-fail-k success-k fail-k) + max-depth tasks))) + +;; Starts a thread +(define queue-task + (lambda (tasks t [progress? #t]) + (make-tasks (tasks-active tasks) + (cons t (tasks-active-back tasks)) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + (or progress? (tasks-progress? tasks))))) + +;; Reports an answer to a waiting thread: +(define (report-answer answer-key max-depth tasks val) + (let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))]) + (if v + (let ([tasks (make-tasks (cons (v val) + (tasks-active tasks)) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #t)]) + (hash-remove! (tasks-waits tasks) answer-key) + (swap-task max-depth tasks)) + ;; We have an answer ready too fast; wait + (swap-task max-depth + (queue-task tasks + (lambda (max-depth tasks) + (report-answer answer-key max-depth tasks val)) + #f))))) + +;; Reports an answer to multiple waiting threads: +(define (report-answer-all answer-key max-depth tasks val k) + (let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))]) + (hash-remove! (tasks-multi-waits tasks) answer-key) + (let ([tasks (make-tasks (append (map (lambda (a) (a val)) v) + (tasks-active tasks)) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #t)]) + (k max-depth tasks)))) + +;; Waits for an answer; if `multi?' is #f, this is sole waiter, otherwise +;; there might be many. Use wither #t or #f (and `report-answer' or +;; `report-answer-all', resptively) consistently for a particular answer key. +(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k) + (let ([wait (lambda (val) + (lambda (max-depth tasks) + (if val + (if (null? val) + (fail-k max-depth tasks) + (let-values ([(val stream last-consumed-token depth next-k) (apply values val)]) + (success-k val stream last-consumed-token depth max-depth tasks next-k))) + (deadlock-k max-depth tasks))))]) + (if multi? + (hash-set! (tasks-multi-waits tasks) answer-key + (cons wait (hash-ref (tasks-multi-waits tasks) answer-key + (lambda () null)))) + (hash-set! 
(tasks-waits tasks) answer-key wait)) + (let ([tasks (make-tasks (tasks-active tasks) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #t)]) + (swap-task max-depth tasks)))) + +;; Swap thread +(define (swap-task max-depth tasks) + ;; Swap in first active: + (if (null? (tasks-active tasks)) + (if (tasks-progress? tasks) + (swap-task max-depth + (make-tasks (reverse (tasks-active-back tasks)) + null + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + #f)) + ;; No progress, so issue failure for all multi-waits + (if (zero? (hash-count (tasks-multi-waits tasks))) + (error 'swap-task "Deadlock") + (swap-task max-depth + (make-tasks (apply + append + (hash-map (tasks-multi-waits tasks) + (lambda (k l) + (map (lambda (v) (v #f)) l)))) + (tasks-active-back tasks) + (tasks-waits tasks) + (make-hasheq) + (tasks-cache tasks) + #t)))) + (let ([t (car (tasks-active tasks))] + [tasks (make-tasks (cdr (tasks-active tasks)) + (tasks-active-back tasks) + (tasks-waits tasks) + (tasks-multi-waits tasks) + (tasks-cache tasks) + (tasks-progress? tasks))]) + (t max-depth tasks)))) + +;; Finds the symbolic representative of a token class +(define-for-syntax (map-token toks tok) + (car (token-identifier-mapping-get toks tok))) + +(define no-pos-val (make-position #f #f #f)) +(define-for-syntax no-pos + (let ([npv ((syntax-local-certifier) #'no-pos-val)]) + (lambda (stx) npv))) +(define-for-syntax at-tok-pos + (lambda (sel expr) + (lambda (stx) + #`(let ([v #,expr]) (if v (#,sel v) no-pos-val))))) + +;; Builds a matcher for a particular alternative +(define-for-syntax (build-match nts toks pat handle $ctx) + (let loop ([pat pat] + [pos 1]) + (if (null? pat) + #`(success-k #,handle stream last-consumed-token depth max-depth tasks + (lambda (success-k fail-k max-depth tasks) + (fail-k max-depth tasks))) + (let ([id (datum->syntax (car pat) + (string->symbol (format "$~a" pos)))] + [id-start-pos (datum->syntax (car pat) + (string->symbol (format "$~a-start-pos" pos)))] + [id-end-pos (datum->syntax (car pat) + (string->symbol (format "$~a-end-pos" pos)))] + [n-end-pos (and (null? (cdr pat)) + (datum->syntax (car pat) '$n-end-pos))]) + (cond + [(bound-identifier-mapping-get nts (car pat) (lambda () #f)) + ;; Match non-termimal + #`(parse-and + ;; First part is simple? (If so, we don't have to parallelize the `and'.) + #,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))]) + (or (not l) + (andmap values (caddr l)))) + #,(car pat) + (let ([original-stream stream]) + (lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks) + (let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream) + tok-end + tok-start) + #'(if (eq? original-stream stream) + last-consumed-token + (and (pair? original-stream) + (car original-stream))))] + [#,id-end-pos (at-tok-pos #'tok-end #'last-consumed-token)] + #,@(if n-end-pos + #`([#,n-end-pos (at-tok-pos #'tok-end #'last-consumed-token)]) + null)) + #,(loop (cdr pat) (add1 pos))))) + stream last-consumed-token depth + #,(let ([cnt (apply + + (map (lambda (item) + (cond + [(bound-identifier-mapping-get nts item (lambda () #f)) + => (lambda (l) (car l))] + [else 1])) + (cdr pat)))]) + #`(- end #,cnt)) + success-k fail-k max-depth tasks)] + [else + ;; Match token + (let ([tok-id (map-token toks (car pat))]) + #`(if (and (pair? stream) + (eq? 
'#,tok-id (tok-name (car stream)))) + (let* ([stream-a (car stream)] + [#,id (tok-val stream-a)] + [last-consumed-token (car stream)] + [stream (cdr stream)] + [depth (add1 depth)]) + (let ([max-depth (max max-depth depth)]) + (let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'stream-a)] + [#,id-end-pos (at-tok-pos #'tok-end #'stream-a)] + #,@(if n-end-pos + #`([#,n-end-pos (at-tok-pos #'tok-end #'stream-a)]) + null)) + #,(loop (cdr pat) (add1 pos))))) + (fail-k max-depth tasks)))]))))) + +;; Starts parsing to match a non-terminal. There's a minor +;; optimization that checks for known starting tokens. Otherwise, +;; use the cache, block if someone else is already trying the match, +;; and cache the result if it's computed. +;; The cache maps nontermial+startingpos+iteration to a result, where +;; the iteration is 0 for the first match attempt, 1 for the second, +;; etc. +(define (parse-nt/share key min-cnt init-tokens stream last-consumed-token depth end max-depth tasks success-k fail-k k) + (if (and (positive? min-cnt) + (pair? stream) + (not (memq (tok-name (car stream)) init-tokens))) + ;; No such leading token; give up + (fail-k max-depth tasks) + ;; Run pattern + (let loop ([n 0] + [success-k success-k] + [fail-k fail-k] + [max-depth max-depth] + [tasks tasks] + [k k]) + (let ([answer-key (gensym)] + [table-key (vector key depth n)] + [old-depth depth] + [old-stream stream]) + #;(printf "Loop ~a\n" table-key) + (cond + [(hash-ref (tasks-cache tasks) table-key (lambda () #f)) + => (lambda (result) + #;(printf "Reuse ~a\n" table-key) + (result success-k fail-k max-depth tasks))] + [else + #;(printf "Try ~a ~a\n" table-key (map tok-name stream)) + (hash-set! (tasks-cache tasks) table-key + (lambda (success-k fail-k max-depth tasks) + #;(printf "Wait ~a ~a\n" table-key answer-key) + (wait-for-answer #t max-depth tasks answer-key success-k fail-k + (lambda (max-depth tasks) + #;(printf "Deadlock ~a ~a\n" table-key answer-key) + (fail-k max-depth tasks))))) + (let result-loop ([max-depth max-depth][tasks tasks][k k]) + (letrec ([orig-stream stream] + [new-got-k + (lambda (val stream last-consumed-token depth max-depth tasks next-k) + ;; Check whether we already have a result that consumed the same amount: + (let ([result-key (vector #f key old-depth depth)]) + (cond + [(hash-ref (tasks-cache tasks) result-key (lambda () #f)) + ;; Go for the next-result + (result-loop max-depth + tasks + (lambda (end max-depth tasks success-k fail-k) + (next-k success-k fail-k max-depth tasks)))] + [else + #;(printf "Success ~a ~a\n" table-key + (map tok-name (let loop ([d old-depth][s old-stream]) + (if (= d depth) + null + (cons (car s) (loop (add1 d) (cdr s))))))) + (let ([next-k (lambda (success-k fail-k max-depth tasks) + (loop (add1 n) + success-k + fail-k + max-depth + tasks + (lambda (end max-depth tasks success-k fail-k) + (next-k success-k fail-k max-depth tasks))))]) + (hash-set! (tasks-cache tasks) result-key #t) + (hash-set! (tasks-cache tasks) table-key + (lambda (success-k fail-k max-depth tasks) + (success-k val stream last-consumed-token depth max-depth tasks next-k))) + (report-answer-all answer-key + max-depth + tasks + (list val stream last-consumed-token depth next-k) + (lambda (max-depth tasks) + (success-k val stream last-consumed-token depth max-depth tasks next-k))))])))] + [new-fail-k + (lambda (max-depth tasks) + #;(printf "Failure ~a\n" table-key) + (hash-set! 
(tasks-cache tasks) table-key + (lambda (success-k fail-k max-depth tasks) + (fail-k max-depth tasks))) + (report-answer-all answer-key + max-depth + tasks + null + (lambda (max-depth tasks) + (fail-k max-depth tasks))))]) + (k end max-depth tasks new-got-k new-fail-k)))]))))) + +(define-syntax (cfg-parser stx) + (syntax-case stx () + [(_ clause ...) + (let ([clauses (syntax->list #'(clause ...))]) + (let-values ([(start grammar cfg-error parser-clauses src-pos?) + (let ([all-toks (apply + append + (map (lambda (clause) + (syntax-case clause (tokens) + [(tokens t ...) + (apply + append + (map (lambda (t) + (let ([v (syntax-local-value t (lambda () #f))]) + (cond + [(terminals-def? v) + (map (lambda (v) + (cons v #f)) + (syntax->list (terminals-def-t v)))] + [(e-terminals-def? v) + (map (lambda (v) + (cons v #t)) + (syntax->list (e-terminals-def-t v)))] + [else null]))) + (syntax->list #'(t ...))))] + [_else null])) + clauses))] + [all-end-toks (apply + append + (map (lambda (clause) + (syntax-case clause (end) + [(end t ...) + (syntax->list #'(t ...))] + [_else null])) + clauses))]) + (let loop ([clauses clauses] + [cfg-start #f] + [cfg-grammar #f] + [cfg-error #f] + [src-pos? #f] + [parser-clauses null]) + (if (null? clauses) + (values cfg-start + cfg-grammar + cfg-error + (reverse parser-clauses) + src-pos?) + (syntax-case (car clauses) (start error grammar src-pos) + [(start tok) + (loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)] + [(error expr) + (loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)] + [(grammar [nt [pat handle0 handle ...] ...] ...) + (let ([nts (make-bound-identifier-mapping)] + [toks (make-token-identifier-mapping)] + [end-toks (make-token-identifier-mapping)] + [nt-ids (syntax->list #'(nt ...))] + [patss (map (lambda (stx) + (map syntax->list (syntax->list stx))) + (syntax->list #'((pat ...) ...)))]) + (for-each (lambda (nt) + (bound-identifier-mapping-put! nts nt (list 0))) + nt-ids) + (for-each (lambda (t) + (token-identifier-mapping-put! end-toks t #t)) + all-end-toks) + (for-each (lambda (t) + (unless (token-identifier-mapping-get end-toks (car t) (lambda () #f)) + (let ([id (gensym (syntax-e (car t)))]) + (token-identifier-mapping-put! toks (car t) + (cons id (cdr t)))))) + all-toks) + ;; Compute min max size for each non-term: + (nt-fixpoint + nts + (lambda (nt pats old-list) + (let ([new-cnt + (apply + min + (map (lambda (pat) + (apply + + + (map (lambda (elem) + (car + (bound-identifier-mapping-get nts + elem + (lambda () (list 1))))) + pat))) + pats))]) + (if (new-cnt . > . (car old-list)) + (cons new-cnt (cdr old-list)) + old-list))) + nt-ids patss) + ;; Compute set of toks that must appear at the beginning + ;; for a non-terminal + (nt-fixpoint + nts + (lambda (nt pats old-list) + (let ([new-list + (apply + append + (map (lambda (pat) + (let loop ([pat pat]) + (if (pair? pat) + (let ([l (bound-identifier-mapping-get + nts + (car pat) + (lambda () + (list 1 (map-token toks (car pat)))))]) + ;; If the non-terminal can match 0 things, + ;; then it might match something from the + ;; next pattern element. Otherwise, it must + ;; match the first element: + (if (zero? (car l)) + (append (cdr l) (loop (cdr pat))) + (cdr l))) + null))) + pats))]) + (let ([new (filter (lambda (id) + (andmap (lambda (id2) + (not (eq? id id2))) + (cdr old-list))) + new-list)]) + (if (pair? new) + ;; Drop dups in new list: + (let ([new (let loop ([new new]) + (if (null? (cdr new)) + new + (if (ormap (lambda (id) + (eq? 
(car new) id)) + (cdr new)) + (loop (cdr new)) + (cons (car new) (loop (cdr new))))))]) + (cons (car old-list) (append new (cdr old-list)))) + old-list)))) + nt-ids patss) + ;; Determine left-recursive clauses: + (for-each (lambda (nt pats) + (let ([l (bound-identifier-mapping-get nts nt)]) + (bound-identifier-mapping-put! nts nt (list (car l) + (cdr l) + (map (lambda (x) #f) pats))))) + nt-ids patss) + (nt-fixpoint + nts + (lambda (nt pats old-list) + (list (car old-list) + (cadr old-list) + (map (lambda (pat simple?) + (or simple? + (let ([l (map (lambda (elem) + (bound-identifier-mapping-get + nts + elem + (lambda () #f))) + pat)]) + (andmap (lambda (i) + (or (not i) + (andmap values (caddr i)))) + l)))) + pats (caddr old-list)))) + nt-ids patss) + ;; Build a definition for each non-term: + (loop (cdr clauses) + cfg-start + (map (lambda (nt pats handles $ctxs) + (define info (bound-identifier-mapping-get nts nt)) + (list nt + #`(let ([key (gensym '#,nt)]) + (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) + (parse-nt/share + key #,(car info) '#,(cadr info) stream last-consumed-token depth end + max-depth tasks + success-k fail-k + (lambda (end max-depth tasks success-k fail-k) + #,(let loop ([pats pats] + [handles (syntax->list handles)] + [$ctxs (syntax->list $ctxs)] + [simple?s (caddr info)]) + (if (null? pats) + #'(fail-k max-depth tasks) + #`(#,(if (or (null? (cdr pats)) + (car simple?s)) + #'parse-or + #'parse-parallel-or) + (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) + #,(build-match nts + toks + (car pats) + (car handles) + (car $ctxs))) + (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) + #,(loop (cdr pats) + (cdr handles) + (cdr $ctxs) + (cdr simple?s))) + stream last-consumed-token depth end success-k fail-k max-depth tasks))))))))) + nt-ids + patss + (syntax->list #'(((begin handle0 handle ...) ...) ...)) + (syntax->list #'((handle0 ...) ...))) + cfg-error + src-pos? + (list* + (with-syntax ([((tok tok-id . $e) ...) + (token-identifier-mapping-map toks + (lambda (k v) + (list* k + (car v) + (if (cdr v) + #f + '$1))))] + [(pos ...) + (if src-pos? + #'($1-start-pos $1-end-pos) + #'(#f #f))]) + #`(grammar (start [() null] + [(atok start) (cons $1 $2)]) + (atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...))) + #`(start start) + parser-clauses)))] + [(grammar . _) + (raise-syntax-error + #f + "bad grammar clause" + stx + (car clauses))] + [(src-pos) + (loop (cdr clauses) + cfg-start + cfg-grammar + cfg-error + #t + (cons (car clauses) parser-clauses))] + [_else + (loop (cdr clauses) + cfg-start + cfg-grammar + cfg-error + src-pos? + (cons (car clauses) parser-clauses))]))))]) + #`(let ([orig-parse (parser + [error (lambda (a b c) + (error 'cfg-parser "unexpected ~a token: ~a" b c))] + . #,parser-clauses)] + [error-proc #,cfg-error]) + (letrec #,grammar + (lambda (get-tok) + (let ([tok-list (orig-parse get-tok)]) + (letrec ([success-k + (lambda (val stream last-consumed-token depth max-depth tasks next) + (if (null? stream) + val + (next success-k fail-k max-depth tasks)))] + [fail-k (lambda (max-depth tasks) + (cond + [(null? 
tok-list) + (if error-proc + (error-proc #t + 'no-tokens + #f + (make-position #f #f #f) + (make-position #f #f #f)) + (error + 'cfg-parse + "no tokens"))] + [else + (let ([bad-tok (list-ref tok-list + (min (sub1 (length tok-list)) + max-depth))]) + (if error-proc + (error-proc #t + (tok-orig-name bad-tok) + (tok-val bad-tok) + (tok-start bad-tok) + (tok-end bad-tok)) + (error + 'cfg-parse + "failed at ~a" + (tok-val bad-tok))))]))]) + (#,start tok-list + ;; we simulate a token at the very beginning with zero width + ;; for use with the position-generating code (*-start-pos, *-end-pos). + (if (null? tok-list) + (tok #f #f #f + (position 1 + #,(if src-pos? #'1 #'#f) + #,(if src-pos? #'0 #'#f)) + (position 1 + #,(if src-pos? #'1 #'#f) + #,(if src-pos? #'0 #'#f))) + (tok (tok-name (car tok-list)) + (tok-orig-name (car tok-list)) + (tok-val (car tok-list)) + (tok-start (car tok-list)) + (tok-start (car tok-list)))) + 0 + (length tok-list) + success-k + fail-k + 0 + (make-tasks null null + (make-hasheq) (make-hasheq) + (make-hash) #t)))))))))])) + + +(module* test racket/base + (require (submod "..") + br-parser-tools/lex + racket/block + rackunit) + + ;; Test: parsing regular expressions. + ;; Here is a test case on locations: + (block + (define-tokens regexp-tokens (ANCHOR STAR OR LIT LPAREN RPAREN EOF)) + (define lex (lexer-src-pos ["|" (token-OR lexeme)] + ["^" (token-ANCHOR lexeme)] + ["*" (token-STAR lexeme)] + [(repetition 1 +inf.0 alphabetic) (token-LIT lexeme)] + ["(" (token-LPAREN lexeme)] + [")" (token-RPAREN lexeme)] + [whitespace (return-without-pos (lex input-port))] + [(eof) (token-EOF 'eof)])) + (define -parse (cfg-parser + (tokens regexp-tokens) + (start top) + (end EOF) + (src-pos) + (grammar [top [(maybe-anchor regexp) + (cond [$1 + `(anchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] + [else + `(unanchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))])]] + [maybe-anchor [(ANCHOR) #t] + [() #f]] + [regexp [(regexp STAR) `(star ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] + [(regexp OR regexp) `(or ,$1 ,$3 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] + [(LPAREN regexp RPAREN) `(group ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] + [(LIT) `(lit ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $1-end-pos))]]))) + (define (pos->sexp pos) + (position-offset pos)) + + (define (parse s) + (define ip (open-input-string s)) + (port-count-lines! ip) + (-parse (lambda () (lex ip)))) + + (check-equal? (parse "abc") + '(unanchored (lit "abc" 1 4) 1 4)) + (check-equal? 
(parse "a | (b*) | c") + '(unanchored (or (or (lit "a" 1 2) + (group (star (lit "b" 6 7) 6 8) 5 9) + 1 9) + (lit "c" 12 13) + 1 13) + 1 13))) + + + + + + ;; Tests used during development + (define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF)) + + (define lex + (lexer + ["+" (token-PLUS '+)] + ["-" (token-MINUS '-)] + ["*" (token-STAR '*)] + ["|" (token-BAR '||)] + [":" (token-COLON '|:|)] + [whitespace (lex input-port)] + [(eof) (token-EOF 'eof)])) + + (define parse + (cfg-parser + (tokens non-terminals) + (start <program>) + (end EOF) + (error (lambda (a b stx) + (error 'parse "failed at ~s" stx))) + (grammar [<program> [(PLUS) "plus"] + [(<minus-program> BAR <minus-program>) (list $1 $2 $3)] + [(<program> COLON) (list $1)]] + [<minus-program> [(MINUS) "minus"] + [(<program> STAR) (cons $1 $2)]] + [<simple-program> [(<alts> MINUS) "yes"]] + [<alts> [(PLUS) 'plus] + [(MINUS) 'minus]] + [<random> [() '0] + [(<random> PLUS) (add1 $1)] + [(<random> PLUS) (add1 $1)]]))) + + (let ([p (open-input-string #;"+*|-|-*|+**" #;"-|+*|+**" + #;"+*|+**|-" #;"-|-*|-|-*" + #;"-|-*|-|-**|-|-*|-|-**" + "-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|-|-*|-|-**|-|-*|-|-*** + |-|-*|-|-**|-|-*|-|-*****|-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****| + -|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****" + ;; This one fails: + #;"+*")]) + (check-equal? (parse (lambda () (lex p))) + '((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) + || + (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) + . + *) + || + (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) + || + (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) + . + *)) + . + *) + || + (((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) + || + (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) + . + *) + || + (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) + || + (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) + . + *)) + . + *))))) diff --git a/brag/brag/cfg-parser/compiled/cfg-parser_rkt.dep b/brag/brag/cfg-parser/compiled/cfg-parser_rkt.dep new file mode 100644 index 0000000..01a846a --- /dev/null +++ b/brag/brag/cfg-parser/compiled/cfg-parser_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("da5744da22ed2aa3d983ac0392f263db7c827a39" . 
"f4e5b37aa4da6827509a70806f182e5614e3fc7a") (collects #"br-parser-tools" #"lex.rkt") (collects #"br-parser-tools" #"private-lex" #"token-syntax.rkt") (collects #"br-parser-tools" #"yacc.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"block.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt") (collects #"syntax" #"boundmap.rkt")) diff --git a/brag/brag/cfg-parser/compiled/cfg-parser_rkt.zo b/brag/brag/cfg-parser/compiled/cfg-parser_rkt.zo new file mode 100644 index 0000000..cbb4a43 Binary files /dev/null and b/brag/brag/cfg-parser/compiled/cfg-parser_rkt.zo differ diff --git a/brag/brag/cfg-parser/compiled/drracket/errortrace/cfg-parser_rkt.dep b/brag/brag/cfg-parser/compiled/drracket/errortrace/cfg-parser_rkt.dep new file mode 100644 index 0000000..b053327 --- /dev/null +++ b/brag/brag/cfg-parser/compiled/drracket/errortrace/cfg-parser_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("da5744da22ed2aa3d983ac0392f263db7c827a39" . "fbbfd010bdbd406145bca0e19a6d9c5fc91eef4c") (collects #"br-parser-tools" #"lex.rkt") (collects #"br-parser-tools" #"private-lex" #"token-syntax.rkt") (collects #"br-parser-tools" #"yacc.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"block.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt") (collects #"syntax" #"boundmap.rkt")) diff --git a/brag/brag/cfg-parser/compiled/drracket/errortrace/cfg-parser_rkt.zo b/brag/brag/cfg-parser/compiled/drracket/errortrace/cfg-parser_rkt.zo new file mode 100644 index 0000000..6168742 Binary files /dev/null and b/brag/brag/cfg-parser/compiled/drracket/errortrace/cfg-parser_rkt.zo differ diff --git a/brag/brag/codegen/codegen.rkt b/brag/brag/codegen/codegen.rkt new file mode 100755 index 0000000..f8f76cd --- /dev/null +++ b/brag/brag/codegen/codegen.rkt @@ -0,0 +1,448 @@ +#lang racket/base + +(require (for-template racket/base) + racket/list + racket/set + racket/syntax + syntax/srcloc + brag/rules/stx-types + "flatten.rkt" + syntax/id-table + (prefix-in sat: "satisfaction.rkt") + (prefix-in support: brag/support) + (prefix-in stxparse: syntax/parse)) + +(provide rules-codegen) + + +;; Generates the body of the module. +;; FIXME: abstract this so we can just call (rules ...) without +;; generating the whole module body. +(define (rules-codegen stx + #:parser-provider-module [parser-provider-module 'br-parser-tools/yacc] + #:parser-provider-form [parser-provider-form 'parser]) + (syntax-case stx () + [(_ r ...) + (begin + ;; (listof stx) + (define rules (syntax->list #'(r ...))) + + (when (empty? rules) + (raise-syntax-error 'brag + (format "The grammar does not appear to have any rules") + stx)) + + (check-all-rules-defined! rules) + (check-all-rules-no-duplicates! rules) + (check-all-rules-satisfiable! rules) + + ;; We flatten the rules so we can use the yacc-style ruleset that br-parser-tools + ;; supports. + (define flattened-rules (flatten-rules rules)) + + (define generated-rule-codes (map flat-rule->yacc-rule flattened-rules)) + + ;; The first rule, by default, is the start rule. 
+ (define rule-ids (for/list ([a-rule (in-list rules)]) + (rule-id a-rule))) + (define start-id (first rule-ids)) + + + (define-values (implicit-tokens ;; (listof identifier) + explicit-tokens) ;; (listof identifier) + (rules-collect-token-types rules)) + + ;; (listof symbol) + (define implicit-token-types + (map string->symbol + (set->list (list->set (map syntax-e implicit-tokens))))) + + ;; (listof symbol) + (define explicit-token-types + (set->list (list->set (map syntax-e explicit-tokens)))) + + ;; (listof symbol) + (define token-types + (set->list (list->set (append (map (lambda (x) (string->symbol (syntax-e x))) + implicit-tokens) + (map syntax-e explicit-tokens))))) + + (with-syntax ([start-id start-id] + + [(token-type ...) token-types] + + [(token-type-constructor ...) + (map (lambda (x) (string->symbol (format "token-~a" x))) + token-types)] + + [(explicit-token-types ...) explicit-token-types] + [(implicit-token-types ...) implicit-token-types] + [(implicit-token-types-str ...) (map symbol->string implicit-token-types)] + [(implicit-token-type-constructor ...) + (map (lambda (x) (string->symbol (format "token-~a" x))) + implicit-token-types)] + [generated-grammar #`(grammar #,@generated-rule-codes)] + [parser-module parser-provider-module] + [parser-form parser-provider-form]) + (quasisyntax/loc stx + (begin + (require br-parser-tools/lex + parser-module + brag/codegen/runtime + brag/support + brag/private/internal-support + racket/set + (for-syntax syntax/parse racket/base)) + + (provide parse + make-rule-parser + all-token-types + #;current-source + #;current-parser-error-handler + #;current-tokenizer-error-handler + #;[struct-out exn:fail:parsing] + ) + + (define-tokens enumerated-tokens (token-type ...)) + + ;; all-token-types lists all the tokens (except for EOF) + (define all-token-types + (set-remove (set 'token-type ...) 'EOF)) + + ;; For internal use by the permissive tokenizer only: + (define all-tokens-hash/mutable + (make-hash (list ;; Note: we also allow the eof object here, to make + ;; the permissive tokenizer even nicer to work with. + (cons eof token-EOF) + (cons 'token-type token-type-constructor) ...))) + + + #;(define default-lex/1 + (lexer-src-pos [implicit-token-types-str + (token 'implicit-token-types lexeme)] + ... + [(eof) (token eof)])) + + (define-syntax (make-rule-parser stx-2) + (syntax-parse stx-2 + [(_ start-rule:id) + (begin + ;; HACK HACK HACK + ;; The cfg-parser depends on the start-rule provided in (start ...) to have the same + ;; context as the rest of this body, so I need to hack this. I don't like this, but + ;; I don't know what else to do. Hence recolored-start-rule. + (unless (member (syntax-e #'start-rule) + '#,(map syntax-e rule-ids)) + (raise-syntax-error #f + (format "Rule ~a is not defined in the grammar" (syntax-e #'start-rule)) + stx-2)) + + (define recolored-start-rule (datum->syntax (syntax #,stx) (syntax-e #'start-rule))) + #`(let ([THE-GRAMMAR (parser-form (tokens enumerated-tokens) + (src-pos) + (start #,recolored-start-rule) + (end EOF) + (error THE-ERROR-HANDLER) + generated-grammar)]) + (case-lambda [(tokenizer) + (define next-token + (make-permissive-tokenizer tokenizer all-tokens-hash/mutable)) + (THE-GRAMMAR next-token)] + [(source tokenizer) + (parameterize ([current-source source]) + (parse tokenizer))])))])) + + (define parse (make-rule-parser start-id)) + (provide parse-to-datum parse-tree) + + (define (parse-to-datum x) + (let loop ([x (syntax->datum (parse x))]) + (cond + [(list? x) (map loop x)] + [(char? 
x) (string x)] + [else x]))) + + (define parse-tree parse-to-datum)))))])) + + +;; Given a flattened rule, returns a syntax for the code that +;; preserves as much source location as possible. +;; +;; Each rule is defined to return a list with the following structure: +;; +;; stx :== (name (U tokens rule-stx) ...) +;; +(define (flat-rule->yacc-rule a-flat-rule) + (syntax-case a-flat-rule () + [(rule-type origin name clauses ...) + (begin + (define translated-clauses + (map (lambda (clause) (translate-clause clause #'name #'origin)) + (syntax->list #'(clauses ...)))) + (with-syntax ([(translated-clause ...) translated-clauses]) + #`[name translated-clause ...]))])) + + + +;; translates a single primitive rule clause. +;; A clause is a simple list of ids, lit, vals, and inferred-id elements. +;; The action taken depends on the pattern type. +(define (translate-clause a-clause rule-name/false origin) + (define translated-patterns + (let loop ([primitive-patterns (syntax->list a-clause)]) + (cond + [(empty? primitive-patterns) + '()] + [else + (cons (syntax-case (first primitive-patterns) (id lit token inferred-id) + [(id val) + #'val] + [(lit val) + (datum->syntax #f (string->symbol (syntax-e #'val)) #'val)] + [(token val) + #'val] + [(inferred-id val reason) + #'val]) + (loop (rest primitive-patterns)))]))) + + (define translated-actions + (for/list ([translated-pattern (in-list translated-patterns)] + [primitive-pattern (syntax->list a-clause)] + [pos (in-naturals 1)]) + (if (eq? (syntax-property primitive-pattern 'hide) 'hide) + #'null + (with-syntax ([$X + (format-id translated-pattern "$~a" pos)] + [$X-start-pos + (format-id translated-pattern "$~a-start-pos" pos)] + [$X-end-pos + (format-id translated-pattern "$~a-end-pos" pos)]) + (syntax-case primitive-pattern (id lit token inferred-id) + + ;; When a rule usage is inferred, the value of $X is a syntax object + ;; whose head is the name of the inferred rule . We strip that out, + ;; leaving the residue to be absorbed. + [(inferred-id val reason) + #'(syntax-case $X () + [(inferred-rule-name . rest) + (syntax->list #'rest)])] + [(id val) + ;; at this point, the 'hide property is either #f or "splice" + ;; ('hide value is handled at the top of this conditional + ;; we need to use boolean because a symbol is treated as an identifier. + ;; also we'll separate it into its own property for clarity and test for it in "runtime.rkt" + #`(list (syntax-property $X 'splice-rh-id #,(and (syntax-property primitive-pattern 'hide) #t)))] + [(lit val) + #'(list (atomic-datum->syntax $X $X-start-pos $X-end-pos))] + [(token val) + #'(list (atomic-datum->syntax $X $X-start-pos $X-end-pos))]))))) + + (define whole-rule-loc + (if (empty? translated-patterns) + #'(list (current-source) #f #f #f #f) + (with-syntax ([$1-start-pos (datum->syntax (first translated-patterns) '$1-start-pos)] + [$n-end-pos (format-id (last translated-patterns) "$~a-end-pos" (length translated-patterns))]) + #`(positions->srcloc $1-start-pos $n-end-pos)))) + + ;; move 'hide-or-splice-lhs-id property into function because name is datum-ized + (with-syntax ([(translated-pattern ...) translated-patterns] + [(translated-action ...) translated-actions]) + #`[(translated-pattern ...) + (rule-components->syntax '#,rule-name/false translated-action ... + #:srcloc #,whole-rule-loc + #:hide-or-splice? 
#,(syntax-property rule-name/false 'hide-or-splice-lhs-id))])) + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; collect-token-types: (listof rule-syntax) -> (values (listof identifier) (listof identifier)) +;; +;; Given a rule, automatically derive the list of implicit and +;; explicit token types we need to generate. +;; +;; Note: EOF is reserved, and will always be included in the list +;; of explicit token types, though the user is not allow to express it themselves. +(define (rules-collect-token-types rules) + (define-values (implicit explicit) + (for/fold ([implicit '()] + [explicit (list (datum->syntax (first rules) 'EOF))]) + ([r (in-list rules)]) + (rule-collect-token-types r implicit explicit))) + (values (reverse implicit) (reverse explicit))) + +(define (rule-collect-token-types a-rule implicit explicit) + (syntax-case a-rule (rule) + [(rule id a-pattern) + (pattern-collect-implicit-token-types #'a-pattern implicit explicit)])) + +(define (pattern-collect-implicit-token-types a-pattern implicit explicit) + (let loop ([a-pattern a-pattern] + [implicit implicit] + [explicit explicit]) + (syntax-case a-pattern (id lit token choice repeat maybe seq) + [(id val) + (values implicit explicit)] + [(lit val) + (values (cons #'val implicit) explicit)] + [(token val) + (begin + (when (eq? (syntax-e #'val) 'EOF) + (raise-syntax-error #f "Token EOF is reserved and can not be used in a grammar" #'val)) + (values implicit (cons #'val explicit)))] + [(choice vals ...) + (for/fold ([implicit implicit] + [explicit explicit]) + ([v (in-list (syntax->list #'(vals ...)))]) + (loop v implicit explicit))] + [(repeat min val) + (loop #'val implicit explicit)] + [(maybe val) + (loop #'val implicit explicit)] + [(seq vals ...) + (for/fold ([implicit implicit] + [explicit explicit]) + ([v (in-list (syntax->list #'(vals ...)))]) + (loop v implicit explicit))]))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; rule-id: rule -> identifier-stx +;; Get the binding id of a rule. +(define (rule-id a-rule) + (syntax-case a-rule (rule) + [(rule id a-pattern) + #'id])) + +(define (rule-pattern a-rule) + (syntax-case a-rule (rule) + [(rule id a-pattern) + #'a-pattern])) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; check-all-rules-defined!: (listof rule-stx) -> void +(define (check-all-rules-defined! rules) + (define table (make-free-id-table)) + ;; Pass one: collect all the defined rule names. + (for ([a-rule (in-list rules)]) + (free-id-table-set! table (rule-id a-rule) #t)) + ;; Pass two: check each referenced id, and make sure it's been defined. + (for ([a-rule (in-list rules)]) + (for ([referenced-id (in-list (rule-collect-used-ids a-rule))]) + (unless (free-id-table-ref table referenced-id (lambda () #f)) + (raise-syntax-error #f (format "Rule ~a has no definition" (syntax-e referenced-id)) + referenced-id))))) + +;; check-all-rules-no-duplicates!: (listof rule-stx) -> void +(define (check-all-rules-no-duplicates! rules) + (define table (make-free-id-table)) + ;; Pass one: collect all the defined rule names. + (for ([a-rule (in-list rules)]) + (define maybe-other-rule-id (free-id-table-ref table (rule-id a-rule) (lambda () #f))) + (when maybe-other-rule-id + (raise-syntax-error #f (format "Rule ~a has a duplicate definition" (syntax-e (rule-id a-rule))) + (rule-id a-rule) + #f + (list (rule-id a-rule) maybe-other-rule-id))) + (free-id-table-set! 
table (rule-id a-rule) (rule-id a-rule)))) + + + +;; rule-collect-used-ids: rule-stx -> (listof identifier) +;; Given a rule, extracts a list of identifiers +(define (rule-collect-used-ids a-rule) + (syntax-case a-rule (rule) + [(rule id a-pattern) + (pattern-collect-used-ids #'a-pattern '())])) + +;; pattern-collect-used-ids: pattern-stx (listof identifier) -> (listof identifier) +;; Returns a flat list of rule identifiers referenced in the pattern. +(define (pattern-collect-used-ids a-pattern acc) + (let loop ([a-pattern a-pattern] + [acc acc]) + (syntax-case a-pattern (id lit token choice repeat maybe seq) + [(id val) + (cons #'val acc)] + [(lit val) + acc] + [(token val) + acc] + [(choice vals ...) + (for/fold ([acc acc]) + ([v (in-list (syntax->list #'(vals ...)))]) + (loop v acc))] + [(repeat min val) + (loop #'val acc)] + [(maybe val) + (loop #'val acc)] + [(seq vals ...) + (for/fold ([acc acc]) + ([v (in-list (syntax->list #'(vals ...)))]) + (loop v acc))]))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; check-all-rules-satisfiable: (listof rule-stx) -> void +;; Does a simple graph traversal / topological sort-like thing to make sure that, for +;; any rule, there's some finite sequence of tokens that +;; satisfies it. If this is not the case, then something horrible +;; has happened, and we need to tell the user about it. +;; +;; NOTE: Assumes all referenced rules have definitions. +(define (check-all-rules-satisfiable! rules) + (define toplevel-rule-table (make-free-id-table)) + (for ([a-rule (in-list rules)]) + (free-id-table-set! toplevel-rule-table + (rule-id a-rule) + (sat:make-and))) + (define leaves '()) + + (define (make-leaf) + (define a-leaf (sat:make-and)) + (set! leaves (cons a-leaf leaves)) + a-leaf) + + (define (process-pattern a-pattern) + (syntax-case a-pattern (id lit token choice repeat maybe seq) + [(id val) + (free-id-table-ref toplevel-rule-table #'val)] + [(lit val) + (make-leaf)] + [(token val) + (make-leaf)] + [(choice vals ...) + (begin + (define an-or-node (sat:make-or)) + (for ([v (in-list (syntax->list #'(vals ...)))]) + (define a-child (process-pattern v)) + (sat:add-child! an-or-node a-child)) + an-or-node)] + [(repeat min val) + (syntax-case #'min () + [0 + (make-leaf)] + [else + (process-pattern #'val)])] + [(maybe val) + (make-leaf)] + [(seq vals ...) + (begin + (define an-and-node (sat:make-and)) + (for ([v (in-list (syntax->list #'(vals ...)))]) + (define a-child (process-pattern v)) + (sat:add-child! an-and-node a-child)) + an-and-node)])) + + (for ([a-rule (in-list rules)]) + (define rule-node (free-id-table-ref toplevel-rule-table (rule-id a-rule))) + (sat:add-child! rule-node (process-pattern (rule-pattern a-rule)))) + + (for ([a-leaf leaves]) + (sat:visit! a-leaf)) + + (for ([a-rule (in-list rules)]) + (define rule-node (free-id-table-ref toplevel-rule-table (rule-id a-rule))) + (unless (sat:node-yes? rule-node) + (raise-syntax-error #f (format "Rule ~a has no finite derivation" (syntax-e (rule-id a-rule))) + (rule-id a-rule))))) diff --git a/brag/brag/codegen/compiled/codegen_rkt.dep b/brag/brag/codegen/compiled/codegen_rkt.dep new file mode 100644 index 0000000..eb3a1d5 --- /dev/null +++ b/brag/brag/codegen/compiled/codegen_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("4c5beaa97680e66117fd170df42efc9b3b073674" . 
"07aa1cb4610cb4096348a120760d9e4c95390927") (collects #"brag" #"codegen" #"flatten.rkt") (collects #"brag" #"codegen" #"satisfaction.rkt") (collects #"brag" #"rules" #"stx-types.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"list.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"racket" #"set.rkt") (collects #"racket" #"syntax.rkt") (collects #"syntax" #"id-table.rkt") (collects #"syntax" #"parse.rkt") (collects #"syntax" #"srcloc.rkt")) diff --git a/brag/brag/codegen/compiled/codegen_rkt.zo b/brag/brag/codegen/compiled/codegen_rkt.zo new file mode 100644 index 0000000..b197416 Binary files /dev/null and b/brag/brag/codegen/compiled/codegen_rkt.zo differ diff --git a/brag/brag/codegen/compiled/drracket/errortrace/codegen_rkt.dep b/brag/brag/codegen/compiled/drracket/errortrace/codegen_rkt.dep new file mode 100644 index 0000000..4bd6851 --- /dev/null +++ b/brag/brag/codegen/compiled/drracket/errortrace/codegen_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("4c5beaa97680e66117fd170df42efc9b3b073674" . "99da3e7cc99d3a0b7dda6934254dd845b69099ec") (collects #"brag" #"codegen" #"flatten.rkt") (collects #"brag" #"codegen" #"satisfaction.rkt") (collects #"brag" #"rules" #"stx-types.rkt") (collects #"brag" #"support.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"list.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"racket" #"set.rkt") (collects #"racket" #"syntax.rkt") (collects #"syntax" #"id-table.rkt") (collects #"syntax" #"parse.rkt") (collects #"syntax" #"srcloc.rkt")) diff --git a/brag/brag/codegen/compiled/drracket/errortrace/codegen_rkt.zo b/brag/brag/codegen/compiled/drracket/errortrace/codegen_rkt.zo new file mode 100644 index 0000000..21d4b0b Binary files /dev/null and b/brag/brag/codegen/compiled/drracket/errortrace/codegen_rkt.zo differ diff --git a/brag/brag/codegen/compiled/drracket/errortrace/flatten_rkt.dep b/brag/brag/codegen/compiled/drracket/errortrace/flatten_rkt.dep new file mode 100644 index 0000000..9aafe01 --- /dev/null +++ b/brag/brag/codegen/compiled/drracket/errortrace/flatten_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("b059eb349e1c078ba074168cee5a2e4572b986de" . "01717984ba47cc71328752bd03518d0b6b44a2d3") (collects #"brag" #"rules" #"stx-types.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/codegen/compiled/drracket/errortrace/flatten_rkt.zo b/brag/brag/codegen/compiled/drracket/errortrace/flatten_rkt.zo new file mode 100644 index 0000000..400f628 Binary files /dev/null and b/brag/brag/codegen/compiled/drracket/errortrace/flatten_rkt.zo differ diff --git a/brag/brag/codegen/compiled/drracket/errortrace/reader_rkt.dep b/brag/brag/codegen/compiled/drracket/errortrace/reader_rkt.dep new file mode 100644 index 0000000..da813f3 --- /dev/null +++ b/brag/brag/codegen/compiled/drracket/errortrace/reader_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c6cf0a3fe7420938bebf4eb428b13c3518d05e1f" . 
"3c8e76d38cae8cb8b28fc0b56809d12cd07057b1") (collects #"brag" #"rules" #"lexer.rkt") (collects #"brag" #"rules" #"parser.rkt") (collects #"brag" #"rules" #"rule-structs.rkt") (collects #"brag" #"rules" #"stx.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"s-exp" #"lang" #"reader.rkt") (collects #"syntax" #"module-reader.rkt")) diff --git a/brag/brag/codegen/compiled/drracket/errortrace/reader_rkt.zo b/brag/brag/codegen/compiled/drracket/errortrace/reader_rkt.zo new file mode 100644 index 0000000..03b640f Binary files /dev/null and b/brag/brag/codegen/compiled/drracket/errortrace/reader_rkt.zo differ diff --git a/brag/brag/codegen/compiled/drracket/errortrace/runtime_rkt.dep b/brag/brag/codegen/compiled/drracket/errortrace/runtime_rkt.dep new file mode 100644 index 0000000..b7538e1 --- /dev/null +++ b/brag/brag/codegen/compiled/drracket/errortrace/runtime_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("9396075533d041b1c8ae2dd3d832fbc4ba338099" . "d93eef514d94bb3f423c202e3674398150cedfe8") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"generator.rkt") (collects #"racket" #"list.rkt") (collects #"racket" #"match.rkt") (collects #"racket" #"runtime-config.rkt") (indirect collects #"racket" #"match" #"gen-match.rkt") (indirect collects #"racket" #"match" #"parse.rkt")) diff --git a/brag/brag/codegen/compiled/drracket/errortrace/runtime_rkt.zo b/brag/brag/codegen/compiled/drracket/errortrace/runtime_rkt.zo new file mode 100644 index 0000000..15cc720 Binary files /dev/null and b/brag/brag/codegen/compiled/drracket/errortrace/runtime_rkt.zo differ diff --git a/brag/brag/codegen/compiled/drracket/errortrace/satisfaction_rkt.dep b/brag/brag/codegen/compiled/drracket/errortrace/satisfaction_rkt.dep new file mode 100644 index 0000000..6bf64c7 --- /dev/null +++ b/brag/brag/codegen/compiled/drracket/errortrace/satisfaction_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("1d63342f72da51ee260ad3d02d1efdc362ebd6a9" . "ea4e78d4bbc9044dd272c26055ba0f85c7a53ea1") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"block.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"match.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt") (indirect collects #"racket" #"match" #"gen-match.rkt") (indirect collects #"racket" #"match" #"parse.rkt")) diff --git a/brag/brag/codegen/compiled/drracket/errortrace/satisfaction_rkt.zo b/brag/brag/codegen/compiled/drracket/errortrace/satisfaction_rkt.zo new file mode 100644 index 0000000..b676484 Binary files /dev/null and b/brag/brag/codegen/compiled/drracket/errortrace/satisfaction_rkt.zo differ diff --git a/brag/brag/codegen/compiled/drracket/errortrace/sexp-based-lang_rkt.dep b/brag/brag/codegen/compiled/drracket/errortrace/sexp-based-lang_rkt.dep new file mode 100644 index 0000000..5b913c2 --- /dev/null +++ b/brag/brag/codegen/compiled/drracket/errortrace/sexp-based-lang_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c1fbef7a4ab4031977d43ba847ea0431155e59da" . 
"befe7d6ee440444d102cdba29b790478737ce1a8") (collects #"brag" #"codegen" #"codegen.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/codegen/compiled/drracket/errortrace/sexp-based-lang_rkt.zo b/brag/brag/codegen/compiled/drracket/errortrace/sexp-based-lang_rkt.zo new file mode 100644 index 0000000..b28fb2b Binary files /dev/null and b/brag/brag/codegen/compiled/drracket/errortrace/sexp-based-lang_rkt.zo differ diff --git a/brag/brag/codegen/compiled/flatten_rkt.dep b/brag/brag/codegen/compiled/flatten_rkt.dep new file mode 100644 index 0000000..980f45c --- /dev/null +++ b/brag/brag/codegen/compiled/flatten_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("463d09a215f1ce6ce3966e548ddd5cd4f64c0585" . "7357211146d398eacc2c6e8b2960519815c8b742") (collects #"brag" #"rules" #"stx-types.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/codegen/compiled/flatten_rkt.zo b/brag/brag/codegen/compiled/flatten_rkt.zo new file mode 100644 index 0000000..92b9cb1 Binary files /dev/null and b/brag/brag/codegen/compiled/flatten_rkt.zo differ diff --git a/brag/brag/codegen/compiled/reader_rkt.dep b/brag/brag/codegen/compiled/reader_rkt.dep new file mode 100644 index 0000000..7f719bd --- /dev/null +++ b/brag/brag/codegen/compiled/reader_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c6cf0a3fe7420938bebf4eb428b13c3518d05e1f" . "5bf2ed04a6262a72f3bcf3661c660327d949bee1") (collects #"brag" #"rules" #"lexer.rkt") (collects #"brag" #"rules" #"parser.rkt") (collects #"brag" #"rules" #"rule-structs.rkt") (collects #"brag" #"rules" #"stx.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"s-exp" #"lang" #"reader.rkt") (collects #"syntax" #"module-reader.rkt")) diff --git a/brag/brag/codegen/compiled/reader_rkt.zo b/brag/brag/codegen/compiled/reader_rkt.zo new file mode 100644 index 0000000..17f27b1 Binary files /dev/null and b/brag/brag/codegen/compiled/reader_rkt.zo differ diff --git a/brag/brag/codegen/compiled/runtime_rkt.dep b/brag/brag/codegen/compiled/runtime_rkt.dep new file mode 100644 index 0000000..10cf183 --- /dev/null +++ b/brag/brag/codegen/compiled/runtime_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("4660b139e701bdbadf98bfc66b1e659ffe6393ed" . "720ffc1f361e9b200aaa0b4adcda8e4e07779cb0") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"generator.rkt") (collects #"racket" #"list.rkt") (collects #"racket" #"match.rkt") (collects #"racket" #"runtime-config.rkt") (indirect collects #"racket" #"match" #"gen-match.rkt") (indirect collects #"racket" #"match" #"parse.rkt")) diff --git a/brag/brag/codegen/compiled/runtime_rkt.zo b/brag/brag/codegen/compiled/runtime_rkt.zo new file mode 100644 index 0000000..2f76750 Binary files /dev/null and b/brag/brag/codegen/compiled/runtime_rkt.zo differ diff --git a/brag/brag/codegen/compiled/satisfaction_rkt.dep b/brag/brag/codegen/compiled/satisfaction_rkt.dep new file mode 100644 index 0000000..e66dfb0 --- /dev/null +++ b/brag/brag/codegen/compiled/satisfaction_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("1d63342f72da51ee260ad3d02d1efdc362ebd6a9" . 
"b222ad49281f6c6f3843873450215de1d25363af") (collects #"racket" #"base.rkt") (collects #"racket" #"block.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"match.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt") (indirect collects #"racket" #"match" #"gen-match.rkt") (indirect collects #"racket" #"match" #"parse.rkt")) diff --git a/brag/brag/codegen/compiled/satisfaction_rkt.zo b/brag/brag/codegen/compiled/satisfaction_rkt.zo new file mode 100644 index 0000000..f3e8517 Binary files /dev/null and b/brag/brag/codegen/compiled/satisfaction_rkt.zo differ diff --git a/brag/brag/codegen/compiled/sexp-based-lang_rkt.dep b/brag/brag/codegen/compiled/sexp-based-lang_rkt.dep new file mode 100644 index 0000000..8170bc9 --- /dev/null +++ b/brag/brag/codegen/compiled/sexp-based-lang_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c1fbef7a4ab4031977d43ba847ea0431155e59da" . "58d806ee20049838cb29000eb0b709bf9b999209") (collects #"brag" #"codegen" #"codegen.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/codegen/compiled/sexp-based-lang_rkt.zo b/brag/brag/codegen/compiled/sexp-based-lang_rkt.zo new file mode 100644 index 0000000..780ed94 Binary files /dev/null and b/brag/brag/codegen/compiled/sexp-based-lang_rkt.zo differ diff --git a/brag/brag/codegen/flatten.rkt b/brag/brag/codegen/flatten.rkt new file mode 100755 index 0000000..ad3ecfc --- /dev/null +++ b/brag/brag/codegen/flatten.rkt @@ -0,0 +1,200 @@ +#lang racket/base +(require brag/rules/stx-types + (for-syntax racket/base)) + +(provide flatten-rule + flatten-rules + prim-rule) + + + +(define (make-fresh-name) + (let ([n 0]) + (lambda () + (set! n (add1 n)) + (string->symbol (format "%rule~a" n))))) + +(define default-fresh-name + (make-fresh-name)) + + +;; Translates rules to lists of primitive rules. + + +(define (flatten-rules rules #:fresh-name [fresh-name default-fresh-name]) + (define ht (make-hash)) + (apply append (map (lambda (a-rule) (flatten-rule a-rule + #:ht ht + #:fresh-name fresh-name)) + rules))) + + +;; flatten-rule: rule -> (listof primitive-rule) +(define (flatten-rule a-rule + #:fresh-name [fresh-name default-fresh-name] + + ;; ht: (hashtableof pattern-hash-key pat) + #:ht [ht (make-hash)]) + + (let recur ([a-rule a-rule] + [inferred? #f]) + + ;; lift-nonprimitive-pattern: pattern -> (values (listof primitive-rule) pattern) + ;; Turns non-primitive patterns into primitive patterns, and produces a set of + ;; derived rules. + (define (lift-nonprimitive-pattern a-pat) + (cond + [(primitive-pattern? a-pat) + (values '() (linearize-primitive-pattern a-pat))] + [(hash-has-key? ht (pattern->hash-key a-pat)) + (values '() (list (hash-ref ht (pattern->hash-key a-pat))))] + [else + (define head (syntax-case a-pat () [(head rest ...) #'head])) + (define new-name (datum->syntax #f (fresh-name) a-pat)) + (define new-inferred-id (datum->syntax #f `(inferred-id ,new-name ,head) a-pat)) + (hash-set! ht (pattern->hash-key a-pat) new-inferred-id) + (values (recur #`(rule #,new-name #,a-pat) head) + (list new-inferred-id))])) + + (define (lift-nonprimitive-patterns pats) + (define-values (rules patterns) + (for/fold ([inferred-ruless '()] + [patternss '()]) + ([p (in-list pats)]) + (define-values (new-rules new-ps) + (lift-nonprimitive-pattern p)) + (values (cons new-rules inferred-ruless) + (cons new-ps patternss)))) + (values (apply append (reverse rules)) + (apply append (reverse patterns)))) + + (with-syntax ([head (if inferred? 
#'inferred-prim-rule #'prim-rule)] + [origin (syntax-case a-rule (rule) [(rule name (pat-head rest ...)) #'pat-head])]) + (syntax-case a-rule (rule) + [(rule name pat) + (syntax-case #'pat (id inferred-id lit token choice repeat maybe seq) + + ;; The primitive types stay as they are: + [(id val) + (list #'(head origin name [pat]))] + [(inferred-id val reason) + (list #'(head origin name [pat]))] + [(lit val) + (list #'(head origin name [pat]))] + [(token val) + (list #'(head origin name [pat]))] + + + ;; Everything else might need lifting: + [(choice sub-pat ...) + (begin + (define-values (inferred-ruless/rev new-sub-patss/rev) + (for/fold ([rs '()] [ps '()]) + ([p (syntax->list #'(sub-pat ...))]) + (let-values ([(new-r new-p) + (lift-nonprimitive-pattern p)]) + (values (cons new-r rs) (cons new-p ps))))) + (with-syntax ([((sub-pat ...) ...) (reverse new-sub-patss/rev)]) + (append (list #'(head origin name [sub-pat ...] ...)) + (apply append (reverse inferred-ruless/rev)))))] + + [(repeat min sub-pat) + (begin + (define-values (inferred-rules new-sub-pats) + (lift-nonprimitive-pattern #'sub-pat)) + (with-syntax ([(sub-pat ...) new-sub-pats]) + (cons (cond [(= (syntax-e #'min) 0) + #`(head origin name + [(inferred-id name repeat) sub-pat ...] + [])] + [(= (syntax-e #'min) 1) + #`(head origin name + [(inferred-id name repeat) sub-pat ...] + [sub-pat ...])]) + inferred-rules)))] + + [(maybe sub-pat) + (begin + (define-values (inferred-rules new-sub-pats) + (lift-nonprimitive-pattern #'sub-pat)) + (with-syntax ([(sub-pat ...) new-sub-pats]) + (cons #'(head origin name + [sub-pat ...] + []) + inferred-rules)))] + + [(seq sub-pat ...) + (begin + (define-values (inferred-rules new-sub-pats) + (lift-nonprimitive-patterns (syntax->list #'(sub-pat ...)))) + (with-syntax ([(sub-pat ...) new-sub-pats]) + (cons #'(head origin name [sub-pat ...]) + inferred-rules)))])])))) + + +;; Given a pattern, return a key appropriate for a hash. +;; +;; In the `ragg` days this used `syntax->datum` only. +;; The problem is that with cuts & splices in the mix, it creates ambiguity: +;; e.g., the pattern (/"," foo)* and ("," foo)* differ only in the 'hide syntax property +;; so `syntax->datum` does not capture their differences. +;; That means they produced the same hash key, +;; which meant they produced the same inferred pattern. Which is wrong. +;; So we adjust the key to take account of the 'hide property +;; by "lifting" it into the datum with cons. +;; Then the pattern-inference process treats them separately. +(define (pattern->hash-key a-pat) + (let loop ([x a-pat]) + (let ([maybe-stx-list (syntax->list x)]) + (if maybe-stx-list + (cons (syntax-property x 'hide) (map loop maybe-stx-list)) + (syntax->datum x))))) + + +;; Returns true if the pattern looks primitive +(define (primitive-pattern? a-pat) + (syntax-case a-pat (id lit token choice repeat maybe seq) + [(id val) + #t] + [(lit val) + #t] + [(token val) + #t] + [(choice sub-pat ...) + #f] + [(repeat min val) + #f] + [(maybe sub-pat) + #f] + [(seq sub-pat ...) + (andmap primitive-pattern? (syntax->list #'(sub-pat ...)))])) + + +;; Given a primitive pattern (id, lit, token, and seqs only containing +;; primitive patterns), returns a linear sequence of just id, lits, +;; and tokens. 
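+;;
+;; For example (illustrative only), a primitive pattern such as
+;;   (seq (lit "(") (seq (id expr) (token RPAREN)))
+;; linearizes to the flat sequence
+;;   ((lit "(") (id expr) (token RPAREN))
+;; because nested `seq`s are walked left to right and only the
+;; id/inferred-id/lit/token leaves are kept, in order.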
+(define (linearize-primitive-pattern a-pat) + (define (traverse a-pat acc) + (syntax-case a-pat (id inferred-id lit token seq) + [(id val) + (cons a-pat acc)] + [(inferred-id val reason) + (cons a-pat acc)] + [(lit val) + (cons a-pat acc)] + [(token val) + (cons a-pat acc)] + [(seq vals ...) + (foldl traverse acc (syntax->list #'(vals ...)))])) + (reverse (traverse a-pat '()))) + + + +(define-syntax (prim-rule stx) + (raise-syntax-error #f "internal error: should not be macro expanded" stx)) + +(define-syntax (inferred-prim-rule stx) + (raise-syntax-error #f "internal error: should not be macro expanded" stx)) + +(define-syntax (inferred-id stx) + (raise-syntax-error #f "internal error: should not be macro expanded" stx)) diff --git a/brag/brag/codegen/reader.rkt b/brag/brag/codegen/reader.rkt new file mode 100755 index 0000000..6413225 --- /dev/null +++ b/brag/brag/codegen/reader.rkt @@ -0,0 +1,68 @@ +#lang s-exp syntax/module-reader +brag/codegen/sexp-based-lang +#:read my-read +#:read-syntax my-read-syntax +#:info my-get-info +#:whole-body-readers? #t + +(require brag/rules/parser + brag/rules/lexer + brag/rules/stx + brag/rules/rule-structs) + +(define (my-read in) + (syntax->datum (my-read-syntax #f in))) + +(define (my-read-syntax src in) + (define-values (first-line first-column first-position) (port-next-location in)) + (define tokenizer (tokenize in)) + (define rules + (parameterize ([current-parser-error-handler + (lambda (tok-ok? tok-name tok-value start-pos end-pos) + (raise-syntax-error + #f + (format "Error while parsing grammar near: ~a [line=~a, column=~a, position=~a]" + tok-value + (pos-line start-pos) + (pos-col start-pos) + (pos-offset start-pos)) + (datum->syntax #f + (string->symbol (format "~a" tok-value)) + (list src + (pos-line start-pos) + (pos-col start-pos) + (pos-offset start-pos) + (if (and (number? (pos-offset end-pos)) + (number? (pos-offset start-pos))) + (- (pos-offset end-pos) + (pos-offset start-pos)) + #f)))))]) + (grammar-parser tokenizer))) + (define-values (last-line last-column last-position) (port-next-location in)) + (list (rules->stx src rules + #:original-stx (datum->syntax #f 'original-stx + (list src + first-line + first-column + first-position + (if (and (number? last-position) + (number? first-position)) + (- last-position first-position) + #f)))))) + + +;; Extension: we'd like to cooperate with DrRacket and tell +;; it to use the default, textual lexer and color scheme when +;; editing bf programs. +;; +;; See: http://docs.racket-lang.org/guide/language-get-info.html +;; for more details, as well as the documentation in +;; syntax/module-reader. +(define (my-get-info key default default-filter) + (case key + [(color-lexer) + (dynamic-require 'syntax-color/default-lexer + 'default-lexer)] + [else + (default-filter key default)])) + diff --git a/brag/brag/codegen/runtime.rkt b/brag/brag/codegen/runtime.rkt new file mode 100755 index 0000000..5acdaf9 --- /dev/null +++ b/brag/brag/codegen/runtime.rkt @@ -0,0 +1,212 @@ +#lang racket/base + +(require racket/match + racket/list + racket/generator + (prefix-in lex: br-parser-tools/lex) + brag/support + brag/private/internal-support) + + +(provide THE-ERROR-HANDLER + make-permissive-tokenizer + atomic-datum->syntax + positions->srcloc + rule-components->syntax) + + + +;; The level of indirection here is necessary since the yacc grammar wants a +;; function value for the error handler up front. We want to delay that decision +;; till parse time. +(define (THE-ERROR-HANDLER tok-ok? 
tok-name tok-value start-pos end-pos) + (match (positions->srcloc start-pos end-pos) + [(list src line col offset span) + ((current-parser-error-handler) tok-name + tok-value + offset + line + col + span)])) + + + + +(define no-position (lex:position #f #f #f)) +(define (no-position? p) + (not + (or (lex:position-line p) + (lex:position-col p) + (lex:position-offset p)))) + + +;; make-permissive-tokenizer: (U (sequenceof (U token token-struct eof void)) (-> (U token token-struct eof void))) hash -> (-> position-token) +;; Creates a tokenizer from the given value. +;; FIXME: clean up code. +(define (make-permissive-tokenizer tokenizer token-type-hash) + (define tokenizer-thunk (cond + [(sequence? tokenizer) + (sequence->generator tokenizer)] + [(procedure? tokenizer) + tokenizer])) + + ;; lookup: symbol any pos pos -> position-token + (define (lookup type val start-pos end-pos) + (lex:position-token + ((hash-ref token-type-hash type + (lambda () + ((current-tokenizer-error-handler) (format "~a" type) val + (lex:position-offset start-pos) + (lex:position-line start-pos) + (lex:position-col start-pos) + (and (number? (lex:position-offset start-pos)) + (number? (lex:position-offset end-pos)) + (- (lex:position-offset end-pos) + (lex:position-offset start-pos)))))) + val) + start-pos end-pos)) + + (define (permissive-tokenizer) + (define next-token (tokenizer-thunk)) + (let loop ([next-token next-token]) + (match next-token + [(or (? eof-object?) (? void?)) + (lookup 'EOF eof no-position no-position)] + + [(? symbol?) + (lookup next-token next-token no-position no-position)] + + [(? string?) + (lookup (string->symbol next-token) next-token no-position no-position)] + + [(? char?) + (lookup (string->symbol (string next-token)) next-token no-position no-position)] + + ;; Compatibility + [(? lex:token?) + (loop (token (lex:token-name next-token) + (lex:token-value next-token)))] + + [(token-struct type val offset line column span skip?) + (cond [skip? + ;; skip whitespace, and just tokenize again. + (permissive-tokenizer)] + + [(hash-has-key? token-type-hash type) + (define start-pos (lex:position offset line column)) + ;; try to synthesize a consistent end position. + (define end-pos (lex:position (if (and (number? offset) (number? span)) + (+ offset span) + offset) + line + (if (and (number? column) (number? span)) + (+ column span) + column))) + (lookup type val start-pos end-pos)] + [else + ;; We ran into a token of unrecognized type. Let's raise an appropriate error. + ((current-tokenizer-error-handler) type val + offset line column span)])] + + [(lex:position-token t s e) + (define a-position-token (loop t)) + (lex:position-token (lex:position-token-token a-position-token) + (if (no-position? (lex:position-token-start-pos a-position-token)) + s + (lex:position-token-start-pos a-position-token)) + (if (no-position? (lex:position-token-end-pos a-position-token)) + e + (lex:position-token-end-pos a-position-token)))] + + [(lex:srcloc-token t loc) + (define a-position-token (loop t)) + (lex:position-token (lex:position-token-token a-position-token) + (if (no-position? (lex:position-token-start-pos a-position-token)) + (lex:position (srcloc-position loc) (srcloc-line loc) (srcloc-column loc)) + (lex:position-token-start-pos a-position-token)) + (if (no-position? (lex:position-token-start-pos a-position-token)) + (lex:position (+ (srcloc-position loc) (srcloc-span loc)) #f #f) + (lex:position-token-end-pos a-position-token)))] + + [else + ;; Otherwise, we have no idea how to treat this as a token. 
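+         ;; (The clauses above handle eof/void, bare symbols, strings,
+         ;; characters, lex:token structs, token-structs, and
+         ;; position-/srcloc-wrapped tokens; anything else falls through
+         ;; to the tokenizer error handler below.)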
+ ((current-tokenizer-error-handler) 'unknown-type (format "~a" next-token) + #f #f #f #f)]))) + permissive-tokenizer) + + + +;; positions->srcloc: position position -> (list source line column offset span) +;; Given two positions, returns a srcloc-like structure, where srcloc is the value +;; consumed as the third argument to datum->syntax. +(define (positions->srcloc start-pos end-pos) + (list (current-source) + (lex:position-line start-pos) + (lex:position-col start-pos) + (lex:position-offset start-pos) + (if (and (number? (lex:position-offset end-pos)) + (number? (lex:position-offset start-pos))) + (- (lex:position-offset end-pos) + (lex:position-offset start-pos)) + #f))) + +#| +MB: the next three functions control the parse tree output. +This would be the place to check a syntax property for hiding. +|# +;; We create a syntax using read-syntax; by definition, it should have the +;; original? property set to #t, which we then copy over to syntaxes constructed +;; with atomic-datum->syntax and rule-components->syntax. +(define stx-with-original?-property + (read-syntax #f (open-input-string "meaningless-string"))) + + +;; atomic-datum->syntax: datum position position +;; Helper that does the ugly work in wrapping a datum into a syntax +;; with source location. +(define (atomic-datum->syntax d start-pos end-pos) + (datum->syntax #f d (positions->srcloc start-pos end-pos) stx-with-original?-property)) + + +(define (remove-rule-name component-stx [splice #f]) + ;; when removing a rule name, we apply it as a syntax property to the remaining elements + ;; for possible later usage (aka, why throw away information) + (with-syntax ([(name . subcomponents) component-stx]) + (let ([name-datum (syntax->datum #'name)]) + (if splice + ;; when splicing, returned list is a regular list, with each element having the property. + (map (λ(sc) (syntax-property sc name-datum #'name)) (syntax->list #'subcomponents)) + ;; when hiding, returned list should be a syntaxed list with the property + ;; iow, basically the same as `component-stx`, minus the name + (syntax-property (datum->syntax component-stx #'subcomponents component-stx component-stx) name-datum #'name))))) + + +(define (preprocess-component-lists component-lists) + ; "preprocess" means splicing and rule-name-hiding where indicated + (append* + ;; each `component-list` is a list that's either empty, or has a single component-stx object + ;; inside `component-stx` is a name followed by subcomponents + (for*/list ([component-list (in-list component-lists)] + [component-stx (in-list component-list)]) ; this has the effect of omitting any empty `component-list` + (list + (cond + [(eq? (syntax-property component-stx 'hide-or-splice) 'hide) + (list (remove-rule-name component-stx))] ; hidden version still wrapped in a sublist + [(or (eq? (syntax-property component-stx 'hide-or-splice) 'splice) + (syntax-property component-stx 'splice-rh-id)) + (remove-rule-name component-stx #t)] ; spliced version is lifted out of the sublist + [else (list component-stx)]))))) + + +;; rule-components->syntax: (U symbol false) (listof stx) ... #:srcloc (U #f (list src line column offset span)) -> stx +;; Creates an stx out of the rule name and its components. +;; The location information of the rule spans that of its components. +(define (rule-components->syntax rule-name/false #:srcloc [srcloc #f] #:hide-or-splice? [hide-or-splice #f] . 
component-lists) + (define new-rule-name (datum->syntax #f rule-name/false srcloc stx-with-original?-property)) + (define new-rule-components (append* (preprocess-component-lists component-lists))) + (define rule-result (cons new-rule-name new-rule-components)) + (define syntaxed-rule-result (datum->syntax #f rule-result srcloc stx-with-original?-property)) + ;; not 'hide-or-splice-lhs-id, because this will now become a (right-hand) component in a different (left-hand) rule + ;; actual splicing happens when the parent rule is processed (with procedure above) + (syntax-property syntaxed-rule-result 'hide-or-splice hide-or-splice)) + diff --git a/brag/brag/codegen/satisfaction.rkt b/brag/brag/codegen/satisfaction.rkt new file mode 100755 index 0000000..07f20ac --- /dev/null +++ b/brag/brag/codegen/satisfaction.rkt @@ -0,0 +1,207 @@ +#lang racket/base + +(provide make-and make-or node? node-val node-yes? visit! add-child!) + +(require racket/match) + +;; I can't get no... satisfaction. +;; +;; A small module to make sure a small constraint system can be satisfied. +;; +;; Small variation on topological sort where we need both AND and OR nodes. + + +(struct node (type val yes? parents count-to-satisfy) #:mutable) +;; or nodes are satisfied if any of the children is satisfied. +;; and nodes are satisfied if all of the children are satisfied. + + +;; visit!: node -> void +;; Visit a node, and marking it if it's all satisfied. Propagate +;; satisfaction to parents as appropriate. +(define visit! + (let () + (define (dec! n) + (set-node-count-to-satisfy! n (max 0 (sub1 (node-count-to-satisfy n)))) + (when (and (not (node-yes? n)) + (= (node-count-to-satisfy n) 0)) + (sat! n))) + + (define (sat! n) + (set-node-yes?! n #t) + (for ([p (in-list (node-parents n))]) + (dec! p))) + + (lambda (n) + (unless (node-yes? n) + (when (= (node-count-to-satisfy n) 0) + (sat! n)))))) + + +;; make-or: X -> node +;; Create an or node +(define (make-or [val #f]) + (node 'or val #f '() 1)) + + +;; make-and: X -> node +;; Create an and node +(define (make-and [val #f]) + (node 'and val #f '() 0)) + + +;; add-child!: node node -> void +;; Attach a child c to the parent node p. +(define (add-child! p c) + (set-node-parents! c (cons p (node-parents c))) + (match p + [(node 'or _ _ _ count-to-satisfy) + (void)] + [(node 'and _ _ _ count-to-satisfy) + (set-node-count-to-satisfy! p (add1 count-to-satisfy))])) + + +(module* test racket + (require (submod "..") + racket/block + rackunit) + + ;; a ::= a + (block + ;; Self-looping "a" and-node should not say yes after visiting. + (define a (make-and 'a)) + (add-child! a a) + (visit! a) + (check-false (node-yes? a))) + + + ;; a ::= a + (block + ;; Self-looping "a" or-node should not say yes after visiting. + (define a (make-or 'a)) + (add-child! a a) + (visit! a) + (check-false (node-yes? a))) + + + ;; This case should never happen in my situation, but we should check. + (block + ;; Empty "a" or-node should not say yes after visiting. + (define a (make-or 'a)) + (visit! a) + (check-false (node-yes? a))) + + + ;; a : TOKEN + (block + ;; Empty "a" and-node SHOULD say yes after visiting. + (define a (make-and 'a)) + (visit! a) + (check-true (node-yes? a))) + + + ;; a : a | b + ;; b : TOKEN + (block + (define a (make-or 'a)) + (add-child! a a) + (define b (make-and 'b)) + (add-child! a b) + (visit! b) + (check-true (node-yes? a)) + (check-true (node-yes? 
b))) + + ;; a : a b + ;; b : TOKEN + (block + (define a (make-and 'a)) + (define b (make-and 'b)) + (define TOKEN (make-and 'TOKEN)) + (add-child! a a) + (add-child! a b) + (add-child! b TOKEN) + (visit! TOKEN) + (check-false (node-yes? a)) + (check-true (node-yes? b)) + (check-true (node-yes? TOKEN))) + + ;; a : b + ;; b : a + (block + (define a (make-and 'a)) + (define b (make-and 'b)) + (add-child! a b) + (add-child! b a) + (check-false (node-yes? a)) + (check-false (node-yes? b))) + + ;; a : "a" b + ;; b : a | b + (block + (define a (make-and 'a)) + (define b (make-or 'b)) + (define lit (make-and "a")) + (add-child! a lit) + (add-child! a b) + (add-child! b a) + (add-child! b b) + (visit! lit) + (check-false (node-yes? a)) + (check-false (node-yes? b)) + (check-true (node-yes? lit))) + + + ;; x : x y + ;; y : LIT + (block + (define x (make-and "x")) + (define y (make-and "y")) + (define lit (make-and "LIT")) + (add-child! x x) + (add-child! x y) + (add-child! y lit) + (visit! lit) + (check-false (node-yes? x)) + (check-true (node-yes? y)) + (check-true (node-yes? lit))) + + + ;; expr: LPAREN expr RPAREN | ATOM + (block + (define LPAREN (make-and)) + (define RPAREN (make-and)) + (define expr (make-or)) + (define expr-1 (make-and)) + (define expr-2 (make-and)) + (define ATOM (make-and)) + (add-child! expr expr-1) + (add-child! expr expr-2) + (add-child! expr-1 LPAREN) + (add-child! expr-1 expr) + (add-child! expr-1 RPAREN) + (add-child! expr-2 ATOM) + (visit! LPAREN) + (visit! RPAREN) + (visit! ATOM) + (check-true (node-yes? expr))) + + + + ;; expr: LPAREN expr RPAREN + (block + (define LPAREN (make-and)) + (define RPAREN (make-and)) + (define expr (make-or)) + (define expr-1 (make-and)) + (define expr-2 (make-and)) + (define ATOM (make-and)) + (add-child! expr expr-1) + (add-child! expr expr-2) + (add-child! expr-1 LPAREN) + (add-child! expr-1 expr) + (add-child! expr-1 RPAREN) + (visit! LPAREN) + (visit! RPAREN) + (check-false (node-yes? expr))) + + ) diff --git a/brag/brag/codegen/sexp-based-lang.rkt b/brag/brag/codegen/sexp-based-lang.rkt new file mode 100755 index 0000000..305966a --- /dev/null +++ b/brag/brag/codegen/sexp-based-lang.rkt @@ -0,0 +1,96 @@ +#lang racket/base + +;; A language level for automatically generating parsers out of BNF grammars. +;; +;; Danny Yoo (dyoo@hashcollision.org) +;; +;; Intent: make it trivial to generate languages for Racket. At the +;; moment, I find it painful to use br-parser-tools. This library is +;; meant to make it less agonizing. +;; +;; The intended use of this language is as follows: +;; +;;;;; s-exp-grammar.rkt ;;;;;;;;; +;; #lang brag +;; s-exp : "(" s-exp* ")" | ATOM +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +;; What this generates is: +;; +;; * parse: a function that consumes a source and a +;; position-aware lexer, and produces a syntax object. +;; +;; * make-rule-parser: a custom parser given a provided start rule. +;; +;; You'll still need to do a little work, by providing a lexer that +;; defines what the uppercased tokens mean. For example, you can +;; use the br-parser-tools/lex lexer tools: +;; +;; (require brag/support +;; br-parser-tools/lex +;; br-parser-tools/lex-sre) +;; +;; (define tokenize +;; (lexer-src-pos +;; [(:+ alphabetic) +;; (token 'ATOM lexeme)] +;; [whitespace +;; (return-without-pos (tokenize/1 input-port))] +;; [(:or "(" ")") +;; (token lexeme lexeme)])) +;; + +;; However, that should be all you need. 
The output of a +;; generated grammar is an honest-to-goodness syntax +;; object with source locations, fully-labeled by the rules. +;; +;; (parse (tokenize an-input-port)) +;; +;; + +;; The first rule is treated as the start rule; any successful parse +;; must finish with end-of-file. + + +;; Terminology: +;; + + +;; A rule is a rule identifier, followed by a colon ":", followed by a +;; pattern. + +;; A rule identifier is an identifier that is not in upper case. +;; A rule identifier should follow the Racket rules for identifiers, +;; except that it can't contain * or +. +;; +;; A token is a rule identifier that is all in upper case. + + + +;; A pattern may either be +;; +;; * an implicit sequence of patterns, +;; +;; * a literal string, +;; +;; * a rule identifier, +;; +;; * a quantified pattern, either with "*" or "+", +;; +;; * an optional pattern: a pattern surrounded by "[" and "]", or +;; +;; * a grouped sequence: a pattern surrounded by "(" and ")". + + +(require (for-syntax racket/base + "codegen.rkt")) + +(provide rules + (rename-out [#%plain-module-begin #%module-begin]) + #%top-interaction) + +(define-syntax (rules stx) + (rules-codegen #:parser-provider-module 'brag/cfg-parser/cfg-parser ;; 'br-parser-tools/yacc + #:parser-provider-form 'cfg-parser ;; 'parser + stx)) diff --git a/brag/brag/compiled/brag_scrbl.dep b/brag/brag/compiled/brag_scrbl.dep new file mode 100644 index 0000000..10ad5de --- /dev/null +++ b/brag/brag/compiled/brag_scrbl.dep @@ -0,0 +1 @@ +("6.8.0.2" ("a1ff51bb55e2900a1af85794564c48f9416f735f" . "4266080729186b06f505910e27df38b5c56d83a1") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"examples" #"nested-word-list.rkt") (collects #"brag" #"support.rkt") (collects #"file" #"md5.rkt") (collects #"racket" #"date.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"scribble" #"eval.rkt") (collects #"scribble" #"manual" #"lang" #"reader.rkt") (collects #"scribble" #"manual" #"lang.rkt") (collects #"syntax" #"parse.rkt")) diff --git a/brag/brag/compiled/brag_scrbl.zo b/brag/brag/compiled/brag_scrbl.zo new file mode 100644 index 0000000..cd9fab4 Binary files /dev/null and b/brag/brag/compiled/brag_scrbl.zo differ diff --git a/brag/brag/compiled/drracket/errortrace/main_rkt.dep b/brag/brag/compiled/drracket/errortrace/main_rkt.dep new file mode 100644 index 0000000..7cce840 --- /dev/null +++ b/brag/brag/compiled/drracket/errortrace/main_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("b8fc55f87c5a6d3c6491bca66cc1a785a0f0f27a" . "461b36edd1a9bf8574913c9d026e2c38db798586") (collects #"brag" #"codegen" #"reader.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/compiled/drracket/errortrace/main_rkt.zo b/brag/brag/compiled/drracket/errortrace/main_rkt.zo new file mode 100644 index 0000000..d53e177 Binary files /dev/null and b/brag/brag/compiled/drracket/errortrace/main_rkt.zo differ diff --git a/brag/brag/compiled/drracket/errortrace/support_rkt.dep b/brag/brag/compiled/drracket/errortrace/support_rkt.dep new file mode 100644 index 0000000..147e31d --- /dev/null +++ b/brag/brag/compiled/drracket/errortrace/support_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("a121c5f796282cd367e872ca5befbeb0e0cdea91" .
"8db5334793221f8665b9d8b10f326a02bf883911") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"racket" #"string.rkt") (collects #"racket" #"struct.rkt")) diff --git a/brag/brag/compiled/drracket/errortrace/support_rkt.zo b/brag/brag/compiled/drracket/errortrace/support_rkt.zo new file mode 100644 index 0000000..fe53d13 Binary files /dev/null and b/brag/brag/compiled/drracket/errortrace/support_rkt.zo differ diff --git a/brag/brag/compiled/info_rkt.dep b/brag/brag/compiled/info_rkt.dep new file mode 100644 index 0000000..9bec892 --- /dev/null +++ b/brag/brag/compiled/info_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("55824d94e193a4c24592968a52c0a9a2cc9533a5" . "a41a5bbd17d5ae17e20ba9ef3e0d4b4c31eacaf5") (collects #"setup" #"infotab" #"lang" #"reader.rkt") (collects #"setup" #"infotab.rkt")) diff --git a/brag/brag/compiled/info_rkt.zo b/brag/brag/compiled/info_rkt.zo new file mode 100644 index 0000000..1e36268 Binary files /dev/null and b/brag/brag/compiled/info_rkt.zo differ diff --git a/brag/brag/compiled/main_rkt.dep b/brag/brag/compiled/main_rkt.dep new file mode 100644 index 0000000..f806dd1 --- /dev/null +++ b/brag/brag/compiled/main_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("b8fc55f87c5a6d3c6491bca66cc1a785a0f0f27a" . "0d4a4153810c21481c2f715a6bd6881353f310b0") (collects #"brag" #"codegen" #"reader.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/compiled/main_rkt.zo b/brag/brag/compiled/main_rkt.zo new file mode 100644 index 0000000..4eebd02 Binary files /dev/null and b/brag/brag/compiled/main_rkt.zo differ diff --git a/brag/brag/compiled/support_rkt.dep b/brag/brag/compiled/support_rkt.dep new file mode 100644 index 0000000..74595eb --- /dev/null +++ b/brag/brag/compiled/support_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("6f20548d2bc718e2b6fcfbc0977f3e00806a2660" . "51dcd333f44ba69550238530613b350e93443152") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"racket" #"string.rkt") (collects #"racket" #"struct.rkt")) diff --git a/brag/brag/compiled/support_rkt.zo b/brag/brag/compiled/support_rkt.zo new file mode 100644 index 0000000..de31131 Binary files /dev/null and b/brag/brag/compiled/support_rkt.zo differ diff --git a/brag/brag/examples/01-equal.rkt b/brag/brag/examples/01-equal.rkt new file mode 100755 index 0000000..c53bd18 --- /dev/null +++ b/brag/brag/examples/01-equal.rkt @@ -0,0 +1,12 @@ +#lang brag + +## Equal numbers of 0 and 1s in a string. +## +## (Thanks to mithos28 for this one.) 
+ + +equal : [zero one | one zero] + +zero : "0" equal | equal "0" + +one : "1" equal | equal "1" diff --git a/brag/brag/examples/0n1.rkt b/brag/brag/examples/0n1.rkt new file mode 100755 index 0000000..53beae0 --- /dev/null +++ b/brag/brag/examples/0n1.rkt @@ -0,0 +1,3 @@ +#lang brag + +rule: "0"* "1" diff --git a/brag/brag/examples/0n1n.rkt b/brag/brag/examples/0n1n.rkt new file mode 100755 index 0000000..25b0ea5 --- /dev/null +++ b/brag/brag/examples/0n1n.rkt @@ -0,0 +1,3 @@ +#lang brag + +rule-0n1n: ["0" rule-0n1n "1"] diff --git a/brag/brag/examples/add-mult.rkt b/brag/brag/examples/add-mult.rkt new file mode 100755 index 0000000..bde2ac5 --- /dev/null +++ b/brag/brag/examples/add-mult.rkt @@ -0,0 +1,7 @@ +#lang brag + +expr : term (/'+' term)* +@term : factor (/'*' @factor)* +factor : ("0" | "1" | "2" | "3" + | "4" | "5" | "6" | "7" + | "8" | "9")+ diff --git a/brag/brag/examples/baby-json-hider.rkt b/brag/brag/examples/baby-json-hider.rkt new file mode 100755 index 0000000..769ecfd --- /dev/null +++ b/brag/brag/examples/baby-json-hider.rkt @@ -0,0 +1,18 @@ +#lang brag +#:prefix-out my: + +;; Simple baby example of JSON structure +json: number | string + | array + | @object +number: NUMBER + +string: STRING + +array: "[" [json ("," json)*] "]" + +object: /"{" [kvpair ("," kvpair)*] /"}" + +@kvpair : /ID colon /json + +/colon : ":" diff --git a/brag/brag/examples/baby-json.rkt b/brag/brag/examples/baby-json.rkt new file mode 100755 index 0000000..491dfc8 --- /dev/null +++ b/brag/brag/examples/baby-json.rkt @@ -0,0 +1,16 @@ +#lang brag + +;; Simple baby example of JSON structure +json: number | string + | array + | object + +number: NUMBER + +string: STRING + +array: "[" [json ("," json)*] "]" + +object: "{" [kvpair ("," kvpair)*] "}" + +kvpair: ID ":" json diff --git a/brag/brag/examples/bnf.rkt b/brag/brag/examples/bnf.rkt new file mode 100755 index 0000000..2349038 --- /dev/null +++ b/brag/brag/examples/bnf.rkt @@ -0,0 +1,13 @@ +#lang brag + +## The following comes from: http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form + + : | + : "<" ">" "::=" + + : " " | "" ## "" is empty string, i.e. no whitespace + : | "|" + : | + : | + : | "<" ">" + : '"' '"' | "'" "'" ## actually, the original BNF did not use quotes diff --git a/brag/brag/examples/compiled/01-equal_rkt.dep b/brag/brag/examples/compiled/01-equal_rkt.dep new file mode 100644 index 0000000..57d690c --- /dev/null +++ b/brag/brag/examples/compiled/01-equal_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("b0fb22188c4ee33d5132034695cd58da84fc6460" . 
"a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/01-equal_rkt.zo b/brag/brag/examples/compiled/01-equal_rkt.zo new file mode 100644 index 0000000..8775711 Binary files /dev/null and b/brag/brag/examples/compiled/01-equal_rkt.zo differ diff --git a/brag/brag/examples/compiled/0n1_rkt.dep b/brag/brag/examples/compiled/0n1_rkt.dep new file mode 100644 index 0000000..7ee303b --- /dev/null +++ b/brag/brag/examples/compiled/0n1_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("45135f2605111b9bf5da084e5a070c0ef6ea6f8b" . "a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/0n1_rkt.zo b/brag/brag/examples/compiled/0n1_rkt.zo new file mode 100644 index 0000000..583004c Binary files /dev/null and b/brag/brag/examples/compiled/0n1_rkt.zo differ diff --git a/brag/brag/examples/compiled/0n1n_rkt.dep b/brag/brag/examples/compiled/0n1n_rkt.dep new file mode 100644 index 0000000..01ecd2f --- /dev/null +++ b/brag/brag/examples/compiled/0n1n_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("72691af1fa81f16c36663c5858f0378b518723c2" . "a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/0n1n_rkt.zo b/brag/brag/examples/compiled/0n1n_rkt.zo new file mode 100644 index 0000000..5ab3606 Binary files /dev/null and b/brag/brag/examples/compiled/0n1n_rkt.zo differ diff --git a/brag/brag/examples/compiled/add-mult_rkt.dep b/brag/brag/examples/compiled/add-mult_rkt.dep new file mode 100644 index 0000000..a648fd8 --- /dev/null +++ b/brag/brag/examples/compiled/add-mult_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("659c615263fdf73ed763dfe9b9399108b9d602ff" . 
"a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/add-mult_rkt.zo b/brag/brag/examples/compiled/add-mult_rkt.zo new file mode 100644 index 0000000..29e175f Binary files /dev/null and b/brag/brag/examples/compiled/add-mult_rkt.zo differ diff --git a/brag/brag/examples/compiled/baby-json-hider_rkt.dep b/brag/brag/examples/compiled/baby-json-hider_rkt.dep new file mode 100644 index 0000000..1e3e73c --- /dev/null +++ b/brag/brag/examples/compiled/baby-json-hider_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("12f2e61ef09373777e7345fbcd9ad48e20368dd7" . "a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/baby-json-hider_rkt.zo b/brag/brag/examples/compiled/baby-json-hider_rkt.zo new file mode 100644 index 0000000..4d78a19 Binary files /dev/null and b/brag/brag/examples/compiled/baby-json-hider_rkt.zo differ diff --git a/brag/brag/examples/compiled/baby-json_rkt.dep b/brag/brag/examples/compiled/baby-json_rkt.dep new file mode 100644 index 0000000..9844a72 --- /dev/null +++ b/brag/brag/examples/compiled/baby-json_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("19cf283e877d2212e42c5a496d0abf1169644d7e" . "a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/baby-json_rkt.zo b/brag/brag/examples/compiled/baby-json_rkt.zo new file mode 100644 index 0000000..b9c9d09 Binary files /dev/null and b/brag/brag/examples/compiled/baby-json_rkt.zo differ diff --git a/brag/brag/examples/compiled/bnf_rkt.dep b/brag/brag/examples/compiled/bnf_rkt.dep new file mode 100644 index 0000000..666b3d0 --- /dev/null +++ b/brag/brag/examples/compiled/bnf_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c276c4b6284e3a5bfcf71928b0dfe29f6410b5a1" . 
"a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/bnf_rkt.zo b/brag/brag/examples/compiled/bnf_rkt.zo new file mode 100644 index 0000000..b2c4a28 Binary files /dev/null and b/brag/brag/examples/compiled/bnf_rkt.zo differ diff --git a/brag/brag/examples/compiled/drracket/errortrace/add-mult_rkt.dep b/brag/brag/examples/compiled/drracket/errortrace/add-mult_rkt.dep new file mode 100644 index 0000000..5ecd823 --- /dev/null +++ b/brag/brag/examples/compiled/drracket/errortrace/add-mult_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("659c615263fdf73ed763dfe9b9399108b9d602ff" . "c8f3a5864b3b3f847e54677f6cc1e760d95ea1c4") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/drracket/errortrace/add-mult_rkt.zo b/brag/brag/examples/compiled/drracket/errortrace/add-mult_rkt.zo new file mode 100644 index 0000000..bded552 Binary files /dev/null and b/brag/brag/examples/compiled/drracket/errortrace/add-mult_rkt.zo differ diff --git a/brag/brag/examples/compiled/drracket/errortrace/simple-arithmetic-grammar_rkt.dep b/brag/brag/examples/compiled/drracket/errortrace/simple-arithmetic-grammar_rkt.dep new file mode 100644 index 0000000..b495a32 --- /dev/null +++ b/brag/brag/examples/compiled/drracket/errortrace/simple-arithmetic-grammar_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("a71ebdfc1b9faf9333bd9f43f508ff430274953d" . "c8f3a5864b3b3f847e54677f6cc1e760d95ea1c4") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/drracket/errortrace/simple-arithmetic-grammar_rkt.zo b/brag/brag/examples/compiled/drracket/errortrace/simple-arithmetic-grammar_rkt.zo new file mode 100644 index 0000000..48e666e Binary files /dev/null and b/brag/brag/examples/compiled/drracket/errortrace/simple-arithmetic-grammar_rkt.zo differ diff --git a/brag/brag/examples/compiled/lua-parser_rkt.dep b/brag/brag/examples/compiled/lua-parser_rkt.dep new file mode 100644 index 0000000..89e43ba --- /dev/null +++ b/brag/brag/examples/compiled/lua-parser_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("5f013fb473cfe3e4828d9eab2da368b4b34f3a83" . 
"a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/lua-parser_rkt.zo b/brag/brag/examples/compiled/lua-parser_rkt.zo new file mode 100644 index 0000000..095e333 Binary files /dev/null and b/brag/brag/examples/compiled/lua-parser_rkt.zo differ diff --git a/brag/brag/examples/compiled/nested-word-list_rkt.dep b/brag/brag/examples/compiled/nested-word-list_rkt.dep new file mode 100644 index 0000000..b7d3fec --- /dev/null +++ b/brag/brag/examples/compiled/nested-word-list_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("60883511dc50f547d2ffc9684baf72eef099400f" . "a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/nested-word-list_rkt.zo b/brag/brag/examples/compiled/nested-word-list_rkt.zo new file mode 100644 index 0000000..b0fa3ee Binary files /dev/null and b/brag/brag/examples/compiled/nested-word-list_rkt.zo differ diff --git a/brag/brag/examples/compiled/simple-arithmetic-grammar_rkt.dep b/brag/brag/examples/compiled/simple-arithmetic-grammar_rkt.dep new file mode 100644 index 0000000..762df7f --- /dev/null +++ b/brag/brag/examples/compiled/simple-arithmetic-grammar_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("a71ebdfc1b9faf9333bd9f43f508ff430274953d" . "a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/simple-arithmetic-grammar_rkt.zo b/brag/brag/examples/compiled/simple-arithmetic-grammar_rkt.zo new file mode 100644 index 0000000..ae90ac0 Binary files /dev/null and b/brag/brag/examples/compiled/simple-arithmetic-grammar_rkt.zo differ diff --git a/brag/brag/examples/compiled/simple-line-drawing_rkt.dep b/brag/brag/examples/compiled/simple-line-drawing_rkt.dep new file mode 100644 index 0000000..c64a161 --- /dev/null +++ b/brag/brag/examples/compiled/simple-line-drawing_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("fcfac72351476e73408d6f03bab8f9c10fdb40c7" . 
"a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/simple-line-drawing_rkt.zo b/brag/brag/examples/compiled/simple-line-drawing_rkt.zo new file mode 100644 index 0000000..663b35c Binary files /dev/null and b/brag/brag/examples/compiled/simple-line-drawing_rkt.zo differ diff --git a/brag/brag/examples/compiled/statlist-grammar_rkt.dep b/brag/brag/examples/compiled/statlist-grammar_rkt.dep new file mode 100644 index 0000000..38b9f5f --- /dev/null +++ b/brag/brag/examples/compiled/statlist-grammar_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("aa75acef74eb18f9b6f701024f34891177fbaf6c" . "a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/statlist-grammar_rkt.zo b/brag/brag/examples/compiled/statlist-grammar_rkt.zo new file mode 100644 index 0000000..f2be210 Binary files /dev/null and b/brag/brag/examples/compiled/statlist-grammar_rkt.zo differ diff --git a/brag/brag/examples/compiled/whitespace_rkt.dep b/brag/brag/examples/compiled/whitespace_rkt.dep new file mode 100644 index 0000000..6e822f8 --- /dev/null +++ b/brag/brag/examples/compiled/whitespace_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("ab812dc68ee3f3aaca78f9cd22ba5116f7fb1758" . "a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/whitespace_rkt.zo b/brag/brag/examples/compiled/whitespace_rkt.zo new file mode 100644 index 0000000..fedba91 Binary files /dev/null and b/brag/brag/examples/compiled/whitespace_rkt.zo differ diff --git a/brag/brag/examples/compiled/wordy_rkt.dep b/brag/brag/examples/compiled/wordy_rkt.dep new file mode 100644 index 0000000..b2837f7 --- /dev/null +++ b/brag/brag/examples/compiled/wordy_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("75b7c4d2c72c06d406e0fc6e7dda6b1592068f9a" . 
"a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/compiled/wordy_rkt.zo b/brag/brag/examples/compiled/wordy_rkt.zo new file mode 100644 index 0000000..045f4ef Binary files /dev/null and b/brag/brag/examples/compiled/wordy_rkt.zo differ diff --git a/brag/brag/examples/cutter.rkt b/brag/brag/examples/cutter.rkt new file mode 100755 index 0000000..7cd7023 --- /dev/null +++ b/brag/brag/examples/cutter.rkt @@ -0,0 +1,4 @@ +#lang brag +top : expr (/"," expr)* +expr : "x" | list +list : "(" expr ("," expr)* ")" \ No newline at end of file diff --git a/brag/brag/examples/lua-parser.rkt b/brag/brag/examples/lua-parser.rkt new file mode 100755 index 0000000..75781e9 --- /dev/null +++ b/brag/brag/examples/lua-parser.rkt @@ -0,0 +1,111 @@ +#lang brag + +;; Lua parser, adapted from: +;; http://www.lua.org/manual/5.1/manual.html#8 +;; + + +chunk : (stat ["; "])* [laststat ["; "]] + +block : chunk + +stat : varlist "=" explist | + functioncall | + DO block END | + WHILE exp DO block END | + REPEAT block UNTIL exp | + IF exp THEN block (ELSEIF exp THEN block)* [ELSE block] END | + FOR NAME "=" exp "," exp ["," exp] DO block END | + FOR namelist IN explist DO block END | + FUNCTION funcname funcbody | + LOCAL FUNCTION NAME funcbody | + LOCAL namelist ["=" explist] + +laststat : RETURN [explist] | BREAK + +funcname : NAME ("." NAME)* [":" NAME] + +varlist : var ("," var)* + +var : NAME | prefixexp "[" exp "]" | prefixexp "." NAME + +namelist : NAME ("," NAME)* + +explist : (exp ",")* exp + + +;; Note by dyoo: The parsing of exp deviates from Lua in that we have these administrative +;; rules to explicitly represent the precedence rules. +;; +;; See: http://www.lua.org/manual/5.1/manual.html#2.5.6 +;; +;; Ragg doesn't yet automatically desugar operator precedence rules. +;; I'm doing it by hand at the moment, which is not ideal, so a future version of +;; ragg will have a story about describing precedence. +;; +;; Operator precedence in Lua follows the table below, from lower to higher priority: +;; +;; or exp_1 +;; and exp_2 +;; < > <= >= ~= == exp_3 +;; .. exp_4 +;; + - exp_5 +;; * / % exp_6 +;; not # - (unary) exp_7 +;; ^ exp_8 +;; +;; As usual, you can use parentheses to change the precedences of an expression. +;; The concatenation ('..') and exponentiation ('^') operators are right associative. +;; All other binary operators are left associative. +;; +;; The original grammar rule before encoding precedence was: +;; +;; exp : NIL | FALSE | TRUE | NUMBER | STRING | "..." | function | +;; prefixexp | tableconstructor | exp binop exp | unop exp + +exp : exp_1 +exp_1: exp_1 binop_1 exp_2 | exp_2 +exp_2: exp_2 binop_2 exp_3 | exp_3 +exp_3: exp_3 binop_3 exp_4 | exp_4 +exp_4: exp_5 binop_4 exp_4 | exp_5 ;; right associative +exp_5: exp_5 binop_5 exp_6 | exp_6 +exp_6: exp_6 binop_6 exp_7 | exp_7 +exp_7: unop exp_8 +exp_8: exp_9 binop_8 exp_8 | exp_9 ;; right associative +exp_9: NIL | FALSE | TRUE | NUMBER | STRING | "..." 
| function | + prefixexp | tableconstructor +binop_1: OR +binop_2: AND +binop_3: "<" | ">" | "<=" | ">=" | "~=" | "==" +binop_4: ".." +binop_5: "+" | "-" +binop_6: "*" | "/" | "%" +binop_8: "^" +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +prefixexp : var | functioncall | "(" exp ")" + +functioncall : prefixexp args | prefixexp ":" NAME args + +args : "(" [explist] ")" | tableconstructor | STRING + +function : FUNCTION funcbody + +funcbody : "(" [parlist] ")" block END + +parlist : namelist ["," "..."] | "..." + +tableconstructor : "{" [fieldlist] "}" + +fieldlist : field (fieldsep field)* [fieldsep] + +field : "[" exp "]" "=" exp | NAME "=" exp | exp + +fieldsep : "," | ";" + +binop : "+" | "-" | "*" | "/" | "^" | "%" | ".." | + "<" | "<=" | ">" | ">=" | "==" | "~=" | + AND | OR + +unop : "-" | NOT | "#" \ No newline at end of file diff --git a/brag/brag/examples/nested-word-list.rkt b/brag/brag/examples/nested-word-list.rkt new file mode 100755 index 0000000..b7489c4 --- /dev/null +++ b/brag/brag/examples/nested-word-list.rkt @@ -0,0 +1,3 @@ +#lang brag +nested-word-list: WORD + | LEFT-PAREN nested-word-list* RIGHT-PAREN diff --git a/brag/brag/examples/simple-arithmetic-grammar.rkt b/brag/brag/examples/simple-arithmetic-grammar.rkt new file mode 100755 index 0000000..69a5f2c --- /dev/null +++ b/brag/brag/examples/simple-arithmetic-grammar.rkt @@ -0,0 +1,5 @@ +#lang brag + +expr : term ('+' term)* +term : factor ('*' factor)* +factor : INT diff --git a/brag/brag/examples/simple-line-drawing.rkt b/brag/brag/examples/simple-line-drawing.rkt new file mode 100755 index 0000000..b97bdf5 --- /dev/null +++ b/brag/brag/examples/simple-line-drawing.rkt @@ -0,0 +1,10 @@ +#lang brag + +;; +;; See: http://stackoverflow.com/questions/12345647/rewrite-this-script-by-designing-an-interpreter-in-racket +;; + +drawing: rows* +rows: repeat chunk+ ";" +repeat: INTEGER +chunk: INTEGER STRING diff --git a/brag/brag/examples/simple-line-drawing/compiled/grammar_rkt.dep b/brag/brag/examples/simple-line-drawing/compiled/grammar_rkt.dep new file mode 100644 index 0000000..c64a161 --- /dev/null +++ b/brag/brag/examples/simple-line-drawing/compiled/grammar_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("fcfac72351476e73408d6f03bab8f9c10fdb40c7" . "a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/simple-line-drawing/compiled/grammar_rkt.zo b/brag/brag/examples/simple-line-drawing/compiled/grammar_rkt.zo new file mode 100644 index 0000000..06d46a8 Binary files /dev/null and b/brag/brag/examples/simple-line-drawing/compiled/grammar_rkt.zo differ diff --git a/brag/brag/examples/simple-line-drawing/compiled/interpret_rkt.dep b/brag/brag/examples/simple-line-drawing/compiled/interpret_rkt.dep new file mode 100644 index 0000000..f1ca0b9 --- /dev/null +++ b/brag/brag/examples/simple-line-drawing/compiled/interpret_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("2be7b8f9073e403bfc5794c6d86bf89dd1589e9b" . 
"871c9e6fe1b14a02cedd5cb931351fa9a4d5c217") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/simple-line-drawing/compiled/interpret_rkt.zo b/brag/brag/examples/simple-line-drawing/compiled/interpret_rkt.zo new file mode 100644 index 0000000..f5f7f0c Binary files /dev/null and b/brag/brag/examples/simple-line-drawing/compiled/interpret_rkt.zo differ diff --git a/brag/brag/examples/simple-line-drawing/compiled/lexer_rkt.dep b/brag/brag/examples/simple-line-drawing/compiled/lexer_rkt.dep new file mode 100644 index 0000000..b24f2a2 --- /dev/null +++ b/brag/brag/examples/simple-line-drawing/compiled/lexer_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("da50291e26298b21ae69135a48c43d16ef91db56" . "6b9cc7b42db2b58f41c65fd908091c074caf6397") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/examples/simple-line-drawing/compiled/lexer_rkt.zo b/brag/brag/examples/simple-line-drawing/compiled/lexer_rkt.zo new file mode 100644 index 0000000..60dcd44 Binary files /dev/null and b/brag/brag/examples/simple-line-drawing/compiled/lexer_rkt.zo differ diff --git a/brag/brag/examples/simple-line-drawing/compiled/semantics_rkt.dep b/brag/brag/examples/simple-line-drawing/compiled/semantics_rkt.dep new file mode 100644 index 0000000..a785d09 --- /dev/null +++ b/brag/brag/examples/simple-line-drawing/compiled/semantics_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("6b91087b0a6eeac473dec615af66fc7cbbcd2210" . "871c9e6fe1b14a02cedd5cb931351fa9a4d5c217") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/examples/simple-line-drawing/compiled/semantics_rkt.zo b/brag/brag/examples/simple-line-drawing/compiled/semantics_rkt.zo new file mode 100644 index 0000000..15eb3a6 Binary files /dev/null and b/brag/brag/examples/simple-line-drawing/compiled/semantics_rkt.zo differ diff --git a/brag/brag/examples/simple-line-drawing/examples/compiled/letter-i_rkt.dep b/brag/brag/examples/simple-line-drawing/examples/compiled/letter-i_rkt.dep new file mode 100644 index 0000000..c8bbb90 --- /dev/null +++ b/brag/brag/examples/simple-line-drawing/examples/compiled/letter-i_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("5a1d531db723394e70b1b19884d42a562c283876" . 
"ad10ce84713689ab99a459c16f36106d4048da68") (collects #"brag" #"examples" #"simple-line-drawing" #"lang" #"reader.rkt") (collects #"brag" #"examples" #"simple-line-drawing" #"semantics.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/examples/simple-line-drawing/examples/compiled/letter-i_rkt.zo b/brag/brag/examples/simple-line-drawing/examples/compiled/letter-i_rkt.zo new file mode 100644 index 0000000..5ca0d7f Binary files /dev/null and b/brag/brag/examples/simple-line-drawing/examples/compiled/letter-i_rkt.zo differ diff --git a/brag/brag/examples/simple-line-drawing/examples/letter-i.rkt b/brag/brag/examples/simple-line-drawing/examples/letter-i.rkt new file mode 100755 index 0000000..41f6570 --- /dev/null +++ b/brag/brag/examples/simple-line-drawing/examples/letter-i.rkt @@ -0,0 +1,4 @@ +#lang brag/examples/simple-line-drawing +3 9 X; +6 3 b 3 X 3 b; +3 9 X; diff --git a/brag/brag/examples/simple-line-drawing/grammar.rkt b/brag/brag/examples/simple-line-drawing/grammar.rkt new file mode 100755 index 0000000..b97bdf5 --- /dev/null +++ b/brag/brag/examples/simple-line-drawing/grammar.rkt @@ -0,0 +1,10 @@ +#lang brag + +;; +;; See: http://stackoverflow.com/questions/12345647/rewrite-this-script-by-designing-an-interpreter-in-racket +;; + +drawing: rows* +rows: repeat chunk+ ";" +repeat: INTEGER +chunk: INTEGER STRING diff --git a/brag/brag/examples/simple-line-drawing/interpret.rkt b/brag/brag/examples/simple-line-drawing/interpret.rkt new file mode 100755 index 0000000..e6cec0c --- /dev/null +++ b/brag/brag/examples/simple-line-drawing/interpret.rkt @@ -0,0 +1,31 @@ +#lang racket/base +(require syntax/parse) + +(provide interpret-drawing) + +(define (interpret-drawing drawing-stx) + (syntax-parse drawing-stx + [({~literal drawing} row-stxs ...) + + (for ([row-stx (syntax->list #'(row-stxs ...))]) + (interpret-row row-stx))])) + + +(define (interpret-row row-stx) + (syntax-parse row-stx + [({~literal rows} + ({~literal repeat} repeat-number) + chunks ... ";") + + (for ([i (syntax-e #'repeat-number)]) + (for ([chunk-stx (syntax->list #'(chunks ...))]) + (interpret-chunk chunk-stx)) + (newline))])) + + +(define (interpret-chunk chunk-stx) + (syntax-parse chunk-stx + [({~literal chunk} chunk-size chunk-string) + + (for ([k (syntax-e #'chunk-size)]) + (display (syntax-e #'chunk-string)))])) diff --git a/brag/brag/examples/simple-line-drawing/lang/compiled/reader_rkt.dep b/brag/brag/examples/simple-line-drawing/lang/compiled/reader_rkt.dep new file mode 100644 index 0000000..8b97560 --- /dev/null +++ b/brag/brag/examples/simple-line-drawing/lang/compiled/reader_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c8b67b7201b23c3e04cb7c403946a85353dfc61f" . 
"4566c23431d2a1b416e2baaacb9f264fe8af413f") (collects #"brag" #"examples" #"simple-line-drawing" #"grammar.rkt") (collects #"brag" #"examples" #"simple-line-drawing" #"lexer.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"s-exp" #"lang" #"reader.rkt") (collects #"syntax" #"module-reader.rkt")) diff --git a/brag/brag/examples/simple-line-drawing/lang/compiled/reader_rkt.zo b/brag/brag/examples/simple-line-drawing/lang/compiled/reader_rkt.zo new file mode 100644 index 0000000..8521ab8 Binary files /dev/null and b/brag/brag/examples/simple-line-drawing/lang/compiled/reader_rkt.zo differ diff --git a/brag/brag/examples/simple-line-drawing/lang/reader.rkt b/brag/brag/examples/simple-line-drawing/lang/reader.rkt new file mode 100755 index 0000000..d900f99 --- /dev/null +++ b/brag/brag/examples/simple-line-drawing/lang/reader.rkt @@ -0,0 +1,22 @@ +#lang s-exp syntax/module-reader +brag/examples/simple-line-drawing/semantics +#:read my-read +#:read-syntax my-read-syntax +#:info my-get-info +#:whole-body-readers? #t + +(require brag/examples/simple-line-drawing/lexer + brag/examples/simple-line-drawing/grammar) + +(define (my-read in) + (syntax->datum (my-read-syntax #f in))) + +(define (my-read-syntax src ip) + (list (parse src (tokenize ip)))) + +(define (my-get-info key default default-filter) + (case key + [(color-lexer) + (dynamic-require 'syntax-color/default-lexer 'default-lexer)] + [else + (default-filter key default)])) diff --git a/brag/brag/examples/simple-line-drawing/lexer.rkt b/brag/brag/examples/simple-line-drawing/lexer.rkt new file mode 100755 index 0000000..3e0e810 --- /dev/null +++ b/brag/brag/examples/simple-line-drawing/lexer.rkt @@ -0,0 +1,27 @@ +#lang racket/base + +(provide tokenize) + +;; A simple lexer for simple-line-drawing. +(require brag/support + br-parser-tools/lex) + +(define (tokenize ip) + (port-count-lines! ip) + (define my-lexer + (lexer-src-pos + [(repetition 1 +inf.0 numeric) + (token 'INTEGER (string->number lexeme))] + [upper-case + (token 'STRING lexeme)] + ["b" + (token 'STRING " ")] + [";" + (token ";" lexeme)] + [whitespace + (token 'WHITESPACE lexeme #:skip? #t)] + [(eof) + (void)])) + (define (next-token) (my-lexer ip)) + next-token) + diff --git a/brag/brag/examples/simple-line-drawing/semantics.rkt b/brag/brag/examples/simple-line-drawing/semantics.rkt new file mode 100755 index 0000000..028662d --- /dev/null +++ b/brag/brag/examples/simple-line-drawing/semantics.rkt @@ -0,0 +1,48 @@ +#lang racket/base +(require (for-syntax racket/base syntax/parse)) + +(provide #%module-begin + ;; We reuse Racket's treatment of raw datums, specifically + ;; for strings and numbers: + #%datum + + ;; And otherwise, we provide definitions of these three forms. + ;; During compiliation, Racket uses these definitions to + ;; rewrite into for loops, displays, and newlines. + drawing rows chunk) + +;; Define a few compile-time functions to do the syntax rewriting: +(begin-for-syntax + (define (compile-drawing drawing-stx) + (syntax-parse drawing-stx + [({~literal drawing} row-stxs ...) + + (syntax/loc drawing-stx + (begin row-stxs ...))])) + + (define (compile-rows row-stx) + (syntax-parse row-stx + [({~literal rows} + ({~literal repeat} repeat-number) + chunks ... + ";") + + (syntax/loc row-stx + (for ([i repeat-number]) + chunks ... 
+ (newline)))])) + + (define (compile-chunk chunk-stx) + (syntax-parse chunk-stx + [({~literal chunk} chunk-size chunk-string) + + (syntax/loc chunk-stx + (for ([k chunk-size]) + (display chunk-string)))]))) + + +;; Wire up the use of "drawing", "rows", and "chunk" to these +;; transformers: +(define-syntax drawing compile-drawing) +(define-syntax rows compile-rows) +(define-syntax chunk compile-chunk) diff --git a/brag/brag/examples/statlist-grammar.rkt b/brag/brag/examples/statlist-grammar.rkt new file mode 100755 index 0000000..18b2561 --- /dev/null +++ b/brag/brag/examples/statlist-grammar.rkt @@ -0,0 +1,14 @@ +#lang brag + +## Statlist grammar + +statlist : stat+ +stat: ID '=' expr + | 'print' expr + +expr: multExpr ('+' multExpr)* +multExpr: primary (('*'|'.') primary)* +primary : + INT + | ID + | '[' expr ("," expr)* ']' diff --git a/brag/brag/examples/whitespace.rkt b/brag/brag/examples/whitespace.rkt new file mode 100644 index 0000000..b76ebf4 --- /dev/null +++ b/brag/brag/examples/whitespace.rkt @@ -0,0 +1,6 @@ +#lang brag +start: (tab | space | newline | letter)* +tab: '\t' +space: " " +newline: "\n" +letter: "x" | "y" | "z" \ No newline at end of file diff --git a/brag/brag/examples/wordy.rkt b/brag/brag/examples/wordy.rkt new file mode 100755 index 0000000..66c9042 --- /dev/null +++ b/brag/brag/examples/wordy.rkt @@ -0,0 +1,7 @@ +#lang brag +;; A parser for a silly language +sentence: verb optional-adjective object +verb: greeting +optional-adjective: ["happy" | "frumpy"] +greeting: "hello" | "hola" | "aloha" +object: "world" | WORLD diff --git a/brag/brag/info.rkt b/brag/brag/info.rkt new file mode 100755 index 0000000..0477e13 --- /dev/null +++ b/brag/brag/info.rkt @@ -0,0 +1,7 @@ +#lang setup/infotab +(define name "brag") +(define version "1.0") +(define scribblings '(("brag.scrbl"))) +(define blurb '("brag: the Beautiful Racket AST Generator. A fork of Danny Yoo's ragg. A design goal is to be easy for beginners to use. Given a grammar in EBNF, brag produces a parser that generates Racket's native syntax objects with full source location.")) +(define deps (list)) +(define test-omit-paths '("examples/simple-line-drawing/examples/letter-i.rkt")) \ No newline at end of file diff --git a/brag/brag/main.rkt b/brag/brag/main.rkt new file mode 100755 index 0000000..6ae6606 --- /dev/null +++ b/brag/brag/main.rkt @@ -0,0 +1,5 @@ +#lang racket/base + +(module+ reader + (require "codegen/reader.rkt") + (provide (all-from-out "codegen/reader.rkt"))) diff --git a/brag/brag/private/compiled/drracket/errortrace/internal-support_rkt.dep b/brag/brag/private/compiled/drracket/errortrace/internal-support_rkt.dep new file mode 100644 index 0000000..9d52233 --- /dev/null +++ b/brag/brag/private/compiled/drracket/errortrace/internal-support_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("8619edb19f30b9ac617b7960383e516134ff3a3e" . 
"3fc85bf5c92e338c8dec52b3279d24ac13297fef") (collects #"brag" #"support.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/private/compiled/drracket/errortrace/internal-support_rkt.zo b/brag/brag/private/compiled/drracket/errortrace/internal-support_rkt.zo new file mode 100644 index 0000000..854566a Binary files /dev/null and b/brag/brag/private/compiled/drracket/errortrace/internal-support_rkt.zo differ diff --git a/brag/brag/private/compiled/internal-support_rkt.dep b/brag/brag/private/compiled/internal-support_rkt.dep new file mode 100644 index 0000000..71b3677 --- /dev/null +++ b/brag/brag/private/compiled/internal-support_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("8619edb19f30b9ac617b7960383e516134ff3a3e" . "727d83b38cce050723aa91077974dacc8ff78de4") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/private/compiled/internal-support_rkt.zo b/brag/brag/private/compiled/internal-support_rkt.zo new file mode 100644 index 0000000..a50c6ac Binary files /dev/null and b/brag/brag/private/compiled/internal-support_rkt.zo differ diff --git a/brag/brag/private/internal-support.rkt b/brag/brag/private/internal-support.rkt new file mode 100755 index 0000000..0beec3e --- /dev/null +++ b/brag/brag/private/internal-support.rkt @@ -0,0 +1,36 @@ +#lang racket/base + +(require brag/support) + +(provide current-source + current-parser-error-handler + current-tokenizer-error-handler) + +;; During parsing, we should define the source of the input. +(define current-source (make-parameter #f)) + + +;; When an parse error happens, we call the current-parser-error-handler: +(define current-parser-error-handler + (make-parameter + (lambda (tok-name tok-value offset line col span) + (raise (exn:fail:parsing + (format "Encountered parsing error near ~e (token ~e) while parsing ~e [line=~a, column=~a, offset=~a]" + tok-value tok-name + (current-source) + line col offset) + (current-continuation-marks) + (list (srcloc (current-source) line col offset span))))))) + +;; When a tokenization error happens, we call the current-tokenizer-error-handler. +(define current-tokenizer-error-handler + (make-parameter + (lambda (tok-type tok-value offset line column span) + (raise (exn:fail:parsing + (format "Encountered unexpected token ~e (~e) while parsing ~e [line=~a, column=~a, offset=~a]" + tok-type + tok-value + (current-source) + line column offset) + (current-continuation-marks) + (list (srcloc (current-source) line column offset span))))))) diff --git a/brag/brag/rules/compiled/drracket/errortrace/lexer_rkt.dep b/brag/brag/rules/compiled/drracket/errortrace/lexer_rkt.dep new file mode 100644 index 0000000..3eec1d3 --- /dev/null +++ b/brag/brag/rules/compiled/drracket/errortrace/lexer_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("630617306ce8f15ed7cd29825fa4262ce2e6d82c" . 
"39b27ae3d173229df5e4d93c536e2c3f6e9b96aa") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"rules" #"parser.rkt") (collects #"brag" #"rules" #"rule-structs.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"racket" #"string.rkt")) diff --git a/brag/brag/rules/compiled/drracket/errortrace/lexer_rkt.zo b/brag/brag/rules/compiled/drracket/errortrace/lexer_rkt.zo new file mode 100644 index 0000000..68b750d Binary files /dev/null and b/brag/brag/rules/compiled/drracket/errortrace/lexer_rkt.zo differ diff --git a/brag/brag/rules/compiled/drracket/errortrace/parser_rkt.dep b/brag/brag/rules/compiled/drracket/errortrace/parser_rkt.dep new file mode 100644 index 0000000..592d07a --- /dev/null +++ b/brag/brag/rules/compiled/drracket/errortrace/parser_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("ddb2689b1fb8a26f0df5eebaa60891ee3e8d651e" . "262425f23a22dd416b35fafee443e5f442307857") (collects #"br-parser-tools" #"lex.rkt") (collects #"br-parser-tools" #"yacc.rkt") (collects #"brag" #"rules" #"rule-structs.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"list.rkt") (collects #"racket" #"match.rkt") (collects #"racket" #"runtime-config.rkt") (indirect collects #"racket" #"match" #"gen-match.rkt") (indirect collects #"racket" #"match" #"parse.rkt")) diff --git a/brag/brag/rules/compiled/drracket/errortrace/parser_rkt.zo b/brag/brag/rules/compiled/drracket/errortrace/parser_rkt.zo new file mode 100644 index 0000000..48f2802 Binary files /dev/null and b/brag/brag/rules/compiled/drracket/errortrace/parser_rkt.zo differ diff --git a/brag/brag/rules/compiled/drracket/errortrace/rule-structs_rkt.dep b/brag/brag/rules/compiled/drracket/errortrace/rule-structs_rkt.dep new file mode 100644 index 0000000..6b9d587 --- /dev/null +++ b/brag/brag/rules/compiled/drracket/errortrace/rule-structs_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("0b577b4daab1368df053c63e7ce67b932447ae91" . "6ab49f18354cfd81572a57ba4235967eae50f58e") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/rules/compiled/drracket/errortrace/rule-structs_rkt.zo b/brag/brag/rules/compiled/drracket/errortrace/rule-structs_rkt.zo new file mode 100644 index 0000000..5985509 Binary files /dev/null and b/brag/brag/rules/compiled/drracket/errortrace/rule-structs_rkt.zo differ diff --git a/brag/brag/rules/compiled/drracket/errortrace/stx-types_rkt.dep b/brag/brag/rules/compiled/drracket/errortrace/stx-types_rkt.dep new file mode 100644 index 0000000..94321df --- /dev/null +++ b/brag/brag/rules/compiled/drracket/errortrace/stx-types_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("04c0dfaac34e0f545a22ed58f0b88448e57f3237" . 
"6ab49f18354cfd81572a57ba4235967eae50f58e") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/rules/compiled/drracket/errortrace/stx-types_rkt.zo b/brag/brag/rules/compiled/drracket/errortrace/stx-types_rkt.zo new file mode 100644 index 0000000..bd2ba13 Binary files /dev/null and b/brag/brag/rules/compiled/drracket/errortrace/stx-types_rkt.zo differ diff --git a/brag/brag/rules/compiled/drracket/errortrace/stx_rkt.dep b/brag/brag/rules/compiled/drracket/errortrace/stx_rkt.dep new file mode 100644 index 0000000..174139a --- /dev/null +++ b/brag/brag/rules/compiled/drracket/errortrace/stx_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("8688436c031186fe3078355e044ddf5a407ad406" . "28045863c1e3f6ea48c70da1eadb8c329bb5a833") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"rules" #"rule-structs.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"match.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"syntax" #"strip-context.rkt") (indirect collects #"racket" #"match" #"gen-match.rkt") (indirect collects #"racket" #"match" #"parse.rkt")) diff --git a/brag/brag/rules/compiled/drracket/errortrace/stx_rkt.zo b/brag/brag/rules/compiled/drracket/errortrace/stx_rkt.zo new file mode 100644 index 0000000..c1cd504 Binary files /dev/null and b/brag/brag/rules/compiled/drracket/errortrace/stx_rkt.zo differ diff --git a/brag/brag/rules/compiled/lexer_rkt.dep b/brag/brag/rules/compiled/lexer_rkt.dep new file mode 100644 index 0000000..da432ca --- /dev/null +++ b/brag/brag/rules/compiled/lexer_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("630617306ce8f15ed7cd29825fa4262ce2e6d82c" . "5140653a66a64815f56c272ec59609ec542605b2") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"rules" #"parser.rkt") (collects #"brag" #"rules" #"rule-structs.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"racket" #"string.rkt")) diff --git a/brag/brag/rules/compiled/lexer_rkt.zo b/brag/brag/rules/compiled/lexer_rkt.zo new file mode 100644 index 0000000..41c156f Binary files /dev/null and b/brag/brag/rules/compiled/lexer_rkt.zo differ diff --git a/brag/brag/rules/compiled/parser_rkt.dep b/brag/brag/rules/compiled/parser_rkt.dep new file mode 100644 index 0000000..0b24b08 --- /dev/null +++ b/brag/brag/rules/compiled/parser_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("cfc9e7888ae194c7ac0c23b4d674d1feeb6b169b" . "1589f3d1948a8dfb10ae7b58612b10d71625e529") (collects #"br-parser-tools" #"lex.rkt") (collects #"br-parser-tools" #"yacc.rkt") (collects #"brag" #"rules" #"rule-structs.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"list.rkt") (collects #"racket" #"match.rkt") (collects #"racket" #"runtime-config.rkt") (indirect collects #"racket" #"match" #"gen-match.rkt") (indirect collects #"racket" #"match" #"parse.rkt")) diff --git a/brag/brag/rules/compiled/parser_rkt.zo b/brag/brag/rules/compiled/parser_rkt.zo new file mode 100644 index 0000000..070eaf7 Binary files /dev/null and b/brag/brag/rules/compiled/parser_rkt.zo differ diff --git a/brag/brag/rules/compiled/rule-structs_rkt.dep b/brag/brag/rules/compiled/rule-structs_rkt.dep new file mode 100644 index 0000000..e669fee --- /dev/null +++ b/brag/brag/rules/compiled/rule-structs_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("0b577b4daab1368df053c63e7ce67b932447ae91" . 
"45edc5268ccfc8f2ffdbecc5ce55bf87097999a1") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/rules/compiled/rule-structs_rkt.zo b/brag/brag/rules/compiled/rule-structs_rkt.zo new file mode 100644 index 0000000..1b7cd00 Binary files /dev/null and b/brag/brag/rules/compiled/rule-structs_rkt.zo differ diff --git a/brag/brag/rules/compiled/runtime_rkt.dep b/brag/brag/rules/compiled/runtime_rkt.dep new file mode 100644 index 0000000..0ee8171 --- /dev/null +++ b/brag/brag/rules/compiled/runtime_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("0d83924b9d995d1fb6cbdc3b89ca29f5bec77bd9" . "98cc41d41b06c0ff1d2755eadb1cd3e7a3331091") (collects #"br-parser-tools" #"lex.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/rules/compiled/runtime_rkt.zo b/brag/brag/rules/compiled/runtime_rkt.zo new file mode 100644 index 0000000..9c80c87 Binary files /dev/null and b/brag/brag/rules/compiled/runtime_rkt.zo differ diff --git a/brag/brag/rules/compiled/stx-types_rkt.dep b/brag/brag/rules/compiled/stx-types_rkt.dep new file mode 100644 index 0000000..c634b1a --- /dev/null +++ b/brag/brag/rules/compiled/stx-types_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("04c0dfaac34e0f545a22ed58f0b88448e57f3237" . "45edc5268ccfc8f2ffdbecc5ce55bf87097999a1") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/rules/compiled/stx-types_rkt.zo b/brag/brag/rules/compiled/stx-types_rkt.zo new file mode 100644 index 0000000..09625cb Binary files /dev/null and b/brag/brag/rules/compiled/stx-types_rkt.zo differ diff --git a/brag/brag/rules/compiled/stx_rkt.dep b/brag/brag/rules/compiled/stx_rkt.dep new file mode 100644 index 0000000..59b7cf4 --- /dev/null +++ b/brag/brag/rules/compiled/stx_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("8688436c031186fe3078355e044ddf5a407ad406" . "33a16deedcfd8c3475612c75ae7aa1edcf45fe55") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"rules" #"rule-structs.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"match.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"syntax" #"strip-context.rkt") (indirect collects #"racket" #"match" #"gen-match.rkt") (indirect collects #"racket" #"match" #"parse.rkt")) diff --git a/brag/brag/rules/compiled/stx_rkt.zo b/brag/brag/rules/compiled/stx_rkt.zo new file mode 100644 index 0000000..1d092e1 Binary files /dev/null and b/brag/brag/rules/compiled/stx_rkt.zo differ diff --git a/brag/brag/rules/lexer.rkt b/brag/brag/rules/lexer.rkt new file mode 100755 index 0000000..e55991b --- /dev/null +++ b/brag/brag/rules/lexer.rkt @@ -0,0 +1,131 @@ +#lang racket/base +(require (for-syntax racket/base "parser.rkt")) +(require br-parser-tools/lex + (prefix-in : br-parser-tools/lex-sre) + "parser.rkt" + "rule-structs.rkt" + racket/string) + +(provide lex/1 tokenize) + +;; A newline can be any one of the following. 
+(define-lex-abbrev NL (:or "\r\n" "\r" "\n")) + +;; chars used for quantifiers & parse-tree filtering +(define-for-syntax quantifiers "+:*") ; colon is reserved to separate rules and productions +(define-lex-trans reserved-chars + (λ(stx) #`(char-set #,(format "~a~a~a" quantifiers hide-char splice-char)))) + +(define-lex-trans hide-char-trans (λ(stx) #`(char-set #,(format "~a" hide-char)))) +(define-lex-trans splice-char-trans (λ(stx) #`(char-set #,(format "~a" splice-char)))) + +(define-lex-abbrevs + [letter (:or (:/ "a" "z") (:/ #\A #\Z))] + [digit (:/ #\0 #\9)] + [id-char (:or letter digit (:& (char-set "+:*@!-.$%&/=?^_~<>") (char-complement (reserved-chars))))] + [hide-char (hide-char-trans)] + [splice-char (splice-char-trans)] + ) + +(define-lex-abbrev id (:& (complement (:+ digit)) (:+ id-char))) + +(define lex/1 + (lexer-src-pos + ;; handle whitespace chars within quotes as literal tokens: "\n" "\t" '\n' '\t' + ;; by matching the escaped version, and then unescaping them before they become token-LITs + [(:: "'" + (:* (:or "\\'" "\\n" "\\t" (:~ "'" "\\"))) + "'") + (token-LIT (case lexeme + [("'\\n'") "'\n'"] + [("'\\t'") "'\t'"] + [else lexeme]))] + [(:: "\"" + (:* (:or "\\\"" "\\n" "\\t" (:~ "\"" "\\"))) + "\"") + (token-LIT (case lexeme + [("\"\\n\"") "\"\n\""] + [("\"\\t\"") "\"\t\""] + [else lexeme]))] + ["(" + (token-LPAREN lexeme)] + ["[" + (token-LBRACKET lexeme)] + [")" + (token-RPAREN lexeme)] + ["]" + (token-RBRACKET lexeme)] + [hide-char + (token-HIDE lexeme)] + [splice-char + (token-SPLICE lexeme)] + ["|" + (token-PIPE lexeme)] + [(:or "+" "*") + (token-REPEAT lexeme)] + [whitespace + ;; Skip whitespace + (return-without-pos (lex/1 input-port))] + ;; Skip comments up to end of line + ;; but detect possible kwargs. + [(:: (:or "#" ";") ; remove # as comment char + (complement (:: (:* any-char) NL (:* any-char))) + (:or NL "")) + (let ([maybe-kwarg-match (regexp-match #px"^#:(.*?)\\s*(.*?)$" lexeme)]) + (when maybe-kwarg-match + (let* ([parts (map string->symbol (string-split (string-trim lexeme "#:" #:right? #f)))] + [kw (car parts)][val (cadr parts)]) + (case kw + [(prefix-out) (current-prefix-out val)] + [else (error 'lexer (format "got unknown keyword ~a" kw))]))) + (return-without-pos (lex/1 input-port)))] + [(eof) + (token-EOF lexeme)] + [(:: id (:* whitespace) ":") + (token-RULE_HEAD lexeme)] + [(:: hide-char id (:* whitespace) ":") + (token-RULE_HEAD_HIDDEN lexeme)] + [(:: splice-char id (:* whitespace) ":") + (token-RULE_HEAD_SPLICED lexeme)] + [id + (token-ID lexeme)] + + ;; We call the error handler for everything else: + [(:: any-char) + (let-values ([(rest-of-text end-pos-2) + (lex-nonwhitespace input-port)]) + ((current-parser-error-handler) + #f + 'error + (string-append lexeme rest-of-text) + (position->pos start-pos) + (position->pos end-pos-2)))])) + + +;; This is the helper for the error production. +(define lex-nonwhitespace + (lexer + [(:+ (char-complement whitespace)) + (values lexeme end-pos)] + [any-char + (values lexeme end-pos)] + [(eof) + (values "" end-pos)])) + + + +;; position->pos: position -> pos +;; Coerces position structures from br-parser-tools/lex to our own pos structures.
+(define (position->pos a-pos) + (pos (position-offset a-pos) + (position-line a-pos) + (position-col a-pos))) + + + +;; tokenize: input-port -> (-> token) +(define (tokenize ip + #:source [source (object-name ip)]) + (lambda () + (parameterize ([file-path source]) + (lex/1 ip)))) diff --git a/brag/brag/rules/parser.rkt b/brag/brag/rules/parser.rkt new file mode 100755 index 0000000..4cf9608 --- /dev/null +++ b/brag/brag/rules/parser.rkt @@ -0,0 +1,281 @@ +#lang racket/base +(require br-parser-tools/yacc + br-parser-tools/lex + racket/list + racket/match + "rule-structs.rkt") + +;; A parser for grammars. + +(provide hide-char + splice-char + tokens + token-LPAREN + token-RPAREN + token-HIDE ; for hider + token-SPLICE ; for splicer + token-LBRACKET + token-RBRACKET + token-PIPE + token-REPEAT + token-RULE_HEAD + token-RULE_HEAD_HIDDEN + token-RULE_HEAD_SPLICED + token-ID + token-LIT + token-EOF + grammar-parser + + current-source + current-parser-error-handler + current-prefix-out + + [struct-out rule] + [struct-out lhs-id] + [struct-out pattern] + [struct-out pattern-id] + [struct-out pattern-lit] + [struct-out pattern-token] + [struct-out pattern-choice] + [struct-out pattern-repeat] + [struct-out pattern-maybe] + [struct-out pattern-seq]) + +(define-tokens tokens (LPAREN + RPAREN + LBRACKET + RBRACKET + HIDE + SPLICE + PIPE + REPEAT + RULE_HEAD + RULE_HEAD_HIDDEN + RULE_HEAD_SPLICED + ID + LIT + EOF)) + +(define hide-char #\/) +(define splice-char #\@) + +;; grammar-parser: (-> token) -> (listof rule) +(define grammar-parser + (parser + (tokens tokens) + (src-pos) + (start rules) + (end EOF) + + (grammar + [rules + [(rules*) $1]] + + [rules* + [(rule rules*) + (cons $1 $2)] + [() + '()]] + + ;; I have a separate token type for rule identifiers to avoid the + ;; shift/reduce conflict that happens with the implicit sequencing + ;; of top-level rules. i.e. the parser can't currently tell, when + ;; it sees an ID, if it should shift or reduce to a new rule. + [rule + [(RULE_HEAD pattern) + (begin + (define trimmed (regexp-replace #px"\\s*:$" $1 "")) + (rule (position->pos $1-start-pos) + (position->pos $2-end-pos) + (lhs-id (position->pos $1-start-pos) + (pos (+ (position-offset $1-start-pos) + (string-length trimmed)) + (position-line $1-start-pos) + (position-col $1-start-pos)) + trimmed + #f) + $2))] + + [(RULE_HEAD_HIDDEN pattern) ; slash indicates hiding + (begin + (define trimmed (cadr (regexp-match (pregexp (format "~a(\\S+)\\s*:$" hide-char)) $1))) + (rule (position->pos $1-start-pos) + (position->pos $2-end-pos) + (lhs-id (position->pos $1-start-pos) + (pos (+ (position-offset $1-start-pos) + (string-length trimmed) + (string-length "!")) + (position-line $1-start-pos) + (position-col $1-start-pos)) + trimmed + ''hide) ; symbol needs to be double quoted in this case + $2))] + + [(RULE_HEAD_SPLICED pattern) ; atsign indicates splicing + (begin + (define trimmed (cadr (regexp-match (pregexp (format "~a(\\S+)\\s*:$" splice-char)) $1))) + (rule (position->pos $1-start-pos) + (position->pos $2-end-pos) + (lhs-id (position->pos $1-start-pos) + (pos (+ (position-offset $1-start-pos) + (string-length trimmed) + (string-length "@")) + (position-line $1-start-pos) + (position-col $1-start-pos)) + trimmed + ''splice) ; symbol needs to be double quoted in this case + $2))]] + + [pattern + [(implicit-pattern-sequence PIPE pattern) + (if (pattern-choice? 
$3) + (pattern-choice (position->pos $1-start-pos) + (position->pos $3-end-pos) + (cons $1 (pattern-choice-vals $3))) + (pattern-choice (position->pos $1-start-pos) + (position->pos $3-end-pos) + (list $1 $3)))] + [(implicit-pattern-sequence) + $1]] + + [implicit-pattern-sequence + [(repeatable-pattern implicit-pattern-sequence) + (if (pattern-seq? $2) + (pattern-seq (position->pos $1-start-pos) + (position->pos $2-end-pos) + (cons $1 (pattern-seq-vals $2))) + (pattern-seq (position->pos $1-start-pos) + (position->pos $2-end-pos) + (list $1 $2)))] + [(repeatable-pattern) + $1]] + + [repeatable-pattern + [(atomic-pattern REPEAT) + (cond [(string=? $2 "*") + (pattern-repeat (position->pos $1-start-pos) + (position->pos $2-end-pos) + 0 $1)] + [(string=? $2 "+") + (pattern-repeat (position->pos $1-start-pos) + (position->pos $2-end-pos) + 1 $1)] + [else + (error 'grammar-parse "unknown repetition operator ~e" $2)])] + [(atomic-pattern) + $1]] + + [atomic-pattern + [(LIT) + (pattern-lit (position->pos $1-start-pos) + (position->pos $1-end-pos) + (substring $1 1 (sub1 (string-length $1))) + #f)] + + [(ID) + (if (token-id? $1) + (pattern-token (position->pos $1-start-pos) + (position->pos $1-end-pos) + $1 + #f) + (pattern-id (position->pos $1-start-pos) + (position->pos $1-end-pos) + $1 + #f))] + + [(LBRACKET pattern RBRACKET) + (pattern-maybe (position->pos $1-start-pos) + (position->pos $3-end-pos) + $2)] + + [(LPAREN pattern RPAREN) + (relocate-pattern $2 (position->pos $1-start-pos) (position->pos $3-end-pos))] + + [(HIDE atomic-pattern) + (relocate-pattern $2 (position->pos $1-start-pos) (position->pos $2-end-pos) 'hide)] + + [(SPLICE ID) + ;; only works for nonterminals on the right side + ;; (meaningless with terminals) + (if (token-id? $2) + (error 'brag "Can't use splice operator with terminal") + (pattern-id (position->pos $1-start-pos) + (position->pos $2-end-pos) + $2 + 'splice))]]) + + + (error (lambda (tok-ok? tok-name tok-value start-pos end-pos) + ((current-parser-error-handler) tok-ok? tok-name tok-value (position->pos start-pos) (position->pos end-pos)))))) + +;; relocate-pattern: pattern -> pattern +;; Rewrites the pattern's start and end pos accordingly. +(define (relocate-pattern a-pat start-pos end-pos [hide? #f]) + (match a-pat + [(pattern-id _ _ v h) + (pattern-id start-pos end-pos v (or hide? h))] + [(pattern-token _ _ v h) + (pattern-token start-pos end-pos v (or hide? h))] + [(pattern-lit _ _ v h) + (pattern-lit start-pos end-pos v (or hide? h))] + [(pattern-choice _ _ vs) + (pattern-choice start-pos end-pos vs)] + [(pattern-repeat _ _ m v) + (pattern-repeat start-pos end-pos m v)] + [(pattern-maybe _ _ v) + (pattern-maybe start-pos end-pos v)] + [(pattern-seq _ _ vs) + (pattern-seq start-pos end-pos vs)] + [else + (error 'relocate-pattern "Internal error when relocating ~s\n" a-pat)])) + + +; token-id: string -> boolean +;; Produces true if the id we see should be treated as the name of a token. +;; By convention, tokens are all upper-cased. +(define (token-id? id) + (string=? (string-upcase id) + id)) + + + +;; position->pos: position -> pos +;; Coerses position structures from br-parser-tools/lex to our own pos structures. +(define (position->pos a-pos) + (pos (position-offset a-pos) + (position-line a-pos) + (position-col a-pos))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; During parsing, we should define the source of the input. 
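;; (The error handler below attaches this value as the srcloc source, so
;; callers can parameterize current-source to the grammar's file path or
;; port name before parsing.)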
+(define current-source (make-parameter #f)) + +(define current-prefix-out (make-parameter #f)) + + +;; When bad things happen, we need to emit errors with source location. +(struct exn:fail:parse-grammar exn:fail (srclocs) + #:transparent + #:property prop:exn:srclocs (lambda (instance) + (exn:fail:parse-grammar-srclocs instance))) + +(define current-parser-error-handler + (make-parameter + (lambda (tok-ok? tok-name tok-value start-pos end-pos) + (raise (exn:fail:parse-grammar + (format "Error while parsing grammar near: ~e [line=~a, column=~a, position=~a]" + tok-value + (pos-line start-pos) + (pos-col start-pos) + (pos-offset start-pos)) + (current-continuation-marks) + (list (srcloc (current-source) + (pos-line start-pos) + (pos-col start-pos) + (pos-offset start-pos) + (if (and (number? (pos-offset end-pos)) + (number? (pos-offset start-pos))) + (- (pos-offset end-pos) + (pos-offset start-pos)) + #f)))))))) diff --git a/brag/brag/rules/rule-structs.rkt b/brag/brag/rules/rule-structs.rkt new file mode 100755 index 0000000..5b5968e --- /dev/null +++ b/brag/brag/rules/rule-structs.rkt @@ -0,0 +1,43 @@ +#lang racket/base +(provide (all-defined-out)) + + +;; We keep our own position structure because br-parser-tools/lex's position +;; structure is non-transparent, hence highly resistant to unit testing. +(struct pos (offset line col) + #:transparent) + +(struct rule (start end lhs pattern) + #:transparent) + +(struct lhs-id (start end val splice) + #:transparent) + +;; A pattern can be one of the following: +(struct pattern (start end) + #:transparent) + +(struct pattern-id pattern (val hide) + #:transparent) + +;; Token structure to be defined by the user +(struct pattern-token pattern (val hide) + #:transparent) + +;; Token structure defined as the literal string to be matched. +(struct pattern-lit pattern (val hide) + #:transparent) + +(struct pattern-choice pattern (vals) + #:transparent) + +(struct pattern-repeat pattern (min ;; either 0 or 1 + val) + #:transparent) + +(struct pattern-maybe pattern (val) + #:transparent) + +(struct pattern-seq pattern (vals) + #:transparent) + diff --git a/brag/brag/rules/runtime.rkt b/brag/brag/rules/runtime.rkt new file mode 100755 index 0000000..e34df13 --- /dev/null +++ b/brag/brag/rules/runtime.rkt @@ -0,0 +1,34 @@ +#lang racket/base +(require br-parser-tools/lex) + +(provide (all-defined-out)) + +;; During parsing, we should define the source of the input. +(define current-source (make-parameter #f)) + + +;; When bad things happen, we need to emit errors with source location. +(struct exn:fail:parse-grammar exn:fail (srclocs) + #:transparent + #:property prop:exn:srclocs (lambda (instance) + (exn:fail:parse-grammar-srclocs instance))) + +(define current-parser-error-handler + (make-parameter + (lambda (tok-ok? tok-name tok-value start-pos end-pos) + (raise (exn:fail:parse-grammar + (format "Error while parsing grammar near: ~e [line=~a, column~a, position=~a]" + tok-value + (position-line start-pos) + (position-col start-pos) + (position-offset start-pos)) + (current-continuation-marks) + (list (srcloc (current-source) + (position-line start-pos) + (position-col start-pos) + (position-offset start-pos) + (if (and (number? (position-offset end-pos)) + (number? 
(position-offset start-pos))) + (- (position-offset end-pos) + (position-offset start-pos)) + #f)))))))) diff --git a/brag/brag/rules/stx-types.rkt b/brag/brag/rules/stx-types.rkt new file mode 100755 index 0000000..e0ac70a --- /dev/null +++ b/brag/brag/rules/stx-types.rkt @@ -0,0 +1,16 @@ +#lang racket/base +(provide (all-defined-out)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; These are just here to provide bindings for Check Syntax. +;; Otherwise, we should never hit these, as the toplevel rules-codegen +;; should eliminate all uses of these if it does the right thing. +(define (rules stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (rule stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (id stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (lit stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (token stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (choice stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (repeat stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (maybe stx) (raise-syntax-error #f "Used out of context of rules" stx)) +(define (seq stx) (raise-syntax-error #f "Used out of context of rules" stx)) \ No newline at end of file diff --git a/brag/brag/rules/stx.rkt b/brag/brag/rules/stx.rkt new file mode 100755 index 0000000..9c66685 --- /dev/null +++ b/brag/brag/rules/stx.rkt @@ -0,0 +1,88 @@ +#lang racket/base + +(require "rule-structs.rkt" + br-parser-tools/lex + racket/match + syntax/strip-context) + +(provide rules->stx) + +;; Given a sequence of rules, we translate these to syntax objects. + +;; rules->stx: (listof rule) -> syntax +(define (rules->stx source rules #:original-stx [original-stx #f]) + (define rule-stxs + (map (lambda (stx) (rule->stx source stx)) + rules)) + (datum->syntax #f + `(rules ,@rule-stxs) + original-stx)) + + +(define (rule->stx source a-rule) + (define id-stx + (syntax-property + (datum->syntax #f + (string->symbol (lhs-id-val (rule-lhs a-rule))) + (list source + (pos-line (lhs-id-start (rule-lhs a-rule))) + (pos-col (lhs-id-start (rule-lhs a-rule))) + (pos-offset (lhs-id-start (rule-lhs a-rule))) + (if (and (number? (pos-offset (lhs-id-start (rule-lhs a-rule)))) + (number? (pos-offset (lhs-id-end (rule-lhs a-rule))))) + (- (pos-offset (lhs-id-end (rule-lhs a-rule))) + (pos-offset (lhs-id-start (rule-lhs a-rule)))) + #f))) + 'hide-or-splice-lhs-id (lhs-id-splice (rule-lhs a-rule)))) + (define pattern-stx (pattern->stx source (rule-pattern a-rule))) + (define line (pos-line (rule-start a-rule))) + (define column (pos-col (rule-start a-rule))) + (define position (pos-offset (rule-start a-rule))) + (define span (if (and (number? (pos-offset (rule-start a-rule))) + (number? (pos-offset (rule-end a-rule)))) + (- (pos-offset (rule-end a-rule)) + (pos-offset (rule-start a-rule))) + #f)) + (datum->syntax #f + `(rule ,id-stx ,pattern-stx) + (list source line column position span))) + +(define (pattern->stx source a-pattern) + (define recur (lambda (s) (pattern->stx source s))) + + (define line (pos-line (pattern-start a-pattern))) + (define column (pos-col (pattern-start a-pattern))) + (define position (pos-offset (pattern-start a-pattern))) + (define span (if (and (number? (pos-offset (pattern-start a-pattern))) + (number? 
(pos-offset (pattern-end a-pattern)))) + (- (pos-offset (pattern-end a-pattern)) + (pos-offset (pattern-start a-pattern))) + #f)) + (define source-location (list source line column position span)) + (match a-pattern + [(struct pattern-id (start end val hide)) + (syntax-property + (datum->syntax #f + `(id ,(datum->syntax #f (string->symbol val) source-location)) + source-location) + 'hide hide)] + [(struct pattern-lit (start end val hide)) + (syntax-property + (datum->syntax #f + `(lit ,(datum->syntax #f val source-location)) + source-location) + 'hide hide)] + [(struct pattern-token (start end val hide)) + (syntax-property + (datum->syntax #f + `(token ,(datum->syntax #f (string->symbol val) source-location)) + source-location) + 'hide hide)] + [(struct pattern-choice (start end vals)) + (datum->syntax #f`(choice ,@(map recur vals)) source-location)] + [(struct pattern-repeat (start end min val)) + (datum->syntax #f`(repeat ,min ,(recur val)) source-location)] + [(struct pattern-maybe (start end val)) + (datum->syntax #f`(maybe ,(recur val)) source-location)] + [(struct pattern-seq (start end vals)) + (datum->syntax #f`(seq ,@(map recur vals)) source-location)])) diff --git a/brag/brag/support.rkt b/brag/brag/support.rkt new file mode 100755 index 0000000..232fbf4 --- /dev/null +++ b/brag/brag/support.rkt @@ -0,0 +1,143 @@ +#lang racket/base +(require br-parser-tools/lex + racket/string + racket/struct + (prefix-in : br-parser-tools/lex-sre) + (for-syntax racket/base)) +(provide (all-from-out br-parser-tools/lex) + (all-from-out br-parser-tools/lex-sre) + [struct-out token-struct] + token + [struct-out exn:fail:parsing]) + + + +(define (token-print token port mode) + (write-string (format "~a" + (cons 'token-struct + (map (λ(proc) (format "~v" (proc token))) + (list + token-struct-type + token-struct-val + token-struct-line + token-struct-column + token-struct-offset + token-struct-span + token-struct-skip?)))) port)) + + +(struct token-struct (type val offset line column span skip?) + #:auto-value #f + #:transparent) + + +;; Token constructor. +;; This is intended to be a general token structure constructor that's nice +;; to work with. +;; It should cooperate with the tokenizers constructed with make-permissive-tokenizer. +(define (token type ;; (U symbol string) + [val #f] ;; any + [srcloc #f] + #:position [position #f] ;; (U #f number) + #:line [line #f] ;; (U #f number) + #:column [column #f] ;; (U #f number) + #:span [span #f] ;; boolean + #:skip? [skip? #f]) + (token-struct (if (string? type) (string->symbol type) type) + val + ;; keyword values take precedence over srcloc values + (or position (and srcloc (srcloc-position srcloc))) + (or line (and srcloc (srcloc-line srcloc))) + (or column (and srcloc (srcloc-column srcloc))) + (or span (and srcloc (srcloc-span srcloc))) + skip?)) + + +;; When bad things happen, we need to emit errors with source location. +(struct exn:fail:parsing exn:fail (srclocs) + #:transparent + #:property prop:exn:srclocs (lambda (instance) + (exn:fail:parsing-srclocs instance))) + + +(provide apply-lexer) +(define (apply-lexer lexer val) + (for/list ([t (in-port lexer (if (string? val) (open-input-string val) val))]) + t)) + +(provide apply-tokenizer-maker + (rename-out [apply-tokenizer-maker apply-tokenizer])) +(define (apply-tokenizer-maker tokenize in) + (define input-port (if (string? 
in) + (open-input-string in) + in)) + (define token-producer (tokenize input-port)) + (for/list ([token (in-producer token-producer (λ(tok) + (define val (cond + ;; position-tokens are produced by lexer-src-pos, + [(position-token? tok) + (position-token-token tok)] + ;; and srcloc-tokens by lexer-srcloc + [(srcloc-token? tok) + (srcloc-token-token tok)] + [else tok])) + (or (eof-object? val) (void? val))))]) + token)) + +(provide apply-colorer) +(define (apply-colorer colorer port-or-string) + (define p (if (string? port-or-string) + (open-input-string port-or-string) + port-or-string)) + (let loop ([acc null]) + (define-values (lex cat shape start end) (colorer p)) + (if (or (eq? 'eof cat) (eof-object? lex)) + (reverse acc) + (loop (cons (list lex cat shape start end) acc))))) + +(provide trim-ends) +(define (trim-ends left lexeme right) + (string-trim (string-trim lexeme left #:right? #f) right #:left? #f)) + +(provide from/to) +(define-lex-trans from/to + (λ(stx) + (syntax-case stx () + [(_ OPEN CLOSE) + ;; (:seq any-string CLOSE any-string) pattern makes it non-greedy + #'(:seq OPEN (complement (:seq any-string CLOSE any-string)) CLOSE)]))) + +(provide from/stop-before) +(define-lex-trans from/stop-before + (λ(stx) + (syntax-case stx () + [(_ OPEN CLOSE) + #'(:seq OPEN (:* (:~ CLOSE)))]))) + +(provide uc+lc) +(define-lex-trans uc+lc + (λ(stx) + (syntax-case stx () + [(_ . STRS) + (with-syntax ([(UCSTR ...) (map (compose1 string-upcase syntax->datum) (syntax->list #'STRS))] + [(LCSTR ...) (map (compose1 string-downcase syntax->datum) (syntax->list #'STRS))]) + #'(union (union UCSTR ...) (union LCSTR ...)))]))) + +;; change names of lexer abbreviations to be consistent with Racket srcloc conventions + +(define-syntax-rule (dprt ID-IN ID-OUT) + (begin + (provide ID-IN) + (define-syntax ID-IN (make-rename-transformer (syntax ID-OUT))))) + +(dprt lexeme-start start-pos) +(dprt lexeme-end end-pos) +(dprt line position-line) +(dprt col position-col) +(dprt pos position-offset) + +(provide span) +(define (span lexeme-start lexeme-end) + (abs ; thus same result in reverse order + (- (pos lexeme-end) + (pos lexeme-start)))) \ No newline at end of file diff --git a/brag/brag/test/compiled/test-01-equal_rkt.dep b/brag/brag/test/compiled/test-01-equal_rkt.dep new file mode 100644 index 0000000..79c42a8 --- /dev/null +++ b/brag/brag/test/compiled/test-01-equal_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("6074d0c3498e4d57d1148fd6ef6cc6b5e17f1cc1" . "2a62cf7ac209eb1f0a9246c2c8a1eb527fc90d0a") (collects #"brag" #"examples" #"01-equal.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/brag/brag/test/compiled/test-01-equal_rkt.zo b/brag/brag/test/compiled/test-01-equal_rkt.zo new file mode 100644 index 0000000..e509913 Binary files /dev/null and b/brag/brag/test/compiled/test-01-equal_rkt.zo differ diff --git a/brag/brag/test/compiled/test-0n1_rkt.dep b/brag/brag/test/compiled/test-0n1_rkt.dep new file mode 100644 index 0000000..a99b82a --- /dev/null +++ b/brag/brag/test/compiled/test-0n1_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("b77d4737877cd74afdccd09973c4e842cb5ba7ac" . 
"3b7422272b31655dd5642bf208be0a5f430f8f7e") (collects #"brag" #"examples" #"0n1.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/brag/brag/test/compiled/test-0n1_rkt.zo b/brag/brag/test/compiled/test-0n1_rkt.zo new file mode 100644 index 0000000..1351fb8 Binary files /dev/null and b/brag/brag/test/compiled/test-0n1_rkt.zo differ diff --git a/brag/brag/test/compiled/test-0n1n_rkt.dep b/brag/brag/test/compiled/test-0n1n_rkt.dep new file mode 100644 index 0000000..fe4887a --- /dev/null +++ b/brag/brag/test/compiled/test-0n1n_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("6672e079e8e9ab0e3d935085eb5df256935576ab" . "200f52baf6e66c42333b606fa7d1d199959067e4") (collects #"brag" #"examples" #"0n1n.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/brag/brag/test/compiled/test-0n1n_rkt.zo b/brag/brag/test/compiled/test-0n1n_rkt.zo new file mode 100644 index 0000000..c022a27 Binary files /dev/null and b/brag/brag/test/compiled/test-0n1n_rkt.zo differ diff --git a/brag/brag/test/compiled/test-all_rkt.dep b/brag/brag/test/compiled/test-all_rkt.dep new file mode 100644 index 0000000..6602f33 --- /dev/null +++ b/brag/brag/test/compiled/test-all_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("64138cc3731f68d708c46b8a838cc5b61a8880cd" . "91d2cbabd26b7d7916da8f6c81b58d13c6e9f076") (collects #"brag" #"codegen" #"satisfaction.rkt") (collects #"brag" #"test" #"test-01-equal.rkt") (collects #"brag" #"test" #"test-0n1.rkt") (collects #"brag" #"test" #"test-0n1n.rkt") (collects #"brag" #"test" #"test-baby-json-hider.rkt") (collects #"brag" #"test" #"test-baby-json.rkt") (collects #"brag" #"test" #"test-errors.rkt") (collects #"brag" #"test" #"test-flatten.rkt") (collects #"brag" #"test" #"test-lexer.rkt") (collects #"brag" #"test" #"test-old-token.rkt") (collects #"brag" #"test" #"test-parser.rkt") (collects #"brag" #"test" #"test-simple-arithmetic-grammar.rkt") (collects #"brag" #"test" #"test-simple-line-drawing.rkt") (collects #"brag" #"test" #"test-weird-grammar.rkt") (collects #"brag" #"test" #"test-wordy.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt")) diff --git a/brag/brag/test/compiled/test-all_rkt.zo b/brag/brag/test/compiled/test-all_rkt.zo new file mode 100644 index 0000000..4a3cfcc Binary files /dev/null and b/brag/brag/test/compiled/test-all_rkt.zo differ diff --git a/brag/brag/test/compiled/test-baby-json-hider_rkt.dep b/brag/brag/test/compiled/test-baby-json-hider_rkt.dep new file mode 100644 index 0000000..8f9d7f5 --- /dev/null +++ b/brag/brag/test/compiled/test-baby-json-hider_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("43f054080821974693d9ee992027b3c30927517e" . 
"83bf9effd1ee7f99450a3e92e3766141972d9f95") (collects #"brag" #"examples" #"baby-json-hider.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/brag/brag/test/compiled/test-baby-json-hider_rkt.zo b/brag/brag/test/compiled/test-baby-json-hider_rkt.zo new file mode 100644 index 0000000..73aa6e3 Binary files /dev/null and b/brag/brag/test/compiled/test-baby-json-hider_rkt.zo differ diff --git a/brag/brag/test/compiled/test-baby-json_rkt.dep b/brag/brag/test/compiled/test-baby-json_rkt.dep new file mode 100644 index 0000000..7a46842 --- /dev/null +++ b/brag/brag/test/compiled/test-baby-json_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("87d418a0a4e39f423ff7913177dbe5bfd0fa8bf6" . "39c77f483e938c1bac09c43d96d0e740e2a2ac55") (collects #"brag" #"examples" #"baby-json.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/brag/brag/test/compiled/test-baby-json_rkt.zo b/brag/brag/test/compiled/test-baby-json_rkt.zo new file mode 100644 index 0000000..577819c Binary files /dev/null and b/brag/brag/test/compiled/test-baby-json_rkt.zo differ diff --git a/brag/brag/test/compiled/test-errors_rkt.dep b/brag/brag/test/compiled/test-errors_rkt.dep new file mode 100644 index 0000000..2b26526 --- /dev/null +++ b/brag/brag/test/compiled/test-errors_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("ea9872f94314222106fbeea3d4096a2f9894262f" . "393b9ff5714e6b76ef2b26b11b6b72aae81c27a1") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/brag/brag/test/compiled/test-errors_rkt.zo b/brag/brag/test/compiled/test-errors_rkt.zo new file mode 100644 index 0000000..582a922 Binary files /dev/null and b/brag/brag/test/compiled/test-errors_rkt.zo differ diff --git a/brag/brag/test/compiled/test-flatten_rkt.dep b/brag/brag/test/compiled/test-flatten_rkt.dep new file mode 100644 index 0000000..0b21b74 --- /dev/null +++ b/brag/brag/test/compiled/test-flatten_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("44f386de5de9b199efd938a53686d684711b8b59" . "d6abc65a0064c2432f0d6822f10bb12f671f79ea") (collects #"brag" #"codegen" #"flatten.rkt") (collects #"brag" #"rules" #"stx-types.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/brag/brag/test/compiled/test-flatten_rkt.zo b/brag/brag/test/compiled/test-flatten_rkt.zo new file mode 100644 index 0000000..b66c5b2 Binary files /dev/null and b/brag/brag/test/compiled/test-flatten_rkt.zo differ diff --git a/brag/brag/test/compiled/test-lexer_rkt.dep b/brag/brag/test/compiled/test-lexer_rkt.dep new file mode 100644 index 0000000..fd98935 --- /dev/null +++ b/brag/brag/test/compiled/test-lexer_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("64b15003cbcd85c25df383092d4ebb14f75d376e" . 
"a7998cc52dc92a4833ea7edeb8d9c51ba0e13f43") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"rules" #"lexer.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/brag/brag/test/compiled/test-lexer_rkt.zo b/brag/brag/test/compiled/test-lexer_rkt.zo new file mode 100644 index 0000000..7a34e2f Binary files /dev/null and b/brag/brag/test/compiled/test-lexer_rkt.zo differ diff --git a/brag/brag/test/compiled/test-old-token_rkt.dep b/brag/brag/test/compiled/test-old-token_rkt.dep new file mode 100644 index 0000000..e2f6d90 --- /dev/null +++ b/brag/brag/test/compiled/test-old-token_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("244c1422dc988085c698af7241bc7d0586ab2d47" . "de6eefdbba51c512633e84bd45388ecb7059ac24") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"examples" #"simple-line-drawing.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"list.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/brag/brag/test/compiled/test-old-token_rkt.zo b/brag/brag/test/compiled/test-old-token_rkt.zo new file mode 100644 index 0000000..fdf6190 Binary files /dev/null and b/brag/brag/test/compiled/test-old-token_rkt.zo differ diff --git a/brag/brag/test/compiled/test-parser_rkt.dep b/brag/brag/test/compiled/test-parser_rkt.dep new file mode 100644 index 0000000..0c2dd73 --- /dev/null +++ b/brag/brag/test/compiled/test-parser_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("4ba1f4f3bb2f4b199af834e1545308d8529fadab" . "53ef8f0ee8166369c60d4fb4bebd8405dad96f37") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"rules" #"lexer.rkt") (collects #"brag" #"rules" #"parser.rkt") (collects #"brag" #"rules" #"rule-structs.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/brag/brag/test/compiled/test-parser_rkt.zo b/brag/brag/test/compiled/test-parser_rkt.zo new file mode 100644 index 0000000..056a444 Binary files /dev/null and b/brag/brag/test/compiled/test-parser_rkt.zo differ diff --git a/brag/brag/test/compiled/test-simple-arithmetic-grammar_rkt.dep b/brag/brag/test/compiled/test-simple-arithmetic-grammar_rkt.dep new file mode 100644 index 0000000..5eaeafd --- /dev/null +++ b/brag/brag/test/compiled/test-simple-arithmetic-grammar_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("c075b07edda5003da7b1bfbfac18d232b707d0a9" . "760983ca4b5b8e37f1113bb65808ced980f6bcd4") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"examples" #"simple-arithmetic-grammar.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"list.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"racket" #"set.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/brag/brag/test/compiled/test-simple-arithmetic-grammar_rkt.zo b/brag/brag/test/compiled/test-simple-arithmetic-grammar_rkt.zo new file mode 100644 index 0000000..d7d14aa Binary files /dev/null and b/brag/brag/test/compiled/test-simple-arithmetic-grammar_rkt.zo differ diff --git a/brag/brag/test/compiled/test-simple-line-drawing_rkt.dep b/brag/brag/test/compiled/test-simple-line-drawing_rkt.dep new file mode 100644 index 0000000..aea5d88 --- /dev/null +++ b/brag/brag/test/compiled/test-simple-line-drawing_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("1540bc00eda73cdc3a16820ba361c40581cdf6cb" . 
"de6eefdbba51c512633e84bd45388ecb7059ac24") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"examples" #"simple-line-drawing.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"list.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/brag/brag/test/compiled/test-simple-line-drawing_rkt.zo b/brag/brag/test/compiled/test-simple-line-drawing_rkt.zo new file mode 100644 index 0000000..c7e6cce Binary files /dev/null and b/brag/brag/test/compiled/test-simple-line-drawing_rkt.zo differ diff --git a/brag/brag/test/compiled/test-weird-grammar_rkt.dep b/brag/brag/test/compiled/test-weird-grammar_rkt.dep new file mode 100644 index 0000000..1835f71 --- /dev/null +++ b/brag/brag/test/compiled/test-weird-grammar_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("59d09aeac644aa8b3036579385e967ede43e8f51" . "766b50c856b353075b57cb446de395bdcbe078be") (collects #"brag" #"test" #"weird-grammar.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/brag/brag/test/compiled/test-weird-grammar_rkt.zo b/brag/brag/test/compiled/test-weird-grammar_rkt.zo new file mode 100644 index 0000000..a492bc6 Binary files /dev/null and b/brag/brag/test/compiled/test-weird-grammar_rkt.zo differ diff --git a/brag/brag/test/compiled/test-whitespace_rkt.dep b/brag/brag/test/compiled/test-whitespace_rkt.dep new file mode 100644 index 0000000..a610252 --- /dev/null +++ b/brag/brag/test/compiled/test-whitespace_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("1c97d7e22db47809ac6c961f0f58deb5078dad56" . "39bc30135c4b0af7ae8b7a01c99f1f2f22ded2b5") (collects #"brag" #"examples" #"whitespace.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/brag/brag/test/compiled/test-whitespace_rkt.zo b/brag/brag/test/compiled/test-whitespace_rkt.zo new file mode 100644 index 0000000..48b920c Binary files /dev/null and b/brag/brag/test/compiled/test-whitespace_rkt.zo differ diff --git a/brag/brag/test/compiled/test-wordy_rkt.dep b/brag/brag/test/compiled/test-wordy_rkt.dep new file mode 100644 index 0000000..e7764e4 --- /dev/null +++ b/brag/brag/test/compiled/test-wordy_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("aecade1992f3c242a6d3a07823e16c221da23733" . "fc08b1b4e1c113e96b12b77c226f97f56e071d98") (collects #"brag" #"examples" #"wordy.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt")) diff --git a/brag/brag/test/compiled/test-wordy_rkt.zo b/brag/brag/test/compiled/test-wordy_rkt.zo new file mode 100644 index 0000000..3ba067a Binary files /dev/null and b/brag/brag/test/compiled/test-wordy_rkt.zo differ diff --git a/brag/brag/test/compiled/weird-grammar_rkt.dep b/brag/brag/test/compiled/weird-grammar_rkt.dep new file mode 100644 index 0000000..7397787 --- /dev/null +++ b/brag/brag/test/compiled/weird-grammar_rkt.dep @@ -0,0 +1 @@ +("6.8.0.2" ("53c8c9a51cedd8c81380d2297e52b5b855348de8" . 
"a76c5ca59f0d97f05f835e217e9883ecbd4d7447") (collects #"br-parser-tools" #"lex.rkt") (collects #"brag" #"cfg-parser" #"cfg-parser.rkt") (collects #"brag" #"codegen" #"runtime.rkt") (collects #"brag" #"codegen" #"sexp-based-lang.rkt") (collects #"brag" #"main.rkt") (collects #"brag" #"private" #"internal-support.rkt") (collects #"brag" #"support.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"set.rkt") (collects #"syntax" #"parse.rkt") (indirect collects #"syntax" #"parse" #"private" #"parse-aux.rkt")) diff --git a/brag/brag/test/compiled/weird-grammar_rkt.zo b/brag/brag/test/compiled/weird-grammar_rkt.zo new file mode 100644 index 0000000..6d52e0f Binary files /dev/null and b/brag/brag/test/compiled/weird-grammar_rkt.zo differ diff --git a/brag/brag/test/test-01-equal.rkt b/brag/brag/test/test-01-equal.rkt new file mode 100755 index 0000000..dcb2138 --- /dev/null +++ b/brag/brag/test/test-01-equal.rkt @@ -0,0 +1,30 @@ +#lang racket/base + +(require brag/examples/01-equal + rackunit) + +(check-equal? (syntax->datum (parse "")) + '(equal)) +(check-equal? (syntax->datum (parse "01")) + '(equal (zero (equal) #\0) + (one (equal) #\1))) +(check-equal? (syntax->datum (parse "10")) + '(equal (one (equal) #\1) + (zero (equal) #\0))) +(check-equal? (syntax->datum (parse "0011")) + '(equal (zero (equal) #\0) + (one (equal (zero (equal) #\0) + (one (equal) #\1)) + #\1))) +(check-equal? (syntax->datum (parse "0110")) + '(equal (one (equal (zero (equal) #\0) + (one (equal) #\1)) + #\1) + (zero (equal) #\0))) + +(check-equal? (syntax->datum (parse "1100")) + '(equal (one (equal) #\1) + (zero (equal (one (equal) #\1) + (zero (equal) #\0)) + #\0))) + diff --git a/brag/brag/test/test-0n1.rkt b/brag/brag/test/test-0n1.rkt new file mode 100755 index 0000000..5611f6e --- /dev/null +++ b/brag/brag/test/test-0n1.rkt @@ -0,0 +1,50 @@ +#lang racket/base + +(require brag/examples/0n1 + brag/support + rackunit) + +(define (lex ip) + (port-count-lines! ip) + (lambda () + (define next-char (read-char ip)) + (cond [(eof-object? next-char) + (token eof)] + [(char=? next-char #\0) + (token "0" "0")] + [(char=? next-char #\1) + (token "1" "1")]))) + + +(check-equal? (syntax->datum (parse #f (lex (open-input-string "1")))) + '(rule "1")) + + +(check-equal? (syntax->datum (parse #f (lex (open-input-string "01")))) + '(rule "0" "1")) + + +(check-equal? (syntax->datum (parse #f (lex (open-input-string "001")))) + '(rule "0" "0" "1")) + + +(check-exn exn:fail:parsing? + (lambda () + (parse #f (lex (open-input-string "0"))))) + +(check-exn exn:fail:parsing? + (lambda () + (parse #f (lex (open-input-string "10"))))) + +(check-exn exn:fail:parsing? + (lambda () + (parse #f (lex (open-input-string "010"))))) + + +;; This should fail predictably because we're passing in tokens +;; that the parser doesn't know. +(check-exn exn:fail:parsing? + (lambda () (parse '("zero" "one" "zero")))) +(check-exn (regexp (regexp-quote + "Encountered unexpected token \"zero\" (\"zero\") while parsing")) + (lambda () (parse '("zero" "one" "zero")))) diff --git a/brag/brag/test/test-0n1n.rkt b/brag/brag/test/test-0n1n.rkt new file mode 100755 index 0000000..f7655d5 --- /dev/null +++ b/brag/brag/test/test-0n1n.rkt @@ -0,0 +1,49 @@ +#lang racket/base +(require brag/examples/0n1n + brag/support + rackunit) + +(define (lex ip) + (port-count-lines! ip) + (lambda () + (define next-char (read-char ip)) + (cond [(eof-object? next-char) + (token eof)] + [(char=? next-char #\0) + (token "0" "0")] + [(char=? 
next-char #\1) + (token "1" "1")]))) + + +;; The only rule in the grammar is: +;; +;; rule-0n1n: ["0" rule-0n1n "1"] +;; +;; It makes use of the "maybe" pattern. The result type of the +;; grammar rule is: +;; +;; rule-0n1n: (U #f +;; (list "0" rule-0n1n "1")) + +(check-equal? (syntax->datum (parse #f (lex (open-input-string "0011")))) + '(rule-0n1n "0" (rule-0n1n "0" (rule-0n1n) "1") "1")) + +(check-equal? (syntax->datum (parse #f (lex (open-input-string "01")))) + '(rule-0n1n "0" (rule-0n1n) "1")) + +(check-equal? (syntax->datum (parse #f (lex (open-input-string "")))) + '(rule-0n1n)) + +(check-equal? (syntax->datum (parse #f (lex (open-input-string "000111")))) + '(rule-0n1n "0" (rule-0n1n "0" (rule-0n1n "0" (rule-0n1n) "1") "1") "1")) + + + +(check-exn exn:fail:parsing? + (lambda () (parse #f (lex (open-input-string "0001111"))))) + +(check-exn exn:fail:parsing? + (lambda () (parse #f (lex (open-input-string "0001110"))))) + +(check-exn exn:fail:parsing? + (lambda () (parse #f (lex (open-input-string "10001110"))))) diff --git a/brag/brag/test/test-all.rkt b/brag/brag/test/test-all.rkt new file mode 100755 index 0000000..92c3153 --- /dev/null +++ b/brag/brag/test/test-all.rkt @@ -0,0 +1,18 @@ +#lang racket/base + + +(require "test-0n1.rkt" + "test-0n1n.rkt" + "test-01-equal.rkt" + "test-simple-arithmetic-grammar.rkt" + "test-baby-json.rkt" + "test-baby-json-hider.rkt" + "test-wordy.rkt" + "test-simple-line-drawing.rkt" + "test-flatten.rkt" + "test-lexer.rkt" + "test-parser.rkt" + "test-errors.rkt" + "test-old-token.rkt" + "test-weird-grammar.rkt" + (submod brag/codegen/satisfaction test)) diff --git a/brag/brag/test/test-baby-json-hider.rkt b/brag/brag/test/test-baby-json-hider.rkt new file mode 100755 index 0000000..b7580d6 --- /dev/null +++ b/brag/brag/test/test-baby-json-hider.rkt @@ -0,0 +1,19 @@ +#lang racket/base +(require brag/examples/baby-json-hider + brag/support + rackunit) + +(define parse-result (parse (list "{" + (token 'ID "message") + ":" + (token 'STRING "'hello world'") + "}"))) +(check-equal? (syntax->datum parse-result) '(json (":"))) + +(define syntaxed-colon-parens (cadr (syntax->list parse-result))) +(check-equal? (syntax->datum (syntax-property syntaxed-colon-parens 'kvpair)) 'kvpair) + +(check-equal? + (syntax->datum + (parse "[[[{}]],[],[[{}]]]")) + '(json (array #\[ (json (array #\[ (json (array #\[ (json) #\])) #\])) #\, (json (array #\[ #\])) #\, (json (array #\[ (json (array #\[ (json) #\])) #\])) #\]))) diff --git a/brag/brag/test/test-baby-json.rkt b/brag/brag/test/test-baby-json.rkt new file mode 100755 index 0000000..dff09f4 --- /dev/null +++ b/brag/brag/test/test-baby-json.rkt @@ -0,0 +1,25 @@ +#lang racket/base +(require brag/examples/baby-json + brag/support + rackunit) + +(check-equal? + (syntax->datum + (parse (list "{" + (token 'ID "message") + ":" + (token 'STRING "'hello world'") + "}"))) + '(json (object "{" + (kvpair "message" ":" (json (string "'hello world'"))) + "}"))) + + +(check-equal? 
+ (syntax->datum + (parse "[[[{}]],[],[[{}]]]")) + '(json (array #\[ (json (array #\[ (json (array #\[ (json (object #\{ #\})) #\])) #\])) #\, (json (array #\[ #\])) #\, (json (array #\[ (json (array #\[ (json (object #\{ #\})) #\])) #\])) #\]))) + + + + diff --git a/brag/brag/test/test-cutter.rkt b/brag/brag/test/test-cutter.rkt new file mode 100755 index 0000000..f4a14ac --- /dev/null +++ b/brag/brag/test/test-cutter.rkt @@ -0,0 +1,9 @@ +#lang racket/base +(require brag/examples/cutter + brag/support + rackunit) + +;; related to rule-flattening problem +(check-equal? + (parse-to-datum (list "(" "x" "," "x" ")")) + '(top (expr (list "(" (expr "x") "," (expr "x") ")")))) \ No newline at end of file diff --git a/brag/brag/test/test-errors.rkt b/brag/brag/test/test-errors.rkt new file mode 100755 index 0000000..b84ae34 --- /dev/null +++ b/brag/brag/test/test-errors.rkt @@ -0,0 +1,137 @@ +#lang racket/base + +(require rackunit + (for-syntax racket/base)) + +;; The tests in this module make sure we produce proper error messages +;; on weird grammars. + + +(define-namespace-anchor anchor) +(define ns (namespace-anchor->namespace anchor)) +(define (c prog) + (parameterize ([current-namespace ns] + [read-accept-reader #t]) + (define ip (open-input-string prog)) + (port-count-lines! ip) + (compile (read-syntax #f ip)))) + + +;; Helper to let me quickly write compile-error checks. +(define-syntax (check-compile-error stx) + (syntax-case stx () + [(_ prog expected-msg) + (quasisyntax/loc stx + (begin #,(syntax/loc stx + (check-exn (regexp (regexp-quote expected-msg)) + (lambda () + (c prog)))) + #,(syntax/loc stx + (check-exn exn:fail:syntax? + (lambda () + (c prog))))))])) + + + + + +;; errors with position are sensitive to length of lang line +(define lang-line "#lang brag") + +(check-compile-error (format "~a" lang-line) + "The grammar does not appear to have any rules") + +(check-compile-error (format "~a\nfoo" lang-line) + "Error while parsing grammar near: foo [line=2, column=0, position=12]") + +(check-compile-error (format "~a\nnumber : 42" lang-line) + "Error while parsing grammar near: 42 [line=2, column=9, position=21]") + +(check-compile-error (format "~a\nnumber : 1" lang-line) + "Error while parsing grammar near: 1 [line=2, column=9, position=21]") + + + +(check-compile-error "#lang brag\n x: NUMBER\nx:STRING" + "Rule x has a duplicate definition") + +;; Check to see that missing definitions for rules also raise good syntax +;; errors: + +(check-compile-error "#lang brag\nx:y" + "Rule y has no definition") + +(check-compile-error "#lang brag\nnumber : 1flarbl" + "Rule 1flarbl has no definition") + + + + +(check-compile-error "#lang brag\nprogram: EOF" + "Token EOF is reserved and can not be used in a grammar") + + + +;; Nontermination checks: +(check-compile-error "#lang brag\nx : x" + "Rule x has no finite derivation") + + + +(check-compile-error #<symbol (format "r~a" n))))) + + +;; Simple literals +(check-equal? (map syntax->datum (flatten-rule #'(rule expr (lit "hello")))) + '((prim-rule lit expr [(lit "hello")]))) + +(check-equal? (map syntax->datum + (flatten-rule #'(rule expr + (seq (lit "hello") + (lit "world"))))) + '((prim-rule seq expr [(lit "hello") (lit "world")]))) + + +(check-equal? (map syntax->datum (flatten-rule #'(rule expr (token HELLO)))) + '((prim-rule token expr [(token HELLO)]))) + +(check-equal? (map syntax->datum (flatten-rule #'(rule expr (id rule-2)))) + '((prim-rule id expr [(id rule-2)]))) + + +;; Sequences of primitives +(check-equal? 
(map syntax->datum + (flatten-rule #'(rule expr (seq (lit "1") (seq (lit "2") (lit "3")))))) + '((prim-rule seq expr + [(lit "1") (lit "2") (lit "3")]))) + +(check-equal? (map syntax->datum + (flatten-rule #'(rule expr (seq (seq (lit "1") (lit "2")) (lit "3"))))) + '((prim-rule seq expr + [(lit "1") (lit "2") (lit "3")]))) + + +(check-equal? (map syntax->datum + (flatten-rule #'(rule expr (seq (seq (lit "1")) (seq (lit "2") (lit "3")))))) + '((prim-rule seq expr + [(lit "1") (lit "2") (lit "3")]))) + + + +;; choices +(check-equal? (map syntax->datum + (flatten-rule #'(rule expr (choice (id rule-2) (id rule-3))))) + '((prim-rule choice expr + [(id rule-2)] + [(id rule-3)]))) + +(check-equal? (map syntax->datum + (flatten-rule #'(rule sexp (choice (seq (lit "(") (lit ")")) + (seq))) + #:fresh-name (make-fresh-name))) + '((prim-rule choice sexp + [(lit "(") (lit ")")] []))) + +(check-equal? (map syntax->datum + (flatten-rule #'(rule sexp (choice (seq (seq (lit "(") (token BLAH)) + (lit ")")) + (seq))) + #:fresh-name (make-fresh-name))) + '((prim-rule choice sexp + [(lit "(") (token BLAH) (lit ")")] []))) + + + + +;; maybe +(check-equal? (map syntax->datum + (flatten-rule #'(rule expr (maybe (id rule-2))))) + '((prim-rule maybe expr + [(id rule-2)] + []))) +(check-equal? (map syntax->datum + (flatten-rule #'(rule expr (maybe (token HUH))))) + '((prim-rule maybe expr + [(token HUH)] + []))) +(check-equal? (map syntax->datum + (flatten-rule #'(rule expr (maybe (seq (lit "hello") (lit "world")))))) + '((prim-rule maybe expr + [(lit "hello") (lit "world")] + []))) + + + + +;; repeat +(check-equal? (map syntax->datum + (flatten-rule #'(rule rule-2+ (repeat 0 (id rule-2))))) + '((prim-rule repeat rule-2+ + [(inferred-id rule-2+ repeat) (id rule-2)] + []))) +(check-equal? (map syntax->datum + (flatten-rule #'(rule rule-2+ (repeat 0 (seq (lit "+") (id rule-2)))))) + '((prim-rule repeat rule-2+ + [(inferred-id rule-2+ repeat) (lit "+") (id rule-2)] + []))) + +(check-equal? (map syntax->datum + (flatten-rule #'(rule rule-2+ (repeat 1 (id rule-2))))) + '((prim-rule repeat rule-2+ + [(inferred-id rule-2+ repeat) (id rule-2)] + [(id rule-2)]))) +(check-equal? (map syntax->datum + (flatten-rule #'(rule rule-2+ (repeat 1 (seq (lit "-") (id rule-2)))))) + '((prim-rule repeat rule-2+ + [(inferred-id rule-2+ repeat) (lit "-") (id rule-2)] + [(lit "-") (id rule-2)]))) + + + + + + +;; Mixtures + +;; choice and maybe +(check-equal? (map syntax->datum + (flatten-rule #'(rule sexp (choice (lit "x") + (maybe (lit "y")))) + #:fresh-name (make-fresh-name))) + '((prim-rule choice sexp + [(lit "x")] + [(inferred-id r1 maybe)]) + (inferred-prim-rule maybe r1 + [(lit "y")] + []))) +;; choice, maybe, repeat +(check-equal? (map syntax->datum + (flatten-rule #'(rule sexp (choice (lit "x") + (maybe (repeat 1 (lit "y"))))) + #:fresh-name (make-fresh-name))) + '((prim-rule choice sexp + [(lit "x")] + [(inferred-id r1 maybe)]) + (inferred-prim-rule maybe r1 + [(inferred-id r2 repeat)] + []) + (inferred-prim-rule repeat r2 + [(inferred-id r2 repeat) (lit "y")] + [(lit "y")]))) +;; choice, seq +(check-equal? (map syntax->datum + (flatten-rule #'(rule sexp (choice (seq (lit "x") (lit "y")) + (seq (lit "z") (lit "w")))) + #:fresh-name (make-fresh-name))) + '((prim-rule choice sexp + [(lit "x") (lit "y")] + [(lit "z") (lit "w")]))) + +;; maybe, choice +(check-equal? 
(map syntax->datum + (flatten-rule #'(rule sexp (maybe (choice (seq (lit "x") (lit "y")) + (seq (lit "z") (lit "w"))))) + #:fresh-name (make-fresh-name))) + '((prim-rule maybe sexp + [(inferred-id r1 choice)] + []) + (inferred-prim-rule choice r1 + [(lit "x") (lit "y")] + [(lit "z") (lit "w")]))) + + +;; seq, repeat +(check-equal? (map syntax->datum + (flatten-rule #'(rule expr (seq (id term) (repeat 0 (seq (lit "+") (id term))))) + #:fresh-name (make-fresh-name))) + '((prim-rule seq expr [(id term) (inferred-id r1 repeat)]) + (inferred-prim-rule repeat r1 [(inferred-id r1 repeat) (lit "+") (id term)] []))) + + +;; larger example: simple arithmetic +(check-equal? (map syntax->datum + (flatten-rules (syntax->list + #'((rule expr (seq (id term) (repeat 0 (seq (lit "+") (id term))))) + (rule term (seq (id factor) (repeat 0 (seq (lit "*") (id factor))))) + (rule factor (token INT)))) + #:fresh-name (make-fresh-name))) + + '((prim-rule seq expr [(id term) (inferred-id r1 repeat)]) + (inferred-prim-rule repeat r1 [(inferred-id r1 repeat) (lit "+") (id term)] []) + (prim-rule seq term [(id factor) (inferred-id r2 repeat)]) + (inferred-prim-rule repeat r2 [(inferred-id r2 repeat) (lit "*") (id factor)] []) + (prim-rule token factor [(token INT)]))) diff --git a/brag/brag/test/test-lexer.rkt b/brag/brag/test/test-lexer.rkt new file mode 100755 index 0000000..49f3774 --- /dev/null +++ b/brag/brag/test/test-lexer.rkt @@ -0,0 +1,73 @@ +#lang racket/base +(require brag/rules/lexer + rackunit + br-parser-tools/lex) + +(define (l s) + (define t (lex/1 (open-input-string s))) + (list (token-name (position-token-token t)) + (token-value (position-token-token t)) + (position-offset (position-token-start-pos t)) + (position-offset (position-token-end-pos t)))) + +;; WARNING: the offsets are not in terms of file positions. So they +;; start counting at 1, not 0. +(check-equal? (l " hi") + '(ID "hi" 2 4)) + +(check-equal? (l " hi") + '(ID "hi" 3 5)) + +(check-equal? (l "hi") + '(ID "hi" 1 3)) + +(check-equal? (l "# foobar\nhi") + '(ID "hi" 10 12)) + +(check-equal? (l "# foobar\rhi") + '(ID "hi" 10 12)) + +(check-equal? (l "# foobar\r\nhi") + '(ID "hi" 11 13)) + +(check-equal? (l "hi:") + '(RULE_HEAD "hi:" 1 4)) + +(check-equal? (l "hi :") + '(RULE_HEAD "hi :" 1 7)) + +(check-equal? (l "|") + '(PIPE "|" 1 2)) + +(check-equal? (l "(") + '(LPAREN "(" 1 2)) + +(check-equal? (l "[") + '(LBRACKET "[" 1 2)) + +(check-equal? (l ")") + '(RPAREN ")" 1 2)) + +(check-equal? (l "]") + '(RBRACKET "]" 1 2)) + +(check-equal? (l "'hello'") + '(LIT "'hello'" 1 8)) + +(check-equal? (l "'he\\'llo'") + '(LIT "'he\\'llo'" 1 10)) + +(check-equal? (l "/") + '(HIDE "/" 1 2)) + +(check-equal? (l " /") + '(HIDE "/" 2 3)) + +(check-equal? (l "@") + '(SPLICE "@" 1 2)) + +(check-equal? (l " @") + '(SPLICE "@" 2 3)) + +(check-equal? (l "#:prefix-out val:") + (list 'EOF eof 18 18)) ; lexer skips kwarg \ No newline at end of file diff --git a/brag/brag/test/test-old-token.rkt b/brag/brag/test/test-old-token.rkt new file mode 100755 index 0000000..2654b68 --- /dev/null +++ b/brag/brag/test/test-old-token.rkt @@ -0,0 +1,76 @@ +#lang racket/base + +;; Make sure the old token type also works fine. + +(require brag/examples/simple-line-drawing + brag/support + racket/list + br-parser-tools/lex + (prefix-in : br-parser-tools/lex-sre) + rackunit) + +(define-tokens tokens (INTEGER STRING |;| EOF)) + +(define (make-tokenizer ip) + (port-count-lines! 
ip) + (define lex (lexer-src-pos + [(:+ numeric) + (token-INTEGER (string->number lexeme))] + [upper-case + (token-STRING lexeme)] + ["b" + (token-STRING " ")] + [";" + (|token-;| lexeme)] + [whitespace + (return-without-pos (lex input-port))] + [(eof) + (token-EOF 'eof)])) + (lambda () + (lex ip))) + + + +(define the-parsed-object-stx + (parse (make-tokenizer (open-input-string #<list the-parsed-object-stx)))) + +(check-equal? (syntax->datum the-parsed-object-stx) + '(drawing (rows (repeat 3) (chunk 9 "X") ";") + (rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";") + (rows (repeat 3) (chunk 9 "X") ";"))) + +(define the-parsed-object (syntax->list the-parsed-object-stx)) + +(check-equal? (syntax-line the-parsed-object-stx) 1) +(check-equal? (syntax-column the-parsed-object-stx) 0) +(check-equal? (syntax-position the-parsed-object-stx) 1) +(check-equal? (syntax-span the-parsed-object-stx) 28) + +(check-equal? (length the-parsed-object) 4) + +(check-equal? (syntax->datum (second the-parsed-object)) + '(rows (repeat 3) (chunk 9 "X") ";")) +(check-equal? (syntax-line (list-ref the-parsed-object 1)) 1) + +(check-equal? (syntax->datum (third the-parsed-object)) + '(rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";")) +(check-equal? (syntax-line (list-ref the-parsed-object 2)) 2) + +(check-equal? (syntax->datum (fourth the-parsed-object)) + '(rows (repeat 3) (chunk 9 "X") ";")) +(check-equal? (syntax-line (list-ref the-parsed-object 3)) 3) + +;; FIXME: add tests to make sure location is as we expect. +;; +;; FIXME: handle the EOF issue better. Something in cfg-parser +;; appears to deviate from br-parser-tools/yacc with regards to the stop +;; token. diff --git a/brag/brag/test/test-parser.rkt b/brag/brag/test/test-parser.rkt new file mode 100755 index 0000000..2b56e27 --- /dev/null +++ b/brag/brag/test/test-parser.rkt @@ -0,0 +1,153 @@ +#lang racket/base + + +(require rackunit + br-parser-tools/lex + brag/rules/parser + brag/rules/lexer + brag/rules/rule-structs) + + +;; quick-and-dirty helper for pos construction. +(define (p x) + (pos x #f #f)) + + + +;; FIXME: fix the test cases so they work on locations rather than just offsets. +(check-equal? (grammar-parser (tokenize (open-input-string "expr : 'hello'"))) + (list (rule (p 1) (p 15) + (lhs-id (p 1) (p 5) "expr" #f) + (pattern-lit (p 8) (p 15) "hello" #f)))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON"))) + (list (rule (p 1) (p 13) + (lhs-id (p 1) (p 5) "expr" #f) + (pattern-token (p 8) (p 13) "COLON" #f)))) + +(check-equal? (grammar-parser (tokenize (open-input-string "/expr : COLON"))) + (list (rule (p 1) (p 14) + (lhs-id (p 1) (p 6) "expr" ''hide) + (pattern-token (p 9) (p 14) "COLON" #f)))) + +(check-equal? (grammar-parser (tokenize (open-input-string "@expr : COLON"))) + (list (rule (p 1) (p 14) + (lhs-id (p 1) (p 6) "expr" ''splice) + (pattern-token (p 9) (p 14) "COLON" #f)))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : /COLON COLON"))) + (list (rule (p 1) (p 20) + (lhs-id (p 1) (p 5) "expr" #f) + (pattern-seq (p 8) (p 20) + (list + (pattern-token (p 8) (p 14) "COLON" 'hide) + (pattern-token (p 15) (p 20) "COLON" #f)))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : /thing COLON"))) + (list (rule (p 1) (p 20) + (lhs-id (p 1) (p 5) "expr" #f) + (pattern-seq (p 8) (p 20) + (list + (pattern-id (p 8) (p 14) "thing" 'hide) + (pattern-token (p 15) (p 20) "COLON" #f)))))) + +(check-equal? 
(grammar-parser (tokenize (open-input-string "expr : @thing COLON"))) + (list (rule (p 1) (p 20) + (lhs-id (p 1) (p 5) "expr" #f) + (pattern-seq (p 8) (p 20) + (list + (pattern-id (p 8) (p 14) "thing" 'splice) + (pattern-token (p 15) (p 20) "COLON" #f)))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : 'hello'*"))) + (list (rule (p 1) (p 16) + (lhs-id (p 1) (p 5) "expr" #f) + (pattern-repeat (p 8) (p 16) + 0 + (pattern-lit (p 8) (p 15) "hello" #f))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : 'hello'+"))) + (list (rule (p 1) (p 16) + (lhs-id (p 1) (p 5) "expr" #f) + (pattern-repeat (p 8) (p 16) + 1 + (pattern-lit (p 8) (p 15) "hello" #f))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : [/'hello']"))) + (list (rule (p 1) (p 18) + (lhs-id (p 1) (p 5) "expr" #f) + (pattern-maybe (p 8) (p 18) + (pattern-lit (p 9) (p 17) "hello" 'hide))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON | BLAH"))) + (list (rule (p 1) (p 20) + (lhs-id (p 1) (p 5) "expr" #f) + (pattern-choice (p 8) (p 20) + (list (pattern-token (p 8) (p 13) "COLON" #f) + (pattern-token (p 16) (p 20) "BLAH" #f)))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON | BLAH | BAZ expr"))) + (list (rule (p 1) (p 31) + (lhs-id (p 1) (p 5) "expr" #f) + (pattern-choice (p 8) (p 31) + (list (pattern-token (p 8) (p 13) "COLON" #f) + (pattern-token (p 16) (p 20) "BLAH" #f) + (pattern-seq (p 23) (p 31) + (list (pattern-token (p 23) (p 26) "BAZ" #f) + (pattern-id (p 27) (p 31) "expr" #f)))))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two /three"))) + (list (rule (p 1) (p 22) + (lhs-id (p 1) (p 5) "expr" #f) + (pattern-seq (p 8) (p 22) (list (pattern-id (p 8) (p 11) "one" #f) + (pattern-id (p 12) (p 15) "two" #f) + (pattern-id (p 16) (p 22) "three" 'hide)))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : (one two three)"))) + (list (rule (p 1) (p 23) + (lhs-id (p 1) (p 5) "expr" #f) + (pattern-seq (p 8) (p 23) (list (pattern-id (p 9) (p 12) "one" #f) + (pattern-id (p 13) (p 16) "two" #f) + (pattern-id (p 17) (p 22) "three" #f)))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two* three"))) + (list (rule (p 1) (p 22) + (lhs-id (p 1) (p 5) "expr" #f) + (pattern-seq (p 8) (p 22) (list (pattern-id (p 8) (p 11) "one" #f) + (pattern-repeat (p 12) (p 16) 0 (pattern-id (p 12) (p 15) "two" #f)) + (pattern-id (p 17) (p 22) "three" #f)))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two+ three"))) + (list (rule (p 1) (p 22) + (lhs-id (p 1) (p 5) "expr" #f) + (pattern-seq (p 8) (p 22) (list (pattern-id (p 8) (p 11) "one" #f) + (pattern-repeat (p 12) (p 16) 1 (pattern-id (p 12) (p 15) "two" #f)) + (pattern-id (p 17) (p 22) "three" #f)))))) + +(check-equal? (grammar-parser (tokenize (open-input-string "expr : (one two)+ three"))) + (list (rule (p 1) (p 24) + (lhs-id (p 1) (p 5) "expr" #f) + (pattern-seq (p 8) (p 24) (list (pattern-repeat (p 8) (p 18) 1 + (pattern-seq (p 8) (p 17) + (list (pattern-id (p 9) (p 12) "one" #f) + (pattern-id (p 13) (p 16) "two" #f)))) + (pattern-id (p 19) (p 24) "three" #f)))))) + + +(check-equal? (grammar-parser (tokenize (open-input-string #<number lexeme))] + [whitespace + (token 'WHITESPACE #:skip? 
#t)] + ["+" + (token '+ "+")] + ["*" + (token '* "*")] + [(eof) + (token eof)])) + (lambda () + (lex/1 ip))) + + +;; expr : term ('+' term)* +;; term : factor (('*') factor)* +;; factor : INT + +(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "42")))) + '(expr (term (factor 42)))) +(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3+4")))) + '(expr (term (factor 3)) + "+" + (term (factor 4)))) +(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3+4+5")))) + '(expr (term (factor 3)) + "+" + (term (factor 4)) + "+" + (term (factor 5)))) + + +(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3*4*5")))) + '(expr (term (factor 3) "*" (factor 4) "*" (factor 5)))) + + +(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3*4 + 5*6")))) + '(expr (term (factor 3) "*" (factor 4)) + "+" + (term (factor 5) "*" (factor 6)))) + +(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "4*5+6")))) + '(expr (term (factor 4) "*" (factor 5)) + "+" + (term (factor 6)))) + +(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "4+5 *6")))) + '(expr (term (factor 4)) + "+" + (term (factor 5) "*" (factor 6)))) + + +(check-exn exn:fail:parsing? + (lambda () (parse #f (tokenize (open-input-string "7+"))))) +(check-exn exn:fail:parsing? + (lambda () (parse #f (tokenize (open-input-string "7+6+"))))) + + +(check-equal? all-token-types + (set '+ '* 'INT)) diff --git a/brag/brag/test/test-simple-line-drawing.rkt b/brag/brag/test/test-simple-line-drawing.rkt new file mode 100755 index 0000000..b9e3d19 --- /dev/null +++ b/brag/brag/test/test-simple-line-drawing.rkt @@ -0,0 +1,72 @@ +#lang racket/base + +(require brag/examples/simple-line-drawing + brag/support + racket/list + br-parser-tools/lex + (prefix-in : br-parser-tools/lex-sre) + rackunit) + +(define (make-tokenizer ip) + (port-count-lines! ip) + (define lex (lexer-src-pos + [(:+ numeric) + (token 'INTEGER (string->number lexeme))] + [upper-case + (token 'STRING lexeme)] + ["b" + (token 'STRING " ")] + [";" + (token ";" lexeme)] + [whitespace + (token 'WHITESPACE lexeme #:skip? #t)] + [(eof) + (void)])) + (lambda () + (lex ip))) + + + +(define the-parsed-object-stx + (parse (make-tokenizer (open-input-string #<list the-parsed-object-stx)))) + +(check-equal? (syntax->datum the-parsed-object-stx) + '(drawing (rows (repeat 3) (chunk 9 "X") ";") + (rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";") + (rows (repeat 3) (chunk 9 "X") ";"))) + +(define the-parsed-object (syntax->list the-parsed-object-stx)) + +(check-equal? (syntax-line the-parsed-object-stx) 1) +(check-equal? (syntax-column the-parsed-object-stx) 0) +(check-equal? (syntax-position the-parsed-object-stx) 1) +(check-equal? (syntax-span the-parsed-object-stx) 28) + +(check-equal? (length the-parsed-object) 4) + +(check-equal? (syntax->datum (second the-parsed-object)) + '(rows (repeat 3) (chunk 9 "X") ";")) +(check-equal? (syntax-line (list-ref the-parsed-object 1)) 1) + +(check-equal? (syntax->datum (third the-parsed-object)) + '(rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";")) +(check-equal? (syntax-line (list-ref the-parsed-object 2)) 2) + +(check-equal? (syntax->datum (fourth the-parsed-object)) + '(rows (repeat 3) (chunk 9 "X") ";")) +(check-equal? (syntax-line (list-ref the-parsed-object 3)) 3) + +;; FIXME: add tests to make sure location is as we expect. +;; +;; FIXME: handle the EOF issue better. 
Something in cfg-parser +;; appears to deviate from br-parser-tools/yacc with regards to the stop +;; token. diff --git a/brag/brag/test/test-weird-grammar.rkt b/brag/brag/test/test-weird-grammar.rkt new file mode 100755 index 0000000..1847feb --- /dev/null +++ b/brag/brag/test/test-weird-grammar.rkt @@ -0,0 +1,7 @@ +#lang racket/base + +(require "weird-grammar.rkt" + rackunit) + +(check-equal? (syntax->datum (parse '("foo"))) + '(foo "foo")) diff --git a/brag/brag/test/test-whitespace.rkt b/brag/brag/test/test-whitespace.rkt new file mode 100755 index 0000000..455ee3f --- /dev/null +++ b/brag/brag/test/test-whitespace.rkt @@ -0,0 +1,12 @@ +#lang racket/base +(require brag/examples/whitespace + brag/support + rackunit) + +(check-equal? + (parse-to-datum "\ty\n x\tz") + '(start (tab "\t") (letter "y") (newline "\n") (space " ") (letter "x") (tab "\t") (letter "z"))) + +(check-equal? + (parse-to-datum "\t\n \t") + '(start (tab "\t") (newline "\n") (space " ") (tab "\t"))) diff --git a/brag/brag/test/test-wordy.rkt b/brag/brag/test/test-wordy.rkt new file mode 100755 index 0000000..bcb516d --- /dev/null +++ b/brag/brag/test/test-wordy.rkt @@ -0,0 +1,18 @@ +#lang racket/base +(require brag/examples/wordy + brag/support + rackunit) + +(check-equal? + (syntax->datum + (parse (list "hello" "world"))) + '(sentence (verb (greeting "hello")) (optional-adjective) (object "world"))) + + + +(check-equal? + (syntax->datum + (parse (list "hola" "frumpy" (token 'WORLD "세계")))) + + '(sentence (verb (greeting "hola")) (optional-adjective "frumpy") (object "세계"))) + diff --git a/brag/brag/test/weird-grammar.rkt b/brag/brag/test/weird-grammar.rkt new file mode 100755 index 0000000..094ddcd --- /dev/null +++ b/brag/brag/test/weird-grammar.rkt @@ -0,0 +1,6 @@ +#lang brag + +;; This used to fail when we had the yacc-based backend, but +;; cfg-parser seems to be ok with it. + +foo: "foo" diff --git a/brag/compiled/info_rkt.dep b/brag/compiled/info_rkt.dep new file mode 100644 index 0000000..d9ddf54 --- /dev/null +++ b/brag/compiled/info_rkt.dep @@ -0,0 +1 @@ +("6.7.0.3" ("204513a73112731266f01cc8a274660a65d93031" . "a5c1fb83a37820ddc78fedf5c303e1df7f1896cc") (collects #"setup" #"infotab" #"lang" #"reader.rkt") (collects #"setup" #"infotab.rkt")) diff --git a/brag/compiled/info_rkt.zo b/brag/compiled/info_rkt.zo new file mode 100644 index 0000000..16aabff Binary files /dev/null and b/brag/compiled/info_rkt.zo differ diff --git a/brag/info.rkt b/brag/info.rkt new file mode 100755 index 0000000..0211925 --- /dev/null +++ b/brag/info.rkt @@ -0,0 +1,7 @@ +#lang setup/infotab + +(define deps '("base" "br-parser-tools-lib" "rackunit-lib")) +(define build-deps '("at-exp-lib" "br-parser-tools-doc" "racket-doc" + "scribble-lib")) +(define collection 'multi) +
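For orientation, here is a minimal sketch (not part of the diff) of how the pieces added above fit together, using only bindings the diff itself defines: tokenize from brag/rules/lexer, grammar-parser from brag/rules/parser, and rules->stx from brag/rules/stx. It assumes the brag collection is installed with the layout shown here; the source name "my-grammar.rkt" is only a placeholder.

#lang racket/base
(require brag/rules/lexer   ; provides tokenize
         brag/rules/parser  ; provides grammar-parser
         brag/rules/stx)    ; provides rules->stx

;; Lex and parse a one-rule grammar, then turn the rule structs into syntax.
(define rules
  (grammar-parser (tokenize (open-input-string "expr : 'hello'*"))))

;; "my-grammar.rkt" is a placeholder source name used only for srcloc reporting.
(rules->stx "my-grammar.rkt" rules)

Applying syntax->datum to the result should yield (rules (rule expr (repeat 0 (lit "hello")))).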