diff --git a/br-parser-tools/br-parser-tools-doc/LICENSE.txt b/br-parser-tools/br-parser-tools-doc/LICENSE.txt deleted file mode 100644 index c424668..0000000 --- a/br-parser-tools/br-parser-tools-doc/LICENSE.txt +++ /dev/null @@ -1,11 +0,0 @@ -parser-tools-doc -Copyright (c) 2010-2014 PLT Design Inc. - -This package is distributed under the GNU Lesser General Public -License (LGPL). This means that you can link this package into proprietary -applications, provided you follow the rules stated in the LGPL. You -can also modify this package; if you distribute a modified version, -you must distribute it under the terms of the LGPL, which in -particular means that you must release the source code for the -modified software. See http://www.gnu.org/copyleft/lesser.html -for more information. diff --git a/br-parser-tools/br-parser-tools-doc/br-parser-tools/br-parser-tools.scrbl b/br-parser-tools/br-parser-tools-doc/br-parser-tools/br-parser-tools.scrbl deleted file mode 100644 index 7e0684c..0000000 --- a/br-parser-tools/br-parser-tools-doc/br-parser-tools/br-parser-tools.scrbl +++ /dev/null @@ -1,769 +0,0 @@ -#lang scribble/doc -@(require scribble/manual scribble/struct scribble/xref scribble/bnf - (for-label scheme/base - scheme/contract - br-parser-tools/lex - (prefix-in : br-parser-tools/lex-sre) - br-parser-tools/yacc - br-parser-tools/cfg-parser)) - -@title{Parser Tools: @exec{lex} and @exec{yacc}-style Parsing (Beautiful Racket edition)} - -@author["Scott Owens (99%)" "Matthew Butterick (1%)"] - -This documentation assumes familiarity with @exec{lex} and @exec{yacc} -style lexer and parser generators. - -@table-of-contents[] - -@; ---------------------------------------------------------------------- - -@section{Lexers} - -@section-index["lex"] -@section-index["scanning"] -@section-index["scanner"] - -@defmodule[br-parser-tools/lex] - -@; ---------------------------------------- - -@subsection{Creating a Lexer} - -@defform/subs[#:literals (repetition union intersection complement concatenation - char-range char-complement - eof special special-comment) - (lexer [trigger action-expr] ...) - ([trigger re - (eof) - (special) - (special-comment)] - [re id - string - character - (repetition lo hi re) - (union re ...) - (intersection re ...) - (complement re) - (concatenation re ...) - (char-range char char) - (char-complement re) - (id datum ...)])]{ - - Produces a function that takes an input-port, matches the - @racket[re] patterns against the buffer, and returns the result of - executing the corresponding @racket[action-expr]. When multiple - patterns match, a lexer will choose the longest match, breaking - ties in favor of the rule appearing first. - - @margin-note{The implementation of @racketmodname[syntax-color/racket-lexer] - contains a lexer for the @racketmodname[racket] language. 
- In addition, files in the @filepath{examples} sub-directory - of the @filepath{br-parser-tools} collection contain - simpler example lexers.} - - An @racket[re] is matched as follows: - - @itemize[ - @item{@racket[id] --- expands to the named @deftech{lexer abbreviation}; - abbreviations are defined via @racket[define-lex-abbrev] or supplied by modules - like @racketmodname[br-parser-tools/lex-sre].} - @item{@racket[string] --- matches the sequence of characters in @racket[string].} - @item{@racket[character] --- matches a literal @racket[character].} - @item{@racket[(repetition lo hi re)] --- matches @racket[re] repeated between @racket[lo] - and @racket[hi] times, inclusive; @racket[hi] can be @racket[+inf.0] for unbounded repetitions.} - @item{@racket[(union re ...)] --- matches if any of the sub-expressions match} - @item{@racket[(intersection re ...)] --- matches if all of the @racket[re]s match.} - @item{@racket[(complement re)] --- matches anything that @racket[re] does not.} - @item{@racket[(concatenation re ...)] --- matches each @racket[re] in succession.} - @item{@racket[(char-range char char)] --- matches any character between the two (inclusive); - a single character string can be used as a @racket[char].} - @item{@racket[(char-complement re)] --- matches any character not matched by @racket[re]. - The sub-expression must be a set of characters @racket[re].} - @item{@racket[(id datum ...)] --- expands the @deftech{lexer macro} named @racket[id]; macros - are defined via @racket[define-lex-trans].} - ] - -Note that both @racket[(concatenation)] and @racket[""] match the -empty string, @racket[(union)] matches nothing, -@racket[(intersection)] matches any string, and -@racket[(char-complement (union))] matches any single character. - -The regular expression language is not designed to be used directly, -but rather as a basis for a user-friendly notation written with -regular expression macros. For example, -@racketmodname[br-parser-tools/lex-sre] supplies operators from Olin -Shivers's SREs, and @racketmodname[br-parser-tools/lex-plt-v200] supplies -(deprecated) operators from the previous version of this library. -Since those libraries provide operators whose names match other Racket -bindings, such as @racket[*] and @racket[+], they normally must be -imported using a prefix: - -@racketblock[ -(require (prefix-in : br-parser-tools/lex-sre)) -] - -The suggested prefix is @racket[:], so that @racket[:*] and -@racket[:+] are imported. Of course, a prefix other than @racket[:] -(such as @racket[re-]) will work too. - -Since negation is not a common operator on regular expressions, here -are a few examples, using @racket[:] prefixed SRE syntax: - -@itemize[ - -@item{@racketblock0[(complement "1")] - - Matches all strings except the string @racket["1"], including - @racket["11"], @racket["111"], @racket["0"], @racket["01"], - @racket[""], and so on.} - -@item{@racketblock0[(complement (:* "1"))] - - Matches all strings that are not sequences of @racket["1"], - including @racket["0"], @racket["00"], @racket["11110"], - @racket["0111"], @racket["11001010"] and so on.} - -@item{@racketblock0[(:& (:: any-string "111" any-string) - (complement (:or (:: any-string "01") (:+ "1"))))] - - Matches all strings that have 3 consecutive ones, but not those that - end in @racket["01"] and not those that are ones only. 
These - include @racket["1110"], @racket["0001000111"] and @racket["0111"] - but not @racket[""], @racket["11"], @racket["11101"], @racket["111"] - and @racket["11111"].} - -@item{@racketblock0[(:: "/*" (complement (:: any-string "*/" any-string)) "*/")] - - Matches Java/C block comments. @racket["/**/"], - @racket["/******/"], @racket["/*////*/"], @racket["/*asg4*/"] and so - on. It does not match @racket["/**/*/"], @racket["/* */ */"] and so - on. @racket[(:: any-string "*/" any-string)] matches any string - that has a @racket["*/"] in is, so @racket[(complement (:: any-string "*/" - any-string))] matches any string without a @racket["*/"] in it.} - -@item{@racketblock0[(:: "/*" (:* (complement "*/")) "*/")] - - Matches any string that starts with @racket["/*"] and ends with - @racket["*/"], including @racket["/* */ */ */"]. - @racket[(complement "*/")] matches any string except @racket["*/"]. - This includes @racket["*"] and @racket["/"] separately. Thus - @racket[(:* (complement "*/"))] matches @racket["*/"] by first - matching @racket["*"] and then matching @racket["/"]. Any other - string is matched directly by @racket[(complement "*/")]. In other - words, @racket[(:* (complement "xx"))] = @racket[any-string]. It is - usually not correct to place a @racket[:*] around a - @racket[complement].} -] - - - The following binding have special meaning inside of a lexer - action: - - @itemize[ - @item{@racket[start-pos] --- a @racket[position] struct for the first character matched.} - @item{@racket[end-pos] --- a @racket[position] struct for the character after the last character in the match.} - @item{@racket[lexeme] --- the matched string.} - @item{@racket[input-port] --- the input-port being - processed (this is useful for matching input with multiple - lexers).} - @item{@racket[(return-without-pos x)] and @racket[(return-without-srcloc x)] are functions (continuations) that - immediately return the value of @racket[x] from the lexer. This useful - in a src-pos or src-loc lexer to prevent the lexer from adding source - information. For example: - - @racketblock[ - (define get-token - (lexer-srcloc - ... - ((comment) (get-token input-port)) - ...)) - ] - - would wrap the source location information for the comment around - the value of the recursive call. Using - @racket[((comment) (return-without-srcloc (get-token input-port)))] - will cause the value of the recursive call to be returned without - wrapping position around it.} - ] - - The lexer raises an @racket[exn:fail:read] exception if none of the - regular expressions match the input. Hint: If @racket[(any-char - _custom-error-behavior)] is the last rule, then there will always - be a match, and @racket[_custom-error-behavior] is executed to - handle the error situation as desired, only consuming the first - character from the input buffer. - - In addition to returning characters, input - ports can return @racket[eof-object]s. Custom input ports can - also return a @racket[special-comment] value to indicate a - non-textual comment, or return another arbitrary value (a - special). The non-@racket[re] @racket[trigger] forms handle these - cases: - - @itemize[ - - @item{The @racket[(eof)] rule is matched when the input port - returns an @racket[eof-object] value. If no @racket[(eof)] - rule is present, the lexer returns the symbol @racket['eof] - when the port returns an @racket[eof-object] value.} - - @item{The @racket[(special-comment)] rule is matched when the - input port returns a @racket[special-comment] structure. 
If no - @racket[special-comment] rule is present, the lexer - automatically tries to return the next token from the input - port.} - - @item{The @racket[(special)] rule is matched when the input - port returns a value other than a character, - @racket[eof-object], or @racket[special-comment] structure. If - no @racket[(special)] rule is present, the lexer returns - @racket[(void)].}] - - End-of-files, specials, special-comments and special-errors cannot - be parsed via a rule using an ordinary regular expression - (but dropping down and manipulating the port to handle them - is possible in some situations). - - Since the lexer gets its source information from the port, use - @racket[port-count-lines!] to enable the tracking of line and - column information. Otherwise, the line and column information - will return @racket[#f]. - - When peeking from the input port raises an exception (such as by - an embedded XML editor with malformed syntax), the exception can - be raised before all tokens preceding the exception have been - returned. - - Each time the racket code for a lexer is compiled (e.g. when a - @filepath{.rkt} file containing a @racket[lexer] form is loaded), - the lexer generator is run. To avoid this overhead place the - lexer into a module and compile the module to a @filepath{.zo} - bytecode file.} - -@defform[(lexer-src-pos (trigger action-expr) ...)]{ - -Like @racket[lexer], but for each @racket[_action-result] produced by -an @racket[action-expr], returns @racket[(make-position-token -_action-result start-pos end-pos)] instead of simply -@racket[_action-result].} - -@defform[(lexer-srcloc (trigger action-expr) ...)]{ - -Like @racket[lexer], but for each @racket[_action-result] produced by -an @racket[action-expr], returns @racket[(make-srcloc-token -_action-result lexeme-srcloc)] instead of simply -@racket[_action-result].} - -@deftogether[( -@defidform[start-pos] -@defidform[end-pos] -@defidform[lexeme] -@defidform[lexeme-srcloc] -@defidform[input-port] -@defidform[return-without-pos] -@defidform[return-without-srcloc] -)]{ - -Use of these names outside of a @racket[lexer] action is a syntax -error.} - -@defstruct[position ([offset exact-positive-integer?] - [line exact-positive-integer?] - [col exact-nonnegative-integer?])]{ - - Instances of @racket[position] are bound to @racket[start-pos] and - @racket[end-pos]. The @racket[offset] field contains the offset of - the character in the input. The @racket[line] field contains the - line number of the character. The @racket[col] field contains the - offset in the current line.} - -@defstruct[position-token ([token any/c] - [start-pos position?] - [end-pos position?])]{ - - Lexers created with @racket[lexer-src-pos] return instances of @racket[position-token].} - - -@defstruct[srcloc-token ([token any/c] - [srcloc srcloc?])]{ - - Lexers created with @racket[lexer-srcloc] return instances of @racket[srcloc-token].} - -@defparam[file-path source any/c]{ - - A parameter that the lexer uses as the source location if it - raises a @racket[exn:fail:read] error. 
Setting this parameter allows - DrRacket, for example, to open the file containing the error.} - -@defparam[lexer-file-path source any/c]{ - -Alias for @racket[file-path].} - - -@; ---------------------------------------- - -@subsection{Lexer Abbreviations and Macros} - -@defform[(char-set string)]{ - -A @tech{lexer macro} that matches any character in @racket[string].} - -@defidform[any-char]{A @tech{lexer abbreviation} that matches any character.} - -@defidform[any-string]{A @tech{lexer abbreviation} that matches any string.} - -@defidform[nothing]{A @tech{lexer abbreviation} that matches no string.} - -@deftogether[( -@defidform[alphabetic] -@defidform[lower-case] -@defidform[upper-case] -@defidform[title-case] -@defidform[numeric] -@defidform[symbolic] -@defidform[punctuation] -@defidform[graphic] -@defidform[whitespace] -@defidform[blank] -@defidform[iso-control] -)]{ - -@tech{Lexer abbreviations} that match @racket[char-alphabetic?] -characters, @racket[char-lower-case?] characters, etc.} - -@defform[(define-lex-abbrev id re)]{ - - Defines a @tech{lexer abbreviation} by associating a regular - expression to be used in place of the @racket[id] in other - regular expression. The definition of name has the same scoping - properties as a other syntactic binding (e.g., it can be exported - from a module).} - -@defform[(define-lex-abbrevs (id re) ...)]{ - - Like @racket[define-lex-abbrev], but defines several @tech{lexer - abbreviations}.} - - -@defform[(define-lex-trans id trans-expr)]{ - - Defines a @tech{lexer macro}, where @racket[trans-expr] produces a - transformer procedure that takes one argument. When @racket[(id - _datum ...)] appears as a regular expression, it is replaced with - the result of applying the transformer to the expression.} - - -@; ---------------------------------------- - -@subsection{Lexer SRE Operators} - -@defmodule[br-parser-tools/lex-sre] - -@; Put the docs in a macro, so that we can bound the scope of -@; the import of `*', etc.: -@(define-syntax-rule (lex-sre-doc) - (... - (begin - (require (for-label br-parser-tools/lex-sre)) - -@defform[(* re ...)]{ - -Repetition of @racket[re] sequence 0 or more times.} - -@defform[(+ re ...)]{ - -Repetition of @racket[re] sequence 1 or more times.} - -@defform[(? 
re ...)]{ - -Zero or one occurrence of @racket[re] sequence.} - -@defform[(= n re ...)]{ - -Exactly @racket[n] occurrences of @racket[re] sequence, where -@racket[n] must be a literal exact, non-negative number.} - -@defform[(>= n re ...)]{ - -At least @racket[n] occurrences of @racket[re] sequence, where -@racket[n] must be a literal exact, non-negative number.} - -@defform[(** n m re ...)]{ - -Between @racket[n] and @racket[m] (inclusive) occurrences of -@racket[re] sequence, where @racket[n] must be a literal exact, -non-negative number, and @racket[m] must be literally either -@racket[#f], @racket[+inf.0], or an exact, non-negative number; a -@racket[#f] value for @racket[m] is the same as @racket[+inf.0].} - -@defform[(or re ...)]{ - -Same as @racket[(union re ...)].} - -@deftogether[( -@defform[(: re ...)] -@defform[(seq re ...)] -)]{ - -Both forms concatenate the @racket[re]s.} - -@defform[(& re ...)]{ - -Intersects the @racket[re]s.} - -@defform[(- re ...)]{ - -The set difference of the @racket[re]s.} - -@defform[(~ re ...)]{ - -Character-set complement, which each @racket[re] must match exactly -one character.} - -@defform[(/ char-or-string ...)]{ - -Character ranges, matching characters between successive pairs of -characters.} - -))) - -@(lex-sre-doc) - -@; ---------------------------------------- - -@subsection{Lexer Legacy Operators} - -@defmodule[br-parser-tools/lex-plt-v200] - -@(define-syntax-rule (lex-v200-doc) - (... - (begin - (require (for-label br-parser-tools/lex-plt-v200)) - -@t{The @racketmodname[br-parser-tools/lex-plt-v200] module re-exports - @racket[*], @racket[+], @racket[?], and @racket[&] from - @racketmodname[br-parser-tools/lex-sre]. It also re-exports - @racket[:or] as @racket[:], @racket[::] as @racket[|@|], @racket[:~] - as @racket[^], and @racket[:/] as @racket[-].} - -@defform[(epsilon)]{ - -A @tech{lexer macro} that matches an empty sequence.} - -@defform[(~ re ...)]{ - -The same as @racket[(complement re ...)].}))) - -@(lex-v200-doc) - -@; ---------------------------------------- - -@subsection{Tokens} - -Each @racket[_action-expr] in a @racket[lexer] form can produce any -kind of value, but for many purposes, producing a @deftech{token} -value is useful. Tokens are usually necessary for inter-operating with -a parser generated by @racket[br-parser-tools/parser], but tokens may not -be the right choice when using @racket[lexer] in other situations. - -@defform[(define-tokens group-id (token-id ...))]{ - - Binds @racket[group-id] to the group of tokens being defined. For - each @racket[token-id], a function - @racketidfont{token-}@racket[token-id] is created that takes any - value and puts it in a token record specific to @racket[token-id]. - The token value is inspected using @racket[token-id] and - @racket[token-value]. - - A token cannot be named @racketidfont{error}, since - @racketidfont{error} it has special use in the parser.} - -@defform[(define-empty-tokens group-id (token-id ...) )]{ - - - Like @racket[define-tokens], except a each token constructor - @racketidfont{token-}@racket[token-id] takes no arguments and returns - @racket[(@#,racket[quote] token-id)].} - - -@defproc[(token-name [t (or/c token? symbol?)]) symbol?]{ - - Returns the name of a token that is represented either by a symbol - or a token structure.} - - -@defproc[(token-value [t (or/c token? symbol?)]) any/c]{ - - Returns the value of a token that is represented either by a symbol - or a token structure, returning @racket[#f] for a symbol token.} - - -@defproc[(token? 
[v any/c]) boolean?]{ - - Returns @racket[#t] if @racket[val] is a - token structure, @racket[#f] otherwise.} - -@; ---------------------------------------------------------------------- - -@section{LALR(1) Parsers} - -@section-index["yacc"] - -@defmodule[br-parser-tools/yacc] - -@defform/subs[#:literals (grammar tokens start end precs src-pos - suppress debug yacc-output prec) - (parser clause ...) - ([clause (grammar (non-terminal-id - ((grammar-id ...) maybe-prec expr) - ...) - ...) - (tokens group-id ...) - (start non-terminal-id ...) - (end token-id ...) - (@#,racketidfont{error} expr) - (precs (assoc token-id ...) ...) - (src-pos) - (suppress) - (debug filename) - (yacc-output filename)] - [maybe-prec code:blank - (prec token-id)] - [assoc left right nonassoc])]{ - - Creates a parser. The clauses may be in any order, as long as there - are no duplicates and all non-@italic{OPTIONAL} declarations are - present: - - @itemize[ - - @item{@racketblock0[(grammar (non-terminal-id - ((grammar-id ...) maybe-prec expr) - ...) - ...)] - - Declares the grammar to be parsed. Each @racket[grammar-id] can - be a @racket[token-id] from a @racket[group-id] named in a - @racket[tokens] declaration, or it can be a - @racket[non-terminal-id] declared in the @racket[grammar] - declaration. The optional @racket[prec] declaration works with - the @racket[precs] declaration. The @racket[expr] is a - ``semantic action,'' which is evaluated when the input is found - to match its corresponding production. - - Each action is Racket code that has the same scope as its - parser's definition, except that the variables @racket[$1], ..., - @racketidfont{$}@math{i} are bound, where @math{i} is the number - of @racket[grammar-id]s in the corresponding production. Each - @racketidfont{$}@math{k} is bound to the result of the action - for the @math{k}@superscript{th} grammar symbol on the right of - the production, if that grammar symbol is a non-terminal, or the - value stored in the token if the grammar symbol is a terminal. - If the @racket[src-pos] option is present in the parser, then - variables @racket[$1-start-pos], ..., - @racketidfont{$}@math{i}@racketidfont{-start-pos} and - @racket[$1-end-pos], ..., - @racketidfont{$}@math{i}@racketidfont{-end-pos} and are also - available, and they refer to the position structures - corresponding to the start and end of the corresponding - @racket[grammar-symbol]. Grammar symbols defined as empty-tokens - have no @racketidfont{$}@math{k} associated, but do have - @racketidfont{$}@math{k}@racketidfont{-start-pos} and - @racketidfont{$}@math{k}@racketidfont{-end-pos}. - Also @racketidfont{$n-start-pos} and @racketidfont{$n-end-pos} - are bound to the largest start and end positions, (i.e., - @racketidfont{$}@math{i}@racketidfont{-start-pos} and - @racketidfont{$}@math{i}@racketidfont{-end-pos}). - - An @deftech{error production} can be defined by providing - a production of the form @racket[(error α)], where α is a - string of grammar symbols, possibly empty. - - All of the productions for a given non-terminal must be grouped - with it. 
That is, no @racket[non-terminal-id] may appear twice - on the left hand side in a parser.} - - - @item{@racket[(tokens group-id ...)] - - Declares that all of the tokens defined in each - @racket[group-id]---as bound by @racket[define-tokens] or - @racket[define-empty-tokens]---can be used by the parser in the - @racket[grammar] declaration.} - - - @item{@racket[(start non-terminal-id ...)] - - Declares a list of starting non-terminals for the grammar.} - - - @item{@racket[(end token-id ...)] - - Specifies a set of tokens from which some member must follow any - valid parse. For example, an EOF token would be specified for a - parser that parses entire files and a newline token for a parser - that parses entire lines individually.} - - - @item{@racket[(@#,racketidfont{error} expr)] - - The @racket[expr] should evaluate to a function which will be - executed for its side-effect whenever the parser encounters an - error. - - If the @racket[src-pos] declaration is present, the function - should accept 5 arguments,: - - @racketblock[(lambda (tok-ok? tok-name tok-value _start-pos _end-pos) - ....)] - - Otherwise it should accept 3: - - @racketblock[(lambda (tok-ok? tok-name tok-value) - ....)] - - The first argument will be @racket[#f] if and only if the error - is that an invalid token was received. The second and third - arguments will be the name and the value of the token at which - the error was detected. The fourth and fifth arguments, if - present, provide the source positions of that token.} - - - @item{@racket[(precs (assoc token-id ...) ...)] - @italic{OPTIONAL} - - Precedence declarations to resolve shift/reduce and - reduce/reduce conflicts as in @exec{yacc}/@exec{bison}. An - @racket[assoc] must be one of @racket[left], @racket[right] or - @racket[nonassoc]. States with multiple shift/reduce or - reduce/reduce conflicts (or some combination thereof) are not - resolved with precedence.} - - @item{@racket[(src-pos)] @italic{OPTIONAL} - - Causes the generated parser to expect input in the form - @racket[(make-position-token _token _start-pos _end-pos)] instead - of simply @racket[_token]. Include this option when using the - parser with a lexer generated with @racket[lexer-src-pos].} - - - @item{@racket[(debug filename)] @italic{OPTIONAL} - - Causes the parser generator to write the LALR table to the file - named @racket[filename] (unless the file exists), where - @racket[filename] is a literal string. Additionally, if a debug - file is specified, when a running generated parser encounters a - parse error on some input file, after the user specified error - expression returns, the complete parse stack is printed to - assist in debugging the grammar of that particular parser. The - numbers in the stack printout correspond to the state numbers in - the LALR table file.} - - - @item{@racket[(yacc-output filename)] @italic{OPTIONAL} - - Causes the parser generator to write a grammar file in - approximately the syntax of @exec{yacc}/@exec{bison}. The file - might not be a valid @exec{yacc} file, because the Racket - grammar can use symbols that are invalid in C.} - - - @item{@racket[(suppress)] @italic{OPTIONAL} - - Causes the parser generator not to report shift/reduce or - reduce/reduce conflicts.} - - ] - - The result of a @racket[parser] expression with one @racket[start] - non-terminal is a function, @racket[_parse], that takes one - argument. This argument must be a zero argument function, - @racket[_gen], that produces successive tokens of the input each - time it is called. 
If desired, the @racket[_gen] may return - symbols instead of tokens, and the parser will treat symbols as - tokens of the corresponding name (with @racket[#f] as a value, so - it is usual to return symbols only in the case of empty tokens). - The @racket[_parse] function returns the value associated with the - parse tree by the semantic actions. If the parser encounters an - error, after invoking the supplied error function, it will try to - use @tech{error production}s to continue parsing. If it cannot, it - raises @racket[exn:fail:read]. - - If multiple non-terminals are provided in @racket[start], the - @racket[parser] expression produces a list of parsing functions, - one for each non-terminal in the same order. Each parsing function - is like the result of a parser expression with only one - @racket[start] non-terminal, - - Each time the Racket code for a @racket[parser] is compiled - (e.g. when a @filepath{.rkt} file containing a @racket[parser] form - is loaded), the parser generator is run. To avoid this overhead - place the parser into a module and compile the module to a - @filepath{.zo} bytecode file.} - - - - -@section{Context-Free Parsers} - -@section-index["cfg-parser"] - -@defmodule[br-parser-tools/cfg-parser]{The @racketmodname[br-parser-tools/cfg-parser] -library provides a parser generator that is an alternative to that of -@racketmodname[br-parser-tools/yacc].} - -@defform/subs[#:literals (grammar tokens start end precs src-pos - suppress debug yacc-output prec) - (cfg-parser clause ...) - ([clause (grammar (non-terminal-id - ((grammar-id ...) maybe-prec expr) - ...) - ...) - (tokens group-id ...) - (start non-terminal-id ...) - (end token-id ...) - (@#,racketidfont{error} expr) - (src-pos)])]{ - - Creates a parser similar to that of @racket[parser]. Unlike @racket[parser], - @racket[cfg-parser], can consume arbitrary and potentially ambiguous context-free - grammars. Its interface is a subset of @racketmodname[br-parser-tools/yacc], with - the following differences: - - @itemize[ - - @item{@racket[(start non-terminal-id)] - - Unlike @racket[parser], @racket[cfg-parser] only allows for - a single non-terminal-id.} - - @item{The @racket[cfg-parser] form does not support the @racket[precs], - @racket[suppress], @racket[debug], or @racket[yacc-output] - options of @racket[parser].} - ] -} - -@; ---------------------------------------------------------------------- - -@section{Converting @exec{yacc} or @exec{bison} Grammars} - -@defmodule[br-parser-tools/yacc-to-scheme] - -@defproc[(trans [file path-string?]) any/c]{ - -Reads a C @exec{yacc}/@exec{bison} grammar from @racket[file] and -produces an s-expression that represents a Racket parser for use with -@racket[parser]. - -This function is intended to assist in the manual conversion of -grammars for use with @racket[parser], and not as a fully automatic -conversion tool. It is not entirely robust. For example, if the C -actions in the original grammar have nested blocks, the tool will fail. 
- -Annotated examples are in the @filepath{examples} subdirectory of the -@filepath{br-parser-tools} collection.} - -@; ---------------------------------------------------------------------- - -@index-section[] diff --git a/br-parser-tools/br-parser-tools-doc/br-parser-tools/info.rkt b/br-parser-tools/br-parser-tools-doc/br-parser-tools/info.rkt deleted file mode 100644 index f219d03..0000000 --- a/br-parser-tools/br-parser-tools-doc/br-parser-tools/info.rkt +++ /dev/null @@ -1,3 +0,0 @@ -#lang info - -(define scribblings '(("br-parser-tools.scrbl" (multi-page) (parsing-library)))) diff --git a/br-parser-tools/br-parser-tools-doc/info.rkt b/br-parser-tools/br-parser-tools-doc/info.rkt deleted file mode 100644 index 8760588..0000000 --- a/br-parser-tools/br-parser-tools-doc/info.rkt +++ /dev/null @@ -1,14 +0,0 @@ -#lang info - -(define collection 'multi) -(define deps '("base")) -(define build-deps '("scheme-lib" - "racket-doc" - "syntax-color-doc" - "br-parser-tools-lib" - "scribble-lib")) -(define update-implies '("br-parser-tools-lib")) - -(define pkg-desc "documentation part of \"br-parser-tools\"") - -(define pkg-authors '(mflatt)) diff --git a/br-parser-tools/br-parser-tools-lib/LICENSE.txt b/br-parser-tools/br-parser-tools-lib/LICENSE.txt deleted file mode 100644 index f6889f1..0000000 --- a/br-parser-tools/br-parser-tools-lib/LICENSE.txt +++ /dev/null @@ -1,11 +0,0 @@ -parser-tools-lib -Copyright (c) 2010-2014 PLT Design Inc. - -This package is distributed under the GNU Lesser General Public -License (LGPL). This means that you can link this package into proprietary -applications, provided you follow the rules stated in the LGPL. You -can also modify this package; if you distribute a modified version, -you must distribute it under the terms of the LGPL, which in -particular means that you must release the source code for the -modified software. See http://www.gnu.org/copyleft/lesser.html -for more information. diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt deleted file mode 100755 index 26692a7..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt +++ /dev/null @@ -1,933 +0,0 @@ -#lang racket/base -;; This module implements a parser form like the br-parser-tools's -;; `parser', except that it works on an arbitrary CFG (returning -;; the first sucecssful parse). - -;; I'm pretty sure that this is an implementation of Earley's -;; algorithm. - -;; To a first approximation, it's a backtracking parser. Alternative -;; for a non-terminal are computed in parallel, and multiple attempts -;; to compute the same result block until the first one completes. If -;; you get into deadlock, such as when trying to match -;; := -;; then it means that there's no successful parse, so everything -;; that's blocked fails. - -;; A cache holds the series of results for a particular non-terminal -;; at a particular starting location. (A series is used, instead of a -;; sinlge result, for backtracking.) Otherwise, the parser uses -;; backtracking search. Backtracking is implemented through explicit -;; success and failure continuations. Multiple results for a -;; particular nonterminal and location are kept only when they have -;; different lengths. (Otherwise, in the spirit of finding one -;; successful parse, only the first result is kept.) - -;; The br-parser-tools's `parse' is used to transform tokens in the -;; grammar to tokens specific to this parser. 
In other words, this -;; parser uses `parser' so that it doesn't have to know anything about -;; tokens. -;; - - - -(require br-parser-tools/yacc - br-parser-tools/lex) - -(require (for-syntax racket/base - syntax/boundmap - br-parser-tools/private-lex/token-syntax)) - -(provide cfg-parser) - -;; A raw token, wrapped so that we can recognize it: -(define-struct tok (name orig-name val start end)) - -;; Represents the thread scheduler: -(define-struct tasks (active active-back waits multi-waits cache progress?)) - -(define-for-syntax make-token-identifier-mapping make-hasheq) -(define-for-syntax token-identifier-mapping-get - (case-lambda - [(t tok) - (hash-ref t (syntax-e tok))] - [(t tok fail) - (hash-ref t (syntax-e tok) fail)])) -(define-for-syntax token-identifier-mapping-put! - (lambda (t tok v) - (hash-set! t (syntax-e tok) v))) -(define-for-syntax token-identifier-mapping-map - (lambda (t f) - (hash-map t f))) - -;; Used to calculate information on the grammar, such as whether -;; a particular non-terminal is "simple" instead of recursively defined. -(define-for-syntax (nt-fixpoint nts proc nt-ids patss) - (define (ormap-all val f as bs) - (cond - [(null? as) val] - [else (ormap-all (or (f (car as) (car bs)) val) - f - (cdr as) (cdr bs))])) - (let loop () - (when (ormap-all #f - (lambda (nt pats) - (let ([old (bound-identifier-mapping-get nts nt)]) - (let ([new (proc nt pats old)]) - (if (equal? old new) - #f - (begin - (bound-identifier-mapping-put! nts nt new) - #t))))) - nt-ids patss) - (loop)))) - -;; Tries parse-a followed by parse-b. If parse-a is not simple, -;; then after parse-a succeeds once, we parallelize parse-b -;; and trying a second result for parse-a. -(define (parse-and simple-a? parse-a parse-b - stream last-consumed-token depth end success-k fail-k - max-depth tasks) - (letrec ([mk-got-k - (lambda (success-k fail-k) - (lambda (val stream last-consumed-token depth max-depth tasks next1-k) - (if simple-a? 
- (parse-b val stream last-consumed-token depth end - (mk-got2-k success-k fail-k next1-k) - (mk-fail2-k success-k fail-k next1-k) - max-depth tasks) - (parallel-or - (lambda (success-k fail-k max-depth tasks) - (parse-b val stream last-consumed-token depth end - success-k fail-k - max-depth tasks)) - (lambda (success-k fail-k max-depth tasks) - (next1-k (mk-got-k success-k fail-k) - fail-k max-depth tasks)) - success-k fail-k max-depth tasks))))] - [mk-got2-k - (lambda (success-k fail-k next1-k) - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (success-k val stream last-consumed-token depth max-depth tasks - (lambda (success-k fail-k max-depth tasks) - (next-k (mk-got2-k success-k fail-k next1-k) - (mk-fail2-k success-k fail-k next1-k) - max-depth tasks)))))] - [mk-fail2-k - (lambda (success-k fail-k next1-k) - (lambda (max-depth tasks) - (next1-k (mk-got-k success-k fail-k) - fail-k - max-depth - tasks)))]) - (parse-a stream last-consumed-token depth end - (mk-got-k success-k fail-k) - fail-k - max-depth tasks))) - -;; Parallel or for non-terminal alternatives -(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks) - (parallel-or (lambda (success-k fail-k max-depth tasks) - (parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks)) - (lambda (success-k fail-k max-depth tasks) - (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)) - success-k fail-k max-depth tasks)) - -;; Generic parallel-or -(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks) - (define answer-key (gensym)) - (letrec ([gota-k - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (report-answer answer-key - max-depth - tasks - (list val stream last-consumed-token depth next-k)))] - [faila-k - (lambda (max-depth tasks) - (report-answer answer-key - max-depth - tasks - null))]) - (let* ([tasks (queue-task - tasks - (lambda (max-depth tasks) - (parse-a gota-k - faila-k - max-depth tasks)))] - [tasks (queue-task - tasks - (lambda (max-depth tasks) - (parse-b gota-k - faila-k - max-depth tasks)))] - [queue-next (lambda (next-k tasks) - (queue-task tasks - (lambda (max-depth tasks) - (next-k gota-k - faila-k - max-depth tasks))))]) - (letrec ([mk-got-one - (lambda (immediate-next? get-nth success-k) - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (let ([tasks (if immediate-next? - (queue-next next-k tasks) - tasks)]) - (success-k val stream last-consumed-token depth max-depth - tasks - (lambda (success-k fail-k max-depth tasks) - (let ([tasks (if immediate-next? 
- tasks - (queue-next next-k tasks))]) - (get-nth max-depth tasks success-k fail-k)))))))] - [get-first - (lambda (max-depth tasks success-k fail-k) - (wait-for-answer #f max-depth tasks answer-key - (mk-got-one #t get-first success-k) - (lambda (max-depth tasks) - (get-second max-depth tasks success-k fail-k)) - #f))] - [get-second - (lambda (max-depth tasks success-k fail-k) - (wait-for-answer #f max-depth tasks answer-key - (mk-got-one #f get-second success-k) - fail-k #f))]) - (get-first max-depth tasks success-k fail-k))))) - -;; Non-terminal alternatives where the first is "simple" can be done -;; sequentially, which is simpler -(define (parse-or parse-a parse-b - stream last-consumed-token depth end success-k fail-k max-depth tasks) - (letrec ([mk-got-k - (lambda (success-k fail-k) - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (success-k val stream last-consumed-token depth - max-depth tasks - (lambda (success-k fail-k max-depth tasks) - (next-k (mk-got-k success-k fail-k) - (mk-fail-k success-k fail-k) - max-depth tasks)))))] - [mk-fail-k - (lambda (success-k fail-k) - (lambda (max-depth tasks) - (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))]) - (parse-a stream last-consumed-token depth end - (mk-got-k success-k fail-k) - (mk-fail-k success-k fail-k) - max-depth tasks))) - -;; Starts a thread -(define queue-task - (lambda (tasks t [progress? #t]) - (make-tasks (tasks-active tasks) - (cons t (tasks-active-back tasks)) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - (or progress? (tasks-progress? tasks))))) - -;; Reports an answer to a waiting thread: -(define (report-answer answer-key max-depth tasks val) - (let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))]) - (if v - (let ([tasks (make-tasks (cons (v val) - (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (hash-remove! (tasks-waits tasks) answer-key) - (swap-task max-depth tasks)) - ;; We have an answer ready too fast; wait - (swap-task max-depth - (queue-task tasks - (lambda (max-depth tasks) - (report-answer answer-key max-depth tasks val)) - #f))))) - -;; Reports an answer to multiple waiting threads: -(define (report-answer-all answer-key max-depth tasks val k) - (let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))]) - (hash-remove! (tasks-multi-waits tasks) answer-key) - (let ([tasks (make-tasks (append (map (lambda (a) (a val)) v) - (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (k max-depth tasks)))) - -;; Waits for an answer; if `multi?' is #f, this is sole waiter, otherwise -;; there might be many. Use wither #t or #f (and `report-answer' or -;; `report-answer-all', resptively) consistently for a particular answer key. -(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k) - (let ([wait (lambda (val) - (lambda (max-depth tasks) - (if val - (if (null? val) - (fail-k max-depth tasks) - (let-values ([(val stream last-consumed-token depth next-k) (apply values val)]) - (success-k val stream last-consumed-token depth max-depth tasks next-k))) - (deadlock-k max-depth tasks))))]) - (if multi? - (hash-set! (tasks-multi-waits tasks) answer-key - (cons wait (hash-ref (tasks-multi-waits tasks) answer-key - (lambda () null)))) - (hash-set! 
(tasks-waits tasks) answer-key wait)) - (let ([tasks (make-tasks (tasks-active tasks) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (swap-task max-depth tasks)))) - -;; Swap thread -(define (swap-task max-depth tasks) - ;; Swap in first active: - (if (null? (tasks-active tasks)) - (if (tasks-progress? tasks) - (swap-task max-depth - (make-tasks (reverse (tasks-active-back tasks)) - null - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #f)) - ;; No progress, so issue failure for all multi-waits - (if (zero? (hash-count (tasks-multi-waits tasks))) - (error 'swap-task "Deadlock") - (swap-task max-depth - (make-tasks (apply - append - (hash-map (tasks-multi-waits tasks) - (lambda (k l) - (map (lambda (v) (v #f)) l)))) - (tasks-active-back tasks) - (tasks-waits tasks) - (make-hasheq) - (tasks-cache tasks) - #t)))) - (let ([t (car (tasks-active tasks))] - [tasks (make-tasks (cdr (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - (tasks-progress? tasks))]) - (t max-depth tasks)))) - -;; Finds the symbolic representative of a token class -(define-for-syntax (map-token toks tok) - (car (token-identifier-mapping-get toks tok))) - -(define no-pos-val (make-position #f #f #f)) -(define-for-syntax no-pos - (let ([npv ((syntax-local-certifier) #'no-pos-val)]) - (lambda (stx) npv))) -(define-for-syntax at-tok-pos - (lambda (sel expr) - (lambda (stx) - #`(let ([v #,expr]) (if v (#,sel v) no-pos-val))))) - -;; Builds a matcher for a particular alternative -(define-for-syntax (build-match nts toks pat handle $ctx) - (let loop ([pat pat] - [pos 1]) - (if (null? pat) - #`(success-k #,handle stream last-consumed-token depth max-depth tasks - (lambda (success-k fail-k max-depth tasks) - (fail-k max-depth tasks))) - (let ([id (datum->syntax (car pat) - (string->symbol (format "$~a" pos)))] - [id-start-pos (datum->syntax (car pat) - (string->symbol (format "$~a-start-pos" pos)))] - [id-end-pos (datum->syntax (car pat) - (string->symbol (format "$~a-end-pos" pos)))] - [n-end-pos (and (null? (cdr pat)) - (datum->syntax (car pat) '$n-end-pos))]) - (cond - [(bound-identifier-mapping-get nts (car pat) (lambda () #f)) - ;; Match non-termimal - #`(parse-and - ;; First part is simple? (If so, we don't have to parallelize the `and'.) - #,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))]) - (or (not l) - (andmap values (caddr l)))) - #,(car pat) - (let ([original-stream stream]) - (lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks) - (let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream) - tok-end - tok-start) - #'(if (eq? original-stream stream) - last-consumed-token - (and (pair? original-stream) - (car original-stream))))] - [#,id-end-pos (at-tok-pos #'tok-end #'last-consumed-token)] - #,@(if n-end-pos - #`([#,n-end-pos (at-tok-pos #'tok-end #'last-consumed-token)]) - null)) - #,(loop (cdr pat) (add1 pos))))) - stream last-consumed-token depth - #,(let ([cnt (apply + - (map (lambda (item) - (cond - [(bound-identifier-mapping-get nts item (lambda () #f)) - => (lambda (l) (car l))] - [else 1])) - (cdr pat)))]) - #`(- end #,cnt)) - success-k fail-k max-depth tasks)] - [else - ;; Match token - (let ([tok-id (map-token toks (car pat))]) - #`(if (and (pair? stream) - (eq? 
'#,tok-id (tok-name (car stream)))) - (let* ([stream-a (car stream)] - [#,id (tok-val stream-a)] - [last-consumed-token (car stream)] - [stream (cdr stream)] - [depth (add1 depth)]) - (let ([max-depth (max max-depth depth)]) - (let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'stream-a)] - [#,id-end-pos (at-tok-pos #'tok-end #'stream-a)] - #,@(if n-end-pos - #`([#,n-end-pos (at-tok-pos #'tok-end #'stream-a)]) - null)) - #,(loop (cdr pat) (add1 pos))))) - (fail-k max-depth tasks)))]))))) - -;; Starts parsing to match a non-terminal. There's a minor -;; optimization that checks for known starting tokens. Otherwise, -;; use the cache, block if someone else is already trying the match, -;; and cache the result if it's computed. -;; The cache maps nontermial+startingpos+iteration to a result, where -;; the iteration is 0 for the first match attempt, 1 for the second, -;; etc. -(define (parse-nt/share key min-cnt init-tokens stream last-consumed-token depth end max-depth tasks success-k fail-k k) - (if (and (positive? min-cnt) - (pair? stream) - (not (memq (tok-name (car stream)) init-tokens))) - ;; No such leading token; give up - (fail-k max-depth tasks) - ;; Run pattern - (let loop ([n 0] - [success-k success-k] - [fail-k fail-k] - [max-depth max-depth] - [tasks tasks] - [k k]) - (let ([answer-key (gensym)] - [table-key (vector key depth n)] - [old-depth depth] - [old-stream stream]) - #;(printf "Loop ~a\n" table-key) - (cond - [(hash-ref (tasks-cache tasks) table-key (lambda () #f)) - => (lambda (result) - #;(printf "Reuse ~a\n" table-key) - (result success-k fail-k max-depth tasks))] - [else - #;(printf "Try ~a ~a\n" table-key (map tok-name stream)) - (hash-set! (tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - #;(printf "Wait ~a ~a\n" table-key answer-key) - (wait-for-answer #t max-depth tasks answer-key success-k fail-k - (lambda (max-depth tasks) - #;(printf "Deadlock ~a ~a\n" table-key answer-key) - (fail-k max-depth tasks))))) - (let result-loop ([max-depth max-depth][tasks tasks][k k]) - (letrec ([orig-stream stream] - [new-got-k - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - ;; Check whether we already have a result that consumed the same amount: - (let ([result-key (vector #f key old-depth depth)]) - (cond - [(hash-ref (tasks-cache tasks) result-key (lambda () #f)) - ;; Go for the next-result - (result-loop max-depth - tasks - (lambda (end max-depth tasks success-k fail-k) - (next-k success-k fail-k max-depth tasks)))] - [else - #;(printf "Success ~a ~a\n" table-key - (map tok-name (let loop ([d old-depth][s old-stream]) - (if (= d depth) - null - (cons (car s) (loop (add1 d) (cdr s))))))) - (let ([next-k (lambda (success-k fail-k max-depth tasks) - (loop (add1 n) - success-k - fail-k - max-depth - tasks - (lambda (end max-depth tasks success-k fail-k) - (next-k success-k fail-k max-depth tasks))))]) - (hash-set! (tasks-cache tasks) result-key #t) - (hash-set! (tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - (success-k val stream last-consumed-token depth max-depth tasks next-k))) - (report-answer-all answer-key - max-depth - tasks - (list val stream last-consumed-token depth next-k) - (lambda (max-depth tasks) - (success-k val stream last-consumed-token depth max-depth tasks next-k))))])))] - [new-fail-k - (lambda (max-depth tasks) - #;(printf "Failure ~a\n" table-key) - (hash-set! 
(tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - (fail-k max-depth tasks))) - (report-answer-all answer-key - max-depth - tasks - null - (lambda (max-depth tasks) - (fail-k max-depth tasks))))]) - (k end max-depth tasks new-got-k new-fail-k)))]))))) - -;; These temp identifiers can't be `gensym` or `generate-temporary` -;; because they have to be consistent between module loads -;; (IIUC, the parser is multi-threaded, and this approach is not thread-safe) -;; so I see no alternative to the old standby of making them ludicrously unlikely -(define-for-syntax start-id-temp 'start_jihqolbbafscgxvsufnepvmxqipnxgmlpxukmdoqxqzmzgaogaftbkbyqjttwwfimifowdxfyekjiixdmtprfkcvfciraehoeuaz) -(define-for-syntax atok-id-temp 'atok_wrutdjgecmybyfipiwsgjlvsveryodlgassuzcargiuznzgdghrykfqfbwcjgzdhdoeqxcucmtjkuyucskzethozhqkasphdwbht) -(define-syntax (cfg-parser stx) - (syntax-case stx () - [(_ clause ...) - (let ([clauses (syntax->list #'(clause ...))]) - (let-values ([(start grammar cfg-error parser-clauses src-pos?) - (let ([all-toks (apply - append - (map (lambda (clause) - (syntax-case clause (tokens) - [(tokens t ...) - (apply - append - (map (lambda (t) - (let ([v (syntax-local-value t (lambda () #f))]) - (cond - [(terminals-def? v) - (map (lambda (v) - (cons v #f)) - (syntax->list (terminals-def-t v)))] - [(e-terminals-def? v) - (map (lambda (v) - (cons v #t)) - (syntax->list (e-terminals-def-t v)))] - [else null]))) - (syntax->list #'(t ...))))] - [_else null])) - clauses))] - [all-end-toks (apply - append - (map (lambda (clause) - (syntax-case clause (end) - [(end t ...) - (syntax->list #'(t ...))] - [_else null])) - clauses))]) - (let loop ([clauses clauses] - [cfg-start #f] - [cfg-grammar #f] - [cfg-error #f] - [src-pos? #f] - [parser-clauses null]) - (if (null? clauses) - (values cfg-start - cfg-grammar - cfg-error - (reverse parser-clauses) - src-pos?) - (syntax-case (car clauses) (start error grammar src-pos) - [(start tok) - (loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)] - [(error expr) - (loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)] - [(grammar [nt [pat handle0 handle ...] ...] ...) - (let ([nts (make-bound-identifier-mapping)] - [toks (make-token-identifier-mapping)] - [end-toks (make-token-identifier-mapping)] - [nt-ids (syntax->list #'(nt ...))] - [patss (map (lambda (stx) - (map syntax->list (syntax->list stx))) - (syntax->list #'((pat ...) ...)))]) - (for-each (lambda (nt) - (bound-identifier-mapping-put! nts nt (list 0))) - nt-ids) - (for-each (lambda (t) - (token-identifier-mapping-put! end-toks t #t)) - all-end-toks) - (for-each (lambda (t) - (unless (token-identifier-mapping-get end-toks (car t) (lambda () #f)) - (let ([id (gensym (syntax-e (car t)))]) - (token-identifier-mapping-put! toks (car t) - (cons id (cdr t)))))) - all-toks) - ;; Compute min max size for each non-term: - (nt-fixpoint - nts - (lambda (nt pats old-list) - (let ([new-cnt - (apply - min - (map (lambda (pat) - (apply - + - (map (lambda (elem) - (car - (bound-identifier-mapping-get nts - elem - (lambda () (list 1))))) - pat))) - pats))]) - (if (new-cnt . > . (car old-list)) - (cons new-cnt (cdr old-list)) - old-list))) - nt-ids patss) - ;; Compute set of toks that must appear at the beginning - ;; for a non-terminal - (nt-fixpoint - nts - (lambda (nt pats old-list) - (let ([new-list - (apply - append - (map (lambda (pat) - (let loop ([pat pat]) - (if (pair? 
pat) - (let ([l (bound-identifier-mapping-get - nts - (car pat) - (lambda () - (list 1 (map-token toks (car pat)))))]) - ;; If the non-terminal can match 0 things, - ;; then it might match something from the - ;; next pattern element. Otherwise, it must - ;; match the first element: - (if (zero? (car l)) - (append (cdr l) (loop (cdr pat))) - (cdr l))) - null))) - pats))]) - (let ([new (filter (lambda (id) - (andmap (lambda (id2) - (not (eq? id id2))) - (cdr old-list))) - new-list)]) - (if (pair? new) - ;; Drop dups in new list: - (let ([new (let loop ([new new]) - (if (null? (cdr new)) - new - (if (ormap (lambda (id) - (eq? (car new) id)) - (cdr new)) - (loop (cdr new)) - (cons (car new) (loop (cdr new))))))]) - (cons (car old-list) (append new (cdr old-list)))) - old-list)))) - nt-ids patss) - ;; Determine left-recursive clauses: - (for-each (lambda (nt pats) - (let ([l (bound-identifier-mapping-get nts nt)]) - (bound-identifier-mapping-put! nts nt (list (car l) - (cdr l) - (map (lambda (x) #f) pats))))) - nt-ids patss) - (nt-fixpoint - nts - (lambda (nt pats old-list) - (list (car old-list) - (cadr old-list) - (map (lambda (pat simple?) - (or simple? - (let ([l (map (lambda (elem) - (bound-identifier-mapping-get - nts - elem - (lambda () #f))) - pat)]) - (andmap (lambda (i) - (or (not i) - (andmap values (caddr i)))) - l)))) - pats (caddr old-list)))) - nt-ids patss) - ;; Build a definition for each non-term: - (loop (cdr clauses) - cfg-start - (map (lambda (nt pats handles $ctxs) - (define info (bound-identifier-mapping-get nts nt)) - (list nt - #`(let ([key (gensym '#,nt)]) - (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) - (parse-nt/share - key #,(car info) '#,(cadr info) stream last-consumed-token depth end - max-depth tasks - success-k fail-k - (lambda (end max-depth tasks success-k fail-k) - #,(let loop ([pats pats] - [handles (syntax->list handles)] - [$ctxs (syntax->list $ctxs)] - [simple?s (caddr info)]) - (if (null? pats) - #'(fail-k max-depth tasks) - #`(#,(if (or (null? (cdr pats)) - (car simple?s)) - #'parse-or - #'parse-parallel-or) - (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) - #,(build-match nts - toks - (car pats) - (car handles) - (car $ctxs))) - (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) - #,(loop (cdr pats) - (cdr handles) - (cdr $ctxs) - (cdr simple?s))) - stream last-consumed-token depth end success-k fail-k max-depth tasks))))))))) - nt-ids - patss - (syntax->list #'(((begin handle0 handle ...) ...) ...)) - (syntax->list #'((handle0 ...) ...))) - cfg-error - src-pos? - (list* - (with-syntax ([((tok tok-id . $e) ...) - (token-identifier-mapping-map toks - (lambda (k v) - (list* k - (car v) - (if (cdr v) - #f - '$1))))] - [(pos ...) - (if src-pos? - #'($1-start-pos $1-end-pos) - #'(#f #f))] - ;; rename `start` and `atok` to temp ids - ;; so that "start" and "atok" can be used as literal string tokens in a grammar. - ;; not sure why this works, but it passes all tests. - [%start start-id-temp] - [%atok atok-id-temp]) - #`(grammar (%start [() null] - [(%atok %start) (cons $1 $2)]) - (%atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...))) - (with-syntax ([%start start-id-temp]) - #`(start %start)) - parser-clauses)))] - [(grammar . 
_) - (raise-syntax-error - #f - "bad grammar clause" - stx - (car clauses))] - [(src-pos) - (loop (cdr clauses) - cfg-start - cfg-grammar - cfg-error - #t - (cons (car clauses) parser-clauses))] - [_else - (loop (cdr clauses) - cfg-start - cfg-grammar - cfg-error - src-pos? - (cons (car clauses) parser-clauses))]))))]) - #`(let ([orig-parse (parser - [error (lambda (a b c) - (error 'cfg-parser "unexpected ~a token: ~a" b c))] - . #,parser-clauses)] - [error-proc #,cfg-error]) - (letrec #,grammar - (lambda (get-tok) - (let ([tok-list (orig-parse get-tok)]) - (letrec ([success-k - (lambda (val stream last-consumed-token depth max-depth tasks next) - (if (null? stream) - val - (next success-k fail-k max-depth tasks)))] - [fail-k (lambda (max-depth tasks) - (cond - [(null? tok-list) - (if error-proc - (error-proc #t - 'no-tokens - #f - (make-position #f #f #f) - (make-position #f #f #f)) - (error - 'cfg-parse - "no tokens"))] - [else - (let ([bad-tok (list-ref tok-list - (min (sub1 (length tok-list)) - max-depth))]) - (if error-proc - (error-proc #t - (tok-orig-name bad-tok) - (tok-val bad-tok) - (tok-start bad-tok) - (tok-end bad-tok)) - (error - 'cfg-parse - "failed at ~a" - (tok-val bad-tok))))]))]) - (#,start tok-list - ;; we simulate a token at the very beginning with zero width - ;; for use with the position-generating code (*-start-pos, *-end-pos). - (if (null? tok-list) - (tok #f #f #f - (position 1 - #,(if src-pos? #'1 #'#f) - #,(if src-pos? #'0 #'#f)) - (position 1 - #,(if src-pos? #'1 #'#f) - #,(if src-pos? #'0 #'#f))) - (tok (tok-name (car tok-list)) - (tok-orig-name (car tok-list)) - (tok-val (car tok-list)) - (tok-start (car tok-list)) - (tok-start (car tok-list)))) - 0 - (length tok-list) - success-k - fail-k - 0 - (make-tasks null null - (make-hasheq) (make-hasheq) - (make-hash) #t)))))))))])) - - -(module* test racket/base - (require (submod "..") - br-parser-tools/lex - racket/block - rackunit) - - ;; Test: parsing regular expressions. - ;; Here is a test case on locations: - (block - (define-tokens regexp-tokens (ANCHOR STAR OR LIT LPAREN RPAREN EOF)) - (define lex (lexer-src-pos ["|" (token-OR lexeme)] - ["^" (token-ANCHOR lexeme)] - ["*" (token-STAR lexeme)] - [(repetition 1 +inf.0 alphabetic) (token-LIT lexeme)] - ["(" (token-LPAREN lexeme)] - [")" (token-RPAREN lexeme)] - [whitespace (return-without-pos (lex input-port))] - [(eof) (token-EOF 'eof)])) - (define -parse (cfg-parser - (tokens regexp-tokens) - (start top) - (end EOF) - (src-pos) - (grammar [top [(maybe-anchor regexp) - (cond [$1 - `(anchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] - [else - `(unanchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))])]] - [maybe-anchor [(ANCHOR) #t] - [() #f]] - [regexp [(regexp STAR) `(star ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] - [(regexp OR regexp) `(or ,$1 ,$3 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] - [(LPAREN regexp RPAREN) `(group ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] - [(LIT) `(lit ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $1-end-pos))]]))) - (define (pos->sexp pos) - (position-offset pos)) - - (define (parse s) - (define ip (open-input-string s)) - (port-count-lines! ip) - (-parse (lambda () (lex ip)))) - - (check-equal? (parse "abc") - '(unanchored (lit "abc" 1 4) 1 4)) - (check-equal? 
(parse "a | (b*) | c") - '(unanchored (or (or (lit "a" 1 2) - (group (star (lit "b" 6 7) 6 8) 5 9) - 1 9) - (lit "c" 12 13) - 1 13) - 1 13))) - - - - - - ;; Tests used during development - (define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF)) - - (define lex - (lexer - ["+" (token-PLUS '+)] - ["-" (token-MINUS '-)] - ["*" (token-STAR '*)] - ["|" (token-BAR '||)] - [":" (token-COLON '|:|)] - [whitespace (lex input-port)] - [(eof) (token-EOF 'eof)])) - - (define parse - (cfg-parser - (tokens non-terminals) - (start ) - (end EOF) - (error (lambda (a b stx) - (error 'parse "failed at ~s" stx))) - (grammar [ [(PLUS) "plus"] - [( BAR ) (list $1 $2 $3)] - [( COLON) (list $1)]] - [ [(MINUS) "minus"] - [( STAR) (cons $1 $2)]] - [ [( MINUS) "yes"]] - [ [(PLUS) 'plus] - [(MINUS) 'minus]] - [ [() '0] - [( PLUS) (add1 $1)] - [( PLUS) (add1 $1)]]))) - - (let ([p (open-input-string #;"+*|-|-*|+**" #;"-|+*|+**" - #;"+*|+**|-" #;"-|-*|-|-*" - #;"-|-*|-|-**|-|-*|-|-**" - "-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|-|-*|-|-**|-|-*|-|-*** - |-|-*|-|-**|-|-*|-|-*****|-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****| - -|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****" - ;; This one fails: - #;"+*")]) - (check-equal? (parse (lambda () (lex p))) - '((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) - || - (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) - . - *) - || - (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) - || - (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) - . - *)) - . - *) - || - (((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) - || - (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) - . - *) - || - (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) - || - (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) - . - *)) - . - *))))) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/calc.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/calc.rkt deleted file mode 100644 index 9ad1218..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/calc.rkt +++ /dev/null @@ -1,89 +0,0 @@ -#lang scheme - -;; An interactive calculator inspired by the calculator example in the bison manual. - - -;; Import the parser and lexer generators. -(require br-parser-tools/yacc - br-parser-tools/lex - (prefix-in : br-parser-tools/lex-sre)) - -(define-tokens value-tokens (NUM VAR FNCT)) -(define-empty-tokens op-tokens (newline = OP CP + - * / ^ EOF NEG)) - -;; A hash table to store variable values in for the calculator -(define vars (make-hash)) - -(define-lex-abbrevs - (lower-letter (:/ "a" "z")) - - (upper-letter (:/ #\A #\Z)) - - ;; (:/ 0 9) would not work because the lexer does not understand numbers. (:/ #\0 #\9) is ok too. 
- (digit (:/ "0" "9"))) - -(define calcl - (lexer - [(eof) 'EOF] - ;; recursively call the lexer on the remaining input after a tab or space. Returning the - ;; result of that operation. This effectively skips all whitespace. - [(:or #\tab #\space) (calcl input-port)] - ;; (token-newline) returns 'newline - [#\newline (token-newline)] - ;; Since (token-=) returns '=, just return the symbol directly - [(:or "=" "+" "-" "*" "/" "^") (string->symbol lexeme)] - ["(" 'OP] - [")" 'CP] - ["sin" (token-FNCT sin)] - [(:+ (:or lower-letter upper-letter)) (token-VAR (string->symbol lexeme))] - [(:+ digit) (token-NUM (string->number lexeme))] - [(:: (:+ digit) #\. (:* digit)) (token-NUM (string->number lexeme))])) - - -(define calcp - (parser - - (start start) - (end newline EOF) - (tokens value-tokens op-tokens) - (error (lambda (a b c) (void))) - - (precs (right =) - (left - +) - (left * /) - (left NEG) - (right ^)) - - (grammar - - (start [() #f] - ;; If there is an error, ignore everything before the error - ;; and try to start over right after the error - [(error start) $2] - [(exp) $1]) - - (exp [(NUM) $1] - [(VAR) (hash-ref vars $1 (lambda () 0))] - [(VAR = exp) (begin (hash-set! vars $1 $3) - $3)] - [(FNCT OP exp CP) ($1 $3)] - [(exp + exp) (+ $1 $3)] - [(exp - exp) (- $1 $3)] - [(exp * exp) (* $1 $3)] - [(exp / exp) (/ $1 $3)] - [(- exp) (prec NEG) (- $2)] - [(exp ^ exp) (expt $1 $3)] - [(OP exp CP) $2])))) - -;; run the calculator on the given input-port -(define (calc ip) - (port-count-lines! ip) - (letrec ((one-line - (lambda () - (let ((result (calcp (lambda () (calcl ip))))) - (when result - (printf "~a\n" result) - (one-line)))))) - (one-line))) - -(calc (open-input-string "x=1\n(x + 2 * 3) - (1+2)*3")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/read.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/read.rkt deleted file mode 100644 index a10b2c1..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/read.rkt +++ /dev/null @@ -1,242 +0,0 @@ -;; This implements the equivalent of racket's read-syntax for R5RS scheme. -;; It has not been thoroughly tested. Also it will read an entire file into a -;; list of syntax objects, instead of returning one syntax object at a time - -(module read mzscheme - - (require br-parser-tools/lex - (prefix : br-parser-tools/lex-sre) - br-parser-tools/yacc - syntax/readerr) - - (define-tokens data (DATUM)) - (define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF)) - - (define scheme-lexer - (lexer-src-pos - - ;; Skip comments, without accumulating extra position information - [(:or scheme-whitespace comment) (return-without-pos (scheme-lexer input-port))] - - ["#t" (token-DATUM #t)] - ["#f" (token-DATUM #f)] - [(:: "#\\" any-char) (token-DATUM (caddr (string->list lexeme)))] - ["#\\space" (token-DATUM #\space)] - ["#\\newline" (token-DATUM #\newline)] - [(:or (:: initial (:* subsequent)) "+" "-" "...") (token-DATUM (string->symbol lexeme))] - [#\" (token-DATUM (list->string (get-string-token input-port)))] - [#\( 'OP] - [#\) 'CP] - [#\[ 'OP] - [#\] 'CP] - ["#(" 'HASHOP] - [num2 (token-DATUM (string->number lexeme 2))] - [num8 (token-DATUM (string->number lexeme 8))] - [num10 (token-DATUM (string->number lexeme 10))] - [num16 (token-DATUM (string->number lexeme 16))] - ["'" 'QUOTE] - ["`" 'QUASIQUOTE] - ["," 'UNQUOTE] - [",@" 'UNQUOTE-SPLICING] - ["." 
'DOT] - [(eof) 'EOF])) - - (define get-string-token - (lexer - [(:~ #\" #\\) (cons (car (string->list lexeme)) - (get-string-token input-port))] - [(:: #\\ #\\) (cons #\\ (get-string-token input-port))] - [(:: #\\ #\") (cons #\" (get-string-token input-port))] - [#\" null])) - - - (define-lex-abbrevs - [letter (:or (:/ "a" "z") (:/ #\A #\Z))] - [digit (:/ #\0 #\9)] - [scheme-whitespace (:or #\newline #\return #\tab #\space #\vtab)] - [initial (:or letter (char-set "!$%&*/:<=>?^_~@"))] - [subsequent (:or initial digit (char-set "+-.@"))] - [comment (:: #\; (:* (:~ #\newline)) #\newline)] - - - ;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of - ;; using regexp macros to avoid the cut and paste. - ; [numR (:: prefixR complexR)] - ; [complexR (:or realR - ; (:: realR "@" realR) - ; (:: realR "+" urealR "i") - ; (:: realR "-" urealR "i") - ; (:: realR "+i") - ; (:: realR "-i") - ; (:: "+" urealR "i") - ; (:: "-" urealR "i") - ; (:: "+i") - ; (:: "-i"))] - ; [realR (:: sign urealR)] - ; [urealR (:or uintegerR (:: uintegerR "/" uintegerR) decimalR)] - ; [uintegerR (:: (:+ digitR) (:* #\#))] - ; [prefixR (:or (:: radixR exactness) - ; (:: exactness radixR))] - - [num2 (:: prefix2 complex2)] - [complex2 (:or real2 - (:: real2 "@" real2) - (:: real2 "+" ureal2 "i") - (:: real2 "-" ureal2 "i") - (:: real2 "+i") - (:: real2 "-i") - (:: "+" ureal2 "i") - (:: "-" ureal2 "i") - (:: "+i") - (:: "-i"))] - [real2 (:: sign ureal2)] - [ureal2 (:or uinteger2 (:: uinteger2 "/" uinteger2))] - [uinteger2 (:: (:+ digit2) (:* #\#))] - [prefix2 (:or (:: radix2 exactness) - (:: exactness radix2))] - [radix2 "#b"] - [digit2 (:or "0" "1")] - [num8 (:: prefix8 complex8)] - [complex8 (:or real8 - (:: real8 "@" real8) - (:: real8 "+" ureal8 "i") - (:: real8 "-" ureal8 "i") - (:: real8 "+i") - (:: real8 "-i") - (:: "+" ureal8 "i") - (:: "-" ureal8 "i") - (:: "+i") - (:: "-i"))] - [real8 (:: sign ureal8)] - [ureal8 (:or uinteger8 (:: uinteger8 "/" uinteger8))] - [uinteger8 (:: (:+ digit8) (:* #\#))] - [prefix8 (:or (:: radix8 exactness) - (:: exactness radix8))] - [radix8 "#o"] - [digit8 (:/ "0" "7")] - - [num10 (:: prefix10 complex10)] - [complex10 (:or real10 - (:: real10 "@" real10) - (:: real10 "+" ureal10 "i") - (:: real10 "-" ureal10 "i") - (:: real10 "+i") - (:: real10 "-i") - (:: "+" ureal10 "i") - (:: "-" ureal10 "i") - (:: "+i") - (:: "-i"))] - [real10 (:: sign ureal10)] - [ureal10 (:or uinteger10 (:: uinteger10 "/" uinteger10) decimal10)] - [uinteger10 (:: (:+ digit10) (:* #\#))] - [prefix10 (:or (:: radix10 exactness) - (:: exactness radix10))] - [radix10 (:? "#d")] - [digit10 digit] - [decimal10 (:or (:: uinteger10 suffix) - (:: #\. (:+ digit10) (:* #\#) suffix) - (:: (:+ digit10) #\. (:* digit10) (:* #\#) suffix) - (:: (:+ digit10) (:+ #\#) #\. 
(:* #\#) suffix))] - - [num16 (:: prefix16 complex16)] - [complex16 (:or real16 - (:: real16 "@" real16) - (:: real16 "+" ureal16 "i") - (:: real16 "-" ureal16 "i") - (:: real16 "+i") - (:: real16 "-i") - (:: "+" ureal16 "i") - (:: "-" ureal16 "i") - "+i" - "-i")] - [real16 (:: sign ureal16)] - [ureal16 (:or uinteger16 (:: uinteger16 "/" uinteger16))] - [uinteger16 (:: (:+ digit16) (:* #\#))] - [prefix16 (:or (:: radix16 exactness) - (:: exactness radix16))] - [radix16 "#x"] - [digit16 (:or digit (:/ #\a #\f) (:/ #\A #\F))] - - - [suffix (:or "" (:: exponent-marker sign (:+ digit10)))] - [exponent-marker (:or "e" "s" "f" "d" "l")] - [sign (:or "" "+" "-")] - [exactness (:or "" "#i" "#e")]) - - - (define stx-for-original-property (read-syntax #f (open-input-string "original"))) - - ;; A macro to build the syntax object - (define-syntax (build-so stx) - (syntax-case stx () - ((_ value start end) - (with-syntax ((start-pos (datum->syntax-object - (syntax end) - (string->symbol - (format "$~a-start-pos" - (syntax-object->datum (syntax start)))))) - (end-pos (datum->syntax-object - (syntax end) - (string->symbol - (format "$~a-end-pos" - (syntax-object->datum (syntax end)))))) - (source (datum->syntax-object - (syntax end) - 'source-name))) - (syntax - (datum->syntax-object - #f - value - (list source - (position-line start-pos) - (position-col start-pos) - (position-offset start-pos) - (- (position-offset end-pos) - (position-offset start-pos))) - stx-for-original-property)))))) - - (define (scheme-parser source-name) - (parser - (src-pos) - - (start s) - (end EOF) - (error (lambda (a name val start end) - (raise-read-error - "read-error" - source-name - (position-line start) - (position-col start) - (position-offset start) - (- (position-offset end) - (position-offset start))))) - (tokens data delim) - - - (grammar - - (s [(sexp-list) (reverse $1)]) - - (sexp [(DATUM) (build-so $1 1 1)] - [(OP sexp-list CP) (build-so (reverse $2) 1 3)] - [(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)] - [(QUOTE sexp) (build-so (list 'quote $2) 1 2)] - [(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)] - [(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)] - [(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)] - [(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)]) - - (sexp-list [() null] - [(sexp-list sexp) (cons $2 $1)])))) - - (define (rs sn ip) - (port-count-lines! ip) - ((scheme-parser sn) (lambda () (scheme-lexer ip)))) - - (define readsyntax - (case-lambda ((sn) (rs sn (current-input-port))) - ((sn ip) (rs sn ip)))) - - (provide (rename readsyntax read-syntax)) - - ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/info.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/info.rkt deleted file mode 100644 index ae66609..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/info.rkt +++ /dev/null @@ -1,3 +0,0 @@ -#lang info - -(define compile-omit-paths '("private-lex/error-tests.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt deleted file mode 100644 index 0cbb175..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt +++ /dev/null @@ -1,24 +0,0 @@ -(module lex-plt-v200 mzscheme - (require br-parser-tools/lex - (prefix : br-parser-tools/lex-sre)) - - (provide epsilon - ~ - (rename :* *) - (rename :+ +) - (rename :? ?) 
- (rename :or :) - (rename :& &) - (rename :: @) - (rename :~ ^) - (rename :/ -)) - - (define-lex-trans epsilon - (syntax-rules () - ((_) ""))) - - (define-lex-trans ~ - (syntax-rules () - ((_ re) (complement re))))) - - diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-sre.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-sre.rkt deleted file mode 100644 index 820d090..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-sre.rkt +++ /dev/null @@ -1,119 +0,0 @@ -(module lex-sre mzscheme - (require br-parser-tools/lex) - - (provide (rename sre-* *) - (rename sre-+ +) - ? - (rename sre-= =) - (rename sre->= >=) - ** - (rename sre-or or) - : - seq - & - ~ - (rename sre-- -) - (rename sre-/ /) - /-only-chars) - - (define-lex-trans sre-* - (syntax-rules () - ((_ re ...) - (repetition 0 +inf.0 (union re ...))))) - - (define-lex-trans sre-+ - (syntax-rules () - ((_ re ...) - (repetition 1 +inf.0 (union re ...))))) - - (define-lex-trans ? - (syntax-rules () - ((_ re ...) - (repetition 0 1 (union re ...))))) - - (define-lex-trans sre-= - (syntax-rules () - ((_ n re ...) - (repetition n n (union re ...))))) - - (define-lex-trans sre->= - (syntax-rules () - ((_ n re ...) - (repetition n +inf.0 (union re ...))))) - - (define-lex-trans ** - (syntax-rules () - ((_ low #f re ...) - (** low +inf.0 re ...)) - ((_ low high re ...) - (repetition low high (union re ...))))) - - (define-lex-trans sre-or - (syntax-rules () - ((_ re ...) - (union re ...)))) - - (define-lex-trans : - (syntax-rules () - ((_ re ...) - (concatenation re ...)))) - - (define-lex-trans seq - (syntax-rules () - ((_ re ...) - (concatenation re ...)))) - - (define-lex-trans & - (syntax-rules () - ((_ re ...) - (intersection re ...)))) - - (define-lex-trans ~ - (syntax-rules () - ((_ re ...) - (char-complement (union re ...))))) - - ;; set difference - (define-lex-trans (sre-- stx) - (syntax-case stx () - ((_) - (raise-syntax-error #f - "must have at least one argument" - stx)) - ((_ big-re re ...) - (syntax (& big-re (complement (union re ...))))))) - - (define-lex-trans (sre-/ stx) - (syntax-case stx () - ((_ range ...) - (let ((chars - (apply append (map (lambda (r) - (let ((x (syntax-e r))) - (cond - ((char? x) (list x)) - ((string? x) (string->list x)) - (else - (raise-syntax-error - #f - "not a char or string" - stx - r))))) - (syntax->list (syntax (range ...))))))) - (unless (even? (length chars)) - (raise-syntax-error - #f - "not given an even number of characters" - stx)) - #`(/-only-chars #,@chars))))) - - (define-lex-trans /-only-chars - (syntax-rules () - ((_ c1 c2) - (char-range c1 c2)) - ((_ c1 c2 c ...) - (union (char-range c1 c2) - (/-only-chars c ...))))) - - ) - - diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex.rkt deleted file mode 100644 index 6bb9dd4..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex.rkt +++ /dev/null @@ -1,369 +0,0 @@ -#lang racket/base - -;; Provides the syntax used to create lexers and the functions needed to -;; create and use the buffer that the lexer reads from. See docs. 
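;; A minimal illustrative sketch (not from the original source) of how the forms
;; provided by this module are typically used: a small lexer applied to a string
;; port. The names example-tokens, example-empty-tokens, example-lex, WORD and
;; NUM are hypothetical, chosen only for this example.
(define-tokens example-tokens (WORD NUM))
(define-empty-tokens example-empty-tokens (EOF))
(define example-lex
  (lexer
   [(repetition 1 +inf.0 alphabetic) (token-WORD lexeme)]
   [(repetition 1 +inf.0 numeric) (token-NUM (string->number lexeme))]
   ;; skip whitespace by re-entering the lexer on the remaining input
   [whitespace (example-lex input-port)]
   [(eof) (token-EOF)]))
(define example-ip (open-input-string "abc 123"))
(example-lex example-ip)  ; first call: WORD token carrying "abc"
(example-lex example-ip)  ; second call: NUM token carrying 123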
- -(require (for-syntax mzlib/list - syntax/stx - syntax/define - syntax/boundmap - "private-lex/util.rkt" - "private-lex/actions.rkt" - "private-lex/front.rkt" - "private-lex/unicode-chars.rkt" - racket/base - racket/promise)) - -(require mzlib/stxparam - syntax/readerr - "private-lex/token.rkt") - -(provide lexer lexer-src-pos lexer-srcloc define-lex-abbrev define-lex-abbrevs define-lex-trans - - ;; Dealing with tokens and related structures - define-tokens define-empty-tokens token-name token-value token? - (struct-out position) - (struct-out position-token) - (struct-out srcloc-token) - - ;; File path for highlighting errors while lexing - file-path - lexer-file-path ;; alternate name - - ;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4. - any-char any-string nothing alphabetic lower-case upper-case title-case - numeric symbolic punctuation graphic whitespace blank iso-control - - ;; A regular expression operator - char-set) - -;; wrap-action: syntax-object src-pos? -> syntax-object -(define-for-syntax (wrap-action action src-loc-style) - (with-syntax ([action-stx - (cond - [(eq? src-loc-style 'lexer-src-pos) - #`(let/ec ret - (syntax-parameterize - ([return-without-pos (make-rename-transformer #'ret)]) - (make-position-token #,action start-pos end-pos)))] - [(eq? src-loc-style 'lexer-srcloc) - #`(let/ec ret - (syntax-parameterize - ([return-without-srcloc (make-rename-transformer #'ret)]) - (make-srcloc-token #,action lexeme-srcloc)))] - [else action])]) - (syntax/loc action - (λ (start-pos-p end-pos-p lexeme-p input-port-p) - (define lexeme-srcloc-p (make-srcloc (object-name input-port-p) - (position-line start-pos-p) - (position-col start-pos-p) - (position-offset start-pos-p) - (and (number? (position-offset end-pos-p)) - (number? (position-offset start-pos-p)) - (- (position-offset end-pos-p) - (position-offset start-pos-p))))) - (syntax-parameterize - ([start-pos (make-rename-transformer #'start-pos-p)] - [end-pos (make-rename-transformer #'end-pos-p)] - [lexeme (make-rename-transformer #'lexeme-p)] - [input-port (make-rename-transformer #'input-port-p)] - [lexeme-srcloc (make-rename-transformer #'lexeme-srcloc-p)]) - action-stx))))) - -(define-for-syntax (make-lexer-macro caller src-loc-style) - (λ (stx) - (syntax-case stx () - [(_ . RE+ACTS) - (let () - (define spec/re-acts (syntax->list #'RE+ACTS)) - (for/and ([x (in-list spec/re-acts)]) - (syntax-case x () - [(RE ACT) #t] - [else (raise-syntax-error caller "not a regular expression / action pair" stx x)])) - (define eof-act (get-special-action spec/re-acts #'eof #'eof)) - (define spec-act (get-special-action spec/re-acts #'special #'(void))) - (define spec-comment-act (get-special-action spec/re-acts #'special-comment #'#f)) - (define ids (list #'special #'special-comment #'eof)) - (define re-acts (filter (λ (spec/re-act) - (syntax-case spec/re-act () - [((special) act) - (not (ormap - (λ (x) - (and (identifier? #'special) - (module-or-top-identifier=? #'special x))) - ids))] - [_ #t])) spec/re-acts)) - (define names (map (λ (x) (datum->syntax #f (gensym))) re-acts)) - (define acts (map (λ (x) (stx-car (stx-cdr x))) re-acts)) - (define re-actnames (map (λ (re-act name) (list (stx-car re-act) name)) re-acts names)) - (when (null? 
spec/re-acts) - (raise-syntax-error caller "expected at least one action" stx)) - (define-values (trans start action-names no-look disappeared-uses) (build-lexer re-actnames)) - (when (vector-ref action-names start) ;; Start state is final - (unless (and - ;; All the successor states are final - (vector? (vector-ref trans start)) - (andmap (λ (x) (vector-ref action-names (vector-ref x 2))) - (vector->list (vector-ref trans start))) - ;; Each character has a successor state - (let loop ([check 0] - [nexts (vector->list (vector-ref trans start))]) - (cond - [(null? nexts) #f] - [else - (let ([next (car nexts)]) - (and (= (vector-ref next 0) check) - (let ([next-check (vector-ref next 1)]) - (or (>= next-check max-char-num) - (loop (add1 next-check) (cdr nexts))))))]))) - (eprintf "warning: lexer at ~a can accept the empty string\n" stx))) - (with-syntax ([START-STATE-STX start] - [TRANS-TABLE-STX trans] - [NO-LOOKAHEAD-STX no-look] - [(NAME ...) names] - [(ACT ...) (map (λ (a) (wrap-action a src-loc-style)) acts)] - [(ACT-NAME ...) (vector->list action-names)] - [SPEC-ACT-STX (wrap-action spec-act src-loc-style)] - [HAS-COMMENT-ACT?-STX (if (syntax-e spec-comment-act) #t #f)] - [SPEC-COMMENT-ACT-STX (wrap-action spec-comment-act src-loc-style)] - [EOF-ACT-STX (wrap-action eof-act src-loc-style)]) - (syntax-property - (syntax/loc stx (let ([NAME ACT] ...) - (let ([proc (lexer-body START-STATE-STX - TRANS-TABLE-STX - (vector ACT-NAME ...) - NO-LOOKAHEAD-STX - SPEC-ACT-STX - HAS-COMMENT-ACT?-STX - SPEC-COMMENT-ACT-STX - EOF-ACT-STX)]) - ;; reverse eta to get named procedures: - (λ (port) (proc port))))) - 'disappeared-use disappeared-uses)))]))) - -(define-syntax lexer (make-lexer-macro 'lexer #f)) -(define-syntax lexer-src-pos (make-lexer-macro 'lexer-src-pos 'lexer-src-pos)) -(define-syntax lexer-srcloc (make-lexer-macro 'lexer-srcloc 'lexer-srcloc)) - -(define-syntax (define-lex-abbrev stx) - (syntax-case stx () - [(_ NAME RE) (identifier? #'NAME) - (syntax/loc stx - (define-syntax NAME - (make-lex-abbrev (λ () (quote-syntax RE)))))] - [_ (raise-syntax-error 'define-lex-abbrev "form should be (define-lex-abbrev name re)" stx)])) - -(define-syntax (define-lex-abbrevs stx) - (syntax-case stx () - [(_ . XS) - (with-syntax ([(ABBREV ...) (map - (λ (a) - (syntax-case a () - [(NAME RE) (identifier? #'NAME) - (syntax/loc a (define-lex-abbrev NAME RE))] - [_ (raise-syntax-error - #f - "form should be (define-lex-abbrevs (name re) ...)" - stx - a)])) - (syntax->list #'XS))]) - (syntax/loc stx (begin ABBREV ...)))] - [_ (raise-syntax-error #f "form should be (define-lex-abbrevs (name re) ...)" stx)])) - -(define-syntax (define-lex-trans stx) - (syntax-case stx () - [(_ name-form body-form) - (let-values (((name body) - (normalize-definition #'(define-syntax name-form body-form) #'λ))) - - #`(define-syntax #,name - (let ((func #,body)) - (unless (procedure? func) - (raise-syntax-error 'define-lex-trans "expected a procedure as the transformer, got ~e" func)) - (unless (procedure-arity-includes? 
func 1) - (raise-syntax-error 'define-lex-trans "expected a procedure that accepts 1 argument as the transformer, got ~e" func)) - (make-lex-trans func))))] - [_ - (raise-syntax-error - #f - "form should be (define-lex-trans name transformer)" - stx)])) - - -(define (get-next-state-helper char min max table) - (cond - [(>= min max) #f] - [else - (define try (quotient (+ min max) 2)) - (define el (vector-ref table try)) - (define r1 (vector-ref el 0)) - (define r2 (vector-ref el 1)) - (cond - [(and (>= char r1) (<= char r2)) (vector-ref el 2)] - [(< char r1) (get-next-state-helper char min try table)] - [else (get-next-state-helper char (add1 try) max table)])])) - - - - -(define (get-next-state char table) - (and table (get-next-state-helper char 0 (vector-length table) table))) - -(define (lexer-body start-state trans-table actions no-lookahead special-action - has-special-comment-action? special-comment-action eof-action) - (letrec ([lexer - (λ (ip) - (let ((first-pos (get-position ip)) - (first-char (peek-char-or-special ip 0))) - ;(printf "(peek-char-or-special port 0) = ~e\n" first-char) - (cond - [(eof-object? first-char) - (do-match ip first-pos eof-action (read-char-or-special ip))] - [(special-comment? first-char) - (read-char-or-special ip) - (cond - (has-special-comment-action? - (do-match ip first-pos special-comment-action #f)) - (else (lexer ip)))] - [(not (char? first-char)) - (do-match ip first-pos special-action (read-char-or-special ip))] - [else - (let lexer-loop ( - ;; current-state - (state start-state) - ;; the character to transition on - (char first-char) - ;; action for the longest match seen thus far - ;; including a match at the current state - (longest-match-action - (vector-ref actions start-state)) - ;; how many bytes precede char - (length-bytes 0) - ;; how many characters have been read - ;; including the one just read - (length-chars 1) - ;; how many characters are in the longest match - (longest-match-length 0)) - (let ([next-state - (cond - [(not (char? char)) #f] - [else (get-next-state (char->integer char) - (vector-ref trans-table state))])]) - (cond - [(not next-state) - (check-match ip first-pos longest-match-length - length-chars longest-match-action)] - [(vector-ref no-lookahead next-state) - (let ((act (vector-ref actions next-state))) - (check-match ip - first-pos - (if act length-chars longest-match-length) - length-chars - (if act act longest-match-action)))] - [else - (let* ([act (vector-ref actions next-state)] - [next-length-bytes (+ (char-utf-8-length char) length-bytes)] - [next-char (peek-char-or-special ip next-length-bytes)]) - #;(printf "(peek-char-or-special port ~e) = ~e\n" - next-length-bytes next-char) - (lexer-loop next-state - next-char - (if act - act - longest-match-action) - next-length-bytes - (add1 length-chars) - (if act - length-chars - longest-match-length)))])))])))]) - (λ (ip) - (unless (input-port? ip) - (raise-argument-error 'lexer "input-port?" 
0 ip)) - (lexer ip)))) - -(define (check-match lb first-pos longest-match-length length longest-match-action) - (unless longest-match-action - (let* ([match (read-string length lb)] - [end-pos (get-position lb)]) - (raise-read-error - (format "lexer: No match found in input starting with: ~a" match) - (file-path) - (position-line first-pos) - (position-col first-pos) - (position-offset first-pos) - (- (position-offset end-pos) (position-offset first-pos))))) - (let ([match (read-string longest-match-length lb)]) - ;(printf "(read-string ~e port) = ~e\n" longest-match-length match) - (do-match lb first-pos longest-match-action match))) - -(define file-path (make-parameter #f)) -(define lexer-file-path file-path) - -(define (do-match ip first-pos action value) - #;(printf "(action ~a ~a ~a ~a)\n" - (position-offset first-pos) (position-offset (get-position ip)) value ip) - (action first-pos (get-position ip) value ip)) - -(define (get-position ip) - (define-values (line col off) (port-next-location ip)) - (make-position off line col)) - -(define-syntax (create-unicode-abbrevs stx) - (syntax-case stx () - [(_ CTXT) - (with-syntax ([(RANGES ...) (for/list ([range (in-list (list (force alphabetic-ranges) - (force lower-case-ranges) - (force upper-case-ranges) - (force title-case-ranges) - (force numeric-ranges) - (force symbolic-ranges) - (force punctuation-ranges) - (force graphic-ranges) - (force whitespace-ranges) - (force blank-ranges) - (force iso-control-ranges)))]) - `(union ,@(map (λ (x) - `(char-range ,(integer->char (car x)) - ,(integer->char (cdr x)))) - range)))] - [(NAMES ...) (for/list ([sym (in-list '(alphabetic - lower-case - upper-case - title-case - numeric - symbolic - punctuation - graphic - whitespace - blank - iso-control))]) - (datum->syntax #'CTXT sym #f))]) - #'(define-lex-abbrevs (NAMES RANGES) ...))])) - -(define-lex-abbrev any-char (char-complement (union))) -(define-lex-abbrev any-string (intersection)) -(define-lex-abbrev nothing (union)) -(create-unicode-abbrevs #'here) - -(define-lex-trans (char-set stx) - (syntax-case stx () - [(_ STR) - (string? (syntax-e #'STR)) - (with-syntax ([(CHAR ...) (string->list (syntax-e #'STR))]) - #'(union CHAR ...))])) - -(define-syntax provide-lex-keyword - (syntax-rules () - [(_ ID ...) - (begin - (define-syntax-parameter ID - (make-set!-transformer - (λ (stx) - (raise-syntax-error - 'provide-lex-keyword - (format "use of a lexer keyword (~a) is not in an appropriate lexer action" 'ID) - stx)))) - ... - (provide ID ...))])) - -(provide-lex-keyword start-pos end-pos lexeme lexeme-srcloc input-port return-without-pos return-without-srcloc) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt deleted file mode 100644 index 13f982c..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt +++ /dev/null @@ -1,16 +0,0 @@ -#lang racket/base - -(provide (all-defined-out)) -(require syntax/stx) - -;; get-special-action: (syntax-object list) syntax-object syntax-object -> syntax-object -;; Returns the first action from a rule of the form ((which-special) action) -(define (get-special-action rules which-special none) - (cond - ((null? rules) none) - (else - (syntax-case (car rules) () - [((special) ACT) - (and (identifier? #'special) (module-or-top-identifier=? 
(syntax special) which-special)) - #'ACT] - [_ (get-special-action (cdr rules) which-special none)])))) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt deleted file mode 100644 index 28919a3..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt +++ /dev/null @@ -1,339 +0,0 @@ -(module deriv mzscheme - - (require mzlib/list - (prefix is: mzlib/integer-set) - "re.rkt" - "util.rkt") - - (provide build-dfa print-dfa (struct dfa (num-states start-state final-states/actions transitions))) - - (define e (build-epsilon)) - (define z (build-zero)) - - - ;; Don't do anything with this one but extract the chars - (define all-chars (->re `(char-complement (union)) (make-cache))) - - ;; get-char-groups : re bool -> (list-of char-setR?) - ;; Collects the char-setRs in r that could be used in - ;; taking the derivative of r. - (define (get-char-groups r found-negation) - (cond - ((or (eq? r e) (eq? r z)) null) - ((char-setR? r) (list r)) - ((concatR? r) - (if (re-nullable? (concatR-re1 r)) - (append (get-char-groups (concatR-re1 r) found-negation) - (get-char-groups (concatR-re2 r) found-negation)) - (get-char-groups (concatR-re1 r) found-negation))) - ((repeatR? r) - (get-char-groups (repeatR-re r) found-negation)) - ((orR? r) - (apply append (map (lambda (x) (get-char-groups x found-negation)) (orR-res r)))) - ((andR? r) - (apply append (map (lambda (x) (get-char-groups x found-negation)) (andR-res r)))) - ((negR? r) - (if found-negation - (get-char-groups (negR-re r) #t) - (cons all-chars (get-char-groups (negR-re r) #t)))))) - - (test-block ((c (make-cache)) - (r1 (->re #\1 c)) - (r2 (->re #\2 c))) - ((get-char-groups e #f) null) - ((get-char-groups z #f) null) - ((get-char-groups r1 #f) (list r1)) - ((get-char-groups (->re `(concatenation ,r1 ,r2) c) #f) - (list r1)) - ((get-char-groups (->re `(concatenation ,e ,r2) c) #f) - (list r2)) - ((get-char-groups (->re `(concatenation (repetition 0 +inf.0 ,r1) ,r2) c) #f) - (list r1 r2)) - ((get-char-groups (->re `(repetition 0 +inf.0 ,r1) c) #f) - (list r1)) - ((get-char-groups - (->re `(union (repetition 0 +inf.0 ,r1) - (concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f) - (list r1 r2 (->re "3" c) (->re "4" c))) - ((get-char-groups (->re `(complement ,r1) c) #f) - (list all-chars r1)) - ((get-char-groups - (->re `(intersection (repetition 0 +inf.0 ,r1) - (concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f) - (list r1 r2 (->re "3" c) (->re "4" c))) - ) - (define loc:member? is:member?) - - ;; deriveR : re char cache -> re - (define (deriveR r c cache) - (cond - ((or (eq? r e) (eq? r z)) z) - ((char-setR? r) - (if (loc:member? c (char-setR-chars r)) e z)) - ((concatR? r) - (let* ((r1 (concatR-re1 r)) - (r2 (concatR-re2 r)) - (d (build-concat (deriveR r1 c cache) r2 cache))) - (if (re-nullable? r1) - (build-or (list d (deriveR r2 c cache)) cache) - d))) - ((repeatR? r) - (build-concat (deriveR (repeatR-re r) c cache) - (build-repeat (sub1 (repeatR-low r)) - (sub1 (repeatR-high r)) - (repeatR-re r) cache) - cache)) - ((orR? r) - (build-or (map (lambda (x) (deriveR x c cache)) - (orR-res r)) - cache)) - ((andR? r) - (build-and (map (lambda (x) (deriveR x c cache)) - (andR-res r)) - cache)) - ((negR? 
r) - (build-neg (deriveR (negR-re r) c cache) cache)))) - - (test-block ((c (make-cache)) - (a (char->integer #\a)) - (b (char->integer #\b)) - (r1 (->re #\a c)) - (r2 (->re `(repetition 0 +inf.0 #\a) c)) - (r3 (->re `(repetition 0 +inf.0 ,r2) c)) - (r4 (->re `(concatenation #\a ,r2) c)) - (r5 (->re `(repetition 0 +inf.0 ,r4) c)) - (r6 (->re `(union ,r5 #\a) c)) - (r7 (->re `(concatenation ,r2 ,r2) c)) - (r8 (->re `(complement ,r4) c)) - (r9 (->re `(intersection ,r2 ,r4) c))) - ((deriveR e a c) z) - ((deriveR z a c) z) - ((deriveR r1 b c) z) - ((deriveR r1 a c) e) - ((deriveR r2 a c) r2) - ((deriveR r2 b c) z) - ((deriveR r3 a c) r2) - ((deriveR r3 b c) z) - ((deriveR r4 a c) r2) - ((deriveR r4 b c) z) - ((deriveR r5 a c) (->re `(concatenation ,r2 ,r5) c)) - ((deriveR r5 b c) z) - ((deriveR r6 a c) (->re `(union (concatenation ,r2 ,r5) "") c)) - ((deriveR r6 b c) z) - ((deriveR r7 a c) (->re `(union (concatenation ,r2 ,r2) ,r2) c)) - ((deriveR r7 b c) z) - ((deriveR r8 a c) (->re `(complement, r2) c)) - ((deriveR r8 b c) (->re `(complement ,z) c)) - ((deriveR r9 a c) r2) - ((deriveR r9 b c) z) - ((deriveR (->re `(repetition 1 2 "ab") c) a c) - (->re `(concatenation "b" (repetition 0 1 "ab")) c))) - - ;; An re-action is (cons re action) - - ;; derive : (list-of re-action) char cache -> (union (list-of re-action) #f) - ;; applies deriveR to all the re-actions's re parts. - ;; Returns #f if the derived state is equivalent to z. - (define (derive r c cache) - (let ((new-r (map (lambda (ra) - (cons (deriveR (car ra) c cache) (cdr ra))) - r))) - (if (andmap (lambda (x) (eq? z (car x))) - new-r) - #f - new-r))) - - (test-block ((c (make-cache)) - (r1 (->re #\1 c)) - (r2 (->re #\2 c))) - ((derive null (char->integer #\1) c) #f) - ((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c) - (list (cons e 1) (cons z 2))) - ((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f)) - - - ;; get-final : (list-of re-action) -> (union #f syntax-object) - ;; An re that accepts e represents a final state. Return the - ;; action from the first final state or #f if there is none. - (define (get-final res) - (cond - ((null? res) #f) - ((re-nullable? (caar res)) (cdar res)) - (else (get-final (cdr res))))) - - (test-block ((c->i char->integer) - (c (make-cache)) - (r1 (->re #\a c)) - (r2 (->re #\b c)) - (b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5))) - (a (list (cons r1 1) (cons r2 2)))) - ((derive null (c->i #\a) c) #f) - ((derive a (c->i #\a) c) (list (cons e 1) (cons z 2))) - ((derive a (c->i #\b) c) (list (cons z 1) (cons e 2))) - ((derive a (c->i #\c) c) #f) - ((derive (list (cons (->re `(union " " "\n" ",") c) 1) - (cons (->re `(concatenation (repetition 0 1 "-") - (repetition 1 +inf.0 (char-range "0" "9"))) c) 2) - (cons (->re `(concatenation "-" (repetition 1 +inf.0 "-")) c) 3) - (cons (->re "[" c) 4) - (cons (->re "]" c) 5)) (c->i #\[) c) - b) - ((get-final a) #f) - ((get-final (list (cons e 1) (cons e 2))) 1) - ((get-final b) 4)) - - - ;; A state is (make-state (list-of re-action) nat) - (define-struct state (spec index)) - - ;; get->key : re-action -> (list-of nat) - ;; states are indexed by the list of indexes of their res - (define (get-key s) - (map (lambda (x) (re-index (car x))) s)) - - (define loc:partition is:partition) - - ;; compute-chars : (list-of state) -> (list-of char-set) - ;; Computed the sets of equivalent characters for taking the - ;; derivative of the car of st. Only one derivative per set need to be taken. 
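  ;; For example (cf. the test block below): if the pending state's patterns
  ;; contain (char-range #\1 #\4) and (char-range #\2 #\3), the partition is
  ;; the two sets {#\2,#\3} and {#\1,#\4}; every character within one set
  ;; yields the same derivatives, so one representative per set suffices.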
- (define (compute-chars st) - (cond - ((null? st) null) - (else - (loc:partition (map char-setR-chars - (apply append (map (lambda (x) (get-char-groups (car x) #f)) - (state-spec (car st))))))))) - - (test-block ((c (make-cache)) - (c->i char->integer) - (r1 (->re `(char-range #\1 #\4) c)) - (r2 (->re `(char-range #\2 #\3) c))) - ((compute-chars null) null) - ((compute-chars (list (make-state null 1))) null) - ((map is:integer-set-contents - (compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2)))) - (list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3))) - (is:integer-set-contents (is:union (is:make-range (c->i #\1)) - (is:make-range (c->i #\4))))))) - - - ;; A dfa is (make-dfa int int - ;; (list-of (cons int syntax-object)) - ;; (list-of (cons int (list-of (cons char-set int))))) - ;; Each transitions is a state and a list of chars with the state to transition to. - ;; The finals and transitions are sorted by state number, and duplicate free. - (define-struct dfa (num-states start-state final-states/actions transitions) (make-inspector)) - - (define loc:get-integer is:get-integer) - - ;; build-dfa : (list-of re-action) cache -> dfa - (define (build-dfa rs cache) - (let* ((transitions (make-hash-table)) - (get-state-number (make-counter)) - (start (make-state rs (get-state-number)))) - (cache (cons 'state (get-key rs)) (lambda () start)) - (let loop ((old-states (list start)) - (new-states null) - (all-states (list start)) - (cs (compute-chars (list start)))) - (cond - ((and (null? old-states) (null? new-states)) - (make-dfa (get-state-number) (state-index start) - (sort (filter (lambda (x) (cdr x)) - (map (lambda (state) - (cons (state-index state) (get-final (state-spec state)))) - all-states)) - (lambda (a b) (< (car a) (car b)))) - (sort (hash-table-map transitions - (lambda (state trans) - (cons (state-index state) - (map (lambda (t) - (cons (car t) - (state-index (cdr t)))) - trans)))) - (lambda (a b) (< (car a) (car b)))))) - ((null? old-states) - (loop new-states null all-states (compute-chars new-states))) - ((null? cs) - (loop (cdr old-states) new-states all-states (compute-chars (cdr old-states)))) - (else - (let* ((state (car old-states)) - (c (car cs)) - (new-re (derive (state-spec state) (loc:get-integer c) cache))) - (cond - (new-re - (let* ((new-state? #f) - (new-state (cache (cons 'state (get-key new-re)) - (lambda () - (set! new-state? #t) - (make-state new-re (get-state-number))))) - (new-all-states (if new-state? (cons new-state all-states) all-states))) - (hash-table-put! transitions - state - (cons (cons c new-state) - (hash-table-get transitions state - (lambda () null)))) - (cond - (new-state? 
- (loop old-states (cons new-state new-states) new-all-states (cdr cs))) - (else - (loop old-states new-states new-all-states (cdr cs)))))) - (else (loop old-states new-states all-states (cdr cs)))))))))) - - (define (print-dfa x) - (printf "number of states: ~a\n" (dfa-num-states x)) - (printf "start state: ~a\n" (dfa-start-state x)) - (printf "final states: ~a\n" (map car (dfa-final-states/actions x))) - (for-each (lambda (trans) - (printf "state: ~a\n" (car trans)) - (for-each (lambda (rule) - (printf " -~a-> ~a\n" - (is:integer-set-contents (car rule)) - (cdr rule))) - (cdr trans))) - (dfa-transitions x))) - - (define (build-test-dfa rs) - (let ((c (make-cache))) - (build-dfa (map (lambda (x) (cons (->re x c) 'action)) - rs) - c))) - - -#| - (define t1 (build-test-dfa null)) - (define t2 (build-test-dfa `(#\a))) - (define t3 (build-test-dfa `(#\a #\b))) - (define t4 (build-test-dfa `((repetition 0 +inf.0 #\a) - (repetition 0 +inf.0 (concatenation #\a #\b))))) - (define t5 (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\0 #\1)) #\1)))) - (define t6 (build-test-dfa `((repetition 0 +inf.0 (repetition 0 +inf.0 #\a)) - (repetition 0 +inf.0 (concatenation #\b (repetition 1 +inf.0 #\b)))))) - (define t7 (build-test-dfa `((concatenation (repetition 0 +inf.0 #\a) (repetition 0 +inf.0 #\b) - (repetition 0 +inf.0 #\c) (repetition 0 +inf.0 #\d) - (repetition 0 +inf.0 #\e))))) - (define t8 - (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\a #\b)) #\a (union #\a #\b) - (union #\a #\b) (union #\a #\b) (union #\a #\b))))) - (define t9 (build-test-dfa `((concatenation "/*" - (complement (concatenation (intersection) "*/" (intersection))) - "*/")))) - (define t11 (build-test-dfa `((complement "1")))) - (define t12 (build-test-dfa `((concatenation (intersection (concatenation (repetition 0 +inf.0 "a") "b") - (concatenation "a" (repetition 0 +inf.0 "b"))) - "ab")))) - (define x (build-test-dfa `((union " " "\n" ",") - (concatenation (repetition 0 1 "-") (repetition 1 +inf.0 (char-range "0" "9"))) - (concatenation "-" (repetition 1 +inf.0 "-")) - "[" - "]"))) - (define y (build-test-dfa - `((repetition 1 +inf.0 - (union (concatenation "|" (repetition 0 +inf.0 (char-complement "|")) "|") - (concatenation "|" (repetition 0 +inf.0 (char-complement "|")))))))) - (define t13 (build-test-dfa `((intersection (concatenation (intersection) "111" (intersection)) - (complement (union (concatenation (intersection) "01") - (repetition 1 +inf.0 "1"))))))) - (define t14 (build-test-dfa `((complement "1")))) -|# - ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt deleted file mode 100644 index bbccbe0..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt +++ /dev/null @@ -1,81 +0,0 @@ -#lang scheme/base -(require (for-syntax scheme/base) - "../lex.rkt" - rackunit) - -(define-syntax (catch-syn-error stx) - (syntax-case stx () - [(_ arg) - (datum->syntax - #'here - (with-handlers ((exn:fail:syntax? 
exn-message)) - (syntax-local-expand-expression #'arg) - "not-an-error"))])) - -(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev))) -(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev a))) -(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev (a b) v))) -(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev 1 1))) -(check-regexp-match #rx"lex-abbrevs" (catch-syn-error (define-lex-abbrevs ()))) - -(check-regexp-match #rx"lex-trans" (catch-syn-error (define-lex-trans))) - -(check-regexp-match #rx"lexer" (catch-syn-error (lexer))) -(check-regexp-match #rx"lexer" (catch-syn-error (lexer ("a" "b" "c")))) -(check-regexp-match #rx"lexer" (catch-syn-error (lexer ()))) -(check-regexp-match #rx"lexer" (catch-syn-error (lexer ("")))) - -(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (a 1)))) -(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer ((a) 1)))) -(check-regexp-match #rx"regular-expression" (catch-syn-error (let ((a 1)) (lexer ((a) 1))))) - -(check-regexp-match #rx"regular-expression" - (catch-syn-error (let-syntax ((a 1)) - (lexer ((a) 1))))) - -(check-regexp-match #rx"define-lex-trans" - (catch-syn-error - (let () - (define-lex-trans a 1) - (let () - (lexer ((a) 1)))))) - -;; Detecting mutual recursion cycle: -(check-regexp-match #rx"regular-expression" - (catch-syn-error - (let () - (define-lex-abbrev a b) - (define-lex-abbrev b a) - (let () - (lexer (a 1)))))) - -(check-regexp-match #rx"regular-expression" - (catch-syn-error - (let () - (define-lex-abbrev a (repetition 0 1 b)) - (define-lex-abbrev b (repetition 0 1 a)) - (let () - (lexer (a 1)))))) - -;; Detecting cycle within same abbreviation: -(check-regexp-match #rx"regular-expression" - (catch-syn-error - (let () - (define-lex-abbrev balanced - (union (concatenation "(" balanced ")" balanced) - any-char)) - (lexer - [balanced (string-append lexeme (balanced input-port))] - [(eof) ""])))) - - -(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (1 1)))) -(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition) 1)))) -(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition #\1 #\1 "3") 1)))) -(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition 1 #\1 "3") 1)))) -(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition 1 0 "3") 1)))) -(check-regexp-match #rx"complement" (catch-syn-error (lexer ((complement) 1)))) -(check-regexp-match #rx"char-range" (catch-syn-error (lexer ((char-range) 1)))) -(check-regexp-match #rx"char-range" (catch-syn-error (lexer ((char-range #\9 #\0) 1)))) -(check-regexp-match #rx"char-complement" (catch-syn-error (lexer ((char-complement) 1)))) -(check-regexp-match #rx"char-complement" (catch-syn-error (lexer ((char-complement (concatenation "1" "2")) 1)))) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt deleted file mode 100644 index f74c003..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt +++ /dev/null @@ -1,179 +0,0 @@ -(module front mzscheme - (require (prefix is: mzlib/integer-set) - mzlib/list - syntax/stx - "util.rkt" - "stx.rkt" - "re.rkt" - "deriv.rkt") - - (provide build-lexer) - - (define-syntax time-label - (syntax-rules () - ((_ l e ...) 
- (begin - (printf "~a: " l) - (time (begin e ...)))))) - - ;; A table is either - ;; - (vector-of (union #f nat)) - ;; - (vector-of (vector-of (vector nat nat nat))) - - (define loc:integer-set-contents is:integer-set-contents) - - ;; dfa->1d-table : dfa -> (same as build-lexer) - (define (dfa->1d-table dfa) - (let ((state-table (make-vector (dfa-num-states dfa) #f)) - (transition-cache (make-hash-table 'equal))) - (for-each - (lambda (trans) - (let* ((from-state (car trans)) - (all-chars/to (cdr trans)) - (flat-all-chars/to - (sort - (apply append - (map (lambda (chars/to) - (let ((char-ranges (loc:integer-set-contents (car chars/to))) - (to (cdr chars/to))) - (map (lambda (char-range) - (let ((entry (vector (car char-range) (cdr char-range) to))) - (hash-table-get transition-cache entry - (lambda () - (hash-table-put! transition-cache - entry - entry) - entry)))) - char-ranges))) - all-chars/to)) - (lambda (a b) - (< (vector-ref a 0) (vector-ref b 0)))))) - (vector-set! state-table from-state (list->vector flat-all-chars/to)))) - (dfa-transitions dfa)) - state-table)) - - - (define loc:foldr is:foldr) - - ;; dfa->2d-table : dfa -> (same as build-lexer) - (define (dfa->2d-table dfa) - (let ( - ;; char-table : (vector-of (union #f nat)) - ;; The lexer table, one entry per state per char. - ;; Each entry specifies a state to transition to. - ;; #f indicates no transition - (char-table (make-vector (* 256 (dfa-num-states dfa)) #f))) - - ;; Fill the char-table vector - (for-each - (lambda (trans) - (let ((from-state (car trans))) - (for-each (lambda (chars/to) - (let ((to-state (cdr chars/to))) - (loc:foldr (lambda (char _) - (vector-set! char-table - (bitwise-ior - char - (arithmetic-shift from-state 8)) - to-state)) - (void) - (car chars/to)))) - (cdr trans)))) - (dfa-transitions dfa)) - char-table)) - - - ;; dfa->actions : dfa -> (vector-of (union #f syntax-object)) - ;; The action for each final state, #f if the state isn't final - (define (dfa->actions dfa) - (let ((actions (make-vector (dfa-num-states dfa) #f))) - (for-each (lambda (state/action) - (vector-set! actions (car state/action) (cdr state/action))) - (dfa-final-states/actions dfa)) - actions)) - - ;; dfa->no-look : dfa -> (vector-of bool) - ;; For each state whether the lexer can ignore the next input. - ;; It can do this only if there are no transitions out of the - ;; current state. - (define (dfa->no-look dfa) - (let ((no-look (make-vector (dfa-num-states dfa) #t))) - (for-each (lambda (trans) - (vector-set! no-look (car trans) #f)) - (dfa-transitions dfa)) - no-look)) - - (test-block ((d1 (make-dfa 1 1 (list) (list))) - (d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) - (list (cons 1 (list (cons (is:make-range 49 50) 1) - (cons (is:make-range 51) 2))) - (cons 2 (list (cons (is:make-range 49) 3)))))) - (d3 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) - (list (cons 1 (list (cons (is:make-range 100 200) 0) - (cons (is:make-range 49 50) 1) - (cons (is:make-range 51) 2))) - (cons 2 (list (cons (is:make-range 49) 3))))))) - ((dfa->2d-table d1) (make-vector 256 #f)) - ((dfa->2d-table d2) (let ((v (make-vector 1024 #f))) - (vector-set! v 305 1) - (vector-set! v 306 1) - (vector-set! v 307 2) - (vector-set! 
v 561 3) - v)) - ((dfa->1d-table d1) (make-vector 1 #f)) - ((dfa->1d-table d2) #(#f - #(#(49 50 1) #(51 51 2)) - #(#(49 49 3)) - #f)) - ((dfa->1d-table d3) #(#f - #(#(49 50 1) #(51 51 2) #(100 200 0)) - #(#(49 49 3)) - #f)) - ((dfa->actions d1) (vector #f)) - ((dfa->actions d2) (vector #f #f 2 3)) - ((dfa->no-look d1) (vector #t)) - ((dfa->no-look d2) (vector #t #f #f #t))) - - ;; build-lexer : syntax-object list -> - ;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object)) - ;; each syntax object has the form (re action) - (define (build-lexer sos) - (let* ((disappeared-uses (box null)) - (s-re-acts (map (lambda (so) - (cons (parse (stx-car so) disappeared-uses) - (stx-car (stx-cdr so)))) - sos)) - - (cache (make-cache)) - - (re-acts (map (lambda (s-re-act) - (cons (->re (car s-re-act) cache) - (cdr s-re-act))) - s-re-acts)) - - (dfa (build-dfa re-acts cache)) - (table (dfa->1d-table dfa))) - ;(print-dfa dfa) - #;(let ((num-states (vector-length table)) - (num-vectors (length (filter values (vector->list table)))) - (num-entries (apply + (map - (lambda (x) (if x (vector-length x) 0)) - (vector->list table)))) - (num-different-entries - (let ((ht (make-hash-table))) - (for-each - (lambda (x) - (when x - (for-each - (lambda (y) - (hash-table-put! ht y #t)) - (vector->list x)))) - (vector->list table)) - (length (hash-table-map ht cons))))) - (printf "~a states, ~aKB\n" - num-states - (/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries - (* 5 num-different-entries))) 1024))) - (values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa) - (unbox disappeared-uses)))) - ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt deleted file mode 100644 index b06c3eb..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt +++ /dev/null @@ -1,385 +0,0 @@ -(module re mzscheme - (require mzlib/list - scheme/match - (prefix is: mzlib/integer-set) - "util.rkt") - - (provide ->re build-epsilon build-zero build-char-set build-concat - build-repeat build-or build-and build-neg - epsilonR? zeroR? char-setR? concatR? repeatR? orR? andR? negR? - char-setR-chars concatR-re1 concatR-re2 repeatR-re repeatR-low repeatR-high - orR-res andR-res negR-re - re-nullable? re-index) - - ;; get-index : -> nat - (define get-index (make-counter)) - - ;; An re is either - ;; - (make-epsilonR bool nat) - ;; - (make-zeroR bool nat) - ;; - (make-char-setR bool nat char-set) - ;; - (make-concatR bool nat re re) - ;; - (make-repeatR bool nat nat nat-or-+inf.0 re) - ;; - (make-orR bool nat (list-of re)) Must not directly contain any orRs - ;; - (make-andR bool nat (list-of re)) Must not directly contain any andRs - ;; - (make-negR bool nat re) - ;; - ;; Every re must have an index field globally different from all - ;; other re index fields. - (define-struct re (nullable? 
index) (make-inspector)) - (define-struct (epsilonR re) () (make-inspector)) - (define-struct (zeroR re) () (make-inspector)) - (define-struct (char-setR re) (chars) (make-inspector)) - (define-struct (concatR re) (re1 re2) (make-inspector)) - (define-struct (repeatR re) (low high re) (make-inspector)) - (define-struct (orR re) (res) (make-inspector)) - (define-struct (andR re) (res) (make-inspector)) - (define-struct (negR re) (re) (make-inspector)) - - ;; e : re - ;; The unique epsilon re - (define e (make-epsilonR #t (get-index))) - - ;; z : re - ;; The unique zero re - (define z (make-zeroR #f (get-index))) - - - ;; s-re = char constant - ;; | string constant (sequence of characters) - ;; | re a precompiled re - ;; | (repetition low high s-re) repetition between low and high times (inclusive) - ;; | (union s-re ...) - ;; | (intersection s-re ...) - ;; | (complement s-re) - ;; | (concatenation s-re ...) - ;; | (char-range rng rng) match any character between two (inclusive) - ;; | (char-complement char-set) match any character not listed - ;; low = natural-number - ;; high = natural-number or +inf.0 - ;; rng = char or string with length 1 - ;; (concatenation) (repetition 0 0 x), and "" match the empty string. - ;; (union) matches no strings. - ;; (intersection) matches any string. - - (define loc:make-range is:make-range) - (define loc:union is:union) - (define loc:split is:split) - (define loc:complement is:complement) - - ;; ->re : s-re cache -> re - (define (->re exp cache) - (match exp - ((? char?) (build-char-set (loc:make-range (char->integer exp)) cache)) - ((? string?) (->re `(concatenation ,@(string->list exp)) cache)) - ((? re?) exp) - (`(repetition ,low ,high ,r) - (build-repeat low high (->re r cache) cache)) - (`(union ,rs ...) - (build-or (flatten-res (map (lambda (r) (->re r cache)) rs) - orR? orR-res loc:union cache) - cache)) - (`(intersection ,rs ...) - (build-and (flatten-res (map (lambda (r) (->re r cache)) rs) - andR? andR-res (lambda (a b) - (let-values (((i _ __) (loc:split a b))) i)) - cache) - cache)) - (`(complement ,r) - (build-neg (->re r cache) cache)) - (`(concatenation ,rs ...) - (foldr (lambda (x y) - (build-concat (->re x cache) y cache)) - e - rs)) - (`(char-range ,c1 ,c2) - (let ((i1 (char->integer (if (string? c1) (string-ref c1 0) c1))) - (i2 (char->integer (if (string? c2) (string-ref c2 0) c2)))) - (if (<= i1 i2) - (build-char-set (loc:make-range i1 i2) cache) - z))) - (`(char-complement ,crs ...) - (let ((cs (->re `(union ,@crs) cache))) - (cond - ((zeroR? cs) (build-char-set (loc:make-range 0 max-char-num) cache)) - ((char-setR? cs) - (build-char-set (loc:complement (char-setR-chars cs) 0 max-char-num) cache)) - (else z)))))) - - - - - ;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re)) - ;; (char-set char-set -> char-set) cache -> (list-of re) - ;; Takes all the char-sets in l and combines them into one char-set using the combine function. - ;; Flattens out the values of type?. get-res only needs to function on things type? returns - ;; true for. - (define (flatten-res l type? get-res combine cache) - (let loop ((res l) - ;; chars : (union #f char-set) - (chars #f) - (no-chars null)) - (cond - ((null? res) - (if chars - (cons (build-char-set chars cache) no-chars) - no-chars)) - ((char-setR? (car res)) - (if chars - (loop (cdr res) (combine (char-setR-chars (car res)) chars) no-chars) - (loop (cdr res) (char-setR-chars (car res)) no-chars))) - ((type? 
(car res)) - (loop (append (get-res (car res)) (cdr res)) chars no-chars)) - (else (loop (cdr res) chars (cons (car res) no-chars)))))) - - ;; build-epsilon : -> re - (define (build-epsilon) e) - - (define (build-zero) z) - - (define loc:integer-set-contents is:integer-set-contents) - - ;; build-char-set : char-set cache -> re - (define (build-char-set cs cache) - (let ((l (loc:integer-set-contents cs))) - (cond - ((null? l) z) - (else - (cache l - (lambda () - (make-char-setR #f (get-index) cs))))))) - - - - ;; build-concat : re re cache -> re - (define (build-concat r1 r2 cache) - (cond - ((eq? e r1) r2) - ((eq? e r2) r1) - ((or (eq? z r1) (eq? z r2)) z) - (else - (cache (cons 'concat (cons (re-index r1) (re-index r2))) - (lambda () - (make-concatR (and (re-nullable? r1) (re-nullable? r2)) - (get-index) - r1 r2)))))) - - ;; build-repeat : nat nat-or-+inf.0 re cache -> re - (define (build-repeat low high r cache) - (let ((low (if (< low 0) 0 low))) - (cond - ((eq? r e) e) - ((and (= 0 low) (or (= 0 high) (eq? z r))) e) - ((and (= 1 low) (= 1 high)) r) - ((and (repeatR? r) - (eqv? (repeatR-high r) +inf.0) - (or (= 0 (repeatR-low r)) - (= 1 (repeatR-low r)))) - (build-repeat (* low (repeatR-low r)) - +inf.0 - (repeatR-re r) - cache)) - (else - (cache (cons 'repeat (cons low (cons high (re-index r)))) - (lambda () - (make-repeatR (or (re-nullable? r) (= 0 low)) (get-index) low high r))))))) - - - ;; build-or : (list-of re) cache -> re - (define (build-or rs cache) - (let ((rs - (filter - (lambda (x) (not (eq? x z))) - (do-simple-equiv (replace rs orR? orR-res null) re-index)))) - (cond - ((null? rs) z) - ((null? (cdr rs)) (car rs)) - ((memq (build-neg z cache) rs) (build-neg z cache)) - (else - (cache (cons 'or (map re-index rs)) - (lambda () - (make-orR (ormap re-nullable? rs) (get-index) rs))))))) - - ;; build-and : (list-of re) cache -> re - (define (build-and rs cache) - (let ((rs (do-simple-equiv (replace rs andR? andR-res null) re-index))) - (cond - ((null? rs) (build-neg z cache)) - ((null? (cdr rs)) (car rs)) - ((memq z rs) z) - (else - (cache (cons 'and (map re-index rs)) - (lambda () - (make-andR (andmap re-nullable? rs) (get-index) rs))))))) - - ;; build-neg : re cache -> re - (define (build-neg r cache) - (cond - ((negR? r) (negR-re r)) - (else - (cache (cons 'neg (re-index r)) - (lambda () - (make-negR (not (re-nullable? 
r)) (get-index) r)))))) - - ;; Tests for the build-functions - (test-block ((c (make-cache)) - (isc is:integer-set-contents) - (r1 (build-char-set (is:make-range (char->integer #\1)) c)) - (r2 (build-char-set (is:make-range (char->integer #\2)) c)) - (r3 (build-char-set (is:make-range (char->integer #\3)) c)) - (rc (build-concat r1 r2 c)) - (rc2 (build-concat r2 r1 c)) - (rr (build-repeat 0 +inf.0 rc c)) - (ro (build-or `(,rr ,rc ,rr) c)) - (ro2 (build-or `(,rc ,rr ,z) c)) - (ro3 (build-or `(,rr ,rc) c)) - (ro4 (build-or `(,(build-or `(,r1 ,r2) c) - ,(build-or `(,r2 ,r3) c)) c)) - (ra (build-and `(,rr ,rc ,rr) c)) - (ra2 (build-and `(,rc ,rr) c)) - (ra3 (build-and `(,rr ,rc) c)) - (ra4 (build-and `(,(build-and `(,r3 ,r2) c) - ,(build-and `(,r2 ,r1) c)) c)) - (rn (build-neg z c)) - (rn2 (build-neg r1 c))) - - ((isc (char-setR-chars r1)) (isc (is:make-range (char->integer #\1)))) - ((isc (char-setR-chars r2)) (isc (is:make-range (char->integer #\2)))) - ((isc (char-setR-chars r3)) (isc (is:make-range (char->integer #\3)))) - ((build-char-set (is:make-range) c) z) - ((build-concat r1 e c) r1) - ((build-concat e r1 c) r1) - ((build-concat r1 z c) z) - ((build-concat z r1 c) z) - ((build-concat r1 r2 c) rc) - ((concatR-re1 rc) r1) - ((concatR-re2 rc) r2) - ((concatR-re1 rc2) r2) - ((concatR-re2 rc2) r1) - (ro ro2) - (ro ro3) - (ro4 (build-or `(,r1 ,r2 ,r3) c)) - ((orR-res ro) (list rc rr)) - ((orR-res ro4) (list r1 r2 r3)) - ((build-or null c) z) - ((build-or `(,r1 ,z) c) r1) - ((build-repeat 0 +inf.0 rc c) rr) - ((build-repeat 0 1 z c) e) - ((build-repeat 0 0 rc c) e) - ((build-repeat 0 +inf.0 z c) e) - ((build-repeat -1 +inf.0 z c) e) - ((build-repeat 0 +inf.0 (build-repeat 0 +inf.0 rc c) c) - (build-repeat 0 +inf.0 rc c)) - ((build-repeat 20 20 (build-repeat 0 +inf.0 rc c) c) - (build-repeat 0 +inf.0 rc c)) - ((build-repeat 20 20 (build-repeat 1 +inf.0 rc c) c) - (build-repeat 20 +inf.0 rc c)) - ((build-repeat 1 1 rc c) rc) - ((repeatR-re rr) rc) - (ra ra2) - (ra ra3) - (ra4 (build-and `(,r1 ,r2 ,r3) c)) - ((andR-res ra) (list rc rr)) - ((andR-res ra4) (list r1 r2 r3)) - ((build-and null c) (build-neg z c)) - ((build-and `(,r1 ,z) c) z) - ((build-and `(,r1) c) r1) - ((build-neg r1 c) (build-neg r1 c)) - ((build-neg (build-neg r1 c) c) r1) - ((negR-re (build-neg r2 c)) r2) - ((re-nullable? r1) #f) - ((re-nullable? rc) #f) - ((re-nullable? (build-concat rr rr c)) #t) - ((re-nullable? rr) #t) - ((re-nullable? (build-repeat 0 1 rc c)) #t) - ((re-nullable? (build-repeat 1 2 rc c)) #f) - ((re-nullable? (build-repeat 1 2 (build-or (list e r1) c) c)) #t) - ((re-nullable? ro) #t) - ((re-nullable? (build-or `(,r1 ,r2) c)) #f) - ((re-nullable? (build-and `(,r1 ,e) c)) #f) - ((re-nullable? (build-and `(,rr ,e) c)) #t) - ((re-nullable? (build-neg r1 c)) #t) - ((re-nullable? (build-neg rr c)) #f)) - - (test-block ((c (make-cache)) - (isc is:integer-set-contents) - (r1 (->re #\1 c)) - (r2 (->re #\2 c)) - (r3-5 (->re '(char-range #\3 #\5) c)) - (r4 (build-or `(,r1 ,r2) c)) - (r5 (->re `(union ,r3-5 #\7) c)) - (r6 (->re #\6 c))) - ((flatten-res null orR? orR-res is:union c) null) - ((isc (char-setR-chars (car (flatten-res `(,r1) orR? orR-res is:union c)))) - (isc (is:make-range (char->integer #\1)))) - ((isc (char-setR-chars (car (flatten-res `(,r4) orR? orR-res is:union c)))) - (isc (is:make-range (char->integer #\1) (char->integer #\2)))) - ((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1) - orR? 
orR-res is:union c)))) - (isc (is:make-range (char->integer #\1) (char->integer #\7)))) - ((flatten-res `(,r1 ,r2) andR? andR-res (lambda (x y) - (let-values (((i _ __) - (is:split x y))) - i)) - c) - (list z))) - - ;; ->re - (test-block ((c (make-cache)) - (isc is:integer-set-contents) - (r (->re #\a c)) - (rr (->re `(concatenation ,r ,r) c)) - (rrr (->re `(concatenation ,r ,rr) c)) - (rrr* (->re `(repetition 0 +inf.0 ,rrr) c))) - ((isc (char-setR-chars r)) (isc (is:make-range (char->integer #\a)))) - ((->re "" c) e) - ((->re "asdf" c) (->re `(concatenation #\a #\s #\d #\f) c)) - ((->re r c) r) - ((->re `(repetition 0 +inf.0 ,r) c) (build-repeat 0 +inf.0 r c)) - ((->re `(repetition 1 +inf.0 ,r) c) (build-repeat 1 +inf.0 r c)) - ((->re `(repetition 0 1 ,r) c) (build-repeat 0 1 r c)) - ((->re `(repetition 0 1 ,rrr*) c) rrr*) - ((->re `(union (union (char-range #\a #\c) - (char-complement (char-range #\000 #\110) - (char-range #\112 ,(integer->char max-char-num)))) - (union (repetition 0 +inf.0 #\2))) c) - (build-or (list (build-char-set (is:union (is:make-range 73) - (is:make-range 97 99)) - c) - (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c)) - c)) - ((->re `(union ,rr ,rrr) c) (build-or (list rr rrr) c)) - ((->re `(union ,r) c) r) - ((->re `(union) c) z) - ((->re `(intersection (intersection #\111 - (char-complement (char-range #\000 #\110) - (char-range #\112 ,(integer->char max-char-num)))) - (intersection (repetition 0 +inf.0 #\2))) c) - (build-and (list (build-char-set (is:make-range 73) c) - (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c)) - c)) - ((->re `(intersection (intersection #\000 (char-complement (char-range #\000 #\110) - (char-range #\112 ,(integer->char max-char-num)))) - (intersection (repetition 0 +inf.0 #\2))) c) - z) - ((->re `(intersection ,rr ,rrr) c) (build-and (list rr rrr) c)) - ((->re `(intersection ,r) c) r) - ((->re `(intersection) c) (build-neg z c)) - ((->re `(complement ,r) c) (build-neg r c)) - ((->re `(concatenation) c) e) - ((->re `(concatenation ,rrr*) c) rrr*) - (rr (build-concat r r c)) - ((->re `(concatenation ,r ,rr ,rrr) c) - (build-concat r (build-concat rr rrr c) c)) - ((isc (char-setR-chars (->re `(char-range #\1 #\1) c))) (isc (is:make-range 49))) - ((isc (char-setR-chars (->re `(char-range #\1 #\9) c))) (isc (is:make-range 49 57))) - ((isc (char-setR-chars (->re `(char-range "1" "1") c))) (isc (is:make-range 49))) - ((isc (char-setR-chars (->re `(char-range "1" "9") c))) (isc (is:make-range 49 57))) - ((->re `(char-range "9" "1") c) z) - ((isc (char-setR-chars (->re `(char-complement) c))) - (isc (char-setR-chars (->re `(char-range #\000 ,(integer->char max-char-num)) c)))) - ((isc (char-setR-chars (->re `(char-complement #\001 (char-range #\002 ,(integer->char max-char-num))) c))) - (isc (is:make-range 0))) - ) - - ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt deleted file mode 100644 index 86f7a70..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt +++ /dev/null @@ -1,220 +0,0 @@ -#lang racket - -(require "util.rkt" - syntax/id-table) - -(provide parse) - -(define (bad-args stx num) - (raise-syntax-error - #f - (format "incorrect number of arguments (should have ~a)" num) - stx)) - -;; char-range-arg: syntax-object syntax-object -> nat -;; If c contains is a character or length 1 string, returns the integer -;; for the character. Otherwise raises a syntax error. 
-(define (char-range-arg stx containing-stx) - (let ((c (syntax-e stx))) - (cond - ((char? c) (char->integer c)) - ((and (string? c) (= (string-length c) 1)) - (char->integer (string-ref c 0))) - (else - (raise-syntax-error - #f - "not a char or single-char string" - containing-stx stx))))) -(module+ test - (check-equal? (char-range-arg #'#\1 #'here) (char->integer #\1)) - (check-equal? (char-range-arg #'"1" #'here) (char->integer #\1))) - -(define orig-insp (variable-reference->module-declaration-inspector - (#%variable-reference))) -(define (disarm stx) - (syntax-disarm stx orig-insp)) - - ;; parse : syntax-object (box (list-of syntax-object)) -> s-re (see re.rkt) - ;; checks for errors and generates the plain s-exp form for s - ;; Expands lex-abbrevs and applies lex-trans. - (define (parse stx disappeared-uses) - (let loop ([stx stx] - [disappeared-uses disappeared-uses] - ;; seen-lex-abbrevs: id-table - [seen-lex-abbrevs (make-immutable-free-id-table)]) - (let ([recur (lambda (s) - (loop (syntax-rearm s stx) - disappeared-uses - seen-lex-abbrevs))] - [recur/abbrev (lambda (s id) - (loop (syntax-rearm s stx) - disappeared-uses - (free-id-table-set seen-lex-abbrevs id id)))]) - (syntax-case (disarm stx) (repetition union intersection complement concatenation - char-range char-complement) - (_ - (identifier? stx) - (let ((expansion (syntax-local-value stx (lambda () #f)))) - (unless (lex-abbrev? expansion) - (raise-syntax-error 'regular-expression - "undefined abbreviation" - stx)) - ;; Check for cycles. - (when (free-id-table-ref seen-lex-abbrevs stx (lambda () #f)) - (raise-syntax-error 'regular-expression - "illegal lex-abbrev cycle detected" - stx - #f - (list (free-id-table-ref seen-lex-abbrevs stx)))) - (set-box! disappeared-uses (cons stx (unbox disappeared-uses))) - (recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx))) - (_ - (or (char? (syntax-e stx)) (string? (syntax-e stx))) - (syntax-e stx)) - ((repetition arg ...) - (let ((arg-list (syntax->list (syntax (arg ...))))) - (unless (= 3 (length arg-list)) - (bad-args stx 2)) - (let ((low (syntax-e (car arg-list))) - (high (syntax-e (cadr arg-list))) - (re (caddr arg-list))) - (unless (and (number? low) (exact? low) (integer? low) (>= low 0)) - (raise-syntax-error #f - "not a non-negative exact integer" - stx - (car arg-list))) - (unless (or (and (number? high) (exact? high) (integer? high) (>= high 0)) - (eqv? high +inf.0)) - (raise-syntax-error #f - "not a non-negative exact integer or +inf.0" - stx - (cadr arg-list))) - (unless (<= low high) - (raise-syntax-error - #f - "the first argument is not less than or equal to the second argument" - stx)) - `(repetition ,low ,high ,(recur re))))) - ((union re ...) - `(union ,@(map recur (syntax->list (syntax (re ...)))))) - ((intersection re ...) - `(intersection ,@(map recur (syntax->list (syntax (re ...)))))) - ((complement re ...) - (let ((re-list (syntax->list (syntax (re ...))))) - (unless (= 1 (length re-list)) - (bad-args stx 1)) - `(complement ,(recur (car re-list))))) - ((concatenation re ...) - `(concatenation ,@(map recur (syntax->list (syntax (re ...)))))) - ((char-range arg ...) 
- (let ((arg-list (syntax->list (syntax (arg ...))))) - (unless (= 2 (length arg-list)) - (bad-args stx 2)) - (let ((i1 (char-range-arg (car arg-list) stx)) - (i2 (char-range-arg (cadr arg-list) stx))) - (if (<= i1 i2) - `(char-range ,(integer->char i1) ,(integer->char i2)) - (raise-syntax-error - #f - "the first argument does not precede or equal second argument" - stx))))) - ((char-complement arg ...) - (let ((arg-list (syntax->list (syntax (arg ...))))) - (unless (= 1 (length arg-list)) - (bad-args stx 1)) - (let ((parsed (recur (car arg-list)))) - (unless (char-set? parsed) - (raise-syntax-error #f - "not a character set" - stx - (car arg-list))) - `(char-complement ,parsed)))) - ((op form ...) - (identifier? (syntax op)) - (let* ((o (syntax op)) - (expansion (syntax-local-value o (lambda () #f)))) - (set-box! disappeared-uses (cons o (unbox disappeared-uses))) - (cond - ((lex-trans? expansion) - (recur ((lex-trans-f expansion) (disarm stx)))) - (expansion - (raise-syntax-error 'regular-expression - "not a lex-trans" - stx)) - (else - (raise-syntax-error 'regular-expression - "undefined operator" - stx))))) - (_ - (raise-syntax-error - 'regular-expression - "not a char, string, identifier, or (op args ...)" - stx)))))) - - - - ;; char-set? : s-re -> bool - ;; A char-set is an re that matches only strings of length 1. - ;; char-set? is conservative. - (define (char-set? s-re) - (cond - ((char? s-re) #t) - ((string? s-re) (= (string-length s-re) 1)) - ((list? s-re) - (let ((op (car s-re))) - (case op - ((union intersection) (andmap char-set? (cdr s-re))) - ((char-range char-complement) #t) - ((repetition) - (and (= (cadr s-re) (caddr s-re)) (char-set? (cadddr s-re)))) - ((concatenation) - (and (= 2 (length s-re)) (char-set? (cadr s-re)))) - (else #f)))) - (else #f))) - - (module+ test - (require rackunit)) - (module+ test - (check-equal? (char-set? #\a) #t) - (check-equal? (char-set? "12") #f) - (check-equal? (char-set? "1") #t) - (check-equal? (char-set? '(repetition 1 2 #\1)) #f) - (check-equal? (char-set? '(repetition 1 1 "12")) #f) - (check-equal? (char-set? '(repetition 1 1 "1")) #t) - (check-equal? (char-set? '(union "1" "2" "3")) #t) - (check-equal? (char-set? '(union "1" "" "3")) #f) - (check-equal? (char-set? '(intersection "1" "2" (union "3" "4"))) #t) - (check-equal? (char-set? '(intersection "1" "")) #f) - (check-equal? (char-set? '(complement "1")) #f) - (check-equal? (char-set? '(concatenation "1" "2")) #f) - (check-equal? (char-set? '(concatenation "" "2")) #f) - (check-equal? (char-set? '(concatenation "1")) #t) - (check-equal? (char-set? '(concatenation "12")) #f) - (check-equal? (char-set? '(char-range #\1 #\2)) #t) - (check-equal? (char-set? '(char-complement #\1)) #t)) - - ;; yikes... these test cases all have the wrong arity, now. - ;; and by "now", I mean it's been broken since before we - ;; moved to git. - (module+ test - (check-equal? (parse #'#\a null) #\a) - (check-equal? (parse #'"1" null) "1") - (check-equal? (parse #'(repetition 1 1 #\1) null) - '(repetition 1 1 #\1)) - (check-equal? (parse #'(repetition 0 +inf.0 #\1) null) '(repetition 0 +inf.0 #\1)) - (check-equal? (parse #'(union #\1 (union "2") (union)) null) - '(union #\1 (union "2") (union))) - (check-equal? (parse #'(intersection #\1 (intersection "2") (intersection)) - null) - '(intersection #\1 (intersection "2") (intersection))) - (check-equal? (parse #'(complement (union #\1 #\2)) - null) - '(complement (union #\1 #\2))) - (check-equal? 
(parse #'(concatenation "1" "2" (concatenation)) null) - '(concatenation "1" "2" (concatenation))) - (check-equal? (parse #'(char-range "1" #\1) null) '(char-range #\1 #\1)) - (check-equal? (parse #'(char-range #\1 "1") null) '(char-range #\1 #\1)) - (check-equal? (parse #'(char-range "1" "3") null) '(char-range #\1 #\3)) - (check-equal? (parse #'(char-complement (union "1" "2")) null) - '(char-complement (union "1" "2")))) -; ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt deleted file mode 100644 index c1f1492..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt +++ /dev/null @@ -1,9 +0,0 @@ -(module token-syntax mzscheme - - ;; The things needed at compile time to handle definition of tokens - - (provide make-terminals-def terminals-def-t terminals-def? - make-e-terminals-def e-terminals-def-t e-terminals-def?) - (define-struct terminals-def (t)) - (define-struct e-terminals-def (t)) - ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt deleted file mode 100644 index 27b3458..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt +++ /dev/null @@ -1,92 +0,0 @@ -(module token mzscheme - - (require-for-syntax "token-syntax.rkt") - - ;; Defining tokens - - (provide define-tokens define-empty-tokens make-token token? - (protect (rename token-name real-token-name)) - (protect (rename token-value real-token-value)) - (rename token-name* token-name) - (rename token-value* token-value) - (struct position (offset line col)) - (struct position-token (token start-pos end-pos)) - (struct srcloc-token (token srcloc))) - - - ;; A token is either - ;; - symbol - ;; - (make-token symbol any) - (define-struct token (name value) (make-inspector)) - - ;; token-name*: token -> symbol - (define (token-name* t) - (cond - ((symbol? t) t) - ((token? t) (token-name t)) - (else (raise-type-error - 'token-name - "symbol or struct:token" - 0 - t)))) - - ;; token-value*: token -> any - (define (token-value* t) - (cond - ((symbol? t) #f) - ((token? t) (token-value t)) - (else (raise-type-error - 'token-value - "symbol or struct:token" - 0 - t)))) - - (define-for-syntax (make-ctor-name n) - (datum->syntax-object n - (string->symbol (format "token-~a" (syntax-e n))) - n - n)) - - (define-for-syntax (make-define-tokens empty?) - (lambda (stx) - (syntax-case stx () - ((_ name (token ...)) - (andmap identifier? (syntax->list (syntax (token ...)))) - (with-syntax (((marked-token ...) - (map values #;(make-syntax-introducer) - (syntax->list (syntax (token ...)))))) - (quasisyntax/loc stx - (begin - (define-syntax name - #,(if empty? - #'(make-e-terminals-def (quote-syntax (marked-token ...))) - #'(make-terminals-def (quote-syntax (marked-token ...))))) - #,@(map - (lambda (n) - (when (eq? (syntax-e n) 'error) - (raise-syntax-error - #f - "Cannot define a token named error." - stx)) - (if empty? - #`(define (#,(make-ctor-name n)) - '#,n) - #`(define (#,(make-ctor-name n) x) - (make-token '#,n x)))) - (syntax->list (syntax (token ...)))) - #;(define marked-token #f) #;...)))) - ((_ ...) 
- (raise-syntax-error - #f - "must have the form (define-tokens name (identifier ...)) or (define-empty-tokens name (identifier ...))" - stx))))) - - (define-syntax define-tokens (make-define-tokens #f)) - (define-syntax define-empty-tokens (make-define-tokens #t)) - - (define-struct position (offset line col) #f) - (define-struct position-token (token start-pos end-pos) #f) - - (define-struct srcloc-token (token srcloc) #f) - ) - diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt deleted file mode 100644 index c21e88c..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt +++ /dev/null @@ -1,69 +0,0 @@ -#lang racket - -(require "util.rkt") - -(provide (all-defined-out)) - -;; mapped-chars : (listof (list nat nat bool)) -(define mapped-chars (make-known-char-range-list)) - -;; get-chars-for-x : (nat -> bool) (listof (list nat nat bool)) -> (listof (cons nat nat)) -(define (get-chars-for char-x? mapped-chars) - (cond - ((null? mapped-chars) null) - (else - (let* ((range (car mapped-chars)) - (low (car range)) - (high (cadr range)) - (x (char-x? low))) - (cond - ((caddr range) - (if x - (cons (cons low high) - (get-chars-for char-x? (cdr mapped-chars))) - (get-chars-for char-x? (cdr mapped-chars)))) - (else - (let loop ((range-start low) - (i (car range)) - (parity x)) - (cond - ((> i high) - (if parity - (cons (cons range-start high) (get-chars-for char-x? (cdr mapped-chars))) - (get-chars-for char-x? (cdr mapped-chars)))) - ((eq? parity (char-x? i)) - (loop range-start (add1 i) parity)) - (parity - (cons (cons range-start (sub1 i)) (loop i (add1 i) #f))) - (else - (loop i (add1 i) #t)))))))))) - -(define (compute-ranges x?) - (delay (get-chars-for (lambda (x) (x? (integer->char x))) mapped-chars))) - -(define alphabetic-ranges (compute-ranges char-alphabetic?)) ;; 325 -(define lower-case-ranges (compute-ranges char-lower-case?)) ;; 405 -(define upper-case-ranges (compute-ranges char-upper-case?)) ;; 380 -(define title-case-ranges (compute-ranges char-title-case?)) ;; 10 -(define numeric-ranges (compute-ranges char-numeric?)) ;; 47 -(define symbolic-ranges (compute-ranges char-symbolic?)) ;; 153 -(define punctuation-ranges (compute-ranges char-punctuation?)) ;; 86 -(define graphic-ranges (compute-ranges char-graphic?)) ;; 401 -(define whitespace-ranges (compute-ranges char-whitespace?)) ;; 10 -(define blank-ranges (compute-ranges char-blank?)) ;; 9 -#;(define hexadecimal-ranges (compute-ranges char-hexadecimal?)) -(define iso-control-ranges (compute-ranges char-iso-control?)) ;; 2 - - - -(module+ test - (require rackunit) - (check-equal? (get-chars-for odd? '()) '()) - (check-equal? (get-chars-for odd? '((1 4 #f) (8 13 #f))) - '((1 . 1) (3 . 3) (9 . 9) (11 . 11) (13 . 13))) - (check-equal? (get-chars-for (lambda (x) - (odd? (quotient x 10))) - '((1 5 #t) (17 19 #t) (21 51 #f))) - '((17 . 19) (30 . 39) (50 . 
51)))) - - diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/util.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/util.rkt deleted file mode 100644 index a7afc54..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/util.rkt +++ /dev/null @@ -1,127 +0,0 @@ -#lang racket - -(provide (all-defined-out)) - -(define max-char-num #x10FFFF) - -(define-struct lex-abbrev (get-abbrev)) -(define-struct lex-trans (f)) - -(module+ test - (require rackunit)) - -#;(define-syntax test-block - (syntax-rules () - ((_ defs (code right-ans) ...) - (let* defs - (let ((real-ans code)) - (unless (equal? real-ans right-ans) - (printf "Test failed: ~e gave ~e. Expected ~e\n" - 'code real-ans 'right-ans))) ...)))) - -(define-syntax test-block - (syntax-rules () - ((_ x ...) (void)))) - - -;; A cache is (X ( -> Y) -> Y) -;; make-cache : -> cache -;; table map Xs to Ys. If key is mapped, its value is returned. -;; Otherwise, build is invoked and its result is placed in the table and -;; returned. -;; Xs are compared with equal? -(define (make-cache) - (let ((table (make-hash))) - (lambda (key build) - (hash-ref table key - (lambda () - (let ((new (build))) - (hash-set! table key new) - new)))))) - -(module+ test - (define cache (make-cache)) - (check-equal? (cache '(s 1 2) (lambda () 9)) 9) - (check-equal? (cache '(s 2 1) (lambda () 8)) 8) - (check-equal? (cache '(s 1 2) (lambda () 1)) 9) - (check-equal? (cache (cons 's (cons 0 (cons +inf.0 10))) - (lambda () 22)) 22) - (check-equal? (cache (cons 's (cons 0 (cons +inf.0 10))) - (lambda () 1)) 22)) - - - -;; make-counter : -> -> nat -;; makes a function that returns a higher number by 1, each time -;; it is called. -(define (make-counter) - (let ((counter 0)) - (lambda () - (begin0 - counter - (set! counter (add1 counter)))))) - -(module+ test - (define c (make-counter)) - (define d (make-counter)) - (check-equal? (c) 0) - (check-equal? (d) 0) - (check-equal? (c) 1) - (check-equal? (d) 1) - (check-equal? (c) 2)) - - -;; remove-dups : (list-of X) (X -> number) -> (list-of X) -;; removes the entries from l that have the same index as a -;; previous entry. l must be grouped by indexes. -(define (remove-dups l index acc) - (cond - ((null? l) (reverse acc)) - ((null? acc) (remove-dups (cdr l) index (cons (car l) acc))) - ((= (index (car acc)) (index (car l))) - (remove-dups (cdr l) index acc)) - (else - (remove-dups (cdr l) index (cons (car l) acc))))) - - -(module+ test - (check-equal? (remove-dups '((1 2) (2 2) (1 3) (1 4) - (100 4) (0 5)) cadr null) - '((1 2) (1 3) (1 4) (0 5))) - (check-equal? (remove-dups null error null) null)) - -;; do-simple-equiv : (list-of X) (X -> nat) -> (list-of X) -;; Sorts l according to index and removes the entries with duplicate -;; indexes. -(define (do-simple-equiv l index) - (let ((ordered (sort l (lambda (a b) (< (index a) (index b)))))) - (remove-dups ordered index null))) - -(module+ test - (check-equal? (do-simple-equiv '((2 2) (1 4) (1 2) - (100 4) (1 3) (0 5)) - cadr) - '((2 2) (1 3) (1 4) (0 5))) - (check-equal? (do-simple-equiv null error) null)) - -;; replace : (list-of X) (X -> bool) (X -> (list-of X)) (list-of X) -> -;; (list-of X) -;; If (pred? r) for some r in l, splice (get r) in place of r in the resulting -;; list. -(define (replace l pred? get acc) - (cond - ((null? l) acc) - ((pred? (car l)) (replace (cdr l) pred? get (append (get (car l)) acc))) - (else (replace (cdr l) pred? get (cons (car l) acc))))) - - -(module+ test - (check-equal? 
(replace null void (lambda () (list 1)) null) null) - (check-equal? (replace '(1 2 3 4 3 5) - (lambda (x) (= x 3)) - (lambda (x) (list 1 2 3)) - null) - '(5 1 2 3 4 1 2 3 2 1))) - - - diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt deleted file mode 100644 index ebff00d..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt +++ /dev/null @@ -1,280 +0,0 @@ -;; Constructs to create and access grammars, the internal -;; representation of the input to the parser generator. - -(module grammar mzscheme - - (require mzlib/class - mzlib/list - "yacc-helper.rkt" - racket/contract) - - ;; Each production has a unique index 0 <= index <= number of productions - (define-struct prod (lhs rhs index prec action) (make-inspector)) - - ;; The dot-pos field is the index of the element in the rhs - ;; of prod that the dot immediately precedes. - ;; Thus 0 <= dot-pos <= (vector-length rhs). - (define-struct item (prod dot-pos) (make-inspector)) - - ;; gram-sym = (union term? non-term?) - ;; Each term has a unique index 0 <= index < number of terms - ;; Each non-term has a unique index 0 <= index < number of non-terms - (define-struct term (sym index prec) (make-inspector)) - (define-struct non-term (sym index) (make-inspector)) - - ;; a precedence declaration. - (define-struct prec (num assoc) (make-inspector)) - - (provide/contract - (make-item (prod? (or/c #f natural-number/c) . -> . item?)) - (make-term (symbol? (or/c #f natural-number/c) (or/c prec? #f) . -> . term?)) - (make-non-term (symbol? (or/c #f natural-number/c) . -> . non-term?)) - (make-prec (natural-number/c (or/c 'left 'right 'nonassoc) . -> . prec?)) - (make-prod (non-term? (vectorof (or/c non-term? term?)) - (or/c #f natural-number/c) (or/c #f prec?) syntax? . -> . prod?))) - - (provide - - - ;; Things that work on items - start-item? item-prod item->string - sym-at-dot move-dot-right itemstring - non-term? term? non-termbit-vector term-index non-term-index - - ;; Things that work on precs - prec-num prec-assoc - - grammar% - - ;; Things that work on productions - prod-index prod-prec prod-rhs prod-lhs prod-action) - - - ;;---------------------- LR items -------------------------- - - ;; item bool - ;; Lexicographic comparison on two items. - (define (item bool - ;; The start production always has index 0 - (define (start-item? i) - (= 0 (non-term-index (prod-lhs (item-prod i))))) - - - ;; move-dot-right: LR-item -> LR-item | #f - ;; moves the dot to the right in the item, unless it is at its - ;; rightmost, then it returns false - (define (move-dot-right i) - (cond - ((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f) - (else (make-item (item-prod i) - (add1 (item-dot-pos i)))))) - - ;; sym-at-dot: LR-item -> gram-sym | #f - ;; returns the symbol after the dot in the item or #f if there is none - (define (sym-at-dot i) - (let ((dp (item-dot-pos i)) - (rhs (prod-rhs (item-prod i)))) - (cond - ((= dp (vector-length rhs)) #f) - (else (vector-ref rhs dp))))) - - - ;; print-item: LR-item -> - (define (item->string it) - (let ((print-sym (lambda (i) - (let ((gs (vector-ref (prod-rhs (item-prod it)) i))) - (cond - ((term? 
gs) (format "~a " (term-sym gs))) - (else (format "~a " (non-term-sym gs)))))))) - (string-append - (format "~a -> " (non-term-sym (prod-lhs (item-prod it)))) - (let loop ((i 0)) - (cond - ((= i (vector-length (prod-rhs (item-prod it)))) - (if (= i (item-dot-pos it)) - ". " - "")) - ((= i (item-dot-pos it)) - (string-append ". " (print-sym i) (loop (add1 i)))) - (else (string-append (print-sym i) (loop (add1 i))))))))) - - ;; --------------------- Grammar Symbols -------------------------- - - (define (non-termstring gs) - (symbol->string (gram-sym-symbol gs))) - - ;; term-list->bit-vector: term list -> int - ;; Creates a number where the nth bit is 1 if the term with index n is in - ;; the list, and whose nth bit is 0 otherwise - (define (term-list->bit-vector terms) - (cond - ((null? terms) 0) - (else - (bitwise-ior (arithmetic-shift 1 (term-index (car terms))) (term-list->bit-vector (cdr terms)))))) - - - ;; ------------------------- Grammar ------------------------------ - - (define grammar% - (class object% - (super-instantiate ()) - ;; prods: production list list - ;; where there is one production list per non-term - (init prods) - ;; init-prods: production list - ;; The productions parsing can start from - ;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable - (init-field init-prods terms non-terms end-terms) - - ;; list of all productions - (define all-prods (apply append prods)) - (define num-prods (length all-prods)) - (define num-terms (length terms)) - (define num-non-terms (length non-terms)) - - (let ((count 0)) - (for-each - (lambda (nt) - (set-non-term-index! nt count) - (set! count (add1 count))) - non-terms)) - - (let ((count 0)) - (for-each - (lambda (t) - (set-term-index! t count) - (set! count (add1 count))) - terms)) - - (let ((count 0)) - (for-each - (lambda (prod) - (set-prod-index! prod count) - (set! count (add1 count))) - all-prods)) - - ;; indexed by the index of the non-term - contains the list of productions for that non-term - (define nt->prods - (let ((v (make-vector (length prods) #f))) - (for-each (lambda (prods) - (vector-set! v (non-term-index (prod-lhs (car prods))) prods)) - prods) - v)) - - (define nullable-non-terms - (nullable all-prods num-non-terms)) - - (define/public (get-num-terms) num-terms) - (define/public (get-num-non-terms) num-non-terms) - - (define/public (get-prods-for-non-term nt) - (vector-ref nt->prods (non-term-index nt))) - (define/public (get-prods) all-prods) - (define/public (get-init-prods) init-prods) - - (define/public (get-terms) terms) - (define/public (get-non-terms) non-terms) - - (define/public (get-num-prods) num-prods) - (define/public (get-end-terms) end-terms) - - (define/public (nullable-non-term? nt) - (vector-ref nullable-non-terms (non-term-index nt))) - - (define/public (nullable-after-dot? item) - (let* ((rhs (prod-rhs (item-prod item))) - (prod-length (vector-length rhs))) - (let loop ((i (item-dot-pos item))) - (cond - ((< i prod-length) - (if (and (non-term? (vector-ref rhs i)) (nullable-non-term? (vector-ref rhs i))) - (loop (add1 i)) - #f)) - ((= i prod-length) #t))))) - - (define/public (nullable-non-term-thunk) - (lambda (nt) - (nullable-non-term? nt))) - (define/public (nullable-after-dot?-thunk) - (lambda (item) - (nullable-after-dot? 
item))))) - - - ;; nullable: production list * int -> non-term set - ;; determines which non-terminals can derive epsilon - (define (nullable prods num-nts) - (letrec ((nullable (make-vector num-nts #f)) - (added #f) - - ;; possible-nullable: producion list -> production list - ;; Removes all productions that have a terminal - (possible-nullable - (lambda (prods) - (filter (lambda (prod) - (vector-andmap non-term? (prod-rhs prod))) - prods))) - - ;; set-nullables: production list -> production list - ;; makes one pass through the productions, adding the ones - ;; known to be nullable now to nullable and returning a list - ;; of productions that we don't know about yet. - (set-nullables - (lambda (prods) - (cond - ((null? prods) null) - ((vector-ref nullable - (gram-sym-index (prod-lhs (car prods)))) - (set-nullables (cdr prods))) - ((vector-andmap (lambda (nt) - (vector-ref nullable (gram-sym-index nt))) - (prod-rhs (car prods))) - (vector-set! nullable - (gram-sym-index (prod-lhs (car prods))) - #t) - (set! added #t) - (set-nullables (cdr prods))) - (else - (cons (car prods) - (set-nullables (cdr prods)))))))) - - (let loop ((P (possible-nullable prods))) - (cond - ((null? P) nullable) - (else - (set! added #f) - (let ((new-P (set-nullables P))) - (if added - (loop new-P) - nullable))))))) - - -) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt deleted file mode 100644 index 958acc1..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt +++ /dev/null @@ -1,61 +0,0 @@ -(module graph mzscheme - - (provide digraph) - - (define (zero-thunk) 0) - - ;; digraph: - ;; ('a list) * ('a -> 'a list) * ('a -> 'b) * ('b * 'b -> 'b) * (-> 'b) - ;; -> ('a -> 'b) - ;; DeRemer and Pennello 1982 - ;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} - ;; We use a hash-table to represent the result function 'a -> 'b set, so - ;; the values of type 'a must be comparable with eq?. - (define (digraph nodes edges f- union fail) - (letrec [ - ;; Will map elements of 'a to 'b sets - (results (make-hash-table)) - (f (lambda (x) (hash-table-get results x fail))) - - ;; Maps elements of 'a to integers. - (N (make-hash-table)) - (get-N (lambda (x) (hash-table-get N x zero-thunk))) - (set-N (lambda (x d) (hash-table-put! N x d))) - - (stack null) - (push (lambda (x) - (set! stack (cons x stack)))) - (pop (lambda () - (begin0 - (car stack) - (set! stack (cdr stack))))) - (depth (lambda () (length stack))) - - ;; traverse: 'a -> - (traverse - (lambda (x) - (push x) - (let ((d (depth))) - (set-N x d) - (hash-table-put! results x (f- x)) - (for-each (lambda (y) - (if (= 0 (get-N y)) - (traverse y)) - (hash-table-put! results - x - (union (f x) (f y))) - (set-N x (min (get-N x) (get-N y)))) - (edges x)) - (if (= d (get-N x)) - (let loop ((p (pop))) - (set-N p +inf.0) - (hash-table-put! results p (f x)) - (if (not (eq? 
x p)) - (loop (pop))))))))] - (for-each (lambda (x) - (if (= 0 (get-N x)) - (traverse x))) - nodes) - f)) - -) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt deleted file mode 100644 index 7309f51..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt +++ /dev/null @@ -1,374 +0,0 @@ -(module input-file-parser mzscheme - - ;; routines for parsing the input to the parser generator and producing a - ;; grammar (See grammar.rkt) - - (require "yacc-helper.rkt" - "../private-lex/token-syntax.rkt" - "grammar.rkt" - mzlib/class - racket/contract) - (require-for-template mzscheme) - -(define (is-a-grammar%? x) (is-a? x grammar%)) - (provide/contract - (parse-input ((listof identifier?) (listof identifier?) (listof identifier?) - (or/c #f syntax?) syntax? any/c . -> . is-a-grammar%?)) - (get-term-list ((listof identifier?) . -> . (listof identifier?)))) - - (define stx-for-original-property (read-syntax #f (open-input-string "original"))) - - ;; get-args: ??? -> (values (listof syntax) (or/c #f (cons integer? stx))) - (define (get-args i rhs src-pos term-defs) - (let ((empty-table (make-hash-table)) - (biggest-pos #f)) - (hash-table-put! empty-table 'error #t) - (for-each (lambda (td) - (let ((v (syntax-local-value td))) - (if (e-terminals-def? v) - (for-each (lambda (s) - (hash-table-put! empty-table (syntax-object->datum s) #t)) - (syntax->list (e-terminals-def-t v)))))) - term-defs) - (let ([args - (let get-args ((i i) - (rhs rhs)) - (cond - ((null? rhs) null) - (else - (let ((b (car rhs)) - (name (if (hash-table-get empty-table (syntax-object->datum (car rhs)) (lambda () #f)) - (gensym) - (string->symbol (format "$~a" i))))) - (cond - (src-pos - (let ([start-pos-id - (datum->syntax-object b (string->symbol (format "$~a-start-pos" i)) b stx-for-original-property)] - [end-pos-id - (datum->syntax-object b (string->symbol (format "$~a-end-pos" i)) b stx-for-original-property)]) - (set! biggest-pos (cons start-pos-id end-pos-id)) - `(,(datum->syntax-object b name b stx-for-original-property) - ,start-pos-id - ,end-pos-id - ,@(get-args (add1 i) (cdr rhs))))) - (else - `(,(datum->syntax-object b name b stx-for-original-property) - ,@(get-args (add1 i) (cdr rhs)))))))))]) - (values args biggest-pos)))) - - ;; Given the list of terminal symbols and the precedence/associativity definitions, - ;; builds terminal structures (See grammar.rkt) - ;; build-terms: symbol list * symbol list list -> term list - (define (build-terms term-list precs) - (let ((counter 0) - - ;;(term-list (cons (gensym) term-list)) - - ;; Will map a terminal symbol to its precedence/associativity - (prec-table (make-hash-table))) - - ;; Fill the prec table - (for-each - (lambda (p-decl) - (begin0 - (let ((assoc (car p-decl))) - (for-each - (lambda (term-sym) - (hash-table-put! prec-table term-sym (make-prec counter assoc))) - (cdr p-decl))) - (set! counter (add1 counter)))) - precs) - - ;; Build the terminal structures - (map - (lambda (term-sym) - (make-term term-sym - #f - (hash-table-get prec-table term-sym (lambda () #f)))) - term-list))) - - ;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.rkt) - ;; get-terms-from-def: identifier? -> (listof identifier?) - (define (get-terms-from-def term-syn) - (let ((t (syntax-local-value term-syn (lambda () #f)))) - (cond - ((terminals-def? 
t) (syntax->list (terminals-def-t t))) - ((e-terminals-def? t) (syntax->list (e-terminals-def-t t))) - (else - (raise-syntax-error - 'parser-tokens - "undefined token group" - term-syn))))) - - (define (get-term-list term-group-names) - (remove-duplicates - (cons (datum->syntax-object #f 'error) - (apply append - (map get-terms-from-def term-group-names))))) - - (define (parse-input term-defs start ends prec-decls prods src-pos) - (let* ((start-syms (map syntax-e start)) - - (list-of-terms (map syntax-e (get-term-list term-defs))) - - (end-terms - (map - (lambda (end) - (unless (memq (syntax-e end) list-of-terms) - (raise-syntax-error - 'parser-end-tokens - (format "End token ~a not defined as a token" - (syntax-e end)) - end)) - (syntax-e end)) - ends)) - - ;; Get the list of terminals out of input-terms - - (list-of-non-terms - (syntax-case prods () - (((non-term production ...) ...) - (begin - (for-each - (lambda (nts) - (if (memq (syntax-object->datum nts) list-of-terms) - (raise-syntax-error - 'parser-non-terminals - (format "~a used as both token and non-terminal" - (syntax-object->datum nts)) - nts))) - (syntax->list (syntax (non-term ...)))) - - (let ((dup (duplicate-list? (syntax-object->datum - (syntax (non-term ...)))))) - (if dup - (raise-syntax-error - 'parser-non-terminals - (format "non-terminal ~a defined multiple times" - dup) - prods))) - - (syntax-object->datum (syntax (non-term ...))))) - (_ - (raise-syntax-error - 'parser-grammar - "Grammar must be of the form (grammar (non-terminal productions ...) ...)" - prods)))) - - ;; Check the precedence declarations for errors and turn them into data - (precs - (syntax-case prec-decls () - (((type term ...) ...) - (let ((p-terms - (syntax-object->datum (syntax (term ... ...))))) - (cond - ((duplicate-list? p-terms) => - (lambda (d) - (raise-syntax-error - 'parser-precedences - (format "duplicate precedence declaration for token ~a" - d) - prec-decls))) - (else - (for-each - (lambda (a) - (for-each - (lambda (t) - (if (not (memq (syntax-object->datum t) - list-of-terms)) - (raise-syntax-error - 'parser-precedences - (format - "Precedence declared for non-token ~a" - (syntax-object->datum t)) - t))) - (syntax->list a))) - (syntax->list (syntax ((term ...) ...)))) - (for-each - (lambda (type) - (if (not (memq (syntax-object->datum type) - `(left right nonassoc))) - (raise-syntax-error - 'parser-precedences - "Associativity must be left, right or nonassoc" - type))) - (syntax->list (syntax (type ...)))) - (syntax-object->datum prec-decls))))) - (#f null) - (_ - (raise-syntax-error - 'parser-precedences - "Precedence declaration must be of the form (precs (assoc term ...) ...) where assoc is left, right or nonassoc" - prec-decls)))) - - (terms (build-terms list-of-terms precs)) - - (non-terms (map (lambda (non-term) (make-non-term non-term #f)) - list-of-non-terms)) - (term-table (make-hash-table)) - (non-term-table (make-hash-table))) - - (for-each (lambda (t) - (hash-table-put! term-table (gram-sym-symbol t) t)) - terms) - - (for-each (lambda (nt) - (hash-table-put! non-term-table (gram-sym-symbol nt) nt)) - non-terms) - - (let* ( - ;; parse-prod: syntax-object -> gram-sym vector - (parse-prod - (lambda (prod-so) - (syntax-case prod-so () - ((prod-rhs-sym ...) - (andmap identifier? 
(syntax->list prod-so)) - (begin - (for-each (lambda (t) - (if (memq (syntax-object->datum t) end-terms) - (raise-syntax-error - 'parser-production-rhs - (format "~a is an end token and cannot be used in a production" - (syntax-object->datum t)) - t))) - (syntax->list prod-so)) - (list->vector - (map (lambda (s) - (hash-table-get - term-table - (syntax-object->datum s) - (lambda () - (hash-table-get - non-term-table - (syntax-object->datum s) - (lambda () - (raise-syntax-error - 'parser-production-rhs - (format - "~a is not declared as a terminal or non-terminal" - (syntax-object->datum s)) - s)))))) - (syntax->list prod-so))))) - (_ - (raise-syntax-error - 'parser-production-rhs - "production right-hand-side must have form (symbol ...)" - prod-so))))) - - ;; parse-action: syntax-object * syntax-object -> syntax-object - (parse-action - (lambda (rhs act) - (let-values ([(args biggest) (get-args 1 (syntax->list rhs) src-pos term-defs)]) - (let ([act - (if biggest - (with-syntax ([$n-start-pos (datum->syntax-object (car biggest) '$n-start-pos)] - [$n-end-pos (datum->syntax-object (cdr biggest) '$n-end-pos)]) - #`(let ([$n-start-pos #,(car biggest)] - [$n-end-pos #,(cdr biggest)]) - #,act)) - act)]) - (quasisyntax/loc act - (lambda #,args - #,act)))))) - - ;; parse-prod+action: non-term * syntax-object -> production - (parse-prod+action - (lambda (nt prod-so) - (syntax-case prod-so () - ((prod-rhs action) - (let ((p (parse-prod (syntax prod-rhs)))) - (make-prod - nt - p - #f - (let loop ((i (sub1 (vector-length p)))) - (if (>= i 0) - (let ((gs (vector-ref p i))) - (if (term? gs) - (term-prec gs) - (loop (sub1 i)))) - #f)) - (parse-action (syntax prod-rhs) (syntax action))))) - ((prod-rhs (prec term) action) - (identifier? (syntax term)) - (let ((p (parse-prod (syntax prod-rhs)))) - (make-prod - nt - p - #f - (term-prec - (hash-table-get - term-table - (syntax-object->datum (syntax term)) - (lambda () - (raise-syntax-error - 'parser-production-rhs - (format - "unrecognized terminal ~a in precedence declaration" - (syntax-object->datum (syntax term))) - (syntax term))))) - (parse-action (syntax prod-rhs) (syntax action))))) - (_ - (raise-syntax-error - 'parser-production-rhs - "production must have form [(symbol ...) expression] or [(symbol ...) (prec symbol) expression]" - prod-so))))) - - ;; parse-prod-for-nt: syntax-object -> production list - (parse-prods-for-nt - (lambda (prods-so) - (syntax-case prods-so () - ((nt productions ...) - (> (length (syntax->list (syntax (productions ...)))) 0) - (let ((nt (hash-table-get non-term-table - (syntax-object->datum (syntax nt))))) - (map (lambda (p) (parse-prod+action nt p)) - (syntax->list (syntax (productions ...)))))) - (_ - (raise-syntax-error - 'parser-productions - "A production for a non-terminal must be (non-term right-hand-side ...) 
with at least 1 right hand side" - prods-so)))))) - - (for-each - (lambda (sstx ssym) - (unless (memq ssym list-of-non-terms) - (raise-syntax-error - 'parser-start - (format "Start symbol ~a not defined as a non-terminal" ssym) - sstx))) - start start-syms) - - (let* ((starts (map (lambda (x) (make-non-term (gensym) #f)) start-syms)) - (end-non-terms (map (lambda (x) (make-non-term (gensym) #f)) start-syms)) - (parsed-prods (map parse-prods-for-nt (syntax->list prods))) - (start-prods - (map (lambda (start end-non-term) - (list (make-prod start (vector end-non-term) #f #f - (syntax (lambda (x) x))))) - starts end-non-terms)) - (prods - `(,@start-prods - ,@(map - (lambda (end-nt start-sym) - (map - (lambda (end) - (make-prod end-nt - (vector - (hash-table-get non-term-table start-sym) - (hash-table-get term-table end)) - #f - #f - (syntax (lambda (x) x)))) - end-terms)) - end-non-terms start-syms) - ,@parsed-prods))) - - (make-object grammar% - prods - (map car start-prods) - terms - (append starts (append end-non-terms non-terms)) - (map (lambda (term-name) - (hash-table-get term-table term-name)) - end-terms))))))) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt deleted file mode 100644 index e9b4d3b..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt +++ /dev/null @@ -1,277 +0,0 @@ -(module lalr mzscheme - - ;; Compute LALR lookaheads from DeRemer and Pennello 1982 - - (require "lr0.rkt" - "grammar.rkt" - mzlib/list - mzlib/class) - - (provide compute-LA) - - ;; compute-DR: LR0-automaton * grammar -> (trans-key -> term set) - ;; computes for each state, non-term transition pair, the terminals - ;; which can transition out of the resulting state - ;; output term set is represented in bit-vector form - (define (compute-DR a g) - (lambda (tk) - (let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) - (term-list->bit-vector - (filter - (lambda (term) - (send a run-automaton r term)) - (send g get-terms)))))) - - ;; compute-reads: - ;; LR0-automaton * grammar -> (trans-key -> trans-key list) - (define (compute-reads a g) - (let ((nullable-non-terms - (filter (lambda (nt) (send g nullable-non-term? nt)) - (send g get-non-terms)))) - (lambda (tk) - (let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) - (map (lambda (x) (make-trans-key r x)) - (filter (lambda (non-term) (send a run-automaton r non-term)) - nullable-non-terms)))))) - - ;; compute-read: LR0-automaton * grammar -> (trans-key -> term set) - ;; output term set is represented in bit-vector form - (define (compute-read a g) - (let* ((dr (compute-DR a g)) - (reads (compute-reads a g))) - (digraph-tk->terml (send a get-mapped-non-term-keys) - reads - dr - (send a get-num-states)))) - ;; returns the list of all k such that state k transitions to state start on the - ;; transitions in rhs (in order) - (define (run-lr0-backward a rhs dot-pos start num-states) - (let loop ((states (list start)) - (i (sub1 dot-pos))) - (cond - ((< i 0) states) - (else (loop (send a run-automaton-back states (vector-ref rhs i)) - (sub1 i)))))) - - ;; prod->items-for-include: grammar * prod * non-term -> lr0-item list - ;; returns the list of all (B -> beta . nt gamma) such that prod = (B -> beta nt gamma) - ;; and gamma =>* epsilon - (define (prod->items-for-include g prod nt) - (let* ((rhs (prod-rhs prod)) - (rhs-l (vector-length rhs))) - (append (if (and (> rhs-l 0) (eq? 
nt (vector-ref rhs (sub1 rhs-l)))) - (list (make-item prod (sub1 rhs-l))) - null) - (let loop ((i (sub1 rhs-l))) - (cond - ((and (> i 0) - (non-term? (vector-ref rhs i)) - (send g nullable-non-term? (vector-ref rhs i))) - (if (eq? nt (vector-ref rhs (sub1 i))) - (cons (make-item prod (sub1 i)) - (loop (sub1 i))) - (loop (sub1 i)))) - (else null)))))) - - ;; prod-list->items-for-include: grammar * prod list * non-term -> lr0-item list - ;; return the list of all (B -> beta . nt gamma) such that (B -> beta nt gamma) in prod-list - ;; and gamma =>* epsilon - (define (prod-list->items-for-include g prod-list nt) - (apply append (map (lambda (prod) (prod->items-for-include g prod nt)) prod-list))) - - ;; comput-includes: lr0-automaton * grammar -> (trans-key -> trans-key list) - (define (compute-includes a g) - (let ((num-states (send a get-num-states)) - (items-for-input-nt (make-vector (send g get-num-non-terms) null))) - (for-each - (lambda (input-nt) - (vector-set! items-for-input-nt (non-term-index input-nt) - (prod-list->items-for-include g (send g get-prods) input-nt))) - (send g get-non-terms)) - (lambda (tk) - (let* ((goal-state (trans-key-st tk)) - (non-term (trans-key-gs tk)) - (items (vector-ref items-for-input-nt (non-term-index non-term)))) - (trans-key-list-remove-dups - (apply append - (map (lambda (item) - (let* ((prod (item-prod item)) - (rhs (prod-rhs prod)) - (lhs (prod-lhs prod))) - (map (lambda (state) - (make-trans-key state lhs)) - (run-lr0-backward a - rhs - (item-dot-pos item) - goal-state - num-states)))) - items))))))) - - ;; compute-lookback: lr0-automaton * grammar -> (kernel * proc -> trans-key list) - (define (compute-lookback a g) - (let ((num-states (send a get-num-states))) - (lambda (state prod) - (map (lambda (k) (make-trans-key k (prod-lhs prod))) - (run-lr0-backward a (prod-rhs prod) (vector-length (prod-rhs prod)) state num-states))))) - - ;; compute-follow: LR0-automaton * grammar -> (trans-key -> term set) - ;; output term set is represented in bit-vector form - (define (compute-follow a g includes) - (let ((read (compute-read a g))) - (digraph-tk->terml (send a get-mapped-non-term-keys) - includes - read - (send a get-num-states)))) - - ;; compute-LA: LR0-automaton * grammar -> kernel * prod -> term set - ;; output term set is represented in bit-vector form - (define (compute-LA a g) - (let* ((includes (compute-includes a g)) - (lookback (compute-lookback a g)) - (follow (compute-follow a g includes))) - (lambda (k p) - (let* ((l (lookback k p)) - (f (map follow l))) - (apply bitwise-ior (cons 0 f)))))) - - (define (print-DR dr a g) - (print-input-st-sym dr "DR" a g print-output-terms)) - (define (print-Read Read a g) - (print-input-st-sym Read "Read" a g print-output-terms)) - (define (print-includes i a g) - (print-input-st-sym i "includes" a g print-output-st-nt)) - (define (print-lookback l a g) - (print-input-st-prod l "lookback" a g print-output-st-nt)) - (define (print-follow f a g) - (print-input-st-sym f "follow" a g print-output-terms)) - (define (print-LA l a g) - (print-input-st-prod l "LA" a g print-output-terms)) - - (define (print-input-st-sym f name a g print-output) - (printf "~a:\n" name) - (send a for-each-state - (lambda (state) - (for-each - (lambda (non-term) - (let ((res (f (make-trans-key state non-term)))) - (if (not (null? 
res)) - (printf "~a(~a, ~a) = ~a\n" - name - state - (gram-sym-symbol non-term) - (print-output res))))) - (send g get-non-terms)))) - (newline)) - - (define (print-input-st-prod f name a g print-output) - (printf "~a:\n" name) - (send a for-each-state - (lambda (state) - (for-each - (lambda (non-term) - (for-each - (lambda (prod) - (let ((res (f state prod))) - (if (not (null? res)) - (printf "~a(~a, ~a) = ~a\n" - name - (kernel-index state) - (prod-index prod) - (print-output res))))) - (send g get-prods-for-non-term non-term))) - (send g get-non-terms))))) - - (define (print-output-terms r) - (map - (lambda (p) - (gram-sym-symbol p)) - r)) - - (define (print-output-st-nt r) - (map - (lambda (p) - (list - (kernel-index (trans-key-st p)) - (gram-sym-symbol (trans-key-gs p)))) - r)) - - ;; init-tk-map : int -> (vectorof hashtable?) - (define (init-tk-map n) - (let ((v (make-vector n #f))) - (let loop ((i (sub1 (vector-length v)))) - (when (>= i 0) - (vector-set! v i (make-hash-table)) - (loop (sub1 i)))) - v)) - - ;; lookup-tk-map : (vectorof (symbol? int hashtable)) -> trans-key? -> int - (define (lookup-tk-map map) - (lambda (tk) - (let ((st (trans-key-st tk)) - (gs (trans-key-gs tk))) - (hash-table-get (vector-ref map (kernel-index st)) - (gram-sym-symbol gs) - (lambda () 0))))) - - ;; add-tk-map : (vectorof (symbol? int hashtable)) -> trans-key int -> - (define (add-tk-map map) - (lambda (tk v) - (let ((st (trans-key-st tk)) - (gs (trans-key-gs tk))) - (hash-table-put! (vector-ref map (kernel-index st)) - (gram-sym-symbol gs) - v)))) - - ;; digraph-tk->terml: - ;; (trans-key list) * (trans-key -> trans-key list) * (trans-key -> term list) * int * int * int - ;; -> (trans-key -> term list) - ;; DeRemer and Pennello 1982 - ;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} - ;; A specialization of digraph in the file graph.rkt - (define (digraph-tk->terml nodes edges f- num-states) - (letrec [ - ;; Will map elements of trans-key to term sets represented as bit vectors - (results (init-tk-map num-states)) - - ;; Maps elements of trans-keys to integers. - (N (init-tk-map num-states)) - - (get-N (lookup-tk-map N)) - (set-N (add-tk-map N)) - (get-f (lookup-tk-map results)) - (set-f (add-tk-map results)) - - (stack null) - (push (lambda (x) - (set! stack (cons x stack)))) - (pop (lambda () - (begin0 - (car stack) - (set! stack (cdr stack))))) - (depth (lambda () (length stack))) - - ;; traverse: 'a -> - (traverse - (lambda (x) - (push x) - (let ((d (depth))) - (set-N x d) - (set-f x (f- x)) - (for-each (lambda (y) - (when (= 0 (get-N y)) - (traverse y)) - (set-f x (bitwise-ior (get-f x) (get-f y))) - (set-N x (min (get-N x) (get-N y)))) - (edges x)) - (when (= d (get-N x)) - (let loop ((p (pop))) - (set-N p +inf.0) - (set-f p (get-f x)) - (unless (equal? 
x p) - (loop (pop))))))))] - (for-each (lambda (x) - (when (= 0 (get-N x)) - (traverse x))) - nodes) - get-f)) -) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt deleted file mode 100644 index f237735..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt +++ /dev/null @@ -1,372 +0,0 @@ -(module lr0 mzscheme - - ;; Handle the LR0 automaton - - (require "grammar.rkt" - "graph.rkt" - mzlib/list - mzlib/class) - - (provide build-lr0-automaton lr0% - (struct trans-key (st gs)) trans-key-list-remove-dups - kernel-items kernel-index) - - ;; kernel = (make-kernel (LR1-item list) index) - ;; the list must be kept sorted according to item - ;; (vectorof (symbol X hashtable)) - (define (build-transition-table num-states assoc) - (let ((transitions (make-vector num-states #f))) - (let loop ((i (sub1 (vector-length transitions)))) - (when (>= i 0) - (vector-set! transitions i (make-hash-table)) - (loop (sub1 i)))) - (for-each - (lambda (trans-key/kernel) - (let ((tk (car trans-key/kernel))) - (hash-table-put! (vector-ref transitions (kernel-index (trans-key-st tk))) - (gram-sym-symbol (trans-key-gs tk)) - (cdr trans-key/kernel)))) - assoc) - transitions)) - - ;; reverse-assoc : (listof (cons/c trans-key? kernel?)) -> - ;; (listof (cons/c trans-key? (listof kernel?))) - (define (reverse-assoc assoc) - (let ((reverse-hash (make-hash-table 'equal)) - (hash-table-add! - (lambda (ht k v) - (hash-table-put! ht k (cons v (hash-table-get ht k (lambda () null))))))) - (for-each - (lambda (trans-key/kernel) - (let ((tk (car trans-key/kernel))) - (hash-table-add! reverse-hash - (make-trans-key (cdr trans-key/kernel) - (trans-key-gs tk)) - (trans-key-st tk)))) - assoc) - (hash-table-map reverse-hash cons))) - - - ;; kernel-list-remove-duplicates - ;; LR0-automaton = object of class lr0% - (define lr0% - (class object% - (super-instantiate ()) - ;; term-assoc : (listof (cons/c trans-key? kernel?)) - ;; non-term-assoc : (listof (cons/c trans-key? kernel?)) - ;; states : (vectorof kernel?) - ;; epsilons : ??? - (init-field term-assoc non-term-assoc states epsilons) - - (define transitions (build-transition-table (vector-length states) - (append term-assoc non-term-assoc))) - - (define reverse-term-assoc (reverse-assoc term-assoc)) - (define reverse-non-term-assoc (reverse-assoc non-term-assoc)) - (define reverse-transitions - (build-transition-table (vector-length states) - (append reverse-term-assoc reverse-non-term-assoc))) - - (define mapped-non-terms (map car non-term-assoc)) - - (define/public (get-mapped-non-term-keys) - mapped-non-terms) - - (define/public (get-num-states) - (vector-length states)) - - (define/public (get-epsilon-trans) - epsilons) - - (define/public (get-transitions) - (append term-assoc non-term-assoc)) - - ;; for-each-state : (state ->) -> - ;; Iteration over the states in an automaton - (define/public (for-each-state f) - (let ((num-states (vector-length states))) - (let loop ((i 0)) - (if (< i num-states) - (begin - (f (vector-ref states i)) - (loop (add1 i))))))) - - ;; run-automaton: kernel? gram-sym? -> (union kernel #f) - ;; returns the state reached from state k on input s, or #f when k - ;; has no transition on s - (define/public (run-automaton k s) - (hash-table-get (vector-ref transitions (kernel-index k)) - (gram-sym-symbol s) - (lambda () #f))) - - ;; run-automaton-back : (listof kernel?) gram-sym? 
-> (listof kernel) - ;; returns the list of states that can reach k by transitioning on s. - (define/public (run-automaton-back k s) - (apply append - (map - (lambda (k) - (hash-table-get (vector-ref reverse-transitions (kernel-index k)) - (gram-sym-symbol s) - (lambda () null))) - k))))) - - (define (union comp (eq? a b) - (define (kernel->string k) - (apply string-append - `("{" ,@(map (lambda (i) (string-append (item->string i) ", ")) - (kernel-items k)) - "}"))) - - ;; build-LR0-automaton: grammar -> LR0-automaton - ;; Constructs the kernels of the sets of LR(0) items of g - (define (build-lr0-automaton grammar) -; (printf "LR(0) automaton:\n") - (letrec ( - (epsilons (make-hash-table 'equal)) - (grammar-symbols (append (send grammar get-non-terms) - (send grammar get-terms))) - ;; first-non-term: non-term -> non-term list - ;; given a non-terminal symbol C, return those non-terminal - ;; symbols A s.t. C -> An for some string of terminals and - ;; non-terminals n where -> means a rightmost derivation in many - ;; steps. Assumes that each non-term can be reduced to a string - ;; of terms. - (first-non-term - (digraph (send grammar get-non-terms) - (lambda (nt) - (filter non-term? - (map (lambda (prod) - (sym-at-dot (make-item prod 0))) - (send grammar get-prods-for-non-term nt)))) - (lambda (nt) (list nt)) - (union non-term LR1-item list - ;; Creates a set of items containing i s.t. if A -> n.Xm is in it, - ;; X -> .o is in it too. - (LR0-closure - (lambda (i) - (cond - ((null? i) null) - (else - (let ((next-gsym (sym-at-dot (car i)))) - (cond - ((non-term? next-gsym) - (cons (car i) - (append - (apply append - (map (lambda (non-term) - (map (lambda (x) - (make-item x 0)) - (send grammar - get-prods-for-non-term - non-term))) - (first-non-term next-gsym))) - (LR0-closure (cdr i))))) - (else - (cons (car i) (LR0-closure (cdr i)))))))))) - - - ;; maps trans-keys to kernels - (automaton-term null) - (automaton-non-term null) - - ;; keeps the kernels we have seen, so we can have a unique - ;; list for each kernel - (kernels (make-hash-table 'equal)) - - (counter 0) - - ;; goto: LR1-item list -> LR1-item list list - ;; creates new kernels by moving the dot in each item in the - ;; LR0-closure of kernel to the right, and grouping them by - ;; the term/non-term moved over. Returns the kernels not - ;; yet seen, and places the trans-keys into automaton - (goto - (lambda (kernel) - (let ( - ;; maps a gram-syms to a list of items - (table (make-hash-table)) - - ;; add-item!: - ;; (symbol (listof item) hashtable) item? -> - ;; adds i into the table grouped with the grammar - ;; symbol following its dot - (add-item! - (lambda (table i) - (let ((gs (sym-at-dot i))) - (cond - (gs - (let ((already - (hash-table-get table - (gram-sym-symbol gs) - (lambda () null)))) - (unless (member i already) - (hash-table-put! table - (gram-sym-symbol gs) - (cons i already))))) - ((= 0 (vector-length (prod-rhs (item-prod i)))) - (let ((current (hash-table-get epsilons - kernel - (lambda () null)))) - (hash-table-put! epsilons - kernel - (cons i current))))))))) - - ;; Group the items of the LR0 closure of the kernel - ;; by the character after the dot - (for-each (lambda (item) - (add-item! table item)) - (LR0-closure (kernel-items kernel))) - - ;; each group is a new kernel, with the dot advanced. - ;; sorts the items in a kernel so kernels can be compared - ;; with equal? 
for using the table kernels to make sure - ;; only one representitive of each kernel is created - (filter - (lambda (x) x) - (map - (lambda (i) - (let* ((gs (car i)) - (items (cadr i)) - (new #f) - (new-kernel (sort - (filter (lambda (x) x) - (map move-dot-right items)) - item ~a on ~a\n" - (kernel->string kernel) - (kernel->string unique-kernel) - (gram-sym-symbol gs)) - (if new - unique-kernel - #f))) - (let loop ((gsyms grammar-symbols)) - (cond - ((null? gsyms) null) - (else - (let ((items (hash-table-get table - (gram-sym-symbol (car gsyms)) - (lambda () null)))) - (cond - ((null? items) (loop (cdr gsyms))) - (else - (cons (list (car gsyms) items) - (loop (cdr gsyms)))))))))))))) - - (starts - (map (lambda (init-prod) (list (make-item init-prod 0))) - (send grammar get-init-prods))) - (startk - (map (lambda (start) - (let ((k (make-kernel start counter))) - (hash-table-put! kernels start k) - (set! counter (add1 counter)) - k)) - starts)) - (new-kernels (make-queue))) - - (let loop ((old-kernels startk) - (seen-kernels null)) - (cond - ((and (empty-queue? new-kernels) (null? old-kernels)) - (make-object lr0% - automaton-term - automaton-non-term - (list->vector (reverse seen-kernels)) - epsilons)) - ((null? old-kernels) - (loop (deq! new-kernels) seen-kernels)) - (else - (enq! new-kernels (goto (car old-kernels))) - (loop (cdr old-kernels) (cons (car old-kernels) seen-kernels))))))) - - (define-struct q (f l) (make-inspector)) - (define (empty-queue? q) - (null? (q-f q))) - (define (make-queue) - (make-q null null)) - (define (enq! q i) - (if (empty-queue? q) - (let ((i (mcons i null))) - (set-q-l! q i) - (set-q-f! q i)) - (begin - (set-mcdr! (q-l q) (mcons i null)) - (set-q-l! q (mcdr (q-l q)))))) - (define (deq! q) - (begin0 - (mcar (q-f q)) - (set-q-f! q (mcdr (q-f q))))) - -) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt deleted file mode 100644 index 2a39b36..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt +++ /dev/null @@ -1,54 +0,0 @@ -(module parser-actions mzscheme - (require "grammar.rkt") - (provide (all-defined-except make-reduce make-reduce*) - (rename make-reduce* make-reduce)) - - ;; An action is - ;; - (make-shift int) - ;; - (make-reduce prod runtime-action) - ;; - (make-accept) - ;; - (make-goto int) - ;; - (no-action) - ;; A reduce contains a runtime-reduce so that sharing of the reduces can - ;; be easily transferred to sharing of runtime-reduces. - - (define-struct action () (make-inspector)) - (define-struct (shift action) (state) (make-inspector)) - (define-struct (reduce action) (prod runtime-reduce) (make-inspector)) - (define-struct (accept action) () (make-inspector)) - (define-struct (goto action) (state) (make-inspector)) - (define-struct (no-action action) () (make-inspector)) - - (define (make-reduce* p) - (make-reduce p - (vector (prod-index p) - (gram-sym-symbol (prod-lhs p)) - (vector-length (prod-rhs p))))) - - ;; A runtime-action is - ;; non-negative-int (shift) - ;; (vector int symbol int) (reduce) - ;; 'accept (accept) - ;; negative-int (goto) - ;; #f (no-action) - - (define (action->runtime-action a) - (cond - ((shift? a) (shift-state a)) - ((reduce? a) (reduce-runtime-reduce a)) - ((accept? a) 'accept) - ((goto? a) (- (+ (goto-state a) 1))) - ((no-action? a) #f))) - - (define (runtime-shift? x) (and (integer? x) (>= x 0))) - (define runtime-reduce? vector?) 
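An illustrative aside, not part of the deleted file: given the runtime-action encoding documented in the comment above, concrete parse-table entries decode as shown below. The predicates and accessors are the module-internal ones defined in parser-actions.rkt, and the lhs symbol expr and the index 7 are made-up example values, so this is explanatory rather than a public-API example.

;; Illustrative only; assumes the module-internal helpers defined in this file.
;; A shift is stored as the target state itself.
(runtime-shift? 5)                       ; => #t
(runtime-shift-state 5)                  ; => 5
;; A reduce is stored as #(production-index lhs-symbol rhs-length).
(runtime-reduce? #(7 expr 3))            ; => #t
(runtime-reduce-lhs #(7 expr 3))         ; => 'expr
(runtime-reduce-rhs-length #(7 expr 3))  ; => 3
;; A goto to state 3 is stored as -(3 + 1) = -4.
(runtime-goto? -4)                       ; => #t
(runtime-goto-state -4)                  ; => 3
;; Accept and no-action are the symbol 'accept and #f respectively.
(runtime-accept? 'accept)                ; => #t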
- (define (runtime-accept? x) (eq? x 'accept)) - (define (runtime-goto? x) (and (integer? x) (< x 0))) - - (define runtime-shift-state values) - (define (runtime-reduce-prod-num x) (vector-ref x 0)) - (define (runtime-reduce-lhs x) (vector-ref x 1)) - (define (runtime-reduce-rhs-length x) (vector-ref x 2)) - (define (runtime-goto-state x) (- (+ x 1))) - - ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt deleted file mode 100644 index 1be421c..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt +++ /dev/null @@ -1,113 +0,0 @@ -(module parser-builder mzscheme - - (require "input-file-parser.rkt" - "grammar.rkt" - "table.rkt" - mzlib/class - racket/contract) - (require-for-template mzscheme) - - (provide/contract - (build-parser (-> string? any/c any/c - (listof identifier?) - (listof identifier?) - (listof identifier?) - (or/c syntax? #f) - syntax? - (values any/c any/c any/c any/c)))) - - ;; fix-check-syntax : (listof identifier?) (listof identifier?) (listof identifier?) - ;; (union syntax? false/c) syntax?) -> syntax? - (define (fix-check-syntax input-terms start ends assocs prods) - (let* ((term-binders (get-term-list input-terms)) - (get-term-binder - (let ((t (make-hash-table))) - (for-each - (lambda (term) - (hash-table-put! t (syntax-e term) term)) - term-binders) - (lambda (x) - (let ((r (hash-table-get t (syntax-e x) (lambda () #f)))) - (if r - (syntax-local-introduce (datum->syntax-object r (syntax-e x) x x)) - x))))) - (rhs-list - (syntax-case prods () - (((_ rhs ...) ...) - (syntax->list (syntax (rhs ... ...))))))) - (with-syntax (((tmp ...) (map syntax-local-introduce term-binders)) - ((term-group ...) - (map (lambda (tg) - (syntax-property - (datum->syntax-object tg #f) - 'disappeared-use - tg)) - input-terms)) - ((end ...) - (map get-term-binder ends)) - ((start ...) - (map get-term-binder start)) - ((bind ...) - (syntax-case prods () - (((bind _ ...) ...) - (syntax->list (syntax (bind ...)))))) - (((bound ...) ...) - (map - (lambda (rhs) - (syntax-case rhs () - (((bound ...) (_ pbound) __) - (map get-term-binder - (cons (syntax pbound) - (syntax->list (syntax (bound ...)))))) - (((bound ...) _) - (map get-term-binder - (syntax->list (syntax (bound ...))))))) - rhs-list)) - ((prec ...) - (if assocs - (map get-term-binder - (syntax-case assocs () - (((__ term ...) ...) - (syntax->list (syntax (term ... ...)))))) - null))) - #`(when #f - (let ((bind void) ... (tmp void) ...) - (void bound ... ... term-group ... start ... end ... prec ...)))))) - (require mzlib/list "parser-actions.rkt") - (define (build-parser filename src-pos suppress input-terms start end assocs prods) - (let* ((grammar (parse-input input-terms start end assocs prods src-pos)) - (table (build-table grammar filename suppress)) - (all-tokens (make-hash-table)) - (actions-code - `(vector ,@(map prod-action (send grammar get-prods))))) - (for-each (lambda (term) - (hash-table-put! all-tokens (gram-sym-symbol term) #t)) - (send grammar get-terms)) - #;(let ((num-states (vector-length table)) - (num-gram-syms (+ (send grammar get-num-terms) - (send grammar get-num-non-terms))) - (num-ht-entries (apply + (map length (vector->list table)))) - (num-reduces - (let ((ht (make-hash-table))) - (for-each - (lambda (x) - (when (reduce? x) - (hash-table-put! 
ht x #t))) - (map cdr (apply append (vector->list table)))) - (length (hash-table-map ht void))))) - (printf "~a states, ~a grammar symbols, ~a hash-table entries, ~a reduces\n" - num-states num-gram-syms num-ht-entries num-reduces) - (printf "~a -- ~aKB, previously ~aKB\n" - (/ (+ 2 num-states - (* 4 num-states) (* 2 1.5 num-ht-entries) - (* 5 num-reduces)) 256.0) - (/ (+ 2 num-states - (* 4 num-states) (* 2 2.3 num-ht-entries) - (* 5 num-reduces)) 256.0) - (/ (+ 2 (* num-states num-gram-syms) (* 5 num-reduces)) 256.0))) - (values table - all-tokens - actions-code - (fix-check-syntax input-terms start end assocs prods)))) - - ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt deleted file mode 100644 index f97e4d2..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt +++ /dev/null @@ -1,290 +0,0 @@ -#lang scheme/base - - ;; Routine to build the LALR table - - (require "grammar.rkt" - "lr0.rkt" - "lalr.rkt" - "parser-actions.rkt" - racket/contract - mzlib/list - mzlib/class) - - (define (is-a-grammar%? x) (is-a? x grammar%)) - (provide/contract - (build-table (-> is-a-grammar%? string? any/c - (vectorof (listof (cons/c (or/c term? non-term?) action?)))))) - - ;; A parse-table is (vectorof (listof (cons/c gram-sym? action))) - ;; A grouped-parse-table is (vectorof (listof (cons/c gram-sym? (listof action)))) - - ;; make-parse-table : int -> parse-table - (define (make-parse-table num-states) - (make-vector num-states null)) - - ;; table-add!: parse-table nat symbol action -> - (define (table-add! table state-index symbol val) - (vector-set! table state-index (cons (cons symbol val) - (vector-ref table state-index)))) - - ;; group-table : parse-table -> grouped-parse-table - (define (group-table table) - (list->vector - (map - (lambda (state-entry) - (let ((ht (make-hash))) - (for-each - (lambda (gs/actions) - (let ((group (hash-ref ht (car gs/actions) (lambda () null)))) - (unless (member (cdr gs/actions) group) - (hash-set! ht (car gs/actions) (cons (cdr gs/actions) group))))) - state-entry) - (hash-map ht cons))) - (vector->list table)))) - - ;; table-map : (vectorof (listof (cons/c gram-sym? X))) (gram-sym? X -> Y) -> - ;; (vectorof (listof (cons/c gram-sym? Y))) - (define (table-map f table) - (list->vector - (map - (lambda (state-entry) - (map - (lambda (gs/X) - (cons (car gs/X) (f (car gs/X) (cdr gs/X)))) - state-entry)) - (vector->list table)))) - - - (define (bit-vector-for-each f bv) - (letrec ((for-each - (lambda (bv number) - (cond - ((= 0 bv) (void)) - ((= 1 (bitwise-and 1 bv)) - (f number) - (for-each (arithmetic-shift bv -1) (add1 number))) - (else (for-each (arithmetic-shift bv -1) (add1 number))))))) - (for-each bv 0))) - - - ;; print-entry: symbol action output-port -> - ;; prints the action a for lookahead sym to the given port - (define (print-entry sym a port) - (let ((s "\t~a\t\t\t\t\t~a\t~a\n")) - (cond - ((shift? a) - (fprintf port s sym "shift" (shift-state a))) - ((reduce? a) - (fprintf port s sym "reduce" (prod-index (reduce-prod a)))) - ((accept? a) - (fprintf port s sym "accept" "")) - ((goto? a) - (fprintf port s sym "goto" (goto-state a)))))) - - - ;; count: ('a -> bool) * 'a list -> num - ;; counts the number of elements in list that satisfy pred - (define (count pred list) - (cond - ((null? 
list) 0) - ((pred (car list)) (+ 1 (count pred (cdr list)))) - (else (count pred (cdr list))))) - - ;; display-parser: LR0-automaton grouped-parse-table (listof prod?) output-port -> - ;; Prints out the parser given by table. - (define (display-parser a grouped-table prods port) - (let* ((SR-conflicts 0) - (RR-conflicts 0)) - (for-each - (lambda (prod) - (fprintf port - "~a\t~a\t=\t~a\n" - (prod-index prod) - (gram-sym-symbol (prod-lhs prod)) - (map gram-sym-symbol (vector->list (prod-rhs prod))))) - prods) - (send a for-each-state - (lambda (state) - (fprintf port "State ~a\n" (kernel-index state)) - (for-each (lambda (item) - (fprintf port "\t~a\n" (item->string item))) - (kernel-items state)) - (newline port) - (for-each - (lambda (gs/action) - (let ((sym (gram-sym-symbol (car gs/action))) - (act (cdr gs/action))) - (cond - ((null? act) (void)) - ((null? (cdr act)) - (print-entry sym (car act) port)) - (else - (fprintf port "begin conflict:\n") - (when (> (count reduce? act) 1) - (set! RR-conflicts (add1 RR-conflicts))) - (when (> (count shift? act) 0) - (set! SR-conflicts (add1 SR-conflicts))) - (map (lambda (x) (print-entry sym x port)) act) - (fprintf port "end conflict\n"))))) - (vector-ref grouped-table (kernel-index state))) - (newline port))) - - (when (> SR-conflicts 0) - (fprintf port "~a shift/reduce conflict~a\n" - SR-conflicts - (if (= SR-conflicts 1) "" "s"))) - (when (> RR-conflicts 0) - (fprintf port "~a reduce/reduce conflict~a\n" - RR-conflicts - (if (= RR-conflicts 1) "" "s"))))) - - ;; resolve-conflict : (listof action?) -> action? bool bool - (define (resolve-conflict actions) - (cond - ((null? actions) (values (make-no-action) #f #f)) - ((null? (cdr actions)) - (values (car actions) #f #f)) - (else - (let ((SR-conflict? (> (count shift? actions) 0)) - (RR-conflict? (> (count reduce? actions) 1))) - (let loop ((current-guess #f) - (rest actions)) - (cond - ((null? rest) (values current-guess SR-conflict? RR-conflict?)) - ((shift? (car rest)) (values (car rest) SR-conflict? RR-conflict?)) - ((not current-guess) - (loop (car rest) (cdr rest))) - ((and (reduce? (car rest)) - (< (prod-index (reduce-prod (car rest))) - (prod-index (reduce-prod current-guess)))) - (loop (car rest) (cdr rest))) - ((accept? (car rest)) - (eprintf "accept/reduce or accept/shift conflicts. Check the grammar for useless cycles of productions\n") - (loop current-guess (cdr rest))) - (else (loop current-guess (cdr rest))))))))) - - ;; resolve-conflicts : grouped-parse-table bool -> parse-table - (define (resolve-conflicts grouped-table suppress) - (let* ((SR-conflicts 0) - (RR-conflicts 0) - (table (table-map - (lambda (gs actions) - (let-values (((action SR? RR?) - (resolve-conflict actions))) - (when SR? - (set! SR-conflicts (add1 SR-conflicts))) - (when RR? - (set! RR-conflicts (add1 RR-conflicts))) - action)) - grouped-table))) - (unless suppress - (when (> SR-conflicts 0) - (eprintf "~a shift/reduce conflict~a\n" - SR-conflicts - (if (= SR-conflicts 1) "" "s"))) - (when (> RR-conflicts 0) - (eprintf "~a reduce/reduce conflict~a\n" - RR-conflicts - (if (= RR-conflicts 1) "" "s")))) - table)) - - - ;; resolve-sr-conflict : (listof action) (union int #f) -> (listof action) - ;; Resolves a single shift-reduce conflict, if precedences are in place. - (define (resolve-sr-conflict/prec actions shift-prec) - (let* ((shift (if (shift? (car actions)) - (car actions) - (cadr actions))) - (reduce (if (shift? 
(car actions)) - (cadr actions) - (car actions))) - (reduce-prec (prod-prec (reduce-prod reduce)))) - (cond - ((and shift-prec reduce-prec) - (cond - ((< (prec-num shift-prec) (prec-num reduce-prec)) - (list reduce)) - ((> (prec-num shift-prec) (prec-num reduce-prec)) - (list shift)) - ((eq? 'left (prec-assoc shift-prec)) - (list reduce)) - ((eq? 'right (prec-assoc shift-prec)) - (list shift)) - (else null))) - (else actions)))) - - - ;; resolve-prec-conflicts : parse-table -> grouped-parse-table - (define (resolve-prec-conflicts table) - (table-map - (lambda (gs actions) - (cond - ((and (term? gs) - (= 2 (length actions)) - (or (shift? (car actions)) - (shift? (cadr actions)))) - (resolve-sr-conflict/prec actions (term-prec gs))) - (else actions))) - (group-table table))) - - ;; build-table: grammar string bool -> parse-table - (define (build-table g file suppress) - (let* ((a (build-lr0-automaton g)) - (term-vector (list->vector (send g get-terms))) - (end-terms (send g get-end-terms)) - (table (make-parse-table (send a get-num-states))) - (get-lookahead (compute-LA a g)) - (reduce-cache (make-hash))) - - (for-each - (lambda (trans-key/state) - (let ((from-state-index (kernel-index (trans-key-st (car trans-key/state)))) - (gs (trans-key-gs (car trans-key/state))) - (to-state (cdr trans-key/state))) - (table-add! table from-state-index gs - (cond - ((non-term? gs) - (make-goto (kernel-index to-state))) - ((member gs end-terms) - (make-accept)) - (else - (make-shift - (kernel-index to-state))))))) - (send a get-transitions)) - - (send a for-each-state - (lambda (state) - (for-each - (lambda (item) - (let ((item-prod (item-prod item))) - (bit-vector-for-each - (lambda (term-index) - (unless (start-item? item) - (let ((r (hash-ref reduce-cache item-prod - (lambda () - (let ((r (make-reduce item-prod))) - (hash-set! reduce-cache item-prod r) - r))))) - (table-add! table - (kernel-index state) - (vector-ref term-vector term-index) - r)))) - (get-lookahead state item-prod)))) - (append (hash-ref (send a get-epsilon-trans) state (lambda () null)) - (filter (lambda (item) - (not (move-dot-right item))) - (kernel-items state)))))) - - (let ((grouped-table (resolve-prec-conflicts table))) - (unless (string=? file "") - (with-handlers [(exn:fail:filesystem? - (lambda (e) - (eprintf - "Cannot write debug output to file \"~a\": ~a\n" - file - (exn-message e))))] - (call-with-output-file file - (lambda (port) - (display-parser a grouped-table (send g get-prods) port)) - #:exists 'truncate))) - (resolve-conflicts grouped-table suppress)))) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt deleted file mode 100644 index 31b3cc6..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt +++ /dev/null @@ -1,118 +0,0 @@ -(module yacc-helper mzscheme - - (require mzlib/list - "../private-lex/token-syntax.rkt") - - ;; General helper routines - - (provide duplicate-list? remove-duplicates overlap? vector-andmap display-yacc) - - (define (vector-andmap f v) - (let loop ((i 0)) - (cond - ((= i (vector-length v)) #t) - (else (if (f (vector-ref v i)) - (loop (add1 i)) - #f))))) - - ;; duplicate-list?: symbol list -> #f | symbol - ;; returns a symbol that exists twice in l, or false if no such symbol - ;; exists - (define (duplicate-list? l) - (letrec ((t (make-hash-table)) - (dl? (lambda (l) - (cond - ((null? 
l) #f) - ((hash-table-get t (car l) (lambda () #f)) => - (lambda (x) x)) - (else - (hash-table-put! t (car l) (car l)) - (dl? (cdr l))))))) - (dl? l))) - - ;; remove-duplicates: syntax-object list -> syntax-object list - ;; removes the duplicates from the lists - (define (remove-duplicates sl) - (let ((t (make-hash-table))) - (letrec ((x - (lambda (sl) - (cond - ((null? sl) sl) - ((hash-table-get t (syntax-object->datum (car sl)) (lambda () #f)) - (x (cdr sl))) - (else - (hash-table-put! t (syntax-object->datum (car sl)) #t) - (cons (car sl) (x (cdr sl)))))))) - (x sl)))) - - ;; overlap?: symbol list * symbol list -> #f | symbol - ;; Returns an symbol in l1 intersect l2, or #f is no such symbol exists - (define (overlap? l1 l2) - (let/ec ret - (let ((t (make-hash-table))) - (for-each (lambda (s1) - (hash-table-put! t s1 s1)) - l1) - (for-each (lambda (s2) - (cond - ((hash-table-get t s2 (lambda () #f)) => - (lambda (o) (ret o))))) - l2) - #f))) - - - (define (display-yacc grammar tokens start precs port) - (let-syntax ((p (syntax-rules () - ((_ args ...) (fprintf port args ...))))) - (let* ((tokens (map syntax-local-value tokens)) - (eterms (filter e-terminals-def? tokens)) - (terms (filter terminals-def? tokens)) - (term-table (make-hash-table)) - (display-rhs - (lambda (rhs) - (for-each (lambda (sym) (p "~a " (hash-table-get term-table sym (lambda () sym)))) - (car rhs)) - (if (= 3 (length rhs)) - (p "%prec ~a" (cadadr rhs))) - (p "\n")))) - (for-each - (lambda (t) - (for-each - (lambda (t) - (hash-table-put! term-table t (format "'~a'" t))) - (syntax-object->datum (e-terminals-def-t t)))) - eterms) - (for-each - (lambda (t) - (for-each - (lambda (t) - (p "%token ~a\n" t) - (hash-table-put! term-table t (format "~a" t))) - (syntax-object->datum (terminals-def-t t)))) - terms) - (if precs - (for-each (lambda (prec) - (p "%~a " (car prec)) - (for-each (lambda (tok) - (p " ~a" (hash-table-get term-table tok))) - (cdr prec)) - (p "\n")) - precs)) - (p "%start ~a\n" start) - (p "%%\n") - - (for-each (lambda (prod) - (let ((nt (car prod))) - (p "~a: " nt) - (display-rhs (cadr prod)) - (for-each (lambda (rhs) - (p "| ") - (display-rhs rhs)) - (cddr prod)) - (p ";\n"))) - grammar) - (p "%%\n")))) - - -) - diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt deleted file mode 100644 index 7f766eb..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt +++ /dev/null @@ -1,135 +0,0 @@ -(module yacc-to-scheme mzscheme - (require br-parser-tools/lex - (prefix : br-parser-tools/lex-sre) - br-parser-tools/yacc - syntax/readerr - mzlib/list) - (provide trans) - - (define match-double-string - (lexer - ((:+ (:~ #\" #\\)) (append (string->list lexeme) - (match-double-string input-port))) - ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port))) - (#\" null))) - - (define match-single-string - (lexer - ((:+ (:~ #\' #\\)) (append (string->list lexeme) - (match-single-string input-port))) - ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port))) - (#\' null))) - - (define-lex-abbrevs - (letter (:or (:/ "a" "z") (:/ "A" "Z"))) - (digit (:/ "0" "9")) - (initial (:or letter (char-set "!$%&*/<=>?^_~@"))) - (subsequent (:or initial digit (char-set "+-.@"))) - (comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/"))) - - (define-empty-tokens x - (EOF PIPE |:| SEMI |%%| %prec)) - (define-tokens y - (SYM STRING)) - - 
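Another illustrative aside, not part of the deleted files: the contracts documented in yacc-helper.rkt above can be read off from a few concrete calls. These helpers are module-internal, and the token names used here are arbitrary examples.

;; Illustrative only; assumes the module-internal helpers from yacc-helper.rkt.
(duplicate-list? '(NUM PLUS NUM))      ; => 'NUM   (a symbol occurring twice)
(duplicate-list? '(NUM PLUS MINUS))    ; => #f     (no duplicates)
(overlap? '(NUM PLUS) '(TIMES PLUS))   ; => 'PLUS  (a symbol in both lists)
(overlap? '(NUM) '(TIMES))             ; => #f
(vector-andmap symbol? #(a b c))       ; => #t     (every element satisfies the predicate)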
(define get-token-grammar - (lexer-src-pos - ("%%" '|%%|) - (":" (string->symbol lexeme)) - ("%prec" (string->symbol lexeme)) - (#\| 'PIPE) - ((:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}"))) - (return-without-pos (get-token-grammar input-port))) - (#\; 'SEMI) - (#\' (token-STRING (string->symbol (list->string (match-single-string input-port))))) - (#\" (token-STRING (string->symbol (list->string (match-double-string input-port))))) - ((:: initial (:* subsequent)) (token-SYM (string->symbol lexeme))))) - - (define (parse-grammar enter-term enter-empty-term enter-non-term) - (parser - (tokens x y) - (src-pos) - (error (lambda (tok-ok tok-name tok-value start-pos end-pos) - (raise-read-error - (format "Error Parsing YACC grammar at token: ~a with value: ~a" tok-name tok-value) - (file-path) - (position-line start-pos) - (position-col start-pos) - (position-offset start-pos) - (- (position-offset end-pos) (position-offset start-pos))))) - - (end |%%|) - (start gram) - (grammar - (gram - ((production) (list $1)) - ((production gram) (cons $1 $2))) - (production - ((SYM |:| prods SEMI) - (begin - (enter-non-term $1) - (cons $1 $3)))) - (prods - ((rhs) (list `(,$1 #f))) - ((rhs prec) (list `(,$1 ,$2 #f))) - ((rhs PIPE prods) (cons `(,$1 #f) $3)) - ((rhs prec PIPE prods) (cons `(,$1 ,$2 #f) $4))) - (prec - ((%prec SYM) - (begin - (enter-term $2) - (list 'prec $2))) - ((%prec STRING) - (begin - (enter-empty-term $2) - (list 'prec $2)))) - (rhs - (() null) - ((SYM rhs) - (begin - (enter-term $1) - (cons $1 $2))) - ((STRING rhs) - (begin - (enter-empty-term $1) - (cons $1 $2))))))) - - (define (symbolstring a) (symbol->string b))) - - (define (trans filename) - (let* ((i (open-input-file filename)) - (terms (make-hash-table)) - (eterms (make-hash-table)) - (nterms (make-hash-table)) - (enter-term - (lambda (s) - (if (not (hash-table-get nterms s (lambda () #f))) - (hash-table-put! terms s #t)))) - (enter-empty-term - (lambda (s) - (if (not (hash-table-get nterms s (lambda () #f))) - (hash-table-put! eterms s #t)))) - (enter-non-term - (lambda (s) - (hash-table-remove! terms s) - (hash-table-remove! eterms s) - (hash-table-put! nterms s #t)))) - (port-count-lines! i) - (file-path filename) - (regexp-match "%%" i) - (begin0 - (let ((gram ((parse-grammar enter-term enter-empty-term enter-non-term) - (lambda () - (let ((t (get-token-grammar i))) - t))))) - `(begin - (define-tokens t ,(sort (hash-table-map terms (lambda (k v) k)) symbol -;; (vectorof (symbol runtime-action hashtable)) -(define-for-syntax (convert-parse-table table) - (list->vector - (map - (lambda (state-entry) - (let ((ht (make-hasheq))) - (for-each - (lambda (gs/action) - (hash-set! ht - (gram-sym-symbol (car gs/action)) - (action->runtime-action (cdr gs/action)))) - state-entry) - ht)) - (vector->list table)))) - -(define-syntax (parser stx) - (syntax-case stx () - ((_ args ...) - (let ((arg-list (syntax->list (syntax (args ...)))) - (src-pos #f) - (debug #f) - (error #f) - (tokens #f) - (start #f) - (end #f) - (precs #f) - (suppress #f) - (grammar #f) - (yacc-output #f)) - (for-each - (lambda (arg) - (syntax-case* arg (debug error tokens start end precs grammar - suppress src-pos yacc-output) - (lambda (a b) - (eq? (syntax-e a) (syntax-e b))) - ((debug filename) - (cond - ((not (string? (syntax-e (syntax filename)))) - (raise-syntax-error - #f - "Debugging filename must be a string" - stx - (syntax filename))) - (debug - (raise-syntax-error #f "Multiple debug declarations" stx)) - (else - (set! 
debug (syntax-e (syntax filename)))))) - ((suppress) - (set! suppress #t)) - ((src-pos) - (set! src-pos #t)) - ((error expression) - (if error - (raise-syntax-error #f "Multiple error declarations" stx) - (set! error (syntax expression)))) - ((tokens def ...) - (begin - (when tokens - (raise-syntax-error #f "Multiple tokens declarations" stx)) - (let ((defs (syntax->list (syntax (def ...))))) - (for-each - (lambda (d) - (unless (identifier? d) - (raise-syntax-error - #f - "Token-group name must be an identifier" - stx - d))) - defs) - (set! tokens defs)))) - ((start symbol ...) - (let ((symbols (syntax->list (syntax (symbol ...))))) - (for-each - (lambda (sym) - (unless (identifier? sym) - (raise-syntax-error #f - "Start symbol must be a symbol" - stx - sym))) - symbols) - (when start - (raise-syntax-error #f "Multiple start declarations" stx)) - (when (null? symbols) - (raise-syntax-error #f - "Missing start symbol" - stx - arg)) - (set! start symbols))) - ((end symbols ...) - (let ((symbols (syntax->list (syntax (symbols ...))))) - (for-each - (lambda (sym) - (unless (identifier? sym) - (raise-syntax-error #f - "End token must be a symbol" - stx - sym))) - symbols) - (let ((d (duplicate-list? (map syntax-e symbols)))) - (when d - (raise-syntax-error - #f - (format "Duplicate end token definition for ~a" d) - stx - arg)) - (when (null? symbols) - (raise-syntax-error - #f - "end declaration must contain at least 1 token" - stx - arg)) - (when end - (raise-syntax-error #f "Multiple end declarations" stx)) - (set! end symbols)))) - ((precs decls ...) - (if precs - (raise-syntax-error #f "Multiple precs declarations" stx) - (set! precs (syntax/loc arg (decls ...))))) - ((grammar prods ...) - (if grammar - (raise-syntax-error #f "Multiple grammar declarations" stx) - (set! grammar (syntax/loc arg (prods ...))))) - ((yacc-output filename) - (cond - ((not (string? (syntax-e (syntax filename)))) - (raise-syntax-error #f - "Yacc-output filename must be a string" - stx - (syntax filename))) - (yacc-output - (raise-syntax-error #f "Multiple yacc-output declarations" stx)) - (else - (set! yacc-output (syntax-e (syntax filename)))))) - (_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg)))) - (syntax->list (syntax (args ...)))) - (unless tokens - (raise-syntax-error #f "missing tokens declaration" stx)) - (unless error - (raise-syntax-error #f "missing error declaration" stx)) - (unless grammar - (raise-syntax-error #f "missing grammar declaration" stx)) - (unless end - (raise-syntax-error #f "missing end declaration" stx)) - (unless start - (raise-syntax-error #f "missing start declaration" stx)) - (let-values (((table all-term-syms actions check-syntax-fix) - (build-parser (if debug debug "") - src-pos - suppress - tokens - start - end - precs - grammar))) - (when (and yacc-output (not (string=? yacc-output ""))) - (with-handlers [(exn:fail:filesystem? 
- (lambda (e) - (eprintf - "Cannot write yacc-output to file \"~a\"\n" - yacc-output)))] - (call-with-output-file yacc-output - (lambda (port) - (display-yacc (syntax->datum grammar) - tokens - (map syntax->datum start) - (if precs - (syntax->datum precs) - #f) - port)) - #:exists 'truncate))) - (with-syntax ((check-syntax-fix check-syntax-fix) - (err error) - (ends end) - (starts start) - (debug debug) - (table (convert-parse-table table)) - (all-term-syms all-term-syms) - (actions actions) - (src-pos src-pos)) - (syntax - (begin - check-syntax-fix - (parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos))))))) - (_ - (raise-syntax-error #f - "parser must have the form (parser args ...)" - stx)))) - -(define (reduce-stack stack num ret-vals src-pos) - (cond - ((> num 0) - (let* ((top-frame (car stack)) - (ret-vals - (if src-pos - (cons (stack-frame-value top-frame) - (cons (stack-frame-start-pos top-frame) - (cons (stack-frame-end-pos top-frame) - ret-vals))) - (cons (stack-frame-value top-frame) ret-vals)))) - (reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))) - (else (values stack ret-vals)))) - -;; extract-helper : (symbol or make-token) any any -> symbol any any any -(define (extract-helper tok v1 v2) - (cond - ((symbol? tok) - (values tok #f v1 v2)) - ((token? tok) - (values (real-token-name tok) (real-token-value tok) v1 v2)) - (else (raise-argument-error 'parser - "(or/c symbol? token?)" - 0 - tok)))) - -;; well-formed-position-token?: any -> boolean -;; Returns true if pt is a position token whose position-token-token -;; is itself a token or a symbol. -;; This is meant to help raise more precise error messages when -;; a tokenizer produces an erroneous position-token wrapped twice. -;; (as often happens when omitting return-without-pos). -(define (well-formed-token-field? t) - (or (symbol? t) - (token? t))) - -(define (well-formed-position-token? pt) - (and (position-token? pt) - (well-formed-token-field? (position-token-token pt)))) - -(define (well-formed-srcloc-token? st) - (and (srcloc-token? st) - (well-formed-token-field? (srcloc-token-token st)))) - -;; extract-src-pos : position-token -> symbol any any any -(define (extract-src-pos ip) - (unless (well-formed-position-token? ip) - (raise-argument-error 'parser - "well-formed-position-token?" - 0 - ip)) - (extract-helper (position-token-token ip) - (position-token-start-pos ip) - (position-token-end-pos ip))) - -(define (extract-srcloc ip) - (unless (well-formed-srcloc-token? ip) - (raise-argument-error 'parser - "well-formed-srcloc-token?" - 0 - ip)) - (let ([loc (srcloc-token-srcloc ip)]) - (extract-helper (srcloc-token-token ip) - (position-token (srcloc-position loc) (srcloc-line loc) (srcloc-column loc)) - (position-token (+ (srcloc-position loc) (srcloc-span loc)) #f #f)))) - - -;; extract-no-src-pos : (symbol or make-token) -> symbol any any any -(define (extract-no-src-pos ip) - (extract-helper ip #f #f)) - -(define-struct stack-frame (state value start-pos end-pos) #:inspector (make-inspector)) - -(define (make-empty-stack i) (list (make-stack-frame i #f #f #f))) - - -;; The table is a vector that maps each state to a hash-table that maps a -;; terminal symbol to either an accept, shift, reduce, or goto structure. -; We encode the structures according to the runtime-action data definition in -;; parser-actions.rkt -(define (parser-body debug? 
err starts ends table all-term-syms actions src-pos) - (local ((define extract - (if src-pos - extract-src-pos - extract-no-src-pos)) - - (define (fix-error stack tok val start-pos end-pos get-token) - (when debug? (pretty-print stack)) - (local ((define (remove-input tok val start-pos end-pos) - (if (memq tok ends) - (raise-read-error "parser: Cannot continue after error" - #f #f #f #f #f) - (let ((a (find-action stack tok val start-pos end-pos))) - (cond - ((runtime-shift? a) - ;; (printf "shift:~a\n" (runtime-shift-state a)) - (cons (make-stack-frame (runtime-shift-state a) - val - start-pos - end-pos) - stack)) - (else - ;; (printf "discard input:~a\n" tok) - (let-values (((tok val start-pos end-pos) - (extract (get-token)))) - (remove-input tok val start-pos end-pos)))))))) - (let remove-states () - (let ((a (find-action stack 'error #f start-pos end-pos))) - (cond - ((runtime-shift? a) - ;; (printf "shift:~a\n" (runtime-shift-state a)) - (set! stack - (cons - (make-stack-frame (runtime-shift-state a) - #f - start-pos - end-pos) - stack)) - (remove-input tok val start-pos end-pos)) - (else - ;; (printf "discard state:~a\n" (car stack)) - (cond - ((< (length stack) 2) - (raise-read-error "parser: Cannot continue after error" - #f #f #f #f #f)) - (else - (set! stack (cdr stack)) - (remove-states))))))))) - - (define (find-action stack tok val start-pos end-pos) - (unless (hash-ref all-term-syms - tok - #f) - (if src-pos - (err #f tok val start-pos end-pos) - (err #f tok val)) - (raise-read-error (format "parser: got token of unknown type ~a" tok) - #f #f #f #f #f)) - (hash-ref (vector-ref table (stack-frame-state (car stack))) - tok - #f)) - - (define (make-parser start-number) - (lambda (get-token) - (unless (and (procedure? get-token) - (procedure-arity-includes? get-token 0)) - (error 'get-token "expected a nullary procedure, got ~e" get-token)) - (let parsing-loop ((stack (make-empty-stack start-number)) - (ip (get-token))) - (let-values (((tok val start-pos end-pos) - (extract ip))) - (let ((action (find-action stack tok val start-pos end-pos))) - (cond - ((runtime-shift? action) - ;; (printf "shift:~a\n" (runtime-shift-state action)) - (parsing-loop (cons (make-stack-frame (runtime-shift-state action) - val - start-pos - end-pos) - stack) - (get-token))) - ((runtime-reduce? action) - ;; (printf "reduce:~a\n" (runtime-reduce-prod-num action)) - (let-values (((new-stack args) - (reduce-stack stack - (runtime-reduce-rhs-length action) - null - src-pos))) - (let ((goto - (runtime-goto-state - (hash-ref - (vector-ref table (stack-frame-state (car new-stack))) - (runtime-reduce-lhs action))))) - (parsing-loop - (cons - (if src-pos - (make-stack-frame - goto - (apply (vector-ref actions (runtime-reduce-prod-num action)) args) - (if (null? args) start-pos (cadr args)) - (if (null? args) - end-pos - (list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1)))) - (make-stack-frame - goto - (apply (vector-ref actions (runtime-reduce-prod-num action)) args) - #f - #f)) - new-stack) - ip)))) - ((runtime-accept? action) - ;; (printf "accept\n") - (stack-frame-value (car stack))) - (else - (if src-pos - (err #t tok val start-pos end-pos) - (err #t tok val)) - (parsing-loop (fix-error stack tok val start-pos end-pos get-token) - (get-token)))))))))) - (cond - ((null? (cdr starts)) (make-parser 0)) - (else - (let loop ((l starts) - (i 0)) - (cond - ((null? 
l) null) - (else (cons (make-parser i) (loop (cdr l) (add1 i)))))))))) diff --git a/br-parser-tools/br-parser-tools-lib/info.rkt b/br-parser-tools/br-parser-tools-lib/info.rkt deleted file mode 100644 index f9f9e11..0000000 --- a/br-parser-tools/br-parser-tools-lib/info.rkt +++ /dev/null @@ -1,11 +0,0 @@ -#lang info - -(define collection 'multi) -(define deps '("scheme-lib" - "base" - "compatibility-lib")) -(define build-deps '("rackunit-lib")) - -(define pkg-desc "implementation (no documentation) part of \"br-parser-tools\"") - -(define pkg-authors '(mflatt)) diff --git a/br-parser-tools/br-parser-tools/LICENSE.txt b/br-parser-tools/br-parser-tools/LICENSE.txt deleted file mode 100644 index d012f58..0000000 --- a/br-parser-tools/br-parser-tools/LICENSE.txt +++ /dev/null @@ -1,11 +0,0 @@ -parser-tools -Copyright (c) 2010-2014 PLT Design Inc. - -This package is distributed under the GNU Lesser General Public -License (LGPL). This means that you can link this package into proprietary -applications, provided you follow the rules stated in the LGPL. You -can also modify this package; if you distribute a modified version, -you must distribute it under the terms of the LGPL, which in -particular means that you must release the source code for the -modified software. See http://www.gnu.org/copyleft/lesser.html -for more information. diff --git a/br-parser-tools/br-parser-tools/info.rkt b/br-parser-tools/br-parser-tools/info.rkt deleted file mode 100644 index 6a692a8..0000000 --- a/br-parser-tools/br-parser-tools/info.rkt +++ /dev/null @@ -1,12 +0,0 @@ -#lang info - -(define collection 'multi) - -(define deps '("br-parser-tools-lib" - "br-parser-tools-doc")) -(define implies '("br-parser-tools-lib" - "br-parser-tools-doc")) - -(define pkg-desc "Lex- and Yacc-style parsing tools") - -(define pkg-authors '(mflatt))
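As a closing aside, not part of the diff: a minimal sketch of how the parser form implemented in yacc.rkt above is typically driven. The clause names (tokens, start, end, error, precs, grammar) are the ones the macro checks for, and the generated parser consumes a nullary token thunk, matching parser-body above; the grammar, token names, and the feeder helper are made up for illustration.

#lang racket/base
;; Minimal illustrative sketch of using the br-parser-tools `parser` form.
(require br-parser-tools/yacc
         br-parser-tools/lex)

(define-tokens value-tokens (NUM))
(define-empty-tokens op-tokens (PLUS MINUS EOF))

(define calc
  (parser
   (tokens value-tokens op-tokens)
   (start expr)
   (end EOF)
   ;; Without src-pos, the error callback receives (tok-ok? tok-name tok-value),
   ;; matching the (err #t tok val) call sites in parser-body above.
   (error (lambda (tok-ok? tok-name tok-value)
            (error 'calc "unexpected token ~a" tok-name)))
   (precs (left PLUS MINUS))
   (grammar
    (expr ((NUM) $1)
          ((expr PLUS expr) (+ $1 $3))
          ((expr MINUS expr) (- $1 $3))))))

;; The generated parser expects a nullary thunk returning one token per call.
(define (feeder toks)
  (lambda ()
    (if (null? toks)
        (token-EOF)
        (let ([t (car toks)])
          (set! toks (cdr toks))
          t))))

(calc (feeder (list (token-NUM 1) (token-PLUS) (token-NUM 2)
                    (token-MINUS) (token-NUM 4))))
; => -1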