From 48cd7f00823e811cfafb2b57e08fc2802309b2ff Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Tue, 21 Feb 2017 10:04:31 -0800 Subject: [PATCH] move brag --- LGPL.txt | 165 --- LICENSE | 3 - README.md | 21 +- beautiful-racket-lib/br/scribblings/br.scrbl | 46 - .../br-parser-tools-doc/LICENSE.txt | 11 - .../br-parser-tools/br-parser-tools.scrbl | 769 ----------- .../br-parser-tools/info.rkt | 3 - br-parser-tools/br-parser-tools-doc/info.rkt | 14 - .../br-parser-tools-lib/LICENSE.txt | 11 - .../br-parser-tools/cfg-parser.rkt | 982 -------------- .../br-parser-tools/examples/calc.rkt | 89 -- .../br-parser-tools/examples/read.rkt | 242 ---- .../br-parser-tools/info.rkt | 3 - .../br-parser-tools/lex-plt-v200.rkt | 24 - .../br-parser-tools/lex-sre.rkt | 119 -- .../br-parser-tools/lex.rkt | 412 ------ .../br-parser-tools/private-lex/actions.rkt | 16 - .../br-parser-tools/private-lex/deriv.rkt | 339 ----- .../private-lex/error-tests.rkt | 81 -- .../br-parser-tools/private-lex/front.rkt | 179 --- .../br-parser-tools/private-lex/re.rkt | 385 ------ .../br-parser-tools/private-lex/stx.rkt | 220 ---- .../private-lex/token-syntax.rkt | 9 - .../br-parser-tools/private-lex/token.rkt | 92 -- .../private-lex/unicode-chars.rkt | 69 - .../br-parser-tools/private-lex/util.rkt | 127 -- .../br-parser-tools/private-yacc/grammar.rkt | 280 ---- .../br-parser-tools/private-yacc/graph.rkt | 61 - .../private-yacc/input-file-parser.rkt | 374 ------ .../br-parser-tools/private-yacc/lalr.rkt | 277 ---- .../br-parser-tools/private-yacc/lr0.rkt | 372 ------ .../private-yacc/parser-actions.rkt | 54 - .../private-yacc/parser-builder.rkt | 113 -- .../br-parser-tools/private-yacc/table.rkt | 290 ----- .../private-yacc/yacc-helper.rkt | 118 -- .../br-parser-tools/yacc-to-scheme.rkt | 135 -- .../br-parser-tools/yacc.rkt | 412 ------ br-parser-tools/br-parser-tools-lib/info.rkt | 11 - br-parser-tools/br-parser-tools/LICENSE.txt | 11 - br-parser-tools/br-parser-tools/info.rkt | 12 - brag/LICENSE | 165 --- brag/README.md | 4 - brag/brag/Makefile | 12 - brag/brag/brag.scrbl | 1157 ----------------- brag/brag/cfg-parser/cfg-parser.rkt | 921 ------------- brag/brag/codegen/codegen.rkt | 448 ------- brag/brag/codegen/flatten.rkt | 200 --- brag/brag/codegen/reader.rkt | 68 - brag/brag/codegen/runtime.rkt | 212 --- brag/brag/codegen/satisfaction.rkt | 207 --- brag/brag/codegen/sexp-based-lang.rkt | 96 -- brag/brag/examples/01-equal.rkt | 12 - brag/brag/examples/0n1.rkt | 3 - brag/brag/examples/0n1n.rkt | 3 - brag/brag/examples/add-mult.rkt | 7 - brag/brag/examples/baby-json-hider.rkt | 18 - brag/brag/examples/baby-json.rkt | 16 - brag/brag/examples/bnf.rkt | 13 - brag/brag/examples/cutter.rkt | 4 - brag/brag/examples/lua-parser.rkt | 111 -- brag/brag/examples/nested-word-list.rkt | 3 - .../examples/simple-arithmetic-grammar.rkt | 5 - brag/brag/examples/simple-line-drawing.rkt | 10 - .../simple-line-drawing/examples/letter-i.rkt | 4 - .../examples/simple-line-drawing/grammar.rkt | 10 - .../simple-line-drawing/interpret.rkt | 31 - .../simple-line-drawing/lang/reader.rkt | 22 - .../examples/simple-line-drawing/lexer.rkt | 27 - .../simple-line-drawing/semantics.rkt | 48 - brag/brag/examples/statlist-grammar.rkt | 14 - brag/brag/examples/whitespace.rkt | 6 - brag/brag/examples/wordy.rkt | 7 - brag/brag/info.rkt | 7 - brag/brag/main.rkt | 5 - brag/brag/private/internal-support.rkt | 36 - brag/brag/rules/lexer.rkt | 131 -- brag/brag/rules/parser.rkt | 281 ---- brag/brag/rules/rule-structs.rkt | 43 - brag/brag/rules/runtime.rkt | 34 - brag/brag/rules/stx-types.rkt | 16 - brag/brag/rules/stx.rkt | 88 -- brag/brag/support.rkt | 143 -- brag/brag/test/test-01-equal.rkt | 30 - brag/brag/test/test-0n1.rkt | 50 - brag/brag/test/test-0n1n.rkt | 49 - brag/brag/test/test-all.rkt | 18 - brag/brag/test/test-baby-json-hider.rkt | 19 - brag/brag/test/test-baby-json.rkt | 25 - brag/brag/test/test-cutter.rkt | 9 - brag/brag/test/test-errors.rkt | 137 -- brag/brag/test/test-flatten.rkt | 193 --- brag/brag/test/test-lexer.rkt | 73 -- brag/brag/test/test-old-token.rkt | 76 -- brag/brag/test/test-parser.rkt | 153 --- .../test/test-simple-arithmetic-grammar.rkt | 72 - brag/brag/test/test-simple-line-drawing.rkt | 72 - brag/brag/test/test-weird-grammar.rkt | 7 - brag/brag/test/test-whitespace.rkt | 12 - brag/brag/test/test-wordy.rkt | 18 - brag/brag/test/weird-grammar.rkt | 6 - brag/info.rkt | 7 - 101 files changed, 16 insertions(+), 12609 deletions(-) delete mode 100644 LGPL.txt delete mode 100644 LICENSE delete mode 100644 br-parser-tools/br-parser-tools-doc/LICENSE.txt delete mode 100644 br-parser-tools/br-parser-tools-doc/br-parser-tools/br-parser-tools.scrbl delete mode 100644 br-parser-tools/br-parser-tools-doc/br-parser-tools/info.rkt delete mode 100644 br-parser-tools/br-parser-tools-doc/info.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/LICENSE.txt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/calc.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/read.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/info.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-sre.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/lex.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/util.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/br-parser-tools/yacc.rkt delete mode 100644 br-parser-tools/br-parser-tools-lib/info.rkt delete mode 100644 br-parser-tools/br-parser-tools/LICENSE.txt delete mode 100644 br-parser-tools/br-parser-tools/info.rkt delete mode 100755 brag/LICENSE delete mode 100755 brag/README.md delete mode 100755 brag/brag/Makefile delete mode 100755 brag/brag/brag.scrbl delete mode 100755 brag/brag/cfg-parser/cfg-parser.rkt delete mode 100755 brag/brag/codegen/codegen.rkt delete mode 100755 brag/brag/codegen/flatten.rkt delete mode 100755 brag/brag/codegen/reader.rkt delete mode 100755 brag/brag/codegen/runtime.rkt delete mode 100755 brag/brag/codegen/satisfaction.rkt delete mode 100755 brag/brag/codegen/sexp-based-lang.rkt delete mode 100755 brag/brag/examples/01-equal.rkt delete mode 100755 brag/brag/examples/0n1.rkt delete mode 100755 brag/brag/examples/0n1n.rkt delete mode 100755 brag/brag/examples/add-mult.rkt delete mode 100755 brag/brag/examples/baby-json-hider.rkt delete mode 100755 brag/brag/examples/baby-json.rkt delete mode 100755 brag/brag/examples/bnf.rkt delete mode 100755 brag/brag/examples/cutter.rkt delete mode 100755 brag/brag/examples/lua-parser.rkt delete mode 100755 brag/brag/examples/nested-word-list.rkt delete mode 100755 brag/brag/examples/simple-arithmetic-grammar.rkt delete mode 100755 brag/brag/examples/simple-line-drawing.rkt delete mode 100755 brag/brag/examples/simple-line-drawing/examples/letter-i.rkt delete mode 100755 brag/brag/examples/simple-line-drawing/grammar.rkt delete mode 100755 brag/brag/examples/simple-line-drawing/interpret.rkt delete mode 100755 brag/brag/examples/simple-line-drawing/lang/reader.rkt delete mode 100755 brag/brag/examples/simple-line-drawing/lexer.rkt delete mode 100755 brag/brag/examples/simple-line-drawing/semantics.rkt delete mode 100755 brag/brag/examples/statlist-grammar.rkt delete mode 100644 brag/brag/examples/whitespace.rkt delete mode 100755 brag/brag/examples/wordy.rkt delete mode 100755 brag/brag/info.rkt delete mode 100755 brag/brag/main.rkt delete mode 100755 brag/brag/private/internal-support.rkt delete mode 100755 brag/brag/rules/lexer.rkt delete mode 100755 brag/brag/rules/parser.rkt delete mode 100755 brag/brag/rules/rule-structs.rkt delete mode 100755 brag/brag/rules/runtime.rkt delete mode 100755 brag/brag/rules/stx-types.rkt delete mode 100755 brag/brag/rules/stx.rkt delete mode 100755 brag/brag/support.rkt delete mode 100755 brag/brag/test/test-01-equal.rkt delete mode 100755 brag/brag/test/test-0n1.rkt delete mode 100755 brag/brag/test/test-0n1n.rkt delete mode 100755 brag/brag/test/test-all.rkt delete mode 100755 brag/brag/test/test-baby-json-hider.rkt delete mode 100755 brag/brag/test/test-baby-json.rkt delete mode 100755 brag/brag/test/test-cutter.rkt delete mode 100755 brag/brag/test/test-errors.rkt delete mode 100755 brag/brag/test/test-flatten.rkt delete mode 100755 brag/brag/test/test-lexer.rkt delete mode 100755 brag/brag/test/test-old-token.rkt delete mode 100755 brag/brag/test/test-parser.rkt delete mode 100755 brag/brag/test/test-simple-arithmetic-grammar.rkt delete mode 100755 brag/brag/test/test-simple-line-drawing.rkt delete mode 100755 brag/brag/test/test-weird-grammar.rkt delete mode 100755 brag/brag/test/test-whitespace.rkt delete mode 100755 brag/brag/test/test-wordy.rkt delete mode 100755 brag/brag/test/weird-grammar.rkt delete mode 100755 brag/info.rkt diff --git a/LGPL.txt b/LGPL.txt deleted file mode 100644 index 6600f1c..0000000 --- a/LGPL.txt +++ /dev/null @@ -1,165 +0,0 @@ -GNU LESSER GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - - This version of the GNU Lesser General Public License incorporates -the terms and conditions of version 3 of the GNU General Public -License, supplemented by the additional permissions listed below. - - 0. Additional Definitions. - - As used herein, "this License" refers to version 3 of the GNU Lesser -General Public License, and the "GNU GPL" refers to version 3 of the GNU -General Public License. - - "The Library" refers to a covered work governed by this License, -other than an Application or a Combined Work as defined below. - - An "Application" is any work that makes use of an interface provided -by the Library, but which is not otherwise based on the Library. -Defining a subclass of a class defined by the Library is deemed a mode -of using an interface provided by the Library. - - A "Combined Work" is a work produced by combining or linking an -Application with the Library. The particular version of the Library -with which the Combined Work was made is also called the "Linked -Version". - - The "Minimal Corresponding Source" for a Combined Work means the -Corresponding Source for the Combined Work, excluding any source code -for portions of the Combined Work that, considered in isolation, are -based on the Application, and not on the Linked Version. - - The "Corresponding Application Code" for a Combined Work means the -object code and/or source code for the Application, including any data -and utility programs needed for reproducing the Combined Work from the -Application, but excluding the System Libraries of the Combined Work. - - 1. Exception to Section 3 of the GNU GPL. - - You may convey a covered work under sections 3 and 4 of this License -without being bound by section 3 of the GNU GPL. - - 2. Conveying Modified Versions. - - If you modify a copy of the Library, and, in your modifications, a -facility refers to a function or data to be supplied by an Application -that uses the facility (other than as an argument passed when the -facility is invoked), then you may convey a copy of the modified -version: - - a) under this License, provided that you make a good faith effort to - ensure that, in the event an Application does not supply the - function or data, the facility still operates, and performs - whatever part of its purpose remains meaningful, or - - b) under the GNU GPL, with none of the additional permissions of - this License applicable to that copy. - - 3. Object Code Incorporating Material from Library Header Files. - - The object code form of an Application may incorporate material from -a header file that is part of the Library. You may convey such object -code under terms of your choice, provided that, if the incorporated -material is not limited to numerical parameters, data structure -layouts and accessors, or small macros, inline functions and templates -(ten or fewer lines in length), you do both of the following: - - a) Give prominent notice with each copy of the object code that the - Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the object code with a copy of the GNU GPL and this license - document. - - 4. Combined Works. - - You may convey a Combined Work under terms of your choice that, -taken together, effectively do not restrict modification of the -portions of the Library contained in the Combined Work and reverse -engineering for debugging such modifications, if you also do each of -the following: - - a) Give prominent notice with each copy of the Combined Work that - the Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the Combined Work with a copy of the GNU GPL and this license - document. - - c) For a Combined Work that displays copyright notices during - execution, include the copyright notice for the Library among - these notices, as well as a reference directing the user to the - copies of the GNU GPL and this license document. - - d) Do one of the following: - - 0) Convey the Minimal Corresponding Source under the terms of this - License, and the Corresponding Application Code in a form - suitable for, and under terms that permit, the user to - recombine or relink the Application with a modified version of - the Linked Version to produce a modified Combined Work, in the - manner specified by section 6 of the GNU GPL for conveying - Corresponding Source. - - 1) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (a) uses at run time - a copy of the Library already present on the user's computer - system, and (b) will operate properly with a modified version - of the Library that is interface-compatible with the Linked - Version. - - e) Provide Installation Information, but only if you would otherwise - be required to provide such information under section 6 of the - GNU GPL, and only to the extent that such information is - necessary to install and execute a modified version of the - Combined Work produced by recombining or relinking the - Application with a modified version of the Linked Version. (If - you use option 4d0, the Installation Information must accompany - the Minimal Corresponding Source and Corresponding Application - Code. If you use option 4d1, you must provide the Installation - Information in the manner specified by section 6 of the GNU GPL - for conveying Corresponding Source.) - - 5. Combined Libraries. - - You may place library facilities that are a work based on the -Library side by side in a single library together with other library -facilities that are not Applications and are not covered by this -License, and convey such a combined library under terms of your -choice, if you do both of the following: - - a) Accompany the combined library with a copy of the same work based - on the Library, uncombined with any other library facilities, - conveyed under the terms of this License. - - b) Give prominent notice with the combined library that part of it - is a work based on the Library, and explaining where to find the - accompanying uncombined form of the same work. - - 6. Revised Versions of the GNU Lesser General Public License. - - The Free Software Foundation may publish revised and/or new versions -of the GNU Lesser General Public License from time to time. Such new -versions will be similar in spirit to the present version, but may -differ in detail to address new problems or concerns. - - Each version is given a distinguishing version number. If the -Library as you received it specifies that a certain numbered version -of the GNU Lesser General Public License "or any later version" -applies to it, you have the option of following the terms and -conditions either of that published version or of any later version -published by the Free Software Foundation. If the Library as you -received it does not specify a version number of the GNU Lesser -General Public License, you may choose any version of the GNU Lesser -General Public License ever published by the Free Software Foundation. - - If the Library as you received it specifies that a proxy can decide -whether future versions of the GNU Lesser General Public License shall -apply, that proxy's public statement of acceptance of any version is -permanent authorization for you to choose that version for the -Library. diff --git a/LICENSE b/LICENSE deleted file mode 100644 index c55992b..0000000 --- a/LICENSE +++ /dev/null @@ -1,3 +0,0 @@ -Beautiful Racket -© 2016 Matthew Butterick -Licensed under the LGPL (see "LGPL.txt") \ No newline at end of file diff --git a/README.md b/README.md index 89fb7b3..4bc57f5 100644 --- a/README.md +++ b/README.md @@ -4,20 +4,31 @@ beautiful-racket [![Build Status](https://travis-ci.org/mbutterick/beautiful-rac Resources for the upcoming “Beautiful Racket” book, including: * `#lang br` teaching language - -* `#lang brag` parser generator language (a fork of Danny Yoo's [ragg](http://github.com/jbclements/ragg)) * supporting modules * sample languages - -Installation: +Installation +- `raco pkg install beautiful-racket` -Update: +Update +- `raco pkg update beautiful-racket` + + +License (MIT) +- + +Copyright (c) 2016-2017 Matthew Butterick + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/beautiful-racket-lib/br/scribblings/br.scrbl b/beautiful-racket-lib/br/scribblings/br.scrbl index f08b0b4..c08daeb 100644 --- a/beautiful-racket-lib/br/scribblings/br.scrbl +++ b/beautiful-racket-lib/br/scribblings/br.scrbl @@ -391,52 +391,6 @@ A special variable only available inside the body of @racket[define-macro] or @r ]{ Like @racket[define-macro], but moves @racket[result-expr] into the lexical context of the calling site. For demonstration purposes only. If you really need to write an unhygienic macro, this is a rather blunt instrument. } - -@section{Reader utilities} - -@defmodule[br/reader-utils] - - -@defproc[ -(test-reader -[read-syntax-proc procedure?] -[source-str string?]) -datum?]{ -Applies @racket[read-syntax-proc] to @racket[source-str] as if it were being read in from a source file. -} - - -@defform[ -(define-read-and-read-syntax (path-id port-id) - reader-result-expr ...+) -]{ -For use within a language reader. Automatically @racket[define] and @racket[provide] the @racket[read] and @racket[read-syntax] functions needed for the reader's public interface. @racket[reader-result-expr] can return either a syntax object or a datum (which will be converted to a syntax object). - -The generated @racket[read-syntax] function takes two arguments, a path and an input port. It returns a syntax object stripped of all bindings. - -The generated @racket[read] function takes one argument, an input port. It calls @racket[read-syntax] and converts the result to a datum. - - -@examples[#:eval my-eval -(module sample-reader racket/base - (require br/reader-utils racket/list) - (define-read-and-read-syntax (path port) - (add-between - (for/list ([datum (in-port read port)]) - datum) - 'whee))) - -(require (prefix-in sample: 'sample-reader)) - -(define string-port (open-input-string "(+ 2 2) 'hello")) -(sample:read-syntax 'no-path string-port) - -(define string-port-2 (open-input-string "(+ 2 2) 'hello")) -(sample:read string-port-2) -] - - -} @section{Syntax} diff --git a/br-parser-tools/br-parser-tools-doc/LICENSE.txt b/br-parser-tools/br-parser-tools-doc/LICENSE.txt deleted file mode 100644 index c424668..0000000 --- a/br-parser-tools/br-parser-tools-doc/LICENSE.txt +++ /dev/null @@ -1,11 +0,0 @@ -parser-tools-doc -Copyright (c) 2010-2014 PLT Design Inc. - -This package is distributed under the GNU Lesser General Public -License (LGPL). This means that you can link this package into proprietary -applications, provided you follow the rules stated in the LGPL. You -can also modify this package; if you distribute a modified version, -you must distribute it under the terms of the LGPL, which in -particular means that you must release the source code for the -modified software. See http://www.gnu.org/copyleft/lesser.html -for more information. diff --git a/br-parser-tools/br-parser-tools-doc/br-parser-tools/br-parser-tools.scrbl b/br-parser-tools/br-parser-tools-doc/br-parser-tools/br-parser-tools.scrbl deleted file mode 100644 index 7e0684c..0000000 --- a/br-parser-tools/br-parser-tools-doc/br-parser-tools/br-parser-tools.scrbl +++ /dev/null @@ -1,769 +0,0 @@ -#lang scribble/doc -@(require scribble/manual scribble/struct scribble/xref scribble/bnf - (for-label scheme/base - scheme/contract - br-parser-tools/lex - (prefix-in : br-parser-tools/lex-sre) - br-parser-tools/yacc - br-parser-tools/cfg-parser)) - -@title{Parser Tools: @exec{lex} and @exec{yacc}-style Parsing (Beautiful Racket edition)} - -@author["Scott Owens (99%)" "Matthew Butterick (1%)"] - -This documentation assumes familiarity with @exec{lex} and @exec{yacc} -style lexer and parser generators. - -@table-of-contents[] - -@; ---------------------------------------------------------------------- - -@section{Lexers} - -@section-index["lex"] -@section-index["scanning"] -@section-index["scanner"] - -@defmodule[br-parser-tools/lex] - -@; ---------------------------------------- - -@subsection{Creating a Lexer} - -@defform/subs[#:literals (repetition union intersection complement concatenation - char-range char-complement - eof special special-comment) - (lexer [trigger action-expr] ...) - ([trigger re - (eof) - (special) - (special-comment)] - [re id - string - character - (repetition lo hi re) - (union re ...) - (intersection re ...) - (complement re) - (concatenation re ...) - (char-range char char) - (char-complement re) - (id datum ...)])]{ - - Produces a function that takes an input-port, matches the - @racket[re] patterns against the buffer, and returns the result of - executing the corresponding @racket[action-expr]. When multiple - patterns match, a lexer will choose the longest match, breaking - ties in favor of the rule appearing first. - - @margin-note{The implementation of @racketmodname[syntax-color/racket-lexer] - contains a lexer for the @racketmodname[racket] language. - In addition, files in the @filepath{examples} sub-directory - of the @filepath{br-parser-tools} collection contain - simpler example lexers.} - - An @racket[re] is matched as follows: - - @itemize[ - @item{@racket[id] --- expands to the named @deftech{lexer abbreviation}; - abbreviations are defined via @racket[define-lex-abbrev] or supplied by modules - like @racketmodname[br-parser-tools/lex-sre].} - @item{@racket[string] --- matches the sequence of characters in @racket[string].} - @item{@racket[character] --- matches a literal @racket[character].} - @item{@racket[(repetition lo hi re)] --- matches @racket[re] repeated between @racket[lo] - and @racket[hi] times, inclusive; @racket[hi] can be @racket[+inf.0] for unbounded repetitions.} - @item{@racket[(union re ...)] --- matches if any of the sub-expressions match} - @item{@racket[(intersection re ...)] --- matches if all of the @racket[re]s match.} - @item{@racket[(complement re)] --- matches anything that @racket[re] does not.} - @item{@racket[(concatenation re ...)] --- matches each @racket[re] in succession.} - @item{@racket[(char-range char char)] --- matches any character between the two (inclusive); - a single character string can be used as a @racket[char].} - @item{@racket[(char-complement re)] --- matches any character not matched by @racket[re]. - The sub-expression must be a set of characters @racket[re].} - @item{@racket[(id datum ...)] --- expands the @deftech{lexer macro} named @racket[id]; macros - are defined via @racket[define-lex-trans].} - ] - -Note that both @racket[(concatenation)] and @racket[""] match the -empty string, @racket[(union)] matches nothing, -@racket[(intersection)] matches any string, and -@racket[(char-complement (union))] matches any single character. - -The regular expression language is not designed to be used directly, -but rather as a basis for a user-friendly notation written with -regular expression macros. For example, -@racketmodname[br-parser-tools/lex-sre] supplies operators from Olin -Shivers's SREs, and @racketmodname[br-parser-tools/lex-plt-v200] supplies -(deprecated) operators from the previous version of this library. -Since those libraries provide operators whose names match other Racket -bindings, such as @racket[*] and @racket[+], they normally must be -imported using a prefix: - -@racketblock[ -(require (prefix-in : br-parser-tools/lex-sre)) -] - -The suggested prefix is @racket[:], so that @racket[:*] and -@racket[:+] are imported. Of course, a prefix other than @racket[:] -(such as @racket[re-]) will work too. - -Since negation is not a common operator on regular expressions, here -are a few examples, using @racket[:] prefixed SRE syntax: - -@itemize[ - -@item{@racketblock0[(complement "1")] - - Matches all strings except the string @racket["1"], including - @racket["11"], @racket["111"], @racket["0"], @racket["01"], - @racket[""], and so on.} - -@item{@racketblock0[(complement (:* "1"))] - - Matches all strings that are not sequences of @racket["1"], - including @racket["0"], @racket["00"], @racket["11110"], - @racket["0111"], @racket["11001010"] and so on.} - -@item{@racketblock0[(:& (:: any-string "111" any-string) - (complement (:or (:: any-string "01") (:+ "1"))))] - - Matches all strings that have 3 consecutive ones, but not those that - end in @racket["01"] and not those that are ones only. These - include @racket["1110"], @racket["0001000111"] and @racket["0111"] - but not @racket[""], @racket["11"], @racket["11101"], @racket["111"] - and @racket["11111"].} - -@item{@racketblock0[(:: "/*" (complement (:: any-string "*/" any-string)) "*/")] - - Matches Java/C block comments. @racket["/**/"], - @racket["/******/"], @racket["/*////*/"], @racket["/*asg4*/"] and so - on. It does not match @racket["/**/*/"], @racket["/* */ */"] and so - on. @racket[(:: any-string "*/" any-string)] matches any string - that has a @racket["*/"] in is, so @racket[(complement (:: any-string "*/" - any-string))] matches any string without a @racket["*/"] in it.} - -@item{@racketblock0[(:: "/*" (:* (complement "*/")) "*/")] - - Matches any string that starts with @racket["/*"] and ends with - @racket["*/"], including @racket["/* */ */ */"]. - @racket[(complement "*/")] matches any string except @racket["*/"]. - This includes @racket["*"] and @racket["/"] separately. Thus - @racket[(:* (complement "*/"))] matches @racket["*/"] by first - matching @racket["*"] and then matching @racket["/"]. Any other - string is matched directly by @racket[(complement "*/")]. In other - words, @racket[(:* (complement "xx"))] = @racket[any-string]. It is - usually not correct to place a @racket[:*] around a - @racket[complement].} -] - - - The following binding have special meaning inside of a lexer - action: - - @itemize[ - @item{@racket[start-pos] --- a @racket[position] struct for the first character matched.} - @item{@racket[end-pos] --- a @racket[position] struct for the character after the last character in the match.} - @item{@racket[lexeme] --- the matched string.} - @item{@racket[input-port] --- the input-port being - processed (this is useful for matching input with multiple - lexers).} - @item{@racket[(return-without-pos x)] and @racket[(return-without-srcloc x)] are functions (continuations) that - immediately return the value of @racket[x] from the lexer. This useful - in a src-pos or src-loc lexer to prevent the lexer from adding source - information. For example: - - @racketblock[ - (define get-token - (lexer-srcloc - ... - ((comment) (get-token input-port)) - ...)) - ] - - would wrap the source location information for the comment around - the value of the recursive call. Using - @racket[((comment) (return-without-srcloc (get-token input-port)))] - will cause the value of the recursive call to be returned without - wrapping position around it.} - ] - - The lexer raises an @racket[exn:fail:read] exception if none of the - regular expressions match the input. Hint: If @racket[(any-char - _custom-error-behavior)] is the last rule, then there will always - be a match, and @racket[_custom-error-behavior] is executed to - handle the error situation as desired, only consuming the first - character from the input buffer. - - In addition to returning characters, input - ports can return @racket[eof-object]s. Custom input ports can - also return a @racket[special-comment] value to indicate a - non-textual comment, or return another arbitrary value (a - special). The non-@racket[re] @racket[trigger] forms handle these - cases: - - @itemize[ - - @item{The @racket[(eof)] rule is matched when the input port - returns an @racket[eof-object] value. If no @racket[(eof)] - rule is present, the lexer returns the symbol @racket['eof] - when the port returns an @racket[eof-object] value.} - - @item{The @racket[(special-comment)] rule is matched when the - input port returns a @racket[special-comment] structure. If no - @racket[special-comment] rule is present, the lexer - automatically tries to return the next token from the input - port.} - - @item{The @racket[(special)] rule is matched when the input - port returns a value other than a character, - @racket[eof-object], or @racket[special-comment] structure. If - no @racket[(special)] rule is present, the lexer returns - @racket[(void)].}] - - End-of-files, specials, special-comments and special-errors cannot - be parsed via a rule using an ordinary regular expression - (but dropping down and manipulating the port to handle them - is possible in some situations). - - Since the lexer gets its source information from the port, use - @racket[port-count-lines!] to enable the tracking of line and - column information. Otherwise, the line and column information - will return @racket[#f]. - - When peeking from the input port raises an exception (such as by - an embedded XML editor with malformed syntax), the exception can - be raised before all tokens preceding the exception have been - returned. - - Each time the racket code for a lexer is compiled (e.g. when a - @filepath{.rkt} file containing a @racket[lexer] form is loaded), - the lexer generator is run. To avoid this overhead place the - lexer into a module and compile the module to a @filepath{.zo} - bytecode file.} - -@defform[(lexer-src-pos (trigger action-expr) ...)]{ - -Like @racket[lexer], but for each @racket[_action-result] produced by -an @racket[action-expr], returns @racket[(make-position-token -_action-result start-pos end-pos)] instead of simply -@racket[_action-result].} - -@defform[(lexer-srcloc (trigger action-expr) ...)]{ - -Like @racket[lexer], but for each @racket[_action-result] produced by -an @racket[action-expr], returns @racket[(make-srcloc-token -_action-result lexeme-srcloc)] instead of simply -@racket[_action-result].} - -@deftogether[( -@defidform[start-pos] -@defidform[end-pos] -@defidform[lexeme] -@defidform[lexeme-srcloc] -@defidform[input-port] -@defidform[return-without-pos] -@defidform[return-without-srcloc] -)]{ - -Use of these names outside of a @racket[lexer] action is a syntax -error.} - -@defstruct[position ([offset exact-positive-integer?] - [line exact-positive-integer?] - [col exact-nonnegative-integer?])]{ - - Instances of @racket[position] are bound to @racket[start-pos] and - @racket[end-pos]. The @racket[offset] field contains the offset of - the character in the input. The @racket[line] field contains the - line number of the character. The @racket[col] field contains the - offset in the current line.} - -@defstruct[position-token ([token any/c] - [start-pos position?] - [end-pos position?])]{ - - Lexers created with @racket[lexer-src-pos] return instances of @racket[position-token].} - - -@defstruct[srcloc-token ([token any/c] - [srcloc srcloc?])]{ - - Lexers created with @racket[lexer-srcloc] return instances of @racket[srcloc-token].} - -@defparam[file-path source any/c]{ - - A parameter that the lexer uses as the source location if it - raises a @racket[exn:fail:read] error. Setting this parameter allows - DrRacket, for example, to open the file containing the error.} - -@defparam[lexer-file-path source any/c]{ - -Alias for @racket[file-path].} - - -@; ---------------------------------------- - -@subsection{Lexer Abbreviations and Macros} - -@defform[(char-set string)]{ - -A @tech{lexer macro} that matches any character in @racket[string].} - -@defidform[any-char]{A @tech{lexer abbreviation} that matches any character.} - -@defidform[any-string]{A @tech{lexer abbreviation} that matches any string.} - -@defidform[nothing]{A @tech{lexer abbreviation} that matches no string.} - -@deftogether[( -@defidform[alphabetic] -@defidform[lower-case] -@defidform[upper-case] -@defidform[title-case] -@defidform[numeric] -@defidform[symbolic] -@defidform[punctuation] -@defidform[graphic] -@defidform[whitespace] -@defidform[blank] -@defidform[iso-control] -)]{ - -@tech{Lexer abbreviations} that match @racket[char-alphabetic?] -characters, @racket[char-lower-case?] characters, etc.} - -@defform[(define-lex-abbrev id re)]{ - - Defines a @tech{lexer abbreviation} by associating a regular - expression to be used in place of the @racket[id] in other - regular expression. The definition of name has the same scoping - properties as a other syntactic binding (e.g., it can be exported - from a module).} - -@defform[(define-lex-abbrevs (id re) ...)]{ - - Like @racket[define-lex-abbrev], but defines several @tech{lexer - abbreviations}.} - - -@defform[(define-lex-trans id trans-expr)]{ - - Defines a @tech{lexer macro}, where @racket[trans-expr] produces a - transformer procedure that takes one argument. When @racket[(id - _datum ...)] appears as a regular expression, it is replaced with - the result of applying the transformer to the expression.} - - -@; ---------------------------------------- - -@subsection{Lexer SRE Operators} - -@defmodule[br-parser-tools/lex-sre] - -@; Put the docs in a macro, so that we can bound the scope of -@; the import of `*', etc.: -@(define-syntax-rule (lex-sre-doc) - (... - (begin - (require (for-label br-parser-tools/lex-sre)) - -@defform[(* re ...)]{ - -Repetition of @racket[re] sequence 0 or more times.} - -@defform[(+ re ...)]{ - -Repetition of @racket[re] sequence 1 or more times.} - -@defform[(? re ...)]{ - -Zero or one occurrence of @racket[re] sequence.} - -@defform[(= n re ...)]{ - -Exactly @racket[n] occurrences of @racket[re] sequence, where -@racket[n] must be a literal exact, non-negative number.} - -@defform[(>= n re ...)]{ - -At least @racket[n] occurrences of @racket[re] sequence, where -@racket[n] must be a literal exact, non-negative number.} - -@defform[(** n m re ...)]{ - -Between @racket[n] and @racket[m] (inclusive) occurrences of -@racket[re] sequence, where @racket[n] must be a literal exact, -non-negative number, and @racket[m] must be literally either -@racket[#f], @racket[+inf.0], or an exact, non-negative number; a -@racket[#f] value for @racket[m] is the same as @racket[+inf.0].} - -@defform[(or re ...)]{ - -Same as @racket[(union re ...)].} - -@deftogether[( -@defform[(: re ...)] -@defform[(seq re ...)] -)]{ - -Both forms concatenate the @racket[re]s.} - -@defform[(& re ...)]{ - -Intersects the @racket[re]s.} - -@defform[(- re ...)]{ - -The set difference of the @racket[re]s.} - -@defform[(~ re ...)]{ - -Character-set complement, which each @racket[re] must match exactly -one character.} - -@defform[(/ char-or-string ...)]{ - -Character ranges, matching characters between successive pairs of -characters.} - -))) - -@(lex-sre-doc) - -@; ---------------------------------------- - -@subsection{Lexer Legacy Operators} - -@defmodule[br-parser-tools/lex-plt-v200] - -@(define-syntax-rule (lex-v200-doc) - (... - (begin - (require (for-label br-parser-tools/lex-plt-v200)) - -@t{The @racketmodname[br-parser-tools/lex-plt-v200] module re-exports - @racket[*], @racket[+], @racket[?], and @racket[&] from - @racketmodname[br-parser-tools/lex-sre]. It also re-exports - @racket[:or] as @racket[:], @racket[::] as @racket[|@|], @racket[:~] - as @racket[^], and @racket[:/] as @racket[-].} - -@defform[(epsilon)]{ - -A @tech{lexer macro} that matches an empty sequence.} - -@defform[(~ re ...)]{ - -The same as @racket[(complement re ...)].}))) - -@(lex-v200-doc) - -@; ---------------------------------------- - -@subsection{Tokens} - -Each @racket[_action-expr] in a @racket[lexer] form can produce any -kind of value, but for many purposes, producing a @deftech{token} -value is useful. Tokens are usually necessary for inter-operating with -a parser generated by @racket[br-parser-tools/parser], but tokens may not -be the right choice when using @racket[lexer] in other situations. - -@defform[(define-tokens group-id (token-id ...))]{ - - Binds @racket[group-id] to the group of tokens being defined. For - each @racket[token-id], a function - @racketidfont{token-}@racket[token-id] is created that takes any - value and puts it in a token record specific to @racket[token-id]. - The token value is inspected using @racket[token-id] and - @racket[token-value]. - - A token cannot be named @racketidfont{error}, since - @racketidfont{error} it has special use in the parser.} - -@defform[(define-empty-tokens group-id (token-id ...) )]{ - - - Like @racket[define-tokens], except a each token constructor - @racketidfont{token-}@racket[token-id] takes no arguments and returns - @racket[(@#,racket[quote] token-id)].} - - -@defproc[(token-name [t (or/c token? symbol?)]) symbol?]{ - - Returns the name of a token that is represented either by a symbol - or a token structure.} - - -@defproc[(token-value [t (or/c token? symbol?)]) any/c]{ - - Returns the value of a token that is represented either by a symbol - or a token structure, returning @racket[#f] for a symbol token.} - - -@defproc[(token? [v any/c]) boolean?]{ - - Returns @racket[#t] if @racket[val] is a - token structure, @racket[#f] otherwise.} - -@; ---------------------------------------------------------------------- - -@section{LALR(1) Parsers} - -@section-index["yacc"] - -@defmodule[br-parser-tools/yacc] - -@defform/subs[#:literals (grammar tokens start end precs src-pos - suppress debug yacc-output prec) - (parser clause ...) - ([clause (grammar (non-terminal-id - ((grammar-id ...) maybe-prec expr) - ...) - ...) - (tokens group-id ...) - (start non-terminal-id ...) - (end token-id ...) - (@#,racketidfont{error} expr) - (precs (assoc token-id ...) ...) - (src-pos) - (suppress) - (debug filename) - (yacc-output filename)] - [maybe-prec code:blank - (prec token-id)] - [assoc left right nonassoc])]{ - - Creates a parser. The clauses may be in any order, as long as there - are no duplicates and all non-@italic{OPTIONAL} declarations are - present: - - @itemize[ - - @item{@racketblock0[(grammar (non-terminal-id - ((grammar-id ...) maybe-prec expr) - ...) - ...)] - - Declares the grammar to be parsed. Each @racket[grammar-id] can - be a @racket[token-id] from a @racket[group-id] named in a - @racket[tokens] declaration, or it can be a - @racket[non-terminal-id] declared in the @racket[grammar] - declaration. The optional @racket[prec] declaration works with - the @racket[precs] declaration. The @racket[expr] is a - ``semantic action,'' which is evaluated when the input is found - to match its corresponding production. - - Each action is Racket code that has the same scope as its - parser's definition, except that the variables @racket[$1], ..., - @racketidfont{$}@math{i} are bound, where @math{i} is the number - of @racket[grammar-id]s in the corresponding production. Each - @racketidfont{$}@math{k} is bound to the result of the action - for the @math{k}@superscript{th} grammar symbol on the right of - the production, if that grammar symbol is a non-terminal, or the - value stored in the token if the grammar symbol is a terminal. - If the @racket[src-pos] option is present in the parser, then - variables @racket[$1-start-pos], ..., - @racketidfont{$}@math{i}@racketidfont{-start-pos} and - @racket[$1-end-pos], ..., - @racketidfont{$}@math{i}@racketidfont{-end-pos} and are also - available, and they refer to the position structures - corresponding to the start and end of the corresponding - @racket[grammar-symbol]. Grammar symbols defined as empty-tokens - have no @racketidfont{$}@math{k} associated, but do have - @racketidfont{$}@math{k}@racketidfont{-start-pos} and - @racketidfont{$}@math{k}@racketidfont{-end-pos}. - Also @racketidfont{$n-start-pos} and @racketidfont{$n-end-pos} - are bound to the largest start and end positions, (i.e., - @racketidfont{$}@math{i}@racketidfont{-start-pos} and - @racketidfont{$}@math{i}@racketidfont{-end-pos}). - - An @deftech{error production} can be defined by providing - a production of the form @racket[(error α)], where α is a - string of grammar symbols, possibly empty. - - All of the productions for a given non-terminal must be grouped - with it. That is, no @racket[non-terminal-id] may appear twice - on the left hand side in a parser.} - - - @item{@racket[(tokens group-id ...)] - - Declares that all of the tokens defined in each - @racket[group-id]---as bound by @racket[define-tokens] or - @racket[define-empty-tokens]---can be used by the parser in the - @racket[grammar] declaration.} - - - @item{@racket[(start non-terminal-id ...)] - - Declares a list of starting non-terminals for the grammar.} - - - @item{@racket[(end token-id ...)] - - Specifies a set of tokens from which some member must follow any - valid parse. For example, an EOF token would be specified for a - parser that parses entire files and a newline token for a parser - that parses entire lines individually.} - - - @item{@racket[(@#,racketidfont{error} expr)] - - The @racket[expr] should evaluate to a function which will be - executed for its side-effect whenever the parser encounters an - error. - - If the @racket[src-pos] declaration is present, the function - should accept 5 arguments,: - - @racketblock[(lambda (tok-ok? tok-name tok-value _start-pos _end-pos) - ....)] - - Otherwise it should accept 3: - - @racketblock[(lambda (tok-ok? tok-name tok-value) - ....)] - - The first argument will be @racket[#f] if and only if the error - is that an invalid token was received. The second and third - arguments will be the name and the value of the token at which - the error was detected. The fourth and fifth arguments, if - present, provide the source positions of that token.} - - - @item{@racket[(precs (assoc token-id ...) ...)] - @italic{OPTIONAL} - - Precedence declarations to resolve shift/reduce and - reduce/reduce conflicts as in @exec{yacc}/@exec{bison}. An - @racket[assoc] must be one of @racket[left], @racket[right] or - @racket[nonassoc]. States with multiple shift/reduce or - reduce/reduce conflicts (or some combination thereof) are not - resolved with precedence.} - - @item{@racket[(src-pos)] @italic{OPTIONAL} - - Causes the generated parser to expect input in the form - @racket[(make-position-token _token _start-pos _end-pos)] instead - of simply @racket[_token]. Include this option when using the - parser with a lexer generated with @racket[lexer-src-pos].} - - - @item{@racket[(debug filename)] @italic{OPTIONAL} - - Causes the parser generator to write the LALR table to the file - named @racket[filename] (unless the file exists), where - @racket[filename] is a literal string. Additionally, if a debug - file is specified, when a running generated parser encounters a - parse error on some input file, after the user specified error - expression returns, the complete parse stack is printed to - assist in debugging the grammar of that particular parser. The - numbers in the stack printout correspond to the state numbers in - the LALR table file.} - - - @item{@racket[(yacc-output filename)] @italic{OPTIONAL} - - Causes the parser generator to write a grammar file in - approximately the syntax of @exec{yacc}/@exec{bison}. The file - might not be a valid @exec{yacc} file, because the Racket - grammar can use symbols that are invalid in C.} - - - @item{@racket[(suppress)] @italic{OPTIONAL} - - Causes the parser generator not to report shift/reduce or - reduce/reduce conflicts.} - - ] - - The result of a @racket[parser] expression with one @racket[start] - non-terminal is a function, @racket[_parse], that takes one - argument. This argument must be a zero argument function, - @racket[_gen], that produces successive tokens of the input each - time it is called. If desired, the @racket[_gen] may return - symbols instead of tokens, and the parser will treat symbols as - tokens of the corresponding name (with @racket[#f] as a value, so - it is usual to return symbols only in the case of empty tokens). - The @racket[_parse] function returns the value associated with the - parse tree by the semantic actions. If the parser encounters an - error, after invoking the supplied error function, it will try to - use @tech{error production}s to continue parsing. If it cannot, it - raises @racket[exn:fail:read]. - - If multiple non-terminals are provided in @racket[start], the - @racket[parser] expression produces a list of parsing functions, - one for each non-terminal in the same order. Each parsing function - is like the result of a parser expression with only one - @racket[start] non-terminal, - - Each time the Racket code for a @racket[parser] is compiled - (e.g. when a @filepath{.rkt} file containing a @racket[parser] form - is loaded), the parser generator is run. To avoid this overhead - place the parser into a module and compile the module to a - @filepath{.zo} bytecode file.} - - - - -@section{Context-Free Parsers} - -@section-index["cfg-parser"] - -@defmodule[br-parser-tools/cfg-parser]{The @racketmodname[br-parser-tools/cfg-parser] -library provides a parser generator that is an alternative to that of -@racketmodname[br-parser-tools/yacc].} - -@defform/subs[#:literals (grammar tokens start end precs src-pos - suppress debug yacc-output prec) - (cfg-parser clause ...) - ([clause (grammar (non-terminal-id - ((grammar-id ...) maybe-prec expr) - ...) - ...) - (tokens group-id ...) - (start non-terminal-id ...) - (end token-id ...) - (@#,racketidfont{error} expr) - (src-pos)])]{ - - Creates a parser similar to that of @racket[parser]. Unlike @racket[parser], - @racket[cfg-parser], can consume arbitrary and potentially ambiguous context-free - grammars. Its interface is a subset of @racketmodname[br-parser-tools/yacc], with - the following differences: - - @itemize[ - - @item{@racket[(start non-terminal-id)] - - Unlike @racket[parser], @racket[cfg-parser] only allows for - a single non-terminal-id.} - - @item{The @racket[cfg-parser] form does not support the @racket[precs], - @racket[suppress], @racket[debug], or @racket[yacc-output] - options of @racket[parser].} - ] -} - -@; ---------------------------------------------------------------------- - -@section{Converting @exec{yacc} or @exec{bison} Grammars} - -@defmodule[br-parser-tools/yacc-to-scheme] - -@defproc[(trans [file path-string?]) any/c]{ - -Reads a C @exec{yacc}/@exec{bison} grammar from @racket[file] and -produces an s-expression that represents a Racket parser for use with -@racket[parser]. - -This function is intended to assist in the manual conversion of -grammars for use with @racket[parser], and not as a fully automatic -conversion tool. It is not entirely robust. For example, if the C -actions in the original grammar have nested blocks, the tool will fail. - -Annotated examples are in the @filepath{examples} subdirectory of the -@filepath{br-parser-tools} collection.} - -@; ---------------------------------------------------------------------- - -@index-section[] diff --git a/br-parser-tools/br-parser-tools-doc/br-parser-tools/info.rkt b/br-parser-tools/br-parser-tools-doc/br-parser-tools/info.rkt deleted file mode 100644 index f219d03..0000000 --- a/br-parser-tools/br-parser-tools-doc/br-parser-tools/info.rkt +++ /dev/null @@ -1,3 +0,0 @@ -#lang info - -(define scribblings '(("br-parser-tools.scrbl" (multi-page) (parsing-library)))) diff --git a/br-parser-tools/br-parser-tools-doc/info.rkt b/br-parser-tools/br-parser-tools-doc/info.rkt deleted file mode 100644 index 8760588..0000000 --- a/br-parser-tools/br-parser-tools-doc/info.rkt +++ /dev/null @@ -1,14 +0,0 @@ -#lang info - -(define collection 'multi) -(define deps '("base")) -(define build-deps '("scheme-lib" - "racket-doc" - "syntax-color-doc" - "br-parser-tools-lib" - "scribble-lib")) -(define update-implies '("br-parser-tools-lib")) - -(define pkg-desc "documentation part of \"br-parser-tools\"") - -(define pkg-authors '(mflatt)) diff --git a/br-parser-tools/br-parser-tools-lib/LICENSE.txt b/br-parser-tools/br-parser-tools-lib/LICENSE.txt deleted file mode 100644 index f6889f1..0000000 --- a/br-parser-tools/br-parser-tools-lib/LICENSE.txt +++ /dev/null @@ -1,11 +0,0 @@ -parser-tools-lib -Copyright (c) 2010-2014 PLT Design Inc. - -This package is distributed under the GNU Lesser General Public -License (LGPL). This means that you can link this package into proprietary -applications, provided you follow the rules stated in the LGPL. You -can also modify this package; if you distribute a modified version, -you must distribute it under the terms of the LGPL, which in -particular means that you must release the source code for the -modified software. See http://www.gnu.org/copyleft/lesser.html -for more information. diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt deleted file mode 100644 index 7b8b839..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/cfg-parser.rkt +++ /dev/null @@ -1,982 +0,0 @@ -#lang racket/base -;; This module implements a parser form like the br-parser-tools's -;; `parser', except that it works on an arbitrary CFG (returning -;; the first sucecssful parse). - -;; I'm pretty sure that this is an implementation of Earley's -;; algorithm. - -;; To a first approximation, it's a backtracking parser. Alternative -;; for a non-terminal are computed in parallel, and multiple attempts -;; to compute the same result block until the first one completes. If -;; you get into deadlock, such as when trying to match -;; := -;; then it means that there's no successful parse, so everything -;; that's blocked fails. - -;; A cache holds the series of results for a particular non-terminal -;; at a particular starting location. (A series is used, instead of a -;; sinlge result, for backtracking.) Otherwise, the parser uses -;; backtracking search. Backtracking is implemented through explicit -;; success and failure continuations. Multiple results for a -;; particular nonterminal and location are kept only when they have -;; different lengths. (Otherwise, in the spirit of finding one -;; successful parse, only the first result is kept.) - -;; The br-parser-tools's `parse' is used to transform tokens in the -;; grammar to tokens specific to this parser. In other words, this -;; parser uses `parser' so that it doesn't have to know anything about -;; tokens. -;; - - - -(require br-parser-tools/yacc - br-parser-tools/lex) - -(require (for-syntax racket/base - syntax/boundmap - br-parser-tools/private-lex/token-syntax)) - -(provide cfg-parser) - -;; A raw token, wrapped so that we can recognize it: -(define-struct tok (name orig-name val start end)) - -;; Represents the thread scheduler: -(define-struct tasks (active active-back waits multi-waits cache progress?)) - -(define-for-syntax make-token-identifier-mapping make-hasheq) -(define-for-syntax token-identifier-mapping-get - (case-lambda - [(t tok) - (hash-ref t (syntax-e tok))] - [(t tok fail) - (hash-ref t (syntax-e tok) fail)])) -(define-for-syntax token-identifier-mapping-put! - (lambda (t tok v) - (hash-set! t (syntax-e tok) v))) -(define-for-syntax token-identifier-mapping-map - (lambda (t f) - (hash-map t f))) - -;; Used to calculate information on the grammar, such as whether -;; a particular non-terminal is "simple" instead of recursively defined. -(define-for-syntax (nt-fixpoint nts proc nt-ids patss) - (define (ormap-all val f as bs) - (cond - [(null? as) val] - [else (ormap-all (or (f (car as) (car bs)) val) - f - (cdr as) (cdr bs))])) - (let loop () - (when (ormap-all #f - (lambda (nt pats) - (let ([old (bound-identifier-mapping-get nts nt)]) - (let ([new (proc nt pats old)]) - (if (equal? old new) - #f - (begin - (bound-identifier-mapping-put! nts nt new) - #t))))) - nt-ids patss) - (loop)))) - -;; Tries parse-a followed by parse-b. If parse-a is not simple, -;; then after parse-a succeeds once, we parallelize parse-b -;; and trying a second result for parse-a. -(define (parse-and simple-a? parse-a parse-b - stream last-consumed-token depth end success-k fail-k - max-depth tasks) - (letrec ([mk-got-k - (lambda (success-k fail-k) - (lambda (val stream last-consumed-token depth max-depth tasks next1-k) - (if simple-a? - (parse-b val stream last-consumed-token depth end - (mk-got2-k success-k fail-k next1-k) - (mk-fail2-k success-k fail-k next1-k) - max-depth tasks) - (parallel-or - (lambda (success-k fail-k max-depth tasks) - (parse-b val stream last-consumed-token depth end - success-k fail-k - max-depth tasks)) - (lambda (success-k fail-k max-depth tasks) - (next1-k (mk-got-k success-k fail-k) - fail-k max-depth tasks)) - success-k fail-k max-depth tasks))))] - [mk-got2-k - (lambda (success-k fail-k next1-k) - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (success-k val stream last-consumed-token depth max-depth tasks - (lambda (success-k fail-k max-depth tasks) - (next-k (mk-got2-k success-k fail-k next1-k) - (mk-fail2-k success-k fail-k next1-k) - max-depth tasks)))))] - [mk-fail2-k - (lambda (success-k fail-k next1-k) - (lambda (max-depth tasks) - (next1-k (mk-got-k success-k fail-k) - fail-k - max-depth - tasks)))]) - (parse-a stream last-consumed-token depth end - (mk-got-k success-k fail-k) - fail-k - max-depth tasks))) - -;; Parallel or for non-terminal alternatives -(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks) - (parallel-or (lambda (success-k fail-k max-depth tasks) - (parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks)) - (lambda (success-k fail-k max-depth tasks) - (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)) - success-k fail-k max-depth tasks)) - -;; Generic parallel-or -(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks) - (define answer-key (gensym)) - (letrec ([gota-k - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (report-answer answer-key - max-depth - tasks - (list val stream last-consumed-token depth next-k)))] - [faila-k - (lambda (max-depth tasks) - (report-answer answer-key - max-depth - tasks - null))]) - (let* ([tasks (queue-task - tasks - (lambda (max-depth tasks) - (parse-a gota-k - faila-k - max-depth tasks)))] - [tasks (queue-task - tasks - (lambda (max-depth tasks) - (parse-b gota-k - faila-k - max-depth tasks)))] - [queue-next (lambda (next-k tasks) - (queue-task tasks - (lambda (max-depth tasks) - (next-k gota-k - faila-k - max-depth tasks))))]) - (letrec ([mk-got-one - (lambda (immediate-next? get-nth success-k) - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (let ([tasks (if immediate-next? - (queue-next next-k tasks) - tasks)]) - (success-k val stream last-consumed-token depth max-depth - tasks - (lambda (success-k fail-k max-depth tasks) - (let ([tasks (if immediate-next? - tasks - (queue-next next-k tasks))]) - (get-nth max-depth tasks success-k fail-k)))))))] - [get-first - (lambda (max-depth tasks success-k fail-k) - (wait-for-answer #f max-depth tasks answer-key - (mk-got-one #t get-first success-k) - (lambda (max-depth tasks) - (get-second max-depth tasks success-k fail-k)) - #f))] - [get-second - (lambda (max-depth tasks success-k fail-k) - (wait-for-answer #f max-depth tasks answer-key - (mk-got-one #f get-second success-k) - fail-k #f))]) - (get-first max-depth tasks success-k fail-k))))) - -;; Non-terminal alternatives where the first is "simple" can be done -;; sequentially, which is simpler -(define (parse-or parse-a parse-b - stream last-consumed-token depth end success-k fail-k max-depth tasks) - (letrec ([mk-got-k - (lambda (success-k fail-k) - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (success-k val stream last-consumed-token depth - max-depth tasks - (lambda (success-k fail-k max-depth tasks) - (next-k (mk-got-k success-k fail-k) - (mk-fail-k success-k fail-k) - max-depth tasks)))))] - [mk-fail-k - (lambda (success-k fail-k) - (lambda (max-depth tasks) - (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))]) - (parse-a stream last-consumed-token depth end - (mk-got-k success-k fail-k) - (mk-fail-k success-k fail-k) - max-depth tasks))) - -;; Starts a thread -(define queue-task - (lambda (tasks t [progress? #t]) - (make-tasks (tasks-active tasks) - (cons t (tasks-active-back tasks)) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - (or progress? (tasks-progress? tasks))))) - -;; Reports an answer to a waiting thread: -(define (report-answer answer-key max-depth tasks val) - (let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))]) - (if v - (let ([tasks (make-tasks (cons (v val) - (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (hash-remove! (tasks-waits tasks) answer-key) - (swap-task max-depth tasks)) - ;; We have an answer ready too fast; wait - (swap-task max-depth - (queue-task tasks - (lambda (max-depth tasks) - (report-answer answer-key max-depth tasks val)) - #f))))) - -;; Reports an answer to multiple waiting threads: -(define (report-answer-all answer-key max-depth tasks val k) - (let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))]) - (hash-remove! (tasks-multi-waits tasks) answer-key) - (let ([tasks (make-tasks (append (map (lambda (a) (a val)) v) - (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (k max-depth tasks)))) - -;; Waits for an answer; if `multi?' is #f, this is sole waiter, otherwise -;; there might be many. Use wither #t or #f (and `report-answer' or -;; `report-answer-all', resptively) consistently for a particular answer key. -(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k) - (let ([wait (lambda (val) - (lambda (max-depth tasks) - (if val - (if (null? val) - (fail-k max-depth tasks) - (let-values ([(val stream last-consumed-token depth next-k) (apply values val)]) - (success-k val stream last-consumed-token depth max-depth tasks next-k))) - (deadlock-k max-depth tasks))))]) - (if multi? - (hash-set! (tasks-multi-waits tasks) answer-key - (cons wait (hash-ref (tasks-multi-waits tasks) answer-key - (lambda () null)))) - (hash-set! (tasks-waits tasks) answer-key wait)) - (let ([tasks (make-tasks (tasks-active tasks) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (swap-task max-depth tasks)))) - -;; Swap thread -(define (swap-task max-depth tasks) - ;; Swap in first active: - (if (null? (tasks-active tasks)) - (if (tasks-progress? tasks) - (swap-task max-depth - (make-tasks (reverse (tasks-active-back tasks)) - null - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #f)) - ;; No progress, so issue failure for all multi-waits - (if (zero? (hash-count (tasks-multi-waits tasks))) - (error 'swap-task "Deadlock") - (swap-task max-depth - (make-tasks (apply - append - (hash-map (tasks-multi-waits tasks) - (lambda (k l) - (map (lambda (v) (v #f)) l)))) - (tasks-active-back tasks) - (tasks-waits tasks) - (make-hasheq) - (tasks-cache tasks) - #t)))) - (let ([t (car (tasks-active tasks))] - [tasks (make-tasks (cdr (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - (tasks-progress? tasks))]) - (t max-depth tasks)))) - -;; Finds the symbolic representative of a token class -(define-for-syntax (map-token toks tok) - (car (token-identifier-mapping-get toks tok))) - -(define no-pos-val (make-position #f #f #f)) -(define-for-syntax no-pos - (let ([npv ((syntax-local-certifier) #'no-pos-val)]) - (lambda (stx) npv))) -(define-for-syntax at-tok-pos - (lambda (sel expr) - (lambda (stx) - #`(let ([v #,expr]) (if v (#,sel v) no-pos-val))))) - -;; Builds a matcher for a particular alternative -(define-for-syntax (build-match nts toks pat handle $ctx) - (let loop ([pat pat] - [pos 1]) - (if (null? pat) - #`(success-k #,handle stream last-consumed-token depth max-depth tasks - (lambda (success-k fail-k max-depth tasks) - (fail-k max-depth tasks))) - (let ([id (datum->syntax (car pat) - (string->symbol (format "$~a" pos)))] - [id-start-pos (datum->syntax (car pat) - (string->symbol (format "$~a-start-pos" pos)))] - [id-end-pos (datum->syntax (car pat) - (string->symbol (format "$~a-end-pos" pos)))] - [n-end-pos (and (null? (cdr pat)) - (datum->syntax (car pat) '$n-end-pos))]) - (cond - [(bound-identifier-mapping-get nts (car pat) (lambda () #f)) - ;; Match non-termimal - #`(parse-and - ;; First part is simple? (If so, we don't have to parallelize the `and'.) - #,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))]) - (or (not l) - (andmap values (caddr l)))) - #,(car pat) - (let ([original-stream stream]) - (lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks) - (let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream) - tok-end - tok-start) - #'(if (eq? original-stream stream) - last-consumed-token - (and (pair? original-stream) - (car original-stream))))] - [#,id-end-pos (at-tok-pos #'tok-end #'last-consumed-token)] - #,@(if n-end-pos - #`([#,n-end-pos (at-tok-pos #'tok-end #'last-consumed-token)]) - null)) - #,(loop (cdr pat) (add1 pos))))) - stream last-consumed-token depth - #,(let ([cnt (apply + - (map (lambda (item) - (cond - [(bound-identifier-mapping-get nts item (lambda () #f)) - => (lambda (l) (car l))] - [else 1])) - (cdr pat)))]) - #`(- end #,cnt)) - success-k fail-k max-depth tasks)] - [else - ;; Match token - (let ([tok-id (map-token toks (car pat))]) - #`(if (and (pair? stream) - (eq? '#,tok-id (tok-name (car stream)))) - (let* ([stream-a (car stream)] - [#,id (tok-val stream-a)] - [last-consumed-token (car stream)] - [stream (cdr stream)] - [depth (add1 depth)]) - (let ([max-depth (max max-depth depth)]) - (let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'stream-a)] - [#,id-end-pos (at-tok-pos #'tok-end #'stream-a)] - #,@(if n-end-pos - #`([#,n-end-pos (at-tok-pos #'tok-end #'stream-a)]) - null)) - #,(loop (cdr pat) (add1 pos))))) - (fail-k max-depth tasks)))]))))) - -;; Starts parsing to match a non-terminal. There's a minor -;; optimization that checks for known starting tokens. Otherwise, -;; use the cache, block if someone else is already trying the match, -;; and cache the result if it's computed. -;; The cache maps nontermial+startingpos+iteration to a result, where -;; the iteration is 0 for the first match attempt, 1 for the second, -;; etc. -(define (parse-nt/share key min-cnt init-tokens stream last-consumed-token depth end max-depth tasks success-k fail-k k) - (if (and (positive? min-cnt) - (pair? stream) - (not (memq (tok-name (car stream)) init-tokens))) - ;; No such leading token; give up - (fail-k max-depth tasks) - ;; Run pattern - (let loop ([n 0] - [success-k success-k] - [fail-k fail-k] - [max-depth max-depth] - [tasks tasks] - [k k]) - (let ([answer-key (gensym)] - [table-key (vector key depth n)] - [old-depth depth] - [old-stream stream]) - #;(printf "Loop ~a\n" table-key) - (cond - [(hash-ref (tasks-cache tasks) table-key (lambda () #f)) - => (lambda (result) - #;(printf "Reuse ~a\n" table-key) - (result success-k fail-k max-depth tasks))] - [else - #;(printf "Try ~a ~a\n" table-key (map tok-name stream)) - (hash-set! (tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - #;(printf "Wait ~a ~a\n" table-key answer-key) - (wait-for-answer #t max-depth tasks answer-key success-k fail-k - (lambda (max-depth tasks) - #;(printf "Deadlock ~a ~a\n" table-key answer-key) - (fail-k max-depth tasks))))) - (let result-loop ([max-depth max-depth][tasks tasks][k k]) - (letrec ([orig-stream stream] - [new-got-k - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - ;; Check whether we already have a result that consumed the same amount: - (let ([result-key (vector #f key old-depth depth)]) - (cond - [(hash-ref (tasks-cache tasks) result-key (lambda () #f)) - ;; Go for the next-result - (result-loop max-depth - tasks - (lambda (end max-depth tasks success-k fail-k) - (next-k success-k fail-k max-depth tasks)))] - [else - #;(printf "Success ~a ~a\n" table-key - (map tok-name (let loop ([d old-depth][s old-stream]) - (if (= d depth) - null - (cons (car s) (loop (add1 d) (cdr s))))))) - (let ([next-k (lambda (success-k fail-k max-depth tasks) - (loop (add1 n) - success-k - fail-k - max-depth - tasks - (lambda (end max-depth tasks success-k fail-k) - (next-k success-k fail-k max-depth tasks))))]) - (hash-set! (tasks-cache tasks) result-key #t) - (hash-set! (tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - (success-k val stream last-consumed-token depth max-depth tasks next-k))) - (report-answer-all answer-key - max-depth - tasks - (list val stream last-consumed-token depth next-k) - (lambda (max-depth tasks) - (success-k val stream last-consumed-token depth max-depth tasks next-k))))])))] - [new-fail-k - (lambda (max-depth tasks) - #;(printf "Failure ~a\n" table-key) - (hash-set! (tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - (fail-k max-depth tasks))) - (report-answer-all answer-key - max-depth - tasks - null - (lambda (max-depth tasks) - (fail-k max-depth tasks))))]) - (k end max-depth tasks new-got-k new-fail-k)))]))))) - -(define-syntax (cfg-parser stx) - (syntax-case stx () - [(_ clause ...) - (let ([clauses (syntax->list #'(clause ...))]) - (let-values ([(start grammar cfg-error parser-clauses src-pos?) - (let ([all-toks (apply - append - (map (lambda (clause) - (syntax-case clause (tokens) - [(tokens t ...) - (apply - append - (map (lambda (t) - (let ([v (syntax-local-value t (lambda () #f))]) - (cond - [(terminals-def? v) - (map (lambda (v) - (cons v #f)) - (syntax->list (terminals-def-t v)))] - [(e-terminals-def? v) - (map (lambda (v) - (cons v #t)) - (syntax->list (e-terminals-def-t v)))] - [else null]))) - (syntax->list #'(t ...))))] - [_else null])) - clauses))] - [all-end-toks (apply - append - (map (lambda (clause) - (syntax-case clause (end) - [(end t ...) - (syntax->list #'(t ...))] - [_else null])) - clauses))]) - (let loop ([clauses clauses] - [cfg-start #f] - [cfg-grammar #f] - [cfg-error #f] - [src-pos? #f] - [parser-clauses null]) - (if (null? clauses) - (values cfg-start - cfg-grammar - cfg-error - (reverse parser-clauses) - src-pos?) - (syntax-case (car clauses) (start error grammar src-pos) - [(start tok) - (loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)] - [(error expr) - (loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)] - [(grammar [nt [pat handle0 handle ...] ...] ...) - (let ([nts (make-bound-identifier-mapping)] - [toks (make-token-identifier-mapping)] - [end-toks (make-token-identifier-mapping)] - [nt-ids (syntax->list #'(nt ...))] - [patss (map (lambda (stx) - (map syntax->list (syntax->list stx))) - (syntax->list #'((pat ...) ...)))]) - (for-each (lambda (nt) - (bound-identifier-mapping-put! nts nt (list 0))) - nt-ids) - (for-each (lambda (t) - (token-identifier-mapping-put! end-toks t #t)) - all-end-toks) - (for-each (lambda (t) - (unless (token-identifier-mapping-get end-toks (car t) (lambda () #f)) - (let ([id (gensym (syntax-e (car t)))]) - (token-identifier-mapping-put! toks (car t) - (cons id (cdr t)))))) - all-toks) - ;; Compute min max size for each non-term: - (nt-fixpoint - nts - (lambda (nt pats old-list) - (let ([new-cnt - (apply - min - (map (lambda (pat) - (apply - + - (map (lambda (elem) - (car - (bound-identifier-mapping-get nts - elem - (lambda () (list 1))))) - pat))) - pats))]) - (if (new-cnt . > . (car old-list)) - (cons new-cnt (cdr old-list)) - old-list))) - nt-ids patss) - ;; Compute set of toks that must appear at the beginning - ;; for a non-terminal - (nt-fixpoint - nts - (lambda (nt pats old-list) - (let ([new-list - (apply - append - (map (lambda (pat) - (let loop ([pat pat]) - (if (pair? pat) - (let ([l (bound-identifier-mapping-get - nts - (car pat) - (lambda () - (list 1 (map-token toks (car pat)))))]) - ;; If the non-terminal can match 0 things, - ;; then it might match something from the - ;; next pattern element. Otherwise, it must - ;; match the first element: - (if (zero? (car l)) - (append (cdr l) (loop (cdr pat))) - (cdr l))) - null))) - pats))]) - (let ([new (filter (lambda (id) - (andmap (lambda (id2) - (not (eq? id id2))) - (cdr old-list))) - new-list)]) - (if (pair? new) - ;; Drop dups in new list: - (let ([new (let loop ([new new]) - (if (null? (cdr new)) - new - (if (ormap (lambda (id) - (eq? (car new) id)) - (cdr new)) - (loop (cdr new)) - (cons (car new) (loop (cdr new))))))]) - (cons (car old-list) (append new (cdr old-list)))) - old-list)))) - nt-ids patss) - ;; Determine left-recursive clauses: - (for-each (lambda (nt pats) - (let ([l (bound-identifier-mapping-get nts nt)]) - (bound-identifier-mapping-put! nts nt (list (car l) - (cdr l) - (map (lambda (x) #f) pats))))) - nt-ids patss) - (nt-fixpoint - nts - (lambda (nt pats old-list) - (list (car old-list) - (cadr old-list) - (map (lambda (pat simple?) - (or simple? - (let ([l (map (lambda (elem) - (bound-identifier-mapping-get - nts - elem - (lambda () #f))) - pat)]) - (andmap (lambda (i) - (or (not i) - (andmap values (caddr i)))) - l)))) - pats (caddr old-list)))) - nt-ids patss) - ;; Build a definition for each non-term: - (loop (cdr clauses) - cfg-start - (map (lambda (nt pats handles $ctxs) - (define info (bound-identifier-mapping-get nts nt)) - (list nt - #`(let ([key (gensym '#,nt)]) - (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) - (parse-nt/share - key #,(car info) '#,(cadr info) stream last-consumed-token depth end - max-depth tasks - success-k fail-k - (lambda (end max-depth tasks success-k fail-k) - #,(let loop ([pats pats] - [handles (syntax->list handles)] - [$ctxs (syntax->list $ctxs)] - [simple?s (caddr info)]) - (if (null? pats) - #'(fail-k max-depth tasks) - #`(#,(if (or (null? (cdr pats)) - (car simple?s)) - #'parse-or - #'parse-parallel-or) - (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) - #,(build-match nts - toks - (car pats) - (car handles) - (car $ctxs))) - (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) - #,(loop (cdr pats) - (cdr handles) - (cdr $ctxs) - (cdr simple?s))) - stream last-consumed-token depth end success-k fail-k max-depth tasks))))))))) - nt-ids - patss - (syntax->list #'(((begin handle0 handle ...) ...) ...)) - (syntax->list #'((handle0 ...) ...))) - cfg-error - src-pos? - (list* - (with-syntax ([((tok tok-id . $e) ...) - (token-identifier-mapping-map toks - (lambda (k v) - (list* k - (car v) - (if (cdr v) - #f - '$1))))] - [(pos ...) - (if src-pos? - #'($1-start-pos $1-end-pos) - #'(#f #f))]) - #`(grammar (start [() null] - [(atok start) (cons $1 $2)]) - (atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...))) - #`(start start) - parser-clauses)))] - [(grammar . _) - (raise-syntax-error - #f - "bad grammar clause" - stx - (car clauses))] - [(src-pos) - (loop (cdr clauses) - cfg-start - cfg-grammar - cfg-error - #t - (cons (car clauses) parser-clauses))] - [_else - (loop (cdr clauses) - cfg-start - cfg-grammar - cfg-error - src-pos? - (cons (car clauses) parser-clauses))]))))]) - #`(let ([orig-parse (parser - [error (lambda (a b c) - (error 'cfg-parser "unexpected ~a token: ~a" b c))] - . #,parser-clauses)] - [error-proc #,cfg-error]) - (letrec #,grammar - (lambda (get-tok) - (let ([tok-list (orig-parse get-tok)]) - (letrec ([success-k - (lambda (val stream last-consumed-token depth max-depth tasks next) - (if (null? stream) - val - (next success-k fail-k max-depth tasks)))] - [fail-k (lambda (max-depth tasks) - (define (call-error-proc tok-ok? tok-name tok-value start-pos end-pos) - (cond - [(procedure-arity-includes? error-proc 5) - (error-proc tok-ok? tok-name tok-value start-pos end-pos)] - [else - (error-proc tok-ok? tok-name tok-value)])) - (cond - [(null? tok-list) - (if error-proc - (call-error-proc #t - 'no-tokens - #f - (make-position #f #f #f) - (make-position #f #f #f)) - (error - 'cfg-parse - "no tokens"))] - [else - (let ([bad-tok (list-ref tok-list - (min (sub1 (length tok-list)) - max-depth))]) - (if error-proc - (call-error-proc #t - (tok-orig-name bad-tok) - (tok-val bad-tok) - (tok-start bad-tok) - (tok-end bad-tok)) - (error - 'cfg-parse - "failed at ~a" - (tok-val bad-tok))))]))]) - (#,start tok-list - ;; we simulate a token at the very beginning with zero width - ;; for use with the position-generating code (*-start-pos, *-end-pos). - (if (null? tok-list) - (tok #f #f #f - (position 1 - #,(if src-pos? #'1 #'#f) - #,(if src-pos? #'0 #'#f)) - (position 1 - #,(if src-pos? #'1 #'#f) - #,(if src-pos? #'0 #'#f))) - (tok (tok-name (car tok-list)) - (tok-orig-name (car tok-list)) - (tok-val (car tok-list)) - (tok-start (car tok-list)) - (tok-start (car tok-list)))) - 0 - (length tok-list) - success-k - fail-k - 0 - (make-tasks null null - (make-hasheq) (make-hasheq) - (make-hash) #t)))))))))])) - - -(module* test racket/base - (require (submod "..") - br-parser-tools/lex - racket/block - racket/generator - rackunit) - - ;; Test: parsing regular expressions. - ;; Here is a test case on locations: - (block - (define-tokens regexp-tokens (ANCHOR STAR OR LIT LPAREN RPAREN EOF)) - (define lex (lexer-src-pos ["|" (token-OR lexeme)] - ["^" (token-ANCHOR lexeme)] - ["*" (token-STAR lexeme)] - [(repetition 1 +inf.0 alphabetic) (token-LIT lexeme)] - ["(" (token-LPAREN lexeme)] - [")" (token-RPAREN lexeme)] - [whitespace (return-without-pos (lex input-port))] - [(eof) (token-EOF 'eof)])) - (define -parse (cfg-parser - (tokens regexp-tokens) - (start top) - (end EOF) - (src-pos) - (grammar [top [(maybe-anchor regexp) - (cond [$1 - `(anchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] - [else - `(unanchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))])]] - [maybe-anchor [(ANCHOR) #t] - [() #f]] - [regexp [(regexp STAR) `(star ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] - [(regexp OR regexp) `(or ,$1 ,$3 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] - [(LPAREN regexp RPAREN) `(group ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] - [(LIT) `(lit ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $1-end-pos))]]))) - (define (pos->sexp pos) - (position-offset pos)) - - (define (parse s) - (define ip (open-input-string s)) - (port-count-lines! ip) - (-parse (lambda () (lex ip)))) - - (check-equal? (parse "abc") - '(unanchored (lit "abc" 1 4) 1 4)) - (check-equal? (parse "a | (b*) | c") - '(unanchored (or (or (lit "a" 1 2) - (group (star (lit "b" 6 7) 6 8) 5 9) - 1 9) - (lit "c" 12 13) - 1 13) - 1 13))) - - - ;; Check that cfg-parser can accept error functions of 3 arguments: - (block - (define-tokens non-terminals (ONE ZERO EOF)) - (define parse - (cfg-parser (tokens non-terminals) - (start ones) - (end EOF) - (error (lambda (tok-ok tok-name tok-val) - (error (format "~a ~a ~a" tok-ok tok-name tok-val)))) - (grammar [ones [() null] - [(ONE ones) (cons $1 $2)]]))) - (define (sequence->tokenizer s) - (define-values (more? next) (sequence-generate s)) - (lambda () - (cond [(more?) (next)] - [else (token-EOF 'eof)]))) - (check-exn #rx"#t ZERO zero" - (lambda () (parse (sequence->tokenizer (list (token-ZERO "zero"))))))) - - - - - ;; Check that cfg-parser can accept error functions of 5 arguments: - (block - (define-tokens non-terminals (ONE ZERO EOF)) - (define parse - (cfg-parser (tokens non-terminals) - (start ones) - (src-pos) - (end EOF) - (error (lambda (tok-ok tok-name tok-val start-pos end-pos) - (error (format "~a ~a ~a ~a ~a" - tok-ok tok-name tok-val - (position-offset start-pos) - (position-offset end-pos))))) - (grammar [ones [() null] - [(ONE ones) (cons $1 $2)]]))) - (define (sequence->tokenizer s) - (define-values (more? next) (sequence-generate s)) - (lambda () - (cond [(more?) (next)] - [else (position-token (token-EOF 'eof) - (position #f #f #f) - (position #f #f #f))]))) - (check-exn #rx"#t ZERO zero 2 3" - (lambda () - (parse - (sequence->tokenizer - (list (position-token - (token-ZERO "zero") - (position 2 2 5) - (position 3 2 6)))))))) - - - - - - ;; Tests used during development - (define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF)) - - (define lex - (lexer - ["+" (token-PLUS '+)] - ["-" (token-MINUS '-)] - ["*" (token-STAR '*)] - ["|" (token-BAR '||)] - [":" (token-COLON '|:|)] - [whitespace (lex input-port)] - [(eof) (token-EOF 'eof)])) - - (define parse - (cfg-parser - (tokens non-terminals) - (start ) - (end EOF) - (error (lambda (a b stx) - (error 'parse "failed at ~s" stx))) - (grammar [ [(PLUS) "plus"] - [( BAR ) (list $1 $2 $3)] - [( COLON) (list $1)]] - [ [(MINUS) "minus"] - [( STAR) (cons $1 $2)]] - [ [( MINUS) "yes"]] - [ [(PLUS) 'plus] - [(MINUS) 'minus]] - [ [() '0] - [( PLUS) (add1 $1)] - [( PLUS) (add1 $1)]]))) - - (let ([p (open-input-string #;"+*|-|-*|+**" #;"-|+*|+**" - #;"+*|+**|-" #;"-|-*|-|-*" - #;"-|-*|-|-**|-|-*|-|-**" - "-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|-|-*|-|-**|-|-*|-|-*** - |-|-*|-|-**|-|-*|-|-*****|-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****| - -|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****" - ;; This one fails: - #;"+*")]) - (check-equal? (parse (lambda () (lex p))) - '((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) - || - (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) - . - *) - || - (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) - || - (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) - . - *)) - . - *) - || - (((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) - || - (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) - . - *) - || - (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) - || - (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) - . - *)) - . - *))))) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/calc.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/calc.rkt deleted file mode 100644 index 9ad1218..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/calc.rkt +++ /dev/null @@ -1,89 +0,0 @@ -#lang scheme - -;; An interactive calculator inspired by the calculator example in the bison manual. - - -;; Import the parser and lexer generators. -(require br-parser-tools/yacc - br-parser-tools/lex - (prefix-in : br-parser-tools/lex-sre)) - -(define-tokens value-tokens (NUM VAR FNCT)) -(define-empty-tokens op-tokens (newline = OP CP + - * / ^ EOF NEG)) - -;; A hash table to store variable values in for the calculator -(define vars (make-hash)) - -(define-lex-abbrevs - (lower-letter (:/ "a" "z")) - - (upper-letter (:/ #\A #\Z)) - - ;; (:/ 0 9) would not work because the lexer does not understand numbers. (:/ #\0 #\9) is ok too. - (digit (:/ "0" "9"))) - -(define calcl - (lexer - [(eof) 'EOF] - ;; recursively call the lexer on the remaining input after a tab or space. Returning the - ;; result of that operation. This effectively skips all whitespace. - [(:or #\tab #\space) (calcl input-port)] - ;; (token-newline) returns 'newline - [#\newline (token-newline)] - ;; Since (token-=) returns '=, just return the symbol directly - [(:or "=" "+" "-" "*" "/" "^") (string->symbol lexeme)] - ["(" 'OP] - [")" 'CP] - ["sin" (token-FNCT sin)] - [(:+ (:or lower-letter upper-letter)) (token-VAR (string->symbol lexeme))] - [(:+ digit) (token-NUM (string->number lexeme))] - [(:: (:+ digit) #\. (:* digit)) (token-NUM (string->number lexeme))])) - - -(define calcp - (parser - - (start start) - (end newline EOF) - (tokens value-tokens op-tokens) - (error (lambda (a b c) (void))) - - (precs (right =) - (left - +) - (left * /) - (left NEG) - (right ^)) - - (grammar - - (start [() #f] - ;; If there is an error, ignore everything before the error - ;; and try to start over right after the error - [(error start) $2] - [(exp) $1]) - - (exp [(NUM) $1] - [(VAR) (hash-ref vars $1 (lambda () 0))] - [(VAR = exp) (begin (hash-set! vars $1 $3) - $3)] - [(FNCT OP exp CP) ($1 $3)] - [(exp + exp) (+ $1 $3)] - [(exp - exp) (- $1 $3)] - [(exp * exp) (* $1 $3)] - [(exp / exp) (/ $1 $3)] - [(- exp) (prec NEG) (- $2)] - [(exp ^ exp) (expt $1 $3)] - [(OP exp CP) $2])))) - -;; run the calculator on the given input-port -(define (calc ip) - (port-count-lines! ip) - (letrec ((one-line - (lambda () - (let ((result (calcp (lambda () (calcl ip))))) - (when result - (printf "~a\n" result) - (one-line)))))) - (one-line))) - -(calc (open-input-string "x=1\n(x + 2 * 3) - (1+2)*3")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/read.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/read.rkt deleted file mode 100644 index a10b2c1..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/examples/read.rkt +++ /dev/null @@ -1,242 +0,0 @@ -;; This implements the equivalent of racket's read-syntax for R5RS scheme. -;; It has not been thoroughly tested. Also it will read an entire file into a -;; list of syntax objects, instead of returning one syntax object at a time - -(module read mzscheme - - (require br-parser-tools/lex - (prefix : br-parser-tools/lex-sre) - br-parser-tools/yacc - syntax/readerr) - - (define-tokens data (DATUM)) - (define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF)) - - (define scheme-lexer - (lexer-src-pos - - ;; Skip comments, without accumulating extra position information - [(:or scheme-whitespace comment) (return-without-pos (scheme-lexer input-port))] - - ["#t" (token-DATUM #t)] - ["#f" (token-DATUM #f)] - [(:: "#\\" any-char) (token-DATUM (caddr (string->list lexeme)))] - ["#\\space" (token-DATUM #\space)] - ["#\\newline" (token-DATUM #\newline)] - [(:or (:: initial (:* subsequent)) "+" "-" "...") (token-DATUM (string->symbol lexeme))] - [#\" (token-DATUM (list->string (get-string-token input-port)))] - [#\( 'OP] - [#\) 'CP] - [#\[ 'OP] - [#\] 'CP] - ["#(" 'HASHOP] - [num2 (token-DATUM (string->number lexeme 2))] - [num8 (token-DATUM (string->number lexeme 8))] - [num10 (token-DATUM (string->number lexeme 10))] - [num16 (token-DATUM (string->number lexeme 16))] - ["'" 'QUOTE] - ["`" 'QUASIQUOTE] - ["," 'UNQUOTE] - [",@" 'UNQUOTE-SPLICING] - ["." 'DOT] - [(eof) 'EOF])) - - (define get-string-token - (lexer - [(:~ #\" #\\) (cons (car (string->list lexeme)) - (get-string-token input-port))] - [(:: #\\ #\\) (cons #\\ (get-string-token input-port))] - [(:: #\\ #\") (cons #\" (get-string-token input-port))] - [#\" null])) - - - (define-lex-abbrevs - [letter (:or (:/ "a" "z") (:/ #\A #\Z))] - [digit (:/ #\0 #\9)] - [scheme-whitespace (:or #\newline #\return #\tab #\space #\vtab)] - [initial (:or letter (char-set "!$%&*/:<=>?^_~@"))] - [subsequent (:or initial digit (char-set "+-.@"))] - [comment (:: #\; (:* (:~ #\newline)) #\newline)] - - - ;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of - ;; using regexp macros to avoid the cut and paste. - ; [numR (:: prefixR complexR)] - ; [complexR (:or realR - ; (:: realR "@" realR) - ; (:: realR "+" urealR "i") - ; (:: realR "-" urealR "i") - ; (:: realR "+i") - ; (:: realR "-i") - ; (:: "+" urealR "i") - ; (:: "-" urealR "i") - ; (:: "+i") - ; (:: "-i"))] - ; [realR (:: sign urealR)] - ; [urealR (:or uintegerR (:: uintegerR "/" uintegerR) decimalR)] - ; [uintegerR (:: (:+ digitR) (:* #\#))] - ; [prefixR (:or (:: radixR exactness) - ; (:: exactness radixR))] - - [num2 (:: prefix2 complex2)] - [complex2 (:or real2 - (:: real2 "@" real2) - (:: real2 "+" ureal2 "i") - (:: real2 "-" ureal2 "i") - (:: real2 "+i") - (:: real2 "-i") - (:: "+" ureal2 "i") - (:: "-" ureal2 "i") - (:: "+i") - (:: "-i"))] - [real2 (:: sign ureal2)] - [ureal2 (:or uinteger2 (:: uinteger2 "/" uinteger2))] - [uinteger2 (:: (:+ digit2) (:* #\#))] - [prefix2 (:or (:: radix2 exactness) - (:: exactness radix2))] - [radix2 "#b"] - [digit2 (:or "0" "1")] - [num8 (:: prefix8 complex8)] - [complex8 (:or real8 - (:: real8 "@" real8) - (:: real8 "+" ureal8 "i") - (:: real8 "-" ureal8 "i") - (:: real8 "+i") - (:: real8 "-i") - (:: "+" ureal8 "i") - (:: "-" ureal8 "i") - (:: "+i") - (:: "-i"))] - [real8 (:: sign ureal8)] - [ureal8 (:or uinteger8 (:: uinteger8 "/" uinteger8))] - [uinteger8 (:: (:+ digit8) (:* #\#))] - [prefix8 (:or (:: radix8 exactness) - (:: exactness radix8))] - [radix8 "#o"] - [digit8 (:/ "0" "7")] - - [num10 (:: prefix10 complex10)] - [complex10 (:or real10 - (:: real10 "@" real10) - (:: real10 "+" ureal10 "i") - (:: real10 "-" ureal10 "i") - (:: real10 "+i") - (:: real10 "-i") - (:: "+" ureal10 "i") - (:: "-" ureal10 "i") - (:: "+i") - (:: "-i"))] - [real10 (:: sign ureal10)] - [ureal10 (:or uinteger10 (:: uinteger10 "/" uinteger10) decimal10)] - [uinteger10 (:: (:+ digit10) (:* #\#))] - [prefix10 (:or (:: radix10 exactness) - (:: exactness radix10))] - [radix10 (:? "#d")] - [digit10 digit] - [decimal10 (:or (:: uinteger10 suffix) - (:: #\. (:+ digit10) (:* #\#) suffix) - (:: (:+ digit10) #\. (:* digit10) (:* #\#) suffix) - (:: (:+ digit10) (:+ #\#) #\. (:* #\#) suffix))] - - [num16 (:: prefix16 complex16)] - [complex16 (:or real16 - (:: real16 "@" real16) - (:: real16 "+" ureal16 "i") - (:: real16 "-" ureal16 "i") - (:: real16 "+i") - (:: real16 "-i") - (:: "+" ureal16 "i") - (:: "-" ureal16 "i") - "+i" - "-i")] - [real16 (:: sign ureal16)] - [ureal16 (:or uinteger16 (:: uinteger16 "/" uinteger16))] - [uinteger16 (:: (:+ digit16) (:* #\#))] - [prefix16 (:or (:: radix16 exactness) - (:: exactness radix16))] - [radix16 "#x"] - [digit16 (:or digit (:/ #\a #\f) (:/ #\A #\F))] - - - [suffix (:or "" (:: exponent-marker sign (:+ digit10)))] - [exponent-marker (:or "e" "s" "f" "d" "l")] - [sign (:or "" "+" "-")] - [exactness (:or "" "#i" "#e")]) - - - (define stx-for-original-property (read-syntax #f (open-input-string "original"))) - - ;; A macro to build the syntax object - (define-syntax (build-so stx) - (syntax-case stx () - ((_ value start end) - (with-syntax ((start-pos (datum->syntax-object - (syntax end) - (string->symbol - (format "$~a-start-pos" - (syntax-object->datum (syntax start)))))) - (end-pos (datum->syntax-object - (syntax end) - (string->symbol - (format "$~a-end-pos" - (syntax-object->datum (syntax end)))))) - (source (datum->syntax-object - (syntax end) - 'source-name))) - (syntax - (datum->syntax-object - #f - value - (list source - (position-line start-pos) - (position-col start-pos) - (position-offset start-pos) - (- (position-offset end-pos) - (position-offset start-pos))) - stx-for-original-property)))))) - - (define (scheme-parser source-name) - (parser - (src-pos) - - (start s) - (end EOF) - (error (lambda (a name val start end) - (raise-read-error - "read-error" - source-name - (position-line start) - (position-col start) - (position-offset start) - (- (position-offset end) - (position-offset start))))) - (tokens data delim) - - - (grammar - - (s [(sexp-list) (reverse $1)]) - - (sexp [(DATUM) (build-so $1 1 1)] - [(OP sexp-list CP) (build-so (reverse $2) 1 3)] - [(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)] - [(QUOTE sexp) (build-so (list 'quote $2) 1 2)] - [(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)] - [(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)] - [(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)] - [(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)]) - - (sexp-list [() null] - [(sexp-list sexp) (cons $2 $1)])))) - - (define (rs sn ip) - (port-count-lines! ip) - ((scheme-parser sn) (lambda () (scheme-lexer ip)))) - - (define readsyntax - (case-lambda ((sn) (rs sn (current-input-port))) - ((sn ip) (rs sn ip)))) - - (provide (rename readsyntax read-syntax)) - - ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/info.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/info.rkt deleted file mode 100644 index ae66609..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/info.rkt +++ /dev/null @@ -1,3 +0,0 @@ -#lang info - -(define compile-omit-paths '("private-lex/error-tests.rkt")) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt deleted file mode 100644 index 0cbb175..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-plt-v200.rkt +++ /dev/null @@ -1,24 +0,0 @@ -(module lex-plt-v200 mzscheme - (require br-parser-tools/lex - (prefix : br-parser-tools/lex-sre)) - - (provide epsilon - ~ - (rename :* *) - (rename :+ +) - (rename :? ?) - (rename :or :) - (rename :& &) - (rename :: @) - (rename :~ ^) - (rename :/ -)) - - (define-lex-trans epsilon - (syntax-rules () - ((_) ""))) - - (define-lex-trans ~ - (syntax-rules () - ((_ re) (complement re))))) - - diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-sre.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-sre.rkt deleted file mode 100644 index 820d090..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex-sre.rkt +++ /dev/null @@ -1,119 +0,0 @@ -(module lex-sre mzscheme - (require br-parser-tools/lex) - - (provide (rename sre-* *) - (rename sre-+ +) - ? - (rename sre-= =) - (rename sre->= >=) - ** - (rename sre-or or) - : - seq - & - ~ - (rename sre-- -) - (rename sre-/ /) - /-only-chars) - - (define-lex-trans sre-* - (syntax-rules () - ((_ re ...) - (repetition 0 +inf.0 (union re ...))))) - - (define-lex-trans sre-+ - (syntax-rules () - ((_ re ...) - (repetition 1 +inf.0 (union re ...))))) - - (define-lex-trans ? - (syntax-rules () - ((_ re ...) - (repetition 0 1 (union re ...))))) - - (define-lex-trans sre-= - (syntax-rules () - ((_ n re ...) - (repetition n n (union re ...))))) - - (define-lex-trans sre->= - (syntax-rules () - ((_ n re ...) - (repetition n +inf.0 (union re ...))))) - - (define-lex-trans ** - (syntax-rules () - ((_ low #f re ...) - (** low +inf.0 re ...)) - ((_ low high re ...) - (repetition low high (union re ...))))) - - (define-lex-trans sre-or - (syntax-rules () - ((_ re ...) - (union re ...)))) - - (define-lex-trans : - (syntax-rules () - ((_ re ...) - (concatenation re ...)))) - - (define-lex-trans seq - (syntax-rules () - ((_ re ...) - (concatenation re ...)))) - - (define-lex-trans & - (syntax-rules () - ((_ re ...) - (intersection re ...)))) - - (define-lex-trans ~ - (syntax-rules () - ((_ re ...) - (char-complement (union re ...))))) - - ;; set difference - (define-lex-trans (sre-- stx) - (syntax-case stx () - ((_) - (raise-syntax-error #f - "must have at least one argument" - stx)) - ((_ big-re re ...) - (syntax (& big-re (complement (union re ...))))))) - - (define-lex-trans (sre-/ stx) - (syntax-case stx () - ((_ range ...) - (let ((chars - (apply append (map (lambda (r) - (let ((x (syntax-e r))) - (cond - ((char? x) (list x)) - ((string? x) (string->list x)) - (else - (raise-syntax-error - #f - "not a char or string" - stx - r))))) - (syntax->list (syntax (range ...))))))) - (unless (even? (length chars)) - (raise-syntax-error - #f - "not given an even number of characters" - stx)) - #`(/-only-chars #,@chars))))) - - (define-lex-trans /-only-chars - (syntax-rules () - ((_ c1 c2) - (char-range c1 c2)) - ((_ c1 c2 c ...) - (union (char-range c1 c2) - (/-only-chars c ...))))) - - ) - - diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex.rkt deleted file mode 100644 index c7202ab..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/lex.rkt +++ /dev/null @@ -1,412 +0,0 @@ -(module lex mzscheme - - ;; Provides the syntax used to create lexers and the functions needed to - ;; create and use the buffer that the lexer reads from. See docs. - - (require-for-syntax mzlib/list - syntax/stx - syntax/define - syntax/boundmap - "private-lex/util.rkt" - "private-lex/actions.rkt" - "private-lex/front.rkt" - "private-lex/unicode-chars.rkt") - - (require mzlib/stxparam - syntax/readerr - "private-lex/token.rkt") - - (provide lexer lexer-src-pos lexer-srcloc define-lex-abbrev define-lex-abbrevs define-lex-trans - - ;; Dealing with tokens and related structures - define-tokens define-empty-tokens token-name token-value token? - (struct position (offset line col)) - (struct position-token (token start-pos end-pos)) - (struct srcloc-token (token srcloc)) - - ;; File path for highlighting errors while lexing - file-path - lexer-file-path ;; alternate name - - ;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4. - any-char any-string nothing alphabetic lower-case upper-case title-case - numeric symbolic punctuation graphic whitespace blank iso-control - - ;; A regular expression operator - char-set) - - ;; wrap-action: syntax-object src-pos? -> syntax-object - (define-for-syntax (wrap-action action src-loc-style) - (with-syntax ((action-stx - (cond - [(eq? src-loc-style 'lexer-src-pos) - #`(let/ec ret - (syntax-parameterize - ([return-without-pos (make-rename-transformer #'ret)]) - (make-position-token #,action start-pos end-pos)))] - [(eq? src-loc-style 'lexer-srcloc) - #`(let/ec ret - (syntax-parameterize - ([return-without-srcloc (make-rename-transformer #'ret)]) - (make-srcloc-token #,action lexeme-srcloc)))] - [else action]))) - (syntax/loc action - (lambda (start-pos-p end-pos-p lexeme-p input-port-p) - (define lexeme-srcloc-p (make-srcloc (object-name input-port-p) - (position-line start-pos-p) - (position-col start-pos-p) - (position-offset start-pos-p) - (and (number? (position-offset end-pos-p)) - (number? (position-offset start-pos-p)) - (- (position-offset end-pos-p) - (position-offset start-pos-p))))) - (syntax-parameterize - ([start-pos (make-rename-transformer #'start-pos-p)] - [end-pos (make-rename-transformer #'end-pos-p)] - [lexeme (make-rename-transformer #'lexeme-p)] - [input-port (make-rename-transformer #'input-port-p)] - [lexeme-srcloc (make-rename-transformer #'lexeme-srcloc-p)]) - action-stx))))) - - (define-for-syntax (make-lexer-trans src-loc-style) - (lambda (stx) - (syntax-case stx () - ((_ re-act ...) - (begin - (for-each - (lambda (x) - (syntax-case x () - ((re act) (void)) - (_ (raise-syntax-error #f - "not a regular expression / action pair" - stx - x)))) - (syntax->list (syntax (re-act ...)))) - (let* ((spec/re-act-lst - (syntax->list (syntax (re-act ...)))) - (eof-act - (get-special-action spec/re-act-lst #'eof #''eof)) - (spec-act - (get-special-action spec/re-act-lst #'special #'(void))) - (spec-comment-act - (get-special-action spec/re-act-lst #'special-comment #'#f)) - (ids (list #'special #'special-comment #'eof)) - (re-act-lst - (filter - (lambda (spec/re-act) - (syntax-case spec/re-act () - (((special) act) - (not (ormap - (lambda (x) - (and (identifier? #'special) - (module-or-top-identifier=? (syntax special) x))) - ids))) - (_ #t))) - spec/re-act-lst)) - (name-lst (map (lambda (x) (datum->syntax-object #f (gensym))) re-act-lst)) - (act-lst (map (lambda (x) (stx-car (stx-cdr x))) re-act-lst)) - (re-actname-lst (map (lambda (re-act name) - (list (stx-car re-act) - name)) - re-act-lst - name-lst))) - (when (null? spec/re-act-lst) - (raise-syntax-error (or src-loc-style 'lexer) "expected at least one action" stx)) - (let-values (((trans start action-names no-look disappeared-uses) - (build-lexer re-actname-lst))) - (when (vector-ref action-names start) ;; Start state is final - (unless (and - ;; All the successor states are final - (andmap (lambda (x) (vector-ref action-names (vector-ref x 2))) - (vector->list (vector-ref trans start))) - ;; Each character has a successor state - (let loop ((check 0) - (nexts (vector->list (vector-ref trans start)))) - (cond - ((null? nexts) #f) - (else - (let ((next (car nexts))) - (and (= (vector-ref next 0) check) - (let ((next-check (vector-ref next 1))) - (or (>= next-check max-char-num) - (loop (add1 next-check) (cdr nexts)))))))))) - (eprintf "Warning: lexer at ~a can accept the empty string.\n" stx))) - (with-syntax ((start-state-stx start) - (trans-table-stx trans) - (no-lookahead-stx no-look) - ((name ...) name-lst) - ((act ...) (map (lambda (a) - (wrap-action a src-loc-style)) - act-lst)) - ((act-name ...) (vector->list action-names)) - (spec-act-stx - (wrap-action spec-act src-loc-style)) - (has-comment-act?-stx - (if (syntax-e spec-comment-act) #t #f)) - (spec-comment-act-stx - (wrap-action spec-comment-act src-loc-style)) - (eof-act-stx (wrap-action eof-act src-loc-style))) - (syntax-property - (syntax/loc stx - (let ([name act] ...) - (let ([proc - (lexer-body start-state-stx - trans-table-stx - (vector act-name ...) - no-lookahead-stx - spec-act-stx - has-comment-act?-stx - spec-comment-act-stx - eof-act-stx)]) - ;; reverse eta to get named procedures: - (lambda (port) (proc port))))) - 'disappeared-use - disappeared-uses))))))))) - - (define-syntax lexer (make-lexer-trans #f)) - (define-syntax lexer-src-pos (make-lexer-trans 'lexer-src-pos)) - (define-syntax lexer-srcloc (make-lexer-trans 'lexer-srcloc)) - - (define-syntax (define-lex-abbrev stx) - (syntax-case stx () - ((_ name re) - (identifier? (syntax name)) - (syntax/loc stx - (define-syntax name - (make-lex-abbrev (lambda () (quote-syntax re)))))) - (_ - (raise-syntax-error - #f - "form should be (define-lex-abbrev name re)" - stx)))) - - (define-syntax (define-lex-abbrevs stx) - (syntax-case stx () - ((_ x ...) - (with-syntax (((abbrev ...) - (map - (lambda (a) - (syntax-case a () - ((name re) - (identifier? (syntax name)) - (syntax/loc a (define-lex-abbrev name re))) - (_ (raise-syntax-error - #f - "form should be (define-lex-abbrevs (name re) ...)" - stx - a)))) - (syntax->list (syntax (x ...)))))) - (syntax/loc stx (begin abbrev ...)))) - (_ - (raise-syntax-error - #f - "form should be (define-lex-abbrevs (name re) ...)" - stx)))) - - (define-syntax (define-lex-trans stx) - (syntax-case stx () - ((_ name-form body-form) - (let-values (((name body) - (normalize-definition (syntax (define-syntax name-form body-form)) #'lambda))) - - #`(define-syntax #,name - (let ((func #,body)) - (unless (procedure? func) - (raise-syntax-error 'define-lex-trans "expected a procedure as the transformer, got ~e" func)) - (unless (procedure-arity-includes? func 1) - (raise-syntax-error 'define-lex-trans "expected a procedure that accepts 1 argument as the transformer, got ~e" func)) - (make-lex-trans func))))) - (_ - (raise-syntax-error - #f - "form should be (define-lex-trans name transformer)" - stx)))) - - - (define (get-next-state-helper char min max table) - (if (>= min max) - #f - (let* ((try (quotient (+ min max) 2)) - (el (vector-ref table try)) - (r1 (vector-ref el 0)) - (r2 (vector-ref el 1))) - (cond - ((and (>= char r1) (<= char r2)) (vector-ref el 2)) - ((< char r1) (get-next-state-helper char min try table)) - (else (get-next-state-helper char (add1 try) max table)))))) - - - - - (define (get-next-state char table) - (if table - (get-next-state-helper char 0 (vector-length table) table) - #f)) - - (define (lexer-body start-state trans-table actions no-lookahead special-action - has-special-comment-action? special-comment-action eof-action) - (letrec ((lexer - (lambda (ip) - (let ((first-pos (get-position ip)) - (first-char (peek-char-or-special ip 0))) - ;(printf "(peek-char-or-special port 0) = ~e\n" first-char) - (cond - ((eof-object? first-char) - (do-match ip first-pos eof-action (read-char-or-special ip))) - ((special-comment? first-char) - (read-char-or-special ip) - (cond - (has-special-comment-action? - (do-match ip first-pos special-comment-action #f)) - (else (lexer ip)))) - ((not (char? first-char)) - (do-match ip first-pos special-action (read-char-or-special ip))) - (else - (let lexer-loop ( - ;; current-state - (state start-state) - ;; the character to transition on - (char first-char) - ;; action for the longest match seen thus far - ;; including a match at the current state - (longest-match-action - (vector-ref actions start-state)) - ;; how many bytes precede char - (length-bytes 0) - ;; how many characters have been read - ;; including the one just read - (length-chars 1) - ;; how many characters are in the longest match - (longest-match-length 0)) - (let ((next-state - (cond - ((not (char? char)) #f) - (else (get-next-state (char->integer char) - (vector-ref trans-table state)))))) - (cond - ((not next-state) - (check-match ip first-pos longest-match-length - length-chars longest-match-action)) - ((vector-ref no-lookahead next-state) - (let ((act (vector-ref actions next-state))) - (check-match ip - first-pos - (if act length-chars longest-match-length) - length-chars - (if act act longest-match-action)))) - (else - (let* ((act (vector-ref actions next-state)) - (next-length-bytes (+ (char-utf-8-length char) length-bytes)) - (next-char (peek-char-or-special ip next-length-bytes))) - #;(printf "(peek-char-or-special port ~e) = ~e\n" - next-length-bytes next-char) - (lexer-loop next-state - next-char - (if act - act - longest-match-action) - next-length-bytes - (add1 length-chars) - (if act - length-chars - longest-match-length))))))))))))) - (lambda (ip) - (unless (input-port? ip) - (raise-argument-error - 'lexer - "input-port?" - 0 - ip)) - (lexer ip)))) - - (define (check-match lb first-pos longest-match-length length longest-match-action) - (unless longest-match-action - (let* ((match (read-string length lb)) - (end-pos (get-position lb))) - (raise-read-error - (format "lexer: No match found in input starting with: ~a" match) - (file-path) - (position-line first-pos) - (position-col first-pos) - (position-offset first-pos) - (- (position-offset end-pos) (position-offset first-pos))))) - (let ((match (read-string longest-match-length lb))) - ;(printf "(read-string ~e port) = ~e\n" longest-match-length match) - (do-match lb first-pos longest-match-action match))) - - (define file-path (make-parameter #f)) - (define lexer-file-path file-path) - - (define (do-match ip first-pos action value) - #;(printf "(action ~a ~a ~a ~a)\n" - (position-offset first-pos) (position-offset (get-position ip)) value ip) - (action first-pos (get-position ip) value ip)) - - (define (get-position ip) - (let-values (((line col off) (port-next-location ip))) - (make-position off line col))) - - (define-syntax (create-unicode-abbrevs stx) - (syntax-case stx () - ((_ ctxt) - (with-syntax (((ranges ...) (map (lambda (range) - `(union ,@(map (lambda (x) - `(char-range ,(integer->char (car x)) - ,(integer->char (cdr x)))) - range))) - (list (force alphabetic-ranges) - (force lower-case-ranges) - (force upper-case-ranges) - (force title-case-ranges) - (force numeric-ranges) - (force symbolic-ranges) - (force punctuation-ranges) - (force graphic-ranges) - (force whitespace-ranges) - (force blank-ranges) - (force iso-control-ranges)))) - ((names ...) (map (lambda (sym) - (datum->syntax-object (syntax ctxt) sym #f)) - '(alphabetic - lower-case - upper-case - title-case - numeric - symbolic - punctuation - graphic - whitespace - blank - iso-control)))) - (syntax (define-lex-abbrevs (names ranges) ...)))))) - - (define-lex-abbrev any-char (char-complement (union))) - (define-lex-abbrev any-string (intersection)) - (define-lex-abbrev nothing (union)) - (create-unicode-abbrevs #'here) - - (define-lex-trans (char-set stx) - (syntax-case stx () - ((_ str) - (string? (syntax-e (syntax str))) - (with-syntax (((char ...) (string->list (syntax-e (syntax str))))) - (syntax (union char ...)))))) - - (define-syntax provide-lex-keyword - (syntax-rules () - [(_ id ...) - (begin - (define-syntax-parameter id - (make-set!-transformer - (lambda (stx) - (raise-syntax-error - #f - (format "use of a lexer keyword (~a) is not in an appropriate lexer action" - 'id) - stx)))) - ... - (provide id ...))])) - - (provide-lex-keyword start-pos end-pos lexeme lexeme-srcloc input-port return-without-pos return-without-srcloc) - - ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt deleted file mode 100644 index 6ec0c7f..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/actions.rkt +++ /dev/null @@ -1,16 +0,0 @@ -#lang scheme/base - -(provide (all-defined-out)) -(require syntax/stx) - -;; get-special-action: (syntax-object list) syntax-object syntax-object -> syntax-object -;; Returns the first action from a rule of the form ((which-special) action) -(define (get-special-action rules which-special none) - (cond - ((null? rules) none) - (else - (syntax-case (car rules) () - (((special) act) - (and (identifier? #'special) (module-or-top-identifier=? (syntax special) which-special)) - (syntax act)) - (_ (get-special-action (cdr rules) which-special none)))))) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt deleted file mode 100644 index 28919a3..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/deriv.rkt +++ /dev/null @@ -1,339 +0,0 @@ -(module deriv mzscheme - - (require mzlib/list - (prefix is: mzlib/integer-set) - "re.rkt" - "util.rkt") - - (provide build-dfa print-dfa (struct dfa (num-states start-state final-states/actions transitions))) - - (define e (build-epsilon)) - (define z (build-zero)) - - - ;; Don't do anything with this one but extract the chars - (define all-chars (->re `(char-complement (union)) (make-cache))) - - ;; get-char-groups : re bool -> (list-of char-setR?) - ;; Collects the char-setRs in r that could be used in - ;; taking the derivative of r. - (define (get-char-groups r found-negation) - (cond - ((or (eq? r e) (eq? r z)) null) - ((char-setR? r) (list r)) - ((concatR? r) - (if (re-nullable? (concatR-re1 r)) - (append (get-char-groups (concatR-re1 r) found-negation) - (get-char-groups (concatR-re2 r) found-negation)) - (get-char-groups (concatR-re1 r) found-negation))) - ((repeatR? r) - (get-char-groups (repeatR-re r) found-negation)) - ((orR? r) - (apply append (map (lambda (x) (get-char-groups x found-negation)) (orR-res r)))) - ((andR? r) - (apply append (map (lambda (x) (get-char-groups x found-negation)) (andR-res r)))) - ((negR? r) - (if found-negation - (get-char-groups (negR-re r) #t) - (cons all-chars (get-char-groups (negR-re r) #t)))))) - - (test-block ((c (make-cache)) - (r1 (->re #\1 c)) - (r2 (->re #\2 c))) - ((get-char-groups e #f) null) - ((get-char-groups z #f) null) - ((get-char-groups r1 #f) (list r1)) - ((get-char-groups (->re `(concatenation ,r1 ,r2) c) #f) - (list r1)) - ((get-char-groups (->re `(concatenation ,e ,r2) c) #f) - (list r2)) - ((get-char-groups (->re `(concatenation (repetition 0 +inf.0 ,r1) ,r2) c) #f) - (list r1 r2)) - ((get-char-groups (->re `(repetition 0 +inf.0 ,r1) c) #f) - (list r1)) - ((get-char-groups - (->re `(union (repetition 0 +inf.0 ,r1) - (concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f) - (list r1 r2 (->re "3" c) (->re "4" c))) - ((get-char-groups (->re `(complement ,r1) c) #f) - (list all-chars r1)) - ((get-char-groups - (->re `(intersection (repetition 0 +inf.0 ,r1) - (concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f) - (list r1 r2 (->re "3" c) (->re "4" c))) - ) - (define loc:member? is:member?) - - ;; deriveR : re char cache -> re - (define (deriveR r c cache) - (cond - ((or (eq? r e) (eq? r z)) z) - ((char-setR? r) - (if (loc:member? c (char-setR-chars r)) e z)) - ((concatR? r) - (let* ((r1 (concatR-re1 r)) - (r2 (concatR-re2 r)) - (d (build-concat (deriveR r1 c cache) r2 cache))) - (if (re-nullable? r1) - (build-or (list d (deriveR r2 c cache)) cache) - d))) - ((repeatR? r) - (build-concat (deriveR (repeatR-re r) c cache) - (build-repeat (sub1 (repeatR-low r)) - (sub1 (repeatR-high r)) - (repeatR-re r) cache) - cache)) - ((orR? r) - (build-or (map (lambda (x) (deriveR x c cache)) - (orR-res r)) - cache)) - ((andR? r) - (build-and (map (lambda (x) (deriveR x c cache)) - (andR-res r)) - cache)) - ((negR? r) - (build-neg (deriveR (negR-re r) c cache) cache)))) - - (test-block ((c (make-cache)) - (a (char->integer #\a)) - (b (char->integer #\b)) - (r1 (->re #\a c)) - (r2 (->re `(repetition 0 +inf.0 #\a) c)) - (r3 (->re `(repetition 0 +inf.0 ,r2) c)) - (r4 (->re `(concatenation #\a ,r2) c)) - (r5 (->re `(repetition 0 +inf.0 ,r4) c)) - (r6 (->re `(union ,r5 #\a) c)) - (r7 (->re `(concatenation ,r2 ,r2) c)) - (r8 (->re `(complement ,r4) c)) - (r9 (->re `(intersection ,r2 ,r4) c))) - ((deriveR e a c) z) - ((deriveR z a c) z) - ((deriveR r1 b c) z) - ((deriveR r1 a c) e) - ((deriveR r2 a c) r2) - ((deriveR r2 b c) z) - ((deriveR r3 a c) r2) - ((deriveR r3 b c) z) - ((deriveR r4 a c) r2) - ((deriveR r4 b c) z) - ((deriveR r5 a c) (->re `(concatenation ,r2 ,r5) c)) - ((deriveR r5 b c) z) - ((deriveR r6 a c) (->re `(union (concatenation ,r2 ,r5) "") c)) - ((deriveR r6 b c) z) - ((deriveR r7 a c) (->re `(union (concatenation ,r2 ,r2) ,r2) c)) - ((deriveR r7 b c) z) - ((deriveR r8 a c) (->re `(complement, r2) c)) - ((deriveR r8 b c) (->re `(complement ,z) c)) - ((deriveR r9 a c) r2) - ((deriveR r9 b c) z) - ((deriveR (->re `(repetition 1 2 "ab") c) a c) - (->re `(concatenation "b" (repetition 0 1 "ab")) c))) - - ;; An re-action is (cons re action) - - ;; derive : (list-of re-action) char cache -> (union (list-of re-action) #f) - ;; applies deriveR to all the re-actions's re parts. - ;; Returns #f if the derived state is equivalent to z. - (define (derive r c cache) - (let ((new-r (map (lambda (ra) - (cons (deriveR (car ra) c cache) (cdr ra))) - r))) - (if (andmap (lambda (x) (eq? z (car x))) - new-r) - #f - new-r))) - - (test-block ((c (make-cache)) - (r1 (->re #\1 c)) - (r2 (->re #\2 c))) - ((derive null (char->integer #\1) c) #f) - ((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c) - (list (cons e 1) (cons z 2))) - ((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f)) - - - ;; get-final : (list-of re-action) -> (union #f syntax-object) - ;; An re that accepts e represents a final state. Return the - ;; action from the first final state or #f if there is none. - (define (get-final res) - (cond - ((null? res) #f) - ((re-nullable? (caar res)) (cdar res)) - (else (get-final (cdr res))))) - - (test-block ((c->i char->integer) - (c (make-cache)) - (r1 (->re #\a c)) - (r2 (->re #\b c)) - (b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5))) - (a (list (cons r1 1) (cons r2 2)))) - ((derive null (c->i #\a) c) #f) - ((derive a (c->i #\a) c) (list (cons e 1) (cons z 2))) - ((derive a (c->i #\b) c) (list (cons z 1) (cons e 2))) - ((derive a (c->i #\c) c) #f) - ((derive (list (cons (->re `(union " " "\n" ",") c) 1) - (cons (->re `(concatenation (repetition 0 1 "-") - (repetition 1 +inf.0 (char-range "0" "9"))) c) 2) - (cons (->re `(concatenation "-" (repetition 1 +inf.0 "-")) c) 3) - (cons (->re "[" c) 4) - (cons (->re "]" c) 5)) (c->i #\[) c) - b) - ((get-final a) #f) - ((get-final (list (cons e 1) (cons e 2))) 1) - ((get-final b) 4)) - - - ;; A state is (make-state (list-of re-action) nat) - (define-struct state (spec index)) - - ;; get->key : re-action -> (list-of nat) - ;; states are indexed by the list of indexes of their res - (define (get-key s) - (map (lambda (x) (re-index (car x))) s)) - - (define loc:partition is:partition) - - ;; compute-chars : (list-of state) -> (list-of char-set) - ;; Computed the sets of equivalent characters for taking the - ;; derivative of the car of st. Only one derivative per set need to be taken. - (define (compute-chars st) - (cond - ((null? st) null) - (else - (loc:partition (map char-setR-chars - (apply append (map (lambda (x) (get-char-groups (car x) #f)) - (state-spec (car st))))))))) - - (test-block ((c (make-cache)) - (c->i char->integer) - (r1 (->re `(char-range #\1 #\4) c)) - (r2 (->re `(char-range #\2 #\3) c))) - ((compute-chars null) null) - ((compute-chars (list (make-state null 1))) null) - ((map is:integer-set-contents - (compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2)))) - (list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3))) - (is:integer-set-contents (is:union (is:make-range (c->i #\1)) - (is:make-range (c->i #\4))))))) - - - ;; A dfa is (make-dfa int int - ;; (list-of (cons int syntax-object)) - ;; (list-of (cons int (list-of (cons char-set int))))) - ;; Each transitions is a state and a list of chars with the state to transition to. - ;; The finals and transitions are sorted by state number, and duplicate free. - (define-struct dfa (num-states start-state final-states/actions transitions) (make-inspector)) - - (define loc:get-integer is:get-integer) - - ;; build-dfa : (list-of re-action) cache -> dfa - (define (build-dfa rs cache) - (let* ((transitions (make-hash-table)) - (get-state-number (make-counter)) - (start (make-state rs (get-state-number)))) - (cache (cons 'state (get-key rs)) (lambda () start)) - (let loop ((old-states (list start)) - (new-states null) - (all-states (list start)) - (cs (compute-chars (list start)))) - (cond - ((and (null? old-states) (null? new-states)) - (make-dfa (get-state-number) (state-index start) - (sort (filter (lambda (x) (cdr x)) - (map (lambda (state) - (cons (state-index state) (get-final (state-spec state)))) - all-states)) - (lambda (a b) (< (car a) (car b)))) - (sort (hash-table-map transitions - (lambda (state trans) - (cons (state-index state) - (map (lambda (t) - (cons (car t) - (state-index (cdr t)))) - trans)))) - (lambda (a b) (< (car a) (car b)))))) - ((null? old-states) - (loop new-states null all-states (compute-chars new-states))) - ((null? cs) - (loop (cdr old-states) new-states all-states (compute-chars (cdr old-states)))) - (else - (let* ((state (car old-states)) - (c (car cs)) - (new-re (derive (state-spec state) (loc:get-integer c) cache))) - (cond - (new-re - (let* ((new-state? #f) - (new-state (cache (cons 'state (get-key new-re)) - (lambda () - (set! new-state? #t) - (make-state new-re (get-state-number))))) - (new-all-states (if new-state? (cons new-state all-states) all-states))) - (hash-table-put! transitions - state - (cons (cons c new-state) - (hash-table-get transitions state - (lambda () null)))) - (cond - (new-state? - (loop old-states (cons new-state new-states) new-all-states (cdr cs))) - (else - (loop old-states new-states new-all-states (cdr cs)))))) - (else (loop old-states new-states all-states (cdr cs)))))))))) - - (define (print-dfa x) - (printf "number of states: ~a\n" (dfa-num-states x)) - (printf "start state: ~a\n" (dfa-start-state x)) - (printf "final states: ~a\n" (map car (dfa-final-states/actions x))) - (for-each (lambda (trans) - (printf "state: ~a\n" (car trans)) - (for-each (lambda (rule) - (printf " -~a-> ~a\n" - (is:integer-set-contents (car rule)) - (cdr rule))) - (cdr trans))) - (dfa-transitions x))) - - (define (build-test-dfa rs) - (let ((c (make-cache))) - (build-dfa (map (lambda (x) (cons (->re x c) 'action)) - rs) - c))) - - -#| - (define t1 (build-test-dfa null)) - (define t2 (build-test-dfa `(#\a))) - (define t3 (build-test-dfa `(#\a #\b))) - (define t4 (build-test-dfa `((repetition 0 +inf.0 #\a) - (repetition 0 +inf.0 (concatenation #\a #\b))))) - (define t5 (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\0 #\1)) #\1)))) - (define t6 (build-test-dfa `((repetition 0 +inf.0 (repetition 0 +inf.0 #\a)) - (repetition 0 +inf.0 (concatenation #\b (repetition 1 +inf.0 #\b)))))) - (define t7 (build-test-dfa `((concatenation (repetition 0 +inf.0 #\a) (repetition 0 +inf.0 #\b) - (repetition 0 +inf.0 #\c) (repetition 0 +inf.0 #\d) - (repetition 0 +inf.0 #\e))))) - (define t8 - (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\a #\b)) #\a (union #\a #\b) - (union #\a #\b) (union #\a #\b) (union #\a #\b))))) - (define t9 (build-test-dfa `((concatenation "/*" - (complement (concatenation (intersection) "*/" (intersection))) - "*/")))) - (define t11 (build-test-dfa `((complement "1")))) - (define t12 (build-test-dfa `((concatenation (intersection (concatenation (repetition 0 +inf.0 "a") "b") - (concatenation "a" (repetition 0 +inf.0 "b"))) - "ab")))) - (define x (build-test-dfa `((union " " "\n" ",") - (concatenation (repetition 0 1 "-") (repetition 1 +inf.0 (char-range "0" "9"))) - (concatenation "-" (repetition 1 +inf.0 "-")) - "[" - "]"))) - (define y (build-test-dfa - `((repetition 1 +inf.0 - (union (concatenation "|" (repetition 0 +inf.0 (char-complement "|")) "|") - (concatenation "|" (repetition 0 +inf.0 (char-complement "|")))))))) - (define t13 (build-test-dfa `((intersection (concatenation (intersection) "111" (intersection)) - (complement (union (concatenation (intersection) "01") - (repetition 1 +inf.0 "1"))))))) - (define t14 (build-test-dfa `((complement "1")))) -|# - ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt deleted file mode 100644 index bbccbe0..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/error-tests.rkt +++ /dev/null @@ -1,81 +0,0 @@ -#lang scheme/base -(require (for-syntax scheme/base) - "../lex.rkt" - rackunit) - -(define-syntax (catch-syn-error stx) - (syntax-case stx () - [(_ arg) - (datum->syntax - #'here - (with-handlers ((exn:fail:syntax? exn-message)) - (syntax-local-expand-expression #'arg) - "not-an-error"))])) - -(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev))) -(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev a))) -(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev (a b) v))) -(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev 1 1))) -(check-regexp-match #rx"lex-abbrevs" (catch-syn-error (define-lex-abbrevs ()))) - -(check-regexp-match #rx"lex-trans" (catch-syn-error (define-lex-trans))) - -(check-regexp-match #rx"lexer" (catch-syn-error (lexer))) -(check-regexp-match #rx"lexer" (catch-syn-error (lexer ("a" "b" "c")))) -(check-regexp-match #rx"lexer" (catch-syn-error (lexer ()))) -(check-regexp-match #rx"lexer" (catch-syn-error (lexer ("")))) - -(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (a 1)))) -(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer ((a) 1)))) -(check-regexp-match #rx"regular-expression" (catch-syn-error (let ((a 1)) (lexer ((a) 1))))) - -(check-regexp-match #rx"regular-expression" - (catch-syn-error (let-syntax ((a 1)) - (lexer ((a) 1))))) - -(check-regexp-match #rx"define-lex-trans" - (catch-syn-error - (let () - (define-lex-trans a 1) - (let () - (lexer ((a) 1)))))) - -;; Detecting mutual recursion cycle: -(check-regexp-match #rx"regular-expression" - (catch-syn-error - (let () - (define-lex-abbrev a b) - (define-lex-abbrev b a) - (let () - (lexer (a 1)))))) - -(check-regexp-match #rx"regular-expression" - (catch-syn-error - (let () - (define-lex-abbrev a (repetition 0 1 b)) - (define-lex-abbrev b (repetition 0 1 a)) - (let () - (lexer (a 1)))))) - -;; Detecting cycle within same abbreviation: -(check-regexp-match #rx"regular-expression" - (catch-syn-error - (let () - (define-lex-abbrev balanced - (union (concatenation "(" balanced ")" balanced) - any-char)) - (lexer - [balanced (string-append lexeme (balanced input-port))] - [(eof) ""])))) - - -(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (1 1)))) -(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition) 1)))) -(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition #\1 #\1 "3") 1)))) -(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition 1 #\1 "3") 1)))) -(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition 1 0 "3") 1)))) -(check-regexp-match #rx"complement" (catch-syn-error (lexer ((complement) 1)))) -(check-regexp-match #rx"char-range" (catch-syn-error (lexer ((char-range) 1)))) -(check-regexp-match #rx"char-range" (catch-syn-error (lexer ((char-range #\9 #\0) 1)))) -(check-regexp-match #rx"char-complement" (catch-syn-error (lexer ((char-complement) 1)))) -(check-regexp-match #rx"char-complement" (catch-syn-error (lexer ((char-complement (concatenation "1" "2")) 1)))) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt deleted file mode 100644 index f74c003..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/front.rkt +++ /dev/null @@ -1,179 +0,0 @@ -(module front mzscheme - (require (prefix is: mzlib/integer-set) - mzlib/list - syntax/stx - "util.rkt" - "stx.rkt" - "re.rkt" - "deriv.rkt") - - (provide build-lexer) - - (define-syntax time-label - (syntax-rules () - ((_ l e ...) - (begin - (printf "~a: " l) - (time (begin e ...)))))) - - ;; A table is either - ;; - (vector-of (union #f nat)) - ;; - (vector-of (vector-of (vector nat nat nat))) - - (define loc:integer-set-contents is:integer-set-contents) - - ;; dfa->1d-table : dfa -> (same as build-lexer) - (define (dfa->1d-table dfa) - (let ((state-table (make-vector (dfa-num-states dfa) #f)) - (transition-cache (make-hash-table 'equal))) - (for-each - (lambda (trans) - (let* ((from-state (car trans)) - (all-chars/to (cdr trans)) - (flat-all-chars/to - (sort - (apply append - (map (lambda (chars/to) - (let ((char-ranges (loc:integer-set-contents (car chars/to))) - (to (cdr chars/to))) - (map (lambda (char-range) - (let ((entry (vector (car char-range) (cdr char-range) to))) - (hash-table-get transition-cache entry - (lambda () - (hash-table-put! transition-cache - entry - entry) - entry)))) - char-ranges))) - all-chars/to)) - (lambda (a b) - (< (vector-ref a 0) (vector-ref b 0)))))) - (vector-set! state-table from-state (list->vector flat-all-chars/to)))) - (dfa-transitions dfa)) - state-table)) - - - (define loc:foldr is:foldr) - - ;; dfa->2d-table : dfa -> (same as build-lexer) - (define (dfa->2d-table dfa) - (let ( - ;; char-table : (vector-of (union #f nat)) - ;; The lexer table, one entry per state per char. - ;; Each entry specifies a state to transition to. - ;; #f indicates no transition - (char-table (make-vector (* 256 (dfa-num-states dfa)) #f))) - - ;; Fill the char-table vector - (for-each - (lambda (trans) - (let ((from-state (car trans))) - (for-each (lambda (chars/to) - (let ((to-state (cdr chars/to))) - (loc:foldr (lambda (char _) - (vector-set! char-table - (bitwise-ior - char - (arithmetic-shift from-state 8)) - to-state)) - (void) - (car chars/to)))) - (cdr trans)))) - (dfa-transitions dfa)) - char-table)) - - - ;; dfa->actions : dfa -> (vector-of (union #f syntax-object)) - ;; The action for each final state, #f if the state isn't final - (define (dfa->actions dfa) - (let ((actions (make-vector (dfa-num-states dfa) #f))) - (for-each (lambda (state/action) - (vector-set! actions (car state/action) (cdr state/action))) - (dfa-final-states/actions dfa)) - actions)) - - ;; dfa->no-look : dfa -> (vector-of bool) - ;; For each state whether the lexer can ignore the next input. - ;; It can do this only if there are no transitions out of the - ;; current state. - (define (dfa->no-look dfa) - (let ((no-look (make-vector (dfa-num-states dfa) #t))) - (for-each (lambda (trans) - (vector-set! no-look (car trans) #f)) - (dfa-transitions dfa)) - no-look)) - - (test-block ((d1 (make-dfa 1 1 (list) (list))) - (d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) - (list (cons 1 (list (cons (is:make-range 49 50) 1) - (cons (is:make-range 51) 2))) - (cons 2 (list (cons (is:make-range 49) 3)))))) - (d3 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) - (list (cons 1 (list (cons (is:make-range 100 200) 0) - (cons (is:make-range 49 50) 1) - (cons (is:make-range 51) 2))) - (cons 2 (list (cons (is:make-range 49) 3))))))) - ((dfa->2d-table d1) (make-vector 256 #f)) - ((dfa->2d-table d2) (let ((v (make-vector 1024 #f))) - (vector-set! v 305 1) - (vector-set! v 306 1) - (vector-set! v 307 2) - (vector-set! v 561 3) - v)) - ((dfa->1d-table d1) (make-vector 1 #f)) - ((dfa->1d-table d2) #(#f - #(#(49 50 1) #(51 51 2)) - #(#(49 49 3)) - #f)) - ((dfa->1d-table d3) #(#f - #(#(49 50 1) #(51 51 2) #(100 200 0)) - #(#(49 49 3)) - #f)) - ((dfa->actions d1) (vector #f)) - ((dfa->actions d2) (vector #f #f 2 3)) - ((dfa->no-look d1) (vector #t)) - ((dfa->no-look d2) (vector #t #f #f #t))) - - ;; build-lexer : syntax-object list -> - ;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object)) - ;; each syntax object has the form (re action) - (define (build-lexer sos) - (let* ((disappeared-uses (box null)) - (s-re-acts (map (lambda (so) - (cons (parse (stx-car so) disappeared-uses) - (stx-car (stx-cdr so)))) - sos)) - - (cache (make-cache)) - - (re-acts (map (lambda (s-re-act) - (cons (->re (car s-re-act) cache) - (cdr s-re-act))) - s-re-acts)) - - (dfa (build-dfa re-acts cache)) - (table (dfa->1d-table dfa))) - ;(print-dfa dfa) - #;(let ((num-states (vector-length table)) - (num-vectors (length (filter values (vector->list table)))) - (num-entries (apply + (map - (lambda (x) (if x (vector-length x) 0)) - (vector->list table)))) - (num-different-entries - (let ((ht (make-hash-table))) - (for-each - (lambda (x) - (when x - (for-each - (lambda (y) - (hash-table-put! ht y #t)) - (vector->list x)))) - (vector->list table)) - (length (hash-table-map ht cons))))) - (printf "~a states, ~aKB\n" - num-states - (/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries - (* 5 num-different-entries))) 1024))) - (values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa) - (unbox disappeared-uses)))) - ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt deleted file mode 100644 index 9da62de..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/re.rkt +++ /dev/null @@ -1,385 +0,0 @@ -(module re mzscheme - (require mzlib/list - scheme/match - (prefix is: mzlib/integer-set) - "util.rkt") - - (provide ->re build-epsilon build-zero build-char-set build-concat - build-repeat build-or build-and build-neg - epsilonR? zeroR? char-setR? concatR? repeatR? orR? andR? negR? - char-setR-chars concatR-re1 concatR-re2 repeatR-re repeatR-low repeatR-high - orR-res andR-res negR-re - re-nullable? re-index) - - ;; get-index : -> nat - (define get-index (make-counter)) - - ;; An re is either - ;; - (make-epsilonR bool nat) - ;; - (make-zeroR bool nat) - ;; - (make-char-setR bool nat char-set) - ;; - (make-concatR bool nat re re) - ;; - (make-repeatR bool nat nat nat-or-+inf.0 re) - ;; - (make-orR bool nat (list-of re)) Must not directly contain any orRs - ;; - (make-andR bool nat (list-of re)) Must not directly contain any andRs - ;; - (make-negR bool nat re) - ;; - ;; Every re must have an index field globally different from all - ;; other re index fields. - (define-struct re (nullable? index) (make-inspector)) - (define-struct (epsilonR re) () (make-inspector)) - (define-struct (zeroR re) () (make-inspector)) - (define-struct (char-setR re) (chars) (make-inspector)) - (define-struct (concatR re) (re1 re2) (make-inspector)) - (define-struct (repeatR re) (low high re) (make-inspector)) - (define-struct (orR re) (res) (make-inspector)) - (define-struct (andR re) (res) (make-inspector)) - (define-struct (negR re) (re) (make-inspector)) - - ;; e : re - ;; The unique epsilon re - (define e (make-epsilonR #t (get-index))) - - ;; z : re - ;; The unique zero re - (define z (make-zeroR #f (get-index))) - - - ;; s-re = char constant - ;; | string constant (sequence of characters) - ;; | re a precompiled re - ;; | (repetition low high s-re) repetition between low and high times (inclusive) - ;; | (union s-re ...) - ;; | (intersection s-re ...) - ;; | (complement s-re) - ;; | (concatenation s-re ...) - ;; | (char-range rng rng) match any character between two (inclusive) - ;; | (char-complement char-set) match any character not listed - ;; low = natural-number - ;; high = natural-number or +inf.0 - ;; rng = char or string with length 1 - ;; (concatenation) (repetition 0 0 x), and "" match the empty string. - ;; (union) matches no strings. - ;; (intersection) matches any string. - - (define loc:make-range is:make-range) - (define loc:union is:union) - (define loc:split is:split) - (define loc:complement is:complement) - - ;; ->re : s-re cache -> re - (define (->re exp cache) - (match exp - ((? char?) (build-char-set (loc:make-range (char->integer exp)) cache)) - ((? string?) (->re `(concatenation ,@(string->list exp)) cache)) - ((? re?) exp) - (`(repetition ,low ,high ,r) - (build-repeat low high (->re r cache) cache)) - (`(union ,rs ...) - (build-or (flatten-res (map (lambda (r) (->re r cache)) rs) - orR? orR-res loc:union cache) - cache)) - (`(intersection ,rs ...) - (build-and (flatten-res (map (lambda (r) (->re r cache)) rs) - andR? andR-res (lambda (a b) - (let-values (((i _ __) (loc:split a b))) i)) - cache) - cache)) - (`(complement ,r) - (build-neg (->re r cache) cache)) - (`(concatenation ,rs ...) - (foldr (lambda (x y) - (build-concat (->re x cache) y cache)) - e - rs)) - (`(char-range ,c1 ,c2) - (let ((i1 (char->integer (if (string? c1) (string-ref c1 0) c1))) - (i2 (char->integer (if (string? c2) (string-ref c2 0) c2)))) - (if (<= i1 i2) - (build-char-set (loc:make-range i1 i2) cache) - z))) - (`(char-complement ,crs ...) - (let ((cs (->re `(union ,@crs) cache))) - (cond - ((zeroR? cs) (build-char-set (loc:make-range 0 max-char-num) cache)) - ((char-setR? cs) - (build-char-set (loc:complement (char-setR-chars cs) 0 max-char-num) cache)) - (else z)))))) - - - - - ;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re)) - ;; (char-set char-set -> char-set) cache -> (list-of re) - ;; Takes all the char-sets in l and combines them into one char-set using the combine function. - ;; Flattens out the values of type?. get-res only needs to function on things type? returns - ;; true for. - (define (flatten-res l type? get-res combine cache) - (let loop ((res l) - ;; chars : (union #f char-set) - (chars #f) - (no-chars null)) - (cond - ((null? res) - (if chars - (cons (build-char-set chars cache) no-chars) - no-chars)) - ((char-setR? (car res)) - (if chars - (loop (cdr res) (combine (char-setR-chars (car res)) chars) no-chars) - (loop (cdr res) (char-setR-chars (car res)) no-chars))) - ((type? (car res)) - (loop (append (get-res (car res)) (cdr res)) chars no-chars)) - (else (loop (cdr res) chars (cons (car res) no-chars)))))) - - ;; build-epsilon : -> re - (define (build-epsilon) e) - - (define (build-zero) z) - - (define loc:integer-set-contents is:integer-set-contents) - - ;; build-char-set : char-set cache -> re - (define (build-char-set cs cache) - (let ((l (loc:integer-set-contents cs))) - (cond - ((null? l) z) - (else - (cache l - (lambda () - (make-char-setR #f (get-index) cs))))))) - - - - ;; build-concat : re re cache -> re - (define (build-concat r1 r2 cache) - (cond - ((eq? e r1) r2) - ((eq? e r2) r1) - ((or (eq? z r1) (eq? z r2)) z) - (else - (cache (cons 'concat (cons (re-index r1) (re-index r2))) - (lambda () - (make-concatR (and (re-nullable? r1) (re-nullable? r2)) - (get-index) - r1 r2)))))) - - ;; build-repeat : nat nat-or-+inf.0 re cache -> re - (define (build-repeat low high r cache) - (let ((low (if (< low 0) 0 low))) - (cond - ((eq? r e) e) - ((and (= 0 low) (or (= 0 high) (eq? z r))) e) - ((and (= 1 low) (= 1 high)) r) - ((and (repeatR? r) - (eq? (repeatR-high r) +inf.0) - (or (= 0 (repeatR-low r)) - (= 1 (repeatR-low r)))) - (build-repeat (* low (repeatR-low r)) - +inf.0 - (repeatR-re r) - cache)) - (else - (cache (cons 'repeat (cons low (cons high (re-index r)))) - (lambda () - (make-repeatR (or (re-nullable? r) (= 0 low)) (get-index) low high r))))))) - - - ;; build-or : (list-of re) cache -> re - (define (build-or rs cache) - (let ((rs - (filter - (lambda (x) (not (eq? x z))) - (do-simple-equiv (replace rs orR? orR-res null) re-index)))) - (cond - ((null? rs) z) - ((null? (cdr rs)) (car rs)) - ((memq (build-neg z cache) rs) (build-neg z cache)) - (else - (cache (cons 'or (map re-index rs)) - (lambda () - (make-orR (ormap re-nullable? rs) (get-index) rs))))))) - - ;; build-and : (list-of re) cache -> re - (define (build-and rs cache) - (let ((rs (do-simple-equiv (replace rs andR? andR-res null) re-index))) - (cond - ((null? rs) (build-neg z cache)) - ((null? (cdr rs)) (car rs)) - ((memq z rs) z) - (else - (cache (cons 'and (map re-index rs)) - (lambda () - (make-andR (andmap re-nullable? rs) (get-index) rs))))))) - - ;; build-neg : re cache -> re - (define (build-neg r cache) - (cond - ((negR? r) (negR-re r)) - (else - (cache (cons 'neg (re-index r)) - (lambda () - (make-negR (not (re-nullable? r)) (get-index) r)))))) - - ;; Tests for the build-functions - (test-block ((c (make-cache)) - (isc is:integer-set-contents) - (r1 (build-char-set (is:make-range (char->integer #\1)) c)) - (r2 (build-char-set (is:make-range (char->integer #\2)) c)) - (r3 (build-char-set (is:make-range (char->integer #\3)) c)) - (rc (build-concat r1 r2 c)) - (rc2 (build-concat r2 r1 c)) - (rr (build-repeat 0 +inf.0 rc c)) - (ro (build-or `(,rr ,rc ,rr) c)) - (ro2 (build-or `(,rc ,rr ,z) c)) - (ro3 (build-or `(,rr ,rc) c)) - (ro4 (build-or `(,(build-or `(,r1 ,r2) c) - ,(build-or `(,r2 ,r3) c)) c)) - (ra (build-and `(,rr ,rc ,rr) c)) - (ra2 (build-and `(,rc ,rr) c)) - (ra3 (build-and `(,rr ,rc) c)) - (ra4 (build-and `(,(build-and `(,r3 ,r2) c) - ,(build-and `(,r2 ,r1) c)) c)) - (rn (build-neg z c)) - (rn2 (build-neg r1 c))) - - ((isc (char-setR-chars r1)) (isc (is:make-range (char->integer #\1)))) - ((isc (char-setR-chars r2)) (isc (is:make-range (char->integer #\2)))) - ((isc (char-setR-chars r3)) (isc (is:make-range (char->integer #\3)))) - ((build-char-set (is:make-range) c) z) - ((build-concat r1 e c) r1) - ((build-concat e r1 c) r1) - ((build-concat r1 z c) z) - ((build-concat z r1 c) z) - ((build-concat r1 r2 c) rc) - ((concatR-re1 rc) r1) - ((concatR-re2 rc) r2) - ((concatR-re1 rc2) r2) - ((concatR-re2 rc2) r1) - (ro ro2) - (ro ro3) - (ro4 (build-or `(,r1 ,r2 ,r3) c)) - ((orR-res ro) (list rc rr)) - ((orR-res ro4) (list r1 r2 r3)) - ((build-or null c) z) - ((build-or `(,r1 ,z) c) r1) - ((build-repeat 0 +inf.0 rc c) rr) - ((build-repeat 0 1 z c) e) - ((build-repeat 0 0 rc c) e) - ((build-repeat 0 +inf.0 z c) e) - ((build-repeat -1 +inf.0 z c) e) - ((build-repeat 0 +inf.0 (build-repeat 0 +inf.0 rc c) c) - (build-repeat 0 +inf.0 rc c)) - ((build-repeat 20 20 (build-repeat 0 +inf.0 rc c) c) - (build-repeat 0 +inf.0 rc c)) - ((build-repeat 20 20 (build-repeat 1 +inf.0 rc c) c) - (build-repeat 20 +inf.0 rc c)) - ((build-repeat 1 1 rc c) rc) - ((repeatR-re rr) rc) - (ra ra2) - (ra ra3) - (ra4 (build-and `(,r1 ,r2 ,r3) c)) - ((andR-res ra) (list rc rr)) - ((andR-res ra4) (list r1 r2 r3)) - ((build-and null c) (build-neg z c)) - ((build-and `(,r1 ,z) c) z) - ((build-and `(,r1) c) r1) - ((build-neg r1 c) (build-neg r1 c)) - ((build-neg (build-neg r1 c) c) r1) - ((negR-re (build-neg r2 c)) r2) - ((re-nullable? r1) #f) - ((re-nullable? rc) #f) - ((re-nullable? (build-concat rr rr c)) #t) - ((re-nullable? rr) #t) - ((re-nullable? (build-repeat 0 1 rc c)) #t) - ((re-nullable? (build-repeat 1 2 rc c)) #f) - ((re-nullable? (build-repeat 1 2 (build-or (list e r1) c) c)) #t) - ((re-nullable? ro) #t) - ((re-nullable? (build-or `(,r1 ,r2) c)) #f) - ((re-nullable? (build-and `(,r1 ,e) c)) #f) - ((re-nullable? (build-and `(,rr ,e) c)) #t) - ((re-nullable? (build-neg r1 c)) #t) - ((re-nullable? (build-neg rr c)) #f)) - - (test-block ((c (make-cache)) - (isc is:integer-set-contents) - (r1 (->re #\1 c)) - (r2 (->re #\2 c)) - (r3-5 (->re '(char-range #\3 #\5) c)) - (r4 (build-or `(,r1 ,r2) c)) - (r5 (->re `(union ,r3-5 #\7) c)) - (r6 (->re #\6 c))) - ((flatten-res null orR? orR-res is:union c) null) - ((isc (char-setR-chars (car (flatten-res `(,r1) orR? orR-res is:union c)))) - (isc (is:make-range (char->integer #\1)))) - ((isc (char-setR-chars (car (flatten-res `(,r4) orR? orR-res is:union c)))) - (isc (is:make-range (char->integer #\1) (char->integer #\2)))) - ((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1) - orR? orR-res is:union c)))) - (isc (is:make-range (char->integer #\1) (char->integer #\7)))) - ((flatten-res `(,r1 ,r2) andR? andR-res (lambda (x y) - (let-values (((i _ __) - (is:split x y))) - i)) - c) - (list z))) - - ;; ->re - (test-block ((c (make-cache)) - (isc is:integer-set-contents) - (r (->re #\a c)) - (rr (->re `(concatenation ,r ,r) c)) - (rrr (->re `(concatenation ,r ,rr) c)) - (rrr* (->re `(repetition 0 +inf.0 ,rrr) c))) - ((isc (char-setR-chars r)) (isc (is:make-range (char->integer #\a)))) - ((->re "" c) e) - ((->re "asdf" c) (->re `(concatenation #\a #\s #\d #\f) c)) - ((->re r c) r) - ((->re `(repetition 0 +inf.0 ,r) c) (build-repeat 0 +inf.0 r c)) - ((->re `(repetition 1 +inf.0 ,r) c) (build-repeat 1 +inf.0 r c)) - ((->re `(repetition 0 1 ,r) c) (build-repeat 0 1 r c)) - ((->re `(repetition 0 1 ,rrr*) c) rrr*) - ((->re `(union (union (char-range #\a #\c) - (char-complement (char-range #\000 #\110) - (char-range #\112 ,(integer->char max-char-num)))) - (union (repetition 0 +inf.0 #\2))) c) - (build-or (list (build-char-set (is:union (is:make-range 73) - (is:make-range 97 99)) - c) - (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c)) - c)) - ((->re `(union ,rr ,rrr) c) (build-or (list rr rrr) c)) - ((->re `(union ,r) c) r) - ((->re `(union) c) z) - ((->re `(intersection (intersection #\111 - (char-complement (char-range #\000 #\110) - (char-range #\112 ,(integer->char max-char-num)))) - (intersection (repetition 0 +inf.0 #\2))) c) - (build-and (list (build-char-set (is:make-range 73) c) - (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c)) - c)) - ((->re `(intersection (intersection #\000 (char-complement (char-range #\000 #\110) - (char-range #\112 ,(integer->char max-char-num)))) - (intersection (repetition 0 +inf.0 #\2))) c) - z) - ((->re `(intersection ,rr ,rrr) c) (build-and (list rr rrr) c)) - ((->re `(intersection ,r) c) r) - ((->re `(intersection) c) (build-neg z c)) - ((->re `(complement ,r) c) (build-neg r c)) - ((->re `(concatenation) c) e) - ((->re `(concatenation ,rrr*) c) rrr*) - (rr (build-concat r r c)) - ((->re `(concatenation ,r ,rr ,rrr) c) - (build-concat r (build-concat rr rrr c) c)) - ((isc (char-setR-chars (->re `(char-range #\1 #\1) c))) (isc (is:make-range 49))) - ((isc (char-setR-chars (->re `(char-range #\1 #\9) c))) (isc (is:make-range 49 57))) - ((isc (char-setR-chars (->re `(char-range "1" "1") c))) (isc (is:make-range 49))) - ((isc (char-setR-chars (->re `(char-range "1" "9") c))) (isc (is:make-range 49 57))) - ((->re `(char-range "9" "1") c) z) - ((isc (char-setR-chars (->re `(char-complement) c))) - (isc (char-setR-chars (->re `(char-range #\000 ,(integer->char max-char-num)) c)))) - ((isc (char-setR-chars (->re `(char-complement #\001 (char-range #\002 ,(integer->char max-char-num))) c))) - (isc (is:make-range 0))) - ) - - ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt deleted file mode 100644 index 1104a87..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/stx.rkt +++ /dev/null @@ -1,220 +0,0 @@ -#lang racket - -(require "util.rkt" - syntax/id-table) - -(provide parse) - -(define (bad-args stx num) - (raise-syntax-error - #f - (format "incorrect number of arguments (should have ~a)" num) - stx)) - -;; char-range-arg: syntax-object syntax-object -> nat -;; If c contains is a character or length 1 string, returns the integer -;; for the character. Otherwise raises a syntax error. -(define (char-range-arg stx containing-stx) - (let ((c (syntax-e stx))) - (cond - ((char? c) (char->integer c)) - ((and (string? c) (= (string-length c) 1)) - (char->integer (string-ref c 0))) - (else - (raise-syntax-error - #f - "not a char or single-char string" - containing-stx stx))))) -(module+ test - (check-equal? (char-range-arg #'#\1 #'here) (char->integer #\1)) - (check-equal? (char-range-arg #'"1" #'here) (char->integer #\1))) - -(define orig-insp (variable-reference->module-declaration-inspector - (#%variable-reference))) -(define (disarm stx) - (syntax-disarm stx orig-insp)) - - ;; parse : syntax-object (box (list-of syntax-object)) -> s-re (see re.rkt) - ;; checks for errors and generates the plain s-exp form for s - ;; Expands lex-abbrevs and applies lex-trans. - (define (parse stx disappeared-uses) - (let loop ([stx stx] - [disappeared-uses disappeared-uses] - ;; seen-lex-abbrevs: id-table - [seen-lex-abbrevs (make-immutable-free-id-table)]) - (let ([recur (lambda (s) - (loop (syntax-rearm s stx) - disappeared-uses - seen-lex-abbrevs))] - [recur/abbrev (lambda (s id) - (loop (syntax-rearm s stx) - disappeared-uses - (free-id-table-set seen-lex-abbrevs id id)))]) - (syntax-case (disarm stx) (repetition union intersection complement concatenation - char-range char-complement) - (_ - (identifier? stx) - (let ((expansion (syntax-local-value stx (lambda () #f)))) - (unless (lex-abbrev? expansion) - (raise-syntax-error 'regular-expression - "undefined abbreviation" - stx)) - ;; Check for cycles. - (when (free-id-table-ref seen-lex-abbrevs stx (lambda () #f)) - (raise-syntax-error 'regular-expression - "illegal lex-abbrev cycle detected" - stx - #f - (list (free-id-table-ref seen-lex-abbrevs stx)))) - (set-box! disappeared-uses (cons stx (unbox disappeared-uses))) - (recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx))) - (_ - (or (char? (syntax-e stx)) (string? (syntax-e stx))) - (syntax-e stx)) - ((repetition arg ...) - (let ((arg-list (syntax->list (syntax (arg ...))))) - (unless (= 3 (length arg-list)) - (bad-args stx 2)) - (let ((low (syntax-e (car arg-list))) - (high (syntax-e (cadr arg-list))) - (re (caddr arg-list))) - (unless (and (number? low) (exact? low) (integer? low) (>= low 0)) - (raise-syntax-error #f - "not a non-negative exact integer" - stx - (car arg-list))) - (unless (or (and (number? high) (exact? high) (integer? high) (>= high 0)) - (eq? high +inf.0)) - (raise-syntax-error #f - "not a non-negative exact integer or +inf.0" - stx - (cadr arg-list))) - (unless (<= low high) - (raise-syntax-error - #f - "the first argument is not less than or equal to the second argument" - stx)) - `(repetition ,low ,high ,(recur re))))) - ((union re ...) - `(union ,@(map recur (syntax->list (syntax (re ...)))))) - ((intersection re ...) - `(intersection ,@(map recur (syntax->list (syntax (re ...)))))) - ((complement re ...) - (let ((re-list (syntax->list (syntax (re ...))))) - (unless (= 1 (length re-list)) - (bad-args stx 1)) - `(complement ,(recur (car re-list))))) - ((concatenation re ...) - `(concatenation ,@(map recur (syntax->list (syntax (re ...)))))) - ((char-range arg ...) - (let ((arg-list (syntax->list (syntax (arg ...))))) - (unless (= 2 (length arg-list)) - (bad-args stx 2)) - (let ((i1 (char-range-arg (car arg-list) stx)) - (i2 (char-range-arg (cadr arg-list) stx))) - (if (<= i1 i2) - `(char-range ,(integer->char i1) ,(integer->char i2)) - (raise-syntax-error - #f - "the first argument does not precede or equal second argument" - stx))))) - ((char-complement arg ...) - (let ((arg-list (syntax->list (syntax (arg ...))))) - (unless (= 1 (length arg-list)) - (bad-args stx 1)) - (let ((parsed (recur (car arg-list)))) - (unless (char-set? parsed) - (raise-syntax-error #f - "not a character set" - stx - (car arg-list))) - `(char-complement ,parsed)))) - ((op form ...) - (identifier? (syntax op)) - (let* ((o (syntax op)) - (expansion (syntax-local-value o (lambda () #f)))) - (set-box! disappeared-uses (cons o (unbox disappeared-uses))) - (cond - ((lex-trans? expansion) - (recur ((lex-trans-f expansion) (disarm stx)))) - (expansion - (raise-syntax-error 'regular-expression - "not a lex-trans" - stx)) - (else - (raise-syntax-error 'regular-expression - "undefined operator" - stx))))) - (_ - (raise-syntax-error - 'regular-expression - "not a char, string, identifier, or (op args ...)" - stx)))))) - - - - ;; char-set? : s-re -> bool - ;; A char-set is an re that matches only strings of length 1. - ;; char-set? is conservative. - (define (char-set? s-re) - (cond - ((char? s-re) #t) - ((string? s-re) (= (string-length s-re) 1)) - ((list? s-re) - (let ((op (car s-re))) - (case op - ((union intersection) (andmap char-set? (cdr s-re))) - ((char-range char-complement) #t) - ((repetition) - (and (= (cadr s-re) (caddr s-re)) (char-set? (cadddr s-re)))) - ((concatenation) - (and (= 2 (length s-re)) (char-set? (cadr s-re)))) - (else #f)))) - (else #f))) - - (module+ test - (require rackunit)) - (module+ test - (check-equal? (char-set? #\a) #t) - (check-equal? (char-set? "12") #f) - (check-equal? (char-set? "1") #t) - (check-equal? (char-set? '(repetition 1 2 #\1)) #f) - (check-equal? (char-set? '(repetition 1 1 "12")) #f) - (check-equal? (char-set? '(repetition 1 1 "1")) #t) - (check-equal? (char-set? '(union "1" "2" "3")) #t) - (check-equal? (char-set? '(union "1" "" "3")) #f) - (check-equal? (char-set? '(intersection "1" "2" (union "3" "4"))) #t) - (check-equal? (char-set? '(intersection "1" "")) #f) - (check-equal? (char-set? '(complement "1")) #f) - (check-equal? (char-set? '(concatenation "1" "2")) #f) - (check-equal? (char-set? '(concatenation "" "2")) #f) - (check-equal? (char-set? '(concatenation "1")) #t) - (check-equal? (char-set? '(concatenation "12")) #f) - (check-equal? (char-set? '(char-range #\1 #\2)) #t) - (check-equal? (char-set? '(char-complement #\1)) #t)) - - ;; yikes... these test cases all have the wrong arity, now. - ;; and by "now", I mean it's been broken since before we - ;; moved to git. - (module+ test - (check-equal? (parse #'#\a null) #\a) - (check-equal? (parse #'"1" null) "1") - (check-equal? (parse #'(repetition 1 1 #\1) null) - '(repetition 1 1 #\1)) - (check-equal? (parse #'(repetition 0 +inf.0 #\1) null) '(repetition 0 +inf.0 #\1)) - (check-equal? (parse #'(union #\1 (union "2") (union)) null) - '(union #\1 (union "2") (union))) - (check-equal? (parse #'(intersection #\1 (intersection "2") (intersection)) - null) - '(intersection #\1 (intersection "2") (intersection))) - (check-equal? (parse #'(complement (union #\1 #\2)) - null) - '(complement (union #\1 #\2))) - (check-equal? (parse #'(concatenation "1" "2" (concatenation)) null) - '(concatenation "1" "2" (concatenation))) - (check-equal? (parse #'(char-range "1" #\1) null) '(char-range #\1 #\1)) - (check-equal? (parse #'(char-range #\1 "1") null) '(char-range #\1 #\1)) - (check-equal? (parse #'(char-range "1" "3") null) '(char-range #\1 #\3)) - (check-equal? (parse #'(char-complement (union "1" "2")) null) - '(char-complement (union "1" "2")))) -; ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt deleted file mode 100644 index c1f1492..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token-syntax.rkt +++ /dev/null @@ -1,9 +0,0 @@ -(module token-syntax mzscheme - - ;; The things needed at compile time to handle definition of tokens - - (provide make-terminals-def terminals-def-t terminals-def? - make-e-terminals-def e-terminals-def-t e-terminals-def?) - (define-struct terminals-def (t)) - (define-struct e-terminals-def (t)) - ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt deleted file mode 100644 index 27b3458..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/token.rkt +++ /dev/null @@ -1,92 +0,0 @@ -(module token mzscheme - - (require-for-syntax "token-syntax.rkt") - - ;; Defining tokens - - (provide define-tokens define-empty-tokens make-token token? - (protect (rename token-name real-token-name)) - (protect (rename token-value real-token-value)) - (rename token-name* token-name) - (rename token-value* token-value) - (struct position (offset line col)) - (struct position-token (token start-pos end-pos)) - (struct srcloc-token (token srcloc))) - - - ;; A token is either - ;; - symbol - ;; - (make-token symbol any) - (define-struct token (name value) (make-inspector)) - - ;; token-name*: token -> symbol - (define (token-name* t) - (cond - ((symbol? t) t) - ((token? t) (token-name t)) - (else (raise-type-error - 'token-name - "symbol or struct:token" - 0 - t)))) - - ;; token-value*: token -> any - (define (token-value* t) - (cond - ((symbol? t) #f) - ((token? t) (token-value t)) - (else (raise-type-error - 'token-value - "symbol or struct:token" - 0 - t)))) - - (define-for-syntax (make-ctor-name n) - (datum->syntax-object n - (string->symbol (format "token-~a" (syntax-e n))) - n - n)) - - (define-for-syntax (make-define-tokens empty?) - (lambda (stx) - (syntax-case stx () - ((_ name (token ...)) - (andmap identifier? (syntax->list (syntax (token ...)))) - (with-syntax (((marked-token ...) - (map values #;(make-syntax-introducer) - (syntax->list (syntax (token ...)))))) - (quasisyntax/loc stx - (begin - (define-syntax name - #,(if empty? - #'(make-e-terminals-def (quote-syntax (marked-token ...))) - #'(make-terminals-def (quote-syntax (marked-token ...))))) - #,@(map - (lambda (n) - (when (eq? (syntax-e n) 'error) - (raise-syntax-error - #f - "Cannot define a token named error." - stx)) - (if empty? - #`(define (#,(make-ctor-name n)) - '#,n) - #`(define (#,(make-ctor-name n) x) - (make-token '#,n x)))) - (syntax->list (syntax (token ...)))) - #;(define marked-token #f) #;...)))) - ((_ ...) - (raise-syntax-error - #f - "must have the form (define-tokens name (identifier ...)) or (define-empty-tokens name (identifier ...))" - stx))))) - - (define-syntax define-tokens (make-define-tokens #f)) - (define-syntax define-empty-tokens (make-define-tokens #t)) - - (define-struct position (offset line col) #f) - (define-struct position-token (token start-pos end-pos) #f) - - (define-struct srcloc-token (token srcloc) #f) - ) - diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt deleted file mode 100644 index c21e88c..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/unicode-chars.rkt +++ /dev/null @@ -1,69 +0,0 @@ -#lang racket - -(require "util.rkt") - -(provide (all-defined-out)) - -;; mapped-chars : (listof (list nat nat bool)) -(define mapped-chars (make-known-char-range-list)) - -;; get-chars-for-x : (nat -> bool) (listof (list nat nat bool)) -> (listof (cons nat nat)) -(define (get-chars-for char-x? mapped-chars) - (cond - ((null? mapped-chars) null) - (else - (let* ((range (car mapped-chars)) - (low (car range)) - (high (cadr range)) - (x (char-x? low))) - (cond - ((caddr range) - (if x - (cons (cons low high) - (get-chars-for char-x? (cdr mapped-chars))) - (get-chars-for char-x? (cdr mapped-chars)))) - (else - (let loop ((range-start low) - (i (car range)) - (parity x)) - (cond - ((> i high) - (if parity - (cons (cons range-start high) (get-chars-for char-x? (cdr mapped-chars))) - (get-chars-for char-x? (cdr mapped-chars)))) - ((eq? parity (char-x? i)) - (loop range-start (add1 i) parity)) - (parity - (cons (cons range-start (sub1 i)) (loop i (add1 i) #f))) - (else - (loop i (add1 i) #t)))))))))) - -(define (compute-ranges x?) - (delay (get-chars-for (lambda (x) (x? (integer->char x))) mapped-chars))) - -(define alphabetic-ranges (compute-ranges char-alphabetic?)) ;; 325 -(define lower-case-ranges (compute-ranges char-lower-case?)) ;; 405 -(define upper-case-ranges (compute-ranges char-upper-case?)) ;; 380 -(define title-case-ranges (compute-ranges char-title-case?)) ;; 10 -(define numeric-ranges (compute-ranges char-numeric?)) ;; 47 -(define symbolic-ranges (compute-ranges char-symbolic?)) ;; 153 -(define punctuation-ranges (compute-ranges char-punctuation?)) ;; 86 -(define graphic-ranges (compute-ranges char-graphic?)) ;; 401 -(define whitespace-ranges (compute-ranges char-whitespace?)) ;; 10 -(define blank-ranges (compute-ranges char-blank?)) ;; 9 -#;(define hexadecimal-ranges (compute-ranges char-hexadecimal?)) -(define iso-control-ranges (compute-ranges char-iso-control?)) ;; 2 - - - -(module+ test - (require rackunit) - (check-equal? (get-chars-for odd? '()) '()) - (check-equal? (get-chars-for odd? '((1 4 #f) (8 13 #f))) - '((1 . 1) (3 . 3) (9 . 9) (11 . 11) (13 . 13))) - (check-equal? (get-chars-for (lambda (x) - (odd? (quotient x 10))) - '((1 5 #t) (17 19 #t) (21 51 #f))) - '((17 . 19) (30 . 39) (50 . 51)))) - - diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/util.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/util.rkt deleted file mode 100644 index a7afc54..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-lex/util.rkt +++ /dev/null @@ -1,127 +0,0 @@ -#lang racket - -(provide (all-defined-out)) - -(define max-char-num #x10FFFF) - -(define-struct lex-abbrev (get-abbrev)) -(define-struct lex-trans (f)) - -(module+ test - (require rackunit)) - -#;(define-syntax test-block - (syntax-rules () - ((_ defs (code right-ans) ...) - (let* defs - (let ((real-ans code)) - (unless (equal? real-ans right-ans) - (printf "Test failed: ~e gave ~e. Expected ~e\n" - 'code real-ans 'right-ans))) ...)))) - -(define-syntax test-block - (syntax-rules () - ((_ x ...) (void)))) - - -;; A cache is (X ( -> Y) -> Y) -;; make-cache : -> cache -;; table map Xs to Ys. If key is mapped, its value is returned. -;; Otherwise, build is invoked and its result is placed in the table and -;; returned. -;; Xs are compared with equal? -(define (make-cache) - (let ((table (make-hash))) - (lambda (key build) - (hash-ref table key - (lambda () - (let ((new (build))) - (hash-set! table key new) - new)))))) - -(module+ test - (define cache (make-cache)) - (check-equal? (cache '(s 1 2) (lambda () 9)) 9) - (check-equal? (cache '(s 2 1) (lambda () 8)) 8) - (check-equal? (cache '(s 1 2) (lambda () 1)) 9) - (check-equal? (cache (cons 's (cons 0 (cons +inf.0 10))) - (lambda () 22)) 22) - (check-equal? (cache (cons 's (cons 0 (cons +inf.0 10))) - (lambda () 1)) 22)) - - - -;; make-counter : -> -> nat -;; makes a function that returns a higher number by 1, each time -;; it is called. -(define (make-counter) - (let ((counter 0)) - (lambda () - (begin0 - counter - (set! counter (add1 counter)))))) - -(module+ test - (define c (make-counter)) - (define d (make-counter)) - (check-equal? (c) 0) - (check-equal? (d) 0) - (check-equal? (c) 1) - (check-equal? (d) 1) - (check-equal? (c) 2)) - - -;; remove-dups : (list-of X) (X -> number) -> (list-of X) -;; removes the entries from l that have the same index as a -;; previous entry. l must be grouped by indexes. -(define (remove-dups l index acc) - (cond - ((null? l) (reverse acc)) - ((null? acc) (remove-dups (cdr l) index (cons (car l) acc))) - ((= (index (car acc)) (index (car l))) - (remove-dups (cdr l) index acc)) - (else - (remove-dups (cdr l) index (cons (car l) acc))))) - - -(module+ test - (check-equal? (remove-dups '((1 2) (2 2) (1 3) (1 4) - (100 4) (0 5)) cadr null) - '((1 2) (1 3) (1 4) (0 5))) - (check-equal? (remove-dups null error null) null)) - -;; do-simple-equiv : (list-of X) (X -> nat) -> (list-of X) -;; Sorts l according to index and removes the entries with duplicate -;; indexes. -(define (do-simple-equiv l index) - (let ((ordered (sort l (lambda (a b) (< (index a) (index b)))))) - (remove-dups ordered index null))) - -(module+ test - (check-equal? (do-simple-equiv '((2 2) (1 4) (1 2) - (100 4) (1 3) (0 5)) - cadr) - '((2 2) (1 3) (1 4) (0 5))) - (check-equal? (do-simple-equiv null error) null)) - -;; replace : (list-of X) (X -> bool) (X -> (list-of X)) (list-of X) -> -;; (list-of X) -;; If (pred? r) for some r in l, splice (get r) in place of r in the resulting -;; list. -(define (replace l pred? get acc) - (cond - ((null? l) acc) - ((pred? (car l)) (replace (cdr l) pred? get (append (get (car l)) acc))) - (else (replace (cdr l) pred? get (cons (car l) acc))))) - - -(module+ test - (check-equal? (replace null void (lambda () (list 1)) null) null) - (check-equal? (replace '(1 2 3 4 3 5) - (lambda (x) (= x 3)) - (lambda (x) (list 1 2 3)) - null) - '(5 1 2 3 4 1 2 3 2 1))) - - - diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt deleted file mode 100644 index ebff00d..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/grammar.rkt +++ /dev/null @@ -1,280 +0,0 @@ -;; Constructs to create and access grammars, the internal -;; representation of the input to the parser generator. - -(module grammar mzscheme - - (require mzlib/class - mzlib/list - "yacc-helper.rkt" - racket/contract) - - ;; Each production has a unique index 0 <= index <= number of productions - (define-struct prod (lhs rhs index prec action) (make-inspector)) - - ;; The dot-pos field is the index of the element in the rhs - ;; of prod that the dot immediately precedes. - ;; Thus 0 <= dot-pos <= (vector-length rhs). - (define-struct item (prod dot-pos) (make-inspector)) - - ;; gram-sym = (union term? non-term?) - ;; Each term has a unique index 0 <= index < number of terms - ;; Each non-term has a unique index 0 <= index < number of non-terms - (define-struct term (sym index prec) (make-inspector)) - (define-struct non-term (sym index) (make-inspector)) - - ;; a precedence declaration. - (define-struct prec (num assoc) (make-inspector)) - - (provide/contract - (make-item (prod? (or/c #f natural-number/c) . -> . item?)) - (make-term (symbol? (or/c #f natural-number/c) (or/c prec? #f) . -> . term?)) - (make-non-term (symbol? (or/c #f natural-number/c) . -> . non-term?)) - (make-prec (natural-number/c (or/c 'left 'right 'nonassoc) . -> . prec?)) - (make-prod (non-term? (vectorof (or/c non-term? term?)) - (or/c #f natural-number/c) (or/c #f prec?) syntax? . -> . prod?))) - - (provide - - - ;; Things that work on items - start-item? item-prod item->string - sym-at-dot move-dot-right itemstring - non-term? term? non-termbit-vector term-index non-term-index - - ;; Things that work on precs - prec-num prec-assoc - - grammar% - - ;; Things that work on productions - prod-index prod-prec prod-rhs prod-lhs prod-action) - - - ;;---------------------- LR items -------------------------- - - ;; item bool - ;; Lexicographic comparison on two items. - (define (item bool - ;; The start production always has index 0 - (define (start-item? i) - (= 0 (non-term-index (prod-lhs (item-prod i))))) - - - ;; move-dot-right: LR-item -> LR-item | #f - ;; moves the dot to the right in the item, unless it is at its - ;; rightmost, then it returns false - (define (move-dot-right i) - (cond - ((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f) - (else (make-item (item-prod i) - (add1 (item-dot-pos i)))))) - - ;; sym-at-dot: LR-item -> gram-sym | #f - ;; returns the symbol after the dot in the item or #f if there is none - (define (sym-at-dot i) - (let ((dp (item-dot-pos i)) - (rhs (prod-rhs (item-prod i)))) - (cond - ((= dp (vector-length rhs)) #f) - (else (vector-ref rhs dp))))) - - - ;; print-item: LR-item -> - (define (item->string it) - (let ((print-sym (lambda (i) - (let ((gs (vector-ref (prod-rhs (item-prod it)) i))) - (cond - ((term? gs) (format "~a " (term-sym gs))) - (else (format "~a " (non-term-sym gs)))))))) - (string-append - (format "~a -> " (non-term-sym (prod-lhs (item-prod it)))) - (let loop ((i 0)) - (cond - ((= i (vector-length (prod-rhs (item-prod it)))) - (if (= i (item-dot-pos it)) - ". " - "")) - ((= i (item-dot-pos it)) - (string-append ". " (print-sym i) (loop (add1 i)))) - (else (string-append (print-sym i) (loop (add1 i))))))))) - - ;; --------------------- Grammar Symbols -------------------------- - - (define (non-termstring gs) - (symbol->string (gram-sym-symbol gs))) - - ;; term-list->bit-vector: term list -> int - ;; Creates a number where the nth bit is 1 if the term with index n is in - ;; the list, and whose nth bit is 0 otherwise - (define (term-list->bit-vector terms) - (cond - ((null? terms) 0) - (else - (bitwise-ior (arithmetic-shift 1 (term-index (car terms))) (term-list->bit-vector (cdr terms)))))) - - - ;; ------------------------- Grammar ------------------------------ - - (define grammar% - (class object% - (super-instantiate ()) - ;; prods: production list list - ;; where there is one production list per non-term - (init prods) - ;; init-prods: production list - ;; The productions parsing can start from - ;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable - (init-field init-prods terms non-terms end-terms) - - ;; list of all productions - (define all-prods (apply append prods)) - (define num-prods (length all-prods)) - (define num-terms (length terms)) - (define num-non-terms (length non-terms)) - - (let ((count 0)) - (for-each - (lambda (nt) - (set-non-term-index! nt count) - (set! count (add1 count))) - non-terms)) - - (let ((count 0)) - (for-each - (lambda (t) - (set-term-index! t count) - (set! count (add1 count))) - terms)) - - (let ((count 0)) - (for-each - (lambda (prod) - (set-prod-index! prod count) - (set! count (add1 count))) - all-prods)) - - ;; indexed by the index of the non-term - contains the list of productions for that non-term - (define nt->prods - (let ((v (make-vector (length prods) #f))) - (for-each (lambda (prods) - (vector-set! v (non-term-index (prod-lhs (car prods))) prods)) - prods) - v)) - - (define nullable-non-terms - (nullable all-prods num-non-terms)) - - (define/public (get-num-terms) num-terms) - (define/public (get-num-non-terms) num-non-terms) - - (define/public (get-prods-for-non-term nt) - (vector-ref nt->prods (non-term-index nt))) - (define/public (get-prods) all-prods) - (define/public (get-init-prods) init-prods) - - (define/public (get-terms) terms) - (define/public (get-non-terms) non-terms) - - (define/public (get-num-prods) num-prods) - (define/public (get-end-terms) end-terms) - - (define/public (nullable-non-term? nt) - (vector-ref nullable-non-terms (non-term-index nt))) - - (define/public (nullable-after-dot? item) - (let* ((rhs (prod-rhs (item-prod item))) - (prod-length (vector-length rhs))) - (let loop ((i (item-dot-pos item))) - (cond - ((< i prod-length) - (if (and (non-term? (vector-ref rhs i)) (nullable-non-term? (vector-ref rhs i))) - (loop (add1 i)) - #f)) - ((= i prod-length) #t))))) - - (define/public (nullable-non-term-thunk) - (lambda (nt) - (nullable-non-term? nt))) - (define/public (nullable-after-dot?-thunk) - (lambda (item) - (nullable-after-dot? item))))) - - - ;; nullable: production list * int -> non-term set - ;; determines which non-terminals can derive epsilon - (define (nullable prods num-nts) - (letrec ((nullable (make-vector num-nts #f)) - (added #f) - - ;; possible-nullable: producion list -> production list - ;; Removes all productions that have a terminal - (possible-nullable - (lambda (prods) - (filter (lambda (prod) - (vector-andmap non-term? (prod-rhs prod))) - prods))) - - ;; set-nullables: production list -> production list - ;; makes one pass through the productions, adding the ones - ;; known to be nullable now to nullable and returning a list - ;; of productions that we don't know about yet. - (set-nullables - (lambda (prods) - (cond - ((null? prods) null) - ((vector-ref nullable - (gram-sym-index (prod-lhs (car prods)))) - (set-nullables (cdr prods))) - ((vector-andmap (lambda (nt) - (vector-ref nullable (gram-sym-index nt))) - (prod-rhs (car prods))) - (vector-set! nullable - (gram-sym-index (prod-lhs (car prods))) - #t) - (set! added #t) - (set-nullables (cdr prods))) - (else - (cons (car prods) - (set-nullables (cdr prods)))))))) - - (let loop ((P (possible-nullable prods))) - (cond - ((null? P) nullable) - (else - (set! added #f) - (let ((new-P (set-nullables P))) - (if added - (loop new-P) - nullable))))))) - - -) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt deleted file mode 100644 index 958acc1..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/graph.rkt +++ /dev/null @@ -1,61 +0,0 @@ -(module graph mzscheme - - (provide digraph) - - (define (zero-thunk) 0) - - ;; digraph: - ;; ('a list) * ('a -> 'a list) * ('a -> 'b) * ('b * 'b -> 'b) * (-> 'b) - ;; -> ('a -> 'b) - ;; DeRemer and Pennello 1982 - ;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} - ;; We use a hash-table to represent the result function 'a -> 'b set, so - ;; the values of type 'a must be comparable with eq?. - (define (digraph nodes edges f- union fail) - (letrec [ - ;; Will map elements of 'a to 'b sets - (results (make-hash-table)) - (f (lambda (x) (hash-table-get results x fail))) - - ;; Maps elements of 'a to integers. - (N (make-hash-table)) - (get-N (lambda (x) (hash-table-get N x zero-thunk))) - (set-N (lambda (x d) (hash-table-put! N x d))) - - (stack null) - (push (lambda (x) - (set! stack (cons x stack)))) - (pop (lambda () - (begin0 - (car stack) - (set! stack (cdr stack))))) - (depth (lambda () (length stack))) - - ;; traverse: 'a -> - (traverse - (lambda (x) - (push x) - (let ((d (depth))) - (set-N x d) - (hash-table-put! results x (f- x)) - (for-each (lambda (y) - (if (= 0 (get-N y)) - (traverse y)) - (hash-table-put! results - x - (union (f x) (f y))) - (set-N x (min (get-N x) (get-N y)))) - (edges x)) - (if (= d (get-N x)) - (let loop ((p (pop))) - (set-N p +inf.0) - (hash-table-put! results p (f x)) - (if (not (eq? x p)) - (loop (pop))))))))] - (for-each (lambda (x) - (if (= 0 (get-N x)) - (traverse x))) - nodes) - f)) - -) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt deleted file mode 100644 index 7309f51..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/input-file-parser.rkt +++ /dev/null @@ -1,374 +0,0 @@ -(module input-file-parser mzscheme - - ;; routines for parsing the input to the parser generator and producing a - ;; grammar (See grammar.rkt) - - (require "yacc-helper.rkt" - "../private-lex/token-syntax.rkt" - "grammar.rkt" - mzlib/class - racket/contract) - (require-for-template mzscheme) - -(define (is-a-grammar%? x) (is-a? x grammar%)) - (provide/contract - (parse-input ((listof identifier?) (listof identifier?) (listof identifier?) - (or/c #f syntax?) syntax? any/c . -> . is-a-grammar%?)) - (get-term-list ((listof identifier?) . -> . (listof identifier?)))) - - (define stx-for-original-property (read-syntax #f (open-input-string "original"))) - - ;; get-args: ??? -> (values (listof syntax) (or/c #f (cons integer? stx))) - (define (get-args i rhs src-pos term-defs) - (let ((empty-table (make-hash-table)) - (biggest-pos #f)) - (hash-table-put! empty-table 'error #t) - (for-each (lambda (td) - (let ((v (syntax-local-value td))) - (if (e-terminals-def? v) - (for-each (lambda (s) - (hash-table-put! empty-table (syntax-object->datum s) #t)) - (syntax->list (e-terminals-def-t v)))))) - term-defs) - (let ([args - (let get-args ((i i) - (rhs rhs)) - (cond - ((null? rhs) null) - (else - (let ((b (car rhs)) - (name (if (hash-table-get empty-table (syntax-object->datum (car rhs)) (lambda () #f)) - (gensym) - (string->symbol (format "$~a" i))))) - (cond - (src-pos - (let ([start-pos-id - (datum->syntax-object b (string->symbol (format "$~a-start-pos" i)) b stx-for-original-property)] - [end-pos-id - (datum->syntax-object b (string->symbol (format "$~a-end-pos" i)) b stx-for-original-property)]) - (set! biggest-pos (cons start-pos-id end-pos-id)) - `(,(datum->syntax-object b name b stx-for-original-property) - ,start-pos-id - ,end-pos-id - ,@(get-args (add1 i) (cdr rhs))))) - (else - `(,(datum->syntax-object b name b stx-for-original-property) - ,@(get-args (add1 i) (cdr rhs)))))))))]) - (values args biggest-pos)))) - - ;; Given the list of terminal symbols and the precedence/associativity definitions, - ;; builds terminal structures (See grammar.rkt) - ;; build-terms: symbol list * symbol list list -> term list - (define (build-terms term-list precs) - (let ((counter 0) - - ;;(term-list (cons (gensym) term-list)) - - ;; Will map a terminal symbol to its precedence/associativity - (prec-table (make-hash-table))) - - ;; Fill the prec table - (for-each - (lambda (p-decl) - (begin0 - (let ((assoc (car p-decl))) - (for-each - (lambda (term-sym) - (hash-table-put! prec-table term-sym (make-prec counter assoc))) - (cdr p-decl))) - (set! counter (add1 counter)))) - precs) - - ;; Build the terminal structures - (map - (lambda (term-sym) - (make-term term-sym - #f - (hash-table-get prec-table term-sym (lambda () #f)))) - term-list))) - - ;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.rkt) - ;; get-terms-from-def: identifier? -> (listof identifier?) - (define (get-terms-from-def term-syn) - (let ((t (syntax-local-value term-syn (lambda () #f)))) - (cond - ((terminals-def? t) (syntax->list (terminals-def-t t))) - ((e-terminals-def? t) (syntax->list (e-terminals-def-t t))) - (else - (raise-syntax-error - 'parser-tokens - "undefined token group" - term-syn))))) - - (define (get-term-list term-group-names) - (remove-duplicates - (cons (datum->syntax-object #f 'error) - (apply append - (map get-terms-from-def term-group-names))))) - - (define (parse-input term-defs start ends prec-decls prods src-pos) - (let* ((start-syms (map syntax-e start)) - - (list-of-terms (map syntax-e (get-term-list term-defs))) - - (end-terms - (map - (lambda (end) - (unless (memq (syntax-e end) list-of-terms) - (raise-syntax-error - 'parser-end-tokens - (format "End token ~a not defined as a token" - (syntax-e end)) - end)) - (syntax-e end)) - ends)) - - ;; Get the list of terminals out of input-terms - - (list-of-non-terms - (syntax-case prods () - (((non-term production ...) ...) - (begin - (for-each - (lambda (nts) - (if (memq (syntax-object->datum nts) list-of-terms) - (raise-syntax-error - 'parser-non-terminals - (format "~a used as both token and non-terminal" - (syntax-object->datum nts)) - nts))) - (syntax->list (syntax (non-term ...)))) - - (let ((dup (duplicate-list? (syntax-object->datum - (syntax (non-term ...)))))) - (if dup - (raise-syntax-error - 'parser-non-terminals - (format "non-terminal ~a defined multiple times" - dup) - prods))) - - (syntax-object->datum (syntax (non-term ...))))) - (_ - (raise-syntax-error - 'parser-grammar - "Grammar must be of the form (grammar (non-terminal productions ...) ...)" - prods)))) - - ;; Check the precedence declarations for errors and turn them into data - (precs - (syntax-case prec-decls () - (((type term ...) ...) - (let ((p-terms - (syntax-object->datum (syntax (term ... ...))))) - (cond - ((duplicate-list? p-terms) => - (lambda (d) - (raise-syntax-error - 'parser-precedences - (format "duplicate precedence declaration for token ~a" - d) - prec-decls))) - (else - (for-each - (lambda (a) - (for-each - (lambda (t) - (if (not (memq (syntax-object->datum t) - list-of-terms)) - (raise-syntax-error - 'parser-precedences - (format - "Precedence declared for non-token ~a" - (syntax-object->datum t)) - t))) - (syntax->list a))) - (syntax->list (syntax ((term ...) ...)))) - (for-each - (lambda (type) - (if (not (memq (syntax-object->datum type) - `(left right nonassoc))) - (raise-syntax-error - 'parser-precedences - "Associativity must be left, right or nonassoc" - type))) - (syntax->list (syntax (type ...)))) - (syntax-object->datum prec-decls))))) - (#f null) - (_ - (raise-syntax-error - 'parser-precedences - "Precedence declaration must be of the form (precs (assoc term ...) ...) where assoc is left, right or nonassoc" - prec-decls)))) - - (terms (build-terms list-of-terms precs)) - - (non-terms (map (lambda (non-term) (make-non-term non-term #f)) - list-of-non-terms)) - (term-table (make-hash-table)) - (non-term-table (make-hash-table))) - - (for-each (lambda (t) - (hash-table-put! term-table (gram-sym-symbol t) t)) - terms) - - (for-each (lambda (nt) - (hash-table-put! non-term-table (gram-sym-symbol nt) nt)) - non-terms) - - (let* ( - ;; parse-prod: syntax-object -> gram-sym vector - (parse-prod - (lambda (prod-so) - (syntax-case prod-so () - ((prod-rhs-sym ...) - (andmap identifier? (syntax->list prod-so)) - (begin - (for-each (lambda (t) - (if (memq (syntax-object->datum t) end-terms) - (raise-syntax-error - 'parser-production-rhs - (format "~a is an end token and cannot be used in a production" - (syntax-object->datum t)) - t))) - (syntax->list prod-so)) - (list->vector - (map (lambda (s) - (hash-table-get - term-table - (syntax-object->datum s) - (lambda () - (hash-table-get - non-term-table - (syntax-object->datum s) - (lambda () - (raise-syntax-error - 'parser-production-rhs - (format - "~a is not declared as a terminal or non-terminal" - (syntax-object->datum s)) - s)))))) - (syntax->list prod-so))))) - (_ - (raise-syntax-error - 'parser-production-rhs - "production right-hand-side must have form (symbol ...)" - prod-so))))) - - ;; parse-action: syntax-object * syntax-object -> syntax-object - (parse-action - (lambda (rhs act) - (let-values ([(args biggest) (get-args 1 (syntax->list rhs) src-pos term-defs)]) - (let ([act - (if biggest - (with-syntax ([$n-start-pos (datum->syntax-object (car biggest) '$n-start-pos)] - [$n-end-pos (datum->syntax-object (cdr biggest) '$n-end-pos)]) - #`(let ([$n-start-pos #,(car biggest)] - [$n-end-pos #,(cdr biggest)]) - #,act)) - act)]) - (quasisyntax/loc act - (lambda #,args - #,act)))))) - - ;; parse-prod+action: non-term * syntax-object -> production - (parse-prod+action - (lambda (nt prod-so) - (syntax-case prod-so () - ((prod-rhs action) - (let ((p (parse-prod (syntax prod-rhs)))) - (make-prod - nt - p - #f - (let loop ((i (sub1 (vector-length p)))) - (if (>= i 0) - (let ((gs (vector-ref p i))) - (if (term? gs) - (term-prec gs) - (loop (sub1 i)))) - #f)) - (parse-action (syntax prod-rhs) (syntax action))))) - ((prod-rhs (prec term) action) - (identifier? (syntax term)) - (let ((p (parse-prod (syntax prod-rhs)))) - (make-prod - nt - p - #f - (term-prec - (hash-table-get - term-table - (syntax-object->datum (syntax term)) - (lambda () - (raise-syntax-error - 'parser-production-rhs - (format - "unrecognized terminal ~a in precedence declaration" - (syntax-object->datum (syntax term))) - (syntax term))))) - (parse-action (syntax prod-rhs) (syntax action))))) - (_ - (raise-syntax-error - 'parser-production-rhs - "production must have form [(symbol ...) expression] or [(symbol ...) (prec symbol) expression]" - prod-so))))) - - ;; parse-prod-for-nt: syntax-object -> production list - (parse-prods-for-nt - (lambda (prods-so) - (syntax-case prods-so () - ((nt productions ...) - (> (length (syntax->list (syntax (productions ...)))) 0) - (let ((nt (hash-table-get non-term-table - (syntax-object->datum (syntax nt))))) - (map (lambda (p) (parse-prod+action nt p)) - (syntax->list (syntax (productions ...)))))) - (_ - (raise-syntax-error - 'parser-productions - "A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side" - prods-so)))))) - - (for-each - (lambda (sstx ssym) - (unless (memq ssym list-of-non-terms) - (raise-syntax-error - 'parser-start - (format "Start symbol ~a not defined as a non-terminal" ssym) - sstx))) - start start-syms) - - (let* ((starts (map (lambda (x) (make-non-term (gensym) #f)) start-syms)) - (end-non-terms (map (lambda (x) (make-non-term (gensym) #f)) start-syms)) - (parsed-prods (map parse-prods-for-nt (syntax->list prods))) - (start-prods - (map (lambda (start end-non-term) - (list (make-prod start (vector end-non-term) #f #f - (syntax (lambda (x) x))))) - starts end-non-terms)) - (prods - `(,@start-prods - ,@(map - (lambda (end-nt start-sym) - (map - (lambda (end) - (make-prod end-nt - (vector - (hash-table-get non-term-table start-sym) - (hash-table-get term-table end)) - #f - #f - (syntax (lambda (x) x)))) - end-terms)) - end-non-terms start-syms) - ,@parsed-prods))) - - (make-object grammar% - prods - (map car start-prods) - terms - (append starts (append end-non-terms non-terms)) - (map (lambda (term-name) - (hash-table-get term-table term-name)) - end-terms))))))) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt deleted file mode 100644 index e9b4d3b..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lalr.rkt +++ /dev/null @@ -1,277 +0,0 @@ -(module lalr mzscheme - - ;; Compute LALR lookaheads from DeRemer and Pennello 1982 - - (require "lr0.rkt" - "grammar.rkt" - mzlib/list - mzlib/class) - - (provide compute-LA) - - ;; compute-DR: LR0-automaton * grammar -> (trans-key -> term set) - ;; computes for each state, non-term transition pair, the terminals - ;; which can transition out of the resulting state - ;; output term set is represented in bit-vector form - (define (compute-DR a g) - (lambda (tk) - (let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) - (term-list->bit-vector - (filter - (lambda (term) - (send a run-automaton r term)) - (send g get-terms)))))) - - ;; compute-reads: - ;; LR0-automaton * grammar -> (trans-key -> trans-key list) - (define (compute-reads a g) - (let ((nullable-non-terms - (filter (lambda (nt) (send g nullable-non-term? nt)) - (send g get-non-terms)))) - (lambda (tk) - (let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) - (map (lambda (x) (make-trans-key r x)) - (filter (lambda (non-term) (send a run-automaton r non-term)) - nullable-non-terms)))))) - - ;; compute-read: LR0-automaton * grammar -> (trans-key -> term set) - ;; output term set is represented in bit-vector form - (define (compute-read a g) - (let* ((dr (compute-DR a g)) - (reads (compute-reads a g))) - (digraph-tk->terml (send a get-mapped-non-term-keys) - reads - dr - (send a get-num-states)))) - ;; returns the list of all k such that state k transitions to state start on the - ;; transitions in rhs (in order) - (define (run-lr0-backward a rhs dot-pos start num-states) - (let loop ((states (list start)) - (i (sub1 dot-pos))) - (cond - ((< i 0) states) - (else (loop (send a run-automaton-back states (vector-ref rhs i)) - (sub1 i)))))) - - ;; prod->items-for-include: grammar * prod * non-term -> lr0-item list - ;; returns the list of all (B -> beta . nt gamma) such that prod = (B -> beta nt gamma) - ;; and gamma =>* epsilon - (define (prod->items-for-include g prod nt) - (let* ((rhs (prod-rhs prod)) - (rhs-l (vector-length rhs))) - (append (if (and (> rhs-l 0) (eq? nt (vector-ref rhs (sub1 rhs-l)))) - (list (make-item prod (sub1 rhs-l))) - null) - (let loop ((i (sub1 rhs-l))) - (cond - ((and (> i 0) - (non-term? (vector-ref rhs i)) - (send g nullable-non-term? (vector-ref rhs i))) - (if (eq? nt (vector-ref rhs (sub1 i))) - (cons (make-item prod (sub1 i)) - (loop (sub1 i))) - (loop (sub1 i)))) - (else null)))))) - - ;; prod-list->items-for-include: grammar * prod list * non-term -> lr0-item list - ;; return the list of all (B -> beta . nt gamma) such that (B -> beta nt gamma) in prod-list - ;; and gamma =>* epsilon - (define (prod-list->items-for-include g prod-list nt) - (apply append (map (lambda (prod) (prod->items-for-include g prod nt)) prod-list))) - - ;; comput-includes: lr0-automaton * grammar -> (trans-key -> trans-key list) - (define (compute-includes a g) - (let ((num-states (send a get-num-states)) - (items-for-input-nt (make-vector (send g get-num-non-terms) null))) - (for-each - (lambda (input-nt) - (vector-set! items-for-input-nt (non-term-index input-nt) - (prod-list->items-for-include g (send g get-prods) input-nt))) - (send g get-non-terms)) - (lambda (tk) - (let* ((goal-state (trans-key-st tk)) - (non-term (trans-key-gs tk)) - (items (vector-ref items-for-input-nt (non-term-index non-term)))) - (trans-key-list-remove-dups - (apply append - (map (lambda (item) - (let* ((prod (item-prod item)) - (rhs (prod-rhs prod)) - (lhs (prod-lhs prod))) - (map (lambda (state) - (make-trans-key state lhs)) - (run-lr0-backward a - rhs - (item-dot-pos item) - goal-state - num-states)))) - items))))))) - - ;; compute-lookback: lr0-automaton * grammar -> (kernel * proc -> trans-key list) - (define (compute-lookback a g) - (let ((num-states (send a get-num-states))) - (lambda (state prod) - (map (lambda (k) (make-trans-key k (prod-lhs prod))) - (run-lr0-backward a (prod-rhs prod) (vector-length (prod-rhs prod)) state num-states))))) - - ;; compute-follow: LR0-automaton * grammar -> (trans-key -> term set) - ;; output term set is represented in bit-vector form - (define (compute-follow a g includes) - (let ((read (compute-read a g))) - (digraph-tk->terml (send a get-mapped-non-term-keys) - includes - read - (send a get-num-states)))) - - ;; compute-LA: LR0-automaton * grammar -> kernel * prod -> term set - ;; output term set is represented in bit-vector form - (define (compute-LA a g) - (let* ((includes (compute-includes a g)) - (lookback (compute-lookback a g)) - (follow (compute-follow a g includes))) - (lambda (k p) - (let* ((l (lookback k p)) - (f (map follow l))) - (apply bitwise-ior (cons 0 f)))))) - - (define (print-DR dr a g) - (print-input-st-sym dr "DR" a g print-output-terms)) - (define (print-Read Read a g) - (print-input-st-sym Read "Read" a g print-output-terms)) - (define (print-includes i a g) - (print-input-st-sym i "includes" a g print-output-st-nt)) - (define (print-lookback l a g) - (print-input-st-prod l "lookback" a g print-output-st-nt)) - (define (print-follow f a g) - (print-input-st-sym f "follow" a g print-output-terms)) - (define (print-LA l a g) - (print-input-st-prod l "LA" a g print-output-terms)) - - (define (print-input-st-sym f name a g print-output) - (printf "~a:\n" name) - (send a for-each-state - (lambda (state) - (for-each - (lambda (non-term) - (let ((res (f (make-trans-key state non-term)))) - (if (not (null? res)) - (printf "~a(~a, ~a) = ~a\n" - name - state - (gram-sym-symbol non-term) - (print-output res))))) - (send g get-non-terms)))) - (newline)) - - (define (print-input-st-prod f name a g print-output) - (printf "~a:\n" name) - (send a for-each-state - (lambda (state) - (for-each - (lambda (non-term) - (for-each - (lambda (prod) - (let ((res (f state prod))) - (if (not (null? res)) - (printf "~a(~a, ~a) = ~a\n" - name - (kernel-index state) - (prod-index prod) - (print-output res))))) - (send g get-prods-for-non-term non-term))) - (send g get-non-terms))))) - - (define (print-output-terms r) - (map - (lambda (p) - (gram-sym-symbol p)) - r)) - - (define (print-output-st-nt r) - (map - (lambda (p) - (list - (kernel-index (trans-key-st p)) - (gram-sym-symbol (trans-key-gs p)))) - r)) - - ;; init-tk-map : int -> (vectorof hashtable?) - (define (init-tk-map n) - (let ((v (make-vector n #f))) - (let loop ((i (sub1 (vector-length v)))) - (when (>= i 0) - (vector-set! v i (make-hash-table)) - (loop (sub1 i)))) - v)) - - ;; lookup-tk-map : (vectorof (symbol? int hashtable)) -> trans-key? -> int - (define (lookup-tk-map map) - (lambda (tk) - (let ((st (trans-key-st tk)) - (gs (trans-key-gs tk))) - (hash-table-get (vector-ref map (kernel-index st)) - (gram-sym-symbol gs) - (lambda () 0))))) - - ;; add-tk-map : (vectorof (symbol? int hashtable)) -> trans-key int -> - (define (add-tk-map map) - (lambda (tk v) - (let ((st (trans-key-st tk)) - (gs (trans-key-gs tk))) - (hash-table-put! (vector-ref map (kernel-index st)) - (gram-sym-symbol gs) - v)))) - - ;; digraph-tk->terml: - ;; (trans-key list) * (trans-key -> trans-key list) * (trans-key -> term list) * int * int * int - ;; -> (trans-key -> term list) - ;; DeRemer and Pennello 1982 - ;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} - ;; A specialization of digraph in the file graph.rkt - (define (digraph-tk->terml nodes edges f- num-states) - (letrec [ - ;; Will map elements of trans-key to term sets represented as bit vectors - (results (init-tk-map num-states)) - - ;; Maps elements of trans-keys to integers. - (N (init-tk-map num-states)) - - (get-N (lookup-tk-map N)) - (set-N (add-tk-map N)) - (get-f (lookup-tk-map results)) - (set-f (add-tk-map results)) - - (stack null) - (push (lambda (x) - (set! stack (cons x stack)))) - (pop (lambda () - (begin0 - (car stack) - (set! stack (cdr stack))))) - (depth (lambda () (length stack))) - - ;; traverse: 'a -> - (traverse - (lambda (x) - (push x) - (let ((d (depth))) - (set-N x d) - (set-f x (f- x)) - (for-each (lambda (y) - (when (= 0 (get-N y)) - (traverse y)) - (set-f x (bitwise-ior (get-f x) (get-f y))) - (set-N x (min (get-N x) (get-N y)))) - (edges x)) - (when (= d (get-N x)) - (let loop ((p (pop))) - (set-N p +inf.0) - (set-f p (get-f x)) - (unless (equal? x p) - (loop (pop))))))))] - (for-each (lambda (x) - (when (= 0 (get-N x)) - (traverse x))) - nodes) - get-f)) -) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt deleted file mode 100644 index f237735..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/lr0.rkt +++ /dev/null @@ -1,372 +0,0 @@ -(module lr0 mzscheme - - ;; Handle the LR0 automaton - - (require "grammar.rkt" - "graph.rkt" - mzlib/list - mzlib/class) - - (provide build-lr0-automaton lr0% - (struct trans-key (st gs)) trans-key-list-remove-dups - kernel-items kernel-index) - - ;; kernel = (make-kernel (LR1-item list) index) - ;; the list must be kept sorted according to item - ;; (vectorof (symbol X hashtable)) - (define (build-transition-table num-states assoc) - (let ((transitions (make-vector num-states #f))) - (let loop ((i (sub1 (vector-length transitions)))) - (when (>= i 0) - (vector-set! transitions i (make-hash-table)) - (loop (sub1 i)))) - (for-each - (lambda (trans-key/kernel) - (let ((tk (car trans-key/kernel))) - (hash-table-put! (vector-ref transitions (kernel-index (trans-key-st tk))) - (gram-sym-symbol (trans-key-gs tk)) - (cdr trans-key/kernel)))) - assoc) - transitions)) - - ;; reverse-assoc : (listof (cons/c trans-key? kernel?)) -> - ;; (listof (cons/c trans-key? (listof kernel?))) - (define (reverse-assoc assoc) - (let ((reverse-hash (make-hash-table 'equal)) - (hash-table-add! - (lambda (ht k v) - (hash-table-put! ht k (cons v (hash-table-get ht k (lambda () null))))))) - (for-each - (lambda (trans-key/kernel) - (let ((tk (car trans-key/kernel))) - (hash-table-add! reverse-hash - (make-trans-key (cdr trans-key/kernel) - (trans-key-gs tk)) - (trans-key-st tk)))) - assoc) - (hash-table-map reverse-hash cons))) - - - ;; kernel-list-remove-duplicates - ;; LR0-automaton = object of class lr0% - (define lr0% - (class object% - (super-instantiate ()) - ;; term-assoc : (listof (cons/c trans-key? kernel?)) - ;; non-term-assoc : (listof (cons/c trans-key? kernel?)) - ;; states : (vectorof kernel?) - ;; epsilons : ??? - (init-field term-assoc non-term-assoc states epsilons) - - (define transitions (build-transition-table (vector-length states) - (append term-assoc non-term-assoc))) - - (define reverse-term-assoc (reverse-assoc term-assoc)) - (define reverse-non-term-assoc (reverse-assoc non-term-assoc)) - (define reverse-transitions - (build-transition-table (vector-length states) - (append reverse-term-assoc reverse-non-term-assoc))) - - (define mapped-non-terms (map car non-term-assoc)) - - (define/public (get-mapped-non-term-keys) - mapped-non-terms) - - (define/public (get-num-states) - (vector-length states)) - - (define/public (get-epsilon-trans) - epsilons) - - (define/public (get-transitions) - (append term-assoc non-term-assoc)) - - ;; for-each-state : (state ->) -> - ;; Iteration over the states in an automaton - (define/public (for-each-state f) - (let ((num-states (vector-length states))) - (let loop ((i 0)) - (if (< i num-states) - (begin - (f (vector-ref states i)) - (loop (add1 i))))))) - - ;; run-automaton: kernel? gram-sym? -> (union kernel #f) - ;; returns the state reached from state k on input s, or #f when k - ;; has no transition on s - (define/public (run-automaton k s) - (hash-table-get (vector-ref transitions (kernel-index k)) - (gram-sym-symbol s) - (lambda () #f))) - - ;; run-automaton-back : (listof kernel?) gram-sym? -> (listof kernel) - ;; returns the list of states that can reach k by transitioning on s. - (define/public (run-automaton-back k s) - (apply append - (map - (lambda (k) - (hash-table-get (vector-ref reverse-transitions (kernel-index k)) - (gram-sym-symbol s) - (lambda () null))) - k))))) - - (define (union comp (eq? a b) - (define (kernel->string k) - (apply string-append - `("{" ,@(map (lambda (i) (string-append (item->string i) ", ")) - (kernel-items k)) - "}"))) - - ;; build-LR0-automaton: grammar -> LR0-automaton - ;; Constructs the kernels of the sets of LR(0) items of g - (define (build-lr0-automaton grammar) -; (printf "LR(0) automaton:\n") - (letrec ( - (epsilons (make-hash-table 'equal)) - (grammar-symbols (append (send grammar get-non-terms) - (send grammar get-terms))) - ;; first-non-term: non-term -> non-term list - ;; given a non-terminal symbol C, return those non-terminal - ;; symbols A s.t. C -> An for some string of terminals and - ;; non-terminals n where -> means a rightmost derivation in many - ;; steps. Assumes that each non-term can be reduced to a string - ;; of terms. - (first-non-term - (digraph (send grammar get-non-terms) - (lambda (nt) - (filter non-term? - (map (lambda (prod) - (sym-at-dot (make-item prod 0))) - (send grammar get-prods-for-non-term nt)))) - (lambda (nt) (list nt)) - (union non-term LR1-item list - ;; Creates a set of items containing i s.t. if A -> n.Xm is in it, - ;; X -> .o is in it too. - (LR0-closure - (lambda (i) - (cond - ((null? i) null) - (else - (let ((next-gsym (sym-at-dot (car i)))) - (cond - ((non-term? next-gsym) - (cons (car i) - (append - (apply append - (map (lambda (non-term) - (map (lambda (x) - (make-item x 0)) - (send grammar - get-prods-for-non-term - non-term))) - (first-non-term next-gsym))) - (LR0-closure (cdr i))))) - (else - (cons (car i) (LR0-closure (cdr i)))))))))) - - - ;; maps trans-keys to kernels - (automaton-term null) - (automaton-non-term null) - - ;; keeps the kernels we have seen, so we can have a unique - ;; list for each kernel - (kernels (make-hash-table 'equal)) - - (counter 0) - - ;; goto: LR1-item list -> LR1-item list list - ;; creates new kernels by moving the dot in each item in the - ;; LR0-closure of kernel to the right, and grouping them by - ;; the term/non-term moved over. Returns the kernels not - ;; yet seen, and places the trans-keys into automaton - (goto - (lambda (kernel) - (let ( - ;; maps a gram-syms to a list of items - (table (make-hash-table)) - - ;; add-item!: - ;; (symbol (listof item) hashtable) item? -> - ;; adds i into the table grouped with the grammar - ;; symbol following its dot - (add-item! - (lambda (table i) - (let ((gs (sym-at-dot i))) - (cond - (gs - (let ((already - (hash-table-get table - (gram-sym-symbol gs) - (lambda () null)))) - (unless (member i already) - (hash-table-put! table - (gram-sym-symbol gs) - (cons i already))))) - ((= 0 (vector-length (prod-rhs (item-prod i)))) - (let ((current (hash-table-get epsilons - kernel - (lambda () null)))) - (hash-table-put! epsilons - kernel - (cons i current))))))))) - - ;; Group the items of the LR0 closure of the kernel - ;; by the character after the dot - (for-each (lambda (item) - (add-item! table item)) - (LR0-closure (kernel-items kernel))) - - ;; each group is a new kernel, with the dot advanced. - ;; sorts the items in a kernel so kernels can be compared - ;; with equal? for using the table kernels to make sure - ;; only one representitive of each kernel is created - (filter - (lambda (x) x) - (map - (lambda (i) - (let* ((gs (car i)) - (items (cadr i)) - (new #f) - (new-kernel (sort - (filter (lambda (x) x) - (map move-dot-right items)) - item ~a on ~a\n" - (kernel->string kernel) - (kernel->string unique-kernel) - (gram-sym-symbol gs)) - (if new - unique-kernel - #f))) - (let loop ((gsyms grammar-symbols)) - (cond - ((null? gsyms) null) - (else - (let ((items (hash-table-get table - (gram-sym-symbol (car gsyms)) - (lambda () null)))) - (cond - ((null? items) (loop (cdr gsyms))) - (else - (cons (list (car gsyms) items) - (loop (cdr gsyms)))))))))))))) - - (starts - (map (lambda (init-prod) (list (make-item init-prod 0))) - (send grammar get-init-prods))) - (startk - (map (lambda (start) - (let ((k (make-kernel start counter))) - (hash-table-put! kernels start k) - (set! counter (add1 counter)) - k)) - starts)) - (new-kernels (make-queue))) - - (let loop ((old-kernels startk) - (seen-kernels null)) - (cond - ((and (empty-queue? new-kernels) (null? old-kernels)) - (make-object lr0% - automaton-term - automaton-non-term - (list->vector (reverse seen-kernels)) - epsilons)) - ((null? old-kernels) - (loop (deq! new-kernels) seen-kernels)) - (else - (enq! new-kernels (goto (car old-kernels))) - (loop (cdr old-kernels) (cons (car old-kernels) seen-kernels))))))) - - (define-struct q (f l) (make-inspector)) - (define (empty-queue? q) - (null? (q-f q))) - (define (make-queue) - (make-q null null)) - (define (enq! q i) - (if (empty-queue? q) - (let ((i (mcons i null))) - (set-q-l! q i) - (set-q-f! q i)) - (begin - (set-mcdr! (q-l q) (mcons i null)) - (set-q-l! q (mcdr (q-l q)))))) - (define (deq! q) - (begin0 - (mcar (q-f q)) - (set-q-f! q (mcdr (q-f q))))) - -) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt deleted file mode 100644 index 2a39b36..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-actions.rkt +++ /dev/null @@ -1,54 +0,0 @@ -(module parser-actions mzscheme - (require "grammar.rkt") - (provide (all-defined-except make-reduce make-reduce*) - (rename make-reduce* make-reduce)) - - ;; An action is - ;; - (make-shift int) - ;; - (make-reduce prod runtime-action) - ;; - (make-accept) - ;; - (make-goto int) - ;; - (no-action) - ;; A reduce contains a runtime-reduce so that sharing of the reduces can - ;; be easily transferred to sharing of runtime-reduces. - - (define-struct action () (make-inspector)) - (define-struct (shift action) (state) (make-inspector)) - (define-struct (reduce action) (prod runtime-reduce) (make-inspector)) - (define-struct (accept action) () (make-inspector)) - (define-struct (goto action) (state) (make-inspector)) - (define-struct (no-action action) () (make-inspector)) - - (define (make-reduce* p) - (make-reduce p - (vector (prod-index p) - (gram-sym-symbol (prod-lhs p)) - (vector-length (prod-rhs p))))) - - ;; A runtime-action is - ;; non-negative-int (shift) - ;; (vector int symbol int) (reduce) - ;; 'accept (accept) - ;; negative-int (goto) - ;; #f (no-action) - - (define (action->runtime-action a) - (cond - ((shift? a) (shift-state a)) - ((reduce? a) (reduce-runtime-reduce a)) - ((accept? a) 'accept) - ((goto? a) (- (+ (goto-state a) 1))) - ((no-action? a) #f))) - - (define (runtime-shift? x) (and (integer? x) (>= x 0))) - (define runtime-reduce? vector?) - (define (runtime-accept? x) (eq? x 'accept)) - (define (runtime-goto? x) (and (integer? x) (< x 0))) - - (define runtime-shift-state values) - (define (runtime-reduce-prod-num x) (vector-ref x 0)) - (define (runtime-reduce-lhs x) (vector-ref x 1)) - (define (runtime-reduce-rhs-length x) (vector-ref x 2)) - (define (runtime-goto-state x) (- (+ x 1))) - - ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt deleted file mode 100644 index 1be421c..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/parser-builder.rkt +++ /dev/null @@ -1,113 +0,0 @@ -(module parser-builder mzscheme - - (require "input-file-parser.rkt" - "grammar.rkt" - "table.rkt" - mzlib/class - racket/contract) - (require-for-template mzscheme) - - (provide/contract - (build-parser (-> string? any/c any/c - (listof identifier?) - (listof identifier?) - (listof identifier?) - (or/c syntax? #f) - syntax? - (values any/c any/c any/c any/c)))) - - ;; fix-check-syntax : (listof identifier?) (listof identifier?) (listof identifier?) - ;; (union syntax? false/c) syntax?) -> syntax? - (define (fix-check-syntax input-terms start ends assocs prods) - (let* ((term-binders (get-term-list input-terms)) - (get-term-binder - (let ((t (make-hash-table))) - (for-each - (lambda (term) - (hash-table-put! t (syntax-e term) term)) - term-binders) - (lambda (x) - (let ((r (hash-table-get t (syntax-e x) (lambda () #f)))) - (if r - (syntax-local-introduce (datum->syntax-object r (syntax-e x) x x)) - x))))) - (rhs-list - (syntax-case prods () - (((_ rhs ...) ...) - (syntax->list (syntax (rhs ... ...))))))) - (with-syntax (((tmp ...) (map syntax-local-introduce term-binders)) - ((term-group ...) - (map (lambda (tg) - (syntax-property - (datum->syntax-object tg #f) - 'disappeared-use - tg)) - input-terms)) - ((end ...) - (map get-term-binder ends)) - ((start ...) - (map get-term-binder start)) - ((bind ...) - (syntax-case prods () - (((bind _ ...) ...) - (syntax->list (syntax (bind ...)))))) - (((bound ...) ...) - (map - (lambda (rhs) - (syntax-case rhs () - (((bound ...) (_ pbound) __) - (map get-term-binder - (cons (syntax pbound) - (syntax->list (syntax (bound ...)))))) - (((bound ...) _) - (map get-term-binder - (syntax->list (syntax (bound ...))))))) - rhs-list)) - ((prec ...) - (if assocs - (map get-term-binder - (syntax-case assocs () - (((__ term ...) ...) - (syntax->list (syntax (term ... ...)))))) - null))) - #`(when #f - (let ((bind void) ... (tmp void) ...) - (void bound ... ... term-group ... start ... end ... prec ...)))))) - (require mzlib/list "parser-actions.rkt") - (define (build-parser filename src-pos suppress input-terms start end assocs prods) - (let* ((grammar (parse-input input-terms start end assocs prods src-pos)) - (table (build-table grammar filename suppress)) - (all-tokens (make-hash-table)) - (actions-code - `(vector ,@(map prod-action (send grammar get-prods))))) - (for-each (lambda (term) - (hash-table-put! all-tokens (gram-sym-symbol term) #t)) - (send grammar get-terms)) - #;(let ((num-states (vector-length table)) - (num-gram-syms (+ (send grammar get-num-terms) - (send grammar get-num-non-terms))) - (num-ht-entries (apply + (map length (vector->list table)))) - (num-reduces - (let ((ht (make-hash-table))) - (for-each - (lambda (x) - (when (reduce? x) - (hash-table-put! ht x #t))) - (map cdr (apply append (vector->list table)))) - (length (hash-table-map ht void))))) - (printf "~a states, ~a grammar symbols, ~a hash-table entries, ~a reduces\n" - num-states num-gram-syms num-ht-entries num-reduces) - (printf "~a -- ~aKB, previously ~aKB\n" - (/ (+ 2 num-states - (* 4 num-states) (* 2 1.5 num-ht-entries) - (* 5 num-reduces)) 256.0) - (/ (+ 2 num-states - (* 4 num-states) (* 2 2.3 num-ht-entries) - (* 5 num-reduces)) 256.0) - (/ (+ 2 (* num-states num-gram-syms) (* 5 num-reduces)) 256.0))) - (values table - all-tokens - actions-code - (fix-check-syntax input-terms start end assocs prods)))) - - ) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt deleted file mode 100644 index f97e4d2..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/table.rkt +++ /dev/null @@ -1,290 +0,0 @@ -#lang scheme/base - - ;; Routine to build the LALR table - - (require "grammar.rkt" - "lr0.rkt" - "lalr.rkt" - "parser-actions.rkt" - racket/contract - mzlib/list - mzlib/class) - - (define (is-a-grammar%? x) (is-a? x grammar%)) - (provide/contract - (build-table (-> is-a-grammar%? string? any/c - (vectorof (listof (cons/c (or/c term? non-term?) action?)))))) - - ;; A parse-table is (vectorof (listof (cons/c gram-sym? action))) - ;; A grouped-parse-table is (vectorof (listof (cons/c gram-sym? (listof action)))) - - ;; make-parse-table : int -> parse-table - (define (make-parse-table num-states) - (make-vector num-states null)) - - ;; table-add!: parse-table nat symbol action -> - (define (table-add! table state-index symbol val) - (vector-set! table state-index (cons (cons symbol val) - (vector-ref table state-index)))) - - ;; group-table : parse-table -> grouped-parse-table - (define (group-table table) - (list->vector - (map - (lambda (state-entry) - (let ((ht (make-hash))) - (for-each - (lambda (gs/actions) - (let ((group (hash-ref ht (car gs/actions) (lambda () null)))) - (unless (member (cdr gs/actions) group) - (hash-set! ht (car gs/actions) (cons (cdr gs/actions) group))))) - state-entry) - (hash-map ht cons))) - (vector->list table)))) - - ;; table-map : (vectorof (listof (cons/c gram-sym? X))) (gram-sym? X -> Y) -> - ;; (vectorof (listof (cons/c gram-sym? Y))) - (define (table-map f table) - (list->vector - (map - (lambda (state-entry) - (map - (lambda (gs/X) - (cons (car gs/X) (f (car gs/X) (cdr gs/X)))) - state-entry)) - (vector->list table)))) - - - (define (bit-vector-for-each f bv) - (letrec ((for-each - (lambda (bv number) - (cond - ((= 0 bv) (void)) - ((= 1 (bitwise-and 1 bv)) - (f number) - (for-each (arithmetic-shift bv -1) (add1 number))) - (else (for-each (arithmetic-shift bv -1) (add1 number))))))) - (for-each bv 0))) - - - ;; print-entry: symbol action output-port -> - ;; prints the action a for lookahead sym to the given port - (define (print-entry sym a port) - (let ((s "\t~a\t\t\t\t\t~a\t~a\n")) - (cond - ((shift? a) - (fprintf port s sym "shift" (shift-state a))) - ((reduce? a) - (fprintf port s sym "reduce" (prod-index (reduce-prod a)))) - ((accept? a) - (fprintf port s sym "accept" "")) - ((goto? a) - (fprintf port s sym "goto" (goto-state a)))))) - - - ;; count: ('a -> bool) * 'a list -> num - ;; counts the number of elements in list that satisfy pred - (define (count pred list) - (cond - ((null? list) 0) - ((pred (car list)) (+ 1 (count pred (cdr list)))) - (else (count pred (cdr list))))) - - ;; display-parser: LR0-automaton grouped-parse-table (listof prod?) output-port -> - ;; Prints out the parser given by table. - (define (display-parser a grouped-table prods port) - (let* ((SR-conflicts 0) - (RR-conflicts 0)) - (for-each - (lambda (prod) - (fprintf port - "~a\t~a\t=\t~a\n" - (prod-index prod) - (gram-sym-symbol (prod-lhs prod)) - (map gram-sym-symbol (vector->list (prod-rhs prod))))) - prods) - (send a for-each-state - (lambda (state) - (fprintf port "State ~a\n" (kernel-index state)) - (for-each (lambda (item) - (fprintf port "\t~a\n" (item->string item))) - (kernel-items state)) - (newline port) - (for-each - (lambda (gs/action) - (let ((sym (gram-sym-symbol (car gs/action))) - (act (cdr gs/action))) - (cond - ((null? act) (void)) - ((null? (cdr act)) - (print-entry sym (car act) port)) - (else - (fprintf port "begin conflict:\n") - (when (> (count reduce? act) 1) - (set! RR-conflicts (add1 RR-conflicts))) - (when (> (count shift? act) 0) - (set! SR-conflicts (add1 SR-conflicts))) - (map (lambda (x) (print-entry sym x port)) act) - (fprintf port "end conflict\n"))))) - (vector-ref grouped-table (kernel-index state))) - (newline port))) - - (when (> SR-conflicts 0) - (fprintf port "~a shift/reduce conflict~a\n" - SR-conflicts - (if (= SR-conflicts 1) "" "s"))) - (when (> RR-conflicts 0) - (fprintf port "~a reduce/reduce conflict~a\n" - RR-conflicts - (if (= RR-conflicts 1) "" "s"))))) - - ;; resolve-conflict : (listof action?) -> action? bool bool - (define (resolve-conflict actions) - (cond - ((null? actions) (values (make-no-action) #f #f)) - ((null? (cdr actions)) - (values (car actions) #f #f)) - (else - (let ((SR-conflict? (> (count shift? actions) 0)) - (RR-conflict? (> (count reduce? actions) 1))) - (let loop ((current-guess #f) - (rest actions)) - (cond - ((null? rest) (values current-guess SR-conflict? RR-conflict?)) - ((shift? (car rest)) (values (car rest) SR-conflict? RR-conflict?)) - ((not current-guess) - (loop (car rest) (cdr rest))) - ((and (reduce? (car rest)) - (< (prod-index (reduce-prod (car rest))) - (prod-index (reduce-prod current-guess)))) - (loop (car rest) (cdr rest))) - ((accept? (car rest)) - (eprintf "accept/reduce or accept/shift conflicts. Check the grammar for useless cycles of productions\n") - (loop current-guess (cdr rest))) - (else (loop current-guess (cdr rest))))))))) - - ;; resolve-conflicts : grouped-parse-table bool -> parse-table - (define (resolve-conflicts grouped-table suppress) - (let* ((SR-conflicts 0) - (RR-conflicts 0) - (table (table-map - (lambda (gs actions) - (let-values (((action SR? RR?) - (resolve-conflict actions))) - (when SR? - (set! SR-conflicts (add1 SR-conflicts))) - (when RR? - (set! RR-conflicts (add1 RR-conflicts))) - action)) - grouped-table))) - (unless suppress - (when (> SR-conflicts 0) - (eprintf "~a shift/reduce conflict~a\n" - SR-conflicts - (if (= SR-conflicts 1) "" "s"))) - (when (> RR-conflicts 0) - (eprintf "~a reduce/reduce conflict~a\n" - RR-conflicts - (if (= RR-conflicts 1) "" "s")))) - table)) - - - ;; resolve-sr-conflict : (listof action) (union int #f) -> (listof action) - ;; Resolves a single shift-reduce conflict, if precedences are in place. - (define (resolve-sr-conflict/prec actions shift-prec) - (let* ((shift (if (shift? (car actions)) - (car actions) - (cadr actions))) - (reduce (if (shift? (car actions)) - (cadr actions) - (car actions))) - (reduce-prec (prod-prec (reduce-prod reduce)))) - (cond - ((and shift-prec reduce-prec) - (cond - ((< (prec-num shift-prec) (prec-num reduce-prec)) - (list reduce)) - ((> (prec-num shift-prec) (prec-num reduce-prec)) - (list shift)) - ((eq? 'left (prec-assoc shift-prec)) - (list reduce)) - ((eq? 'right (prec-assoc shift-prec)) - (list shift)) - (else null))) - (else actions)))) - - - ;; resolve-prec-conflicts : parse-table -> grouped-parse-table - (define (resolve-prec-conflicts table) - (table-map - (lambda (gs actions) - (cond - ((and (term? gs) - (= 2 (length actions)) - (or (shift? (car actions)) - (shift? (cadr actions)))) - (resolve-sr-conflict/prec actions (term-prec gs))) - (else actions))) - (group-table table))) - - ;; build-table: grammar string bool -> parse-table - (define (build-table g file suppress) - (let* ((a (build-lr0-automaton g)) - (term-vector (list->vector (send g get-terms))) - (end-terms (send g get-end-terms)) - (table (make-parse-table (send a get-num-states))) - (get-lookahead (compute-LA a g)) - (reduce-cache (make-hash))) - - (for-each - (lambda (trans-key/state) - (let ((from-state-index (kernel-index (trans-key-st (car trans-key/state)))) - (gs (trans-key-gs (car trans-key/state))) - (to-state (cdr trans-key/state))) - (table-add! table from-state-index gs - (cond - ((non-term? gs) - (make-goto (kernel-index to-state))) - ((member gs end-terms) - (make-accept)) - (else - (make-shift - (kernel-index to-state))))))) - (send a get-transitions)) - - (send a for-each-state - (lambda (state) - (for-each - (lambda (item) - (let ((item-prod (item-prod item))) - (bit-vector-for-each - (lambda (term-index) - (unless (start-item? item) - (let ((r (hash-ref reduce-cache item-prod - (lambda () - (let ((r (make-reduce item-prod))) - (hash-set! reduce-cache item-prod r) - r))))) - (table-add! table - (kernel-index state) - (vector-ref term-vector term-index) - r)))) - (get-lookahead state item-prod)))) - (append (hash-ref (send a get-epsilon-trans) state (lambda () null)) - (filter (lambda (item) - (not (move-dot-right item))) - (kernel-items state)))))) - - (let ((grouped-table (resolve-prec-conflicts table))) - (unless (string=? file "") - (with-handlers [(exn:fail:filesystem? - (lambda (e) - (eprintf - "Cannot write debug output to file \"~a\": ~a\n" - file - (exn-message e))))] - (call-with-output-file file - (lambda (port) - (display-parser a grouped-table (send g get-prods) port)) - #:exists 'truncate))) - (resolve-conflicts grouped-table suppress)))) diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt deleted file mode 100644 index 31b3cc6..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/private-yacc/yacc-helper.rkt +++ /dev/null @@ -1,118 +0,0 @@ -(module yacc-helper mzscheme - - (require mzlib/list - "../private-lex/token-syntax.rkt") - - ;; General helper routines - - (provide duplicate-list? remove-duplicates overlap? vector-andmap display-yacc) - - (define (vector-andmap f v) - (let loop ((i 0)) - (cond - ((= i (vector-length v)) #t) - (else (if (f (vector-ref v i)) - (loop (add1 i)) - #f))))) - - ;; duplicate-list?: symbol list -> #f | symbol - ;; returns a symbol that exists twice in l, or false if no such symbol - ;; exists - (define (duplicate-list? l) - (letrec ((t (make-hash-table)) - (dl? (lambda (l) - (cond - ((null? l) #f) - ((hash-table-get t (car l) (lambda () #f)) => - (lambda (x) x)) - (else - (hash-table-put! t (car l) (car l)) - (dl? (cdr l))))))) - (dl? l))) - - ;; remove-duplicates: syntax-object list -> syntax-object list - ;; removes the duplicates from the lists - (define (remove-duplicates sl) - (let ((t (make-hash-table))) - (letrec ((x - (lambda (sl) - (cond - ((null? sl) sl) - ((hash-table-get t (syntax-object->datum (car sl)) (lambda () #f)) - (x (cdr sl))) - (else - (hash-table-put! t (syntax-object->datum (car sl)) #t) - (cons (car sl) (x (cdr sl)))))))) - (x sl)))) - - ;; overlap?: symbol list * symbol list -> #f | symbol - ;; Returns an symbol in l1 intersect l2, or #f is no such symbol exists - (define (overlap? l1 l2) - (let/ec ret - (let ((t (make-hash-table))) - (for-each (lambda (s1) - (hash-table-put! t s1 s1)) - l1) - (for-each (lambda (s2) - (cond - ((hash-table-get t s2 (lambda () #f)) => - (lambda (o) (ret o))))) - l2) - #f))) - - - (define (display-yacc grammar tokens start precs port) - (let-syntax ((p (syntax-rules () - ((_ args ...) (fprintf port args ...))))) - (let* ((tokens (map syntax-local-value tokens)) - (eterms (filter e-terminals-def? tokens)) - (terms (filter terminals-def? tokens)) - (term-table (make-hash-table)) - (display-rhs - (lambda (rhs) - (for-each (lambda (sym) (p "~a " (hash-table-get term-table sym (lambda () sym)))) - (car rhs)) - (if (= 3 (length rhs)) - (p "%prec ~a" (cadadr rhs))) - (p "\n")))) - (for-each - (lambda (t) - (for-each - (lambda (t) - (hash-table-put! term-table t (format "'~a'" t))) - (syntax-object->datum (e-terminals-def-t t)))) - eterms) - (for-each - (lambda (t) - (for-each - (lambda (t) - (p "%token ~a\n" t) - (hash-table-put! term-table t (format "~a" t))) - (syntax-object->datum (terminals-def-t t)))) - terms) - (if precs - (for-each (lambda (prec) - (p "%~a " (car prec)) - (for-each (lambda (tok) - (p " ~a" (hash-table-get term-table tok))) - (cdr prec)) - (p "\n")) - precs)) - (p "%start ~a\n" start) - (p "%%\n") - - (for-each (lambda (prod) - (let ((nt (car prod))) - (p "~a: " nt) - (display-rhs (cadr prod)) - (for-each (lambda (rhs) - (p "| ") - (display-rhs rhs)) - (cddr prod)) - (p ";\n"))) - grammar) - (p "%%\n")))) - - -) - diff --git a/br-parser-tools/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt b/br-parser-tools/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt deleted file mode 100644 index 7f766eb..0000000 --- a/br-parser-tools/br-parser-tools-lib/br-parser-tools/yacc-to-scheme.rkt +++ /dev/null @@ -1,135 +0,0 @@ -(module yacc-to-scheme mzscheme - (require br-parser-tools/lex - (prefix : br-parser-tools/lex-sre) - br-parser-tools/yacc - syntax/readerr - mzlib/list) - (provide trans) - - (define match-double-string - (lexer - ((:+ (:~ #\" #\\)) (append (string->list lexeme) - (match-double-string input-port))) - ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port))) - (#\" null))) - - (define match-single-string - (lexer - ((:+ (:~ #\' #\\)) (append (string->list lexeme) - (match-single-string input-port))) - ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port))) - (#\' null))) - - (define-lex-abbrevs - (letter (:or (:/ "a" "z") (:/ "A" "Z"))) - (digit (:/ "0" "9")) - (initial (:or letter (char-set "!$%&*/<=>?^_~@"))) - (subsequent (:or initial digit (char-set "+-.@"))) - (comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/"))) - - (define-empty-tokens x - (EOF PIPE |:| SEMI |%%| %prec)) - (define-tokens y - (SYM STRING)) - - (define get-token-grammar - (lexer-src-pos - ("%%" '|%%|) - (":" (string->symbol lexeme)) - ("%prec" (string->symbol lexeme)) - (#\| 'PIPE) - ((:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}"))) - (return-without-pos (get-token-grammar input-port))) - (#\; 'SEMI) - (#\' (token-STRING (string->symbol (list->string (match-single-string input-port))))) - (#\" (token-STRING (string->symbol (list->string (match-double-string input-port))))) - ((:: initial (:* subsequent)) (token-SYM (string->symbol lexeme))))) - - (define (parse-grammar enter-term enter-empty-term enter-non-term) - (parser - (tokens x y) - (src-pos) - (error (lambda (tok-ok tok-name tok-value start-pos end-pos) - (raise-read-error - (format "Error Parsing YACC grammar at token: ~a with value: ~a" tok-name tok-value) - (file-path) - (position-line start-pos) - (position-col start-pos) - (position-offset start-pos) - (- (position-offset end-pos) (position-offset start-pos))))) - - (end |%%|) - (start gram) - (grammar - (gram - ((production) (list $1)) - ((production gram) (cons $1 $2))) - (production - ((SYM |:| prods SEMI) - (begin - (enter-non-term $1) - (cons $1 $3)))) - (prods - ((rhs) (list `(,$1 #f))) - ((rhs prec) (list `(,$1 ,$2 #f))) - ((rhs PIPE prods) (cons `(,$1 #f) $3)) - ((rhs prec PIPE prods) (cons `(,$1 ,$2 #f) $4))) - (prec - ((%prec SYM) - (begin - (enter-term $2) - (list 'prec $2))) - ((%prec STRING) - (begin - (enter-empty-term $2) - (list 'prec $2)))) - (rhs - (() null) - ((SYM rhs) - (begin - (enter-term $1) - (cons $1 $2))) - ((STRING rhs) - (begin - (enter-empty-term $1) - (cons $1 $2))))))) - - (define (symbolstring a) (symbol->string b))) - - (define (trans filename) - (let* ((i (open-input-file filename)) - (terms (make-hash-table)) - (eterms (make-hash-table)) - (nterms (make-hash-table)) - (enter-term - (lambda (s) - (if (not (hash-table-get nterms s (lambda () #f))) - (hash-table-put! terms s #t)))) - (enter-empty-term - (lambda (s) - (if (not (hash-table-get nterms s (lambda () #f))) - (hash-table-put! eterms s #t)))) - (enter-non-term - (lambda (s) - (hash-table-remove! terms s) - (hash-table-remove! eterms s) - (hash-table-put! nterms s #t)))) - (port-count-lines! i) - (file-path filename) - (regexp-match "%%" i) - (begin0 - (let ((gram ((parse-grammar enter-term enter-empty-term enter-non-term) - (lambda () - (let ((t (get-token-grammar i))) - t))))) - `(begin - (define-tokens t ,(sort (hash-table-map terms (lambda (k v) k)) symbol -;; (vectorof (symbol runtime-action hashtable)) -(define-for-syntax (convert-parse-table table) - (list->vector - (map - (lambda (state-entry) - (let ((ht (make-hasheq))) - (for-each - (lambda (gs/action) - (hash-set! ht - (gram-sym-symbol (car gs/action)) - (action->runtime-action (cdr gs/action)))) - state-entry) - ht)) - (vector->list table)))) - -(define-syntax (parser stx) - (syntax-case stx () - ((_ args ...) - (let ((arg-list (syntax->list (syntax (args ...)))) - (src-pos #f) - (debug #f) - (error #f) - (tokens #f) - (start #f) - (end #f) - (precs #f) - (suppress #f) - (grammar #f) - (yacc-output #f)) - (for-each - (lambda (arg) - (syntax-case* arg (debug error tokens start end precs grammar - suppress src-pos yacc-output) - (lambda (a b) - (eq? (syntax-e a) (syntax-e b))) - ((debug filename) - (cond - ((not (string? (syntax-e (syntax filename)))) - (raise-syntax-error - #f - "Debugging filename must be a string" - stx - (syntax filename))) - (debug - (raise-syntax-error #f "Multiple debug declarations" stx)) - (else - (set! debug (syntax-e (syntax filename)))))) - ((suppress) - (set! suppress #t)) - ((src-pos) - (set! src-pos #t)) - ((error expression) - (if error - (raise-syntax-error #f "Multiple error declarations" stx) - (set! error (syntax expression)))) - ((tokens def ...) - (begin - (when tokens - (raise-syntax-error #f "Multiple tokens declarations" stx)) - (let ((defs (syntax->list (syntax (def ...))))) - (for-each - (lambda (d) - (unless (identifier? d) - (raise-syntax-error - #f - "Token-group name must be an identifier" - stx - d))) - defs) - (set! tokens defs)))) - ((start symbol ...) - (let ((symbols (syntax->list (syntax (symbol ...))))) - (for-each - (lambda (sym) - (unless (identifier? sym) - (raise-syntax-error #f - "Start symbol must be a symbol" - stx - sym))) - symbols) - (when start - (raise-syntax-error #f "Multiple start declarations" stx)) - (when (null? symbols) - (raise-syntax-error #f - "Missing start symbol" - stx - arg)) - (set! start symbols))) - ((end symbols ...) - (let ((symbols (syntax->list (syntax (symbols ...))))) - (for-each - (lambda (sym) - (unless (identifier? sym) - (raise-syntax-error #f - "End token must be a symbol" - stx - sym))) - symbols) - (let ((d (duplicate-list? (map syntax-e symbols)))) - (when d - (raise-syntax-error - #f - (format "Duplicate end token definition for ~a" d) - stx - arg)) - (when (null? symbols) - (raise-syntax-error - #f - "end declaration must contain at least 1 token" - stx - arg)) - (when end - (raise-syntax-error #f "Multiple end declarations" stx)) - (set! end symbols)))) - ((precs decls ...) - (if precs - (raise-syntax-error #f "Multiple precs declarations" stx) - (set! precs (syntax/loc arg (decls ...))))) - ((grammar prods ...) - (if grammar - (raise-syntax-error #f "Multiple grammar declarations" stx) - (set! grammar (syntax/loc arg (prods ...))))) - ((yacc-output filename) - (cond - ((not (string? (syntax-e (syntax filename)))) - (raise-syntax-error #f - "Yacc-output filename must be a string" - stx - (syntax filename))) - (yacc-output - (raise-syntax-error #f "Multiple yacc-output declarations" stx)) - (else - (set! yacc-output (syntax-e (syntax filename)))))) - (_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg)))) - (syntax->list (syntax (args ...)))) - (unless tokens - (raise-syntax-error #f "missing tokens declaration" stx)) - (unless error - (raise-syntax-error #f "missing error declaration" stx)) - (unless grammar - (raise-syntax-error #f "missing grammar declaration" stx)) - (unless end - (raise-syntax-error #f "missing end declaration" stx)) - (unless start - (raise-syntax-error #f "missing start declaration" stx)) - (let-values (((table all-term-syms actions check-syntax-fix) - (build-parser (if debug debug "") - src-pos - suppress - tokens - start - end - precs - grammar))) - (when (and yacc-output (not (string=? yacc-output ""))) - (with-handlers [(exn:fail:filesystem? - (lambda (e) - (eprintf - "Cannot write yacc-output to file \"~a\"\n" - yacc-output)))] - (call-with-output-file yacc-output - (lambda (port) - (display-yacc (syntax->datum grammar) - tokens - (map syntax->datum start) - (if precs - (syntax->datum precs) - #f) - port)) - #:exists 'truncate))) - (with-syntax ((check-syntax-fix check-syntax-fix) - (err error) - (ends end) - (starts start) - (debug debug) - (table (convert-parse-table table)) - (all-term-syms all-term-syms) - (actions actions) - (src-pos src-pos)) - (syntax - (begin - check-syntax-fix - (parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos))))))) - (_ - (raise-syntax-error #f - "parser must have the form (parser args ...)" - stx)))) - -(define (reduce-stack stack num ret-vals src-pos) - (cond - ((> num 0) - (let* ((top-frame (car stack)) - (ret-vals - (if src-pos - (cons (stack-frame-value top-frame) - (cons (stack-frame-start-pos top-frame) - (cons (stack-frame-end-pos top-frame) - ret-vals))) - (cons (stack-frame-value top-frame) ret-vals)))) - (reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))) - (else (values stack ret-vals)))) - -;; extract-helper : (symbol or make-token) any any -> symbol any any any -(define (extract-helper tok v1 v2) - (cond - ((symbol? tok) - (values tok #f v1 v2)) - ((token? tok) - (values (real-token-name tok) (real-token-value tok) v1 v2)) - (else (raise-argument-error 'parser - "(or/c symbol? token?)" - 0 - tok)))) - -;; well-formed-position-token?: any -> boolean -;; Returns true if pt is a position token whose position-token-token -;; is itself a token or a symbol. -;; This is meant to help raise more precise error messages when -;; a tokenizer produces an erroneous position-token wrapped twice. -;; (as often happens when omitting return-without-pos). -(define (well-formed-token-field? t) - (or (symbol? t) - (token? t))) - -(define (well-formed-position-token? pt) - (and (position-token? pt) - (well-formed-token-field? (position-token-token pt)))) - -(define (well-formed-srcloc-token? st) - (and (srcloc-token? st) - (well-formed-token-field? (srcloc-token-token st)))) - -;; extract-src-pos : position-token -> symbol any any any -(define (extract-src-pos ip) - (unless (well-formed-position-token? ip) - (raise-argument-error 'parser - "well-formed-position-token?" - 0 - ip)) - (extract-helper (position-token-token ip) - (position-token-start-pos ip) - (position-token-end-pos ip))) - -(define (extract-srcloc ip) - (unless (well-formed-srcloc-token? ip) - (raise-argument-error 'parser - "well-formed-srcloc-token?" - 0 - ip)) - (let ([loc (srcloc-token-srcloc ip)]) - (extract-helper (srcloc-token-token ip) - (position-token (srcloc-position loc) (srcloc-line loc) (srcloc-column loc)) - (position-token (+ (srcloc-position loc) (srcloc-span loc)) #f #f)))) - - -;; extract-no-src-pos : (symbol or make-token) -> symbol any any any -(define (extract-no-src-pos ip) - (extract-helper ip #f #f)) - -(define-struct stack-frame (state value start-pos end-pos) #:inspector (make-inspector)) - -(define (make-empty-stack i) (list (make-stack-frame i #f #f #f))) - - -;; The table is a vector that maps each state to a hash-table that maps a -;; terminal symbol to either an accept, shift, reduce, or goto structure. -; We encode the structures according to the runtime-action data definition in -;; parser-actions.rkt -(define (parser-body debug? err starts ends table all-term-syms actions src-pos) - (local ((define extract - (if src-pos - extract-src-pos - extract-no-src-pos)) - - (define (fix-error stack tok val start-pos end-pos get-token) - (when debug? (pretty-print stack)) - (local ((define (remove-input tok val start-pos end-pos) - (if (memq tok ends) - (raise-read-error "parser: Cannot continue after error" - #f #f #f #f #f) - (let ((a (find-action stack tok val start-pos end-pos))) - (cond - ((runtime-shift? a) - ;; (printf "shift:~a\n" (runtime-shift-state a)) - (cons (make-stack-frame (runtime-shift-state a) - val - start-pos - end-pos) - stack)) - (else - ;; (printf "discard input:~a\n" tok) - (let-values (((tok val start-pos end-pos) - (extract (get-token)))) - (remove-input tok val start-pos end-pos)))))))) - (let remove-states () - (let ((a (find-action stack 'error #f start-pos end-pos))) - (cond - ((runtime-shift? a) - ;; (printf "shift:~a\n" (runtime-shift-state a)) - (set! stack - (cons - (make-stack-frame (runtime-shift-state a) - #f - start-pos - end-pos) - stack)) - (remove-input tok val start-pos end-pos)) - (else - ;; (printf "discard state:~a\n" (car stack)) - (cond - ((< (length stack) 2) - (raise-read-error "parser: Cannot continue after error" - #f #f #f #f #f)) - (else - (set! stack (cdr stack)) - (remove-states))))))))) - - (define (find-action stack tok val start-pos end-pos) - (unless (hash-ref all-term-syms - tok - #f) - (if src-pos - (err #f tok val start-pos end-pos) - (err #f tok val)) - (raise-read-error (format "parser: got token of unknown type ~a" tok) - #f #f #f #f #f)) - (hash-ref (vector-ref table (stack-frame-state (car stack))) - tok - #f)) - - (define (make-parser start-number) - (lambda (get-token) - (unless (and (procedure? get-token) - (procedure-arity-includes? get-token 0)) - (error 'get-token "expected a nullary procedure, got ~e" get-token)) - (let parsing-loop ((stack (make-empty-stack start-number)) - (ip (get-token))) - (let-values (((tok val start-pos end-pos) - (extract ip))) - (let ((action (find-action stack tok val start-pos end-pos))) - (cond - ((runtime-shift? action) - ;; (printf "shift:~a\n" (runtime-shift-state action)) - (parsing-loop (cons (make-stack-frame (runtime-shift-state action) - val - start-pos - end-pos) - stack) - (get-token))) - ((runtime-reduce? action) - ;; (printf "reduce:~a\n" (runtime-reduce-prod-num action)) - (let-values (((new-stack args) - (reduce-stack stack - (runtime-reduce-rhs-length action) - null - src-pos))) - (let ((goto - (runtime-goto-state - (hash-ref - (vector-ref table (stack-frame-state (car new-stack))) - (runtime-reduce-lhs action))))) - (parsing-loop - (cons - (if src-pos - (make-stack-frame - goto - (apply (vector-ref actions (runtime-reduce-prod-num action)) args) - (if (null? args) start-pos (cadr args)) - (if (null? args) - end-pos - (list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1)))) - (make-stack-frame - goto - (apply (vector-ref actions (runtime-reduce-prod-num action)) args) - #f - #f)) - new-stack) - ip)))) - ((runtime-accept? action) - ;; (printf "accept\n") - (stack-frame-value (car stack))) - (else - (if src-pos - (err #t tok val start-pos end-pos) - (err #t tok val)) - (parsing-loop (fix-error stack tok val start-pos end-pos get-token) - (get-token)))))))))) - (cond - ((null? (cdr starts)) (make-parser 0)) - (else - (let loop ((l starts) - (i 0)) - (cond - ((null? l) null) - (else (cons (make-parser i) (loop (cdr l) (add1 i)))))))))) diff --git a/br-parser-tools/br-parser-tools-lib/info.rkt b/br-parser-tools/br-parser-tools-lib/info.rkt deleted file mode 100644 index f9f9e11..0000000 --- a/br-parser-tools/br-parser-tools-lib/info.rkt +++ /dev/null @@ -1,11 +0,0 @@ -#lang info - -(define collection 'multi) -(define deps '("scheme-lib" - "base" - "compatibility-lib")) -(define build-deps '("rackunit-lib")) - -(define pkg-desc "implementation (no documentation) part of \"br-parser-tools\"") - -(define pkg-authors '(mflatt)) diff --git a/br-parser-tools/br-parser-tools/LICENSE.txt b/br-parser-tools/br-parser-tools/LICENSE.txt deleted file mode 100644 index d012f58..0000000 --- a/br-parser-tools/br-parser-tools/LICENSE.txt +++ /dev/null @@ -1,11 +0,0 @@ -parser-tools -Copyright (c) 2010-2014 PLT Design Inc. - -This package is distributed under the GNU Lesser General Public -License (LGPL). This means that you can link this package into proprietary -applications, provided you follow the rules stated in the LGPL. You -can also modify this package; if you distribute a modified version, -you must distribute it under the terms of the LGPL, which in -particular means that you must release the source code for the -modified software. See http://www.gnu.org/copyleft/lesser.html -for more information. diff --git a/br-parser-tools/br-parser-tools/info.rkt b/br-parser-tools/br-parser-tools/info.rkt deleted file mode 100644 index 6a692a8..0000000 --- a/br-parser-tools/br-parser-tools/info.rkt +++ /dev/null @@ -1,12 +0,0 @@ -#lang info - -(define collection 'multi) - -(define deps '("br-parser-tools-lib" - "br-parser-tools-doc")) -(define implies '("br-parser-tools-lib" - "br-parser-tools-doc")) - -(define pkg-desc "Lex- and Yacc-style parsing tools") - -(define pkg-authors '(mflatt)) diff --git a/brag/LICENSE b/brag/LICENSE deleted file mode 100755 index 65c5ca8..0000000 --- a/brag/LICENSE +++ /dev/null @@ -1,165 +0,0 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - - This version of the GNU Lesser General Public License incorporates -the terms and conditions of version 3 of the GNU General Public -License, supplemented by the additional permissions listed below. - - 0. Additional Definitions. - - As used herein, "this License" refers to version 3 of the GNU Lesser -General Public License, and the "GNU GPL" refers to version 3 of the GNU -General Public License. - - "The Library" refers to a covered work governed by this License, -other than an Application or a Combined Work as defined below. - - An "Application" is any work that makes use of an interface provided -by the Library, but which is not otherwise based on the Library. -Defining a subclass of a class defined by the Library is deemed a mode -of using an interface provided by the Library. - - A "Combined Work" is a work produced by combining or linking an -Application with the Library. The particular version of the Library -with which the Combined Work was made is also called the "Linked -Version". - - The "Minimal Corresponding Source" for a Combined Work means the -Corresponding Source for the Combined Work, excluding any source code -for portions of the Combined Work that, considered in isolation, are -based on the Application, and not on the Linked Version. - - The "Corresponding Application Code" for a Combined Work means the -object code and/or source code for the Application, including any data -and utility programs needed for reproducing the Combined Work from the -Application, but excluding the System Libraries of the Combined Work. - - 1. Exception to Section 3 of the GNU GPL. - - You may convey a covered work under sections 3 and 4 of this License -without being bound by section 3 of the GNU GPL. - - 2. Conveying Modified Versions. - - If you modify a copy of the Library, and, in your modifications, a -facility refers to a function or data to be supplied by an Application -that uses the facility (other than as an argument passed when the -facility is invoked), then you may convey a copy of the modified -version: - - a) under this License, provided that you make a good faith effort to - ensure that, in the event an Application does not supply the - function or data, the facility still operates, and performs - whatever part of its purpose remains meaningful, or - - b) under the GNU GPL, with none of the additional permissions of - this License applicable to that copy. - - 3. Object Code Incorporating Material from Library Header Files. - - The object code form of an Application may incorporate material from -a header file that is part of the Library. You may convey such object -code under terms of your choice, provided that, if the incorporated -material is not limited to numerical parameters, data structure -layouts and accessors, or small macros, inline functions and templates -(ten or fewer lines in length), you do both of the following: - - a) Give prominent notice with each copy of the object code that the - Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the object code with a copy of the GNU GPL and this license - document. - - 4. Combined Works. - - You may convey a Combined Work under terms of your choice that, -taken together, effectively do not restrict modification of the -portions of the Library contained in the Combined Work and reverse -engineering for debugging such modifications, if you also do each of -the following: - - a) Give prominent notice with each copy of the Combined Work that - the Library is used in it and that the Library and its use are - covered by this License. - - b) Accompany the Combined Work with a copy of the GNU GPL and this license - document. - - c) For a Combined Work that displays copyright notices during - execution, include the copyright notice for the Library among - these notices, as well as a reference directing the user to the - copies of the GNU GPL and this license document. - - d) Do one of the following: - - 0) Convey the Minimal Corresponding Source under the terms of this - License, and the Corresponding Application Code in a form - suitable for, and under terms that permit, the user to - recombine or relink the Application with a modified version of - the Linked Version to produce a modified Combined Work, in the - manner specified by section 6 of the GNU GPL for conveying - Corresponding Source. - - 1) Use a suitable shared library mechanism for linking with the - Library. A suitable mechanism is one that (a) uses at run time - a copy of the Library already present on the user's computer - system, and (b) will operate properly with a modified version - of the Library that is interface-compatible with the Linked - Version. - - e) Provide Installation Information, but only if you would otherwise - be required to provide such information under section 6 of the - GNU GPL, and only to the extent that such information is - necessary to install and execute a modified version of the - Combined Work produced by recombining or relinking the - Application with a modified version of the Linked Version. (If - you use option 4d0, the Installation Information must accompany - the Minimal Corresponding Source and Corresponding Application - Code. If you use option 4d1, you must provide the Installation - Information in the manner specified by section 6 of the GNU GPL - for conveying Corresponding Source.) - - 5. Combined Libraries. - - You may place library facilities that are a work based on the -Library side by side in a single library together with other library -facilities that are not Applications and are not covered by this -License, and convey such a combined library under terms of your -choice, if you do both of the following: - - a) Accompany the combined library with a copy of the same work based - on the Library, uncombined with any other library facilities, - conveyed under the terms of this License. - - b) Give prominent notice with the combined library that part of it - is a work based on the Library, and explaining where to find the - accompanying uncombined form of the same work. - - 6. Revised Versions of the GNU Lesser General Public License. - - The Free Software Foundation may publish revised and/or new versions -of the GNU Lesser General Public License from time to time. Such new -versions will be similar in spirit to the present version, but may -differ in detail to address new problems or concerns. - - Each version is given a distinguishing version number. If the -Library as you received it specifies that a certain numbered version -of the GNU Lesser General Public License "or any later version" -applies to it, you have the option of following the terms and -conditions either of that published version or of any later version -published by the Free Software Foundation. If the Library as you -received it does not specify a version number of the GNU Lesser -General Public License, you may choose any version of the GNU Lesser -General Public License ever published by the Free Software Foundation. - - If the Library as you received it specifies that a proxy can decide -whether future versions of the GNU Lesser General Public License shall -apply, that proxy's public statement of acceptance of any version is -permanent authorization for you to choose that version for the -Library. diff --git a/brag/README.md b/brag/README.md deleted file mode 100755 index 623c425..0000000 --- a/brag/README.md +++ /dev/null @@ -1,4 +0,0 @@ -This repo contains a fork of Danny Yoo's RAGG, a Racket AST Generator Generator, -also known as a parser generator. - -Licensed under the LGPL. diff --git a/brag/brag/Makefile b/brag/brag/Makefile deleted file mode 100755 index 579d424..0000000 --- a/brag/brag/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -doc: - scribble ++xref-in setup/xref load-collections-xref --redirect-main http://docs.racket-lang.org/ --dest-name index.html manual.scrbl - -clean: - git clean -fdx . - -web: clean plt doc - scp -r * hashcollision.org:webapps/htdocs/ragg/ - - -plt: - raco pack --collect ragg.plt ragg diff --git a/brag/brag/brag.scrbl b/brag/brag/brag.scrbl deleted file mode 100755 index 9017342..0000000 --- a/brag/brag/brag.scrbl +++ /dev/null @@ -1,1157 +0,0 @@ -#lang scribble/manual -@(require scribble/eval - racket/date - file/md5 - (for-label racket - brag/support - brag/examples/nested-word-list - (only-in br-parser-tools/lex lexer-src-pos) - (only-in syntax/parse syntax-parse ~literal))) - - -@(define (lookup-date filename [default ""]) - (cond - [(file-exists? filename) - (define modify-seconds (file-or-directory-modify-seconds filename)) - (define a-date (seconds->date modify-seconds)) - (date->string a-date)] - [else - default])) - -@(define (compute-md5sum filename [default ""]) - (cond [(file-exists? filename) - (bytes->string/utf-8 (call-with-input-file filename md5 #:mode 'binary))] - [else - default])) - - - -@title{brag: the Beautiful Racket AST Generator} -@author["Danny Yoo (95%)" "Matthew Butterick (5%)"] - -@defmodulelang[brag] - -@section{Quick start} - -@(define my-eval (make-base-eval)) -@(my-eval '(require brag/examples/nested-word-list - racket/list - racket/match)) - -Suppose we're given the -following string: -@racketblock["(radiant (humble))"] - - -How would we turn this string into a structured value? That is, how would we @emph{parse} it? (Let's also suppose we've never heard of @racket[read].) - -First, we need to consider the structure of the things we'd like to parse. The -string above looks like a nested list of words. Good start. - -Second, how might we describe this formally — meaning, in a way that a computer could understand? A common notation to describe the structure of these things is @link["http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form"]{Backus-Naur Form} (BNF). So let's try to notate the structure of nested word lists in BNF. - -@nested[#:style 'code-inset]{ - @verbatim{ - nested-word-list: WORD - | LEFT-PAREN nested-word-list* RIGHT-PAREN -}} - -What we intend by this notation is this: @racket[nested-word-list] is either a @racket[WORD], or a parenthesized list of @racket[nested-word-list]s. We use the character @litchar{*} to represent zero or more repetitions of the previous thing. We treat the uppercased @racket[LEFT-PAREN], @racket[RIGHT-PAREN], and @racket[WORD] as placeholders for @emph{tokens} (a @tech{token} being the smallest meaningful item in the parsed string): - -Here are a few examples of tokens: -@interaction[#:eval my-eval - (require brag/support) - (token 'LEFT-PAREN) - (token 'WORD "crunchy" #:span 7) - (token 'RIGHT-PAREN)] - -This BNF description is also known as a @deftech{grammar}. Just as it does in a natural language like English or French, a grammar describes something in terms of what elements can fit where. - -Have we made progress? We have a valid grammar. But we're still missing a @emph{parser}: a function that can use that description to make structures out of a sequence of tokens. - -Meanwhile, it's clear that we don't yet have a valid program because there's no @litchar{#lang} line. Let's add one: put @litchar{#lang brag} at the top of the grammar, and save it as a file called @filepath{nested-word-list.rkt}. - -@filebox["nested-word-list.rkt"]{ - @verbatim{ - #lang brag - nested-word-list: WORD - | LEFT-PAREN nested-word-list* RIGHT-PAREN -}} - -Now it's a proper program. But what does it do? - -@interaction[#:eval my-eval - @eval:alts[(require "nested-word-list.rkt") (void)] - parse - ] - -It gives us a @racket[parse] function. Let's investigate what @racket[parse] -does. What happens if we pass it a sequence of tokens? - -@interaction[#:eval my-eval - (define a-parsed-value - (parse (list (token 'LEFT-PAREN "(") - (token 'WORD "some") - (token 'LEFT-PAREN "[") - (token 'WORD "pig") - (token 'RIGHT-PAREN "]") - (token 'RIGHT-PAREN ")")))) - a-parsed-value] - -Those who have messed around with macros will recognize this as a @seclink["stx-obj" #:doc '(lib "scribblings/guide/guide.scrbl")]{syntax object}. - -@interaction[#:eval my-eval - (syntax->datum a-parsed-value) - ] - -That's @racket[(some [pig])], essentially. - -What happens if we pass our @racket[parse] function a bigger source of tokens? - -@interaction[#:eval my-eval - @code:comment{tokenize: string -> (sequenceof token-struct?)} - @code:comment{Generate tokens from a string:} - (define (tokenize s) - (for/list ([str (regexp-match* #px"\\(|\\)|\\w+" s)]) - (match str - ["(" - (token 'LEFT-PAREN str)] - [")" - (token 'RIGHT-PAREN str)] - [else - (token 'WORD str)]))) - - @code:comment{For example:} - (define token-source (tokenize "(welcome (to (((brag)) ())))")) - (define v (parse token-source)) - (syntax->datum v) - ] - -Welcome to @tt{brag}. - - - - -@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -@section{Introduction} - -@tt{brag} is a parser generator designed to be easy -to use: - -@itemize[ - - @item{It provides a @litchar{#lang} for writing BNF grammars. - A module written in @litchar{#lang brag} automatically generates a - parser. The grammar controls the structure of the @tech{syntax objects} it generates.} - - @item{The language uses a few conventions to simplify the expression of - grammars. The first rule in the grammar is assumed to be the - starting production. Identifiers in @tt{UPPERCASE} are treated as - terminal tokens. All other identifiers are treated as nonterminals.} - - @item{Tokenizers can be developed independently of parsers. - @tt{brag} takes a liberal view on tokens: they can be strings, - symbols, or instances constructed with @racket[token]. Tokens can optionally provide source location, in which case a syntax object generated by the parser will too.} - - @item{The parser can usually handle ambiguous grammars.} - - @item{It integrates with the rest of the Racket - @link["http://docs.racket-lang.org/guide/languages.html"]{language toolchain}.} - - ] - - - -@subsection{Example: a small DSL for ASCII diagrams} - -Suppose we'd like to define a language for -drawing simple ASCII diagrams. So if we write something like this: - -@nested[#:style 'inset]{ - @verbatim|{ - 3 9 X; - 6 3 b 3 X 3 b; - 3 9 X; - }|} - -It should generate the following picture: - -@nested[#:style 'inset]{ -@verbatim|{ -XXXXXXXXX -XXXXXXXXX -XXXXXXXXX - XXX - XXX - XXX - XXX - XXX - XXX -XXXXXXXXX -XXXXXXXXX -XXXXXXXXX -}|} - - -This makes sense in a casual way. But let's be more precise about how the language works. - -Each line of the program has a semicolon at the end, and describes the output of several @emph{rows} of the line drawing. Let's look at two of the lines in the example: - -@itemize[ - @item{@litchar{3 9 X;}: ``Repeat the following 3 times: print @racket["X"] nine times, followed by - a newline.''} - - @item{@litchar{6 3 b 3 X 3 b;}: ``Repeat the following 6 times: print @racket[" "] three times, - followed by @racket["X"] three times, followed by @racket[" "] three times, followed by a newline.''} - ] - -Then each line consists of a @emph{repeat} number, followed by pairs of -(number, character) @emph{chunks}. We'll assume here that the intent of the lowercased character @litchar{b} is to represent the printing of a 1-character whitespace @racket[" "], and for other uppercase letters to represent the printing of themselves. - -By understanding the pieces of each line, we can more easily capture that meaning in a grammar. Once we have each instruction of our ASCII DSL in a structured format, we should be able to parse it. - -Here's a first pass at expressing the structure of these line-drawing programs. - -@subsection{Parsing the concrete syntax} - -@filebox["simple-line-drawing.rkt"]{ - @verbatim|{ - #lang brag - drawing: rows* - rows: repeat chunk+ ";" - repeat: INTEGER - chunk: INTEGER STRING - }| -} - -@margin-note{@secref{brag-syntax} describes @tt{brag}'s syntax in more detail.} -We write a @tt{brag} program as an BNF grammar, where patterns can be: -@itemize[ - @item{the names of other rules (e.g. @racket[chunk])} - @item{literal and symbolic token names (e.g. @racket[";"], @racket[INTEGER])} - @item{quantified patterns (e.g. @litchar{+} to represent one-or-more repetitions)} - ] -The result of a @tt{brag} program is a module with a @racket[parse] function -that can parse tokens and produce a syntax object as a result. - -Let's try this function: - -@interaction[#:eval my-eval - (require brag/support) - @eval:alts[(require "simple-line-drawing.rkt") - (require brag/examples/simple-line-drawing)] - (define stx - (parse (list (token 'INTEGER 6) - (token 'INTEGER 2) - (token 'STRING " ") - (token 'INTEGER 3) - (token 'STRING "X") - ";"))) - (syntax->datum stx) - ] - -A @emph{token} is the smallest meaningful element of a source program. Tokens can be strings, symbols, or instances of the @racket[token] data structure. (Plus a few other special cases, which we'll discuss later.) Usually, a token holds a single character from the source program. But sometimes it makes sense to package a sequence of characters into a single token, if the sequence has an indivisible meaning. - -If possible, we also want to attach source location information to each token. Why? Because this informatino will be incorporated into the syntax objects produced by @racket[parse]. - -A parser often works in conjunction with a helper function called a @emph{lexer} that converts the raw code of the source program into tokens. The @racketmodname[br-parser-tools/lex] library can help us write a position-sensitive -tokenizer: - -@interaction[#:eval my-eval - (require br-parser-tools/lex) - (define (tokenize ip) - (port-count-lines! ip) - (define my-lexer - (lexer-src-pos - [(repetition 1 +inf.0 numeric) - (token 'INTEGER (string->number lexeme))] - [upper-case - (token 'STRING lexeme)] - ["b" - (token 'STRING " ")] - [";" - (token ";" lexeme)] - [whitespace - (token 'WHITESPACE lexeme #:skip? #t)] - [(eof) - (void)])) - (define (next-token) (my-lexer ip)) - next-token) - - (define a-sample-input-port (open-input-string "6 2 b 3 X;")) - (define token-thunk (tokenize a-sample-input-port)) - @code:comment{Now we can pass token-thunk to the parser:} - (define another-stx (parse token-thunk)) - (syntax->datum another-stx) - @code:comment{The syntax object has location information:} - (syntax-line another-stx) - (syntax-column another-stx) - (syntax-span another-stx) - ] - - -Note also from this lexer example: - -@itemize[ - - @item{@racket[parse] accepts as input either a sequence of tokens, or a - function that produces tokens (which @racket[parse] will call repeatedly to get the next token).} - - @item{As an alternative to the basic @racket[token] structure, a token can also be an instance of the @racket[position-token] structure (also found in @racketmodname[br-parser-tools/lex]). In that case, the token will try to derive its position from that of the position-token.} - - @item{@racket[parse] will stop if it gets @racket[void] (or @racket['eof]) as a token.} - - @item{@racket[parse] will skip any token that has - @racket[#:skip?] attribute set to @racket[#t]. For instance, tokens representing comments often use @racket[#:skip?].} - - ] - - -@subsection{From parsing to interpretation} - -We now have a parser for programs written in this simple-line-drawing language. -Our parser will return syntax objects: - -@interaction[#:eval my-eval - (define parsed-program - (parse (tokenize (open-input-string "3 9 X; 6 3 b 3 X 3 b; 3 9 X;")))) - (syntax->datum parsed-program) - ] - -Better still, these syntax objects will have a predictable -structure that follows the grammar: - -@racketblock[ - (drawing (rows (repeat ) - (chunk ) ... ";") - ...) - ] - -where @racket[drawing], @racket[rows], @racket[repeat], and @racket[chunk] -should be treated literally, and everything else will be numbers or strings. - - -Still, these syntax-object values are just inert structures. How do we -interpret them, and make them @emph{print}? We claimed at the beginning of -this section that these syntax objects should be easy to interpret. So let's do it. - -@margin-note{This is a very quick-and-dirty treatment of @racket[syntax-parse]. - See the @racketmodname[syntax/parse] documentation for a gentler guide to its - features.} Racket provides a special form called @racket[syntax-parse] in the -@racketmodname[syntax/parse] library. @racket[syntax-parse] lets us do a -structural case-analysis on syntax objects: we provide it a set of patterns to -parse and actions to perform when those patterns match. - - -As a simple example, we can write a function that looks at a syntax object and -says @racket[#t] if it's the literal @racket[yes], and @racket[#f] otherwise: - -@interaction[#:eval my-eval - (require syntax/parse) - @code:comment{yes-syntax-object?: syntax-object -> boolean} - @code:comment{Returns true if the syntax-object is yes.} - (define (yes-syntax-object? stx) - (syntax-parse stx - [(~literal yes) - #t] - [else - #f])) - (yes-syntax-object? #'yes) - (yes-syntax-object? #'nooooooooooo) - ] - -Here, we use @racket[~literal] to let @racket[syntax-parse] know that -@racket[yes] should show up literally in the syntax object. The patterns can -also have some structure to them, such as: -@racketblock[({~literal drawing} rows-stxs ...)] -which matches on syntax objects that begin, literally, with @racket[drawing], -followed by any number of rows (which are syntax objects too). - - -Now that we know a little bit more about @racket[syntax-parse], -we can use it to do a case analysis on the syntax -objects that our @racket[parse] function gives us. -We start by defining a function on syntax objects of the form @racket[(drawing - rows-stx ...)]. -@interaction[#:eval my-eval - (define (interpret-drawing drawing-stx) - (syntax-parse drawing-stx - [({~literal drawing} rows-stxs ...) - - (for ([rows-stx (syntax->list #'(rows-stxs ...))]) - (interpret-rows rows-stx))]))] - -When we encounter a syntax object with @racket[(drawing rows-stx - ...)], then @racket[interpret-rows] each @racket[rows-stx]. - -@;The pattern we -@;express in @racket[syntax-parse] above marks what things should be treated -@;literally, and the @racket[...] is a a part of the pattern matching language -@;known by @racket[syntax-parse] that lets us match multiple instances of the -@;last pattern. - - -Let's define @racket[interpret-rows] now: -@interaction[#:eval my-eval - (define (interpret-rows rows-stx) - (syntax-parse rows-stx - [({~literal rows} - ({~literal repeat} repeat-number) - chunks ... ";") - - (for ([i (syntax-e #'repeat-number)]) - (for ([chunk-stx (syntax->list #'(chunks ...))]) - (interpret-chunk chunk-stx)) - (newline))]))] - -For a @racket[rows], we extract out the @racket[repeat-number] out of the -syntax object and use it as the range of the @racket[for] loop. The inner loop -walks across each @racket[chunk-stx] and calls @racket[interpret-chunk] on it. - - -Finally, we need to write a definition for @racket[interpret-chunk]. We want -it to extract out the @racket[chunk-size] and @racket[chunk-string] portions, -and print to standard output: - -@interaction[#:eval my-eval - (define (interpret-chunk chunk-stx) - (syntax-parse chunk-stx - [({~literal chunk} chunk-size chunk-string) - - (for ([k (syntax-e #'chunk-size)]) - (display (syntax-e #'chunk-string)))])) - ] - - -@margin-note{Here are the definitions in a single file: - @link["examples/simple-line-drawing/interpret.rkt"]{interpret.rkt}.} -With these definitions in hand, now we can pass it syntax objects -that we construct directly by hand: - -@interaction[#:eval my-eval - (interpret-chunk #'(chunk 3 "X")) - (interpret-drawing #'(drawing (rows (repeat 5) (chunk 3 "X") ";"))) - ] - -or we can pass it the result generated by our parser: -@interaction[#:eval my-eval - (define parsed-program - (parse (tokenize (open-input-string "3 9 X; 6 3 b 3 X 3 b; 3 9 X;")))) - (interpret-drawing parsed-program)] - -And now we've got an interpreter! - - -@subsection{From interpretation to compilation} - -@margin-note{For a gentler tutorial on writing @litchar{#lang}-based languages, see - @link["http://beautifulracket.com"]{Beautiful Racket}.} (Just as a -warning: the following material is slightly more advanced, but shows how -writing a compiler for the line-drawing language reuses the ideas for the -interpreter.) - -Wouldn't it be nice to be able to write something like: - -@nested[#:style 'inset]{ - @verbatim|{ - 3 9 X; - 6 3 b 3 X 3 b; - 3 9 X; - }|} - -and have Racket automatically compile this down to something like this? -@racketblock[ - (for ([i 3]) - (for ([k 9]) (displayln "X")) - (newline)) - - (for ([i 6]) - (for ([k 3]) (displayln " ")) - (for ([k 3]) (displayln "X")) - (for ([k 3]) (displayln " ")) - (newline)) - - (for ([i 3]) - (for ([k 9]) (displayln "X")) - (newline)) - ] - -Well, of course it won't work: we don't have a @litchar{#lang} line. - -Let's add one. - -@filebox["letter-i.rkt"]{ - @verbatim|{ - #lang brag/examples/simple-line-drawing - 3 9 X; - 6 3 b 3 X 3 b; - 3 9 X; - }| -} - -Now @filepath{letter-i.rkt} is a program. - - -How does this work? From the previous sections, we've seen how to take the -contents of a file and interpret it. What we want to do now is teach Racket -how to compile programs labeled with this @litchar{#lang} line. We'll do two -things: - -@itemize[ - @item{Tell Racket to use the @tt{brag}-generated parser and lexer we defined - earlier whenever it sees a program written with - @litchar{#lang brag/examples/simple-line-drawing}.} - - @item{Define transformation rules for @racket[drawing], @racket[rows], and - @racket[chunk] to rewrite these into standard Racket forms.} - ] - -The second part, the writing of the transformation rules, will look very -similar to the definitions we wrote for the interpreter, but the transformation -will happen at compile-time. (We @emph{could} just resort to simply calling -into the interpreter we just wrote up, but this section is meant to show that -compilation is also viable.) - - -We do the first part by defining a @emph{module reader}: a -@link["http://docs.racket-lang.org/guide/syntax_module-reader.html"]{module - reader} tells Racket how to parse and compile a file. Whenever Racket sees a -@litchar{#lang }, it looks for a corresponding module reader in -@filepath{/lang/reader}. - -Here's the definition for -@filepath{brag/examples/simple-line-drawing/lang/reader.rkt}: - -@filebox["brag/examples/simple-line-drawing/lang/reader.rkt"]{ - @codeblock|{ - #lang s-exp syntax/module-reader - brag/examples/simple-line-drawing/semantics - #:read my-read - #:read-syntax my-read-syntax - #:whole-body-readers? #t - - (require brag/examples/simple-line-drawing/lexer - brag/examples/simple-line-drawing/grammar) - - (define (my-read in) - (syntax->datum (my-read-syntax #f in))) - - (define (my-read-syntax src ip) - (list (parse src (tokenize ip)))) - }| -} - -We use a helper module @racketmodname[syntax/module-reader], which provides -utilities for creating a module reader. It uses the lexer and -@tt{brag}-generated parser we defined earlier, and also tells Racket that it should compile the forms in the syntax -object using a module called @filepath{semantics.rkt}. - -Let's look into @filepath{semantics.rkt} and see what's involved in -compilation: -@filebox["brag/examples/simple-line-drawing/semantics.rkt"]{ - @codeblock|{ - #lang racket/base - (require (for-syntax racket/base syntax/parse)) - - (provide #%module-begin - ;; We reuse Racket's treatment of raw datums, specifically - ;; for strings and numbers: - #%datum - - ;; And otherwise, we provide definitions of these three forms. - ;; During compiliation, Racket uses these definitions to - ;; rewrite into for loops, displays, and newlines. - drawing rows chunk) - - ;; Define a few compile-time functions to do the syntax rewriting: - (begin-for-syntax - (define (compile-drawing drawing-stx) - (syntax-parse drawing-stx - [({~literal drawing} rows-stxs ...) - - (syntax/loc drawing-stx - (begin rows-stxs ...))])) - - (define (compile-rows rows-stx) - (syntax-parse rows-stx - [({~literal rows} - ({~literal repeat} repeat-number) - chunks ... - ";") - - (syntax/loc rows-stx - (for ([i repeat-number]) - chunks ... - (newline)))])) - - (define (compile-chunk chunk-stx) - (syntax-parse chunk-stx - [({~literal chunk} chunk-size chunk-string) - - (syntax/loc chunk-stx - (for ([k chunk-size]) - (display chunk-string)))]))) - - - ;; Wire up the use of "drawing", "rows", and "chunk" to these - ;; transformers: - (define-syntax drawing compile-drawing) - (define-syntax rows compile-rows) - (define-syntax chunk compile-chunk) - }| -} - -The semantics hold definitions for @racket[compile-drawing], -@racket[compile-rows], and @racket[compile-chunk], similar to what we had for -interpretation with @racket[interpret-drawing], @racket[interpret-rows], and -@racket[interpret-chunk]. However, compilation is not the same as -interpretation: each definition does not immediately execute the act of -drawing, but rather returns a syntax object whose evaluation will do the actual -work. - -There are a few things to note: - -@margin-note{By the way, we can just as easily rewrite the semantics so that - @racket[compile-rows] does explicitly call @racket[compile-chunk]. Often, - though, it's easier to write the transformation functions in this piecemeal way - and depend on the Racket macro expansion system to do the rewriting as it - encounters each of the forms.} - - -@itemize[ - - @item{@tt{brag}'s native data structure is the syntax object because the - majority of Racket's language-processing infrastructure knows how to read and - write this structured value.} - - - @item{Unlike in interpretation, @racket[compile-rows] doesn't - compile each chunk by directly calling @racket[compile-chunk]. Rather, it - depends on the Racket macro expander to call each @racket[compile-XXX] function - as it encounters a @racket[drawing], @racket[rows], or @racket[chunk] in the - parsed value. The three statements at the bottom of @filepath{semantics.rkt} inform - the macro expansion system to do this: - - @racketblock[ - (define-syntax drawing compile-drawing) - (define-syntax rows compile-rows) - (define-syntax chunk compile-chunk) - ]} - ] - - -Altogether, @tt{brag}'s intent is to be a parser generator for Racket -that's easy and fun to use. It's meant to fit naturally with the other tools -in the Racket language toolchain. Hopefully, it will reduce the friction in -making new languages with alternative concrete syntaxes. - -The rest of this document describes the @tt{brag} language and the parsers it -generates. - - -@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -@;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -@section{The language} - -@subsection[#:tag "brag-syntax"]{Syntax and terminology} -A program in the @tt{brag} language consists of the language line -@litchar{#lang brag}, followed by a collection of @tech{rule}s and -@tech{line comment}s. - -A @deftech{rule} is a sequence consisting of: a @tech{rule identifier}, a colon -@litchar{":"}, and a @tech{pattern}. - -A @deftech{rule identifier} is an @tech{identifier} that is not in upper case. - -A @deftech{symbolic token identifier} is an @tech{identifier} that is in upper case. - -An @deftech{identifier} is a character sequence of letters, numbers, and -characters in @racket["-.!$%&/<=>?^_~@"]. It must not contain -@litchar{*} or @litchar{+}, as those characters are used to denote -quantification. - - -A @deftech{pattern} is one of the following: -@itemize[ - @item{an implicit sequence of @tech{pattern}s separated by whitespace} - @item{a terminal: either a literal string or a @tech{symbolic token identifier}. - - When used in a pattern, both these terminals will match the same set of inputs. A literal string can match the string itself, or a @racket[token] whose type field contains that string (or its symbol form). So @racket["FOO"] would match @racket["FOO"], @racket[(token "FOO" "bar")], or @racket[(token 'FOO "bar")]. A symbolic token identifier can also match the string version of the identifier, or a @racket[token] whose type field is the symbol or string form of the identifier. So @racket[FOO] would also match @racket["FOO"], @racket[(token 'FOO "bar")], or @racket[(token "FOO" "bar")]. (In every case, the value of a token, like @racket["bar"], can be anything, and may or may not be the same as its type.) - - Because their underlying meanings are the same, the symbolic token identifier ends up being a notational convenience for readability inside a grammar pattern. Typically, the literal string @racket["FOO"] is used to connote ``match the string @racket["FOO"] exactly'' and the symbolic token identifier @racket[FOO] specially connotes ``match any token of type @racket['FOO]''.} - - @item{a @tech{rule identifier}} - @item{a @deftech{choice pattern}: a sequence of @tech{pattern}s delimited with @litchar{|} characters.} - @item{a @deftech{quantifed pattern}: a @tech{pattern} followed by either @litchar{*} (``zero or more'') or @litchar{+} (``one or more'')} - @item{an @deftech{optional pattern}: a @tech{pattern} surrounded by @litchar{[} and @litchar{]}} - @item{an explicit sequence: a @tech{pattern} surrounded by @litchar{(} and @litchar{)}}] - -A @deftech{line comment} begins with either @litchar{#} or @litchar{;} and -continues till the end of the line. - - -For example, in the following program: -@nested[#:style 'inset - @verbatim|{ - #lang brag - ;; A parser for a silly language - sentence: verb optional-adjective object - verb: greeting - optional-adjective: ["happy" | "frumpy"] - greeting: "hello" | "hola" | "aloha" - object: "world" | WORLD - }|] - -the elements @tt{sentence}, @tt{verb}, @tt{greeting}, and @tt{object} are rule -identifiers. The first rule, @litchar{sentence: verb optional-adjective - object}, is a rule whose right side is an implicit pattern sequence of three -sub-patterns. The uppercased @tt{WORLD} is a symbolic token identifier. The fourth rule in the program associates @tt{greeting} with a @tech{choice pattern}. - - - -More examples: -@itemize[ - - @item{A - BNF for binary - strings that contain an equal number of zeros and ones. - @verbatim|{ - #lang brag - equal: [zero one | one zero] ;; equal number of "0"s and "1"s. - zero: "0" equal | equal "0" ;; has an extra "0" in it. - one: "1" equal | equal "1" ;; has an extra "1" in it. - }| - } - - @item{A BNF for - @link["http://www.json.org/"]{JSON}-like structures. - @verbatim|{ - #lang brag - json: number | string - | array | object - number: NUMBER - string: STRING - array: "[" [json ("," json)*] "]" - object: "{" [kvpair ("," kvpair)*] "}" - kvpair: ID ":" json - }| - } - ] - -@subsection{Cuts & splices} - -By default, every matched token shows up in the parse tree. But sometimes that means that the parse tree ends up holding a bunch of tokens that were only needed to complete the parsing. Once they've served their purpose, it's sometimes useful to filter them out (for instance, to simplify the implementation of a language expander). To help with this kind of housekeeping, @racket[brag] supports @emph{cuts} and @emph{splices}. - -A @deftech{cut} in a grammar will delete an item from the parse tree. A cut is notated by prefixing either the left-hand rule name or a right-hand pattern element with a slash @litchar{/}. - -If the cut is applied to a left-hand rule name, the rule name is omitted from the parse tree, but its node and its matched elements remain. - -If the cut is applied to a right-hand pattern element, then that element is omitted from every node matching that rule. - -For instance, consider this simple grammar for arithmetic expressions: - -@verbatim|{ -#lang brag -expr : term ('+' term)* -term : factor ('*' factor)* -factor : ("0" | "1" | "2" | "3" - | "4" | "5" | "6" | "7" - | "8" | "9")+ -}| - -If we use it to parse this string: - -@verbatim|{1+2*3}| - -We get this parse tree: - -@racketblock['(expr (term (factor "1")) "+" (term (factor "2") "*" (factor "3")))] - -Suppose we felt the @litchar{+} and @litchar{*} characters were superfluous. We can add cuts to the grammar by prefixing these pattern elements with @litchar{/}: - -@verbatim|{ -#lang brag -expr : term (/'+' term)* -term : factor (/'*' factor)* -factor : ("0" | "1" | "2" | "3" - | "4" | "5" | "6" | "7" - | "8" | "9")+ -}| - -Our parse tree changes accordingly: - -@racketblock['(expr (term (factor "1")) (term (factor "2") (factor "3")))] - -Now suppose we apply a cut on the rule name, @racket[factor]: - -@verbatim|{ -#lang brag -expr : term (/'+' term)* -term : factor (/'*' factor)* -/factor : ("0" | "1" | "2" | "3" - | "4" | "5" | "6" | "7" - | "8" | "9")+ -}| - -This time, the rule name disppears from the parse tree, but its nodes and elements remain: - -@racketblock['(expr (term ("1")) (term ("2") ("3")))] - -A @deftech{splice} in a grammar will merge the elements of a node into the surrounding node. A splice is notated by prefixing either the left-hand rule name or a right-hand pattern element with an at sign @litchar|{@}|. - -If the splice is applied to a left-hand rule name, then the splice is applied every time the rule is used in the parse tree. - -If the splice is applied to a right-hand pattern element, that element is spliced only when it appears as part of the production for that rule. - -Suppose we remove the cut from the @racket[factor] rule name and instead splice the second appearance of @racket[factor] in the pattern for the @racket[term] rule: - -@verbatim|{ -#lang brag -expr : term (/'+' term)* -term : factor (/'*' @factor)* -factor : ("0" | "1" | "2" | "3" - | "4" | "5" | "6" | "7" - | "8" | "9")+ -}| - -The @racket[factor] elements matching the first position of the @racket[term] pattern remain as they were, but the @racket[factor] element matching the second position is spliced into the surrounding node: - -@racketblock['(expr (term (factor "1")) (term (factor "2") "3"))] - -Finally, suppose we add a splice to the @racket[term] rule name: - -@verbatim|{ -#lang brag -expr : term (/'+' term)* -@term : factor (/'*' @factor)* -factor : ("0" | "1" | "2" | "3" - | "4" | "5" | "6" | "7" - | "8" | "9")+ -}| - -This time, all the appearances of @racket[term] nodes in the parse tree will have their elements spliced into the surrounding nodes: - -@racketblock['(expr (factor "1") (factor "2") "3")] - - -As a convenience, when a grammar element is spliced, or a rule name is cut, @racket[brag] preserves the rule name by adding it as a syntax property to the residual elements, using the rule name as a key, and the original syntax object representing the rule name as the value. - - -@subsection{Syntax errors} - -Besides the basic syntax errors that can occur with a malformed grammar, there -are a few other classes of situations that @litchar{#lang brag} will consider -as syntax errors. - -@tt{brag} will raise a syntax error if the grammar: -@itemize[ - @item{doesn't have any rules.} - - @item{has a rule with the same left hand side as any other rule.} - - @item{refers to rules that have not been defined. e.g. the - following program: - @nested[#:style 'code-inset - @verbatim|{ - #lang brag - foo: [bar] - }| - ] - should raise an error because @tt{bar} has not been defined, even though - @tt{foo} refers to it in an @tech{optional pattern}.} - - - @item{uses the token name @racket[EOF]; the end-of-file token type is reserved - for internal use by @tt{brag}.} - - - @item{contains a rule that has no finite derivation. e.g. the following - program: - @nested[#:style 'code-inset - @verbatim|{ - #lang brag - infinite-a: "a" infinite-a - }| - ] - should raise an error because no finite sequence of tokens will satisfy - @tt{infinite-a}.} - - ] - -Otherwise, @tt{brag} should be fairly tolerant and permit even ambiguous -grammars. - -@subsection{Semantics} -@declare-exporting[brag/examples/nested-word-list] - -A program written in @litchar{#lang brag} produces a module that provides a few -bindings. The most important of these is @racket[parse]: - -@defproc[(parse [source any/c #f] - [token-source (or/c (sequenceof token) - (-> token))]) - syntax?]{ - - Parses the sequence of @tech{tokens} according to the rules in the grammar, using the - first rule as the start production. The parse must completely consume - @racket[token-source]. - - The @deftech{token source} can either be a sequence, or a 0-arity function that - produces @tech{tokens}. - - A @deftech{token} in @tt{brag} can be any of the following values: - @itemize[ - @item{a string} - @item{a symbol} - @item{an instance produced by @racket[token]} - @item{an instance produced by the token constructors of @racketmodname[br-parser-tools/lex]} - @item{an instance of @racketmodname[br-parser-tools/lex]'s @racket[position-token] whose - @racket[position-token-token] is a @tech{token}.} - ] - - A token whose type is either @racket[void] or @racket['EOF] terminates the - source. - - - If @racket[parse] succeeds, it will return a structured syntax object. The - structure of the syntax object follows the overall structure of the rules in - the BNF grammar. For each rule @racket[r] and its associated pattern @racket[p], - @racket[parse] generates a syntax object @racket[#'(r p-value)] where - @racket[p-value]'s structure follows a case analysis on @racket[p]: - - @itemize[ - @item{For implicit and explicit sequences of @tech{pattern}s @racket[p1], - @racket[p2], ..., the corresponding values, spliced into the - structure.} - @item{For terminals, the value of the token.} - @item{For @tech{rule identifier}s: the associated parse value for the rule.} - @item{For @tech{choice pattern}s: the associated parse value for one of the matching subpatterns.} - @item{For @tech{quantifed pattern}s and @tech{optional pattern}s: the corresponding values, spliced into the structure.} - ] - - Consequently, it's only the presence of @tech{rule identifier}s in a rule's - pattern that informs the parser to introduces nested structure into the syntax - object. - - - If the grammar is ambiguous, @tt{brag} will choose one of the possible parse results, though it doesn't guarantee which. - - - If the parse cannot be performed successfully, or if a token in the - @racket[token-source] uses a type that isn't mentioned in the grammar, then - @racket[parse] raises an instance of @racket[exn:fail:parsing].} - - -@defproc[(parse-to-datum [source any/c #f] - [token-source (or/c (sequenceof token) - (-> token))]) - list?]{ - Same as @racket[parse], but the result is converted into a plain datum. Useful for testing or debugging a parser. -} - - -@defform[#:id make-rule-parser - (make-rule-parser name)]{ - Constructs a parser for the @racket[name] of one of the non-terminals - in the grammar. - - For example, given the @tt{brag} program - @filepath{simple-arithmetic-grammar.rkt}: - @filebox["simple-arithmetic-grammar.rkt"]{ - @verbatim|{ - #lang brag - expr : term ('+' term)* - term : factor ('*' factor)* - factor : INT - }| - } - the following interaction shows how to extract a parser for @racket[term]s. - @interaction[#:eval my-eval - @eval:alts[(require "simple-arithmetic-grammar.rkt") - (require brag/examples/simple-arithmetic-grammar)] - (define term-parse (make-rule-parser term)) - (define tokens (list (token 'INT 3) - "*" - (token 'INT 4))) - (syntax->datum (parse tokens)) - (syntax->datum (term-parse tokens)) - - (define another-token-sequence - (list (token 'INT 1) "+" (token 'INT 2) - "*" (token 'INT 3))) - (syntax->datum (parse another-token-sequence)) - @code:comment{Note that term-parse will break on another-token-sequence} - @code:comment{as it does not know what to do with the "+"} - (term-parse another-token-sequence) - ] - -} - - -@defthing[all-token-types (setof symbol?)]{ - A set of all the token types used in a grammar. - - For example: - @interaction[#:eval my-eval - @eval:alts[(require "simple-arithmetic-grammar.rkt") - (require brag/examples/simple-arithmetic-grammar)] - all-token-types - ] - -} - - - -@section{Support API} - -@defmodule[brag/support] - -The @racketmodname[brag/support] module provides functions to interact with -@tt{brag} programs. The most useful is the @racket[token] function, which -produces tokens to be parsed. - -In addition to the exports shown below, the @racketmodname[brag/support] module also provides everything from @racketmodname[brag/support], and everything from @racketmodname[br-parser-tools/lex]. - - -@defproc[(token [type (or/c string? symbol?)] - [val any/c #f] - [#:line line (or/c positive-integer? #f) #f] - [#:column column (or/c natural-number? #f) #f] - [#:position position (or/c positive-integer? #f) #f] - [#:span span (or/c natural-number? #f) #f] - [#:skip? skip? boolean? #f] - ) - token-struct?]{ - Creates instances of @racket[token-struct]s. - - The syntax objects produced by a parse will inject the value @racket[val] in - place of the token name in the grammar. - - If @racket[#:skip?] is true, then the parser will skip over it during a - parse.} - - -@defstruct[token-struct ([type symbol?] - [val any/c] - [position (or/c positive-integer? #f)] - [line (or/c natural-number? #f)] - [column (or/c positive-integer? #f)] - [span (or/c natural-number? #f)] - [skip? boolean?]) - #:transparent]{ - The token structure type. - - Rather than directly using the @racket[token-struct] constructor, please use - the helper function @racket[token] to construct instances. -} - - - - -@defstruct[(exn:fail:parsing exn:fail) - ([message string?] - [continuation-marks continuation-mark-set?] - [srclocs (listof srcloc?)])]{ - The exception raised when parsing fails. - - @racket[exn:fail:parsing] implements Racket's @racket[prop:exn:srcloc] - property, so if this exception reaches DrRacket's default error handler, - DrRacket should highlight the offending locations in the source.} - - - -@defproc[(apply-tokenizer-maker [tokenizer-maker procedure?] - [source (or/c string? - input-port?)]) - list?]{ - Repeatedly apply @racket[tokenizer-maker] to @racket[source], gathering the resulting tokens into a list. @racket[source] can be a string or an input port. Useful for testing or debugging a tokenizer. -} - -@defproc[(apply-lexer [lexer procedure?] - [source (or/c string? - input-port?)]) - list?]{ - Repeatedly apply @racket[lexer] to @racket[source], gathering the resulting tokens into a list. @racket[source] can be a string or an input port. Useful for testing or debugging a lexer. -} - - -@defproc[(trim-ends [left-str string?] - [str string?] - [right-str string?]) - string?]{ - Remove @racket[left-str] from the left side of @racket[str], and @racket[right-str] from its right side. Intended as a helper function for @racket[from/to]. -} - - -@defform[(:* re ...)]{ - - Repetition of @racket[re] sequence 0 or more times.} - -@defform[(:+ re ...)]{ - - Repetition of @racket[re] sequence 1 or more times.} - -@defform[(:? re ...)]{ - - Zero or one occurrence of @racket[re] sequence.} - -@defform[(:= n re ...)]{ - - Exactly @racket[n] occurrences of @racket[re] sequence, where - @racket[n] must be a literal exact, non-negative number.} - -@defform[(:>= n re ...)]{ - - At least @racket[n] occurrences of @racket[re] sequence, where - @racket[n] must be a literal exact, non-negative number.} - -@defform[(:** n m re ...)]{ - - Between @racket[n] and @racket[m] (inclusive) occurrences of - @racket[re] sequence, where @racket[n] must be a literal exact, - non-negative number, and @racket[m] must be literally either - @racket[#f], @racket[+inf.0], or an exact, non-negative number; a - @racket[#f] value for @racket[m] is the same as @racket[+inf.0].} - -@defform[(:or re ...)]{ - - Same as @racket[(union re ...)].} - -@deftogether[( - @defform[(:: re ...)] - @defform[(:seq re ...)] - )]{ - - Both forms concatenate the @racket[re]s.} - -@defform[(:& re ...)]{ - - Intersects the @racket[re]s.} - -@defform[(:- re ...)]{ - - The set difference of the @racket[re]s.} - -@defform[(:~ re ...)]{ - - Character-set complement, which each @racket[re] must match exactly - one character.} - -@defform[(:/ char-or-string ...)]{ - - Character ranges, matching characters between successive pairs of - characters.} - -@defform[(from/to open close)]{ - - A string that is bounded by @racket[open] and @racket[close]. Matching is non-greedy (meaning, it stops at the first occurence of @racket[close]). The resulting lexeme includes @racket[open] and @racket[close]. To remove them, see @racket[trim-ends].} - -@defform[(from/stop-before open close)]{ - - Like @racket[from/to], a string that is bounded by @racket[open] and @racket[close], except that @racket[close] is not included in the resulting lexeme. Matching is non-greedy (meaning, it stops at the first occurence of @racket[close]).} - - -@close-eval[my-eval] diff --git a/brag/brag/cfg-parser/cfg-parser.rkt b/brag/brag/cfg-parser/cfg-parser.rkt deleted file mode 100755 index 1bc717b..0000000 --- a/brag/brag/cfg-parser/cfg-parser.rkt +++ /dev/null @@ -1,921 +0,0 @@ -#lang racket/base -;; This module implements a parser form like the br-parser-tools's -;; `parser', except that it works on an arbitrary CFG (returning -;; the first sucecssful parse). - -;; I'm pretty sure that this is an implementation of Earley's -;; algorithm. - -;; To a first approximation, it's a backtracking parser. Alternative -;; for a non-terminal are computed in parallel, and multiple attempts -;; to compute the same result block until the first one completes. If -;; you get into deadlock, such as when trying to match -;; := -;; then it means that there's no successful parse, so everything -;; that's blocked fails. - -;; A cache holds the series of results for a particular non-terminal -;; at a particular starting location. (A series is used, instead of a -;; sinlge result, for backtracking.) Otherwise, the parser uses -;; backtracking search. Backtracking is implemented through explicit -;; success and failure continuations. Multiple results for a -;; particular nonterminal and location are kept only when they have -;; different lengths. (Otherwise, in the spirit of finding one -;; successful parse, only the first result is kept.) - -;; The br-parser-tools's `parse' is used to transform tokens in the -;; grammar to tokens specific to this parser. In other words, this -;; parser uses `parser' so that it doesn't have to know anything about -;; tokens. -;; - - - -(require br-parser-tools/yacc - br-parser-tools/lex) - -(require (for-syntax racket/base - syntax/boundmap - br-parser-tools/private-lex/token-syntax)) - -(provide cfg-parser) - -;; A raw token, wrapped so that we can recognize it: -(define-struct tok (name orig-name val start end)) - -;; Represents the thread scheduler: -(define-struct tasks (active active-back waits multi-waits cache progress?)) - -(define-for-syntax make-token-identifier-mapping make-hasheq) -(define-for-syntax token-identifier-mapping-get - (case-lambda - [(t tok) - (hash-ref t (syntax-e tok))] - [(t tok fail) - (hash-ref t (syntax-e tok) fail)])) -(define-for-syntax token-identifier-mapping-put! - (lambda (t tok v) - (hash-set! t (syntax-e tok) v))) -(define-for-syntax token-identifier-mapping-map - (lambda (t f) - (hash-map t f))) - -;; Used to calculate information on the grammar, such as whether -;; a particular non-terminal is "simple" instead of recursively defined. -(define-for-syntax (nt-fixpoint nts proc nt-ids patss) - (define (ormap-all val f as bs) - (cond - [(null? as) val] - [else (ormap-all (or (f (car as) (car bs)) val) - f - (cdr as) (cdr bs))])) - (let loop () - (when (ormap-all #f - (lambda (nt pats) - (let ([old (bound-identifier-mapping-get nts nt)]) - (let ([new (proc nt pats old)]) - (if (equal? old new) - #f - (begin - (bound-identifier-mapping-put! nts nt new) - #t))))) - nt-ids patss) - (loop)))) - -;; Tries parse-a followed by parse-b. If parse-a is not simple, -;; then after parse-a succeeds once, we parallelize parse-b -;; and trying a second result for parse-a. -(define (parse-and simple-a? parse-a parse-b - stream last-consumed-token depth end success-k fail-k - max-depth tasks) - (letrec ([mk-got-k - (lambda (success-k fail-k) - (lambda (val stream last-consumed-token depth max-depth tasks next1-k) - (if simple-a? - (parse-b val stream last-consumed-token depth end - (mk-got2-k success-k fail-k next1-k) - (mk-fail2-k success-k fail-k next1-k) - max-depth tasks) - (parallel-or - (lambda (success-k fail-k max-depth tasks) - (parse-b val stream last-consumed-token depth end - success-k fail-k - max-depth tasks)) - (lambda (success-k fail-k max-depth tasks) - (next1-k (mk-got-k success-k fail-k) - fail-k max-depth tasks)) - success-k fail-k max-depth tasks))))] - [mk-got2-k - (lambda (success-k fail-k next1-k) - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (success-k val stream last-consumed-token depth max-depth tasks - (lambda (success-k fail-k max-depth tasks) - (next-k (mk-got2-k success-k fail-k next1-k) - (mk-fail2-k success-k fail-k next1-k) - max-depth tasks)))))] - [mk-fail2-k - (lambda (success-k fail-k next1-k) - (lambda (max-depth tasks) - (next1-k (mk-got-k success-k fail-k) - fail-k - max-depth - tasks)))]) - (parse-a stream last-consumed-token depth end - (mk-got-k success-k fail-k) - fail-k - max-depth tasks))) - -;; Parallel or for non-terminal alternatives -(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks) - (parallel-or (lambda (success-k fail-k max-depth tasks) - (parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks)) - (lambda (success-k fail-k max-depth tasks) - (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)) - success-k fail-k max-depth tasks)) - -;; Generic parallel-or -(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks) - (define answer-key (gensym)) - (letrec ([gota-k - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (report-answer answer-key - max-depth - tasks - (list val stream last-consumed-token depth next-k)))] - [faila-k - (lambda (max-depth tasks) - (report-answer answer-key - max-depth - tasks - null))]) - (let* ([tasks (queue-task - tasks - (lambda (max-depth tasks) - (parse-a gota-k - faila-k - max-depth tasks)))] - [tasks (queue-task - tasks - (lambda (max-depth tasks) - (parse-b gota-k - faila-k - max-depth tasks)))] - [queue-next (lambda (next-k tasks) - (queue-task tasks - (lambda (max-depth tasks) - (next-k gota-k - faila-k - max-depth tasks))))]) - (letrec ([mk-got-one - (lambda (immediate-next? get-nth success-k) - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (let ([tasks (if immediate-next? - (queue-next next-k tasks) - tasks)]) - (success-k val stream last-consumed-token depth max-depth - tasks - (lambda (success-k fail-k max-depth tasks) - (let ([tasks (if immediate-next? - tasks - (queue-next next-k tasks))]) - (get-nth max-depth tasks success-k fail-k)))))))] - [get-first - (lambda (max-depth tasks success-k fail-k) - (wait-for-answer #f max-depth tasks answer-key - (mk-got-one #t get-first success-k) - (lambda (max-depth tasks) - (get-second max-depth tasks success-k fail-k)) - #f))] - [get-second - (lambda (max-depth tasks success-k fail-k) - (wait-for-answer #f max-depth tasks answer-key - (mk-got-one #f get-second success-k) - fail-k #f))]) - (get-first max-depth tasks success-k fail-k))))) - -;; Non-terminal alternatives where the first is "simple" can be done -;; sequentially, which is simpler -(define (parse-or parse-a parse-b - stream last-consumed-token depth end success-k fail-k max-depth tasks) - (letrec ([mk-got-k - (lambda (success-k fail-k) - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - (success-k val stream last-consumed-token depth - max-depth tasks - (lambda (success-k fail-k max-depth tasks) - (next-k (mk-got-k success-k fail-k) - (mk-fail-k success-k fail-k) - max-depth tasks)))))] - [mk-fail-k - (lambda (success-k fail-k) - (lambda (max-depth tasks) - (parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))]) - (parse-a stream last-consumed-token depth end - (mk-got-k success-k fail-k) - (mk-fail-k success-k fail-k) - max-depth tasks))) - -;; Starts a thread -(define queue-task - (lambda (tasks t [progress? #t]) - (make-tasks (tasks-active tasks) - (cons t (tasks-active-back tasks)) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - (or progress? (tasks-progress? tasks))))) - -;; Reports an answer to a waiting thread: -(define (report-answer answer-key max-depth tasks val) - (let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))]) - (if v - (let ([tasks (make-tasks (cons (v val) - (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (hash-remove! (tasks-waits tasks) answer-key) - (swap-task max-depth tasks)) - ;; We have an answer ready too fast; wait - (swap-task max-depth - (queue-task tasks - (lambda (max-depth tasks) - (report-answer answer-key max-depth tasks val)) - #f))))) - -;; Reports an answer to multiple waiting threads: -(define (report-answer-all answer-key max-depth tasks val k) - (let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))]) - (hash-remove! (tasks-multi-waits tasks) answer-key) - (let ([tasks (make-tasks (append (map (lambda (a) (a val)) v) - (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (k max-depth tasks)))) - -;; Waits for an answer; if `multi?' is #f, this is sole waiter, otherwise -;; there might be many. Use wither #t or #f (and `report-answer' or -;; `report-answer-all', resptively) consistently for a particular answer key. -(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k) - (let ([wait (lambda (val) - (lambda (max-depth tasks) - (if val - (if (null? val) - (fail-k max-depth tasks) - (let-values ([(val stream last-consumed-token depth next-k) (apply values val)]) - (success-k val stream last-consumed-token depth max-depth tasks next-k))) - (deadlock-k max-depth tasks))))]) - (if multi? - (hash-set! (tasks-multi-waits tasks) answer-key - (cons wait (hash-ref (tasks-multi-waits tasks) answer-key - (lambda () null)))) - (hash-set! (tasks-waits tasks) answer-key wait)) - (let ([tasks (make-tasks (tasks-active tasks) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #t)]) - (swap-task max-depth tasks)))) - -;; Swap thread -(define (swap-task max-depth tasks) - ;; Swap in first active: - (if (null? (tasks-active tasks)) - (if (tasks-progress? tasks) - (swap-task max-depth - (make-tasks (reverse (tasks-active-back tasks)) - null - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - #f)) - ;; No progress, so issue failure for all multi-waits - (if (zero? (hash-count (tasks-multi-waits tasks))) - (error 'swap-task "Deadlock") - (swap-task max-depth - (make-tasks (apply - append - (hash-map (tasks-multi-waits tasks) - (lambda (k l) - (map (lambda (v) (v #f)) l)))) - (tasks-active-back tasks) - (tasks-waits tasks) - (make-hasheq) - (tasks-cache tasks) - #t)))) - (let ([t (car (tasks-active tasks))] - [tasks (make-tasks (cdr (tasks-active tasks)) - (tasks-active-back tasks) - (tasks-waits tasks) - (tasks-multi-waits tasks) - (tasks-cache tasks) - (tasks-progress? tasks))]) - (t max-depth tasks)))) - -;; Finds the symbolic representative of a token class -(define-for-syntax (map-token toks tok) - (car (token-identifier-mapping-get toks tok))) - -(define no-pos-val (make-position #f #f #f)) -(define-for-syntax no-pos - (let ([npv ((syntax-local-certifier) #'no-pos-val)]) - (lambda (stx) npv))) -(define-for-syntax at-tok-pos - (lambda (sel expr) - (lambda (stx) - #`(let ([v #,expr]) (if v (#,sel v) no-pos-val))))) - -;; Builds a matcher for a particular alternative -(define-for-syntax (build-match nts toks pat handle $ctx) - (let loop ([pat pat] - [pos 1]) - (if (null? pat) - #`(success-k #,handle stream last-consumed-token depth max-depth tasks - (lambda (success-k fail-k max-depth tasks) - (fail-k max-depth tasks))) - (let ([id (datum->syntax (car pat) - (string->symbol (format "$~a" pos)))] - [id-start-pos (datum->syntax (car pat) - (string->symbol (format "$~a-start-pos" pos)))] - [id-end-pos (datum->syntax (car pat) - (string->symbol (format "$~a-end-pos" pos)))] - [n-end-pos (and (null? (cdr pat)) - (datum->syntax (car pat) '$n-end-pos))]) - (cond - [(bound-identifier-mapping-get nts (car pat) (lambda () #f)) - ;; Match non-termimal - #`(parse-and - ;; First part is simple? (If so, we don't have to parallelize the `and'.) - #,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))]) - (or (not l) - (andmap values (caddr l)))) - #,(car pat) - (let ([original-stream stream]) - (lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks) - (let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream) - tok-end - tok-start) - #'(if (eq? original-stream stream) - last-consumed-token - (and (pair? original-stream) - (car original-stream))))] - [#,id-end-pos (at-tok-pos #'tok-end #'last-consumed-token)] - #,@(if n-end-pos - #`([#,n-end-pos (at-tok-pos #'tok-end #'last-consumed-token)]) - null)) - #,(loop (cdr pat) (add1 pos))))) - stream last-consumed-token depth - #,(let ([cnt (apply + - (map (lambda (item) - (cond - [(bound-identifier-mapping-get nts item (lambda () #f)) - => (lambda (l) (car l))] - [else 1])) - (cdr pat)))]) - #`(- end #,cnt)) - success-k fail-k max-depth tasks)] - [else - ;; Match token - (let ([tok-id (map-token toks (car pat))]) - #`(if (and (pair? stream) - (eq? '#,tok-id (tok-name (car stream)))) - (let* ([stream-a (car stream)] - [#,id (tok-val stream-a)] - [last-consumed-token (car stream)] - [stream (cdr stream)] - [depth (add1 depth)]) - (let ([max-depth (max max-depth depth)]) - (let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'stream-a)] - [#,id-end-pos (at-tok-pos #'tok-end #'stream-a)] - #,@(if n-end-pos - #`([#,n-end-pos (at-tok-pos #'tok-end #'stream-a)]) - null)) - #,(loop (cdr pat) (add1 pos))))) - (fail-k max-depth tasks)))]))))) - -;; Starts parsing to match a non-terminal. There's a minor -;; optimization that checks for known starting tokens. Otherwise, -;; use the cache, block if someone else is already trying the match, -;; and cache the result if it's computed. -;; The cache maps nontermial+startingpos+iteration to a result, where -;; the iteration is 0 for the first match attempt, 1 for the second, -;; etc. -(define (parse-nt/share key min-cnt init-tokens stream last-consumed-token depth end max-depth tasks success-k fail-k k) - (if (and (positive? min-cnt) - (pair? stream) - (not (memq (tok-name (car stream)) init-tokens))) - ;; No such leading token; give up - (fail-k max-depth tasks) - ;; Run pattern - (let loop ([n 0] - [success-k success-k] - [fail-k fail-k] - [max-depth max-depth] - [tasks tasks] - [k k]) - (let ([answer-key (gensym)] - [table-key (vector key depth n)] - [old-depth depth] - [old-stream stream]) - #;(printf "Loop ~a\n" table-key) - (cond - [(hash-ref (tasks-cache tasks) table-key (lambda () #f)) - => (lambda (result) - #;(printf "Reuse ~a\n" table-key) - (result success-k fail-k max-depth tasks))] - [else - #;(printf "Try ~a ~a\n" table-key (map tok-name stream)) - (hash-set! (tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - #;(printf "Wait ~a ~a\n" table-key answer-key) - (wait-for-answer #t max-depth tasks answer-key success-k fail-k - (lambda (max-depth tasks) - #;(printf "Deadlock ~a ~a\n" table-key answer-key) - (fail-k max-depth tasks))))) - (let result-loop ([max-depth max-depth][tasks tasks][k k]) - (letrec ([orig-stream stream] - [new-got-k - (lambda (val stream last-consumed-token depth max-depth tasks next-k) - ;; Check whether we already have a result that consumed the same amount: - (let ([result-key (vector #f key old-depth depth)]) - (cond - [(hash-ref (tasks-cache tasks) result-key (lambda () #f)) - ;; Go for the next-result - (result-loop max-depth - tasks - (lambda (end max-depth tasks success-k fail-k) - (next-k success-k fail-k max-depth tasks)))] - [else - #;(printf "Success ~a ~a\n" table-key - (map tok-name (let loop ([d old-depth][s old-stream]) - (if (= d depth) - null - (cons (car s) (loop (add1 d) (cdr s))))))) - (let ([next-k (lambda (success-k fail-k max-depth tasks) - (loop (add1 n) - success-k - fail-k - max-depth - tasks - (lambda (end max-depth tasks success-k fail-k) - (next-k success-k fail-k max-depth tasks))))]) - (hash-set! (tasks-cache tasks) result-key #t) - (hash-set! (tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - (success-k val stream last-consumed-token depth max-depth tasks next-k))) - (report-answer-all answer-key - max-depth - tasks - (list val stream last-consumed-token depth next-k) - (lambda (max-depth tasks) - (success-k val stream last-consumed-token depth max-depth tasks next-k))))])))] - [new-fail-k - (lambda (max-depth tasks) - #;(printf "Failure ~a\n" table-key) - (hash-set! (tasks-cache tasks) table-key - (lambda (success-k fail-k max-depth tasks) - (fail-k max-depth tasks))) - (report-answer-all answer-key - max-depth - tasks - null - (lambda (max-depth tasks) - (fail-k max-depth tasks))))]) - (k end max-depth tasks new-got-k new-fail-k)))]))))) - -(define-syntax (cfg-parser stx) - (syntax-case stx () - [(_ clause ...) - (let ([clauses (syntax->list #'(clause ...))]) - (let-values ([(start grammar cfg-error parser-clauses src-pos?) - (let ([all-toks (apply - append - (map (lambda (clause) - (syntax-case clause (tokens) - [(tokens t ...) - (apply - append - (map (lambda (t) - (let ([v (syntax-local-value t (lambda () #f))]) - (cond - [(terminals-def? v) - (map (lambda (v) - (cons v #f)) - (syntax->list (terminals-def-t v)))] - [(e-terminals-def? v) - (map (lambda (v) - (cons v #t)) - (syntax->list (e-terminals-def-t v)))] - [else null]))) - (syntax->list #'(t ...))))] - [_else null])) - clauses))] - [all-end-toks (apply - append - (map (lambda (clause) - (syntax-case clause (end) - [(end t ...) - (syntax->list #'(t ...))] - [_else null])) - clauses))]) - (let loop ([clauses clauses] - [cfg-start #f] - [cfg-grammar #f] - [cfg-error #f] - [src-pos? #f] - [parser-clauses null]) - (if (null? clauses) - (values cfg-start - cfg-grammar - cfg-error - (reverse parser-clauses) - src-pos?) - (syntax-case (car clauses) (start error grammar src-pos) - [(start tok) - (loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)] - [(error expr) - (loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)] - [(grammar [nt [pat handle0 handle ...] ...] ...) - (let ([nts (make-bound-identifier-mapping)] - [toks (make-token-identifier-mapping)] - [end-toks (make-token-identifier-mapping)] - [nt-ids (syntax->list #'(nt ...))] - [patss (map (lambda (stx) - (map syntax->list (syntax->list stx))) - (syntax->list #'((pat ...) ...)))]) - (for-each (lambda (nt) - (bound-identifier-mapping-put! nts nt (list 0))) - nt-ids) - (for-each (lambda (t) - (token-identifier-mapping-put! end-toks t #t)) - all-end-toks) - (for-each (lambda (t) - (unless (token-identifier-mapping-get end-toks (car t) (lambda () #f)) - (let ([id (gensym (syntax-e (car t)))]) - (token-identifier-mapping-put! toks (car t) - (cons id (cdr t)))))) - all-toks) - ;; Compute min max size for each non-term: - (nt-fixpoint - nts - (lambda (nt pats old-list) - (let ([new-cnt - (apply - min - (map (lambda (pat) - (apply - + - (map (lambda (elem) - (car - (bound-identifier-mapping-get nts - elem - (lambda () (list 1))))) - pat))) - pats))]) - (if (new-cnt . > . (car old-list)) - (cons new-cnt (cdr old-list)) - old-list))) - nt-ids patss) - ;; Compute set of toks that must appear at the beginning - ;; for a non-terminal - (nt-fixpoint - nts - (lambda (nt pats old-list) - (let ([new-list - (apply - append - (map (lambda (pat) - (let loop ([pat pat]) - (if (pair? pat) - (let ([l (bound-identifier-mapping-get - nts - (car pat) - (lambda () - (list 1 (map-token toks (car pat)))))]) - ;; If the non-terminal can match 0 things, - ;; then it might match something from the - ;; next pattern element. Otherwise, it must - ;; match the first element: - (if (zero? (car l)) - (append (cdr l) (loop (cdr pat))) - (cdr l))) - null))) - pats))]) - (let ([new (filter (lambda (id) - (andmap (lambda (id2) - (not (eq? id id2))) - (cdr old-list))) - new-list)]) - (if (pair? new) - ;; Drop dups in new list: - (let ([new (let loop ([new new]) - (if (null? (cdr new)) - new - (if (ormap (lambda (id) - (eq? (car new) id)) - (cdr new)) - (loop (cdr new)) - (cons (car new) (loop (cdr new))))))]) - (cons (car old-list) (append new (cdr old-list)))) - old-list)))) - nt-ids patss) - ;; Determine left-recursive clauses: - (for-each (lambda (nt pats) - (let ([l (bound-identifier-mapping-get nts nt)]) - (bound-identifier-mapping-put! nts nt (list (car l) - (cdr l) - (map (lambda (x) #f) pats))))) - nt-ids patss) - (nt-fixpoint - nts - (lambda (nt pats old-list) - (list (car old-list) - (cadr old-list) - (map (lambda (pat simple?) - (or simple? - (let ([l (map (lambda (elem) - (bound-identifier-mapping-get - nts - elem - (lambda () #f))) - pat)]) - (andmap (lambda (i) - (or (not i) - (andmap values (caddr i)))) - l)))) - pats (caddr old-list)))) - nt-ids patss) - ;; Build a definition for each non-term: - (loop (cdr clauses) - cfg-start - (map (lambda (nt pats handles $ctxs) - (define info (bound-identifier-mapping-get nts nt)) - (list nt - #`(let ([key (gensym '#,nt)]) - (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) - (parse-nt/share - key #,(car info) '#,(cadr info) stream last-consumed-token depth end - max-depth tasks - success-k fail-k - (lambda (end max-depth tasks success-k fail-k) - #,(let loop ([pats pats] - [handles (syntax->list handles)] - [$ctxs (syntax->list $ctxs)] - [simple?s (caddr info)]) - (if (null? pats) - #'(fail-k max-depth tasks) - #`(#,(if (or (null? (cdr pats)) - (car simple?s)) - #'parse-or - #'parse-parallel-or) - (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) - #,(build-match nts - toks - (car pats) - (car handles) - (car $ctxs))) - (lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks) - #,(loop (cdr pats) - (cdr handles) - (cdr $ctxs) - (cdr simple?s))) - stream last-consumed-token depth end success-k fail-k max-depth tasks))))))))) - nt-ids - patss - (syntax->list #'(((begin handle0 handle ...) ...) ...)) - (syntax->list #'((handle0 ...) ...))) - cfg-error - src-pos? - (list* - (with-syntax ([((tok tok-id . $e) ...) - (token-identifier-mapping-map toks - (lambda (k v) - (list* k - (car v) - (if (cdr v) - #f - '$1))))] - [(pos ...) - (if src-pos? - #'($1-start-pos $1-end-pos) - #'(#f #f))]) - #`(grammar (start [() null] - [(atok start) (cons $1 $2)]) - (atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...))) - #`(start start) - parser-clauses)))] - [(grammar . _) - (raise-syntax-error - #f - "bad grammar clause" - stx - (car clauses))] - [(src-pos) - (loop (cdr clauses) - cfg-start - cfg-grammar - cfg-error - #t - (cons (car clauses) parser-clauses))] - [_else - (loop (cdr clauses) - cfg-start - cfg-grammar - cfg-error - src-pos? - (cons (car clauses) parser-clauses))]))))]) - #`(let ([orig-parse (parser - [error (lambda (a b c) - (error 'cfg-parser "unexpected ~a token: ~a" b c))] - . #,parser-clauses)] - [error-proc #,cfg-error]) - (letrec #,grammar - (lambda (get-tok) - (let ([tok-list (orig-parse get-tok)]) - (letrec ([success-k - (lambda (val stream last-consumed-token depth max-depth tasks next) - (if (null? stream) - val - (next success-k fail-k max-depth tasks)))] - [fail-k (lambda (max-depth tasks) - (cond - [(null? tok-list) - (if error-proc - (error-proc #t - 'no-tokens - #f - (make-position #f #f #f) - (make-position #f #f #f)) - (error - 'cfg-parse - "no tokens"))] - [else - (let ([bad-tok (list-ref tok-list - (min (sub1 (length tok-list)) - max-depth))]) - (if error-proc - (error-proc #t - (tok-orig-name bad-tok) - (tok-val bad-tok) - (tok-start bad-tok) - (tok-end bad-tok)) - (error - 'cfg-parse - "failed at ~a" - (tok-val bad-tok))))]))]) - (#,start tok-list - ;; we simulate a token at the very beginning with zero width - ;; for use with the position-generating code (*-start-pos, *-end-pos). - (if (null? tok-list) - (tok #f #f #f - (position 1 - #,(if src-pos? #'1 #'#f) - #,(if src-pos? #'0 #'#f)) - (position 1 - #,(if src-pos? #'1 #'#f) - #,(if src-pos? #'0 #'#f))) - (tok (tok-name (car tok-list)) - (tok-orig-name (car tok-list)) - (tok-val (car tok-list)) - (tok-start (car tok-list)) - (tok-start (car tok-list)))) - 0 - (length tok-list) - success-k - fail-k - 0 - (make-tasks null null - (make-hasheq) (make-hasheq) - (make-hash) #t)))))))))])) - - -(module* test racket/base - (require (submod "..") - br-parser-tools/lex - racket/block - rackunit) - - ;; Test: parsing regular expressions. - ;; Here is a test case on locations: - (block - (define-tokens regexp-tokens (ANCHOR STAR OR LIT LPAREN RPAREN EOF)) - (define lex (lexer-src-pos ["|" (token-OR lexeme)] - ["^" (token-ANCHOR lexeme)] - ["*" (token-STAR lexeme)] - [(repetition 1 +inf.0 alphabetic) (token-LIT lexeme)] - ["(" (token-LPAREN lexeme)] - [")" (token-RPAREN lexeme)] - [whitespace (return-without-pos (lex input-port))] - [(eof) (token-EOF 'eof)])) - (define -parse (cfg-parser - (tokens regexp-tokens) - (start top) - (end EOF) - (src-pos) - (grammar [top [(maybe-anchor regexp) - (cond [$1 - `(anchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] - [else - `(unanchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))])]] - [maybe-anchor [(ANCHOR) #t] - [() #f]] - [regexp [(regexp STAR) `(star ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))] - [(regexp OR regexp) `(or ,$1 ,$3 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] - [(LPAREN regexp RPAREN) `(group ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))] - [(LIT) `(lit ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $1-end-pos))]]))) - (define (pos->sexp pos) - (position-offset pos)) - - (define (parse s) - (define ip (open-input-string s)) - (port-count-lines! ip) - (-parse (lambda () (lex ip)))) - - (check-equal? (parse "abc") - '(unanchored (lit "abc" 1 4) 1 4)) - (check-equal? (parse "a | (b*) | c") - '(unanchored (or (or (lit "a" 1 2) - (group (star (lit "b" 6 7) 6 8) 5 9) - 1 9) - (lit "c" 12 13) - 1 13) - 1 13))) - - - - - - ;; Tests used during development - (define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF)) - - (define lex - (lexer - ["+" (token-PLUS '+)] - ["-" (token-MINUS '-)] - ["*" (token-STAR '*)] - ["|" (token-BAR '||)] - [":" (token-COLON '|:|)] - [whitespace (lex input-port)] - [(eof) (token-EOF 'eof)])) - - (define parse - (cfg-parser - (tokens non-terminals) - (start ) - (end EOF) - (error (lambda (a b stx) - (error 'parse "failed at ~s" stx))) - (grammar [ [(PLUS) "plus"] - [( BAR ) (list $1 $2 $3)] - [( COLON) (list $1)]] - [ [(MINUS) "minus"] - [( STAR) (cons $1 $2)]] - [ [( MINUS) "yes"]] - [ [(PLUS) 'plus] - [(MINUS) 'minus]] - [ [() '0] - [( PLUS) (add1 $1)] - [( PLUS) (add1 $1)]]))) - - (let ([p (open-input-string #;"+*|-|-*|+**" #;"-|+*|+**" - #;"+*|+**|-" #;"-|-*|-|-*" - #;"-|-*|-|-**|-|-*|-|-**" - "-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|-|-*|-|-**|-|-*|-|-*** - |-|-*|-|-**|-|-*|-|-*****|-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****| - -|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****" - ;; This one fails: - #;"+*")]) - (check-equal? (parse (lambda () (lex p))) - '((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) - || - (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) - . - *) - || - (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) - || - (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) - . - *)) - . - *) - || - (((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) - || - (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) - . - *) - || - (((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *) - || - (((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)) - . - *)) - . - *))))) diff --git a/brag/brag/codegen/codegen.rkt b/brag/brag/codegen/codegen.rkt deleted file mode 100755 index f8f76cd..0000000 --- a/brag/brag/codegen/codegen.rkt +++ /dev/null @@ -1,448 +0,0 @@ -#lang racket/base - -(require (for-template racket/base) - racket/list - racket/set - racket/syntax - syntax/srcloc - brag/rules/stx-types - "flatten.rkt" - syntax/id-table - (prefix-in sat: "satisfaction.rkt") - (prefix-in support: brag/support) - (prefix-in stxparse: syntax/parse)) - -(provide rules-codegen) - - -;; Generates the body of the module. -;; FIXME: abstract this so we can just call (rules ...) without -;; generating the whole module body. -(define (rules-codegen stx - #:parser-provider-module [parser-provider-module 'br-parser-tools/yacc] - #:parser-provider-form [parser-provider-form 'parser]) - (syntax-case stx () - [(_ r ...) - (begin - ;; (listof stx) - (define rules (syntax->list #'(r ...))) - - (when (empty? rules) - (raise-syntax-error 'brag - (format "The grammar does not appear to have any rules") - stx)) - - (check-all-rules-defined! rules) - (check-all-rules-no-duplicates! rules) - (check-all-rules-satisfiable! rules) - - ;; We flatten the rules so we can use the yacc-style ruleset that br-parser-tools - ;; supports. - (define flattened-rules (flatten-rules rules)) - - (define generated-rule-codes (map flat-rule->yacc-rule flattened-rules)) - - ;; The first rule, by default, is the start rule. - (define rule-ids (for/list ([a-rule (in-list rules)]) - (rule-id a-rule))) - (define start-id (first rule-ids)) - - - (define-values (implicit-tokens ;; (listof identifier) - explicit-tokens) ;; (listof identifier) - (rules-collect-token-types rules)) - - ;; (listof symbol) - (define implicit-token-types - (map string->symbol - (set->list (list->set (map syntax-e implicit-tokens))))) - - ;; (listof symbol) - (define explicit-token-types - (set->list (list->set (map syntax-e explicit-tokens)))) - - ;; (listof symbol) - (define token-types - (set->list (list->set (append (map (lambda (x) (string->symbol (syntax-e x))) - implicit-tokens) - (map syntax-e explicit-tokens))))) - - (with-syntax ([start-id start-id] - - [(token-type ...) token-types] - - [(token-type-constructor ...) - (map (lambda (x) (string->symbol (format "token-~a" x))) - token-types)] - - [(explicit-token-types ...) explicit-token-types] - [(implicit-token-types ...) implicit-token-types] - [(implicit-token-types-str ...) (map symbol->string implicit-token-types)] - [(implicit-token-type-constructor ...) - (map (lambda (x) (string->symbol (format "token-~a" x))) - implicit-token-types)] - [generated-grammar #`(grammar #,@generated-rule-codes)] - [parser-module parser-provider-module] - [parser-form parser-provider-form]) - (quasisyntax/loc stx - (begin - (require br-parser-tools/lex - parser-module - brag/codegen/runtime - brag/support - brag/private/internal-support - racket/set - (for-syntax syntax/parse racket/base)) - - (provide parse - make-rule-parser - all-token-types - #;current-source - #;current-parser-error-handler - #;current-tokenizer-error-handler - #;[struct-out exn:fail:parsing] - ) - - (define-tokens enumerated-tokens (token-type ...)) - - ;; all-token-types lists all the tokens (except for EOF) - (define all-token-types - (set-remove (set 'token-type ...) 'EOF)) - - ;; For internal use by the permissive tokenizer only: - (define all-tokens-hash/mutable - (make-hash (list ;; Note: we also allow the eof object here, to make - ;; the permissive tokenizer even nicer to work with. - (cons eof token-EOF) - (cons 'token-type token-type-constructor) ...))) - - - #;(define default-lex/1 - (lexer-src-pos [implicit-token-types-str - (token 'implicit-token-types lexeme)] - ... - [(eof) (token eof)])) - - (define-syntax (make-rule-parser stx-2) - (syntax-parse stx-2 - [(_ start-rule:id) - (begin - ;; HACK HACK HACK - ;; The cfg-parser depends on the start-rule provided in (start ...) to have the same - ;; context as the rest of this body, so I need to hack this. I don't like this, but - ;; I don't know what else to do. Hence recolored-start-rule. - (unless (member (syntax-e #'start-rule) - '#,(map syntax-e rule-ids)) - (raise-syntax-error #f - (format "Rule ~a is not defined in the grammar" (syntax-e #'start-rule)) - stx-2)) - - (define recolored-start-rule (datum->syntax (syntax #,stx) (syntax-e #'start-rule))) - #`(let ([THE-GRAMMAR (parser-form (tokens enumerated-tokens) - (src-pos) - (start #,recolored-start-rule) - (end EOF) - (error THE-ERROR-HANDLER) - generated-grammar)]) - (case-lambda [(tokenizer) - (define next-token - (make-permissive-tokenizer tokenizer all-tokens-hash/mutable)) - (THE-GRAMMAR next-token)] - [(source tokenizer) - (parameterize ([current-source source]) - (parse tokenizer))])))])) - - (define parse (make-rule-parser start-id)) - (provide parse-to-datum parse-tree) - - (define (parse-to-datum x) - (let loop ([x (syntax->datum (parse x))]) - (cond - [(list? x) (map loop x)] - [(char? x) (string x)] - [else x]))) - - (define parse-tree parse-to-datum)))))])) - - -;; Given a flattened rule, returns a syntax for the code that -;; preserves as much source location as possible. -;; -;; Each rule is defined to return a list with the following structure: -;; -;; stx :== (name (U tokens rule-stx) ...) -;; -(define (flat-rule->yacc-rule a-flat-rule) - (syntax-case a-flat-rule () - [(rule-type origin name clauses ...) - (begin - (define translated-clauses - (map (lambda (clause) (translate-clause clause #'name #'origin)) - (syntax->list #'(clauses ...)))) - (with-syntax ([(translated-clause ...) translated-clauses]) - #`[name translated-clause ...]))])) - - - -;; translates a single primitive rule clause. -;; A clause is a simple list of ids, lit, vals, and inferred-id elements. -;; The action taken depends on the pattern type. -(define (translate-clause a-clause rule-name/false origin) - (define translated-patterns - (let loop ([primitive-patterns (syntax->list a-clause)]) - (cond - [(empty? primitive-patterns) - '()] - [else - (cons (syntax-case (first primitive-patterns) (id lit token inferred-id) - [(id val) - #'val] - [(lit val) - (datum->syntax #f (string->symbol (syntax-e #'val)) #'val)] - [(token val) - #'val] - [(inferred-id val reason) - #'val]) - (loop (rest primitive-patterns)))]))) - - (define translated-actions - (for/list ([translated-pattern (in-list translated-patterns)] - [primitive-pattern (syntax->list a-clause)] - [pos (in-naturals 1)]) - (if (eq? (syntax-property primitive-pattern 'hide) 'hide) - #'null - (with-syntax ([$X - (format-id translated-pattern "$~a" pos)] - [$X-start-pos - (format-id translated-pattern "$~a-start-pos" pos)] - [$X-end-pos - (format-id translated-pattern "$~a-end-pos" pos)]) - (syntax-case primitive-pattern (id lit token inferred-id) - - ;; When a rule usage is inferred, the value of $X is a syntax object - ;; whose head is the name of the inferred rule . We strip that out, - ;; leaving the residue to be absorbed. - [(inferred-id val reason) - #'(syntax-case $X () - [(inferred-rule-name . rest) - (syntax->list #'rest)])] - [(id val) - ;; at this point, the 'hide property is either #f or "splice" - ;; ('hide value is handled at the top of this conditional - ;; we need to use boolean because a symbol is treated as an identifier. - ;; also we'll separate it into its own property for clarity and test for it in "runtime.rkt" - #`(list (syntax-property $X 'splice-rh-id #,(and (syntax-property primitive-pattern 'hide) #t)))] - [(lit val) - #'(list (atomic-datum->syntax $X $X-start-pos $X-end-pos))] - [(token val) - #'(list (atomic-datum->syntax $X $X-start-pos $X-end-pos))]))))) - - (define whole-rule-loc - (if (empty? translated-patterns) - #'(list (current-source) #f #f #f #f) - (with-syntax ([$1-start-pos (datum->syntax (first translated-patterns) '$1-start-pos)] - [$n-end-pos (format-id (last translated-patterns) "$~a-end-pos" (length translated-patterns))]) - #`(positions->srcloc $1-start-pos $n-end-pos)))) - - ;; move 'hide-or-splice-lhs-id property into function because name is datum-ized - (with-syntax ([(translated-pattern ...) translated-patterns] - [(translated-action ...) translated-actions]) - #`[(translated-pattern ...) - (rule-components->syntax '#,rule-name/false translated-action ... - #:srcloc #,whole-rule-loc - #:hide-or-splice? #,(syntax-property rule-name/false 'hide-or-splice-lhs-id))])) - - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; collect-token-types: (listof rule-syntax) -> (values (listof identifier) (listof identifier)) -;; -;; Given a rule, automatically derive the list of implicit and -;; explicit token types we need to generate. -;; -;; Note: EOF is reserved, and will always be included in the list -;; of explicit token types, though the user is not allow to express it themselves. -(define (rules-collect-token-types rules) - (define-values (implicit explicit) - (for/fold ([implicit '()] - [explicit (list (datum->syntax (first rules) 'EOF))]) - ([r (in-list rules)]) - (rule-collect-token-types r implicit explicit))) - (values (reverse implicit) (reverse explicit))) - -(define (rule-collect-token-types a-rule implicit explicit) - (syntax-case a-rule (rule) - [(rule id a-pattern) - (pattern-collect-implicit-token-types #'a-pattern implicit explicit)])) - -(define (pattern-collect-implicit-token-types a-pattern implicit explicit) - (let loop ([a-pattern a-pattern] - [implicit implicit] - [explicit explicit]) - (syntax-case a-pattern (id lit token choice repeat maybe seq) - [(id val) - (values implicit explicit)] - [(lit val) - (values (cons #'val implicit) explicit)] - [(token val) - (begin - (when (eq? (syntax-e #'val) 'EOF) - (raise-syntax-error #f "Token EOF is reserved and can not be used in a grammar" #'val)) - (values implicit (cons #'val explicit)))] - [(choice vals ...) - (for/fold ([implicit implicit] - [explicit explicit]) - ([v (in-list (syntax->list #'(vals ...)))]) - (loop v implicit explicit))] - [(repeat min val) - (loop #'val implicit explicit)] - [(maybe val) - (loop #'val implicit explicit)] - [(seq vals ...) - (for/fold ([implicit implicit] - [explicit explicit]) - ([v (in-list (syntax->list #'(vals ...)))]) - (loop v implicit explicit))]))) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; rule-id: rule -> identifier-stx -;; Get the binding id of a rule. -(define (rule-id a-rule) - (syntax-case a-rule (rule) - [(rule id a-pattern) - #'id])) - -(define (rule-pattern a-rule) - (syntax-case a-rule (rule) - [(rule id a-pattern) - #'a-pattern])) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; check-all-rules-defined!: (listof rule-stx) -> void -(define (check-all-rules-defined! rules) - (define table (make-free-id-table)) - ;; Pass one: collect all the defined rule names. - (for ([a-rule (in-list rules)]) - (free-id-table-set! table (rule-id a-rule) #t)) - ;; Pass two: check each referenced id, and make sure it's been defined. - (for ([a-rule (in-list rules)]) - (for ([referenced-id (in-list (rule-collect-used-ids a-rule))]) - (unless (free-id-table-ref table referenced-id (lambda () #f)) - (raise-syntax-error #f (format "Rule ~a has no definition" (syntax-e referenced-id)) - referenced-id))))) - -;; check-all-rules-no-duplicates!: (listof rule-stx) -> void -(define (check-all-rules-no-duplicates! rules) - (define table (make-free-id-table)) - ;; Pass one: collect all the defined rule names. - (for ([a-rule (in-list rules)]) - (define maybe-other-rule-id (free-id-table-ref table (rule-id a-rule) (lambda () #f))) - (when maybe-other-rule-id - (raise-syntax-error #f (format "Rule ~a has a duplicate definition" (syntax-e (rule-id a-rule))) - (rule-id a-rule) - #f - (list (rule-id a-rule) maybe-other-rule-id))) - (free-id-table-set! table (rule-id a-rule) (rule-id a-rule)))) - - - -;; rule-collect-used-ids: rule-stx -> (listof identifier) -;; Given a rule, extracts a list of identifiers -(define (rule-collect-used-ids a-rule) - (syntax-case a-rule (rule) - [(rule id a-pattern) - (pattern-collect-used-ids #'a-pattern '())])) - -;; pattern-collect-used-ids: pattern-stx (listof identifier) -> (listof identifier) -;; Returns a flat list of rule identifiers referenced in the pattern. -(define (pattern-collect-used-ids a-pattern acc) - (let loop ([a-pattern a-pattern] - [acc acc]) - (syntax-case a-pattern (id lit token choice repeat maybe seq) - [(id val) - (cons #'val acc)] - [(lit val) - acc] - [(token val) - acc] - [(choice vals ...) - (for/fold ([acc acc]) - ([v (in-list (syntax->list #'(vals ...)))]) - (loop v acc))] - [(repeat min val) - (loop #'val acc)] - [(maybe val) - (loop #'val acc)] - [(seq vals ...) - (for/fold ([acc acc]) - ([v (in-list (syntax->list #'(vals ...)))]) - (loop v acc))]))) - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; check-all-rules-satisfiable: (listof rule-stx) -> void -;; Does a simple graph traversal / topological sort-like thing to make sure that, for -;; any rule, there's some finite sequence of tokens that -;; satisfies it. If this is not the case, then something horrible -;; has happened, and we need to tell the user about it. -;; -;; NOTE: Assumes all referenced rules have definitions. -(define (check-all-rules-satisfiable! rules) - (define toplevel-rule-table (make-free-id-table)) - (for ([a-rule (in-list rules)]) - (free-id-table-set! toplevel-rule-table - (rule-id a-rule) - (sat:make-and))) - (define leaves '()) - - (define (make-leaf) - (define a-leaf (sat:make-and)) - (set! leaves (cons a-leaf leaves)) - a-leaf) - - (define (process-pattern a-pattern) - (syntax-case a-pattern (id lit token choice repeat maybe seq) - [(id val) - (free-id-table-ref toplevel-rule-table #'val)] - [(lit val) - (make-leaf)] - [(token val) - (make-leaf)] - [(choice vals ...) - (begin - (define an-or-node (sat:make-or)) - (for ([v (in-list (syntax->list #'(vals ...)))]) - (define a-child (process-pattern v)) - (sat:add-child! an-or-node a-child)) - an-or-node)] - [(repeat min val) - (syntax-case #'min () - [0 - (make-leaf)] - [else - (process-pattern #'val)])] - [(maybe val) - (make-leaf)] - [(seq vals ...) - (begin - (define an-and-node (sat:make-and)) - (for ([v (in-list (syntax->list #'(vals ...)))]) - (define a-child (process-pattern v)) - (sat:add-child! an-and-node a-child)) - an-and-node)])) - - (for ([a-rule (in-list rules)]) - (define rule-node (free-id-table-ref toplevel-rule-table (rule-id a-rule))) - (sat:add-child! rule-node (process-pattern (rule-pattern a-rule)))) - - (for ([a-leaf leaves]) - (sat:visit! a-leaf)) - - (for ([a-rule (in-list rules)]) - (define rule-node (free-id-table-ref toplevel-rule-table (rule-id a-rule))) - (unless (sat:node-yes? rule-node) - (raise-syntax-error #f (format "Rule ~a has no finite derivation" (syntax-e (rule-id a-rule))) - (rule-id a-rule))))) diff --git a/brag/brag/codegen/flatten.rkt b/brag/brag/codegen/flatten.rkt deleted file mode 100755 index ad3ecfc..0000000 --- a/brag/brag/codegen/flatten.rkt +++ /dev/null @@ -1,200 +0,0 @@ -#lang racket/base -(require brag/rules/stx-types - (for-syntax racket/base)) - -(provide flatten-rule - flatten-rules - prim-rule) - - - -(define (make-fresh-name) - (let ([n 0]) - (lambda () - (set! n (add1 n)) - (string->symbol (format "%rule~a" n))))) - -(define default-fresh-name - (make-fresh-name)) - - -;; Translates rules to lists of primitive rules. - - -(define (flatten-rules rules #:fresh-name [fresh-name default-fresh-name]) - (define ht (make-hash)) - (apply append (map (lambda (a-rule) (flatten-rule a-rule - #:ht ht - #:fresh-name fresh-name)) - rules))) - - -;; flatten-rule: rule -> (listof primitive-rule) -(define (flatten-rule a-rule - #:fresh-name [fresh-name default-fresh-name] - - ;; ht: (hashtableof pattern-hash-key pat) - #:ht [ht (make-hash)]) - - (let recur ([a-rule a-rule] - [inferred? #f]) - - ;; lift-nonprimitive-pattern: pattern -> (values (listof primitive-rule) pattern) - ;; Turns non-primitive patterns into primitive patterns, and produces a set of - ;; derived rules. - (define (lift-nonprimitive-pattern a-pat) - (cond - [(primitive-pattern? a-pat) - (values '() (linearize-primitive-pattern a-pat))] - [(hash-has-key? ht (pattern->hash-key a-pat)) - (values '() (list (hash-ref ht (pattern->hash-key a-pat))))] - [else - (define head (syntax-case a-pat () [(head rest ...) #'head])) - (define new-name (datum->syntax #f (fresh-name) a-pat)) - (define new-inferred-id (datum->syntax #f `(inferred-id ,new-name ,head) a-pat)) - (hash-set! ht (pattern->hash-key a-pat) new-inferred-id) - (values (recur #`(rule #,new-name #,a-pat) head) - (list new-inferred-id))])) - - (define (lift-nonprimitive-patterns pats) - (define-values (rules patterns) - (for/fold ([inferred-ruless '()] - [patternss '()]) - ([p (in-list pats)]) - (define-values (new-rules new-ps) - (lift-nonprimitive-pattern p)) - (values (cons new-rules inferred-ruless) - (cons new-ps patternss)))) - (values (apply append (reverse rules)) - (apply append (reverse patterns)))) - - (with-syntax ([head (if inferred? #'inferred-prim-rule #'prim-rule)] - [origin (syntax-case a-rule (rule) [(rule name (pat-head rest ...)) #'pat-head])]) - (syntax-case a-rule (rule) - [(rule name pat) - (syntax-case #'pat (id inferred-id lit token choice repeat maybe seq) - - ;; The primitive types stay as they are: - [(id val) - (list #'(head origin name [pat]))] - [(inferred-id val reason) - (list #'(head origin name [pat]))] - [(lit val) - (list #'(head origin name [pat]))] - [(token val) - (list #'(head origin name [pat]))] - - - ;; Everything else might need lifting: - [(choice sub-pat ...) - (begin - (define-values (inferred-ruless/rev new-sub-patss/rev) - (for/fold ([rs '()] [ps '()]) - ([p (syntax->list #'(sub-pat ...))]) - (let-values ([(new-r new-p) - (lift-nonprimitive-pattern p)]) - (values (cons new-r rs) (cons new-p ps))))) - (with-syntax ([((sub-pat ...) ...) (reverse new-sub-patss/rev)]) - (append (list #'(head origin name [sub-pat ...] ...)) - (apply append (reverse inferred-ruless/rev)))))] - - [(repeat min sub-pat) - (begin - (define-values (inferred-rules new-sub-pats) - (lift-nonprimitive-pattern #'sub-pat)) - (with-syntax ([(sub-pat ...) new-sub-pats]) - (cons (cond [(= (syntax-e #'min) 0) - #`(head origin name - [(inferred-id name repeat) sub-pat ...] - [])] - [(= (syntax-e #'min) 1) - #`(head origin name - [(inferred-id name repeat) sub-pat ...] - [sub-pat ...])]) - inferred-rules)))] - - [(maybe sub-pat) - (begin - (define-values (inferred-rules new-sub-pats) - (lift-nonprimitive-pattern #'sub-pat)) - (with-syntax ([(sub-pat ...) new-sub-pats]) - (cons #'(head origin name - [sub-pat ...] - []) - inferred-rules)))] - - [(seq sub-pat ...) - (begin - (define-values (inferred-rules new-sub-pats) - (lift-nonprimitive-patterns (syntax->list #'(sub-pat ...)))) - (with-syntax ([(sub-pat ...) new-sub-pats]) - (cons #'(head origin name [sub-pat ...]) - inferred-rules)))])])))) - - -;; Given a pattern, return a key appropriate for a hash. -;; -;; In the `ragg` days this used `syntax->datum` only. -;; The problem is that with cuts & splices in the mix, it creates ambiguity: -;; e.g., the pattern (/"," foo)* and ("," foo)* differ only in the 'hide syntax property -;; so `syntax->datum` does not capture their differences. -;; That means they produced the same hash key, -;; which meant they produced the same inferred pattern. Which is wrong. -;; So we adjust the key to take account of the 'hide property -;; by "lifting" it into the datum with cons. -;; Then the pattern-inference process treats them separately. -(define (pattern->hash-key a-pat) - (let loop ([x a-pat]) - (let ([maybe-stx-list (syntax->list x)]) - (if maybe-stx-list - (cons (syntax-property x 'hide) (map loop maybe-stx-list)) - (syntax->datum x))))) - - -;; Returns true if the pattern looks primitive -(define (primitive-pattern? a-pat) - (syntax-case a-pat (id lit token choice repeat maybe seq) - [(id val) - #t] - [(lit val) - #t] - [(token val) - #t] - [(choice sub-pat ...) - #f] - [(repeat min val) - #f] - [(maybe sub-pat) - #f] - [(seq sub-pat ...) - (andmap primitive-pattern? (syntax->list #'(sub-pat ...)))])) - - -;; Given a primitive pattern (id, lit, token, and seqs only containing -;; primitive patterns), returns a linear sequence of just id, lits, -;; and tokens. -(define (linearize-primitive-pattern a-pat) - (define (traverse a-pat acc) - (syntax-case a-pat (id inferred-id lit token seq) - [(id val) - (cons a-pat acc)] - [(inferred-id val reason) - (cons a-pat acc)] - [(lit val) - (cons a-pat acc)] - [(token val) - (cons a-pat acc)] - [(seq vals ...) - (foldl traverse acc (syntax->list #'(vals ...)))])) - (reverse (traverse a-pat '()))) - - - -(define-syntax (prim-rule stx) - (raise-syntax-error #f "internal error: should not be macro expanded" stx)) - -(define-syntax (inferred-prim-rule stx) - (raise-syntax-error #f "internal error: should not be macro expanded" stx)) - -(define-syntax (inferred-id stx) - (raise-syntax-error #f "internal error: should not be macro expanded" stx)) diff --git a/brag/brag/codegen/reader.rkt b/brag/brag/codegen/reader.rkt deleted file mode 100755 index 6413225..0000000 --- a/brag/brag/codegen/reader.rkt +++ /dev/null @@ -1,68 +0,0 @@ -#lang s-exp syntax/module-reader -brag/codegen/sexp-based-lang -#:read my-read -#:read-syntax my-read-syntax -#:info my-get-info -#:whole-body-readers? #t - -(require brag/rules/parser - brag/rules/lexer - brag/rules/stx - brag/rules/rule-structs) - -(define (my-read in) - (syntax->datum (my-read-syntax #f in))) - -(define (my-read-syntax src in) - (define-values (first-line first-column first-position) (port-next-location in)) - (define tokenizer (tokenize in)) - (define rules - (parameterize ([current-parser-error-handler - (lambda (tok-ok? tok-name tok-value start-pos end-pos) - (raise-syntax-error - #f - (format "Error while parsing grammar near: ~a [line=~a, column=~a, position=~a]" - tok-value - (pos-line start-pos) - (pos-col start-pos) - (pos-offset start-pos)) - (datum->syntax #f - (string->symbol (format "~a" tok-value)) - (list src - (pos-line start-pos) - (pos-col start-pos) - (pos-offset start-pos) - (if (and (number? (pos-offset end-pos)) - (number? (pos-offset start-pos))) - (- (pos-offset end-pos) - (pos-offset start-pos)) - #f)))))]) - (grammar-parser tokenizer))) - (define-values (last-line last-column last-position) (port-next-location in)) - (list (rules->stx src rules - #:original-stx (datum->syntax #f 'original-stx - (list src - first-line - first-column - first-position - (if (and (number? last-position) - (number? first-position)) - (- last-position first-position) - #f)))))) - - -;; Extension: we'd like to cooperate with DrRacket and tell -;; it to use the default, textual lexer and color scheme when -;; editing bf programs. -;; -;; See: http://docs.racket-lang.org/guide/language-get-info.html -;; for more details, as well as the documentation in -;; syntax/module-reader. -(define (my-get-info key default default-filter) - (case key - [(color-lexer) - (dynamic-require 'syntax-color/default-lexer - 'default-lexer)] - [else - (default-filter key default)])) - diff --git a/brag/brag/codegen/runtime.rkt b/brag/brag/codegen/runtime.rkt deleted file mode 100755 index 5acdaf9..0000000 --- a/brag/brag/codegen/runtime.rkt +++ /dev/null @@ -1,212 +0,0 @@ -#lang racket/base - -(require racket/match - racket/list - racket/generator - (prefix-in lex: br-parser-tools/lex) - brag/support - brag/private/internal-support) - - -(provide THE-ERROR-HANDLER - make-permissive-tokenizer - atomic-datum->syntax - positions->srcloc - rule-components->syntax) - - - -;; The level of indirection here is necessary since the yacc grammar wants a -;; function value for the error handler up front. We want to delay that decision -;; till parse time. -(define (THE-ERROR-HANDLER tok-ok? tok-name tok-value start-pos end-pos) - (match (positions->srcloc start-pos end-pos) - [(list src line col offset span) - ((current-parser-error-handler) tok-name - tok-value - offset - line - col - span)])) - - - - -(define no-position (lex:position #f #f #f)) -(define (no-position? p) - (not - (or (lex:position-line p) - (lex:position-col p) - (lex:position-offset p)))) - - -;; make-permissive-tokenizer: (U (sequenceof (U token token-struct eof void)) (-> (U token token-struct eof void))) hash -> (-> position-token) -;; Creates a tokenizer from the given value. -;; FIXME: clean up code. -(define (make-permissive-tokenizer tokenizer token-type-hash) - (define tokenizer-thunk (cond - [(sequence? tokenizer) - (sequence->generator tokenizer)] - [(procedure? tokenizer) - tokenizer])) - - ;; lookup: symbol any pos pos -> position-token - (define (lookup type val start-pos end-pos) - (lex:position-token - ((hash-ref token-type-hash type - (lambda () - ((current-tokenizer-error-handler) (format "~a" type) val - (lex:position-offset start-pos) - (lex:position-line start-pos) - (lex:position-col start-pos) - (and (number? (lex:position-offset start-pos)) - (number? (lex:position-offset end-pos)) - (- (lex:position-offset end-pos) - (lex:position-offset start-pos)))))) - val) - start-pos end-pos)) - - (define (permissive-tokenizer) - (define next-token (tokenizer-thunk)) - (let loop ([next-token next-token]) - (match next-token - [(or (? eof-object?) (? void?)) - (lookup 'EOF eof no-position no-position)] - - [(? symbol?) - (lookup next-token next-token no-position no-position)] - - [(? string?) - (lookup (string->symbol next-token) next-token no-position no-position)] - - [(? char?) - (lookup (string->symbol (string next-token)) next-token no-position no-position)] - - ;; Compatibility - [(? lex:token?) - (loop (token (lex:token-name next-token) - (lex:token-value next-token)))] - - [(token-struct type val offset line column span skip?) - (cond [skip? - ;; skip whitespace, and just tokenize again. - (permissive-tokenizer)] - - [(hash-has-key? token-type-hash type) - (define start-pos (lex:position offset line column)) - ;; try to synthesize a consistent end position. - (define end-pos (lex:position (if (and (number? offset) (number? span)) - (+ offset span) - offset) - line - (if (and (number? column) (number? span)) - (+ column span) - column))) - (lookup type val start-pos end-pos)] - [else - ;; We ran into a token of unrecognized type. Let's raise an appropriate error. - ((current-tokenizer-error-handler) type val - offset line column span)])] - - [(lex:position-token t s e) - (define a-position-token (loop t)) - (lex:position-token (lex:position-token-token a-position-token) - (if (no-position? (lex:position-token-start-pos a-position-token)) - s - (lex:position-token-start-pos a-position-token)) - (if (no-position? (lex:position-token-end-pos a-position-token)) - e - (lex:position-token-end-pos a-position-token)))] - - [(lex:srcloc-token t loc) - (define a-position-token (loop t)) - (lex:position-token (lex:position-token-token a-position-token) - (if (no-position? (lex:position-token-start-pos a-position-token)) - (lex:position (srcloc-position loc) (srcloc-line loc) (srcloc-column loc)) - (lex:position-token-start-pos a-position-token)) - (if (no-position? (lex:position-token-start-pos a-position-token)) - (lex:position (+ (srcloc-position loc) (srcloc-span loc)) #f #f) - (lex:position-token-end-pos a-position-token)))] - - [else - ;; Otherwise, we have no idea how to treat this as a token. - ((current-tokenizer-error-handler) 'unknown-type (format "~a" next-token) - #f #f #f #f)]))) - permissive-tokenizer) - - - -;; positions->srcloc: position position -> (list source line column offset span) -;; Given two positions, returns a srcloc-like structure, where srcloc is the value -;; consumed as the third argument to datum->syntax. -(define (positions->srcloc start-pos end-pos) - (list (current-source) - (lex:position-line start-pos) - (lex:position-col start-pos) - (lex:position-offset start-pos) - (if (and (number? (lex:position-offset end-pos)) - (number? (lex:position-offset start-pos))) - (- (lex:position-offset end-pos) - (lex:position-offset start-pos)) - #f))) - -#| -MB: the next three functions control the parse tree output. -This would be the place to check a syntax property for hiding. -|# -;; We create a syntax using read-syntax; by definition, it should have the -;; original? property set to #t, which we then copy over to syntaxes constructed -;; with atomic-datum->syntax and rule-components->syntax. -(define stx-with-original?-property - (read-syntax #f (open-input-string "meaningless-string"))) - - -;; atomic-datum->syntax: datum position position -;; Helper that does the ugly work in wrapping a datum into a syntax -;; with source location. -(define (atomic-datum->syntax d start-pos end-pos) - (datum->syntax #f d (positions->srcloc start-pos end-pos) stx-with-original?-property)) - - -(define (remove-rule-name component-stx [splice #f]) - ;; when removing a rule name, we apply it as a syntax property to the remaining elements - ;; for possible later usage (aka, why throw away information) - (with-syntax ([(name . subcomponents) component-stx]) - (let ([name-datum (syntax->datum #'name)]) - (if splice - ;; when splicing, returned list is a regular list, with each element having the property. - (map (λ(sc) (syntax-property sc name-datum #'name)) (syntax->list #'subcomponents)) - ;; when hiding, returned list should be a syntaxed list with the property - ;; iow, basically the same as `component-stx`, minus the name - (syntax-property (datum->syntax component-stx #'subcomponents component-stx component-stx) name-datum #'name))))) - - -(define (preprocess-component-lists component-lists) - ; "preprocess" means splicing and rule-name-hiding where indicated - (append* - ;; each `component-list` is a list that's either empty, or has a single component-stx object - ;; inside `component-stx` is a name followed by subcomponents - (for*/list ([component-list (in-list component-lists)] - [component-stx (in-list component-list)]) ; this has the effect of omitting any empty `component-list` - (list - (cond - [(eq? (syntax-property component-stx 'hide-or-splice) 'hide) - (list (remove-rule-name component-stx))] ; hidden version still wrapped in a sublist - [(or (eq? (syntax-property component-stx 'hide-or-splice) 'splice) - (syntax-property component-stx 'splice-rh-id)) - (remove-rule-name component-stx #t)] ; spliced version is lifted out of the sublist - [else (list component-stx)]))))) - - -;; rule-components->syntax: (U symbol false) (listof stx) ... #:srcloc (U #f (list src line column offset span)) -> stx -;; Creates an stx out of the rule name and its components. -;; The location information of the rule spans that of its components. -(define (rule-components->syntax rule-name/false #:srcloc [srcloc #f] #:hide-or-splice? [hide-or-splice #f] . component-lists) - (define new-rule-name (datum->syntax #f rule-name/false srcloc stx-with-original?-property)) - (define new-rule-components (append* (preprocess-component-lists component-lists))) - (define rule-result (cons new-rule-name new-rule-components)) - (define syntaxed-rule-result (datum->syntax #f rule-result srcloc stx-with-original?-property)) - ;; not 'hide-or-splice-lhs-id, because this will now become a (right-hand) component in a different (left-hand) rule - ;; actual splicing happens when the parent rule is processed (with procedure above) - (syntax-property syntaxed-rule-result 'hide-or-splice hide-or-splice)) - diff --git a/brag/brag/codegen/satisfaction.rkt b/brag/brag/codegen/satisfaction.rkt deleted file mode 100755 index 07f20ac..0000000 --- a/brag/brag/codegen/satisfaction.rkt +++ /dev/null @@ -1,207 +0,0 @@ -#lang racket/base - -(provide make-and make-or node? node-val node-yes? visit! add-child!) - -(require racket/match) - -;; I can't get no... satisfaction. -;; -;; A small module to make sure a small constraint system can be satisfied. -;; -;; Small variation on topological sort where we need both AND and OR nodes. - - -(struct node (type val yes? parents count-to-satisfy) #:mutable) -;; or nodes are satisfied if any of the children is satisfied. -;; and nodes are satisfied if all of the children are satisfied. - - -;; visit!: node -> void -;; Visit a node, and marking it if it's all satisfied. Propagate -;; satisfaction to parents as appropriate. -(define visit! - (let () - (define (dec! n) - (set-node-count-to-satisfy! n (max 0 (sub1 (node-count-to-satisfy n)))) - (when (and (not (node-yes? n)) - (= (node-count-to-satisfy n) 0)) - (sat! n))) - - (define (sat! n) - (set-node-yes?! n #t) - (for ([p (in-list (node-parents n))]) - (dec! p))) - - (lambda (n) - (unless (node-yes? n) - (when (= (node-count-to-satisfy n) 0) - (sat! n)))))) - - -;; make-or: X -> node -;; Create an or node -(define (make-or [val #f]) - (node 'or val #f '() 1)) - - -;; make-and: X -> node -;; Create an and node -(define (make-and [val #f]) - (node 'and val #f '() 0)) - - -;; add-child!: node node -> void -;; Attach a child c to the parent node p. -(define (add-child! p c) - (set-node-parents! c (cons p (node-parents c))) - (match p - [(node 'or _ _ _ count-to-satisfy) - (void)] - [(node 'and _ _ _ count-to-satisfy) - (set-node-count-to-satisfy! p (add1 count-to-satisfy))])) - - -(module* test racket - (require (submod "..") - racket/block - rackunit) - - ;; a ::= a - (block - ;; Self-looping "a" and-node should not say yes after visiting. - (define a (make-and 'a)) - (add-child! a a) - (visit! a) - (check-false (node-yes? a))) - - - ;; a ::= a - (block - ;; Self-looping "a" or-node should not say yes after visiting. - (define a (make-or 'a)) - (add-child! a a) - (visit! a) - (check-false (node-yes? a))) - - - ;; This case should never happen in my situation, but we should check. - (block - ;; Empty "a" or-node should not say yes after visiting. - (define a (make-or 'a)) - (visit! a) - (check-false (node-yes? a))) - - - ;; a : TOKEN - (block - ;; Empty "a" and-node SHOULD say yes after visiting. - (define a (make-and 'a)) - (visit! a) - (check-true (node-yes? a))) - - - ;; a : a | b - ;; b : TOKEN - (block - (define a (make-or 'a)) - (add-child! a a) - (define b (make-and 'b)) - (add-child! a b) - (visit! b) - (check-true (node-yes? a)) - (check-true (node-yes? b))) - - ;; a : a b - ;; b : TOKEN - (block - (define a (make-and 'a)) - (define b (make-and 'b)) - (define TOKEN (make-and 'TOKEN)) - (add-child! a a) - (add-child! a b) - (add-child! b TOKEN) - (visit! TOKEN) - (check-false (node-yes? a)) - (check-true (node-yes? b)) - (check-true (node-yes? TOKEN))) - - ;; a : b - ;; b : a - (block - (define a (make-and 'a)) - (define b (make-and 'b)) - (add-child! a b) - (add-child! b a) - (check-false (node-yes? a)) - (check-false (node-yes? b))) - - ;; a : "a" b - ;; b : a | b - (block - (define a (make-and 'a)) - (define b (make-or 'b)) - (define lit (make-and "a")) - (add-child! a lit) - (add-child! a b) - (add-child! b a) - (add-child! b b) - (visit! lit) - (check-false (node-yes? a)) - (check-false (node-yes? b)) - (check-true (node-yes? lit))) - - - ;; x : x y - ;; y : LIT - (block - (define x (make-and "x")) - (define y (make-and "y")) - (define lit (make-and "LIT")) - (add-child! x x) - (add-child! x y) - (add-child! y lit) - (visit! lit) - (check-false (node-yes? x)) - (check-true (node-yes? y)) - (check-true (node-yes? lit))) - - - ;; expr: LPAREN expr RPAREN | ATOM - (block - (define LPAREN (make-and)) - (define RPAREN (make-and)) - (define expr (make-or)) - (define expr-1 (make-and)) - (define expr-2 (make-and)) - (define ATOM (make-and)) - (add-child! expr expr-1) - (add-child! expr expr-2) - (add-child! expr-1 LPAREN) - (add-child! expr-1 expr) - (add-child! expr-1 RPAREN) - (add-child! expr-2 ATOM) - (visit! LPAREN) - (visit! RPAREN) - (visit! ATOM) - (check-true (node-yes? expr))) - - - - ;; expr: LPAREN expr RPAREN - (block - (define LPAREN (make-and)) - (define RPAREN (make-and)) - (define expr (make-or)) - (define expr-1 (make-and)) - (define expr-2 (make-and)) - (define ATOM (make-and)) - (add-child! expr expr-1) - (add-child! expr expr-2) - (add-child! expr-1 LPAREN) - (add-child! expr-1 expr) - (add-child! expr-1 RPAREN) - (visit! LPAREN) - (visit! RPAREN) - (check-false (node-yes? expr))) - - ) diff --git a/brag/brag/codegen/sexp-based-lang.rkt b/brag/brag/codegen/sexp-based-lang.rkt deleted file mode 100755 index 305966a..0000000 --- a/brag/brag/codegen/sexp-based-lang.rkt +++ /dev/null @@ -1,96 +0,0 @@ -#lang racket/base - -;; A language level for automatically generating parsers out of BNF grammars. -;; -;; Danny Yoo (dyoo@hashcollision.org) -;; -;; Intent: make it trivial to generate languages for Racket. At the -;; moment, I find it painful to use br-parser-tools. This library is -;; meant to make it less agonizing. -;; -;; The intended use of this language is as follows: -;; -;;;;; s-exp-grammar.rkt ;;;;;;;;; -;; #lang brag -;; s-exp : "(" s-exp* ")" | ATOM -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - -;; What this generates is: -;; -;; * parse: a function that consumes a source and a -;; position-aware lexer, and produces a syntax object. -;; -;; * make-rule-parser: a custom parser given a provided start rule. -;; -;; You'll still need to do a little work, by providing a lexer that -;; defines what the uppercased tokens mean. For example, you can -;; use the br-parser-tools/lex lexer tools: -;; -;; (require brag/support -;; br-parser-tools/lex -;; br-parser-tools/lex-sre) -;; -;; (define tokenize -;; (lexer-src-pos -;; [(:+ alphabetic) -;; (token 'ATOM lexeme)] -;; [whitespace -;; (return-without-pos (tokenize/1 input-port))] -;; [(:or "(" ")") -;; (token lexeme lexeme)])) -;; - -;; However, that should be all you need. The output of an -;; generated grammar is an honest-to-goodness syntax -;; object with source locations, fully-labeled by the rules. -;; -;; (parse (tokenize an-input-port)) -;; -;; - -;; The first rule is treated as the start rule; any successful parse -;; must finish with end-of-file. - - -;; Terminology: -;; - - -;; A rule is a rule identifier, followed by a colon ":", followed by a -;; pattern. - -;; A rule identifier is an identifier that is not in upper case. -;; A rule identifier should follow the Racket rules for identifiers, -;; except that it can't contain * or +. -;; -;; A token is a rule identifier that is all in upper case. - - - -;; A pattern may either be -;; -;; * an implicit sequence of patterns, -;; -;; * a literal string, -;; -;; * a rule identifier, -;; -;; * a quanitifed pattern, either with "*" or "+", -;; -;; * an optional pattern: a pattern surrounded by "[" and "]", or -;; -;; * a grouped sequence: a pattern surrounded by "(" and ")". - - -(require (for-syntax racket/base - "codegen.rkt")) - -(provide rules - (rename-out [#%plain-module-begin #%module-begin]) - #%top-interaction) - -(define-syntax (rules stx) - (rules-codegen #:parser-provider-module 'brag/cfg-parser/cfg-parser ;; 'br-parser-tools/yacc - #:parser-provider-form 'cfg-parser ;; 'parser - stx)) diff --git a/brag/brag/examples/01-equal.rkt b/brag/brag/examples/01-equal.rkt deleted file mode 100755 index c53bd18..0000000 --- a/brag/brag/examples/01-equal.rkt +++ /dev/null @@ -1,12 +0,0 @@ -#lang brag - -## Equal numbers of 0 and 1s in a string. -## -## (Thanks to mithos28 for this one.) - - -equal : [zero one | one zero] - -zero : "0" equal | equal "0" - -one : "1" equal | equal "1" diff --git a/brag/brag/examples/0n1.rkt b/brag/brag/examples/0n1.rkt deleted file mode 100755 index 53beae0..0000000 --- a/brag/brag/examples/0n1.rkt +++ /dev/null @@ -1,3 +0,0 @@ -#lang brag - -rule: "0"* "1" diff --git a/brag/brag/examples/0n1n.rkt b/brag/brag/examples/0n1n.rkt deleted file mode 100755 index 25b0ea5..0000000 --- a/brag/brag/examples/0n1n.rkt +++ /dev/null @@ -1,3 +0,0 @@ -#lang brag - -rule-0n1n: ["0" rule-0n1n "1"] diff --git a/brag/brag/examples/add-mult.rkt b/brag/brag/examples/add-mult.rkt deleted file mode 100755 index bde2ac5..0000000 --- a/brag/brag/examples/add-mult.rkt +++ /dev/null @@ -1,7 +0,0 @@ -#lang brag - -expr : term (/'+' term)* -@term : factor (/'*' @factor)* -factor : ("0" | "1" | "2" | "3" - | "4" | "5" | "6" | "7" - | "8" | "9")+ diff --git a/brag/brag/examples/baby-json-hider.rkt b/brag/brag/examples/baby-json-hider.rkt deleted file mode 100755 index 769ecfd..0000000 --- a/brag/brag/examples/baby-json-hider.rkt +++ /dev/null @@ -1,18 +0,0 @@ -#lang brag -#:prefix-out my: - -;; Simple baby example of JSON structure -json: number | string - | array - | @object -number: NUMBER - -string: STRING - -array: "[" [json ("," json)*] "]" - -object: /"{" [kvpair ("," kvpair)*] /"}" - -@kvpair : /ID colon /json - -/colon : ":" diff --git a/brag/brag/examples/baby-json.rkt b/brag/brag/examples/baby-json.rkt deleted file mode 100755 index 491dfc8..0000000 --- a/brag/brag/examples/baby-json.rkt +++ /dev/null @@ -1,16 +0,0 @@ -#lang brag - -;; Simple baby example of JSON structure -json: number | string - | array - | object - -number: NUMBER - -string: STRING - -array: "[" [json ("," json)*] "]" - -object: "{" [kvpair ("," kvpair)*] "}" - -kvpair: ID ":" json diff --git a/brag/brag/examples/bnf.rkt b/brag/brag/examples/bnf.rkt deleted file mode 100755 index 2349038..0000000 --- a/brag/brag/examples/bnf.rkt +++ /dev/null @@ -1,13 +0,0 @@ -#lang brag - -## The following comes from: http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form - - : | - : "<" ">" "::=" - - : " " | "" ## "" is empty string, i.e. no whitespace - : | "|" - : | - : | - : | "<" ">" - : '"' '"' | "'" "'" ## actually, the original BNF did not use quotes diff --git a/brag/brag/examples/cutter.rkt b/brag/brag/examples/cutter.rkt deleted file mode 100755 index 7cd7023..0000000 --- a/brag/brag/examples/cutter.rkt +++ /dev/null @@ -1,4 +0,0 @@ -#lang brag -top : expr (/"," expr)* -expr : "x" | list -list : "(" expr ("," expr)* ")" \ No newline at end of file diff --git a/brag/brag/examples/lua-parser.rkt b/brag/brag/examples/lua-parser.rkt deleted file mode 100755 index 75781e9..0000000 --- a/brag/brag/examples/lua-parser.rkt +++ /dev/null @@ -1,111 +0,0 @@ -#lang brag - -;; Lua parser, adapted from: -;; http://www.lua.org/manual/5.1/manual.html#8 -;; - - -chunk : (stat ["; "])* [laststat ["; "]] - -block : chunk - -stat : varlist "=" explist | - functioncall | - DO block END | - WHILE exp DO block END | - REPEAT block UNTIL exp | - IF exp THEN block (ELSEIF exp THEN block)* [ELSE block] END | - FOR NAME "=" exp "," exp ["," exp] DO block END | - FOR namelist IN explist DO block END | - FUNCTION funcname funcbody | - LOCAL FUNCTION NAME funcbody | - LOCAL namelist ["=" explist] - -laststat : RETURN [explist] | BREAK - -funcname : NAME ("." NAME)* [":" NAME] - -varlist : var ("," var)* - -var : NAME | prefixexp "[" exp "]" | prefixexp "." NAME - -namelist : NAME ("," NAME)* - -explist : (exp ",")* exp - - -;; Note by dyoo: The parsing of exp deviates from Lua in that we have these administrative -;; rules to explicitly represent the precedence rules. -;; -;; See: http://www.lua.org/manual/5.1/manual.html#2.5.6 -;; -;; Ragg doesn't yet automatically desugar operator precedence rules. -;; I'm doing it by hand at the moment, which is not ideal, so a future version of -;; ragg will have a story about describing precedence. -;; -;; Operator precedence in Lua follows the table below, from lower to higher priority: -;; -;; or exp_1 -;; and exp_2 -;; < > <= >= ~= == exp_3 -;; .. exp_4 -;; + - exp_5 -;; * / % exp_6 -;; not # - (unary) exp_7 -;; ^ exp_8 -;; -;; As usual, you can use parentheses to change the precedences of an expression. -;; The concatenation ('..') and exponentiation ('^') operators are right associative. -;; All other binary operators are left associative. -;; -;; The original grammar rule before encoding precedence was: -;; -;; exp : NIL | FALSE | TRUE | NUMBER | STRING | "..." | function | -;; prefixexp | tableconstructor | exp binop exp | unop exp - -exp : exp_1 -exp_1: exp_1 binop_1 exp_2 | exp_2 -exp_2: exp_2 binop_2 exp_3 | exp_3 -exp_3: exp_3 binop_3 exp_4 | exp_4 -exp_4: exp_5 binop_4 exp_4 | exp_5 ;; right associative -exp_5: exp_5 binop_5 exp_6 | exp_6 -exp_6: exp_6 binop_6 exp_7 | exp_7 -exp_7: unop exp_8 -exp_8: exp_9 binop_8 exp_8 | exp_9 ;; right associative -exp_9: NIL | FALSE | TRUE | NUMBER | STRING | "..." | function | - prefixexp | tableconstructor -binop_1: OR -binop_2: AND -binop_3: "<" | ">" | "<=" | ">=" | "~=" | "==" -binop_4: ".." -binop_5: "+" | "-" -binop_6: "*" | "/" | "%" -binop_8: "^" -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - -prefixexp : var | functioncall | "(" exp ")" - -functioncall : prefixexp args | prefixexp ":" NAME args - -args : "(" [explist] ")" | tableconstructor | STRING - -function : FUNCTION funcbody - -funcbody : "(" [parlist] ")" block END - -parlist : namelist ["," "..."] | "..." - -tableconstructor : "{" [fieldlist] "}" - -fieldlist : field (fieldsep field)* [fieldsep] - -field : "[" exp "]" "=" exp | NAME "=" exp | exp - -fieldsep : "," | ";" - -binop : "+" | "-" | "*" | "/" | "^" | "%" | ".." | - "<" | "<=" | ">" | ">=" | "==" | "~=" | - AND | OR - -unop : "-" | NOT | "#" \ No newline at end of file diff --git a/brag/brag/examples/nested-word-list.rkt b/brag/brag/examples/nested-word-list.rkt deleted file mode 100755 index b7489c4..0000000 --- a/brag/brag/examples/nested-word-list.rkt +++ /dev/null @@ -1,3 +0,0 @@ -#lang brag -nested-word-list: WORD - | LEFT-PAREN nested-word-list* RIGHT-PAREN diff --git a/brag/brag/examples/simple-arithmetic-grammar.rkt b/brag/brag/examples/simple-arithmetic-grammar.rkt deleted file mode 100755 index 69a5f2c..0000000 --- a/brag/brag/examples/simple-arithmetic-grammar.rkt +++ /dev/null @@ -1,5 +0,0 @@ -#lang brag - -expr : term ('+' term)* -term : factor ('*' factor)* -factor : INT diff --git a/brag/brag/examples/simple-line-drawing.rkt b/brag/brag/examples/simple-line-drawing.rkt deleted file mode 100755 index b97bdf5..0000000 --- a/brag/brag/examples/simple-line-drawing.rkt +++ /dev/null @@ -1,10 +0,0 @@ -#lang brag - -;; -;; See: http://stackoverflow.com/questions/12345647/rewrite-this-script-by-designing-an-interpreter-in-racket -;; - -drawing: rows* -rows: repeat chunk+ ";" -repeat: INTEGER -chunk: INTEGER STRING diff --git a/brag/brag/examples/simple-line-drawing/examples/letter-i.rkt b/brag/brag/examples/simple-line-drawing/examples/letter-i.rkt deleted file mode 100755 index 41f6570..0000000 --- a/brag/brag/examples/simple-line-drawing/examples/letter-i.rkt +++ /dev/null @@ -1,4 +0,0 @@ -#lang brag/examples/simple-line-drawing -3 9 X; -6 3 b 3 X 3 b; -3 9 X; diff --git a/brag/brag/examples/simple-line-drawing/grammar.rkt b/brag/brag/examples/simple-line-drawing/grammar.rkt deleted file mode 100755 index b97bdf5..0000000 --- a/brag/brag/examples/simple-line-drawing/grammar.rkt +++ /dev/null @@ -1,10 +0,0 @@ -#lang brag - -;; -;; See: http://stackoverflow.com/questions/12345647/rewrite-this-script-by-designing-an-interpreter-in-racket -;; - -drawing: rows* -rows: repeat chunk+ ";" -repeat: INTEGER -chunk: INTEGER STRING diff --git a/brag/brag/examples/simple-line-drawing/interpret.rkt b/brag/brag/examples/simple-line-drawing/interpret.rkt deleted file mode 100755 index e6cec0c..0000000 --- a/brag/brag/examples/simple-line-drawing/interpret.rkt +++ /dev/null @@ -1,31 +0,0 @@ -#lang racket/base -(require syntax/parse) - -(provide interpret-drawing) - -(define (interpret-drawing drawing-stx) - (syntax-parse drawing-stx - [({~literal drawing} row-stxs ...) - - (for ([row-stx (syntax->list #'(row-stxs ...))]) - (interpret-row row-stx))])) - - -(define (interpret-row row-stx) - (syntax-parse row-stx - [({~literal rows} - ({~literal repeat} repeat-number) - chunks ... ";") - - (for ([i (syntax-e #'repeat-number)]) - (for ([chunk-stx (syntax->list #'(chunks ...))]) - (interpret-chunk chunk-stx)) - (newline))])) - - -(define (interpret-chunk chunk-stx) - (syntax-parse chunk-stx - [({~literal chunk} chunk-size chunk-string) - - (for ([k (syntax-e #'chunk-size)]) - (display (syntax-e #'chunk-string)))])) diff --git a/brag/brag/examples/simple-line-drawing/lang/reader.rkt b/brag/brag/examples/simple-line-drawing/lang/reader.rkt deleted file mode 100755 index d900f99..0000000 --- a/brag/brag/examples/simple-line-drawing/lang/reader.rkt +++ /dev/null @@ -1,22 +0,0 @@ -#lang s-exp syntax/module-reader -brag/examples/simple-line-drawing/semantics -#:read my-read -#:read-syntax my-read-syntax -#:info my-get-info -#:whole-body-readers? #t - -(require brag/examples/simple-line-drawing/lexer - brag/examples/simple-line-drawing/grammar) - -(define (my-read in) - (syntax->datum (my-read-syntax #f in))) - -(define (my-read-syntax src ip) - (list (parse src (tokenize ip)))) - -(define (my-get-info key default default-filter) - (case key - [(color-lexer) - (dynamic-require 'syntax-color/default-lexer 'default-lexer)] - [else - (default-filter key default)])) diff --git a/brag/brag/examples/simple-line-drawing/lexer.rkt b/brag/brag/examples/simple-line-drawing/lexer.rkt deleted file mode 100755 index 3e0e810..0000000 --- a/brag/brag/examples/simple-line-drawing/lexer.rkt +++ /dev/null @@ -1,27 +0,0 @@ -#lang racket/base - -(provide tokenize) - -;; A simple lexer for simple-line-drawing. -(require brag/support - br-parser-tools/lex) - -(define (tokenize ip) - (port-count-lines! ip) - (define my-lexer - (lexer-src-pos - [(repetition 1 +inf.0 numeric) - (token 'INTEGER (string->number lexeme))] - [upper-case - (token 'STRING lexeme)] - ["b" - (token 'STRING " ")] - [";" - (token ";" lexeme)] - [whitespace - (token 'WHITESPACE lexeme #:skip? #t)] - [(eof) - (void)])) - (define (next-token) (my-lexer ip)) - next-token) - diff --git a/brag/brag/examples/simple-line-drawing/semantics.rkt b/brag/brag/examples/simple-line-drawing/semantics.rkt deleted file mode 100755 index 028662d..0000000 --- a/brag/brag/examples/simple-line-drawing/semantics.rkt +++ /dev/null @@ -1,48 +0,0 @@ -#lang racket/base -(require (for-syntax racket/base syntax/parse)) - -(provide #%module-begin - ;; We reuse Racket's treatment of raw datums, specifically - ;; for strings and numbers: - #%datum - - ;; And otherwise, we provide definitions of these three forms. - ;; During compiliation, Racket uses these definitions to - ;; rewrite into for loops, displays, and newlines. - drawing rows chunk) - -;; Define a few compile-time functions to do the syntax rewriting: -(begin-for-syntax - (define (compile-drawing drawing-stx) - (syntax-parse drawing-stx - [({~literal drawing} row-stxs ...) - - (syntax/loc drawing-stx - (begin row-stxs ...))])) - - (define (compile-rows row-stx) - (syntax-parse row-stx - [({~literal rows} - ({~literal repeat} repeat-number) - chunks ... - ";") - - (syntax/loc row-stx - (for ([i repeat-number]) - chunks ... - (newline)))])) - - (define (compile-chunk chunk-stx) - (syntax-parse chunk-stx - [({~literal chunk} chunk-size chunk-string) - - (syntax/loc chunk-stx - (for ([k chunk-size]) - (display chunk-string)))]))) - - -;; Wire up the use of "drawing", "rows", and "chunk" to these -;; transformers: -(define-syntax drawing compile-drawing) -(define-syntax rows compile-rows) -(define-syntax chunk compile-chunk) diff --git a/brag/brag/examples/statlist-grammar.rkt b/brag/brag/examples/statlist-grammar.rkt deleted file mode 100755 index 18b2561..0000000 --- a/brag/brag/examples/statlist-grammar.rkt +++ /dev/null @@ -1,14 +0,0 @@ -#lang brag - -## Statlist grammar - -statlist : stat+ -stat: ID '=' expr - | 'print' expr - -expr: multExpr ('+' multExpr)* -multExpr: primary (('*'|'.') primary)* -primary : - INT - | ID - | '[' expr ("," expr)* ']' diff --git a/brag/brag/examples/whitespace.rkt b/brag/brag/examples/whitespace.rkt deleted file mode 100644 index b76ebf4..0000000 --- a/brag/brag/examples/whitespace.rkt +++ /dev/null @@ -1,6 +0,0 @@ -#lang brag -start: (tab | space | newline | letter)* -tab: '\t' -space: " " -newline: "\n" -letter: "x" | "y" | "z" \ No newline at end of file diff --git a/brag/brag/examples/wordy.rkt b/brag/brag/examples/wordy.rkt deleted file mode 100755 index 66c9042..0000000 --- a/brag/brag/examples/wordy.rkt +++ /dev/null @@ -1,7 +0,0 @@ -#lang brag -;; A parser for a silly language -sentence: verb optional-adjective object -verb: greeting -optional-adjective: ["happy" | "frumpy"] -greeting: "hello" | "hola" | "aloha" -object: "world" | WORLD diff --git a/brag/brag/info.rkt b/brag/brag/info.rkt deleted file mode 100755 index 0477e13..0000000 --- a/brag/brag/info.rkt +++ /dev/null @@ -1,7 +0,0 @@ -#lang setup/infotab -(define name "brag") -(define version "1.0") -(define scribblings '(("brag.scrbl"))) -(define blurb '("brag: the Beautiful Racket AST Generator. A fork of Danny Yoo's ragg. A design goal is to be easy for beginners to use. Given a grammar in EBNF, brag produces a parser that generates Racket's native syntax objects with full source location.")) -(define deps (list)) -(define test-omit-paths '("examples/simple-line-drawing/examples/letter-i.rkt")) \ No newline at end of file diff --git a/brag/brag/main.rkt b/brag/brag/main.rkt deleted file mode 100755 index 6ae6606..0000000 --- a/brag/brag/main.rkt +++ /dev/null @@ -1,5 +0,0 @@ -#lang racket/base - -(module+ reader - (require "codegen/reader.rkt") - (provide (all-from-out "codegen/reader.rkt"))) diff --git a/brag/brag/private/internal-support.rkt b/brag/brag/private/internal-support.rkt deleted file mode 100755 index 0beec3e..0000000 --- a/brag/brag/private/internal-support.rkt +++ /dev/null @@ -1,36 +0,0 @@ -#lang racket/base - -(require brag/support) - -(provide current-source - current-parser-error-handler - current-tokenizer-error-handler) - -;; During parsing, we should define the source of the input. -(define current-source (make-parameter #f)) - - -;; When an parse error happens, we call the current-parser-error-handler: -(define current-parser-error-handler - (make-parameter - (lambda (tok-name tok-value offset line col span) - (raise (exn:fail:parsing - (format "Encountered parsing error near ~e (token ~e) while parsing ~e [line=~a, column=~a, offset=~a]" - tok-value tok-name - (current-source) - line col offset) - (current-continuation-marks) - (list (srcloc (current-source) line col offset span))))))) - -;; When a tokenization error happens, we call the current-tokenizer-error-handler. -(define current-tokenizer-error-handler - (make-parameter - (lambda (tok-type tok-value offset line column span) - (raise (exn:fail:parsing - (format "Encountered unexpected token ~e (~e) while parsing ~e [line=~a, column=~a, offset=~a]" - tok-type - tok-value - (current-source) - line column offset) - (current-continuation-marks) - (list (srcloc (current-source) line column offset span))))))) diff --git a/brag/brag/rules/lexer.rkt b/brag/brag/rules/lexer.rkt deleted file mode 100755 index e55991b..0000000 --- a/brag/brag/rules/lexer.rkt +++ /dev/null @@ -1,131 +0,0 @@ -#lang racket/base -(require (for-syntax racket/base "parser.rkt")) -(require br-parser-tools/lex - (prefix-in : br-parser-tools/lex-sre) - "parser.rkt" - "rule-structs.rkt" - racket/string) - -(provide lex/1 tokenize) - -;; A newline can be any one of the following. -(define-lex-abbrev NL (:or "\r\n" "\r" "\n")) - -;; chars used for quantifiers & parse-tree filtering -(define-for-syntax quantifiers "+:*") ; colon is reserved to separate rules and productions -(define-lex-trans reserved-chars - (λ(stx) #`(char-set #,(format "~a~a~a" quantifiers hide-char splice-char)))) - -(define-lex-trans hide-char-trans (λ(stx) #`(char-set #,(format "~a" hide-char)))) -(define-lex-trans splice-char-trans (λ(stx) #`(char-set #,(format "~a" splice-char)))) - -(define-lex-abbrevs - [letter (:or (:/ "a" "z") (:/ #\A #\Z))] - [digit (:/ #\0 #\9)] - [id-char (:or letter digit (:& (char-set "+:*@!-.$%&/=?^_~<>") (char-complement (reserved-chars))))] - [hide-char (hide-char-trans)] - [splice-char (splice-char-trans)] - ) - -(define-lex-abbrev id (:& (complement (:+ digit)) (:+ id-char))) - -(define lex/1 - (lexer-src-pos - ;; handle whitespace chars within quotes as literal tokens: "\n" "\t" '\n' '\t' - ;; by matching the escaped version, and then unescaping them before they become token-LITs - [(:: "'" - (:* (:or "\\'" "\\n" "\\t" (:~ "'" "\\"))) - "'") - (token-LIT (case lexeme - [("'\\n'") "'\n'"] - [("'\\t'") "'\t'"] - [else lexeme]))] - [(:: "\"" - (:* (:or "\\\"" "\\n" "\\t" (:~ "\"" "\\"))) - "\"") - (token-LIT (case lexeme - [("\"\\n\"") "\"\n\""] - [("\"\\t\"") "\"\t\""] - [else lexeme]))] - ["(" - (token-LPAREN lexeme)] - ["[" - (token-LBRACKET lexeme)] - [")" - (token-RPAREN lexeme)] - ["]" - (token-RBRACKET lexeme)] - [hide-char - (token-HIDE lexeme)] - [splice-char - (token-SPLICE lexeme)] - ["|" - (token-PIPE lexeme)] - [(:or "+" "*") - (token-REPEAT lexeme)] - [whitespace - ;; Skip whitespace - (return-without-pos (lex/1 input-port))] - ;; Skip comments up to end of line - ;; but detect possble kwargs. - [(:: (:or "#" ";") ; remove # as comment char - (complement (:: (:* any-char) NL (:* any-char))) - (:or NL "")) - (let ([maybe-kwarg-match (regexp-match #px"^#:(.*?)\\s*(.*?)$" lexeme)]) - (when maybe-kwarg-match - (let* ([parts (map string->symbol (string-split (string-trim lexeme "#:" #:right? #f)))] - [kw (car parts)][val (cadr parts)]) - (case kw - [(prefix-out) (current-prefix-out val)] - [else (error 'lexer (format "got unknown keyword ~a" kw))]))) - (return-without-pos (lex/1 input-port)))] - [(eof) - (token-EOF lexeme)] - [(:: id (:* whitespace) ":") - (token-RULE_HEAD lexeme)] - [(:: hide-char id (:* whitespace) ":") - (token-RULE_HEAD_HIDDEN lexeme)] - [(:: splice-char id (:* whitespace) ":") - (token-RULE_HEAD_SPLICED lexeme)] - [id - (token-ID lexeme)] - - ;; We call the error handler for everything else: - [(:: any-char) - (let-values ([(rest-of-text end-pos-2) - (lex-nonwhitespace input-port)]) - ((current-parser-error-handler) - #f - 'error - (string-append lexeme rest-of-text) - (position->pos start-pos) - (position->pos end-pos-2)))])) - - -;; This is the helper for the error production. -(define lex-nonwhitespace - (lexer - [(:+ (char-complement whitespace)) - (values lexeme end-pos)] - [any-char - (values lexeme end-pos)] - [(eof) - (values "" end-pos)])) - - - -;; position->pos: position -> pos -;; Coerses position structures from br-parser-tools/lex to our own pos structures. -(define (position->pos a-pos) - (pos (position-offset a-pos) - (position-line a-pos) - (position-col a-pos))) - - - -;; tokenize: input-port -> (-> token) -(define (tokenize ip - #:source [source (object-name ip)]) - (lambda () - (parameterize ([file-path source]) - (lex/1 ip)))) diff --git a/brag/brag/rules/parser.rkt b/brag/brag/rules/parser.rkt deleted file mode 100755 index 4cf9608..0000000 --- a/brag/brag/rules/parser.rkt +++ /dev/null @@ -1,281 +0,0 @@ -#lang racket/base -(require br-parser-tools/yacc - br-parser-tools/lex - racket/list - racket/match - "rule-structs.rkt") - -;; A parser for grammars. - -(provide hide-char - splice-char - tokens - token-LPAREN - token-RPAREN - token-HIDE ; for hider - token-SPLICE ; for splicer - token-LBRACKET - token-RBRACKET - token-PIPE - token-REPEAT - token-RULE_HEAD - token-RULE_HEAD_HIDDEN - token-RULE_HEAD_SPLICED - token-ID - token-LIT - token-EOF - grammar-parser - - current-source - current-parser-error-handler - current-prefix-out - - [struct-out rule] - [struct-out lhs-id] - [struct-out pattern] - [struct-out pattern-id] - [struct-out pattern-lit] - [struct-out pattern-token] - [struct-out pattern-choice] - [struct-out pattern-repeat] - [struct-out pattern-maybe] - [struct-out pattern-seq]) - -(define-tokens tokens (LPAREN - RPAREN - LBRACKET - RBRACKET - HIDE - SPLICE - PIPE - REPEAT - RULE_HEAD - RULE_HEAD_HIDDEN - RULE_HEAD_SPLICED - ID - LIT - EOF)) - -(define hide-char #\/) -(define splice-char #\@) - -;; grammar-parser: (-> token) -> (listof rule) -(define grammar-parser - (parser - (tokens tokens) - (src-pos) - (start rules) - (end EOF) - - (grammar - [rules - [(rules*) $1]] - - [rules* - [(rule rules*) - (cons $1 $2)] - [() - '()]] - - ;; I have a separate token type for rule identifiers to avoid the - ;; shift/reduce conflict that happens with the implicit sequencing - ;; of top-level rules. i.e. the parser can't currently tell, when - ;; it sees an ID, if it should shift or reduce to a new rule. - [rule - [(RULE_HEAD pattern) - (begin - (define trimmed (regexp-replace #px"\\s*:$" $1 "")) - (rule (position->pos $1-start-pos) - (position->pos $2-end-pos) - (lhs-id (position->pos $1-start-pos) - (pos (+ (position-offset $1-start-pos) - (string-length trimmed)) - (position-line $1-start-pos) - (position-col $1-start-pos)) - trimmed - #f) - $2))] - - [(RULE_HEAD_HIDDEN pattern) ; slash indicates hiding - (begin - (define trimmed (cadr (regexp-match (pregexp (format "~a(\\S+)\\s*:$" hide-char)) $1))) - (rule (position->pos $1-start-pos) - (position->pos $2-end-pos) - (lhs-id (position->pos $1-start-pos) - (pos (+ (position-offset $1-start-pos) - (string-length trimmed) - (string-length "!")) - (position-line $1-start-pos) - (position-col $1-start-pos)) - trimmed - ''hide) ; symbol needs to be double quoted in this case - $2))] - - [(RULE_HEAD_SPLICED pattern) ; atsign indicates splicing - (begin - (define trimmed (cadr (regexp-match (pregexp (format "~a(\\S+)\\s*:$" splice-char)) $1))) - (rule (position->pos $1-start-pos) - (position->pos $2-end-pos) - (lhs-id (position->pos $1-start-pos) - (pos (+ (position-offset $1-start-pos) - (string-length trimmed) - (string-length "@")) - (position-line $1-start-pos) - (position-col $1-start-pos)) - trimmed - ''splice) ; symbol needs to be double quoted in this case - $2))]] - - [pattern - [(implicit-pattern-sequence PIPE pattern) - (if (pattern-choice? $3) - (pattern-choice (position->pos $1-start-pos) - (position->pos $3-end-pos) - (cons $1 (pattern-choice-vals $3))) - (pattern-choice (position->pos $1-start-pos) - (position->pos $3-end-pos) - (list $1 $3)))] - [(implicit-pattern-sequence) - $1]] - - [implicit-pattern-sequence - [(repeatable-pattern implicit-pattern-sequence) - (if (pattern-seq? $2) - (pattern-seq (position->pos $1-start-pos) - (position->pos $2-end-pos) - (cons $1 (pattern-seq-vals $2))) - (pattern-seq (position->pos $1-start-pos) - (position->pos $2-end-pos) - (list $1 $2)))] - [(repeatable-pattern) - $1]] - - [repeatable-pattern - [(atomic-pattern REPEAT) - (cond [(string=? $2 "*") - (pattern-repeat (position->pos $1-start-pos) - (position->pos $2-end-pos) - 0 $1)] - [(string=? $2 "+") - (pattern-repeat (position->pos $1-start-pos) - (position->pos $2-end-pos) - 1 $1)] - [else - (error 'grammar-parse "unknown repetition operator ~e" $2)])] - [(atomic-pattern) - $1]] - - [atomic-pattern - [(LIT) - (pattern-lit (position->pos $1-start-pos) - (position->pos $1-end-pos) - (substring $1 1 (sub1 (string-length $1))) - #f)] - - [(ID) - (if (token-id? $1) - (pattern-token (position->pos $1-start-pos) - (position->pos $1-end-pos) - $1 - #f) - (pattern-id (position->pos $1-start-pos) - (position->pos $1-end-pos) - $1 - #f))] - - [(LBRACKET pattern RBRACKET) - (pattern-maybe (position->pos $1-start-pos) - (position->pos $3-end-pos) - $2)] - - [(LPAREN pattern RPAREN) - (relocate-pattern $2 (position->pos $1-start-pos) (position->pos $3-end-pos))] - - [(HIDE atomic-pattern) - (relocate-pattern $2 (position->pos $1-start-pos) (position->pos $2-end-pos) 'hide)] - - [(SPLICE ID) - ;; only works for nonterminals on the right side - ;; (meaningless with terminals) - (if (token-id? $2) - (error 'brag "Can't use splice operator with terminal") - (pattern-id (position->pos $1-start-pos) - (position->pos $2-end-pos) - $2 - 'splice))]]) - - - (error (lambda (tok-ok? tok-name tok-value start-pos end-pos) - ((current-parser-error-handler) tok-ok? tok-name tok-value (position->pos start-pos) (position->pos end-pos)))))) - -;; relocate-pattern: pattern -> pattern -;; Rewrites the pattern's start and end pos accordingly. -(define (relocate-pattern a-pat start-pos end-pos [hide? #f]) - (match a-pat - [(pattern-id _ _ v h) - (pattern-id start-pos end-pos v (or hide? h))] - [(pattern-token _ _ v h) - (pattern-token start-pos end-pos v (or hide? h))] - [(pattern-lit _ _ v h) - (pattern-lit start-pos end-pos v (or hide? h))] - [(pattern-choice _ _ vs) - (pattern-choice start-pos end-pos vs)] - [(pattern-repeat _ _ m v) - (pattern-repeat start-pos end-pos m v)] - [(pattern-maybe _ _ v) - (pattern-maybe start-pos end-pos v)] - [(pattern-seq _ _ vs) - (pattern-seq start-pos end-pos vs)] - [else - (error 'relocate-pattern "Internal error when relocating ~s\n" a-pat)])) - - -; token-id: string -> boolean -;; Produces true if the id we see should be treated as the name of a token. -;; By convention, tokens are all upper-cased. -(define (token-id? id) - (string=? (string-upcase id) - id)) - - - -;; position->pos: position -> pos -;; Coerses position structures from br-parser-tools/lex to our own pos structures. -(define (position->pos a-pos) - (pos (position-offset a-pos) - (position-line a-pos) - (position-col a-pos))) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; During parsing, we should define the source of the input. -(define current-source (make-parameter #f)) - -(define current-prefix-out (make-parameter #f)) - - -;; When bad things happen, we need to emit errors with source location. -(struct exn:fail:parse-grammar exn:fail (srclocs) - #:transparent - #:property prop:exn:srclocs (lambda (instance) - (exn:fail:parse-grammar-srclocs instance))) - -(define current-parser-error-handler - (make-parameter - (lambda (tok-ok? tok-name tok-value start-pos end-pos) - (raise (exn:fail:parse-grammar - (format "Error while parsing grammar near: ~e [line=~a, column=~a, position=~a]" - tok-value - (pos-line start-pos) - (pos-col start-pos) - (pos-offset start-pos)) - (current-continuation-marks) - (list (srcloc (current-source) - (pos-line start-pos) - (pos-col start-pos) - (pos-offset start-pos) - (if (and (number? (pos-offset end-pos)) - (number? (pos-offset start-pos))) - (- (pos-offset end-pos) - (pos-offset start-pos)) - #f)))))))) diff --git a/brag/brag/rules/rule-structs.rkt b/brag/brag/rules/rule-structs.rkt deleted file mode 100755 index 5b5968e..0000000 --- a/brag/brag/rules/rule-structs.rkt +++ /dev/null @@ -1,43 +0,0 @@ -#lang racket/base -(provide (all-defined-out)) - - -;; We keep our own position structure because br-parser-tools/lex's position -;; structure is non-transparent, hence highly resistant to unit testing. -(struct pos (offset line col) - #:transparent) - -(struct rule (start end lhs pattern) - #:transparent) - -(struct lhs-id (start end val splice) - #:transparent) - -;; A pattern can be one of the following: -(struct pattern (start end) - #:transparent) - -(struct pattern-id pattern (val hide) - #:transparent) - -;; Token structure to be defined by the user -(struct pattern-token pattern (val hide) - #:transparent) - -;; Token structure defined as the literal string to be matched. -(struct pattern-lit pattern (val hide) - #:transparent) - -(struct pattern-choice pattern (vals) - #:transparent) - -(struct pattern-repeat pattern (min ;; either 0 or 1 - val) - #:transparent) - -(struct pattern-maybe pattern (val) - #:transparent) - -(struct pattern-seq pattern (vals) - #:transparent) - diff --git a/brag/brag/rules/runtime.rkt b/brag/brag/rules/runtime.rkt deleted file mode 100755 index e34df13..0000000 --- a/brag/brag/rules/runtime.rkt +++ /dev/null @@ -1,34 +0,0 @@ -#lang racket/base -(require br-parser-tools/lex) - -(provide (all-defined-out)) - -;; During parsing, we should define the source of the input. -(define current-source (make-parameter #f)) - - -;; When bad things happen, we need to emit errors with source location. -(struct exn:fail:parse-grammar exn:fail (srclocs) - #:transparent - #:property prop:exn:srclocs (lambda (instance) - (exn:fail:parse-grammar-srclocs instance))) - -(define current-parser-error-handler - (make-parameter - (lambda (tok-ok? tok-name tok-value start-pos end-pos) - (raise (exn:fail:parse-grammar - (format "Error while parsing grammar near: ~e [line=~a, column~a, position=~a]" - tok-value - (position-line start-pos) - (position-col start-pos) - (position-offset start-pos)) - (current-continuation-marks) - (list (srcloc (current-source) - (position-line start-pos) - (position-col start-pos) - (position-offset start-pos) - (if (and (number? (position-offset end-pos)) - (number? (position-offset start-pos))) - (- (position-offset end-pos) - (position-offset start-pos)) - #f)))))))) diff --git a/brag/brag/rules/stx-types.rkt b/brag/brag/rules/stx-types.rkt deleted file mode 100755 index e0ac70a..0000000 --- a/brag/brag/rules/stx-types.rkt +++ /dev/null @@ -1,16 +0,0 @@ -#lang racket/base -(provide (all-defined-out)) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; These are just here to provide bindings for Check Syntax. -;; Otherwise, we should never hit these, as the toplevel rules-codegen -;; should eliminate all uses of these if it does the right thing. -(define (rules stx) (raise-syntax-error #f "Used out of context of rules" stx)) -(define (rule stx) (raise-syntax-error #f "Used out of context of rules" stx)) -(define (id stx) (raise-syntax-error #f "Used out of context of rules" stx)) -(define (lit stx) (raise-syntax-error #f "Used out of context of rules" stx)) -(define (token stx) (raise-syntax-error #f "Used out of context of rules" stx)) -(define (choice stx) (raise-syntax-error #f "Used out of context of rules" stx)) -(define (repeat stx) (raise-syntax-error #f "Used out of context of rules" stx)) -(define (maybe stx) (raise-syntax-error #f "Used out of context of rules" stx)) -(define (seq stx) (raise-syntax-error #f "Used out of context of rules" stx)) \ No newline at end of file diff --git a/brag/brag/rules/stx.rkt b/brag/brag/rules/stx.rkt deleted file mode 100755 index 9c66685..0000000 --- a/brag/brag/rules/stx.rkt +++ /dev/null @@ -1,88 +0,0 @@ -#lang racket/base - -(require "rule-structs.rkt" - br-parser-tools/lex - racket/match - syntax/strip-context) - -(provide rules->stx) - -;; Given a sequence of rules, we translate these to syntax objects. - -;; rules->stx: (listof rule) -> syntax -(define (rules->stx source rules #:original-stx [original-stx #f]) - (define rule-stxs - (map (lambda (stx) (rule->stx source stx)) - rules)) - (datum->syntax #f - `(rules ,@rule-stxs) - original-stx)) - - -(define (rule->stx source a-rule) - (define id-stx - (syntax-property - (datum->syntax #f - (string->symbol (lhs-id-val (rule-lhs a-rule))) - (list source - (pos-line (lhs-id-start (rule-lhs a-rule))) - (pos-col (lhs-id-start (rule-lhs a-rule))) - (pos-offset (lhs-id-start (rule-lhs a-rule))) - (if (and (number? (pos-offset (lhs-id-start (rule-lhs a-rule)))) - (number? (pos-offset (lhs-id-end (rule-lhs a-rule))))) - (- (pos-offset (lhs-id-end (rule-lhs a-rule))) - (pos-offset (lhs-id-start (rule-lhs a-rule)))) - #f))) - 'hide-or-splice-lhs-id (lhs-id-splice (rule-lhs a-rule)))) - (define pattern-stx (pattern->stx source (rule-pattern a-rule))) - (define line (pos-line (rule-start a-rule))) - (define column (pos-col (rule-start a-rule))) - (define position (pos-offset (rule-start a-rule))) - (define span (if (and (number? (pos-offset (rule-start a-rule))) - (number? (pos-offset (rule-end a-rule)))) - (- (pos-offset (rule-end a-rule)) - (pos-offset (rule-start a-rule))) - #f)) - (datum->syntax #f - `(rule ,id-stx ,pattern-stx) - (list source line column position span))) - -(define (pattern->stx source a-pattern) - (define recur (lambda (s) (pattern->stx source s))) - - (define line (pos-line (pattern-start a-pattern))) - (define column (pos-col (pattern-start a-pattern))) - (define position (pos-offset (pattern-start a-pattern))) - (define span (if (and (number? (pos-offset (pattern-start a-pattern))) - (number? (pos-offset (pattern-end a-pattern)))) - (- (pos-offset (pattern-end a-pattern)) - (pos-offset (pattern-start a-pattern))) - #f)) - (define source-location (list source line column position span)) - (match a-pattern - [(struct pattern-id (start end val hide)) - (syntax-property - (datum->syntax #f - `(id ,(datum->syntax #f (string->symbol val) source-location)) - source-location) - 'hide hide)] - [(struct pattern-lit (start end val hide)) - (syntax-property - (datum->syntax #f - `(lit ,(datum->syntax #f val source-location)) - source-location) - 'hide hide)] - [(struct pattern-token (start end val hide)) - (syntax-property - (datum->syntax #f - `(token ,(datum->syntax #f (string->symbol val) source-location)) - source-location) - 'hide hide)] - [(struct pattern-choice (start end vals)) - (datum->syntax #f`(choice ,@(map recur vals)) source-location)] - [(struct pattern-repeat (start end min val)) - (datum->syntax #f`(repeat ,min ,(recur val)) source-location)] - [(struct pattern-maybe (start end val)) - (datum->syntax #f`(maybe ,(recur val)) source-location)] - [(struct pattern-seq (start end vals)) - (datum->syntax #f`(seq ,@(map recur vals)) source-location)])) diff --git a/brag/brag/support.rkt b/brag/brag/support.rkt deleted file mode 100755 index 232fbf4..0000000 --- a/brag/brag/support.rkt +++ /dev/null @@ -1,143 +0,0 @@ -#lang racket/base -(require br-parser-tools/lex - racket/string - racket/struct - (prefix-in : br-parser-tools/lex-sre) - (for-syntax racket/base)) -(provide (all-from-out br-parser-tools/lex) - (all-from-out br-parser-tools/lex-sre) - [struct-out token-struct] - token - [struct-out exn:fail:parsing]) - - - -(define (token-print token port mode) - (write-string (format "~a" - (cons 'token-struct - (map (λ(proc) (format "~v" (proc token))) - (list - token-struct-type - token-struct-val - token-struct-line - token-struct-column - token-struct-offset - token-struct-span - token-struct-skip?)))) port)) - - -(struct token-struct (type val offset line column span skip?) - #:auto-value #f - #:transparent) - - -;; Token constructor. -;; This is intended to be a general token structure constructor that's nice -;; to work with. -;; It should cooperate with the tokenizers constructed with make-permissive-tokenizer. -(define (token type ;; (U symbol string) - [val #f] ;; any - [srcloc #f] - #:position [position #f] ;; (U #f number) - #:line [line #f] ;; (U #f number) - #:column [column #f] ;; (U #f number) - #:span [span #f] ;; boolean - #:skip? [skip? #f]) - (token-struct (if (string? type) (string->symbol type) type) - val - ;; keyword values take precedence over srcloc values - (or position (and srcloc (srcloc-position srcloc))) - (or line (and srcloc (srcloc-line srcloc))) - (or column (and srcloc (srcloc-column srcloc))) - (or span (and srcloc (srcloc-span srcloc))) - skip?)) - - -;; When bad things happen, we need to emit errors with source location. -(struct exn:fail:parsing exn:fail (srclocs) - #:transparent - #:property prop:exn:srclocs (lambda (instance) - (exn:fail:parsing-srclocs instance))) - - -(provide apply-lexer) -(define (apply-lexer lexer val) - (for/list ([t (in-port lexer (if (string? val) (open-input-string val) val))]) - t)) - -(provide apply-tokenizer-maker - (rename-out [apply-tokenizer-maker apply-tokenizer])) -(define (apply-tokenizer-maker tokenize in) - (define input-port (if (string? in) - (open-input-string in) - in)) - (define token-producer (tokenize input-port)) - (for/list ([token (in-producer token-producer (λ(tok) - (define val (cond - ;; position-tokens are produced by lexer-src-pos, - [(position-token? tok) - (position-token-token tok)] - ;; and srcloc-tokens by lexer-srcloc - [(srcloc-token? tok) - (srcloc-token-token tok)] - [else tok])) - (or (eof-object? val) (void? val))))]) - token)) - -(provide apply-colorer) -(define (apply-colorer colorer port-or-string) - (define p (if (string? port-or-string) - (open-input-string port-or-string) - port-or-string)) - (let loop ([acc null]) - (define-values (lex cat shape start end) (colorer p)) - (if (or (eq? 'eof cat) (eof-object? lex)) - (reverse acc) - (loop (cons (list lex cat shape start end) acc))))) - -(provide trim-ends) -(define (trim-ends left lexeme right) - (string-trim (string-trim lexeme left #:right? #f) right #:left? #f)) - -(provide from/to) -(define-lex-trans from/to - (λ(stx) - (syntax-case stx () - [(_ OPEN CLOSE) - ;; (:seq any-string CLOSE any-string) pattern makes it non-greedy - #'(:seq OPEN (complement (:seq any-string CLOSE any-string)) CLOSE)]))) - -(provide from/stop-before) -(define-lex-trans from/stop-before - (λ(stx) - (syntax-case stx () - [(_ OPEN CLOSE) - #'(:seq OPEN (:* (:~ CLOSE)))]))) - -(provide uc+lc) -(define-lex-trans uc+lc - (λ(stx) - (syntax-case stx () - [(_ . STRS) - (with-syntax ([(UCSTR ...) (map (compose1 string-upcase syntax->datum) (syntax->list #'STRS))] - [(LCSTR ...) (map (compose1 string-downcase syntax->datum) (syntax->list #'STRS))]) - #'(union (union UCSTR ...) (union LCSTR ...)))]))) - -;; change names of lexer abbreviations to be consistent with Racket srcloc conventions - -(define-syntax-rule (dprt ID-IN ID-OUT) - (begin - (provide ID-IN) - (define-syntax ID-IN (make-rename-transformer (syntax ID-OUT))))) - -(dprt lexeme-start start-pos) -(dprt lexeme-end end-pos) -(dprt line position-line) -(dprt col position-col) -(dprt pos position-offset) - -(provide span) -(define (span lexeme-start lexeme-end) - (abs ; thus same result in reverse order - (- (pos lexeme-end) - (pos lexeme-start)))) \ No newline at end of file diff --git a/brag/brag/test/test-01-equal.rkt b/brag/brag/test/test-01-equal.rkt deleted file mode 100755 index dcb2138..0000000 --- a/brag/brag/test/test-01-equal.rkt +++ /dev/null @@ -1,30 +0,0 @@ -#lang racket/base - -(require brag/examples/01-equal - rackunit) - -(check-equal? (syntax->datum (parse "")) - '(equal)) -(check-equal? (syntax->datum (parse "01")) - '(equal (zero (equal) #\0) - (one (equal) #\1))) -(check-equal? (syntax->datum (parse "10")) - '(equal (one (equal) #\1) - (zero (equal) #\0))) -(check-equal? (syntax->datum (parse "0011")) - '(equal (zero (equal) #\0) - (one (equal (zero (equal) #\0) - (one (equal) #\1)) - #\1))) -(check-equal? (syntax->datum (parse "0110")) - '(equal (one (equal (zero (equal) #\0) - (one (equal) #\1)) - #\1) - (zero (equal) #\0))) - -(check-equal? (syntax->datum (parse "1100")) - '(equal (one (equal) #\1) - (zero (equal (one (equal) #\1) - (zero (equal) #\0)) - #\0))) - diff --git a/brag/brag/test/test-0n1.rkt b/brag/brag/test/test-0n1.rkt deleted file mode 100755 index 5611f6e..0000000 --- a/brag/brag/test/test-0n1.rkt +++ /dev/null @@ -1,50 +0,0 @@ -#lang racket/base - -(require brag/examples/0n1 - brag/support - rackunit) - -(define (lex ip) - (port-count-lines! ip) - (lambda () - (define next-char (read-char ip)) - (cond [(eof-object? next-char) - (token eof)] - [(char=? next-char #\0) - (token "0" "0")] - [(char=? next-char #\1) - (token "1" "1")]))) - - -(check-equal? (syntax->datum (parse #f (lex (open-input-string "1")))) - '(rule "1")) - - -(check-equal? (syntax->datum (parse #f (lex (open-input-string "01")))) - '(rule "0" "1")) - - -(check-equal? (syntax->datum (parse #f (lex (open-input-string "001")))) - '(rule "0" "0" "1")) - - -(check-exn exn:fail:parsing? - (lambda () - (parse #f (lex (open-input-string "0"))))) - -(check-exn exn:fail:parsing? - (lambda () - (parse #f (lex (open-input-string "10"))))) - -(check-exn exn:fail:parsing? - (lambda () - (parse #f (lex (open-input-string "010"))))) - - -;; This should fail predictably because we're passing in tokens -;; that the parser doesn't know. -(check-exn exn:fail:parsing? - (lambda () (parse '("zero" "one" "zero")))) -(check-exn (regexp (regexp-quote - "Encountered unexpected token \"zero\" (\"zero\") while parsing")) - (lambda () (parse '("zero" "one" "zero")))) diff --git a/brag/brag/test/test-0n1n.rkt b/brag/brag/test/test-0n1n.rkt deleted file mode 100755 index f7655d5..0000000 --- a/brag/brag/test/test-0n1n.rkt +++ /dev/null @@ -1,49 +0,0 @@ -#lang racket/base -(require brag/examples/0n1n - brag/support - rackunit) - -(define (lex ip) - (port-count-lines! ip) - (lambda () - (define next-char (read-char ip)) - (cond [(eof-object? next-char) - (token eof)] - [(char=? next-char #\0) - (token "0" "0")] - [(char=? next-char #\1) - (token "1" "1")]))) - - -;; The only rule in the grammar is: -;; -;; rule-0n1n: ["0" rule-0n1n "1"] -;; -;; It makes use of the "maybe" pattern. The result type of the -;; grammar rule is: -;; -;; rule-0n1n: (U #f -;; (list "0" rule-0n1n "1")) - -(check-equal? (syntax->datum (parse #f (lex (open-input-string "0011")))) - '(rule-0n1n "0" (rule-0n1n "0" (rule-0n1n) "1") "1")) - -(check-equal? (syntax->datum (parse #f (lex (open-input-string "01")))) - '(rule-0n1n "0" (rule-0n1n) "1")) - -(check-equal? (syntax->datum (parse #f (lex (open-input-string "")))) - '(rule-0n1n)) - -(check-equal? (syntax->datum (parse #f (lex (open-input-string "000111")))) - '(rule-0n1n "0" (rule-0n1n "0" (rule-0n1n "0" (rule-0n1n) "1") "1") "1")) - - - -(check-exn exn:fail:parsing? - (lambda () (parse #f (lex (open-input-string "0001111"))))) - -(check-exn exn:fail:parsing? - (lambda () (parse #f (lex (open-input-string "0001110"))))) - -(check-exn exn:fail:parsing? - (lambda () (parse #f (lex (open-input-string "10001110"))))) diff --git a/brag/brag/test/test-all.rkt b/brag/brag/test/test-all.rkt deleted file mode 100755 index 92c3153..0000000 --- a/brag/brag/test/test-all.rkt +++ /dev/null @@ -1,18 +0,0 @@ -#lang racket/base - - -(require "test-0n1.rkt" - "test-0n1n.rkt" - "test-01-equal.rkt" - "test-simple-arithmetic-grammar.rkt" - "test-baby-json.rkt" - "test-baby-json-hider.rkt" - "test-wordy.rkt" - "test-simple-line-drawing.rkt" - "test-flatten.rkt" - "test-lexer.rkt" - "test-parser.rkt" - "test-errors.rkt" - "test-old-token.rkt" - "test-weird-grammar.rkt" - (submod brag/codegen/satisfaction test)) diff --git a/brag/brag/test/test-baby-json-hider.rkt b/brag/brag/test/test-baby-json-hider.rkt deleted file mode 100755 index b7580d6..0000000 --- a/brag/brag/test/test-baby-json-hider.rkt +++ /dev/null @@ -1,19 +0,0 @@ -#lang racket/base -(require brag/examples/baby-json-hider - brag/support - rackunit) - -(define parse-result (parse (list "{" - (token 'ID "message") - ":" - (token 'STRING "'hello world'") - "}"))) -(check-equal? (syntax->datum parse-result) '(json (":"))) - -(define syntaxed-colon-parens (cadr (syntax->list parse-result))) -(check-equal? (syntax->datum (syntax-property syntaxed-colon-parens 'kvpair)) 'kvpair) - -(check-equal? - (syntax->datum - (parse "[[[{}]],[],[[{}]]]")) - '(json (array #\[ (json (array #\[ (json (array #\[ (json) #\])) #\])) #\, (json (array #\[ #\])) #\, (json (array #\[ (json (array #\[ (json) #\])) #\])) #\]))) diff --git a/brag/brag/test/test-baby-json.rkt b/brag/brag/test/test-baby-json.rkt deleted file mode 100755 index dff09f4..0000000 --- a/brag/brag/test/test-baby-json.rkt +++ /dev/null @@ -1,25 +0,0 @@ -#lang racket/base -(require brag/examples/baby-json - brag/support - rackunit) - -(check-equal? - (syntax->datum - (parse (list "{" - (token 'ID "message") - ":" - (token 'STRING "'hello world'") - "}"))) - '(json (object "{" - (kvpair "message" ":" (json (string "'hello world'"))) - "}"))) - - -(check-equal? - (syntax->datum - (parse "[[[{}]],[],[[{}]]]")) - '(json (array #\[ (json (array #\[ (json (array #\[ (json (object #\{ #\})) #\])) #\])) #\, (json (array #\[ #\])) #\, (json (array #\[ (json (array #\[ (json (object #\{ #\})) #\])) #\])) #\]))) - - - - diff --git a/brag/brag/test/test-cutter.rkt b/brag/brag/test/test-cutter.rkt deleted file mode 100755 index f4a14ac..0000000 --- a/brag/brag/test/test-cutter.rkt +++ /dev/null @@ -1,9 +0,0 @@ -#lang racket/base -(require brag/examples/cutter - brag/support - rackunit) - -;; related to rule-flattening problem -(check-equal? - (parse-to-datum (list "(" "x" "," "x" ")")) - '(top (expr (list "(" (expr "x") "," (expr "x") ")")))) \ No newline at end of file diff --git a/brag/brag/test/test-errors.rkt b/brag/brag/test/test-errors.rkt deleted file mode 100755 index b84ae34..0000000 --- a/brag/brag/test/test-errors.rkt +++ /dev/null @@ -1,137 +0,0 @@ -#lang racket/base - -(require rackunit - (for-syntax racket/base)) - -;; The tests in this module make sure we produce proper error messages -;; on weird grammars. - - -(define-namespace-anchor anchor) -(define ns (namespace-anchor->namespace anchor)) -(define (c prog) - (parameterize ([current-namespace ns] - [read-accept-reader #t]) - (define ip (open-input-string prog)) - (port-count-lines! ip) - (compile (read-syntax #f ip)))) - - -;; Helper to let me quickly write compile-error checks. -(define-syntax (check-compile-error stx) - (syntax-case stx () - [(_ prog expected-msg) - (quasisyntax/loc stx - (begin #,(syntax/loc stx - (check-exn (regexp (regexp-quote expected-msg)) - (lambda () - (c prog)))) - #,(syntax/loc stx - (check-exn exn:fail:syntax? - (lambda () - (c prog))))))])) - - - - - -;; errors with position are sensitive to length of lang line -(define lang-line "#lang brag") - -(check-compile-error (format "~a" lang-line) - "The grammar does not appear to have any rules") - -(check-compile-error (format "~a\nfoo" lang-line) - "Error while parsing grammar near: foo [line=2, column=0, position=12]") - -(check-compile-error (format "~a\nnumber : 42" lang-line) - "Error while parsing grammar near: 42 [line=2, column=9, position=21]") - -(check-compile-error (format "~a\nnumber : 1" lang-line) - "Error while parsing grammar near: 1 [line=2, column=9, position=21]") - - - -(check-compile-error "#lang brag\n x: NUMBER\nx:STRING" - "Rule x has a duplicate definition") - -;; Check to see that missing definitions for rules also raise good syntax -;; errors: - -(check-compile-error "#lang brag\nx:y" - "Rule y has no definition") - -(check-compile-error "#lang brag\nnumber : 1flarbl" - "Rule 1flarbl has no definition") - - - - -(check-compile-error "#lang brag\nprogram: EOF" - "Token EOF is reserved and can not be used in a grammar") - - - -;; Nontermination checks: -(check-compile-error "#lang brag\nx : x" - "Rule x has no finite derivation") - - - -(check-compile-error #<symbol (format "r~a" n))))) - - -;; Simple literals -(check-equal? (map syntax->datum (flatten-rule #'(rule expr (lit "hello")))) - '((prim-rule lit expr [(lit "hello")]))) - -(check-equal? (map syntax->datum - (flatten-rule #'(rule expr - (seq (lit "hello") - (lit "world"))))) - '((prim-rule seq expr [(lit "hello") (lit "world")]))) - - -(check-equal? (map syntax->datum (flatten-rule #'(rule expr (token HELLO)))) - '((prim-rule token expr [(token HELLO)]))) - -(check-equal? (map syntax->datum (flatten-rule #'(rule expr (id rule-2)))) - '((prim-rule id expr [(id rule-2)]))) - - -;; Sequences of primitives -(check-equal? (map syntax->datum - (flatten-rule #'(rule expr (seq (lit "1") (seq (lit "2") (lit "3")))))) - '((prim-rule seq expr - [(lit "1") (lit "2") (lit "3")]))) - -(check-equal? (map syntax->datum - (flatten-rule #'(rule expr (seq (seq (lit "1") (lit "2")) (lit "3"))))) - '((prim-rule seq expr - [(lit "1") (lit "2") (lit "3")]))) - - -(check-equal? (map syntax->datum - (flatten-rule #'(rule expr (seq (seq (lit "1")) (seq (lit "2") (lit "3")))))) - '((prim-rule seq expr - [(lit "1") (lit "2") (lit "3")]))) - - - -;; choices -(check-equal? (map syntax->datum - (flatten-rule #'(rule expr (choice (id rule-2) (id rule-3))))) - '((prim-rule choice expr - [(id rule-2)] - [(id rule-3)]))) - -(check-equal? (map syntax->datum - (flatten-rule #'(rule sexp (choice (seq (lit "(") (lit ")")) - (seq))) - #:fresh-name (make-fresh-name))) - '((prim-rule choice sexp - [(lit "(") (lit ")")] []))) - -(check-equal? (map syntax->datum - (flatten-rule #'(rule sexp (choice (seq (seq (lit "(") (token BLAH)) - (lit ")")) - (seq))) - #:fresh-name (make-fresh-name))) - '((prim-rule choice sexp - [(lit "(") (token BLAH) (lit ")")] []))) - - - - -;; maybe -(check-equal? (map syntax->datum - (flatten-rule #'(rule expr (maybe (id rule-2))))) - '((prim-rule maybe expr - [(id rule-2)] - []))) -(check-equal? (map syntax->datum - (flatten-rule #'(rule expr (maybe (token HUH))))) - '((prim-rule maybe expr - [(token HUH)] - []))) -(check-equal? (map syntax->datum - (flatten-rule #'(rule expr (maybe (seq (lit "hello") (lit "world")))))) - '((prim-rule maybe expr - [(lit "hello") (lit "world")] - []))) - - - - -;; repeat -(check-equal? (map syntax->datum - (flatten-rule #'(rule rule-2+ (repeat 0 (id rule-2))))) - '((prim-rule repeat rule-2+ - [(inferred-id rule-2+ repeat) (id rule-2)] - []))) -(check-equal? (map syntax->datum - (flatten-rule #'(rule rule-2+ (repeat 0 (seq (lit "+") (id rule-2)))))) - '((prim-rule repeat rule-2+ - [(inferred-id rule-2+ repeat) (lit "+") (id rule-2)] - []))) - -(check-equal? (map syntax->datum - (flatten-rule #'(rule rule-2+ (repeat 1 (id rule-2))))) - '((prim-rule repeat rule-2+ - [(inferred-id rule-2+ repeat) (id rule-2)] - [(id rule-2)]))) -(check-equal? (map syntax->datum - (flatten-rule #'(rule rule-2+ (repeat 1 (seq (lit "-") (id rule-2)))))) - '((prim-rule repeat rule-2+ - [(inferred-id rule-2+ repeat) (lit "-") (id rule-2)] - [(lit "-") (id rule-2)]))) - - - - - - -;; Mixtures - -;; choice and maybe -(check-equal? (map syntax->datum - (flatten-rule #'(rule sexp (choice (lit "x") - (maybe (lit "y")))) - #:fresh-name (make-fresh-name))) - '((prim-rule choice sexp - [(lit "x")] - [(inferred-id r1 maybe)]) - (inferred-prim-rule maybe r1 - [(lit "y")] - []))) -;; choice, maybe, repeat -(check-equal? (map syntax->datum - (flatten-rule #'(rule sexp (choice (lit "x") - (maybe (repeat 1 (lit "y"))))) - #:fresh-name (make-fresh-name))) - '((prim-rule choice sexp - [(lit "x")] - [(inferred-id r1 maybe)]) - (inferred-prim-rule maybe r1 - [(inferred-id r2 repeat)] - []) - (inferred-prim-rule repeat r2 - [(inferred-id r2 repeat) (lit "y")] - [(lit "y")]))) -;; choice, seq -(check-equal? (map syntax->datum - (flatten-rule #'(rule sexp (choice (seq (lit "x") (lit "y")) - (seq (lit "z") (lit "w")))) - #:fresh-name (make-fresh-name))) - '((prim-rule choice sexp - [(lit "x") (lit "y")] - [(lit "z") (lit "w")]))) - -;; maybe, choice -(check-equal? (map syntax->datum - (flatten-rule #'(rule sexp (maybe (choice (seq (lit "x") (lit "y")) - (seq (lit "z") (lit "w"))))) - #:fresh-name (make-fresh-name))) - '((prim-rule maybe sexp - [(inferred-id r1 choice)] - []) - (inferred-prim-rule choice r1 - [(lit "x") (lit "y")] - [(lit "z") (lit "w")]))) - - -;; seq, repeat -(check-equal? (map syntax->datum - (flatten-rule #'(rule expr (seq (id term) (repeat 0 (seq (lit "+") (id term))))) - #:fresh-name (make-fresh-name))) - '((prim-rule seq expr [(id term) (inferred-id r1 repeat)]) - (inferred-prim-rule repeat r1 [(inferred-id r1 repeat) (lit "+") (id term)] []))) - - -;; larger example: simple arithmetic -(check-equal? (map syntax->datum - (flatten-rules (syntax->list - #'((rule expr (seq (id term) (repeat 0 (seq (lit "+") (id term))))) - (rule term (seq (id factor) (repeat 0 (seq (lit "*") (id factor))))) - (rule factor (token INT)))) - #:fresh-name (make-fresh-name))) - - '((prim-rule seq expr [(id term) (inferred-id r1 repeat)]) - (inferred-prim-rule repeat r1 [(inferred-id r1 repeat) (lit "+") (id term)] []) - (prim-rule seq term [(id factor) (inferred-id r2 repeat)]) - (inferred-prim-rule repeat r2 [(inferred-id r2 repeat) (lit "*") (id factor)] []) - (prim-rule token factor [(token INT)]))) diff --git a/brag/brag/test/test-lexer.rkt b/brag/brag/test/test-lexer.rkt deleted file mode 100755 index 49f3774..0000000 --- a/brag/brag/test/test-lexer.rkt +++ /dev/null @@ -1,73 +0,0 @@ -#lang racket/base -(require brag/rules/lexer - rackunit - br-parser-tools/lex) - -(define (l s) - (define t (lex/1 (open-input-string s))) - (list (token-name (position-token-token t)) - (token-value (position-token-token t)) - (position-offset (position-token-start-pos t)) - (position-offset (position-token-end-pos t)))) - -;; WARNING: the offsets are not in terms of file positions. So they -;; start counting at 1, not 0. -(check-equal? (l " hi") - '(ID "hi" 2 4)) - -(check-equal? (l " hi") - '(ID "hi" 3 5)) - -(check-equal? (l "hi") - '(ID "hi" 1 3)) - -(check-equal? (l "# foobar\nhi") - '(ID "hi" 10 12)) - -(check-equal? (l "# foobar\rhi") - '(ID "hi" 10 12)) - -(check-equal? (l "# foobar\r\nhi") - '(ID "hi" 11 13)) - -(check-equal? (l "hi:") - '(RULE_HEAD "hi:" 1 4)) - -(check-equal? (l "hi :") - '(RULE_HEAD "hi :" 1 7)) - -(check-equal? (l "|") - '(PIPE "|" 1 2)) - -(check-equal? (l "(") - '(LPAREN "(" 1 2)) - -(check-equal? (l "[") - '(LBRACKET "[" 1 2)) - -(check-equal? (l ")") - '(RPAREN ")" 1 2)) - -(check-equal? (l "]") - '(RBRACKET "]" 1 2)) - -(check-equal? (l "'hello'") - '(LIT "'hello'" 1 8)) - -(check-equal? (l "'he\\'llo'") - '(LIT "'he\\'llo'" 1 10)) - -(check-equal? (l "/") - '(HIDE "/" 1 2)) - -(check-equal? (l " /") - '(HIDE "/" 2 3)) - -(check-equal? (l "@") - '(SPLICE "@" 1 2)) - -(check-equal? (l " @") - '(SPLICE "@" 2 3)) - -(check-equal? (l "#:prefix-out val:") - (list 'EOF eof 18 18)) ; lexer skips kwarg \ No newline at end of file diff --git a/brag/brag/test/test-old-token.rkt b/brag/brag/test/test-old-token.rkt deleted file mode 100755 index 2654b68..0000000 --- a/brag/brag/test/test-old-token.rkt +++ /dev/null @@ -1,76 +0,0 @@ -#lang racket/base - -;; Make sure the old token type also works fine. - -(require brag/examples/simple-line-drawing - brag/support - racket/list - br-parser-tools/lex - (prefix-in : br-parser-tools/lex-sre) - rackunit) - -(define-tokens tokens (INTEGER STRING |;| EOF)) - -(define (make-tokenizer ip) - (port-count-lines! ip) - (define lex (lexer-src-pos - [(:+ numeric) - (token-INTEGER (string->number lexeme))] - [upper-case - (token-STRING lexeme)] - ["b" - (token-STRING " ")] - [";" - (|token-;| lexeme)] - [whitespace - (return-without-pos (lex input-port))] - [(eof) - (token-EOF 'eof)])) - (lambda () - (lex ip))) - - - -(define the-parsed-object-stx - (parse (make-tokenizer (open-input-string #<list the-parsed-object-stx)))) - -(check-equal? (syntax->datum the-parsed-object-stx) - '(drawing (rows (repeat 3) (chunk 9 "X") ";") - (rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";") - (rows (repeat 3) (chunk 9 "X") ";"))) - -(define the-parsed-object (syntax->list the-parsed-object-stx)) - -(check-equal? (syntax-line the-parsed-object-stx) 1) -(check-equal? (syntax-column the-parsed-object-stx) 0) -(check-equal? (syntax-position the-parsed-object-stx) 1) -(check-equal? (syntax-span the-parsed-object-stx) 28) - -(check-equal? (length the-parsed-object) 4) - -(check-equal? (syntax->datum (second the-parsed-object)) - '(rows (repeat 3) (chunk 9 "X") ";")) -(check-equal? (syntax-line (list-ref the-parsed-object 1)) 1) - -(check-equal? (syntax->datum (third the-parsed-object)) - '(rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";")) -(check-equal? (syntax-line (list-ref the-parsed-object 2)) 2) - -(check-equal? (syntax->datum (fourth the-parsed-object)) - '(rows (repeat 3) (chunk 9 "X") ";")) -(check-equal? (syntax-line (list-ref the-parsed-object 3)) 3) - -;; FIXME: add tests to make sure location is as we expect. -;; -;; FIXME: handle the EOF issue better. Something in cfg-parser -;; appears to deviate from br-parser-tools/yacc with regards to the stop -;; token. diff --git a/brag/brag/test/test-parser.rkt b/brag/brag/test/test-parser.rkt deleted file mode 100755 index 2b56e27..0000000 --- a/brag/brag/test/test-parser.rkt +++ /dev/null @@ -1,153 +0,0 @@ -#lang racket/base - - -(require rackunit - br-parser-tools/lex - brag/rules/parser - brag/rules/lexer - brag/rules/rule-structs) - - -;; quick-and-dirty helper for pos construction. -(define (p x) - (pos x #f #f)) - - - -;; FIXME: fix the test cases so they work on locations rather than just offsets. -(check-equal? (grammar-parser (tokenize (open-input-string "expr : 'hello'"))) - (list (rule (p 1) (p 15) - (lhs-id (p 1) (p 5) "expr" #f) - (pattern-lit (p 8) (p 15) "hello" #f)))) - -(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON"))) - (list (rule (p 1) (p 13) - (lhs-id (p 1) (p 5) "expr" #f) - (pattern-token (p 8) (p 13) "COLON" #f)))) - -(check-equal? (grammar-parser (tokenize (open-input-string "/expr : COLON"))) - (list (rule (p 1) (p 14) - (lhs-id (p 1) (p 6) "expr" ''hide) - (pattern-token (p 9) (p 14) "COLON" #f)))) - -(check-equal? (grammar-parser (tokenize (open-input-string "@expr : COLON"))) - (list (rule (p 1) (p 14) - (lhs-id (p 1) (p 6) "expr" ''splice) - (pattern-token (p 9) (p 14) "COLON" #f)))) - -(check-equal? (grammar-parser (tokenize (open-input-string "expr : /COLON COLON"))) - (list (rule (p 1) (p 20) - (lhs-id (p 1) (p 5) "expr" #f) - (pattern-seq (p 8) (p 20) - (list - (pattern-token (p 8) (p 14) "COLON" 'hide) - (pattern-token (p 15) (p 20) "COLON" #f)))))) - -(check-equal? (grammar-parser (tokenize (open-input-string "expr : /thing COLON"))) - (list (rule (p 1) (p 20) - (lhs-id (p 1) (p 5) "expr" #f) - (pattern-seq (p 8) (p 20) - (list - (pattern-id (p 8) (p 14) "thing" 'hide) - (pattern-token (p 15) (p 20) "COLON" #f)))))) - -(check-equal? (grammar-parser (tokenize (open-input-string "expr : @thing COLON"))) - (list (rule (p 1) (p 20) - (lhs-id (p 1) (p 5) "expr" #f) - (pattern-seq (p 8) (p 20) - (list - (pattern-id (p 8) (p 14) "thing" 'splice) - (pattern-token (p 15) (p 20) "COLON" #f)))))) - -(check-equal? (grammar-parser (tokenize (open-input-string "expr : 'hello'*"))) - (list (rule (p 1) (p 16) - (lhs-id (p 1) (p 5) "expr" #f) - (pattern-repeat (p 8) (p 16) - 0 - (pattern-lit (p 8) (p 15) "hello" #f))))) - -(check-equal? (grammar-parser (tokenize (open-input-string "expr : 'hello'+"))) - (list (rule (p 1) (p 16) - (lhs-id (p 1) (p 5) "expr" #f) - (pattern-repeat (p 8) (p 16) - 1 - (pattern-lit (p 8) (p 15) "hello" #f))))) - -(check-equal? (grammar-parser (tokenize (open-input-string "expr : [/'hello']"))) - (list (rule (p 1) (p 18) - (lhs-id (p 1) (p 5) "expr" #f) - (pattern-maybe (p 8) (p 18) - (pattern-lit (p 9) (p 17) "hello" 'hide))))) - -(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON | BLAH"))) - (list (rule (p 1) (p 20) - (lhs-id (p 1) (p 5) "expr" #f) - (pattern-choice (p 8) (p 20) - (list (pattern-token (p 8) (p 13) "COLON" #f) - (pattern-token (p 16) (p 20) "BLAH" #f)))))) - -(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON | BLAH | BAZ expr"))) - (list (rule (p 1) (p 31) - (lhs-id (p 1) (p 5) "expr" #f) - (pattern-choice (p 8) (p 31) - (list (pattern-token (p 8) (p 13) "COLON" #f) - (pattern-token (p 16) (p 20) "BLAH" #f) - (pattern-seq (p 23) (p 31) - (list (pattern-token (p 23) (p 26) "BAZ" #f) - (pattern-id (p 27) (p 31) "expr" #f)))))))) - -(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two /three"))) - (list (rule (p 1) (p 22) - (lhs-id (p 1) (p 5) "expr" #f) - (pattern-seq (p 8) (p 22) (list (pattern-id (p 8) (p 11) "one" #f) - (pattern-id (p 12) (p 15) "two" #f) - (pattern-id (p 16) (p 22) "three" 'hide)))))) - -(check-equal? (grammar-parser (tokenize (open-input-string "expr : (one two three)"))) - (list (rule (p 1) (p 23) - (lhs-id (p 1) (p 5) "expr" #f) - (pattern-seq (p 8) (p 23) (list (pattern-id (p 9) (p 12) "one" #f) - (pattern-id (p 13) (p 16) "two" #f) - (pattern-id (p 17) (p 22) "three" #f)))))) - -(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two* three"))) - (list (rule (p 1) (p 22) - (lhs-id (p 1) (p 5) "expr" #f) - (pattern-seq (p 8) (p 22) (list (pattern-id (p 8) (p 11) "one" #f) - (pattern-repeat (p 12) (p 16) 0 (pattern-id (p 12) (p 15) "two" #f)) - (pattern-id (p 17) (p 22) "three" #f)))))) - -(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two+ three"))) - (list (rule (p 1) (p 22) - (lhs-id (p 1) (p 5) "expr" #f) - (pattern-seq (p 8) (p 22) (list (pattern-id (p 8) (p 11) "one" #f) - (pattern-repeat (p 12) (p 16) 1 (pattern-id (p 12) (p 15) "two" #f)) - (pattern-id (p 17) (p 22) "three" #f)))))) - -(check-equal? (grammar-parser (tokenize (open-input-string "expr : (one two)+ three"))) - (list (rule (p 1) (p 24) - (lhs-id (p 1) (p 5) "expr" #f) - (pattern-seq (p 8) (p 24) (list (pattern-repeat (p 8) (p 18) 1 - (pattern-seq (p 8) (p 17) - (list (pattern-id (p 9) (p 12) "one" #f) - (pattern-id (p 13) (p 16) "two" #f)))) - (pattern-id (p 19) (p 24) "three" #f)))))) - - -(check-equal? (grammar-parser (tokenize (open-input-string #<number lexeme))] - [whitespace - (token 'WHITESPACE #:skip? #t)] - ["+" - (token '+ "+")] - ["*" - (token '* "*")] - [(eof) - (token eof)])) - (lambda () - (lex/1 ip))) - - -;; expr : term ('+' term)* -;; term : factor (('*') factor)* -;; factor : INT - -(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "42")))) - '(expr (term (factor 42)))) -(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3+4")))) - '(expr (term (factor 3)) - "+" - (term (factor 4)))) -(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3+4+5")))) - '(expr (term (factor 3)) - "+" - (term (factor 4)) - "+" - (term (factor 5)))) - - -(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3*4*5")))) - '(expr (term (factor 3) "*" (factor 4) "*" (factor 5)))) - - -(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3*4 + 5*6")))) - '(expr (term (factor 3) "*" (factor 4)) - "+" - (term (factor 5) "*" (factor 6)))) - -(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "4*5+6")))) - '(expr (term (factor 4) "*" (factor 5)) - "+" - (term (factor 6)))) - -(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "4+5 *6")))) - '(expr (term (factor 4)) - "+" - (term (factor 5) "*" (factor 6)))) - - -(check-exn exn:fail:parsing? - (lambda () (parse #f (tokenize (open-input-string "7+"))))) -(check-exn exn:fail:parsing? - (lambda () (parse #f (tokenize (open-input-string "7+6+"))))) - - -(check-equal? all-token-types - (set '+ '* 'INT)) diff --git a/brag/brag/test/test-simple-line-drawing.rkt b/brag/brag/test/test-simple-line-drawing.rkt deleted file mode 100755 index b9e3d19..0000000 --- a/brag/brag/test/test-simple-line-drawing.rkt +++ /dev/null @@ -1,72 +0,0 @@ -#lang racket/base - -(require brag/examples/simple-line-drawing - brag/support - racket/list - br-parser-tools/lex - (prefix-in : br-parser-tools/lex-sre) - rackunit) - -(define (make-tokenizer ip) - (port-count-lines! ip) - (define lex (lexer-src-pos - [(:+ numeric) - (token 'INTEGER (string->number lexeme))] - [upper-case - (token 'STRING lexeme)] - ["b" - (token 'STRING " ")] - [";" - (token ";" lexeme)] - [whitespace - (token 'WHITESPACE lexeme #:skip? #t)] - [(eof) - (void)])) - (lambda () - (lex ip))) - - - -(define the-parsed-object-stx - (parse (make-tokenizer (open-input-string #<list the-parsed-object-stx)))) - -(check-equal? (syntax->datum the-parsed-object-stx) - '(drawing (rows (repeat 3) (chunk 9 "X") ";") - (rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";") - (rows (repeat 3) (chunk 9 "X") ";"))) - -(define the-parsed-object (syntax->list the-parsed-object-stx)) - -(check-equal? (syntax-line the-parsed-object-stx) 1) -(check-equal? (syntax-column the-parsed-object-stx) 0) -(check-equal? (syntax-position the-parsed-object-stx) 1) -(check-equal? (syntax-span the-parsed-object-stx) 28) - -(check-equal? (length the-parsed-object) 4) - -(check-equal? (syntax->datum (second the-parsed-object)) - '(rows (repeat 3) (chunk 9 "X") ";")) -(check-equal? (syntax-line (list-ref the-parsed-object 1)) 1) - -(check-equal? (syntax->datum (third the-parsed-object)) - '(rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";")) -(check-equal? (syntax-line (list-ref the-parsed-object 2)) 2) - -(check-equal? (syntax->datum (fourth the-parsed-object)) - '(rows (repeat 3) (chunk 9 "X") ";")) -(check-equal? (syntax-line (list-ref the-parsed-object 3)) 3) - -;; FIXME: add tests to make sure location is as we expect. -;; -;; FIXME: handle the EOF issue better. Something in cfg-parser -;; appears to deviate from br-parser-tools/yacc with regards to the stop -;; token. diff --git a/brag/brag/test/test-weird-grammar.rkt b/brag/brag/test/test-weird-grammar.rkt deleted file mode 100755 index 1847feb..0000000 --- a/brag/brag/test/test-weird-grammar.rkt +++ /dev/null @@ -1,7 +0,0 @@ -#lang racket/base - -(require "weird-grammar.rkt" - rackunit) - -(check-equal? (syntax->datum (parse '("foo"))) - '(foo "foo")) diff --git a/brag/brag/test/test-whitespace.rkt b/brag/brag/test/test-whitespace.rkt deleted file mode 100755 index 455ee3f..0000000 --- a/brag/brag/test/test-whitespace.rkt +++ /dev/null @@ -1,12 +0,0 @@ -#lang racket/base -(require brag/examples/whitespace - brag/support - rackunit) - -(check-equal? - (parse-to-datum "\ty\n x\tz") - '(start (tab "\t") (letter "y") (newline "\n") (space " ") (letter "x") (tab "\t") (letter "z"))) - -(check-equal? - (parse-to-datum "\t\n \t") - '(start (tab "\t") (newline "\n") (space " ") (tab "\t"))) diff --git a/brag/brag/test/test-wordy.rkt b/brag/brag/test/test-wordy.rkt deleted file mode 100755 index bcb516d..0000000 --- a/brag/brag/test/test-wordy.rkt +++ /dev/null @@ -1,18 +0,0 @@ -#lang racket/base -(require brag/examples/wordy - brag/support - rackunit) - -(check-equal? - (syntax->datum - (parse (list "hello" "world"))) - '(sentence (verb (greeting "hello")) (optional-adjective) (object "world"))) - - - -(check-equal? - (syntax->datum - (parse (list "hola" "frumpy" (token 'WORLD "세계")))) - - '(sentence (verb (greeting "hola")) (optional-adjective "frumpy") (object "세계"))) - diff --git a/brag/brag/test/weird-grammar.rkt b/brag/brag/test/weird-grammar.rkt deleted file mode 100755 index 094ddcd..0000000 --- a/brag/brag/test/weird-grammar.rkt +++ /dev/null @@ -1,6 +0,0 @@ -#lang brag - -;; This used to fail when we had the yacc-based backend, but -;; cfg-parser seems to be ok with it. - -foo: "foo" diff --git a/brag/info.rkt b/brag/info.rkt deleted file mode 100755 index 0211925..0000000 --- a/brag/info.rkt +++ /dev/null @@ -1,7 +0,0 @@ -#lang setup/infotab - -(define deps '("base" "br-parser-tools-lib" "rackunit-lib")) -(define build-deps '("at-exp-lib" "br-parser-tools-doc" "racket-doc" - "scribble-lib")) -(define collection 'multi) -