Merge pull request #1 from jackfirth/fork

Fork brag into a new package called `yaragg`.
remotes/jackfirth/master
Jack Firth 2 years ago committed by GitHub
commit 1a2df4295e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -4,54 +4,19 @@ on: [push, pull_request]
jobs:
run:
name: "Build using Racket '${{ matrix.racket-version }}' (${{ matrix.racket-variant }})"
name: "Build using Racket (${{ matrix.racket-variant }})"
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
racket-version: ["6.6", "6.7", "6.8", "6.9", "6.10.1", "6.11", "6.12", "7.0", "7.1", "7.2", "7.3", "7.4", "7.5", "7.6", "7.7", "7.8", "7.9", "8.0", "8.1", "8.2", "8.3", "current"]
racket-variant: ["BC", "CS"]
# CS builds are only provided for versions 7.4 and up so avoid
# running the job for prior versions.
exclude:
- {racket-version: "6.6", racket-variant: "CS"}
- {racket-version: "6.7", racket-variant: "CS"}
- {racket-version: "6.8", racket-variant: "CS"}
- {racket-version: "6.9", racket-variant: "CS"}
- {racket-version: "6.10.1", racket-variant: "CS"}
- {racket-version: "6.11", racket-variant: "CS"}
- {racket-version: "6.12", racket-variant: "CS"}
- {racket-version: "7.0", racket-variant: "CS"}
- {racket-version: "7.1", racket-variant: "CS"}
- {racket-version: "7.2", racket-variant: "CS"}
- {racket-version: "7.3", racket-variant: "CS"}
steps:
- name: Checkout
uses: actions/checkout@master
- uses: Bogdanp/setup-racket@v0.11
- uses: actions/checkout@master
- uses: Bogdanp/setup-racket@v1.7
with:
distribution: 'full'
version: ${{ matrix.racket-version }}
version: stable
variant: ${{ matrix.racket-variant }}
- name: Install BR parser tools
run: raco pkg install --deps search-auto https://github.com/mbutterick/br-parser-tools.git?path=br-parser-tools-lib
- name: Run the br-parser-tools tests
run: xvfb-run raco test -p br-parser-tools-lib
- name: Install brag-lib
run: raco pkg install --deps search-auto https://github.com/mbutterick/brag.git?path=brag-lib
- name: Run the brag-lib tests
run: xvfb-run raco test -p brag-lib
- name: Install brag
run: raco pkg install --deps search-auto https://github.com/mbutterick/brag.git?path=brag
- name: Run the brag tests
run: xvfb-run raco test -p brag
- run: raco pkg install --batch --auto --link --name yaragg
- run: xvfb-run raco test --drdr --package yaragg

19
.gitignore vendored

@ -1,19 +1,2 @@
# for Racket
compiled/
*~
# for Mac OS X
.DS_Store
.AppleDouble
.LSOverride
Icon
# Thumbnails
._*
# Files that might appear on external disk
.Spotlight-V100
.Trashes
brag/*.html
brag/*.css
brag/*.js
doc/

@ -1,9 +1,7 @@
MIT License for `brag` (code only)
Copyright 2022 Jack Firth
© 2017-2020 Matthew Butterick
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

@ -1,24 +1,12 @@
## brag ![Build Status](https://github.com/mbutterick/brag/workflows/CI/badge.svg)
Racket DSL for generating parsers from BNF grammars.
## yaragg ![Build Status](https://github.com/jackfirth/yaragg/workflows/CI/badge.svg)
Yet Another Racket AST-Generator Generator. Racket DSL for generating parsers from BNF grammars. Fork of [`brag`](https://pkgs.racket-lang.org/package/brag), which is itself a fork of [`ragg`](https://pkgs.racket-lang.org/package/ragg).
## Install
`raco pkg install brag`
## Documentation
http://docs.racket-lang.org/brag/
`raco pkg install --auto yaragg`
## License
MIT. See `LICENSE.md`
## Project status
Complete. I will maintain the code but no major updates are planned.

@ -0,0 +1,11 @@
`yaragg` contains substantial portions of the software [`brag`](https://github.com/mbutterick/brag)
MIT License for `brag` (code only)
© 2017-2020 Matthew Butterick
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

@ -0,0 +1,11 @@
parser-tools
Copyright (c) 2010-2014 PLT Design Inc.
This package is distributed under the GNU Lesser General Public
License (LGPL). This means that you can link this package into proprietary
applications, provided you follow the rules stated in the LGPL. You
can also modify this package; if you distribute a modified version,
you must distribute it under the terms of the LGPL, which in
particular means that you must release the source code for the
modified software. See http://www.gnu.org/copyleft/lesser.html
for more information.

@ -1,4 +1,4 @@
`brag` contains substantial portions of the software [`ragg`](https://github.com/jbclements/ragg)
`yaragg` contains substantial portions of the software [`ragg`](https://github.com/jbclements/ragg)
MIT License for `ragg`

@ -1,4 +0,0 @@
#lang brag
top : expr (/"," expr)*
expr : "x" | list
list : "(" expr ("," expr)* ")"

@ -1,2 +0,0 @@
#lang brag
start: "a" "\"" "'" "\\" 'a' '"' '\'' '\\'

@ -1,4 +0,0 @@
#lang brag/examples/simple-line-drawing
3 9 X;
6 3 b 3 X 3 b;
3 9 X;

@ -1,3 +0,0 @@
#lang brag
top : ("start" | "atok")+

@ -1,3 +0,0 @@
#lang brag
/top : sub
sub : "x"

@ -1,3 +0,0 @@
#lang brag
/top : sub
/sub : "x"

@ -1,3 +0,0 @@
#lang brag
/top : sub
@sub : "x"

@ -1,4 +0,0 @@
#lang info
(define test-omit-paths '("examples/simple-line-drawing/examples/letter-i.rkt"))
(define compile-omit-paths '("test" "examples"))

@ -1,30 +0,0 @@
#lang racket/base
(require brag/examples/01-equal
rackunit)
(check-equal? (syntax->datum (parse ""))
'(equal))
(check-equal? (syntax->datum (parse "01"))
'(equal (zero (equal) "0")
(one (equal) "1")))
(check-equal? (syntax->datum (parse "10"))
'(equal (one (equal) "1")
(zero (equal) "0")))
(check-equal? (syntax->datum (parse "0011"))
'(equal (zero (equal) "0")
(one (equal (zero (equal) "0")
(one (equal) "1"))
"1")))
(check-equal? (syntax->datum (parse "0110"))
'(equal (one (equal (zero (equal) "0")
(one (equal) "1"))
"1")
(zero (equal) "0")))
(check-equal? (syntax->datum (parse "1100"))
'(equal (one (equal) "1")
(zero (equal (one (equal) "1")
(zero (equal) "0"))
"0")))

@ -1,50 +0,0 @@
#lang racket/base
(require brag/examples/0n1
brag/support
rackunit)
(define (lex ip)
(port-count-lines! ip)
(lambda ()
(define next-char (read-char ip))
(cond [(eof-object? next-char)
(token eof)]
[(char=? next-char #\0)
(token "0" "0")]
[(char=? next-char #\1)
(token "1" "1")])))
(check-equal? (syntax->datum (parse #f (lex (open-input-string "1"))))
'(rule "1"))
(check-equal? (syntax->datum (parse #f (lex (open-input-string "01"))))
'(rule "0" "1"))
(check-equal? (syntax->datum (parse #f (lex (open-input-string "001"))))
'(rule "0" "0" "1"))
(check-exn exn:fail:parsing?
(lambda ()
(parse #f (lex (open-input-string "0")))))
(check-exn exn:fail:parsing?
(lambda ()
(parse #f (lex (open-input-string "10")))))
(check-exn exn:fail:parsing?
(lambda ()
(parse #f (lex (open-input-string "010")))))
;; This should fail predictably because we're passing in tokens
;; that the parser doesn't know.
(check-exn exn:fail:parsing?
(lambda () (parse '("zero" "one" "zero"))))
(check-exn (regexp (regexp-quote
"Encountered unexpected token of type \"zero\" (value \"zero\") while parsing"))
(lambda () (parse '("zero" "one" "zero"))))

@ -1,49 +0,0 @@
#lang racket/base
(require brag/examples/0n1n
brag/support
rackunit)
(define (lex ip)
(port-count-lines! ip)
(lambda ()
(define next-char (read-char ip))
(cond [(eof-object? next-char)
(token eof)]
[(char=? next-char #\0)
(token "0" "0")]
[(char=? next-char #\1)
(token "1" "1")])))
;; The only rule in the grammar is:
;;
;; rule-0n1n: ["0" rule-0n1n "1"]
;;
;; It makes use of the "maybe" pattern. The result type of the
;; grammar rule is:
;;
;; rule-0n1n: (U #f
;; (list "0" rule-0n1n "1"))
(check-equal? (syntax->datum (parse #f (lex (open-input-string "0011"))))
'(rule-0n1n "0" (rule-0n1n "0" (rule-0n1n) "1") "1"))
(check-equal? (syntax->datum (parse #f (lex (open-input-string "01"))))
'(rule-0n1n "0" (rule-0n1n) "1"))
(check-equal? (syntax->datum (parse #f (lex (open-input-string ""))))
'(rule-0n1n))
(check-equal? (syntax->datum (parse #f (lex (open-input-string "000111"))))
'(rule-0n1n "0" (rule-0n1n "0" (rule-0n1n "0" (rule-0n1n) "1") "1") "1"))
(check-exn exn:fail:parsing?
(lambda () (parse #f (lex (open-input-string "0001111")))))
(check-exn exn:fail:parsing?
(lambda () (parse #f (lex (open-input-string "0001110")))))
(check-exn exn:fail:parsing?
(lambda () (parse #f (lex (open-input-string "10001110")))))

@ -1,26 +0,0 @@
#lang racket/base
(require "test-0n1.rkt"
"test-0n1n.rkt"
"test-01-equal.rkt"
"test-baby-json.rkt"
"test-baby-json-hider.rkt"
"test-curly-quantifier.rkt"
"test-cutter.rkt"
"test-empty-symbol.rkt"
"test-errors.rkt"
"test-flatten.rkt"
"test-hide-and-splice.rkt"
"test-lexer.rkt"
"test-nested-repeats.rkt"
"test-old-token.rkt"
"test-parser.rkt"
"test-quotation-marks-and-backslashes.rkt"
"test-simple-arithmetic-grammar.rkt"
"test-simple-line-drawing.rkt"
"test-start-and-atok.rkt"
"test-top-level-cut.rkt"
"test-weird-grammar.rkt"
"test-whitespace.rkt"
"test-wordy.rkt"
(submod brag/codegen/satisfaction test))

@ -1,27 +0,0 @@
#lang racket/base
(require brag/examples/baby-json-hider
brag/support
rackunit)
(define parse-result (parse (list "{"
(token 'ID "message")
":"
(token 'STRING "'hello world'")
"}")))
(check-equal? (syntax->datum parse-result) '(json (":")))
(define syntaxed-colon-parens (cadr (syntax->list parse-result)))
(check-equal? (syntax->datum (syntax-property syntaxed-colon-parens 'kvpair)) 'kvpair)
(check-equal?
(syntax->datum
(parse "[[[{}]],[],[[{}]]]"))
'(json
(array
"["
(json (array "[" (json (array "[" (json) "]")) "]"))
","
(json (array "[" "]"))
","
(json (array "[" (json (array "[" (json) "]")) "]"))
"]")))

@ -1,30 +0,0 @@
#lang racket/base
(require brag/examples/baby-json
(prefix-in alt: brag/examples/baby-json-alt)
brag/support
rackunit)
(let ([str (list "{"
(token 'ID "message")
":"
(token 'STRING "'hello world'")
"}")]
[result '(json (object "{"
(kvpair "message" ":" (json (string "'hello world'")))
"}"))])
(check-equal? (parse-to-datum str) result)
(check-equal? (alt:parse-to-datum str) result))
(let ([str "[[[{}]],[],[[{}]]]"]
[result '(json
(array
"["
(json (array "[" (json (array "[" (json (object "{" "}")) "]")) "]"))
","
(json (array "[" "]"))
","
(json (array "[" (json (array "[" (json (object "{" "}")) "]")) "]"))
"]"))])
(check-equal? (parse-to-datum str) result)
(check-equal? (alt:parse-to-datum str) result))

@ -1,10 +0,0 @@
#lang racket/base
(require brag/examples/codepoints
rackunit)
(check-equal? (parse-to-datum '("\"A\\" "'c\\" "*d\\\"\\ef\"" "hello world"))
'(start (A "\"A\\")
(c "'c\\")
(def "*d\\\"\\ef\"")
(hello-world "hello world")))

@ -1,28 +0,0 @@
#lang racket/base
(require brag/examples/curly-quantifier
brag/support
rackunit)
(check-exn exn:fail:parsing? (λ () (parse-to-datum "a")))
(check-equal? (parse-to-datum "aa") '(start (a-rule "a" "a")))
(check-exn exn:fail:parsing? (λ () (parse-to-datum "aaa")))
(check-equal? (parse-to-datum "") '(start (b-rule)))
(check-equal? (parse-to-datum "b") '(start (b-rule "b")))
(check-equal? (parse-to-datum "bb") '(start (b-rule "b" "b")))
(check-exn exn:fail:parsing? (λ () (parse-to-datum "bbb")))
(check-exn exn:fail:parsing? (λ () (parse-to-datum "c")))
(check-equal? (parse-to-datum "cc") '(start (c-rule "c" "c")))
(check-equal? (parse-to-datum "ccc") '(start (c-rule "c" "c" "c")))
(check-equal? (parse-to-datum "cccc") '(start (c-rule "c" "c" "c" "c")))
(check-exn exn:fail:parsing? (λ () (parse-to-datum "d")))
(check-equal? (parse-to-datum "dd") '(start (d-rule "d" "d")))
(check-equal? (parse-to-datum "ddd") '(start (d-rule "d" "d" "d")))
(check-exn exn:fail:parsing? (λ () (parse-to-datum "dddd")))
(check-equal? (syntax->datum ((make-rule-parser e-rule) "")) '(e-rule)) ; to prevent ambiguity with b-rule while parsing empty string
(check-equal? (parse-to-datum "e") '(start (e-rule "e")))
(check-equal? (parse-to-datum "ee") '(start (e-rule "e" "e")))
(check-equal? (parse-to-datum "eee") '(start (e-rule "e" "e" "e")))

@ -1,12 +0,0 @@
#lang racket/base
(require brag/examples/cutter-another
brag/support
rackunit)
(check-equal? (parse-tree "w") '(top (w)))
(check-equal? (parse-tree "x") '(top (x)))
(check-equal? (parse-tree "yy") '(top (y)))
(check-equal? (parse-tree "z") '(top (z)))
(check-equal? (parse-tree "a") '(top (a)))
(check-equal? (parse-tree "bb") '(top (b)))
(check-equal? (parse-tree "c") '(top (c)))

@ -1,9 +0,0 @@
#lang racket/base
(require brag/examples/cutter
brag/support
rackunit)
;; related to rule-flattening problem
(check-equal?
(parse-to-datum (list "(" "x" "," "x" ")"))
'(top (expr (list "(" (expr "x") "," (expr "x") ")"))))

@ -1,21 +0,0 @@
#lang racket/base
(require brag/examples/empty-symbol
brag/support
rackunit)
(check-true (and (member (parse-to-datum "") (list '(top (xs)) '(top (ys)) '(top (zs)))) #t))
;; x is normal
(check-equal? (parse-to-datum "x") '(top (xs "x" (xs))))
(check-equal? (parse-to-datum "xx") '(top (xs "x" (xs "x" (xs)))))
(check-equal? (parse-to-datum "xxx") '(top (xs "x" (xs "x" (xs "x" (xs))))))
;; y cuts
(check-equal? (parse-to-datum "y") '(top (ys "y")))
(check-equal? (parse-to-datum "yy") '(top (ys "y")))
(check-equal? (parse-to-datum "yyy") '(top (ys "y")))
;; z splices
(check-equal? (parse-to-datum "z") '(top (zs "z")))
(check-equal? (parse-to-datum "zz") '(top (zs "z" "z")))
(check-equal? (parse-to-datum "zzz") '(top (zs "z" "z" "z")))

@ -1,137 +0,0 @@
#lang racket/base
(require rackunit
(for-syntax racket/base))
;; The tests in this module make sure we produce proper error messages
;; on weird grammars.
(define-namespace-anchor anchor)
(define ns (namespace-anchor->namespace anchor))
(define (c prog)
(parameterize ([current-namespace ns]
[read-accept-reader #t])
(define ip (open-input-string prog))
(port-count-lines! ip)
(compile (read-syntax #f ip))))
;; Helper to let me quickly write compile-error checks.
(define-syntax (check-compile-error stx)
(syntax-case stx ()
[(_ prog expected-msg)
(quasisyntax/loc stx
(begin #,(syntax/loc stx
(check-exn (regexp (regexp-quote expected-msg))
(lambda ()
(c prog))))
#,(syntax/loc stx
(check-exn exn:fail:syntax?
(lambda ()
(c prog))))))]))
;; errors with position are sensitive to length of lang line
(define lang-line "#lang brag")
(check-compile-error (format "~a" lang-line)
"The grammar does not appear to have any rules")
(check-compile-error (format "~a\nfoo" lang-line)
"Error while parsing grammar near: foo [line=2, column=0, position=12]")
(check-compile-error (format "~a\nnumber : 42" lang-line)
"Error while parsing grammar near: 42 [line=2, column=9, position=21]")
(check-compile-error (format "~a\nnumber : 1" lang-line)
"Error while parsing grammar near: 1 [line=2, column=9, position=21]")
(check-compile-error "#lang brag\n x: NUMBER\nx:STRING"
"Rule x has a duplicate definition")
;; Check to see that missing definitions for rules also raise good syntax
;; errors:
(check-compile-error "#lang brag\nx:y"
"Rule y has no definition")
(check-compile-error "#lang brag\nnumber : 1flarbl"
"Rule 1flarbl has no definition")
(check-compile-error "#lang brag\nprogram: EOF"
"Token EOF is reserved and can not be used in a grammar")
;; Nontermination checks:
(check-compile-error "#lang brag\nx : x"
"Rule x has no finite derivation")
(check-compile-error #<<EOF
#lang brag
x : x y
y : "y"
EOF
"Rule x has no finite derivation")
; This should be illegal too:
(check-compile-error #<<EOF
#lang brag
a : "a" b
b : a | b
EOF
"Rule a has no finite derivation")
(check-compile-error #<<EOF
#lang brag
a : [b]
b : [c]
c : c
EOF
"Rule c has no finite derivation")
(check-compile-error #<<EOF
#lang brag
a : [b]
b : c
c : c
EOF
"Rule b has no finite derivation")
(check-compile-error #<<EOF
#lang brag
a : [a]
b : [b]
c : c
EOF
"Rule c has no finite derivation")
(check-compile-error #<<EOF
#lang racket/base
(require brag/examples/simple-line-drawing)
(define bad-parser (make-rule-parser crunchy))
EOF
"Rule crunchy is not defined in the grammar"
)

@ -1,204 +0,0 @@
#lang racket/base
(require brag/rules/stx-types
brag/codegen/flatten
rackunit)
(define (make-fresh-name)
(let ([n 0])
(lambda ()
(set! n (add1 n))
(string->symbol (format "r~a" n)))))
;; Simple literals
(check-equal? (map syntax->datum (flatten-rule #'(rule expr (lit "hello"))))
'((prim-rule lit expr [(lit "hello")])))
(check-equal? (map syntax->datum
(flatten-rule #'(rule expr
(seq (lit "hello")
(lit "world")))))
'((prim-rule seq expr [(lit "hello") (lit "world")])))
(check-equal? (map syntax->datum (flatten-rule #'(rule expr (token HELLO))))
'((prim-rule token expr [(token HELLO)])))
(check-equal? (map syntax->datum (flatten-rule #'(rule expr (id rule-2))))
'((prim-rule id expr [(id rule-2)])))
;; Sequences of primitives
(check-equal? (map syntax->datum
(flatten-rule #'(rule expr (seq (lit "1") (seq (lit "2") (lit "3"))))))
'((prim-rule seq expr
[(lit "1") (lit "2") (lit "3")])))
(check-equal? (map syntax->datum
(flatten-rule #'(rule expr (seq (seq (lit "1") (lit "2")) (lit "3")))))
'((prim-rule seq expr
[(lit "1") (lit "2") (lit "3")])))
(check-equal? (map syntax->datum
(flatten-rule #'(rule expr (seq (seq (lit "1")) (seq (lit "2") (lit "3"))))))
'((prim-rule seq expr
[(lit "1") (lit "2") (lit "3")])))
;; choices
(check-equal? (map syntax->datum
(flatten-rule #'(rule expr (choice (id rule-2) (id rule-3)))))
'((prim-rule choice expr
[(id rule-2)]
[(id rule-3)])))
(check-equal? (map syntax->datum
(flatten-rule #'(rule sexp (choice (seq (lit "(") (lit ")"))
(seq)))
#:fresh-name (make-fresh-name)))
'((prim-rule choice sexp
[(lit "(") (lit ")")] [])))
(check-equal? (map syntax->datum
(flatten-rule #'(rule sexp (choice (seq (seq (lit "(") (token BLAH))
(lit ")"))
(seq)))
#:fresh-name (make-fresh-name)))
'((prim-rule choice sexp
[(lit "(") (token BLAH) (lit ")")] [])))
;; maybe
(check-equal? (map syntax->datum
(flatten-rule #'(rule expr (maybe (id rule-2)))))
'((prim-rule maybe expr
[(id rule-2)]
[])))
(check-equal? (map syntax->datum
(flatten-rule #'(rule expr (maybe (token HUH)))))
'((prim-rule maybe expr
[(token HUH)]
[])))
(check-equal? (map syntax->datum
(flatten-rule #'(rule expr (maybe (seq (lit "hello") (lit "world"))))))
'((prim-rule maybe expr
[(lit "hello") (lit "world")]
[])))
;; repeat
(check-equal? (map syntax->datum
(flatten-rule #'(rule rule-2+ (repeat 0 #f (id rule-2)))))
'((prim-rule maybe rule-2+ ((inferred-id %rule1 repeat)) ())
(inferred-prim-rule repeat %rule1
((inferred-id %rule1 repeat) (id rule-2))
((id rule-2)))))
(check-equal? (map syntax->datum
(flatten-rule #'(rule rule-2+ (repeat 0 #f (seq (lit "+") (id rule-2))))))
'((prim-rule maybe rule-2+ ((inferred-id %rule2 repeat)) ())
(inferred-prim-rule repeat %rule2
((inferred-id %rule2 repeat) (lit "+") (id rule-2))
((lit "+") (id rule-2)))))
(check-equal? (map syntax->datum
(flatten-rule #'(rule rule-2+ (repeat 1 #f (id rule-2)))))
'((prim-rule repeat rule-2+
[(inferred-id rule-2+ repeat) (id rule-2)]
[(id rule-2)])))
(check-equal? (map syntax->datum
(flatten-rule #'(rule rule-2+ (repeat 1 #f (seq (lit "-") (id rule-2))))))
'((prim-rule repeat rule-2+
[(inferred-id rule-2+ repeat) (lit "-") (id rule-2)]
[(lit "-") (id rule-2)])))
;; Mixtures
;; choice and maybe
(check-equal? (map syntax->datum
(flatten-rule #'(rule sexp (choice (lit "x")
(maybe (lit "y"))))
#:fresh-name (make-fresh-name)))
'((prim-rule choice sexp
[(lit "x")]
[(inferred-id r1 maybe)])
(inferred-prim-rule maybe r1
[(lit "y")]
[])))
;; choice, maybe, repeat
(check-equal? (map syntax->datum
(flatten-rule #'(rule sexp (choice (lit "x")
(maybe (repeat 1 #f (lit "y")))))
#:fresh-name (make-fresh-name)))
'((prim-rule choice sexp
[(lit "x")]
[(inferred-id r1 maybe)])
(inferred-prim-rule maybe r1
[(inferred-id r2 repeat)]
[])
(inferred-prim-rule repeat r2
[(inferred-id r2 repeat) (lit "y")]
[(lit "y")])))
;; choice, seq
(check-equal? (map syntax->datum
(flatten-rule #'(rule sexp (choice (seq (lit "x") (lit "y"))
(seq (lit "z") (lit "w"))))
#:fresh-name (make-fresh-name)))
'((prim-rule choice sexp
[(lit "x") (lit "y")]
[(lit "z") (lit "w")])))
;; maybe, choice
(check-equal? (map syntax->datum
(flatten-rule #'(rule sexp (maybe (choice (seq (lit "x") (lit "y"))
(seq (lit "z") (lit "w")))))
#:fresh-name (make-fresh-name)))
'((prim-rule maybe sexp
[(inferred-id r1 choice)]
[])
(inferred-prim-rule choice r1
[(lit "x") (lit "y")]
[(lit "z") (lit "w")])))
;; seq, repeat
(check-equal? (map syntax->datum
(flatten-rule #'(rule expr (seq (id term) (repeat 0 #f (seq (lit "+") (id term)))))
#:fresh-name (make-fresh-name)))
'((prim-rule seq expr ((id term) (inferred-id r1 repeat)))
(prim-rule maybe r1 ((inferred-id r2 repeat)) ())
(inferred-prim-rule repeat r2
((inferred-id r2 repeat) (lit "+") (id term))
((lit "+") (id term)))))
;; larger example: simple arithmetic
(check-equal? (map syntax->datum
(flatten-rules (syntax->list
#'((rule expr (seq (id term) (repeat 0 #f (seq (lit "+") (id term)))))
(rule term (seq (id factor) (repeat 0 #f (seq (lit "*") (id factor)))))
(rule factor (token INT))))
#:fresh-name (make-fresh-name)))
'((prim-rule seq expr ((id term) (inferred-id r1 repeat)))
(prim-rule maybe r1 ((inferred-id r2 repeat)) ())
(inferred-prim-rule repeat r2
((inferred-id r2 repeat) (lit "+") (id term))
((lit "+") (id term)))
(prim-rule seq term ((id factor) (inferred-id r3 repeat)))
(prim-rule maybe r3 ((inferred-id r4 repeat)) ())
(inferred-prim-rule repeat r4
((inferred-id r4 repeat) (lit "*") (id factor))
((lit "*") (id factor)))
(prim-rule token factor ((token INT)))))

@ -1,9 +0,0 @@
#lang racket/base
(require brag/examples/hide-and-splice
brag/support
rackunit)
;; check that an id with both a splice and hide is handled correctly
(check-equal? (parse-to-datum "xxx") '(top ("x" "x" "x")))
(check-equal? (parse-to-datum "yyy") '(top "y" "y" "y"))

@ -1,75 +0,0 @@
#lang racket/base
(require brag/rules/lexer
rackunit
br-parser-tools/lex)
(define (l s)
(define t (lex/1 (open-input-string s)))
(list (token-name (position-token-token t))
(token-value (position-token-token t))
(position-offset (position-token-start-pos t))
(position-offset (position-token-end-pos t))))
;; WARNING: the offsets are not in terms of file positions. So they
;; start counting at 1, not 0.
(check-equal? (l " hi")
'(ID "hi" 2 4))
(check-equal? (l " hi")
'(ID "hi" 3 5))
(check-equal? (l "hi")
'(ID "hi" 1 3))
(check-equal? (l "# foobar\nhi")
'(ID "hi" 10 12))
(check-equal? (l "# foobar\rhi")
'(ID "hi" 10 12))
(check-equal? (l "# foobar\r\nhi")
'(ID "hi" 11 13))
(check-equal? (l "hi:")
'(RULE_HEAD "hi:" 1 4))
(check-equal? (l "hi :")
'(RULE_HEAD "hi :" 1 7))
(check-equal? (l "|")
'(PIPE "|" 1 2))
(check-equal? (l "(")
'(LPAREN "(" 1 2))
(check-equal? (l "[")
'(LBRACKET "[" 1 2))
(check-equal? (l ")")
'(RPAREN ")" 1 2))
(check-equal? (l "]")
'(RBRACKET "]" 1 2))
;; 220111: lexer now converts single-quoted lexemes
;; to standard Racket-style double-quoted string literal
(check-equal? (l "'hello'")
'(LIT "\"hello\"" 1 8))
(check-equal? (l "'he\\'llo'")
'(LIT "\"he'llo\"" 1 10))
(check-equal? (l "/")
'(HIDE "/" 1 2))
(check-equal? (l " /")
'(HIDE "/" 2 3))
(check-equal? (l "@")
'(SPLICE "@" 1 2))
(check-equal? (l " @")
'(SPLICE "@" 2 3))
(check-equal? (l "#:prefix-out val:")
(list 'EOF eof 18 18)) ; lexer skips kwarg

@ -1,17 +0,0 @@
#lang racket/base
(require rackunit
brag/support
brag/examples/subrule)
(define parse-next (make-rule-parser next))
(define parse-start (make-rule-parser start))
(check-equal? (syntax->datum (parse #f "0")) '(start (next "0")))
(check-equal? (syntax->datum (parse #f "0")) (syntax->datum (parse "0")))
(check-equal? (syntax->datum (parse-start #f "0")) '(start (next "0")))
(check-equal? (syntax->datum (parse-start #f "0")) (syntax->datum (parse-start "0")))
(check-equal? (syntax->datum (parse-next #f "0")) '(next "0"))
(check-equal? (syntax->datum (parse-next #f "0")) (syntax->datum (parse-next "0")))

@ -1,9 +0,0 @@
#lang racket/base
(require brag/examples/nested-repeats
rackunit)
(check-equal?
(syntax->datum (parse (list "X" "Y" "X")))
'(start "X" "Y" "X"))

@ -1,76 +0,0 @@
#lang racket/base
;; Make sure the old token type also works fine.
(require brag/examples/simple-line-drawing
brag/support
racket/list
br-parser-tools/lex
(prefix-in : br-parser-tools/lex-sre)
rackunit)
(define-tokens tokens (INTEGER STRING |;| EOF))
(define (make-tokenizer ip)
(port-count-lines! ip)
(define lex (lexer-src-pos
[(:+ numeric)
(token-INTEGER (string->number lexeme))]
[upper-case
(token-STRING lexeme)]
["b"
(token-STRING " ")]
[";"
(|token-;| lexeme)]
[whitespace
(return-without-pos (lex input-port))]
[(eof)
(token-EOF 'eof)]))
(lambda ()
(lex ip)))
(define the-parsed-object-stx
(parse (make-tokenizer (open-input-string #<<EOF
3 9 X;
6 3 b 3 X 3 b;
3 9 X;
EOF
))))
(check-true (syntax-original? the-parsed-object-stx))
;; Does the rule name "drawing" also have the proper "original?" property set?
(check-true (syntax-original? (first (syntax->list the-parsed-object-stx))))
(check-equal? (syntax->datum the-parsed-object-stx)
'(drawing (rows (repeat 3) (chunk 9 "X") ";")
(rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";")
(rows (repeat 3) (chunk 9 "X") ";")))
(define the-parsed-object (syntax->list the-parsed-object-stx))
(check-equal? (syntax-line the-parsed-object-stx) 1)
(check-equal? (syntax-column the-parsed-object-stx) 0)
(check-equal? (syntax-position the-parsed-object-stx) 1)
(check-equal? (syntax-span the-parsed-object-stx) 28)
(check-equal? (length the-parsed-object) 4)
(check-equal? (syntax->datum (second the-parsed-object))
'(rows (repeat 3) (chunk 9 "X") ";"))
(check-equal? (syntax-line (list-ref the-parsed-object 1)) 1)
(check-equal? (syntax->datum (third the-parsed-object))
'(rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";"))
(check-equal? (syntax-line (list-ref the-parsed-object 2)) 2)
(check-equal? (syntax->datum (fourth the-parsed-object))
'(rows (repeat 3) (chunk 9 "X") ";"))
(check-equal? (syntax-line (list-ref the-parsed-object 3)) 3)
;; FIXME: add tests to make sure location is as we expect.
;;
;; FIXME: handle the EOF issue better. Something in cfg-parser
;; appears to deviate from br-parser-tools/yacc with regards to the stop
;; token.

@ -1,183 +0,0 @@
#lang racket/base
(require rackunit
br-parser-tools/lex
brag/rules/parser
brag/rules/lexer
brag/rules/rule-structs)
;; quick-and-dirty helper for pos construction.
(define (p x)
(pos x #f #f))
;; FIXME: fix the test cases so they work on locations rather than just offsets.
(check-equal? (grammar-parser (tokenize (open-input-string "expr : 'hello'")))
(list (rule (p 1) (p 15)
(lhs-id (p 1) (p 5) "expr" #f)
(pattern-lit (p 8) (p 15) "hello" #f))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON")))
(list (rule (p 1) (p 13)
(lhs-id (p 1) (p 5) "expr" #f)
(pattern-token (p 8) (p 13) "COLON" #f))))
(check-equal? (grammar-parser (tokenize (open-input-string "/expr : COLON")))
(list (rule (p 1) (p 14)
(lhs-id (p 1) (p 6) "expr" ''hide)
(pattern-token (p 9) (p 14) "COLON" #f))))
(check-equal? (grammar-parser (tokenize (open-input-string "@expr : COLON")))
(list (rule (p 1) (p 14)
(lhs-id (p 1) (p 6) "expr" ''splice)
(pattern-token (p 9) (p 14) "COLON" #f))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : /COLON COLON")))
(list (rule (p 1) (p 20)
(lhs-id (p 1) (p 5) "expr" #f)
(pattern-seq (p 8) (p 20)
(list
(pattern-token (p 8) (p 14) "COLON" 'hide)
(pattern-token (p 15) (p 20) "COLON" #f))
#f))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : /thing COLON")))
(list (rule (p 1) (p 20)
(lhs-id (p 1) (p 5) "expr" #f)
(pattern-seq (p 8) (p 20)
(list
(pattern-id (p 8) (p 14) "thing" 'hide)
(pattern-token (p 15) (p 20) "COLON" #f))
#f))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : @thing COLON")))
(list (rule (p 1) (p 20)
(lhs-id (p 1) (p 5) "expr" #f)
(pattern-seq (p 8) (p 20)
(list
(pattern-id (p 8) (p 14) "thing" 'splice)
(pattern-token (p 15) (p 20) "COLON" #f))
#f))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : 'hello'*")))
(list (rule (p 1) (p 16)
(lhs-id (p 1) (p 5) "expr" #f)
(pattern-repeat (p 8) (p 16)
0 #f
(pattern-lit (p 8) (p 15) "hello" #f)
#f))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : 'hello'+")))
(list (rule (p 1) (p 16)
(lhs-id (p 1) (p 5) "expr" #f)
(pattern-repeat (p 8) (p 16)
1 #f
(pattern-lit (p 8) (p 15) "hello" #f)
#f))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : [/'hello']")))
(list (rule (p 1) (p 18)
(lhs-id (p 1) (p 5) "expr" #f)
#;(pattern-maybe (p 8) (p 18)
(pattern-lit (p 9) (p 17) "hello" 'hide))
(pattern-repeat (p 8) (p 18)
0 1
(pattern-lit (p 9) (p 17) "hello" 'hide)
#f))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON | BLAH")))
(list (rule (p 1) (p 20)
(lhs-id (p 1) (p 5) "expr" #f)
(pattern-choice (p 8) (p 20)
(list (pattern-token (p 8) (p 13) "COLON" #f)
(pattern-token (p 16) (p 20) "BLAH" #f))
#f))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON | BLAH | BAZ expr")))
(list (rule (p 1) (p 31)
(lhs-id (p 1) (p 5) "expr" #f)
(pattern-choice (p 8) (p 31)
(list (pattern-token (p 8) (p 13) "COLON" #f)
(pattern-token (p 16) (p 20) "BLAH" #f)
(pattern-seq (p 23) (p 31)
(list (pattern-token (p 23) (p 26) "BAZ" #f)
(pattern-id (p 27) (p 31) "expr" #f))
#f))
#f))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two /three")))
(list (rule (p 1) (p 22)
(lhs-id (p 1) (p 5) "expr" #f)
(pattern-seq (p 8) (p 22)
(list (pattern-id (p 8) (p 11) "one" #f)
(pattern-id (p 12) (p 15) "two" #f)
(pattern-id (p 16) (p 22) "three" 'hide))
#f))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : (one two three)")))
(list (rule (p 1) (p 23)
(lhs-id (p 1) (p 5) "expr" #f)
(pattern-seq (p 8) (p 23)
(list (pattern-id (p 9) (p 12) "one" #f)
(pattern-id (p 13) (p 16) "two" #f)
(pattern-id (p 17) (p 22) "three" #f))
#f))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two* three")))
(list (rule (p 1) (p 22)
(lhs-id (p 1) (p 5) "expr" #f)
(pattern-seq (p 8) (p 22)
(list (pattern-id (p 8) (p 11) "one" #f)
(pattern-repeat (p 12) (p 16) 0 #f (pattern-id (p 12) (p 15) "two" #f) #f)
(pattern-id (p 17) (p 22) "three" #f))
#f))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two+ three")))
(list (rule (p 1) (p 22)
(lhs-id (p 1) (p 5) "expr" #f)
(pattern-seq (p 8) (p 22)
(list (pattern-id (p 8) (p 11) "one" #f)
(pattern-repeat (p 12) (p 16) 1 #f (pattern-id (p 12) (p 15) "two" #f) #f)
(pattern-id (p 17) (p 22) "three" #f))
#f))))
(check-equal? (grammar-parser (tokenize (open-input-string "expr : (one two)+ three")))
(list (rule (p 1) (p 24)
(lhs-id (p 1) (p 5) "expr" #f)
(pattern-seq (p 8) (p 24)
(list (pattern-repeat (p 8) (p 18) 1 #f
(pattern-seq (p 8) (p 17)
(list (pattern-id (p 9) (p 12) "one" #f)
(pattern-id (p 13) (p 16) "two" #f))
#f)
#f)
(pattern-id (p 19) (p 24) "three" #f))
#f))))
(check-equal? (grammar-parser (tokenize (open-input-string #<<EOF
statlist : stat+
stat: ID '=' expr
| 'print' expr
EOF
)))
(list (rule (p 1) (p 17)
(lhs-id (p 1) (p 9) "statlist" #f)
(pattern-repeat (p 12) (p 17) 1 #f (pattern-id (p 12) (p 16) "stat" #f) #f))
(rule (p 18) (p 54)
(lhs-id (p 18) (p 22) "stat" #f)
(pattern-choice (p 24) (p 54)
(list (pattern-seq (p 24) (p 35)
(list (pattern-token (p 24) (p 26) "ID" #f)
(pattern-lit (p 27) (p 30) "=" #f)
(pattern-id (p 31) (p 35) "expr" #f))
#f)
(pattern-seq (p 42) (p 54)
(list (pattern-lit (p 42) (p 49) "print" #f)
(pattern-id (p 50) (p 54) "expr" #f))
#f))
#f))))

@ -1,6 +0,0 @@
#lang racket/base
(require brag/examples/quotation-marks-and-backslashes
brag/support
rackunit)
(check-equal? (parse-tree "a\"'\\a\"'\\") '(start "a" "\"" "'" "\\" "a" "\"" "'" "\\"))

@ -1,72 +0,0 @@
#lang racket/base
(require brag/examples/simple-arithmetic-grammar
brag/support
racket/set
br-parser-tools/lex
racket/list
rackunit)
(define (tokenize ip)
(port-count-lines! ip)
(define lex/1
(lexer-src-pos
[(repetition 1 +inf.0 numeric)
(token 'INT (string->number lexeme))]
[whitespace
(token 'WHITESPACE #:skip? #t)]
["+"
(token '+ "+")]
["*"
(token '* "*")]
[(eof)
(token eof)]))
(lambda ()
(lex/1 ip)))
;; expr : term ('+' term)*
;; term : factor (('*') factor)*
;; factor : INT
(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "42"))))
'(expr (term (factor 42))))
(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3+4"))))
'(expr (term (factor 3))
"+"
(term (factor 4))))
(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3+4+5"))))
'(expr (term (factor 3))
"+"
(term (factor 4))
"+"
(term (factor 5))))
(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3*4*5"))))
'(expr (term (factor 3) "*" (factor 4) "*" (factor 5))))
(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3*4 + 5*6"))))
'(expr (term (factor 3) "*" (factor 4))
"+"
(term (factor 5) "*" (factor 6))))
(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "4*5+6"))))
'(expr (term (factor 4) "*" (factor 5))
"+"
(term (factor 6))))
(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "4+5 *6"))))
'(expr (term (factor 4))
"+"
(term (factor 5) "*" (factor 6))))
(check-exn exn:fail:parsing?
(lambda () (parse #f (tokenize (open-input-string "7+")))))
(check-exn exn:fail:parsing?
(lambda () (parse #f (tokenize (open-input-string "7+6+")))))
(check-equal? all-token-types
(set '+ '* 'INT))

@ -1,72 +0,0 @@
#lang racket/base
(require brag/examples/simple-line-drawing
brag/support
racket/list
br-parser-tools/lex
(prefix-in : br-parser-tools/lex-sre)
rackunit)
(define (make-tokenizer ip)
(port-count-lines! ip)
(define lex (lexer-src-pos
[(:+ numeric)
(token 'INTEGER (string->number lexeme))]
[upper-case
(token 'STRING lexeme)]
["b"
(token 'STRING " ")]
[";"
(token ";" lexeme)]
[whitespace
(token 'WHITESPACE lexeme #:skip? #t)]
[(eof)
(void)]))
(lambda ()
(lex ip)))
(define the-parsed-object-stx
(parse (make-tokenizer (open-input-string #<<EOF
3 9 X;
6 3 b 3 X 3 b;
3 9 X;
EOF
))))
(check-true (syntax-original? the-parsed-object-stx))
;; Does the rule name "drawing" also have the proper "original?" property set?
(check-true (syntax-original? (first (syntax->list the-parsed-object-stx))))
(check-equal? (syntax->datum the-parsed-object-stx)
'(drawing (rows (repeat 3) (chunk 9 "X") ";")
(rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";")
(rows (repeat 3) (chunk 9 "X") ";")))
(define the-parsed-object (syntax->list the-parsed-object-stx))
(check-equal? (syntax-line the-parsed-object-stx) 1)
(check-equal? (syntax-column the-parsed-object-stx) 0)
(check-equal? (syntax-position the-parsed-object-stx) 1)
(check-equal? (syntax-span the-parsed-object-stx) 28)
(check-equal? (length the-parsed-object) 4)
(check-equal? (syntax->datum (second the-parsed-object))
'(rows (repeat 3) (chunk 9 "X") ";"))
(check-equal? (syntax-line (list-ref the-parsed-object 1)) 1)
(check-equal? (syntax->datum (third the-parsed-object))
'(rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";"))
(check-equal? (syntax-line (list-ref the-parsed-object 2)) 2)
(check-equal? (syntax->datum (fourth the-parsed-object))
'(rows (repeat 3) (chunk 9 "X") ";"))
(check-equal? (syntax-line (list-ref the-parsed-object 3)) 3)
;; FIXME: add tests to make sure location is as we expect.
;;
;; FIXME: handle the EOF issue better. Something in cfg-parser
;; appears to deviate from br-parser-tools/yacc with regards to the stop
;; token.

@ -1,14 +0,0 @@
#lang racket/base
(require brag/examples/start-and-atok
brag/support
rackunit)
;; make sure that "start" and "atok" work as terminals.
(check-equal? (parse-to-datum (list "start")) '(top "start"))
(check-equal? (parse-to-datum (list "atok")) '(top "atok"))
(check-equal? (parse-to-datum (list "start" "atok")) '(top "start" "atok"))

@ -1,11 +0,0 @@
#lang racket/base
(require (prefix-in 1: brag/examples/top-level-cut-1)
(prefix-in 2: brag/examples/top-level-cut-2)
(prefix-in 3: brag/examples/top-level-cut-3)
brag/support
rackunit)
(check-equal? (1:parse-to-datum "x") '((sub "x")))
(check-equal? (2:parse-to-datum "x") '(("x")))
(check-equal? (3:parse-to-datum "x") '("x"))

@ -1,7 +0,0 @@
#lang racket/base
(require "weird-grammar.rkt"
rackunit)
(check-equal? (syntax->datum (parse '("foo")))
'(foo "foo"))

@ -1,16 +0,0 @@
#lang racket/base
(require brag/examples/whitespace
brag/support
rackunit)
(check-equal?
(parse-to-datum "\ty\n x\tz\r")
'(start (tab "\t") (letter "y") (newline "\n") (space " ") (letter "x") (tab "\t") (letter "z") (return "\r")))
(check-equal?
(parse-to-datum "\t\n \t\r")
'(start (tab "\t") (newline "\n") (space " ") (tab "\t") (return "\r")))
(check-equal?
(parse-to-datum "\a\b\t\n\v\f\r\e")
'(start (all "\a" "\b" "\t" "\n" "\v" "\f" "\r" "\e")))

@ -1,18 +0,0 @@
#lang racket/base
(require brag/examples/wordy
brag/support
rackunit)
(check-equal?
(syntax->datum
(parse (list "hello" "world")))
'(sentence (verb (greeting "hello")) (optional-adjective) (object "world")))
(check-equal?
(syntax->datum
(parse (list "hola" "frumpy" (token 'WORLD "세계"))))
'(sentence (verb (greeting "hola")) (optional-adjective "frumpy") (object "세계")))

@ -1,10 +0,0 @@
#lang info
(define collection 'multi)
(define deps '(["base" #:version "6.3"]
"br-parser-tools-lib"
"rackunit-lib"
"syntax-color-lib"))
(define implies '("br-parser-tools-lib"))

@ -1,4 +0,0 @@
#lang info
(define scribblings '(("brag.scrbl")))

@ -1,13 +0,0 @@
#lang info
(define collection 'multi)
(define deps '(["base" #:version "6.3"]
"brag-lib"))
(define build-deps '("at-exp-lib"
"br-parser-tools-doc"
"racket-doc"
"scribble-lib"))
(define implies '("brag-lib"))

@ -1,16 +1,16 @@
#lang racket/base
(require racket/list
racket/syntax
brag/rules/stx-types
yaragg/rules/stx-types
syntax/id-table
(prefix-in sat: "satisfaction.rkt")
(prefix-in sat: yaragg/codegen/satisfaction)
(for-template racket/base
brag/codegen/runtime
brag/private/internal-support))
yaragg/codegen/runtime
yaragg/private/internal-support))
(provide (all-defined-out)
(for-template (all-from-out brag/codegen/runtime
brag/private/internal-support)))
(for-template (all-from-out yaragg/codegen/runtime
yaragg/private/internal-support)))
;; Given a flattened rule, returns a syntax for the code that
;; preserves as much source location as possible.

@ -1,12 +1,12 @@
#lang racket/base
(require (for-syntax racket/base
racket/list
"codegen.rkt"
"runtime.rkt"
"flatten.rkt")
br-parser-tools/lex
br-parser-tools/cfg-parser
(prefix-in bs: brag/support)
yaragg/codegen/codegen
yaragg/codegen/runtime
yaragg/codegen/flatten)
yaragg/parser-tools/lex
yaragg/parser-tools/cfg-parser
(prefix-in bs: yaragg/support)
racket/set)
(provide (except-out (all-from-out racket/base) #%module-begin)
@ -39,7 +39,7 @@
[((TOKEN-TYPE . TOKEN-TYPE-CONSTRUCTOR) ...)
(for/list ([tt (in-list (rules->token-types rules))])
(cons tt (string->symbol (format "token-~a" tt))))]
;; Flatten rules to use the yacc-style ruleset that br-parser-tools supports
;; Flatten rules to use the yacc-style ruleset that yaragg-parser-tools supports
[GENERATED-RULE-CODES (map flat-rule->yacc-rule (flatten-rules rules))]
;; main exports. Break hygiene so they're also available at top-level / repl
[(PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES)
@ -48,13 +48,13 @@
[TOKEN (datum->syntax rules-stx 'token)] ; for repl
[RULE-IDS (map syntax-e rule-ids)]
[RULES-STX rules-stx])
;; this stx object represents the top level of a #lang brag module.
;; this stx object represents the top level of a #lang yaragg module.
;; so any `define`s are automatically available at the repl.
;; and only identifiers explicitly `provide`d are visible on import.
#'(#%module-begin
(provide PARSE PARSE-TO-DATUM PARSE-TREE MAKE-RULE-PARSER ALL-TOKEN-TYPES)
;; handle brag/support `token` with special identifier
;; handle yaragg/support `token` with special identifier
;; so it doesn't conflict with brag's internal `token` macro
;; defined but deliberately not provided so it's available at repl, but not on import
(define TOKEN bs:token)

@ -1,5 +1,5 @@
#lang racket/base
(require brag/rules/stx-types
(require yaragg/rules/stx-types
racket/list
(for-syntax racket/base))
(provide flatten-rule

@ -1,14 +1,14 @@
#lang s-exp syntax/module-reader
brag/codegen/expander
yaragg/codegen/expander
#:read my-read
#:read-syntax my-read-syntax
#:info my-get-info
#:whole-body-readers? #t
(require brag/rules/parser
brag/rules/lexer
brag/rules/stx
brag/rules/rule-structs)
(require yaragg/rules/parser
yaragg/rules/lexer
yaragg/rules/stx
yaragg/rules/rule-structs)
(define (my-read in) (syntax->datum (my-read-syntax #f in)))
@ -41,7 +41,7 @@ brag/codegen/expander
(define (my-get-info key default default-filter)
(case key
[(color-lexer) (dynamic-require 'brag/private/colorer 'color-brag (λ () #f))]
[(drracket:indentation) (dynamic-require 'brag/private/indenter 'indent-brag (λ () #f))]
[(color-lexer) (dynamic-require 'yaragg/private/colorer 'color-brag (λ () #f))]
[(drracket:indentation) (dynamic-require 'yaragg/private/indenter 'indent-brag (λ () #f))]
[else (default-filter key default)]))

@ -2,9 +2,9 @@
(require racket/match
racket/list
racket/generator
(prefix-in lex: br-parser-tools/lex)
brag/support
brag/private/internal-support)
(prefix-in lex: yaragg/parser-tools/lex)
yaragg/support
yaragg/private/internal-support)
(provide the-error-handler

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
## Equal numbers of 0 and 1s in a string.
##

@ -1,3 +1,3 @@
#lang brag
#lang yaragg
rule: "0"* "1"

@ -1,3 +1,3 @@
#lang brag
#lang yaragg
rule-0n1n: ["0" rule-0n1n "1"]

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
expr : term (/'+' term)*
@term : factor (/'*' @factor)*

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
;; Simple baby example of JSON structure
json ::= number | string

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
;; Simple baby example of JSON structure
json: number

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
#:prefix-out my:
;; Simple baby example of JSON structure

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
;; Simple baby example of JSON structure
json: number | string

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
## The following comes from: http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
start: A c def hello-world
A : "\"\101\\" ; A
c : '\'\U0063\\' ; c

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
;; test the curly quantifier
start : a-rule | b-rule | c-rule | d-rule | e-rule
a-rule : "a"{2} ; exactly 2

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
top : w | x | y | z | a | b | c
w : /"w" ; atom
x : /("x") ; seq

@ -0,0 +1,4 @@
#lang yaragg
top : expr (/"," expr)*
expr : "x" | list
list : "(" expr ("," expr)* ")"

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
top : xs | ys | zs
xs : () | "x" xs

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
top : x | y
/x : Ø | "x" @x
@y : Ø | "y" y

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
;; Lua parser, adapted from:
;; http://www.lua.org/manual/5.1/manual.html#8

@ -1,3 +1,3 @@
#lang brag
#lang yaragg
start : ( (X | X Y) A* )*

@ -1,3 +1,3 @@
#lang brag
#lang yaragg
nested-word-list: WORD
| LEFT-PAREN nested-word-list* RIGHT-PAREN

@ -0,0 +1,2 @@
#lang yaragg
start: "a" "\"" "'" "\\" 'a' '"' '\'' '\\'

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
expr : term ('+' term)*
term : factor ('*' factor)*

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
;;
;; See: http://stackoverflow.com/questions/12345647/rewrite-this-script-by-designing-an-interpreter-in-racket

@ -0,0 +1,4 @@
#lang yaragg/examples/simple-line-drawing
3 9 X;
6 3 b 3 X 3 b;
3 9 X;

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
;;
;; See: http://stackoverflow.com/questions/12345647/rewrite-this-script-by-designing-an-interpreter-in-racket

@ -1,12 +1,12 @@
#lang s-exp syntax/module-reader
brag/examples/simple-line-drawing/semantics
yaragg/examples/simple-line-drawing/semantics
#:read my-read
#:read-syntax my-read-syntax
#:info my-get-info
#:whole-body-readers? #t
(require brag/examples/simple-line-drawing/lexer
brag/examples/simple-line-drawing/grammar)
(require yaragg/examples/simple-line-drawing/lexer
yaragg/examples/simple-line-drawing/grammar)
(define (my-read in)
(syntax->datum (my-read-syntax #f in)))

@ -3,8 +3,8 @@
(provide tokenize)
;; A simple lexer for simple-line-drawing.
(require brag/support
br-parser-tools/lex)
(require yaragg/support
yaragg/parser-tools/lex)
(define (tokenize ip)
(port-count-lines! ip)

@ -0,0 +1,3 @@
#lang yaragg
top : ("start" | "atok")+

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
start: next
next: "0"

@ -0,0 +1,3 @@
#lang yaragg
/top : sub
sub : "x"

@ -0,0 +1,3 @@
#lang yaragg
/top : sub
/sub : "x"

@ -0,0 +1,3 @@
#lang yaragg
/top : sub
@sub : "x"

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
start: (tab | space | newline | letter | return | all)*
tab: '\t'
space: " "

@ -1,4 +1,4 @@
#lang brag
#lang yaragg
;; A parser for a silly language
sentence: verb optional-adjective object
verb: greeting

@ -0,0 +1,23 @@
#lang info
(define collection "yaragg")
(define scribblings
'(("yaragg.scrbl" (multi-page) (parsing-library))
("yaragg-parser-tools.scrbl" (multi-page) (parsing-library))))
(define deps '(["base" #:version "6.3"]
"syntax-color-lib"))
(define build-deps '("at-exp-lib"
"racket-doc"
"rackunit-lib"
"scribble-lib"
"syntax-color-doc"))
(define test-omit-paths '("examples/simple-line-drawing/examples/letter-i.rkt"))

@ -1,8 +1,8 @@
#lang racket/base
(module+ reader
(require "codegen/reader.rkt")
(provide (all-from-out "codegen/reader.rkt")))
(module reader racket/base
(require yaragg/codegen/reader)
(provide (all-from-out yaragg/codegen/reader)))
;; this creates dummy identifiers
;; so cross-refs in `brag` docs will work

@ -0,0 +1,876 @@
#lang racket/base
;; This module implements a parser form like the yaragg-parser-tools's
;; `parser', except that it works on an arbitrary CFG (returning
;; the first sucecssful parse).
;; I'm pretty sure that this is an implementation of Earley's
;; algorithm.
;; To a first approximation, it's a backtracking parser. Alternative
;; for a non-terminal are computed in parallel, and multiple attempts
;; to compute the same result block until the first one completes. If
;; you get into deadlock, such as when trying to match
;; <foo> := <foo>
;; then it means that there's no successful parse, so everything
;; that's blocked fails.
;; A cache holds the series of results for a particular non-terminal
;; at a particular starting location. (A series is used, instead of a
;; sinlge result, for backtracking.) Otherwise, the parser uses
;; backtracking search. Backtracking is implemented through explicit
;; success and failure continuations. Multiple results for a
;; particular nonterminal and location are kept only when they have
;; different lengths. (Otherwise, in the spirit of finding one
;; successful parse, only the first result is kept.)
;; The yaragg-parser-tools's `parse' is used to transform tokens in the
;; grammar to tokens specific to this parser. In other words, this
;; parser uses `parser' so that it doesn't have to know anything about
;; tokens.
;;
(require yaragg/parser-tools/yacc
yaragg/parser-tools/lex)
(require (for-syntax racket/base
syntax/boundmap
yaragg/parser-tools/private-lex/token-syntax))
(provide cfg-parser)
;; A raw token, wrapped so that we can recognize it:
(define-struct tok (name orig-name val start end))
;; Represents the thread scheduler:
(define-struct tasks (active active-back waits multi-waits cache progress?))
(define-for-syntax make-token-identifier-mapping make-hasheq)
(define-for-syntax (token-identifier-mapping-get t tok [fail #f])
(if fail
(hash-ref t (syntax-e tok) fail)
(hash-ref t (syntax-e tok))))
(define-for-syntax (token-identifier-mapping-put! t tok v)
(hash-set! t (syntax-e tok) v))
(define-for-syntax (token-identifier-mapping-map t f)
(hash-map t f))
;; Used to calculate information on the grammar, such as whether
;; a particular non-terminal is "simple" instead of recursively defined.
(define-for-syntax (nt-fixpoint nts proc nt-ids patss)
(define (ormap-all val f as bs)
(cond
[(null? as) val]
[else (ormap-all (or (f (car as) (car bs)) val)
f
(cdr as) (cdr bs))]))
(let loop ()
(when (ormap-all #f
(λ (nt pats)
(let ([old (bound-identifier-mapping-get nts nt)])
(let ([new (proc nt pats old)])
(if (equal? old new)
#f
(begin
(bound-identifier-mapping-put! nts nt new)
#t)))))
nt-ids patss)
(loop))))
;; Tries parse-a followed by parse-b. If parse-a is not simple,
;; then after parse-a succeeds once, we parallelize parse-b
;; and trying a second result for parse-a.
(define (parse-and simple-a? parse-a parse-b
stream last-consumed-token depth end success-k fail-k
max-depth tasks)
(define ((mk-got-k success-k fail-k) val stream last-consumed-token depth max-depth tasks next1-k)
(if simple-a?
(parse-b val stream last-consumed-token depth end
(mk-got2-k success-k fail-k next1-k)
(mk-fail2-k success-k fail-k next1-k)
max-depth tasks)
(parallel-or
(λ (success-k fail-k max-depth tasks)
(parse-b val stream last-consumed-token depth end
success-k fail-k
max-depth tasks))
(λ (success-k fail-k max-depth tasks)
(next1-k (mk-got-k success-k fail-k)
fail-k max-depth tasks))
success-k fail-k max-depth tasks)))
(define ((mk-got2-k success-k fail-k next1-k) val stream last-consumed-token depth max-depth tasks next-k)
(success-k val stream last-consumed-token depth max-depth tasks
(λ (success-k fail-k max-depth tasks)
(next-k (mk-got2-k success-k fail-k next1-k)
(mk-fail2-k success-k fail-k next1-k)
max-depth tasks))))
(define ((mk-fail2-k success-k fail-k next1-k) max-depth tasks)
(next1-k (mk-got-k success-k fail-k) fail-k max-depth tasks))
(parse-a stream last-consumed-token depth end
(mk-got-k success-k fail-k)
fail-k
max-depth tasks))
;; Parallel or for non-terminal alternatives
(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)
(parallel-or (λ (success-k fail-k max-depth tasks)
(parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks))
(λ (success-k fail-k max-depth tasks)
(parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks))
success-k fail-k max-depth tasks))
;; Generic parallel-or
(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks)
(define answer-key (gensym))
(define (gota-k val stream last-consumed-token depth max-depth tasks next-k)
(report-answer answer-key
max-depth
tasks
(list val stream last-consumed-token depth next-k)))
(define (faila-k max-depth tasks)
(report-answer answer-key
max-depth
tasks
null))
(let* ([tasks (queue-task tasks (λ (max-depth tasks)
(parse-a gota-k faila-k max-depth tasks)))]
[tasks (queue-task tasks (λ (max-depth tasks)
(parse-b gota-k faila-k max-depth tasks)))]
[queue-next (λ (next-k tasks)
(queue-task tasks (λ (max-depth tasks)
(next-k gota-k faila-k max-depth tasks))))])
(define ((mk-got-one immediate-next? get-nth success-k) val stream last-consumed-token depth max-depth tasks next-k)
(let ([tasks (if immediate-next?
(queue-next next-k tasks)
tasks)])
(success-k val stream last-consumed-token depth max-depth
tasks
(λ (success-k fail-k max-depth tasks)
(let ([tasks (if immediate-next?
tasks
(queue-next next-k tasks))])
(get-nth max-depth tasks success-k fail-k))))))
(define (get-first max-depth tasks success-k fail-k)
(wait-for-answer #f max-depth tasks answer-key
(mk-got-one #t get-first success-k)
(λ (max-depth tasks)
(get-second max-depth tasks success-k fail-k))
#f))
(define (get-second max-depth tasks success-k fail-k)
(wait-for-answer #f max-depth tasks answer-key
(mk-got-one #f get-second success-k)
fail-k #f))
(get-first max-depth tasks success-k fail-k)))
;; Non-terminal alternatives where the first is "simple" can be done
;; sequentially, which is simpler
(define (parse-or parse-a parse-b
stream last-consumed-token depth end success-k fail-k max-depth tasks)
(define ((mk-got-k success-k fail-k) val stream last-consumed-token depth max-depth tasks next-k)
(success-k val stream last-consumed-token depth
max-depth tasks
(λ (success-k fail-k max-depth tasks)
(next-k (mk-got-k success-k fail-k)
(mk-fail-k success-k fail-k)
max-depth tasks))))
(define ((mk-fail-k success-k fail-k) max-depth tasks)
(parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks))
(parse-a stream last-consumed-token depth end
(mk-got-k success-k fail-k)
(mk-fail-k success-k fail-k)
max-depth tasks))
;; Starts a thread
(define (queue-task tasks t [progress? #t])
(make-tasks (tasks-active tasks)
(cons t (tasks-active-back tasks))
(tasks-waits tasks)
(tasks-multi-waits tasks)
(tasks-cache tasks)
(or progress? (tasks-progress? tasks))))
;; Reports an answer to a waiting thread:
(define (report-answer answer-key max-depth tasks val)
(define v (hash-ref (tasks-waits tasks) answer-key (λ () #f)))
(if v
(let ([tasks (make-tasks (cons (v val) (tasks-active tasks))
(tasks-active-back tasks)
(tasks-waits tasks)
(tasks-multi-waits tasks)
(tasks-cache tasks)
#t)])
(hash-remove! (tasks-waits tasks) answer-key)
(swap-task max-depth tasks))
;; We have an answer ready too fast; wait
(swap-task max-depth
(queue-task tasks
(λ (max-depth tasks)
(report-answer answer-key max-depth tasks val))
#f))))
;; Reports an answer to multiple waiting threads:
(define (report-answer-all answer-key max-depth tasks val k)
(define v (hash-ref (tasks-multi-waits tasks) answer-key (λ () null)))
(hash-remove! (tasks-multi-waits tasks) answer-key)
(let ([tasks (make-tasks (append (map (λ (a) (a val)) v)
(tasks-active tasks))
(tasks-active-back tasks)
(tasks-waits tasks)
(tasks-multi-waits tasks)
(tasks-cache tasks)
#t)])
(k max-depth tasks)))
;; Waits for an answer; if `multi?' is #f, this is sole waiter, otherwise
;; there might be many. Use wither #t or #f (and `report-answer' or
;; `report-answer-all', resptively) consistently for a particular answer key.
(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k)
(let ([wait (λ (val)
(λ (max-depth tasks)
(if val
(if (null? val)
(fail-k max-depth tasks)
(let-values ([(val stream last-consumed-token depth next-k) (apply values val)])
(success-k val stream last-consumed-token depth max-depth tasks next-k)))
(deadlock-k max-depth tasks))))])
(if multi?
(hash-set! (tasks-multi-waits tasks) answer-key
(cons wait (hash-ref (tasks-multi-waits tasks) answer-key
(λ () null))))
(hash-set! (tasks-waits tasks) answer-key wait))
(let ([tasks (make-tasks (tasks-active tasks)
(tasks-active-back tasks)
(tasks-waits tasks)
(tasks-multi-waits tasks)
(tasks-cache tasks)
#t)])
(swap-task max-depth tasks))))
;; Swap thread
(define (swap-task max-depth tasks)
;; Swap in first active:
(if (null? (tasks-active tasks))
(if (tasks-progress? tasks)
(swap-task max-depth
(make-tasks (reverse (tasks-active-back tasks))
null
(tasks-waits tasks)
(tasks-multi-waits tasks)
(tasks-cache tasks)
#f))
;; No progress, so issue failure for all multi-waits
(if (zero? (hash-count (tasks-multi-waits tasks)))
(error 'swap-task "Deadlock")
(swap-task max-depth
(make-tasks (apply
append
(hash-map (tasks-multi-waits tasks)
(λ (k l)
(map (λ (v) (v #f)) l))))
(tasks-active-back tasks)
(tasks-waits tasks)
(make-hasheq)
(tasks-cache tasks)
#t))))
(let ([t (car (tasks-active tasks))]
[tasks (make-tasks (cdr (tasks-active tasks))
(tasks-active-back tasks)
(tasks-waits tasks)
(tasks-multi-waits tasks)
(tasks-cache tasks)
(tasks-progress? tasks))])
(t max-depth tasks))))
;; Finds the symbolic representative of a token class
(define-for-syntax (map-token toks tok)
(car (token-identifier-mapping-get toks tok)))
(define no-pos-val (make-position #f #f #f))
(define-for-syntax no-pos
(let ([npv ((syntax-local-certifier) #'no-pos-val)])
(λ (stx) npv)))
(define-for-syntax ((at-tok-pos sel expr) stx)
#`(let ([v #,expr]) (if v (#,sel v) no-pos-val)))
;; Builds a matcher for a particular alternative
(define-for-syntax (build-match nts toks pat handle $ctx)
(let loop ([pat pat]
[pos 1])
(if (null? pat)
#`(success-k #,handle stream last-consumed-token depth max-depth tasks
(λ (success-k fail-k max-depth tasks)
(fail-k max-depth tasks)))
(let ([id (datum->syntax (car pat) (string->symbol (format "$~a" pos)))]
[id-start-pos (datum->syntax (car pat) (string->symbol (format "$~a-start-pos" pos)))]
[id-end-pos (datum->syntax (car pat) (string->symbol (format "$~a-end-pos" pos)))]
[n-end-pos (and (null? (cdr pat)) (datum->syntax (car pat) '$n-end-pos))])
(cond
[(bound-identifier-mapping-get nts (car pat) (λ () #f))
;; Match non-termimal
#`(parse-and
;; First part is simple? (If so, we don't have to parallelize the `and'.)
#,(let ([l (bound-identifier-mapping-get nts (car pat) (λ () #f))])
(or (not l)
(andmap values (caddr l))))
#,(car pat)
(let ([original-stream stream])
(λ (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks)
(let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream)
tok-end
tok-start)
#'(if (eq? original-stream stream)
last-consumed-token
(and (pair? original-stream)
(car original-stream))))]
[#,id-end-pos (at-tok-pos #'tok-end #'last-consumed-token)]
#,@(if n-end-pos
#`([#,n-end-pos (at-tok-pos #'tok-end #'last-consumed-token)])
null))
#,(loop (cdr pat) (add1 pos)))))
stream last-consumed-token depth
#,(let ([cnt (apply +
(map (λ (item)
(cond
[(bound-identifier-mapping-get nts item (λ () #f))
=> (λ (l) (car l))]
[else 1]))
(cdr pat)))])
#`(- end #,cnt))
success-k fail-k max-depth tasks)]
[else
;; Match token
(let ([tok-id (map-token toks (car pat))])
#`(if (and (pair? stream)
(eq? '#,tok-id (tok-name (car stream))))
(let* ([stream-a (car stream)]
[#,id (tok-val stream-a)]
[last-consumed-token (car stream)]
[stream (cdr stream)]
[depth (add1 depth)])
(let ([max-depth (max max-depth depth)])
(let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'stream-a)]
[#,id-end-pos (at-tok-pos #'tok-end #'stream-a)]
#,@(if n-end-pos
#`([#,n-end-pos (at-tok-pos #'tok-end #'stream-a)])
null))
#,(loop (cdr pat) (add1 pos)))))
(fail-k max-depth tasks)))])))))
;; Starts parsing to match a non-terminal. There's a minor
;; optimization that checks for known starting tokens. Otherwise,
;; use the cache, block if someone else is already trying the match,
;; and cache the result if it's computed.
;; The cache maps nontermial+startingpos+iteration to a result, where
;; the iteration is 0 for the first match attempt, 1 for the second,
;; etc.
(define (parse-nt/share key min-cnt init-tokens stream last-consumed-token depth end max-depth tasks success-k fail-k k)
(if (and (positive? min-cnt)
(pair? stream)
(not (memq (tok-name (car stream)) init-tokens)))
;; No such leading token; give up
(fail-k max-depth tasks)
;; Run pattern
(let loop ([n 0]
[success-k success-k]
[fail-k fail-k]
[max-depth max-depth]
[tasks tasks]
[k k])
(define answer-key (gensym))
(define table-key (vector key depth n))
(define old-depth depth)
(define old-stream stream)
#;(printf "Loop ~a\n" table-key)
(cond
[(hash-ref (tasks-cache tasks) table-key (λ () #f))
=> (λ (result)
#;(printf "Reuse ~a\n" table-key)
(result success-k fail-k max-depth tasks))]
[else
#;(printf "Try ~a ~a\n" table-key (map tok-name stream))
(hash-set! (tasks-cache tasks) table-key
(λ (success-k fail-k max-depth tasks)
#;(printf "Wait ~a ~a\n" table-key answer-key)
(wait-for-answer #t max-depth tasks answer-key success-k fail-k
(λ (max-depth tasks)
#;(printf "Deadlock ~a ~a\n" table-key answer-key)
(fail-k max-depth tasks)))))
(let result-loop ([max-depth max-depth][tasks tasks][k k])
(define orig-stream stream)
(define (new-got-k val stream last-consumed-token depth max-depth tasks next-k)
;; Check whether we already have a result that consumed the same amount:
(define result-key (vector #f key old-depth depth))
(cond
[(hash-ref (tasks-cache tasks) result-key (λ () #f))
;; Go for the next-result
(result-loop max-depth
tasks
(λ (end max-depth tasks success-k fail-k)
(next-k success-k fail-k max-depth tasks)))]
[else
#;(printf "Success ~a ~a\n" table-key
(map tok-name (let loop ([d old-depth][s old-stream])
(if (= d depth)
null
(cons (car s) (loop (add1 d) (cdr s)))))))
(let ([next-k (λ (success-k fail-k max-depth tasks)
(loop (add1 n)
success-k
fail-k
max-depth
tasks
(λ (end max-depth tasks success-k fail-k)
(next-k success-k fail-k max-depth tasks))))])
(hash-set! (tasks-cache tasks) result-key #t)
(hash-set! (tasks-cache tasks) table-key
(λ (success-k fail-k max-depth tasks)
(success-k val stream last-consumed-token depth max-depth tasks next-k)))
(report-answer-all answer-key
max-depth
tasks
(list val stream last-consumed-token depth next-k)
(λ (max-depth tasks)
(success-k val stream last-consumed-token depth max-depth tasks next-k))))]))
(define (new-fail-k max-depth tasks)
#;(printf "Failure ~a\n" table-key)
(hash-set! (tasks-cache tasks) table-key
(λ (success-k fail-k max-depth tasks)
(fail-k max-depth tasks)))
(report-answer-all answer-key
max-depth
tasks
null
(λ (max-depth tasks)
(fail-k max-depth tasks))))
(k end max-depth tasks new-got-k new-fail-k))]))))
;; These temp identifiers can't be `gensym` or `generate-temporary`
;; because they have to be consistent between module loads
;; (IIUC, the parser is multi-threaded, and this approach is not thread-safe)
;; so I see no alternative to the old standby of making them ludicrously unlikely
(define-for-syntax start-id-temp 'start_jihqolbbafscgxvsufnepvmxqipnxgmlpxukmdoqxqzmzgaogaftbkbyqjttwwfimifowdxfyekjiixdmtprfkcvfciraehoeuaz)
(define-for-syntax atok-id-temp 'atok_wrutdjgecmybyfipiwsgjlvsveryodlgassuzcargiuznzgdghrykfqfbwcjgzdhdoeqxcucmtjkuyucskzethozhqkasphdwbht)
(define-syntax (cfg-parser stx)
(syntax-case stx ()
[(_ CLAUSE ...)
(let ([clauses (syntax->list #'(CLAUSE ...))])
(let-values ([(start grammar cfg-error parser-clauses src-pos?)
(let ([all-toks (apply
append
(for/list ([clause (in-list clauses)])
(syntax-case clause (tokens)
[(tokens T ...)
(apply
append
(for/list ([t (in-list (syntax->list #'(T ...)))])
(define v (syntax-local-value t (λ () #f)))
(cond
[(terminals-def? v)
(for/list ([v (in-list (syntax->list (terminals-def-t v)))])
(cons v #f))]
[(e-terminals-def? v)
(for/list ([v (in-list (syntax->list (e-terminals-def-t v)))])
(cons v #t))]
[else null])))]
[_else null])))]
[all-end-toks (apply
append
(for/list ([clause (in-list clauses)])
(syntax-case clause (end)
[(end T ...)
(syntax->list #'(T ...))]
[_else null])))])
(let loop ([clauses clauses]
[cfg-start #f]
[cfg-grammar #f]
[cfg-error #f]
[src-pos? #f]
[parser-clauses null])
(if (null? clauses)
(values cfg-start
cfg-grammar
cfg-error
(reverse parser-clauses)
src-pos?)
(syntax-case (car clauses) (start error grammar src-pos)
[(start TOK)
(loop (cdr clauses) #'TOK cfg-grammar cfg-error src-pos? parser-clauses)]
[(error EXPR)
(loop (cdr clauses) cfg-start cfg-grammar #'EXPR src-pos? parser-clauses)]
[(grammar [NT [PAT HANDLE0 HANDLE ...] ...] ...)
(let ([nts (make-bound-identifier-mapping)]
[toks (make-token-identifier-mapping)]
[end-toks (make-token-identifier-mapping)]
[nt-ids (syntax->list #'(NT ...))]
[patss (map (λ (stx)
(map syntax->list (syntax->list stx)))
(syntax->list #'((PAT ...) ...)))])
(for ([nt (in-list nt-ids)])
(bound-identifier-mapping-put! nts nt (list 0)))
(for ([t (in-list all-end-toks)])
(token-identifier-mapping-put! end-toks t #t))
(for ([t (in-list all-toks)]
#:unless (token-identifier-mapping-get end-toks (car t) (λ () #f)))
(define id (gensym (syntax-e (car t))))
(token-identifier-mapping-put! toks (car t) (cons id (cdr t))))
;; Compute min max size for each non-term:
(nt-fixpoint
nts
(λ (nt pats old-list)
(let ([new-cnt
(apply min (for/list ([pat (in-list pats)])
(for/sum ([elem (in-list pat)])
(car (bound-identifier-mapping-get
nts elem (λ () (list 1)))))))])
(if (new-cnt . > . (car old-list))
(cons new-cnt (cdr old-list))
old-list)))
nt-ids patss)
;; Compute set of toks that must appear at the beginning
;; for a non-terminal
(nt-fixpoint
nts
(λ (nt pats old-list)
(let ([new-list
(apply
append
(for/list ([pat (in-list pats)])
(let loop ([pat pat])
(if (pair? pat)
(let ([l (bound-identifier-mapping-get
nts
(car pat)
(λ ()
(list 1 (map-token toks (car pat)))))])
;; If the non-terminal can match 0 things,
;; then it might match something from the
;; next pattern element. Otherwise, it must
;; match the first element:
(if (zero? (car l))
(append (cdr l) (loop (cdr pat)))
(cdr l)))
null))))])
(let ([new (filter (λ (id)
(andmap (λ (id2)
(not (eq? id id2)))
(cdr old-list)))
new-list)])
(if (pair? new)
;; Drop dups in new list:
(let ([new (let loop ([new new])
(if (null? (cdr new))
new
(if (ormap (λ (id)
(eq? (car new) id))
(cdr new))
(loop (cdr new))
(cons (car new) (loop (cdr new))))))])
(cons (car old-list) (append new (cdr old-list))))
old-list))))
nt-ids patss)
;; Determine left-recursive clauses:
(for-each (λ (nt pats)
(let ([l (bound-identifier-mapping-get nts nt)])
(bound-identifier-mapping-put! nts nt (list (car l)
(cdr l)
(map (λ (x) #f) pats)))))
nt-ids patss)
(nt-fixpoint
nts
(λ (nt pats old-list)
(list (car old-list)
(cadr old-list)
(map (λ (pat simple?)
(or simple?
(let ([l (map (λ (elem)
(bound-identifier-mapping-get
nts
elem
(λ () #f)))
pat)])
(andmap (λ (i)
(or (not i)
(andmap values (caddr i))))
l))))
pats (caddr old-list))))
nt-ids patss)
;; Build a definition for each non-term:
(loop (cdr clauses)
cfg-start
(map (λ (nt pats handles $ctxs)
(define info (bound-identifier-mapping-get nts nt))
(list nt
#`(let ([key (gensym '#,nt)])
(λ (stream last-consumed-token depth end success-k fail-k max-depth tasks)
(parse-nt/share
key #,(car info) '#,(cadr info) stream last-consumed-token depth end
max-depth tasks
success-k fail-k
(λ (end max-depth tasks success-k fail-k)
#,(let loop ([pats pats]
[handles (syntax->list handles)]
[$ctxs (syntax->list $ctxs)]
[simple?s (caddr info)])
(if (null? pats)
#'(fail-k max-depth tasks)
#`(#,(if (or (null? (cdr pats))
(car simple?s))
#'parse-or
#'parse-parallel-or)
(λ (stream last-consumed-token depth end success-k fail-k max-depth tasks)
#,(build-match nts
toks
(car pats)
(car handles)
(car $ctxs)))
(λ (stream last-consumed-token depth end success-k fail-k max-depth tasks)
#,(loop (cdr pats)
(cdr handles)
(cdr $ctxs)
(cdr simple?s)))
stream last-consumed-token depth end success-k fail-k max-depth tasks)))))))))
nt-ids
patss
(syntax->list #'(((begin HANDLE0 HANDLE ...) ...) ...))
(syntax->list #'((HANDLE0 ...) ...)))
cfg-error
src-pos?
(list*
(with-syntax ([((tok tok-id . $e) ...)
(token-identifier-mapping-map toks
(λ (k v)
(list* k
(car v)
(if (cdr v)
#f
'$1))))]
[(pos ...)
(if src-pos?
#'($1-start-pos $1-end-pos)
#'(#f #f))]
;; rename `start` and `atok` to temp ids
;; so that "start" and "atok" can be used as literal string tokens in a grammar.
;; not sure why this works, but it passes all tests.
[%start start-id-temp]
[%atok atok-id-temp])
#`(grammar (%start [() null]
[(%atok %start) (cons $1 $2)])
(%atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...)))
(with-syntax ([%start start-id-temp])
#`(start %start))
parser-clauses)))]
[(grammar . _)
(raise-syntax-error
#f
"bad grammar clause"
stx
(car clauses))]
[(src-pos)
(loop (cdr clauses)
cfg-start
cfg-grammar
cfg-error
#t
(cons (car clauses) parser-clauses))]
[_else
(loop (cdr clauses)
cfg-start
cfg-grammar
cfg-error
src-pos?
(cons (car clauses) parser-clauses))]))))])
#`(let ([orig-parse (parser
[error (λ (a b c)
(error 'cfg-parser "unexpected ~a token: ~a" b c))]
. #,parser-clauses)]
[error-proc #,cfg-error])
(letrec #,grammar
(λ (get-tok)
(let ([tok-list (orig-parse get-tok)])
(letrec ([success-k
(λ (val stream last-consumed-token depth max-depth tasks next)
(if (null? stream)
val
(next success-k fail-k max-depth tasks)))]
[fail-k (λ (max-depth tasks)
(cond
[(null? tok-list)
(if error-proc
(error-proc #t
'no-tokens
#f
(make-position #f #f #f)
(make-position #f #f #f))
(error
'cfg-parse
"no tokens"))]
[else
(let ([bad-tok (list-ref tok-list
(min (sub1 (length tok-list))
max-depth))])
(if error-proc
(error-proc #t
(tok-orig-name bad-tok)
(tok-val bad-tok)
(tok-start bad-tok)
(tok-end bad-tok))
(error
'cfg-parse
"failed at ~a"
(tok-val bad-tok))))]))])
(#,start tok-list
;; we simulate a token at the very beginning with zero width
;; for use with the position-generating code (*-start-pos, *-end-pos).
(if (null? tok-list)
(tok #f #f #f
(position 1
#,(if src-pos? #'1 #'#f)
#,(if src-pos? #'0 #'#f))
(position 1
#,(if src-pos? #'1 #'#f)
#,(if src-pos? #'0 #'#f)))
(tok (tok-name (car tok-list))
(tok-orig-name (car tok-list))
(tok-val (car tok-list))
(tok-start (car tok-list))
(tok-start (car tok-list))))
0
(length tok-list)
success-k
fail-k
0
(make-tasks null null
(make-hasheq) (make-hasheq)
(make-hash) #t)))))))))]))
(module* test racket/base
(require (submod "..")
yaragg/parser-tools/lex
racket/block
rackunit)
;; Test: parsing regular expressions.
;; Here is a test case on locations:
(block
(define-tokens regexp-tokens (ANCHOR STAR OR LIT LPAREN RPAREN EOF))
(define lex (lexer-src-pos ["|" (token-OR lexeme)]
["^" (token-ANCHOR lexeme)]
["*" (token-STAR lexeme)]
[(repetition 1 +inf.0 alphabetic) (token-LIT lexeme)]
["(" (token-LPAREN lexeme)]
[")" (token-RPAREN lexeme)]
[whitespace (return-without-pos (lex input-port))]
[(eof) (token-EOF 'eof)]))
(define -parse (cfg-parser
(tokens regexp-tokens)
(start top)
(end EOF)
(src-pos)
(grammar [top [(maybe-anchor regexp)
(cond [$1
`(anchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))]
[else
`(unanchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))])]]
[maybe-anchor [(ANCHOR) #t]
[() #f]]
[regexp [(regexp STAR) `(star ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))]
[(regexp OR regexp) `(or ,$1 ,$3 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))]
[(LPAREN regexp RPAREN) `(group ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))]
[(LIT) `(lit ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $1-end-pos))]])))
(define (pos->sexp pos)
(position-offset pos))
(define (parse s)
(define ip (open-input-string s))
(port-count-lines! ip)
(-parse (λ () (lex ip))))
(check-equal? (parse "abc")
'(unanchored (lit "abc" 1 4) 1 4))
(check-equal? (parse "a | (b*) | c")
'(unanchored (or (or (lit "a" 1 2)
(group (star (lit "b" 6 7) 6 8) 5 9)
1 9)
(lit "c" 12 13)
1 13)
1 13)))
;; Tests used during development
(define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF))
(define lex
(lexer
["+" (token-PLUS '+)]
["-" (token-MINUS '-)]
["*" (token-STAR '*)]
["|" (token-BAR '||)]
[":" (token-COLON '|:|)]
[whitespace (lex input-port)]
[(eof) (token-EOF 'eof)]))
(define parse
(cfg-parser
(tokens non-terminals)
(start <program>)
(end EOF)
(error (λ (a b stx)
(error 'parse "failed at ~s" stx)))
(grammar [<program> [(PLUS) "plus"]
[(<minus-program> BAR <minus-program>) (list $1 $2 $3)]
[(<program> COLON) (list $1)]]
[<minus-program> [(MINUS) "minus"]
[(<program> STAR) (cons $1 $2)]]
[<simple> [(<alts> <alts> <alts> MINUS) "yes"]]
[<alts> [(PLUS) 'plus]
[(MINUS) 'minus]]
[<random> [() '0]
[(<random> PLUS) (add1 $1)]
[(<random> PLUS) (add1 $1)]])))
(let ([p (open-input-string #;"+*|-|-*|+**" #;"-|+*|+**"
#;"+*|+**|-" #;"-|-*|-|-*"
#;"-|-*|-|-**|-|-*|-|-**"
"-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|-|-*|-|-**|-|-*|-|-***
|-|-*|-|-**|-|-*|-|-*****|-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|
-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****"
;; This one fails:
#;"+*")])
(check-equal? (parse (λ () (lex p)))
'((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
.
*)
||
(((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
.
*))
.
*)
||
(((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
.
*)
||
(((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
.
*))
.
*)))))

@ -0,0 +1,92 @@
#lang racket/base
;; An interactive calculator inspired by the calculator example in the bison manual.
;; Import the parser and lexer generators.
(require yaragg/parser-tools/yacc
yaragg/parser-tools/lex
(prefix-in : yaragg/parser-tools/lex-sre))
(define-tokens value-tokens (NUM VAR FNCT))
(define-empty-tokens op-tokens (newline = OP CP + - * / ^ EOF NEG))
;; A hash table to store variable values in for the calculator
(define vars (make-hash))
(define-lex-abbrevs
(lower-letter (:/ "a" "z"))
(upper-letter (:/ #\A #\Z))
;; (:/ 0 9) would not work because the lexer does not understand numbers. (:/ #\0 #\9) is ok too.
(digit (:/ "0" "9")))
(define calc-lex
(lexer
[(eof) 'EOF]
;; recursively call the lexer on the remaining input after a tab or space. Returning the
;; result of that operation. This effectively skips all whitespace.
[(:or #\tab #\space) (calc-lex input-port)]
;; (token-newline) returns 'newline
[#\newline (token-newline)]
;; Since (token-=) returns '=, just return the symbol directly
[(:or "=" "+" "-" "*" "/" "^") (string->symbol lexeme)]
["(" 'OP]
[")" 'CP]
["sin" (token-FNCT sin)]
[(:+ (:or lower-letter upper-letter)) (token-VAR (string->symbol lexeme))]
[(:+ digit) (token-NUM (string->number lexeme))]
[(:: (:+ digit) #\. (:* digit)) (token-NUM (string->number lexeme))]))
(define calc-parse
(parser
(start start)
(end newline EOF)
(tokens value-tokens op-tokens)
(error (lambda (a b c) (void)))
(precs (right =)
(left - +)
(left * /)
(left NEG)
(right ^))
(grammar
(start [() #f]
;; If there is an error, ignore everything before the error
;; and try to start over right after the error
[(error start) $2]
[(exp) $1])
(exp [(NUM) $1]
[(VAR) (hash-ref vars $1 (lambda () 0))]
[(VAR = exp) (begin (hash-set! vars $1 $3)
$3)]
[(FNCT OP exp CP) ($1 $3)]
[(exp + exp) (+ $1 $3)]
[(exp - exp) (- $1 $3)]
[(exp * exp) (* $1 $3)]
[(exp / exp) (/ $1 $3)]
[(- exp) (prec NEG) (- $2)]
[(exp ^ exp) (expt $1 $3)]
[(OP exp CP) $2]))))
;; run the calculator on the given input-port
(define (calc ip)
(port-count-lines! ip)
(let loop ()
(define result (calc-parse (λ () (calc-lex ip))))
(when result
(printf "~a\n" result)
(loop))))
(module+ test
(require rackunit)
(check-equal? (let ([o (open-output-string)])
(parameterize ([current-output-port o])
(calc (open-input-string "x=1\n(x + 2 * 3) - (1+2)*3")))
(get-output-string o)) "1\n-2\n"))

@ -0,0 +1,240 @@
#lang racket/base
;; This implements the equivalent of racket's read-syntax for R5RS scheme.
;; It has not been thoroughly tested. Also it will read an entire file into a
;; list of syntax objects, instead of returning one syntax object at a time
(require (for-syntax racket/base)
yaragg/parser-tools/lex
(prefix-in : yaragg/parser-tools/lex-sre)
yaragg/parser-tools/yacc
syntax/readerr)
(define-tokens data (DATUM))
(define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF))
(define scheme-lexer
(lexer-src-pos
;; Skip comments, without accumulating extra position information
[(:or scheme-whitespace comment) (return-without-pos (scheme-lexer input-port))]
["#t" (token-DATUM #t)]
["#f" (token-DATUM #f)]
[(:: "#\\" any-char) (token-DATUM (caddr (string->list lexeme)))]
["#\\space" (token-DATUM #\space)]
["#\\newline" (token-DATUM #\newline)]
[(:or (:: initial (:* subsequent)) "+" "-" "...") (token-DATUM (string->symbol lexeme))]
[#\" (token-DATUM (list->string (get-string-token input-port)))]
[#\( 'OP]
[#\) 'CP]
[#\[ 'OP]
[#\] 'CP]
["#(" 'HASHOP]
[num2 (token-DATUM (string->number lexeme 2))]
[num8 (token-DATUM (string->number lexeme 8))]
[num10 (token-DATUM (string->number lexeme 10))]
[num16 (token-DATUM (string->number lexeme 16))]
["'" 'QUOTE]
["`" 'QUASIQUOTE]
["," 'UNQUOTE]
[",@" 'UNQUOTE-SPLICING]
["." 'DOT]
[(eof) 'EOF]))
(define get-string-token
(lexer
[(:~ #\" #\\) (cons (car (string->list lexeme))
(get-string-token input-port))]
[(:: #\\ #\\) (cons #\\ (get-string-token input-port))]
[(:: #\\ #\") (cons #\" (get-string-token input-port))]
[#\" null]))
(define-lex-abbrevs
[letter (:or (:/ "a" "z") (:/ #\A #\Z))]
[digit (:/ #\0 #\9)]
[scheme-whitespace (:or #\newline #\return #\tab #\space #\vtab)]
[initial (:or letter (char-set "!$%&*/:<=>?^_~@"))]
[subsequent (:or initial digit (char-set "+-.@"))]
[comment (:: #\; (:* (:~ #\newline)) #\newline)]
;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of
;; using regexp macros to avoid the cut and paste.
; [numR (:: prefixR complexR)]
; [complexR (:or realR
; (:: realR "@" realR)
; (:: realR "+" urealR "i")
; (:: realR "-" urealR "i")
; (:: realR "+i")
; (:: realR "-i")
; (:: "+" urealR "i")
; (:: "-" urealR "i")
; (:: "+i")
; (:: "-i"))]
; [realR (:: sign urealR)]
; [urealR (:or uintegerR (:: uintegerR "/" uintegerR) decimalR)]
; [uintegerR (:: (:+ digitR) (:* #\#))]
; [prefixR (:or (:: radixR exactness)
; (:: exactness radixR))]
[num2 (:: prefix2 complex2)]
[complex2 (:or real2
(:: real2 "@" real2)
(:: real2 "+" ureal2 "i")
(:: real2 "-" ureal2 "i")
(:: real2 "+i")
(:: real2 "-i")
(:: "+" ureal2 "i")
(:: "-" ureal2 "i")
(:: "+i")
(:: "-i"))]
[real2 (:: sign ureal2)]
[ureal2 (:or uinteger2 (:: uinteger2 "/" uinteger2))]
[uinteger2 (:: (:+ digit2) (:* #\#))]
[prefix2 (:or (:: radix2 exactness)
(:: exactness radix2))]
[radix2 "#b"]
[digit2 (:or "0" "1")]
[num8 (:: prefix8 complex8)]
[complex8 (:or real8
(:: real8 "@" real8)
(:: real8 "+" ureal8 "i")
(:: real8 "-" ureal8 "i")
(:: real8 "+i")
(:: real8 "-i")
(:: "+" ureal8 "i")
(:: "-" ureal8 "i")
(:: "+i")
(:: "-i"))]
[real8 (:: sign ureal8)]
[ureal8 (:or uinteger8 (:: uinteger8 "/" uinteger8))]
[uinteger8 (:: (:+ digit8) (:* #\#))]
[prefix8 (:or (:: radix8 exactness)
(:: exactness radix8))]
[radix8 "#o"]
[digit8 (:/ "0" "7")]
[num10 (:: prefix10 complex10)]
[complex10 (:or real10
(:: real10 "@" real10)
(:: real10 "+" ureal10 "i")
(:: real10 "-" ureal10 "i")
(:: real10 "+i")
(:: real10 "-i")
(:: "+" ureal10 "i")
(:: "-" ureal10 "i")
(:: "+i")
(:: "-i"))]
[real10 (:: sign ureal10)]
[ureal10 (:or uinteger10 (:: uinteger10 "/" uinteger10) decimal10)]
[uinteger10 (:: (:+ digit10) (:* #\#))]
[prefix10 (:or (:: radix10 exactness)
(:: exactness radix10))]
[radix10 (:? "#d")]
[digit10 digit]
[decimal10 (:or (:: uinteger10 suffix)
(:: #\. (:+ digit10) (:* #\#) suffix)
(:: (:+ digit10) #\. (:* digit10) (:* #\#) suffix)
(:: (:+ digit10) (:+ #\#) #\. (:* #\#) suffix))]
[num16 (:: prefix16 complex16)]
[complex16 (:or real16
(:: real16 "@" real16)
(:: real16 "+" ureal16 "i")
(:: real16 "-" ureal16 "i")
(:: real16 "+i")
(:: real16 "-i")
(:: "+" ureal16 "i")
(:: "-" ureal16 "i")
"+i"
"-i")]
[real16 (:: sign ureal16)]
[ureal16 (:or uinteger16 (:: uinteger16 "/" uinteger16))]
[uinteger16 (:: (:+ digit16) (:* #\#))]
[prefix16 (:or (:: radix16 exactness)
(:: exactness radix16))]
[radix16 "#x"]
[digit16 (:or digit (:/ #\a #\f) (:/ #\A #\F))]
[suffix (:or "" (:: exponent-marker sign (:+ digit10)))]
[exponent-marker (:or "e" "s" "f" "d" "l")]
[sign (:or "" "+" "-")]
[exactness (:or "" "#i" "#e")])
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
;; A macro to build the syntax object
(define-syntax (build-so stx)
(syntax-case stx ()
((_ value start end)
(with-syntax ((start-pos (datum->syntax
#'end
(string->symbol
(format "$~a-start-pos"
(syntax->datum #'start)))))
(end-pos (datum->syntax
#'end
(string->symbol
(format "$~a-end-pos"
(syntax->datum #'end)))))
(source (datum->syntax
#'end
'source-name)))
(syntax
(datum->syntax
#f
value
(list source
(position-line start-pos)
(position-col start-pos)
(position-offset start-pos)
(- (position-offset end-pos)
(position-offset start-pos)))
stx-for-original-property))))))
(define (scheme-parser source-name)
(parser
(src-pos)
(start s)
(end EOF)
(error (lambda (a name val start end)
(raise-read-error
"read-error"
source-name
(position-line start)
(position-col start)
(position-offset start)
(- (position-offset end)
(position-offset start)))))
(tokens data delim)
(grammar
(s [(sexp-list) (reverse $1)])
(sexp [(DATUM) (build-so $1 1 1)]
[(OP sexp-list CP) (build-so (reverse $2) 1 3)]
[(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)]
[(QUOTE sexp) (build-so (list 'quote $2) 1 2)]
[(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)]
[(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)]
[(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)]
[(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)])
(sexp-list [() null]
[(sexp-list sexp) (cons $2 $1)]))))
(define (rs sn ip)
(port-count-lines! ip)
((scheme-parser sn) (lambda () (scheme-lexer ip))))
(define readsyntax
(case-lambda ((sn) (rs sn (current-input-port)))
((sn ip) (rs sn ip))))
(provide (rename-out [readsyntax read-syntax]))

@ -0,0 +1,23 @@
#lang racket/base
(require (for-syntax racket/base)
yaragg/parser-tools/lex
(prefix-in : yaragg/parser-tools/lex-sre))
(provide epsilon ~
(rename-out [:* *]
[:+ +]
[:? ?]
[:or :]
[:& &]
[:: @]
[:~ ^]
[:/ -]))
(define-lex-trans (epsilon stx)
(syntax-case stx ()
[(_) #'""]))
(define-lex-trans (~ stx)
(syntax-case stx ()
[(_ RE) #'(complement RE)]))

@ -0,0 +1,103 @@
#lang racket/base
(require (for-syntax racket/base)
yaragg/parser-tools/lex)
(provide (rename-out [sre-* *]
[sre-+ +]
[sre-= =]
[sre->= >=]
[sre-or or]
[sre-- -]
[sre-/ /])
? ** : seq & ~ /-only-chars)
(define-lex-trans (sre-* stx)
(syntax-case stx ()
[(_ RE ...)
#'(repetition 0 +inf.0 (union RE ...))]))
(define-lex-trans (sre-+ stx)
(syntax-case stx ()
[(_ RE ...)
#'(repetition 1 +inf.0 (union RE ...))]))
(define-lex-trans (? stx)
(syntax-case stx ()
[(_ RE ...)
#'(repetition 0 1 (union RE ...))]))
(define-lex-trans (sre-= stx)
(syntax-case stx ()
[(_ N RE ...)
#'(repetition N N (union RE ...))]))
(define-lex-trans (sre->= stx)
(syntax-case stx ()
[(_ N RE ...)
#'(repetition N +inf.0 (union RE ...))]))
(define-lex-trans (** stx)
(syntax-case stx ()
[(_ LOW #f RE ...)
#'(** LOW +inf.0 RE ...)]
[(_ LOW HIGH RE ...)
#'(repetition LOW HIGH (union RE ...))]))
(define-lex-trans (sre-or stx)
(syntax-case stx ()
[(_ RE ...)
#'(union RE ...)]))
(define-lex-trans (: stx)
(syntax-case stx ()
[(_ RE ...)
#'(concatenation RE ...)]))
(define-lex-trans (seq stx)
(syntax-case stx ()
[(_ RE ...)
#'(concatenation RE ...)]))
(define-lex-trans (& stx)
(syntax-case stx ()
[(_ RE ...)
#'(intersection RE ...)]))
(define-lex-trans (~ stx)
(syntax-case stx ()
[(_ RE ...)
#'(char-complement (union RE ...))]))
;; set difference
(define-lex-trans (sre-- stx)
(syntax-case stx ()
[(_)
(raise-syntax-error #f
"must have at least one argument"
stx)]
[(_ BIG-RE RE ...)
#'(& BIG-RE (complement (union RE ...)))]))
(define-lex-trans (sre-/ stx)
(syntax-case stx ()
[(_ RANGE ...)
(let ([chars
(apply append (for/list ([r (in-list (syntax->list #'(RANGE ...)))])
(let ([x (syntax-e r)])
(cond
[(char? x) (list x)]
[(string? x) (string->list x)]
[else
(raise-syntax-error #f "not a char or string" stx r)]))))])
(unless (even? (length chars))
(raise-syntax-error #f "not given an even number of characters" stx))
#`(/-only-chars #,@chars))]))
(define-lex-trans (/-only-chars stx)
(syntax-case stx ()
[(_ C1 C2)
#'(char-range C1 C2)]
[(_ C1 C2 C ...)
#'(union (char-range C1 C2) (/-only-chars C ...))]))

@ -0,0 +1,370 @@
#lang racket/base
;; Provides the syntax used to create lexers and the functions needed to
;; create and use the buffer that the lexer reads from. See docs.
(require (for-syntax racket/list
racket/syntax
syntax/stx
syntax/define
syntax/boundmap
"private-lex/util.rkt"
"private-lex/actions.rkt"
"private-lex/front.rkt"
"private-lex/unicode-chars.rkt"
racket/base
racket/promise))
(require racket/stxparam
syntax/readerr
"private-lex/token.rkt")
(provide lexer lexer-src-pos lexer-srcloc define-lex-abbrev define-lex-abbrevs define-lex-trans
;; Dealing with tokens and related structures
define-tokens define-empty-tokens token-name token-value token?
(struct-out position)
(struct-out position-token)
(struct-out srcloc-token)
;; File path for highlighting errors while lexing
file-path
lexer-file-path ;; alternate name
;; Lex abbrevs for unicode char sets.
any-char any-string nothing alphabetic lower-case upper-case title-case
numeric symbolic punctuation graphic whitespace blank iso-control
;; A regular expression operator
char-set)
;; wrap-action: syntax-object src-pos? -> syntax-object
(define-for-syntax (wrap-action action src-loc-style)
(with-syntax ([action-stx
(cond
[(eq? src-loc-style 'lexer-src-pos)
#`(let/ec ret
(syntax-parameterize
([return-without-pos (make-rename-transformer #'ret)])
(make-position-token #,action start-pos end-pos)))]
[(eq? src-loc-style 'lexer-srcloc)
#`(let/ec ret
(syntax-parameterize
([return-without-srcloc (make-rename-transformer #'ret)])
(make-srcloc-token #,action lexeme-srcloc)))]
[else action])])
(syntax/loc action
(λ (start-pos-p end-pos-p lexeme-p input-port-p)
(define lexeme-srcloc-p (make-srcloc (object-name input-port-p)
(position-line start-pos-p)
(position-col start-pos-p)
(position-offset start-pos-p)
(and (number? (position-offset end-pos-p))
(number? (position-offset start-pos-p))
(- (position-offset end-pos-p)
(position-offset start-pos-p)))))
(syntax-parameterize
([start-pos (make-rename-transformer #'start-pos-p)]
[end-pos (make-rename-transformer #'end-pos-p)]
[lexeme (make-rename-transformer #'lexeme-p)]
[input-port (make-rename-transformer #'input-port-p)]
[lexeme-srcloc (make-rename-transformer #'lexeme-srcloc-p)])
action-stx)))))
(define-for-syntax (make-lexer-macro caller src-loc-style)
(λ (stx)
(syntax-case stx ()
[(_ . RE+ACTS)
(with-disappeared-uses
(let ()
(define spec/re-acts (syntax->list #'RE+ACTS))
(for/and ([x (in-list spec/re-acts)])
(syntax-case x ()
[(RE ACT) #t]
[else (raise-syntax-error caller "not a regular expression / action pair" stx x)]))
(define eof-act (get-special-action spec/re-acts #'eof (case src-loc-style
[(lexer-src-pos) #'(return-without-pos eof)]
[(lexer-srcloc) #'(return-without-srcloc eof)]
[else #'eof])))
(define spec-act (get-special-action spec/re-acts #'special #'(void)))
(define spec-comment-act (get-special-action spec/re-acts #'special-comment #'#f))
(define ids (list #'special #'special-comment #'eof))
(define re-acts (filter (λ (spec/re-act)
(syntax-case spec/re-act ()
[((special) act)
(not (ormap
(λ (x)
(and (identifier? #'special)
(module-or-top-identifier=? #'special x)))
ids))]
[_ #t])) spec/re-acts))
(define names (map (λ (x) (datum->syntax #f (gensym))) re-acts))
(define acts (map (λ (x) (stx-car (stx-cdr x))) re-acts))
(define re-actnames (map (λ (re-act name) (list (stx-car re-act) name)) re-acts names))
(when (null? spec/re-acts)
(raise-syntax-error caller "expected at least one action" stx))
(define-values (trans start action-names no-look) (build-lexer re-actnames))
(when (vector-ref action-names start) ;; Start state is final
(unless (and
;; All the successor states are final
(vector? (vector-ref trans start))
(andmap (λ (x) (vector-ref action-names (vector-ref x 2)))
(vector->list (vector-ref trans start)))
;; Each character has a successor state
(let loop ([check 0]
[nexts (vector->list (vector-ref trans start))])
(cond
[(null? nexts) #f]
[else
(let ([next (car nexts)])
(and (= (vector-ref next 0) check)
(let ([next-check (vector-ref next 1)])
(or (>= next-check max-char-num)
(loop (add1 next-check) (cdr nexts))))))])))
(eprintf "warning: lexer at ~a can accept the empty string\n" stx)))
(with-syntax ([START-STATE-STX start]
[TRANS-TABLE-STX trans]
[NO-LOOKAHEAD-STX no-look]
[(NAME ...) names]
[(ACT ...) (map (λ (a) (wrap-action a src-loc-style)) acts)]
[(ACT-NAME ...) (vector->list action-names)]
[SPEC-ACT-STX (wrap-action spec-act src-loc-style)]
[HAS-COMMENT-ACT?-STX (if (syntax-e spec-comment-act) #t #f)]
[SPEC-COMMENT-ACT-STX (wrap-action spec-comment-act src-loc-style)]
[EOF-ACT-STX (wrap-action eof-act src-loc-style)])
(syntax/loc stx (let ([NAME ACT] ...)
(let ([proc (lexer-body START-STATE-STX
TRANS-TABLE-STX
(vector ACT-NAME ...)
NO-LOOKAHEAD-STX
SPEC-ACT-STX
HAS-COMMENT-ACT?-STX
SPEC-COMMENT-ACT-STX
EOF-ACT-STX)])
;; reverse eta to get named procedures:
(λ (port) (proc port))))))))])))
(define-syntax lexer (make-lexer-macro 'lexer #f))
(define-syntax lexer-src-pos (make-lexer-macro 'lexer-src-pos 'lexer-src-pos))
(define-syntax lexer-srcloc (make-lexer-macro 'lexer-srcloc 'lexer-srcloc))
(define-syntax (define-lex-abbrev stx)
(syntax-case stx ()
[(_ NAME RE) (identifier? #'NAME)
(syntax/loc stx
(define-syntax NAME
(make-lex-abbrev (λ () (quote-syntax RE)))))]
[_ (raise-syntax-error 'define-lex-abbrev "form should be (define-lex-abbrev name re)" stx)]))
(define-syntax (define-lex-abbrevs stx)
(syntax-case stx ()
[(_ . XS)
(with-syntax ([(ABBREV ...) (map
(λ (a)
(syntax-case a ()
[(NAME RE) (identifier? #'NAME)
(syntax/loc a (define-lex-abbrev NAME RE))]
[_ (raise-syntax-error
#f
"form should be (define-lex-abbrevs (name re) ...)"
stx
a)]))
(syntax->list #'XS))])
(syntax/loc stx (begin ABBREV ...)))]
[_ (raise-syntax-error #f "form should be (define-lex-abbrevs (name re) ...)" stx)]))
(define-syntax (define-lex-trans stx)
(syntax-case stx ()
[(_ name-form body-form)
(let-values (((name body)
(normalize-definition #'(define-syntax name-form body-form) #'λ)))
#`(define-syntax #,name
(let ((func #,body))
(unless (procedure? func)
(raise-syntax-error 'define-lex-trans "expected a procedure as the transformer, got ~e" func))
(unless (procedure-arity-includes? func 1)
(raise-syntax-error 'define-lex-trans "expected a procedure that accepts 1 argument as the transformer, got ~e" func))
(make-lex-trans func))))]
[_
(raise-syntax-error
#f
"form should be (define-lex-trans name transformer)"
stx)]))
(define (get-next-state-helper char min max table)
(cond
[(>= min max) #f]
[else
(define try (quotient (+ min max) 2))
(define el (vector-ref table try))
(define r1 (vector-ref el 0))
(define r2 (vector-ref el 1))
(cond
[(and (>= char r1) (<= char r2)) (vector-ref el 2)]
[(< char r1) (get-next-state-helper char min try table)]
[else (get-next-state-helper char (add1 try) max table)])]))
(define (get-next-state char table)
(and table (get-next-state-helper char 0 (vector-length table) table)))
(define ((lexer-body start-state trans-table actions no-lookahead special-action
has-special-comment-action? special-comment-action eof-action) ip)
(define (lexer ip)
(define first-pos (get-position ip))
(define first-char (peek-char-or-special ip 0))
;(printf "(peek-char-or-special port 0) = ~e\n" first-char)
(cond
[(eof-object? first-char)
(do-match ip first-pos eof-action (read-char-or-special ip))]
[(special-comment? first-char)
(read-char-or-special ip)
(cond
(has-special-comment-action?
(do-match ip first-pos special-comment-action #f))
(else (lexer ip)))]
[(not (char? first-char))
(do-match ip first-pos special-action (read-char-or-special ip))]
[else
(let lexer-loop (
;; current-state
[state start-state]
;; the character to transition on
[char first-char]
;; action for the longest match seen thus far
;; including a match at the current state
[longest-match-action
(vector-ref actions start-state)]
;; how many bytes precede char
[length-bytes 0]
;; how many characters have been read
;; including the one just read
[length-chars 1]
;; how many characters are in the longest match
[longest-match-length 0])
(define next-state
(cond
[(not (char? char)) #f]
[else (get-next-state (char->integer char)
(vector-ref trans-table state))]))
(cond
[(not next-state)
(check-match ip first-pos longest-match-length
length-chars longest-match-action)]
[(vector-ref no-lookahead next-state)
(define act (vector-ref actions next-state))
(check-match ip
first-pos
(if act length-chars longest-match-length)
length-chars
(if act act longest-match-action))]
[else
(define act (vector-ref actions next-state))
(define next-length-bytes (+ (char-utf-8-length char) length-bytes))
(define next-char (peek-char-or-special ip next-length-bytes))
#;(printf "(peek-char-or-special port ~e) = ~e\n"
next-length-bytes next-char)
(lexer-loop next-state
next-char
(if act
act
longest-match-action)
next-length-bytes
(add1 length-chars)
(if act
length-chars
longest-match-length))]))]))
(unless (input-port? ip)
(raise-argument-error 'lexer "input-port?" 0 ip))
(lexer ip))
(define (check-match lb first-pos longest-match-length length longest-match-action)
(unless longest-match-action
(define match (read-string length lb))
(define end-pos (get-position lb))
(raise-read-error
(format "lexer: No match found in input starting with: ~v" match)
(file-path)
(position-line first-pos)
(position-col first-pos)
(position-offset first-pos)
(- (position-offset end-pos) (position-offset first-pos))))
(define match (read-string longest-match-length lb))
;(printf "(read-string ~e port) = ~e\n" longest-match-length match)
(do-match lb first-pos longest-match-action match))
(define file-path (make-parameter #f))
(define lexer-file-path file-path)
(define (do-match ip first-pos action value)
#;(printf "(action ~a ~a ~a ~a)\n"
(position-offset first-pos) (position-offset (get-position ip)) value ip)
(action first-pos (get-position ip) value ip))
(define (get-position ip)
(define-values (line col off) (port-next-location ip))
(make-position off line col))
(define-syntax (create-unicode-abbrevs stx)
(syntax-case stx ()
[(_ CTXT)
(with-syntax ([(RANGES ...) (for/list ([range (in-list (list (force alphabetic-ranges)
(force lower-case-ranges)
(force upper-case-ranges)
(force title-case-ranges)
(force numeric-ranges)
(force symbolic-ranges)
(force punctuation-ranges)
(force graphic-ranges)
(force whitespace-ranges)
(force blank-ranges)
(force iso-control-ranges)))])
`(union ,@(map (λ (x)
`(char-range ,(integer->char (car x))
,(integer->char (cdr x))))
range)))]
[(NAMES ...) (for/list ([sym (in-list '(alphabetic
lower-case
upper-case
title-case
numeric
symbolic
punctuation
graphic
whitespace
blank
iso-control))])
(datum->syntax #'CTXT sym #f))])
#'(define-lex-abbrevs (NAMES RANGES) ...))]))
(define-lex-abbrev any-char (char-complement (union)))
(define-lex-abbrev any-string (intersection))
(define-lex-abbrev nothing (union))
(create-unicode-abbrevs #'here)
(define-lex-trans (char-set stx)
(syntax-case stx ()
[(_ STR)
(string? (syntax-e #'STR))
(with-syntax ([(CHAR ...) (string->list (syntax-e #'STR))])
#'(union CHAR ...))]))
(define-syntax provide-lex-keyword
(syntax-rules ()
[(_ ID ...)
(begin
(define-syntax-parameter ID
(make-set!-transformer
(λ (stx)
(raise-syntax-error
'provide-lex-keyword
(format "use of a lexer keyword (~a) is not in an appropriate lexer action" 'ID)
stx))))
...
(provide ID ...))]))
(provide-lex-keyword start-pos end-pos lexeme lexeme-srcloc input-port return-without-pos return-without-srcloc)

@ -0,0 +1,15 @@
#lang racket/base
(provide (all-defined-out))
(require syntax/stx)
;; get-special-action: (syntax-object list) syntax-object syntax-object -> syntax-object
;; Returns the first action from a rule of the form ((which-special) action)
(define (get-special-action rules which-special none)
(cond
[(null? rules) none]
[else
(syntax-case (car rules) ()
[((special) ACT)
(and (identifier? #'special) (module-or-top-identifier=? #'special which-special))
#'ACT]
[_ (get-special-action (cdr rules) which-special none)])]))

@ -0,0 +1,333 @@
#lang racket/base
(require racket/list
(prefix-in is: data/integer-set)
"re.rkt"
"util.rkt")
(provide build-dfa print-dfa (struct-out dfa))
(define e (build-epsilon))
(define z (build-zero))
;; Don't do anything with this one but extract the chars
(define all-chars (->re `(char-complement (union)) (make-cache)))
;; get-char-groups : re bool -> (list-of char-setR?)
;; Collects the char-setRs in r that could be used in
;; taking the derivative of r.
(define (get-char-groups r found-negation)
(cond
[(or (eq? r e) (eq? r z)) null]
[(char-setR? r) (list r)]
[(concatR? r)
(if (re-nullable? (concatR-re1 r))
(append (get-char-groups (concatR-re1 r) found-negation)
(get-char-groups (concatR-re2 r) found-negation))
(get-char-groups (concatR-re1 r) found-negation))]
[(repeatR? r)
(get-char-groups (repeatR-re r) found-negation)]
[(orR? r)
(apply append (map (λ (x) (get-char-groups x found-negation)) (orR-res r)))]
[(andR? r)
(apply append (map (λ (x) (get-char-groups x found-negation)) (andR-res r)))]
[(negR? r)
(if found-negation
(get-char-groups (negR-re r) #t)
(cons all-chars (get-char-groups (negR-re r) #t)))]))
(test-block ((c (make-cache))
(r1 (->re #\1 c))
(r2 (->re #\2 c)))
((get-char-groups e #f) null)
((get-char-groups z #f) null)
((get-char-groups r1 #f) (list r1))
((get-char-groups (->re `(concatenation ,r1 ,r2) c) #f)
(list r1))
((get-char-groups (->re `(concatenation ,e ,r2) c) #f)
(list r2))
((get-char-groups (->re `(concatenation (repetition 0 +inf.0 ,r1) ,r2) c) #f)
(list r1 r2))
((get-char-groups (->re `(repetition 0 +inf.0 ,r1) c) #f)
(list r1))
((get-char-groups
(->re `(union (repetition 0 +inf.0 ,r1)
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
(list r1 r2 (->re "3" c) (->re "4" c)))
((get-char-groups (->re `(complement ,r1) c) #f)
(list all-chars r1))
((get-char-groups
(->re `(intersection (repetition 0 +inf.0 ,r1)
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
(list r1 r2 (->re "3" c) (->re "4" c)))
)
(define loc:member? is:member?)
;; deriveR : re char cache -> re
(define (deriveR r c cache)
(cond
[(or (eq? r e) (eq? r z)) z]
[(char-setR? r)
(if (loc:member? c (char-setR-chars r)) e z)]
[(concatR? r)
(define r1 (concatR-re1 r))
(define r2 (concatR-re2 r))
(define d (build-concat (deriveR r1 c cache) r2 cache))
(if (re-nullable? r1)
(build-or (list d (deriveR r2 c cache)) cache)
d)]
[(repeatR? r)
(build-concat (deriveR (repeatR-re r) c cache)
(build-repeat (sub1 (repeatR-low r))
(sub1 (repeatR-high r))
(repeatR-re r) cache)
cache)]
[(orR? r)
(build-or (map (λ (x) (deriveR x c cache))
(orR-res r))
cache)]
[(andR? r)
(build-and (map (λ (x) (deriveR x c cache))
(andR-res r))
cache)]
[(negR? r)
(build-neg (deriveR (negR-re r) c cache) cache)]))
(test-block ((c (make-cache))
(a (char->integer #\a))
(b (char->integer #\b))
(r1 (->re #\a c))
(r2 (->re `(repetition 0 +inf.0 #\a) c))
(r3 (->re `(repetition 0 +inf.0 ,r2) c))
(r4 (->re `(concatenation #\a ,r2) c))
(r5 (->re `(repetition 0 +inf.0 ,r4) c))
(r6 (->re `(union ,r5 #\a) c))
(r7 (->re `(concatenation ,r2 ,r2) c))
(r8 (->re `(complement ,r4) c))
(r9 (->re `(intersection ,r2 ,r4) c)))
((deriveR e a c) z)
((deriveR z a c) z)
((deriveR r1 b c) z)
((deriveR r1 a c) e)
((deriveR r2 a c) r2)
((deriveR r2 b c) z)
((deriveR r3 a c) r2)
((deriveR r3 b c) z)
((deriveR r4 a c) r2)
((deriveR r4 b c) z)
((deriveR r5 a c) (->re `(concatenation ,r2 ,r5) c))
((deriveR r5 b c) z)
((deriveR r6 a c) (->re `(union (concatenation ,r2 ,r5) "") c))
((deriveR r6 b c) z)
((deriveR r7 a c) (->re `(union (concatenation ,r2 ,r2) ,r2) c))
((deriveR r7 b c) z)
((deriveR r8 a c) (->re `(complement, r2) c))
((deriveR r8 b c) (->re `(complement ,z) c))
((deriveR r9 a c) r2)
((deriveR r9 b c) z)
((deriveR (->re `(repetition 1 2 "ab") c) a c)
(->re `(concatenation "b" (repetition 0 1 "ab")) c)))
;; An re-action is (cons re action)
;; derive : (list-of re-action) char cache -> (union (list-of re-action) #f)
;; applies deriveR to all the re-actions's re parts.
;; Returns #f if the derived state is equivalent to z.
(define (derive r c cache)
(define new-r (for/list ([ra (in-list r)])
(cons (deriveR (car ra) c cache) (cdr ra))))
(if (andmap (λ (x) (eq? z (car x))) new-r)
#f
new-r))
(test-block ((c (make-cache))
(r1 (->re #\1 c))
(r2 (->re #\2 c)))
((derive null (char->integer #\1) c) #f)
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c)
(list (cons e 1) (cons z 2)))
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f))
;; get-final : (list-of re-action) -> (union #f syntax-object)
;; An re that accepts e represents a final state. Return the
;; action from the first final state or #f if there is none.
(define (get-final res)
(cond
[(null? res) #f]
[(re-nullable? (caar res)) (cdar res)]
[else (get-final (cdr res))]))
(test-block ((c->i char->integer)
(c (make-cache))
(r1 (->re #\a c))
(r2 (->re #\b c))
(b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5)))
(a (list (cons r1 1) (cons r2 2))))
((derive null (c->i #\a) c) #f)
((derive a (c->i #\a) c) (list (cons e 1) (cons z 2)))
((derive a (c->i #\b) c) (list (cons z 1) (cons e 2)))
((derive a (c->i #\c) c) #f)
((derive (list (cons (->re `(union " " "\n" ",") c) 1)
(cons (->re `(concatenation (repetition 0 1 "-")
(repetition 1 +inf.0 (char-range "0" "9"))) c) 2)
(cons (->re `(concatenation "-" (repetition 1 +inf.0 "-")) c) 3)
(cons (->re "[" c) 4)
(cons (->re "]" c) 5)) (c->i #\[) c)
b)
((get-final a) #f)
((get-final (list (cons e 1) (cons e 2))) 1)
((get-final b) 4))
;; A state is (make-state (list-of re-action) nat)
(define-struct state (spec index))
;; get->key : re-action -> (list-of nat)
;; states are indexed by the list of indexes of their res
(define (get-key s)
(map (λ (x) (re-index (car x))) s))
(define loc:partition is:partition)
;; compute-chars : (list-of state) -> (list-of char-set)
;; Computed the sets of equivalent characters for taking the
;; derivative of the car of st. Only one derivative per set need to be taken.
(define (compute-chars st)
(cond
[(null? st) null]
[else
(loc:partition (map char-setR-chars
(apply append (map (λ (x) (get-char-groups (car x) #f))
(state-spec (car st))))))]))
(test-block ((c (make-cache))
(c->i char->integer)
(r1 (->re `(char-range #\1 #\4) c))
(r2 (->re `(char-range #\2 #\3) c)))
((compute-chars null) null)
((compute-chars (list (make-state null 1))) null)
((map is:integer-set-contents
(compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2))))
(list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3)))
(is:integer-set-contents (is:union (is:make-range (c->i #\1))
(is:make-range (c->i #\4)))))))
;; A dfa is (make-dfa int int
;; (list-of (cons int syntax-object))
;; (list-of (cons int (list-of (cons char-set int)))))
;; Each transitions is a state and a list of chars with the state to transition to.
;; The finals and transitions are sorted by state number, and duplicate free.
(define-struct dfa (num-states start-state final-states/actions transitions) #:inspector (make-inspector))
(define loc:get-integer is:get-integer)
;; build-dfa : (list-of re-action) cache -> dfa
(define (build-dfa rs cache)
(let* ([transitions (make-hash)]
[get-state-number (make-counter)]
[start (make-state rs (get-state-number))])
(cache (cons 'state (get-key rs)) (λ () start))
(let loop ([old-states (list start)]
[new-states null]
[all-states (list start)]
[cs (compute-chars (list start))])
(cond
[(and (null? old-states) (null? new-states))
(make-dfa (get-state-number) (state-index start)
(sort (for*/list ([state (in-list all-states)]
[val (in-value (cons (state-index state) (get-final (state-spec state))))]
#:when (cdr val))
val)
< #:key car)
(sort (hash-map transitions
(λ (state trans)
(cons (state-index state)
(for/list ([t (in-list trans)])
(cons (car t)
(state-index (cdr t)))))))
< #:key car))]
[(null? old-states)
(loop new-states null all-states (compute-chars new-states))]
[(null? cs)
(loop (cdr old-states) new-states all-states (compute-chars (cdr old-states)))]
[else
(define state (car old-states))
(define c (car cs))
(define new-re (derive (state-spec state) (loc:get-integer c) cache))
(cond
[new-re
(let* ([new-state? #f]
[new-state (cache (cons 'state (get-key new-re))
(λ ()
(set! new-state? #t)
(make-state new-re (get-state-number))))]
[new-all-states (if new-state? (cons new-state all-states) all-states)])
(hash-set! transitions
state
(cons (cons c new-state)
(hash-ref transitions state
(λ () null))))
(cond
[new-state?
(loop old-states (cons new-state new-states) new-all-states (cdr cs))]
[else
(loop old-states new-states new-all-states (cdr cs))]))]
[else (loop old-states new-states all-states (cdr cs))])]))))
(define (print-dfa x)
(printf "number of states: ~a\n" (dfa-num-states x))
(printf "start state: ~a\n" (dfa-start-state x))
(printf "final states: ~a\n" (map car (dfa-final-states/actions x)))
(for-each (λ (trans)
(printf "state: ~a\n" (car trans))
(for-each (λ (rule)
(printf " -~a-> ~a\n"
(is:integer-set-contents (car rule))
(cdr rule)))
(cdr trans)))
(dfa-transitions x)))
(define (build-test-dfa rs)
(define c (make-cache))
(build-dfa (map (λ (x) (cons (->re x c) 'action)) rs) c))
#|
(define t1 (build-test-dfa null))
(define t2 (build-test-dfa `(#\a)))
(define t3 (build-test-dfa `(#\a #\b)))
(define t4 (build-test-dfa `((repetition 0 +inf.0 #\a)
(repetition 0 +inf.0 (concatenation #\a #\b)))))
(define t5 (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\0 #\1)) #\1))))
(define t6 (build-test-dfa `((repetition 0 +inf.0 (repetition 0 +inf.0 #\a))
(repetition 0 +inf.0 (concatenation #\b (repetition 1 +inf.0 #\b))))))
(define t7 (build-test-dfa `((concatenation (repetition 0 +inf.0 #\a) (repetition 0 +inf.0 #\b)
(repetition 0 +inf.0 #\c) (repetition 0 +inf.0 #\d)
(repetition 0 +inf.0 #\e)))))
(define t8
(build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\a #\b)) #\a (union #\a #\b)
(union #\a #\b) (union #\a #\b) (union #\a #\b)))))
(define t9 (build-test-dfa `((concatenation "/*"
(complement (concatenation (intersection) "*/" (intersection)))
"*/"))))
(define t11 (build-test-dfa `((complement "1"))))
(define t12 (build-test-dfa `((concatenation (intersection (concatenation (repetition 0 +inf.0 "a") "b")
(concatenation "a" (repetition 0 +inf.0 "b")))
"ab"))))
(define x (build-test-dfa `((union " " "\n" ",")
(concatenation (repetition 0 1 "-") (repetition 1 +inf.0 (char-range "0" "9")))
(concatenation "-" (repetition 1 +inf.0 "-"))
"["
"]")))
(define y (build-test-dfa
`((repetition 1 +inf.0
(union (concatenation "|" (repetition 0 +inf.0 (char-complement "|")) "|")
(concatenation "|" (repetition 0 +inf.0 (char-complement "|"))))))))
(define t13 (build-test-dfa `((intersection (concatenation (intersection) "111" (intersection))
(complement (union (concatenation (intersection) "01")
(repetition 1 +inf.0 "1")))))))
(define t14 (build-test-dfa `((complement "1")))))
|#

@ -0,0 +1,85 @@
#lang racket/base
(module+ test
(require (for-syntax racket/base)
yaragg/parser-tools/lex
rackunit)
(define-syntax (catch-syn-error stx)
(syntax-case stx ()
[(_ arg)
(datum->syntax
#'here
(with-handlers ((exn:fail:syntax? exn-message))
(syntax-local-expand-expression #'arg)
"not-an-error"))]))
(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev)))
(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev a)))
(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev (a b) v)))
(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev 1 1)))
(check-regexp-match #rx"lex-abbrevs" (catch-syn-error (define-lex-abbrevs ())))
(check-regexp-match #rx"lex-trans" (catch-syn-error (define-lex-trans)))
(check-regexp-match #rx"lexer" (catch-syn-error (lexer)))
(check-regexp-match #rx"lexer" (catch-syn-error (lexer ("a" "b" "c"))))
(check-regexp-match #rx"lexer" (catch-syn-error (lexer ())))
(check-regexp-match #rx"lexer" (catch-syn-error (lexer (""))))
(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (a 1))))
(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer ((a) 1))))
(check-regexp-match #rx"regular-expression" (catch-syn-error (let ((a 1)) (lexer ((a) 1)))))
(check-regexp-match #rx"regular-expression"
(catch-syn-error (let-syntax ((a 1))
(lexer ((a) 1)))))
(check-regexp-match #rx"define-lex-trans"
(catch-syn-error
(let ()
(define-lex-trans a 1)
(let ()
(lexer ((a) 1))))))
;; Detecting mutual recursion cycle:
(check-regexp-match #rx"regular-expression"
(catch-syn-error
(let ()
(define-lex-abbrev a b)
(define-lex-abbrev b a)
(let ()
(lexer (a 1))))))
(check-regexp-match #rx"regular-expression"
(catch-syn-error
(let ()
(define-lex-abbrev a (repetition 0 1 b))
(define-lex-abbrev b (repetition 0 1 a))
(let ()
(lexer (a 1))))))
;; Detecting cycle within same abbreviation:
(check-regexp-match #rx"regular-expression"
(catch-syn-error
(let ()
(define-lex-abbrev balanced
(union (concatenation "(" balanced ")" balanced)
any-char))
(lexer
[balanced (string-append lexeme (balanced input-port))]
[(eof) ""]))))
(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (1 1))))
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition) 1))))
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition #\1 #\1 "3") 1))))
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition 1 #\1 "3") 1))))
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition 1 0 "3") 1))))
(check-regexp-match #rx"complement" (catch-syn-error (lexer ((complement) 1))))
(check-regexp-match #rx"char-range" (catch-syn-error (lexer ((char-range) 1))))
(check-regexp-match #rx"char-range" (catch-syn-error (lexer ((char-range #\9 #\0) 1))))
(check-regexp-match #rx"char-complement" (catch-syn-error (lexer ((char-complement) 1))))
(check-regexp-match #rx"char-complement"
(catch-syn-error (lexer ((char-complement (concatenation "1" "2")) 1)))))

@ -0,0 +1,159 @@
#lang racket/base
(require racket/base
racket/match
(prefix-in is: data/integer-set)
racket/list
syntax/stx
"util.rkt"
"stx.rkt"
"re.rkt"
"deriv.rkt")
(provide build-lexer)
(define-syntax time-label
(syntax-rules ()
((_ l e ...)
(begin
(printf "~a: " l)
(time (begin e ...))))))
;; A table is either
;; - (vector-of (union #f nat))
;; - (vector-of (vector-of (vector nat nat nat)))
(define loc:integer-set-contents is:integer-set-contents)
;; dfa->1d-table : dfa -> (same as build-lexer)
(define (dfa->1d-table dfa)
(define state-table (make-vector (dfa-num-states dfa) #f))
(define transition-cache (make-hasheq))
(for ([trans (in-list (dfa-transitions dfa))])
(match-define (cons from-state all-chars/to) trans)
(define flat-all-chars/to
(sort
(for*/list ([chars/to (in-list all-chars/to)]
[char-ranges (in-value (loc:integer-set-contents (car chars/to)))]
[to (in-value (cdr chars/to))]
[char-range (in-list char-ranges)])
(define entry (vector (car char-range) (cdr char-range) to))
(hash-ref transition-cache entry (λ ()
(hash-set! transition-cache
entry
entry)
entry)))
< #:key (λ (v) (vector-ref v 0))))
(vector-set! state-table from-state (list->vector flat-all-chars/to)))
state-table)
(define loc:foldr is:foldr)
;; dfa->2d-table : dfa -> (same as build-lexer)
(define (dfa->2d-table dfa)
;; char-table : (vector-of (union #f nat))
;; The lexer table, one entry per state per char.
;; Each entry specifies a state to transition to.
;; #f indicates no transition
(define char-table (make-vector (* 256 (dfa-num-states dfa)) #f))
;; Fill the char-table vector
(for* ([trans (in-list (dfa-transitions dfa))]
[chars/to (in-list (cdr trans))])
(define from-state (car trans))
(define to-state (cdr chars/to))
(loc:foldr (λ (char _)
(vector-set! char-table
(bitwise-ior
char
(arithmetic-shift from-state 8))
to-state))
(void)
(car chars/to)))
char-table)
;; dfa->actions : dfa -> (vector-of (union #f syntax-object))
;; The action for each final state, #f if the state isn't final
(define (dfa->actions dfa)
(define actions (make-vector (dfa-num-states dfa) #f))
(for ([state/action (in-list (dfa-final-states/actions dfa))])
(vector-set! actions (car state/action) (cdr state/action)))
actions)
;; dfa->no-look : dfa -> (vector-of bool)
;; For each state whether the lexer can ignore the next input.
;; It can do this only if there are no transitions out of the
;; current state.
(define (dfa->no-look dfa)
(define no-look (make-vector (dfa-num-states dfa) #t))
(for ([trans (in-list (dfa-transitions dfa))])
(vector-set! no-look (car trans) #f))
no-look)
(test-block ((d1 (make-dfa 1 1 (list) (list)))
(d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
(list (cons 1 (list (cons (is:make-range 49 50) 1)
(cons (is:make-range 51) 2)))
(cons 2 (list (cons (is:make-range 49) 3))))))
(d3 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
(list (cons 1 (list (cons (is:make-range 100 200) 0)
(cons (is:make-range 49 50) 1)
(cons (is:make-range 51) 2)))
(cons 2 (list (cons (is:make-range 49) 3)))))))
((dfa->2d-table d1) (make-vector 256 #f))
((dfa->2d-table d2) (let ((v (make-vector 1024 #f)))
(vector-set! v 305 1)
(vector-set! v 306 1)
(vector-set! v 307 2)
(vector-set! v 561 3)
v))
((dfa->1d-table d1) (make-vector 1 #f))
((dfa->1d-table d2) #(#f
#(#(49 50 1) #(51 51 2))
#(#(49 49 3))
#f))
((dfa->1d-table d3) #(#f
#(#(49 50 1) #(51 51 2) #(100 200 0))
#(#(49 49 3))
#f))
((dfa->actions d1) (vector #f))
((dfa->actions d2) (vector #f #f 2 3))
((dfa->no-look d1) (vector #t))
((dfa->no-look d2) (vector #t #f #f #t)))
;; build-lexer : syntax-object list ->
;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object))
;; each syntax object has the form (re action)
(define (build-lexer sos)
(define s-re-acts (for/list ([so (in-list sos)])
(cons (parse (stx-car so))
(stx-car (stx-cdr so)))))
(define cache (make-cache))
(define re-acts (for/list ([s-re-act (in-list s-re-acts)])
(cons (->re (car s-re-act) cache)
(cdr s-re-act))))
(define dfa (build-dfa re-acts cache))
(define table (dfa->1d-table dfa))
;(print-dfa dfa)
#;(let ((num-states (vector-length table))
(num-vectors (length (filter values (vector->list table))))
(num-entries (apply + (map
(λ (x) (if x (vector-length x) 0))
(vector->list table))))
(num-different-entries
(let ((ht (make-hash)))
(for-each
(λ (x)
(when x
(for-each
(λ (y)
(hash-set! ht y #t))
(vector->list x))))
(vector->list table))
(length (hash-table-map ht cons)))))
(printf "~a states, ~aKB\n"
num-states
(/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries
(* 5 num-different-entries))) 1024)))
(values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa)))

@ -0,0 +1,384 @@
#lang racket/base
(require racket/list
racket/match
(prefix-in is: data/integer-set)
"util.rkt")
(provide ->re build-epsilon build-zero build-char-set build-concat
build-repeat build-or build-and build-neg
epsilonR? zeroR? char-setR? concatR? repeatR? orR? andR? negR?
char-setR-chars concatR-re1 concatR-re2 repeatR-re repeatR-low repeatR-high
orR-res andR-res negR-re
re-nullable? re-index)
;; get-index : -> nat
(define get-index (make-counter))
;; An re is either
;; - (make-epsilonR bool nat)
;; - (make-zeroR bool nat)
;; - (make-char-setR bool nat char-set)
;; - (make-concatR bool nat re re)
;; - (make-repeatR bool nat nat nat-or-+inf.0 re)
;; - (make-orR bool nat (list-of re)) Must not directly contain any orRs
;; - (make-andR bool nat (list-of re)) Must not directly contain any andRs
;; - (make-negR bool nat re)
;;
;; Every re must have an index field globally different from all
;; other re index fields.
(define-struct re (nullable? index) #:inspector (make-inspector))
(define-struct (epsilonR re) () #:inspector (make-inspector))
(define-struct (zeroR re) () #:inspector (make-inspector))
(define-struct (char-setR re) (chars) #:inspector (make-inspector))
(define-struct (concatR re) (re1 re2) #:inspector (make-inspector))
(define-struct (repeatR re) (low high re) #:inspector (make-inspector))
(define-struct (orR re) (res) #:inspector (make-inspector))
(define-struct (andR re) (res) #:inspector (make-inspector))
(define-struct (negR re) (re) #:inspector (make-inspector))
;; e : re
;; The unique epsilon re
(define e (make-epsilonR #t (get-index)))
;; z : re
;; The unique zero re
(define z (make-zeroR #f (get-index)))
;; s-re = char constant
;; | string constant (sequence of characters)
;; | re a precompiled re
;; | (repetition low high s-re) repetition between low and high times (inclusive)
;; | (union s-re ...)
;; | (intersection s-re ...)
;; | (complement s-re)
;; | (concatenation s-re ...)
;; | (char-range rng rng) match any character between two (inclusive)
;; | (char-complement char-set) match any character not listed
;; low = natural-number
;; high = natural-number or +inf.0
;; rng = char or string with length 1
;; (concatenation) (repetition 0 0 x), and "" match the empty string.
;; (union) matches no strings.
;; (intersection) matches any string.
(define loc:make-range is:make-range)
(define loc:union is:union)
(define loc:split is:split)
(define loc:complement is:complement)
;; ->re : s-re cache -> re
(define (->re exp cache)
(match exp
[(? char?) (build-char-set (loc:make-range (char->integer exp)) cache)]
[(? string?) (->re `(concatenation ,@(string->list exp)) cache)]
[(? re?) exp]
[`(repetition ,low ,high ,r)
(build-repeat low high (->re r cache) cache)]
[`(union ,rs ...)
(build-or (flatten-res (map (λ (r) (->re r cache)) rs)
orR? orR-res loc:union cache)
cache)]
[`(intersection ,rs ...)
(build-and (flatten-res (map (λ (r) (->re r cache)) rs)
andR? andR-res (λ (a b)
(let-values (((i _ __) (loc:split a b))) i))
cache)
cache)]
[`(complement ,r) (build-neg (->re r cache) cache)]
[`(concatenation ,rs ...)
(foldr (λ (x y)
(build-concat (->re x cache) y cache))
e
rs)]
[`(char-range ,c1 ,c2)
(let ([i1 (char->integer (if (string? c1) (string-ref c1 0) c1))]
[i2 (char->integer (if (string? c2) (string-ref c2 0) c2))])
(if (<= i1 i2)
(build-char-set (loc:make-range i1 i2) cache)
z))]
[`(char-complement ,crs ...)
(let ([cs (->re `(union ,@crs) cache)])
(cond
[(zeroR? cs) (build-char-set (loc:make-range 0 max-char-num) cache)]
[(char-setR? cs)
(build-char-set (loc:complement (char-setR-chars cs) 0 max-char-num) cache)]
[else z]))]))
;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re))
;; (char-set char-set -> char-set) cache -> (list-of re)
;; Takes all the char-sets in l and combines them into one char-set using the combine function.
;; Flattens out the values of type?. get-res only needs to function on things type? returns
;; true for.
(define (flatten-res l type? get-res combine cache)
(let loop ([res l]
;; chars : (union #f char-set)
[chars #f]
[no-chars null])
(cond
[(null? res)
(if chars
(cons (build-char-set chars cache) no-chars)
no-chars)]
[(char-setR? (car res))
(if chars
(loop (cdr res) (combine (char-setR-chars (car res)) chars) no-chars)
(loop (cdr res) (char-setR-chars (car res)) no-chars))]
[(type? (car res))
(loop (append (get-res (car res)) (cdr res)) chars no-chars)]
[else (loop (cdr res) chars (cons (car res) no-chars))])))
;; build-epsilon : -> re
(define (build-epsilon) e)
(define (build-zero) z)
(define loc:integer-set-contents is:integer-set-contents)
;; build-char-set : char-set cache -> re
(define (build-char-set cs cache)
(define l (loc:integer-set-contents cs))
(cond
[(null? l) z]
[else
(cache l
(λ ()
(make-char-setR #f (get-index) cs)))]))
;; build-concat : re re cache -> re
(define (build-concat r1 r2 cache)
(cond
[(eq? e r1) r2]
[(eq? e r2) r1]
[(or (eq? z r1) (eq? z r2)) z]
[else
(cache (cons 'concat (cons (re-index r1) (re-index r2)))
(λ ()
(make-concatR (and (re-nullable? r1) (re-nullable? r2))
(get-index)
r1 r2)))]))
;; build-repeat : nat nat-or-+inf.0 re cache -> re
(define (build-repeat low high r cache)
(let ([low (if (< low 0) 0 low)])
(cond
[(eq? r e) e]
[(and (= 0 low) (or (= 0 high) (eq? z r))) e]
[(and (= 1 low) (= 1 high)) r]
[(and (repeatR? r)
(eqv? (repeatR-high r) +inf.0)
(or (= 0 (repeatR-low r))
(= 1 (repeatR-low r))))
(build-repeat (* low (repeatR-low r))
+inf.0
(repeatR-re r)
cache)]
[else
(cache (cons 'repeat (cons low (cons high (re-index r))))
(λ ()
(make-repeatR (or (re-nullable? r) (= 0 low)) (get-index) low high r)))])))
;; build-or : (list-of re) cache -> re
(define (build-or rs cache)
(let ([rs
(filter
(λ (x) (not (eq? x z)))
(do-simple-equiv (replace rs orR? orR-res null) re-index))])
(cond
[(null? rs) z]
[(null? (cdr rs)) (car rs)]
[(memq (build-neg z cache) rs) (build-neg z cache)]
[else
(cache (cons 'or (map re-index rs))
(λ ()
(make-orR (ormap re-nullable? rs) (get-index) rs)))])))
;; build-and : (list-of re) cache -> re
(define (build-and rs cache)
(let ([rs (do-simple-equiv (replace rs andR? andR-res null) re-index)])
(cond
[(null? rs) (build-neg z cache)]
[(null? (cdr rs)) (car rs)]
[(memq z rs) z]
[else
(cache (cons 'and (map re-index rs))
(λ ()
(make-andR (andmap re-nullable? rs) (get-index) rs)))])))
;; build-neg : re cache -> re
(define (build-neg r cache)
(cond
[(negR? r) (negR-re r)]
[else
(cache (cons 'neg (re-index r))
(λ ()
(make-negR (not (re-nullable? r)) (get-index) r)))]))
;; Tests for the build-functions
(test-block ((c (make-cache))
(isc is:integer-set-contents)
(r1 (build-char-set (is:make-range (char->integer #\1)) c))
(r2 (build-char-set (is:make-range (char->integer #\2)) c))
(r3 (build-char-set (is:make-range (char->integer #\3)) c))
(rc (build-concat r1 r2 c))
(rc2 (build-concat r2 r1 c))
(rr (build-repeat 0 +inf.0 rc c))
(ro (build-or `(,rr ,rc ,rr) c))
(ro2 (build-or `(,rc ,rr ,z) c))
(ro3 (build-or `(,rr ,rc) c))
(ro4 (build-or `(,(build-or `(,r1 ,r2) c)
,(build-or `(,r2 ,r3) c)) c))
(ra (build-and `(,rr ,rc ,rr) c))
(ra2 (build-and `(,rc ,rr) c))
(ra3 (build-and `(,rr ,rc) c))
(ra4 (build-and `(,(build-and `(,r3 ,r2) c)
,(build-and `(,r2 ,r1) c)) c))
(rn (build-neg z c))
(rn2 (build-neg r1 c)))
((isc (char-setR-chars r1)) (isc (is:make-range (char->integer #\1))))
((isc (char-setR-chars r2)) (isc (is:make-range (char->integer #\2))))
((isc (char-setR-chars r3)) (isc (is:make-range (char->integer #\3))))
((build-char-set (is:make-range) c) z)
((build-concat r1 e c) r1)
((build-concat e r1 c) r1)
((build-concat r1 z c) z)
((build-concat z r1 c) z)
((build-concat r1 r2 c) rc)
((concatR-re1 rc) r1)
((concatR-re2 rc) r2)
((concatR-re1 rc2) r2)
((concatR-re2 rc2) r1)
(ro ro2)
(ro ro3)
(ro4 (build-or `(,r1 ,r2 ,r3) c))
((orR-res ro) (list rc rr))
((orR-res ro4) (list r1 r2 r3))
((build-or null c) z)
((build-or `(,r1 ,z) c) r1)
((build-repeat 0 +inf.0 rc c) rr)
((build-repeat 0 1 z c) e)
((build-repeat 0 0 rc c) e)
((build-repeat 0 +inf.0 z c) e)
((build-repeat -1 +inf.0 z c) e)
((build-repeat 0 +inf.0 (build-repeat 0 +inf.0 rc c) c)
(build-repeat 0 +inf.0 rc c))
((build-repeat 20 20 (build-repeat 0 +inf.0 rc c) c)
(build-repeat 0 +inf.0 rc c))
((build-repeat 20 20 (build-repeat 1 +inf.0 rc c) c)
(build-repeat 20 +inf.0 rc c))
((build-repeat 1 1 rc c) rc)
((repeatR-re rr) rc)
(ra ra2)
(ra ra3)
(ra4 (build-and `(,r1 ,r2 ,r3) c))
((andR-res ra) (list rc rr))
((andR-res ra4) (list r1 r2 r3))
((build-and null c) (build-neg z c))
((build-and `(,r1 ,z) c) z)
((build-and `(,r1) c) r1)
((build-neg r1 c) (build-neg r1 c))
((build-neg (build-neg r1 c) c) r1)
((negR-re (build-neg r2 c)) r2)
((re-nullable? r1) #f)
((re-nullable? rc) #f)
((re-nullable? (build-concat rr rr c)) #t)
((re-nullable? rr) #t)
((re-nullable? (build-repeat 0 1 rc c)) #t)
((re-nullable? (build-repeat 1 2 rc c)) #f)
((re-nullable? (build-repeat 1 2 (build-or (list e r1) c) c)) #t)
((re-nullable? ro) #t)
((re-nullable? (build-or `(,r1 ,r2) c)) #f)
((re-nullable? (build-and `(,r1 ,e) c)) #f)
((re-nullable? (build-and `(,rr ,e) c)) #t)
((re-nullable? (build-neg r1 c)) #t)
((re-nullable? (build-neg rr c)) #f))
(test-block ((c (make-cache))
(isc is:integer-set-contents)
(r1 (->re #\1 c))
(r2 (->re #\2 c))
(r3-5 (->re '(char-range #\3 #\5) c))
(r4 (build-or `(,r1 ,r2) c))
(r5 (->re `(union ,r3-5 #\7) c))
(r6 (->re #\6 c)))
((flatten-res null orR? orR-res is:union c) null)
((isc (char-setR-chars (car (flatten-res `(,r1) orR? orR-res is:union c))))
(isc (is:make-range (char->integer #\1))))
((isc (char-setR-chars (car (flatten-res `(,r4) orR? orR-res is:union c))))
(isc (is:make-range (char->integer #\1) (char->integer #\2))))
((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1)
orR? orR-res is:union c))))
(isc (is:make-range (char->integer #\1) (char->integer #\7))))
((flatten-res `(,r1 ,r2) andR? andR-res (λ (x y)
(let-values (((i _ __)
(is:split x y)))
i))
c)
(list z)))
;; ->re
(test-block ((c (make-cache))
(isc is:integer-set-contents)
(r (->re #\a c))
(rr (->re `(concatenation ,r ,r) c))
(rrr (->re `(concatenation ,r ,rr) c))
(rrr* (->re `(repetition 0 +inf.0 ,rrr) c)))
((isc (char-setR-chars r)) (isc (is:make-range (char->integer #\a))))
((->re "" c) e)
((->re "asdf" c) (->re `(concatenation #\a #\s #\d #\f) c))
((->re r c) r)
((->re `(repetition 0 +inf.0 ,r) c) (build-repeat 0 +inf.0 r c))
((->re `(repetition 1 +inf.0 ,r) c) (build-repeat 1 +inf.0 r c))
((->re `(repetition 0 1 ,r) c) (build-repeat 0 1 r c))
((->re `(repetition 0 1 ,rrr*) c) rrr*)
((->re `(union (union (char-range #\a #\c)
(char-complement (char-range #\000 #\110)
(char-range #\112 ,(integer->char max-char-num))))
(union (repetition 0 +inf.0 #\2))) c)
(build-or (list (build-char-set (is:union (is:make-range 73)
(is:make-range 97 99))
c)
(build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c))
c))
((->re `(union ,rr ,rrr) c) (build-or (list rr rrr) c))
((->re `(union ,r) c) r)
((->re `(union) c) z)
((->re `(intersection (intersection #\111
(char-complement (char-range #\000 #\110)
(char-range #\112 ,(integer->char max-char-num))))
(intersection (repetition 0 +inf.0 #\2))) c)
(build-and (list (build-char-set (is:make-range 73) c)
(build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c))
c))
((->re `(intersection (intersection #\000 (char-complement (char-range #\000 #\110)
(char-range #\112 ,(integer->char max-char-num))))
(intersection (repetition 0 +inf.0 #\2))) c)
z)
((->re `(intersection ,rr ,rrr) c) (build-and (list rr rrr) c))
((->re `(intersection ,r) c) r)
((->re `(intersection) c) (build-neg z c))
((->re `(complement ,r) c) (build-neg r c))
((->re `(concatenation) c) e)
((->re `(concatenation ,rrr*) c) rrr*)
(rr (build-concat r r c))
((->re `(concatenation ,r ,rr ,rrr) c)
(build-concat r (build-concat rr rrr c) c))
((isc (char-setR-chars (->re `(char-range #\1 #\1) c))) (isc (is:make-range 49)))
((isc (char-setR-chars (->re `(char-range #\1 #\9) c))) (isc (is:make-range 49 57)))
((isc (char-setR-chars (->re `(char-range "1" "1") c))) (isc (is:make-range 49)))
((isc (char-setR-chars (->re `(char-range "1" "9") c))) (isc (is:make-range 49 57)))
((->re `(char-range "9" "1") c) z)
((isc (char-setR-chars (->re `(char-complement) c)))
(isc (char-setR-chars (->re `(char-range #\000 ,(integer->char max-char-num)) c))))
((isc (char-setR-chars (->re `(char-complement #\001 (char-range #\002 ,(integer->char max-char-num))) c)))
(isc (is:make-range 0)))
)

@ -0,0 +1,183 @@
#lang racket/base
(require yaragg/parser-tools/private-lex/util syntax/id-table racket/syntax)
(provide parse)
(define (bad-args stx num)
(raise-syntax-error #f (format "incorrect number of arguments (should have ~a)" num) stx))
;; char-range-arg: syntax-object syntax-object -> nat
;; If c contains is a character or length 1 string, returns the integer
;; for the character. Otherwise raises a syntax error.
(define (char-range-arg stx containing-stx)
(define c (syntax-e stx))
(cond
[(char? c) (char->integer c)]
[(and (string? c) (= (string-length c) 1))
(char->integer (string-ref c 0))]
[else
(raise-syntax-error
#f
"not a char or single-char string"
containing-stx stx)]))
(module+ test
(check-equal? (char-range-arg #'#\1 #'here) (char->integer #\1))
(check-equal? (char-range-arg #'"1" #'here) (char->integer #\1)))
(define orig-insp (variable-reference->module-declaration-inspector
(#%variable-reference)))
(define (disarm stx)
(syntax-disarm stx orig-insp))
;; parse : syntax-object (box (list-of syntax-object)) -> s-re (see re.rkt)
;; checks for errors and generates the plain s-exp form for s
;; Expands lex-abbrevs and applies lex-trans.
(define (parse stx)
(let loop ([stx stx]
;; seen-lex-abbrevs: id-table
[seen-lex-abbrevs (make-immutable-free-id-table)])
(let ([recur (λ (s)
(loop (syntax-rearm s stx)
seen-lex-abbrevs))]
[recur/abbrev (λ (s id)
(loop (syntax-rearm s stx)
(free-id-table-set seen-lex-abbrevs id id)))])
(syntax-case (disarm stx) (repetition union intersection complement concatenation
char-range char-complement)
[_
(identifier? stx)
(let ([expansion (syntax-local-value/record stx (λ (v) #t))])
(unless (lex-abbrev? expansion)
(raise-syntax-error 'regular-expression
"undefined abbreviation"
stx))
;; Check for cycles.
(when (free-id-table-ref seen-lex-abbrevs stx (λ () #f))
(raise-syntax-error 'regular-expression
"illegal lex-abbrev cycle detected"
stx
#f
(list (free-id-table-ref seen-lex-abbrevs stx))))
(recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx))]
[_
(or (char? (syntax-e stx)) (string? (syntax-e stx)))
(syntax-e stx)]
[(repetition ARG ...)
(let ([arg-list (syntax->list #'(ARG ...))])
(unless (= 3 (length arg-list))
(bad-args stx 2))
(define low (syntax-e (car arg-list)))
(define high (syntax-e (cadr arg-list)))
(define re (caddr arg-list))
(unless (and (number? low) (exact? low) (integer? low) (>= low 0))
(raise-syntax-error #f "not a non-negative exact integer" stx (car arg-list)))
(unless (or (and (number? high) (exact? high) (integer? high) (>= high 0))
(eqv? high +inf.0))
(raise-syntax-error #f "not a non-negative exact integer or +inf.0" stx (cadr arg-list)))
(unless (<= low high)
(raise-syntax-error #f "the first argument is not less than or equal to the second argument" stx))
`(repetition ,low ,high ,(recur re)))]
[(union RE ...)
`(union ,@(map recur (syntax->list #'(RE ...))))]
[(intersection RE ...)
`(intersection ,@(map recur (syntax->list #'(RE ...))))]
[(complement RE ...)
(let ([re-list (syntax->list #'(RE ...))])
(unless (= 1 (length re-list))
(bad-args stx 1))
`(complement ,(recur (car re-list))))]
[(concatenation RE ...)
`(concatenation ,@(map recur (syntax->list #'(RE ...))))]
[(char-range ARG ...)
(let ((arg-list (syntax->list #'(ARG ...))))
(unless (= 2 (length arg-list))
(bad-args stx 2))
(let ([i1 (char-range-arg (car arg-list) stx)]
[i2 (char-range-arg (cadr arg-list) stx)])
(if (<= i1 i2)
`(char-range ,(integer->char i1) ,(integer->char i2))
(raise-syntax-error #f "the first argument does not precede or equal second argument" stx))))]
[(char-complement ARG ...)
(let ([arg-list (syntax->list #'(ARG ...))])
(unless (= 1 (length arg-list))
(bad-args stx 1))
(define parsed (recur (car arg-list)))
(unless (char-set? parsed)
(raise-syntax-error #f "not a character set" stx (car arg-list)))
`(char-complement ,parsed))]
((OP form ...)
(identifier? #'OP)
(let* ([expansion (syntax-local-value/record #'OP (λ (v) #t))])
(cond
[(lex-trans? expansion)
(recur ((lex-trans-f expansion) (disarm stx)))]
[expansion
(raise-syntax-error 'regular-expression "not a lex-trans" stx)]
[else
(raise-syntax-error 'regular-expression "undefined operator" stx)])))
[_ (raise-syntax-error 'regular-expression "not a char, string, identifier, or (op args ...)" stx)]))))
;; char-set? : s-re -> bool
;; A char-set is an re that matches only strings of length 1.
;; char-set? is conservative.
(define (char-set? s-re)
(cond
[(char? s-re)]
[(string? s-re) (= (string-length s-re) 1)]
[(list? s-re) (case (car s-re)
[(union intersection) (andmap char-set? (cdr s-re))]
[(char-range char-complement) #t]
[(repetition) (and (= 1 (cadr s-re) (caddr s-re)) (char-set? (cadddr s-re)))]
[(concatenation) (and (= 2 (length s-re)) (char-set? (cadr s-re)))]
(else #f))]
[else #f]))
(module+ test
(require rackunit)
(check-equal? (char-set? #\a) #t)
(check-equal? (char-set? "12") #f)
(check-equal? (char-set? "1") #t)
(check-equal? (char-set? '(repetition 1 2 #\1)) #f)
(check-equal? (char-set? '(repetition 1 1 "12")) #f)
(check-equal? (char-set? '(repetition 1 1 "1")) #t)
(check-equal? (char-set? '(repetition 6 6 "1")) #f)
(check-equal? (char-set? '(union "1" "2" "3")) #t)
(check-equal? (char-set? '(union "1" "" "3")) #f)
(check-equal? (char-set? '(intersection "1" "2" (union "3" "4"))) #t)
(check-equal? (char-set? '(intersection "1" "")) #f)
(check-equal? (char-set? '(complement "1")) #f)
(check-equal? (char-set? '(concatenation "1" "2")) #f)
(check-equal? (char-set? '(concatenation "" "2")) #f)
(check-equal? (char-set? '(concatenation "1")) #t)
(check-equal? (char-set? '(concatenation "12")) #f)
(check-equal? (char-set? '(char-range #\1 #\2)) #t)
(check-equal? (char-set? '(char-complement #\1)) #t))
;; yikes... these test cases all have the wrong arity, now.
;; and by "now", I mean it's been broken since before we
;; moved to git.
(module+ test
(check-equal? (parse #'#\a) #\a)
(check-equal? (parse #'"1") "1")
(check-equal? (parse #'(repetition 1 1 #\1))
'(repetition 1 1 #\1))
(check-equal? (parse #'(repetition 0 +inf.0 #\1)) '(repetition 0 +inf.0 #\1))
(check-equal? (parse #'(union #\1 (union "2") (union)))
'(union #\1 (union "2") (union)))
(check-equal? (parse #'(intersection #\1 (intersection "2") (intersection)))
'(intersection #\1 (intersection "2") (intersection)))
(check-equal? (parse #'(complement (union #\1 #\2)))
'(complement (union #\1 #\2)))
(check-equal? (parse #'(concatenation "1" "2" (concatenation)))
'(concatenation "1" "2" (concatenation)))
(check-equal? (parse #'(char-range "1" #\1)) '(char-range #\1 #\1))
(check-equal? (parse #'(char-range #\1 "1")) '(char-range #\1 #\1))
(check-equal? (parse #'(char-range "1" "3")) '(char-range #\1 #\3))
(check-equal? (parse #'(char-complement (union "1" "2")))
'(char-complement (union "1" "2")))
(check-equal? (parse #'(char-complement (repetition 1 1 "1")))
'(char-complement (repetition 1 1 "1")))
(check-exn #rx"not a character set"
(λ () (parse #'(char-complement (repetition 6 6 "1"))))))

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save