move brag
parent
1dc08fbe92
commit
48cd7f0082
@ -1,165 +0,0 @@
|
|||||||
GNU LESSER GENERAL PUBLIC LICENSE
|
|
||||||
Version 3, 29 June 2007
|
|
||||||
|
|
||||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
|
||||||
Everyone is permitted to copy and distribute verbatim copies
|
|
||||||
of this license document, but changing it is not allowed.
|
|
||||||
|
|
||||||
|
|
||||||
This version of the GNU Lesser General Public License incorporates
|
|
||||||
the terms and conditions of version 3 of the GNU General Public
|
|
||||||
License, supplemented by the additional permissions listed below.
|
|
||||||
|
|
||||||
0. Additional Definitions.
|
|
||||||
|
|
||||||
As used herein, "this License" refers to version 3 of the GNU Lesser
|
|
||||||
General Public License, and the "GNU GPL" refers to version 3 of the GNU
|
|
||||||
General Public License.
|
|
||||||
|
|
||||||
"The Library" refers to a covered work governed by this License,
|
|
||||||
other than an Application or a Combined Work as defined below.
|
|
||||||
|
|
||||||
An "Application" is any work that makes use of an interface provided
|
|
||||||
by the Library, but which is not otherwise based on the Library.
|
|
||||||
Defining a subclass of a class defined by the Library is deemed a mode
|
|
||||||
of using an interface provided by the Library.
|
|
||||||
|
|
||||||
A "Combined Work" is a work produced by combining or linking an
|
|
||||||
Application with the Library. The particular version of the Library
|
|
||||||
with which the Combined Work was made is also called the "Linked
|
|
||||||
Version".
|
|
||||||
|
|
||||||
The "Minimal Corresponding Source" for a Combined Work means the
|
|
||||||
Corresponding Source for the Combined Work, excluding any source code
|
|
||||||
for portions of the Combined Work that, considered in isolation, are
|
|
||||||
based on the Application, and not on the Linked Version.
|
|
||||||
|
|
||||||
The "Corresponding Application Code" for a Combined Work means the
|
|
||||||
object code and/or source code for the Application, including any data
|
|
||||||
and utility programs needed for reproducing the Combined Work from the
|
|
||||||
Application, but excluding the System Libraries of the Combined Work.
|
|
||||||
|
|
||||||
1. Exception to Section 3 of the GNU GPL.
|
|
||||||
|
|
||||||
You may convey a covered work under sections 3 and 4 of this License
|
|
||||||
without being bound by section 3 of the GNU GPL.
|
|
||||||
|
|
||||||
2. Conveying Modified Versions.
|
|
||||||
|
|
||||||
If you modify a copy of the Library, and, in your modifications, a
|
|
||||||
facility refers to a function or data to be supplied by an Application
|
|
||||||
that uses the facility (other than as an argument passed when the
|
|
||||||
facility is invoked), then you may convey a copy of the modified
|
|
||||||
version:
|
|
||||||
|
|
||||||
a) under this License, provided that you make a good faith effort to
|
|
||||||
ensure that, in the event an Application does not supply the
|
|
||||||
function or data, the facility still operates, and performs
|
|
||||||
whatever part of its purpose remains meaningful, or
|
|
||||||
|
|
||||||
b) under the GNU GPL, with none of the additional permissions of
|
|
||||||
this License applicable to that copy.
|
|
||||||
|
|
||||||
3. Object Code Incorporating Material from Library Header Files.
|
|
||||||
|
|
||||||
The object code form of an Application may incorporate material from
|
|
||||||
a header file that is part of the Library. You may convey such object
|
|
||||||
code under terms of your choice, provided that, if the incorporated
|
|
||||||
material is not limited to numerical parameters, data structure
|
|
||||||
layouts and accessors, or small macros, inline functions and templates
|
|
||||||
(ten or fewer lines in length), you do both of the following:
|
|
||||||
|
|
||||||
a) Give prominent notice with each copy of the object code that the
|
|
||||||
Library is used in it and that the Library and its use are
|
|
||||||
covered by this License.
|
|
||||||
|
|
||||||
b) Accompany the object code with a copy of the GNU GPL and this license
|
|
||||||
document.
|
|
||||||
|
|
||||||
4. Combined Works.
|
|
||||||
|
|
||||||
You may convey a Combined Work under terms of your choice that,
|
|
||||||
taken together, effectively do not restrict modification of the
|
|
||||||
portions of the Library contained in the Combined Work and reverse
|
|
||||||
engineering for debugging such modifications, if you also do each of
|
|
||||||
the following:
|
|
||||||
|
|
||||||
a) Give prominent notice with each copy of the Combined Work that
|
|
||||||
the Library is used in it and that the Library and its use are
|
|
||||||
covered by this License.
|
|
||||||
|
|
||||||
b) Accompany the Combined Work with a copy of the GNU GPL and this license
|
|
||||||
document.
|
|
||||||
|
|
||||||
c) For a Combined Work that displays copyright notices during
|
|
||||||
execution, include the copyright notice for the Library among
|
|
||||||
these notices, as well as a reference directing the user to the
|
|
||||||
copies of the GNU GPL and this license document.
|
|
||||||
|
|
||||||
d) Do one of the following:
|
|
||||||
|
|
||||||
0) Convey the Minimal Corresponding Source under the terms of this
|
|
||||||
License, and the Corresponding Application Code in a form
|
|
||||||
suitable for, and under terms that permit, the user to
|
|
||||||
recombine or relink the Application with a modified version of
|
|
||||||
the Linked Version to produce a modified Combined Work, in the
|
|
||||||
manner specified by section 6 of the GNU GPL for conveying
|
|
||||||
Corresponding Source.
|
|
||||||
|
|
||||||
1) Use a suitable shared library mechanism for linking with the
|
|
||||||
Library. A suitable mechanism is one that (a) uses at run time
|
|
||||||
a copy of the Library already present on the user's computer
|
|
||||||
system, and (b) will operate properly with a modified version
|
|
||||||
of the Library that is interface-compatible with the Linked
|
|
||||||
Version.
|
|
||||||
|
|
||||||
e) Provide Installation Information, but only if you would otherwise
|
|
||||||
be required to provide such information under section 6 of the
|
|
||||||
GNU GPL, and only to the extent that such information is
|
|
||||||
necessary to install and execute a modified version of the
|
|
||||||
Combined Work produced by recombining or relinking the
|
|
||||||
Application with a modified version of the Linked Version. (If
|
|
||||||
you use option 4d0, the Installation Information must accompany
|
|
||||||
the Minimal Corresponding Source and Corresponding Application
|
|
||||||
Code. If you use option 4d1, you must provide the Installation
|
|
||||||
Information in the manner specified by section 6 of the GNU GPL
|
|
||||||
for conveying Corresponding Source.)
|
|
||||||
|
|
||||||
5. Combined Libraries.
|
|
||||||
|
|
||||||
You may place library facilities that are a work based on the
|
|
||||||
Library side by side in a single library together with other library
|
|
||||||
facilities that are not Applications and are not covered by this
|
|
||||||
License, and convey such a combined library under terms of your
|
|
||||||
choice, if you do both of the following:
|
|
||||||
|
|
||||||
a) Accompany the combined library with a copy of the same work based
|
|
||||||
on the Library, uncombined with any other library facilities,
|
|
||||||
conveyed under the terms of this License.
|
|
||||||
|
|
||||||
b) Give prominent notice with the combined library that part of it
|
|
||||||
is a work based on the Library, and explaining where to find the
|
|
||||||
accompanying uncombined form of the same work.
|
|
||||||
|
|
||||||
6. Revised Versions of the GNU Lesser General Public License.
|
|
||||||
|
|
||||||
The Free Software Foundation may publish revised and/or new versions
|
|
||||||
of the GNU Lesser General Public License from time to time. Such new
|
|
||||||
versions will be similar in spirit to the present version, but may
|
|
||||||
differ in detail to address new problems or concerns.
|
|
||||||
|
|
||||||
Each version is given a distinguishing version number. If the
|
|
||||||
Library as you received it specifies that a certain numbered version
|
|
||||||
of the GNU Lesser General Public License "or any later version"
|
|
||||||
applies to it, you have the option of following the terms and
|
|
||||||
conditions either of that published version or of any later version
|
|
||||||
published by the Free Software Foundation. If the Library as you
|
|
||||||
received it does not specify a version number of the GNU Lesser
|
|
||||||
General Public License, you may choose any version of the GNU Lesser
|
|
||||||
General Public License ever published by the Free Software Foundation.
|
|
||||||
|
|
||||||
If the Library as you received it specifies that a proxy can decide
|
|
||||||
whether future versions of the GNU Lesser General Public License shall
|
|
||||||
apply, that proxy's public statement of acceptance of any version is
|
|
||||||
permanent authorization for you to choose that version for the
|
|
||||||
Library.
|
|
@ -1,3 +0,0 @@
|
|||||||
Beautiful Racket
|
|
||||||
© 2016 Matthew Butterick
|
|
||||||
Licensed under the LGPL (see "LGPL.txt")
|
|
@ -1,11 +0,0 @@
|
|||||||
parser-tools-doc
|
|
||||||
Copyright (c) 2010-2014 PLT Design Inc.
|
|
||||||
|
|
||||||
This package is distributed under the GNU Lesser General Public
|
|
||||||
License (LGPL). This means that you can link this package into proprietary
|
|
||||||
applications, provided you follow the rules stated in the LGPL. You
|
|
||||||
can also modify this package; if you distribute a modified version,
|
|
||||||
you must distribute it under the terms of the LGPL, which in
|
|
||||||
particular means that you must release the source code for the
|
|
||||||
modified software. See http://www.gnu.org/copyleft/lesser.html
|
|
||||||
for more information.
|
|
@ -1,3 +0,0 @@
|
|||||||
#lang info
|
|
||||||
|
|
||||||
(define scribblings '(("br-parser-tools.scrbl" (multi-page) (parsing-library))))
|
|
@ -1,14 +0,0 @@
|
|||||||
#lang info
|
|
||||||
|
|
||||||
(define collection 'multi)
|
|
||||||
(define deps '("base"))
|
|
||||||
(define build-deps '("scheme-lib"
|
|
||||||
"racket-doc"
|
|
||||||
"syntax-color-doc"
|
|
||||||
"br-parser-tools-lib"
|
|
||||||
"scribble-lib"))
|
|
||||||
(define update-implies '("br-parser-tools-lib"))
|
|
||||||
|
|
||||||
(define pkg-desc "documentation part of \"br-parser-tools\"")
|
|
||||||
|
|
||||||
(define pkg-authors '(mflatt))
|
|
@ -1,11 +0,0 @@
|
|||||||
parser-tools-lib
|
|
||||||
Copyright (c) 2010-2014 PLT Design Inc.
|
|
||||||
|
|
||||||
This package is distributed under the GNU Lesser General Public
|
|
||||||
License (LGPL). This means that you can link this package into proprietary
|
|
||||||
applications, provided you follow the rules stated in the LGPL. You
|
|
||||||
can also modify this package; if you distribute a modified version,
|
|
||||||
you must distribute it under the terms of the LGPL, which in
|
|
||||||
particular means that you must release the source code for the
|
|
||||||
modified software. See http://www.gnu.org/copyleft/lesser.html
|
|
||||||
for more information.
|
|
@ -1,982 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
;; This module implements a parser form like the br-parser-tools's
|
|
||||||
;; `parser', except that it works on an arbitrary CFG (returning
|
|
||||||
;; the first sucecssful parse).
|
|
||||||
|
|
||||||
;; I'm pretty sure that this is an implementation of Earley's
|
|
||||||
;; algorithm.
|
|
||||||
|
|
||||||
;; To a first approximation, it's a backtracking parser. Alternative
|
|
||||||
;; for a non-terminal are computed in parallel, and multiple attempts
|
|
||||||
;; to compute the same result block until the first one completes. If
|
|
||||||
;; you get into deadlock, such as when trying to match
|
|
||||||
;; <foo> := <foo>
|
|
||||||
;; then it means that there's no successful parse, so everything
|
|
||||||
;; that's blocked fails.
|
|
||||||
|
|
||||||
;; A cache holds the series of results for a particular non-terminal
|
|
||||||
;; at a particular starting location. (A series is used, instead of a
|
|
||||||
;; sinlge result, for backtracking.) Otherwise, the parser uses
|
|
||||||
;; backtracking search. Backtracking is implemented through explicit
|
|
||||||
;; success and failure continuations. Multiple results for a
|
|
||||||
;; particular nonterminal and location are kept only when they have
|
|
||||||
;; different lengths. (Otherwise, in the spirit of finding one
|
|
||||||
;; successful parse, only the first result is kept.)
|
|
||||||
|
|
||||||
;; The br-parser-tools's `parse' is used to transform tokens in the
|
|
||||||
;; grammar to tokens specific to this parser. In other words, this
|
|
||||||
;; parser uses `parser' so that it doesn't have to know anything about
|
|
||||||
;; tokens.
|
|
||||||
;;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(require br-parser-tools/yacc
|
|
||||||
br-parser-tools/lex)
|
|
||||||
|
|
||||||
(require (for-syntax racket/base
|
|
||||||
syntax/boundmap
|
|
||||||
br-parser-tools/private-lex/token-syntax))
|
|
||||||
|
|
||||||
(provide cfg-parser)
|
|
||||||
|
|
||||||
;; A raw token, wrapped so that we can recognize it:
|
|
||||||
(define-struct tok (name orig-name val start end))
|
|
||||||
|
|
||||||
;; Represents the thread scheduler:
|
|
||||||
(define-struct tasks (active active-back waits multi-waits cache progress?))
|
|
||||||
|
|
||||||
(define-for-syntax make-token-identifier-mapping make-hasheq)
|
|
||||||
(define-for-syntax token-identifier-mapping-get
|
|
||||||
(case-lambda
|
|
||||||
[(t tok)
|
|
||||||
(hash-ref t (syntax-e tok))]
|
|
||||||
[(t tok fail)
|
|
||||||
(hash-ref t (syntax-e tok) fail)]))
|
|
||||||
(define-for-syntax token-identifier-mapping-put!
|
|
||||||
(lambda (t tok v)
|
|
||||||
(hash-set! t (syntax-e tok) v)))
|
|
||||||
(define-for-syntax token-identifier-mapping-map
|
|
||||||
(lambda (t f)
|
|
||||||
(hash-map t f)))
|
|
||||||
|
|
||||||
;; Used to calculate information on the grammar, such as whether
|
|
||||||
;; a particular non-terminal is "simple" instead of recursively defined.
|
|
||||||
(define-for-syntax (nt-fixpoint nts proc nt-ids patss)
|
|
||||||
(define (ormap-all val f as bs)
|
|
||||||
(cond
|
|
||||||
[(null? as) val]
|
|
||||||
[else (ormap-all (or (f (car as) (car bs)) val)
|
|
||||||
f
|
|
||||||
(cdr as) (cdr bs))]))
|
|
||||||
(let loop ()
|
|
||||||
(when (ormap-all #f
|
|
||||||
(lambda (nt pats)
|
|
||||||
(let ([old (bound-identifier-mapping-get nts nt)])
|
|
||||||
(let ([new (proc nt pats old)])
|
|
||||||
(if (equal? old new)
|
|
||||||
#f
|
|
||||||
(begin
|
|
||||||
(bound-identifier-mapping-put! nts nt new)
|
|
||||||
#t)))))
|
|
||||||
nt-ids patss)
|
|
||||||
(loop))))
|
|
||||||
|
|
||||||
;; Tries parse-a followed by parse-b. If parse-a is not simple,
|
|
||||||
;; then after parse-a succeeds once, we parallelize parse-b
|
|
||||||
;; and trying a second result for parse-a.
|
|
||||||
(define (parse-and simple-a? parse-a parse-b
|
|
||||||
stream last-consumed-token depth end success-k fail-k
|
|
||||||
max-depth tasks)
|
|
||||||
(letrec ([mk-got-k
|
|
||||||
(lambda (success-k fail-k)
|
|
||||||
(lambda (val stream last-consumed-token depth max-depth tasks next1-k)
|
|
||||||
(if simple-a?
|
|
||||||
(parse-b val stream last-consumed-token depth end
|
|
||||||
(mk-got2-k success-k fail-k next1-k)
|
|
||||||
(mk-fail2-k success-k fail-k next1-k)
|
|
||||||
max-depth tasks)
|
|
||||||
(parallel-or
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(parse-b val stream last-consumed-token depth end
|
|
||||||
success-k fail-k
|
|
||||||
max-depth tasks))
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(next1-k (mk-got-k success-k fail-k)
|
|
||||||
fail-k max-depth tasks))
|
|
||||||
success-k fail-k max-depth tasks))))]
|
|
||||||
[mk-got2-k
|
|
||||||
(lambda (success-k fail-k next1-k)
|
|
||||||
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
|
||||||
(success-k val stream last-consumed-token depth max-depth tasks
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(next-k (mk-got2-k success-k fail-k next1-k)
|
|
||||||
(mk-fail2-k success-k fail-k next1-k)
|
|
||||||
max-depth tasks)))))]
|
|
||||||
[mk-fail2-k
|
|
||||||
(lambda (success-k fail-k next1-k)
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(next1-k (mk-got-k success-k fail-k)
|
|
||||||
fail-k
|
|
||||||
max-depth
|
|
||||||
tasks)))])
|
|
||||||
(parse-a stream last-consumed-token depth end
|
|
||||||
(mk-got-k success-k fail-k)
|
|
||||||
fail-k
|
|
||||||
max-depth tasks)))
|
|
||||||
|
|
||||||
;; Parallel or for non-terminal alternatives
|
|
||||||
(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
|
||||||
(parallel-or (lambda (success-k fail-k max-depth tasks)
|
|
||||||
(parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks))
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks))
|
|
||||||
success-k fail-k max-depth tasks))
|
|
||||||
|
|
||||||
;; Generic parallel-or
|
|
||||||
(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks)
|
|
||||||
(define answer-key (gensym))
|
|
||||||
(letrec ([gota-k
|
|
||||||
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
|
||||||
(report-answer answer-key
|
|
||||||
max-depth
|
|
||||||
tasks
|
|
||||||
(list val stream last-consumed-token depth next-k)))]
|
|
||||||
[faila-k
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(report-answer answer-key
|
|
||||||
max-depth
|
|
||||||
tasks
|
|
||||||
null))])
|
|
||||||
(let* ([tasks (queue-task
|
|
||||||
tasks
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(parse-a gota-k
|
|
||||||
faila-k
|
|
||||||
max-depth tasks)))]
|
|
||||||
[tasks (queue-task
|
|
||||||
tasks
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(parse-b gota-k
|
|
||||||
faila-k
|
|
||||||
max-depth tasks)))]
|
|
||||||
[queue-next (lambda (next-k tasks)
|
|
||||||
(queue-task tasks
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(next-k gota-k
|
|
||||||
faila-k
|
|
||||||
max-depth tasks))))])
|
|
||||||
(letrec ([mk-got-one
|
|
||||||
(lambda (immediate-next? get-nth success-k)
|
|
||||||
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
|
||||||
(let ([tasks (if immediate-next?
|
|
||||||
(queue-next next-k tasks)
|
|
||||||
tasks)])
|
|
||||||
(success-k val stream last-consumed-token depth max-depth
|
|
||||||
tasks
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(let ([tasks (if immediate-next?
|
|
||||||
tasks
|
|
||||||
(queue-next next-k tasks))])
|
|
||||||
(get-nth max-depth tasks success-k fail-k)))))))]
|
|
||||||
[get-first
|
|
||||||
(lambda (max-depth tasks success-k fail-k)
|
|
||||||
(wait-for-answer #f max-depth tasks answer-key
|
|
||||||
(mk-got-one #t get-first success-k)
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(get-second max-depth tasks success-k fail-k))
|
|
||||||
#f))]
|
|
||||||
[get-second
|
|
||||||
(lambda (max-depth tasks success-k fail-k)
|
|
||||||
(wait-for-answer #f max-depth tasks answer-key
|
|
||||||
(mk-got-one #f get-second success-k)
|
|
||||||
fail-k #f))])
|
|
||||||
(get-first max-depth tasks success-k fail-k)))))
|
|
||||||
|
|
||||||
;; Non-terminal alternatives where the first is "simple" can be done
|
|
||||||
;; sequentially, which is simpler
|
|
||||||
(define (parse-or parse-a parse-b
|
|
||||||
stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
|
||||||
(letrec ([mk-got-k
|
|
||||||
(lambda (success-k fail-k)
|
|
||||||
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
|
||||||
(success-k val stream last-consumed-token depth
|
|
||||||
max-depth tasks
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(next-k (mk-got-k success-k fail-k)
|
|
||||||
(mk-fail-k success-k fail-k)
|
|
||||||
max-depth tasks)))))]
|
|
||||||
[mk-fail-k
|
|
||||||
(lambda (success-k fail-k)
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))])
|
|
||||||
(parse-a stream last-consumed-token depth end
|
|
||||||
(mk-got-k success-k fail-k)
|
|
||||||
(mk-fail-k success-k fail-k)
|
|
||||||
max-depth tasks)))
|
|
||||||
|
|
||||||
;; Starts a thread
|
|
||||||
(define queue-task
|
|
||||||
(lambda (tasks t [progress? #t])
|
|
||||||
(make-tasks (tasks-active tasks)
|
|
||||||
(cons t (tasks-active-back tasks))
|
|
||||||
(tasks-waits tasks)
|
|
||||||
(tasks-multi-waits tasks)
|
|
||||||
(tasks-cache tasks)
|
|
||||||
(or progress? (tasks-progress? tasks)))))
|
|
||||||
|
|
||||||
;; Reports an answer to a waiting thread:
|
|
||||||
(define (report-answer answer-key max-depth tasks val)
|
|
||||||
(let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))])
|
|
||||||
(if v
|
|
||||||
(let ([tasks (make-tasks (cons (v val)
|
|
||||||
(tasks-active tasks))
|
|
||||||
(tasks-active-back tasks)
|
|
||||||
(tasks-waits tasks)
|
|
||||||
(tasks-multi-waits tasks)
|
|
||||||
(tasks-cache tasks)
|
|
||||||
#t)])
|
|
||||||
(hash-remove! (tasks-waits tasks) answer-key)
|
|
||||||
(swap-task max-depth tasks))
|
|
||||||
;; We have an answer ready too fast; wait
|
|
||||||
(swap-task max-depth
|
|
||||||
(queue-task tasks
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(report-answer answer-key max-depth tasks val))
|
|
||||||
#f)))))
|
|
||||||
|
|
||||||
;; Reports an answer to multiple waiting threads:
|
|
||||||
(define (report-answer-all answer-key max-depth tasks val k)
|
|
||||||
(let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))])
|
|
||||||
(hash-remove! (tasks-multi-waits tasks) answer-key)
|
|
||||||
(let ([tasks (make-tasks (append (map (lambda (a) (a val)) v)
|
|
||||||
(tasks-active tasks))
|
|
||||||
(tasks-active-back tasks)
|
|
||||||
(tasks-waits tasks)
|
|
||||||
(tasks-multi-waits tasks)
|
|
||||||
(tasks-cache tasks)
|
|
||||||
#t)])
|
|
||||||
(k max-depth tasks))))
|
|
||||||
|
|
||||||
;; Waits for an answer; if `multi?' is #f, this is sole waiter, otherwise
|
|
||||||
;; there might be many. Use wither #t or #f (and `report-answer' or
|
|
||||||
;; `report-answer-all', resptively) consistently for a particular answer key.
|
|
||||||
(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k)
|
|
||||||
(let ([wait (lambda (val)
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(if val
|
|
||||||
(if (null? val)
|
|
||||||
(fail-k max-depth tasks)
|
|
||||||
(let-values ([(val stream last-consumed-token depth next-k) (apply values val)])
|
|
||||||
(success-k val stream last-consumed-token depth max-depth tasks next-k)))
|
|
||||||
(deadlock-k max-depth tasks))))])
|
|
||||||
(if multi?
|
|
||||||
(hash-set! (tasks-multi-waits tasks) answer-key
|
|
||||||
(cons wait (hash-ref (tasks-multi-waits tasks) answer-key
|
|
||||||
(lambda () null))))
|
|
||||||
(hash-set! (tasks-waits tasks) answer-key wait))
|
|
||||||
(let ([tasks (make-tasks (tasks-active tasks)
|
|
||||||
(tasks-active-back tasks)
|
|
||||||
(tasks-waits tasks)
|
|
||||||
(tasks-multi-waits tasks)
|
|
||||||
(tasks-cache tasks)
|
|
||||||
#t)])
|
|
||||||
(swap-task max-depth tasks))))
|
|
||||||
|
|
||||||
;; Swap thread
|
|
||||||
(define (swap-task max-depth tasks)
|
|
||||||
;; Swap in first active:
|
|
||||||
(if (null? (tasks-active tasks))
|
|
||||||
(if (tasks-progress? tasks)
|
|
||||||
(swap-task max-depth
|
|
||||||
(make-tasks (reverse (tasks-active-back tasks))
|
|
||||||
null
|
|
||||||
(tasks-waits tasks)
|
|
||||||
(tasks-multi-waits tasks)
|
|
||||||
(tasks-cache tasks)
|
|
||||||
#f))
|
|
||||||
;; No progress, so issue failure for all multi-waits
|
|
||||||
(if (zero? (hash-count (tasks-multi-waits tasks)))
|
|
||||||
(error 'swap-task "Deadlock")
|
|
||||||
(swap-task max-depth
|
|
||||||
(make-tasks (apply
|
|
||||||
append
|
|
||||||
(hash-map (tasks-multi-waits tasks)
|
|
||||||
(lambda (k l)
|
|
||||||
(map (lambda (v) (v #f)) l))))
|
|
||||||
(tasks-active-back tasks)
|
|
||||||
(tasks-waits tasks)
|
|
||||||
(make-hasheq)
|
|
||||||
(tasks-cache tasks)
|
|
||||||
#t))))
|
|
||||||
(let ([t (car (tasks-active tasks))]
|
|
||||||
[tasks (make-tasks (cdr (tasks-active tasks))
|
|
||||||
(tasks-active-back tasks)
|
|
||||||
(tasks-waits tasks)
|
|
||||||
(tasks-multi-waits tasks)
|
|
||||||
(tasks-cache tasks)
|
|
||||||
(tasks-progress? tasks))])
|
|
||||||
(t max-depth tasks))))
|
|
||||||
|
|
||||||
;; Finds the symbolic representative of a token class
|
|
||||||
(define-for-syntax (map-token toks tok)
|
|
||||||
(car (token-identifier-mapping-get toks tok)))
|
|
||||||
|
|
||||||
(define no-pos-val (make-position #f #f #f))
|
|
||||||
(define-for-syntax no-pos
|
|
||||||
(let ([npv ((syntax-local-certifier) #'no-pos-val)])
|
|
||||||
(lambda (stx) npv)))
|
|
||||||
(define-for-syntax at-tok-pos
|
|
||||||
(lambda (sel expr)
|
|
||||||
(lambda (stx)
|
|
||||||
#`(let ([v #,expr]) (if v (#,sel v) no-pos-val)))))
|
|
||||||
|
|
||||||
;; Builds a matcher for a particular alternative
|
|
||||||
(define-for-syntax (build-match nts toks pat handle $ctx)
|
|
||||||
(let loop ([pat pat]
|
|
||||||
[pos 1])
|
|
||||||
(if (null? pat)
|
|
||||||
#`(success-k #,handle stream last-consumed-token depth max-depth tasks
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(fail-k max-depth tasks)))
|
|
||||||
(let ([id (datum->syntax (car pat)
|
|
||||||
(string->symbol (format "$~a" pos)))]
|
|
||||||
[id-start-pos (datum->syntax (car pat)
|
|
||||||
(string->symbol (format "$~a-start-pos" pos)))]
|
|
||||||
[id-end-pos (datum->syntax (car pat)
|
|
||||||
(string->symbol (format "$~a-end-pos" pos)))]
|
|
||||||
[n-end-pos (and (null? (cdr pat))
|
|
||||||
(datum->syntax (car pat) '$n-end-pos))])
|
|
||||||
(cond
|
|
||||||
[(bound-identifier-mapping-get nts (car pat) (lambda () #f))
|
|
||||||
;; Match non-termimal
|
|
||||||
#`(parse-and
|
|
||||||
;; First part is simple? (If so, we don't have to parallelize the `and'.)
|
|
||||||
#,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))])
|
|
||||||
(or (not l)
|
|
||||||
(andmap values (caddr l))))
|
|
||||||
#,(car pat)
|
|
||||||
(let ([original-stream stream])
|
|
||||||
(lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
|
||||||
(let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream)
|
|
||||||
tok-end
|
|
||||||
tok-start)
|
|
||||||
#'(if (eq? original-stream stream)
|
|
||||||
last-consumed-token
|
|
||||||
(and (pair? original-stream)
|
|
||||||
(car original-stream))))]
|
|
||||||
[#,id-end-pos (at-tok-pos #'tok-end #'last-consumed-token)]
|
|
||||||
#,@(if n-end-pos
|
|
||||||
#`([#,n-end-pos (at-tok-pos #'tok-end #'last-consumed-token)])
|
|
||||||
null))
|
|
||||||
#,(loop (cdr pat) (add1 pos)))))
|
|
||||||
stream last-consumed-token depth
|
|
||||||
#,(let ([cnt (apply +
|
|
||||||
(map (lambda (item)
|
|
||||||
(cond
|
|
||||||
[(bound-identifier-mapping-get nts item (lambda () #f))
|
|
||||||
=> (lambda (l) (car l))]
|
|
||||||
[else 1]))
|
|
||||||
(cdr pat)))])
|
|
||||||
#`(- end #,cnt))
|
|
||||||
success-k fail-k max-depth tasks)]
|
|
||||||
[else
|
|
||||||
;; Match token
|
|
||||||
(let ([tok-id (map-token toks (car pat))])
|
|
||||||
#`(if (and (pair? stream)
|
|
||||||
(eq? '#,tok-id (tok-name (car stream))))
|
|
||||||
(let* ([stream-a (car stream)]
|
|
||||||
[#,id (tok-val stream-a)]
|
|
||||||
[last-consumed-token (car stream)]
|
|
||||||
[stream (cdr stream)]
|
|
||||||
[depth (add1 depth)])
|
|
||||||
(let ([max-depth (max max-depth depth)])
|
|
||||||
(let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'stream-a)]
|
|
||||||
[#,id-end-pos (at-tok-pos #'tok-end #'stream-a)]
|
|
||||||
#,@(if n-end-pos
|
|
||||||
#`([#,n-end-pos (at-tok-pos #'tok-end #'stream-a)])
|
|
||||||
null))
|
|
||||||
#,(loop (cdr pat) (add1 pos)))))
|
|
||||||
(fail-k max-depth tasks)))])))))
|
|
||||||
|
|
||||||
;; Starts parsing to match a non-terminal. There's a minor
|
|
||||||
;; optimization that checks for known starting tokens. Otherwise,
|
|
||||||
;; use the cache, block if someone else is already trying the match,
|
|
||||||
;; and cache the result if it's computed.
|
|
||||||
;; The cache maps nontermial+startingpos+iteration to a result, where
|
|
||||||
;; the iteration is 0 for the first match attempt, 1 for the second,
|
|
||||||
;; etc.
|
|
||||||
(define (parse-nt/share key min-cnt init-tokens stream last-consumed-token depth end max-depth tasks success-k fail-k k)
|
|
||||||
(if (and (positive? min-cnt)
|
|
||||||
(pair? stream)
|
|
||||||
(not (memq (tok-name (car stream)) init-tokens)))
|
|
||||||
;; No such leading token; give up
|
|
||||||
(fail-k max-depth tasks)
|
|
||||||
;; Run pattern
|
|
||||||
(let loop ([n 0]
|
|
||||||
[success-k success-k]
|
|
||||||
[fail-k fail-k]
|
|
||||||
[max-depth max-depth]
|
|
||||||
[tasks tasks]
|
|
||||||
[k k])
|
|
||||||
(let ([answer-key (gensym)]
|
|
||||||
[table-key (vector key depth n)]
|
|
||||||
[old-depth depth]
|
|
||||||
[old-stream stream])
|
|
||||||
#;(printf "Loop ~a\n" table-key)
|
|
||||||
(cond
|
|
||||||
[(hash-ref (tasks-cache tasks) table-key (lambda () #f))
|
|
||||||
=> (lambda (result)
|
|
||||||
#;(printf "Reuse ~a\n" table-key)
|
|
||||||
(result success-k fail-k max-depth tasks))]
|
|
||||||
[else
|
|
||||||
#;(printf "Try ~a ~a\n" table-key (map tok-name stream))
|
|
||||||
(hash-set! (tasks-cache tasks) table-key
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
#;(printf "Wait ~a ~a\n" table-key answer-key)
|
|
||||||
(wait-for-answer #t max-depth tasks answer-key success-k fail-k
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
#;(printf "Deadlock ~a ~a\n" table-key answer-key)
|
|
||||||
(fail-k max-depth tasks)))))
|
|
||||||
(let result-loop ([max-depth max-depth][tasks tasks][k k])
|
|
||||||
(letrec ([orig-stream stream]
|
|
||||||
[new-got-k
|
|
||||||
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
|
||||||
;; Check whether we already have a result that consumed the same amount:
|
|
||||||
(let ([result-key (vector #f key old-depth depth)])
|
|
||||||
(cond
|
|
||||||
[(hash-ref (tasks-cache tasks) result-key (lambda () #f))
|
|
||||||
;; Go for the next-result
|
|
||||||
(result-loop max-depth
|
|
||||||
tasks
|
|
||||||
(lambda (end max-depth tasks success-k fail-k)
|
|
||||||
(next-k success-k fail-k max-depth tasks)))]
|
|
||||||
[else
|
|
||||||
#;(printf "Success ~a ~a\n" table-key
|
|
||||||
(map tok-name (let loop ([d old-depth][s old-stream])
|
|
||||||
(if (= d depth)
|
|
||||||
null
|
|
||||||
(cons (car s) (loop (add1 d) (cdr s)))))))
|
|
||||||
(let ([next-k (lambda (success-k fail-k max-depth tasks)
|
|
||||||
(loop (add1 n)
|
|
||||||
success-k
|
|
||||||
fail-k
|
|
||||||
max-depth
|
|
||||||
tasks
|
|
||||||
(lambda (end max-depth tasks success-k fail-k)
|
|
||||||
(next-k success-k fail-k max-depth tasks))))])
|
|
||||||
(hash-set! (tasks-cache tasks) result-key #t)
|
|
||||||
(hash-set! (tasks-cache tasks) table-key
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(success-k val stream last-consumed-token depth max-depth tasks next-k)))
|
|
||||||
(report-answer-all answer-key
|
|
||||||
max-depth
|
|
||||||
tasks
|
|
||||||
(list val stream last-consumed-token depth next-k)
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(success-k val stream last-consumed-token depth max-depth tasks next-k))))])))]
|
|
||||||
[new-fail-k
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
#;(printf "Failure ~a\n" table-key)
|
|
||||||
(hash-set! (tasks-cache tasks) table-key
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(fail-k max-depth tasks)))
|
|
||||||
(report-answer-all answer-key
|
|
||||||
max-depth
|
|
||||||
tasks
|
|
||||||
null
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(fail-k max-depth tasks))))])
|
|
||||||
(k end max-depth tasks new-got-k new-fail-k)))])))))
|
|
||||||
|
|
||||||
(define-syntax (cfg-parser stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
[(_ clause ...)
|
|
||||||
(let ([clauses (syntax->list #'(clause ...))])
|
|
||||||
(let-values ([(start grammar cfg-error parser-clauses src-pos?)
|
|
||||||
(let ([all-toks (apply
|
|
||||||
append
|
|
||||||
(map (lambda (clause)
|
|
||||||
(syntax-case clause (tokens)
|
|
||||||
[(tokens t ...)
|
|
||||||
(apply
|
|
||||||
append
|
|
||||||
(map (lambda (t)
|
|
||||||
(let ([v (syntax-local-value t (lambda () #f))])
|
|
||||||
(cond
|
|
||||||
[(terminals-def? v)
|
|
||||||
(map (lambda (v)
|
|
||||||
(cons v #f))
|
|
||||||
(syntax->list (terminals-def-t v)))]
|
|
||||||
[(e-terminals-def? v)
|
|
||||||
(map (lambda (v)
|
|
||||||
(cons v #t))
|
|
||||||
(syntax->list (e-terminals-def-t v)))]
|
|
||||||
[else null])))
|
|
||||||
(syntax->list #'(t ...))))]
|
|
||||||
[_else null]))
|
|
||||||
clauses))]
|
|
||||||
[all-end-toks (apply
|
|
||||||
append
|
|
||||||
(map (lambda (clause)
|
|
||||||
(syntax-case clause (end)
|
|
||||||
[(end t ...)
|
|
||||||
(syntax->list #'(t ...))]
|
|
||||||
[_else null]))
|
|
||||||
clauses))])
|
|
||||||
(let loop ([clauses clauses]
|
|
||||||
[cfg-start #f]
|
|
||||||
[cfg-grammar #f]
|
|
||||||
[cfg-error #f]
|
|
||||||
[src-pos? #f]
|
|
||||||
[parser-clauses null])
|
|
||||||
(if (null? clauses)
|
|
||||||
(values cfg-start
|
|
||||||
cfg-grammar
|
|
||||||
cfg-error
|
|
||||||
(reverse parser-clauses)
|
|
||||||
src-pos?)
|
|
||||||
(syntax-case (car clauses) (start error grammar src-pos)
|
|
||||||
[(start tok)
|
|
||||||
(loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)]
|
|
||||||
[(error expr)
|
|
||||||
(loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)]
|
|
||||||
[(grammar [nt [pat handle0 handle ...] ...] ...)
|
|
||||||
(let ([nts (make-bound-identifier-mapping)]
|
|
||||||
[toks (make-token-identifier-mapping)]
|
|
||||||
[end-toks (make-token-identifier-mapping)]
|
|
||||||
[nt-ids (syntax->list #'(nt ...))]
|
|
||||||
[patss (map (lambda (stx)
|
|
||||||
(map syntax->list (syntax->list stx)))
|
|
||||||
(syntax->list #'((pat ...) ...)))])
|
|
||||||
(for-each (lambda (nt)
|
|
||||||
(bound-identifier-mapping-put! nts nt (list 0)))
|
|
||||||
nt-ids)
|
|
||||||
(for-each (lambda (t)
|
|
||||||
(token-identifier-mapping-put! end-toks t #t))
|
|
||||||
all-end-toks)
|
|
||||||
(for-each (lambda (t)
|
|
||||||
(unless (token-identifier-mapping-get end-toks (car t) (lambda () #f))
|
|
||||||
(let ([id (gensym (syntax-e (car t)))])
|
|
||||||
(token-identifier-mapping-put! toks (car t)
|
|
||||||
(cons id (cdr t))))))
|
|
||||||
all-toks)
|
|
||||||
;; Compute min max size for each non-term:
|
|
||||||
(nt-fixpoint
|
|
||||||
nts
|
|
||||||
(lambda (nt pats old-list)
|
|
||||||
(let ([new-cnt
|
|
||||||
(apply
|
|
||||||
min
|
|
||||||
(map (lambda (pat)
|
|
||||||
(apply
|
|
||||||
+
|
|
||||||
(map (lambda (elem)
|
|
||||||
(car
|
|
||||||
(bound-identifier-mapping-get nts
|
|
||||||
elem
|
|
||||||
(lambda () (list 1)))))
|
|
||||||
pat)))
|
|
||||||
pats))])
|
|
||||||
(if (new-cnt . > . (car old-list))
|
|
||||||
(cons new-cnt (cdr old-list))
|
|
||||||
old-list)))
|
|
||||||
nt-ids patss)
|
|
||||||
;; Compute set of toks that must appear at the beginning
|
|
||||||
;; for a non-terminal
|
|
||||||
(nt-fixpoint
|
|
||||||
nts
|
|
||||||
(lambda (nt pats old-list)
|
|
||||||
(let ([new-list
|
|
||||||
(apply
|
|
||||||
append
|
|
||||||
(map (lambda (pat)
|
|
||||||
(let loop ([pat pat])
|
|
||||||
(if (pair? pat)
|
|
||||||
(let ([l (bound-identifier-mapping-get
|
|
||||||
nts
|
|
||||||
(car pat)
|
|
||||||
(lambda ()
|
|
||||||
(list 1 (map-token toks (car pat)))))])
|
|
||||||
;; If the non-terminal can match 0 things,
|
|
||||||
;; then it might match something from the
|
|
||||||
;; next pattern element. Otherwise, it must
|
|
||||||
;; match the first element:
|
|
||||||
(if (zero? (car l))
|
|
||||||
(append (cdr l) (loop (cdr pat)))
|
|
||||||
(cdr l)))
|
|
||||||
null)))
|
|
||||||
pats))])
|
|
||||||
(let ([new (filter (lambda (id)
|
|
||||||
(andmap (lambda (id2)
|
|
||||||
(not (eq? id id2)))
|
|
||||||
(cdr old-list)))
|
|
||||||
new-list)])
|
|
||||||
(if (pair? new)
|
|
||||||
;; Drop dups in new list:
|
|
||||||
(let ([new (let loop ([new new])
|
|
||||||
(if (null? (cdr new))
|
|
||||||
new
|
|
||||||
(if (ormap (lambda (id)
|
|
||||||
(eq? (car new) id))
|
|
||||||
(cdr new))
|
|
||||||
(loop (cdr new))
|
|
||||||
(cons (car new) (loop (cdr new))))))])
|
|
||||||
(cons (car old-list) (append new (cdr old-list))))
|
|
||||||
old-list))))
|
|
||||||
nt-ids patss)
|
|
||||||
;; Determine left-recursive clauses:
|
|
||||||
(for-each (lambda (nt pats)
|
|
||||||
(let ([l (bound-identifier-mapping-get nts nt)])
|
|
||||||
(bound-identifier-mapping-put! nts nt (list (car l)
|
|
||||||
(cdr l)
|
|
||||||
(map (lambda (x) #f) pats)))))
|
|
||||||
nt-ids patss)
|
|
||||||
(nt-fixpoint
|
|
||||||
nts
|
|
||||||
(lambda (nt pats old-list)
|
|
||||||
(list (car old-list)
|
|
||||||
(cadr old-list)
|
|
||||||
(map (lambda (pat simple?)
|
|
||||||
(or simple?
|
|
||||||
(let ([l (map (lambda (elem)
|
|
||||||
(bound-identifier-mapping-get
|
|
||||||
nts
|
|
||||||
elem
|
|
||||||
(lambda () #f)))
|
|
||||||
pat)])
|
|
||||||
(andmap (lambda (i)
|
|
||||||
(or (not i)
|
|
||||||
(andmap values (caddr i))))
|
|
||||||
l))))
|
|
||||||
pats (caddr old-list))))
|
|
||||||
nt-ids patss)
|
|
||||||
;; Build a definition for each non-term:
|
|
||||||
(loop (cdr clauses)
|
|
||||||
cfg-start
|
|
||||||
(map (lambda (nt pats handles $ctxs)
|
|
||||||
(define info (bound-identifier-mapping-get nts nt))
|
|
||||||
(list nt
|
|
||||||
#`(let ([key (gensym '#,nt)])
|
|
||||||
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
|
||||||
(parse-nt/share
|
|
||||||
key #,(car info) '#,(cadr info) stream last-consumed-token depth end
|
|
||||||
max-depth tasks
|
|
||||||
success-k fail-k
|
|
||||||
(lambda (end max-depth tasks success-k fail-k)
|
|
||||||
#,(let loop ([pats pats]
|
|
||||||
[handles (syntax->list handles)]
|
|
||||||
[$ctxs (syntax->list $ctxs)]
|
|
||||||
[simple?s (caddr info)])
|
|
||||||
(if (null? pats)
|
|
||||||
#'(fail-k max-depth tasks)
|
|
||||||
#`(#,(if (or (null? (cdr pats))
|
|
||||||
(car simple?s))
|
|
||||||
#'parse-or
|
|
||||||
#'parse-parallel-or)
|
|
||||||
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
|
||||||
#,(build-match nts
|
|
||||||
toks
|
|
||||||
(car pats)
|
|
||||||
(car handles)
|
|
||||||
(car $ctxs)))
|
|
||||||
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
|
||||||
#,(loop (cdr pats)
|
|
||||||
(cdr handles)
|
|
||||||
(cdr $ctxs)
|
|
||||||
(cdr simple?s)))
|
|
||||||
stream last-consumed-token depth end success-k fail-k max-depth tasks)))))))))
|
|
||||||
nt-ids
|
|
||||||
patss
|
|
||||||
(syntax->list #'(((begin handle0 handle ...) ...) ...))
|
|
||||||
(syntax->list #'((handle0 ...) ...)))
|
|
||||||
cfg-error
|
|
||||||
src-pos?
|
|
||||||
(list*
|
|
||||||
(with-syntax ([((tok tok-id . $e) ...)
|
|
||||||
(token-identifier-mapping-map toks
|
|
||||||
(lambda (k v)
|
|
||||||
(list* k
|
|
||||||
(car v)
|
|
||||||
(if (cdr v)
|
|
||||||
#f
|
|
||||||
'$1))))]
|
|
||||||
[(pos ...)
|
|
||||||
(if src-pos?
|
|
||||||
#'($1-start-pos $1-end-pos)
|
|
||||||
#'(#f #f))])
|
|
||||||
#`(grammar (start [() null]
|
|
||||||
[(atok start) (cons $1 $2)])
|
|
||||||
(atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...)))
|
|
||||||
#`(start start)
|
|
||||||
parser-clauses)))]
|
|
||||||
[(grammar . _)
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
"bad grammar clause"
|
|
||||||
stx
|
|
||||||
(car clauses))]
|
|
||||||
[(src-pos)
|
|
||||||
(loop (cdr clauses)
|
|
||||||
cfg-start
|
|
||||||
cfg-grammar
|
|
||||||
cfg-error
|
|
||||||
#t
|
|
||||||
(cons (car clauses) parser-clauses))]
|
|
||||||
[_else
|
|
||||||
(loop (cdr clauses)
|
|
||||||
cfg-start
|
|
||||||
cfg-grammar
|
|
||||||
cfg-error
|
|
||||||
src-pos?
|
|
||||||
(cons (car clauses) parser-clauses))]))))])
|
|
||||||
#`(let ([orig-parse (parser
|
|
||||||
[error (lambda (a b c)
|
|
||||||
(error 'cfg-parser "unexpected ~a token: ~a" b c))]
|
|
||||||
. #,parser-clauses)]
|
|
||||||
[error-proc #,cfg-error])
|
|
||||||
(letrec #,grammar
|
|
||||||
(lambda (get-tok)
|
|
||||||
(let ([tok-list (orig-parse get-tok)])
|
|
||||||
(letrec ([success-k
|
|
||||||
(lambda (val stream last-consumed-token depth max-depth tasks next)
|
|
||||||
(if (null? stream)
|
|
||||||
val
|
|
||||||
(next success-k fail-k max-depth tasks)))]
|
|
||||||
[fail-k (lambda (max-depth tasks)
|
|
||||||
(define (call-error-proc tok-ok? tok-name tok-value start-pos end-pos)
|
|
||||||
(cond
|
|
||||||
[(procedure-arity-includes? error-proc 5)
|
|
||||||
(error-proc tok-ok? tok-name tok-value start-pos end-pos)]
|
|
||||||
[else
|
|
||||||
(error-proc tok-ok? tok-name tok-value)]))
|
|
||||||
(cond
|
|
||||||
[(null? tok-list)
|
|
||||||
(if error-proc
|
|
||||||
(call-error-proc #t
|
|
||||||
'no-tokens
|
|
||||||
#f
|
|
||||||
(make-position #f #f #f)
|
|
||||||
(make-position #f #f #f))
|
|
||||||
(error
|
|
||||||
'cfg-parse
|
|
||||||
"no tokens"))]
|
|
||||||
[else
|
|
||||||
(let ([bad-tok (list-ref tok-list
|
|
||||||
(min (sub1 (length tok-list))
|
|
||||||
max-depth))])
|
|
||||||
(if error-proc
|
|
||||||
(call-error-proc #t
|
|
||||||
(tok-orig-name bad-tok)
|
|
||||||
(tok-val bad-tok)
|
|
||||||
(tok-start bad-tok)
|
|
||||||
(tok-end bad-tok))
|
|
||||||
(error
|
|
||||||
'cfg-parse
|
|
||||||
"failed at ~a"
|
|
||||||
(tok-val bad-tok))))]))])
|
|
||||||
(#,start tok-list
|
|
||||||
;; we simulate a token at the very beginning with zero width
|
|
||||||
;; for use with the position-generating code (*-start-pos, *-end-pos).
|
|
||||||
(if (null? tok-list)
|
|
||||||
(tok #f #f #f
|
|
||||||
(position 1
|
|
||||||
#,(if src-pos? #'1 #'#f)
|
|
||||||
#,(if src-pos? #'0 #'#f))
|
|
||||||
(position 1
|
|
||||||
#,(if src-pos? #'1 #'#f)
|
|
||||||
#,(if src-pos? #'0 #'#f)))
|
|
||||||
(tok (tok-name (car tok-list))
|
|
||||||
(tok-orig-name (car tok-list))
|
|
||||||
(tok-val (car tok-list))
|
|
||||||
(tok-start (car tok-list))
|
|
||||||
(tok-start (car tok-list))))
|
|
||||||
0
|
|
||||||
(length tok-list)
|
|
||||||
success-k
|
|
||||||
fail-k
|
|
||||||
0
|
|
||||||
(make-tasks null null
|
|
||||||
(make-hasheq) (make-hasheq)
|
|
||||||
(make-hash) #t)))))))))]))
|
|
||||||
|
|
||||||
|
|
||||||
(module* test racket/base
|
|
||||||
(require (submod "..")
|
|
||||||
br-parser-tools/lex
|
|
||||||
racket/block
|
|
||||||
racket/generator
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
;; Test: parsing regular expressions.
|
|
||||||
;; Here is a test case on locations:
|
|
||||||
(block
|
|
||||||
(define-tokens regexp-tokens (ANCHOR STAR OR LIT LPAREN RPAREN EOF))
|
|
||||||
(define lex (lexer-src-pos ["|" (token-OR lexeme)]
|
|
||||||
["^" (token-ANCHOR lexeme)]
|
|
||||||
["*" (token-STAR lexeme)]
|
|
||||||
[(repetition 1 +inf.0 alphabetic) (token-LIT lexeme)]
|
|
||||||
["(" (token-LPAREN lexeme)]
|
|
||||||
[")" (token-RPAREN lexeme)]
|
|
||||||
[whitespace (return-without-pos (lex input-port))]
|
|
||||||
[(eof) (token-EOF 'eof)]))
|
|
||||||
(define -parse (cfg-parser
|
|
||||||
(tokens regexp-tokens)
|
|
||||||
(start top)
|
|
||||||
(end EOF)
|
|
||||||
(src-pos)
|
|
||||||
(grammar [top [(maybe-anchor regexp)
|
|
||||||
(cond [$1
|
|
||||||
`(anchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))]
|
|
||||||
[else
|
|
||||||
`(unanchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))])]]
|
|
||||||
[maybe-anchor [(ANCHOR) #t]
|
|
||||||
[() #f]]
|
|
||||||
[regexp [(regexp STAR) `(star ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))]
|
|
||||||
[(regexp OR regexp) `(or ,$1 ,$3 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))]
|
|
||||||
[(LPAREN regexp RPAREN) `(group ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))]
|
|
||||||
[(LIT) `(lit ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $1-end-pos))]])))
|
|
||||||
(define (pos->sexp pos)
|
|
||||||
(position-offset pos))
|
|
||||||
|
|
||||||
(define (parse s)
|
|
||||||
(define ip (open-input-string s))
|
|
||||||
(port-count-lines! ip)
|
|
||||||
(-parse (lambda () (lex ip))))
|
|
||||||
|
|
||||||
(check-equal? (parse "abc")
|
|
||||||
'(unanchored (lit "abc" 1 4) 1 4))
|
|
||||||
(check-equal? (parse "a | (b*) | c")
|
|
||||||
'(unanchored (or (or (lit "a" 1 2)
|
|
||||||
(group (star (lit "b" 6 7) 6 8) 5 9)
|
|
||||||
1 9)
|
|
||||||
(lit "c" 12 13)
|
|
||||||
1 13)
|
|
||||||
1 13)))
|
|
||||||
|
|
||||||
|
|
||||||
;; Check that cfg-parser can accept error functions of 3 arguments:
|
|
||||||
(block
|
|
||||||
(define-tokens non-terminals (ONE ZERO EOF))
|
|
||||||
(define parse
|
|
||||||
(cfg-parser (tokens non-terminals)
|
|
||||||
(start ones)
|
|
||||||
(end EOF)
|
|
||||||
(error (lambda (tok-ok tok-name tok-val)
|
|
||||||
(error (format "~a ~a ~a" tok-ok tok-name tok-val))))
|
|
||||||
(grammar [ones [() null]
|
|
||||||
[(ONE ones) (cons $1 $2)]])))
|
|
||||||
(define (sequence->tokenizer s)
|
|
||||||
(define-values (more? next) (sequence-generate s))
|
|
||||||
(lambda ()
|
|
||||||
(cond [(more?) (next)]
|
|
||||||
[else (token-EOF 'eof)])))
|
|
||||||
(check-exn #rx"#t ZERO zero"
|
|
||||||
(lambda () (parse (sequence->tokenizer (list (token-ZERO "zero")))))))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; Check that cfg-parser can accept error functions of 5 arguments:
|
|
||||||
(block
|
|
||||||
(define-tokens non-terminals (ONE ZERO EOF))
|
|
||||||
(define parse
|
|
||||||
(cfg-parser (tokens non-terminals)
|
|
||||||
(start ones)
|
|
||||||
(src-pos)
|
|
||||||
(end EOF)
|
|
||||||
(error (lambda (tok-ok tok-name tok-val start-pos end-pos)
|
|
||||||
(error (format "~a ~a ~a ~a ~a"
|
|
||||||
tok-ok tok-name tok-val
|
|
||||||
(position-offset start-pos)
|
|
||||||
(position-offset end-pos)))))
|
|
||||||
(grammar [ones [() null]
|
|
||||||
[(ONE ones) (cons $1 $2)]])))
|
|
||||||
(define (sequence->tokenizer s)
|
|
||||||
(define-values (more? next) (sequence-generate s))
|
|
||||||
(lambda ()
|
|
||||||
(cond [(more?) (next)]
|
|
||||||
[else (position-token (token-EOF 'eof)
|
|
||||||
(position #f #f #f)
|
|
||||||
(position #f #f #f))])))
|
|
||||||
(check-exn #rx"#t ZERO zero 2 3"
|
|
||||||
(lambda ()
|
|
||||||
(parse
|
|
||||||
(sequence->tokenizer
|
|
||||||
(list (position-token
|
|
||||||
(token-ZERO "zero")
|
|
||||||
(position 2 2 5)
|
|
||||||
(position 3 2 6))))))))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; Tests used during development
|
|
||||||
(define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF))
|
|
||||||
|
|
||||||
(define lex
|
|
||||||
(lexer
|
|
||||||
["+" (token-PLUS '+)]
|
|
||||||
["-" (token-MINUS '-)]
|
|
||||||
["*" (token-STAR '*)]
|
|
||||||
["|" (token-BAR '||)]
|
|
||||||
[":" (token-COLON '|:|)]
|
|
||||||
[whitespace (lex input-port)]
|
|
||||||
[(eof) (token-EOF 'eof)]))
|
|
||||||
|
|
||||||
(define parse
|
|
||||||
(cfg-parser
|
|
||||||
(tokens non-terminals)
|
|
||||||
(start <program>)
|
|
||||||
(end EOF)
|
|
||||||
(error (lambda (a b stx)
|
|
||||||
(error 'parse "failed at ~s" stx)))
|
|
||||||
(grammar [<program> [(PLUS) "plus"]
|
|
||||||
[(<minus-program> BAR <minus-program>) (list $1 $2 $3)]
|
|
||||||
[(<program> COLON) (list $1)]]
|
|
||||||
[<minus-program> [(MINUS) "minus"]
|
|
||||||
[(<program> STAR) (cons $1 $2)]]
|
|
||||||
[<simple> [(<alts> <alts> <alts> MINUS) "yes"]]
|
|
||||||
[<alts> [(PLUS) 'plus]
|
|
||||||
[(MINUS) 'minus]]
|
|
||||||
[<random> [() '0]
|
|
||||||
[(<random> PLUS) (add1 $1)]
|
|
||||||
[(<random> PLUS) (add1 $1)]])))
|
|
||||||
|
|
||||||
(let ([p (open-input-string #;"+*|-|-*|+**" #;"-|+*|+**"
|
|
||||||
#;"+*|+**|-" #;"-|-*|-|-*"
|
|
||||||
#;"-|-*|-|-**|-|-*|-|-**"
|
|
||||||
"-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|-|-*|-|-**|-|-*|-|-***
|
|
||||||
|-|-*|-|-**|-|-*|-|-*****|-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|
|
|
||||||
-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****"
|
|
||||||
;; This one fails:
|
|
||||||
#;"+*")])
|
|
||||||
(check-equal? (parse (lambda () (lex p)))
|
|
||||||
'((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
|
||||||
||
|
|
||||||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
|
||||||
.
|
|
||||||
*)
|
|
||||||
||
|
|
||||||
(((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
|
||||||
||
|
|
||||||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
|
||||||
.
|
|
||||||
*))
|
|
||||||
.
|
|
||||||
*)
|
|
||||||
||
|
|
||||||
(((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
|
||||||
||
|
|
||||||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
|
||||||
.
|
|
||||||
*)
|
|
||||||
||
|
|
||||||
(((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
|
||||||
||
|
|
||||||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
|
||||||
.
|
|
||||||
*))
|
|
||||||
.
|
|
||||||
*)))))
|
|
@ -1,89 +0,0 @@
|
|||||||
#lang scheme
|
|
||||||
|
|
||||||
;; An interactive calculator inspired by the calculator example in the bison manual.
|
|
||||||
|
|
||||||
|
|
||||||
;; Import the parser and lexer generators.
|
|
||||||
(require br-parser-tools/yacc
|
|
||||||
br-parser-tools/lex
|
|
||||||
(prefix-in : br-parser-tools/lex-sre))
|
|
||||||
|
|
||||||
(define-tokens value-tokens (NUM VAR FNCT))
|
|
||||||
(define-empty-tokens op-tokens (newline = OP CP + - * / ^ EOF NEG))
|
|
||||||
|
|
||||||
;; A hash table to store variable values in for the calculator
|
|
||||||
(define vars (make-hash))
|
|
||||||
|
|
||||||
(define-lex-abbrevs
|
|
||||||
(lower-letter (:/ "a" "z"))
|
|
||||||
|
|
||||||
(upper-letter (:/ #\A #\Z))
|
|
||||||
|
|
||||||
;; (:/ 0 9) would not work because the lexer does not understand numbers. (:/ #\0 #\9) is ok too.
|
|
||||||
(digit (:/ "0" "9")))
|
|
||||||
|
|
||||||
(define calcl
|
|
||||||
(lexer
|
|
||||||
[(eof) 'EOF]
|
|
||||||
;; recursively call the lexer on the remaining input after a tab or space. Returning the
|
|
||||||
;; result of that operation. This effectively skips all whitespace.
|
|
||||||
[(:or #\tab #\space) (calcl input-port)]
|
|
||||||
;; (token-newline) returns 'newline
|
|
||||||
[#\newline (token-newline)]
|
|
||||||
;; Since (token-=) returns '=, just return the symbol directly
|
|
||||||
[(:or "=" "+" "-" "*" "/" "^") (string->symbol lexeme)]
|
|
||||||
["(" 'OP]
|
|
||||||
[")" 'CP]
|
|
||||||
["sin" (token-FNCT sin)]
|
|
||||||
[(:+ (:or lower-letter upper-letter)) (token-VAR (string->symbol lexeme))]
|
|
||||||
[(:+ digit) (token-NUM (string->number lexeme))]
|
|
||||||
[(:: (:+ digit) #\. (:* digit)) (token-NUM (string->number lexeme))]))
|
|
||||||
|
|
||||||
|
|
||||||
(define calcp
|
|
||||||
(parser
|
|
||||||
|
|
||||||
(start start)
|
|
||||||
(end newline EOF)
|
|
||||||
(tokens value-tokens op-tokens)
|
|
||||||
(error (lambda (a b c) (void)))
|
|
||||||
|
|
||||||
(precs (right =)
|
|
||||||
(left - +)
|
|
||||||
(left * /)
|
|
||||||
(left NEG)
|
|
||||||
(right ^))
|
|
||||||
|
|
||||||
(grammar
|
|
||||||
|
|
||||||
(start [() #f]
|
|
||||||
;; If there is an error, ignore everything before the error
|
|
||||||
;; and try to start over right after the error
|
|
||||||
[(error start) $2]
|
|
||||||
[(exp) $1])
|
|
||||||
|
|
||||||
(exp [(NUM) $1]
|
|
||||||
[(VAR) (hash-ref vars $1 (lambda () 0))]
|
|
||||||
[(VAR = exp) (begin (hash-set! vars $1 $3)
|
|
||||||
$3)]
|
|
||||||
[(FNCT OP exp CP) ($1 $3)]
|
|
||||||
[(exp + exp) (+ $1 $3)]
|
|
||||||
[(exp - exp) (- $1 $3)]
|
|
||||||
[(exp * exp) (* $1 $3)]
|
|
||||||
[(exp / exp) (/ $1 $3)]
|
|
||||||
[(- exp) (prec NEG) (- $2)]
|
|
||||||
[(exp ^ exp) (expt $1 $3)]
|
|
||||||
[(OP exp CP) $2]))))
|
|
||||||
|
|
||||||
;; run the calculator on the given input-port
|
|
||||||
(define (calc ip)
|
|
||||||
(port-count-lines! ip)
|
|
||||||
(letrec ((one-line
|
|
||||||
(lambda ()
|
|
||||||
(let ((result (calcp (lambda () (calcl ip)))))
|
|
||||||
(when result
|
|
||||||
(printf "~a\n" result)
|
|
||||||
(one-line))))))
|
|
||||||
(one-line)))
|
|
||||||
|
|
||||||
(calc (open-input-string "x=1\n(x + 2 * 3) - (1+2)*3"))
|
|
@ -1,242 +0,0 @@
|
|||||||
;; This implements the equivalent of racket's read-syntax for R5RS scheme.
|
|
||||||
;; It has not been thoroughly tested. Also it will read an entire file into a
|
|
||||||
;; list of syntax objects, instead of returning one syntax object at a time
|
|
||||||
|
|
||||||
(module read mzscheme
|
|
||||||
|
|
||||||
(require br-parser-tools/lex
|
|
||||||
(prefix : br-parser-tools/lex-sre)
|
|
||||||
br-parser-tools/yacc
|
|
||||||
syntax/readerr)
|
|
||||||
|
|
||||||
(define-tokens data (DATUM))
|
|
||||||
(define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF))
|
|
||||||
|
|
||||||
(define scheme-lexer
|
|
||||||
(lexer-src-pos
|
|
||||||
|
|
||||||
;; Skip comments, without accumulating extra position information
|
|
||||||
[(:or scheme-whitespace comment) (return-without-pos (scheme-lexer input-port))]
|
|
||||||
|
|
||||||
["#t" (token-DATUM #t)]
|
|
||||||
["#f" (token-DATUM #f)]
|
|
||||||
[(:: "#\\" any-char) (token-DATUM (caddr (string->list lexeme)))]
|
|
||||||
["#\\space" (token-DATUM #\space)]
|
|
||||||
["#\\newline" (token-DATUM #\newline)]
|
|
||||||
[(:or (:: initial (:* subsequent)) "+" "-" "...") (token-DATUM (string->symbol lexeme))]
|
|
||||||
[#\" (token-DATUM (list->string (get-string-token input-port)))]
|
|
||||||
[#\( 'OP]
|
|
||||||
[#\) 'CP]
|
|
||||||
[#\[ 'OP]
|
|
||||||
[#\] 'CP]
|
|
||||||
["#(" 'HASHOP]
|
|
||||||
[num2 (token-DATUM (string->number lexeme 2))]
|
|
||||||
[num8 (token-DATUM (string->number lexeme 8))]
|
|
||||||
[num10 (token-DATUM (string->number lexeme 10))]
|
|
||||||
[num16 (token-DATUM (string->number lexeme 16))]
|
|
||||||
["'" 'QUOTE]
|
|
||||||
["`" 'QUASIQUOTE]
|
|
||||||
["," 'UNQUOTE]
|
|
||||||
[",@" 'UNQUOTE-SPLICING]
|
|
||||||
["." 'DOT]
|
|
||||||
[(eof) 'EOF]))
|
|
||||||
|
|
||||||
(define get-string-token
|
|
||||||
(lexer
|
|
||||||
[(:~ #\" #\\) (cons (car (string->list lexeme))
|
|
||||||
(get-string-token input-port))]
|
|
||||||
[(:: #\\ #\\) (cons #\\ (get-string-token input-port))]
|
|
||||||
[(:: #\\ #\") (cons #\" (get-string-token input-port))]
|
|
||||||
[#\" null]))
|
|
||||||
|
|
||||||
|
|
||||||
(define-lex-abbrevs
|
|
||||||
[letter (:or (:/ "a" "z") (:/ #\A #\Z))]
|
|
||||||
[digit (:/ #\0 #\9)]
|
|
||||||
[scheme-whitespace (:or #\newline #\return #\tab #\space #\vtab)]
|
|
||||||
[initial (:or letter (char-set "!$%&*/:<=>?^_~@"))]
|
|
||||||
[subsequent (:or initial digit (char-set "+-.@"))]
|
|
||||||
[comment (:: #\; (:* (:~ #\newline)) #\newline)]
|
|
||||||
|
|
||||||
|
|
||||||
;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of
|
|
||||||
;; using regexp macros to avoid the cut and paste.
|
|
||||||
; [numR (:: prefixR complexR)]
|
|
||||||
; [complexR (:or realR
|
|
||||||
; (:: realR "@" realR)
|
|
||||||
; (:: realR "+" urealR "i")
|
|
||||||
; (:: realR "-" urealR "i")
|
|
||||||
; (:: realR "+i")
|
|
||||||
; (:: realR "-i")
|
|
||||||
; (:: "+" urealR "i")
|
|
||||||
; (:: "-" urealR "i")
|
|
||||||
; (:: "+i")
|
|
||||||
; (:: "-i"))]
|
|
||||||
; [realR (:: sign urealR)]
|
|
||||||
; [urealR (:or uintegerR (:: uintegerR "/" uintegerR) decimalR)]
|
|
||||||
; [uintegerR (:: (:+ digitR) (:* #\#))]
|
|
||||||
; [prefixR (:or (:: radixR exactness)
|
|
||||||
; (:: exactness radixR))]
|
|
||||||
|
|
||||||
[num2 (:: prefix2 complex2)]
|
|
||||||
[complex2 (:or real2
|
|
||||||
(:: real2 "@" real2)
|
|
||||||
(:: real2 "+" ureal2 "i")
|
|
||||||
(:: real2 "-" ureal2 "i")
|
|
||||||
(:: real2 "+i")
|
|
||||||
(:: real2 "-i")
|
|
||||||
(:: "+" ureal2 "i")
|
|
||||||
(:: "-" ureal2 "i")
|
|
||||||
(:: "+i")
|
|
||||||
(:: "-i"))]
|
|
||||||
[real2 (:: sign ureal2)]
|
|
||||||
[ureal2 (:or uinteger2 (:: uinteger2 "/" uinteger2))]
|
|
||||||
[uinteger2 (:: (:+ digit2) (:* #\#))]
|
|
||||||
[prefix2 (:or (:: radix2 exactness)
|
|
||||||
(:: exactness radix2))]
|
|
||||||
[radix2 "#b"]
|
|
||||||
[digit2 (:or "0" "1")]
|
|
||||||
[num8 (:: prefix8 complex8)]
|
|
||||||
[complex8 (:or real8
|
|
||||||
(:: real8 "@" real8)
|
|
||||||
(:: real8 "+" ureal8 "i")
|
|
||||||
(:: real8 "-" ureal8 "i")
|
|
||||||
(:: real8 "+i")
|
|
||||||
(:: real8 "-i")
|
|
||||||
(:: "+" ureal8 "i")
|
|
||||||
(:: "-" ureal8 "i")
|
|
||||||
(:: "+i")
|
|
||||||
(:: "-i"))]
|
|
||||||
[real8 (:: sign ureal8)]
|
|
||||||
[ureal8 (:or uinteger8 (:: uinteger8 "/" uinteger8))]
|
|
||||||
[uinteger8 (:: (:+ digit8) (:* #\#))]
|
|
||||||
[prefix8 (:or (:: radix8 exactness)
|
|
||||||
(:: exactness radix8))]
|
|
||||||
[radix8 "#o"]
|
|
||||||
[digit8 (:/ "0" "7")]
|
|
||||||
|
|
||||||
[num10 (:: prefix10 complex10)]
|
|
||||||
[complex10 (:or real10
|
|
||||||
(:: real10 "@" real10)
|
|
||||||
(:: real10 "+" ureal10 "i")
|
|
||||||
(:: real10 "-" ureal10 "i")
|
|
||||||
(:: real10 "+i")
|
|
||||||
(:: real10 "-i")
|
|
||||||
(:: "+" ureal10 "i")
|
|
||||||
(:: "-" ureal10 "i")
|
|
||||||
(:: "+i")
|
|
||||||
(:: "-i"))]
|
|
||||||
[real10 (:: sign ureal10)]
|
|
||||||
[ureal10 (:or uinteger10 (:: uinteger10 "/" uinteger10) decimal10)]
|
|
||||||
[uinteger10 (:: (:+ digit10) (:* #\#))]
|
|
||||||
[prefix10 (:or (:: radix10 exactness)
|
|
||||||
(:: exactness radix10))]
|
|
||||||
[radix10 (:? "#d")]
|
|
||||||
[digit10 digit]
|
|
||||||
[decimal10 (:or (:: uinteger10 suffix)
|
|
||||||
(:: #\. (:+ digit10) (:* #\#) suffix)
|
|
||||||
(:: (:+ digit10) #\. (:* digit10) (:* #\#) suffix)
|
|
||||||
(:: (:+ digit10) (:+ #\#) #\. (:* #\#) suffix))]
|
|
||||||
|
|
||||||
[num16 (:: prefix16 complex16)]
|
|
||||||
[complex16 (:or real16
|
|
||||||
(:: real16 "@" real16)
|
|
||||||
(:: real16 "+" ureal16 "i")
|
|
||||||
(:: real16 "-" ureal16 "i")
|
|
||||||
(:: real16 "+i")
|
|
||||||
(:: real16 "-i")
|
|
||||||
(:: "+" ureal16 "i")
|
|
||||||
(:: "-" ureal16 "i")
|
|
||||||
"+i"
|
|
||||||
"-i")]
|
|
||||||
[real16 (:: sign ureal16)]
|
|
||||||
[ureal16 (:or uinteger16 (:: uinteger16 "/" uinteger16))]
|
|
||||||
[uinteger16 (:: (:+ digit16) (:* #\#))]
|
|
||||||
[prefix16 (:or (:: radix16 exactness)
|
|
||||||
(:: exactness radix16))]
|
|
||||||
[radix16 "#x"]
|
|
||||||
[digit16 (:or digit (:/ #\a #\f) (:/ #\A #\F))]
|
|
||||||
|
|
||||||
|
|
||||||
[suffix (:or "" (:: exponent-marker sign (:+ digit10)))]
|
|
||||||
[exponent-marker (:or "e" "s" "f" "d" "l")]
|
|
||||||
[sign (:or "" "+" "-")]
|
|
||||||
[exactness (:or "" "#i" "#e")])
|
|
||||||
|
|
||||||
|
|
||||||
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
|
|
||||||
|
|
||||||
;; A macro to build the syntax object
|
|
||||||
(define-syntax (build-so stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
((_ value start end)
|
|
||||||
(with-syntax ((start-pos (datum->syntax-object
|
|
||||||
(syntax end)
|
|
||||||
(string->symbol
|
|
||||||
(format "$~a-start-pos"
|
|
||||||
(syntax-object->datum (syntax start))))))
|
|
||||||
(end-pos (datum->syntax-object
|
|
||||||
(syntax end)
|
|
||||||
(string->symbol
|
|
||||||
(format "$~a-end-pos"
|
|
||||||
(syntax-object->datum (syntax end))))))
|
|
||||||
(source (datum->syntax-object
|
|
||||||
(syntax end)
|
|
||||||
'source-name)))
|
|
||||||
(syntax
|
|
||||||
(datum->syntax-object
|
|
||||||
#f
|
|
||||||
value
|
|
||||||
(list source
|
|
||||||
(position-line start-pos)
|
|
||||||
(position-col start-pos)
|
|
||||||
(position-offset start-pos)
|
|
||||||
(- (position-offset end-pos)
|
|
||||||
(position-offset start-pos)))
|
|
||||||
stx-for-original-property))))))
|
|
||||||
|
|
||||||
(define (scheme-parser source-name)
|
|
||||||
(parser
|
|
||||||
(src-pos)
|
|
||||||
|
|
||||||
(start s)
|
|
||||||
(end EOF)
|
|
||||||
(error (lambda (a name val start end)
|
|
||||||
(raise-read-error
|
|
||||||
"read-error"
|
|
||||||
source-name
|
|
||||||
(position-line start)
|
|
||||||
(position-col start)
|
|
||||||
(position-offset start)
|
|
||||||
(- (position-offset end)
|
|
||||||
(position-offset start)))))
|
|
||||||
(tokens data delim)
|
|
||||||
|
|
||||||
|
|
||||||
(grammar
|
|
||||||
|
|
||||||
(s [(sexp-list) (reverse $1)])
|
|
||||||
|
|
||||||
(sexp [(DATUM) (build-so $1 1 1)]
|
|
||||||
[(OP sexp-list CP) (build-so (reverse $2) 1 3)]
|
|
||||||
[(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)]
|
|
||||||
[(QUOTE sexp) (build-so (list 'quote $2) 1 2)]
|
|
||||||
[(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)]
|
|
||||||
[(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)]
|
|
||||||
[(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)]
|
|
||||||
[(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)])
|
|
||||||
|
|
||||||
(sexp-list [() null]
|
|
||||||
[(sexp-list sexp) (cons $2 $1)]))))
|
|
||||||
|
|
||||||
(define (rs sn ip)
|
|
||||||
(port-count-lines! ip)
|
|
||||||
((scheme-parser sn) (lambda () (scheme-lexer ip))))
|
|
||||||
|
|
||||||
(define readsyntax
|
|
||||||
(case-lambda ((sn) (rs sn (current-input-port)))
|
|
||||||
((sn ip) (rs sn ip))))
|
|
||||||
|
|
||||||
(provide (rename readsyntax read-syntax))
|
|
||||||
|
|
||||||
)
|
|
@ -1,3 +0,0 @@
|
|||||||
#lang info
|
|
||||||
|
|
||||||
(define compile-omit-paths '("private-lex/error-tests.rkt"))
|
|
@ -1,24 +0,0 @@
|
|||||||
(module lex-plt-v200 mzscheme
|
|
||||||
(require br-parser-tools/lex
|
|
||||||
(prefix : br-parser-tools/lex-sre))
|
|
||||||
|
|
||||||
(provide epsilon
|
|
||||||
~
|
|
||||||
(rename :* *)
|
|
||||||
(rename :+ +)
|
|
||||||
(rename :? ?)
|
|
||||||
(rename :or :)
|
|
||||||
(rename :& &)
|
|
||||||
(rename :: @)
|
|
||||||
(rename :~ ^)
|
|
||||||
(rename :/ -))
|
|
||||||
|
|
||||||
(define-lex-trans epsilon
|
|
||||||
(syntax-rules ()
|
|
||||||
((_) "")))
|
|
||||||
|
|
||||||
(define-lex-trans ~
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ re) (complement re)))))
|
|
||||||
|
|
||||||
|
|
@ -1,119 +0,0 @@
|
|||||||
(module lex-sre mzscheme
|
|
||||||
(require br-parser-tools/lex)
|
|
||||||
|
|
||||||
(provide (rename sre-* *)
|
|
||||||
(rename sre-+ +)
|
|
||||||
?
|
|
||||||
(rename sre-= =)
|
|
||||||
(rename sre->= >=)
|
|
||||||
**
|
|
||||||
(rename sre-or or)
|
|
||||||
:
|
|
||||||
seq
|
|
||||||
&
|
|
||||||
~
|
|
||||||
(rename sre-- -)
|
|
||||||
(rename sre-/ /)
|
|
||||||
/-only-chars)
|
|
||||||
|
|
||||||
(define-lex-trans sre-*
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ re ...)
|
|
||||||
(repetition 0 +inf.0 (union re ...)))))
|
|
||||||
|
|
||||||
(define-lex-trans sre-+
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ re ...)
|
|
||||||
(repetition 1 +inf.0 (union re ...)))))
|
|
||||||
|
|
||||||
(define-lex-trans ?
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ re ...)
|
|
||||||
(repetition 0 1 (union re ...)))))
|
|
||||||
|
|
||||||
(define-lex-trans sre-=
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ n re ...)
|
|
||||||
(repetition n n (union re ...)))))
|
|
||||||
|
|
||||||
(define-lex-trans sre->=
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ n re ...)
|
|
||||||
(repetition n +inf.0 (union re ...)))))
|
|
||||||
|
|
||||||
(define-lex-trans **
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ low #f re ...)
|
|
||||||
(** low +inf.0 re ...))
|
|
||||||
((_ low high re ...)
|
|
||||||
(repetition low high (union re ...)))))
|
|
||||||
|
|
||||||
(define-lex-trans sre-or
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ re ...)
|
|
||||||
(union re ...))))
|
|
||||||
|
|
||||||
(define-lex-trans :
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ re ...)
|
|
||||||
(concatenation re ...))))
|
|
||||||
|
|
||||||
(define-lex-trans seq
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ re ...)
|
|
||||||
(concatenation re ...))))
|
|
||||||
|
|
||||||
(define-lex-trans &
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ re ...)
|
|
||||||
(intersection re ...))))
|
|
||||||
|
|
||||||
(define-lex-trans ~
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ re ...)
|
|
||||||
(char-complement (union re ...)))))
|
|
||||||
|
|
||||||
;; set difference
|
|
||||||
(define-lex-trans (sre-- stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
((_)
|
|
||||||
(raise-syntax-error #f
|
|
||||||
"must have at least one argument"
|
|
||||||
stx))
|
|
||||||
((_ big-re re ...)
|
|
||||||
(syntax (& big-re (complement (union re ...)))))))
|
|
||||||
|
|
||||||
(define-lex-trans (sre-/ stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
((_ range ...)
|
|
||||||
(let ((chars
|
|
||||||
(apply append (map (lambda (r)
|
|
||||||
(let ((x (syntax-e r)))
|
|
||||||
(cond
|
|
||||||
((char? x) (list x))
|
|
||||||
((string? x) (string->list x))
|
|
||||||
(else
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
"not a char or string"
|
|
||||||
stx
|
|
||||||
r)))))
|
|
||||||
(syntax->list (syntax (range ...)))))))
|
|
||||||
(unless (even? (length chars))
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
"not given an even number of characters"
|
|
||||||
stx))
|
|
||||||
#`(/-only-chars #,@chars)))))
|
|
||||||
|
|
||||||
(define-lex-trans /-only-chars
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ c1 c2)
|
|
||||||
(char-range c1 c2))
|
|
||||||
((_ c1 c2 c ...)
|
|
||||||
(union (char-range c1 c2)
|
|
||||||
(/-only-chars c ...)))))
|
|
||||||
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
@ -1,412 +0,0 @@
|
|||||||
(module lex mzscheme
|
|
||||||
|
|
||||||
;; Provides the syntax used to create lexers and the functions needed to
|
|
||||||
;; create and use the buffer that the lexer reads from. See docs.
|
|
||||||
|
|
||||||
(require-for-syntax mzlib/list
|
|
||||||
syntax/stx
|
|
||||||
syntax/define
|
|
||||||
syntax/boundmap
|
|
||||||
"private-lex/util.rkt"
|
|
||||||
"private-lex/actions.rkt"
|
|
||||||
"private-lex/front.rkt"
|
|
||||||
"private-lex/unicode-chars.rkt")
|
|
||||||
|
|
||||||
(require mzlib/stxparam
|
|
||||||
syntax/readerr
|
|
||||||
"private-lex/token.rkt")
|
|
||||||
|
|
||||||
(provide lexer lexer-src-pos lexer-srcloc define-lex-abbrev define-lex-abbrevs define-lex-trans
|
|
||||||
|
|
||||||
;; Dealing with tokens and related structures
|
|
||||||
define-tokens define-empty-tokens token-name token-value token?
|
|
||||||
(struct position (offset line col))
|
|
||||||
(struct position-token (token start-pos end-pos))
|
|
||||||
(struct srcloc-token (token srcloc))
|
|
||||||
|
|
||||||
;; File path for highlighting errors while lexing
|
|
||||||
file-path
|
|
||||||
lexer-file-path ;; alternate name
|
|
||||||
|
|
||||||
;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4.
|
|
||||||
any-char any-string nothing alphabetic lower-case upper-case title-case
|
|
||||||
numeric symbolic punctuation graphic whitespace blank iso-control
|
|
||||||
|
|
||||||
;; A regular expression operator
|
|
||||||
char-set)
|
|
||||||
|
|
||||||
;; wrap-action: syntax-object src-pos? -> syntax-object
|
|
||||||
(define-for-syntax (wrap-action action src-loc-style)
|
|
||||||
(with-syntax ((action-stx
|
|
||||||
(cond
|
|
||||||
[(eq? src-loc-style 'lexer-src-pos)
|
|
||||||
#`(let/ec ret
|
|
||||||
(syntax-parameterize
|
|
||||||
([return-without-pos (make-rename-transformer #'ret)])
|
|
||||||
(make-position-token #,action start-pos end-pos)))]
|
|
||||||
[(eq? src-loc-style 'lexer-srcloc)
|
|
||||||
#`(let/ec ret
|
|
||||||
(syntax-parameterize
|
|
||||||
([return-without-srcloc (make-rename-transformer #'ret)])
|
|
||||||
(make-srcloc-token #,action lexeme-srcloc)))]
|
|
||||||
[else action])))
|
|
||||||
(syntax/loc action
|
|
||||||
(lambda (start-pos-p end-pos-p lexeme-p input-port-p)
|
|
||||||
(define lexeme-srcloc-p (make-srcloc (object-name input-port-p)
|
|
||||||
(position-line start-pos-p)
|
|
||||||
(position-col start-pos-p)
|
|
||||||
(position-offset start-pos-p)
|
|
||||||
(and (number? (position-offset end-pos-p))
|
|
||||||
(number? (position-offset start-pos-p))
|
|
||||||
(- (position-offset end-pos-p)
|
|
||||||
(position-offset start-pos-p)))))
|
|
||||||
(syntax-parameterize
|
|
||||||
([start-pos (make-rename-transformer #'start-pos-p)]
|
|
||||||
[end-pos (make-rename-transformer #'end-pos-p)]
|
|
||||||
[lexeme (make-rename-transformer #'lexeme-p)]
|
|
||||||
[input-port (make-rename-transformer #'input-port-p)]
|
|
||||||
[lexeme-srcloc (make-rename-transformer #'lexeme-srcloc-p)])
|
|
||||||
action-stx)))))
|
|
||||||
|
|
||||||
(define-for-syntax (make-lexer-trans src-loc-style)
|
|
||||||
(lambda (stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
((_ re-act ...)
|
|
||||||
(begin
|
|
||||||
(for-each
|
|
||||||
(lambda (x)
|
|
||||||
(syntax-case x ()
|
|
||||||
((re act) (void))
|
|
||||||
(_ (raise-syntax-error #f
|
|
||||||
"not a regular expression / action pair"
|
|
||||||
stx
|
|
||||||
x))))
|
|
||||||
(syntax->list (syntax (re-act ...))))
|
|
||||||
(let* ((spec/re-act-lst
|
|
||||||
(syntax->list (syntax (re-act ...))))
|
|
||||||
(eof-act
|
|
||||||
(get-special-action spec/re-act-lst #'eof #''eof))
|
|
||||||
(spec-act
|
|
||||||
(get-special-action spec/re-act-lst #'special #'(void)))
|
|
||||||
(spec-comment-act
|
|
||||||
(get-special-action spec/re-act-lst #'special-comment #'#f))
|
|
||||||
(ids (list #'special #'special-comment #'eof))
|
|
||||||
(re-act-lst
|
|
||||||
(filter
|
|
||||||
(lambda (spec/re-act)
|
|
||||||
(syntax-case spec/re-act ()
|
|
||||||
(((special) act)
|
|
||||||
(not (ormap
|
|
||||||
(lambda (x)
|
|
||||||
(and (identifier? #'special)
|
|
||||||
(module-or-top-identifier=? (syntax special) x)))
|
|
||||||
ids)))
|
|
||||||
(_ #t)))
|
|
||||||
spec/re-act-lst))
|
|
||||||
(name-lst (map (lambda (x) (datum->syntax-object #f (gensym))) re-act-lst))
|
|
||||||
(act-lst (map (lambda (x) (stx-car (stx-cdr x))) re-act-lst))
|
|
||||||
(re-actname-lst (map (lambda (re-act name)
|
|
||||||
(list (stx-car re-act)
|
|
||||||
name))
|
|
||||||
re-act-lst
|
|
||||||
name-lst)))
|
|
||||||
(when (null? spec/re-act-lst)
|
|
||||||
(raise-syntax-error (or src-loc-style 'lexer) "expected at least one action" stx))
|
|
||||||
(let-values (((trans start action-names no-look disappeared-uses)
|
|
||||||
(build-lexer re-actname-lst)))
|
|
||||||
(when (vector-ref action-names start) ;; Start state is final
|
|
||||||
(unless (and
|
|
||||||
;; All the successor states are final
|
|
||||||
(andmap (lambda (x) (vector-ref action-names (vector-ref x 2)))
|
|
||||||
(vector->list (vector-ref trans start)))
|
|
||||||
;; Each character has a successor state
|
|
||||||
(let loop ((check 0)
|
|
||||||
(nexts (vector->list (vector-ref trans start))))
|
|
||||||
(cond
|
|
||||||
((null? nexts) #f)
|
|
||||||
(else
|
|
||||||
(let ((next (car nexts)))
|
|
||||||
(and (= (vector-ref next 0) check)
|
|
||||||
(let ((next-check (vector-ref next 1)))
|
|
||||||
(or (>= next-check max-char-num)
|
|
||||||
(loop (add1 next-check) (cdr nexts))))))))))
|
|
||||||
(eprintf "Warning: lexer at ~a can accept the empty string.\n" stx)))
|
|
||||||
(with-syntax ((start-state-stx start)
|
|
||||||
(trans-table-stx trans)
|
|
||||||
(no-lookahead-stx no-look)
|
|
||||||
((name ...) name-lst)
|
|
||||||
((act ...) (map (lambda (a)
|
|
||||||
(wrap-action a src-loc-style))
|
|
||||||
act-lst))
|
|
||||||
((act-name ...) (vector->list action-names))
|
|
||||||
(spec-act-stx
|
|
||||||
(wrap-action spec-act src-loc-style))
|
|
||||||
(has-comment-act?-stx
|
|
||||||
(if (syntax-e spec-comment-act) #t #f))
|
|
||||||
(spec-comment-act-stx
|
|
||||||
(wrap-action spec-comment-act src-loc-style))
|
|
||||||
(eof-act-stx (wrap-action eof-act src-loc-style)))
|
|
||||||
(syntax-property
|
|
||||||
(syntax/loc stx
|
|
||||||
(let ([name act] ...)
|
|
||||||
(let ([proc
|
|
||||||
(lexer-body start-state-stx
|
|
||||||
trans-table-stx
|
|
||||||
(vector act-name ...)
|
|
||||||
no-lookahead-stx
|
|
||||||
spec-act-stx
|
|
||||||
has-comment-act?-stx
|
|
||||||
spec-comment-act-stx
|
|
||||||
eof-act-stx)])
|
|
||||||
;; reverse eta to get named procedures:
|
|
||||||
(lambda (port) (proc port)))))
|
|
||||||
'disappeared-use
|
|
||||||
disappeared-uses)))))))))
|
|
||||||
|
|
||||||
(define-syntax lexer (make-lexer-trans #f))
|
|
||||||
(define-syntax lexer-src-pos (make-lexer-trans 'lexer-src-pos))
|
|
||||||
(define-syntax lexer-srcloc (make-lexer-trans 'lexer-srcloc))
|
|
||||||
|
|
||||||
(define-syntax (define-lex-abbrev stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
((_ name re)
|
|
||||||
(identifier? (syntax name))
|
|
||||||
(syntax/loc stx
|
|
||||||
(define-syntax name
|
|
||||||
(make-lex-abbrev (lambda () (quote-syntax re))))))
|
|
||||||
(_
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
"form should be (define-lex-abbrev name re)"
|
|
||||||
stx))))
|
|
||||||
|
|
||||||
(define-syntax (define-lex-abbrevs stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
((_ x ...)
|
|
||||||
(with-syntax (((abbrev ...)
|
|
||||||
(map
|
|
||||||
(lambda (a)
|
|
||||||
(syntax-case a ()
|
|
||||||
((name re)
|
|
||||||
(identifier? (syntax name))
|
|
||||||
(syntax/loc a (define-lex-abbrev name re)))
|
|
||||||
(_ (raise-syntax-error
|
|
||||||
#f
|
|
||||||
"form should be (define-lex-abbrevs (name re) ...)"
|
|
||||||
stx
|
|
||||||
a))))
|
|
||||||
(syntax->list (syntax (x ...))))))
|
|
||||||
(syntax/loc stx (begin abbrev ...))))
|
|
||||||
(_
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
"form should be (define-lex-abbrevs (name re) ...)"
|
|
||||||
stx))))
|
|
||||||
|
|
||||||
(define-syntax (define-lex-trans stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
((_ name-form body-form)
|
|
||||||
(let-values (((name body)
|
|
||||||
(normalize-definition (syntax (define-syntax name-form body-form)) #'lambda)))
|
|
||||||
|
|
||||||
#`(define-syntax #,name
|
|
||||||
(let ((func #,body))
|
|
||||||
(unless (procedure? func)
|
|
||||||
(raise-syntax-error 'define-lex-trans "expected a procedure as the transformer, got ~e" func))
|
|
||||||
(unless (procedure-arity-includes? func 1)
|
|
||||||
(raise-syntax-error 'define-lex-trans "expected a procedure that accepts 1 argument as the transformer, got ~e" func))
|
|
||||||
(make-lex-trans func)))))
|
|
||||||
(_
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
"form should be (define-lex-trans name transformer)"
|
|
||||||
stx))))
|
|
||||||
|
|
||||||
|
|
||||||
(define (get-next-state-helper char min max table)
|
|
||||||
(if (>= min max)
|
|
||||||
#f
|
|
||||||
(let* ((try (quotient (+ min max) 2))
|
|
||||||
(el (vector-ref table try))
|
|
||||||
(r1 (vector-ref el 0))
|
|
||||||
(r2 (vector-ref el 1)))
|
|
||||||
(cond
|
|
||||||
((and (>= char r1) (<= char r2)) (vector-ref el 2))
|
|
||||||
((< char r1) (get-next-state-helper char min try table))
|
|
||||||
(else (get-next-state-helper char (add1 try) max table))))))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(define (get-next-state char table)
|
|
||||||
(if table
|
|
||||||
(get-next-state-helper char 0 (vector-length table) table)
|
|
||||||
#f))
|
|
||||||
|
|
||||||
(define (lexer-body start-state trans-table actions no-lookahead special-action
|
|
||||||
has-special-comment-action? special-comment-action eof-action)
|
|
||||||
(letrec ((lexer
|
|
||||||
(lambda (ip)
|
|
||||||
(let ((first-pos (get-position ip))
|
|
||||||
(first-char (peek-char-or-special ip 0)))
|
|
||||||
;(printf "(peek-char-or-special port 0) = ~e\n" first-char)
|
|
||||||
(cond
|
|
||||||
((eof-object? first-char)
|
|
||||||
(do-match ip first-pos eof-action (read-char-or-special ip)))
|
|
||||||
((special-comment? first-char)
|
|
||||||
(read-char-or-special ip)
|
|
||||||
(cond
|
|
||||||
(has-special-comment-action?
|
|
||||||
(do-match ip first-pos special-comment-action #f))
|
|
||||||
(else (lexer ip))))
|
|
||||||
((not (char? first-char))
|
|
||||||
(do-match ip first-pos special-action (read-char-or-special ip)))
|
|
||||||
(else
|
|
||||||
(let lexer-loop (
|
|
||||||
;; current-state
|
|
||||||
(state start-state)
|
|
||||||
;; the character to transition on
|
|
||||||
(char first-char)
|
|
||||||
;; action for the longest match seen thus far
|
|
||||||
;; including a match at the current state
|
|
||||||
(longest-match-action
|
|
||||||
(vector-ref actions start-state))
|
|
||||||
;; how many bytes precede char
|
|
||||||
(length-bytes 0)
|
|
||||||
;; how many characters have been read
|
|
||||||
;; including the one just read
|
|
||||||
(length-chars 1)
|
|
||||||
;; how many characters are in the longest match
|
|
||||||
(longest-match-length 0))
|
|
||||||
(let ((next-state
|
|
||||||
(cond
|
|
||||||
((not (char? char)) #f)
|
|
||||||
(else (get-next-state (char->integer char)
|
|
||||||
(vector-ref trans-table state))))))
|
|
||||||
(cond
|
|
||||||
((not next-state)
|
|
||||||
(check-match ip first-pos longest-match-length
|
|
||||||
length-chars longest-match-action))
|
|
||||||
((vector-ref no-lookahead next-state)
|
|
||||||
(let ((act (vector-ref actions next-state)))
|
|
||||||
(check-match ip
|
|
||||||
first-pos
|
|
||||||
(if act length-chars longest-match-length)
|
|
||||||
length-chars
|
|
||||||
(if act act longest-match-action))))
|
|
||||||
(else
|
|
||||||
(let* ((act (vector-ref actions next-state))
|
|
||||||
(next-length-bytes (+ (char-utf-8-length char) length-bytes))
|
|
||||||
(next-char (peek-char-or-special ip next-length-bytes)))
|
|
||||||
#;(printf "(peek-char-or-special port ~e) = ~e\n"
|
|
||||||
next-length-bytes next-char)
|
|
||||||
(lexer-loop next-state
|
|
||||||
next-char
|
|
||||||
(if act
|
|
||||||
act
|
|
||||||
longest-match-action)
|
|
||||||
next-length-bytes
|
|
||||||
(add1 length-chars)
|
|
||||||
(if act
|
|
||||||
length-chars
|
|
||||||
longest-match-length)))))))))))))
|
|
||||||
(lambda (ip)
|
|
||||||
(unless (input-port? ip)
|
|
||||||
(raise-argument-error
|
|
||||||
'lexer
|
|
||||||
"input-port?"
|
|
||||||
0
|
|
||||||
ip))
|
|
||||||
(lexer ip))))
|
|
||||||
|
|
||||||
(define (check-match lb first-pos longest-match-length length longest-match-action)
|
|
||||||
(unless longest-match-action
|
|
||||||
(let* ((match (read-string length lb))
|
|
||||||
(end-pos (get-position lb)))
|
|
||||||
(raise-read-error
|
|
||||||
(format "lexer: No match found in input starting with: ~a" match)
|
|
||||||
(file-path)
|
|
||||||
(position-line first-pos)
|
|
||||||
(position-col first-pos)
|
|
||||||
(position-offset first-pos)
|
|
||||||
(- (position-offset end-pos) (position-offset first-pos)))))
|
|
||||||
(let ((match (read-string longest-match-length lb)))
|
|
||||||
;(printf "(read-string ~e port) = ~e\n" longest-match-length match)
|
|
||||||
(do-match lb first-pos longest-match-action match)))
|
|
||||||
|
|
||||||
(define file-path (make-parameter #f))
|
|
||||||
(define lexer-file-path file-path)
|
|
||||||
|
|
||||||
(define (do-match ip first-pos action value)
|
|
||||||
#;(printf "(action ~a ~a ~a ~a)\n"
|
|
||||||
(position-offset first-pos) (position-offset (get-position ip)) value ip)
|
|
||||||
(action first-pos (get-position ip) value ip))
|
|
||||||
|
|
||||||
(define (get-position ip)
|
|
||||||
(let-values (((line col off) (port-next-location ip)))
|
|
||||||
(make-position off line col)))
|
|
||||||
|
|
||||||
(define-syntax (create-unicode-abbrevs stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
((_ ctxt)
|
|
||||||
(with-syntax (((ranges ...) (map (lambda (range)
|
|
||||||
`(union ,@(map (lambda (x)
|
|
||||||
`(char-range ,(integer->char (car x))
|
|
||||||
,(integer->char (cdr x))))
|
|
||||||
range)))
|
|
||||||
(list (force alphabetic-ranges)
|
|
||||||
(force lower-case-ranges)
|
|
||||||
(force upper-case-ranges)
|
|
||||||
(force title-case-ranges)
|
|
||||||
(force numeric-ranges)
|
|
||||||
(force symbolic-ranges)
|
|
||||||
(force punctuation-ranges)
|
|
||||||
(force graphic-ranges)
|
|
||||||
(force whitespace-ranges)
|
|
||||||
(force blank-ranges)
|
|
||||||
(force iso-control-ranges))))
|
|
||||||
((names ...) (map (lambda (sym)
|
|
||||||
(datum->syntax-object (syntax ctxt) sym #f))
|
|
||||||
'(alphabetic
|
|
||||||
lower-case
|
|
||||||
upper-case
|
|
||||||
title-case
|
|
||||||
numeric
|
|
||||||
symbolic
|
|
||||||
punctuation
|
|
||||||
graphic
|
|
||||||
whitespace
|
|
||||||
blank
|
|
||||||
iso-control))))
|
|
||||||
(syntax (define-lex-abbrevs (names ranges) ...))))))
|
|
||||||
|
|
||||||
(define-lex-abbrev any-char (char-complement (union)))
|
|
||||||
(define-lex-abbrev any-string (intersection))
|
|
||||||
(define-lex-abbrev nothing (union))
|
|
||||||
(create-unicode-abbrevs #'here)
|
|
||||||
|
|
||||||
(define-lex-trans (char-set stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
((_ str)
|
|
||||||
(string? (syntax-e (syntax str)))
|
|
||||||
(with-syntax (((char ...) (string->list (syntax-e (syntax str)))))
|
|
||||||
(syntax (union char ...))))))
|
|
||||||
|
|
||||||
(define-syntax provide-lex-keyword
|
|
||||||
(syntax-rules ()
|
|
||||||
[(_ id ...)
|
|
||||||
(begin
|
|
||||||
(define-syntax-parameter id
|
|
||||||
(make-set!-transformer
|
|
||||||
(lambda (stx)
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
(format "use of a lexer keyword (~a) is not in an appropriate lexer action"
|
|
||||||
'id)
|
|
||||||
stx))))
|
|
||||||
...
|
|
||||||
(provide id ...))]))
|
|
||||||
|
|
||||||
(provide-lex-keyword start-pos end-pos lexeme lexeme-srcloc input-port return-without-pos return-without-srcloc)
|
|
||||||
|
|
||||||
)
|
|
@ -1,16 +0,0 @@
|
|||||||
#lang scheme/base
|
|
||||||
|
|
||||||
(provide (all-defined-out))
|
|
||||||
(require syntax/stx)
|
|
||||||
|
|
||||||
;; get-special-action: (syntax-object list) syntax-object syntax-object -> syntax-object
|
|
||||||
;; Returns the first action from a rule of the form ((which-special) action)
|
|
||||||
(define (get-special-action rules which-special none)
|
|
||||||
(cond
|
|
||||||
((null? rules) none)
|
|
||||||
(else
|
|
||||||
(syntax-case (car rules) ()
|
|
||||||
(((special) act)
|
|
||||||
(and (identifier? #'special) (module-or-top-identifier=? (syntax special) which-special))
|
|
||||||
(syntax act))
|
|
||||||
(_ (get-special-action (cdr rules) which-special none))))))
|
|
@ -1,339 +0,0 @@
|
|||||||
(module deriv mzscheme
|
|
||||||
|
|
||||||
(require mzlib/list
|
|
||||||
(prefix is: mzlib/integer-set)
|
|
||||||
"re.rkt"
|
|
||||||
"util.rkt")
|
|
||||||
|
|
||||||
(provide build-dfa print-dfa (struct dfa (num-states start-state final-states/actions transitions)))
|
|
||||||
|
|
||||||
(define e (build-epsilon))
|
|
||||||
(define z (build-zero))
|
|
||||||
|
|
||||||
|
|
||||||
;; Don't do anything with this one but extract the chars
|
|
||||||
(define all-chars (->re `(char-complement (union)) (make-cache)))
|
|
||||||
|
|
||||||
;; get-char-groups : re bool -> (list-of char-setR?)
|
|
||||||
;; Collects the char-setRs in r that could be used in
|
|
||||||
;; taking the derivative of r.
|
|
||||||
(define (get-char-groups r found-negation)
|
|
||||||
(cond
|
|
||||||
((or (eq? r e) (eq? r z)) null)
|
|
||||||
((char-setR? r) (list r))
|
|
||||||
((concatR? r)
|
|
||||||
(if (re-nullable? (concatR-re1 r))
|
|
||||||
(append (get-char-groups (concatR-re1 r) found-negation)
|
|
||||||
(get-char-groups (concatR-re2 r) found-negation))
|
|
||||||
(get-char-groups (concatR-re1 r) found-negation)))
|
|
||||||
((repeatR? r)
|
|
||||||
(get-char-groups (repeatR-re r) found-negation))
|
|
||||||
((orR? r)
|
|
||||||
(apply append (map (lambda (x) (get-char-groups x found-negation)) (orR-res r))))
|
|
||||||
((andR? r)
|
|
||||||
(apply append (map (lambda (x) (get-char-groups x found-negation)) (andR-res r))))
|
|
||||||
((negR? r)
|
|
||||||
(if found-negation
|
|
||||||
(get-char-groups (negR-re r) #t)
|
|
||||||
(cons all-chars (get-char-groups (negR-re r) #t))))))
|
|
||||||
|
|
||||||
(test-block ((c (make-cache))
|
|
||||||
(r1 (->re #\1 c))
|
|
||||||
(r2 (->re #\2 c)))
|
|
||||||
((get-char-groups e #f) null)
|
|
||||||
((get-char-groups z #f) null)
|
|
||||||
((get-char-groups r1 #f) (list r1))
|
|
||||||
((get-char-groups (->re `(concatenation ,r1 ,r2) c) #f)
|
|
||||||
(list r1))
|
|
||||||
((get-char-groups (->re `(concatenation ,e ,r2) c) #f)
|
|
||||||
(list r2))
|
|
||||||
((get-char-groups (->re `(concatenation (repetition 0 +inf.0 ,r1) ,r2) c) #f)
|
|
||||||
(list r1 r2))
|
|
||||||
((get-char-groups (->re `(repetition 0 +inf.0 ,r1) c) #f)
|
|
||||||
(list r1))
|
|
||||||
((get-char-groups
|
|
||||||
(->re `(union (repetition 0 +inf.0 ,r1)
|
|
||||||
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
|
|
||||||
(list r1 r2 (->re "3" c) (->re "4" c)))
|
|
||||||
((get-char-groups (->re `(complement ,r1) c) #f)
|
|
||||||
(list all-chars r1))
|
|
||||||
((get-char-groups
|
|
||||||
(->re `(intersection (repetition 0 +inf.0 ,r1)
|
|
||||||
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
|
|
||||||
(list r1 r2 (->re "3" c) (->re "4" c)))
|
|
||||||
)
|
|
||||||
(define loc:member? is:member?)
|
|
||||||
|
|
||||||
;; deriveR : re char cache -> re
|
|
||||||
(define (deriveR r c cache)
|
|
||||||
(cond
|
|
||||||
((or (eq? r e) (eq? r z)) z)
|
|
||||||
((char-setR? r)
|
|
||||||
(if (loc:member? c (char-setR-chars r)) e z))
|
|
||||||
((concatR? r)
|
|
||||||
(let* ((r1 (concatR-re1 r))
|
|
||||||
(r2 (concatR-re2 r))
|
|
||||||
(d (build-concat (deriveR r1 c cache) r2 cache)))
|
|
||||||
(if (re-nullable? r1)
|
|
||||||
(build-or (list d (deriveR r2 c cache)) cache)
|
|
||||||
d)))
|
|
||||||
((repeatR? r)
|
|
||||||
(build-concat (deriveR (repeatR-re r) c cache)
|
|
||||||
(build-repeat (sub1 (repeatR-low r))
|
|
||||||
(sub1 (repeatR-high r))
|
|
||||||
(repeatR-re r) cache)
|
|
||||||
cache))
|
|
||||||
((orR? r)
|
|
||||||
(build-or (map (lambda (x) (deriveR x c cache))
|
|
||||||
(orR-res r))
|
|
||||||
cache))
|
|
||||||
((andR? r)
|
|
||||||
(build-and (map (lambda (x) (deriveR x c cache))
|
|
||||||
(andR-res r))
|
|
||||||
cache))
|
|
||||||
((negR? r)
|
|
||||||
(build-neg (deriveR (negR-re r) c cache) cache))))
|
|
||||||
|
|
||||||
(test-block ((c (make-cache))
|
|
||||||
(a (char->integer #\a))
|
|
||||||
(b (char->integer #\b))
|
|
||||||
(r1 (->re #\a c))
|
|
||||||
(r2 (->re `(repetition 0 +inf.0 #\a) c))
|
|
||||||
(r3 (->re `(repetition 0 +inf.0 ,r2) c))
|
|
||||||
(r4 (->re `(concatenation #\a ,r2) c))
|
|
||||||
(r5 (->re `(repetition 0 +inf.0 ,r4) c))
|
|
||||||
(r6 (->re `(union ,r5 #\a) c))
|
|
||||||
(r7 (->re `(concatenation ,r2 ,r2) c))
|
|
||||||
(r8 (->re `(complement ,r4) c))
|
|
||||||
(r9 (->re `(intersection ,r2 ,r4) c)))
|
|
||||||
((deriveR e a c) z)
|
|
||||||
((deriveR z a c) z)
|
|
||||||
((deriveR r1 b c) z)
|
|
||||||
((deriveR r1 a c) e)
|
|
||||||
((deriveR r2 a c) r2)
|
|
||||||
((deriveR r2 b c) z)
|
|
||||||
((deriveR r3 a c) r2)
|
|
||||||
((deriveR r3 b c) z)
|
|
||||||
((deriveR r4 a c) r2)
|
|
||||||
((deriveR r4 b c) z)
|
|
||||||
((deriveR r5 a c) (->re `(concatenation ,r2 ,r5) c))
|
|
||||||
((deriveR r5 b c) z)
|
|
||||||
((deriveR r6 a c) (->re `(union (concatenation ,r2 ,r5) "") c))
|
|
||||||
((deriveR r6 b c) z)
|
|
||||||
((deriveR r7 a c) (->re `(union (concatenation ,r2 ,r2) ,r2) c))
|
|
||||||
((deriveR r7 b c) z)
|
|
||||||
((deriveR r8 a c) (->re `(complement, r2) c))
|
|
||||||
((deriveR r8 b c) (->re `(complement ,z) c))
|
|
||||||
((deriveR r9 a c) r2)
|
|
||||||
((deriveR r9 b c) z)
|
|
||||||
((deriveR (->re `(repetition 1 2 "ab") c) a c)
|
|
||||||
(->re `(concatenation "b" (repetition 0 1 "ab")) c)))
|
|
||||||
|
|
||||||
;; An re-action is (cons re action)
|
|
||||||
|
|
||||||
;; derive : (list-of re-action) char cache -> (union (list-of re-action) #f)
|
|
||||||
;; applies deriveR to all the re-actions's re parts.
|
|
||||||
;; Returns #f if the derived state is equivalent to z.
|
|
||||||
(define (derive r c cache)
|
|
||||||
(let ((new-r (map (lambda (ra)
|
|
||||||
(cons (deriveR (car ra) c cache) (cdr ra)))
|
|
||||||
r)))
|
|
||||||
(if (andmap (lambda (x) (eq? z (car x)))
|
|
||||||
new-r)
|
|
||||||
#f
|
|
||||||
new-r)))
|
|
||||||
|
|
||||||
(test-block ((c (make-cache))
|
|
||||||
(r1 (->re #\1 c))
|
|
||||||
(r2 (->re #\2 c)))
|
|
||||||
((derive null (char->integer #\1) c) #f)
|
|
||||||
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c)
|
|
||||||
(list (cons e 1) (cons z 2)))
|
|
||||||
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f))
|
|
||||||
|
|
||||||
|
|
||||||
;; get-final : (list-of re-action) -> (union #f syntax-object)
|
|
||||||
;; An re that accepts e represents a final state. Return the
|
|
||||||
;; action from the first final state or #f if there is none.
|
|
||||||
(define (get-final res)
|
|
||||||
(cond
|
|
||||||
((null? res) #f)
|
|
||||||
((re-nullable? (caar res)) (cdar res))
|
|
||||||
(else (get-final (cdr res)))))
|
|
||||||
|
|
||||||
(test-block ((c->i char->integer)
|
|
||||||
(c (make-cache))
|
|
||||||
(r1 (->re #\a c))
|
|
||||||
(r2 (->re #\b c))
|
|
||||||
(b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5)))
|
|
||||||
(a (list (cons r1 1) (cons r2 2))))
|
|
||||||
((derive null (c->i #\a) c) #f)
|
|
||||||
((derive a (c->i #\a) c) (list (cons e 1) (cons z 2)))
|
|
||||||
((derive a (c->i #\b) c) (list (cons z 1) (cons e 2)))
|
|
||||||
((derive a (c->i #\c) c) #f)
|
|
||||||
((derive (list (cons (->re `(union " " "\n" ",") c) 1)
|
|
||||||
(cons (->re `(concatenation (repetition 0 1 "-")
|
|
||||||
(repetition 1 +inf.0 (char-range "0" "9"))) c) 2)
|
|
||||||
(cons (->re `(concatenation "-" (repetition 1 +inf.0 "-")) c) 3)
|
|
||||||
(cons (->re "[" c) 4)
|
|
||||||
(cons (->re "]" c) 5)) (c->i #\[) c)
|
|
||||||
b)
|
|
||||||
((get-final a) #f)
|
|
||||||
((get-final (list (cons e 1) (cons e 2))) 1)
|
|
||||||
((get-final b) 4))
|
|
||||||
|
|
||||||
|
|
||||||
;; A state is (make-state (list-of re-action) nat)
|
|
||||||
(define-struct state (spec index))
|
|
||||||
|
|
||||||
;; get->key : re-action -> (list-of nat)
|
|
||||||
;; states are indexed by the list of indexes of their res
|
|
||||||
(define (get-key s)
|
|
||||||
(map (lambda (x) (re-index (car x))) s))
|
|
||||||
|
|
||||||
(define loc:partition is:partition)
|
|
||||||
|
|
||||||
;; compute-chars : (list-of state) -> (list-of char-set)
|
|
||||||
;; Computed the sets of equivalent characters for taking the
|
|
||||||
;; derivative of the car of st. Only one derivative per set need to be taken.
|
|
||||||
(define (compute-chars st)
|
|
||||||
(cond
|
|
||||||
((null? st) null)
|
|
||||||
(else
|
|
||||||
(loc:partition (map char-setR-chars
|
|
||||||
(apply append (map (lambda (x) (get-char-groups (car x) #f))
|
|
||||||
(state-spec (car st)))))))))
|
|
||||||
|
|
||||||
(test-block ((c (make-cache))
|
|
||||||
(c->i char->integer)
|
|
||||||
(r1 (->re `(char-range #\1 #\4) c))
|
|
||||||
(r2 (->re `(char-range #\2 #\3) c)))
|
|
||||||
((compute-chars null) null)
|
|
||||||
((compute-chars (list (make-state null 1))) null)
|
|
||||||
((map is:integer-set-contents
|
|
||||||
(compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2))))
|
|
||||||
(list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3)))
|
|
||||||
(is:integer-set-contents (is:union (is:make-range (c->i #\1))
|
|
||||||
(is:make-range (c->i #\4)))))))
|
|
||||||
|
|
||||||
|
|
||||||
;; A dfa is (make-dfa int int
|
|
||||||
;; (list-of (cons int syntax-object))
|
|
||||||
;; (list-of (cons int (list-of (cons char-set int)))))
|
|
||||||
;; Each transitions is a state and a list of chars with the state to transition to.
|
|
||||||
;; The finals and transitions are sorted by state number, and duplicate free.
|
|
||||||
(define-struct dfa (num-states start-state final-states/actions transitions) (make-inspector))
|
|
||||||
|
|
||||||
(define loc:get-integer is:get-integer)
|
|
||||||
|
|
||||||
;; build-dfa : (list-of re-action) cache -> dfa
|
|
||||||
(define (build-dfa rs cache)
|
|
||||||
(let* ((transitions (make-hash-table))
|
|
||||||
(get-state-number (make-counter))
|
|
||||||
(start (make-state rs (get-state-number))))
|
|
||||||
(cache (cons 'state (get-key rs)) (lambda () start))
|
|
||||||
(let loop ((old-states (list start))
|
|
||||||
(new-states null)
|
|
||||||
(all-states (list start))
|
|
||||||
(cs (compute-chars (list start))))
|
|
||||||
(cond
|
|
||||||
((and (null? old-states) (null? new-states))
|
|
||||||
(make-dfa (get-state-number) (state-index start)
|
|
||||||
(sort (filter (lambda (x) (cdr x))
|
|
||||||
(map (lambda (state)
|
|
||||||
(cons (state-index state) (get-final (state-spec state))))
|
|
||||||
all-states))
|
|
||||||
(lambda (a b) (< (car a) (car b))))
|
|
||||||
(sort (hash-table-map transitions
|
|
||||||
(lambda (state trans)
|
|
||||||
(cons (state-index state)
|
|
||||||
(map (lambda (t)
|
|
||||||
(cons (car t)
|
|
||||||
(state-index (cdr t))))
|
|
||||||
trans))))
|
|
||||||
(lambda (a b) (< (car a) (car b))))))
|
|
||||||
((null? old-states)
|
|
||||||
(loop new-states null all-states (compute-chars new-states)))
|
|
||||||
((null? cs)
|
|
||||||
(loop (cdr old-states) new-states all-states (compute-chars (cdr old-states))))
|
|
||||||
(else
|
|
||||||
(let* ((state (car old-states))
|
|
||||||
(c (car cs))
|
|
||||||
(new-re (derive (state-spec state) (loc:get-integer c) cache)))
|
|
||||||
(cond
|
|
||||||
(new-re
|
|
||||||
(let* ((new-state? #f)
|
|
||||||
(new-state (cache (cons 'state (get-key new-re))
|
|
||||||
(lambda ()
|
|
||||||
(set! new-state? #t)
|
|
||||||
(make-state new-re (get-state-number)))))
|
|
||||||
(new-all-states (if new-state? (cons new-state all-states) all-states)))
|
|
||||||
(hash-table-put! transitions
|
|
||||||
state
|
|
||||||
(cons (cons c new-state)
|
|
||||||
(hash-table-get transitions state
|
|
||||||
(lambda () null))))
|
|
||||||
(cond
|
|
||||||
(new-state?
|
|
||||||
(loop old-states (cons new-state new-states) new-all-states (cdr cs)))
|
|
||||||
(else
|
|
||||||
(loop old-states new-states new-all-states (cdr cs))))))
|
|
||||||
(else (loop old-states new-states all-states (cdr cs))))))))))
|
|
||||||
|
|
||||||
(define (print-dfa x)
|
|
||||||
(printf "number of states: ~a\n" (dfa-num-states x))
|
|
||||||
(printf "start state: ~a\n" (dfa-start-state x))
|
|
||||||
(printf "final states: ~a\n" (map car (dfa-final-states/actions x)))
|
|
||||||
(for-each (lambda (trans)
|
|
||||||
(printf "state: ~a\n" (car trans))
|
|
||||||
(for-each (lambda (rule)
|
|
||||||
(printf " -~a-> ~a\n"
|
|
||||||
(is:integer-set-contents (car rule))
|
|
||||||
(cdr rule)))
|
|
||||||
(cdr trans)))
|
|
||||||
(dfa-transitions x)))
|
|
||||||
|
|
||||||
(define (build-test-dfa rs)
|
|
||||||
(let ((c (make-cache)))
|
|
||||||
(build-dfa (map (lambda (x) (cons (->re x c) 'action))
|
|
||||||
rs)
|
|
||||||
c)))
|
|
||||||
|
|
||||||
|
|
||||||
#|
|
|
||||||
(define t1 (build-test-dfa null))
|
|
||||||
(define t2 (build-test-dfa `(#\a)))
|
|
||||||
(define t3 (build-test-dfa `(#\a #\b)))
|
|
||||||
(define t4 (build-test-dfa `((repetition 0 +inf.0 #\a)
|
|
||||||
(repetition 0 +inf.0 (concatenation #\a #\b)))))
|
|
||||||
(define t5 (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\0 #\1)) #\1))))
|
|
||||||
(define t6 (build-test-dfa `((repetition 0 +inf.0 (repetition 0 +inf.0 #\a))
|
|
||||||
(repetition 0 +inf.0 (concatenation #\b (repetition 1 +inf.0 #\b))))))
|
|
||||||
(define t7 (build-test-dfa `((concatenation (repetition 0 +inf.0 #\a) (repetition 0 +inf.0 #\b)
|
|
||||||
(repetition 0 +inf.0 #\c) (repetition 0 +inf.0 #\d)
|
|
||||||
(repetition 0 +inf.0 #\e)))))
|
|
||||||
(define t8
|
|
||||||
(build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\a #\b)) #\a (union #\a #\b)
|
|
||||||
(union #\a #\b) (union #\a #\b) (union #\a #\b)))))
|
|
||||||
(define t9 (build-test-dfa `((concatenation "/*"
|
|
||||||
(complement (concatenation (intersection) "*/" (intersection)))
|
|
||||||
"*/"))))
|
|
||||||
(define t11 (build-test-dfa `((complement "1"))))
|
|
||||||
(define t12 (build-test-dfa `((concatenation (intersection (concatenation (repetition 0 +inf.0 "a") "b")
|
|
||||||
(concatenation "a" (repetition 0 +inf.0 "b")))
|
|
||||||
"ab"))))
|
|
||||||
(define x (build-test-dfa `((union " " "\n" ",")
|
|
||||||
(concatenation (repetition 0 1 "-") (repetition 1 +inf.0 (char-range "0" "9")))
|
|
||||||
(concatenation "-" (repetition 1 +inf.0 "-"))
|
|
||||||
"["
|
|
||||||
"]")))
|
|
||||||
(define y (build-test-dfa
|
|
||||||
`((repetition 1 +inf.0
|
|
||||||
(union (concatenation "|" (repetition 0 +inf.0 (char-complement "|")) "|")
|
|
||||||
(concatenation "|" (repetition 0 +inf.0 (char-complement "|"))))))))
|
|
||||||
(define t13 (build-test-dfa `((intersection (concatenation (intersection) "111" (intersection))
|
|
||||||
(complement (union (concatenation (intersection) "01")
|
|
||||||
(repetition 1 +inf.0 "1")))))))
|
|
||||||
(define t14 (build-test-dfa `((complement "1"))))
|
|
||||||
|#
|
|
||||||
)
|
|
@ -1,81 +0,0 @@
|
|||||||
#lang scheme/base
|
|
||||||
(require (for-syntax scheme/base)
|
|
||||||
"../lex.rkt"
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
(define-syntax (catch-syn-error stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
[(_ arg)
|
|
||||||
(datum->syntax
|
|
||||||
#'here
|
|
||||||
(with-handlers ((exn:fail:syntax? exn-message))
|
|
||||||
(syntax-local-expand-expression #'arg)
|
|
||||||
"not-an-error"))]))
|
|
||||||
|
|
||||||
(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev)))
|
|
||||||
(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev a)))
|
|
||||||
(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev (a b) v)))
|
|
||||||
(check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev 1 1)))
|
|
||||||
(check-regexp-match #rx"lex-abbrevs" (catch-syn-error (define-lex-abbrevs ())))
|
|
||||||
|
|
||||||
(check-regexp-match #rx"lex-trans" (catch-syn-error (define-lex-trans)))
|
|
||||||
|
|
||||||
(check-regexp-match #rx"lexer" (catch-syn-error (lexer)))
|
|
||||||
(check-regexp-match #rx"lexer" (catch-syn-error (lexer ("a" "b" "c"))))
|
|
||||||
(check-regexp-match #rx"lexer" (catch-syn-error (lexer ())))
|
|
||||||
(check-regexp-match #rx"lexer" (catch-syn-error (lexer (""))))
|
|
||||||
|
|
||||||
(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (a 1))))
|
|
||||||
(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer ((a) 1))))
|
|
||||||
(check-regexp-match #rx"regular-expression" (catch-syn-error (let ((a 1)) (lexer ((a) 1)))))
|
|
||||||
|
|
||||||
(check-regexp-match #rx"regular-expression"
|
|
||||||
(catch-syn-error (let-syntax ((a 1))
|
|
||||||
(lexer ((a) 1)))))
|
|
||||||
|
|
||||||
(check-regexp-match #rx"define-lex-trans"
|
|
||||||
(catch-syn-error
|
|
||||||
(let ()
|
|
||||||
(define-lex-trans a 1)
|
|
||||||
(let ()
|
|
||||||
(lexer ((a) 1))))))
|
|
||||||
|
|
||||||
;; Detecting mutual recursion cycle:
|
|
||||||
(check-regexp-match #rx"regular-expression"
|
|
||||||
(catch-syn-error
|
|
||||||
(let ()
|
|
||||||
(define-lex-abbrev a b)
|
|
||||||
(define-lex-abbrev b a)
|
|
||||||
(let ()
|
|
||||||
(lexer (a 1))))))
|
|
||||||
|
|
||||||
(check-regexp-match #rx"regular-expression"
|
|
||||||
(catch-syn-error
|
|
||||||
(let ()
|
|
||||||
(define-lex-abbrev a (repetition 0 1 b))
|
|
||||||
(define-lex-abbrev b (repetition 0 1 a))
|
|
||||||
(let ()
|
|
||||||
(lexer (a 1))))))
|
|
||||||
|
|
||||||
;; Detecting cycle within same abbreviation:
|
|
||||||
(check-regexp-match #rx"regular-expression"
|
|
||||||
(catch-syn-error
|
|
||||||
(let ()
|
|
||||||
(define-lex-abbrev balanced
|
|
||||||
(union (concatenation "(" balanced ")" balanced)
|
|
||||||
any-char))
|
|
||||||
(lexer
|
|
||||||
[balanced (string-append lexeme (balanced input-port))]
|
|
||||||
[(eof) ""]))))
|
|
||||||
|
|
||||||
|
|
||||||
(check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (1 1))))
|
|
||||||
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition) 1))))
|
|
||||||
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition #\1 #\1 "3") 1))))
|
|
||||||
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition 1 #\1 "3") 1))))
|
|
||||||
(check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition 1 0 "3") 1))))
|
|
||||||
(check-regexp-match #rx"complement" (catch-syn-error (lexer ((complement) 1))))
|
|
||||||
(check-regexp-match #rx"char-range" (catch-syn-error (lexer ((char-range) 1))))
|
|
||||||
(check-regexp-match #rx"char-range" (catch-syn-error (lexer ((char-range #\9 #\0) 1))))
|
|
||||||
(check-regexp-match #rx"char-complement" (catch-syn-error (lexer ((char-complement) 1))))
|
|
||||||
(check-regexp-match #rx"char-complement" (catch-syn-error (lexer ((char-complement (concatenation "1" "2")) 1))))
|
|
@ -1,179 +0,0 @@
|
|||||||
(module front mzscheme
|
|
||||||
(require (prefix is: mzlib/integer-set)
|
|
||||||
mzlib/list
|
|
||||||
syntax/stx
|
|
||||||
"util.rkt"
|
|
||||||
"stx.rkt"
|
|
||||||
"re.rkt"
|
|
||||||
"deriv.rkt")
|
|
||||||
|
|
||||||
(provide build-lexer)
|
|
||||||
|
|
||||||
(define-syntax time-label
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ l e ...)
|
|
||||||
(begin
|
|
||||||
(printf "~a: " l)
|
|
||||||
(time (begin e ...))))))
|
|
||||||
|
|
||||||
;; A table is either
|
|
||||||
;; - (vector-of (union #f nat))
|
|
||||||
;; - (vector-of (vector-of (vector nat nat nat)))
|
|
||||||
|
|
||||||
(define loc:integer-set-contents is:integer-set-contents)
|
|
||||||
|
|
||||||
;; dfa->1d-table : dfa -> (same as build-lexer)
|
|
||||||
(define (dfa->1d-table dfa)
|
|
||||||
(let ((state-table (make-vector (dfa-num-states dfa) #f))
|
|
||||||
(transition-cache (make-hash-table 'equal)))
|
|
||||||
(for-each
|
|
||||||
(lambda (trans)
|
|
||||||
(let* ((from-state (car trans))
|
|
||||||
(all-chars/to (cdr trans))
|
|
||||||
(flat-all-chars/to
|
|
||||||
(sort
|
|
||||||
(apply append
|
|
||||||
(map (lambda (chars/to)
|
|
||||||
(let ((char-ranges (loc:integer-set-contents (car chars/to)))
|
|
||||||
(to (cdr chars/to)))
|
|
||||||
(map (lambda (char-range)
|
|
||||||
(let ((entry (vector (car char-range) (cdr char-range) to)))
|
|
||||||
(hash-table-get transition-cache entry
|
|
||||||
(lambda ()
|
|
||||||
(hash-table-put! transition-cache
|
|
||||||
entry
|
|
||||||
entry)
|
|
||||||
entry))))
|
|
||||||
char-ranges)))
|
|
||||||
all-chars/to))
|
|
||||||
(lambda (a b)
|
|
||||||
(< (vector-ref a 0) (vector-ref b 0))))))
|
|
||||||
(vector-set! state-table from-state (list->vector flat-all-chars/to))))
|
|
||||||
(dfa-transitions dfa))
|
|
||||||
state-table))
|
|
||||||
|
|
||||||
|
|
||||||
(define loc:foldr is:foldr)
|
|
||||||
|
|
||||||
;; dfa->2d-table : dfa -> (same as build-lexer)
|
|
||||||
(define (dfa->2d-table dfa)
|
|
||||||
(let (
|
|
||||||
;; char-table : (vector-of (union #f nat))
|
|
||||||
;; The lexer table, one entry per state per char.
|
|
||||||
;; Each entry specifies a state to transition to.
|
|
||||||
;; #f indicates no transition
|
|
||||||
(char-table (make-vector (* 256 (dfa-num-states dfa)) #f)))
|
|
||||||
|
|
||||||
;; Fill the char-table vector
|
|
||||||
(for-each
|
|
||||||
(lambda (trans)
|
|
||||||
(let ((from-state (car trans)))
|
|
||||||
(for-each (lambda (chars/to)
|
|
||||||
(let ((to-state (cdr chars/to)))
|
|
||||||
(loc:foldr (lambda (char _)
|
|
||||||
(vector-set! char-table
|
|
||||||
(bitwise-ior
|
|
||||||
char
|
|
||||||
(arithmetic-shift from-state 8))
|
|
||||||
to-state))
|
|
||||||
(void)
|
|
||||||
(car chars/to))))
|
|
||||||
(cdr trans))))
|
|
||||||
(dfa-transitions dfa))
|
|
||||||
char-table))
|
|
||||||
|
|
||||||
|
|
||||||
;; dfa->actions : dfa -> (vector-of (union #f syntax-object))
|
|
||||||
;; The action for each final state, #f if the state isn't final
|
|
||||||
(define (dfa->actions dfa)
|
|
||||||
(let ((actions (make-vector (dfa-num-states dfa) #f)))
|
|
||||||
(for-each (lambda (state/action)
|
|
||||||
(vector-set! actions (car state/action) (cdr state/action)))
|
|
||||||
(dfa-final-states/actions dfa))
|
|
||||||
actions))
|
|
||||||
|
|
||||||
;; dfa->no-look : dfa -> (vector-of bool)
|
|
||||||
;; For each state whether the lexer can ignore the next input.
|
|
||||||
;; It can do this only if there are no transitions out of the
|
|
||||||
;; current state.
|
|
||||||
(define (dfa->no-look dfa)
|
|
||||||
(let ((no-look (make-vector (dfa-num-states dfa) #t)))
|
|
||||||
(for-each (lambda (trans)
|
|
||||||
(vector-set! no-look (car trans) #f))
|
|
||||||
(dfa-transitions dfa))
|
|
||||||
no-look))
|
|
||||||
|
|
||||||
(test-block ((d1 (make-dfa 1 1 (list) (list)))
|
|
||||||
(d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
|
|
||||||
(list (cons 1 (list (cons (is:make-range 49 50) 1)
|
|
||||||
(cons (is:make-range 51) 2)))
|
|
||||||
(cons 2 (list (cons (is:make-range 49) 3))))))
|
|
||||||
(d3 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
|
|
||||||
(list (cons 1 (list (cons (is:make-range 100 200) 0)
|
|
||||||
(cons (is:make-range 49 50) 1)
|
|
||||||
(cons (is:make-range 51) 2)))
|
|
||||||
(cons 2 (list (cons (is:make-range 49) 3)))))))
|
|
||||||
((dfa->2d-table d1) (make-vector 256 #f))
|
|
||||||
((dfa->2d-table d2) (let ((v (make-vector 1024 #f)))
|
|
||||||
(vector-set! v 305 1)
|
|
||||||
(vector-set! v 306 1)
|
|
||||||
(vector-set! v 307 2)
|
|
||||||
(vector-set! v 561 3)
|
|
||||||
v))
|
|
||||||
((dfa->1d-table d1) (make-vector 1 #f))
|
|
||||||
((dfa->1d-table d2) #(#f
|
|
||||||
#(#(49 50 1) #(51 51 2))
|
|
||||||
#(#(49 49 3))
|
|
||||||
#f))
|
|
||||||
((dfa->1d-table d3) #(#f
|
|
||||||
#(#(49 50 1) #(51 51 2) #(100 200 0))
|
|
||||||
#(#(49 49 3))
|
|
||||||
#f))
|
|
||||||
((dfa->actions d1) (vector #f))
|
|
||||||
((dfa->actions d2) (vector #f #f 2 3))
|
|
||||||
((dfa->no-look d1) (vector #t))
|
|
||||||
((dfa->no-look d2) (vector #t #f #f #t)))
|
|
||||||
|
|
||||||
;; build-lexer : syntax-object list ->
|
|
||||||
;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object))
|
|
||||||
;; each syntax object has the form (re action)
|
|
||||||
(define (build-lexer sos)
|
|
||||||
(let* ((disappeared-uses (box null))
|
|
||||||
(s-re-acts (map (lambda (so)
|
|
||||||
(cons (parse (stx-car so) disappeared-uses)
|
|
||||||
(stx-car (stx-cdr so))))
|
|
||||||
sos))
|
|
||||||
|
|
||||||
(cache (make-cache))
|
|
||||||
|
|
||||||
(re-acts (map (lambda (s-re-act)
|
|
||||||
(cons (->re (car s-re-act) cache)
|
|
||||||
(cdr s-re-act)))
|
|
||||||
s-re-acts))
|
|
||||||
|
|
||||||
(dfa (build-dfa re-acts cache))
|
|
||||||
(table (dfa->1d-table dfa)))
|
|
||||||
;(print-dfa dfa)
|
|
||||||
#;(let ((num-states (vector-length table))
|
|
||||||
(num-vectors (length (filter values (vector->list table))))
|
|
||||||
(num-entries (apply + (map
|
|
||||||
(lambda (x) (if x (vector-length x) 0))
|
|
||||||
(vector->list table))))
|
|
||||||
(num-different-entries
|
|
||||||
(let ((ht (make-hash-table)))
|
|
||||||
(for-each
|
|
||||||
(lambda (x)
|
|
||||||
(when x
|
|
||||||
(for-each
|
|
||||||
(lambda (y)
|
|
||||||
(hash-table-put! ht y #t))
|
|
||||||
(vector->list x))))
|
|
||||||
(vector->list table))
|
|
||||||
(length (hash-table-map ht cons)))))
|
|
||||||
(printf "~a states, ~aKB\n"
|
|
||||||
num-states
|
|
||||||
(/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries
|
|
||||||
(* 5 num-different-entries))) 1024)))
|
|
||||||
(values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa)
|
|
||||||
(unbox disappeared-uses))))
|
|
||||||
)
|
|
@ -1,385 +0,0 @@
|
|||||||
(module re mzscheme
|
|
||||||
(require mzlib/list
|
|
||||||
scheme/match
|
|
||||||
(prefix is: mzlib/integer-set)
|
|
||||||
"util.rkt")
|
|
||||||
|
|
||||||
(provide ->re build-epsilon build-zero build-char-set build-concat
|
|
||||||
build-repeat build-or build-and build-neg
|
|
||||||
epsilonR? zeroR? char-setR? concatR? repeatR? orR? andR? negR?
|
|
||||||
char-setR-chars concatR-re1 concatR-re2 repeatR-re repeatR-low repeatR-high
|
|
||||||
orR-res andR-res negR-re
|
|
||||||
re-nullable? re-index)
|
|
||||||
|
|
||||||
;; get-index : -> nat
|
|
||||||
(define get-index (make-counter))
|
|
||||||
|
|
||||||
;; An re is either
|
|
||||||
;; - (make-epsilonR bool nat)
|
|
||||||
;; - (make-zeroR bool nat)
|
|
||||||
;; - (make-char-setR bool nat char-set)
|
|
||||||
;; - (make-concatR bool nat re re)
|
|
||||||
;; - (make-repeatR bool nat nat nat-or-+inf.0 re)
|
|
||||||
;; - (make-orR bool nat (list-of re)) Must not directly contain any orRs
|
|
||||||
;; - (make-andR bool nat (list-of re)) Must not directly contain any andRs
|
|
||||||
;; - (make-negR bool nat re)
|
|
||||||
;;
|
|
||||||
;; Every re must have an index field globally different from all
|
|
||||||
;; other re index fields.
|
|
||||||
(define-struct re (nullable? index) (make-inspector))
|
|
||||||
(define-struct (epsilonR re) () (make-inspector))
|
|
||||||
(define-struct (zeroR re) () (make-inspector))
|
|
||||||
(define-struct (char-setR re) (chars) (make-inspector))
|
|
||||||
(define-struct (concatR re) (re1 re2) (make-inspector))
|
|
||||||
(define-struct (repeatR re) (low high re) (make-inspector))
|
|
||||||
(define-struct (orR re) (res) (make-inspector))
|
|
||||||
(define-struct (andR re) (res) (make-inspector))
|
|
||||||
(define-struct (negR re) (re) (make-inspector))
|
|
||||||
|
|
||||||
;; e : re
|
|
||||||
;; The unique epsilon re
|
|
||||||
(define e (make-epsilonR #t (get-index)))
|
|
||||||
|
|
||||||
;; z : re
|
|
||||||
;; The unique zero re
|
|
||||||
(define z (make-zeroR #f (get-index)))
|
|
||||||
|
|
||||||
|
|
||||||
;; s-re = char constant
|
|
||||||
;; | string constant (sequence of characters)
|
|
||||||
;; | re a precompiled re
|
|
||||||
;; | (repetition low high s-re) repetition between low and high times (inclusive)
|
|
||||||
;; | (union s-re ...)
|
|
||||||
;; | (intersection s-re ...)
|
|
||||||
;; | (complement s-re)
|
|
||||||
;; | (concatenation s-re ...)
|
|
||||||
;; | (char-range rng rng) match any character between two (inclusive)
|
|
||||||
;; | (char-complement char-set) match any character not listed
|
|
||||||
;; low = natural-number
|
|
||||||
;; high = natural-number or +inf.0
|
|
||||||
;; rng = char or string with length 1
|
|
||||||
;; (concatenation) (repetition 0 0 x), and "" match the empty string.
|
|
||||||
;; (union) matches no strings.
|
|
||||||
;; (intersection) matches any string.
|
|
||||||
|
|
||||||
(define loc:make-range is:make-range)
|
|
||||||
(define loc:union is:union)
|
|
||||||
(define loc:split is:split)
|
|
||||||
(define loc:complement is:complement)
|
|
||||||
|
|
||||||
;; ->re : s-re cache -> re
|
|
||||||
(define (->re exp cache)
|
|
||||||
(match exp
|
|
||||||
((? char?) (build-char-set (loc:make-range (char->integer exp)) cache))
|
|
||||||
((? string?) (->re `(concatenation ,@(string->list exp)) cache))
|
|
||||||
((? re?) exp)
|
|
||||||
(`(repetition ,low ,high ,r)
|
|
||||||
(build-repeat low high (->re r cache) cache))
|
|
||||||
(`(union ,rs ...)
|
|
||||||
(build-or (flatten-res (map (lambda (r) (->re r cache)) rs)
|
|
||||||
orR? orR-res loc:union cache)
|
|
||||||
cache))
|
|
||||||
(`(intersection ,rs ...)
|
|
||||||
(build-and (flatten-res (map (lambda (r) (->re r cache)) rs)
|
|
||||||
andR? andR-res (lambda (a b)
|
|
||||||
(let-values (((i _ __) (loc:split a b))) i))
|
|
||||||
cache)
|
|
||||||
cache))
|
|
||||||
(`(complement ,r)
|
|
||||||
(build-neg (->re r cache) cache))
|
|
||||||
(`(concatenation ,rs ...)
|
|
||||||
(foldr (lambda (x y)
|
|
||||||
(build-concat (->re x cache) y cache))
|
|
||||||
e
|
|
||||||
rs))
|
|
||||||
(`(char-range ,c1 ,c2)
|
|
||||||
(let ((i1 (char->integer (if (string? c1) (string-ref c1 0) c1)))
|
|
||||||
(i2 (char->integer (if (string? c2) (string-ref c2 0) c2))))
|
|
||||||
(if (<= i1 i2)
|
|
||||||
(build-char-set (loc:make-range i1 i2) cache)
|
|
||||||
z)))
|
|
||||||
(`(char-complement ,crs ...)
|
|
||||||
(let ((cs (->re `(union ,@crs) cache)))
|
|
||||||
(cond
|
|
||||||
((zeroR? cs) (build-char-set (loc:make-range 0 max-char-num) cache))
|
|
||||||
((char-setR? cs)
|
|
||||||
(build-char-set (loc:complement (char-setR-chars cs) 0 max-char-num) cache))
|
|
||||||
(else z))))))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re))
|
|
||||||
;; (char-set char-set -> char-set) cache -> (list-of re)
|
|
||||||
;; Takes all the char-sets in l and combines them into one char-set using the combine function.
|
|
||||||
;; Flattens out the values of type?. get-res only needs to function on things type? returns
|
|
||||||
;; true for.
|
|
||||||
(define (flatten-res l type? get-res combine cache)
|
|
||||||
(let loop ((res l)
|
|
||||||
;; chars : (union #f char-set)
|
|
||||||
(chars #f)
|
|
||||||
(no-chars null))
|
|
||||||
(cond
|
|
||||||
((null? res)
|
|
||||||
(if chars
|
|
||||||
(cons (build-char-set chars cache) no-chars)
|
|
||||||
no-chars))
|
|
||||||
((char-setR? (car res))
|
|
||||||
(if chars
|
|
||||||
(loop (cdr res) (combine (char-setR-chars (car res)) chars) no-chars)
|
|
||||||
(loop (cdr res) (char-setR-chars (car res)) no-chars)))
|
|
||||||
((type? (car res))
|
|
||||||
(loop (append (get-res (car res)) (cdr res)) chars no-chars))
|
|
||||||
(else (loop (cdr res) chars (cons (car res) no-chars))))))
|
|
||||||
|
|
||||||
;; build-epsilon : -> re
|
|
||||||
(define (build-epsilon) e)
|
|
||||||
|
|
||||||
(define (build-zero) z)
|
|
||||||
|
|
||||||
(define loc:integer-set-contents is:integer-set-contents)
|
|
||||||
|
|
||||||
;; build-char-set : char-set cache -> re
|
|
||||||
(define (build-char-set cs cache)
|
|
||||||
(let ((l (loc:integer-set-contents cs)))
|
|
||||||
(cond
|
|
||||||
((null? l) z)
|
|
||||||
(else
|
|
||||||
(cache l
|
|
||||||
(lambda ()
|
|
||||||
(make-char-setR #f (get-index) cs)))))))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; build-concat : re re cache -> re
|
|
||||||
(define (build-concat r1 r2 cache)
|
|
||||||
(cond
|
|
||||||
((eq? e r1) r2)
|
|
||||||
((eq? e r2) r1)
|
|
||||||
((or (eq? z r1) (eq? z r2)) z)
|
|
||||||
(else
|
|
||||||
(cache (cons 'concat (cons (re-index r1) (re-index r2)))
|
|
||||||
(lambda ()
|
|
||||||
(make-concatR (and (re-nullable? r1) (re-nullable? r2))
|
|
||||||
(get-index)
|
|
||||||
r1 r2))))))
|
|
||||||
|
|
||||||
;; build-repeat : nat nat-or-+inf.0 re cache -> re
|
|
||||||
(define (build-repeat low high r cache)
|
|
||||||
(let ((low (if (< low 0) 0 low)))
|
|
||||||
(cond
|
|
||||||
((eq? r e) e)
|
|
||||||
((and (= 0 low) (or (= 0 high) (eq? z r))) e)
|
|
||||||
((and (= 1 low) (= 1 high)) r)
|
|
||||||
((and (repeatR? r)
|
|
||||||
(eq? (repeatR-high r) +inf.0)
|
|
||||||
(or (= 0 (repeatR-low r))
|
|
||||||
(= 1 (repeatR-low r))))
|
|
||||||
(build-repeat (* low (repeatR-low r))
|
|
||||||
+inf.0
|
|
||||||
(repeatR-re r)
|
|
||||||
cache))
|
|
||||||
(else
|
|
||||||
(cache (cons 'repeat (cons low (cons high (re-index r))))
|
|
||||||
(lambda ()
|
|
||||||
(make-repeatR (or (re-nullable? r) (= 0 low)) (get-index) low high r)))))))
|
|
||||||
|
|
||||||
|
|
||||||
;; build-or : (list-of re) cache -> re
|
|
||||||
(define (build-or rs cache)
|
|
||||||
(let ((rs
|
|
||||||
(filter
|
|
||||||
(lambda (x) (not (eq? x z)))
|
|
||||||
(do-simple-equiv (replace rs orR? orR-res null) re-index))))
|
|
||||||
(cond
|
|
||||||
((null? rs) z)
|
|
||||||
((null? (cdr rs)) (car rs))
|
|
||||||
((memq (build-neg z cache) rs) (build-neg z cache))
|
|
||||||
(else
|
|
||||||
(cache (cons 'or (map re-index rs))
|
|
||||||
(lambda ()
|
|
||||||
(make-orR (ormap re-nullable? rs) (get-index) rs)))))))
|
|
||||||
|
|
||||||
;; build-and : (list-of re) cache -> re
|
|
||||||
(define (build-and rs cache)
|
|
||||||
(let ((rs (do-simple-equiv (replace rs andR? andR-res null) re-index)))
|
|
||||||
(cond
|
|
||||||
((null? rs) (build-neg z cache))
|
|
||||||
((null? (cdr rs)) (car rs))
|
|
||||||
((memq z rs) z)
|
|
||||||
(else
|
|
||||||
(cache (cons 'and (map re-index rs))
|
|
||||||
(lambda ()
|
|
||||||
(make-andR (andmap re-nullable? rs) (get-index) rs)))))))
|
|
||||||
|
|
||||||
;; build-neg : re cache -> re
|
|
||||||
(define (build-neg r cache)
|
|
||||||
(cond
|
|
||||||
((negR? r) (negR-re r))
|
|
||||||
(else
|
|
||||||
(cache (cons 'neg (re-index r))
|
|
||||||
(lambda ()
|
|
||||||
(make-negR (not (re-nullable? r)) (get-index) r))))))
|
|
||||||
|
|
||||||
;; Tests for the build-functions
|
|
||||||
(test-block ((c (make-cache))
|
|
||||||
(isc is:integer-set-contents)
|
|
||||||
(r1 (build-char-set (is:make-range (char->integer #\1)) c))
|
|
||||||
(r2 (build-char-set (is:make-range (char->integer #\2)) c))
|
|
||||||
(r3 (build-char-set (is:make-range (char->integer #\3)) c))
|
|
||||||
(rc (build-concat r1 r2 c))
|
|
||||||
(rc2 (build-concat r2 r1 c))
|
|
||||||
(rr (build-repeat 0 +inf.0 rc c))
|
|
||||||
(ro (build-or `(,rr ,rc ,rr) c))
|
|
||||||
(ro2 (build-or `(,rc ,rr ,z) c))
|
|
||||||
(ro3 (build-or `(,rr ,rc) c))
|
|
||||||
(ro4 (build-or `(,(build-or `(,r1 ,r2) c)
|
|
||||||
,(build-or `(,r2 ,r3) c)) c))
|
|
||||||
(ra (build-and `(,rr ,rc ,rr) c))
|
|
||||||
(ra2 (build-and `(,rc ,rr) c))
|
|
||||||
(ra3 (build-and `(,rr ,rc) c))
|
|
||||||
(ra4 (build-and `(,(build-and `(,r3 ,r2) c)
|
|
||||||
,(build-and `(,r2 ,r1) c)) c))
|
|
||||||
(rn (build-neg z c))
|
|
||||||
(rn2 (build-neg r1 c)))
|
|
||||||
|
|
||||||
((isc (char-setR-chars r1)) (isc (is:make-range (char->integer #\1))))
|
|
||||||
((isc (char-setR-chars r2)) (isc (is:make-range (char->integer #\2))))
|
|
||||||
((isc (char-setR-chars r3)) (isc (is:make-range (char->integer #\3))))
|
|
||||||
((build-char-set (is:make-range) c) z)
|
|
||||||
((build-concat r1 e c) r1)
|
|
||||||
((build-concat e r1 c) r1)
|
|
||||||
((build-concat r1 z c) z)
|
|
||||||
((build-concat z r1 c) z)
|
|
||||||
((build-concat r1 r2 c) rc)
|
|
||||||
((concatR-re1 rc) r1)
|
|
||||||
((concatR-re2 rc) r2)
|
|
||||||
((concatR-re1 rc2) r2)
|
|
||||||
((concatR-re2 rc2) r1)
|
|
||||||
(ro ro2)
|
|
||||||
(ro ro3)
|
|
||||||
(ro4 (build-or `(,r1 ,r2 ,r3) c))
|
|
||||||
((orR-res ro) (list rc rr))
|
|
||||||
((orR-res ro4) (list r1 r2 r3))
|
|
||||||
((build-or null c) z)
|
|
||||||
((build-or `(,r1 ,z) c) r1)
|
|
||||||
((build-repeat 0 +inf.0 rc c) rr)
|
|
||||||
((build-repeat 0 1 z c) e)
|
|
||||||
((build-repeat 0 0 rc c) e)
|
|
||||||
((build-repeat 0 +inf.0 z c) e)
|
|
||||||
((build-repeat -1 +inf.0 z c) e)
|
|
||||||
((build-repeat 0 +inf.0 (build-repeat 0 +inf.0 rc c) c)
|
|
||||||
(build-repeat 0 +inf.0 rc c))
|
|
||||||
((build-repeat 20 20 (build-repeat 0 +inf.0 rc c) c)
|
|
||||||
(build-repeat 0 +inf.0 rc c))
|
|
||||||
((build-repeat 20 20 (build-repeat 1 +inf.0 rc c) c)
|
|
||||||
(build-repeat 20 +inf.0 rc c))
|
|
||||||
((build-repeat 1 1 rc c) rc)
|
|
||||||
((repeatR-re rr) rc)
|
|
||||||
(ra ra2)
|
|
||||||
(ra ra3)
|
|
||||||
(ra4 (build-and `(,r1 ,r2 ,r3) c))
|
|
||||||
((andR-res ra) (list rc rr))
|
|
||||||
((andR-res ra4) (list r1 r2 r3))
|
|
||||||
((build-and null c) (build-neg z c))
|
|
||||||
((build-and `(,r1 ,z) c) z)
|
|
||||||
((build-and `(,r1) c) r1)
|
|
||||||
((build-neg r1 c) (build-neg r1 c))
|
|
||||||
((build-neg (build-neg r1 c) c) r1)
|
|
||||||
((negR-re (build-neg r2 c)) r2)
|
|
||||||
((re-nullable? r1) #f)
|
|
||||||
((re-nullable? rc) #f)
|
|
||||||
((re-nullable? (build-concat rr rr c)) #t)
|
|
||||||
((re-nullable? rr) #t)
|
|
||||||
((re-nullable? (build-repeat 0 1 rc c)) #t)
|
|
||||||
((re-nullable? (build-repeat 1 2 rc c)) #f)
|
|
||||||
((re-nullable? (build-repeat 1 2 (build-or (list e r1) c) c)) #t)
|
|
||||||
((re-nullable? ro) #t)
|
|
||||||
((re-nullable? (build-or `(,r1 ,r2) c)) #f)
|
|
||||||
((re-nullable? (build-and `(,r1 ,e) c)) #f)
|
|
||||||
((re-nullable? (build-and `(,rr ,e) c)) #t)
|
|
||||||
((re-nullable? (build-neg r1 c)) #t)
|
|
||||||
((re-nullable? (build-neg rr c)) #f))
|
|
||||||
|
|
||||||
(test-block ((c (make-cache))
|
|
||||||
(isc is:integer-set-contents)
|
|
||||||
(r1 (->re #\1 c))
|
|
||||||
(r2 (->re #\2 c))
|
|
||||||
(r3-5 (->re '(char-range #\3 #\5) c))
|
|
||||||
(r4 (build-or `(,r1 ,r2) c))
|
|
||||||
(r5 (->re `(union ,r3-5 #\7) c))
|
|
||||||
(r6 (->re #\6 c)))
|
|
||||||
((flatten-res null orR? orR-res is:union c) null)
|
|
||||||
((isc (char-setR-chars (car (flatten-res `(,r1) orR? orR-res is:union c))))
|
|
||||||
(isc (is:make-range (char->integer #\1))))
|
|
||||||
((isc (char-setR-chars (car (flatten-res `(,r4) orR? orR-res is:union c))))
|
|
||||||
(isc (is:make-range (char->integer #\1) (char->integer #\2))))
|
|
||||||
((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1)
|
|
||||||
orR? orR-res is:union c))))
|
|
||||||
(isc (is:make-range (char->integer #\1) (char->integer #\7))))
|
|
||||||
((flatten-res `(,r1 ,r2) andR? andR-res (lambda (x y)
|
|
||||||
(let-values (((i _ __)
|
|
||||||
(is:split x y)))
|
|
||||||
i))
|
|
||||||
c)
|
|
||||||
(list z)))
|
|
||||||
|
|
||||||
;; ->re
|
|
||||||
(test-block ((c (make-cache))
|
|
||||||
(isc is:integer-set-contents)
|
|
||||||
(r (->re #\a c))
|
|
||||||
(rr (->re `(concatenation ,r ,r) c))
|
|
||||||
(rrr (->re `(concatenation ,r ,rr) c))
|
|
||||||
(rrr* (->re `(repetition 0 +inf.0 ,rrr) c)))
|
|
||||||
((isc (char-setR-chars r)) (isc (is:make-range (char->integer #\a))))
|
|
||||||
((->re "" c) e)
|
|
||||||
((->re "asdf" c) (->re `(concatenation #\a #\s #\d #\f) c))
|
|
||||||
((->re r c) r)
|
|
||||||
((->re `(repetition 0 +inf.0 ,r) c) (build-repeat 0 +inf.0 r c))
|
|
||||||
((->re `(repetition 1 +inf.0 ,r) c) (build-repeat 1 +inf.0 r c))
|
|
||||||
((->re `(repetition 0 1 ,r) c) (build-repeat 0 1 r c))
|
|
||||||
((->re `(repetition 0 1 ,rrr*) c) rrr*)
|
|
||||||
((->re `(union (union (char-range #\a #\c)
|
|
||||||
(char-complement (char-range #\000 #\110)
|
|
||||||
(char-range #\112 ,(integer->char max-char-num))))
|
|
||||||
(union (repetition 0 +inf.0 #\2))) c)
|
|
||||||
(build-or (list (build-char-set (is:union (is:make-range 73)
|
|
||||||
(is:make-range 97 99))
|
|
||||||
c)
|
|
||||||
(build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c))
|
|
||||||
c))
|
|
||||||
((->re `(union ,rr ,rrr) c) (build-or (list rr rrr) c))
|
|
||||||
((->re `(union ,r) c) r)
|
|
||||||
((->re `(union) c) z)
|
|
||||||
((->re `(intersection (intersection #\111
|
|
||||||
(char-complement (char-range #\000 #\110)
|
|
||||||
(char-range #\112 ,(integer->char max-char-num))))
|
|
||||||
(intersection (repetition 0 +inf.0 #\2))) c)
|
|
||||||
(build-and (list (build-char-set (is:make-range 73) c)
|
|
||||||
(build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c))
|
|
||||||
c))
|
|
||||||
((->re `(intersection (intersection #\000 (char-complement (char-range #\000 #\110)
|
|
||||||
(char-range #\112 ,(integer->char max-char-num))))
|
|
||||||
(intersection (repetition 0 +inf.0 #\2))) c)
|
|
||||||
z)
|
|
||||||
((->re `(intersection ,rr ,rrr) c) (build-and (list rr rrr) c))
|
|
||||||
((->re `(intersection ,r) c) r)
|
|
||||||
((->re `(intersection) c) (build-neg z c))
|
|
||||||
((->re `(complement ,r) c) (build-neg r c))
|
|
||||||
((->re `(concatenation) c) e)
|
|
||||||
((->re `(concatenation ,rrr*) c) rrr*)
|
|
||||||
(rr (build-concat r r c))
|
|
||||||
((->re `(concatenation ,r ,rr ,rrr) c)
|
|
||||||
(build-concat r (build-concat rr rrr c) c))
|
|
||||||
((isc (char-setR-chars (->re `(char-range #\1 #\1) c))) (isc (is:make-range 49)))
|
|
||||||
((isc (char-setR-chars (->re `(char-range #\1 #\9) c))) (isc (is:make-range 49 57)))
|
|
||||||
((isc (char-setR-chars (->re `(char-range "1" "1") c))) (isc (is:make-range 49)))
|
|
||||||
((isc (char-setR-chars (->re `(char-range "1" "9") c))) (isc (is:make-range 49 57)))
|
|
||||||
((->re `(char-range "9" "1") c) z)
|
|
||||||
((isc (char-setR-chars (->re `(char-complement) c)))
|
|
||||||
(isc (char-setR-chars (->re `(char-range #\000 ,(integer->char max-char-num)) c))))
|
|
||||||
((isc (char-setR-chars (->re `(char-complement #\001 (char-range #\002 ,(integer->char max-char-num))) c)))
|
|
||||||
(isc (is:make-range 0)))
|
|
||||||
)
|
|
||||||
|
|
||||||
)
|
|
@ -1,220 +0,0 @@
|
|||||||
#lang racket
|
|
||||||
|
|
||||||
(require "util.rkt"
|
|
||||||
syntax/id-table)
|
|
||||||
|
|
||||||
(provide parse)
|
|
||||||
|
|
||||||
(define (bad-args stx num)
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
(format "incorrect number of arguments (should have ~a)" num)
|
|
||||||
stx))
|
|
||||||
|
|
||||||
;; char-range-arg: syntax-object syntax-object -> nat
|
|
||||||
;; If c contains is a character or length 1 string, returns the integer
|
|
||||||
;; for the character. Otherwise raises a syntax error.
|
|
||||||
(define (char-range-arg stx containing-stx)
|
|
||||||
(let ((c (syntax-e stx)))
|
|
||||||
(cond
|
|
||||||
((char? c) (char->integer c))
|
|
||||||
((and (string? c) (= (string-length c) 1))
|
|
||||||
(char->integer (string-ref c 0)))
|
|
||||||
(else
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
"not a char or single-char string"
|
|
||||||
containing-stx stx)))))
|
|
||||||
(module+ test
|
|
||||||
(check-equal? (char-range-arg #'#\1 #'here) (char->integer #\1))
|
|
||||||
(check-equal? (char-range-arg #'"1" #'here) (char->integer #\1)))
|
|
||||||
|
|
||||||
(define orig-insp (variable-reference->module-declaration-inspector
|
|
||||||
(#%variable-reference)))
|
|
||||||
(define (disarm stx)
|
|
||||||
(syntax-disarm stx orig-insp))
|
|
||||||
|
|
||||||
;; parse : syntax-object (box (list-of syntax-object)) -> s-re (see re.rkt)
|
|
||||||
;; checks for errors and generates the plain s-exp form for s
|
|
||||||
;; Expands lex-abbrevs and applies lex-trans.
|
|
||||||
(define (parse stx disappeared-uses)
|
|
||||||
(let loop ([stx stx]
|
|
||||||
[disappeared-uses disappeared-uses]
|
|
||||||
;; seen-lex-abbrevs: id-table
|
|
||||||
[seen-lex-abbrevs (make-immutable-free-id-table)])
|
|
||||||
(let ([recur (lambda (s)
|
|
||||||
(loop (syntax-rearm s stx)
|
|
||||||
disappeared-uses
|
|
||||||
seen-lex-abbrevs))]
|
|
||||||
[recur/abbrev (lambda (s id)
|
|
||||||
(loop (syntax-rearm s stx)
|
|
||||||
disappeared-uses
|
|
||||||
(free-id-table-set seen-lex-abbrevs id id)))])
|
|
||||||
(syntax-case (disarm stx) (repetition union intersection complement concatenation
|
|
||||||
char-range char-complement)
|
|
||||||
(_
|
|
||||||
(identifier? stx)
|
|
||||||
(let ((expansion (syntax-local-value stx (lambda () #f))))
|
|
||||||
(unless (lex-abbrev? expansion)
|
|
||||||
(raise-syntax-error 'regular-expression
|
|
||||||
"undefined abbreviation"
|
|
||||||
stx))
|
|
||||||
;; Check for cycles.
|
|
||||||
(when (free-id-table-ref seen-lex-abbrevs stx (lambda () #f))
|
|
||||||
(raise-syntax-error 'regular-expression
|
|
||||||
"illegal lex-abbrev cycle detected"
|
|
||||||
stx
|
|
||||||
#f
|
|
||||||
(list (free-id-table-ref seen-lex-abbrevs stx))))
|
|
||||||
(set-box! disappeared-uses (cons stx (unbox disappeared-uses)))
|
|
||||||
(recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx)))
|
|
||||||
(_
|
|
||||||
(or (char? (syntax-e stx)) (string? (syntax-e stx)))
|
|
||||||
(syntax-e stx))
|
|
||||||
((repetition arg ...)
|
|
||||||
(let ((arg-list (syntax->list (syntax (arg ...)))))
|
|
||||||
(unless (= 3 (length arg-list))
|
|
||||||
(bad-args stx 2))
|
|
||||||
(let ((low (syntax-e (car arg-list)))
|
|
||||||
(high (syntax-e (cadr arg-list)))
|
|
||||||
(re (caddr arg-list)))
|
|
||||||
(unless (and (number? low) (exact? low) (integer? low) (>= low 0))
|
|
||||||
(raise-syntax-error #f
|
|
||||||
"not a non-negative exact integer"
|
|
||||||
stx
|
|
||||||
(car arg-list)))
|
|
||||||
(unless (or (and (number? high) (exact? high) (integer? high) (>= high 0))
|
|
||||||
(eq? high +inf.0))
|
|
||||||
(raise-syntax-error #f
|
|
||||||
"not a non-negative exact integer or +inf.0"
|
|
||||||
stx
|
|
||||||
(cadr arg-list)))
|
|
||||||
(unless (<= low high)
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
"the first argument is not less than or equal to the second argument"
|
|
||||||
stx))
|
|
||||||
`(repetition ,low ,high ,(recur re)))))
|
|
||||||
((union re ...)
|
|
||||||
`(union ,@(map recur (syntax->list (syntax (re ...))))))
|
|
||||||
((intersection re ...)
|
|
||||||
`(intersection ,@(map recur (syntax->list (syntax (re ...))))))
|
|
||||||
((complement re ...)
|
|
||||||
(let ((re-list (syntax->list (syntax (re ...)))))
|
|
||||||
(unless (= 1 (length re-list))
|
|
||||||
(bad-args stx 1))
|
|
||||||
`(complement ,(recur (car re-list)))))
|
|
||||||
((concatenation re ...)
|
|
||||||
`(concatenation ,@(map recur (syntax->list (syntax (re ...))))))
|
|
||||||
((char-range arg ...)
|
|
||||||
(let ((arg-list (syntax->list (syntax (arg ...)))))
|
|
||||||
(unless (= 2 (length arg-list))
|
|
||||||
(bad-args stx 2))
|
|
||||||
(let ((i1 (char-range-arg (car arg-list) stx))
|
|
||||||
(i2 (char-range-arg (cadr arg-list) stx)))
|
|
||||||
(if (<= i1 i2)
|
|
||||||
`(char-range ,(integer->char i1) ,(integer->char i2))
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
"the first argument does not precede or equal second argument"
|
|
||||||
stx)))))
|
|
||||||
((char-complement arg ...)
|
|
||||||
(let ((arg-list (syntax->list (syntax (arg ...)))))
|
|
||||||
(unless (= 1 (length arg-list))
|
|
||||||
(bad-args stx 1))
|
|
||||||
(let ((parsed (recur (car arg-list))))
|
|
||||||
(unless (char-set? parsed)
|
|
||||||
(raise-syntax-error #f
|
|
||||||
"not a character set"
|
|
||||||
stx
|
|
||||||
(car arg-list)))
|
|
||||||
`(char-complement ,parsed))))
|
|
||||||
((op form ...)
|
|
||||||
(identifier? (syntax op))
|
|
||||||
(let* ((o (syntax op))
|
|
||||||
(expansion (syntax-local-value o (lambda () #f))))
|
|
||||||
(set-box! disappeared-uses (cons o (unbox disappeared-uses)))
|
|
||||||
(cond
|
|
||||||
((lex-trans? expansion)
|
|
||||||
(recur ((lex-trans-f expansion) (disarm stx))))
|
|
||||||
(expansion
|
|
||||||
(raise-syntax-error 'regular-expression
|
|
||||||
"not a lex-trans"
|
|
||||||
stx))
|
|
||||||
(else
|
|
||||||
(raise-syntax-error 'regular-expression
|
|
||||||
"undefined operator"
|
|
||||||
stx)))))
|
|
||||||
(_
|
|
||||||
(raise-syntax-error
|
|
||||||
'regular-expression
|
|
||||||
"not a char, string, identifier, or (op args ...)"
|
|
||||||
stx))))))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; char-set? : s-re -> bool
|
|
||||||
;; A char-set is an re that matches only strings of length 1.
|
|
||||||
;; char-set? is conservative.
|
|
||||||
(define (char-set? s-re)
|
|
||||||
(cond
|
|
||||||
((char? s-re) #t)
|
|
||||||
((string? s-re) (= (string-length s-re) 1))
|
|
||||||
((list? s-re)
|
|
||||||
(let ((op (car s-re)))
|
|
||||||
(case op
|
|
||||||
((union intersection) (andmap char-set? (cdr s-re)))
|
|
||||||
((char-range char-complement) #t)
|
|
||||||
((repetition)
|
|
||||||
(and (= (cadr s-re) (caddr s-re)) (char-set? (cadddr s-re))))
|
|
||||||
((concatenation)
|
|
||||||
(and (= 2 (length s-re)) (char-set? (cadr s-re))))
|
|
||||||
(else #f))))
|
|
||||||
(else #f)))
|
|
||||||
|
|
||||||
(module+ test
|
|
||||||
(require rackunit))
|
|
||||||
(module+ test
|
|
||||||
(check-equal? (char-set? #\a) #t)
|
|
||||||
(check-equal? (char-set? "12") #f)
|
|
||||||
(check-equal? (char-set? "1") #t)
|
|
||||||
(check-equal? (char-set? '(repetition 1 2 #\1)) #f)
|
|
||||||
(check-equal? (char-set? '(repetition 1 1 "12")) #f)
|
|
||||||
(check-equal? (char-set? '(repetition 1 1 "1")) #t)
|
|
||||||
(check-equal? (char-set? '(union "1" "2" "3")) #t)
|
|
||||||
(check-equal? (char-set? '(union "1" "" "3")) #f)
|
|
||||||
(check-equal? (char-set? '(intersection "1" "2" (union "3" "4"))) #t)
|
|
||||||
(check-equal? (char-set? '(intersection "1" "")) #f)
|
|
||||||
(check-equal? (char-set? '(complement "1")) #f)
|
|
||||||
(check-equal? (char-set? '(concatenation "1" "2")) #f)
|
|
||||||
(check-equal? (char-set? '(concatenation "" "2")) #f)
|
|
||||||
(check-equal? (char-set? '(concatenation "1")) #t)
|
|
||||||
(check-equal? (char-set? '(concatenation "12")) #f)
|
|
||||||
(check-equal? (char-set? '(char-range #\1 #\2)) #t)
|
|
||||||
(check-equal? (char-set? '(char-complement #\1)) #t))
|
|
||||||
|
|
||||||
;; yikes... these test cases all have the wrong arity, now.
|
|
||||||
;; and by "now", I mean it's been broken since before we
|
|
||||||
;; moved to git.
|
|
||||||
(module+ test
|
|
||||||
(check-equal? (parse #'#\a null) #\a)
|
|
||||||
(check-equal? (parse #'"1" null) "1")
|
|
||||||
(check-equal? (parse #'(repetition 1 1 #\1) null)
|
|
||||||
'(repetition 1 1 #\1))
|
|
||||||
(check-equal? (parse #'(repetition 0 +inf.0 #\1) null) '(repetition 0 +inf.0 #\1))
|
|
||||||
(check-equal? (parse #'(union #\1 (union "2") (union)) null)
|
|
||||||
'(union #\1 (union "2") (union)))
|
|
||||||
(check-equal? (parse #'(intersection #\1 (intersection "2") (intersection))
|
|
||||||
null)
|
|
||||||
'(intersection #\1 (intersection "2") (intersection)))
|
|
||||||
(check-equal? (parse #'(complement (union #\1 #\2))
|
|
||||||
null)
|
|
||||||
'(complement (union #\1 #\2)))
|
|
||||||
(check-equal? (parse #'(concatenation "1" "2" (concatenation)) null)
|
|
||||||
'(concatenation "1" "2" (concatenation)))
|
|
||||||
(check-equal? (parse #'(char-range "1" #\1) null) '(char-range #\1 #\1))
|
|
||||||
(check-equal? (parse #'(char-range #\1 "1") null) '(char-range #\1 #\1))
|
|
||||||
(check-equal? (parse #'(char-range "1" "3") null) '(char-range #\1 #\3))
|
|
||||||
(check-equal? (parse #'(char-complement (union "1" "2")) null)
|
|
||||||
'(char-complement (union "1" "2"))))
|
|
||||||
; )
|
|
@ -1,9 +0,0 @@
|
|||||||
(module token-syntax mzscheme
|
|
||||||
|
|
||||||
;; The things needed at compile time to handle definition of tokens
|
|
||||||
|
|
||||||
(provide make-terminals-def terminals-def-t terminals-def?
|
|
||||||
make-e-terminals-def e-terminals-def-t e-terminals-def?)
|
|
||||||
(define-struct terminals-def (t))
|
|
||||||
(define-struct e-terminals-def (t))
|
|
||||||
)
|
|
@ -1,92 +0,0 @@
|
|||||||
(module token mzscheme
|
|
||||||
|
|
||||||
(require-for-syntax "token-syntax.rkt")
|
|
||||||
|
|
||||||
;; Defining tokens
|
|
||||||
|
|
||||||
(provide define-tokens define-empty-tokens make-token token?
|
|
||||||
(protect (rename token-name real-token-name))
|
|
||||||
(protect (rename token-value real-token-value))
|
|
||||||
(rename token-name* token-name)
|
|
||||||
(rename token-value* token-value)
|
|
||||||
(struct position (offset line col))
|
|
||||||
(struct position-token (token start-pos end-pos))
|
|
||||||
(struct srcloc-token (token srcloc)))
|
|
||||||
|
|
||||||
|
|
||||||
;; A token is either
|
|
||||||
;; - symbol
|
|
||||||
;; - (make-token symbol any)
|
|
||||||
(define-struct token (name value) (make-inspector))
|
|
||||||
|
|
||||||
;; token-name*: token -> symbol
|
|
||||||
(define (token-name* t)
|
|
||||||
(cond
|
|
||||||
((symbol? t) t)
|
|
||||||
((token? t) (token-name t))
|
|
||||||
(else (raise-type-error
|
|
||||||
'token-name
|
|
||||||
"symbol or struct:token"
|
|
||||||
0
|
|
||||||
t))))
|
|
||||||
|
|
||||||
;; token-value*: token -> any
|
|
||||||
(define (token-value* t)
|
|
||||||
(cond
|
|
||||||
((symbol? t) #f)
|
|
||||||
((token? t) (token-value t))
|
|
||||||
(else (raise-type-error
|
|
||||||
'token-value
|
|
||||||
"symbol or struct:token"
|
|
||||||
0
|
|
||||||
t))))
|
|
||||||
|
|
||||||
(define-for-syntax (make-ctor-name n)
|
|
||||||
(datum->syntax-object n
|
|
||||||
(string->symbol (format "token-~a" (syntax-e n)))
|
|
||||||
n
|
|
||||||
n))
|
|
||||||
|
|
||||||
(define-for-syntax (make-define-tokens empty?)
|
|
||||||
(lambda (stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
((_ name (token ...))
|
|
||||||
(andmap identifier? (syntax->list (syntax (token ...))))
|
|
||||||
(with-syntax (((marked-token ...)
|
|
||||||
(map values #;(make-syntax-introducer)
|
|
||||||
(syntax->list (syntax (token ...))))))
|
|
||||||
(quasisyntax/loc stx
|
|
||||||
(begin
|
|
||||||
(define-syntax name
|
|
||||||
#,(if empty?
|
|
||||||
#'(make-e-terminals-def (quote-syntax (marked-token ...)))
|
|
||||||
#'(make-terminals-def (quote-syntax (marked-token ...)))))
|
|
||||||
#,@(map
|
|
||||||
(lambda (n)
|
|
||||||
(when (eq? (syntax-e n) 'error)
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
"Cannot define a token named error."
|
|
||||||
stx))
|
|
||||||
(if empty?
|
|
||||||
#`(define (#,(make-ctor-name n))
|
|
||||||
'#,n)
|
|
||||||
#`(define (#,(make-ctor-name n) x)
|
|
||||||
(make-token '#,n x))))
|
|
||||||
(syntax->list (syntax (token ...))))
|
|
||||||
#;(define marked-token #f) #;...))))
|
|
||||||
((_ ...)
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
"must have the form (define-tokens name (identifier ...)) or (define-empty-tokens name (identifier ...))"
|
|
||||||
stx)))))
|
|
||||||
|
|
||||||
(define-syntax define-tokens (make-define-tokens #f))
|
|
||||||
(define-syntax define-empty-tokens (make-define-tokens #t))
|
|
||||||
|
|
||||||
(define-struct position (offset line col) #f)
|
|
||||||
(define-struct position-token (token start-pos end-pos) #f)
|
|
||||||
|
|
||||||
(define-struct srcloc-token (token srcloc) #f)
|
|
||||||
)
|
|
||||||
|
|
@ -1,69 +0,0 @@
|
|||||||
#lang racket
|
|
||||||
|
|
||||||
(require "util.rkt")
|
|
||||||
|
|
||||||
(provide (all-defined-out))
|
|
||||||
|
|
||||||
;; mapped-chars : (listof (list nat nat bool))
|
|
||||||
(define mapped-chars (make-known-char-range-list))
|
|
||||||
|
|
||||||
;; get-chars-for-x : (nat -> bool) (listof (list nat nat bool)) -> (listof (cons nat nat))
|
|
||||||
(define (get-chars-for char-x? mapped-chars)
|
|
||||||
(cond
|
|
||||||
((null? mapped-chars) null)
|
|
||||||
(else
|
|
||||||
(let* ((range (car mapped-chars))
|
|
||||||
(low (car range))
|
|
||||||
(high (cadr range))
|
|
||||||
(x (char-x? low)))
|
|
||||||
(cond
|
|
||||||
((caddr range)
|
|
||||||
(if x
|
|
||||||
(cons (cons low high)
|
|
||||||
(get-chars-for char-x? (cdr mapped-chars)))
|
|
||||||
(get-chars-for char-x? (cdr mapped-chars))))
|
|
||||||
(else
|
|
||||||
(let loop ((range-start low)
|
|
||||||
(i (car range))
|
|
||||||
(parity x))
|
|
||||||
(cond
|
|
||||||
((> i high)
|
|
||||||
(if parity
|
|
||||||
(cons (cons range-start high) (get-chars-for char-x? (cdr mapped-chars)))
|
|
||||||
(get-chars-for char-x? (cdr mapped-chars))))
|
|
||||||
((eq? parity (char-x? i))
|
|
||||||
(loop range-start (add1 i) parity))
|
|
||||||
(parity
|
|
||||||
(cons (cons range-start (sub1 i)) (loop i (add1 i) #f)))
|
|
||||||
(else
|
|
||||||
(loop i (add1 i) #t))))))))))
|
|
||||||
|
|
||||||
(define (compute-ranges x?)
|
|
||||||
(delay (get-chars-for (lambda (x) (x? (integer->char x))) mapped-chars)))
|
|
||||||
|
|
||||||
(define alphabetic-ranges (compute-ranges char-alphabetic?)) ;; 325
|
|
||||||
(define lower-case-ranges (compute-ranges char-lower-case?)) ;; 405
|
|
||||||
(define upper-case-ranges (compute-ranges char-upper-case?)) ;; 380
|
|
||||||
(define title-case-ranges (compute-ranges char-title-case?)) ;; 10
|
|
||||||
(define numeric-ranges (compute-ranges char-numeric?)) ;; 47
|
|
||||||
(define symbolic-ranges (compute-ranges char-symbolic?)) ;; 153
|
|
||||||
(define punctuation-ranges (compute-ranges char-punctuation?)) ;; 86
|
|
||||||
(define graphic-ranges (compute-ranges char-graphic?)) ;; 401
|
|
||||||
(define whitespace-ranges (compute-ranges char-whitespace?)) ;; 10
|
|
||||||
(define blank-ranges (compute-ranges char-blank?)) ;; 9
|
|
||||||
#;(define hexadecimal-ranges (compute-ranges char-hexadecimal?))
|
|
||||||
(define iso-control-ranges (compute-ranges char-iso-control?)) ;; 2
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(module+ test
|
|
||||||
(require rackunit)
|
|
||||||
(check-equal? (get-chars-for odd? '()) '())
|
|
||||||
(check-equal? (get-chars-for odd? '((1 4 #f) (8 13 #f)))
|
|
||||||
'((1 . 1) (3 . 3) (9 . 9) (11 . 11) (13 . 13)))
|
|
||||||
(check-equal? (get-chars-for (lambda (x)
|
|
||||||
(odd? (quotient x 10)))
|
|
||||||
'((1 5 #t) (17 19 #t) (21 51 #f)))
|
|
||||||
'((17 . 19) (30 . 39) (50 . 51))))
|
|
||||||
|
|
||||||
|
|
@ -1,127 +0,0 @@
|
|||||||
#lang racket
|
|
||||||
|
|
||||||
(provide (all-defined-out))
|
|
||||||
|
|
||||||
(define max-char-num #x10FFFF)
|
|
||||||
|
|
||||||
(define-struct lex-abbrev (get-abbrev))
|
|
||||||
(define-struct lex-trans (f))
|
|
||||||
|
|
||||||
(module+ test
|
|
||||||
(require rackunit))
|
|
||||||
|
|
||||||
#;(define-syntax test-block
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ defs (code right-ans) ...)
|
|
||||||
(let* defs
|
|
||||||
(let ((real-ans code))
|
|
||||||
(unless (equal? real-ans right-ans)
|
|
||||||
(printf "Test failed: ~e gave ~e. Expected ~e\n"
|
|
||||||
'code real-ans 'right-ans))) ...))))
|
|
||||||
|
|
||||||
(define-syntax test-block
|
|
||||||
(syntax-rules ()
|
|
||||||
((_ x ...) (void))))
|
|
||||||
|
|
||||||
|
|
||||||
;; A cache is (X ( -> Y) -> Y)
|
|
||||||
;; make-cache : -> cache
|
|
||||||
;; table map Xs to Ys. If key is mapped, its value is returned.
|
|
||||||
;; Otherwise, build is invoked and its result is placed in the table and
|
|
||||||
;; returned.
|
|
||||||
;; Xs are compared with equal?
|
|
||||||
(define (make-cache)
|
|
||||||
(let ((table (make-hash)))
|
|
||||||
(lambda (key build)
|
|
||||||
(hash-ref table key
|
|
||||||
(lambda ()
|
|
||||||
(let ((new (build)))
|
|
||||||
(hash-set! table key new)
|
|
||||||
new))))))
|
|
||||||
|
|
||||||
(module+ test
|
|
||||||
(define cache (make-cache))
|
|
||||||
(check-equal? (cache '(s 1 2) (lambda () 9)) 9)
|
|
||||||
(check-equal? (cache '(s 2 1) (lambda () 8)) 8)
|
|
||||||
(check-equal? (cache '(s 1 2) (lambda () 1)) 9)
|
|
||||||
(check-equal? (cache (cons 's (cons 0 (cons +inf.0 10)))
|
|
||||||
(lambda () 22)) 22)
|
|
||||||
(check-equal? (cache (cons 's (cons 0 (cons +inf.0 10)))
|
|
||||||
(lambda () 1)) 22))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; make-counter : -> -> nat
|
|
||||||
;; makes a function that returns a higher number by 1, each time
|
|
||||||
;; it is called.
|
|
||||||
(define (make-counter)
|
|
||||||
(let ((counter 0))
|
|
||||||
(lambda ()
|
|
||||||
(begin0
|
|
||||||
counter
|
|
||||||
(set! counter (add1 counter))))))
|
|
||||||
|
|
||||||
(module+ test
|
|
||||||
(define c (make-counter))
|
|
||||||
(define d (make-counter))
|
|
||||||
(check-equal? (c) 0)
|
|
||||||
(check-equal? (d) 0)
|
|
||||||
(check-equal? (c) 1)
|
|
||||||
(check-equal? (d) 1)
|
|
||||||
(check-equal? (c) 2))
|
|
||||||
|
|
||||||
|
|
||||||
;; remove-dups : (list-of X) (X -> number) -> (list-of X)
|
|
||||||
;; removes the entries from l that have the same index as a
|
|
||||||
;; previous entry. l must be grouped by indexes.
|
|
||||||
(define (remove-dups l index acc)
|
|
||||||
(cond
|
|
||||||
((null? l) (reverse acc))
|
|
||||||
((null? acc) (remove-dups (cdr l) index (cons (car l) acc)))
|
|
||||||
((= (index (car acc)) (index (car l)))
|
|
||||||
(remove-dups (cdr l) index acc))
|
|
||||||
(else
|
|
||||||
(remove-dups (cdr l) index (cons (car l) acc)))))
|
|
||||||
|
|
||||||
|
|
||||||
(module+ test
|
|
||||||
(check-equal? (remove-dups '((1 2) (2 2) (1 3) (1 4)
|
|
||||||
(100 4) (0 5)) cadr null)
|
|
||||||
'((1 2) (1 3) (1 4) (0 5)))
|
|
||||||
(check-equal? (remove-dups null error null) null))
|
|
||||||
|
|
||||||
;; do-simple-equiv : (list-of X) (X -> nat) -> (list-of X)
|
|
||||||
;; Sorts l according to index and removes the entries with duplicate
|
|
||||||
;; indexes.
|
|
||||||
(define (do-simple-equiv l index)
|
|
||||||
(let ((ordered (sort l (lambda (a b) (< (index a) (index b))))))
|
|
||||||
(remove-dups ordered index null)))
|
|
||||||
|
|
||||||
(module+ test
|
|
||||||
(check-equal? (do-simple-equiv '((2 2) (1 4) (1 2)
|
|
||||||
(100 4) (1 3) (0 5))
|
|
||||||
cadr)
|
|
||||||
'((2 2) (1 3) (1 4) (0 5)))
|
|
||||||
(check-equal? (do-simple-equiv null error) null))
|
|
||||||
|
|
||||||
;; replace : (list-of X) (X -> bool) (X -> (list-of X)) (list-of X) ->
|
|
||||||
;; (list-of X)
|
|
||||||
;; If (pred? r) for some r in l, splice (get r) in place of r in the resulting
|
|
||||||
;; list.
|
|
||||||
(define (replace l pred? get acc)
|
|
||||||
(cond
|
|
||||||
((null? l) acc)
|
|
||||||
((pred? (car l)) (replace (cdr l) pred? get (append (get (car l)) acc)))
|
|
||||||
(else (replace (cdr l) pred? get (cons (car l) acc)))))
|
|
||||||
|
|
||||||
|
|
||||||
(module+ test
|
|
||||||
(check-equal? (replace null void (lambda () (list 1)) null) null)
|
|
||||||
(check-equal? (replace '(1 2 3 4 3 5)
|
|
||||||
(lambda (x) (= x 3))
|
|
||||||
(lambda (x) (list 1 2 3))
|
|
||||||
null)
|
|
||||||
'(5 1 2 3 4 1 2 3 2 1)))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,280 +0,0 @@
|
|||||||
;; Constructs to create and access grammars, the internal
|
|
||||||
;; representation of the input to the parser generator.
|
|
||||||
|
|
||||||
(module grammar mzscheme
|
|
||||||
|
|
||||||
(require mzlib/class
|
|
||||||
mzlib/list
|
|
||||||
"yacc-helper.rkt"
|
|
||||||
racket/contract)
|
|
||||||
|
|
||||||
;; Each production has a unique index 0 <= index <= number of productions
|
|
||||||
(define-struct prod (lhs rhs index prec action) (make-inspector))
|
|
||||||
|
|
||||||
;; The dot-pos field is the index of the element in the rhs
|
|
||||||
;; of prod that the dot immediately precedes.
|
|
||||||
;; Thus 0 <= dot-pos <= (vector-length rhs).
|
|
||||||
(define-struct item (prod dot-pos) (make-inspector))
|
|
||||||
|
|
||||||
;; gram-sym = (union term? non-term?)
|
|
||||||
;; Each term has a unique index 0 <= index < number of terms
|
|
||||||
;; Each non-term has a unique index 0 <= index < number of non-terms
|
|
||||||
(define-struct term (sym index prec) (make-inspector))
|
|
||||||
(define-struct non-term (sym index) (make-inspector))
|
|
||||||
|
|
||||||
;; a precedence declaration.
|
|
||||||
(define-struct prec (num assoc) (make-inspector))
|
|
||||||
|
|
||||||
(provide/contract
|
|
||||||
(make-item (prod? (or/c #f natural-number/c) . -> . item?))
|
|
||||||
(make-term (symbol? (or/c #f natural-number/c) (or/c prec? #f) . -> . term?))
|
|
||||||
(make-non-term (symbol? (or/c #f natural-number/c) . -> . non-term?))
|
|
||||||
(make-prec (natural-number/c (or/c 'left 'right 'nonassoc) . -> . prec?))
|
|
||||||
(make-prod (non-term? (vectorof (or/c non-term? term?))
|
|
||||||
(or/c #f natural-number/c) (or/c #f prec?) syntax? . -> . prod?)))
|
|
||||||
|
|
||||||
(provide
|
|
||||||
|
|
||||||
|
|
||||||
;; Things that work on items
|
|
||||||
start-item? item-prod item->string
|
|
||||||
sym-at-dot move-dot-right item<? item-dot-pos
|
|
||||||
|
|
||||||
;; Things that operate on grammar symbols
|
|
||||||
gram-sym-symbol gram-sym-index term-prec gram-sym->string
|
|
||||||
non-term? term? non-term<? term<?
|
|
||||||
term-list->bit-vector term-index non-term-index
|
|
||||||
|
|
||||||
;; Things that work on precs
|
|
||||||
prec-num prec-assoc
|
|
||||||
|
|
||||||
grammar%
|
|
||||||
|
|
||||||
;; Things that work on productions
|
|
||||||
prod-index prod-prec prod-rhs prod-lhs prod-action)
|
|
||||||
|
|
||||||
|
|
||||||
;;---------------------- LR items --------------------------
|
|
||||||
|
|
||||||
;; item<?: LR-item * LR-item -> bool
|
|
||||||
;; Lexicographic comparison on two items.
|
|
||||||
(define (item<? i1 i2)
|
|
||||||
(let ((p1 (prod-index (item-prod i1)))
|
|
||||||
(p2 (prod-index (item-prod i2))))
|
|
||||||
(or (< p1 p2)
|
|
||||||
(and (= p1 p2)
|
|
||||||
(let ((d1 (item-dot-pos i1))
|
|
||||||
(d2 (item-dot-pos i2)))
|
|
||||||
(< d1 d2))))))
|
|
||||||
|
|
||||||
;; start-item?: LR-item -> bool
|
|
||||||
;; The start production always has index 0
|
|
||||||
(define (start-item? i)
|
|
||||||
(= 0 (non-term-index (prod-lhs (item-prod i)))))
|
|
||||||
|
|
||||||
|
|
||||||
;; move-dot-right: LR-item -> LR-item | #f
|
|
||||||
;; moves the dot to the right in the item, unless it is at its
|
|
||||||
;; rightmost, then it returns false
|
|
||||||
(define (move-dot-right i)
|
|
||||||
(cond
|
|
||||||
((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f)
|
|
||||||
(else (make-item (item-prod i)
|
|
||||||
(add1 (item-dot-pos i))))))
|
|
||||||
|
|
||||||
;; sym-at-dot: LR-item -> gram-sym | #f
|
|
||||||
;; returns the symbol after the dot in the item or #f if there is none
|
|
||||||
(define (sym-at-dot i)
|
|
||||||
(let ((dp (item-dot-pos i))
|
|
||||||
(rhs (prod-rhs (item-prod i))))
|
|
||||||
(cond
|
|
||||||
((= dp (vector-length rhs)) #f)
|
|
||||||
(else (vector-ref rhs dp)))))
|
|
||||||
|
|
||||||
|
|
||||||
;; print-item: LR-item ->
|
|
||||||
(define (item->string it)
|
|
||||||
(let ((print-sym (lambda (i)
|
|
||||||
(let ((gs (vector-ref (prod-rhs (item-prod it)) i)))
|
|
||||||
(cond
|
|
||||||
((term? gs) (format "~a " (term-sym gs)))
|
|
||||||
(else (format "~a " (non-term-sym gs))))))))
|
|
||||||
(string-append
|
|
||||||
(format "~a -> " (non-term-sym (prod-lhs (item-prod it))))
|
|
||||||
(let loop ((i 0))
|
|
||||||
(cond
|
|
||||||
((= i (vector-length (prod-rhs (item-prod it))))
|
|
||||||
(if (= i (item-dot-pos it))
|
|
||||||
". "
|
|
||||||
""))
|
|
||||||
((= i (item-dot-pos it))
|
|
||||||
(string-append ". " (print-sym i) (loop (add1 i))))
|
|
||||||
(else (string-append (print-sym i) (loop (add1 i)))))))))
|
|
||||||
|
|
||||||
;; --------------------- Grammar Symbols --------------------------
|
|
||||||
|
|
||||||
(define (non-term<? nt1 nt2)
|
|
||||||
(< (non-term-index nt1) (non-term-index nt2)))
|
|
||||||
|
|
||||||
(define (term<? nt1 nt2)
|
|
||||||
(< (term-index nt1) (term-index nt2)))
|
|
||||||
|
|
||||||
(define (gram-sym-index gs)
|
|
||||||
(cond
|
|
||||||
((term? gs) (term-index gs))
|
|
||||||
(else (non-term-index gs))))
|
|
||||||
|
|
||||||
(define (gram-sym-symbol gs)
|
|
||||||
(cond
|
|
||||||
((term? gs) (term-sym gs))
|
|
||||||
(else (non-term-sym gs))))
|
|
||||||
|
|
||||||
(define (gram-sym->string gs)
|
|
||||||
(symbol->string (gram-sym-symbol gs)))
|
|
||||||
|
|
||||||
;; term-list->bit-vector: term list -> int
|
|
||||||
;; Creates a number where the nth bit is 1 if the term with index n is in
|
|
||||||
;; the list, and whose nth bit is 0 otherwise
|
|
||||||
(define (term-list->bit-vector terms)
|
|
||||||
(cond
|
|
||||||
((null? terms) 0)
|
|
||||||
(else
|
|
||||||
(bitwise-ior (arithmetic-shift 1 (term-index (car terms))) (term-list->bit-vector (cdr terms))))))
|
|
||||||
|
|
||||||
|
|
||||||
;; ------------------------- Grammar ------------------------------
|
|
||||||
|
|
||||||
(define grammar%
|
|
||||||
(class object%
|
|
||||||
(super-instantiate ())
|
|
||||||
;; prods: production list list
|
|
||||||
;; where there is one production list per non-term
|
|
||||||
(init prods)
|
|
||||||
;; init-prods: production list
|
|
||||||
;; The productions parsing can start from
|
|
||||||
;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable
|
|
||||||
(init-field init-prods terms non-terms end-terms)
|
|
||||||
|
|
||||||
;; list of all productions
|
|
||||||
(define all-prods (apply append prods))
|
|
||||||
(define num-prods (length all-prods))
|
|
||||||
(define num-terms (length terms))
|
|
||||||
(define num-non-terms (length non-terms))
|
|
||||||
|
|
||||||
(let ((count 0))
|
|
||||||
(for-each
|
|
||||||
(lambda (nt)
|
|
||||||
(set-non-term-index! nt count)
|
|
||||||
(set! count (add1 count)))
|
|
||||||
non-terms))
|
|
||||||
|
|
||||||
(let ((count 0))
|
|
||||||
(for-each
|
|
||||||
(lambda (t)
|
|
||||||
(set-term-index! t count)
|
|
||||||
(set! count (add1 count)))
|
|
||||||
terms))
|
|
||||||
|
|
||||||
(let ((count 0))
|
|
||||||
(for-each
|
|
||||||
(lambda (prod)
|
|
||||||
(set-prod-index! prod count)
|
|
||||||
(set! count (add1 count)))
|
|
||||||
all-prods))
|
|
||||||
|
|
||||||
;; indexed by the index of the non-term - contains the list of productions for that non-term
|
|
||||||
(define nt->prods
|
|
||||||
(let ((v (make-vector (length prods) #f)))
|
|
||||||
(for-each (lambda (prods)
|
|
||||||
(vector-set! v (non-term-index (prod-lhs (car prods))) prods))
|
|
||||||
prods)
|
|
||||||
v))
|
|
||||||
|
|
||||||
(define nullable-non-terms
|
|
||||||
(nullable all-prods num-non-terms))
|
|
||||||
|
|
||||||
(define/public (get-num-terms) num-terms)
|
|
||||||
(define/public (get-num-non-terms) num-non-terms)
|
|
||||||
|
|
||||||
(define/public (get-prods-for-non-term nt)
|
|
||||||
(vector-ref nt->prods (non-term-index nt)))
|
|
||||||
(define/public (get-prods) all-prods)
|
|
||||||
(define/public (get-init-prods) init-prods)
|
|
||||||
|
|
||||||
(define/public (get-terms) terms)
|
|
||||||
(define/public (get-non-terms) non-terms)
|
|
||||||
|
|
||||||
(define/public (get-num-prods) num-prods)
|
|
||||||
(define/public (get-end-terms) end-terms)
|
|
||||||
|
|
||||||
(define/public (nullable-non-term? nt)
|
|
||||||
(vector-ref nullable-non-terms (non-term-index nt)))
|
|
||||||
|
|
||||||
(define/public (nullable-after-dot? item)
|
|
||||||
(let* ((rhs (prod-rhs (item-prod item)))
|
|
||||||
(prod-length (vector-length rhs)))
|
|
||||||
(let loop ((i (item-dot-pos item)))
|
|
||||||
(cond
|
|
||||||
((< i prod-length)
|
|
||||||
(if (and (non-term? (vector-ref rhs i)) (nullable-non-term? (vector-ref rhs i)))
|
|
||||||
(loop (add1 i))
|
|
||||||
#f))
|
|
||||||
((= i prod-length) #t)))))
|
|
||||||
|
|
||||||
(define/public (nullable-non-term-thunk)
|
|
||||||
(lambda (nt)
|
|
||||||
(nullable-non-term? nt)))
|
|
||||||
(define/public (nullable-after-dot?-thunk)
|
|
||||||
(lambda (item)
|
|
||||||
(nullable-after-dot? item)))))
|
|
||||||
|
|
||||||
|
|
||||||
;; nullable: production list * int -> non-term set
|
|
||||||
;; determines which non-terminals can derive epsilon
|
|
||||||
(define (nullable prods num-nts)
|
|
||||||
(letrec ((nullable (make-vector num-nts #f))
|
|
||||||
(added #f)
|
|
||||||
|
|
||||||
;; possible-nullable: producion list -> production list
|
|
||||||
;; Removes all productions that have a terminal
|
|
||||||
(possible-nullable
|
|
||||||
(lambda (prods)
|
|
||||||
(filter (lambda (prod)
|
|
||||||
(vector-andmap non-term? (prod-rhs prod)))
|
|
||||||
prods)))
|
|
||||||
|
|
||||||
;; set-nullables: production list -> production list
|
|
||||||
;; makes one pass through the productions, adding the ones
|
|
||||||
;; known to be nullable now to nullable and returning a list
|
|
||||||
;; of productions that we don't know about yet.
|
|
||||||
(set-nullables
|
|
||||||
(lambda (prods)
|
|
||||||
(cond
|
|
||||||
((null? prods) null)
|
|
||||||
((vector-ref nullable
|
|
||||||
(gram-sym-index (prod-lhs (car prods))))
|
|
||||||
(set-nullables (cdr prods)))
|
|
||||||
((vector-andmap (lambda (nt)
|
|
||||||
(vector-ref nullable (gram-sym-index nt)))
|
|
||||||
(prod-rhs (car prods)))
|
|
||||||
(vector-set! nullable
|
|
||||||
(gram-sym-index (prod-lhs (car prods)))
|
|
||||||
#t)
|
|
||||||
(set! added #t)
|
|
||||||
(set-nullables (cdr prods)))
|
|
||||||
(else
|
|
||||||
(cons (car prods)
|
|
||||||
(set-nullables (cdr prods))))))))
|
|
||||||
|
|
||||||
(let loop ((P (possible-nullable prods)))
|
|
||||||
(cond
|
|
||||||
((null? P) nullable)
|
|
||||||
(else
|
|
||||||
(set! added #f)
|
|
||||||
(let ((new-P (set-nullables P)))
|
|
||||||
(if added
|
|
||||||
(loop new-P)
|
|
||||||
nullable)))))))
|
|
||||||
|
|
||||||
|
|
||||||
)
|
|
@ -1,61 +0,0 @@
|
|||||||
(module graph mzscheme
|
|
||||||
|
|
||||||
(provide digraph)
|
|
||||||
|
|
||||||
(define (zero-thunk) 0)
|
|
||||||
|
|
||||||
;; digraph:
|
|
||||||
;; ('a list) * ('a -> 'a list) * ('a -> 'b) * ('b * 'b -> 'b) * (-> 'b)
|
|
||||||
;; -> ('a -> 'b)
|
|
||||||
;; DeRemer and Pennello 1982
|
|
||||||
;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)}
|
|
||||||
;; We use a hash-table to represent the result function 'a -> 'b set, so
|
|
||||||
;; the values of type 'a must be comparable with eq?.
|
|
||||||
(define (digraph nodes edges f- union fail)
|
|
||||||
(letrec [
|
|
||||||
;; Will map elements of 'a to 'b sets
|
|
||||||
(results (make-hash-table))
|
|
||||||
(f (lambda (x) (hash-table-get results x fail)))
|
|
||||||
|
|
||||||
;; Maps elements of 'a to integers.
|
|
||||||
(N (make-hash-table))
|
|
||||||
(get-N (lambda (x) (hash-table-get N x zero-thunk)))
|
|
||||||
(set-N (lambda (x d) (hash-table-put! N x d)))
|
|
||||||
|
|
||||||
(stack null)
|
|
||||||
(push (lambda (x)
|
|
||||||
(set! stack (cons x stack))))
|
|
||||||
(pop (lambda ()
|
|
||||||
(begin0
|
|
||||||
(car stack)
|
|
||||||
(set! stack (cdr stack)))))
|
|
||||||
(depth (lambda () (length stack)))
|
|
||||||
|
|
||||||
;; traverse: 'a ->
|
|
||||||
(traverse
|
|
||||||
(lambda (x)
|
|
||||||
(push x)
|
|
||||||
(let ((d (depth)))
|
|
||||||
(set-N x d)
|
|
||||||
(hash-table-put! results x (f- x))
|
|
||||||
(for-each (lambda (y)
|
|
||||||
(if (= 0 (get-N y))
|
|
||||||
(traverse y))
|
|
||||||
(hash-table-put! results
|
|
||||||
x
|
|
||||||
(union (f x) (f y)))
|
|
||||||
(set-N x (min (get-N x) (get-N y))))
|
|
||||||
(edges x))
|
|
||||||
(if (= d (get-N x))
|
|
||||||
(let loop ((p (pop)))
|
|
||||||
(set-N p +inf.0)
|
|
||||||
(hash-table-put! results p (f x))
|
|
||||||
(if (not (eq? x p))
|
|
||||||
(loop (pop))))))))]
|
|
||||||
(for-each (lambda (x)
|
|
||||||
(if (= 0 (get-N x))
|
|
||||||
(traverse x)))
|
|
||||||
nodes)
|
|
||||||
f))
|
|
||||||
|
|
||||||
)
|
|
@ -1,374 +0,0 @@
|
|||||||
(module input-file-parser mzscheme
|
|
||||||
|
|
||||||
;; routines for parsing the input to the parser generator and producing a
|
|
||||||
;; grammar (See grammar.rkt)
|
|
||||||
|
|
||||||
(require "yacc-helper.rkt"
|
|
||||||
"../private-lex/token-syntax.rkt"
|
|
||||||
"grammar.rkt"
|
|
||||||
mzlib/class
|
|
||||||
racket/contract)
|
|
||||||
(require-for-template mzscheme)
|
|
||||||
|
|
||||||
(define (is-a-grammar%? x) (is-a? x grammar%))
|
|
||||||
(provide/contract
|
|
||||||
(parse-input ((listof identifier?) (listof identifier?) (listof identifier?)
|
|
||||||
(or/c #f syntax?) syntax? any/c . -> . is-a-grammar%?))
|
|
||||||
(get-term-list ((listof identifier?) . -> . (listof identifier?))))
|
|
||||||
|
|
||||||
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
|
|
||||||
|
|
||||||
;; get-args: ??? -> (values (listof syntax) (or/c #f (cons integer? stx)))
|
|
||||||
(define (get-args i rhs src-pos term-defs)
|
|
||||||
(let ((empty-table (make-hash-table))
|
|
||||||
(biggest-pos #f))
|
|
||||||
(hash-table-put! empty-table 'error #t)
|
|
||||||
(for-each (lambda (td)
|
|
||||||
(let ((v (syntax-local-value td)))
|
|
||||||
(if (e-terminals-def? v)
|
|
||||||
(for-each (lambda (s)
|
|
||||||
(hash-table-put! empty-table (syntax-object->datum s) #t))
|
|
||||||
(syntax->list (e-terminals-def-t v))))))
|
|
||||||
term-defs)
|
|
||||||
(let ([args
|
|
||||||
(let get-args ((i i)
|
|
||||||
(rhs rhs))
|
|
||||||
(cond
|
|
||||||
((null? rhs) null)
|
|
||||||
(else
|
|
||||||
(let ((b (car rhs))
|
|
||||||
(name (if (hash-table-get empty-table (syntax-object->datum (car rhs)) (lambda () #f))
|
|
||||||
(gensym)
|
|
||||||
(string->symbol (format "$~a" i)))))
|
|
||||||
(cond
|
|
||||||
(src-pos
|
|
||||||
(let ([start-pos-id
|
|
||||||
(datum->syntax-object b (string->symbol (format "$~a-start-pos" i)) b stx-for-original-property)]
|
|
||||||
[end-pos-id
|
|
||||||
(datum->syntax-object b (string->symbol (format "$~a-end-pos" i)) b stx-for-original-property)])
|
|
||||||
(set! biggest-pos (cons start-pos-id end-pos-id))
|
|
||||||
`(,(datum->syntax-object b name b stx-for-original-property)
|
|
||||||
,start-pos-id
|
|
||||||
,end-pos-id
|
|
||||||
,@(get-args (add1 i) (cdr rhs)))))
|
|
||||||
(else
|
|
||||||
`(,(datum->syntax-object b name b stx-for-original-property)
|
|
||||||
,@(get-args (add1 i) (cdr rhs)))))))))])
|
|
||||||
(values args biggest-pos))))
|
|
||||||
|
|
||||||
;; Given the list of terminal symbols and the precedence/associativity definitions,
|
|
||||||
;; builds terminal structures (See grammar.rkt)
|
|
||||||
;; build-terms: symbol list * symbol list list -> term list
|
|
||||||
(define (build-terms term-list precs)
|
|
||||||
(let ((counter 0)
|
|
||||||
|
|
||||||
;;(term-list (cons (gensym) term-list))
|
|
||||||
|
|
||||||
;; Will map a terminal symbol to its precedence/associativity
|
|
||||||
(prec-table (make-hash-table)))
|
|
||||||
|
|
||||||
;; Fill the prec table
|
|
||||||
(for-each
|
|
||||||
(lambda (p-decl)
|
|
||||||
(begin0
|
|
||||||
(let ((assoc (car p-decl)))
|
|
||||||
(for-each
|
|
||||||
(lambda (term-sym)
|
|
||||||
(hash-table-put! prec-table term-sym (make-prec counter assoc)))
|
|
||||||
(cdr p-decl)))
|
|
||||||
(set! counter (add1 counter))))
|
|
||||||
precs)
|
|
||||||
|
|
||||||
;; Build the terminal structures
|
|
||||||
(map
|
|
||||||
(lambda (term-sym)
|
|
||||||
(make-term term-sym
|
|
||||||
#f
|
|
||||||
(hash-table-get prec-table term-sym (lambda () #f))))
|
|
||||||
term-list)))
|
|
||||||
|
|
||||||
;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.rkt)
|
|
||||||
;; get-terms-from-def: identifier? -> (listof identifier?)
|
|
||||||
(define (get-terms-from-def term-syn)
|
|
||||||
(let ((t (syntax-local-value term-syn (lambda () #f))))
|
|
||||||
(cond
|
|
||||||
((terminals-def? t) (syntax->list (terminals-def-t t)))
|
|
||||||
((e-terminals-def? t) (syntax->list (e-terminals-def-t t)))
|
|
||||||
(else
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-tokens
|
|
||||||
"undefined token group"
|
|
||||||
term-syn)))))
|
|
||||||
|
|
||||||
(define (get-term-list term-group-names)
|
|
||||||
(remove-duplicates
|
|
||||||
(cons (datum->syntax-object #f 'error)
|
|
||||||
(apply append
|
|
||||||
(map get-terms-from-def term-group-names)))))
|
|
||||||
|
|
||||||
(define (parse-input term-defs start ends prec-decls prods src-pos)
|
|
||||||
(let* ((start-syms (map syntax-e start))
|
|
||||||
|
|
||||||
(list-of-terms (map syntax-e (get-term-list term-defs)))
|
|
||||||
|
|
||||||
(end-terms
|
|
||||||
(map
|
|
||||||
(lambda (end)
|
|
||||||
(unless (memq (syntax-e end) list-of-terms)
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-end-tokens
|
|
||||||
(format "End token ~a not defined as a token"
|
|
||||||
(syntax-e end))
|
|
||||||
end))
|
|
||||||
(syntax-e end))
|
|
||||||
ends))
|
|
||||||
|
|
||||||
;; Get the list of terminals out of input-terms
|
|
||||||
|
|
||||||
(list-of-non-terms
|
|
||||||
(syntax-case prods ()
|
|
||||||
(((non-term production ...) ...)
|
|
||||||
(begin
|
|
||||||
(for-each
|
|
||||||
(lambda (nts)
|
|
||||||
(if (memq (syntax-object->datum nts) list-of-terms)
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-non-terminals
|
|
||||||
(format "~a used as both token and non-terminal"
|
|
||||||
(syntax-object->datum nts))
|
|
||||||
nts)))
|
|
||||||
(syntax->list (syntax (non-term ...))))
|
|
||||||
|
|
||||||
(let ((dup (duplicate-list? (syntax-object->datum
|
|
||||||
(syntax (non-term ...))))))
|
|
||||||
(if dup
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-non-terminals
|
|
||||||
(format "non-terminal ~a defined multiple times"
|
|
||||||
dup)
|
|
||||||
prods)))
|
|
||||||
|
|
||||||
(syntax-object->datum (syntax (non-term ...)))))
|
|
||||||
(_
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-grammar
|
|
||||||
"Grammar must be of the form (grammar (non-terminal productions ...) ...)"
|
|
||||||
prods))))
|
|
||||||
|
|
||||||
;; Check the precedence declarations for errors and turn them into data
|
|
||||||
(precs
|
|
||||||
(syntax-case prec-decls ()
|
|
||||||
(((type term ...) ...)
|
|
||||||
(let ((p-terms
|
|
||||||
(syntax-object->datum (syntax (term ... ...)))))
|
|
||||||
(cond
|
|
||||||
((duplicate-list? p-terms) =>
|
|
||||||
(lambda (d)
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-precedences
|
|
||||||
(format "duplicate precedence declaration for token ~a"
|
|
||||||
d)
|
|
||||||
prec-decls)))
|
|
||||||
(else
|
|
||||||
(for-each
|
|
||||||
(lambda (a)
|
|
||||||
(for-each
|
|
||||||
(lambda (t)
|
|
||||||
(if (not (memq (syntax-object->datum t)
|
|
||||||
list-of-terms))
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-precedences
|
|
||||||
(format
|
|
||||||
"Precedence declared for non-token ~a"
|
|
||||||
(syntax-object->datum t))
|
|
||||||
t)))
|
|
||||||
(syntax->list a)))
|
|
||||||
(syntax->list (syntax ((term ...) ...))))
|
|
||||||
(for-each
|
|
||||||
(lambda (type)
|
|
||||||
(if (not (memq (syntax-object->datum type)
|
|
||||||
`(left right nonassoc)))
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-precedences
|
|
||||||
"Associativity must be left, right or nonassoc"
|
|
||||||
type)))
|
|
||||||
(syntax->list (syntax (type ...))))
|
|
||||||
(syntax-object->datum prec-decls)))))
|
|
||||||
(#f null)
|
|
||||||
(_
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-precedences
|
|
||||||
"Precedence declaration must be of the form (precs (assoc term ...) ...) where assoc is left, right or nonassoc"
|
|
||||||
prec-decls))))
|
|
||||||
|
|
||||||
(terms (build-terms list-of-terms precs))
|
|
||||||
|
|
||||||
(non-terms (map (lambda (non-term) (make-non-term non-term #f))
|
|
||||||
list-of-non-terms))
|
|
||||||
(term-table (make-hash-table))
|
|
||||||
(non-term-table (make-hash-table)))
|
|
||||||
|
|
||||||
(for-each (lambda (t)
|
|
||||||
(hash-table-put! term-table (gram-sym-symbol t) t))
|
|
||||||
terms)
|
|
||||||
|
|
||||||
(for-each (lambda (nt)
|
|
||||||
(hash-table-put! non-term-table (gram-sym-symbol nt) nt))
|
|
||||||
non-terms)
|
|
||||||
|
|
||||||
(let* (
|
|
||||||
;; parse-prod: syntax-object -> gram-sym vector
|
|
||||||
(parse-prod
|
|
||||||
(lambda (prod-so)
|
|
||||||
(syntax-case prod-so ()
|
|
||||||
((prod-rhs-sym ...)
|
|
||||||
(andmap identifier? (syntax->list prod-so))
|
|
||||||
(begin
|
|
||||||
(for-each (lambda (t)
|
|
||||||
(if (memq (syntax-object->datum t) end-terms)
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-production-rhs
|
|
||||||
(format "~a is an end token and cannot be used in a production"
|
|
||||||
(syntax-object->datum t))
|
|
||||||
t)))
|
|
||||||
(syntax->list prod-so))
|
|
||||||
(list->vector
|
|
||||||
(map (lambda (s)
|
|
||||||
(hash-table-get
|
|
||||||
term-table
|
|
||||||
(syntax-object->datum s)
|
|
||||||
(lambda ()
|
|
||||||
(hash-table-get
|
|
||||||
non-term-table
|
|
||||||
(syntax-object->datum s)
|
|
||||||
(lambda ()
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-production-rhs
|
|
||||||
(format
|
|
||||||
"~a is not declared as a terminal or non-terminal"
|
|
||||||
(syntax-object->datum s))
|
|
||||||
s))))))
|
|
||||||
(syntax->list prod-so)))))
|
|
||||||
(_
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-production-rhs
|
|
||||||
"production right-hand-side must have form (symbol ...)"
|
|
||||||
prod-so)))))
|
|
||||||
|
|
||||||
;; parse-action: syntax-object * syntax-object -> syntax-object
|
|
||||||
(parse-action
|
|
||||||
(lambda (rhs act)
|
|
||||||
(let-values ([(args biggest) (get-args 1 (syntax->list rhs) src-pos term-defs)])
|
|
||||||
(let ([act
|
|
||||||
(if biggest
|
|
||||||
(with-syntax ([$n-start-pos (datum->syntax-object (car biggest) '$n-start-pos)]
|
|
||||||
[$n-end-pos (datum->syntax-object (cdr biggest) '$n-end-pos)])
|
|
||||||
#`(let ([$n-start-pos #,(car biggest)]
|
|
||||||
[$n-end-pos #,(cdr biggest)])
|
|
||||||
#,act))
|
|
||||||
act)])
|
|
||||||
(quasisyntax/loc act
|
|
||||||
(lambda #,args
|
|
||||||
#,act))))))
|
|
||||||
|
|
||||||
;; parse-prod+action: non-term * syntax-object -> production
|
|
||||||
(parse-prod+action
|
|
||||||
(lambda (nt prod-so)
|
|
||||||
(syntax-case prod-so ()
|
|
||||||
((prod-rhs action)
|
|
||||||
(let ((p (parse-prod (syntax prod-rhs))))
|
|
||||||
(make-prod
|
|
||||||
nt
|
|
||||||
p
|
|
||||||
#f
|
|
||||||
(let loop ((i (sub1 (vector-length p))))
|
|
||||||
(if (>= i 0)
|
|
||||||
(let ((gs (vector-ref p i)))
|
|
||||||
(if (term? gs)
|
|
||||||
(term-prec gs)
|
|
||||||
(loop (sub1 i))))
|
|
||||||
#f))
|
|
||||||
(parse-action (syntax prod-rhs) (syntax action)))))
|
|
||||||
((prod-rhs (prec term) action)
|
|
||||||
(identifier? (syntax term))
|
|
||||||
(let ((p (parse-prod (syntax prod-rhs))))
|
|
||||||
(make-prod
|
|
||||||
nt
|
|
||||||
p
|
|
||||||
#f
|
|
||||||
(term-prec
|
|
||||||
(hash-table-get
|
|
||||||
term-table
|
|
||||||
(syntax-object->datum (syntax term))
|
|
||||||
(lambda ()
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-production-rhs
|
|
||||||
(format
|
|
||||||
"unrecognized terminal ~a in precedence declaration"
|
|
||||||
(syntax-object->datum (syntax term)))
|
|
||||||
(syntax term)))))
|
|
||||||
(parse-action (syntax prod-rhs) (syntax action)))))
|
|
||||||
(_
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-production-rhs
|
|
||||||
"production must have form [(symbol ...) expression] or [(symbol ...) (prec symbol) expression]"
|
|
||||||
prod-so)))))
|
|
||||||
|
|
||||||
;; parse-prod-for-nt: syntax-object -> production list
|
|
||||||
(parse-prods-for-nt
|
|
||||||
(lambda (prods-so)
|
|
||||||
(syntax-case prods-so ()
|
|
||||||
((nt productions ...)
|
|
||||||
(> (length (syntax->list (syntax (productions ...)))) 0)
|
|
||||||
(let ((nt (hash-table-get non-term-table
|
|
||||||
(syntax-object->datum (syntax nt)))))
|
|
||||||
(map (lambda (p) (parse-prod+action nt p))
|
|
||||||
(syntax->list (syntax (productions ...))))))
|
|
||||||
(_
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-productions
|
|
||||||
"A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side"
|
|
||||||
prods-so))))))
|
|
||||||
|
|
||||||
(for-each
|
|
||||||
(lambda (sstx ssym)
|
|
||||||
(unless (memq ssym list-of-non-terms)
|
|
||||||
(raise-syntax-error
|
|
||||||
'parser-start
|
|
||||||
(format "Start symbol ~a not defined as a non-terminal" ssym)
|
|
||||||
sstx)))
|
|
||||||
start start-syms)
|
|
||||||
|
|
||||||
(let* ((starts (map (lambda (x) (make-non-term (gensym) #f)) start-syms))
|
|
||||||
(end-non-terms (map (lambda (x) (make-non-term (gensym) #f)) start-syms))
|
|
||||||
(parsed-prods (map parse-prods-for-nt (syntax->list prods)))
|
|
||||||
(start-prods
|
|
||||||
(map (lambda (start end-non-term)
|
|
||||||
(list (make-prod start (vector end-non-term) #f #f
|
|
||||||
(syntax (lambda (x) x)))))
|
|
||||||
starts end-non-terms))
|
|
||||||
(prods
|
|
||||||
`(,@start-prods
|
|
||||||
,@(map
|
|
||||||
(lambda (end-nt start-sym)
|
|
||||||
(map
|
|
||||||
(lambda (end)
|
|
||||||
(make-prod end-nt
|
|
||||||
(vector
|
|
||||||
(hash-table-get non-term-table start-sym)
|
|
||||||
(hash-table-get term-table end))
|
|
||||||
#f
|
|
||||||
#f
|
|
||||||
(syntax (lambda (x) x))))
|
|
||||||
end-terms))
|
|
||||||
end-non-terms start-syms)
|
|
||||||
,@parsed-prods)))
|
|
||||||
|
|
||||||
(make-object grammar%
|
|
||||||
prods
|
|
||||||
(map car start-prods)
|
|
||||||
terms
|
|
||||||
(append starts (append end-non-terms non-terms))
|
|
||||||
(map (lambda (term-name)
|
|
||||||
(hash-table-get term-table term-name))
|
|
||||||
end-terms)))))))
|
|
@ -1,277 +0,0 @@
|
|||||||
(module lalr mzscheme
|
|
||||||
|
|
||||||
;; Compute LALR lookaheads from DeRemer and Pennello 1982
|
|
||||||
|
|
||||||
(require "lr0.rkt"
|
|
||||||
"grammar.rkt"
|
|
||||||
mzlib/list
|
|
||||||
mzlib/class)
|
|
||||||
|
|
||||||
(provide compute-LA)
|
|
||||||
|
|
||||||
;; compute-DR: LR0-automaton * grammar -> (trans-key -> term set)
|
|
||||||
;; computes for each state, non-term transition pair, the terminals
|
|
||||||
;; which can transition out of the resulting state
|
|
||||||
;; output term set is represented in bit-vector form
|
|
||||||
(define (compute-DR a g)
|
|
||||||
(lambda (tk)
|
|
||||||
(let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk))))
|
|
||||||
(term-list->bit-vector
|
|
||||||
(filter
|
|
||||||
(lambda (term)
|
|
||||||
(send a run-automaton r term))
|
|
||||||
(send g get-terms))))))
|
|
||||||
|
|
||||||
;; compute-reads:
|
|
||||||
;; LR0-automaton * grammar -> (trans-key -> trans-key list)
|
|
||||||
(define (compute-reads a g)
|
|
||||||
(let ((nullable-non-terms
|
|
||||||
(filter (lambda (nt) (send g nullable-non-term? nt))
|
|
||||||
(send g get-non-terms))))
|
|
||||||
(lambda (tk)
|
|
||||||
(let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk))))
|
|
||||||
(map (lambda (x) (make-trans-key r x))
|
|
||||||
(filter (lambda (non-term) (send a run-automaton r non-term))
|
|
||||||
nullable-non-terms))))))
|
|
||||||
|
|
||||||
;; compute-read: LR0-automaton * grammar -> (trans-key -> term set)
|
|
||||||
;; output term set is represented in bit-vector form
|
|
||||||
(define (compute-read a g)
|
|
||||||
(let* ((dr (compute-DR a g))
|
|
||||||
(reads (compute-reads a g)))
|
|
||||||
(digraph-tk->terml (send a get-mapped-non-term-keys)
|
|
||||||
reads
|
|
||||||
dr
|
|
||||||
(send a get-num-states))))
|
|
||||||
;; returns the list of all k such that state k transitions to state start on the
|
|
||||||
;; transitions in rhs (in order)
|
|
||||||
(define (run-lr0-backward a rhs dot-pos start num-states)
|
|
||||||
(let loop ((states (list start))
|
|
||||||
(i (sub1 dot-pos)))
|
|
||||||
(cond
|
|
||||||
((< i 0) states)
|
|
||||||
(else (loop (send a run-automaton-back states (vector-ref rhs i))
|
|
||||||
(sub1 i))))))
|
|
||||||
|
|
||||||
;; prod->items-for-include: grammar * prod * non-term -> lr0-item list
|
|
||||||
;; returns the list of all (B -> beta . nt gamma) such that prod = (B -> beta nt gamma)
|
|
||||||
;; and gamma =>* epsilon
|
|
||||||
(define (prod->items-for-include g prod nt)
|
|
||||||
(let* ((rhs (prod-rhs prod))
|
|
||||||
(rhs-l (vector-length rhs)))
|
|
||||||
(append (if (and (> rhs-l 0) (eq? nt (vector-ref rhs (sub1 rhs-l))))
|
|
||||||
(list (make-item prod (sub1 rhs-l)))
|
|
||||||
null)
|
|
||||||
(let loop ((i (sub1 rhs-l)))
|
|
||||||
(cond
|
|
||||||
((and (> i 0)
|
|
||||||
(non-term? (vector-ref rhs i))
|
|
||||||
(send g nullable-non-term? (vector-ref rhs i)))
|
|
||||||
(if (eq? nt (vector-ref rhs (sub1 i)))
|
|
||||||
(cons (make-item prod (sub1 i))
|
|
||||||
(loop (sub1 i)))
|
|
||||||
(loop (sub1 i))))
|
|
||||||
(else null))))))
|
|
||||||
|
|
||||||
;; prod-list->items-for-include: grammar * prod list * non-term -> lr0-item list
|
|
||||||
;; return the list of all (B -> beta . nt gamma) such that (B -> beta nt gamma) in prod-list
|
|
||||||
;; and gamma =>* epsilon
|
|
||||||
(define (prod-list->items-for-include g prod-list nt)
|
|
||||||
(apply append (map (lambda (prod) (prod->items-for-include g prod nt)) prod-list)))
|
|
||||||
|
|
||||||
;; comput-includes: lr0-automaton * grammar -> (trans-key -> trans-key list)
|
|
||||||
(define (compute-includes a g)
|
|
||||||
(let ((num-states (send a get-num-states))
|
|
||||||
(items-for-input-nt (make-vector (send g get-num-non-terms) null)))
|
|
||||||
(for-each
|
|
||||||
(lambda (input-nt)
|
|
||||||
(vector-set! items-for-input-nt (non-term-index input-nt)
|
|
||||||
(prod-list->items-for-include g (send g get-prods) input-nt)))
|
|
||||||
(send g get-non-terms))
|
|
||||||
(lambda (tk)
|
|
||||||
(let* ((goal-state (trans-key-st tk))
|
|
||||||
(non-term (trans-key-gs tk))
|
|
||||||
(items (vector-ref items-for-input-nt (non-term-index non-term))))
|
|
||||||
(trans-key-list-remove-dups
|
|
||||||
(apply append
|
|
||||||
(map (lambda (item)
|
|
||||||
(let* ((prod (item-prod item))
|
|
||||||
(rhs (prod-rhs prod))
|
|
||||||
(lhs (prod-lhs prod)))
|
|
||||||
(map (lambda (state)
|
|
||||||
(make-trans-key state lhs))
|
|
||||||
(run-lr0-backward a
|
|
||||||
rhs
|
|
||||||
(item-dot-pos item)
|
|
||||||
goal-state
|
|
||||||
num-states))))
|
|
||||||
items)))))))
|
|
||||||
|
|
||||||
;; compute-lookback: lr0-automaton * grammar -> (kernel * proc -> trans-key list)
|
|
||||||
(define (compute-lookback a g)
|
|
||||||
(let ((num-states (send a get-num-states)))
|
|
||||||
(lambda (state prod)
|
|
||||||
(map (lambda (k) (make-trans-key k (prod-lhs prod)))
|
|
||||||
(run-lr0-backward a (prod-rhs prod) (vector-length (prod-rhs prod)) state num-states)))))
|
|
||||||
|
|
||||||
;; compute-follow: LR0-automaton * grammar -> (trans-key -> term set)
|
|
||||||
;; output term set is represented in bit-vector form
|
|
||||||
(define (compute-follow a g includes)
|
|
||||||
(let ((read (compute-read a g)))
|
|
||||||
(digraph-tk->terml (send a get-mapped-non-term-keys)
|
|
||||||
includes
|
|
||||||
read
|
|
||||||
(send a get-num-states))))
|
|
||||||
|
|
||||||
;; compute-LA: LR0-automaton * grammar -> kernel * prod -> term set
|
|
||||||
;; output term set is represented in bit-vector form
|
|
||||||
(define (compute-LA a g)
|
|
||||||
(let* ((includes (compute-includes a g))
|
|
||||||
(lookback (compute-lookback a g))
|
|
||||||
(follow (compute-follow a g includes)))
|
|
||||||
(lambda (k p)
|
|
||||||
(let* ((l (lookback k p))
|
|
||||||
(f (map follow l)))
|
|
||||||
(apply bitwise-ior (cons 0 f))))))
|
|
||||||
|
|
||||||
(define (print-DR dr a g)
|
|
||||||
(print-input-st-sym dr "DR" a g print-output-terms))
|
|
||||||
(define (print-Read Read a g)
|
|
||||||
(print-input-st-sym Read "Read" a g print-output-terms))
|
|
||||||
(define (print-includes i a g)
|
|
||||||
(print-input-st-sym i "includes" a g print-output-st-nt))
|
|
||||||
(define (print-lookback l a g)
|
|
||||||
(print-input-st-prod l "lookback" a g print-output-st-nt))
|
|
||||||
(define (print-follow f a g)
|
|
||||||
(print-input-st-sym f "follow" a g print-output-terms))
|
|
||||||
(define (print-LA l a g)
|
|
||||||
(print-input-st-prod l "LA" a g print-output-terms))
|
|
||||||
|
|
||||||
(define (print-input-st-sym f name a g print-output)
|
|
||||||
(printf "~a:\n" name)
|
|
||||||
(send a for-each-state
|
|
||||||
(lambda (state)
|
|
||||||
(for-each
|
|
||||||
(lambda (non-term)
|
|
||||||
(let ((res (f (make-trans-key state non-term))))
|
|
||||||
(if (not (null? res))
|
|
||||||
(printf "~a(~a, ~a) = ~a\n"
|
|
||||||
name
|
|
||||||
state
|
|
||||||
(gram-sym-symbol non-term)
|
|
||||||
(print-output res)))))
|
|
||||||
(send g get-non-terms))))
|
|
||||||
(newline))
|
|
||||||
|
|
||||||
(define (print-input-st-prod f name a g print-output)
|
|
||||||
(printf "~a:\n" name)
|
|
||||||
(send a for-each-state
|
|
||||||
(lambda (state)
|
|
||||||
(for-each
|
|
||||||
(lambda (non-term)
|
|
||||||
(for-each
|
|
||||||
(lambda (prod)
|
|
||||||
(let ((res (f state prod)))
|
|
||||||
(if (not (null? res))
|
|
||||||
(printf "~a(~a, ~a) = ~a\n"
|
|
||||||
name
|
|
||||||
(kernel-index state)
|
|
||||||
(prod-index prod)
|
|
||||||
(print-output res)))))
|
|
||||||
(send g get-prods-for-non-term non-term)))
|
|
||||||
(send g get-non-terms)))))
|
|
||||||
|
|
||||||
(define (print-output-terms r)
|
|
||||||
(map
|
|
||||||
(lambda (p)
|
|
||||||
(gram-sym-symbol p))
|
|
||||||
r))
|
|
||||||
|
|
||||||
(define (print-output-st-nt r)
|
|
||||||
(map
|
|
||||||
(lambda (p)
|
|
||||||
(list
|
|
||||||
(kernel-index (trans-key-st p))
|
|
||||||
(gram-sym-symbol (trans-key-gs p))))
|
|
||||||
r))
|
|
||||||
|
|
||||||
;; init-tk-map : int -> (vectorof hashtable?)
|
|
||||||
(define (init-tk-map n)
|
|
||||||
(let ((v (make-vector n #f)))
|
|
||||||
(let loop ((i (sub1 (vector-length v))))
|
|
||||||
(when (>= i 0)
|
|
||||||
(vector-set! v i (make-hash-table))
|
|
||||||
(loop (sub1 i))))
|
|
||||||
v))
|
|
||||||
|
|
||||||
;; lookup-tk-map : (vectorof (symbol? int hashtable)) -> trans-key? -> int
|
|
||||||
(define (lookup-tk-map map)
|
|
||||||
(lambda (tk)
|
|
||||||
(let ((st (trans-key-st tk))
|
|
||||||
(gs (trans-key-gs tk)))
|
|
||||||
(hash-table-get (vector-ref map (kernel-index st))
|
|
||||||
(gram-sym-symbol gs)
|
|
||||||
(lambda () 0)))))
|
|
||||||
|
|
||||||
;; add-tk-map : (vectorof (symbol? int hashtable)) -> trans-key int ->
|
|
||||||
(define (add-tk-map map)
|
|
||||||
(lambda (tk v)
|
|
||||||
(let ((st (trans-key-st tk))
|
|
||||||
(gs (trans-key-gs tk)))
|
|
||||||
(hash-table-put! (vector-ref map (kernel-index st))
|
|
||||||
(gram-sym-symbol gs)
|
|
||||||
v))))
|
|
||||||
|
|
||||||
;; digraph-tk->terml:
|
|
||||||
;; (trans-key list) * (trans-key -> trans-key list) * (trans-key -> term list) * int * int * int
|
|
||||||
;; -> (trans-key -> term list)
|
|
||||||
;; DeRemer and Pennello 1982
|
|
||||||
;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)}
|
|
||||||
;; A specialization of digraph in the file graph.rkt
|
|
||||||
(define (digraph-tk->terml nodes edges f- num-states)
|
|
||||||
(letrec [
|
|
||||||
;; Will map elements of trans-key to term sets represented as bit vectors
|
|
||||||
(results (init-tk-map num-states))
|
|
||||||
|
|
||||||
;; Maps elements of trans-keys to integers.
|
|
||||||
(N (init-tk-map num-states))
|
|
||||||
|
|
||||||
(get-N (lookup-tk-map N))
|
|
||||||
(set-N (add-tk-map N))
|
|
||||||
(get-f (lookup-tk-map results))
|
|
||||||
(set-f (add-tk-map results))
|
|
||||||
|
|
||||||
(stack null)
|
|
||||||
(push (lambda (x)
|
|
||||||
(set! stack (cons x stack))))
|
|
||||||
(pop (lambda ()
|
|
||||||
(begin0
|
|
||||||
(car stack)
|
|
||||||
(set! stack (cdr stack)))))
|
|
||||||
(depth (lambda () (length stack)))
|
|
||||||
|
|
||||||
;; traverse: 'a ->
|
|
||||||
(traverse
|
|
||||||
(lambda (x)
|
|
||||||
(push x)
|
|
||||||
(let ((d (depth)))
|
|
||||||
(set-N x d)
|
|
||||||
(set-f x (f- x))
|
|
||||||
(for-each (lambda (y)
|
|
||||||
(when (= 0 (get-N y))
|
|
||||||
(traverse y))
|
|
||||||
(set-f x (bitwise-ior (get-f x) (get-f y)))
|
|
||||||
(set-N x (min (get-N x) (get-N y))))
|
|
||||||
(edges x))
|
|
||||||
(when (= d (get-N x))
|
|
||||||
(let loop ((p (pop)))
|
|
||||||
(set-N p +inf.0)
|
|
||||||
(set-f p (get-f x))
|
|
||||||
(unless (equal? x p)
|
|
||||||
(loop (pop))))))))]
|
|
||||||
(for-each (lambda (x)
|
|
||||||
(when (= 0 (get-N x))
|
|
||||||
(traverse x)))
|
|
||||||
nodes)
|
|
||||||
get-f))
|
|
||||||
)
|
|
@ -1,372 +0,0 @@
|
|||||||
(module lr0 mzscheme
|
|
||||||
|
|
||||||
;; Handle the LR0 automaton
|
|
||||||
|
|
||||||
(require "grammar.rkt"
|
|
||||||
"graph.rkt"
|
|
||||||
mzlib/list
|
|
||||||
mzlib/class)
|
|
||||||
|
|
||||||
(provide build-lr0-automaton lr0%
|
|
||||||
(struct trans-key (st gs)) trans-key-list-remove-dups
|
|
||||||
kernel-items kernel-index)
|
|
||||||
|
|
||||||
;; kernel = (make-kernel (LR1-item list) index)
|
|
||||||
;; the list must be kept sorted according to item<? so that equal? can
|
|
||||||
;; be used to compare kernels
|
|
||||||
;; Each kernel is assigned a unique index, 0 <= index < number of states
|
|
||||||
;; trans-key = (make-trans-key kernel gram-sym)
|
|
||||||
(define-struct kernel (items index) (make-inspector))
|
|
||||||
(define-struct trans-key (st gs) (make-inspector))
|
|
||||||
|
|
||||||
(define (trans-key<? a b)
|
|
||||||
(let ((kia (kernel-index (trans-key-st a)))
|
|
||||||
(kib (kernel-index (trans-key-st b))))
|
|
||||||
(or (< kia kib)
|
|
||||||
(and (= kia kib)
|
|
||||||
(< (non-term-index (trans-key-gs a))
|
|
||||||
(non-term-index (trans-key-gs b)))))))
|
|
||||||
|
|
||||||
(define (trans-key-list-remove-dups tkl)
|
|
||||||
(let loop ((sorted (sort tkl trans-key<?)))
|
|
||||||
(cond
|
|
||||||
((null? sorted) null)
|
|
||||||
((null? (cdr sorted)) sorted)
|
|
||||||
(else
|
|
||||||
(if (and (= (non-term-index (trans-key-gs (car sorted)))
|
|
||||||
(non-term-index (trans-key-gs (cadr sorted))))
|
|
||||||
(= (kernel-index (trans-key-st (car sorted)))
|
|
||||||
(kernel-index (trans-key-st (cadr sorted)))))
|
|
||||||
(loop (cdr sorted))
|
|
||||||
(cons (car sorted) (loop (cdr sorted))))))))
|
|
||||||
|
|
||||||
|
|
||||||
;; build-transition-table : int (listof (cons/c trans-key X) ->
|
|
||||||
;; (vectorof (symbol X hashtable))
|
|
||||||
(define (build-transition-table num-states assoc)
|
|
||||||
(let ((transitions (make-vector num-states #f)))
|
|
||||||
(let loop ((i (sub1 (vector-length transitions))))
|
|
||||||
(when (>= i 0)
|
|
||||||
(vector-set! transitions i (make-hash-table))
|
|
||||||
(loop (sub1 i))))
|
|
||||||
(for-each
|
|
||||||
(lambda (trans-key/kernel)
|
|
||||||
(let ((tk (car trans-key/kernel)))
|
|
||||||
(hash-table-put! (vector-ref transitions (kernel-index (trans-key-st tk)))
|
|
||||||
(gram-sym-symbol (trans-key-gs tk))
|
|
||||||
(cdr trans-key/kernel))))
|
|
||||||
assoc)
|
|
||||||
transitions))
|
|
||||||
|
|
||||||
;; reverse-assoc : (listof (cons/c trans-key? kernel?)) ->
|
|
||||||
;; (listof (cons/c trans-key? (listof kernel?)))
|
|
||||||
(define (reverse-assoc assoc)
|
|
||||||
(let ((reverse-hash (make-hash-table 'equal))
|
|
||||||
(hash-table-add!
|
|
||||||
(lambda (ht k v)
|
|
||||||
(hash-table-put! ht k (cons v (hash-table-get ht k (lambda () null)))))))
|
|
||||||
(for-each
|
|
||||||
(lambda (trans-key/kernel)
|
|
||||||
(let ((tk (car trans-key/kernel)))
|
|
||||||
(hash-table-add! reverse-hash
|
|
||||||
(make-trans-key (cdr trans-key/kernel)
|
|
||||||
(trans-key-gs tk))
|
|
||||||
(trans-key-st tk))))
|
|
||||||
assoc)
|
|
||||||
(hash-table-map reverse-hash cons)))
|
|
||||||
|
|
||||||
|
|
||||||
;; kernel-list-remove-duplicates
|
|
||||||
;; LR0-automaton = object of class lr0%
|
|
||||||
(define lr0%
|
|
||||||
(class object%
|
|
||||||
(super-instantiate ())
|
|
||||||
;; term-assoc : (listof (cons/c trans-key? kernel?))
|
|
||||||
;; non-term-assoc : (listof (cons/c trans-key? kernel?))
|
|
||||||
;; states : (vectorof kernel?)
|
|
||||||
;; epsilons : ???
|
|
||||||
(init-field term-assoc non-term-assoc states epsilons)
|
|
||||||
|
|
||||||
(define transitions (build-transition-table (vector-length states)
|
|
||||||
(append term-assoc non-term-assoc)))
|
|
||||||
|
|
||||||
(define reverse-term-assoc (reverse-assoc term-assoc))
|
|
||||||
(define reverse-non-term-assoc (reverse-assoc non-term-assoc))
|
|
||||||
(define reverse-transitions
|
|
||||||
(build-transition-table (vector-length states)
|
|
||||||
(append reverse-term-assoc reverse-non-term-assoc)))
|
|
||||||
|
|
||||||
(define mapped-non-terms (map car non-term-assoc))
|
|
||||||
|
|
||||||
(define/public (get-mapped-non-term-keys)
|
|
||||||
mapped-non-terms)
|
|
||||||
|
|
||||||
(define/public (get-num-states)
|
|
||||||
(vector-length states))
|
|
||||||
|
|
||||||
(define/public (get-epsilon-trans)
|
|
||||||
epsilons)
|
|
||||||
|
|
||||||
(define/public (get-transitions)
|
|
||||||
(append term-assoc non-term-assoc))
|
|
||||||
|
|
||||||
;; for-each-state : (state ->) ->
|
|
||||||
;; Iteration over the states in an automaton
|
|
||||||
(define/public (for-each-state f)
|
|
||||||
(let ((num-states (vector-length states)))
|
|
||||||
(let loop ((i 0))
|
|
||||||
(if (< i num-states)
|
|
||||||
(begin
|
|
||||||
(f (vector-ref states i))
|
|
||||||
(loop (add1 i)))))))
|
|
||||||
|
|
||||||
;; run-automaton: kernel? gram-sym? -> (union kernel #f)
|
|
||||||
;; returns the state reached from state k on input s, or #f when k
|
|
||||||
;; has no transition on s
|
|
||||||
(define/public (run-automaton k s)
|
|
||||||
(hash-table-get (vector-ref transitions (kernel-index k))
|
|
||||||
(gram-sym-symbol s)
|
|
||||||
(lambda () #f)))
|
|
||||||
|
|
||||||
;; run-automaton-back : (listof kernel?) gram-sym? -> (listof kernel)
|
|
||||||
;; returns the list of states that can reach k by transitioning on s.
|
|
||||||
(define/public (run-automaton-back k s)
|
|
||||||
(apply append
|
|
||||||
(map
|
|
||||||
(lambda (k)
|
|
||||||
(hash-table-get (vector-ref reverse-transitions (kernel-index k))
|
|
||||||
(gram-sym-symbol s)
|
|
||||||
(lambda () null)))
|
|
||||||
k)))))
|
|
||||||
|
|
||||||
(define (union comp<?)
|
|
||||||
(letrec ((union
|
|
||||||
(lambda (l1 l2)
|
|
||||||
(cond
|
|
||||||
((null? l1) l2)
|
|
||||||
((null? l2) l1)
|
|
||||||
(else (let ((c1 (car l1))
|
|
||||||
(c2 (car l2)))
|
|
||||||
(cond
|
|
||||||
((comp<? c1 c2)
|
|
||||||
(cons c1 (union (cdr l1) l2)))
|
|
||||||
((comp<? c2 c1)
|
|
||||||
(cons c2 (union l1 (cdr l2))))
|
|
||||||
(else (union (cdr l1) l2)))))))))
|
|
||||||
union))
|
|
||||||
|
|
||||||
|
|
||||||
;; The kernels in the automaton are represented cannonically.
|
|
||||||
;; That is (equal? a b) <=> (eq? a b)
|
|
||||||
(define (kernel->string k)
|
|
||||||
(apply string-append
|
|
||||||
`("{" ,@(map (lambda (i) (string-append (item->string i) ", "))
|
|
||||||
(kernel-items k))
|
|
||||||
"}")))
|
|
||||||
|
|
||||||
;; build-LR0-automaton: grammar -> LR0-automaton
|
|
||||||
;; Constructs the kernels of the sets of LR(0) items of g
|
|
||||||
(define (build-lr0-automaton grammar)
|
|
||||||
; (printf "LR(0) automaton:\n")
|
|
||||||
(letrec (
|
|
||||||
(epsilons (make-hash-table 'equal))
|
|
||||||
(grammar-symbols (append (send grammar get-non-terms)
|
|
||||||
(send grammar get-terms)))
|
|
||||||
;; first-non-term: non-term -> non-term list
|
|
||||||
;; given a non-terminal symbol C, return those non-terminal
|
|
||||||
;; symbols A s.t. C -> An for some string of terminals and
|
|
||||||
;; non-terminals n where -> means a rightmost derivation in many
|
|
||||||
;; steps. Assumes that each non-term can be reduced to a string
|
|
||||||
;; of terms.
|
|
||||||
(first-non-term
|
|
||||||
(digraph (send grammar get-non-terms)
|
|
||||||
(lambda (nt)
|
|
||||||
(filter non-term?
|
|
||||||
(map (lambda (prod)
|
|
||||||
(sym-at-dot (make-item prod 0)))
|
|
||||||
(send grammar get-prods-for-non-term nt))))
|
|
||||||
(lambda (nt) (list nt))
|
|
||||||
(union non-term<?)
|
|
||||||
(lambda () null)))
|
|
||||||
|
|
||||||
;; closure: LR1-item list -> LR1-item list
|
|
||||||
;; Creates a set of items containing i s.t. if A -> n.Xm is in it,
|
|
||||||
;; X -> .o is in it too.
|
|
||||||
(LR0-closure
|
|
||||||
(lambda (i)
|
|
||||||
(cond
|
|
||||||
((null? i) null)
|
|
||||||
(else
|
|
||||||
(let ((next-gsym (sym-at-dot (car i))))
|
|
||||||
(cond
|
|
||||||
((non-term? next-gsym)
|
|
||||||
(cons (car i)
|
|
||||||
(append
|
|
||||||
(apply append
|
|
||||||
(map (lambda (non-term)
|
|
||||||
(map (lambda (x)
|
|
||||||
(make-item x 0))
|
|
||||||
(send grammar
|
|
||||||
get-prods-for-non-term
|
|
||||||
non-term)))
|
|
||||||
(first-non-term next-gsym)))
|
|
||||||
(LR0-closure (cdr i)))))
|
|
||||||
(else
|
|
||||||
(cons (car i) (LR0-closure (cdr i))))))))))
|
|
||||||
|
|
||||||
|
|
||||||
;; maps trans-keys to kernels
|
|
||||||
(automaton-term null)
|
|
||||||
(automaton-non-term null)
|
|
||||||
|
|
||||||
;; keeps the kernels we have seen, so we can have a unique
|
|
||||||
;; list for each kernel
|
|
||||||
(kernels (make-hash-table 'equal))
|
|
||||||
|
|
||||||
(counter 0)
|
|
||||||
|
|
||||||
;; goto: LR1-item list -> LR1-item list list
|
|
||||||
;; creates new kernels by moving the dot in each item in the
|
|
||||||
;; LR0-closure of kernel to the right, and grouping them by
|
|
||||||
;; the term/non-term moved over. Returns the kernels not
|
|
||||||
;; yet seen, and places the trans-keys into automaton
|
|
||||||
(goto
|
|
||||||
(lambda (kernel)
|
|
||||||
(let (
|
|
||||||
;; maps a gram-syms to a list of items
|
|
||||||
(table (make-hash-table))
|
|
||||||
|
|
||||||
;; add-item!:
|
|
||||||
;; (symbol (listof item) hashtable) item? ->
|
|
||||||
;; adds i into the table grouped with the grammar
|
|
||||||
;; symbol following its dot
|
|
||||||
(add-item!
|
|
||||||
(lambda (table i)
|
|
||||||
(let ((gs (sym-at-dot i)))
|
|
||||||
(cond
|
|
||||||
(gs
|
|
||||||
(let ((already
|
|
||||||
(hash-table-get table
|
|
||||||
(gram-sym-symbol gs)
|
|
||||||
(lambda () null))))
|
|
||||||
(unless (member i already)
|
|
||||||
(hash-table-put! table
|
|
||||||
(gram-sym-symbol gs)
|
|
||||||
(cons i already)))))
|
|
||||||
((= 0 (vector-length (prod-rhs (item-prod i))))
|
|
||||||
(let ((current (hash-table-get epsilons
|
|
||||||
kernel
|
|
||||||
(lambda () null))))
|
|
||||||
(hash-table-put! epsilons
|
|
||||||
kernel
|
|
||||||
(cons i current)))))))))
|
|
||||||
|
|
||||||
;; Group the items of the LR0 closure of the kernel
|
|
||||||
;; by the character after the dot
|
|
||||||
(for-each (lambda (item)
|
|
||||||
(add-item! table item))
|
|
||||||
(LR0-closure (kernel-items kernel)))
|
|
||||||
|
|
||||||
;; each group is a new kernel, with the dot advanced.
|
|
||||||
;; sorts the items in a kernel so kernels can be compared
|
|
||||||
;; with equal? for using the table kernels to make sure
|
|
||||||
;; only one representitive of each kernel is created
|
|
||||||
(filter
|
|
||||||
(lambda (x) x)
|
|
||||||
(map
|
|
||||||
(lambda (i)
|
|
||||||
(let* ((gs (car i))
|
|
||||||
(items (cadr i))
|
|
||||||
(new #f)
|
|
||||||
(new-kernel (sort
|
|
||||||
(filter (lambda (x) x)
|
|
||||||
(map move-dot-right items))
|
|
||||||
item<?))
|
|
||||||
(unique-kernel (hash-table-get
|
|
||||||
kernels
|
|
||||||
new-kernel
|
|
||||||
(lambda ()
|
|
||||||
(let ((k (make-kernel
|
|
||||||
new-kernel
|
|
||||||
counter)))
|
|
||||||
(set! new #t)
|
|
||||||
(set! counter (add1 counter))
|
|
||||||
(hash-table-put! kernels
|
|
||||||
new-kernel
|
|
||||||
k)
|
|
||||||
k)))))
|
|
||||||
(cond
|
|
||||||
((term? gs)
|
|
||||||
(set! automaton-term (cons (cons (make-trans-key kernel gs)
|
|
||||||
unique-kernel)
|
|
||||||
automaton-term)))
|
|
||||||
(else
|
|
||||||
(set! automaton-non-term (cons (cons (make-trans-key kernel gs)
|
|
||||||
unique-kernel)
|
|
||||||
automaton-non-term))))
|
|
||||||
#;(printf "~a -> ~a on ~a\n"
|
|
||||||
(kernel->string kernel)
|
|
||||||
(kernel->string unique-kernel)
|
|
||||||
(gram-sym-symbol gs))
|
|
||||||
(if new
|
|
||||||
unique-kernel
|
|
||||||
#f)))
|
|
||||||
(let loop ((gsyms grammar-symbols))
|
|
||||||
(cond
|
|
||||||
((null? gsyms) null)
|
|
||||||
(else
|
|
||||||
(let ((items (hash-table-get table
|
|
||||||
(gram-sym-symbol (car gsyms))
|
|
||||||
(lambda () null))))
|
|
||||||
(cond
|
|
||||||
((null? items) (loop (cdr gsyms)))
|
|
||||||
(else
|
|
||||||
(cons (list (car gsyms) items)
|
|
||||||
(loop (cdr gsyms))))))))))))))
|
|
||||||
|
|
||||||
(starts
|
|
||||||
(map (lambda (init-prod) (list (make-item init-prod 0)))
|
|
||||||
(send grammar get-init-prods)))
|
|
||||||
(startk
|
|
||||||
(map (lambda (start)
|
|
||||||
(let ((k (make-kernel start counter)))
|
|
||||||
(hash-table-put! kernels start k)
|
|
||||||
(set! counter (add1 counter))
|
|
||||||
k))
|
|
||||||
starts))
|
|
||||||
(new-kernels (make-queue)))
|
|
||||||
|
|
||||||
(let loop ((old-kernels startk)
|
|
||||||
(seen-kernels null))
|
|
||||||
(cond
|
|
||||||
((and (empty-queue? new-kernels) (null? old-kernels))
|
|
||||||
(make-object lr0%
|
|
||||||
automaton-term
|
|
||||||
automaton-non-term
|
|
||||||
(list->vector (reverse seen-kernels))
|
|
||||||
epsilons))
|
|
||||||
((null? old-kernels)
|
|
||||||
(loop (deq! new-kernels) seen-kernels))
|
|
||||||
(else
|
|
||||||
(enq! new-kernels (goto (car old-kernels)))
|
|
||||||
(loop (cdr old-kernels) (cons (car old-kernels) seen-kernels)))))))
|
|
||||||
|
|
||||||
(define-struct q (f l) (make-inspector))
|
|
||||||
(define (empty-queue? q)
|
|
||||||
(null? (q-f q)))
|
|
||||||
(define (make-queue)
|
|
||||||
(make-q null null))
|
|
||||||
(define (enq! q i)
|
|
||||||
(if (empty-queue? q)
|
|
||||||
(let ((i (mcons i null)))
|
|
||||||
(set-q-l! q i)
|
|
||||||
(set-q-f! q i))
|
|
||||||
(begin
|
|
||||||
(set-mcdr! (q-l q) (mcons i null))
|
|
||||||
(set-q-l! q (mcdr (q-l q))))))
|
|
||||||
(define (deq! q)
|
|
||||||
(begin0
|
|
||||||
(mcar (q-f q))
|
|
||||||
(set-q-f! q (mcdr (q-f q)))))
|
|
||||||
|
|
||||||
)
|
|
@ -1,54 +0,0 @@
|
|||||||
(module parser-actions mzscheme
|
|
||||||
(require "grammar.rkt")
|
|
||||||
(provide (all-defined-except make-reduce make-reduce*)
|
|
||||||
(rename make-reduce* make-reduce))
|
|
||||||
|
|
||||||
;; An action is
|
|
||||||
;; - (make-shift int)
|
|
||||||
;; - (make-reduce prod runtime-action)
|
|
||||||
;; - (make-accept)
|
|
||||||
;; - (make-goto int)
|
|
||||||
;; - (no-action)
|
|
||||||
;; A reduce contains a runtime-reduce so that sharing of the reduces can
|
|
||||||
;; be easily transferred to sharing of runtime-reduces.
|
|
||||||
|
|
||||||
(define-struct action () (make-inspector))
|
|
||||||
(define-struct (shift action) (state) (make-inspector))
|
|
||||||
(define-struct (reduce action) (prod runtime-reduce) (make-inspector))
|
|
||||||
(define-struct (accept action) () (make-inspector))
|
|
||||||
(define-struct (goto action) (state) (make-inspector))
|
|
||||||
(define-struct (no-action action) () (make-inspector))
|
|
||||||
|
|
||||||
(define (make-reduce* p)
|
|
||||||
(make-reduce p
|
|
||||||
(vector (prod-index p)
|
|
||||||
(gram-sym-symbol (prod-lhs p))
|
|
||||||
(vector-length (prod-rhs p)))))
|
|
||||||
|
|
||||||
;; A runtime-action is
|
|
||||||
;; non-negative-int (shift)
|
|
||||||
;; (vector int symbol int) (reduce)
|
|
||||||
;; 'accept (accept)
|
|
||||||
;; negative-int (goto)
|
|
||||||
;; #f (no-action)
|
|
||||||
|
|
||||||
(define (action->runtime-action a)
|
|
||||||
(cond
|
|
||||||
((shift? a) (shift-state a))
|
|
||||||
((reduce? a) (reduce-runtime-reduce a))
|
|
||||||
((accept? a) 'accept)
|
|
||||||
((goto? a) (- (+ (goto-state a) 1)))
|
|
||||||
((no-action? a) #f)))
|
|
||||||
|
|
||||||
(define (runtime-shift? x) (and (integer? x) (>= x 0)))
|
|
||||||
(define runtime-reduce? vector?)
|
|
||||||
(define (runtime-accept? x) (eq? x 'accept))
|
|
||||||
(define (runtime-goto? x) (and (integer? x) (< x 0)))
|
|
||||||
|
|
||||||
(define runtime-shift-state values)
|
|
||||||
(define (runtime-reduce-prod-num x) (vector-ref x 0))
|
|
||||||
(define (runtime-reduce-lhs x) (vector-ref x 1))
|
|
||||||
(define (runtime-reduce-rhs-length x) (vector-ref x 2))
|
|
||||||
(define (runtime-goto-state x) (- (+ x 1)))
|
|
||||||
|
|
||||||
)
|
|
@ -1,113 +0,0 @@
|
|||||||
(module parser-builder mzscheme
|
|
||||||
|
|
||||||
(require "input-file-parser.rkt"
|
|
||||||
"grammar.rkt"
|
|
||||||
"table.rkt"
|
|
||||||
mzlib/class
|
|
||||||
racket/contract)
|
|
||||||
(require-for-template mzscheme)
|
|
||||||
|
|
||||||
(provide/contract
|
|
||||||
(build-parser (-> string? any/c any/c
|
|
||||||
(listof identifier?)
|
|
||||||
(listof identifier?)
|
|
||||||
(listof identifier?)
|
|
||||||
(or/c syntax? #f)
|
|
||||||
syntax?
|
|
||||||
(values any/c any/c any/c any/c))))
|
|
||||||
|
|
||||||
;; fix-check-syntax : (listof identifier?) (listof identifier?) (listof identifier?)
|
|
||||||
;; (union syntax? false/c) syntax?) -> syntax?
|
|
||||||
(define (fix-check-syntax input-terms start ends assocs prods)
|
|
||||||
(let* ((term-binders (get-term-list input-terms))
|
|
||||||
(get-term-binder
|
|
||||||
(let ((t (make-hash-table)))
|
|
||||||
(for-each
|
|
||||||
(lambda (term)
|
|
||||||
(hash-table-put! t (syntax-e term) term))
|
|
||||||
term-binders)
|
|
||||||
(lambda (x)
|
|
||||||
(let ((r (hash-table-get t (syntax-e x) (lambda () #f))))
|
|
||||||
(if r
|
|
||||||
(syntax-local-introduce (datum->syntax-object r (syntax-e x) x x))
|
|
||||||
x)))))
|
|
||||||
(rhs-list
|
|
||||||
(syntax-case prods ()
|
|
||||||
(((_ rhs ...) ...)
|
|
||||||
(syntax->list (syntax (rhs ... ...)))))))
|
|
||||||
(with-syntax (((tmp ...) (map syntax-local-introduce term-binders))
|
|
||||||
((term-group ...)
|
|
||||||
(map (lambda (tg)
|
|
||||||
(syntax-property
|
|
||||||
(datum->syntax-object tg #f)
|
|
||||||
'disappeared-use
|
|
||||||
tg))
|
|
||||||
input-terms))
|
|
||||||
((end ...)
|
|
||||||
(map get-term-binder ends))
|
|
||||||
((start ...)
|
|
||||||
(map get-term-binder start))
|
|
||||||
((bind ...)
|
|
||||||
(syntax-case prods ()
|
|
||||||
(((bind _ ...) ...)
|
|
||||||
(syntax->list (syntax (bind ...))))))
|
|
||||||
(((bound ...) ...)
|
|
||||||
(map
|
|
||||||
(lambda (rhs)
|
|
||||||
(syntax-case rhs ()
|
|
||||||
(((bound ...) (_ pbound) __)
|
|
||||||
(map get-term-binder
|
|
||||||
(cons (syntax pbound)
|
|
||||||
(syntax->list (syntax (bound ...))))))
|
|
||||||
(((bound ...) _)
|
|
||||||
(map get-term-binder
|
|
||||||
(syntax->list (syntax (bound ...)))))))
|
|
||||||
rhs-list))
|
|
||||||
((prec ...)
|
|
||||||
(if assocs
|
|
||||||
(map get-term-binder
|
|
||||||
(syntax-case assocs ()
|
|
||||||
(((__ term ...) ...)
|
|
||||||
(syntax->list (syntax (term ... ...))))))
|
|
||||||
null)))
|
|
||||||
#`(when #f
|
|
||||||
(let ((bind void) ... (tmp void) ...)
|
|
||||||
(void bound ... ... term-group ... start ... end ... prec ...))))))
|
|
||||||
(require mzlib/list "parser-actions.rkt")
|
|
||||||
(define (build-parser filename src-pos suppress input-terms start end assocs prods)
|
|
||||||
(let* ((grammar (parse-input input-terms start end assocs prods src-pos))
|
|
||||||
(table (build-table grammar filename suppress))
|
|
||||||
(all-tokens (make-hash-table))
|
|
||||||
(actions-code
|
|
||||||
`(vector ,@(map prod-action (send grammar get-prods)))))
|
|
||||||
(for-each (lambda (term)
|
|
||||||
(hash-table-put! all-tokens (gram-sym-symbol term) #t))
|
|
||||||
(send grammar get-terms))
|
|
||||||
#;(let ((num-states (vector-length table))
|
|
||||||
(num-gram-syms (+ (send grammar get-num-terms)
|
|
||||||
(send grammar get-num-non-terms)))
|
|
||||||
(num-ht-entries (apply + (map length (vector->list table))))
|
|
||||||
(num-reduces
|
|
||||||
(let ((ht (make-hash-table)))
|
|
||||||
(for-each
|
|
||||||
(lambda (x)
|
|
||||||
(when (reduce? x)
|
|
||||||
(hash-table-put! ht x #t)))
|
|
||||||
(map cdr (apply append (vector->list table))))
|
|
||||||
(length (hash-table-map ht void)))))
|
|
||||||
(printf "~a states, ~a grammar symbols, ~a hash-table entries, ~a reduces\n"
|
|
||||||
num-states num-gram-syms num-ht-entries num-reduces)
|
|
||||||
(printf "~a -- ~aKB, previously ~aKB\n"
|
|
||||||
(/ (+ 2 num-states
|
|
||||||
(* 4 num-states) (* 2 1.5 num-ht-entries)
|
|
||||||
(* 5 num-reduces)) 256.0)
|
|
||||||
(/ (+ 2 num-states
|
|
||||||
(* 4 num-states) (* 2 2.3 num-ht-entries)
|
|
||||||
(* 5 num-reduces)) 256.0)
|
|
||||||
(/ (+ 2 (* num-states num-gram-syms) (* 5 num-reduces)) 256.0)))
|
|
||||||
(values table
|
|
||||||
all-tokens
|
|
||||||
actions-code
|
|
||||||
(fix-check-syntax input-terms start end assocs prods))))
|
|
||||||
|
|
||||||
)
|
|
@ -1,290 +0,0 @@
|
|||||||
#lang scheme/base
|
|
||||||
|
|
||||||
;; Routine to build the LALR table
|
|
||||||
|
|
||||||
(require "grammar.rkt"
|
|
||||||
"lr0.rkt"
|
|
||||||
"lalr.rkt"
|
|
||||||
"parser-actions.rkt"
|
|
||||||
racket/contract
|
|
||||||
mzlib/list
|
|
||||||
mzlib/class)
|
|
||||||
|
|
||||||
(define (is-a-grammar%? x) (is-a? x grammar%))
|
|
||||||
(provide/contract
|
|
||||||
(build-table (-> is-a-grammar%? string? any/c
|
|
||||||
(vectorof (listof (cons/c (or/c term? non-term?) action?))))))
|
|
||||||
|
|
||||||
;; A parse-table is (vectorof (listof (cons/c gram-sym? action)))
|
|
||||||
;; A grouped-parse-table is (vectorof (listof (cons/c gram-sym? (listof action))))
|
|
||||||
|
|
||||||
;; make-parse-table : int -> parse-table
|
|
||||||
(define (make-parse-table num-states)
|
|
||||||
(make-vector num-states null))
|
|
||||||
|
|
||||||
;; table-add!: parse-table nat symbol action ->
|
|
||||||
(define (table-add! table state-index symbol val)
|
|
||||||
(vector-set! table state-index (cons (cons symbol val)
|
|
||||||
(vector-ref table state-index))))
|
|
||||||
|
|
||||||
;; group-table : parse-table -> grouped-parse-table
|
|
||||||
(define (group-table table)
|
|
||||||
(list->vector
|
|
||||||
(map
|
|
||||||
(lambda (state-entry)
|
|
||||||
(let ((ht (make-hash)))
|
|
||||||
(for-each
|
|
||||||
(lambda (gs/actions)
|
|
||||||
(let ((group (hash-ref ht (car gs/actions) (lambda () null))))
|
|
||||||
(unless (member (cdr gs/actions) group)
|
|
||||||
(hash-set! ht (car gs/actions) (cons (cdr gs/actions) group)))))
|
|
||||||
state-entry)
|
|
||||||
(hash-map ht cons)))
|
|
||||||
(vector->list table))))
|
|
||||||
|
|
||||||
;; table-map : (vectorof (listof (cons/c gram-sym? X))) (gram-sym? X -> Y) ->
|
|
||||||
;; (vectorof (listof (cons/c gram-sym? Y)))
|
|
||||||
(define (table-map f table)
|
|
||||||
(list->vector
|
|
||||||
(map
|
|
||||||
(lambda (state-entry)
|
|
||||||
(map
|
|
||||||
(lambda (gs/X)
|
|
||||||
(cons (car gs/X) (f (car gs/X) (cdr gs/X))))
|
|
||||||
state-entry))
|
|
||||||
(vector->list table))))
|
|
||||||
|
|
||||||
|
|
||||||
(define (bit-vector-for-each f bv)
|
|
||||||
(letrec ((for-each
|
|
||||||
(lambda (bv number)
|
|
||||||
(cond
|
|
||||||
((= 0 bv) (void))
|
|
||||||
((= 1 (bitwise-and 1 bv))
|
|
||||||
(f number)
|
|
||||||
(for-each (arithmetic-shift bv -1) (add1 number)))
|
|
||||||
(else (for-each (arithmetic-shift bv -1) (add1 number)))))))
|
|
||||||
(for-each bv 0)))
|
|
||||||
|
|
||||||
|
|
||||||
;; print-entry: symbol action output-port ->
|
|
||||||
;; prints the action a for lookahead sym to the given port
|
|
||||||
(define (print-entry sym a port)
|
|
||||||
(let ((s "\t~a\t\t\t\t\t~a\t~a\n"))
|
|
||||||
(cond
|
|
||||||
((shift? a)
|
|
||||||
(fprintf port s sym "shift" (shift-state a)))
|
|
||||||
((reduce? a)
|
|
||||||
(fprintf port s sym "reduce" (prod-index (reduce-prod a))))
|
|
||||||
((accept? a)
|
|
||||||
(fprintf port s sym "accept" ""))
|
|
||||||
((goto? a)
|
|
||||||
(fprintf port s sym "goto" (goto-state a))))))
|
|
||||||
|
|
||||||
|
|
||||||
;; count: ('a -> bool) * 'a list -> num
|
|
||||||
;; counts the number of elements in list that satisfy pred
|
|
||||||
(define (count pred list)
|
|
||||||
(cond
|
|
||||||
((null? list) 0)
|
|
||||||
((pred (car list)) (+ 1 (count pred (cdr list))))
|
|
||||||
(else (count pred (cdr list)))))
|
|
||||||
|
|
||||||
;; display-parser: LR0-automaton grouped-parse-table (listof prod?) output-port ->
|
|
||||||
;; Prints out the parser given by table.
|
|
||||||
(define (display-parser a grouped-table prods port)
|
|
||||||
(let* ((SR-conflicts 0)
|
|
||||||
(RR-conflicts 0))
|
|
||||||
(for-each
|
|
||||||
(lambda (prod)
|
|
||||||
(fprintf port
|
|
||||||
"~a\t~a\t=\t~a\n"
|
|
||||||
(prod-index prod)
|
|
||||||
(gram-sym-symbol (prod-lhs prod))
|
|
||||||
(map gram-sym-symbol (vector->list (prod-rhs prod)))))
|
|
||||||
prods)
|
|
||||||
(send a for-each-state
|
|
||||||
(lambda (state)
|
|
||||||
(fprintf port "State ~a\n" (kernel-index state))
|
|
||||||
(for-each (lambda (item)
|
|
||||||
(fprintf port "\t~a\n" (item->string item)))
|
|
||||||
(kernel-items state))
|
|
||||||
(newline port)
|
|
||||||
(for-each
|
|
||||||
(lambda (gs/action)
|
|
||||||
(let ((sym (gram-sym-symbol (car gs/action)))
|
|
||||||
(act (cdr gs/action)))
|
|
||||||
(cond
|
|
||||||
((null? act) (void))
|
|
||||||
((null? (cdr act))
|
|
||||||
(print-entry sym (car act) port))
|
|
||||||
(else
|
|
||||||
(fprintf port "begin conflict:\n")
|
|
||||||
(when (> (count reduce? act) 1)
|
|
||||||
(set! RR-conflicts (add1 RR-conflicts)))
|
|
||||||
(when (> (count shift? act) 0)
|
|
||||||
(set! SR-conflicts (add1 SR-conflicts)))
|
|
||||||
(map (lambda (x) (print-entry sym x port)) act)
|
|
||||||
(fprintf port "end conflict\n")))))
|
|
||||||
(vector-ref grouped-table (kernel-index state)))
|
|
||||||
(newline port)))
|
|
||||||
|
|
||||||
(when (> SR-conflicts 0)
|
|
||||||
(fprintf port "~a shift/reduce conflict~a\n"
|
|
||||||
SR-conflicts
|
|
||||||
(if (= SR-conflicts 1) "" "s")))
|
|
||||||
(when (> RR-conflicts 0)
|
|
||||||
(fprintf port "~a reduce/reduce conflict~a\n"
|
|
||||||
RR-conflicts
|
|
||||||
(if (= RR-conflicts 1) "" "s")))))
|
|
||||||
|
|
||||||
;; resolve-conflict : (listof action?) -> action? bool bool
|
|
||||||
(define (resolve-conflict actions)
|
|
||||||
(cond
|
|
||||||
((null? actions) (values (make-no-action) #f #f))
|
|
||||||
((null? (cdr actions))
|
|
||||||
(values (car actions) #f #f))
|
|
||||||
(else
|
|
||||||
(let ((SR-conflict? (> (count shift? actions) 0))
|
|
||||||
(RR-conflict? (> (count reduce? actions) 1)))
|
|
||||||
(let loop ((current-guess #f)
|
|
||||||
(rest actions))
|
|
||||||
(cond
|
|
||||||
((null? rest) (values current-guess SR-conflict? RR-conflict?))
|
|
||||||
((shift? (car rest)) (values (car rest) SR-conflict? RR-conflict?))
|
|
||||||
((not current-guess)
|
|
||||||
(loop (car rest) (cdr rest)))
|
|
||||||
((and (reduce? (car rest))
|
|
||||||
(< (prod-index (reduce-prod (car rest)))
|
|
||||||
(prod-index (reduce-prod current-guess))))
|
|
||||||
(loop (car rest) (cdr rest)))
|
|
||||||
((accept? (car rest))
|
|
||||||
(eprintf "accept/reduce or accept/shift conflicts. Check the grammar for useless cycles of productions\n")
|
|
||||||
(loop current-guess (cdr rest)))
|
|
||||||
(else (loop current-guess (cdr rest)))))))))
|
|
||||||
|
|
||||||
;; resolve-conflicts : grouped-parse-table bool -> parse-table
|
|
||||||
(define (resolve-conflicts grouped-table suppress)
|
|
||||||
(let* ((SR-conflicts 0)
|
|
||||||
(RR-conflicts 0)
|
|
||||||
(table (table-map
|
|
||||||
(lambda (gs actions)
|
|
||||||
(let-values (((action SR? RR?)
|
|
||||||
(resolve-conflict actions)))
|
|
||||||
(when SR?
|
|
||||||
(set! SR-conflicts (add1 SR-conflicts)))
|
|
||||||
(when RR?
|
|
||||||
(set! RR-conflicts (add1 RR-conflicts)))
|
|
||||||
action))
|
|
||||||
grouped-table)))
|
|
||||||
(unless suppress
|
|
||||||
(when (> SR-conflicts 0)
|
|
||||||
(eprintf "~a shift/reduce conflict~a\n"
|
|
||||||
SR-conflicts
|
|
||||||
(if (= SR-conflicts 1) "" "s")))
|
|
||||||
(when (> RR-conflicts 0)
|
|
||||||
(eprintf "~a reduce/reduce conflict~a\n"
|
|
||||||
RR-conflicts
|
|
||||||
(if (= RR-conflicts 1) "" "s"))))
|
|
||||||
table))
|
|
||||||
|
|
||||||
|
|
||||||
;; resolve-sr-conflict : (listof action) (union int #f) -> (listof action)
|
|
||||||
;; Resolves a single shift-reduce conflict, if precedences are in place.
|
|
||||||
(define (resolve-sr-conflict/prec actions shift-prec)
|
|
||||||
(let* ((shift (if (shift? (car actions))
|
|
||||||
(car actions)
|
|
||||||
(cadr actions)))
|
|
||||||
(reduce (if (shift? (car actions))
|
|
||||||
(cadr actions)
|
|
||||||
(car actions)))
|
|
||||||
(reduce-prec (prod-prec (reduce-prod reduce))))
|
|
||||||
(cond
|
|
||||||
((and shift-prec reduce-prec)
|
|
||||||
(cond
|
|
||||||
((< (prec-num shift-prec) (prec-num reduce-prec))
|
|
||||||
(list reduce))
|
|
||||||
((> (prec-num shift-prec) (prec-num reduce-prec))
|
|
||||||
(list shift))
|
|
||||||
((eq? 'left (prec-assoc shift-prec))
|
|
||||||
(list reduce))
|
|
||||||
((eq? 'right (prec-assoc shift-prec))
|
|
||||||
(list shift))
|
|
||||||
(else null)))
|
|
||||||
(else actions))))
|
|
||||||
|
|
||||||
|
|
||||||
;; resolve-prec-conflicts : parse-table -> grouped-parse-table
|
|
||||||
(define (resolve-prec-conflicts table)
|
|
||||||
(table-map
|
|
||||||
(lambda (gs actions)
|
|
||||||
(cond
|
|
||||||
((and (term? gs)
|
|
||||||
(= 2 (length actions))
|
|
||||||
(or (shift? (car actions))
|
|
||||||
(shift? (cadr actions))))
|
|
||||||
(resolve-sr-conflict/prec actions (term-prec gs)))
|
|
||||||
(else actions)))
|
|
||||||
(group-table table)))
|
|
||||||
|
|
||||||
;; build-table: grammar string bool -> parse-table
|
|
||||||
(define (build-table g file suppress)
|
|
||||||
(let* ((a (build-lr0-automaton g))
|
|
||||||
(term-vector (list->vector (send g get-terms)))
|
|
||||||
(end-terms (send g get-end-terms))
|
|
||||||
(table (make-parse-table (send a get-num-states)))
|
|
||||||
(get-lookahead (compute-LA a g))
|
|
||||||
(reduce-cache (make-hash)))
|
|
||||||
|
|
||||||
(for-each
|
|
||||||
(lambda (trans-key/state)
|
|
||||||
(let ((from-state-index (kernel-index (trans-key-st (car trans-key/state))))
|
|
||||||
(gs (trans-key-gs (car trans-key/state)))
|
|
||||||
(to-state (cdr trans-key/state)))
|
|
||||||
(table-add! table from-state-index gs
|
|
||||||
(cond
|
|
||||||
((non-term? gs)
|
|
||||||
(make-goto (kernel-index to-state)))
|
|
||||||
((member gs end-terms)
|
|
||||||
(make-accept))
|
|
||||||
(else
|
|
||||||
(make-shift
|
|
||||||
(kernel-index to-state)))))))
|
|
||||||
(send a get-transitions))
|
|
||||||
|
|
||||||
(send a for-each-state
|
|
||||||
(lambda (state)
|
|
||||||
(for-each
|
|
||||||
(lambda (item)
|
|
||||||
(let ((item-prod (item-prod item)))
|
|
||||||
(bit-vector-for-each
|
|
||||||
(lambda (term-index)
|
|
||||||
(unless (start-item? item)
|
|
||||||
(let ((r (hash-ref reduce-cache item-prod
|
|
||||||
(lambda ()
|
|
||||||
(let ((r (make-reduce item-prod)))
|
|
||||||
(hash-set! reduce-cache item-prod r)
|
|
||||||
r)))))
|
|
||||||
(table-add! table
|
|
||||||
(kernel-index state)
|
|
||||||
(vector-ref term-vector term-index)
|
|
||||||
r))))
|
|
||||||
(get-lookahead state item-prod))))
|
|
||||||
(append (hash-ref (send a get-epsilon-trans) state (lambda () null))
|
|
||||||
(filter (lambda (item)
|
|
||||||
(not (move-dot-right item)))
|
|
||||||
(kernel-items state))))))
|
|
||||||
|
|
||||||
(let ((grouped-table (resolve-prec-conflicts table)))
|
|
||||||
(unless (string=? file "")
|
|
||||||
(with-handlers [(exn:fail:filesystem?
|
|
||||||
(lambda (e)
|
|
||||||
(eprintf
|
|
||||||
"Cannot write debug output to file \"~a\": ~a\n"
|
|
||||||
file
|
|
||||||
(exn-message e))))]
|
|
||||||
(call-with-output-file file
|
|
||||||
(lambda (port)
|
|
||||||
(display-parser a grouped-table (send g get-prods) port))
|
|
||||||
#:exists 'truncate)))
|
|
||||||
(resolve-conflicts grouped-table suppress))))
|
|
@ -1,118 +0,0 @@
|
|||||||
(module yacc-helper mzscheme
|
|
||||||
|
|
||||||
(require mzlib/list
|
|
||||||
"../private-lex/token-syntax.rkt")
|
|
||||||
|
|
||||||
;; General helper routines
|
|
||||||
|
|
||||||
(provide duplicate-list? remove-duplicates overlap? vector-andmap display-yacc)
|
|
||||||
|
|
||||||
(define (vector-andmap f v)
|
|
||||||
(let loop ((i 0))
|
|
||||||
(cond
|
|
||||||
((= i (vector-length v)) #t)
|
|
||||||
(else (if (f (vector-ref v i))
|
|
||||||
(loop (add1 i))
|
|
||||||
#f)))))
|
|
||||||
|
|
||||||
;; duplicate-list?: symbol list -> #f | symbol
|
|
||||||
;; returns a symbol that exists twice in l, or false if no such symbol
|
|
||||||
;; exists
|
|
||||||
(define (duplicate-list? l)
|
|
||||||
(letrec ((t (make-hash-table))
|
|
||||||
(dl? (lambda (l)
|
|
||||||
(cond
|
|
||||||
((null? l) #f)
|
|
||||||
((hash-table-get t (car l) (lambda () #f)) =>
|
|
||||||
(lambda (x) x))
|
|
||||||
(else
|
|
||||||
(hash-table-put! t (car l) (car l))
|
|
||||||
(dl? (cdr l)))))))
|
|
||||||
(dl? l)))
|
|
||||||
|
|
||||||
;; remove-duplicates: syntax-object list -> syntax-object list
|
|
||||||
;; removes the duplicates from the lists
|
|
||||||
(define (remove-duplicates sl)
|
|
||||||
(let ((t (make-hash-table)))
|
|
||||||
(letrec ((x
|
|
||||||
(lambda (sl)
|
|
||||||
(cond
|
|
||||||
((null? sl) sl)
|
|
||||||
((hash-table-get t (syntax-object->datum (car sl)) (lambda () #f))
|
|
||||||
(x (cdr sl)))
|
|
||||||
(else
|
|
||||||
(hash-table-put! t (syntax-object->datum (car sl)) #t)
|
|
||||||
(cons (car sl) (x (cdr sl))))))))
|
|
||||||
(x sl))))
|
|
||||||
|
|
||||||
;; overlap?: symbol list * symbol list -> #f | symbol
|
|
||||||
;; Returns an symbol in l1 intersect l2, or #f is no such symbol exists
|
|
||||||
(define (overlap? l1 l2)
|
|
||||||
(let/ec ret
|
|
||||||
(let ((t (make-hash-table)))
|
|
||||||
(for-each (lambda (s1)
|
|
||||||
(hash-table-put! t s1 s1))
|
|
||||||
l1)
|
|
||||||
(for-each (lambda (s2)
|
|
||||||
(cond
|
|
||||||
((hash-table-get t s2 (lambda () #f)) =>
|
|
||||||
(lambda (o) (ret o)))))
|
|
||||||
l2)
|
|
||||||
#f)))
|
|
||||||
|
|
||||||
|
|
||||||
(define (display-yacc grammar tokens start precs port)
|
|
||||||
(let-syntax ((p (syntax-rules ()
|
|
||||||
((_ args ...) (fprintf port args ...)))))
|
|
||||||
(let* ((tokens (map syntax-local-value tokens))
|
|
||||||
(eterms (filter e-terminals-def? tokens))
|
|
||||||
(terms (filter terminals-def? tokens))
|
|
||||||
(term-table (make-hash-table))
|
|
||||||
(display-rhs
|
|
||||||
(lambda (rhs)
|
|
||||||
(for-each (lambda (sym) (p "~a " (hash-table-get term-table sym (lambda () sym))))
|
|
||||||
(car rhs))
|
|
||||||
(if (= 3 (length rhs))
|
|
||||||
(p "%prec ~a" (cadadr rhs)))
|
|
||||||
(p "\n"))))
|
|
||||||
(for-each
|
|
||||||
(lambda (t)
|
|
||||||
(for-each
|
|
||||||
(lambda (t)
|
|
||||||
(hash-table-put! term-table t (format "'~a'" t)))
|
|
||||||
(syntax-object->datum (e-terminals-def-t t))))
|
|
||||||
eterms)
|
|
||||||
(for-each
|
|
||||||
(lambda (t)
|
|
||||||
(for-each
|
|
||||||
(lambda (t)
|
|
||||||
(p "%token ~a\n" t)
|
|
||||||
(hash-table-put! term-table t (format "~a" t)))
|
|
||||||
(syntax-object->datum (terminals-def-t t))))
|
|
||||||
terms)
|
|
||||||
(if precs
|
|
||||||
(for-each (lambda (prec)
|
|
||||||
(p "%~a " (car prec))
|
|
||||||
(for-each (lambda (tok)
|
|
||||||
(p " ~a" (hash-table-get term-table tok)))
|
|
||||||
(cdr prec))
|
|
||||||
(p "\n"))
|
|
||||||
precs))
|
|
||||||
(p "%start ~a\n" start)
|
|
||||||
(p "%%\n")
|
|
||||||
|
|
||||||
(for-each (lambda (prod)
|
|
||||||
(let ((nt (car prod)))
|
|
||||||
(p "~a: " nt)
|
|
||||||
(display-rhs (cadr prod))
|
|
||||||
(for-each (lambda (rhs)
|
|
||||||
(p "| ")
|
|
||||||
(display-rhs rhs))
|
|
||||||
(cddr prod))
|
|
||||||
(p ";\n")))
|
|
||||||
grammar)
|
|
||||||
(p "%%\n"))))
|
|
||||||
|
|
||||||
|
|
||||||
)
|
|
||||||
|
|
@ -1,135 +0,0 @@
|
|||||||
(module yacc-to-scheme mzscheme
|
|
||||||
(require br-parser-tools/lex
|
|
||||||
(prefix : br-parser-tools/lex-sre)
|
|
||||||
br-parser-tools/yacc
|
|
||||||
syntax/readerr
|
|
||||||
mzlib/list)
|
|
||||||
(provide trans)
|
|
||||||
|
|
||||||
(define match-double-string
|
|
||||||
(lexer
|
|
||||||
((:+ (:~ #\" #\\)) (append (string->list lexeme)
|
|
||||||
(match-double-string input-port)))
|
|
||||||
((:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port)))
|
|
||||||
(#\" null)))
|
|
||||||
|
|
||||||
(define match-single-string
|
|
||||||
(lexer
|
|
||||||
((:+ (:~ #\' #\\)) (append (string->list lexeme)
|
|
||||||
(match-single-string input-port)))
|
|
||||||
((:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port)))
|
|
||||||
(#\' null)))
|
|
||||||
|
|
||||||
(define-lex-abbrevs
|
|
||||||
(letter (:or (:/ "a" "z") (:/ "A" "Z")))
|
|
||||||
(digit (:/ "0" "9"))
|
|
||||||
(initial (:or letter (char-set "!$%&*/<=>?^_~@")))
|
|
||||||
(subsequent (:or initial digit (char-set "+-.@")))
|
|
||||||
(comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/")))
|
|
||||||
|
|
||||||
(define-empty-tokens x
|
|
||||||
(EOF PIPE |:| SEMI |%%| %prec))
|
|
||||||
(define-tokens y
|
|
||||||
(SYM STRING))
|
|
||||||
|
|
||||||
(define get-token-grammar
|
|
||||||
(lexer-src-pos
|
|
||||||
("%%" '|%%|)
|
|
||||||
(":" (string->symbol lexeme))
|
|
||||||
("%prec" (string->symbol lexeme))
|
|
||||||
(#\| 'PIPE)
|
|
||||||
((:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}")))
|
|
||||||
(return-without-pos (get-token-grammar input-port)))
|
|
||||||
(#\; 'SEMI)
|
|
||||||
(#\' (token-STRING (string->symbol (list->string (match-single-string input-port)))))
|
|
||||||
(#\" (token-STRING (string->symbol (list->string (match-double-string input-port)))))
|
|
||||||
((:: initial (:* subsequent)) (token-SYM (string->symbol lexeme)))))
|
|
||||||
|
|
||||||
(define (parse-grammar enter-term enter-empty-term enter-non-term)
|
|
||||||
(parser
|
|
||||||
(tokens x y)
|
|
||||||
(src-pos)
|
|
||||||
(error (lambda (tok-ok tok-name tok-value start-pos end-pos)
|
|
||||||
(raise-read-error
|
|
||||||
(format "Error Parsing YACC grammar at token: ~a with value: ~a" tok-name tok-value)
|
|
||||||
(file-path)
|
|
||||||
(position-line start-pos)
|
|
||||||
(position-col start-pos)
|
|
||||||
(position-offset start-pos)
|
|
||||||
(- (position-offset end-pos) (position-offset start-pos)))))
|
|
||||||
|
|
||||||
(end |%%|)
|
|
||||||
(start gram)
|
|
||||||
(grammar
|
|
||||||
(gram
|
|
||||||
((production) (list $1))
|
|
||||||
((production gram) (cons $1 $2)))
|
|
||||||
(production
|
|
||||||
((SYM |:| prods SEMI)
|
|
||||||
(begin
|
|
||||||
(enter-non-term $1)
|
|
||||||
(cons $1 $3))))
|
|
||||||
(prods
|
|
||||||
((rhs) (list `(,$1 #f)))
|
|
||||||
((rhs prec) (list `(,$1 ,$2 #f)))
|
|
||||||
((rhs PIPE prods) (cons `(,$1 #f) $3))
|
|
||||||
((rhs prec PIPE prods) (cons `(,$1 ,$2 #f) $4)))
|
|
||||||
(prec
|
|
||||||
((%prec SYM)
|
|
||||||
(begin
|
|
||||||
(enter-term $2)
|
|
||||||
(list 'prec $2)))
|
|
||||||
((%prec STRING)
|
|
||||||
(begin
|
|
||||||
(enter-empty-term $2)
|
|
||||||
(list 'prec $2))))
|
|
||||||
(rhs
|
|
||||||
(() null)
|
|
||||||
((SYM rhs)
|
|
||||||
(begin
|
|
||||||
(enter-term $1)
|
|
||||||
(cons $1 $2)))
|
|
||||||
((STRING rhs)
|
|
||||||
(begin
|
|
||||||
(enter-empty-term $1)
|
|
||||||
(cons $1 $2)))))))
|
|
||||||
|
|
||||||
(define (symbol<? a b)
|
|
||||||
(string<? (symbol->string a) (symbol->string b)))
|
|
||||||
|
|
||||||
(define (trans filename)
|
|
||||||
(let* ((i (open-input-file filename))
|
|
||||||
(terms (make-hash-table))
|
|
||||||
(eterms (make-hash-table))
|
|
||||||
(nterms (make-hash-table))
|
|
||||||
(enter-term
|
|
||||||
(lambda (s)
|
|
||||||
(if (not (hash-table-get nterms s (lambda () #f)))
|
|
||||||
(hash-table-put! terms s #t))))
|
|
||||||
(enter-empty-term
|
|
||||||
(lambda (s)
|
|
||||||
(if (not (hash-table-get nterms s (lambda () #f)))
|
|
||||||
(hash-table-put! eterms s #t))))
|
|
||||||
(enter-non-term
|
|
||||||
(lambda (s)
|
|
||||||
(hash-table-remove! terms s)
|
|
||||||
(hash-table-remove! eterms s)
|
|
||||||
(hash-table-put! nterms s #t))))
|
|
||||||
(port-count-lines! i)
|
|
||||||
(file-path filename)
|
|
||||||
(regexp-match "%%" i)
|
|
||||||
(begin0
|
|
||||||
(let ((gram ((parse-grammar enter-term enter-empty-term enter-non-term)
|
|
||||||
(lambda ()
|
|
||||||
(let ((t (get-token-grammar i)))
|
|
||||||
t)))))
|
|
||||||
`(begin
|
|
||||||
(define-tokens t ,(sort (hash-table-map terms (lambda (k v) k)) symbol<?))
|
|
||||||
(define-empty-tokens et ,(sort (hash-table-map eterms (lambda (k v) k)) symbol<?))
|
|
||||||
(parser
|
|
||||||
(start ___)
|
|
||||||
(end ___)
|
|
||||||
(error ___)
|
|
||||||
(tokens t et)
|
|
||||||
(grammar ,@gram))))
|
|
||||||
(close-input-port i)))))
|
|
@ -1,412 +0,0 @@
|
|||||||
#lang scheme/base
|
|
||||||
|
|
||||||
(require (for-syntax scheme/base
|
|
||||||
"private-yacc/parser-builder.rkt"
|
|
||||||
"private-yacc/grammar.rkt"
|
|
||||||
"private-yacc/yacc-helper.rkt"
|
|
||||||
"private-yacc/parser-actions.rkt"))
|
|
||||||
(require "private-lex/token.rkt"
|
|
||||||
"private-yacc/parser-actions.rkt"
|
|
||||||
mzlib/etc
|
|
||||||
mzlib/pretty
|
|
||||||
syntax/readerr)
|
|
||||||
|
|
||||||
(provide parser)
|
|
||||||
|
|
||||||
|
|
||||||
;; convert-parse-table : (vectorof (listof (cons/c gram-sym? action?))) ->
|
|
||||||
;; (vectorof (symbol runtime-action hashtable))
|
|
||||||
(define-for-syntax (convert-parse-table table)
|
|
||||||
(list->vector
|
|
||||||
(map
|
|
||||||
(lambda (state-entry)
|
|
||||||
(let ((ht (make-hasheq)))
|
|
||||||
(for-each
|
|
||||||
(lambda (gs/action)
|
|
||||||
(hash-set! ht
|
|
||||||
(gram-sym-symbol (car gs/action))
|
|
||||||
(action->runtime-action (cdr gs/action))))
|
|
||||||
state-entry)
|
|
||||||
ht))
|
|
||||||
(vector->list table))))
|
|
||||||
|
|
||||||
(define-syntax (parser stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
((_ args ...)
|
|
||||||
(let ((arg-list (syntax->list (syntax (args ...))))
|
|
||||||
(src-pos #f)
|
|
||||||
(debug #f)
|
|
||||||
(error #f)
|
|
||||||
(tokens #f)
|
|
||||||
(start #f)
|
|
||||||
(end #f)
|
|
||||||
(precs #f)
|
|
||||||
(suppress #f)
|
|
||||||
(grammar #f)
|
|
||||||
(yacc-output #f))
|
|
||||||
(for-each
|
|
||||||
(lambda (arg)
|
|
||||||
(syntax-case* arg (debug error tokens start end precs grammar
|
|
||||||
suppress src-pos yacc-output)
|
|
||||||
(lambda (a b)
|
|
||||||
(eq? (syntax-e a) (syntax-e b)))
|
|
||||||
((debug filename)
|
|
||||||
(cond
|
|
||||||
((not (string? (syntax-e (syntax filename))))
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
"Debugging filename must be a string"
|
|
||||||
stx
|
|
||||||
(syntax filename)))
|
|
||||||
(debug
|
|
||||||
(raise-syntax-error #f "Multiple debug declarations" stx))
|
|
||||||
(else
|
|
||||||
(set! debug (syntax-e (syntax filename))))))
|
|
||||||
((suppress)
|
|
||||||
(set! suppress #t))
|
|
||||||
((src-pos)
|
|
||||||
(set! src-pos #t))
|
|
||||||
((error expression)
|
|
||||||
(if error
|
|
||||||
(raise-syntax-error #f "Multiple error declarations" stx)
|
|
||||||
(set! error (syntax expression))))
|
|
||||||
((tokens def ...)
|
|
||||||
(begin
|
|
||||||
(when tokens
|
|
||||||
(raise-syntax-error #f "Multiple tokens declarations" stx))
|
|
||||||
(let ((defs (syntax->list (syntax (def ...)))))
|
|
||||||
(for-each
|
|
||||||
(lambda (d)
|
|
||||||
(unless (identifier? d)
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
"Token-group name must be an identifier"
|
|
||||||
stx
|
|
||||||
d)))
|
|
||||||
defs)
|
|
||||||
(set! tokens defs))))
|
|
||||||
((start symbol ...)
|
|
||||||
(let ((symbols (syntax->list (syntax (symbol ...)))))
|
|
||||||
(for-each
|
|
||||||
(lambda (sym)
|
|
||||||
(unless (identifier? sym)
|
|
||||||
(raise-syntax-error #f
|
|
||||||
"Start symbol must be a symbol"
|
|
||||||
stx
|
|
||||||
sym)))
|
|
||||||
symbols)
|
|
||||||
(when start
|
|
||||||
(raise-syntax-error #f "Multiple start declarations" stx))
|
|
||||||
(when (null? symbols)
|
|
||||||
(raise-syntax-error #f
|
|
||||||
"Missing start symbol"
|
|
||||||
stx
|
|
||||||
arg))
|
|
||||||
(set! start symbols)))
|
|
||||||
((end symbols ...)
|
|
||||||
(let ((symbols (syntax->list (syntax (symbols ...)))))
|
|
||||||
(for-each
|
|
||||||
(lambda (sym)
|
|
||||||
(unless (identifier? sym)
|
|
||||||
(raise-syntax-error #f
|
|
||||||
"End token must be a symbol"
|
|
||||||
stx
|
|
||||||
sym)))
|
|
||||||
symbols)
|
|
||||||
(let ((d (duplicate-list? (map syntax-e symbols))))
|
|
||||||
(when d
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
(format "Duplicate end token definition for ~a" d)
|
|
||||||
stx
|
|
||||||
arg))
|
|
||||||
(when (null? symbols)
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
"end declaration must contain at least 1 token"
|
|
||||||
stx
|
|
||||||
arg))
|
|
||||||
(when end
|
|
||||||
(raise-syntax-error #f "Multiple end declarations" stx))
|
|
||||||
(set! end symbols))))
|
|
||||||
((precs decls ...)
|
|
||||||
(if precs
|
|
||||||
(raise-syntax-error #f "Multiple precs declarations" stx)
|
|
||||||
(set! precs (syntax/loc arg (decls ...)))))
|
|
||||||
((grammar prods ...)
|
|
||||||
(if grammar
|
|
||||||
(raise-syntax-error #f "Multiple grammar declarations" stx)
|
|
||||||
(set! grammar (syntax/loc arg (prods ...)))))
|
|
||||||
((yacc-output filename)
|
|
||||||
(cond
|
|
||||||
((not (string? (syntax-e (syntax filename))))
|
|
||||||
(raise-syntax-error #f
|
|
||||||
"Yacc-output filename must be a string"
|
|
||||||
stx
|
|
||||||
(syntax filename)))
|
|
||||||
(yacc-output
|
|
||||||
(raise-syntax-error #f "Multiple yacc-output declarations" stx))
|
|
||||||
(else
|
|
||||||
(set! yacc-output (syntax-e (syntax filename))))))
|
|
||||||
(_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg))))
|
|
||||||
(syntax->list (syntax (args ...))))
|
|
||||||
(unless tokens
|
|
||||||
(raise-syntax-error #f "missing tokens declaration" stx))
|
|
||||||
(unless error
|
|
||||||
(raise-syntax-error #f "missing error declaration" stx))
|
|
||||||
(unless grammar
|
|
||||||
(raise-syntax-error #f "missing grammar declaration" stx))
|
|
||||||
(unless end
|
|
||||||
(raise-syntax-error #f "missing end declaration" stx))
|
|
||||||
(unless start
|
|
||||||
(raise-syntax-error #f "missing start declaration" stx))
|
|
||||||
(let-values (((table all-term-syms actions check-syntax-fix)
|
|
||||||
(build-parser (if debug debug "")
|
|
||||||
src-pos
|
|
||||||
suppress
|
|
||||||
tokens
|
|
||||||
start
|
|
||||||
end
|
|
||||||
precs
|
|
||||||
grammar)))
|
|
||||||
(when (and yacc-output (not (string=? yacc-output "")))
|
|
||||||
(with-handlers [(exn:fail:filesystem?
|
|
||||||
(lambda (e)
|
|
||||||
(eprintf
|
|
||||||
"Cannot write yacc-output to file \"~a\"\n"
|
|
||||||
yacc-output)))]
|
|
||||||
(call-with-output-file yacc-output
|
|
||||||
(lambda (port)
|
|
||||||
(display-yacc (syntax->datum grammar)
|
|
||||||
tokens
|
|
||||||
(map syntax->datum start)
|
|
||||||
(if precs
|
|
||||||
(syntax->datum precs)
|
|
||||||
#f)
|
|
||||||
port))
|
|
||||||
#:exists 'truncate)))
|
|
||||||
(with-syntax ((check-syntax-fix check-syntax-fix)
|
|
||||||
(err error)
|
|
||||||
(ends end)
|
|
||||||
(starts start)
|
|
||||||
(debug debug)
|
|
||||||
(table (convert-parse-table table))
|
|
||||||
(all-term-syms all-term-syms)
|
|
||||||
(actions actions)
|
|
||||||
(src-pos src-pos))
|
|
||||||
(syntax
|
|
||||||
(begin
|
|
||||||
check-syntax-fix
|
|
||||||
(parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos)))))))
|
|
||||||
(_
|
|
||||||
(raise-syntax-error #f
|
|
||||||
"parser must have the form (parser args ...)"
|
|
||||||
stx))))
|
|
||||||
|
|
||||||
(define (reduce-stack stack num ret-vals src-pos)
|
|
||||||
(cond
|
|
||||||
((> num 0)
|
|
||||||
(let* ((top-frame (car stack))
|
|
||||||
(ret-vals
|
|
||||||
(if src-pos
|
|
||||||
(cons (stack-frame-value top-frame)
|
|
||||||
(cons (stack-frame-start-pos top-frame)
|
|
||||||
(cons (stack-frame-end-pos top-frame)
|
|
||||||
ret-vals)))
|
|
||||||
(cons (stack-frame-value top-frame) ret-vals))))
|
|
||||||
(reduce-stack (cdr stack) (sub1 num) ret-vals src-pos)))
|
|
||||||
(else (values stack ret-vals))))
|
|
||||||
|
|
||||||
;; extract-helper : (symbol or make-token) any any -> symbol any any any
|
|
||||||
(define (extract-helper tok v1 v2)
|
|
||||||
(cond
|
|
||||||
((symbol? tok)
|
|
||||||
(values tok #f v1 v2))
|
|
||||||
((token? tok)
|
|
||||||
(values (real-token-name tok) (real-token-value tok) v1 v2))
|
|
||||||
(else (raise-argument-error 'parser
|
|
||||||
"(or/c symbol? token?)"
|
|
||||||
0
|
|
||||||
tok))))
|
|
||||||
|
|
||||||
;; well-formed-position-token?: any -> boolean
|
|
||||||
;; Returns true if pt is a position token whose position-token-token
|
|
||||||
;; is itself a token or a symbol.
|
|
||||||
;; This is meant to help raise more precise error messages when
|
|
||||||
;; a tokenizer produces an erroneous position-token wrapped twice.
|
|
||||||
;; (as often happens when omitting return-without-pos).
|
|
||||||
(define (well-formed-token-field? t)
|
|
||||||
(or (symbol? t)
|
|
||||||
(token? t)))
|
|
||||||
|
|
||||||
(define (well-formed-position-token? pt)
|
|
||||||
(and (position-token? pt)
|
|
||||||
(well-formed-token-field? (position-token-token pt))))
|
|
||||||
|
|
||||||
(define (well-formed-srcloc-token? st)
|
|
||||||
(and (srcloc-token? st)
|
|
||||||
(well-formed-token-field? (srcloc-token-token st))))
|
|
||||||
|
|
||||||
;; extract-src-pos : position-token -> symbol any any any
|
|
||||||
(define (extract-src-pos ip)
|
|
||||||
(unless (well-formed-position-token? ip)
|
|
||||||
(raise-argument-error 'parser
|
|
||||||
"well-formed-position-token?"
|
|
||||||
0
|
|
||||||
ip))
|
|
||||||
(extract-helper (position-token-token ip)
|
|
||||||
(position-token-start-pos ip)
|
|
||||||
(position-token-end-pos ip)))
|
|
||||||
|
|
||||||
(define (extract-srcloc ip)
|
|
||||||
(unless (well-formed-srcloc-token? ip)
|
|
||||||
(raise-argument-error 'parser
|
|
||||||
"well-formed-srcloc-token?"
|
|
||||||
0
|
|
||||||
ip))
|
|
||||||
(let ([loc (srcloc-token-srcloc ip)])
|
|
||||||
(extract-helper (srcloc-token-token ip)
|
|
||||||
(position-token (srcloc-position loc) (srcloc-line loc) (srcloc-column loc))
|
|
||||||
(position-token (+ (srcloc-position loc) (srcloc-span loc)) #f #f))))
|
|
||||||
|
|
||||||
|
|
||||||
;; extract-no-src-pos : (symbol or make-token) -> symbol any any any
|
|
||||||
(define (extract-no-src-pos ip)
|
|
||||||
(extract-helper ip #f #f))
|
|
||||||
|
|
||||||
(define-struct stack-frame (state value start-pos end-pos) #:inspector (make-inspector))
|
|
||||||
|
|
||||||
(define (make-empty-stack i) (list (make-stack-frame i #f #f #f)))
|
|
||||||
|
|
||||||
|
|
||||||
;; The table is a vector that maps each state to a hash-table that maps a
|
|
||||||
;; terminal symbol to either an accept, shift, reduce, or goto structure.
|
|
||||||
; We encode the structures according to the runtime-action data definition in
|
|
||||||
;; parser-actions.rkt
|
|
||||||
(define (parser-body debug? err starts ends table all-term-syms actions src-pos)
|
|
||||||
(local ((define extract
|
|
||||||
(if src-pos
|
|
||||||
extract-src-pos
|
|
||||||
extract-no-src-pos))
|
|
||||||
|
|
||||||
(define (fix-error stack tok val start-pos end-pos get-token)
|
|
||||||
(when debug? (pretty-print stack))
|
|
||||||
(local ((define (remove-input tok val start-pos end-pos)
|
|
||||||
(if (memq tok ends)
|
|
||||||
(raise-read-error "parser: Cannot continue after error"
|
|
||||||
#f #f #f #f #f)
|
|
||||||
(let ((a (find-action stack tok val start-pos end-pos)))
|
|
||||||
(cond
|
|
||||||
((runtime-shift? a)
|
|
||||||
;; (printf "shift:~a\n" (runtime-shift-state a))
|
|
||||||
(cons (make-stack-frame (runtime-shift-state a)
|
|
||||||
val
|
|
||||||
start-pos
|
|
||||||
end-pos)
|
|
||||||
stack))
|
|
||||||
(else
|
|
||||||
;; (printf "discard input:~a\n" tok)
|
|
||||||
(let-values (((tok val start-pos end-pos)
|
|
||||||
(extract (get-token))))
|
|
||||||
(remove-input tok val start-pos end-pos))))))))
|
|
||||||
(let remove-states ()
|
|
||||||
(let ((a (find-action stack 'error #f start-pos end-pos)))
|
|
||||||
(cond
|
|
||||||
((runtime-shift? a)
|
|
||||||
;; (printf "shift:~a\n" (runtime-shift-state a))
|
|
||||||
(set! stack
|
|
||||||
(cons
|
|
||||||
(make-stack-frame (runtime-shift-state a)
|
|
||||||
#f
|
|
||||||
start-pos
|
|
||||||
end-pos)
|
|
||||||
stack))
|
|
||||||
(remove-input tok val start-pos end-pos))
|
|
||||||
(else
|
|
||||||
;; (printf "discard state:~a\n" (car stack))
|
|
||||||
(cond
|
|
||||||
((< (length stack) 2)
|
|
||||||
(raise-read-error "parser: Cannot continue after error"
|
|
||||||
#f #f #f #f #f))
|
|
||||||
(else
|
|
||||||
(set! stack (cdr stack))
|
|
||||||
(remove-states)))))))))
|
|
||||||
|
|
||||||
(define (find-action stack tok val start-pos end-pos)
|
|
||||||
(unless (hash-ref all-term-syms
|
|
||||||
tok
|
|
||||||
#f)
|
|
||||||
(if src-pos
|
|
||||||
(err #f tok val start-pos end-pos)
|
|
||||||
(err #f tok val))
|
|
||||||
(raise-read-error (format "parser: got token of unknown type ~a" tok)
|
|
||||||
#f #f #f #f #f))
|
|
||||||
(hash-ref (vector-ref table (stack-frame-state (car stack)))
|
|
||||||
tok
|
|
||||||
#f))
|
|
||||||
|
|
||||||
(define (make-parser start-number)
|
|
||||||
(lambda (get-token)
|
|
||||||
(unless (and (procedure? get-token)
|
|
||||||
(procedure-arity-includes? get-token 0))
|
|
||||||
(error 'get-token "expected a nullary procedure, got ~e" get-token))
|
|
||||||
(let parsing-loop ((stack (make-empty-stack start-number))
|
|
||||||
(ip (get-token)))
|
|
||||||
(let-values (((tok val start-pos end-pos)
|
|
||||||
(extract ip)))
|
|
||||||
(let ((action (find-action stack tok val start-pos end-pos)))
|
|
||||||
(cond
|
|
||||||
((runtime-shift? action)
|
|
||||||
;; (printf "shift:~a\n" (runtime-shift-state action))
|
|
||||||
(parsing-loop (cons (make-stack-frame (runtime-shift-state action)
|
|
||||||
val
|
|
||||||
start-pos
|
|
||||||
end-pos)
|
|
||||||
stack)
|
|
||||||
(get-token)))
|
|
||||||
((runtime-reduce? action)
|
|
||||||
;; (printf "reduce:~a\n" (runtime-reduce-prod-num action))
|
|
||||||
(let-values (((new-stack args)
|
|
||||||
(reduce-stack stack
|
|
||||||
(runtime-reduce-rhs-length action)
|
|
||||||
null
|
|
||||||
src-pos)))
|
|
||||||
(let ((goto
|
|
||||||
(runtime-goto-state
|
|
||||||
(hash-ref
|
|
||||||
(vector-ref table (stack-frame-state (car new-stack)))
|
|
||||||
(runtime-reduce-lhs action)))))
|
|
||||||
(parsing-loop
|
|
||||||
(cons
|
|
||||||
(if src-pos
|
|
||||||
(make-stack-frame
|
|
||||||
goto
|
|
||||||
(apply (vector-ref actions (runtime-reduce-prod-num action)) args)
|
|
||||||
(if (null? args) start-pos (cadr args))
|
|
||||||
(if (null? args)
|
|
||||||
end-pos
|
|
||||||
(list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1))))
|
|
||||||
(make-stack-frame
|
|
||||||
goto
|
|
||||||
(apply (vector-ref actions (runtime-reduce-prod-num action)) args)
|
|
||||||
#f
|
|
||||||
#f))
|
|
||||||
new-stack)
|
|
||||||
ip))))
|
|
||||||
((runtime-accept? action)
|
|
||||||
;; (printf "accept\n")
|
|
||||||
(stack-frame-value (car stack)))
|
|
||||||
(else
|
|
||||||
(if src-pos
|
|
||||||
(err #t tok val start-pos end-pos)
|
|
||||||
(err #t tok val))
|
|
||||||
(parsing-loop (fix-error stack tok val start-pos end-pos get-token)
|
|
||||||
(get-token))))))))))
|
|
||||||
(cond
|
|
||||||
((null? (cdr starts)) (make-parser 0))
|
|
||||||
(else
|
|
||||||
(let loop ((l starts)
|
|
||||||
(i 0))
|
|
||||||
(cond
|
|
||||||
((null? l) null)
|
|
||||||
(else (cons (make-parser i) (loop (cdr l) (add1 i))))))))))
|
|
@ -1,11 +0,0 @@
|
|||||||
#lang info
|
|
||||||
|
|
||||||
(define collection 'multi)
|
|
||||||
(define deps '("scheme-lib"
|
|
||||||
"base"
|
|
||||||
"compatibility-lib"))
|
|
||||||
(define build-deps '("rackunit-lib"))
|
|
||||||
|
|
||||||
(define pkg-desc "implementation (no documentation) part of \"br-parser-tools\"")
|
|
||||||
|
|
||||||
(define pkg-authors '(mflatt))
|
|
@ -1,11 +0,0 @@
|
|||||||
parser-tools
|
|
||||||
Copyright (c) 2010-2014 PLT Design Inc.
|
|
||||||
|
|
||||||
This package is distributed under the GNU Lesser General Public
|
|
||||||
License (LGPL). This means that you can link this package into proprietary
|
|
||||||
applications, provided you follow the rules stated in the LGPL. You
|
|
||||||
can also modify this package; if you distribute a modified version,
|
|
||||||
you must distribute it under the terms of the LGPL, which in
|
|
||||||
particular means that you must release the source code for the
|
|
||||||
modified software. See http://www.gnu.org/copyleft/lesser.html
|
|
||||||
for more information.
|
|
@ -1,12 +0,0 @@
|
|||||||
#lang info
|
|
||||||
|
|
||||||
(define collection 'multi)
|
|
||||||
|
|
||||||
(define deps '("br-parser-tools-lib"
|
|
||||||
"br-parser-tools-doc"))
|
|
||||||
(define implies '("br-parser-tools-lib"
|
|
||||||
"br-parser-tools-doc"))
|
|
||||||
|
|
||||||
(define pkg-desc "Lex- and Yacc-style parsing tools")
|
|
||||||
|
|
||||||
(define pkg-authors '(mflatt))
|
|
@ -1,165 +0,0 @@
|
|||||||
GNU LESSER GENERAL PUBLIC LICENSE
|
|
||||||
Version 3, 29 June 2007
|
|
||||||
|
|
||||||
Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
|
|
||||||
Everyone is permitted to copy and distribute verbatim copies
|
|
||||||
of this license document, but changing it is not allowed.
|
|
||||||
|
|
||||||
|
|
||||||
This version of the GNU Lesser General Public License incorporates
|
|
||||||
the terms and conditions of version 3 of the GNU General Public
|
|
||||||
License, supplemented by the additional permissions listed below.
|
|
||||||
|
|
||||||
0. Additional Definitions.
|
|
||||||
|
|
||||||
As used herein, "this License" refers to version 3 of the GNU Lesser
|
|
||||||
General Public License, and the "GNU GPL" refers to version 3 of the GNU
|
|
||||||
General Public License.
|
|
||||||
|
|
||||||
"The Library" refers to a covered work governed by this License,
|
|
||||||
other than an Application or a Combined Work as defined below.
|
|
||||||
|
|
||||||
An "Application" is any work that makes use of an interface provided
|
|
||||||
by the Library, but which is not otherwise based on the Library.
|
|
||||||
Defining a subclass of a class defined by the Library is deemed a mode
|
|
||||||
of using an interface provided by the Library.
|
|
||||||
|
|
||||||
A "Combined Work" is a work produced by combining or linking an
|
|
||||||
Application with the Library. The particular version of the Library
|
|
||||||
with which the Combined Work was made is also called the "Linked
|
|
||||||
Version".
|
|
||||||
|
|
||||||
The "Minimal Corresponding Source" for a Combined Work means the
|
|
||||||
Corresponding Source for the Combined Work, excluding any source code
|
|
||||||
for portions of the Combined Work that, considered in isolation, are
|
|
||||||
based on the Application, and not on the Linked Version.
|
|
||||||
|
|
||||||
The "Corresponding Application Code" for a Combined Work means the
|
|
||||||
object code and/or source code for the Application, including any data
|
|
||||||
and utility programs needed for reproducing the Combined Work from the
|
|
||||||
Application, but excluding the System Libraries of the Combined Work.
|
|
||||||
|
|
||||||
1. Exception to Section 3 of the GNU GPL.
|
|
||||||
|
|
||||||
You may convey a covered work under sections 3 and 4 of this License
|
|
||||||
without being bound by section 3 of the GNU GPL.
|
|
||||||
|
|
||||||
2. Conveying Modified Versions.
|
|
||||||
|
|
||||||
If you modify a copy of the Library, and, in your modifications, a
|
|
||||||
facility refers to a function or data to be supplied by an Application
|
|
||||||
that uses the facility (other than as an argument passed when the
|
|
||||||
facility is invoked), then you may convey a copy of the modified
|
|
||||||
version:
|
|
||||||
|
|
||||||
a) under this License, provided that you make a good faith effort to
|
|
||||||
ensure that, in the event an Application does not supply the
|
|
||||||
function or data, the facility still operates, and performs
|
|
||||||
whatever part of its purpose remains meaningful, or
|
|
||||||
|
|
||||||
b) under the GNU GPL, with none of the additional permissions of
|
|
||||||
this License applicable to that copy.
|
|
||||||
|
|
||||||
3. Object Code Incorporating Material from Library Header Files.
|
|
||||||
|
|
||||||
The object code form of an Application may incorporate material from
|
|
||||||
a header file that is part of the Library. You may convey such object
|
|
||||||
code under terms of your choice, provided that, if the incorporated
|
|
||||||
material is not limited to numerical parameters, data structure
|
|
||||||
layouts and accessors, or small macros, inline functions and templates
|
|
||||||
(ten or fewer lines in length), you do both of the following:
|
|
||||||
|
|
||||||
a) Give prominent notice with each copy of the object code that the
|
|
||||||
Library is used in it and that the Library and its use are
|
|
||||||
covered by this License.
|
|
||||||
|
|
||||||
b) Accompany the object code with a copy of the GNU GPL and this license
|
|
||||||
document.
|
|
||||||
|
|
||||||
4. Combined Works.
|
|
||||||
|
|
||||||
You may convey a Combined Work under terms of your choice that,
|
|
||||||
taken together, effectively do not restrict modification of the
|
|
||||||
portions of the Library contained in the Combined Work and reverse
|
|
||||||
engineering for debugging such modifications, if you also do each of
|
|
||||||
the following:
|
|
||||||
|
|
||||||
a) Give prominent notice with each copy of the Combined Work that
|
|
||||||
the Library is used in it and that the Library and its use are
|
|
||||||
covered by this License.
|
|
||||||
|
|
||||||
b) Accompany the Combined Work with a copy of the GNU GPL and this license
|
|
||||||
document.
|
|
||||||
|
|
||||||
c) For a Combined Work that displays copyright notices during
|
|
||||||
execution, include the copyright notice for the Library among
|
|
||||||
these notices, as well as a reference directing the user to the
|
|
||||||
copies of the GNU GPL and this license document.
|
|
||||||
|
|
||||||
d) Do one of the following:
|
|
||||||
|
|
||||||
0) Convey the Minimal Corresponding Source under the terms of this
|
|
||||||
License, and the Corresponding Application Code in a form
|
|
||||||
suitable for, and under terms that permit, the user to
|
|
||||||
recombine or relink the Application with a modified version of
|
|
||||||
the Linked Version to produce a modified Combined Work, in the
|
|
||||||
manner specified by section 6 of the GNU GPL for conveying
|
|
||||||
Corresponding Source.
|
|
||||||
|
|
||||||
1) Use a suitable shared library mechanism for linking with the
|
|
||||||
Library. A suitable mechanism is one that (a) uses at run time
|
|
||||||
a copy of the Library already present on the user's computer
|
|
||||||
system, and (b) will operate properly with a modified version
|
|
||||||
of the Library that is interface-compatible with the Linked
|
|
||||||
Version.
|
|
||||||
|
|
||||||
e) Provide Installation Information, but only if you would otherwise
|
|
||||||
be required to provide such information under section 6 of the
|
|
||||||
GNU GPL, and only to the extent that such information is
|
|
||||||
necessary to install and execute a modified version of the
|
|
||||||
Combined Work produced by recombining or relinking the
|
|
||||||
Application with a modified version of the Linked Version. (If
|
|
||||||
you use option 4d0, the Installation Information must accompany
|
|
||||||
the Minimal Corresponding Source and Corresponding Application
|
|
||||||
Code. If you use option 4d1, you must provide the Installation
|
|
||||||
Information in the manner specified by section 6 of the GNU GPL
|
|
||||||
for conveying Corresponding Source.)
|
|
||||||
|
|
||||||
5. Combined Libraries.
|
|
||||||
|
|
||||||
You may place library facilities that are a work based on the
|
|
||||||
Library side by side in a single library together with other library
|
|
||||||
facilities that are not Applications and are not covered by this
|
|
||||||
License, and convey such a combined library under terms of your
|
|
||||||
choice, if you do both of the following:
|
|
||||||
|
|
||||||
a) Accompany the combined library with a copy of the same work based
|
|
||||||
on the Library, uncombined with any other library facilities,
|
|
||||||
conveyed under the terms of this License.
|
|
||||||
|
|
||||||
b) Give prominent notice with the combined library that part of it
|
|
||||||
is a work based on the Library, and explaining where to find the
|
|
||||||
accompanying uncombined form of the same work.
|
|
||||||
|
|
||||||
6. Revised Versions of the GNU Lesser General Public License.
|
|
||||||
|
|
||||||
The Free Software Foundation may publish revised and/or new versions
|
|
||||||
of the GNU Lesser General Public License from time to time. Such new
|
|
||||||
versions will be similar in spirit to the present version, but may
|
|
||||||
differ in detail to address new problems or concerns.
|
|
||||||
|
|
||||||
Each version is given a distinguishing version number. If the
|
|
||||||
Library as you received it specifies that a certain numbered version
|
|
||||||
of the GNU Lesser General Public License "or any later version"
|
|
||||||
applies to it, you have the option of following the terms and
|
|
||||||
conditions either of that published version or of any later version
|
|
||||||
published by the Free Software Foundation. If the Library as you
|
|
||||||
received it does not specify a version number of the GNU Lesser
|
|
||||||
General Public License, you may choose any version of the GNU Lesser
|
|
||||||
General Public License ever published by the Free Software Foundation.
|
|
||||||
|
|
||||||
If the Library as you received it specifies that a proxy can decide
|
|
||||||
whether future versions of the GNU Lesser General Public License shall
|
|
||||||
apply, that proxy's public statement of acceptance of any version is
|
|
||||||
permanent authorization for you to choose that version for the
|
|
||||||
Library.
|
|
@ -1,4 +0,0 @@
|
|||||||
This repo contains a fork of Danny Yoo's RAGG, a Racket AST Generator Generator,
|
|
||||||
also known as a parser generator.
|
|
||||||
|
|
||||||
Licensed under the LGPL.
|
|
@ -1,12 +0,0 @@
|
|||||||
doc:
|
|
||||||
scribble ++xref-in setup/xref load-collections-xref --redirect-main http://docs.racket-lang.org/ --dest-name index.html manual.scrbl
|
|
||||||
|
|
||||||
clean:
|
|
||||||
git clean -fdx .
|
|
||||||
|
|
||||||
web: clean plt doc
|
|
||||||
scp -r * hashcollision.org:webapps/htdocs/ragg/
|
|
||||||
|
|
||||||
|
|
||||||
plt:
|
|
||||||
raco pack --collect ragg.plt ragg
|
|
File diff suppressed because it is too large
Load Diff
@ -1,921 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
;; This module implements a parser form like the br-parser-tools's
|
|
||||||
;; `parser', except that it works on an arbitrary CFG (returning
|
|
||||||
;; the first sucecssful parse).
|
|
||||||
|
|
||||||
;; I'm pretty sure that this is an implementation of Earley's
|
|
||||||
;; algorithm.
|
|
||||||
|
|
||||||
;; To a first approximation, it's a backtracking parser. Alternative
|
|
||||||
;; for a non-terminal are computed in parallel, and multiple attempts
|
|
||||||
;; to compute the same result block until the first one completes. If
|
|
||||||
;; you get into deadlock, such as when trying to match
|
|
||||||
;; <foo> := <foo>
|
|
||||||
;; then it means that there's no successful parse, so everything
|
|
||||||
;; that's blocked fails.
|
|
||||||
|
|
||||||
;; A cache holds the series of results for a particular non-terminal
|
|
||||||
;; at a particular starting location. (A series is used, instead of a
|
|
||||||
;; sinlge result, for backtracking.) Otherwise, the parser uses
|
|
||||||
;; backtracking search. Backtracking is implemented through explicit
|
|
||||||
;; success and failure continuations. Multiple results for a
|
|
||||||
;; particular nonterminal and location are kept only when they have
|
|
||||||
;; different lengths. (Otherwise, in the spirit of finding one
|
|
||||||
;; successful parse, only the first result is kept.)
|
|
||||||
|
|
||||||
;; The br-parser-tools's `parse' is used to transform tokens in the
|
|
||||||
;; grammar to tokens specific to this parser. In other words, this
|
|
||||||
;; parser uses `parser' so that it doesn't have to know anything about
|
|
||||||
;; tokens.
|
|
||||||
;;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(require br-parser-tools/yacc
|
|
||||||
br-parser-tools/lex)
|
|
||||||
|
|
||||||
(require (for-syntax racket/base
|
|
||||||
syntax/boundmap
|
|
||||||
br-parser-tools/private-lex/token-syntax))
|
|
||||||
|
|
||||||
(provide cfg-parser)
|
|
||||||
|
|
||||||
;; A raw token, wrapped so that we can recognize it:
|
|
||||||
(define-struct tok (name orig-name val start end))
|
|
||||||
|
|
||||||
;; Represents the thread scheduler:
|
|
||||||
(define-struct tasks (active active-back waits multi-waits cache progress?))
|
|
||||||
|
|
||||||
(define-for-syntax make-token-identifier-mapping make-hasheq)
|
|
||||||
(define-for-syntax token-identifier-mapping-get
|
|
||||||
(case-lambda
|
|
||||||
[(t tok)
|
|
||||||
(hash-ref t (syntax-e tok))]
|
|
||||||
[(t tok fail)
|
|
||||||
(hash-ref t (syntax-e tok) fail)]))
|
|
||||||
(define-for-syntax token-identifier-mapping-put!
|
|
||||||
(lambda (t tok v)
|
|
||||||
(hash-set! t (syntax-e tok) v)))
|
|
||||||
(define-for-syntax token-identifier-mapping-map
|
|
||||||
(lambda (t f)
|
|
||||||
(hash-map t f)))
|
|
||||||
|
|
||||||
;; Used to calculate information on the grammar, such as whether
|
|
||||||
;; a particular non-terminal is "simple" instead of recursively defined.
|
|
||||||
(define-for-syntax (nt-fixpoint nts proc nt-ids patss)
|
|
||||||
(define (ormap-all val f as bs)
|
|
||||||
(cond
|
|
||||||
[(null? as) val]
|
|
||||||
[else (ormap-all (or (f (car as) (car bs)) val)
|
|
||||||
f
|
|
||||||
(cdr as) (cdr bs))]))
|
|
||||||
(let loop ()
|
|
||||||
(when (ormap-all #f
|
|
||||||
(lambda (nt pats)
|
|
||||||
(let ([old (bound-identifier-mapping-get nts nt)])
|
|
||||||
(let ([new (proc nt pats old)])
|
|
||||||
(if (equal? old new)
|
|
||||||
#f
|
|
||||||
(begin
|
|
||||||
(bound-identifier-mapping-put! nts nt new)
|
|
||||||
#t)))))
|
|
||||||
nt-ids patss)
|
|
||||||
(loop))))
|
|
||||||
|
|
||||||
;; Tries parse-a followed by parse-b. If parse-a is not simple,
|
|
||||||
;; then after parse-a succeeds once, we parallelize parse-b
|
|
||||||
;; and trying a second result for parse-a.
|
|
||||||
(define (parse-and simple-a? parse-a parse-b
|
|
||||||
stream last-consumed-token depth end success-k fail-k
|
|
||||||
max-depth tasks)
|
|
||||||
(letrec ([mk-got-k
|
|
||||||
(lambda (success-k fail-k)
|
|
||||||
(lambda (val stream last-consumed-token depth max-depth tasks next1-k)
|
|
||||||
(if simple-a?
|
|
||||||
(parse-b val stream last-consumed-token depth end
|
|
||||||
(mk-got2-k success-k fail-k next1-k)
|
|
||||||
(mk-fail2-k success-k fail-k next1-k)
|
|
||||||
max-depth tasks)
|
|
||||||
(parallel-or
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(parse-b val stream last-consumed-token depth end
|
|
||||||
success-k fail-k
|
|
||||||
max-depth tasks))
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(next1-k (mk-got-k success-k fail-k)
|
|
||||||
fail-k max-depth tasks))
|
|
||||||
success-k fail-k max-depth tasks))))]
|
|
||||||
[mk-got2-k
|
|
||||||
(lambda (success-k fail-k next1-k)
|
|
||||||
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
|
||||||
(success-k val stream last-consumed-token depth max-depth tasks
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(next-k (mk-got2-k success-k fail-k next1-k)
|
|
||||||
(mk-fail2-k success-k fail-k next1-k)
|
|
||||||
max-depth tasks)))))]
|
|
||||||
[mk-fail2-k
|
|
||||||
(lambda (success-k fail-k next1-k)
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(next1-k (mk-got-k success-k fail-k)
|
|
||||||
fail-k
|
|
||||||
max-depth
|
|
||||||
tasks)))])
|
|
||||||
(parse-a stream last-consumed-token depth end
|
|
||||||
(mk-got-k success-k fail-k)
|
|
||||||
fail-k
|
|
||||||
max-depth tasks)))
|
|
||||||
|
|
||||||
;; Parallel or for non-terminal alternatives
|
|
||||||
(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
|
||||||
(parallel-or (lambda (success-k fail-k max-depth tasks)
|
|
||||||
(parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks))
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks))
|
|
||||||
success-k fail-k max-depth tasks))
|
|
||||||
|
|
||||||
;; Generic parallel-or
|
|
||||||
(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks)
|
|
||||||
(define answer-key (gensym))
|
|
||||||
(letrec ([gota-k
|
|
||||||
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
|
||||||
(report-answer answer-key
|
|
||||||
max-depth
|
|
||||||
tasks
|
|
||||||
(list val stream last-consumed-token depth next-k)))]
|
|
||||||
[faila-k
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(report-answer answer-key
|
|
||||||
max-depth
|
|
||||||
tasks
|
|
||||||
null))])
|
|
||||||
(let* ([tasks (queue-task
|
|
||||||
tasks
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(parse-a gota-k
|
|
||||||
faila-k
|
|
||||||
max-depth tasks)))]
|
|
||||||
[tasks (queue-task
|
|
||||||
tasks
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(parse-b gota-k
|
|
||||||
faila-k
|
|
||||||
max-depth tasks)))]
|
|
||||||
[queue-next (lambda (next-k tasks)
|
|
||||||
(queue-task tasks
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(next-k gota-k
|
|
||||||
faila-k
|
|
||||||
max-depth tasks))))])
|
|
||||||
(letrec ([mk-got-one
|
|
||||||
(lambda (immediate-next? get-nth success-k)
|
|
||||||
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
|
||||||
(let ([tasks (if immediate-next?
|
|
||||||
(queue-next next-k tasks)
|
|
||||||
tasks)])
|
|
||||||
(success-k val stream last-consumed-token depth max-depth
|
|
||||||
tasks
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(let ([tasks (if immediate-next?
|
|
||||||
tasks
|
|
||||||
(queue-next next-k tasks))])
|
|
||||||
(get-nth max-depth tasks success-k fail-k)))))))]
|
|
||||||
[get-first
|
|
||||||
(lambda (max-depth tasks success-k fail-k)
|
|
||||||
(wait-for-answer #f max-depth tasks answer-key
|
|
||||||
(mk-got-one #t get-first success-k)
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(get-second max-depth tasks success-k fail-k))
|
|
||||||
#f))]
|
|
||||||
[get-second
|
|
||||||
(lambda (max-depth tasks success-k fail-k)
|
|
||||||
(wait-for-answer #f max-depth tasks answer-key
|
|
||||||
(mk-got-one #f get-second success-k)
|
|
||||||
fail-k #f))])
|
|
||||||
(get-first max-depth tasks success-k fail-k)))))
|
|
||||||
|
|
||||||
;; Non-terminal alternatives where the first is "simple" can be done
|
|
||||||
;; sequentially, which is simpler
|
|
||||||
(define (parse-or parse-a parse-b
|
|
||||||
stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
|
||||||
(letrec ([mk-got-k
|
|
||||||
(lambda (success-k fail-k)
|
|
||||||
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
|
||||||
(success-k val stream last-consumed-token depth
|
|
||||||
max-depth tasks
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(next-k (mk-got-k success-k fail-k)
|
|
||||||
(mk-fail-k success-k fail-k)
|
|
||||||
max-depth tasks)))))]
|
|
||||||
[mk-fail-k
|
|
||||||
(lambda (success-k fail-k)
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))])
|
|
||||||
(parse-a stream last-consumed-token depth end
|
|
||||||
(mk-got-k success-k fail-k)
|
|
||||||
(mk-fail-k success-k fail-k)
|
|
||||||
max-depth tasks)))
|
|
||||||
|
|
||||||
;; Starts a thread
|
|
||||||
(define queue-task
|
|
||||||
(lambda (tasks t [progress? #t])
|
|
||||||
(make-tasks (tasks-active tasks)
|
|
||||||
(cons t (tasks-active-back tasks))
|
|
||||||
(tasks-waits tasks)
|
|
||||||
(tasks-multi-waits tasks)
|
|
||||||
(tasks-cache tasks)
|
|
||||||
(or progress? (tasks-progress? tasks)))))
|
|
||||||
|
|
||||||
;; Reports an answer to a waiting thread:
|
|
||||||
(define (report-answer answer-key max-depth tasks val)
|
|
||||||
(let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))])
|
|
||||||
(if v
|
|
||||||
(let ([tasks (make-tasks (cons (v val)
|
|
||||||
(tasks-active tasks))
|
|
||||||
(tasks-active-back tasks)
|
|
||||||
(tasks-waits tasks)
|
|
||||||
(tasks-multi-waits tasks)
|
|
||||||
(tasks-cache tasks)
|
|
||||||
#t)])
|
|
||||||
(hash-remove! (tasks-waits tasks) answer-key)
|
|
||||||
(swap-task max-depth tasks))
|
|
||||||
;; We have an answer ready too fast; wait
|
|
||||||
(swap-task max-depth
|
|
||||||
(queue-task tasks
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(report-answer answer-key max-depth tasks val))
|
|
||||||
#f)))))
|
|
||||||
|
|
||||||
;; Reports an answer to multiple waiting threads:
|
|
||||||
(define (report-answer-all answer-key max-depth tasks val k)
|
|
||||||
(let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))])
|
|
||||||
(hash-remove! (tasks-multi-waits tasks) answer-key)
|
|
||||||
(let ([tasks (make-tasks (append (map (lambda (a) (a val)) v)
|
|
||||||
(tasks-active tasks))
|
|
||||||
(tasks-active-back tasks)
|
|
||||||
(tasks-waits tasks)
|
|
||||||
(tasks-multi-waits tasks)
|
|
||||||
(tasks-cache tasks)
|
|
||||||
#t)])
|
|
||||||
(k max-depth tasks))))
|
|
||||||
|
|
||||||
;; Waits for an answer; if `multi?' is #f, this is sole waiter, otherwise
|
|
||||||
;; there might be many. Use wither #t or #f (and `report-answer' or
|
|
||||||
;; `report-answer-all', resptively) consistently for a particular answer key.
|
|
||||||
(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k)
|
|
||||||
(let ([wait (lambda (val)
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(if val
|
|
||||||
(if (null? val)
|
|
||||||
(fail-k max-depth tasks)
|
|
||||||
(let-values ([(val stream last-consumed-token depth next-k) (apply values val)])
|
|
||||||
(success-k val stream last-consumed-token depth max-depth tasks next-k)))
|
|
||||||
(deadlock-k max-depth tasks))))])
|
|
||||||
(if multi?
|
|
||||||
(hash-set! (tasks-multi-waits tasks) answer-key
|
|
||||||
(cons wait (hash-ref (tasks-multi-waits tasks) answer-key
|
|
||||||
(lambda () null))))
|
|
||||||
(hash-set! (tasks-waits tasks) answer-key wait))
|
|
||||||
(let ([tasks (make-tasks (tasks-active tasks)
|
|
||||||
(tasks-active-back tasks)
|
|
||||||
(tasks-waits tasks)
|
|
||||||
(tasks-multi-waits tasks)
|
|
||||||
(tasks-cache tasks)
|
|
||||||
#t)])
|
|
||||||
(swap-task max-depth tasks))))
|
|
||||||
|
|
||||||
;; Swap thread
|
|
||||||
(define (swap-task max-depth tasks)
|
|
||||||
;; Swap in first active:
|
|
||||||
(if (null? (tasks-active tasks))
|
|
||||||
(if (tasks-progress? tasks)
|
|
||||||
(swap-task max-depth
|
|
||||||
(make-tasks (reverse (tasks-active-back tasks))
|
|
||||||
null
|
|
||||||
(tasks-waits tasks)
|
|
||||||
(tasks-multi-waits tasks)
|
|
||||||
(tasks-cache tasks)
|
|
||||||
#f))
|
|
||||||
;; No progress, so issue failure for all multi-waits
|
|
||||||
(if (zero? (hash-count (tasks-multi-waits tasks)))
|
|
||||||
(error 'swap-task "Deadlock")
|
|
||||||
(swap-task max-depth
|
|
||||||
(make-tasks (apply
|
|
||||||
append
|
|
||||||
(hash-map (tasks-multi-waits tasks)
|
|
||||||
(lambda (k l)
|
|
||||||
(map (lambda (v) (v #f)) l))))
|
|
||||||
(tasks-active-back tasks)
|
|
||||||
(tasks-waits tasks)
|
|
||||||
(make-hasheq)
|
|
||||||
(tasks-cache tasks)
|
|
||||||
#t))))
|
|
||||||
(let ([t (car (tasks-active tasks))]
|
|
||||||
[tasks (make-tasks (cdr (tasks-active tasks))
|
|
||||||
(tasks-active-back tasks)
|
|
||||||
(tasks-waits tasks)
|
|
||||||
(tasks-multi-waits tasks)
|
|
||||||
(tasks-cache tasks)
|
|
||||||
(tasks-progress? tasks))])
|
|
||||||
(t max-depth tasks))))
|
|
||||||
|
|
||||||
;; Finds the symbolic representative of a token class
|
|
||||||
(define-for-syntax (map-token toks tok)
|
|
||||||
(car (token-identifier-mapping-get toks tok)))
|
|
||||||
|
|
||||||
(define no-pos-val (make-position #f #f #f))
|
|
||||||
(define-for-syntax no-pos
|
|
||||||
(let ([npv ((syntax-local-certifier) #'no-pos-val)])
|
|
||||||
(lambda (stx) npv)))
|
|
||||||
(define-for-syntax at-tok-pos
|
|
||||||
(lambda (sel expr)
|
|
||||||
(lambda (stx)
|
|
||||||
#`(let ([v #,expr]) (if v (#,sel v) no-pos-val)))))
|
|
||||||
|
|
||||||
;; Builds a matcher for a particular alternative
|
|
||||||
(define-for-syntax (build-match nts toks pat handle $ctx)
|
|
||||||
(let loop ([pat pat]
|
|
||||||
[pos 1])
|
|
||||||
(if (null? pat)
|
|
||||||
#`(success-k #,handle stream last-consumed-token depth max-depth tasks
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(fail-k max-depth tasks)))
|
|
||||||
(let ([id (datum->syntax (car pat)
|
|
||||||
(string->symbol (format "$~a" pos)))]
|
|
||||||
[id-start-pos (datum->syntax (car pat)
|
|
||||||
(string->symbol (format "$~a-start-pos" pos)))]
|
|
||||||
[id-end-pos (datum->syntax (car pat)
|
|
||||||
(string->symbol (format "$~a-end-pos" pos)))]
|
|
||||||
[n-end-pos (and (null? (cdr pat))
|
|
||||||
(datum->syntax (car pat) '$n-end-pos))])
|
|
||||||
(cond
|
|
||||||
[(bound-identifier-mapping-get nts (car pat) (lambda () #f))
|
|
||||||
;; Match non-termimal
|
|
||||||
#`(parse-and
|
|
||||||
;; First part is simple? (If so, we don't have to parallelize the `and'.)
|
|
||||||
#,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))])
|
|
||||||
(or (not l)
|
|
||||||
(andmap values (caddr l))))
|
|
||||||
#,(car pat)
|
|
||||||
(let ([original-stream stream])
|
|
||||||
(lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
|
||||||
(let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream)
|
|
||||||
tok-end
|
|
||||||
tok-start)
|
|
||||||
#'(if (eq? original-stream stream)
|
|
||||||
last-consumed-token
|
|
||||||
(and (pair? original-stream)
|
|
||||||
(car original-stream))))]
|
|
||||||
[#,id-end-pos (at-tok-pos #'tok-end #'last-consumed-token)]
|
|
||||||
#,@(if n-end-pos
|
|
||||||
#`([#,n-end-pos (at-tok-pos #'tok-end #'last-consumed-token)])
|
|
||||||
null))
|
|
||||||
#,(loop (cdr pat) (add1 pos)))))
|
|
||||||
stream last-consumed-token depth
|
|
||||||
#,(let ([cnt (apply +
|
|
||||||
(map (lambda (item)
|
|
||||||
(cond
|
|
||||||
[(bound-identifier-mapping-get nts item (lambda () #f))
|
|
||||||
=> (lambda (l) (car l))]
|
|
||||||
[else 1]))
|
|
||||||
(cdr pat)))])
|
|
||||||
#`(- end #,cnt))
|
|
||||||
success-k fail-k max-depth tasks)]
|
|
||||||
[else
|
|
||||||
;; Match token
|
|
||||||
(let ([tok-id (map-token toks (car pat))])
|
|
||||||
#`(if (and (pair? stream)
|
|
||||||
(eq? '#,tok-id (tok-name (car stream))))
|
|
||||||
(let* ([stream-a (car stream)]
|
|
||||||
[#,id (tok-val stream-a)]
|
|
||||||
[last-consumed-token (car stream)]
|
|
||||||
[stream (cdr stream)]
|
|
||||||
[depth (add1 depth)])
|
|
||||||
(let ([max-depth (max max-depth depth)])
|
|
||||||
(let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'stream-a)]
|
|
||||||
[#,id-end-pos (at-tok-pos #'tok-end #'stream-a)]
|
|
||||||
#,@(if n-end-pos
|
|
||||||
#`([#,n-end-pos (at-tok-pos #'tok-end #'stream-a)])
|
|
||||||
null))
|
|
||||||
#,(loop (cdr pat) (add1 pos)))))
|
|
||||||
(fail-k max-depth tasks)))])))))
|
|
||||||
|
|
||||||
;; Starts parsing to match a non-terminal. There's a minor
|
|
||||||
;; optimization that checks for known starting tokens. Otherwise,
|
|
||||||
;; use the cache, block if someone else is already trying the match,
|
|
||||||
;; and cache the result if it's computed.
|
|
||||||
;; The cache maps nontermial+startingpos+iteration to a result, where
|
|
||||||
;; the iteration is 0 for the first match attempt, 1 for the second,
|
|
||||||
;; etc.
|
|
||||||
(define (parse-nt/share key min-cnt init-tokens stream last-consumed-token depth end max-depth tasks success-k fail-k k)
|
|
||||||
(if (and (positive? min-cnt)
|
|
||||||
(pair? stream)
|
|
||||||
(not (memq (tok-name (car stream)) init-tokens)))
|
|
||||||
;; No such leading token; give up
|
|
||||||
(fail-k max-depth tasks)
|
|
||||||
;; Run pattern
|
|
||||||
(let loop ([n 0]
|
|
||||||
[success-k success-k]
|
|
||||||
[fail-k fail-k]
|
|
||||||
[max-depth max-depth]
|
|
||||||
[tasks tasks]
|
|
||||||
[k k])
|
|
||||||
(let ([answer-key (gensym)]
|
|
||||||
[table-key (vector key depth n)]
|
|
||||||
[old-depth depth]
|
|
||||||
[old-stream stream])
|
|
||||||
#;(printf "Loop ~a\n" table-key)
|
|
||||||
(cond
|
|
||||||
[(hash-ref (tasks-cache tasks) table-key (lambda () #f))
|
|
||||||
=> (lambda (result)
|
|
||||||
#;(printf "Reuse ~a\n" table-key)
|
|
||||||
(result success-k fail-k max-depth tasks))]
|
|
||||||
[else
|
|
||||||
#;(printf "Try ~a ~a\n" table-key (map tok-name stream))
|
|
||||||
(hash-set! (tasks-cache tasks) table-key
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
#;(printf "Wait ~a ~a\n" table-key answer-key)
|
|
||||||
(wait-for-answer #t max-depth tasks answer-key success-k fail-k
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
#;(printf "Deadlock ~a ~a\n" table-key answer-key)
|
|
||||||
(fail-k max-depth tasks)))))
|
|
||||||
(let result-loop ([max-depth max-depth][tasks tasks][k k])
|
|
||||||
(letrec ([orig-stream stream]
|
|
||||||
[new-got-k
|
|
||||||
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
|
||||||
;; Check whether we already have a result that consumed the same amount:
|
|
||||||
(let ([result-key (vector #f key old-depth depth)])
|
|
||||||
(cond
|
|
||||||
[(hash-ref (tasks-cache tasks) result-key (lambda () #f))
|
|
||||||
;; Go for the next-result
|
|
||||||
(result-loop max-depth
|
|
||||||
tasks
|
|
||||||
(lambda (end max-depth tasks success-k fail-k)
|
|
||||||
(next-k success-k fail-k max-depth tasks)))]
|
|
||||||
[else
|
|
||||||
#;(printf "Success ~a ~a\n" table-key
|
|
||||||
(map tok-name (let loop ([d old-depth][s old-stream])
|
|
||||||
(if (= d depth)
|
|
||||||
null
|
|
||||||
(cons (car s) (loop (add1 d) (cdr s)))))))
|
|
||||||
(let ([next-k (lambda (success-k fail-k max-depth tasks)
|
|
||||||
(loop (add1 n)
|
|
||||||
success-k
|
|
||||||
fail-k
|
|
||||||
max-depth
|
|
||||||
tasks
|
|
||||||
(lambda (end max-depth tasks success-k fail-k)
|
|
||||||
(next-k success-k fail-k max-depth tasks))))])
|
|
||||||
(hash-set! (tasks-cache tasks) result-key #t)
|
|
||||||
(hash-set! (tasks-cache tasks) table-key
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(success-k val stream last-consumed-token depth max-depth tasks next-k)))
|
|
||||||
(report-answer-all answer-key
|
|
||||||
max-depth
|
|
||||||
tasks
|
|
||||||
(list val stream last-consumed-token depth next-k)
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(success-k val stream last-consumed-token depth max-depth tasks next-k))))])))]
|
|
||||||
[new-fail-k
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
#;(printf "Failure ~a\n" table-key)
|
|
||||||
(hash-set! (tasks-cache tasks) table-key
|
|
||||||
(lambda (success-k fail-k max-depth tasks)
|
|
||||||
(fail-k max-depth tasks)))
|
|
||||||
(report-answer-all answer-key
|
|
||||||
max-depth
|
|
||||||
tasks
|
|
||||||
null
|
|
||||||
(lambda (max-depth tasks)
|
|
||||||
(fail-k max-depth tasks))))])
|
|
||||||
(k end max-depth tasks new-got-k new-fail-k)))])))))
|
|
||||||
|
|
||||||
(define-syntax (cfg-parser stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
[(_ clause ...)
|
|
||||||
(let ([clauses (syntax->list #'(clause ...))])
|
|
||||||
(let-values ([(start grammar cfg-error parser-clauses src-pos?)
|
|
||||||
(let ([all-toks (apply
|
|
||||||
append
|
|
||||||
(map (lambda (clause)
|
|
||||||
(syntax-case clause (tokens)
|
|
||||||
[(tokens t ...)
|
|
||||||
(apply
|
|
||||||
append
|
|
||||||
(map (lambda (t)
|
|
||||||
(let ([v (syntax-local-value t (lambda () #f))])
|
|
||||||
(cond
|
|
||||||
[(terminals-def? v)
|
|
||||||
(map (lambda (v)
|
|
||||||
(cons v #f))
|
|
||||||
(syntax->list (terminals-def-t v)))]
|
|
||||||
[(e-terminals-def? v)
|
|
||||||
(map (lambda (v)
|
|
||||||
(cons v #t))
|
|
||||||
(syntax->list (e-terminals-def-t v)))]
|
|
||||||
[else null])))
|
|
||||||
(syntax->list #'(t ...))))]
|
|
||||||
[_else null]))
|
|
||||||
clauses))]
|
|
||||||
[all-end-toks (apply
|
|
||||||
append
|
|
||||||
(map (lambda (clause)
|
|
||||||
(syntax-case clause (end)
|
|
||||||
[(end t ...)
|
|
||||||
(syntax->list #'(t ...))]
|
|
||||||
[_else null]))
|
|
||||||
clauses))])
|
|
||||||
(let loop ([clauses clauses]
|
|
||||||
[cfg-start #f]
|
|
||||||
[cfg-grammar #f]
|
|
||||||
[cfg-error #f]
|
|
||||||
[src-pos? #f]
|
|
||||||
[parser-clauses null])
|
|
||||||
(if (null? clauses)
|
|
||||||
(values cfg-start
|
|
||||||
cfg-grammar
|
|
||||||
cfg-error
|
|
||||||
(reverse parser-clauses)
|
|
||||||
src-pos?)
|
|
||||||
(syntax-case (car clauses) (start error grammar src-pos)
|
|
||||||
[(start tok)
|
|
||||||
(loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)]
|
|
||||||
[(error expr)
|
|
||||||
(loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)]
|
|
||||||
[(grammar [nt [pat handle0 handle ...] ...] ...)
|
|
||||||
(let ([nts (make-bound-identifier-mapping)]
|
|
||||||
[toks (make-token-identifier-mapping)]
|
|
||||||
[end-toks (make-token-identifier-mapping)]
|
|
||||||
[nt-ids (syntax->list #'(nt ...))]
|
|
||||||
[patss (map (lambda (stx)
|
|
||||||
(map syntax->list (syntax->list stx)))
|
|
||||||
(syntax->list #'((pat ...) ...)))])
|
|
||||||
(for-each (lambda (nt)
|
|
||||||
(bound-identifier-mapping-put! nts nt (list 0)))
|
|
||||||
nt-ids)
|
|
||||||
(for-each (lambda (t)
|
|
||||||
(token-identifier-mapping-put! end-toks t #t))
|
|
||||||
all-end-toks)
|
|
||||||
(for-each (lambda (t)
|
|
||||||
(unless (token-identifier-mapping-get end-toks (car t) (lambda () #f))
|
|
||||||
(let ([id (gensym (syntax-e (car t)))])
|
|
||||||
(token-identifier-mapping-put! toks (car t)
|
|
||||||
(cons id (cdr t))))))
|
|
||||||
all-toks)
|
|
||||||
;; Compute min max size for each non-term:
|
|
||||||
(nt-fixpoint
|
|
||||||
nts
|
|
||||||
(lambda (nt pats old-list)
|
|
||||||
(let ([new-cnt
|
|
||||||
(apply
|
|
||||||
min
|
|
||||||
(map (lambda (pat)
|
|
||||||
(apply
|
|
||||||
+
|
|
||||||
(map (lambda (elem)
|
|
||||||
(car
|
|
||||||
(bound-identifier-mapping-get nts
|
|
||||||
elem
|
|
||||||
(lambda () (list 1)))))
|
|
||||||
pat)))
|
|
||||||
pats))])
|
|
||||||
(if (new-cnt . > . (car old-list))
|
|
||||||
(cons new-cnt (cdr old-list))
|
|
||||||
old-list)))
|
|
||||||
nt-ids patss)
|
|
||||||
;; Compute set of toks that must appear at the beginning
|
|
||||||
;; for a non-terminal
|
|
||||||
(nt-fixpoint
|
|
||||||
nts
|
|
||||||
(lambda (nt pats old-list)
|
|
||||||
(let ([new-list
|
|
||||||
(apply
|
|
||||||
append
|
|
||||||
(map (lambda (pat)
|
|
||||||
(let loop ([pat pat])
|
|
||||||
(if (pair? pat)
|
|
||||||
(let ([l (bound-identifier-mapping-get
|
|
||||||
nts
|
|
||||||
(car pat)
|
|
||||||
(lambda ()
|
|
||||||
(list 1 (map-token toks (car pat)))))])
|
|
||||||
;; If the non-terminal can match 0 things,
|
|
||||||
;; then it might match something from the
|
|
||||||
;; next pattern element. Otherwise, it must
|
|
||||||
;; match the first element:
|
|
||||||
(if (zero? (car l))
|
|
||||||
(append (cdr l) (loop (cdr pat)))
|
|
||||||
(cdr l)))
|
|
||||||
null)))
|
|
||||||
pats))])
|
|
||||||
(let ([new (filter (lambda (id)
|
|
||||||
(andmap (lambda (id2)
|
|
||||||
(not (eq? id id2)))
|
|
||||||
(cdr old-list)))
|
|
||||||
new-list)])
|
|
||||||
(if (pair? new)
|
|
||||||
;; Drop dups in new list:
|
|
||||||
(let ([new (let loop ([new new])
|
|
||||||
(if (null? (cdr new))
|
|
||||||
new
|
|
||||||
(if (ormap (lambda (id)
|
|
||||||
(eq? (car new) id))
|
|
||||||
(cdr new))
|
|
||||||
(loop (cdr new))
|
|
||||||
(cons (car new) (loop (cdr new))))))])
|
|
||||||
(cons (car old-list) (append new (cdr old-list))))
|
|
||||||
old-list))))
|
|
||||||
nt-ids patss)
|
|
||||||
;; Determine left-recursive clauses:
|
|
||||||
(for-each (lambda (nt pats)
|
|
||||||
(let ([l (bound-identifier-mapping-get nts nt)])
|
|
||||||
(bound-identifier-mapping-put! nts nt (list (car l)
|
|
||||||
(cdr l)
|
|
||||||
(map (lambda (x) #f) pats)))))
|
|
||||||
nt-ids patss)
|
|
||||||
(nt-fixpoint
|
|
||||||
nts
|
|
||||||
(lambda (nt pats old-list)
|
|
||||||
(list (car old-list)
|
|
||||||
(cadr old-list)
|
|
||||||
(map (lambda (pat simple?)
|
|
||||||
(or simple?
|
|
||||||
(let ([l (map (lambda (elem)
|
|
||||||
(bound-identifier-mapping-get
|
|
||||||
nts
|
|
||||||
elem
|
|
||||||
(lambda () #f)))
|
|
||||||
pat)])
|
|
||||||
(andmap (lambda (i)
|
|
||||||
(or (not i)
|
|
||||||
(andmap values (caddr i))))
|
|
||||||
l))))
|
|
||||||
pats (caddr old-list))))
|
|
||||||
nt-ids patss)
|
|
||||||
;; Build a definition for each non-term:
|
|
||||||
(loop (cdr clauses)
|
|
||||||
cfg-start
|
|
||||||
(map (lambda (nt pats handles $ctxs)
|
|
||||||
(define info (bound-identifier-mapping-get nts nt))
|
|
||||||
(list nt
|
|
||||||
#`(let ([key (gensym '#,nt)])
|
|
||||||
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
|
||||||
(parse-nt/share
|
|
||||||
key #,(car info) '#,(cadr info) stream last-consumed-token depth end
|
|
||||||
max-depth tasks
|
|
||||||
success-k fail-k
|
|
||||||
(lambda (end max-depth tasks success-k fail-k)
|
|
||||||
#,(let loop ([pats pats]
|
|
||||||
[handles (syntax->list handles)]
|
|
||||||
[$ctxs (syntax->list $ctxs)]
|
|
||||||
[simple?s (caddr info)])
|
|
||||||
(if (null? pats)
|
|
||||||
#'(fail-k max-depth tasks)
|
|
||||||
#`(#,(if (or (null? (cdr pats))
|
|
||||||
(car simple?s))
|
|
||||||
#'parse-or
|
|
||||||
#'parse-parallel-or)
|
|
||||||
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
|
||||||
#,(build-match nts
|
|
||||||
toks
|
|
||||||
(car pats)
|
|
||||||
(car handles)
|
|
||||||
(car $ctxs)))
|
|
||||||
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
|
||||||
#,(loop (cdr pats)
|
|
||||||
(cdr handles)
|
|
||||||
(cdr $ctxs)
|
|
||||||
(cdr simple?s)))
|
|
||||||
stream last-consumed-token depth end success-k fail-k max-depth tasks)))))))))
|
|
||||||
nt-ids
|
|
||||||
patss
|
|
||||||
(syntax->list #'(((begin handle0 handle ...) ...) ...))
|
|
||||||
(syntax->list #'((handle0 ...) ...)))
|
|
||||||
cfg-error
|
|
||||||
src-pos?
|
|
||||||
(list*
|
|
||||||
(with-syntax ([((tok tok-id . $e) ...)
|
|
||||||
(token-identifier-mapping-map toks
|
|
||||||
(lambda (k v)
|
|
||||||
(list* k
|
|
||||||
(car v)
|
|
||||||
(if (cdr v)
|
|
||||||
#f
|
|
||||||
'$1))))]
|
|
||||||
[(pos ...)
|
|
||||||
(if src-pos?
|
|
||||||
#'($1-start-pos $1-end-pos)
|
|
||||||
#'(#f #f))])
|
|
||||||
#`(grammar (start [() null]
|
|
||||||
[(atok start) (cons $1 $2)])
|
|
||||||
(atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...)))
|
|
||||||
#`(start start)
|
|
||||||
parser-clauses)))]
|
|
||||||
[(grammar . _)
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
"bad grammar clause"
|
|
||||||
stx
|
|
||||||
(car clauses))]
|
|
||||||
[(src-pos)
|
|
||||||
(loop (cdr clauses)
|
|
||||||
cfg-start
|
|
||||||
cfg-grammar
|
|
||||||
cfg-error
|
|
||||||
#t
|
|
||||||
(cons (car clauses) parser-clauses))]
|
|
||||||
[_else
|
|
||||||
(loop (cdr clauses)
|
|
||||||
cfg-start
|
|
||||||
cfg-grammar
|
|
||||||
cfg-error
|
|
||||||
src-pos?
|
|
||||||
(cons (car clauses) parser-clauses))]))))])
|
|
||||||
#`(let ([orig-parse (parser
|
|
||||||
[error (lambda (a b c)
|
|
||||||
(error 'cfg-parser "unexpected ~a token: ~a" b c))]
|
|
||||||
. #,parser-clauses)]
|
|
||||||
[error-proc #,cfg-error])
|
|
||||||
(letrec #,grammar
|
|
||||||
(lambda (get-tok)
|
|
||||||
(let ([tok-list (orig-parse get-tok)])
|
|
||||||
(letrec ([success-k
|
|
||||||
(lambda (val stream last-consumed-token depth max-depth tasks next)
|
|
||||||
(if (null? stream)
|
|
||||||
val
|
|
||||||
(next success-k fail-k max-depth tasks)))]
|
|
||||||
[fail-k (lambda (max-depth tasks)
|
|
||||||
(cond
|
|
||||||
[(null? tok-list)
|
|
||||||
(if error-proc
|
|
||||||
(error-proc #t
|
|
||||||
'no-tokens
|
|
||||||
#f
|
|
||||||
(make-position #f #f #f)
|
|
||||||
(make-position #f #f #f))
|
|
||||||
(error
|
|
||||||
'cfg-parse
|
|
||||||
"no tokens"))]
|
|
||||||
[else
|
|
||||||
(let ([bad-tok (list-ref tok-list
|
|
||||||
(min (sub1 (length tok-list))
|
|
||||||
max-depth))])
|
|
||||||
(if error-proc
|
|
||||||
(error-proc #t
|
|
||||||
(tok-orig-name bad-tok)
|
|
||||||
(tok-val bad-tok)
|
|
||||||
(tok-start bad-tok)
|
|
||||||
(tok-end bad-tok))
|
|
||||||
(error
|
|
||||||
'cfg-parse
|
|
||||||
"failed at ~a"
|
|
||||||
(tok-val bad-tok))))]))])
|
|
||||||
(#,start tok-list
|
|
||||||
;; we simulate a token at the very beginning with zero width
|
|
||||||
;; for use with the position-generating code (*-start-pos, *-end-pos).
|
|
||||||
(if (null? tok-list)
|
|
||||||
(tok #f #f #f
|
|
||||||
(position 1
|
|
||||||
#,(if src-pos? #'1 #'#f)
|
|
||||||
#,(if src-pos? #'0 #'#f))
|
|
||||||
(position 1
|
|
||||||
#,(if src-pos? #'1 #'#f)
|
|
||||||
#,(if src-pos? #'0 #'#f)))
|
|
||||||
(tok (tok-name (car tok-list))
|
|
||||||
(tok-orig-name (car tok-list))
|
|
||||||
(tok-val (car tok-list))
|
|
||||||
(tok-start (car tok-list))
|
|
||||||
(tok-start (car tok-list))))
|
|
||||||
0
|
|
||||||
(length tok-list)
|
|
||||||
success-k
|
|
||||||
fail-k
|
|
||||||
0
|
|
||||||
(make-tasks null null
|
|
||||||
(make-hasheq) (make-hasheq)
|
|
||||||
(make-hash) #t)))))))))]))
|
|
||||||
|
|
||||||
|
|
||||||
(module* test racket/base
|
|
||||||
(require (submod "..")
|
|
||||||
br-parser-tools/lex
|
|
||||||
racket/block
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
;; Test: parsing regular expressions.
|
|
||||||
;; Here is a test case on locations:
|
|
||||||
(block
|
|
||||||
(define-tokens regexp-tokens (ANCHOR STAR OR LIT LPAREN RPAREN EOF))
|
|
||||||
(define lex (lexer-src-pos ["|" (token-OR lexeme)]
|
|
||||||
["^" (token-ANCHOR lexeme)]
|
|
||||||
["*" (token-STAR lexeme)]
|
|
||||||
[(repetition 1 +inf.0 alphabetic) (token-LIT lexeme)]
|
|
||||||
["(" (token-LPAREN lexeme)]
|
|
||||||
[")" (token-RPAREN lexeme)]
|
|
||||||
[whitespace (return-without-pos (lex input-port))]
|
|
||||||
[(eof) (token-EOF 'eof)]))
|
|
||||||
(define -parse (cfg-parser
|
|
||||||
(tokens regexp-tokens)
|
|
||||||
(start top)
|
|
||||||
(end EOF)
|
|
||||||
(src-pos)
|
|
||||||
(grammar [top [(maybe-anchor regexp)
|
|
||||||
(cond [$1
|
|
||||||
`(anchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))]
|
|
||||||
[else
|
|
||||||
`(unanchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))])]]
|
|
||||||
[maybe-anchor [(ANCHOR) #t]
|
|
||||||
[() #f]]
|
|
||||||
[regexp [(regexp STAR) `(star ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))]
|
|
||||||
[(regexp OR regexp) `(or ,$1 ,$3 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))]
|
|
||||||
[(LPAREN regexp RPAREN) `(group ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))]
|
|
||||||
[(LIT) `(lit ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $1-end-pos))]])))
|
|
||||||
(define (pos->sexp pos)
|
|
||||||
(position-offset pos))
|
|
||||||
|
|
||||||
(define (parse s)
|
|
||||||
(define ip (open-input-string s))
|
|
||||||
(port-count-lines! ip)
|
|
||||||
(-parse (lambda () (lex ip))))
|
|
||||||
|
|
||||||
(check-equal? (parse "abc")
|
|
||||||
'(unanchored (lit "abc" 1 4) 1 4))
|
|
||||||
(check-equal? (parse "a | (b*) | c")
|
|
||||||
'(unanchored (or (or (lit "a" 1 2)
|
|
||||||
(group (star (lit "b" 6 7) 6 8) 5 9)
|
|
||||||
1 9)
|
|
||||||
(lit "c" 12 13)
|
|
||||||
1 13)
|
|
||||||
1 13)))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; Tests used during development
|
|
||||||
(define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF))
|
|
||||||
|
|
||||||
(define lex
|
|
||||||
(lexer
|
|
||||||
["+" (token-PLUS '+)]
|
|
||||||
["-" (token-MINUS '-)]
|
|
||||||
["*" (token-STAR '*)]
|
|
||||||
["|" (token-BAR '||)]
|
|
||||||
[":" (token-COLON '|:|)]
|
|
||||||
[whitespace (lex input-port)]
|
|
||||||
[(eof) (token-EOF 'eof)]))
|
|
||||||
|
|
||||||
(define parse
|
|
||||||
(cfg-parser
|
|
||||||
(tokens non-terminals)
|
|
||||||
(start <program>)
|
|
||||||
(end EOF)
|
|
||||||
(error (lambda (a b stx)
|
|
||||||
(error 'parse "failed at ~s" stx)))
|
|
||||||
(grammar [<program> [(PLUS) "plus"]
|
|
||||||
[(<minus-program> BAR <minus-program>) (list $1 $2 $3)]
|
|
||||||
[(<program> COLON) (list $1)]]
|
|
||||||
[<minus-program> [(MINUS) "minus"]
|
|
||||||
[(<program> STAR) (cons $1 $2)]]
|
|
||||||
[<simple> [(<alts> <alts> <alts> MINUS) "yes"]]
|
|
||||||
[<alts> [(PLUS) 'plus]
|
|
||||||
[(MINUS) 'minus]]
|
|
||||||
[<random> [() '0]
|
|
||||||
[(<random> PLUS) (add1 $1)]
|
|
||||||
[(<random> PLUS) (add1 $1)]])))
|
|
||||||
|
|
||||||
(let ([p (open-input-string #;"+*|-|-*|+**" #;"-|+*|+**"
|
|
||||||
#;"+*|+**|-" #;"-|-*|-|-*"
|
|
||||||
#;"-|-*|-|-**|-|-*|-|-**"
|
|
||||||
"-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|-|-*|-|-**|-|-*|-|-***
|
|
||||||
|-|-*|-|-**|-|-*|-|-*****|-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|
|
|
||||||
-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****"
|
|
||||||
;; This one fails:
|
|
||||||
#;"+*")])
|
|
||||||
(check-equal? (parse (lambda () (lex p)))
|
|
||||||
'((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
|
||||||
||
|
|
||||||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
|
||||||
.
|
|
||||||
*)
|
|
||||||
||
|
|
||||||
(((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
|
||||||
||
|
|
||||||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
|
||||||
.
|
|
||||||
*))
|
|
||||||
.
|
|
||||||
*)
|
|
||||||
||
|
|
||||||
(((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
|
||||||
||
|
|
||||||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
|
||||||
.
|
|
||||||
*)
|
|
||||||
||
|
|
||||||
(((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
|
||||||
||
|
|
||||||
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
|
||||||
.
|
|
||||||
*))
|
|
||||||
.
|
|
||||||
*)))))
|
|
@ -1,448 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
(require (for-template racket/base)
|
|
||||||
racket/list
|
|
||||||
racket/set
|
|
||||||
racket/syntax
|
|
||||||
syntax/srcloc
|
|
||||||
brag/rules/stx-types
|
|
||||||
"flatten.rkt"
|
|
||||||
syntax/id-table
|
|
||||||
(prefix-in sat: "satisfaction.rkt")
|
|
||||||
(prefix-in support: brag/support)
|
|
||||||
(prefix-in stxparse: syntax/parse))
|
|
||||||
|
|
||||||
(provide rules-codegen)
|
|
||||||
|
|
||||||
|
|
||||||
;; Generates the body of the module.
|
|
||||||
;; FIXME: abstract this so we can just call (rules ...) without
|
|
||||||
;; generating the whole module body.
|
|
||||||
(define (rules-codegen stx
|
|
||||||
#:parser-provider-module [parser-provider-module 'br-parser-tools/yacc]
|
|
||||||
#:parser-provider-form [parser-provider-form 'parser])
|
|
||||||
(syntax-case stx ()
|
|
||||||
[(_ r ...)
|
|
||||||
(begin
|
|
||||||
;; (listof stx)
|
|
||||||
(define rules (syntax->list #'(r ...)))
|
|
||||||
|
|
||||||
(when (empty? rules)
|
|
||||||
(raise-syntax-error 'brag
|
|
||||||
(format "The grammar does not appear to have any rules")
|
|
||||||
stx))
|
|
||||||
|
|
||||||
(check-all-rules-defined! rules)
|
|
||||||
(check-all-rules-no-duplicates! rules)
|
|
||||||
(check-all-rules-satisfiable! rules)
|
|
||||||
|
|
||||||
;; We flatten the rules so we can use the yacc-style ruleset that br-parser-tools
|
|
||||||
;; supports.
|
|
||||||
(define flattened-rules (flatten-rules rules))
|
|
||||||
|
|
||||||
(define generated-rule-codes (map flat-rule->yacc-rule flattened-rules))
|
|
||||||
|
|
||||||
;; The first rule, by default, is the start rule.
|
|
||||||
(define rule-ids (for/list ([a-rule (in-list rules)])
|
|
||||||
(rule-id a-rule)))
|
|
||||||
(define start-id (first rule-ids))
|
|
||||||
|
|
||||||
|
|
||||||
(define-values (implicit-tokens ;; (listof identifier)
|
|
||||||
explicit-tokens) ;; (listof identifier)
|
|
||||||
(rules-collect-token-types rules))
|
|
||||||
|
|
||||||
;; (listof symbol)
|
|
||||||
(define implicit-token-types
|
|
||||||
(map string->symbol
|
|
||||||
(set->list (list->set (map syntax-e implicit-tokens)))))
|
|
||||||
|
|
||||||
;; (listof symbol)
|
|
||||||
(define explicit-token-types
|
|
||||||
(set->list (list->set (map syntax-e explicit-tokens))))
|
|
||||||
|
|
||||||
;; (listof symbol)
|
|
||||||
(define token-types
|
|
||||||
(set->list (list->set (append (map (lambda (x) (string->symbol (syntax-e x)))
|
|
||||||
implicit-tokens)
|
|
||||||
(map syntax-e explicit-tokens)))))
|
|
||||||
|
|
||||||
(with-syntax ([start-id start-id]
|
|
||||||
|
|
||||||
[(token-type ...) token-types]
|
|
||||||
|
|
||||||
[(token-type-constructor ...)
|
|
||||||
(map (lambda (x) (string->symbol (format "token-~a" x)))
|
|
||||||
token-types)]
|
|
||||||
|
|
||||||
[(explicit-token-types ...) explicit-token-types]
|
|
||||||
[(implicit-token-types ...) implicit-token-types]
|
|
||||||
[(implicit-token-types-str ...) (map symbol->string implicit-token-types)]
|
|
||||||
[(implicit-token-type-constructor ...)
|
|
||||||
(map (lambda (x) (string->symbol (format "token-~a" x)))
|
|
||||||
implicit-token-types)]
|
|
||||||
[generated-grammar #`(grammar #,@generated-rule-codes)]
|
|
||||||
[parser-module parser-provider-module]
|
|
||||||
[parser-form parser-provider-form])
|
|
||||||
(quasisyntax/loc stx
|
|
||||||
(begin
|
|
||||||
(require br-parser-tools/lex
|
|
||||||
parser-module
|
|
||||||
brag/codegen/runtime
|
|
||||||
brag/support
|
|
||||||
brag/private/internal-support
|
|
||||||
racket/set
|
|
||||||
(for-syntax syntax/parse racket/base))
|
|
||||||
|
|
||||||
(provide parse
|
|
||||||
make-rule-parser
|
|
||||||
all-token-types
|
|
||||||
#;current-source
|
|
||||||
#;current-parser-error-handler
|
|
||||||
#;current-tokenizer-error-handler
|
|
||||||
#;[struct-out exn:fail:parsing]
|
|
||||||
)
|
|
||||||
|
|
||||||
(define-tokens enumerated-tokens (token-type ...))
|
|
||||||
|
|
||||||
;; all-token-types lists all the tokens (except for EOF)
|
|
||||||
(define all-token-types
|
|
||||||
(set-remove (set 'token-type ...) 'EOF))
|
|
||||||
|
|
||||||
;; For internal use by the permissive tokenizer only:
|
|
||||||
(define all-tokens-hash/mutable
|
|
||||||
(make-hash (list ;; Note: we also allow the eof object here, to make
|
|
||||||
;; the permissive tokenizer even nicer to work with.
|
|
||||||
(cons eof token-EOF)
|
|
||||||
(cons 'token-type token-type-constructor) ...)))
|
|
||||||
|
|
||||||
|
|
||||||
#;(define default-lex/1
|
|
||||||
(lexer-src-pos [implicit-token-types-str
|
|
||||||
(token 'implicit-token-types lexeme)]
|
|
||||||
...
|
|
||||||
[(eof) (token eof)]))
|
|
||||||
|
|
||||||
(define-syntax (make-rule-parser stx-2)
|
|
||||||
(syntax-parse stx-2
|
|
||||||
[(_ start-rule:id)
|
|
||||||
(begin
|
|
||||||
;; HACK HACK HACK
|
|
||||||
;; The cfg-parser depends on the start-rule provided in (start ...) to have the same
|
|
||||||
;; context as the rest of this body, so I need to hack this. I don't like this, but
|
|
||||||
;; I don't know what else to do. Hence recolored-start-rule.
|
|
||||||
(unless (member (syntax-e #'start-rule)
|
|
||||||
'#,(map syntax-e rule-ids))
|
|
||||||
(raise-syntax-error #f
|
|
||||||
(format "Rule ~a is not defined in the grammar" (syntax-e #'start-rule))
|
|
||||||
stx-2))
|
|
||||||
|
|
||||||
(define recolored-start-rule (datum->syntax (syntax #,stx) (syntax-e #'start-rule)))
|
|
||||||
#`(let ([THE-GRAMMAR (parser-form (tokens enumerated-tokens)
|
|
||||||
(src-pos)
|
|
||||||
(start #,recolored-start-rule)
|
|
||||||
(end EOF)
|
|
||||||
(error THE-ERROR-HANDLER)
|
|
||||||
generated-grammar)])
|
|
||||||
(case-lambda [(tokenizer)
|
|
||||||
(define next-token
|
|
||||||
(make-permissive-tokenizer tokenizer all-tokens-hash/mutable))
|
|
||||||
(THE-GRAMMAR next-token)]
|
|
||||||
[(source tokenizer)
|
|
||||||
(parameterize ([current-source source])
|
|
||||||
(parse tokenizer))])))]))
|
|
||||||
|
|
||||||
(define parse (make-rule-parser start-id))
|
|
||||||
(provide parse-to-datum parse-tree)
|
|
||||||
|
|
||||||
(define (parse-to-datum x)
|
|
||||||
(let loop ([x (syntax->datum (parse x))])
|
|
||||||
(cond
|
|
||||||
[(list? x) (map loop x)]
|
|
||||||
[(char? x) (string x)]
|
|
||||||
[else x])))
|
|
||||||
|
|
||||||
(define parse-tree parse-to-datum)))))]))
|
|
||||||
|
|
||||||
|
|
||||||
;; Given a flattened rule, returns a syntax for the code that
|
|
||||||
;; preserves as much source location as possible.
|
|
||||||
;;
|
|
||||||
;; Each rule is defined to return a list with the following structure:
|
|
||||||
;;
|
|
||||||
;; stx :== (name (U tokens rule-stx) ...)
|
|
||||||
;;
|
|
||||||
(define (flat-rule->yacc-rule a-flat-rule)
|
|
||||||
(syntax-case a-flat-rule ()
|
|
||||||
[(rule-type origin name clauses ...)
|
|
||||||
(begin
|
|
||||||
(define translated-clauses
|
|
||||||
(map (lambda (clause) (translate-clause clause #'name #'origin))
|
|
||||||
(syntax->list #'(clauses ...))))
|
|
||||||
(with-syntax ([(translated-clause ...) translated-clauses])
|
|
||||||
#`[name translated-clause ...]))]))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; translates a single primitive rule clause.
|
|
||||||
;; A clause is a simple list of ids, lit, vals, and inferred-id elements.
|
|
||||||
;; The action taken depends on the pattern type.
|
|
||||||
(define (translate-clause a-clause rule-name/false origin)
|
|
||||||
(define translated-patterns
|
|
||||||
(let loop ([primitive-patterns (syntax->list a-clause)])
|
|
||||||
(cond
|
|
||||||
[(empty? primitive-patterns)
|
|
||||||
'()]
|
|
||||||
[else
|
|
||||||
(cons (syntax-case (first primitive-patterns) (id lit token inferred-id)
|
|
||||||
[(id val)
|
|
||||||
#'val]
|
|
||||||
[(lit val)
|
|
||||||
(datum->syntax #f (string->symbol (syntax-e #'val)) #'val)]
|
|
||||||
[(token val)
|
|
||||||
#'val]
|
|
||||||
[(inferred-id val reason)
|
|
||||||
#'val])
|
|
||||||
(loop (rest primitive-patterns)))])))
|
|
||||||
|
|
||||||
(define translated-actions
|
|
||||||
(for/list ([translated-pattern (in-list translated-patterns)]
|
|
||||||
[primitive-pattern (syntax->list a-clause)]
|
|
||||||
[pos (in-naturals 1)])
|
|
||||||
(if (eq? (syntax-property primitive-pattern 'hide) 'hide)
|
|
||||||
#'null
|
|
||||||
(with-syntax ([$X
|
|
||||||
(format-id translated-pattern "$~a" pos)]
|
|
||||||
[$X-start-pos
|
|
||||||
(format-id translated-pattern "$~a-start-pos" pos)]
|
|
||||||
[$X-end-pos
|
|
||||||
(format-id translated-pattern "$~a-end-pos" pos)])
|
|
||||||
(syntax-case primitive-pattern (id lit token inferred-id)
|
|
||||||
|
|
||||||
;; When a rule usage is inferred, the value of $X is a syntax object
|
|
||||||
;; whose head is the name of the inferred rule . We strip that out,
|
|
||||||
;; leaving the residue to be absorbed.
|
|
||||||
[(inferred-id val reason)
|
|
||||||
#'(syntax-case $X ()
|
|
||||||
[(inferred-rule-name . rest)
|
|
||||||
(syntax->list #'rest)])]
|
|
||||||
[(id val)
|
|
||||||
;; at this point, the 'hide property is either #f or "splice"
|
|
||||||
;; ('hide value is handled at the top of this conditional
|
|
||||||
;; we need to use boolean because a symbol is treated as an identifier.
|
|
||||||
;; also we'll separate it into its own property for clarity and test for it in "runtime.rkt"
|
|
||||||
#`(list (syntax-property $X 'splice-rh-id #,(and (syntax-property primitive-pattern 'hide) #t)))]
|
|
||||||
[(lit val)
|
|
||||||
#'(list (atomic-datum->syntax $X $X-start-pos $X-end-pos))]
|
|
||||||
[(token val)
|
|
||||||
#'(list (atomic-datum->syntax $X $X-start-pos $X-end-pos))])))))
|
|
||||||
|
|
||||||
(define whole-rule-loc
|
|
||||||
(if (empty? translated-patterns)
|
|
||||||
#'(list (current-source) #f #f #f #f)
|
|
||||||
(with-syntax ([$1-start-pos (datum->syntax (first translated-patterns) '$1-start-pos)]
|
|
||||||
[$n-end-pos (format-id (last translated-patterns) "$~a-end-pos" (length translated-patterns))])
|
|
||||||
#`(positions->srcloc $1-start-pos $n-end-pos))))
|
|
||||||
|
|
||||||
;; move 'hide-or-splice-lhs-id property into function because name is datum-ized
|
|
||||||
(with-syntax ([(translated-pattern ...) translated-patterns]
|
|
||||||
[(translated-action ...) translated-actions])
|
|
||||||
#`[(translated-pattern ...)
|
|
||||||
(rule-components->syntax '#,rule-name/false translated-action ...
|
|
||||||
#:srcloc #,whole-rule-loc
|
|
||||||
#:hide-or-splice? #,(syntax-property rule-name/false 'hide-or-splice-lhs-id))]))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
;; collect-token-types: (listof rule-syntax) -> (values (listof identifier) (listof identifier))
|
|
||||||
;;
|
|
||||||
;; Given a rule, automatically derive the list of implicit and
|
|
||||||
;; explicit token types we need to generate.
|
|
||||||
;;
|
|
||||||
;; Note: EOF is reserved, and will always be included in the list
|
|
||||||
;; of explicit token types, though the user is not allow to express it themselves.
|
|
||||||
(define (rules-collect-token-types rules)
|
|
||||||
(define-values (implicit explicit)
|
|
||||||
(for/fold ([implicit '()]
|
|
||||||
[explicit (list (datum->syntax (first rules) 'EOF))])
|
|
||||||
([r (in-list rules)])
|
|
||||||
(rule-collect-token-types r implicit explicit)))
|
|
||||||
(values (reverse implicit) (reverse explicit)))
|
|
||||||
|
|
||||||
(define (rule-collect-token-types a-rule implicit explicit)
|
|
||||||
(syntax-case a-rule (rule)
|
|
||||||
[(rule id a-pattern)
|
|
||||||
(pattern-collect-implicit-token-types #'a-pattern implicit explicit)]))
|
|
||||||
|
|
||||||
(define (pattern-collect-implicit-token-types a-pattern implicit explicit)
|
|
||||||
(let loop ([a-pattern a-pattern]
|
|
||||||
[implicit implicit]
|
|
||||||
[explicit explicit])
|
|
||||||
(syntax-case a-pattern (id lit token choice repeat maybe seq)
|
|
||||||
[(id val)
|
|
||||||
(values implicit explicit)]
|
|
||||||
[(lit val)
|
|
||||||
(values (cons #'val implicit) explicit)]
|
|
||||||
[(token val)
|
|
||||||
(begin
|
|
||||||
(when (eq? (syntax-e #'val) 'EOF)
|
|
||||||
(raise-syntax-error #f "Token EOF is reserved and can not be used in a grammar" #'val))
|
|
||||||
(values implicit (cons #'val explicit)))]
|
|
||||||
[(choice vals ...)
|
|
||||||
(for/fold ([implicit implicit]
|
|
||||||
[explicit explicit])
|
|
||||||
([v (in-list (syntax->list #'(vals ...)))])
|
|
||||||
(loop v implicit explicit))]
|
|
||||||
[(repeat min val)
|
|
||||||
(loop #'val implicit explicit)]
|
|
||||||
[(maybe val)
|
|
||||||
(loop #'val implicit explicit)]
|
|
||||||
[(seq vals ...)
|
|
||||||
(for/fold ([implicit implicit]
|
|
||||||
[explicit explicit])
|
|
||||||
([v (in-list (syntax->list #'(vals ...)))])
|
|
||||||
(loop v implicit explicit))])))
|
|
||||||
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
;; rule-id: rule -> identifier-stx
|
|
||||||
;; Get the binding id of a rule.
|
|
||||||
(define (rule-id a-rule)
|
|
||||||
(syntax-case a-rule (rule)
|
|
||||||
[(rule id a-pattern)
|
|
||||||
#'id]))
|
|
||||||
|
|
||||||
(define (rule-pattern a-rule)
|
|
||||||
(syntax-case a-rule (rule)
|
|
||||||
[(rule id a-pattern)
|
|
||||||
#'a-pattern]))
|
|
||||||
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
|
|
||||||
;; check-all-rules-defined!: (listof rule-stx) -> void
|
|
||||||
(define (check-all-rules-defined! rules)
|
|
||||||
(define table (make-free-id-table))
|
|
||||||
;; Pass one: collect all the defined rule names.
|
|
||||||
(for ([a-rule (in-list rules)])
|
|
||||||
(free-id-table-set! table (rule-id a-rule) #t))
|
|
||||||
;; Pass two: check each referenced id, and make sure it's been defined.
|
|
||||||
(for ([a-rule (in-list rules)])
|
|
||||||
(for ([referenced-id (in-list (rule-collect-used-ids a-rule))])
|
|
||||||
(unless (free-id-table-ref table referenced-id (lambda () #f))
|
|
||||||
(raise-syntax-error #f (format "Rule ~a has no definition" (syntax-e referenced-id))
|
|
||||||
referenced-id)))))
|
|
||||||
|
|
||||||
;; check-all-rules-no-duplicates!: (listof rule-stx) -> void
|
|
||||||
(define (check-all-rules-no-duplicates! rules)
|
|
||||||
(define table (make-free-id-table))
|
|
||||||
;; Pass one: collect all the defined rule names.
|
|
||||||
(for ([a-rule (in-list rules)])
|
|
||||||
(define maybe-other-rule-id (free-id-table-ref table (rule-id a-rule) (lambda () #f)))
|
|
||||||
(when maybe-other-rule-id
|
|
||||||
(raise-syntax-error #f (format "Rule ~a has a duplicate definition" (syntax-e (rule-id a-rule)))
|
|
||||||
(rule-id a-rule)
|
|
||||||
#f
|
|
||||||
(list (rule-id a-rule) maybe-other-rule-id)))
|
|
||||||
(free-id-table-set! table (rule-id a-rule) (rule-id a-rule))))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; rule-collect-used-ids: rule-stx -> (listof identifier)
|
|
||||||
;; Given a rule, extracts a list of identifiers
|
|
||||||
(define (rule-collect-used-ids a-rule)
|
|
||||||
(syntax-case a-rule (rule)
|
|
||||||
[(rule id a-pattern)
|
|
||||||
(pattern-collect-used-ids #'a-pattern '())]))
|
|
||||||
|
|
||||||
;; pattern-collect-used-ids: pattern-stx (listof identifier) -> (listof identifier)
|
|
||||||
;; Returns a flat list of rule identifiers referenced in the pattern.
|
|
||||||
(define (pattern-collect-used-ids a-pattern acc)
|
|
||||||
(let loop ([a-pattern a-pattern]
|
|
||||||
[acc acc])
|
|
||||||
(syntax-case a-pattern (id lit token choice repeat maybe seq)
|
|
||||||
[(id val)
|
|
||||||
(cons #'val acc)]
|
|
||||||
[(lit val)
|
|
||||||
acc]
|
|
||||||
[(token val)
|
|
||||||
acc]
|
|
||||||
[(choice vals ...)
|
|
||||||
(for/fold ([acc acc])
|
|
||||||
([v (in-list (syntax->list #'(vals ...)))])
|
|
||||||
(loop v acc))]
|
|
||||||
[(repeat min val)
|
|
||||||
(loop #'val acc)]
|
|
||||||
[(maybe val)
|
|
||||||
(loop #'val acc)]
|
|
||||||
[(seq vals ...)
|
|
||||||
(for/fold ([acc acc])
|
|
||||||
([v (in-list (syntax->list #'(vals ...)))])
|
|
||||||
(loop v acc))])))
|
|
||||||
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
;; check-all-rules-satisfiable: (listof rule-stx) -> void
|
|
||||||
;; Does a simple graph traversal / topological sort-like thing to make sure that, for
|
|
||||||
;; any rule, there's some finite sequence of tokens that
|
|
||||||
;; satisfies it. If this is not the case, then something horrible
|
|
||||||
;; has happened, and we need to tell the user about it.
|
|
||||||
;;
|
|
||||||
;; NOTE: Assumes all referenced rules have definitions.
|
|
||||||
(define (check-all-rules-satisfiable! rules)
|
|
||||||
(define toplevel-rule-table (make-free-id-table))
|
|
||||||
(for ([a-rule (in-list rules)])
|
|
||||||
(free-id-table-set! toplevel-rule-table
|
|
||||||
(rule-id a-rule)
|
|
||||||
(sat:make-and)))
|
|
||||||
(define leaves '())
|
|
||||||
|
|
||||||
(define (make-leaf)
|
|
||||||
(define a-leaf (sat:make-and))
|
|
||||||
(set! leaves (cons a-leaf leaves))
|
|
||||||
a-leaf)
|
|
||||||
|
|
||||||
(define (process-pattern a-pattern)
|
|
||||||
(syntax-case a-pattern (id lit token choice repeat maybe seq)
|
|
||||||
[(id val)
|
|
||||||
(free-id-table-ref toplevel-rule-table #'val)]
|
|
||||||
[(lit val)
|
|
||||||
(make-leaf)]
|
|
||||||
[(token val)
|
|
||||||
(make-leaf)]
|
|
||||||
[(choice vals ...)
|
|
||||||
(begin
|
|
||||||
(define an-or-node (sat:make-or))
|
|
||||||
(for ([v (in-list (syntax->list #'(vals ...)))])
|
|
||||||
(define a-child (process-pattern v))
|
|
||||||
(sat:add-child! an-or-node a-child))
|
|
||||||
an-or-node)]
|
|
||||||
[(repeat min val)
|
|
||||||
(syntax-case #'min ()
|
|
||||||
[0
|
|
||||||
(make-leaf)]
|
|
||||||
[else
|
|
||||||
(process-pattern #'val)])]
|
|
||||||
[(maybe val)
|
|
||||||
(make-leaf)]
|
|
||||||
[(seq vals ...)
|
|
||||||
(begin
|
|
||||||
(define an-and-node (sat:make-and))
|
|
||||||
(for ([v (in-list (syntax->list #'(vals ...)))])
|
|
||||||
(define a-child (process-pattern v))
|
|
||||||
(sat:add-child! an-and-node a-child))
|
|
||||||
an-and-node)]))
|
|
||||||
|
|
||||||
(for ([a-rule (in-list rules)])
|
|
||||||
(define rule-node (free-id-table-ref toplevel-rule-table (rule-id a-rule)))
|
|
||||||
(sat:add-child! rule-node (process-pattern (rule-pattern a-rule))))
|
|
||||||
|
|
||||||
(for ([a-leaf leaves])
|
|
||||||
(sat:visit! a-leaf))
|
|
||||||
|
|
||||||
(for ([a-rule (in-list rules)])
|
|
||||||
(define rule-node (free-id-table-ref toplevel-rule-table (rule-id a-rule)))
|
|
||||||
(unless (sat:node-yes? rule-node)
|
|
||||||
(raise-syntax-error #f (format "Rule ~a has no finite derivation" (syntax-e (rule-id a-rule)))
|
|
||||||
(rule-id a-rule)))))
|
|
@ -1,200 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require brag/rules/stx-types
|
|
||||||
(for-syntax racket/base))
|
|
||||||
|
|
||||||
(provide flatten-rule
|
|
||||||
flatten-rules
|
|
||||||
prim-rule)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(define (make-fresh-name)
|
|
||||||
(let ([n 0])
|
|
||||||
(lambda ()
|
|
||||||
(set! n (add1 n))
|
|
||||||
(string->symbol (format "%rule~a" n)))))
|
|
||||||
|
|
||||||
(define default-fresh-name
|
|
||||||
(make-fresh-name))
|
|
||||||
|
|
||||||
|
|
||||||
;; Translates rules to lists of primitive rules.
|
|
||||||
|
|
||||||
|
|
||||||
(define (flatten-rules rules #:fresh-name [fresh-name default-fresh-name])
|
|
||||||
(define ht (make-hash))
|
|
||||||
(apply append (map (lambda (a-rule) (flatten-rule a-rule
|
|
||||||
#:ht ht
|
|
||||||
#:fresh-name fresh-name))
|
|
||||||
rules)))
|
|
||||||
|
|
||||||
|
|
||||||
;; flatten-rule: rule -> (listof primitive-rule)
|
|
||||||
(define (flatten-rule a-rule
|
|
||||||
#:fresh-name [fresh-name default-fresh-name]
|
|
||||||
|
|
||||||
;; ht: (hashtableof pattern-hash-key pat)
|
|
||||||
#:ht [ht (make-hash)])
|
|
||||||
|
|
||||||
(let recur ([a-rule a-rule]
|
|
||||||
[inferred? #f])
|
|
||||||
|
|
||||||
;; lift-nonprimitive-pattern: pattern -> (values (listof primitive-rule) pattern)
|
|
||||||
;; Turns non-primitive patterns into primitive patterns, and produces a set of
|
|
||||||
;; derived rules.
|
|
||||||
(define (lift-nonprimitive-pattern a-pat)
|
|
||||||
(cond
|
|
||||||
[(primitive-pattern? a-pat)
|
|
||||||
(values '() (linearize-primitive-pattern a-pat))]
|
|
||||||
[(hash-has-key? ht (pattern->hash-key a-pat))
|
|
||||||
(values '() (list (hash-ref ht (pattern->hash-key a-pat))))]
|
|
||||||
[else
|
|
||||||
(define head (syntax-case a-pat () [(head rest ...) #'head]))
|
|
||||||
(define new-name (datum->syntax #f (fresh-name) a-pat))
|
|
||||||
(define new-inferred-id (datum->syntax #f `(inferred-id ,new-name ,head) a-pat))
|
|
||||||
(hash-set! ht (pattern->hash-key a-pat) new-inferred-id)
|
|
||||||
(values (recur #`(rule #,new-name #,a-pat) head)
|
|
||||||
(list new-inferred-id))]))
|
|
||||||
|
|
||||||
(define (lift-nonprimitive-patterns pats)
|
|
||||||
(define-values (rules patterns)
|
|
||||||
(for/fold ([inferred-ruless '()]
|
|
||||||
[patternss '()])
|
|
||||||
([p (in-list pats)])
|
|
||||||
(define-values (new-rules new-ps)
|
|
||||||
(lift-nonprimitive-pattern p))
|
|
||||||
(values (cons new-rules inferred-ruless)
|
|
||||||
(cons new-ps patternss))))
|
|
||||||
(values (apply append (reverse rules))
|
|
||||||
(apply append (reverse patterns))))
|
|
||||||
|
|
||||||
(with-syntax ([head (if inferred? #'inferred-prim-rule #'prim-rule)]
|
|
||||||
[origin (syntax-case a-rule (rule) [(rule name (pat-head rest ...)) #'pat-head])])
|
|
||||||
(syntax-case a-rule (rule)
|
|
||||||
[(rule name pat)
|
|
||||||
(syntax-case #'pat (id inferred-id lit token choice repeat maybe seq)
|
|
||||||
|
|
||||||
;; The primitive types stay as they are:
|
|
||||||
[(id val)
|
|
||||||
(list #'(head origin name [pat]))]
|
|
||||||
[(inferred-id val reason)
|
|
||||||
(list #'(head origin name [pat]))]
|
|
||||||
[(lit val)
|
|
||||||
(list #'(head origin name [pat]))]
|
|
||||||
[(token val)
|
|
||||||
(list #'(head origin name [pat]))]
|
|
||||||
|
|
||||||
|
|
||||||
;; Everything else might need lifting:
|
|
||||||
[(choice sub-pat ...)
|
|
||||||
(begin
|
|
||||||
(define-values (inferred-ruless/rev new-sub-patss/rev)
|
|
||||||
(for/fold ([rs '()] [ps '()])
|
|
||||||
([p (syntax->list #'(sub-pat ...))])
|
|
||||||
(let-values ([(new-r new-p)
|
|
||||||
(lift-nonprimitive-pattern p)])
|
|
||||||
(values (cons new-r rs) (cons new-p ps)))))
|
|
||||||
(with-syntax ([((sub-pat ...) ...) (reverse new-sub-patss/rev)])
|
|
||||||
(append (list #'(head origin name [sub-pat ...] ...))
|
|
||||||
(apply append (reverse inferred-ruless/rev)))))]
|
|
||||||
|
|
||||||
[(repeat min sub-pat)
|
|
||||||
(begin
|
|
||||||
(define-values (inferred-rules new-sub-pats)
|
|
||||||
(lift-nonprimitive-pattern #'sub-pat))
|
|
||||||
(with-syntax ([(sub-pat ...) new-sub-pats])
|
|
||||||
(cons (cond [(= (syntax-e #'min) 0)
|
|
||||||
#`(head origin name
|
|
||||||
[(inferred-id name repeat) sub-pat ...]
|
|
||||||
[])]
|
|
||||||
[(= (syntax-e #'min) 1)
|
|
||||||
#`(head origin name
|
|
||||||
[(inferred-id name repeat) sub-pat ...]
|
|
||||||
[sub-pat ...])])
|
|
||||||
inferred-rules)))]
|
|
||||||
|
|
||||||
[(maybe sub-pat)
|
|
||||||
(begin
|
|
||||||
(define-values (inferred-rules new-sub-pats)
|
|
||||||
(lift-nonprimitive-pattern #'sub-pat))
|
|
||||||
(with-syntax ([(sub-pat ...) new-sub-pats])
|
|
||||||
(cons #'(head origin name
|
|
||||||
[sub-pat ...]
|
|
||||||
[])
|
|
||||||
inferred-rules)))]
|
|
||||||
|
|
||||||
[(seq sub-pat ...)
|
|
||||||
(begin
|
|
||||||
(define-values (inferred-rules new-sub-pats)
|
|
||||||
(lift-nonprimitive-patterns (syntax->list #'(sub-pat ...))))
|
|
||||||
(with-syntax ([(sub-pat ...) new-sub-pats])
|
|
||||||
(cons #'(head origin name [sub-pat ...])
|
|
||||||
inferred-rules)))])]))))
|
|
||||||
|
|
||||||
|
|
||||||
;; Given a pattern, return a key appropriate for a hash.
|
|
||||||
;;
|
|
||||||
;; In the `ragg` days this used `syntax->datum` only.
|
|
||||||
;; The problem is that with cuts & splices in the mix, it creates ambiguity:
|
|
||||||
;; e.g., the pattern (/"," foo)* and ("," foo)* differ only in the 'hide syntax property
|
|
||||||
;; so `syntax->datum` does not capture their differences.
|
|
||||||
;; That means they produced the same hash key,
|
|
||||||
;; which meant they produced the same inferred pattern. Which is wrong.
|
|
||||||
;; So we adjust the key to take account of the 'hide property
|
|
||||||
;; by "lifting" it into the datum with cons.
|
|
||||||
;; Then the pattern-inference process treats them separately.
|
|
||||||
(define (pattern->hash-key a-pat)
|
|
||||||
(let loop ([x a-pat])
|
|
||||||
(let ([maybe-stx-list (syntax->list x)])
|
|
||||||
(if maybe-stx-list
|
|
||||||
(cons (syntax-property x 'hide) (map loop maybe-stx-list))
|
|
||||||
(syntax->datum x)))))
|
|
||||||
|
|
||||||
|
|
||||||
;; Returns true if the pattern looks primitive
|
|
||||||
(define (primitive-pattern? a-pat)
|
|
||||||
(syntax-case a-pat (id lit token choice repeat maybe seq)
|
|
||||||
[(id val)
|
|
||||||
#t]
|
|
||||||
[(lit val)
|
|
||||||
#t]
|
|
||||||
[(token val)
|
|
||||||
#t]
|
|
||||||
[(choice sub-pat ...)
|
|
||||||
#f]
|
|
||||||
[(repeat min val)
|
|
||||||
#f]
|
|
||||||
[(maybe sub-pat)
|
|
||||||
#f]
|
|
||||||
[(seq sub-pat ...)
|
|
||||||
(andmap primitive-pattern? (syntax->list #'(sub-pat ...)))]))
|
|
||||||
|
|
||||||
|
|
||||||
;; Given a primitive pattern (id, lit, token, and seqs only containing
|
|
||||||
;; primitive patterns), returns a linear sequence of just id, lits,
|
|
||||||
;; and tokens.
|
|
||||||
(define (linearize-primitive-pattern a-pat)
|
|
||||||
(define (traverse a-pat acc)
|
|
||||||
(syntax-case a-pat (id inferred-id lit token seq)
|
|
||||||
[(id val)
|
|
||||||
(cons a-pat acc)]
|
|
||||||
[(inferred-id val reason)
|
|
||||||
(cons a-pat acc)]
|
|
||||||
[(lit val)
|
|
||||||
(cons a-pat acc)]
|
|
||||||
[(token val)
|
|
||||||
(cons a-pat acc)]
|
|
||||||
[(seq vals ...)
|
|
||||||
(foldl traverse acc (syntax->list #'(vals ...)))]))
|
|
||||||
(reverse (traverse a-pat '())))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(define-syntax (prim-rule stx)
|
|
||||||
(raise-syntax-error #f "internal error: should not be macro expanded" stx))
|
|
||||||
|
|
||||||
(define-syntax (inferred-prim-rule stx)
|
|
||||||
(raise-syntax-error #f "internal error: should not be macro expanded" stx))
|
|
||||||
|
|
||||||
(define-syntax (inferred-id stx)
|
|
||||||
(raise-syntax-error #f "internal error: should not be macro expanded" stx))
|
|
@ -1,68 +0,0 @@
|
|||||||
#lang s-exp syntax/module-reader
|
|
||||||
brag/codegen/sexp-based-lang
|
|
||||||
#:read my-read
|
|
||||||
#:read-syntax my-read-syntax
|
|
||||||
#:info my-get-info
|
|
||||||
#:whole-body-readers? #t
|
|
||||||
|
|
||||||
(require brag/rules/parser
|
|
||||||
brag/rules/lexer
|
|
||||||
brag/rules/stx
|
|
||||||
brag/rules/rule-structs)
|
|
||||||
|
|
||||||
(define (my-read in)
|
|
||||||
(syntax->datum (my-read-syntax #f in)))
|
|
||||||
|
|
||||||
(define (my-read-syntax src in)
|
|
||||||
(define-values (first-line first-column first-position) (port-next-location in))
|
|
||||||
(define tokenizer (tokenize in))
|
|
||||||
(define rules
|
|
||||||
(parameterize ([current-parser-error-handler
|
|
||||||
(lambda (tok-ok? tok-name tok-value start-pos end-pos)
|
|
||||||
(raise-syntax-error
|
|
||||||
#f
|
|
||||||
(format "Error while parsing grammar near: ~a [line=~a, column=~a, position=~a]"
|
|
||||||
tok-value
|
|
||||||
(pos-line start-pos)
|
|
||||||
(pos-col start-pos)
|
|
||||||
(pos-offset start-pos))
|
|
||||||
(datum->syntax #f
|
|
||||||
(string->symbol (format "~a" tok-value))
|
|
||||||
(list src
|
|
||||||
(pos-line start-pos)
|
|
||||||
(pos-col start-pos)
|
|
||||||
(pos-offset start-pos)
|
|
||||||
(if (and (number? (pos-offset end-pos))
|
|
||||||
(number? (pos-offset start-pos)))
|
|
||||||
(- (pos-offset end-pos)
|
|
||||||
(pos-offset start-pos))
|
|
||||||
#f)))))])
|
|
||||||
(grammar-parser tokenizer)))
|
|
||||||
(define-values (last-line last-column last-position) (port-next-location in))
|
|
||||||
(list (rules->stx src rules
|
|
||||||
#:original-stx (datum->syntax #f 'original-stx
|
|
||||||
(list src
|
|
||||||
first-line
|
|
||||||
first-column
|
|
||||||
first-position
|
|
||||||
(if (and (number? last-position)
|
|
||||||
(number? first-position))
|
|
||||||
(- last-position first-position)
|
|
||||||
#f))))))
|
|
||||||
|
|
||||||
|
|
||||||
;; Extension: we'd like to cooperate with DrRacket and tell
|
|
||||||
;; it to use the default, textual lexer and color scheme when
|
|
||||||
;; editing bf programs.
|
|
||||||
;;
|
|
||||||
;; See: http://docs.racket-lang.org/guide/language-get-info.html
|
|
||||||
;; for more details, as well as the documentation in
|
|
||||||
;; syntax/module-reader.
|
|
||||||
(define (my-get-info key default default-filter)
|
|
||||||
(case key
|
|
||||||
[(color-lexer)
|
|
||||||
(dynamic-require 'syntax-color/default-lexer
|
|
||||||
'default-lexer)]
|
|
||||||
[else
|
|
||||||
(default-filter key default)]))
|
|
||||||
|
|
@ -1,212 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
(require racket/match
|
|
||||||
racket/list
|
|
||||||
racket/generator
|
|
||||||
(prefix-in lex: br-parser-tools/lex)
|
|
||||||
brag/support
|
|
||||||
brag/private/internal-support)
|
|
||||||
|
|
||||||
|
|
||||||
(provide THE-ERROR-HANDLER
|
|
||||||
make-permissive-tokenizer
|
|
||||||
atomic-datum->syntax
|
|
||||||
positions->srcloc
|
|
||||||
rule-components->syntax)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; The level of indirection here is necessary since the yacc grammar wants a
|
|
||||||
;; function value for the error handler up front. We want to delay that decision
|
|
||||||
;; till parse time.
|
|
||||||
(define (THE-ERROR-HANDLER tok-ok? tok-name tok-value start-pos end-pos)
|
|
||||||
(match (positions->srcloc start-pos end-pos)
|
|
||||||
[(list src line col offset span)
|
|
||||||
((current-parser-error-handler) tok-name
|
|
||||||
tok-value
|
|
||||||
offset
|
|
||||||
line
|
|
||||||
col
|
|
||||||
span)]))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(define no-position (lex:position #f #f #f))
|
|
||||||
(define (no-position? p)
|
|
||||||
(not
|
|
||||||
(or (lex:position-line p)
|
|
||||||
(lex:position-col p)
|
|
||||||
(lex:position-offset p))))
|
|
||||||
|
|
||||||
|
|
||||||
;; make-permissive-tokenizer: (U (sequenceof (U token token-struct eof void)) (-> (U token token-struct eof void))) hash -> (-> position-token)
|
|
||||||
;; Creates a tokenizer from the given value.
|
|
||||||
;; FIXME: clean up code.
|
|
||||||
(define (make-permissive-tokenizer tokenizer token-type-hash)
|
|
||||||
(define tokenizer-thunk (cond
|
|
||||||
[(sequence? tokenizer)
|
|
||||||
(sequence->generator tokenizer)]
|
|
||||||
[(procedure? tokenizer)
|
|
||||||
tokenizer]))
|
|
||||||
|
|
||||||
;; lookup: symbol any pos pos -> position-token
|
|
||||||
(define (lookup type val start-pos end-pos)
|
|
||||||
(lex:position-token
|
|
||||||
((hash-ref token-type-hash type
|
|
||||||
(lambda ()
|
|
||||||
((current-tokenizer-error-handler) (format "~a" type) val
|
|
||||||
(lex:position-offset start-pos)
|
|
||||||
(lex:position-line start-pos)
|
|
||||||
(lex:position-col start-pos)
|
|
||||||
(and (number? (lex:position-offset start-pos))
|
|
||||||
(number? (lex:position-offset end-pos))
|
|
||||||
(- (lex:position-offset end-pos)
|
|
||||||
(lex:position-offset start-pos))))))
|
|
||||||
val)
|
|
||||||
start-pos end-pos))
|
|
||||||
|
|
||||||
(define (permissive-tokenizer)
|
|
||||||
(define next-token (tokenizer-thunk))
|
|
||||||
(let loop ([next-token next-token])
|
|
||||||
(match next-token
|
|
||||||
[(or (? eof-object?) (? void?))
|
|
||||||
(lookup 'EOF eof no-position no-position)]
|
|
||||||
|
|
||||||
[(? symbol?)
|
|
||||||
(lookup next-token next-token no-position no-position)]
|
|
||||||
|
|
||||||
[(? string?)
|
|
||||||
(lookup (string->symbol next-token) next-token no-position no-position)]
|
|
||||||
|
|
||||||
[(? char?)
|
|
||||||
(lookup (string->symbol (string next-token)) next-token no-position no-position)]
|
|
||||||
|
|
||||||
;; Compatibility
|
|
||||||
[(? lex:token?)
|
|
||||||
(loop (token (lex:token-name next-token)
|
|
||||||
(lex:token-value next-token)))]
|
|
||||||
|
|
||||||
[(token-struct type val offset line column span skip?)
|
|
||||||
(cond [skip?
|
|
||||||
;; skip whitespace, and just tokenize again.
|
|
||||||
(permissive-tokenizer)]
|
|
||||||
|
|
||||||
[(hash-has-key? token-type-hash type)
|
|
||||||
(define start-pos (lex:position offset line column))
|
|
||||||
;; try to synthesize a consistent end position.
|
|
||||||
(define end-pos (lex:position (if (and (number? offset) (number? span))
|
|
||||||
(+ offset span)
|
|
||||||
offset)
|
|
||||||
line
|
|
||||||
(if (and (number? column) (number? span))
|
|
||||||
(+ column span)
|
|
||||||
column)))
|
|
||||||
(lookup type val start-pos end-pos)]
|
|
||||||
[else
|
|
||||||
;; We ran into a token of unrecognized type. Let's raise an appropriate error.
|
|
||||||
((current-tokenizer-error-handler) type val
|
|
||||||
offset line column span)])]
|
|
||||||
|
|
||||||
[(lex:position-token t s e)
|
|
||||||
(define a-position-token (loop t))
|
|
||||||
(lex:position-token (lex:position-token-token a-position-token)
|
|
||||||
(if (no-position? (lex:position-token-start-pos a-position-token))
|
|
||||||
s
|
|
||||||
(lex:position-token-start-pos a-position-token))
|
|
||||||
(if (no-position? (lex:position-token-end-pos a-position-token))
|
|
||||||
e
|
|
||||||
(lex:position-token-end-pos a-position-token)))]
|
|
||||||
|
|
||||||
[(lex:srcloc-token t loc)
|
|
||||||
(define a-position-token (loop t))
|
|
||||||
(lex:position-token (lex:position-token-token a-position-token)
|
|
||||||
(if (no-position? (lex:position-token-start-pos a-position-token))
|
|
||||||
(lex:position (srcloc-position loc) (srcloc-line loc) (srcloc-column loc))
|
|
||||||
(lex:position-token-start-pos a-position-token))
|
|
||||||
(if (no-position? (lex:position-token-start-pos a-position-token))
|
|
||||||
(lex:position (+ (srcloc-position loc) (srcloc-span loc)) #f #f)
|
|
||||||
(lex:position-token-end-pos a-position-token)))]
|
|
||||||
|
|
||||||
[else
|
|
||||||
;; Otherwise, we have no idea how to treat this as a token.
|
|
||||||
((current-tokenizer-error-handler) 'unknown-type (format "~a" next-token)
|
|
||||||
#f #f #f #f)])))
|
|
||||||
permissive-tokenizer)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; positions->srcloc: position position -> (list source line column offset span)
|
|
||||||
;; Given two positions, returns a srcloc-like structure, where srcloc is the value
|
|
||||||
;; consumed as the third argument to datum->syntax.
|
|
||||||
(define (positions->srcloc start-pos end-pos)
|
|
||||||
(list (current-source)
|
|
||||||
(lex:position-line start-pos)
|
|
||||||
(lex:position-col start-pos)
|
|
||||||
(lex:position-offset start-pos)
|
|
||||||
(if (and (number? (lex:position-offset end-pos))
|
|
||||||
(number? (lex:position-offset start-pos)))
|
|
||||||
(- (lex:position-offset end-pos)
|
|
||||||
(lex:position-offset start-pos))
|
|
||||||
#f)))
|
|
||||||
|
|
||||||
#|
|
|
||||||
MB: the next three functions control the parse tree output.
|
|
||||||
This would be the place to check a syntax property for hiding.
|
|
||||||
|#
|
|
||||||
;; We create a syntax using read-syntax; by definition, it should have the
|
|
||||||
;; original? property set to #t, which we then copy over to syntaxes constructed
|
|
||||||
;; with atomic-datum->syntax and rule-components->syntax.
|
|
||||||
(define stx-with-original?-property
|
|
||||||
(read-syntax #f (open-input-string "meaningless-string")))
|
|
||||||
|
|
||||||
|
|
||||||
;; atomic-datum->syntax: datum position position
|
|
||||||
;; Helper that does the ugly work in wrapping a datum into a syntax
|
|
||||||
;; with source location.
|
|
||||||
(define (atomic-datum->syntax d start-pos end-pos)
|
|
||||||
(datum->syntax #f d (positions->srcloc start-pos end-pos) stx-with-original?-property))
|
|
||||||
|
|
||||||
|
|
||||||
(define (remove-rule-name component-stx [splice #f])
|
|
||||||
;; when removing a rule name, we apply it as a syntax property to the remaining elements
|
|
||||||
;; for possible later usage (aka, why throw away information)
|
|
||||||
(with-syntax ([(name . subcomponents) component-stx])
|
|
||||||
(let ([name-datum (syntax->datum #'name)])
|
|
||||||
(if splice
|
|
||||||
;; when splicing, returned list is a regular list, with each element having the property.
|
|
||||||
(map (λ(sc) (syntax-property sc name-datum #'name)) (syntax->list #'subcomponents))
|
|
||||||
;; when hiding, returned list should be a syntaxed list with the property
|
|
||||||
;; iow, basically the same as `component-stx`, minus the name
|
|
||||||
(syntax-property (datum->syntax component-stx #'subcomponents component-stx component-stx) name-datum #'name)))))
|
|
||||||
|
|
||||||
|
|
||||||
(define (preprocess-component-lists component-lists)
|
|
||||||
; "preprocess" means splicing and rule-name-hiding where indicated
|
|
||||||
(append*
|
|
||||||
;; each `component-list` is a list that's either empty, or has a single component-stx object
|
|
||||||
;; inside `component-stx` is a name followed by subcomponents
|
|
||||||
(for*/list ([component-list (in-list component-lists)]
|
|
||||||
[component-stx (in-list component-list)]) ; this has the effect of omitting any empty `component-list`
|
|
||||||
(list
|
|
||||||
(cond
|
|
||||||
[(eq? (syntax-property component-stx 'hide-or-splice) 'hide)
|
|
||||||
(list (remove-rule-name component-stx))] ; hidden version still wrapped in a sublist
|
|
||||||
[(or (eq? (syntax-property component-stx 'hide-or-splice) 'splice)
|
|
||||||
(syntax-property component-stx 'splice-rh-id))
|
|
||||||
(remove-rule-name component-stx #t)] ; spliced version is lifted out of the sublist
|
|
||||||
[else (list component-stx)])))))
|
|
||||||
|
|
||||||
|
|
||||||
;; rule-components->syntax: (U symbol false) (listof stx) ... #:srcloc (U #f (list src line column offset span)) -> stx
|
|
||||||
;; Creates an stx out of the rule name and its components.
|
|
||||||
;; The location information of the rule spans that of its components.
|
|
||||||
(define (rule-components->syntax rule-name/false #:srcloc [srcloc #f] #:hide-or-splice? [hide-or-splice #f] . component-lists)
|
|
||||||
(define new-rule-name (datum->syntax #f rule-name/false srcloc stx-with-original?-property))
|
|
||||||
(define new-rule-components (append* (preprocess-component-lists component-lists)))
|
|
||||||
(define rule-result (cons new-rule-name new-rule-components))
|
|
||||||
(define syntaxed-rule-result (datum->syntax #f rule-result srcloc stx-with-original?-property))
|
|
||||||
;; not 'hide-or-splice-lhs-id, because this will now become a (right-hand) component in a different (left-hand) rule
|
|
||||||
;; actual splicing happens when the parent rule is processed (with procedure above)
|
|
||||||
(syntax-property syntaxed-rule-result 'hide-or-splice hide-or-splice))
|
|
||||||
|
|
@ -1,207 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
(provide make-and make-or node? node-val node-yes? visit! add-child!)
|
|
||||||
|
|
||||||
(require racket/match)
|
|
||||||
|
|
||||||
;; I can't get no... satisfaction.
|
|
||||||
;;
|
|
||||||
;; A small module to make sure a small constraint system can be satisfied.
|
|
||||||
;;
|
|
||||||
;; Small variation on topological sort where we need both AND and OR nodes.
|
|
||||||
|
|
||||||
|
|
||||||
(struct node (type val yes? parents count-to-satisfy) #:mutable)
|
|
||||||
;; or nodes are satisfied if any of the children is satisfied.
|
|
||||||
;; and nodes are satisfied if all of the children are satisfied.
|
|
||||||
|
|
||||||
|
|
||||||
;; visit!: node -> void
|
|
||||||
;; Visit a node, and marking it if it's all satisfied. Propagate
|
|
||||||
;; satisfaction to parents as appropriate.
|
|
||||||
(define visit!
|
|
||||||
(let ()
|
|
||||||
(define (dec! n)
|
|
||||||
(set-node-count-to-satisfy! n (max 0 (sub1 (node-count-to-satisfy n))))
|
|
||||||
(when (and (not (node-yes? n))
|
|
||||||
(= (node-count-to-satisfy n) 0))
|
|
||||||
(sat! n)))
|
|
||||||
|
|
||||||
(define (sat! n)
|
|
||||||
(set-node-yes?! n #t)
|
|
||||||
(for ([p (in-list (node-parents n))])
|
|
||||||
(dec! p)))
|
|
||||||
|
|
||||||
(lambda (n)
|
|
||||||
(unless (node-yes? n)
|
|
||||||
(when (= (node-count-to-satisfy n) 0)
|
|
||||||
(sat! n))))))
|
|
||||||
|
|
||||||
|
|
||||||
;; make-or: X -> node
|
|
||||||
;; Create an or node
|
|
||||||
(define (make-or [val #f])
|
|
||||||
(node 'or val #f '() 1))
|
|
||||||
|
|
||||||
|
|
||||||
;; make-and: X -> node
|
|
||||||
;; Create an and node
|
|
||||||
(define (make-and [val #f])
|
|
||||||
(node 'and val #f '() 0))
|
|
||||||
|
|
||||||
|
|
||||||
;; add-child!: node node -> void
|
|
||||||
;; Attach a child c to the parent node p.
|
|
||||||
(define (add-child! p c)
|
|
||||||
(set-node-parents! c (cons p (node-parents c)))
|
|
||||||
(match p
|
|
||||||
[(node 'or _ _ _ count-to-satisfy)
|
|
||||||
(void)]
|
|
||||||
[(node 'and _ _ _ count-to-satisfy)
|
|
||||||
(set-node-count-to-satisfy! p (add1 count-to-satisfy))]))
|
|
||||||
|
|
||||||
|
|
||||||
(module* test racket
|
|
||||||
(require (submod "..")
|
|
||||||
racket/block
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
;; a ::= a
|
|
||||||
(block
|
|
||||||
;; Self-looping "a" and-node should not say yes after visiting.
|
|
||||||
(define a (make-and 'a))
|
|
||||||
(add-child! a a)
|
|
||||||
(visit! a)
|
|
||||||
(check-false (node-yes? a)))
|
|
||||||
|
|
||||||
|
|
||||||
;; a ::= a
|
|
||||||
(block
|
|
||||||
;; Self-looping "a" or-node should not say yes after visiting.
|
|
||||||
(define a (make-or 'a))
|
|
||||||
(add-child! a a)
|
|
||||||
(visit! a)
|
|
||||||
(check-false (node-yes? a)))
|
|
||||||
|
|
||||||
|
|
||||||
;; This case should never happen in my situation, but we should check.
|
|
||||||
(block
|
|
||||||
;; Empty "a" or-node should not say yes after visiting.
|
|
||||||
(define a (make-or 'a))
|
|
||||||
(visit! a)
|
|
||||||
(check-false (node-yes? a)))
|
|
||||||
|
|
||||||
|
|
||||||
;; a : TOKEN
|
|
||||||
(block
|
|
||||||
;; Empty "a" and-node SHOULD say yes after visiting.
|
|
||||||
(define a (make-and 'a))
|
|
||||||
(visit! a)
|
|
||||||
(check-true (node-yes? a)))
|
|
||||||
|
|
||||||
|
|
||||||
;; a : a | b
|
|
||||||
;; b : TOKEN
|
|
||||||
(block
|
|
||||||
(define a (make-or 'a))
|
|
||||||
(add-child! a a)
|
|
||||||
(define b (make-and 'b))
|
|
||||||
(add-child! a b)
|
|
||||||
(visit! b)
|
|
||||||
(check-true (node-yes? a))
|
|
||||||
(check-true (node-yes? b)))
|
|
||||||
|
|
||||||
;; a : a b
|
|
||||||
;; b : TOKEN
|
|
||||||
(block
|
|
||||||
(define a (make-and 'a))
|
|
||||||
(define b (make-and 'b))
|
|
||||||
(define TOKEN (make-and 'TOKEN))
|
|
||||||
(add-child! a a)
|
|
||||||
(add-child! a b)
|
|
||||||
(add-child! b TOKEN)
|
|
||||||
(visit! TOKEN)
|
|
||||||
(check-false (node-yes? a))
|
|
||||||
(check-true (node-yes? b))
|
|
||||||
(check-true (node-yes? TOKEN)))
|
|
||||||
|
|
||||||
;; a : b
|
|
||||||
;; b : a
|
|
||||||
(block
|
|
||||||
(define a (make-and 'a))
|
|
||||||
(define b (make-and 'b))
|
|
||||||
(add-child! a b)
|
|
||||||
(add-child! b a)
|
|
||||||
(check-false (node-yes? a))
|
|
||||||
(check-false (node-yes? b)))
|
|
||||||
|
|
||||||
;; a : "a" b
|
|
||||||
;; b : a | b
|
|
||||||
(block
|
|
||||||
(define a (make-and 'a))
|
|
||||||
(define b (make-or 'b))
|
|
||||||
(define lit (make-and "a"))
|
|
||||||
(add-child! a lit)
|
|
||||||
(add-child! a b)
|
|
||||||
(add-child! b a)
|
|
||||||
(add-child! b b)
|
|
||||||
(visit! lit)
|
|
||||||
(check-false (node-yes? a))
|
|
||||||
(check-false (node-yes? b))
|
|
||||||
(check-true (node-yes? lit)))
|
|
||||||
|
|
||||||
|
|
||||||
;; x : x y
|
|
||||||
;; y : LIT
|
|
||||||
(block
|
|
||||||
(define x (make-and "x"))
|
|
||||||
(define y (make-and "y"))
|
|
||||||
(define lit (make-and "LIT"))
|
|
||||||
(add-child! x x)
|
|
||||||
(add-child! x y)
|
|
||||||
(add-child! y lit)
|
|
||||||
(visit! lit)
|
|
||||||
(check-false (node-yes? x))
|
|
||||||
(check-true (node-yes? y))
|
|
||||||
(check-true (node-yes? lit)))
|
|
||||||
|
|
||||||
|
|
||||||
;; expr: LPAREN expr RPAREN | ATOM
|
|
||||||
(block
|
|
||||||
(define LPAREN (make-and))
|
|
||||||
(define RPAREN (make-and))
|
|
||||||
(define expr (make-or))
|
|
||||||
(define expr-1 (make-and))
|
|
||||||
(define expr-2 (make-and))
|
|
||||||
(define ATOM (make-and))
|
|
||||||
(add-child! expr expr-1)
|
|
||||||
(add-child! expr expr-2)
|
|
||||||
(add-child! expr-1 LPAREN)
|
|
||||||
(add-child! expr-1 expr)
|
|
||||||
(add-child! expr-1 RPAREN)
|
|
||||||
(add-child! expr-2 ATOM)
|
|
||||||
(visit! LPAREN)
|
|
||||||
(visit! RPAREN)
|
|
||||||
(visit! ATOM)
|
|
||||||
(check-true (node-yes? expr)))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; expr: LPAREN expr RPAREN
|
|
||||||
(block
|
|
||||||
(define LPAREN (make-and))
|
|
||||||
(define RPAREN (make-and))
|
|
||||||
(define expr (make-or))
|
|
||||||
(define expr-1 (make-and))
|
|
||||||
(define expr-2 (make-and))
|
|
||||||
(define ATOM (make-and))
|
|
||||||
(add-child! expr expr-1)
|
|
||||||
(add-child! expr expr-2)
|
|
||||||
(add-child! expr-1 LPAREN)
|
|
||||||
(add-child! expr-1 expr)
|
|
||||||
(add-child! expr-1 RPAREN)
|
|
||||||
(visit! LPAREN)
|
|
||||||
(visit! RPAREN)
|
|
||||||
(check-false (node-yes? expr)))
|
|
||||||
|
|
||||||
)
|
|
@ -1,96 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
;; A language level for automatically generating parsers out of BNF grammars.
|
|
||||||
;;
|
|
||||||
;; Danny Yoo (dyoo@hashcollision.org)
|
|
||||||
;;
|
|
||||||
;; Intent: make it trivial to generate languages for Racket. At the
|
|
||||||
;; moment, I find it painful to use br-parser-tools. This library is
|
|
||||||
;; meant to make it less agonizing.
|
|
||||||
;;
|
|
||||||
;; The intended use of this language is as follows:
|
|
||||||
;;
|
|
||||||
;;;;; s-exp-grammar.rkt ;;;;;;;;;
|
|
||||||
;; #lang brag
|
|
||||||
;; s-exp : "(" s-exp* ")" | ATOM
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
|
|
||||||
|
|
||||||
;; What this generates is:
|
|
||||||
;;
|
|
||||||
;; * parse: a function that consumes a source and a
|
|
||||||
;; position-aware lexer, and produces a syntax object.
|
|
||||||
;;
|
|
||||||
;; * make-rule-parser: a custom parser given a provided start rule.
|
|
||||||
;;
|
|
||||||
;; You'll still need to do a little work, by providing a lexer that
|
|
||||||
;; defines what the uppercased tokens mean. For example, you can
|
|
||||||
;; use the br-parser-tools/lex lexer tools:
|
|
||||||
;;
|
|
||||||
;; (require brag/support
|
|
||||||
;; br-parser-tools/lex
|
|
||||||
;; br-parser-tools/lex-sre)
|
|
||||||
;;
|
|
||||||
;; (define tokenize
|
|
||||||
;; (lexer-src-pos
|
|
||||||
;; [(:+ alphabetic)
|
|
||||||
;; (token 'ATOM lexeme)]
|
|
||||||
;; [whitespace
|
|
||||||
;; (return-without-pos (tokenize/1 input-port))]
|
|
||||||
;; [(:or "(" ")")
|
|
||||||
;; (token lexeme lexeme)]))
|
|
||||||
;;
|
|
||||||
|
|
||||||
;; However, that should be all you need. The output of an
|
|
||||||
;; generated grammar is an honest-to-goodness syntax
|
|
||||||
;; object with source locations, fully-labeled by the rules.
|
|
||||||
;;
|
|
||||||
;; (parse (tokenize an-input-port))
|
|
||||||
;;
|
|
||||||
;;
|
|
||||||
|
|
||||||
;; The first rule is treated as the start rule; any successful parse
|
|
||||||
;; must finish with end-of-file.
|
|
||||||
|
|
||||||
|
|
||||||
;; Terminology:
|
|
||||||
;;
|
|
||||||
|
|
||||||
|
|
||||||
;; A rule is a rule identifier, followed by a colon ":", followed by a
|
|
||||||
;; pattern.
|
|
||||||
|
|
||||||
;; A rule identifier is an identifier that is not in upper case.
|
|
||||||
;; A rule identifier should follow the Racket rules for identifiers,
|
|
||||||
;; except that it can't contain * or +.
|
|
||||||
;;
|
|
||||||
;; A token is a rule identifier that is all in upper case.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; A pattern may either be
|
|
||||||
;;
|
|
||||||
;; * an implicit sequence of patterns,
|
|
||||||
;;
|
|
||||||
;; * a literal string,
|
|
||||||
;;
|
|
||||||
;; * a rule identifier,
|
|
||||||
;;
|
|
||||||
;; * a quanitifed pattern, either with "*" or "+",
|
|
||||||
;;
|
|
||||||
;; * an optional pattern: a pattern surrounded by "[" and "]", or
|
|
||||||
;;
|
|
||||||
;; * a grouped sequence: a pattern surrounded by "(" and ")".
|
|
||||||
|
|
||||||
|
|
||||||
(require (for-syntax racket/base
|
|
||||||
"codegen.rkt"))
|
|
||||||
|
|
||||||
(provide rules
|
|
||||||
(rename-out [#%plain-module-begin #%module-begin])
|
|
||||||
#%top-interaction)
|
|
||||||
|
|
||||||
(define-syntax (rules stx)
|
|
||||||
(rules-codegen #:parser-provider-module 'brag/cfg-parser/cfg-parser ;; 'br-parser-tools/yacc
|
|
||||||
#:parser-provider-form 'cfg-parser ;; 'parser
|
|
||||||
stx))
|
|
@ -1,12 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
|
|
||||||
## Equal numbers of 0 and 1s in a string.
|
|
||||||
##
|
|
||||||
## (Thanks to mithos28 for this one.)
|
|
||||||
|
|
||||||
|
|
||||||
equal : [zero one | one zero]
|
|
||||||
|
|
||||||
zero : "0" equal | equal "0"
|
|
||||||
|
|
||||||
one : "1" equal | equal "1"
|
|
@ -1,3 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
|
|
||||||
rule: "0"* "1"
|
|
@ -1,3 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
|
|
||||||
rule-0n1n: ["0" rule-0n1n "1"]
|
|
@ -1,7 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
|
|
||||||
expr : term (/'+' term)*
|
|
||||||
@term : factor (/'*' @factor)*
|
|
||||||
factor : ("0" | "1" | "2" | "3"
|
|
||||||
| "4" | "5" | "6" | "7"
|
|
||||||
| "8" | "9")+
|
|
@ -1,18 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
#:prefix-out my:
|
|
||||||
|
|
||||||
;; Simple baby example of JSON structure
|
|
||||||
json: number | string
|
|
||||||
| array
|
|
||||||
| @object
|
|
||||||
number: NUMBER
|
|
||||||
|
|
||||||
string: STRING
|
|
||||||
|
|
||||||
array: "[" [json ("," json)*] "]"
|
|
||||||
|
|
||||||
object: /"{" [kvpair ("," kvpair)*] /"}"
|
|
||||||
|
|
||||||
@kvpair : /ID colon /json
|
|
||||||
|
|
||||||
/colon : ":"
|
|
@ -1,16 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
|
|
||||||
;; Simple baby example of JSON structure
|
|
||||||
json: number | string
|
|
||||||
| array
|
|
||||||
| object
|
|
||||||
|
|
||||||
number: NUMBER
|
|
||||||
|
|
||||||
string: STRING
|
|
||||||
|
|
||||||
array: "[" [json ("," json)*] "]"
|
|
||||||
|
|
||||||
object: "{" [kvpair ("," kvpair)*] "}"
|
|
||||||
|
|
||||||
kvpair: ID ":" json
|
|
@ -1,13 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
|
|
||||||
## The following comes from: http://en.wikipedia.org/wiki/Backus%E2%80%93Naur_Form
|
|
||||||
|
|
||||||
<syntax> : <rule> | <rule> <syntax>
|
|
||||||
<rule> : <opt-whitespace> "<" <RULE-NAME> ">" <opt-whitespace> "::="
|
|
||||||
<opt-whitespace> <expression> <line-end>
|
|
||||||
<opt-whitespace> : " " <opt-whitespace> | "" ## "" is empty string, i.e. no whitespace
|
|
||||||
<expression> : <list> | <list> "|" <expression>
|
|
||||||
<line-end> : <opt-whitespace> <EOL> | <line-end> <line-end>
|
|
||||||
<list> : <term> | <term> <opt-whitespace> <list>
|
|
||||||
<term> : <literal> | "<" <RULE-NAME> ">"
|
|
||||||
<literal> : '"' <TEXT> '"' | "'" <TEXT> "'" ## actually, the original BNF did not use quotes
|
|
@ -1,4 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
top : expr (/"," expr)*
|
|
||||||
expr : "x" | list
|
|
||||||
list : "(" expr ("," expr)* ")"
|
|
@ -1,111 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
|
|
||||||
;; Lua parser, adapted from:
|
|
||||||
;; http://www.lua.org/manual/5.1/manual.html#8
|
|
||||||
;;
|
|
||||||
|
|
||||||
|
|
||||||
chunk : (stat ["; "])* [laststat ["; "]]
|
|
||||||
|
|
||||||
block : chunk
|
|
||||||
|
|
||||||
stat : varlist "=" explist |
|
|
||||||
functioncall |
|
|
||||||
DO block END |
|
|
||||||
WHILE exp DO block END |
|
|
||||||
REPEAT block UNTIL exp |
|
|
||||||
IF exp THEN block (ELSEIF exp THEN block)* [ELSE block] END |
|
|
||||||
FOR NAME "=" exp "," exp ["," exp] DO block END |
|
|
||||||
FOR namelist IN explist DO block END |
|
|
||||||
FUNCTION funcname funcbody |
|
|
||||||
LOCAL FUNCTION NAME funcbody |
|
|
||||||
LOCAL namelist ["=" explist]
|
|
||||||
|
|
||||||
laststat : RETURN [explist] | BREAK
|
|
||||||
|
|
||||||
funcname : NAME ("." NAME)* [":" NAME]
|
|
||||||
|
|
||||||
varlist : var ("," var)*
|
|
||||||
|
|
||||||
var : NAME | prefixexp "[" exp "]" | prefixexp "." NAME
|
|
||||||
|
|
||||||
namelist : NAME ("," NAME)*
|
|
||||||
|
|
||||||
explist : (exp ",")* exp
|
|
||||||
|
|
||||||
|
|
||||||
;; Note by dyoo: The parsing of exp deviates from Lua in that we have these administrative
|
|
||||||
;; rules to explicitly represent the precedence rules.
|
|
||||||
;;
|
|
||||||
;; See: http://www.lua.org/manual/5.1/manual.html#2.5.6
|
|
||||||
;;
|
|
||||||
;; Ragg doesn't yet automatically desugar operator precedence rules.
|
|
||||||
;; I'm doing it by hand at the moment, which is not ideal, so a future version of
|
|
||||||
;; ragg will have a story about describing precedence.
|
|
||||||
;;
|
|
||||||
;; Operator precedence in Lua follows the table below, from lower to higher priority:
|
|
||||||
;;
|
|
||||||
;; or exp_1
|
|
||||||
;; and exp_2
|
|
||||||
;; < > <= >= ~= == exp_3
|
|
||||||
;; .. exp_4
|
|
||||||
;; + - exp_5
|
|
||||||
;; * / % exp_6
|
|
||||||
;; not # - (unary) exp_7
|
|
||||||
;; ^ exp_8
|
|
||||||
;;
|
|
||||||
;; As usual, you can use parentheses to change the precedences of an expression.
|
|
||||||
;; The concatenation ('..') and exponentiation ('^') operators are right associative.
|
|
||||||
;; All other binary operators are left associative.
|
|
||||||
;;
|
|
||||||
;; The original grammar rule before encoding precedence was:
|
|
||||||
;;
|
|
||||||
;; exp : NIL | FALSE | TRUE | NUMBER | STRING | "..." | function |
|
|
||||||
;; prefixexp | tableconstructor | exp binop exp | unop exp
|
|
||||||
|
|
||||||
exp : exp_1
|
|
||||||
exp_1: exp_1 binop_1 exp_2 | exp_2
|
|
||||||
exp_2: exp_2 binop_2 exp_3 | exp_3
|
|
||||||
exp_3: exp_3 binop_3 exp_4 | exp_4
|
|
||||||
exp_4: exp_5 binop_4 exp_4 | exp_5 ;; right associative
|
|
||||||
exp_5: exp_5 binop_5 exp_6 | exp_6
|
|
||||||
exp_6: exp_6 binop_6 exp_7 | exp_7
|
|
||||||
exp_7: unop exp_8
|
|
||||||
exp_8: exp_9 binop_8 exp_8 | exp_9 ;; right associative
|
|
||||||
exp_9: NIL | FALSE | TRUE | NUMBER | STRING | "..." | function |
|
|
||||||
prefixexp | tableconstructor
|
|
||||||
binop_1: OR
|
|
||||||
binop_2: AND
|
|
||||||
binop_3: "<" | ">" | "<=" | ">=" | "~=" | "=="
|
|
||||||
binop_4: ".."
|
|
||||||
binop_5: "+" | "-"
|
|
||||||
binop_6: "*" | "/" | "%"
|
|
||||||
binop_8: "^"
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
|
|
||||||
|
|
||||||
prefixexp : var | functioncall | "(" exp ")"
|
|
||||||
|
|
||||||
functioncall : prefixexp args | prefixexp ":" NAME args
|
|
||||||
|
|
||||||
args : "(" [explist] ")" | tableconstructor | STRING
|
|
||||||
|
|
||||||
function : FUNCTION funcbody
|
|
||||||
|
|
||||||
funcbody : "(" [parlist] ")" block END
|
|
||||||
|
|
||||||
parlist : namelist ["," "..."] | "..."
|
|
||||||
|
|
||||||
tableconstructor : "{" [fieldlist] "}"
|
|
||||||
|
|
||||||
fieldlist : field (fieldsep field)* [fieldsep]
|
|
||||||
|
|
||||||
field : "[" exp "]" "=" exp | NAME "=" exp | exp
|
|
||||||
|
|
||||||
fieldsep : "," | ";"
|
|
||||||
|
|
||||||
binop : "+" | "-" | "*" | "/" | "^" | "%" | ".." |
|
|
||||||
"<" | "<=" | ">" | ">=" | "==" | "~=" |
|
|
||||||
AND | OR
|
|
||||||
|
|
||||||
unop : "-" | NOT | "#"
|
|
@ -1,3 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
nested-word-list: WORD
|
|
||||||
| LEFT-PAREN nested-word-list* RIGHT-PAREN
|
|
@ -1,5 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
|
|
||||||
expr : term ('+' term)*
|
|
||||||
term : factor ('*' factor)*
|
|
||||||
factor : INT
|
|
@ -1,10 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
|
|
||||||
;;
|
|
||||||
;; See: http://stackoverflow.com/questions/12345647/rewrite-this-script-by-designing-an-interpreter-in-racket
|
|
||||||
;;
|
|
||||||
|
|
||||||
drawing: rows*
|
|
||||||
rows: repeat chunk+ ";"
|
|
||||||
repeat: INTEGER
|
|
||||||
chunk: INTEGER STRING
|
|
@ -1,4 +0,0 @@
|
|||||||
#lang brag/examples/simple-line-drawing
|
|
||||||
3 9 X;
|
|
||||||
6 3 b 3 X 3 b;
|
|
||||||
3 9 X;
|
|
@ -1,10 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
|
|
||||||
;;
|
|
||||||
;; See: http://stackoverflow.com/questions/12345647/rewrite-this-script-by-designing-an-interpreter-in-racket
|
|
||||||
;;
|
|
||||||
|
|
||||||
drawing: rows*
|
|
||||||
rows: repeat chunk+ ";"
|
|
||||||
repeat: INTEGER
|
|
||||||
chunk: INTEGER STRING
|
|
@ -1,31 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require syntax/parse)
|
|
||||||
|
|
||||||
(provide interpret-drawing)
|
|
||||||
|
|
||||||
(define (interpret-drawing drawing-stx)
|
|
||||||
(syntax-parse drawing-stx
|
|
||||||
[({~literal drawing} row-stxs ...)
|
|
||||||
|
|
||||||
(for ([row-stx (syntax->list #'(row-stxs ...))])
|
|
||||||
(interpret-row row-stx))]))
|
|
||||||
|
|
||||||
|
|
||||||
(define (interpret-row row-stx)
|
|
||||||
(syntax-parse row-stx
|
|
||||||
[({~literal rows}
|
|
||||||
({~literal repeat} repeat-number)
|
|
||||||
chunks ... ";")
|
|
||||||
|
|
||||||
(for ([i (syntax-e #'repeat-number)])
|
|
||||||
(for ([chunk-stx (syntax->list #'(chunks ...))])
|
|
||||||
(interpret-chunk chunk-stx))
|
|
||||||
(newline))]))
|
|
||||||
|
|
||||||
|
|
||||||
(define (interpret-chunk chunk-stx)
|
|
||||||
(syntax-parse chunk-stx
|
|
||||||
[({~literal chunk} chunk-size chunk-string)
|
|
||||||
|
|
||||||
(for ([k (syntax-e #'chunk-size)])
|
|
||||||
(display (syntax-e #'chunk-string)))]))
|
|
@ -1,22 +0,0 @@
|
|||||||
#lang s-exp syntax/module-reader
|
|
||||||
brag/examples/simple-line-drawing/semantics
|
|
||||||
#:read my-read
|
|
||||||
#:read-syntax my-read-syntax
|
|
||||||
#:info my-get-info
|
|
||||||
#:whole-body-readers? #t
|
|
||||||
|
|
||||||
(require brag/examples/simple-line-drawing/lexer
|
|
||||||
brag/examples/simple-line-drawing/grammar)
|
|
||||||
|
|
||||||
(define (my-read in)
|
|
||||||
(syntax->datum (my-read-syntax #f in)))
|
|
||||||
|
|
||||||
(define (my-read-syntax src ip)
|
|
||||||
(list (parse src (tokenize ip))))
|
|
||||||
|
|
||||||
(define (my-get-info key default default-filter)
|
|
||||||
(case key
|
|
||||||
[(color-lexer)
|
|
||||||
(dynamic-require 'syntax-color/default-lexer 'default-lexer)]
|
|
||||||
[else
|
|
||||||
(default-filter key default)]))
|
|
@ -1,27 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
(provide tokenize)
|
|
||||||
|
|
||||||
;; A simple lexer for simple-line-drawing.
|
|
||||||
(require brag/support
|
|
||||||
br-parser-tools/lex)
|
|
||||||
|
|
||||||
(define (tokenize ip)
|
|
||||||
(port-count-lines! ip)
|
|
||||||
(define my-lexer
|
|
||||||
(lexer-src-pos
|
|
||||||
[(repetition 1 +inf.0 numeric)
|
|
||||||
(token 'INTEGER (string->number lexeme))]
|
|
||||||
[upper-case
|
|
||||||
(token 'STRING lexeme)]
|
|
||||||
["b"
|
|
||||||
(token 'STRING " ")]
|
|
||||||
[";"
|
|
||||||
(token ";" lexeme)]
|
|
||||||
[whitespace
|
|
||||||
(token 'WHITESPACE lexeme #:skip? #t)]
|
|
||||||
[(eof)
|
|
||||||
(void)]))
|
|
||||||
(define (next-token) (my-lexer ip))
|
|
||||||
next-token)
|
|
||||||
|
|
@ -1,48 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require (for-syntax racket/base syntax/parse))
|
|
||||||
|
|
||||||
(provide #%module-begin
|
|
||||||
;; We reuse Racket's treatment of raw datums, specifically
|
|
||||||
;; for strings and numbers:
|
|
||||||
#%datum
|
|
||||||
|
|
||||||
;; And otherwise, we provide definitions of these three forms.
|
|
||||||
;; During compiliation, Racket uses these definitions to
|
|
||||||
;; rewrite into for loops, displays, and newlines.
|
|
||||||
drawing rows chunk)
|
|
||||||
|
|
||||||
;; Define a few compile-time functions to do the syntax rewriting:
|
|
||||||
(begin-for-syntax
|
|
||||||
(define (compile-drawing drawing-stx)
|
|
||||||
(syntax-parse drawing-stx
|
|
||||||
[({~literal drawing} row-stxs ...)
|
|
||||||
|
|
||||||
(syntax/loc drawing-stx
|
|
||||||
(begin row-stxs ...))]))
|
|
||||||
|
|
||||||
(define (compile-rows row-stx)
|
|
||||||
(syntax-parse row-stx
|
|
||||||
[({~literal rows}
|
|
||||||
({~literal repeat} repeat-number)
|
|
||||||
chunks ...
|
|
||||||
";")
|
|
||||||
|
|
||||||
(syntax/loc row-stx
|
|
||||||
(for ([i repeat-number])
|
|
||||||
chunks ...
|
|
||||||
(newline)))]))
|
|
||||||
|
|
||||||
(define (compile-chunk chunk-stx)
|
|
||||||
(syntax-parse chunk-stx
|
|
||||||
[({~literal chunk} chunk-size chunk-string)
|
|
||||||
|
|
||||||
(syntax/loc chunk-stx
|
|
||||||
(for ([k chunk-size])
|
|
||||||
(display chunk-string)))])))
|
|
||||||
|
|
||||||
|
|
||||||
;; Wire up the use of "drawing", "rows", and "chunk" to these
|
|
||||||
;; transformers:
|
|
||||||
(define-syntax drawing compile-drawing)
|
|
||||||
(define-syntax rows compile-rows)
|
|
||||||
(define-syntax chunk compile-chunk)
|
|
@ -1,14 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
|
|
||||||
## Statlist grammar
|
|
||||||
|
|
||||||
statlist : stat+
|
|
||||||
stat: ID '=' expr
|
|
||||||
| 'print' expr
|
|
||||||
|
|
||||||
expr: multExpr ('+' multExpr)*
|
|
||||||
multExpr: primary (('*'|'.') primary)*
|
|
||||||
primary :
|
|
||||||
INT
|
|
||||||
| ID
|
|
||||||
| '[' expr ("," expr)* ']'
|
|
@ -1,6 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
start: (tab | space | newline | letter)*
|
|
||||||
tab: '\t'
|
|
||||||
space: " "
|
|
||||||
newline: "\n"
|
|
||||||
letter: "x" | "y" | "z"
|
|
@ -1,7 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
;; A parser for a silly language
|
|
||||||
sentence: verb optional-adjective object
|
|
||||||
verb: greeting
|
|
||||||
optional-adjective: ["happy" | "frumpy"]
|
|
||||||
greeting: "hello" | "hola" | "aloha"
|
|
||||||
object: "world" | WORLD
|
|
@ -1,7 +0,0 @@
|
|||||||
#lang setup/infotab
|
|
||||||
(define name "brag")
|
|
||||||
(define version "1.0")
|
|
||||||
(define scribblings '(("brag.scrbl")))
|
|
||||||
(define blurb '("brag: the Beautiful Racket AST Generator. A fork of Danny Yoo's ragg. A design goal is to be easy for beginners to use. Given a grammar in EBNF, brag produces a parser that generates Racket's native syntax objects with full source location."))
|
|
||||||
(define deps (list))
|
|
||||||
(define test-omit-paths '("examples/simple-line-drawing/examples/letter-i.rkt"))
|
|
@ -1,5 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
(module+ reader
|
|
||||||
(require "codegen/reader.rkt")
|
|
||||||
(provide (all-from-out "codegen/reader.rkt")))
|
|
@ -1,36 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
(require brag/support)
|
|
||||||
|
|
||||||
(provide current-source
|
|
||||||
current-parser-error-handler
|
|
||||||
current-tokenizer-error-handler)
|
|
||||||
|
|
||||||
;; During parsing, we should define the source of the input.
|
|
||||||
(define current-source (make-parameter #f))
|
|
||||||
|
|
||||||
|
|
||||||
;; When an parse error happens, we call the current-parser-error-handler:
|
|
||||||
(define current-parser-error-handler
|
|
||||||
(make-parameter
|
|
||||||
(lambda (tok-name tok-value offset line col span)
|
|
||||||
(raise (exn:fail:parsing
|
|
||||||
(format "Encountered parsing error near ~e (token ~e) while parsing ~e [line=~a, column=~a, offset=~a]"
|
|
||||||
tok-value tok-name
|
|
||||||
(current-source)
|
|
||||||
line col offset)
|
|
||||||
(current-continuation-marks)
|
|
||||||
(list (srcloc (current-source) line col offset span)))))))
|
|
||||||
|
|
||||||
;; When a tokenization error happens, we call the current-tokenizer-error-handler.
|
|
||||||
(define current-tokenizer-error-handler
|
|
||||||
(make-parameter
|
|
||||||
(lambda (tok-type tok-value offset line column span)
|
|
||||||
(raise (exn:fail:parsing
|
|
||||||
(format "Encountered unexpected token ~e (~e) while parsing ~e [line=~a, column=~a, offset=~a]"
|
|
||||||
tok-type
|
|
||||||
tok-value
|
|
||||||
(current-source)
|
|
||||||
line column offset)
|
|
||||||
(current-continuation-marks)
|
|
||||||
(list (srcloc (current-source) line column offset span)))))))
|
|
@ -1,131 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require (for-syntax racket/base "parser.rkt"))
|
|
||||||
(require br-parser-tools/lex
|
|
||||||
(prefix-in : br-parser-tools/lex-sre)
|
|
||||||
"parser.rkt"
|
|
||||||
"rule-structs.rkt"
|
|
||||||
racket/string)
|
|
||||||
|
|
||||||
(provide lex/1 tokenize)
|
|
||||||
|
|
||||||
;; A newline can be any one of the following.
|
|
||||||
(define-lex-abbrev NL (:or "\r\n" "\r" "\n"))
|
|
||||||
|
|
||||||
;; chars used for quantifiers & parse-tree filtering
|
|
||||||
(define-for-syntax quantifiers "+:*") ; colon is reserved to separate rules and productions
|
|
||||||
(define-lex-trans reserved-chars
|
|
||||||
(λ(stx) #`(char-set #,(format "~a~a~a" quantifiers hide-char splice-char))))
|
|
||||||
|
|
||||||
(define-lex-trans hide-char-trans (λ(stx) #`(char-set #,(format "~a" hide-char))))
|
|
||||||
(define-lex-trans splice-char-trans (λ(stx) #`(char-set #,(format "~a" splice-char))))
|
|
||||||
|
|
||||||
(define-lex-abbrevs
|
|
||||||
[letter (:or (:/ "a" "z") (:/ #\A #\Z))]
|
|
||||||
[digit (:/ #\0 #\9)]
|
|
||||||
[id-char (:or letter digit (:& (char-set "+:*@!-.$%&/=?^_~<>") (char-complement (reserved-chars))))]
|
|
||||||
[hide-char (hide-char-trans)]
|
|
||||||
[splice-char (splice-char-trans)]
|
|
||||||
)
|
|
||||||
|
|
||||||
(define-lex-abbrev id (:& (complement (:+ digit)) (:+ id-char)))
|
|
||||||
|
|
||||||
(define lex/1
|
|
||||||
(lexer-src-pos
|
|
||||||
;; handle whitespace chars within quotes as literal tokens: "\n" "\t" '\n' '\t'
|
|
||||||
;; by matching the escaped version, and then unescaping them before they become token-LITs
|
|
||||||
[(:: "'"
|
|
||||||
(:* (:or "\\'" "\\n" "\\t" (:~ "'" "\\")))
|
|
||||||
"'")
|
|
||||||
(token-LIT (case lexeme
|
|
||||||
[("'\\n'") "'\n'"]
|
|
||||||
[("'\\t'") "'\t'"]
|
|
||||||
[else lexeme]))]
|
|
||||||
[(:: "\""
|
|
||||||
(:* (:or "\\\"" "\\n" "\\t" (:~ "\"" "\\")))
|
|
||||||
"\"")
|
|
||||||
(token-LIT (case lexeme
|
|
||||||
[("\"\\n\"") "\"\n\""]
|
|
||||||
[("\"\\t\"") "\"\t\""]
|
|
||||||
[else lexeme]))]
|
|
||||||
["("
|
|
||||||
(token-LPAREN lexeme)]
|
|
||||||
["["
|
|
||||||
(token-LBRACKET lexeme)]
|
|
||||||
[")"
|
|
||||||
(token-RPAREN lexeme)]
|
|
||||||
["]"
|
|
||||||
(token-RBRACKET lexeme)]
|
|
||||||
[hide-char
|
|
||||||
(token-HIDE lexeme)]
|
|
||||||
[splice-char
|
|
||||||
(token-SPLICE lexeme)]
|
|
||||||
["|"
|
|
||||||
(token-PIPE lexeme)]
|
|
||||||
[(:or "+" "*")
|
|
||||||
(token-REPEAT lexeme)]
|
|
||||||
[whitespace
|
|
||||||
;; Skip whitespace
|
|
||||||
(return-without-pos (lex/1 input-port))]
|
|
||||||
;; Skip comments up to end of line
|
|
||||||
;; but detect possble kwargs.
|
|
||||||
[(:: (:or "#" ";") ; remove # as comment char
|
|
||||||
(complement (:: (:* any-char) NL (:* any-char)))
|
|
||||||
(:or NL ""))
|
|
||||||
(let ([maybe-kwarg-match (regexp-match #px"^#:(.*?)\\s*(.*?)$" lexeme)])
|
|
||||||
(when maybe-kwarg-match
|
|
||||||
(let* ([parts (map string->symbol (string-split (string-trim lexeme "#:" #:right? #f)))]
|
|
||||||
[kw (car parts)][val (cadr parts)])
|
|
||||||
(case kw
|
|
||||||
[(prefix-out) (current-prefix-out val)]
|
|
||||||
[else (error 'lexer (format "got unknown keyword ~a" kw))])))
|
|
||||||
(return-without-pos (lex/1 input-port)))]
|
|
||||||
[(eof)
|
|
||||||
(token-EOF lexeme)]
|
|
||||||
[(:: id (:* whitespace) ":")
|
|
||||||
(token-RULE_HEAD lexeme)]
|
|
||||||
[(:: hide-char id (:* whitespace) ":")
|
|
||||||
(token-RULE_HEAD_HIDDEN lexeme)]
|
|
||||||
[(:: splice-char id (:* whitespace) ":")
|
|
||||||
(token-RULE_HEAD_SPLICED lexeme)]
|
|
||||||
[id
|
|
||||||
(token-ID lexeme)]
|
|
||||||
|
|
||||||
;; We call the error handler for everything else:
|
|
||||||
[(:: any-char)
|
|
||||||
(let-values ([(rest-of-text end-pos-2)
|
|
||||||
(lex-nonwhitespace input-port)])
|
|
||||||
((current-parser-error-handler)
|
|
||||||
#f
|
|
||||||
'error
|
|
||||||
(string-append lexeme rest-of-text)
|
|
||||||
(position->pos start-pos)
|
|
||||||
(position->pos end-pos-2)))]))
|
|
||||||
|
|
||||||
|
|
||||||
;; This is the helper for the error production.
|
|
||||||
(define lex-nonwhitespace
|
|
||||||
(lexer
|
|
||||||
[(:+ (char-complement whitespace))
|
|
||||||
(values lexeme end-pos)]
|
|
||||||
[any-char
|
|
||||||
(values lexeme end-pos)]
|
|
||||||
[(eof)
|
|
||||||
(values "" end-pos)]))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; position->pos: position -> pos
|
|
||||||
;; Coerses position structures from br-parser-tools/lex to our own pos structures.
|
|
||||||
(define (position->pos a-pos)
|
|
||||||
(pos (position-offset a-pos)
|
|
||||||
(position-line a-pos)
|
|
||||||
(position-col a-pos)))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; tokenize: input-port -> (-> token)
|
|
||||||
(define (tokenize ip
|
|
||||||
#:source [source (object-name ip)])
|
|
||||||
(lambda ()
|
|
||||||
(parameterize ([file-path source])
|
|
||||||
(lex/1 ip))))
|
|
@ -1,281 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require br-parser-tools/yacc
|
|
||||||
br-parser-tools/lex
|
|
||||||
racket/list
|
|
||||||
racket/match
|
|
||||||
"rule-structs.rkt")
|
|
||||||
|
|
||||||
;; A parser for grammars.
|
|
||||||
|
|
||||||
(provide hide-char
|
|
||||||
splice-char
|
|
||||||
tokens
|
|
||||||
token-LPAREN
|
|
||||||
token-RPAREN
|
|
||||||
token-HIDE ; for hider
|
|
||||||
token-SPLICE ; for splicer
|
|
||||||
token-LBRACKET
|
|
||||||
token-RBRACKET
|
|
||||||
token-PIPE
|
|
||||||
token-REPEAT
|
|
||||||
token-RULE_HEAD
|
|
||||||
token-RULE_HEAD_HIDDEN
|
|
||||||
token-RULE_HEAD_SPLICED
|
|
||||||
token-ID
|
|
||||||
token-LIT
|
|
||||||
token-EOF
|
|
||||||
grammar-parser
|
|
||||||
|
|
||||||
current-source
|
|
||||||
current-parser-error-handler
|
|
||||||
current-prefix-out
|
|
||||||
|
|
||||||
[struct-out rule]
|
|
||||||
[struct-out lhs-id]
|
|
||||||
[struct-out pattern]
|
|
||||||
[struct-out pattern-id]
|
|
||||||
[struct-out pattern-lit]
|
|
||||||
[struct-out pattern-token]
|
|
||||||
[struct-out pattern-choice]
|
|
||||||
[struct-out pattern-repeat]
|
|
||||||
[struct-out pattern-maybe]
|
|
||||||
[struct-out pattern-seq])
|
|
||||||
|
|
||||||
(define-tokens tokens (LPAREN
|
|
||||||
RPAREN
|
|
||||||
LBRACKET
|
|
||||||
RBRACKET
|
|
||||||
HIDE
|
|
||||||
SPLICE
|
|
||||||
PIPE
|
|
||||||
REPEAT
|
|
||||||
RULE_HEAD
|
|
||||||
RULE_HEAD_HIDDEN
|
|
||||||
RULE_HEAD_SPLICED
|
|
||||||
ID
|
|
||||||
LIT
|
|
||||||
EOF))
|
|
||||||
|
|
||||||
(define hide-char #\/)
|
|
||||||
(define splice-char #\@)
|
|
||||||
|
|
||||||
;; grammar-parser: (-> token) -> (listof rule)
|
|
||||||
(define grammar-parser
|
|
||||||
(parser
|
|
||||||
(tokens tokens)
|
|
||||||
(src-pos)
|
|
||||||
(start rules)
|
|
||||||
(end EOF)
|
|
||||||
|
|
||||||
(grammar
|
|
||||||
[rules
|
|
||||||
[(rules*) $1]]
|
|
||||||
|
|
||||||
[rules*
|
|
||||||
[(rule rules*)
|
|
||||||
(cons $1 $2)]
|
|
||||||
[()
|
|
||||||
'()]]
|
|
||||||
|
|
||||||
;; I have a separate token type for rule identifiers to avoid the
|
|
||||||
;; shift/reduce conflict that happens with the implicit sequencing
|
|
||||||
;; of top-level rules. i.e. the parser can't currently tell, when
|
|
||||||
;; it sees an ID, if it should shift or reduce to a new rule.
|
|
||||||
[rule
|
|
||||||
[(RULE_HEAD pattern)
|
|
||||||
(begin
|
|
||||||
(define trimmed (regexp-replace #px"\\s*:$" $1 ""))
|
|
||||||
(rule (position->pos $1-start-pos)
|
|
||||||
(position->pos $2-end-pos)
|
|
||||||
(lhs-id (position->pos $1-start-pos)
|
|
||||||
(pos (+ (position-offset $1-start-pos)
|
|
||||||
(string-length trimmed))
|
|
||||||
(position-line $1-start-pos)
|
|
||||||
(position-col $1-start-pos))
|
|
||||||
trimmed
|
|
||||||
#f)
|
|
||||||
$2))]
|
|
||||||
|
|
||||||
[(RULE_HEAD_HIDDEN pattern) ; slash indicates hiding
|
|
||||||
(begin
|
|
||||||
(define trimmed (cadr (regexp-match (pregexp (format "~a(\\S+)\\s*:$" hide-char)) $1)))
|
|
||||||
(rule (position->pos $1-start-pos)
|
|
||||||
(position->pos $2-end-pos)
|
|
||||||
(lhs-id (position->pos $1-start-pos)
|
|
||||||
(pos (+ (position-offset $1-start-pos)
|
|
||||||
(string-length trimmed)
|
|
||||||
(string-length "!"))
|
|
||||||
(position-line $1-start-pos)
|
|
||||||
(position-col $1-start-pos))
|
|
||||||
trimmed
|
|
||||||
''hide) ; symbol needs to be double quoted in this case
|
|
||||||
$2))]
|
|
||||||
|
|
||||||
[(RULE_HEAD_SPLICED pattern) ; atsign indicates splicing
|
|
||||||
(begin
|
|
||||||
(define trimmed (cadr (regexp-match (pregexp (format "~a(\\S+)\\s*:$" splice-char)) $1)))
|
|
||||||
(rule (position->pos $1-start-pos)
|
|
||||||
(position->pos $2-end-pos)
|
|
||||||
(lhs-id (position->pos $1-start-pos)
|
|
||||||
(pos (+ (position-offset $1-start-pos)
|
|
||||||
(string-length trimmed)
|
|
||||||
(string-length "@"))
|
|
||||||
(position-line $1-start-pos)
|
|
||||||
(position-col $1-start-pos))
|
|
||||||
trimmed
|
|
||||||
''splice) ; symbol needs to be double quoted in this case
|
|
||||||
$2))]]
|
|
||||||
|
|
||||||
[pattern
|
|
||||||
[(implicit-pattern-sequence PIPE pattern)
|
|
||||||
(if (pattern-choice? $3)
|
|
||||||
(pattern-choice (position->pos $1-start-pos)
|
|
||||||
(position->pos $3-end-pos)
|
|
||||||
(cons $1 (pattern-choice-vals $3)))
|
|
||||||
(pattern-choice (position->pos $1-start-pos)
|
|
||||||
(position->pos $3-end-pos)
|
|
||||||
(list $1 $3)))]
|
|
||||||
[(implicit-pattern-sequence)
|
|
||||||
$1]]
|
|
||||||
|
|
||||||
[implicit-pattern-sequence
|
|
||||||
[(repeatable-pattern implicit-pattern-sequence)
|
|
||||||
(if (pattern-seq? $2)
|
|
||||||
(pattern-seq (position->pos $1-start-pos)
|
|
||||||
(position->pos $2-end-pos)
|
|
||||||
(cons $1 (pattern-seq-vals $2)))
|
|
||||||
(pattern-seq (position->pos $1-start-pos)
|
|
||||||
(position->pos $2-end-pos)
|
|
||||||
(list $1 $2)))]
|
|
||||||
[(repeatable-pattern)
|
|
||||||
$1]]
|
|
||||||
|
|
||||||
[repeatable-pattern
|
|
||||||
[(atomic-pattern REPEAT)
|
|
||||||
(cond [(string=? $2 "*")
|
|
||||||
(pattern-repeat (position->pos $1-start-pos)
|
|
||||||
(position->pos $2-end-pos)
|
|
||||||
0 $1)]
|
|
||||||
[(string=? $2 "+")
|
|
||||||
(pattern-repeat (position->pos $1-start-pos)
|
|
||||||
(position->pos $2-end-pos)
|
|
||||||
1 $1)]
|
|
||||||
[else
|
|
||||||
(error 'grammar-parse "unknown repetition operator ~e" $2)])]
|
|
||||||
[(atomic-pattern)
|
|
||||||
$1]]
|
|
||||||
|
|
||||||
[atomic-pattern
|
|
||||||
[(LIT)
|
|
||||||
(pattern-lit (position->pos $1-start-pos)
|
|
||||||
(position->pos $1-end-pos)
|
|
||||||
(substring $1 1 (sub1 (string-length $1)))
|
|
||||||
#f)]
|
|
||||||
|
|
||||||
[(ID)
|
|
||||||
(if (token-id? $1)
|
|
||||||
(pattern-token (position->pos $1-start-pos)
|
|
||||||
(position->pos $1-end-pos)
|
|
||||||
$1
|
|
||||||
#f)
|
|
||||||
(pattern-id (position->pos $1-start-pos)
|
|
||||||
(position->pos $1-end-pos)
|
|
||||||
$1
|
|
||||||
#f))]
|
|
||||||
|
|
||||||
[(LBRACKET pattern RBRACKET)
|
|
||||||
(pattern-maybe (position->pos $1-start-pos)
|
|
||||||
(position->pos $3-end-pos)
|
|
||||||
$2)]
|
|
||||||
|
|
||||||
[(LPAREN pattern RPAREN)
|
|
||||||
(relocate-pattern $2 (position->pos $1-start-pos) (position->pos $3-end-pos))]
|
|
||||||
|
|
||||||
[(HIDE atomic-pattern)
|
|
||||||
(relocate-pattern $2 (position->pos $1-start-pos) (position->pos $2-end-pos) 'hide)]
|
|
||||||
|
|
||||||
[(SPLICE ID)
|
|
||||||
;; only works for nonterminals on the right side
|
|
||||||
;; (meaningless with terminals)
|
|
||||||
(if (token-id? $2)
|
|
||||||
(error 'brag "Can't use splice operator with terminal")
|
|
||||||
(pattern-id (position->pos $1-start-pos)
|
|
||||||
(position->pos $2-end-pos)
|
|
||||||
$2
|
|
||||||
'splice))]])
|
|
||||||
|
|
||||||
|
|
||||||
(error (lambda (tok-ok? tok-name tok-value start-pos end-pos)
|
|
||||||
((current-parser-error-handler) tok-ok? tok-name tok-value (position->pos start-pos) (position->pos end-pos))))))
|
|
||||||
|
|
||||||
;; relocate-pattern: pattern -> pattern
|
|
||||||
;; Rewrites the pattern's start and end pos accordingly.
|
|
||||||
(define (relocate-pattern a-pat start-pos end-pos [hide? #f])
|
|
||||||
(match a-pat
|
|
||||||
[(pattern-id _ _ v h)
|
|
||||||
(pattern-id start-pos end-pos v (or hide? h))]
|
|
||||||
[(pattern-token _ _ v h)
|
|
||||||
(pattern-token start-pos end-pos v (or hide? h))]
|
|
||||||
[(pattern-lit _ _ v h)
|
|
||||||
(pattern-lit start-pos end-pos v (or hide? h))]
|
|
||||||
[(pattern-choice _ _ vs)
|
|
||||||
(pattern-choice start-pos end-pos vs)]
|
|
||||||
[(pattern-repeat _ _ m v)
|
|
||||||
(pattern-repeat start-pos end-pos m v)]
|
|
||||||
[(pattern-maybe _ _ v)
|
|
||||||
(pattern-maybe start-pos end-pos v)]
|
|
||||||
[(pattern-seq _ _ vs)
|
|
||||||
(pattern-seq start-pos end-pos vs)]
|
|
||||||
[else
|
|
||||||
(error 'relocate-pattern "Internal error when relocating ~s\n" a-pat)]))
|
|
||||||
|
|
||||||
|
|
||||||
; token-id: string -> boolean
|
|
||||||
;; Produces true if the id we see should be treated as the name of a token.
|
|
||||||
;; By convention, tokens are all upper-cased.
|
|
||||||
(define (token-id? id)
|
|
||||||
(string=? (string-upcase id)
|
|
||||||
id))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; position->pos: position -> pos
|
|
||||||
;; Coerses position structures from br-parser-tools/lex to our own pos structures.
|
|
||||||
(define (position->pos a-pos)
|
|
||||||
(pos (position-offset a-pos)
|
|
||||||
(position-line a-pos)
|
|
||||||
(position-col a-pos)))
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
|
|
||||||
;; During parsing, we should define the source of the input.
|
|
||||||
(define current-source (make-parameter #f))
|
|
||||||
|
|
||||||
(define current-prefix-out (make-parameter #f))
|
|
||||||
|
|
||||||
|
|
||||||
;; When bad things happen, we need to emit errors with source location.
|
|
||||||
(struct exn:fail:parse-grammar exn:fail (srclocs)
|
|
||||||
#:transparent
|
|
||||||
#:property prop:exn:srclocs (lambda (instance)
|
|
||||||
(exn:fail:parse-grammar-srclocs instance)))
|
|
||||||
|
|
||||||
(define current-parser-error-handler
|
|
||||||
(make-parameter
|
|
||||||
(lambda (tok-ok? tok-name tok-value start-pos end-pos)
|
|
||||||
(raise (exn:fail:parse-grammar
|
|
||||||
(format "Error while parsing grammar near: ~e [line=~a, column=~a, position=~a]"
|
|
||||||
tok-value
|
|
||||||
(pos-line start-pos)
|
|
||||||
(pos-col start-pos)
|
|
||||||
(pos-offset start-pos))
|
|
||||||
(current-continuation-marks)
|
|
||||||
(list (srcloc (current-source)
|
|
||||||
(pos-line start-pos)
|
|
||||||
(pos-col start-pos)
|
|
||||||
(pos-offset start-pos)
|
|
||||||
(if (and (number? (pos-offset end-pos))
|
|
||||||
(number? (pos-offset start-pos)))
|
|
||||||
(- (pos-offset end-pos)
|
|
||||||
(pos-offset start-pos))
|
|
||||||
#f))))))))
|
|
@ -1,43 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(provide (all-defined-out))
|
|
||||||
|
|
||||||
|
|
||||||
;; We keep our own position structure because br-parser-tools/lex's position
|
|
||||||
;; structure is non-transparent, hence highly resistant to unit testing.
|
|
||||||
(struct pos (offset line col)
|
|
||||||
#:transparent)
|
|
||||||
|
|
||||||
(struct rule (start end lhs pattern)
|
|
||||||
#:transparent)
|
|
||||||
|
|
||||||
(struct lhs-id (start end val splice)
|
|
||||||
#:transparent)
|
|
||||||
|
|
||||||
;; A pattern can be one of the following:
|
|
||||||
(struct pattern (start end)
|
|
||||||
#:transparent)
|
|
||||||
|
|
||||||
(struct pattern-id pattern (val hide)
|
|
||||||
#:transparent)
|
|
||||||
|
|
||||||
;; Token structure to be defined by the user
|
|
||||||
(struct pattern-token pattern (val hide)
|
|
||||||
#:transparent)
|
|
||||||
|
|
||||||
;; Token structure defined as the literal string to be matched.
|
|
||||||
(struct pattern-lit pattern (val hide)
|
|
||||||
#:transparent)
|
|
||||||
|
|
||||||
(struct pattern-choice pattern (vals)
|
|
||||||
#:transparent)
|
|
||||||
|
|
||||||
(struct pattern-repeat pattern (min ;; either 0 or 1
|
|
||||||
val)
|
|
||||||
#:transparent)
|
|
||||||
|
|
||||||
(struct pattern-maybe pattern (val)
|
|
||||||
#:transparent)
|
|
||||||
|
|
||||||
(struct pattern-seq pattern (vals)
|
|
||||||
#:transparent)
|
|
||||||
|
|
@ -1,34 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require br-parser-tools/lex)
|
|
||||||
|
|
||||||
(provide (all-defined-out))
|
|
||||||
|
|
||||||
;; During parsing, we should define the source of the input.
|
|
||||||
(define current-source (make-parameter #f))
|
|
||||||
|
|
||||||
|
|
||||||
;; When bad things happen, we need to emit errors with source location.
|
|
||||||
(struct exn:fail:parse-grammar exn:fail (srclocs)
|
|
||||||
#:transparent
|
|
||||||
#:property prop:exn:srclocs (lambda (instance)
|
|
||||||
(exn:fail:parse-grammar-srclocs instance)))
|
|
||||||
|
|
||||||
(define current-parser-error-handler
|
|
||||||
(make-parameter
|
|
||||||
(lambda (tok-ok? tok-name tok-value start-pos end-pos)
|
|
||||||
(raise (exn:fail:parse-grammar
|
|
||||||
(format "Error while parsing grammar near: ~e [line=~a, column~a, position=~a]"
|
|
||||||
tok-value
|
|
||||||
(position-line start-pos)
|
|
||||||
(position-col start-pos)
|
|
||||||
(position-offset start-pos))
|
|
||||||
(current-continuation-marks)
|
|
||||||
(list (srcloc (current-source)
|
|
||||||
(position-line start-pos)
|
|
||||||
(position-col start-pos)
|
|
||||||
(position-offset start-pos)
|
|
||||||
(if (and (number? (position-offset end-pos))
|
|
||||||
(number? (position-offset start-pos)))
|
|
||||||
(- (position-offset end-pos)
|
|
||||||
(position-offset start-pos))
|
|
||||||
#f))))))))
|
|
@ -1,16 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(provide (all-defined-out))
|
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
;; These are just here to provide bindings for Check Syntax.
|
|
||||||
;; Otherwise, we should never hit these, as the toplevel rules-codegen
|
|
||||||
;; should eliminate all uses of these if it does the right thing.
|
|
||||||
(define (rules stx) (raise-syntax-error #f "Used out of context of rules" stx))
|
|
||||||
(define (rule stx) (raise-syntax-error #f "Used out of context of rules" stx))
|
|
||||||
(define (id stx) (raise-syntax-error #f "Used out of context of rules" stx))
|
|
||||||
(define (lit stx) (raise-syntax-error #f "Used out of context of rules" stx))
|
|
||||||
(define (token stx) (raise-syntax-error #f "Used out of context of rules" stx))
|
|
||||||
(define (choice stx) (raise-syntax-error #f "Used out of context of rules" stx))
|
|
||||||
(define (repeat stx) (raise-syntax-error #f "Used out of context of rules" stx))
|
|
||||||
(define (maybe stx) (raise-syntax-error #f "Used out of context of rules" stx))
|
|
||||||
(define (seq stx) (raise-syntax-error #f "Used out of context of rules" stx))
|
|
@ -1,88 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
(require "rule-structs.rkt"
|
|
||||||
br-parser-tools/lex
|
|
||||||
racket/match
|
|
||||||
syntax/strip-context)
|
|
||||||
|
|
||||||
(provide rules->stx)
|
|
||||||
|
|
||||||
;; Given a sequence of rules, we translate these to syntax objects.
|
|
||||||
|
|
||||||
;; rules->stx: (listof rule) -> syntax
|
|
||||||
(define (rules->stx source rules #:original-stx [original-stx #f])
|
|
||||||
(define rule-stxs
|
|
||||||
(map (lambda (stx) (rule->stx source stx))
|
|
||||||
rules))
|
|
||||||
(datum->syntax #f
|
|
||||||
`(rules ,@rule-stxs)
|
|
||||||
original-stx))
|
|
||||||
|
|
||||||
|
|
||||||
(define (rule->stx source a-rule)
|
|
||||||
(define id-stx
|
|
||||||
(syntax-property
|
|
||||||
(datum->syntax #f
|
|
||||||
(string->symbol (lhs-id-val (rule-lhs a-rule)))
|
|
||||||
(list source
|
|
||||||
(pos-line (lhs-id-start (rule-lhs a-rule)))
|
|
||||||
(pos-col (lhs-id-start (rule-lhs a-rule)))
|
|
||||||
(pos-offset (lhs-id-start (rule-lhs a-rule)))
|
|
||||||
(if (and (number? (pos-offset (lhs-id-start (rule-lhs a-rule))))
|
|
||||||
(number? (pos-offset (lhs-id-end (rule-lhs a-rule)))))
|
|
||||||
(- (pos-offset (lhs-id-end (rule-lhs a-rule)))
|
|
||||||
(pos-offset (lhs-id-start (rule-lhs a-rule))))
|
|
||||||
#f)))
|
|
||||||
'hide-or-splice-lhs-id (lhs-id-splice (rule-lhs a-rule))))
|
|
||||||
(define pattern-stx (pattern->stx source (rule-pattern a-rule)))
|
|
||||||
(define line (pos-line (rule-start a-rule)))
|
|
||||||
(define column (pos-col (rule-start a-rule)))
|
|
||||||
(define position (pos-offset (rule-start a-rule)))
|
|
||||||
(define span (if (and (number? (pos-offset (rule-start a-rule)))
|
|
||||||
(number? (pos-offset (rule-end a-rule))))
|
|
||||||
(- (pos-offset (rule-end a-rule))
|
|
||||||
(pos-offset (rule-start a-rule)))
|
|
||||||
#f))
|
|
||||||
(datum->syntax #f
|
|
||||||
`(rule ,id-stx ,pattern-stx)
|
|
||||||
(list source line column position span)))
|
|
||||||
|
|
||||||
(define (pattern->stx source a-pattern)
|
|
||||||
(define recur (lambda (s) (pattern->stx source s)))
|
|
||||||
|
|
||||||
(define line (pos-line (pattern-start a-pattern)))
|
|
||||||
(define column (pos-col (pattern-start a-pattern)))
|
|
||||||
(define position (pos-offset (pattern-start a-pattern)))
|
|
||||||
(define span (if (and (number? (pos-offset (pattern-start a-pattern)))
|
|
||||||
(number? (pos-offset (pattern-end a-pattern))))
|
|
||||||
(- (pos-offset (pattern-end a-pattern))
|
|
||||||
(pos-offset (pattern-start a-pattern)))
|
|
||||||
#f))
|
|
||||||
(define source-location (list source line column position span))
|
|
||||||
(match a-pattern
|
|
||||||
[(struct pattern-id (start end val hide))
|
|
||||||
(syntax-property
|
|
||||||
(datum->syntax #f
|
|
||||||
`(id ,(datum->syntax #f (string->symbol val) source-location))
|
|
||||||
source-location)
|
|
||||||
'hide hide)]
|
|
||||||
[(struct pattern-lit (start end val hide))
|
|
||||||
(syntax-property
|
|
||||||
(datum->syntax #f
|
|
||||||
`(lit ,(datum->syntax #f val source-location))
|
|
||||||
source-location)
|
|
||||||
'hide hide)]
|
|
||||||
[(struct pattern-token (start end val hide))
|
|
||||||
(syntax-property
|
|
||||||
(datum->syntax #f
|
|
||||||
`(token ,(datum->syntax #f (string->symbol val) source-location))
|
|
||||||
source-location)
|
|
||||||
'hide hide)]
|
|
||||||
[(struct pattern-choice (start end vals))
|
|
||||||
(datum->syntax #f`(choice ,@(map recur vals)) source-location)]
|
|
||||||
[(struct pattern-repeat (start end min val))
|
|
||||||
(datum->syntax #f`(repeat ,min ,(recur val)) source-location)]
|
|
||||||
[(struct pattern-maybe (start end val))
|
|
||||||
(datum->syntax #f`(maybe ,(recur val)) source-location)]
|
|
||||||
[(struct pattern-seq (start end vals))
|
|
||||||
(datum->syntax #f`(seq ,@(map recur vals)) source-location)]))
|
|
@ -1,143 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require br-parser-tools/lex
|
|
||||||
racket/string
|
|
||||||
racket/struct
|
|
||||||
(prefix-in : br-parser-tools/lex-sre)
|
|
||||||
(for-syntax racket/base))
|
|
||||||
(provide (all-from-out br-parser-tools/lex)
|
|
||||||
(all-from-out br-parser-tools/lex-sre)
|
|
||||||
[struct-out token-struct]
|
|
||||||
token
|
|
||||||
[struct-out exn:fail:parsing])
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(define (token-print token port mode)
|
|
||||||
(write-string (format "~a"
|
|
||||||
(cons 'token-struct
|
|
||||||
(map (λ(proc) (format "~v" (proc token)))
|
|
||||||
(list
|
|
||||||
token-struct-type
|
|
||||||
token-struct-val
|
|
||||||
token-struct-line
|
|
||||||
token-struct-column
|
|
||||||
token-struct-offset
|
|
||||||
token-struct-span
|
|
||||||
token-struct-skip?)))) port))
|
|
||||||
|
|
||||||
|
|
||||||
(struct token-struct (type val offset line column span skip?)
|
|
||||||
#:auto-value #f
|
|
||||||
#:transparent)
|
|
||||||
|
|
||||||
|
|
||||||
;; Token constructor.
|
|
||||||
;; This is intended to be a general token structure constructor that's nice
|
|
||||||
;; to work with.
|
|
||||||
;; It should cooperate with the tokenizers constructed with make-permissive-tokenizer.
|
|
||||||
(define (token type ;; (U symbol string)
|
|
||||||
[val #f] ;; any
|
|
||||||
[srcloc #f]
|
|
||||||
#:position [position #f] ;; (U #f number)
|
|
||||||
#:line [line #f] ;; (U #f number)
|
|
||||||
#:column [column #f] ;; (U #f number)
|
|
||||||
#:span [span #f] ;; boolean
|
|
||||||
#:skip? [skip? #f])
|
|
||||||
(token-struct (if (string? type) (string->symbol type) type)
|
|
||||||
val
|
|
||||||
;; keyword values take precedence over srcloc values
|
|
||||||
(or position (and srcloc (srcloc-position srcloc)))
|
|
||||||
(or line (and srcloc (srcloc-line srcloc)))
|
|
||||||
(or column (and srcloc (srcloc-column srcloc)))
|
|
||||||
(or span (and srcloc (srcloc-span srcloc)))
|
|
||||||
skip?))
|
|
||||||
|
|
||||||
|
|
||||||
;; When bad things happen, we need to emit errors with source location.
|
|
||||||
(struct exn:fail:parsing exn:fail (srclocs)
|
|
||||||
#:transparent
|
|
||||||
#:property prop:exn:srclocs (lambda (instance)
|
|
||||||
(exn:fail:parsing-srclocs instance)))
|
|
||||||
|
|
||||||
|
|
||||||
(provide apply-lexer)
|
|
||||||
(define (apply-lexer lexer val)
|
|
||||||
(for/list ([t (in-port lexer (if (string? val) (open-input-string val) val))])
|
|
||||||
t))
|
|
||||||
|
|
||||||
(provide apply-tokenizer-maker
|
|
||||||
(rename-out [apply-tokenizer-maker apply-tokenizer]))
|
|
||||||
(define (apply-tokenizer-maker tokenize in)
|
|
||||||
(define input-port (if (string? in)
|
|
||||||
(open-input-string in)
|
|
||||||
in))
|
|
||||||
(define token-producer (tokenize input-port))
|
|
||||||
(for/list ([token (in-producer token-producer (λ(tok)
|
|
||||||
(define val (cond
|
|
||||||
;; position-tokens are produced by lexer-src-pos,
|
|
||||||
[(position-token? tok)
|
|
||||||
(position-token-token tok)]
|
|
||||||
;; and srcloc-tokens by lexer-srcloc
|
|
||||||
[(srcloc-token? tok)
|
|
||||||
(srcloc-token-token tok)]
|
|
||||||
[else tok]))
|
|
||||||
(or (eof-object? val) (void? val))))])
|
|
||||||
token))
|
|
||||||
|
|
||||||
(provide apply-colorer)
|
|
||||||
(define (apply-colorer colorer port-or-string)
|
|
||||||
(define p (if (string? port-or-string)
|
|
||||||
(open-input-string port-or-string)
|
|
||||||
port-or-string))
|
|
||||||
(let loop ([acc null])
|
|
||||||
(define-values (lex cat shape start end) (colorer p))
|
|
||||||
(if (or (eq? 'eof cat) (eof-object? lex))
|
|
||||||
(reverse acc)
|
|
||||||
(loop (cons (list lex cat shape start end) acc)))))
|
|
||||||
|
|
||||||
(provide trim-ends)
|
|
||||||
(define (trim-ends left lexeme right)
|
|
||||||
(string-trim (string-trim lexeme left #:right? #f) right #:left? #f))
|
|
||||||
|
|
||||||
(provide from/to)
|
|
||||||
(define-lex-trans from/to
|
|
||||||
(λ(stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
[(_ OPEN CLOSE)
|
|
||||||
;; (:seq any-string CLOSE any-string) pattern makes it non-greedy
|
|
||||||
#'(:seq OPEN (complement (:seq any-string CLOSE any-string)) CLOSE)])))
|
|
||||||
|
|
||||||
(provide from/stop-before)
|
|
||||||
(define-lex-trans from/stop-before
|
|
||||||
(λ(stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
[(_ OPEN CLOSE)
|
|
||||||
#'(:seq OPEN (:* (:~ CLOSE)))])))
|
|
||||||
|
|
||||||
(provide uc+lc)
|
|
||||||
(define-lex-trans uc+lc
|
|
||||||
(λ(stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
[(_ . STRS)
|
|
||||||
(with-syntax ([(UCSTR ...) (map (compose1 string-upcase syntax->datum) (syntax->list #'STRS))]
|
|
||||||
[(LCSTR ...) (map (compose1 string-downcase syntax->datum) (syntax->list #'STRS))])
|
|
||||||
#'(union (union UCSTR ...) (union LCSTR ...)))])))
|
|
||||||
|
|
||||||
;; change names of lexer abbreviations to be consistent with Racket srcloc conventions
|
|
||||||
|
|
||||||
(define-syntax-rule (dprt ID-IN ID-OUT)
|
|
||||||
(begin
|
|
||||||
(provide ID-IN)
|
|
||||||
(define-syntax ID-IN (make-rename-transformer (syntax ID-OUT)))))
|
|
||||||
|
|
||||||
(dprt lexeme-start start-pos)
|
|
||||||
(dprt lexeme-end end-pos)
|
|
||||||
(dprt line position-line)
|
|
||||||
(dprt col position-col)
|
|
||||||
(dprt pos position-offset)
|
|
||||||
|
|
||||||
(provide span)
|
|
||||||
(define (span lexeme-start lexeme-end)
|
|
||||||
(abs ; thus same result in reverse order
|
|
||||||
(- (pos lexeme-end)
|
|
||||||
(pos lexeme-start))))
|
|
@ -1,30 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
(require brag/examples/01-equal
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (parse ""))
|
|
||||||
'(equal))
|
|
||||||
(check-equal? (syntax->datum (parse "01"))
|
|
||||||
'(equal (zero (equal) #\0)
|
|
||||||
(one (equal) #\1)))
|
|
||||||
(check-equal? (syntax->datum (parse "10"))
|
|
||||||
'(equal (one (equal) #\1)
|
|
||||||
(zero (equal) #\0)))
|
|
||||||
(check-equal? (syntax->datum (parse "0011"))
|
|
||||||
'(equal (zero (equal) #\0)
|
|
||||||
(one (equal (zero (equal) #\0)
|
|
||||||
(one (equal) #\1))
|
|
||||||
#\1)))
|
|
||||||
(check-equal? (syntax->datum (parse "0110"))
|
|
||||||
'(equal (one (equal (zero (equal) #\0)
|
|
||||||
(one (equal) #\1))
|
|
||||||
#\1)
|
|
||||||
(zero (equal) #\0)))
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (parse "1100"))
|
|
||||||
'(equal (one (equal) #\1)
|
|
||||||
(zero (equal (one (equal) #\1)
|
|
||||||
(zero (equal) #\0))
|
|
||||||
#\0)))
|
|
||||||
|
|
@ -1,50 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
(require brag/examples/0n1
|
|
||||||
brag/support
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
(define (lex ip)
|
|
||||||
(port-count-lines! ip)
|
|
||||||
(lambda ()
|
|
||||||
(define next-char (read-char ip))
|
|
||||||
(cond [(eof-object? next-char)
|
|
||||||
(token eof)]
|
|
||||||
[(char=? next-char #\0)
|
|
||||||
(token "0" "0")]
|
|
||||||
[(char=? next-char #\1)
|
|
||||||
(token "1" "1")])))
|
|
||||||
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (parse #f (lex (open-input-string "1"))))
|
|
||||||
'(rule "1"))
|
|
||||||
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (parse #f (lex (open-input-string "01"))))
|
|
||||||
'(rule "0" "1"))
|
|
||||||
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (parse #f (lex (open-input-string "001"))))
|
|
||||||
'(rule "0" "0" "1"))
|
|
||||||
|
|
||||||
|
|
||||||
(check-exn exn:fail:parsing?
|
|
||||||
(lambda ()
|
|
||||||
(parse #f (lex (open-input-string "0")))))
|
|
||||||
|
|
||||||
(check-exn exn:fail:parsing?
|
|
||||||
(lambda ()
|
|
||||||
(parse #f (lex (open-input-string "10")))))
|
|
||||||
|
|
||||||
(check-exn exn:fail:parsing?
|
|
||||||
(lambda ()
|
|
||||||
(parse #f (lex (open-input-string "010")))))
|
|
||||||
|
|
||||||
|
|
||||||
;; This should fail predictably because we're passing in tokens
|
|
||||||
;; that the parser doesn't know.
|
|
||||||
(check-exn exn:fail:parsing?
|
|
||||||
(lambda () (parse '("zero" "one" "zero"))))
|
|
||||||
(check-exn (regexp (regexp-quote
|
|
||||||
"Encountered unexpected token \"zero\" (\"zero\") while parsing"))
|
|
||||||
(lambda () (parse '("zero" "one" "zero"))))
|
|
@ -1,49 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require brag/examples/0n1n
|
|
||||||
brag/support
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
(define (lex ip)
|
|
||||||
(port-count-lines! ip)
|
|
||||||
(lambda ()
|
|
||||||
(define next-char (read-char ip))
|
|
||||||
(cond [(eof-object? next-char)
|
|
||||||
(token eof)]
|
|
||||||
[(char=? next-char #\0)
|
|
||||||
(token "0" "0")]
|
|
||||||
[(char=? next-char #\1)
|
|
||||||
(token "1" "1")])))
|
|
||||||
|
|
||||||
|
|
||||||
;; The only rule in the grammar is:
|
|
||||||
;;
|
|
||||||
;; rule-0n1n: ["0" rule-0n1n "1"]
|
|
||||||
;;
|
|
||||||
;; It makes use of the "maybe" pattern. The result type of the
|
|
||||||
;; grammar rule is:
|
|
||||||
;;
|
|
||||||
;; rule-0n1n: (U #f
|
|
||||||
;; (list "0" rule-0n1n "1"))
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (parse #f (lex (open-input-string "0011"))))
|
|
||||||
'(rule-0n1n "0" (rule-0n1n "0" (rule-0n1n) "1") "1"))
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (parse #f (lex (open-input-string "01"))))
|
|
||||||
'(rule-0n1n "0" (rule-0n1n) "1"))
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (parse #f (lex (open-input-string ""))))
|
|
||||||
'(rule-0n1n))
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (parse #f (lex (open-input-string "000111"))))
|
|
||||||
'(rule-0n1n "0" (rule-0n1n "0" (rule-0n1n "0" (rule-0n1n) "1") "1") "1"))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(check-exn exn:fail:parsing?
|
|
||||||
(lambda () (parse #f (lex (open-input-string "0001111")))))
|
|
||||||
|
|
||||||
(check-exn exn:fail:parsing?
|
|
||||||
(lambda () (parse #f (lex (open-input-string "0001110")))))
|
|
||||||
|
|
||||||
(check-exn exn:fail:parsing?
|
|
||||||
(lambda () (parse #f (lex (open-input-string "10001110")))))
|
|
@ -1,18 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
|
|
||||||
(require "test-0n1.rkt"
|
|
||||||
"test-0n1n.rkt"
|
|
||||||
"test-01-equal.rkt"
|
|
||||||
"test-simple-arithmetic-grammar.rkt"
|
|
||||||
"test-baby-json.rkt"
|
|
||||||
"test-baby-json-hider.rkt"
|
|
||||||
"test-wordy.rkt"
|
|
||||||
"test-simple-line-drawing.rkt"
|
|
||||||
"test-flatten.rkt"
|
|
||||||
"test-lexer.rkt"
|
|
||||||
"test-parser.rkt"
|
|
||||||
"test-errors.rkt"
|
|
||||||
"test-old-token.rkt"
|
|
||||||
"test-weird-grammar.rkt"
|
|
||||||
(submod brag/codegen/satisfaction test))
|
|
@ -1,19 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require brag/examples/baby-json-hider
|
|
||||||
brag/support
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
(define parse-result (parse (list "{"
|
|
||||||
(token 'ID "message")
|
|
||||||
":"
|
|
||||||
(token 'STRING "'hello world'")
|
|
||||||
"}")))
|
|
||||||
(check-equal? (syntax->datum parse-result) '(json (":")))
|
|
||||||
|
|
||||||
(define syntaxed-colon-parens (cadr (syntax->list parse-result)))
|
|
||||||
(check-equal? (syntax->datum (syntax-property syntaxed-colon-parens 'kvpair)) 'kvpair)
|
|
||||||
|
|
||||||
(check-equal?
|
|
||||||
(syntax->datum
|
|
||||||
(parse "[[[{}]],[],[[{}]]]"))
|
|
||||||
'(json (array #\[ (json (array #\[ (json (array #\[ (json) #\])) #\])) #\, (json (array #\[ #\])) #\, (json (array #\[ (json (array #\[ (json) #\])) #\])) #\])))
|
|
@ -1,25 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require brag/examples/baby-json
|
|
||||||
brag/support
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
(check-equal?
|
|
||||||
(syntax->datum
|
|
||||||
(parse (list "{"
|
|
||||||
(token 'ID "message")
|
|
||||||
":"
|
|
||||||
(token 'STRING "'hello world'")
|
|
||||||
"}")))
|
|
||||||
'(json (object "{"
|
|
||||||
(kvpair "message" ":" (json (string "'hello world'")))
|
|
||||||
"}")))
|
|
||||||
|
|
||||||
|
|
||||||
(check-equal?
|
|
||||||
(syntax->datum
|
|
||||||
(parse "[[[{}]],[],[[{}]]]"))
|
|
||||||
'(json (array #\[ (json (array #\[ (json (array #\[ (json (object #\{ #\})) #\])) #\])) #\, (json (array #\[ #\])) #\, (json (array #\[ (json (array #\[ (json (object #\{ #\})) #\])) #\])) #\])))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,9 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require brag/examples/cutter
|
|
||||||
brag/support
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
;; related to rule-flattening problem
|
|
||||||
(check-equal?
|
|
||||||
(parse-to-datum (list "(" "x" "," "x" ")"))
|
|
||||||
'(top (expr (list "(" (expr "x") "," (expr "x") ")"))))
|
|
@ -1,137 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
(require rackunit
|
|
||||||
(for-syntax racket/base))
|
|
||||||
|
|
||||||
;; The tests in this module make sure we produce proper error messages
|
|
||||||
;; on weird grammars.
|
|
||||||
|
|
||||||
|
|
||||||
(define-namespace-anchor anchor)
|
|
||||||
(define ns (namespace-anchor->namespace anchor))
|
|
||||||
(define (c prog)
|
|
||||||
(parameterize ([current-namespace ns]
|
|
||||||
[read-accept-reader #t])
|
|
||||||
(define ip (open-input-string prog))
|
|
||||||
(port-count-lines! ip)
|
|
||||||
(compile (read-syntax #f ip))))
|
|
||||||
|
|
||||||
|
|
||||||
;; Helper to let me quickly write compile-error checks.
|
|
||||||
(define-syntax (check-compile-error stx)
|
|
||||||
(syntax-case stx ()
|
|
||||||
[(_ prog expected-msg)
|
|
||||||
(quasisyntax/loc stx
|
|
||||||
(begin #,(syntax/loc stx
|
|
||||||
(check-exn (regexp (regexp-quote expected-msg))
|
|
||||||
(lambda ()
|
|
||||||
(c prog))))
|
|
||||||
#,(syntax/loc stx
|
|
||||||
(check-exn exn:fail:syntax?
|
|
||||||
(lambda ()
|
|
||||||
(c prog))))))]))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; errors with position are sensitive to length of lang line
|
|
||||||
(define lang-line "#lang brag")
|
|
||||||
|
|
||||||
(check-compile-error (format "~a" lang-line)
|
|
||||||
"The grammar does not appear to have any rules")
|
|
||||||
|
|
||||||
(check-compile-error (format "~a\nfoo" lang-line)
|
|
||||||
"Error while parsing grammar near: foo [line=2, column=0, position=12]")
|
|
||||||
|
|
||||||
(check-compile-error (format "~a\nnumber : 42" lang-line)
|
|
||||||
"Error while parsing grammar near: 42 [line=2, column=9, position=21]")
|
|
||||||
|
|
||||||
(check-compile-error (format "~a\nnumber : 1" lang-line)
|
|
||||||
"Error while parsing grammar near: 1 [line=2, column=9, position=21]")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(check-compile-error "#lang brag\n x: NUMBER\nx:STRING"
|
|
||||||
"Rule x has a duplicate definition")
|
|
||||||
|
|
||||||
;; Check to see that missing definitions for rules also raise good syntax
|
|
||||||
;; errors:
|
|
||||||
|
|
||||||
(check-compile-error "#lang brag\nx:y"
|
|
||||||
"Rule y has no definition")
|
|
||||||
|
|
||||||
(check-compile-error "#lang brag\nnumber : 1flarbl"
|
|
||||||
"Rule 1flarbl has no definition")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(check-compile-error "#lang brag\nprogram: EOF"
|
|
||||||
"Token EOF is reserved and can not be used in a grammar")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; Nontermination checks:
|
|
||||||
(check-compile-error "#lang brag\nx : x"
|
|
||||||
"Rule x has no finite derivation")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(check-compile-error #<<EOF
|
|
||||||
#lang brag
|
|
||||||
x : x y
|
|
||||||
y : "y"
|
|
||||||
EOF
|
|
||||||
"Rule x has no finite derivation")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
; This should be illegal too:
|
|
||||||
(check-compile-error #<<EOF
|
|
||||||
#lang brag
|
|
||||||
a : "a" b
|
|
||||||
b : a | b
|
|
||||||
EOF
|
|
||||||
"Rule a has no finite derivation")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(check-compile-error #<<EOF
|
|
||||||
#lang brag
|
|
||||||
a : [b]
|
|
||||||
b : [c]
|
|
||||||
c : c
|
|
||||||
EOF
|
|
||||||
"Rule c has no finite derivation")
|
|
||||||
|
|
||||||
|
|
||||||
(check-compile-error #<<EOF
|
|
||||||
#lang brag
|
|
||||||
a : [b]
|
|
||||||
b : c
|
|
||||||
c : c
|
|
||||||
EOF
|
|
||||||
"Rule b has no finite derivation")
|
|
||||||
|
|
||||||
|
|
||||||
(check-compile-error #<<EOF
|
|
||||||
#lang brag
|
|
||||||
a : [a]
|
|
||||||
b : [b]
|
|
||||||
c : c
|
|
||||||
EOF
|
|
||||||
"Rule c has no finite derivation")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(check-compile-error #<<EOF
|
|
||||||
#lang racket/base
|
|
||||||
(require brag/examples/simple-line-drawing)
|
|
||||||
(define bad-parser (make-rule-parser crunchy))
|
|
||||||
EOF
|
|
||||||
"Rule crunchy is not defined in the grammar"
|
|
||||||
)
|
|
@ -1,193 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require brag/rules/stx-types
|
|
||||||
brag/codegen/flatten
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
|
|
||||||
(define (make-fresh-name)
|
|
||||||
(let ([n 0])
|
|
||||||
(lambda ()
|
|
||||||
(set! n (add1 n))
|
|
||||||
(string->symbol (format "r~a" n)))))
|
|
||||||
|
|
||||||
|
|
||||||
;; Simple literals
|
|
||||||
(check-equal? (map syntax->datum (flatten-rule #'(rule expr (lit "hello"))))
|
|
||||||
'((prim-rule lit expr [(lit "hello")])))
|
|
||||||
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule expr
|
|
||||||
(seq (lit "hello")
|
|
||||||
(lit "world")))))
|
|
||||||
'((prim-rule seq expr [(lit "hello") (lit "world")])))
|
|
||||||
|
|
||||||
|
|
||||||
(check-equal? (map syntax->datum (flatten-rule #'(rule expr (token HELLO))))
|
|
||||||
'((prim-rule token expr [(token HELLO)])))
|
|
||||||
|
|
||||||
(check-equal? (map syntax->datum (flatten-rule #'(rule expr (id rule-2))))
|
|
||||||
'((prim-rule id expr [(id rule-2)])))
|
|
||||||
|
|
||||||
|
|
||||||
;; Sequences of primitives
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule expr (seq (lit "1") (seq (lit "2") (lit "3"))))))
|
|
||||||
'((prim-rule seq expr
|
|
||||||
[(lit "1") (lit "2") (lit "3")])))
|
|
||||||
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule expr (seq (seq (lit "1") (lit "2")) (lit "3")))))
|
|
||||||
'((prim-rule seq expr
|
|
||||||
[(lit "1") (lit "2") (lit "3")])))
|
|
||||||
|
|
||||||
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule expr (seq (seq (lit "1")) (seq (lit "2") (lit "3"))))))
|
|
||||||
'((prim-rule seq expr
|
|
||||||
[(lit "1") (lit "2") (lit "3")])))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; choices
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule expr (choice (id rule-2) (id rule-3)))))
|
|
||||||
'((prim-rule choice expr
|
|
||||||
[(id rule-2)]
|
|
||||||
[(id rule-3)])))
|
|
||||||
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule sexp (choice (seq (lit "(") (lit ")"))
|
|
||||||
(seq)))
|
|
||||||
#:fresh-name (make-fresh-name)))
|
|
||||||
'((prim-rule choice sexp
|
|
||||||
[(lit "(") (lit ")")] [])))
|
|
||||||
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule sexp (choice (seq (seq (lit "(") (token BLAH))
|
|
||||||
(lit ")"))
|
|
||||||
(seq)))
|
|
||||||
#:fresh-name (make-fresh-name)))
|
|
||||||
'((prim-rule choice sexp
|
|
||||||
[(lit "(") (token BLAH) (lit ")")] [])))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; maybe
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule expr (maybe (id rule-2)))))
|
|
||||||
'((prim-rule maybe expr
|
|
||||||
[(id rule-2)]
|
|
||||||
[])))
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule expr (maybe (token HUH)))))
|
|
||||||
'((prim-rule maybe expr
|
|
||||||
[(token HUH)]
|
|
||||||
[])))
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule expr (maybe (seq (lit "hello") (lit "world"))))))
|
|
||||||
'((prim-rule maybe expr
|
|
||||||
[(lit "hello") (lit "world")]
|
|
||||||
[])))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; repeat
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule rule-2+ (repeat 0 (id rule-2)))))
|
|
||||||
'((prim-rule repeat rule-2+
|
|
||||||
[(inferred-id rule-2+ repeat) (id rule-2)]
|
|
||||||
[])))
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule rule-2+ (repeat 0 (seq (lit "+") (id rule-2))))))
|
|
||||||
'((prim-rule repeat rule-2+
|
|
||||||
[(inferred-id rule-2+ repeat) (lit "+") (id rule-2)]
|
|
||||||
[])))
|
|
||||||
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule rule-2+ (repeat 1 (id rule-2)))))
|
|
||||||
'((prim-rule repeat rule-2+
|
|
||||||
[(inferred-id rule-2+ repeat) (id rule-2)]
|
|
||||||
[(id rule-2)])))
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule rule-2+ (repeat 1 (seq (lit "-") (id rule-2))))))
|
|
||||||
'((prim-rule repeat rule-2+
|
|
||||||
[(inferred-id rule-2+ repeat) (lit "-") (id rule-2)]
|
|
||||||
[(lit "-") (id rule-2)])))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; Mixtures
|
|
||||||
|
|
||||||
;; choice and maybe
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule sexp (choice (lit "x")
|
|
||||||
(maybe (lit "y"))))
|
|
||||||
#:fresh-name (make-fresh-name)))
|
|
||||||
'((prim-rule choice sexp
|
|
||||||
[(lit "x")]
|
|
||||||
[(inferred-id r1 maybe)])
|
|
||||||
(inferred-prim-rule maybe r1
|
|
||||||
[(lit "y")]
|
|
||||||
[])))
|
|
||||||
;; choice, maybe, repeat
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule sexp (choice (lit "x")
|
|
||||||
(maybe (repeat 1 (lit "y")))))
|
|
||||||
#:fresh-name (make-fresh-name)))
|
|
||||||
'((prim-rule choice sexp
|
|
||||||
[(lit "x")]
|
|
||||||
[(inferred-id r1 maybe)])
|
|
||||||
(inferred-prim-rule maybe r1
|
|
||||||
[(inferred-id r2 repeat)]
|
|
||||||
[])
|
|
||||||
(inferred-prim-rule repeat r2
|
|
||||||
[(inferred-id r2 repeat) (lit "y")]
|
|
||||||
[(lit "y")])))
|
|
||||||
;; choice, seq
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule sexp (choice (seq (lit "x") (lit "y"))
|
|
||||||
(seq (lit "z") (lit "w"))))
|
|
||||||
#:fresh-name (make-fresh-name)))
|
|
||||||
'((prim-rule choice sexp
|
|
||||||
[(lit "x") (lit "y")]
|
|
||||||
[(lit "z") (lit "w")])))
|
|
||||||
|
|
||||||
;; maybe, choice
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule sexp (maybe (choice (seq (lit "x") (lit "y"))
|
|
||||||
(seq (lit "z") (lit "w")))))
|
|
||||||
#:fresh-name (make-fresh-name)))
|
|
||||||
'((prim-rule maybe sexp
|
|
||||||
[(inferred-id r1 choice)]
|
|
||||||
[])
|
|
||||||
(inferred-prim-rule choice r1
|
|
||||||
[(lit "x") (lit "y")]
|
|
||||||
[(lit "z") (lit "w")])))
|
|
||||||
|
|
||||||
|
|
||||||
;; seq, repeat
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rule #'(rule expr (seq (id term) (repeat 0 (seq (lit "+") (id term)))))
|
|
||||||
#:fresh-name (make-fresh-name)))
|
|
||||||
'((prim-rule seq expr [(id term) (inferred-id r1 repeat)])
|
|
||||||
(inferred-prim-rule repeat r1 [(inferred-id r1 repeat) (lit "+") (id term)] [])))
|
|
||||||
|
|
||||||
|
|
||||||
;; larger example: simple arithmetic
|
|
||||||
(check-equal? (map syntax->datum
|
|
||||||
(flatten-rules (syntax->list
|
|
||||||
#'((rule expr (seq (id term) (repeat 0 (seq (lit "+") (id term)))))
|
|
||||||
(rule term (seq (id factor) (repeat 0 (seq (lit "*") (id factor)))))
|
|
||||||
(rule factor (token INT))))
|
|
||||||
#:fresh-name (make-fresh-name)))
|
|
||||||
|
|
||||||
'((prim-rule seq expr [(id term) (inferred-id r1 repeat)])
|
|
||||||
(inferred-prim-rule repeat r1 [(inferred-id r1 repeat) (lit "+") (id term)] [])
|
|
||||||
(prim-rule seq term [(id factor) (inferred-id r2 repeat)])
|
|
||||||
(inferred-prim-rule repeat r2 [(inferred-id r2 repeat) (lit "*") (id factor)] [])
|
|
||||||
(prim-rule token factor [(token INT)])))
|
|
@ -1,73 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require brag/rules/lexer
|
|
||||||
rackunit
|
|
||||||
br-parser-tools/lex)
|
|
||||||
|
|
||||||
(define (l s)
|
|
||||||
(define t (lex/1 (open-input-string s)))
|
|
||||||
(list (token-name (position-token-token t))
|
|
||||||
(token-value (position-token-token t))
|
|
||||||
(position-offset (position-token-start-pos t))
|
|
||||||
(position-offset (position-token-end-pos t))))
|
|
||||||
|
|
||||||
;; WARNING: the offsets are not in terms of file positions. So they
|
|
||||||
;; start counting at 1, not 0.
|
|
||||||
(check-equal? (l " hi")
|
|
||||||
'(ID "hi" 2 4))
|
|
||||||
|
|
||||||
(check-equal? (l " hi")
|
|
||||||
'(ID "hi" 3 5))
|
|
||||||
|
|
||||||
(check-equal? (l "hi")
|
|
||||||
'(ID "hi" 1 3))
|
|
||||||
|
|
||||||
(check-equal? (l "# foobar\nhi")
|
|
||||||
'(ID "hi" 10 12))
|
|
||||||
|
|
||||||
(check-equal? (l "# foobar\rhi")
|
|
||||||
'(ID "hi" 10 12))
|
|
||||||
|
|
||||||
(check-equal? (l "# foobar\r\nhi")
|
|
||||||
'(ID "hi" 11 13))
|
|
||||||
|
|
||||||
(check-equal? (l "hi:")
|
|
||||||
'(RULE_HEAD "hi:" 1 4))
|
|
||||||
|
|
||||||
(check-equal? (l "hi :")
|
|
||||||
'(RULE_HEAD "hi :" 1 7))
|
|
||||||
|
|
||||||
(check-equal? (l "|")
|
|
||||||
'(PIPE "|" 1 2))
|
|
||||||
|
|
||||||
(check-equal? (l "(")
|
|
||||||
'(LPAREN "(" 1 2))
|
|
||||||
|
|
||||||
(check-equal? (l "[")
|
|
||||||
'(LBRACKET "[" 1 2))
|
|
||||||
|
|
||||||
(check-equal? (l ")")
|
|
||||||
'(RPAREN ")" 1 2))
|
|
||||||
|
|
||||||
(check-equal? (l "]")
|
|
||||||
'(RBRACKET "]" 1 2))
|
|
||||||
|
|
||||||
(check-equal? (l "'hello'")
|
|
||||||
'(LIT "'hello'" 1 8))
|
|
||||||
|
|
||||||
(check-equal? (l "'he\\'llo'")
|
|
||||||
'(LIT "'he\\'llo'" 1 10))
|
|
||||||
|
|
||||||
(check-equal? (l "/")
|
|
||||||
'(HIDE "/" 1 2))
|
|
||||||
|
|
||||||
(check-equal? (l " /")
|
|
||||||
'(HIDE "/" 2 3))
|
|
||||||
|
|
||||||
(check-equal? (l "@")
|
|
||||||
'(SPLICE "@" 1 2))
|
|
||||||
|
|
||||||
(check-equal? (l " @")
|
|
||||||
'(SPLICE "@" 2 3))
|
|
||||||
|
|
||||||
(check-equal? (l "#:prefix-out val:")
|
|
||||||
(list 'EOF eof 18 18)) ; lexer skips kwarg
|
|
@ -1,76 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
;; Make sure the old token type also works fine.
|
|
||||||
|
|
||||||
(require brag/examples/simple-line-drawing
|
|
||||||
brag/support
|
|
||||||
racket/list
|
|
||||||
br-parser-tools/lex
|
|
||||||
(prefix-in : br-parser-tools/lex-sre)
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
(define-tokens tokens (INTEGER STRING |;| EOF))
|
|
||||||
|
|
||||||
(define (make-tokenizer ip)
|
|
||||||
(port-count-lines! ip)
|
|
||||||
(define lex (lexer-src-pos
|
|
||||||
[(:+ numeric)
|
|
||||||
(token-INTEGER (string->number lexeme))]
|
|
||||||
[upper-case
|
|
||||||
(token-STRING lexeme)]
|
|
||||||
["b"
|
|
||||||
(token-STRING " ")]
|
|
||||||
[";"
|
|
||||||
(|token-;| lexeme)]
|
|
||||||
[whitespace
|
|
||||||
(return-without-pos (lex input-port))]
|
|
||||||
[(eof)
|
|
||||||
(token-EOF 'eof)]))
|
|
||||||
(lambda ()
|
|
||||||
(lex ip)))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(define the-parsed-object-stx
|
|
||||||
(parse (make-tokenizer (open-input-string #<<EOF
|
|
||||||
3 9 X;
|
|
||||||
6 3 b 3 X 3 b;
|
|
||||||
3 9 X;
|
|
||||||
EOF
|
|
||||||
))))
|
|
||||||
|
|
||||||
(check-true (syntax-original? the-parsed-object-stx))
|
|
||||||
;; Does the rule name "drawing" also have the proper "original?" property set?
|
|
||||||
(check-true (syntax-original? (first (syntax->list the-parsed-object-stx))))
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum the-parsed-object-stx)
|
|
||||||
'(drawing (rows (repeat 3) (chunk 9 "X") ";")
|
|
||||||
(rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";")
|
|
||||||
(rows (repeat 3) (chunk 9 "X") ";")))
|
|
||||||
|
|
||||||
(define the-parsed-object (syntax->list the-parsed-object-stx))
|
|
||||||
|
|
||||||
(check-equal? (syntax-line the-parsed-object-stx) 1)
|
|
||||||
(check-equal? (syntax-column the-parsed-object-stx) 0)
|
|
||||||
(check-equal? (syntax-position the-parsed-object-stx) 1)
|
|
||||||
(check-equal? (syntax-span the-parsed-object-stx) 28)
|
|
||||||
|
|
||||||
(check-equal? (length the-parsed-object) 4)
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (second the-parsed-object))
|
|
||||||
'(rows (repeat 3) (chunk 9 "X") ";"))
|
|
||||||
(check-equal? (syntax-line (list-ref the-parsed-object 1)) 1)
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (third the-parsed-object))
|
|
||||||
'(rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";"))
|
|
||||||
(check-equal? (syntax-line (list-ref the-parsed-object 2)) 2)
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (fourth the-parsed-object))
|
|
||||||
'(rows (repeat 3) (chunk 9 "X") ";"))
|
|
||||||
(check-equal? (syntax-line (list-ref the-parsed-object 3)) 3)
|
|
||||||
|
|
||||||
;; FIXME: add tests to make sure location is as we expect.
|
|
||||||
;;
|
|
||||||
;; FIXME: handle the EOF issue better. Something in cfg-parser
|
|
||||||
;; appears to deviate from br-parser-tools/yacc with regards to the stop
|
|
||||||
;; token.
|
|
@ -1,153 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
|
|
||||||
(require rackunit
|
|
||||||
br-parser-tools/lex
|
|
||||||
brag/rules/parser
|
|
||||||
brag/rules/lexer
|
|
||||||
brag/rules/rule-structs)
|
|
||||||
|
|
||||||
|
|
||||||
;; quick-and-dirty helper for pos construction.
|
|
||||||
(define (p x)
|
|
||||||
(pos x #f #f))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
;; FIXME: fix the test cases so they work on locations rather than just offsets.
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "expr : 'hello'")))
|
|
||||||
(list (rule (p 1) (p 15)
|
|
||||||
(lhs-id (p 1) (p 5) "expr" #f)
|
|
||||||
(pattern-lit (p 8) (p 15) "hello" #f))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON")))
|
|
||||||
(list (rule (p 1) (p 13)
|
|
||||||
(lhs-id (p 1) (p 5) "expr" #f)
|
|
||||||
(pattern-token (p 8) (p 13) "COLON" #f))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "/expr : COLON")))
|
|
||||||
(list (rule (p 1) (p 14)
|
|
||||||
(lhs-id (p 1) (p 6) "expr" ''hide)
|
|
||||||
(pattern-token (p 9) (p 14) "COLON" #f))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "@expr : COLON")))
|
|
||||||
(list (rule (p 1) (p 14)
|
|
||||||
(lhs-id (p 1) (p 6) "expr" ''splice)
|
|
||||||
(pattern-token (p 9) (p 14) "COLON" #f))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "expr : /COLON COLON")))
|
|
||||||
(list (rule (p 1) (p 20)
|
|
||||||
(lhs-id (p 1) (p 5) "expr" #f)
|
|
||||||
(pattern-seq (p 8) (p 20)
|
|
||||||
(list
|
|
||||||
(pattern-token (p 8) (p 14) "COLON" 'hide)
|
|
||||||
(pattern-token (p 15) (p 20) "COLON" #f))))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "expr : /thing COLON")))
|
|
||||||
(list (rule (p 1) (p 20)
|
|
||||||
(lhs-id (p 1) (p 5) "expr" #f)
|
|
||||||
(pattern-seq (p 8) (p 20)
|
|
||||||
(list
|
|
||||||
(pattern-id (p 8) (p 14) "thing" 'hide)
|
|
||||||
(pattern-token (p 15) (p 20) "COLON" #f))))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "expr : @thing COLON")))
|
|
||||||
(list (rule (p 1) (p 20)
|
|
||||||
(lhs-id (p 1) (p 5) "expr" #f)
|
|
||||||
(pattern-seq (p 8) (p 20)
|
|
||||||
(list
|
|
||||||
(pattern-id (p 8) (p 14) "thing" 'splice)
|
|
||||||
(pattern-token (p 15) (p 20) "COLON" #f))))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "expr : 'hello'*")))
|
|
||||||
(list (rule (p 1) (p 16)
|
|
||||||
(lhs-id (p 1) (p 5) "expr" #f)
|
|
||||||
(pattern-repeat (p 8) (p 16)
|
|
||||||
0
|
|
||||||
(pattern-lit (p 8) (p 15) "hello" #f)))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "expr : 'hello'+")))
|
|
||||||
(list (rule (p 1) (p 16)
|
|
||||||
(lhs-id (p 1) (p 5) "expr" #f)
|
|
||||||
(pattern-repeat (p 8) (p 16)
|
|
||||||
1
|
|
||||||
(pattern-lit (p 8) (p 15) "hello" #f)))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "expr : [/'hello']")))
|
|
||||||
(list (rule (p 1) (p 18)
|
|
||||||
(lhs-id (p 1) (p 5) "expr" #f)
|
|
||||||
(pattern-maybe (p 8) (p 18)
|
|
||||||
(pattern-lit (p 9) (p 17) "hello" 'hide)))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON | BLAH")))
|
|
||||||
(list (rule (p 1) (p 20)
|
|
||||||
(lhs-id (p 1) (p 5) "expr" #f)
|
|
||||||
(pattern-choice (p 8) (p 20)
|
|
||||||
(list (pattern-token (p 8) (p 13) "COLON" #f)
|
|
||||||
(pattern-token (p 16) (p 20) "BLAH" #f))))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "expr : COLON | BLAH | BAZ expr")))
|
|
||||||
(list (rule (p 1) (p 31)
|
|
||||||
(lhs-id (p 1) (p 5) "expr" #f)
|
|
||||||
(pattern-choice (p 8) (p 31)
|
|
||||||
(list (pattern-token (p 8) (p 13) "COLON" #f)
|
|
||||||
(pattern-token (p 16) (p 20) "BLAH" #f)
|
|
||||||
(pattern-seq (p 23) (p 31)
|
|
||||||
(list (pattern-token (p 23) (p 26) "BAZ" #f)
|
|
||||||
(pattern-id (p 27) (p 31) "expr" #f))))))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two /three")))
|
|
||||||
(list (rule (p 1) (p 22)
|
|
||||||
(lhs-id (p 1) (p 5) "expr" #f)
|
|
||||||
(pattern-seq (p 8) (p 22) (list (pattern-id (p 8) (p 11) "one" #f)
|
|
||||||
(pattern-id (p 12) (p 15) "two" #f)
|
|
||||||
(pattern-id (p 16) (p 22) "three" 'hide))))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "expr : (one two three)")))
|
|
||||||
(list (rule (p 1) (p 23)
|
|
||||||
(lhs-id (p 1) (p 5) "expr" #f)
|
|
||||||
(pattern-seq (p 8) (p 23) (list (pattern-id (p 9) (p 12) "one" #f)
|
|
||||||
(pattern-id (p 13) (p 16) "two" #f)
|
|
||||||
(pattern-id (p 17) (p 22) "three" #f))))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two* three")))
|
|
||||||
(list (rule (p 1) (p 22)
|
|
||||||
(lhs-id (p 1) (p 5) "expr" #f)
|
|
||||||
(pattern-seq (p 8) (p 22) (list (pattern-id (p 8) (p 11) "one" #f)
|
|
||||||
(pattern-repeat (p 12) (p 16) 0 (pattern-id (p 12) (p 15) "two" #f))
|
|
||||||
(pattern-id (p 17) (p 22) "three" #f))))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "expr : one two+ three")))
|
|
||||||
(list (rule (p 1) (p 22)
|
|
||||||
(lhs-id (p 1) (p 5) "expr" #f)
|
|
||||||
(pattern-seq (p 8) (p 22) (list (pattern-id (p 8) (p 11) "one" #f)
|
|
||||||
(pattern-repeat (p 12) (p 16) 1 (pattern-id (p 12) (p 15) "two" #f))
|
|
||||||
(pattern-id (p 17) (p 22) "three" #f))))))
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string "expr : (one two)+ three")))
|
|
||||||
(list (rule (p 1) (p 24)
|
|
||||||
(lhs-id (p 1) (p 5) "expr" #f)
|
|
||||||
(pattern-seq (p 8) (p 24) (list (pattern-repeat (p 8) (p 18) 1
|
|
||||||
(pattern-seq (p 8) (p 17)
|
|
||||||
(list (pattern-id (p 9) (p 12) "one" #f)
|
|
||||||
(pattern-id (p 13) (p 16) "two" #f))))
|
|
||||||
(pattern-id (p 19) (p 24) "three" #f))))))
|
|
||||||
|
|
||||||
|
|
||||||
(check-equal? (grammar-parser (tokenize (open-input-string #<<EOF
|
|
||||||
statlist : stat+
|
|
||||||
stat: ID '=' expr
|
|
||||||
| 'print' expr
|
|
||||||
EOF
|
|
||||||
)))
|
|
||||||
(list (rule (p 1) (p 17)
|
|
||||||
(lhs-id (p 1) (p 9) "statlist" #f)
|
|
||||||
(pattern-repeat (p 12) (p 17) 1 (pattern-id (p 12) (p 16) "stat" #f)))
|
|
||||||
(rule (p 18) (p 54)
|
|
||||||
(lhs-id (p 18) (p 22) "stat" #f)
|
|
||||||
(pattern-choice (p 24) (p 54) (list (pattern-seq (p 24) (p 35) (list (pattern-token (p 24) (p 26) "ID" #f)
|
|
||||||
(pattern-lit (p 27) (p 30) "=" #f)
|
|
||||||
(pattern-id (p 31) (p 35) "expr" #f)))
|
|
||||||
(pattern-seq (p 42) (p 54) (list (pattern-lit (p 42) (p 49) "print" #f)
|
|
||||||
(pattern-id (p 50) (p 54) "expr" #f))))))))
|
|
||||||
|
|
@ -1,72 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require brag/examples/simple-arithmetic-grammar
|
|
||||||
brag/support
|
|
||||||
racket/set
|
|
||||||
br-parser-tools/lex
|
|
||||||
racket/list
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
(define (tokenize ip)
|
|
||||||
(port-count-lines! ip)
|
|
||||||
(define lex/1
|
|
||||||
(lexer-src-pos
|
|
||||||
[(repetition 1 +inf.0 numeric)
|
|
||||||
(token 'INT (string->number lexeme))]
|
|
||||||
[whitespace
|
|
||||||
(token 'WHITESPACE #:skip? #t)]
|
|
||||||
["+"
|
|
||||||
(token '+ "+")]
|
|
||||||
["*"
|
|
||||||
(token '* "*")]
|
|
||||||
[(eof)
|
|
||||||
(token eof)]))
|
|
||||||
(lambda ()
|
|
||||||
(lex/1 ip)))
|
|
||||||
|
|
||||||
|
|
||||||
;; expr : term ('+' term)*
|
|
||||||
;; term : factor (('*') factor)*
|
|
||||||
;; factor : INT
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "42"))))
|
|
||||||
'(expr (term (factor 42))))
|
|
||||||
(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3+4"))))
|
|
||||||
'(expr (term (factor 3))
|
|
||||||
"+"
|
|
||||||
(term (factor 4))))
|
|
||||||
(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3+4+5"))))
|
|
||||||
'(expr (term (factor 3))
|
|
||||||
"+"
|
|
||||||
(term (factor 4))
|
|
||||||
"+"
|
|
||||||
(term (factor 5))))
|
|
||||||
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3*4*5"))))
|
|
||||||
'(expr (term (factor 3) "*" (factor 4) "*" (factor 5))))
|
|
||||||
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "3*4 + 5*6"))))
|
|
||||||
'(expr (term (factor 3) "*" (factor 4))
|
|
||||||
"+"
|
|
||||||
(term (factor 5) "*" (factor 6))))
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "4*5+6"))))
|
|
||||||
'(expr (term (factor 4) "*" (factor 5))
|
|
||||||
"+"
|
|
||||||
(term (factor 6))))
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (parse #f (tokenize (open-input-string "4+5 *6"))))
|
|
||||||
'(expr (term (factor 4))
|
|
||||||
"+"
|
|
||||||
(term (factor 5) "*" (factor 6))))
|
|
||||||
|
|
||||||
|
|
||||||
(check-exn exn:fail:parsing?
|
|
||||||
(lambda () (parse #f (tokenize (open-input-string "7+")))))
|
|
||||||
(check-exn exn:fail:parsing?
|
|
||||||
(lambda () (parse #f (tokenize (open-input-string "7+6+")))))
|
|
||||||
|
|
||||||
|
|
||||||
(check-equal? all-token-types
|
|
||||||
(set '+ '* 'INT))
|
|
@ -1,72 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
(require brag/examples/simple-line-drawing
|
|
||||||
brag/support
|
|
||||||
racket/list
|
|
||||||
br-parser-tools/lex
|
|
||||||
(prefix-in : br-parser-tools/lex-sre)
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
(define (make-tokenizer ip)
|
|
||||||
(port-count-lines! ip)
|
|
||||||
(define lex (lexer-src-pos
|
|
||||||
[(:+ numeric)
|
|
||||||
(token 'INTEGER (string->number lexeme))]
|
|
||||||
[upper-case
|
|
||||||
(token 'STRING lexeme)]
|
|
||||||
["b"
|
|
||||||
(token 'STRING " ")]
|
|
||||||
[";"
|
|
||||||
(token ";" lexeme)]
|
|
||||||
[whitespace
|
|
||||||
(token 'WHITESPACE lexeme #:skip? #t)]
|
|
||||||
[(eof)
|
|
||||||
(void)]))
|
|
||||||
(lambda ()
|
|
||||||
(lex ip)))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(define the-parsed-object-stx
|
|
||||||
(parse (make-tokenizer (open-input-string #<<EOF
|
|
||||||
3 9 X;
|
|
||||||
6 3 b 3 X 3 b;
|
|
||||||
3 9 X;
|
|
||||||
EOF
|
|
||||||
))))
|
|
||||||
|
|
||||||
(check-true (syntax-original? the-parsed-object-stx))
|
|
||||||
;; Does the rule name "drawing" also have the proper "original?" property set?
|
|
||||||
(check-true (syntax-original? (first (syntax->list the-parsed-object-stx))))
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum the-parsed-object-stx)
|
|
||||||
'(drawing (rows (repeat 3) (chunk 9 "X") ";")
|
|
||||||
(rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";")
|
|
||||||
(rows (repeat 3) (chunk 9 "X") ";")))
|
|
||||||
|
|
||||||
(define the-parsed-object (syntax->list the-parsed-object-stx))
|
|
||||||
|
|
||||||
(check-equal? (syntax-line the-parsed-object-stx) 1)
|
|
||||||
(check-equal? (syntax-column the-parsed-object-stx) 0)
|
|
||||||
(check-equal? (syntax-position the-parsed-object-stx) 1)
|
|
||||||
(check-equal? (syntax-span the-parsed-object-stx) 28)
|
|
||||||
|
|
||||||
(check-equal? (length the-parsed-object) 4)
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (second the-parsed-object))
|
|
||||||
'(rows (repeat 3) (chunk 9 "X") ";"))
|
|
||||||
(check-equal? (syntax-line (list-ref the-parsed-object 1)) 1)
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (third the-parsed-object))
|
|
||||||
'(rows (repeat 6) (chunk 3 " ") (chunk 3 "X") (chunk 3 " ") ";"))
|
|
||||||
(check-equal? (syntax-line (list-ref the-parsed-object 2)) 2)
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (fourth the-parsed-object))
|
|
||||||
'(rows (repeat 3) (chunk 9 "X") ";"))
|
|
||||||
(check-equal? (syntax-line (list-ref the-parsed-object 3)) 3)
|
|
||||||
|
|
||||||
;; FIXME: add tests to make sure location is as we expect.
|
|
||||||
;;
|
|
||||||
;; FIXME: handle the EOF issue better. Something in cfg-parser
|
|
||||||
;; appears to deviate from br-parser-tools/yacc with regards to the stop
|
|
||||||
;; token.
|
|
@ -1,7 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
|
|
||||||
(require "weird-grammar.rkt"
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
(check-equal? (syntax->datum (parse '("foo")))
|
|
||||||
'(foo "foo"))
|
|
@ -1,12 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require brag/examples/whitespace
|
|
||||||
brag/support
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
(check-equal?
|
|
||||||
(parse-to-datum "\ty\n x\tz")
|
|
||||||
'(start (tab "\t") (letter "y") (newline "\n") (space " ") (letter "x") (tab "\t") (letter "z")))
|
|
||||||
|
|
||||||
(check-equal?
|
|
||||||
(parse-to-datum "\t\n \t")
|
|
||||||
'(start (tab "\t") (newline "\n") (space " ") (tab "\t")))
|
|
@ -1,18 +0,0 @@
|
|||||||
#lang racket/base
|
|
||||||
(require brag/examples/wordy
|
|
||||||
brag/support
|
|
||||||
rackunit)
|
|
||||||
|
|
||||||
(check-equal?
|
|
||||||
(syntax->datum
|
|
||||||
(parse (list "hello" "world")))
|
|
||||||
'(sentence (verb (greeting "hello")) (optional-adjective) (object "world")))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
(check-equal?
|
|
||||||
(syntax->datum
|
|
||||||
(parse (list "hola" "frumpy" (token 'WORLD "세계"))))
|
|
||||||
|
|
||||||
'(sentence (verb (greeting "hola")) (optional-adjective "frumpy") (object "세계")))
|
|
||||||
|
|
@ -1,6 +0,0 @@
|
|||||||
#lang brag
|
|
||||||
|
|
||||||
;; This used to fail when we had the yacc-based backend, but
|
|
||||||
;; cfg-parser seems to be ok with it.
|
|
||||||
|
|
||||||
foo: "foo"
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue