next
parent
dfbca937b4
commit
7832b5a89d
@ -0,0 +1,11 @@
|
|||||||
|
parser-tools-doc
|
||||||
|
Copyright (c) 2010-2014 PLT Design Inc.
|
||||||
|
|
||||||
|
This package is distributed under the GNU Lesser General Public
|
||||||
|
License (LGPL). This means that you can link this package into proprietary
|
||||||
|
applications, provided you follow the rules stated in the LGPL. You
|
||||||
|
can also modify this package; if you distribute a modified version,
|
||||||
|
you must distribute it under the terms of the LGPL, which in
|
||||||
|
particular means that you must release the source code for the
|
||||||
|
modified software. See http://www.gnu.org/copyleft/lesser.html
|
||||||
|
for more information.
|
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("e40ef6f4ad8e94b16dd696e0e56aff8797e08366" . "20628650cd070c4f9b3a47399bfc46ffabd56006") (collects #"br-parser-tools" #"cfg-parser.rkt") (collects #"br-parser-tools" #"lex-plt-v200.rkt") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"br-parser-tools" #"yacc.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"scheme" #"base.rkt") (collects #"scheme" #"contract.rkt") (collects #"scribble" #"bnf.rkt") (collects #"scribble" #"doc" #"lang" #"reader.rkt") (collects #"scribble" #"doclang.rkt") (collects #"scribble" #"manual.rkt") (collects #"scribble" #"struct.rkt") (collects #"scribble" #"xref.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("4594481ac3bf7a1dfb75324c86a180c4a121ab41" . "64293529315a4e5ca67bc9da08b943f520e0d704") (collects #"info" #"main.rkt") (collects #"setup" #"infotab.rkt"))
|
Binary file not shown.
@ -0,0 +1,3 @@
|
|||||||
|
#lang info
|
||||||
|
|
||||||
|
(define scribblings '(("br-parser-tools.scrbl" (multi-page) (parsing-library))))
|
@ -0,0 +1,14 @@
|
|||||||
|
#lang info
|
||||||
|
|
||||||
|
(define collection 'multi)
|
||||||
|
(define deps '("base"))
|
||||||
|
(define build-deps '("scheme-lib"
|
||||||
|
"racket-doc"
|
||||||
|
"syntax-color-doc"
|
||||||
|
"br-parser-tools-lib"
|
||||||
|
"scribble-lib"))
|
||||||
|
(define update-implies '("br-parser-tools-lib"))
|
||||||
|
|
||||||
|
(define pkg-desc "documentation part of \"br-parser-tools\"")
|
||||||
|
|
||||||
|
(define pkg-authors '(mflatt))
|
@ -0,0 +1,11 @@
|
|||||||
|
parser-tools-lib
|
||||||
|
Copyright (c) 2010-2014 PLT Design Inc.
|
||||||
|
|
||||||
|
This package is distributed under the GNU Lesser General Public
|
||||||
|
License (LGPL). This means that you can link this package into proprietary
|
||||||
|
applications, provided you follow the rules stated in the LGPL. You
|
||||||
|
can also modify this package; if you distribute a modified version,
|
||||||
|
you must distribute it under the terms of the LGPL, which in
|
||||||
|
particular means that you must release the source code for the
|
||||||
|
modified software. See http://www.gnu.org/copyleft/lesser.html
|
||||||
|
for more information.
|
@ -0,0 +1,982 @@
|
|||||||
|
#lang racket/base
|
||||||
|
;; This module implements a parser form like the br-parser-tools's
|
||||||
|
;; `parser', except that it works on an arbitrary CFG (returning
|
||||||
|
;; the first sucecssful parse).
|
||||||
|
|
||||||
|
;; I'm pretty sure that this is an implementation of Earley's
|
||||||
|
;; algorithm.
|
||||||
|
|
||||||
|
;; To a first approximation, it's a backtracking parser. Alternative
|
||||||
|
;; for a non-terminal are computed in parallel, and multiple attempts
|
||||||
|
;; to compute the same result block until the first one completes. If
|
||||||
|
;; you get into deadlock, such as when trying to match
|
||||||
|
;; <foo> := <foo>
|
||||||
|
;; then it means that there's no successful parse, so everything
|
||||||
|
;; that's blocked fails.
|
||||||
|
|
||||||
|
;; A cache holds the series of results for a particular non-terminal
|
||||||
|
;; at a particular starting location. (A series is used, instead of a
|
||||||
|
;; sinlge result, for backtracking.) Otherwise, the parser uses
|
||||||
|
;; backtracking search. Backtracking is implemented through explicit
|
||||||
|
;; success and failure continuations. Multiple results for a
|
||||||
|
;; particular nonterminal and location are kept only when they have
|
||||||
|
;; different lengths. (Otherwise, in the spirit of finding one
|
||||||
|
;; successful parse, only the first result is kept.)
|
||||||
|
|
||||||
|
;; The br-parser-tools's `parse' is used to transform tokens in the
|
||||||
|
;; grammar to tokens specific to this parser. In other words, this
|
||||||
|
;; parser uses `parser' so that it doesn't have to know anything about
|
||||||
|
;; tokens.
|
||||||
|
;;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
(require br-parser-tools/yacc
|
||||||
|
br-parser-tools/lex)
|
||||||
|
|
||||||
|
(require (for-syntax racket/base
|
||||||
|
syntax/boundmap
|
||||||
|
br-parser-tools/private-lex/token-syntax))
|
||||||
|
|
||||||
|
(provide cfg-parser)
|
||||||
|
|
||||||
|
;; A raw token, wrapped so that we can recognize it:
|
||||||
|
(define-struct tok (name orig-name val start end))
|
||||||
|
|
||||||
|
;; Represents the thread scheduler:
|
||||||
|
(define-struct tasks (active active-back waits multi-waits cache progress?))
|
||||||
|
|
||||||
|
(define-for-syntax make-token-identifier-mapping make-hasheq)
|
||||||
|
(define-for-syntax token-identifier-mapping-get
|
||||||
|
(case-lambda
|
||||||
|
[(t tok)
|
||||||
|
(hash-ref t (syntax-e tok))]
|
||||||
|
[(t tok fail)
|
||||||
|
(hash-ref t (syntax-e tok) fail)]))
|
||||||
|
(define-for-syntax token-identifier-mapping-put!
|
||||||
|
(lambda (t tok v)
|
||||||
|
(hash-set! t (syntax-e tok) v)))
|
||||||
|
(define-for-syntax token-identifier-mapping-map
|
||||||
|
(lambda (t f)
|
||||||
|
(hash-map t f)))
|
||||||
|
|
||||||
|
;; Used to calculate information on the grammar, such as whether
|
||||||
|
;; a particular non-terminal is "simple" instead of recursively defined.
|
||||||
|
(define-for-syntax (nt-fixpoint nts proc nt-ids patss)
|
||||||
|
(define (ormap-all val f as bs)
|
||||||
|
(cond
|
||||||
|
[(null? as) val]
|
||||||
|
[else (ormap-all (or (f (car as) (car bs)) val)
|
||||||
|
f
|
||||||
|
(cdr as) (cdr bs))]))
|
||||||
|
(let loop ()
|
||||||
|
(when (ormap-all #f
|
||||||
|
(lambda (nt pats)
|
||||||
|
(let ([old (bound-identifier-mapping-get nts nt)])
|
||||||
|
(let ([new (proc nt pats old)])
|
||||||
|
(if (equal? old new)
|
||||||
|
#f
|
||||||
|
(begin
|
||||||
|
(bound-identifier-mapping-put! nts nt new)
|
||||||
|
#t)))))
|
||||||
|
nt-ids patss)
|
||||||
|
(loop))))
|
||||||
|
|
||||||
|
;; Tries parse-a followed by parse-b. If parse-a is not simple,
|
||||||
|
;; then after parse-a succeeds once, we parallelize parse-b
|
||||||
|
;; and trying a second result for parse-a.
|
||||||
|
(define (parse-and simple-a? parse-a parse-b
|
||||||
|
stream last-consumed-token depth end success-k fail-k
|
||||||
|
max-depth tasks)
|
||||||
|
(letrec ([mk-got-k
|
||||||
|
(lambda (success-k fail-k)
|
||||||
|
(lambda (val stream last-consumed-token depth max-depth tasks next1-k)
|
||||||
|
(if simple-a?
|
||||||
|
(parse-b val stream last-consumed-token depth end
|
||||||
|
(mk-got2-k success-k fail-k next1-k)
|
||||||
|
(mk-fail2-k success-k fail-k next1-k)
|
||||||
|
max-depth tasks)
|
||||||
|
(parallel-or
|
||||||
|
(lambda (success-k fail-k max-depth tasks)
|
||||||
|
(parse-b val stream last-consumed-token depth end
|
||||||
|
success-k fail-k
|
||||||
|
max-depth tasks))
|
||||||
|
(lambda (success-k fail-k max-depth tasks)
|
||||||
|
(next1-k (mk-got-k success-k fail-k)
|
||||||
|
fail-k max-depth tasks))
|
||||||
|
success-k fail-k max-depth tasks))))]
|
||||||
|
[mk-got2-k
|
||||||
|
(lambda (success-k fail-k next1-k)
|
||||||
|
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
||||||
|
(success-k val stream last-consumed-token depth max-depth tasks
|
||||||
|
(lambda (success-k fail-k max-depth tasks)
|
||||||
|
(next-k (mk-got2-k success-k fail-k next1-k)
|
||||||
|
(mk-fail2-k success-k fail-k next1-k)
|
||||||
|
max-depth tasks)))))]
|
||||||
|
[mk-fail2-k
|
||||||
|
(lambda (success-k fail-k next1-k)
|
||||||
|
(lambda (max-depth tasks)
|
||||||
|
(next1-k (mk-got-k success-k fail-k)
|
||||||
|
fail-k
|
||||||
|
max-depth
|
||||||
|
tasks)))])
|
||||||
|
(parse-a stream last-consumed-token depth end
|
||||||
|
(mk-got-k success-k fail-k)
|
||||||
|
fail-k
|
||||||
|
max-depth tasks)))
|
||||||
|
|
||||||
|
;; Parallel or for non-terminal alternatives
|
||||||
|
(define (parse-parallel-or parse-a parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
||||||
|
(parallel-or (lambda (success-k fail-k max-depth tasks)
|
||||||
|
(parse-a stream last-consumed-token depth end success-k fail-k max-depth tasks))
|
||||||
|
(lambda (success-k fail-k max-depth tasks)
|
||||||
|
(parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks))
|
||||||
|
success-k fail-k max-depth tasks))
|
||||||
|
|
||||||
|
;; Generic parallel-or
|
||||||
|
(define (parallel-or parse-a parse-b success-k fail-k max-depth tasks)
|
||||||
|
(define answer-key (gensym))
|
||||||
|
(letrec ([gota-k
|
||||||
|
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
||||||
|
(report-answer answer-key
|
||||||
|
max-depth
|
||||||
|
tasks
|
||||||
|
(list val stream last-consumed-token depth next-k)))]
|
||||||
|
[faila-k
|
||||||
|
(lambda (max-depth tasks)
|
||||||
|
(report-answer answer-key
|
||||||
|
max-depth
|
||||||
|
tasks
|
||||||
|
null))])
|
||||||
|
(let* ([tasks (queue-task
|
||||||
|
tasks
|
||||||
|
(lambda (max-depth tasks)
|
||||||
|
(parse-a gota-k
|
||||||
|
faila-k
|
||||||
|
max-depth tasks)))]
|
||||||
|
[tasks (queue-task
|
||||||
|
tasks
|
||||||
|
(lambda (max-depth tasks)
|
||||||
|
(parse-b gota-k
|
||||||
|
faila-k
|
||||||
|
max-depth tasks)))]
|
||||||
|
[queue-next (lambda (next-k tasks)
|
||||||
|
(queue-task tasks
|
||||||
|
(lambda (max-depth tasks)
|
||||||
|
(next-k gota-k
|
||||||
|
faila-k
|
||||||
|
max-depth tasks))))])
|
||||||
|
(letrec ([mk-got-one
|
||||||
|
(lambda (immediate-next? get-nth success-k)
|
||||||
|
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
||||||
|
(let ([tasks (if immediate-next?
|
||||||
|
(queue-next next-k tasks)
|
||||||
|
tasks)])
|
||||||
|
(success-k val stream last-consumed-token depth max-depth
|
||||||
|
tasks
|
||||||
|
(lambda (success-k fail-k max-depth tasks)
|
||||||
|
(let ([tasks (if immediate-next?
|
||||||
|
tasks
|
||||||
|
(queue-next next-k tasks))])
|
||||||
|
(get-nth max-depth tasks success-k fail-k)))))))]
|
||||||
|
[get-first
|
||||||
|
(lambda (max-depth tasks success-k fail-k)
|
||||||
|
(wait-for-answer #f max-depth tasks answer-key
|
||||||
|
(mk-got-one #t get-first success-k)
|
||||||
|
(lambda (max-depth tasks)
|
||||||
|
(get-second max-depth tasks success-k fail-k))
|
||||||
|
#f))]
|
||||||
|
[get-second
|
||||||
|
(lambda (max-depth tasks success-k fail-k)
|
||||||
|
(wait-for-answer #f max-depth tasks answer-key
|
||||||
|
(mk-got-one #f get-second success-k)
|
||||||
|
fail-k #f))])
|
||||||
|
(get-first max-depth tasks success-k fail-k)))))
|
||||||
|
|
||||||
|
;; Non-terminal alternatives where the first is "simple" can be done
|
||||||
|
;; sequentially, which is simpler
|
||||||
|
(define (parse-or parse-a parse-b
|
||||||
|
stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
||||||
|
(letrec ([mk-got-k
|
||||||
|
(lambda (success-k fail-k)
|
||||||
|
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
||||||
|
(success-k val stream last-consumed-token depth
|
||||||
|
max-depth tasks
|
||||||
|
(lambda (success-k fail-k max-depth tasks)
|
||||||
|
(next-k (mk-got-k success-k fail-k)
|
||||||
|
(mk-fail-k success-k fail-k)
|
||||||
|
max-depth tasks)))))]
|
||||||
|
[mk-fail-k
|
||||||
|
(lambda (success-k fail-k)
|
||||||
|
(lambda (max-depth tasks)
|
||||||
|
(parse-b stream last-consumed-token depth end success-k fail-k max-depth tasks)))])
|
||||||
|
(parse-a stream last-consumed-token depth end
|
||||||
|
(mk-got-k success-k fail-k)
|
||||||
|
(mk-fail-k success-k fail-k)
|
||||||
|
max-depth tasks)))
|
||||||
|
|
||||||
|
;; Starts a thread
|
||||||
|
(define queue-task
|
||||||
|
(lambda (tasks t [progress? #t])
|
||||||
|
(make-tasks (tasks-active tasks)
|
||||||
|
(cons t (tasks-active-back tasks))
|
||||||
|
(tasks-waits tasks)
|
||||||
|
(tasks-multi-waits tasks)
|
||||||
|
(tasks-cache tasks)
|
||||||
|
(or progress? (tasks-progress? tasks)))))
|
||||||
|
|
||||||
|
;; Reports an answer to a waiting thread:
|
||||||
|
(define (report-answer answer-key max-depth tasks val)
|
||||||
|
(let ([v (hash-ref (tasks-waits tasks) answer-key (lambda () #f))])
|
||||||
|
(if v
|
||||||
|
(let ([tasks (make-tasks (cons (v val)
|
||||||
|
(tasks-active tasks))
|
||||||
|
(tasks-active-back tasks)
|
||||||
|
(tasks-waits tasks)
|
||||||
|
(tasks-multi-waits tasks)
|
||||||
|
(tasks-cache tasks)
|
||||||
|
#t)])
|
||||||
|
(hash-remove! (tasks-waits tasks) answer-key)
|
||||||
|
(swap-task max-depth tasks))
|
||||||
|
;; We have an answer ready too fast; wait
|
||||||
|
(swap-task max-depth
|
||||||
|
(queue-task tasks
|
||||||
|
(lambda (max-depth tasks)
|
||||||
|
(report-answer answer-key max-depth tasks val))
|
||||||
|
#f)))))
|
||||||
|
|
||||||
|
;; Reports an answer to multiple waiting threads:
|
||||||
|
(define (report-answer-all answer-key max-depth tasks val k)
|
||||||
|
(let ([v (hash-ref (tasks-multi-waits tasks) answer-key (lambda () null))])
|
||||||
|
(hash-remove! (tasks-multi-waits tasks) answer-key)
|
||||||
|
(let ([tasks (make-tasks (append (map (lambda (a) (a val)) v)
|
||||||
|
(tasks-active tasks))
|
||||||
|
(tasks-active-back tasks)
|
||||||
|
(tasks-waits tasks)
|
||||||
|
(tasks-multi-waits tasks)
|
||||||
|
(tasks-cache tasks)
|
||||||
|
#t)])
|
||||||
|
(k max-depth tasks))))
|
||||||
|
|
||||||
|
;; Waits for an answer; if `multi?' is #f, this is sole waiter, otherwise
|
||||||
|
;; there might be many. Use wither #t or #f (and `report-answer' or
|
||||||
|
;; `report-answer-all', resptively) consistently for a particular answer key.
|
||||||
|
(define (wait-for-answer multi? max-depth tasks answer-key success-k fail-k deadlock-k)
|
||||||
|
(let ([wait (lambda (val)
|
||||||
|
(lambda (max-depth tasks)
|
||||||
|
(if val
|
||||||
|
(if (null? val)
|
||||||
|
(fail-k max-depth tasks)
|
||||||
|
(let-values ([(val stream last-consumed-token depth next-k) (apply values val)])
|
||||||
|
(success-k val stream last-consumed-token depth max-depth tasks next-k)))
|
||||||
|
(deadlock-k max-depth tasks))))])
|
||||||
|
(if multi?
|
||||||
|
(hash-set! (tasks-multi-waits tasks) answer-key
|
||||||
|
(cons wait (hash-ref (tasks-multi-waits tasks) answer-key
|
||||||
|
(lambda () null))))
|
||||||
|
(hash-set! (tasks-waits tasks) answer-key wait))
|
||||||
|
(let ([tasks (make-tasks (tasks-active tasks)
|
||||||
|
(tasks-active-back tasks)
|
||||||
|
(tasks-waits tasks)
|
||||||
|
(tasks-multi-waits tasks)
|
||||||
|
(tasks-cache tasks)
|
||||||
|
#t)])
|
||||||
|
(swap-task max-depth tasks))))
|
||||||
|
|
||||||
|
;; Swap thread
|
||||||
|
(define (swap-task max-depth tasks)
|
||||||
|
;; Swap in first active:
|
||||||
|
(if (null? (tasks-active tasks))
|
||||||
|
(if (tasks-progress? tasks)
|
||||||
|
(swap-task max-depth
|
||||||
|
(make-tasks (reverse (tasks-active-back tasks))
|
||||||
|
null
|
||||||
|
(tasks-waits tasks)
|
||||||
|
(tasks-multi-waits tasks)
|
||||||
|
(tasks-cache tasks)
|
||||||
|
#f))
|
||||||
|
;; No progress, so issue failure for all multi-waits
|
||||||
|
(if (zero? (hash-count (tasks-multi-waits tasks)))
|
||||||
|
(error 'swap-task "Deadlock")
|
||||||
|
(swap-task max-depth
|
||||||
|
(make-tasks (apply
|
||||||
|
append
|
||||||
|
(hash-map (tasks-multi-waits tasks)
|
||||||
|
(lambda (k l)
|
||||||
|
(map (lambda (v) (v #f)) l))))
|
||||||
|
(tasks-active-back tasks)
|
||||||
|
(tasks-waits tasks)
|
||||||
|
(make-hasheq)
|
||||||
|
(tasks-cache tasks)
|
||||||
|
#t))))
|
||||||
|
(let ([t (car (tasks-active tasks))]
|
||||||
|
[tasks (make-tasks (cdr (tasks-active tasks))
|
||||||
|
(tasks-active-back tasks)
|
||||||
|
(tasks-waits tasks)
|
||||||
|
(tasks-multi-waits tasks)
|
||||||
|
(tasks-cache tasks)
|
||||||
|
(tasks-progress? tasks))])
|
||||||
|
(t max-depth tasks))))
|
||||||
|
|
||||||
|
;; Finds the symbolic representative of a token class
|
||||||
|
(define-for-syntax (map-token toks tok)
|
||||||
|
(car (token-identifier-mapping-get toks tok)))
|
||||||
|
|
||||||
|
(define no-pos-val (make-position #f #f #f))
|
||||||
|
(define-for-syntax no-pos
|
||||||
|
(let ([npv ((syntax-local-certifier) #'no-pos-val)])
|
||||||
|
(lambda (stx) npv)))
|
||||||
|
(define-for-syntax at-tok-pos
|
||||||
|
(lambda (sel expr)
|
||||||
|
(lambda (stx)
|
||||||
|
#`(let ([v #,expr]) (if v (#,sel v) no-pos-val)))))
|
||||||
|
|
||||||
|
;; Builds a matcher for a particular alternative
|
||||||
|
(define-for-syntax (build-match nts toks pat handle $ctx)
|
||||||
|
(let loop ([pat pat]
|
||||||
|
[pos 1])
|
||||||
|
(if (null? pat)
|
||||||
|
#`(success-k #,handle stream last-consumed-token depth max-depth tasks
|
||||||
|
(lambda (success-k fail-k max-depth tasks)
|
||||||
|
(fail-k max-depth tasks)))
|
||||||
|
(let ([id (datum->syntax (car pat)
|
||||||
|
(string->symbol (format "$~a" pos)))]
|
||||||
|
[id-start-pos (datum->syntax (car pat)
|
||||||
|
(string->symbol (format "$~a-start-pos" pos)))]
|
||||||
|
[id-end-pos (datum->syntax (car pat)
|
||||||
|
(string->symbol (format "$~a-end-pos" pos)))]
|
||||||
|
[n-end-pos (and (null? (cdr pat))
|
||||||
|
(datum->syntax (car pat) '$n-end-pos))])
|
||||||
|
(cond
|
||||||
|
[(bound-identifier-mapping-get nts (car pat) (lambda () #f))
|
||||||
|
;; Match non-termimal
|
||||||
|
#`(parse-and
|
||||||
|
;; First part is simple? (If so, we don't have to parallelize the `and'.)
|
||||||
|
#,(let ([l (bound-identifier-mapping-get nts (car pat) (lambda () #f))])
|
||||||
|
(or (not l)
|
||||||
|
(andmap values (caddr l))))
|
||||||
|
#,(car pat)
|
||||||
|
(let ([original-stream stream])
|
||||||
|
(lambda (#,id stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
||||||
|
(let-syntax ([#,id-start-pos (at-tok-pos #'(if (eq? original-stream stream)
|
||||||
|
tok-end
|
||||||
|
tok-start)
|
||||||
|
#'(if (eq? original-stream stream)
|
||||||
|
last-consumed-token
|
||||||
|
(and (pair? original-stream)
|
||||||
|
(car original-stream))))]
|
||||||
|
[#,id-end-pos (at-tok-pos #'tok-end #'last-consumed-token)]
|
||||||
|
#,@(if n-end-pos
|
||||||
|
#`([#,n-end-pos (at-tok-pos #'tok-end #'last-consumed-token)])
|
||||||
|
null))
|
||||||
|
#,(loop (cdr pat) (add1 pos)))))
|
||||||
|
stream last-consumed-token depth
|
||||||
|
#,(let ([cnt (apply +
|
||||||
|
(map (lambda (item)
|
||||||
|
(cond
|
||||||
|
[(bound-identifier-mapping-get nts item (lambda () #f))
|
||||||
|
=> (lambda (l) (car l))]
|
||||||
|
[else 1]))
|
||||||
|
(cdr pat)))])
|
||||||
|
#`(- end #,cnt))
|
||||||
|
success-k fail-k max-depth tasks)]
|
||||||
|
[else
|
||||||
|
;; Match token
|
||||||
|
(let ([tok-id (map-token toks (car pat))])
|
||||||
|
#`(if (and (pair? stream)
|
||||||
|
(eq? '#,tok-id (tok-name (car stream))))
|
||||||
|
(let* ([stream-a (car stream)]
|
||||||
|
[#,id (tok-val stream-a)]
|
||||||
|
[last-consumed-token (car stream)]
|
||||||
|
[stream (cdr stream)]
|
||||||
|
[depth (add1 depth)])
|
||||||
|
(let ([max-depth (max max-depth depth)])
|
||||||
|
(let-syntax ([#,id-start-pos (at-tok-pos #'tok-start #'stream-a)]
|
||||||
|
[#,id-end-pos (at-tok-pos #'tok-end #'stream-a)]
|
||||||
|
#,@(if n-end-pos
|
||||||
|
#`([#,n-end-pos (at-tok-pos #'tok-end #'stream-a)])
|
||||||
|
null))
|
||||||
|
#,(loop (cdr pat) (add1 pos)))))
|
||||||
|
(fail-k max-depth tasks)))])))))
|
||||||
|
|
||||||
|
;; Starts parsing to match a non-terminal. There's a minor
|
||||||
|
;; optimization that checks for known starting tokens. Otherwise,
|
||||||
|
;; use the cache, block if someone else is already trying the match,
|
||||||
|
;; and cache the result if it's computed.
|
||||||
|
;; The cache maps nontermial+startingpos+iteration to a result, where
|
||||||
|
;; the iteration is 0 for the first match attempt, 1 for the second,
|
||||||
|
;; etc.
|
||||||
|
(define (parse-nt/share key min-cnt init-tokens stream last-consumed-token depth end max-depth tasks success-k fail-k k)
|
||||||
|
(if (and (positive? min-cnt)
|
||||||
|
(pair? stream)
|
||||||
|
(not (memq (tok-name (car stream)) init-tokens)))
|
||||||
|
;; No such leading token; give up
|
||||||
|
(fail-k max-depth tasks)
|
||||||
|
;; Run pattern
|
||||||
|
(let loop ([n 0]
|
||||||
|
[success-k success-k]
|
||||||
|
[fail-k fail-k]
|
||||||
|
[max-depth max-depth]
|
||||||
|
[tasks tasks]
|
||||||
|
[k k])
|
||||||
|
(let ([answer-key (gensym)]
|
||||||
|
[table-key (vector key depth n)]
|
||||||
|
[old-depth depth]
|
||||||
|
[old-stream stream])
|
||||||
|
#;(printf "Loop ~a\n" table-key)
|
||||||
|
(cond
|
||||||
|
[(hash-ref (tasks-cache tasks) table-key (lambda () #f))
|
||||||
|
=> (lambda (result)
|
||||||
|
#;(printf "Reuse ~a\n" table-key)
|
||||||
|
(result success-k fail-k max-depth tasks))]
|
||||||
|
[else
|
||||||
|
#;(printf "Try ~a ~a\n" table-key (map tok-name stream))
|
||||||
|
(hash-set! (tasks-cache tasks) table-key
|
||||||
|
(lambda (success-k fail-k max-depth tasks)
|
||||||
|
#;(printf "Wait ~a ~a\n" table-key answer-key)
|
||||||
|
(wait-for-answer #t max-depth tasks answer-key success-k fail-k
|
||||||
|
(lambda (max-depth tasks)
|
||||||
|
#;(printf "Deadlock ~a ~a\n" table-key answer-key)
|
||||||
|
(fail-k max-depth tasks)))))
|
||||||
|
(let result-loop ([max-depth max-depth][tasks tasks][k k])
|
||||||
|
(letrec ([orig-stream stream]
|
||||||
|
[new-got-k
|
||||||
|
(lambda (val stream last-consumed-token depth max-depth tasks next-k)
|
||||||
|
;; Check whether we already have a result that consumed the same amount:
|
||||||
|
(let ([result-key (vector #f key old-depth depth)])
|
||||||
|
(cond
|
||||||
|
[(hash-ref (tasks-cache tasks) result-key (lambda () #f))
|
||||||
|
;; Go for the next-result
|
||||||
|
(result-loop max-depth
|
||||||
|
tasks
|
||||||
|
(lambda (end max-depth tasks success-k fail-k)
|
||||||
|
(next-k success-k fail-k max-depth tasks)))]
|
||||||
|
[else
|
||||||
|
#;(printf "Success ~a ~a\n" table-key
|
||||||
|
(map tok-name (let loop ([d old-depth][s old-stream])
|
||||||
|
(if (= d depth)
|
||||||
|
null
|
||||||
|
(cons (car s) (loop (add1 d) (cdr s)))))))
|
||||||
|
(let ([next-k (lambda (success-k fail-k max-depth tasks)
|
||||||
|
(loop (add1 n)
|
||||||
|
success-k
|
||||||
|
fail-k
|
||||||
|
max-depth
|
||||||
|
tasks
|
||||||
|
(lambda (end max-depth tasks success-k fail-k)
|
||||||
|
(next-k success-k fail-k max-depth tasks))))])
|
||||||
|
(hash-set! (tasks-cache tasks) result-key #t)
|
||||||
|
(hash-set! (tasks-cache tasks) table-key
|
||||||
|
(lambda (success-k fail-k max-depth tasks)
|
||||||
|
(success-k val stream last-consumed-token depth max-depth tasks next-k)))
|
||||||
|
(report-answer-all answer-key
|
||||||
|
max-depth
|
||||||
|
tasks
|
||||||
|
(list val stream last-consumed-token depth next-k)
|
||||||
|
(lambda (max-depth tasks)
|
||||||
|
(success-k val stream last-consumed-token depth max-depth tasks next-k))))])))]
|
||||||
|
[new-fail-k
|
||||||
|
(lambda (max-depth tasks)
|
||||||
|
#;(printf "Failure ~a\n" table-key)
|
||||||
|
(hash-set! (tasks-cache tasks) table-key
|
||||||
|
(lambda (success-k fail-k max-depth tasks)
|
||||||
|
(fail-k max-depth tasks)))
|
||||||
|
(report-answer-all answer-key
|
||||||
|
max-depth
|
||||||
|
tasks
|
||||||
|
null
|
||||||
|
(lambda (max-depth tasks)
|
||||||
|
(fail-k max-depth tasks))))])
|
||||||
|
(k end max-depth tasks new-got-k new-fail-k)))])))))
|
||||||
|
|
||||||
|
(define-syntax (cfg-parser stx)
|
||||||
|
(syntax-case stx ()
|
||||||
|
[(_ clause ...)
|
||||||
|
(let ([clauses (syntax->list #'(clause ...))])
|
||||||
|
(let-values ([(start grammar cfg-error parser-clauses src-pos?)
|
||||||
|
(let ([all-toks (apply
|
||||||
|
append
|
||||||
|
(map (lambda (clause)
|
||||||
|
(syntax-case clause (tokens)
|
||||||
|
[(tokens t ...)
|
||||||
|
(apply
|
||||||
|
append
|
||||||
|
(map (lambda (t)
|
||||||
|
(let ([v (syntax-local-value t (lambda () #f))])
|
||||||
|
(cond
|
||||||
|
[(terminals-def? v)
|
||||||
|
(map (lambda (v)
|
||||||
|
(cons v #f))
|
||||||
|
(syntax->list (terminals-def-t v)))]
|
||||||
|
[(e-terminals-def? v)
|
||||||
|
(map (lambda (v)
|
||||||
|
(cons v #t))
|
||||||
|
(syntax->list (e-terminals-def-t v)))]
|
||||||
|
[else null])))
|
||||||
|
(syntax->list #'(t ...))))]
|
||||||
|
[_else null]))
|
||||||
|
clauses))]
|
||||||
|
[all-end-toks (apply
|
||||||
|
append
|
||||||
|
(map (lambda (clause)
|
||||||
|
(syntax-case clause (end)
|
||||||
|
[(end t ...)
|
||||||
|
(syntax->list #'(t ...))]
|
||||||
|
[_else null]))
|
||||||
|
clauses))])
|
||||||
|
(let loop ([clauses clauses]
|
||||||
|
[cfg-start #f]
|
||||||
|
[cfg-grammar #f]
|
||||||
|
[cfg-error #f]
|
||||||
|
[src-pos? #f]
|
||||||
|
[parser-clauses null])
|
||||||
|
(if (null? clauses)
|
||||||
|
(values cfg-start
|
||||||
|
cfg-grammar
|
||||||
|
cfg-error
|
||||||
|
(reverse parser-clauses)
|
||||||
|
src-pos?)
|
||||||
|
(syntax-case (car clauses) (start error grammar src-pos)
|
||||||
|
[(start tok)
|
||||||
|
(loop (cdr clauses) #'tok cfg-grammar cfg-error src-pos? parser-clauses)]
|
||||||
|
[(error expr)
|
||||||
|
(loop (cdr clauses) cfg-start cfg-grammar #'expr src-pos? parser-clauses)]
|
||||||
|
[(grammar [nt [pat handle0 handle ...] ...] ...)
|
||||||
|
(let ([nts (make-bound-identifier-mapping)]
|
||||||
|
[toks (make-token-identifier-mapping)]
|
||||||
|
[end-toks (make-token-identifier-mapping)]
|
||||||
|
[nt-ids (syntax->list #'(nt ...))]
|
||||||
|
[patss (map (lambda (stx)
|
||||||
|
(map syntax->list (syntax->list stx)))
|
||||||
|
(syntax->list #'((pat ...) ...)))])
|
||||||
|
(for-each (lambda (nt)
|
||||||
|
(bound-identifier-mapping-put! nts nt (list 0)))
|
||||||
|
nt-ids)
|
||||||
|
(for-each (lambda (t)
|
||||||
|
(token-identifier-mapping-put! end-toks t #t))
|
||||||
|
all-end-toks)
|
||||||
|
(for-each (lambda (t)
|
||||||
|
(unless (token-identifier-mapping-get end-toks (car t) (lambda () #f))
|
||||||
|
(let ([id (gensym (syntax-e (car t)))])
|
||||||
|
(token-identifier-mapping-put! toks (car t)
|
||||||
|
(cons id (cdr t))))))
|
||||||
|
all-toks)
|
||||||
|
;; Compute min max size for each non-term:
|
||||||
|
(nt-fixpoint
|
||||||
|
nts
|
||||||
|
(lambda (nt pats old-list)
|
||||||
|
(let ([new-cnt
|
||||||
|
(apply
|
||||||
|
min
|
||||||
|
(map (lambda (pat)
|
||||||
|
(apply
|
||||||
|
+
|
||||||
|
(map (lambda (elem)
|
||||||
|
(car
|
||||||
|
(bound-identifier-mapping-get nts
|
||||||
|
elem
|
||||||
|
(lambda () (list 1)))))
|
||||||
|
pat)))
|
||||||
|
pats))])
|
||||||
|
(if (new-cnt . > . (car old-list))
|
||||||
|
(cons new-cnt (cdr old-list))
|
||||||
|
old-list)))
|
||||||
|
nt-ids patss)
|
||||||
|
;; Compute set of toks that must appear at the beginning
|
||||||
|
;; for a non-terminal
|
||||||
|
(nt-fixpoint
|
||||||
|
nts
|
||||||
|
(lambda (nt pats old-list)
|
||||||
|
(let ([new-list
|
||||||
|
(apply
|
||||||
|
append
|
||||||
|
(map (lambda (pat)
|
||||||
|
(let loop ([pat pat])
|
||||||
|
(if (pair? pat)
|
||||||
|
(let ([l (bound-identifier-mapping-get
|
||||||
|
nts
|
||||||
|
(car pat)
|
||||||
|
(lambda ()
|
||||||
|
(list 1 (map-token toks (car pat)))))])
|
||||||
|
;; If the non-terminal can match 0 things,
|
||||||
|
;; then it might match something from the
|
||||||
|
;; next pattern element. Otherwise, it must
|
||||||
|
;; match the first element:
|
||||||
|
(if (zero? (car l))
|
||||||
|
(append (cdr l) (loop (cdr pat)))
|
||||||
|
(cdr l)))
|
||||||
|
null)))
|
||||||
|
pats))])
|
||||||
|
(let ([new (filter (lambda (id)
|
||||||
|
(andmap (lambda (id2)
|
||||||
|
(not (eq? id id2)))
|
||||||
|
(cdr old-list)))
|
||||||
|
new-list)])
|
||||||
|
(if (pair? new)
|
||||||
|
;; Drop dups in new list:
|
||||||
|
(let ([new (let loop ([new new])
|
||||||
|
(if (null? (cdr new))
|
||||||
|
new
|
||||||
|
(if (ormap (lambda (id)
|
||||||
|
(eq? (car new) id))
|
||||||
|
(cdr new))
|
||||||
|
(loop (cdr new))
|
||||||
|
(cons (car new) (loop (cdr new))))))])
|
||||||
|
(cons (car old-list) (append new (cdr old-list))))
|
||||||
|
old-list))))
|
||||||
|
nt-ids patss)
|
||||||
|
;; Determine left-recursive clauses:
|
||||||
|
(for-each (lambda (nt pats)
|
||||||
|
(let ([l (bound-identifier-mapping-get nts nt)])
|
||||||
|
(bound-identifier-mapping-put! nts nt (list (car l)
|
||||||
|
(cdr l)
|
||||||
|
(map (lambda (x) #f) pats)))))
|
||||||
|
nt-ids patss)
|
||||||
|
(nt-fixpoint
|
||||||
|
nts
|
||||||
|
(lambda (nt pats old-list)
|
||||||
|
(list (car old-list)
|
||||||
|
(cadr old-list)
|
||||||
|
(map (lambda (pat simple?)
|
||||||
|
(or simple?
|
||||||
|
(let ([l (map (lambda (elem)
|
||||||
|
(bound-identifier-mapping-get
|
||||||
|
nts
|
||||||
|
elem
|
||||||
|
(lambda () #f)))
|
||||||
|
pat)])
|
||||||
|
(andmap (lambda (i)
|
||||||
|
(or (not i)
|
||||||
|
(andmap values (caddr i))))
|
||||||
|
l))))
|
||||||
|
pats (caddr old-list))))
|
||||||
|
nt-ids patss)
|
||||||
|
;; Build a definition for each non-term:
|
||||||
|
(loop (cdr clauses)
|
||||||
|
cfg-start
|
||||||
|
(map (lambda (nt pats handles $ctxs)
|
||||||
|
(define info (bound-identifier-mapping-get nts nt))
|
||||||
|
(list nt
|
||||||
|
#`(let ([key (gensym '#,nt)])
|
||||||
|
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
||||||
|
(parse-nt/share
|
||||||
|
key #,(car info) '#,(cadr info) stream last-consumed-token depth end
|
||||||
|
max-depth tasks
|
||||||
|
success-k fail-k
|
||||||
|
(lambda (end max-depth tasks success-k fail-k)
|
||||||
|
#,(let loop ([pats pats]
|
||||||
|
[handles (syntax->list handles)]
|
||||||
|
[$ctxs (syntax->list $ctxs)]
|
||||||
|
[simple?s (caddr info)])
|
||||||
|
(if (null? pats)
|
||||||
|
#'(fail-k max-depth tasks)
|
||||||
|
#`(#,(if (or (null? (cdr pats))
|
||||||
|
(car simple?s))
|
||||||
|
#'parse-or
|
||||||
|
#'parse-parallel-or)
|
||||||
|
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
||||||
|
#,(build-match nts
|
||||||
|
toks
|
||||||
|
(car pats)
|
||||||
|
(car handles)
|
||||||
|
(car $ctxs)))
|
||||||
|
(lambda (stream last-consumed-token depth end success-k fail-k max-depth tasks)
|
||||||
|
#,(loop (cdr pats)
|
||||||
|
(cdr handles)
|
||||||
|
(cdr $ctxs)
|
||||||
|
(cdr simple?s)))
|
||||||
|
stream last-consumed-token depth end success-k fail-k max-depth tasks)))))))))
|
||||||
|
nt-ids
|
||||||
|
patss
|
||||||
|
(syntax->list #'(((begin handle0 handle ...) ...) ...))
|
||||||
|
(syntax->list #'((handle0 ...) ...)))
|
||||||
|
cfg-error
|
||||||
|
src-pos?
|
||||||
|
(list*
|
||||||
|
(with-syntax ([((tok tok-id . $e) ...)
|
||||||
|
(token-identifier-mapping-map toks
|
||||||
|
(lambda (k v)
|
||||||
|
(list* k
|
||||||
|
(car v)
|
||||||
|
(if (cdr v)
|
||||||
|
#f
|
||||||
|
'$1))))]
|
||||||
|
[(pos ...)
|
||||||
|
(if src-pos?
|
||||||
|
#'($1-start-pos $1-end-pos)
|
||||||
|
#'(#f #f))])
|
||||||
|
#`(grammar (start [() null]
|
||||||
|
[(atok start) (cons $1 $2)])
|
||||||
|
(atok [(tok) (make-tok 'tok-id 'tok $e pos ...)] ...)))
|
||||||
|
#`(start start)
|
||||||
|
parser-clauses)))]
|
||||||
|
[(grammar . _)
|
||||||
|
(raise-syntax-error
|
||||||
|
#f
|
||||||
|
"bad grammar clause"
|
||||||
|
stx
|
||||||
|
(car clauses))]
|
||||||
|
[(src-pos)
|
||||||
|
(loop (cdr clauses)
|
||||||
|
cfg-start
|
||||||
|
cfg-grammar
|
||||||
|
cfg-error
|
||||||
|
#t
|
||||||
|
(cons (car clauses) parser-clauses))]
|
||||||
|
[_else
|
||||||
|
(loop (cdr clauses)
|
||||||
|
cfg-start
|
||||||
|
cfg-grammar
|
||||||
|
cfg-error
|
||||||
|
src-pos?
|
||||||
|
(cons (car clauses) parser-clauses))]))))])
|
||||||
|
#`(let ([orig-parse (parser
|
||||||
|
[error (lambda (a b c)
|
||||||
|
(error 'cfg-parser "unexpected ~a token: ~a" b c))]
|
||||||
|
. #,parser-clauses)]
|
||||||
|
[error-proc #,cfg-error])
|
||||||
|
(letrec #,grammar
|
||||||
|
(lambda (get-tok)
|
||||||
|
(let ([tok-list (orig-parse get-tok)])
|
||||||
|
(letrec ([success-k
|
||||||
|
(lambda (val stream last-consumed-token depth max-depth tasks next)
|
||||||
|
(if (null? stream)
|
||||||
|
val
|
||||||
|
(next success-k fail-k max-depth tasks)))]
|
||||||
|
[fail-k (lambda (max-depth tasks)
|
||||||
|
(define (call-error-proc tok-ok? tok-name tok-value start-pos end-pos)
|
||||||
|
(cond
|
||||||
|
[(procedure-arity-includes? error-proc 5)
|
||||||
|
(error-proc tok-ok? tok-name tok-value start-pos end-pos)]
|
||||||
|
[else
|
||||||
|
(error-proc tok-ok? tok-name tok-value)]))
|
||||||
|
(cond
|
||||||
|
[(null? tok-list)
|
||||||
|
(if error-proc
|
||||||
|
(call-error-proc #t
|
||||||
|
'no-tokens
|
||||||
|
#f
|
||||||
|
(make-position #f #f #f)
|
||||||
|
(make-position #f #f #f))
|
||||||
|
(error
|
||||||
|
'cfg-parse
|
||||||
|
"no tokens"))]
|
||||||
|
[else
|
||||||
|
(let ([bad-tok (list-ref tok-list
|
||||||
|
(min (sub1 (length tok-list))
|
||||||
|
max-depth))])
|
||||||
|
(if error-proc
|
||||||
|
(call-error-proc #t
|
||||||
|
(tok-orig-name bad-tok)
|
||||||
|
(tok-val bad-tok)
|
||||||
|
(tok-start bad-tok)
|
||||||
|
(tok-end bad-tok))
|
||||||
|
(error
|
||||||
|
'cfg-parse
|
||||||
|
"failed at ~a"
|
||||||
|
(tok-val bad-tok))))]))])
|
||||||
|
(#,start tok-list
|
||||||
|
;; we simulate a token at the very beginning with zero width
|
||||||
|
;; for use with the position-generating code (*-start-pos, *-end-pos).
|
||||||
|
(if (null? tok-list)
|
||||||
|
(tok #f #f #f
|
||||||
|
(position 1
|
||||||
|
#,(if src-pos? #'1 #'#f)
|
||||||
|
#,(if src-pos? #'0 #'#f))
|
||||||
|
(position 1
|
||||||
|
#,(if src-pos? #'1 #'#f)
|
||||||
|
#,(if src-pos? #'0 #'#f)))
|
||||||
|
(tok (tok-name (car tok-list))
|
||||||
|
(tok-orig-name (car tok-list))
|
||||||
|
(tok-val (car tok-list))
|
||||||
|
(tok-start (car tok-list))
|
||||||
|
(tok-start (car tok-list))))
|
||||||
|
0
|
||||||
|
(length tok-list)
|
||||||
|
success-k
|
||||||
|
fail-k
|
||||||
|
0
|
||||||
|
(make-tasks null null
|
||||||
|
(make-hasheq) (make-hasheq)
|
||||||
|
(make-hash) #t)))))))))]))
|
||||||
|
|
||||||
|
|
||||||
|
(module* test racket/base
|
||||||
|
(require (submod "..")
|
||||||
|
br-parser-tools/lex
|
||||||
|
racket/block
|
||||||
|
racket/generator
|
||||||
|
rackunit)
|
||||||
|
|
||||||
|
;; Test: parsing regular expressions.
|
||||||
|
;; Here is a test case on locations:
|
||||||
|
(block
|
||||||
|
(define-tokens regexp-tokens (ANCHOR STAR OR LIT LPAREN RPAREN EOF))
|
||||||
|
(define lex (lexer-src-pos ["|" (token-OR lexeme)]
|
||||||
|
["^" (token-ANCHOR lexeme)]
|
||||||
|
["*" (token-STAR lexeme)]
|
||||||
|
[(repetition 1 +inf.0 alphabetic) (token-LIT lexeme)]
|
||||||
|
["(" (token-LPAREN lexeme)]
|
||||||
|
[")" (token-RPAREN lexeme)]
|
||||||
|
[whitespace (return-without-pos (lex input-port))]
|
||||||
|
[(eof) (token-EOF 'eof)]))
|
||||||
|
(define -parse (cfg-parser
|
||||||
|
(tokens regexp-tokens)
|
||||||
|
(start top)
|
||||||
|
(end EOF)
|
||||||
|
(src-pos)
|
||||||
|
(grammar [top [(maybe-anchor regexp)
|
||||||
|
(cond [$1
|
||||||
|
`(anchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))]
|
||||||
|
[else
|
||||||
|
`(unanchored ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))])]]
|
||||||
|
[maybe-anchor [(ANCHOR) #t]
|
||||||
|
[() #f]]
|
||||||
|
[regexp [(regexp STAR) `(star ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $2-end-pos))]
|
||||||
|
[(regexp OR regexp) `(or ,$1 ,$3 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))]
|
||||||
|
[(LPAREN regexp RPAREN) `(group ,$2 ,(pos->sexp $1-start-pos) ,(pos->sexp $3-end-pos))]
|
||||||
|
[(LIT) `(lit ,$1 ,(pos->sexp $1-start-pos) ,(pos->sexp $1-end-pos))]])))
|
||||||
|
(define (pos->sexp pos)
|
||||||
|
(position-offset pos))
|
||||||
|
|
||||||
|
(define (parse s)
|
||||||
|
(define ip (open-input-string s))
|
||||||
|
(port-count-lines! ip)
|
||||||
|
(-parse (lambda () (lex ip))))
|
||||||
|
|
||||||
|
(check-equal? (parse "abc")
|
||||||
|
'(unanchored (lit "abc" 1 4) 1 4))
|
||||||
|
(check-equal? (parse "a | (b*) | c")
|
||||||
|
'(unanchored (or (or (lit "a" 1 2)
|
||||||
|
(group (star (lit "b" 6 7) 6 8) 5 9)
|
||||||
|
1 9)
|
||||||
|
(lit "c" 12 13)
|
||||||
|
1 13)
|
||||||
|
1 13)))
|
||||||
|
|
||||||
|
|
||||||
|
;; Check that cfg-parser can accept error functions of 3 arguments:
|
||||||
|
(block
|
||||||
|
(define-tokens non-terminals (ONE ZERO EOF))
|
||||||
|
(define parse
|
||||||
|
(cfg-parser (tokens non-terminals)
|
||||||
|
(start ones)
|
||||||
|
(end EOF)
|
||||||
|
(error (lambda (tok-ok tok-name tok-val)
|
||||||
|
(error (format "~a ~a ~a" tok-ok tok-name tok-val))))
|
||||||
|
(grammar [ones [() null]
|
||||||
|
[(ONE ones) (cons $1 $2)]])))
|
||||||
|
(define (sequence->tokenizer s)
|
||||||
|
(define-values (more? next) (sequence-generate s))
|
||||||
|
(lambda ()
|
||||||
|
(cond [(more?) (next)]
|
||||||
|
[else (token-EOF 'eof)])))
|
||||||
|
(check-exn #rx"#t ZERO zero"
|
||||||
|
(lambda () (parse (sequence->tokenizer (list (token-ZERO "zero")))))))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
;; Check that cfg-parser can accept error functions of 5 arguments:
|
||||||
|
(block
|
||||||
|
(define-tokens non-terminals (ONE ZERO EOF))
|
||||||
|
(define parse
|
||||||
|
(cfg-parser (tokens non-terminals)
|
||||||
|
(start ones)
|
||||||
|
(src-pos)
|
||||||
|
(end EOF)
|
||||||
|
(error (lambda (tok-ok tok-name tok-val start-pos end-pos)
|
||||||
|
(error (format "~a ~a ~a ~a ~a"
|
||||||
|
tok-ok tok-name tok-val
|
||||||
|
(position-offset start-pos)
|
||||||
|
(position-offset end-pos)))))
|
||||||
|
(grammar [ones [() null]
|
||||||
|
[(ONE ones) (cons $1 $2)]])))
|
||||||
|
(define (sequence->tokenizer s)
|
||||||
|
(define-values (more? next) (sequence-generate s))
|
||||||
|
(lambda ()
|
||||||
|
(cond [(more?) (next)]
|
||||||
|
[else (position-token (token-EOF 'eof)
|
||||||
|
(position #f #f #f)
|
||||||
|
(position #f #f #f))])))
|
||||||
|
(check-exn #rx"#t ZERO zero 2 3"
|
||||||
|
(lambda ()
|
||||||
|
(parse
|
||||||
|
(sequence->tokenizer
|
||||||
|
(list (position-token
|
||||||
|
(token-ZERO "zero")
|
||||||
|
(position 2 2 5)
|
||||||
|
(position 3 2 6))))))))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
;; Tests used during development
|
||||||
|
(define-tokens non-terminals (PLUS MINUS STAR BAR COLON EOF))
|
||||||
|
|
||||||
|
(define lex
|
||||||
|
(lexer
|
||||||
|
["+" (token-PLUS '+)]
|
||||||
|
["-" (token-MINUS '-)]
|
||||||
|
["*" (token-STAR '*)]
|
||||||
|
["|" (token-BAR '||)]
|
||||||
|
[":" (token-COLON '|:|)]
|
||||||
|
[whitespace (lex input-port)]
|
||||||
|
[(eof) (token-EOF 'eof)]))
|
||||||
|
|
||||||
|
(define parse
|
||||||
|
(cfg-parser
|
||||||
|
(tokens non-terminals)
|
||||||
|
(start <program>)
|
||||||
|
(end EOF)
|
||||||
|
(error (lambda (a b stx)
|
||||||
|
(error 'parse "failed at ~s" stx)))
|
||||||
|
(grammar [<program> [(PLUS) "plus"]
|
||||||
|
[(<minus-program> BAR <minus-program>) (list $1 $2 $3)]
|
||||||
|
[(<program> COLON) (list $1)]]
|
||||||
|
[<minus-program> [(MINUS) "minus"]
|
||||||
|
[(<program> STAR) (cons $1 $2)]]
|
||||||
|
[<simple> [(<alts> <alts> <alts> MINUS) "yes"]]
|
||||||
|
[<alts> [(PLUS) 'plus]
|
||||||
|
[(MINUS) 'minus]]
|
||||||
|
[<random> [() '0]
|
||||||
|
[(<random> PLUS) (add1 $1)]
|
||||||
|
[(<random> PLUS) (add1 $1)]])))
|
||||||
|
|
||||||
|
(let ([p (open-input-string #;"+*|-|-*|+**" #;"-|+*|+**"
|
||||||
|
#;"+*|+**|-" #;"-|-*|-|-*"
|
||||||
|
#;"-|-*|-|-**|-|-*|-|-**"
|
||||||
|
"-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|-|-*|-|-**|-|-*|-|-***
|
||||||
|
|-|-*|-|-**|-|-*|-|-*****|-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-****|
|
||||||
|
-|-*|-|-**|-|-*|-|-***|-|-*|-|-**|-|-*|-|-*****"
|
||||||
|
;; This one fails:
|
||||||
|
#;"+*")])
|
||||||
|
(check-equal? (parse (lambda () (lex p)))
|
||||||
|
'((((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
||||||
|
||
|
||||||
|
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
||||||
|
.
|
||||||
|
*)
|
||||||
|
||
|
||||||
|
(((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
||||||
|
||
|
||||||
|
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
||||||
|
.
|
||||||
|
*))
|
||||||
|
.
|
||||||
|
*)
|
||||||
|
||
|
||||||
|
(((((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
||||||
|
||
|
||||||
|
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
||||||
|
.
|
||||||
|
*)
|
||||||
|
||
|
||||||
|
(((((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *)
|
||||||
|
||
|
||||||
|
(((((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *) || (((("minus" || "minus") . *) || (("minus" || "minus") . *)) . *)) . *))
|
||||||
|
.
|
||||||
|
*))
|
||||||
|
.
|
||||||
|
*)))))
|
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("1da87ebbdbd287c3141d81b344c83a22fdcaead1" . "913322440977cfa44185506acee3ea9ca2d4426d") (collects #"br-parser-tools" #"lex.rkt") (collects #"br-parser-tools" #"private-lex" #"token-syntax.rkt") (collects #"br-parser-tools" #"yacc.rkt") (collects #"racket" #"base.rkt") (collects #"racket" #"block.rkt") (collects #"racket" #"generator.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt") (collects #"syntax" #"boundmap.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("31789e05e19e6122c7619af755b6438bc1872e9b" . "b19638aea3046717541136642402ab336892a3aa") (collects #"br-parser-tools" #"lex.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzscheme" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("fa9d915c4fb46de94fa89c9eac68c0a0fe32cd40" . "6eb29578f87766fcd1ee8209b3edc21c1081b8e4") (collects #"br-parser-tools" #"private-lex" #"actions.rkt") (collects #"br-parser-tools" #"private-lex" #"front.rkt") (collects #"br-parser-tools" #"private-lex" #"token.rkt") (collects #"br-parser-tools" #"private-lex" #"unicode-chars.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzlib" #"stxparam.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"boundmap.rkt") (collects #"syntax" #"define.rkt") (collects #"syntax" #"readerr.rkt") (collects #"syntax" #"stx.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("100e497a060ad40147465b34990a741797ddb6c0" . "4a109ffd564a7614c177351282958ab6cc95da13") (collects #"br-parser-tools" #"private-lex" #"token.rkt") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"br-parser-tools" #"private-yacc" #"parser-actions.rkt") (collects #"br-parser-tools" #"private-yacc" #"parser-builder.rkt") (collects #"br-parser-tools" #"private-yacc" #"yacc-helper.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"etc.rkt") (collects #"mzlib" #"pretty.rkt") (collects #"scheme" #"base.rkt") (collects #"scheme" #"runtime-config.rkt") (collects #"syntax" #"readerr.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("31789e05e19e6122c7619af755b6438bc1872e9b" . "0a9e246cdda8f6239b7422f687b6513aa57dfb7f") (collects #"br-parser-tools" #"lex.rkt") (collects #"mzscheme" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("340fcc1fd30e6afc450a6027068d0e71ff42234e" . "57d5de7788049c0521682559da14c2475e4e08b5") (collects #"br-parser-tools" #"private-lex" #"actions.rkt") (collects #"br-parser-tools" #"private-lex" #"front.rkt") (collects #"br-parser-tools" #"private-lex" #"token.rkt") (collects #"br-parser-tools" #"private-lex" #"unicode-chars.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzlib" #"stxparam.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"boundmap.rkt") (collects #"syntax" #"define.rkt") (collects #"syntax" #"readerr.rkt") (collects #"syntax" #"stx.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("0160a40a20d9e0d2a9dc08e0d3b6407cd43b669f" . "64293529315a4e5ca67bc9da08b943f520e0d704") (collects #"info" #"main.rkt") (collects #"setup" #"infotab.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("1c73d195a19fdaccf7ef3b12cf2e3b5d7fa49f8f" . "38b5833add35ba09a9b03d9a3edef53637cc159c") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"mzscheme" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("31789e05e19e6122c7619af755b6438bc1872e9b" . "6c5a05919183e8d1de083eff2db1966f6ae5ccb6") (collects #"br-parser-tools" #"lex.rkt") (collects #"mzscheme" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("340fcc1fd30e6afc450a6027068d0e71ff42234e" . "8cc42f47b45f7984d90feb68df53180d93bdefeb") (collects #"br-parser-tools" #"private-lex" #"actions.rkt") (collects #"br-parser-tools" #"private-lex" #"front.rkt") (collects #"br-parser-tools" #"private-lex" #"token.rkt") (collects #"br-parser-tools" #"private-lex" #"unicode-chars.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzlib" #"stxparam.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"boundmap.rkt") (collects #"syntax" #"define.rkt") (collects #"syntax" #"readerr.rkt") (collects #"syntax" #"stx.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("d5abd58a5a7bfc4bc558dd51bd60ad27bf7d5be9" . "1ff2a8b025cffabf443820bc20e3a0c286570369") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"br-parser-tools" #"yacc.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"readerr.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("100e497a060ad40147465b34990a741797ddb6c0" . "35e63de458cf673b37751cb8bdcb77a583578019") (collects #"br-parser-tools" #"private-lex" #"token.rkt") (collects #"br-parser-tools" #"private-yacc" #"grammar.rkt") (collects #"br-parser-tools" #"private-yacc" #"parser-actions.rkt") (collects #"br-parser-tools" #"private-yacc" #"parser-builder.rkt") (collects #"br-parser-tools" #"private-yacc" #"yacc-helper.rkt") (collects #"mzlib" #"etc.rkt") (collects #"mzlib" #"pretty.rkt") (collects #"scheme" #"base.rkt") (collects #"scheme" #"runtime-config.rkt") (collects #"syntax" #"readerr.rkt"))
|
Binary file not shown.
@ -0,0 +1,89 @@
|
|||||||
|
#lang scheme
|
||||||
|
|
||||||
|
;; An interactive calculator inspired by the calculator example in the bison manual.
|
||||||
|
|
||||||
|
|
||||||
|
;; Import the parser and lexer generators.
|
||||||
|
(require br-parser-tools/yacc
|
||||||
|
br-parser-tools/lex
|
||||||
|
(prefix-in : br-parser-tools/lex-sre))
|
||||||
|
|
||||||
|
(define-tokens value-tokens (NUM VAR FNCT))
|
||||||
|
(define-empty-tokens op-tokens (newline = OP CP + - * / ^ EOF NEG))
|
||||||
|
|
||||||
|
;; A hash table to store variable values in for the calculator
|
||||||
|
(define vars (make-hash))
|
||||||
|
|
||||||
|
(define-lex-abbrevs
|
||||||
|
(lower-letter (:/ "a" "z"))
|
||||||
|
|
||||||
|
(upper-letter (:/ #\A #\Z))
|
||||||
|
|
||||||
|
;; (:/ 0 9) would not work because the lexer does not understand numbers. (:/ #\0 #\9) is ok too.
|
||||||
|
(digit (:/ "0" "9")))
|
||||||
|
|
||||||
|
(define calcl
|
||||||
|
(lexer
|
||||||
|
[(eof) 'EOF]
|
||||||
|
;; recursively call the lexer on the remaining input after a tab or space. Returning the
|
||||||
|
;; result of that operation. This effectively skips all whitespace.
|
||||||
|
[(:or #\tab #\space) (calcl input-port)]
|
||||||
|
;; (token-newline) returns 'newline
|
||||||
|
[#\newline (token-newline)]
|
||||||
|
;; Since (token-=) returns '=, just return the symbol directly
|
||||||
|
[(:or "=" "+" "-" "*" "/" "^") (string->symbol lexeme)]
|
||||||
|
["(" 'OP]
|
||||||
|
[")" 'CP]
|
||||||
|
["sin" (token-FNCT sin)]
|
||||||
|
[(:+ (:or lower-letter upper-letter)) (token-VAR (string->symbol lexeme))]
|
||||||
|
[(:+ digit) (token-NUM (string->number lexeme))]
|
||||||
|
[(:: (:+ digit) #\. (:* digit)) (token-NUM (string->number lexeme))]))
|
||||||
|
|
||||||
|
|
||||||
|
(define calcp
|
||||||
|
(parser
|
||||||
|
|
||||||
|
(start start)
|
||||||
|
(end newline EOF)
|
||||||
|
(tokens value-tokens op-tokens)
|
||||||
|
(error (lambda (a b c) (void)))
|
||||||
|
|
||||||
|
(precs (right =)
|
||||||
|
(left - +)
|
||||||
|
(left * /)
|
||||||
|
(left NEG)
|
||||||
|
(right ^))
|
||||||
|
|
||||||
|
(grammar
|
||||||
|
|
||||||
|
(start [() #f]
|
||||||
|
;; If there is an error, ignore everything before the error
|
||||||
|
;; and try to start over right after the error
|
||||||
|
[(error start) $2]
|
||||||
|
[(exp) $1])
|
||||||
|
|
||||||
|
(exp [(NUM) $1]
|
||||||
|
[(VAR) (hash-ref vars $1 (lambda () 0))]
|
||||||
|
[(VAR = exp) (begin (hash-set! vars $1 $3)
|
||||||
|
$3)]
|
||||||
|
[(FNCT OP exp CP) ($1 $3)]
|
||||||
|
[(exp + exp) (+ $1 $3)]
|
||||||
|
[(exp - exp) (- $1 $3)]
|
||||||
|
[(exp * exp) (* $1 $3)]
|
||||||
|
[(exp / exp) (/ $1 $3)]
|
||||||
|
[(- exp) (prec NEG) (- $2)]
|
||||||
|
[(exp ^ exp) (expt $1 $3)]
|
||||||
|
[(OP exp CP) $2]))))
|
||||||
|
|
||||||
|
;; run the calculator on the given input-port
|
||||||
|
(define (calc ip)
|
||||||
|
(port-count-lines! ip)
|
||||||
|
(letrec ((one-line
|
||||||
|
(lambda ()
|
||||||
|
(let ((result (calcp (lambda () (calcl ip)))))
|
||||||
|
(when result
|
||||||
|
(printf "~a\n" result)
|
||||||
|
(one-line))))))
|
||||||
|
(one-line)))
|
||||||
|
|
||||||
|
(calc (open-input-string "x=1\n(x + 2 * 3) - (1+2)*3"))
|
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("e3352df1b72626dc220a94ee0bd16f165519bade" . "3c46fc3eda107e037940fbfb68032106839316da") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"br-parser-tools" #"yacc.rkt") (collects #"scheme" #"main.rkt") (collects #"scheme" #"runtime-config.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("05c4bd3fd622dd1b33ebd5eec53f3018b9d64055" . "9a6409107f8f3a3566e2c5a71cb5bbf5b38f014e") (collects #"br-parser-tools" #"lex-sre.rkt") (collects #"br-parser-tools" #"lex.rkt") (collects #"br-parser-tools" #"yacc.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"readerr.rkt"))
|
Binary file not shown.
@ -0,0 +1,242 @@
|
|||||||
|
;; This implements the equivalent of racket's read-syntax for R5RS scheme.
|
||||||
|
;; It has not been thoroughly tested. Also it will read an entire file into a
|
||||||
|
;; list of syntax objects, instead of returning one syntax object at a time
|
||||||
|
|
||||||
|
(module read mzscheme
|
||||||
|
|
||||||
|
(require br-parser-tools/lex
|
||||||
|
(prefix : br-parser-tools/lex-sre)
|
||||||
|
br-parser-tools/yacc
|
||||||
|
syntax/readerr)
|
||||||
|
|
||||||
|
(define-tokens data (DATUM))
|
||||||
|
(define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF))
|
||||||
|
|
||||||
|
(define scheme-lexer
|
||||||
|
(lexer-src-pos
|
||||||
|
|
||||||
|
;; Skip comments, without accumulating extra position information
|
||||||
|
[(:or scheme-whitespace comment) (return-without-pos (scheme-lexer input-port))]
|
||||||
|
|
||||||
|
["#t" (token-DATUM #t)]
|
||||||
|
["#f" (token-DATUM #f)]
|
||||||
|
[(:: "#\\" any-char) (token-DATUM (caddr (string->list lexeme)))]
|
||||||
|
["#\\space" (token-DATUM #\space)]
|
||||||
|
["#\\newline" (token-DATUM #\newline)]
|
||||||
|
[(:or (:: initial (:* subsequent)) "+" "-" "...") (token-DATUM (string->symbol lexeme))]
|
||||||
|
[#\" (token-DATUM (list->string (get-string-token input-port)))]
|
||||||
|
[#\( 'OP]
|
||||||
|
[#\) 'CP]
|
||||||
|
[#\[ 'OP]
|
||||||
|
[#\] 'CP]
|
||||||
|
["#(" 'HASHOP]
|
||||||
|
[num2 (token-DATUM (string->number lexeme 2))]
|
||||||
|
[num8 (token-DATUM (string->number lexeme 8))]
|
||||||
|
[num10 (token-DATUM (string->number lexeme 10))]
|
||||||
|
[num16 (token-DATUM (string->number lexeme 16))]
|
||||||
|
["'" 'QUOTE]
|
||||||
|
["`" 'QUASIQUOTE]
|
||||||
|
["," 'UNQUOTE]
|
||||||
|
[",@" 'UNQUOTE-SPLICING]
|
||||||
|
["." 'DOT]
|
||||||
|
[(eof) 'EOF]))
|
||||||
|
|
||||||
|
(define get-string-token
|
||||||
|
(lexer
|
||||||
|
[(:~ #\" #\\) (cons (car (string->list lexeme))
|
||||||
|
(get-string-token input-port))]
|
||||||
|
[(:: #\\ #\\) (cons #\\ (get-string-token input-port))]
|
||||||
|
[(:: #\\ #\") (cons #\" (get-string-token input-port))]
|
||||||
|
[#\" null]))
|
||||||
|
|
||||||
|
|
||||||
|
(define-lex-abbrevs
|
||||||
|
[letter (:or (:/ "a" "z") (:/ #\A #\Z))]
|
||||||
|
[digit (:/ #\0 #\9)]
|
||||||
|
[scheme-whitespace (:or #\newline #\return #\tab #\space #\vtab)]
|
||||||
|
[initial (:or letter (char-set "!$%&*/:<=>?^_~@"))]
|
||||||
|
[subsequent (:or initial digit (char-set "+-.@"))]
|
||||||
|
[comment (:: #\; (:* (:~ #\newline)) #\newline)]
|
||||||
|
|
||||||
|
|
||||||
|
;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of
|
||||||
|
;; using regexp macros to avoid the cut and paste.
|
||||||
|
; [numR (:: prefixR complexR)]
|
||||||
|
; [complexR (:or realR
|
||||||
|
; (:: realR "@" realR)
|
||||||
|
; (:: realR "+" urealR "i")
|
||||||
|
; (:: realR "-" urealR "i")
|
||||||
|
; (:: realR "+i")
|
||||||
|
; (:: realR "-i")
|
||||||
|
; (:: "+" urealR "i")
|
||||||
|
; (:: "-" urealR "i")
|
||||||
|
; (:: "+i")
|
||||||
|
; (:: "-i"))]
|
||||||
|
; [realR (:: sign urealR)]
|
||||||
|
; [urealR (:or uintegerR (:: uintegerR "/" uintegerR) decimalR)]
|
||||||
|
; [uintegerR (:: (:+ digitR) (:* #\#))]
|
||||||
|
; [prefixR (:or (:: radixR exactness)
|
||||||
|
; (:: exactness radixR))]
|
||||||
|
|
||||||
|
[num2 (:: prefix2 complex2)]
|
||||||
|
[complex2 (:or real2
|
||||||
|
(:: real2 "@" real2)
|
||||||
|
(:: real2 "+" ureal2 "i")
|
||||||
|
(:: real2 "-" ureal2 "i")
|
||||||
|
(:: real2 "+i")
|
||||||
|
(:: real2 "-i")
|
||||||
|
(:: "+" ureal2 "i")
|
||||||
|
(:: "-" ureal2 "i")
|
||||||
|
(:: "+i")
|
||||||
|
(:: "-i"))]
|
||||||
|
[real2 (:: sign ureal2)]
|
||||||
|
[ureal2 (:or uinteger2 (:: uinteger2 "/" uinteger2))]
|
||||||
|
[uinteger2 (:: (:+ digit2) (:* #\#))]
|
||||||
|
[prefix2 (:or (:: radix2 exactness)
|
||||||
|
(:: exactness radix2))]
|
||||||
|
[radix2 "#b"]
|
||||||
|
[digit2 (:or "0" "1")]
|
||||||
|
[num8 (:: prefix8 complex8)]
|
||||||
|
[complex8 (:or real8
|
||||||
|
(:: real8 "@" real8)
|
||||||
|
(:: real8 "+" ureal8 "i")
|
||||||
|
(:: real8 "-" ureal8 "i")
|
||||||
|
(:: real8 "+i")
|
||||||
|
(:: real8 "-i")
|
||||||
|
(:: "+" ureal8 "i")
|
||||||
|
(:: "-" ureal8 "i")
|
||||||
|
(:: "+i")
|
||||||
|
(:: "-i"))]
|
||||||
|
[real8 (:: sign ureal8)]
|
||||||
|
[ureal8 (:or uinteger8 (:: uinteger8 "/" uinteger8))]
|
||||||
|
[uinteger8 (:: (:+ digit8) (:* #\#))]
|
||||||
|
[prefix8 (:or (:: radix8 exactness)
|
||||||
|
(:: exactness radix8))]
|
||||||
|
[radix8 "#o"]
|
||||||
|
[digit8 (:/ "0" "7")]
|
||||||
|
|
||||||
|
[num10 (:: prefix10 complex10)]
|
||||||
|
[complex10 (:or real10
|
||||||
|
(:: real10 "@" real10)
|
||||||
|
(:: real10 "+" ureal10 "i")
|
||||||
|
(:: real10 "-" ureal10 "i")
|
||||||
|
(:: real10 "+i")
|
||||||
|
(:: real10 "-i")
|
||||||
|
(:: "+" ureal10 "i")
|
||||||
|
(:: "-" ureal10 "i")
|
||||||
|
(:: "+i")
|
||||||
|
(:: "-i"))]
|
||||||
|
[real10 (:: sign ureal10)]
|
||||||
|
[ureal10 (:or uinteger10 (:: uinteger10 "/" uinteger10) decimal10)]
|
||||||
|
[uinteger10 (:: (:+ digit10) (:* #\#))]
|
||||||
|
[prefix10 (:or (:: radix10 exactness)
|
||||||
|
(:: exactness radix10))]
|
||||||
|
[radix10 (:? "#d")]
|
||||||
|
[digit10 digit]
|
||||||
|
[decimal10 (:or (:: uinteger10 suffix)
|
||||||
|
(:: #\. (:+ digit10) (:* #\#) suffix)
|
||||||
|
(:: (:+ digit10) #\. (:* digit10) (:* #\#) suffix)
|
||||||
|
(:: (:+ digit10) (:+ #\#) #\. (:* #\#) suffix))]
|
||||||
|
|
||||||
|
[num16 (:: prefix16 complex16)]
|
||||||
|
[complex16 (:or real16
|
||||||
|
(:: real16 "@" real16)
|
||||||
|
(:: real16 "+" ureal16 "i")
|
||||||
|
(:: real16 "-" ureal16 "i")
|
||||||
|
(:: real16 "+i")
|
||||||
|
(:: real16 "-i")
|
||||||
|
(:: "+" ureal16 "i")
|
||||||
|
(:: "-" ureal16 "i")
|
||||||
|
"+i"
|
||||||
|
"-i")]
|
||||||
|
[real16 (:: sign ureal16)]
|
||||||
|
[ureal16 (:or uinteger16 (:: uinteger16 "/" uinteger16))]
|
||||||
|
[uinteger16 (:: (:+ digit16) (:* #\#))]
|
||||||
|
[prefix16 (:or (:: radix16 exactness)
|
||||||
|
(:: exactness radix16))]
|
||||||
|
[radix16 "#x"]
|
||||||
|
[digit16 (:or digit (:/ #\a #\f) (:/ #\A #\F))]
|
||||||
|
|
||||||
|
|
||||||
|
[suffix (:or "" (:: exponent-marker sign (:+ digit10)))]
|
||||||
|
[exponent-marker (:or "e" "s" "f" "d" "l")]
|
||||||
|
[sign (:or "" "+" "-")]
|
||||||
|
[exactness (:or "" "#i" "#e")])
|
||||||
|
|
||||||
|
|
||||||
|
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
|
||||||
|
|
||||||
|
;; A macro to build the syntax object
|
||||||
|
(define-syntax (build-so stx)
|
||||||
|
(syntax-case stx ()
|
||||||
|
((_ value start end)
|
||||||
|
(with-syntax ((start-pos (datum->syntax-object
|
||||||
|
(syntax end)
|
||||||
|
(string->symbol
|
||||||
|
(format "$~a-start-pos"
|
||||||
|
(syntax-object->datum (syntax start))))))
|
||||||
|
(end-pos (datum->syntax-object
|
||||||
|
(syntax end)
|
||||||
|
(string->symbol
|
||||||
|
(format "$~a-end-pos"
|
||||||
|
(syntax-object->datum (syntax end))))))
|
||||||
|
(source (datum->syntax-object
|
||||||
|
(syntax end)
|
||||||
|
'source-name)))
|
||||||
|
(syntax
|
||||||
|
(datum->syntax-object
|
||||||
|
#f
|
||||||
|
value
|
||||||
|
(list source
|
||||||
|
(position-line start-pos)
|
||||||
|
(position-col start-pos)
|
||||||
|
(position-offset start-pos)
|
||||||
|
(- (position-offset end-pos)
|
||||||
|
(position-offset start-pos)))
|
||||||
|
stx-for-original-property))))))
|
||||||
|
|
||||||
|
(define (scheme-parser source-name)
|
||||||
|
(parser
|
||||||
|
(src-pos)
|
||||||
|
|
||||||
|
(start s)
|
||||||
|
(end EOF)
|
||||||
|
(error (lambda (a name val start end)
|
||||||
|
(raise-read-error
|
||||||
|
"read-error"
|
||||||
|
source-name
|
||||||
|
(position-line start)
|
||||||
|
(position-col start)
|
||||||
|
(position-offset start)
|
||||||
|
(- (position-offset end)
|
||||||
|
(position-offset start)))))
|
||||||
|
(tokens data delim)
|
||||||
|
|
||||||
|
|
||||||
|
(grammar
|
||||||
|
|
||||||
|
(s [(sexp-list) (reverse $1)])
|
||||||
|
|
||||||
|
(sexp [(DATUM) (build-so $1 1 1)]
|
||||||
|
[(OP sexp-list CP) (build-so (reverse $2) 1 3)]
|
||||||
|
[(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)]
|
||||||
|
[(QUOTE sexp) (build-so (list 'quote $2) 1 2)]
|
||||||
|
[(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)]
|
||||||
|
[(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)]
|
||||||
|
[(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)]
|
||||||
|
[(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)])
|
||||||
|
|
||||||
|
(sexp-list [() null]
|
||||||
|
[(sexp-list sexp) (cons $2 $1)]))))
|
||||||
|
|
||||||
|
(define (rs sn ip)
|
||||||
|
(port-count-lines! ip)
|
||||||
|
((scheme-parser sn) (lambda () (scheme-lexer ip))))
|
||||||
|
|
||||||
|
(define readsyntax
|
||||||
|
(case-lambda ((sn) (rs sn (current-input-port)))
|
||||||
|
((sn ip) (rs sn ip))))
|
||||||
|
|
||||||
|
(provide (rename readsyntax read-syntax))
|
||||||
|
|
||||||
|
)
|
@ -0,0 +1,3 @@
|
|||||||
|
#lang info
|
||||||
|
|
||||||
|
(define compile-omit-paths '("private-lex/error-tests.rkt"))
|
@ -0,0 +1,24 @@
|
|||||||
|
(module lex-plt-v200 mzscheme
|
||||||
|
(require br-parser-tools/lex
|
||||||
|
(prefix : br-parser-tools/lex-sre))
|
||||||
|
|
||||||
|
(provide epsilon
|
||||||
|
~
|
||||||
|
(rename :* *)
|
||||||
|
(rename :+ +)
|
||||||
|
(rename :? ?)
|
||||||
|
(rename :or :)
|
||||||
|
(rename :& &)
|
||||||
|
(rename :: @)
|
||||||
|
(rename :~ ^)
|
||||||
|
(rename :/ -))
|
||||||
|
|
||||||
|
(define-lex-trans epsilon
|
||||||
|
(syntax-rules ()
|
||||||
|
((_) "")))
|
||||||
|
|
||||||
|
(define-lex-trans ~
|
||||||
|
(syntax-rules ()
|
||||||
|
((_ re) (complement re)))))
|
||||||
|
|
||||||
|
|
@ -0,0 +1,119 @@
|
|||||||
|
(module lex-sre mzscheme
|
||||||
|
(require br-parser-tools/lex)
|
||||||
|
|
||||||
|
(provide (rename sre-* *)
|
||||||
|
(rename sre-+ +)
|
||||||
|
?
|
||||||
|
(rename sre-= =)
|
||||||
|
(rename sre->= >=)
|
||||||
|
**
|
||||||
|
(rename sre-or or)
|
||||||
|
:
|
||||||
|
seq
|
||||||
|
&
|
||||||
|
~
|
||||||
|
(rename sre-- -)
|
||||||
|
(rename sre-/ /)
|
||||||
|
/-only-chars)
|
||||||
|
|
||||||
|
(define-lex-trans sre-*
|
||||||
|
(syntax-rules ()
|
||||||
|
((_ re ...)
|
||||||
|
(repetition 0 +inf.0 (union re ...)))))
|
||||||
|
|
||||||
|
(define-lex-trans sre-+
|
||||||
|
(syntax-rules ()
|
||||||
|
((_ re ...)
|
||||||
|
(repetition 1 +inf.0 (union re ...)))))
|
||||||
|
|
||||||
|
(define-lex-trans ?
|
||||||
|
(syntax-rules ()
|
||||||
|
((_ re ...)
|
||||||
|
(repetition 0 1 (union re ...)))))
|
||||||
|
|
||||||
|
(define-lex-trans sre-=
|
||||||
|
(syntax-rules ()
|
||||||
|
((_ n re ...)
|
||||||
|
(repetition n n (union re ...)))))
|
||||||
|
|
||||||
|
(define-lex-trans sre->=
|
||||||
|
(syntax-rules ()
|
||||||
|
((_ n re ...)
|
||||||
|
(repetition n +inf.0 (union re ...)))))
|
||||||
|
|
||||||
|
(define-lex-trans **
|
||||||
|
(syntax-rules ()
|
||||||
|
((_ low #f re ...)
|
||||||
|
(** low +inf.0 re ...))
|
||||||
|
((_ low high re ...)
|
||||||
|
(repetition low high (union re ...)))))
|
||||||
|
|
||||||
|
(define-lex-trans sre-or
|
||||||
|
(syntax-rules ()
|
||||||
|
((_ re ...)
|
||||||
|
(union re ...))))
|
||||||
|
|
||||||
|
(define-lex-trans :
|
||||||
|
(syntax-rules ()
|
||||||
|
((_ re ...)
|
||||||
|
(concatenation re ...))))
|
||||||
|
|
||||||
|
(define-lex-trans seq
|
||||||
|
(syntax-rules ()
|
||||||
|
((_ re ...)
|
||||||
|
(concatenation re ...))))
|
||||||
|
|
||||||
|
(define-lex-trans &
|
||||||
|
(syntax-rules ()
|
||||||
|
((_ re ...)
|
||||||
|
(intersection re ...))))
|
||||||
|
|
||||||
|
(define-lex-trans ~
|
||||||
|
(syntax-rules ()
|
||||||
|
((_ re ...)
|
||||||
|
(char-complement (union re ...)))))
|
||||||
|
|
||||||
|
;; set difference
|
||||||
|
(define-lex-trans (sre-- stx)
|
||||||
|
(syntax-case stx ()
|
||||||
|
((_)
|
||||||
|
(raise-syntax-error #f
|
||||||
|
"must have at least one argument"
|
||||||
|
stx))
|
||||||
|
((_ big-re re ...)
|
||||||
|
(syntax (& big-re (complement (union re ...)))))))
|
||||||
|
|
||||||
|
(define-lex-trans (sre-/ stx)
|
||||||
|
(syntax-case stx ()
|
||||||
|
((_ range ...)
|
||||||
|
(let ((chars
|
||||||
|
(apply append (map (lambda (r)
|
||||||
|
(let ((x (syntax-e r)))
|
||||||
|
(cond
|
||||||
|
((char? x) (list x))
|
||||||
|
((string? x) (string->list x))
|
||||||
|
(else
|
||||||
|
(raise-syntax-error
|
||||||
|
#f
|
||||||
|
"not a char or string"
|
||||||
|
stx
|
||||||
|
r)))))
|
||||||
|
(syntax->list (syntax (range ...)))))))
|
||||||
|
(unless (even? (length chars))
|
||||||
|
(raise-syntax-error
|
||||||
|
#f
|
||||||
|
"not given an even number of characters"
|
||||||
|
stx))
|
||||||
|
#`(/-only-chars #,@chars)))))
|
||||||
|
|
||||||
|
(define-lex-trans /-only-chars
|
||||||
|
(syntax-rules ()
|
||||||
|
((_ c1 c2)
|
||||||
|
(char-range c1 c2))
|
||||||
|
((_ c1 c2 c ...)
|
||||||
|
(union (char-range c1 c2)
|
||||||
|
(/-only-chars c ...)))))
|
||||||
|
|
||||||
|
)
|
||||||
|
|
||||||
|
|
@ -0,0 +1,412 @@
|
|||||||
|
(module lex mzscheme
|
||||||
|
|
||||||
|
;; Provides the syntax used to create lexers and the functions needed to
|
||||||
|
;; create and use the buffer that the lexer reads from. See docs.
|
||||||
|
|
||||||
|
(require-for-syntax mzlib/list
|
||||||
|
syntax/stx
|
||||||
|
syntax/define
|
||||||
|
syntax/boundmap
|
||||||
|
"private-lex/util.rkt"
|
||||||
|
"private-lex/actions.rkt"
|
||||||
|
"private-lex/front.rkt"
|
||||||
|
"private-lex/unicode-chars.rkt")
|
||||||
|
|
||||||
|
(require mzlib/stxparam
|
||||||
|
syntax/readerr
|
||||||
|
"private-lex/token.rkt")
|
||||||
|
|
||||||
|
(provide lexer lexer-src-pos lexer-srcloc define-lex-abbrev define-lex-abbrevs define-lex-trans
|
||||||
|
|
||||||
|
;; Dealing with tokens and related structures
|
||||||
|
define-tokens define-empty-tokens token-name token-value token?
|
||||||
|
(struct position (offset line col))
|
||||||
|
(struct position-token (token start-pos end-pos))
|
||||||
|
(struct srcloc-token (token srcloc))
|
||||||
|
|
||||||
|
;; File path for highlighting errors while lexing
|
||||||
|
file-path
|
||||||
|
lexer-file-path ;; alternate name
|
||||||
|
|
||||||
|
;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4.
|
||||||
|
any-char any-string nothing alphabetic lower-case upper-case title-case
|
||||||
|
numeric symbolic punctuation graphic whitespace blank iso-control
|
||||||
|
|
||||||
|
;; A regular expression operator
|
||||||
|
char-set)
|
||||||
|
|
||||||
|
;; wrap-action: syntax-object src-pos? -> syntax-object
|
||||||
|
(define-for-syntax (wrap-action action src-loc-style)
|
||||||
|
(with-syntax ((action-stx
|
||||||
|
(cond
|
||||||
|
[(eq? src-loc-style 'lexer-src-pos)
|
||||||
|
#`(let/ec ret
|
||||||
|
(syntax-parameterize
|
||||||
|
([return-without-pos (make-rename-transformer #'ret)])
|
||||||
|
(make-position-token #,action start-pos end-pos)))]
|
||||||
|
[(eq? src-loc-style 'lexer-srcloc)
|
||||||
|
#`(let/ec ret
|
||||||
|
(syntax-parameterize
|
||||||
|
([return-without-srcloc (make-rename-transformer #'ret)])
|
||||||
|
(make-srcloc-token #,action lexeme-srcloc)))]
|
||||||
|
[else action])))
|
||||||
|
(syntax/loc action
|
||||||
|
(lambda (start-pos-p end-pos-p lexeme-p input-port-p)
|
||||||
|
(define lexeme-srcloc-p (make-srcloc (object-name input-port-p)
|
||||||
|
(position-line start-pos-p)
|
||||||
|
(position-col start-pos-p)
|
||||||
|
(position-offset start-pos-p)
|
||||||
|
(and (number? (position-offset end-pos-p))
|
||||||
|
(number? (position-offset start-pos-p))
|
||||||
|
(- (position-offset end-pos-p)
|
||||||
|
(position-offset start-pos-p)))))
|
||||||
|
(syntax-parameterize
|
||||||
|
([start-pos (make-rename-transformer #'start-pos-p)]
|
||||||
|
[end-pos (make-rename-transformer #'end-pos-p)]
|
||||||
|
[lexeme (make-rename-transformer #'lexeme-p)]
|
||||||
|
[input-port (make-rename-transformer #'input-port-p)]
|
||||||
|
[lexeme-srcloc (make-rename-transformer #'lexeme-srcloc-p)])
|
||||||
|
action-stx)))))
|
||||||
|
|
||||||
|
(define-for-syntax (make-lexer-trans src-loc-style)
|
||||||
|
(lambda (stx)
|
||||||
|
(syntax-case stx ()
|
||||||
|
((_ re-act ...)
|
||||||
|
(begin
|
||||||
|
(for-each
|
||||||
|
(lambda (x)
|
||||||
|
(syntax-case x ()
|
||||||
|
((re act) (void))
|
||||||
|
(_ (raise-syntax-error #f
|
||||||
|
"not a regular expression / action pair"
|
||||||
|
stx
|
||||||
|
x))))
|
||||||
|
(syntax->list (syntax (re-act ...))))
|
||||||
|
(let* ((spec/re-act-lst
|
||||||
|
(syntax->list (syntax (re-act ...))))
|
||||||
|
(eof-act
|
||||||
|
(get-special-action spec/re-act-lst #'eof #''eof))
|
||||||
|
(spec-act
|
||||||
|
(get-special-action spec/re-act-lst #'special #'(void)))
|
||||||
|
(spec-comment-act
|
||||||
|
(get-special-action spec/re-act-lst #'special-comment #'#f))
|
||||||
|
(ids (list #'special #'special-comment #'eof))
|
||||||
|
(re-act-lst
|
||||||
|
(filter
|
||||||
|
(lambda (spec/re-act)
|
||||||
|
(syntax-case spec/re-act ()
|
||||||
|
(((special) act)
|
||||||
|
(not (ormap
|
||||||
|
(lambda (x)
|
||||||
|
(and (identifier? #'special)
|
||||||
|
(module-or-top-identifier=? (syntax special) x)))
|
||||||
|
ids)))
|
||||||
|
(_ #t)))
|
||||||
|
spec/re-act-lst))
|
||||||
|
(name-lst (map (lambda (x) (datum->syntax-object #f (gensym))) re-act-lst))
|
||||||
|
(act-lst (map (lambda (x) (stx-car (stx-cdr x))) re-act-lst))
|
||||||
|
(re-actname-lst (map (lambda (re-act name)
|
||||||
|
(list (stx-car re-act)
|
||||||
|
name))
|
||||||
|
re-act-lst
|
||||||
|
name-lst)))
|
||||||
|
(when (null? spec/re-act-lst)
|
||||||
|
(raise-syntax-error (or src-loc-style 'lexer) "expected at least one action" stx))
|
||||||
|
(let-values (((trans start action-names no-look disappeared-uses)
|
||||||
|
(build-lexer re-actname-lst)))
|
||||||
|
(when (vector-ref action-names start) ;; Start state is final
|
||||||
|
(unless (and
|
||||||
|
;; All the successor states are final
|
||||||
|
(andmap (lambda (x) (vector-ref action-names (vector-ref x 2)))
|
||||||
|
(vector->list (vector-ref trans start)))
|
||||||
|
;; Each character has a successor state
|
||||||
|
(let loop ((check 0)
|
||||||
|
(nexts (vector->list (vector-ref trans start))))
|
||||||
|
(cond
|
||||||
|
((null? nexts) #f)
|
||||||
|
(else
|
||||||
|
(let ((next (car nexts)))
|
||||||
|
(and (= (vector-ref next 0) check)
|
||||||
|
(let ((next-check (vector-ref next 1)))
|
||||||
|
(or (>= next-check max-char-num)
|
||||||
|
(loop (add1 next-check) (cdr nexts))))))))))
|
||||||
|
(eprintf "Warning: lexer at ~a can accept the empty string.\n" stx)))
|
||||||
|
(with-syntax ((start-state-stx start)
|
||||||
|
(trans-table-stx trans)
|
||||||
|
(no-lookahead-stx no-look)
|
||||||
|
((name ...) name-lst)
|
||||||
|
((act ...) (map (lambda (a)
|
||||||
|
(wrap-action a src-loc-style))
|
||||||
|
act-lst))
|
||||||
|
((act-name ...) (vector->list action-names))
|
||||||
|
(spec-act-stx
|
||||||
|
(wrap-action spec-act src-loc-style))
|
||||||
|
(has-comment-act?-stx
|
||||||
|
(if (syntax-e spec-comment-act) #t #f))
|
||||||
|
(spec-comment-act-stx
|
||||||
|
(wrap-action spec-comment-act src-loc-style))
|
||||||
|
(eof-act-stx (wrap-action eof-act src-loc-style)))
|
||||||
|
(syntax-property
|
||||||
|
(syntax/loc stx
|
||||||
|
(let ([name act] ...)
|
||||||
|
(let ([proc
|
||||||
|
(lexer-body start-state-stx
|
||||||
|
trans-table-stx
|
||||||
|
(vector act-name ...)
|
||||||
|
no-lookahead-stx
|
||||||
|
spec-act-stx
|
||||||
|
has-comment-act?-stx
|
||||||
|
spec-comment-act-stx
|
||||||
|
eof-act-stx)])
|
||||||
|
;; reverse eta to get named procedures:
|
||||||
|
(lambda (port) (proc port)))))
|
||||||
|
'disappeared-use
|
||||||
|
disappeared-uses)))))))))
|
||||||
|
|
||||||
|
(define-syntax lexer (make-lexer-trans #f))
|
||||||
|
(define-syntax lexer-src-pos (make-lexer-trans 'lexer-src-pos))
|
||||||
|
(define-syntax lexer-srcloc (make-lexer-trans 'lexer-srcloc))
|
||||||
|
|
||||||
|
(define-syntax (define-lex-abbrev stx)
|
||||||
|
(syntax-case stx ()
|
||||||
|
((_ name re)
|
||||||
|
(identifier? (syntax name))
|
||||||
|
(syntax/loc stx
|
||||||
|
(define-syntax name
|
||||||
|
(make-lex-abbrev (lambda () (quote-syntax re))))))
|
||||||
|
(_
|
||||||
|
(raise-syntax-error
|
||||||
|
#f
|
||||||
|
"form should be (define-lex-abbrev name re)"
|
||||||
|
stx))))
|
||||||
|
|
||||||
|
(define-syntax (define-lex-abbrevs stx)
|
||||||
|
(syntax-case stx ()
|
||||||
|
((_ x ...)
|
||||||
|
(with-syntax (((abbrev ...)
|
||||||
|
(map
|
||||||
|
(lambda (a)
|
||||||
|
(syntax-case a ()
|
||||||
|
((name re)
|
||||||
|
(identifier? (syntax name))
|
||||||
|
(syntax/loc a (define-lex-abbrev name re)))
|
||||||
|
(_ (raise-syntax-error
|
||||||
|
#f
|
||||||
|
"form should be (define-lex-abbrevs (name re) ...)"
|
||||||
|
stx
|
||||||
|
a))))
|
||||||
|
(syntax->list (syntax (x ...))))))
|
||||||
|
(syntax/loc stx (begin abbrev ...))))
|
||||||
|
(_
|
||||||
|
(raise-syntax-error
|
||||||
|
#f
|
||||||
|
"form should be (define-lex-abbrevs (name re) ...)"
|
||||||
|
stx))))
|
||||||
|
|
||||||
|
(define-syntax (define-lex-trans stx)
|
||||||
|
(syntax-case stx ()
|
||||||
|
((_ name-form body-form)
|
||||||
|
(let-values (((name body)
|
||||||
|
(normalize-definition (syntax (define-syntax name-form body-form)) #'lambda)))
|
||||||
|
|
||||||
|
#`(define-syntax #,name
|
||||||
|
(let ((func #,body))
|
||||||
|
(unless (procedure? func)
|
||||||
|
(raise-syntax-error 'define-lex-trans "expected a procedure as the transformer, got ~e" func))
|
||||||
|
(unless (procedure-arity-includes? func 1)
|
||||||
|
(raise-syntax-error 'define-lex-trans "expected a procedure that accepts 1 argument as the transformer, got ~e" func))
|
||||||
|
(make-lex-trans func)))))
|
||||||
|
(_
|
||||||
|
(raise-syntax-error
|
||||||
|
#f
|
||||||
|
"form should be (define-lex-trans name transformer)"
|
||||||
|
stx))))
|
||||||
|
|
||||||
|
|
||||||
|
(define (get-next-state-helper char min max table)
|
||||||
|
(if (>= min max)
|
||||||
|
#f
|
||||||
|
(let* ((try (quotient (+ min max) 2))
|
||||||
|
(el (vector-ref table try))
|
||||||
|
(r1 (vector-ref el 0))
|
||||||
|
(r2 (vector-ref el 1)))
|
||||||
|
(cond
|
||||||
|
((and (>= char r1) (<= char r2)) (vector-ref el 2))
|
||||||
|
((< char r1) (get-next-state-helper char min try table))
|
||||||
|
(else (get-next-state-helper char (add1 try) max table))))))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
(define (get-next-state char table)
|
||||||
|
(if table
|
||||||
|
(get-next-state-helper char 0 (vector-length table) table)
|
||||||
|
#f))
|
||||||
|
|
||||||
|
(define (lexer-body start-state trans-table actions no-lookahead special-action
|
||||||
|
has-special-comment-action? special-comment-action eof-action)
|
||||||
|
(letrec ((lexer
|
||||||
|
(lambda (ip)
|
||||||
|
(let ((first-pos (get-position ip))
|
||||||
|
(first-char (peek-char-or-special ip 0)))
|
||||||
|
;(printf "(peek-char-or-special port 0) = ~e\n" first-char)
|
||||||
|
(cond
|
||||||
|
((eof-object? first-char)
|
||||||
|
(do-match ip first-pos eof-action (read-char-or-special ip)))
|
||||||
|
((special-comment? first-char)
|
||||||
|
(read-char-or-special ip)
|
||||||
|
(cond
|
||||||
|
(has-special-comment-action?
|
||||||
|
(do-match ip first-pos special-comment-action #f))
|
||||||
|
(else (lexer ip))))
|
||||||
|
((not (char? first-char))
|
||||||
|
(do-match ip first-pos special-action (read-char-or-special ip)))
|
||||||
|
(else
|
||||||
|
(let lexer-loop (
|
||||||
|
;; current-state
|
||||||
|
(state start-state)
|
||||||
|
;; the character to transition on
|
||||||
|
(char first-char)
|
||||||
|
;; action for the longest match seen thus far
|
||||||
|
;; including a match at the current state
|
||||||
|
(longest-match-action
|
||||||
|
(vector-ref actions start-state))
|
||||||
|
;; how many bytes precede char
|
||||||
|
(length-bytes 0)
|
||||||
|
;; how many characters have been read
|
||||||
|
;; including the one just read
|
||||||
|
(length-chars 1)
|
||||||
|
;; how many characters are in the longest match
|
||||||
|
(longest-match-length 0))
|
||||||
|
(let ((next-state
|
||||||
|
(cond
|
||||||
|
((not (char? char)) #f)
|
||||||
|
(else (get-next-state (char->integer char)
|
||||||
|
(vector-ref trans-table state))))))
|
||||||
|
(cond
|
||||||
|
((not next-state)
|
||||||
|
(check-match ip first-pos longest-match-length
|
||||||
|
length-chars longest-match-action))
|
||||||
|
((vector-ref no-lookahead next-state)
|
||||||
|
(let ((act (vector-ref actions next-state)))
|
||||||
|
(check-match ip
|
||||||
|
first-pos
|
||||||
|
(if act length-chars longest-match-length)
|
||||||
|
length-chars
|
||||||
|
(if act act longest-match-action))))
|
||||||
|
(else
|
||||||
|
(let* ((act (vector-ref actions next-state))
|
||||||
|
(next-length-bytes (+ (char-utf-8-length char) length-bytes))
|
||||||
|
(next-char (peek-char-or-special ip next-length-bytes)))
|
||||||
|
#;(printf "(peek-char-or-special port ~e) = ~e\n"
|
||||||
|
next-length-bytes next-char)
|
||||||
|
(lexer-loop next-state
|
||||||
|
next-char
|
||||||
|
(if act
|
||||||
|
act
|
||||||
|
longest-match-action)
|
||||||
|
next-length-bytes
|
||||||
|
(add1 length-chars)
|
||||||
|
(if act
|
||||||
|
length-chars
|
||||||
|
longest-match-length)))))))))))))
|
||||||
|
(lambda (ip)
|
||||||
|
(unless (input-port? ip)
|
||||||
|
(raise-argument-error
|
||||||
|
'lexer
|
||||||
|
"input-port?"
|
||||||
|
0
|
||||||
|
ip))
|
||||||
|
(lexer ip))))
|
||||||
|
|
||||||
|
(define (check-match lb first-pos longest-match-length length longest-match-action)
|
||||||
|
(unless longest-match-action
|
||||||
|
(let* ((match (read-string length lb))
|
||||||
|
(end-pos (get-position lb)))
|
||||||
|
(raise-read-error
|
||||||
|
(format "lexer: No match found in input starting with: ~a" match)
|
||||||
|
(file-path)
|
||||||
|
(position-line first-pos)
|
||||||
|
(position-col first-pos)
|
||||||
|
(position-offset first-pos)
|
||||||
|
(- (position-offset end-pos) (position-offset first-pos)))))
|
||||||
|
(let ((match (read-string longest-match-length lb)))
|
||||||
|
;(printf "(read-string ~e port) = ~e\n" longest-match-length match)
|
||||||
|
(do-match lb first-pos longest-match-action match)))
|
||||||
|
|
||||||
|
(define file-path (make-parameter #f))
|
||||||
|
(define lexer-file-path file-path)
|
||||||
|
|
||||||
|
(define (do-match ip first-pos action value)
|
||||||
|
#;(printf "(action ~a ~a ~a ~a)\n"
|
||||||
|
(position-offset first-pos) (position-offset (get-position ip)) value ip)
|
||||||
|
(action first-pos (get-position ip) value ip))
|
||||||
|
|
||||||
|
(define (get-position ip)
|
||||||
|
(let-values (((line col off) (port-next-location ip)))
|
||||||
|
(make-position off line col)))
|
||||||
|
|
||||||
|
(define-syntax (create-unicode-abbrevs stx)
|
||||||
|
(syntax-case stx ()
|
||||||
|
((_ ctxt)
|
||||||
|
(with-syntax (((ranges ...) (map (lambda (range)
|
||||||
|
`(union ,@(map (lambda (x)
|
||||||
|
`(char-range ,(integer->char (car x))
|
||||||
|
,(integer->char (cdr x))))
|
||||||
|
range)))
|
||||||
|
(list (force alphabetic-ranges)
|
||||||
|
(force lower-case-ranges)
|
||||||
|
(force upper-case-ranges)
|
||||||
|
(force title-case-ranges)
|
||||||
|
(force numeric-ranges)
|
||||||
|
(force symbolic-ranges)
|
||||||
|
(force punctuation-ranges)
|
||||||
|
(force graphic-ranges)
|
||||||
|
(force whitespace-ranges)
|
||||||
|
(force blank-ranges)
|
||||||
|
(force iso-control-ranges))))
|
||||||
|
((names ...) (map (lambda (sym)
|
||||||
|
(datum->syntax-object (syntax ctxt) sym #f))
|
||||||
|
'(alphabetic
|
||||||
|
lower-case
|
||||||
|
upper-case
|
||||||
|
title-case
|
||||||
|
numeric
|
||||||
|
symbolic
|
||||||
|
punctuation
|
||||||
|
graphic
|
||||||
|
whitespace
|
||||||
|
blank
|
||||||
|
iso-control))))
|
||||||
|
(syntax (define-lex-abbrevs (names ranges) ...))))))
|
||||||
|
|
||||||
|
(define-lex-abbrev any-char (char-complement (union)))
|
||||||
|
(define-lex-abbrev any-string (intersection))
|
||||||
|
(define-lex-abbrev nothing (union))
|
||||||
|
(create-unicode-abbrevs #'here)
|
||||||
|
|
||||||
|
(define-lex-trans (char-set stx)
|
||||||
|
(syntax-case stx ()
|
||||||
|
((_ str)
|
||||||
|
(string? (syntax-e (syntax str)))
|
||||||
|
(with-syntax (((char ...) (string->list (syntax-e (syntax str)))))
|
||||||
|
(syntax (union char ...))))))
|
||||||
|
|
||||||
|
(define-syntax provide-lex-keyword
|
||||||
|
(syntax-rules ()
|
||||||
|
[(_ id ...)
|
||||||
|
(begin
|
||||||
|
(define-syntax-parameter id
|
||||||
|
(make-set!-transformer
|
||||||
|
(lambda (stx)
|
||||||
|
(raise-syntax-error
|
||||||
|
#f
|
||||||
|
(format "use of a lexer keyword (~a) is not in an appropriate lexer action"
|
||||||
|
'id)
|
||||||
|
stx))))
|
||||||
|
...
|
||||||
|
(provide id ...))]))
|
||||||
|
|
||||||
|
(provide-lex-keyword start-pos end-pos lexeme lexeme-srcloc input-port return-without-pos return-without-srcloc)
|
||||||
|
|
||||||
|
)
|
@ -0,0 +1,16 @@
|
|||||||
|
#lang scheme/base
|
||||||
|
|
||||||
|
(provide (all-defined-out))
|
||||||
|
(require syntax/stx)
|
||||||
|
|
||||||
|
;; get-special-action: (syntax-object list) syntax-object syntax-object -> syntax-object
|
||||||
|
;; Returns the first action from a rule of the form ((which-special) action)
|
||||||
|
(define (get-special-action rules which-special none)
|
||||||
|
(cond
|
||||||
|
((null? rules) none)
|
||||||
|
(else
|
||||||
|
(syntax-case (car rules) ()
|
||||||
|
(((special) act)
|
||||||
|
(and (identifier? #'special) (module-or-top-identifier=? (syntax special) which-special))
|
||||||
|
(syntax act))
|
||||||
|
(_ (get-special-action (cdr rules) which-special none))))))
|
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("f095138d7dfc29ae18c12d7daf44f1c9b74ab56d" . "f0c7dd306804eb5e8da06235651b07296b23b36d") (collects #"scheme" #"base.rkt") (collects #"scheme" #"runtime-config.rkt") (collects #"syntax" #"stx.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("1edbe8de6053f0e8735c4d7777b345672d547c86" . "980e8a36193a9253ed01c61fc421729123f6b314") (collects #"br-parser-tools" #"private-lex" #"re.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("f095138d7dfc29ae18c12d7daf44f1c9b74ab56d" . "f0c7dd306804eb5e8da06235651b07296b23b36d") (collects #"scheme" #"base.rkt") (collects #"scheme" #"runtime-config.rkt") (collects #"syntax" #"stx.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("1edbe8de6053f0e8735c4d7777b345672d547c86" . "44623f82f80a88e2fe5683fec412332baf6a8ed3") (collects #"br-parser-tools" #"private-lex" #"re.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("f095138d7dfc29ae18c12d7daf44f1c9b74ab56d" . "4c8c1a7dddb28b104057c25eaf86b1a95e60048c") (collects #"errortrace" #"errortrace-key.rkt") (collects #"scheme" #"base.rkt") (collects #"scheme" #"runtime-config.rkt") (collects #"syntax" #"stx.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("1edbe8de6053f0e8735c4d7777b345672d547c86" . "02238d6deec4e15d91f05091ef67f522127eb47b") (collects #"br-parser-tools" #"private-lex" #"re.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("560290907963aeb1a4cc1e997b45b3113174dcc2" . "8b3fb56340e1383a464e9e09b878c80c8588c8db") (collects #"br-parser-tools" #"private-lex" #"deriv.rkt") (collects #"br-parser-tools" #"private-lex" #"re.rkt") (collects #"br-parser-tools" #"private-lex" #"stx.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"stx.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("c35e7c257a2700b22560bd1ef8fa4ec0d85440cf" . "b1551c6be564899f5570915abe65bd6754d6ee02") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"scheme" #"match.rkt") (indirect collects #"racket" #"match" #"gen-match.rkt") (indirect collects #"racket" #"match" #"parse.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("c716d0316e7115d4b50b04c3f923b22c43068fb2" . "de91880596de9c821e9e2c4828b024d42f6a199c") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt") (collects #"syntax" #"id-table.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("15ddc4ae61da4856338c06fe3f3aca89043495ec" . "1bd4c8735758355b04c6e172aa61084639448a7c") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzscheme" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("c47b01af4f27383fafb3cea5d0e14b2b92da094b" . "9988c5c353bec4edf7eafa42e08d630b02a9328a") (collects #"br-parser-tools" #"private-lex" #"token-syntax.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"mzscheme" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("11fb7e214dfb048796bfcbca53932424123137a2" . "b3606fa90704340e1a962ec5048ec3b01f440e06") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("54744a008c8ffd6704fc954ca59351a6f19774a4" . "5b3e3294fee47f7377adc85735ab90b85321865e") (collects #"errortrace" #"errortrace-key.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("560290907963aeb1a4cc1e997b45b3113174dcc2" . "53517572cdb75487afeb2fff3000f8f73fc998aa") (collects #"br-parser-tools" #"private-lex" #"deriv.rkt") (collects #"br-parser-tools" #"private-lex" #"re.rkt") (collects #"br-parser-tools" #"private-lex" #"stx.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"stx.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("c35e7c257a2700b22560bd1ef8fa4ec0d85440cf" . "b2bb34be5f7b01ff900aa10ee415738b2a19b0a0") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"scheme" #"match.rkt") (indirect collects #"racket" #"match" #"gen-match.rkt") (indirect collects #"racket" #"match" #"parse.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("c716d0316e7115d4b50b04c3f923b22c43068fb2" . "b47d873be742873e79b0099de8845e32143ab4d5") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt") (collects #"syntax" #"id-table.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("15ddc4ae61da4856338c06fe3f3aca89043495ec" . "46a0cffdba2f82aa32f0fd32a5783621a6a78323") (collects #"mzscheme" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("c47b01af4f27383fafb3cea5d0e14b2b92da094b" . "5a9e9888caac8c6df5e32eab1eb77a9522bb1097") (collects #"br-parser-tools" #"private-lex" #"token-syntax.rkt") (collects #"mzscheme" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("11fb7e214dfb048796bfcbca53932424123137a2" . "d1e63a1cc4d040c9bbc9481d0d8c6d890f9d2952") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("54744a008c8ffd6704fc954ca59351a6f19774a4" . "04b200a056cd736ad4bd3d7221f381755cf94607") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("560290907963aeb1a4cc1e997b45b3113174dcc2" . "11a0fde414e54b53f7c8a690972c091191515617") (collects #"br-parser-tools" #"private-lex" #"deriv.rkt") (collects #"br-parser-tools" #"private-lex" #"re.rkt") (collects #"br-parser-tools" #"private-lex" #"stx.rkt") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"syntax" #"stx.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("c35e7c257a2700b22560bd1ef8fa4ec0d85440cf" . "8309136e8195b423615deebdf156b1cf77168dfe") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"mzlib" #"integer-set.rkt") (collects #"mzlib" #"list.rkt") (collects #"mzscheme" #"main.rkt") (collects #"scheme" #"match.rkt") (indirect collects #"racket" #"match" #"gen-match.rkt") (indirect collects #"racket" #"match" #"parse.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("c716d0316e7115d4b50b04c3f923b22c43068fb2" . "01788f46755b7f7a3e2f5a04be22246525bde728") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt") (collects #"syntax" #"id-table.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("15ddc4ae61da4856338c06fe3f3aca89043495ec" . "46a0cffdba2f82aa32f0fd32a5783621a6a78323") (collects #"mzscheme" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("c47b01af4f27383fafb3cea5d0e14b2b92da094b" . "5a9e9888caac8c6df5e32eab1eb77a9522bb1097") (collects #"br-parser-tools" #"private-lex" #"token-syntax.rkt") (collects #"mzscheme" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("11fb7e214dfb048796bfcbca53932424123137a2" . "c85260e8066b06a6c7e3ad05f21848a935912ffc") (collects #"br-parser-tools" #"private-lex" #"util.rkt") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1 @@
|
|||||||
|
("6.8.0.2" ("54744a008c8ffd6704fc954ca59351a6f19774a4" . "04b200a056cd736ad4bd3d7221f381755cf94607") (collects #"racket" #"main.rkt") (collects #"racket" #"runtime-config.rkt") (collects #"rackunit" #"main.rkt"))
|
Binary file not shown.
@ -0,0 +1,339 @@
|
|||||||
|
(module deriv mzscheme
|
||||||
|
|
||||||
|
(require mzlib/list
|
||||||
|
(prefix is: mzlib/integer-set)
|
||||||
|
"re.rkt"
|
||||||
|
"util.rkt")
|
||||||
|
|
||||||
|
(provide build-dfa print-dfa (struct dfa (num-states start-state final-states/actions transitions)))
|
||||||
|
|
||||||
|
(define e (build-epsilon))
|
||||||
|
(define z (build-zero))
|
||||||
|
|
||||||
|
|
||||||
|
;; Don't do anything with this one but extract the chars
|
||||||
|
(define all-chars (->re `(char-complement (union)) (make-cache)))
|
||||||
|
|
||||||
|
;; get-char-groups : re bool -> (list-of char-setR?)
|
||||||
|
;; Collects the char-setRs in r that could be used in
|
||||||
|
;; taking the derivative of r.
|
||||||
|
(define (get-char-groups r found-negation)
|
||||||
|
(cond
|
||||||
|
((or (eq? r e) (eq? r z)) null)
|
||||||
|
((char-setR? r) (list r))
|
||||||
|
((concatR? r)
|
||||||
|
(if (re-nullable? (concatR-re1 r))
|
||||||
|
(append (get-char-groups (concatR-re1 r) found-negation)
|
||||||
|
(get-char-groups (concatR-re2 r) found-negation))
|
||||||
|
(get-char-groups (concatR-re1 r) found-negation)))
|
||||||
|
((repeatR? r)
|
||||||
|
(get-char-groups (repeatR-re r) found-negation))
|
||||||
|
((orR? r)
|
||||||
|
(apply append (map (lambda (x) (get-char-groups x found-negation)) (orR-res r))))
|
||||||
|
((andR? r)
|
||||||
|
(apply append (map (lambda (x) (get-char-groups x found-negation)) (andR-res r))))
|
||||||
|
((negR? r)
|
||||||
|
(if found-negation
|
||||||
|
(get-char-groups (negR-re r) #t)
|
||||||
|
(cons all-chars (get-char-groups (negR-re r) #t))))))
|
||||||
|
|
||||||
|
(test-block ((c (make-cache))
|
||||||
|
(r1 (->re #\1 c))
|
||||||
|
(r2 (->re #\2 c)))
|
||||||
|
((get-char-groups e #f) null)
|
||||||
|
((get-char-groups z #f) null)
|
||||||
|
((get-char-groups r1 #f) (list r1))
|
||||||
|
((get-char-groups (->re `(concatenation ,r1 ,r2) c) #f)
|
||||||
|
(list r1))
|
||||||
|
((get-char-groups (->re `(concatenation ,e ,r2) c) #f)
|
||||||
|
(list r2))
|
||||||
|
((get-char-groups (->re `(concatenation (repetition 0 +inf.0 ,r1) ,r2) c) #f)
|
||||||
|
(list r1 r2))
|
||||||
|
((get-char-groups (->re `(repetition 0 +inf.0 ,r1) c) #f)
|
||||||
|
(list r1))
|
||||||
|
((get-char-groups
|
||||||
|
(->re `(union (repetition 0 +inf.0 ,r1)
|
||||||
|
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
|
||||||
|
(list r1 r2 (->re "3" c) (->re "4" c)))
|
||||||
|
((get-char-groups (->re `(complement ,r1) c) #f)
|
||||||
|
(list all-chars r1))
|
||||||
|
((get-char-groups
|
||||||
|
(->re `(intersection (repetition 0 +inf.0 ,r1)
|
||||||
|
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
|
||||||
|
(list r1 r2 (->re "3" c) (->re "4" c)))
|
||||||
|
)
|
||||||
|
(define loc:member? is:member?)
|
||||||
|
|
||||||
|
;; deriveR : re char cache -> re
|
||||||
|
(define (deriveR r c cache)
|
||||||
|
(cond
|
||||||
|
((or (eq? r e) (eq? r z)) z)
|
||||||
|
((char-setR? r)
|
||||||
|
(if (loc:member? c (char-setR-chars r)) e z))
|
||||||
|
((concatR? r)
|
||||||
|
(let* ((r1 (concatR-re1 r))
|
||||||
|
(r2 (concatR-re2 r))
|
||||||
|
(d (build-concat (deriveR r1 c cache) r2 cache)))
|
||||||
|
(if (re-nullable? r1)
|
||||||
|
(build-or (list d (deriveR r2 c cache)) cache)
|
||||||
|
d)))
|
||||||
|
((repeatR? r)
|
||||||
|
(build-concat (deriveR (repeatR-re r) c cache)
|
||||||
|
(build-repeat (sub1 (repeatR-low r))
|
||||||
|
(sub1 (repeatR-high r))
|
||||||
|
(repeatR-re r) cache)
|
||||||
|
cache))
|
||||||
|
((orR? r)
|
||||||
|
(build-or (map (lambda (x) (deriveR x c cache))
|
||||||
|
(orR-res r))
|
||||||
|
cache))
|
||||||
|
((andR? r)
|
||||||
|
(build-and (map (lambda (x) (deriveR x c cache))
|
||||||
|
(andR-res r))
|
||||||
|
cache))
|
||||||
|
((negR? r)
|
||||||
|
(build-neg (deriveR (negR-re r) c cache) cache))))
|
||||||
|
|
||||||
|
(test-block ((c (make-cache))
|
||||||
|
(a (char->integer #\a))
|
||||||
|
(b (char->integer #\b))
|
||||||
|
(r1 (->re #\a c))
|
||||||
|
(r2 (->re `(repetition 0 +inf.0 #\a) c))
|
||||||
|
(r3 (->re `(repetition 0 +inf.0 ,r2) c))
|
||||||
|
(r4 (->re `(concatenation #\a ,r2) c))
|
||||||
|
(r5 (->re `(repetition 0 +inf.0 ,r4) c))
|
||||||
|
(r6 (->re `(union ,r5 #\a) c))
|
||||||
|
(r7 (->re `(concatenation ,r2 ,r2) c))
|
||||||
|
(r8 (->re `(complement ,r4) c))
|
||||||
|
(r9 (->re `(intersection ,r2 ,r4) c)))
|
||||||
|
((deriveR e a c) z)
|
||||||
|
((deriveR z a c) z)
|
||||||
|
((deriveR r1 b c) z)
|
||||||
|
((deriveR r1 a c) e)
|
||||||
|
((deriveR r2 a c) r2)
|
||||||
|
((deriveR r2 b c) z)
|
||||||
|
((deriveR r3 a c) r2)
|
||||||
|
((deriveR r3 b c) z)
|
||||||
|
((deriveR r4 a c) r2)
|
||||||
|
((deriveR r4 b c) z)
|
||||||
|
((deriveR r5 a c) (->re `(concatenation ,r2 ,r5) c))
|
||||||
|
((deriveR r5 b c) z)
|
||||||
|
((deriveR r6 a c) (->re `(union (concatenation ,r2 ,r5) "") c))
|
||||||
|
((deriveR r6 b c) z)
|
||||||
|
((deriveR r7 a c) (->re `(union (concatenation ,r2 ,r2) ,r2) c))
|
||||||
|
((deriveR r7 b c) z)
|
||||||
|
((deriveR r8 a c) (->re `(complement, r2) c))
|
||||||
|
((deriveR r8 b c) (->re `(complement ,z) c))
|
||||||
|
((deriveR r9 a c) r2)
|
||||||
|
((deriveR r9 b c) z)
|
||||||
|
((deriveR (->re `(repetition 1 2 "ab") c) a c)
|
||||||
|
(->re `(concatenation "b" (repetition 0 1 "ab")) c)))
|
||||||
|
|
||||||
|
;; An re-action is (cons re action)
|
||||||
|
|
||||||
|
;; derive : (list-of re-action) char cache -> (union (list-of re-action) #f)
|
||||||
|
;; applies deriveR to all the re-actions's re parts.
|
||||||
|
;; Returns #f if the derived state is equivalent to z.
|
||||||
|
(define (derive r c cache)
|
||||||
|
(let ((new-r (map (lambda (ra)
|
||||||
|
(cons (deriveR (car ra) c cache) (cdr ra)))
|
||||||
|
r)))
|
||||||
|
(if (andmap (lambda (x) (eq? z (car x)))
|
||||||
|
new-r)
|
||||||
|
#f
|
||||||
|
new-r)))
|
||||||
|
|
||||||
|
(test-block ((c (make-cache))
|
||||||
|
(r1 (->re #\1 c))
|
||||||
|
(r2 (->re #\2 c)))
|
||||||
|
((derive null (char->integer #\1) c) #f)
|
||||||
|
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c)
|
||||||
|
(list (cons e 1) (cons z 2)))
|
||||||
|
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f))
|
||||||
|
|
||||||
|
|
||||||
|
;; get-final : (list-of re-action) -> (union #f syntax-object)
|
||||||
|
;; An re that accepts e represents a final state. Return the
|
||||||
|
;; action from the first final state or #f if there is none.
|
||||||
|
(define (get-final res)
|
||||||
|
(cond
|
||||||
|
((null? res) #f)
|
||||||
|
((re-nullable? (caar res)) (cdar res))
|
||||||
|
(else (get-final (cdr res)))))
|
||||||
|
|
||||||
|
(test-block ((c->i char->integer)
|
||||||
|
(c (make-cache))
|
||||||
|
(r1 (->re #\a c))
|
||||||
|
(r2 (->re #\b c))
|
||||||
|
(b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5)))
|
||||||
|
(a (list (cons r1 1) (cons r2 2))))
|
||||||
|
((derive null (c->i #\a) c) #f)
|
||||||
|
((derive a (c->i #\a) c) (list (cons e 1) (cons z 2)))
|
||||||
|
((derive a (c->i #\b) c) (list (cons z 1) (cons e 2)))
|
||||||
|
((derive a (c->i #\c) c) #f)
|
||||||
|
((derive (list (cons (->re `(union " " "\n" ",") c) 1)
|
||||||
|
(cons (->re `(concatenation (repetition 0 1 "-")
|
||||||
|
(repetition 1 +inf.0 (char-range "0" "9"))) c) 2)
|
||||||
|
(cons (->re `(concatenation "-" (repetition 1 +inf.0 "-")) c) 3)
|
||||||
|
(cons (->re "[" c) 4)
|
||||||
|
(cons (->re "]" c) 5)) (c->i #\[) c)
|
||||||
|
b)
|
||||||
|
((get-final a) #f)
|
||||||
|
((get-final (list (cons e 1) (cons e 2))) 1)
|
||||||
|
((get-final b) 4))
|
||||||
|
|
||||||
|
|
||||||
|
;; A state is (make-state (list-of re-action) nat)
|
||||||
|
(define-struct state (spec index))
|
||||||
|
|
||||||
|
;; get->key : re-action -> (list-of nat)
|
||||||
|
;; states are indexed by the list of indexes of their res
|
||||||
|
(define (get-key s)
|
||||||
|
(map (lambda (x) (re-index (car x))) s))
|
||||||
|
|
||||||
|
(define loc:partition is:partition)
|
||||||
|
|
||||||
|
;; compute-chars : (list-of state) -> (list-of char-set)
|
||||||
|
;; Computed the sets of equivalent characters for taking the
|
||||||
|
;; derivative of the car of st. Only one derivative per set need to be taken.
|
||||||
|
(define (compute-chars st)
|
||||||
|
(cond
|
||||||
|
((null? st) null)
|
||||||
|
(else
|
||||||
|
(loc:partition (map char-setR-chars
|
||||||
|
(apply append (map (lambda (x) (get-char-groups (car x) #f))
|
||||||
|
(state-spec (car st)))))))))
|
||||||
|
|
||||||
|
(test-block ((c (make-cache))
|
||||||
|
(c->i char->integer)
|
||||||
|
(r1 (->re `(char-range #\1 #\4) c))
|
||||||
|
(r2 (->re `(char-range #\2 #\3) c)))
|
||||||
|
((compute-chars null) null)
|
||||||
|
((compute-chars (list (make-state null 1))) null)
|
||||||
|
((map is:integer-set-contents
|
||||||
|
(compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2))))
|
||||||
|
(list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3)))
|
||||||
|
(is:integer-set-contents (is:union (is:make-range (c->i #\1))
|
||||||
|
(is:make-range (c->i #\4)))))))
|
||||||
|
|
||||||
|
|
||||||
|
;; A dfa is (make-dfa int int
|
||||||
|
;; (list-of (cons int syntax-object))
|
||||||
|
;; (list-of (cons int (list-of (cons char-set int)))))
|
||||||
|
;; Each transitions is a state and a list of chars with the state to transition to.
|
||||||
|
;; The finals and transitions are sorted by state number, and duplicate free.
|
||||||
|
(define-struct dfa (num-states start-state final-states/actions transitions) (make-inspector))
|
||||||
|
|
||||||
|
(define loc:get-integer is:get-integer)
|
||||||
|
|
||||||
|
;; build-dfa : (list-of re-action) cache -> dfa
|
||||||
|
(define (build-dfa rs cache)
|
||||||
|
(let* ((transitions (make-hash-table))
|
||||||
|
(get-state-number (make-counter))
|
||||||
|
(start (make-state rs (get-state-number))))
|
||||||
|
(cache (cons 'state (get-key rs)) (lambda () start))
|
||||||
|
(let loop ((old-states (list start))
|
||||||
|
(new-states null)
|
||||||
|
(all-states (list start))
|
||||||
|
(cs (compute-chars (list start))))
|
||||||
|
(cond
|
||||||
|
((and (null? old-states) (null? new-states))
|
||||||
|
(make-dfa (get-state-number) (state-index start)
|
||||||
|
(sort (filter (lambda (x) (cdr x))
|
||||||
|
(map (lambda (state)
|
||||||
|
(cons (state-index state) (get-final (state-spec state))))
|
||||||
|
all-states))
|
||||||
|
(lambda (a b) (< (car a) (car b))))
|
||||||
|
(sort (hash-table-map transitions
|
||||||
|
(lambda (state trans)
|
||||||
|
(cons (state-index state)
|
||||||
|
(map (lambda (t)
|
||||||
|
(cons (car t)
|
||||||
|
(state-index (cdr t))))
|
||||||
|
trans))))
|
||||||
|
(lambda (a b) (< (car a) (car b))))))
|
||||||
|
((null? old-states)
|
||||||
|
(loop new-states null all-states (compute-chars new-states)))
|
||||||
|
((null? cs)
|
||||||
|
(loop (cdr old-states) new-states all-states (compute-chars (cdr old-states))))
|
||||||
|
(else
|
||||||
|
(let* ((state (car old-states))
|
||||||
|
(c (car cs))
|
||||||
|
(new-re (derive (state-spec state) (loc:get-integer c) cache)))
|
||||||
|
(cond
|
||||||
|
(new-re
|
||||||
|
(let* ((new-state? #f)
|
||||||
|
(new-state (cache (cons 'state (get-key new-re))
|
||||||
|
(lambda ()
|
||||||
|
(set! new-state? #t)
|
||||||
|
(make-state new-re (get-state-number)))))
|
||||||
|
(new-all-states (if new-state? (cons new-state all-states) all-states)))
|
||||||
|
(hash-table-put! transitions
|
||||||
|
state
|
||||||
|
(cons (cons c new-state)
|
||||||
|
(hash-table-get transitions state
|
||||||
|
(lambda () null))))
|
||||||
|
(cond
|
||||||
|
(new-state?
|
||||||
|
(loop old-states (cons new-state new-states) new-all-states (cdr cs)))
|
||||||
|
(else
|
||||||
|
(loop old-states new-states new-all-states (cdr cs))))))
|
||||||
|
(else (loop old-states new-states all-states (cdr cs))))))))))
|
||||||
|
|
||||||
|
(define (print-dfa x)
|
||||||
|
(printf "number of states: ~a\n" (dfa-num-states x))
|
||||||
|
(printf "start state: ~a\n" (dfa-start-state x))
|
||||||
|
(printf "final states: ~a\n" (map car (dfa-final-states/actions x)))
|
||||||
|
(for-each (lambda (trans)
|
||||||
|
(printf "state: ~a\n" (car trans))
|
||||||
|
(for-each (lambda (rule)
|
||||||
|
(printf " -~a-> ~a\n"
|
||||||
|
(is:integer-set-contents (car rule))
|
||||||
|
(cdr rule)))
|
||||||
|
(cdr trans)))
|
||||||
|
(dfa-transitions x)))
|
||||||
|
|
||||||
|
(define (build-test-dfa rs)
|
||||||
|
(let ((c (make-cache)))
|
||||||
|
(build-dfa (map (lambda (x) (cons (->re x c) 'action))
|
||||||
|
rs)
|
||||||
|
c)))
|
||||||
|
|
||||||
|
|
||||||
|
#|
|
||||||
|
(define t1 (build-test-dfa null))
|
||||||
|
(define t2 (build-test-dfa `(#\a)))
|
||||||
|
(define t3 (build-test-dfa `(#\a #\b)))
|
||||||
|
(define t4 (build-test-dfa `((repetition 0 +inf.0 #\a)
|
||||||
|
(repetition 0 +inf.0 (concatenation #\a #\b)))))
|
||||||
|
(define t5 (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\0 #\1)) #\1))))
|
||||||
|
(define t6 (build-test-dfa `((repetition 0 +inf.0 (repetition 0 +inf.0 #\a))
|
||||||
|
(repetition 0 +inf.0 (concatenation #\b (repetition 1 +inf.0 #\b))))))
|
||||||
|
(define t7 (build-test-dfa `((concatenation (repetition 0 +inf.0 #\a) (repetition 0 +inf.0 #\b)
|
||||||
|
(repetition 0 +inf.0 #\c) (repetition 0 +inf.0 #\d)
|
||||||
|
(repetition 0 +inf.0 #\e)))))
|
||||||
|
(define t8
|
||||||
|
(build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\a #\b)) #\a (union #\a #\b)
|
||||||
|
(union #\a #\b) (union #\a #\b) (union #\a #\b)))))
|
||||||
|
(define t9 (build-test-dfa `((concatenation "/*"
|
||||||
|
(complement (concatenation (intersection) "*/" (intersection)))
|
||||||
|
"*/"))))
|
||||||
|
(define t11 (build-test-dfa `((complement "1"))))
|
||||||
|
(define t12 (build-test-dfa `((concatenation (intersection (concatenation (repetition 0 +inf.0 "a") "b")
|
||||||
|
(concatenation "a" (repetition 0 +inf.0 "b")))
|
||||||
|
"ab"))))
|
||||||
|
(define x (build-test-dfa `((union " " "\n" ",")
|
||||||
|
(concatenation (repetition 0 1 "-") (repetition 1 +inf.0 (char-range "0" "9")))
|
||||||
|
(concatenation "-" (repetition 1 +inf.0 "-"))
|
||||||
|
"["
|
||||||
|
"]")))
|
||||||
|
(define y (build-test-dfa
|
||||||
|
`((repetition 1 +inf.0
|
||||||
|
(union (concatenation "|" (repetition 0 +inf.0 (char-complement "|")) "|")
|
||||||
|
(concatenation "|" (repetition 0 +inf.0 (char-complement "|"))))))))
|
||||||
|
(define t13 (build-test-dfa `((intersection (concatenation (intersection) "111" (intersection))
|
||||||
|
(complement (union (concatenation (intersection) "01")
|
||||||
|
(repetition 1 +inf.0 "1")))))))
|
||||||
|
(define t14 (build-test-dfa `((complement "1"))))
|
||||||
|
|#
|
||||||
|
)
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue