Compare commits
1 Commits
Author | SHA1 | Date |
---|---|---|
Matthew Butterick | 2d5d7ecaaf | 6 years ago |
@ -1,45 +0,0 @@
|
||||
name: CI
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
run:
|
||||
name: "Build using Racket '${{ matrix.racket-version }}' (${{ matrix.racket-variant }})"
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
racket-version: ["6.6", "6.7", "6.8", "6.9", "6.10.1", "6.11", "6.12", "7.0", "7.1", "7.2", "7.3", "7.4", "7.5", "7.6", "7.7", "7.8", "7.9", "current"]
|
||||
racket-variant: ["BC", "CS"]
|
||||
# CS builds are only provided for versions 7.4 and up so avoid
|
||||
# running the job for prior versions.
|
||||
exclude:
|
||||
- {racket-version: "6.6", racket-variant: "CS"}
|
||||
- {racket-version: "6.7", racket-variant: "CS"}
|
||||
- {racket-version: "6.8", racket-variant: "CS"}
|
||||
- {racket-version: "6.9", racket-variant: "CS"}
|
||||
- {racket-version: "6.10.1", racket-variant: "CS"}
|
||||
- {racket-version: "6.11", racket-variant: "CS"}
|
||||
- {racket-version: "6.12", racket-variant: "CS"}
|
||||
- {racket-version: "7.0", racket-variant: "CS"}
|
||||
- {racket-version: "7.1", racket-variant: "CS"}
|
||||
- {racket-version: "7.2", racket-variant: "CS"}
|
||||
- {racket-version: "7.3", racket-variant: "CS"}
|
||||
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@master
|
||||
|
||||
- uses: Bogdanp/setup-racket@v0.11
|
||||
with:
|
||||
distribution: 'full'
|
||||
version: ${{ matrix.racket-version }}
|
||||
variant: ${{ matrix.racket-variant }}
|
||||
|
||||
- name: Install BR parser tools
|
||||
run: raco pkg install --deps search-auto https://github.com/mbutterick/br-parser-tools.git?path=br-parser-tools-lib
|
||||
|
||||
- name: Run the br-parser-tools tests
|
||||
run: xvfb-run raco test -p br-parser-tools-lib
|
||||
|
||||
|
@ -1,3 +0,0 @@
|
||||
#lang info
|
||||
|
||||
(define scribblings '(("br-parser-tools.scrbl" (multi-page) (parsing-library))))
|
@ -1,240 +0,0 @@
|
||||
#lang racket/base
|
||||
;; This implements the equivalent of racket's read-syntax for R5RS scheme.
|
||||
;; It has not been thoroughly tested. Also it will read an entire file into a
|
||||
;; list of syntax objects, instead of returning one syntax object at a time
|
||||
|
||||
(require (for-syntax racket/base)
|
||||
br-parser-tools/lex
|
||||
(prefix-in : br-parser-tools/lex-sre)
|
||||
br-parser-tools/yacc
|
||||
syntax/readerr)
|
||||
|
||||
(define-tokens data (DATUM))
|
||||
(define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF))
|
||||
|
||||
(define scheme-lexer
|
||||
(lexer-src-pos
|
||||
|
||||
;; Skip comments, without accumulating extra position information
|
||||
[(:or scheme-whitespace comment) (return-without-pos (scheme-lexer input-port))]
|
||||
|
||||
["#t" (token-DATUM #t)]
|
||||
["#f" (token-DATUM #f)]
|
||||
[(:: "#\\" any-char) (token-DATUM (caddr (string->list lexeme)))]
|
||||
["#\\space" (token-DATUM #\space)]
|
||||
["#\\newline" (token-DATUM #\newline)]
|
||||
[(:or (:: initial (:* subsequent)) "+" "-" "...") (token-DATUM (string->symbol lexeme))]
|
||||
[#\" (token-DATUM (list->string (get-string-token input-port)))]
|
||||
[#\( 'OP]
|
||||
[#\) 'CP]
|
||||
[#\[ 'OP]
|
||||
[#\] 'CP]
|
||||
["#(" 'HASHOP]
|
||||
[num2 (token-DATUM (string->number lexeme 2))]
|
||||
[num8 (token-DATUM (string->number lexeme 8))]
|
||||
[num10 (token-DATUM (string->number lexeme 10))]
|
||||
[num16 (token-DATUM (string->number lexeme 16))]
|
||||
["'" 'QUOTE]
|
||||
["`" 'QUASIQUOTE]
|
||||
["," 'UNQUOTE]
|
||||
[",@" 'UNQUOTE-SPLICING]
|
||||
["." 'DOT]
|
||||
[(eof) 'EOF]))
|
||||
|
||||
(define get-string-token
|
||||
(lexer
|
||||
[(:~ #\" #\\) (cons (car (string->list lexeme))
|
||||
(get-string-token input-port))]
|
||||
[(:: #\\ #\\) (cons #\\ (get-string-token input-port))]
|
||||
[(:: #\\ #\") (cons #\" (get-string-token input-port))]
|
||||
[#\" null]))
|
||||
|
||||
|
||||
(define-lex-abbrevs
|
||||
[letter (:or (:/ "a" "z") (:/ #\A #\Z))]
|
||||
[digit (:/ #\0 #\9)]
|
||||
[scheme-whitespace (:or #\newline #\return #\tab #\space #\vtab)]
|
||||
[initial (:or letter (char-set "!$%&*/:<=>?^_~@"))]
|
||||
[subsequent (:or initial digit (char-set "+-.@"))]
|
||||
[comment (:: #\; (:* (:~ #\newline)) #\newline)]
|
||||
|
||||
|
||||
;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of
|
||||
;; using regexp macros to avoid the cut and paste.
|
||||
; [numR (:: prefixR complexR)]
|
||||
; [complexR (:or realR
|
||||
; (:: realR "@" realR)
|
||||
; (:: realR "+" urealR "i")
|
||||
; (:: realR "-" urealR "i")
|
||||
; (:: realR "+i")
|
||||
; (:: realR "-i")
|
||||
; (:: "+" urealR "i")
|
||||
; (:: "-" urealR "i")
|
||||
; (:: "+i")
|
||||
; (:: "-i"))]
|
||||
; [realR (:: sign urealR)]
|
||||
; [urealR (:or uintegerR (:: uintegerR "/" uintegerR) decimalR)]
|
||||
; [uintegerR (:: (:+ digitR) (:* #\#))]
|
||||
; [prefixR (:or (:: radixR exactness)
|
||||
; (:: exactness radixR))]
|
||||
|
||||
[num2 (:: prefix2 complex2)]
|
||||
[complex2 (:or real2
|
||||
(:: real2 "@" real2)
|
||||
(:: real2 "+" ureal2 "i")
|
||||
(:: real2 "-" ureal2 "i")
|
||||
(:: real2 "+i")
|
||||
(:: real2 "-i")
|
||||
(:: "+" ureal2 "i")
|
||||
(:: "-" ureal2 "i")
|
||||
(:: "+i")
|
||||
(:: "-i"))]
|
||||
[real2 (:: sign ureal2)]
|
||||
[ureal2 (:or uinteger2 (:: uinteger2 "/" uinteger2))]
|
||||
[uinteger2 (:: (:+ digit2) (:* #\#))]
|
||||
[prefix2 (:or (:: radix2 exactness)
|
||||
(:: exactness radix2))]
|
||||
[radix2 "#b"]
|
||||
[digit2 (:or "0" "1")]
|
||||
[num8 (:: prefix8 complex8)]
|
||||
[complex8 (:or real8
|
||||
(:: real8 "@" real8)
|
||||
(:: real8 "+" ureal8 "i")
|
||||
(:: real8 "-" ureal8 "i")
|
||||
(:: real8 "+i")
|
||||
(:: real8 "-i")
|
||||
(:: "+" ureal8 "i")
|
||||
(:: "-" ureal8 "i")
|
||||
(:: "+i")
|
||||
(:: "-i"))]
|
||||
[real8 (:: sign ureal8)]
|
||||
[ureal8 (:or uinteger8 (:: uinteger8 "/" uinteger8))]
|
||||
[uinteger8 (:: (:+ digit8) (:* #\#))]
|
||||
[prefix8 (:or (:: radix8 exactness)
|
||||
(:: exactness radix8))]
|
||||
[radix8 "#o"]
|
||||
[digit8 (:/ "0" "7")]
|
||||
|
||||
[num10 (:: prefix10 complex10)]
|
||||
[complex10 (:or real10
|
||||
(:: real10 "@" real10)
|
||||
(:: real10 "+" ureal10 "i")
|
||||
(:: real10 "-" ureal10 "i")
|
||||
(:: real10 "+i")
|
||||
(:: real10 "-i")
|
||||
(:: "+" ureal10 "i")
|
||||
(:: "-" ureal10 "i")
|
||||
(:: "+i")
|
||||
(:: "-i"))]
|
||||
[real10 (:: sign ureal10)]
|
||||
[ureal10 (:or uinteger10 (:: uinteger10 "/" uinteger10) decimal10)]
|
||||
[uinteger10 (:: (:+ digit10) (:* #\#))]
|
||||
[prefix10 (:or (:: radix10 exactness)
|
||||
(:: exactness radix10))]
|
||||
[radix10 (:? "#d")]
|
||||
[digit10 digit]
|
||||
[decimal10 (:or (:: uinteger10 suffix)
|
||||
(:: #\. (:+ digit10) (:* #\#) suffix)
|
||||
(:: (:+ digit10) #\. (:* digit10) (:* #\#) suffix)
|
||||
(:: (:+ digit10) (:+ #\#) #\. (:* #\#) suffix))]
|
||||
|
||||
[num16 (:: prefix16 complex16)]
|
||||
[complex16 (:or real16
|
||||
(:: real16 "@" real16)
|
||||
(:: real16 "+" ureal16 "i")
|
||||
(:: real16 "-" ureal16 "i")
|
||||
(:: real16 "+i")
|
||||
(:: real16 "-i")
|
||||
(:: "+" ureal16 "i")
|
||||
(:: "-" ureal16 "i")
|
||||
"+i"
|
||||
"-i")]
|
||||
[real16 (:: sign ureal16)]
|
||||
[ureal16 (:or uinteger16 (:: uinteger16 "/" uinteger16))]
|
||||
[uinteger16 (:: (:+ digit16) (:* #\#))]
|
||||
[prefix16 (:or (:: radix16 exactness)
|
||||
(:: exactness radix16))]
|
||||
[radix16 "#x"]
|
||||
[digit16 (:or digit (:/ #\a #\f) (:/ #\A #\F))]
|
||||
|
||||
|
||||
[suffix (:or "" (:: exponent-marker sign (:+ digit10)))]
|
||||
[exponent-marker (:or "e" "s" "f" "d" "l")]
|
||||
[sign (:or "" "+" "-")]
|
||||
[exactness (:or "" "#i" "#e")])
|
||||
|
||||
|
||||
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
|
||||
|
||||
;; A macro to build the syntax object
|
||||
(define-syntax (build-so stx)
|
||||
(syntax-case stx ()
|
||||
((_ value start end)
|
||||
(with-syntax ((start-pos (datum->syntax
|
||||
#'end
|
||||
(string->symbol
|
||||
(format "$~a-start-pos"
|
||||
(syntax->datum #'start)))))
|
||||
(end-pos (datum->syntax
|
||||
#'end
|
||||
(string->symbol
|
||||
(format "$~a-end-pos"
|
||||
(syntax->datum #'end)))))
|
||||
(source (datum->syntax
|
||||
#'end
|
||||
'source-name)))
|
||||
(syntax
|
||||
(datum->syntax
|
||||
#f
|
||||
value
|
||||
(list source
|
||||
(position-line start-pos)
|
||||
(position-col start-pos)
|
||||
(position-offset start-pos)
|
||||
(- (position-offset end-pos)
|
||||
(position-offset start-pos)))
|
||||
stx-for-original-property))))))
|
||||
|
||||
(define (scheme-parser source-name)
|
||||
(parser
|
||||
(src-pos)
|
||||
|
||||
(start s)
|
||||
(end EOF)
|
||||
(error (lambda (a name val start end)
|
||||
(raise-read-error
|
||||
"read-error"
|
||||
source-name
|
||||
(position-line start)
|
||||
(position-col start)
|
||||
(position-offset start)
|
||||
(- (position-offset end)
|
||||
(position-offset start)))))
|
||||
(tokens data delim)
|
||||
|
||||
|
||||
(grammar
|
||||
|
||||
(s [(sexp-list) (reverse $1)])
|
||||
|
||||
(sexp [(DATUM) (build-so $1 1 1)]
|
||||
[(OP sexp-list CP) (build-so (reverse $2) 1 3)]
|
||||
[(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)]
|
||||
[(QUOTE sexp) (build-so (list 'quote $2) 1 2)]
|
||||
[(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)]
|
||||
[(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)]
|
||||
[(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)]
|
||||
[(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)])
|
||||
|
||||
(sexp-list [() null]
|
||||
[(sexp-list sexp) (cons $2 $1)]))))
|
||||
|
||||
(define (rs sn ip)
|
||||
(port-count-lines! ip)
|
||||
((scheme-parser sn) (lambda () (scheme-lexer ip))))
|
||||
|
||||
(define readsyntax
|
||||
(case-lambda ((sn) (rs sn (current-input-port)))
|
||||
((sn ip) (rs sn ip))))
|
||||
|
||||
(provide (rename-out [readsyntax read-syntax]))
|
@ -1,23 +0,0 @@
|
||||
#lang racket/base
|
||||
(require (for-syntax racket/base)
|
||||
br-parser-tools/lex
|
||||
(prefix-in : br-parser-tools/lex-sre))
|
||||
|
||||
(provide epsilon ~
|
||||
(rename-out [:* *]
|
||||
[:+ +]
|
||||
[:? ?]
|
||||
[:or :]
|
||||
[:& &]
|
||||
[:: @]
|
||||
[:~ ^]
|
||||
[:/ -]))
|
||||
|
||||
(define-lex-trans (epsilon stx)
|
||||
(syntax-case stx ()
|
||||
[(_) #'""]))
|
||||
|
||||
(define-lex-trans (~ stx)
|
||||
(syntax-case stx ()
|
||||
[(_ RE) #'(complement RE)]))
|
||||
|
@ -1,103 +0,0 @@
|
||||
#lang racket/base
|
||||
(require (for-syntax racket/base)
|
||||
br-parser-tools/lex)
|
||||
|
||||
(provide (rename-out [sre-* *]
|
||||
[sre-+ +]
|
||||
[sre-= =]
|
||||
[sre->= >=]
|
||||
[sre-or or]
|
||||
[sre-- -]
|
||||
[sre-/ /])
|
||||
? ** : seq & ~ /-only-chars)
|
||||
|
||||
(define-lex-trans (sre-* stx)
|
||||
(syntax-case stx ()
|
||||
[(_ RE ...)
|
||||
#'(repetition 0 +inf.0 (union RE ...))]))
|
||||
|
||||
(define-lex-trans (sre-+ stx)
|
||||
(syntax-case stx ()
|
||||
[(_ RE ...)
|
||||
#'(repetition 1 +inf.0 (union RE ...))]))
|
||||
|
||||
(define-lex-trans (? stx)
|
||||
(syntax-case stx ()
|
||||
[(_ RE ...)
|
||||
#'(repetition 0 1 (union RE ...))]))
|
||||
|
||||
(define-lex-trans (sre-= stx)
|
||||
(syntax-case stx ()
|
||||
[(_ N RE ...)
|
||||
#'(repetition N N (union RE ...))]))
|
||||
|
||||
(define-lex-trans (sre->= stx)
|
||||
(syntax-case stx ()
|
||||
[(_ N RE ...)
|
||||
#'(repetition N +inf.0 (union RE ...))]))
|
||||
|
||||
(define-lex-trans (** stx)
|
||||
(syntax-case stx ()
|
||||
[(_ LOW #f RE ...)
|
||||
#'(** LOW +inf.0 RE ...)]
|
||||
[(_ LOW HIGH RE ...)
|
||||
#'(repetition LOW HIGH (union RE ...))]))
|
||||
|
||||
(define-lex-trans (sre-or stx)
|
||||
(syntax-case stx ()
|
||||
[(_ RE ...)
|
||||
#'(union RE ...)]))
|
||||
|
||||
(define-lex-trans (: stx)
|
||||
(syntax-case stx ()
|
||||
[(_ RE ...)
|
||||
#'(concatenation RE ...)]))
|
||||
|
||||
(define-lex-trans (seq stx)
|
||||
(syntax-case stx ()
|
||||
[(_ RE ...)
|
||||
#'(concatenation RE ...)]))
|
||||
|
||||
(define-lex-trans (& stx)
|
||||
(syntax-case stx ()
|
||||
[(_ RE ...)
|
||||
#'(intersection RE ...)]))
|
||||
|
||||
(define-lex-trans (~ stx)
|
||||
(syntax-case stx ()
|
||||
[(_ RE ...)
|
||||
#'(char-complement (union RE ...))]))
|
||||
|
||||
;; set difference
|
||||
(define-lex-trans (sre-- stx)
|
||||
(syntax-case stx ()
|
||||
[(_)
|
||||
(raise-syntax-error #f
|
||||
"must have at least one argument"
|
||||
stx)]
|
||||
[(_ BIG-RE RE ...)
|
||||
#'(& BIG-RE (complement (union RE ...)))]))
|
||||
|
||||
(define-lex-trans (sre-/ stx)
|
||||
(syntax-case stx ()
|
||||
[(_ RANGE ...)
|
||||
(let ([chars
|
||||
(apply append (for/list ([r (in-list (syntax->list #'(RANGE ...)))])
|
||||
(let ([x (syntax-e r)])
|
||||
(cond
|
||||
[(char? x) (list x)]
|
||||
[(string? x) (string->list x)]
|
||||
[else
|
||||
(raise-syntax-error #f "not a char or string" stx r)]))))])
|
||||
(unless (even? (length chars))
|
||||
(raise-syntax-error #f "not given an even number of characters" stx))
|
||||
#`(/-only-chars #,@chars))]))
|
||||
|
||||
(define-lex-trans (/-only-chars stx)
|
||||
(syntax-case stx ()
|
||||
[(_ C1 C2)
|
||||
#'(char-range C1 C2)]
|
||||
[(_ C1 C2 C ...)
|
||||
#'(union (char-range C1 C2) (/-only-chars C ...))]))
|
||||
|
||||
|
@ -1,370 +0,0 @@
|
||||
#lang racket/base
|
||||
|
||||
;; Provides the syntax used to create lexers and the functions needed to
|
||||
;; create and use the buffer that the lexer reads from. See docs.
|
||||
|
||||
(require (for-syntax racket/list
|
||||
racket/syntax
|
||||
syntax/stx
|
||||
syntax/define
|
||||
syntax/boundmap
|
||||
"private-lex/util.rkt"
|
||||
"private-lex/actions.rkt"
|
||||
"private-lex/front.rkt"
|
||||
"private-lex/unicode-chars.rkt"
|
||||
racket/base
|
||||
racket/promise))
|
||||
|
||||
(require racket/stxparam
|
||||
syntax/readerr
|
||||
"private-lex/token.rkt")
|
||||
|
||||
(provide lexer lexer-src-pos lexer-srcloc define-lex-abbrev define-lex-abbrevs define-lex-trans
|
||||
|
||||
;; Dealing with tokens and related structures
|
||||
define-tokens define-empty-tokens token-name token-value token?
|
||||
(struct-out position)
|
||||
(struct-out position-token)
|
||||
(struct-out srcloc-token)
|
||||
|
||||
;; File path for highlighting errors while lexing
|
||||
file-path
|
||||
lexer-file-path ;; alternate name
|
||||
|
||||
;; Lex abbrevs for unicode char sets.
|
||||
any-char any-string nothing alphabetic lower-case upper-case title-case
|
||||
numeric symbolic punctuation graphic whitespace blank iso-control
|
||||
|
||||
;; A regular expression operator
|
||||
char-set)
|
||||
|
||||
;; wrap-action: syntax-object src-pos? -> syntax-object
|
||||
(define-for-syntax (wrap-action action src-loc-style)
|
||||
(with-syntax ([action-stx
|
||||
(cond
|
||||
[(eq? src-loc-style 'lexer-src-pos)
|
||||
#`(let/ec ret
|
||||
(syntax-parameterize
|
||||
([return-without-pos (make-rename-transformer #'ret)])
|
||||
(make-position-token #,action start-pos end-pos)))]
|
||||
[(eq? src-loc-style 'lexer-srcloc)
|
||||
#`(let/ec ret
|
||||
(syntax-parameterize
|
||||
([return-without-srcloc (make-rename-transformer #'ret)])
|
||||
(make-srcloc-token #,action lexeme-srcloc)))]
|
||||
[else action])])
|
||||
(syntax/loc action
|
||||
(λ (start-pos-p end-pos-p lexeme-p input-port-p)
|
||||
(define lexeme-srcloc-p (make-srcloc (object-name input-port-p)
|
||||
(position-line start-pos-p)
|
||||
(position-col start-pos-p)
|
||||
(position-offset start-pos-p)
|
||||
(and (number? (position-offset end-pos-p))
|
||||
(number? (position-offset start-pos-p))
|
||||
(- (position-offset end-pos-p)
|
||||
(position-offset start-pos-p)))))
|
||||
(syntax-parameterize
|
||||
([start-pos (make-rename-transformer #'start-pos-p)]
|
||||
[end-pos (make-rename-transformer #'end-pos-p)]
|
||||
[lexeme (make-rename-transformer #'lexeme-p)]
|
||||
[input-port (make-rename-transformer #'input-port-p)]
|
||||
[lexeme-srcloc (make-rename-transformer #'lexeme-srcloc-p)])
|
||||
action-stx)))))
|
||||
|
||||
(define-for-syntax (make-lexer-macro caller src-loc-style)
|
||||
(λ (stx)
|
||||
(syntax-case stx ()
|
||||
[(_ . RE+ACTS)
|
||||
(with-disappeared-uses
|
||||
(let ()
|
||||
(define spec/re-acts (syntax->list #'RE+ACTS))
|
||||
(for/and ([x (in-list spec/re-acts)])
|
||||
(syntax-case x ()
|
||||
[(RE ACT) #t]
|
||||
[else (raise-syntax-error caller "not a regular expression / action pair" stx x)]))
|
||||
(define eof-act (get-special-action spec/re-acts #'eof (case src-loc-style
|
||||
[(lexer-src-pos) #'(return-without-pos eof)]
|
||||
[(lexer-srcloc) #'(return-without-srcloc eof)]
|
||||
[else #'eof])))
|
||||
(define spec-act (get-special-action spec/re-acts #'special #'(void)))
|
||||
(define spec-comment-act (get-special-action spec/re-acts #'special-comment #'#f))
|
||||
(define ids (list #'special #'special-comment #'eof))
|
||||
(define re-acts (filter (λ (spec/re-act)
|
||||
(syntax-case spec/re-act ()
|
||||
[((special) act)
|
||||
(not (ormap
|
||||
(λ (x)
|
||||
(and (identifier? #'special)
|
||||
(module-or-top-identifier=? #'special x)))
|
||||
ids))]
|
||||
[_ #t])) spec/re-acts))
|
||||
(define names (map (λ (x) (datum->syntax #f (gensym))) re-acts))
|
||||
(define acts (map (λ (x) (stx-car (stx-cdr x))) re-acts))
|
||||
(define re-actnames (map (λ (re-act name) (list (stx-car re-act) name)) re-acts names))
|
||||
(when (null? spec/re-acts)
|
||||
(raise-syntax-error caller "expected at least one action" stx))
|
||||
(define-values (trans start action-names no-look) (build-lexer re-actnames))
|
||||
(when (vector-ref action-names start) ;; Start state is final
|
||||
(unless (and
|
||||
;; All the successor states are final
|
||||
(vector? (vector-ref trans start))
|
||||
(andmap (λ (x) (vector-ref action-names (vector-ref x 2)))
|
||||
(vector->list (vector-ref trans start)))
|
||||
;; Each character has a successor state
|
||||
(let loop ([check 0]
|
||||
[nexts (vector->list (vector-ref trans start))])
|
||||
(cond
|
||||
[(null? nexts) #f]
|
||||
[else
|
||||
(let ([next (car nexts)])
|
||||
(and (= (vector-ref next 0) check)
|
||||
(let ([next-check (vector-ref next 1)])
|
||||
(or (>= next-check max-char-num)
|
||||
(loop (add1 next-check) (cdr nexts))))))])))
|
||||
(eprintf "warning: lexer at ~a can accept the empty string\n" stx)))
|
||||
(with-syntax ([START-STATE-STX start]
|
||||
[TRANS-TABLE-STX trans]
|
||||
[NO-LOOKAHEAD-STX no-look]
|
||||
[(NAME ...) names]
|
||||
[(ACT ...) (map (λ (a) (wrap-action a src-loc-style)) acts)]
|
||||
[(ACT-NAME ...) (vector->list action-names)]
|
||||
[SPEC-ACT-STX (wrap-action spec-act src-loc-style)]
|
||||
[HAS-COMMENT-ACT?-STX (if (syntax-e spec-comment-act) #t #f)]
|
||||
[SPEC-COMMENT-ACT-STX (wrap-action spec-comment-act src-loc-style)]
|
||||
[EOF-ACT-STX (wrap-action eof-act src-loc-style)])
|
||||
(syntax/loc stx (let ([NAME ACT] ...)
|
||||
(let ([proc (lexer-body START-STATE-STX
|
||||
TRANS-TABLE-STX
|
||||
(vector ACT-NAME ...)
|
||||
NO-LOOKAHEAD-STX
|
||||
SPEC-ACT-STX
|
||||
HAS-COMMENT-ACT?-STX
|
||||
SPEC-COMMENT-ACT-STX
|
||||
EOF-ACT-STX)])
|
||||
;; reverse eta to get named procedures:
|
||||
(λ (port) (proc port))))))))])))
|
||||
|
||||
(define-syntax lexer (make-lexer-macro 'lexer #f))
|
||||
(define-syntax lexer-src-pos (make-lexer-macro 'lexer-src-pos 'lexer-src-pos))
|
||||
(define-syntax lexer-srcloc (make-lexer-macro 'lexer-srcloc 'lexer-srcloc))
|
||||
|
||||
(define-syntax (define-lex-abbrev stx)
|
||||
(syntax-case stx ()
|
||||
[(_ NAME RE) (identifier? #'NAME)
|
||||
(syntax/loc stx
|
||||
(define-syntax NAME
|
||||
(make-lex-abbrev (λ () (quote-syntax RE)))))]
|
||||
[_ (raise-syntax-error 'define-lex-abbrev "form should be (define-lex-abbrev name re)" stx)]))
|
||||
|
||||
(define-syntax (define-lex-abbrevs stx)
|
||||
(syntax-case stx ()
|
||||
[(_ . XS)
|
||||
(with-syntax ([(ABBREV ...) (map
|
||||
(λ (a)
|
||||
(syntax-case a ()
|
||||
[(NAME RE) (identifier? #'NAME)
|
||||
(syntax/loc a (define-lex-abbrev NAME RE))]
|
||||
[_ (raise-syntax-error
|
||||
#f
|
||||
"form should be (define-lex-abbrevs (name re) ...)"
|
||||
stx
|
||||
a)]))
|
||||
(syntax->list #'XS))])
|
||||
(syntax/loc stx (begin ABBREV ...)))]
|
||||
[_ (raise-syntax-error #f "form should be (define-lex-abbrevs (name re) ...)" stx)]))
|
||||
|
||||
(define-syntax (define-lex-trans stx)
|
||||
(syntax-case stx ()
|
||||
[(_ name-form body-form)
|
||||
(let-values (((name body)
|
||||
(normalize-definition #'(define-syntax name-form body-form) #'λ)))
|
||||
|
||||
#`(define-syntax #,name
|
||||
(let ((func #,body))
|
||||
(unless (procedure? func)
|
||||
(raise-syntax-error 'define-lex-trans "expected a procedure as the transformer, got ~e" func))
|
||||
(unless (procedure-arity-includes? func 1)
|
||||
(raise-syntax-error 'define-lex-trans "expected a procedure that accepts 1 argument as the transformer, got ~e" func))
|
||||
(make-lex-trans func))))]
|
||||
[_
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"form should be (define-lex-trans name transformer)"
|
||||
stx)]))
|
||||
|
||||
|
||||
(define (get-next-state-helper char min max table)
|
||||
(cond
|
||||
[(>= min max) #f]
|
||||
[else
|
||||
(define try (quotient (+ min max) 2))
|
||||
(define el (vector-ref table try))
|
||||
(define r1 (vector-ref el 0))
|
||||
(define r2 (vector-ref el 1))
|
||||
(cond
|
||||
[(and (>= char r1) (<= char r2)) (vector-ref el 2)]
|
||||
[(< char r1) (get-next-state-helper char min try table)]
|
||||
[else (get-next-state-helper char (add1 try) max table)])]))
|
||||
|
||||
|
||||
|
||||
|
||||
(define (get-next-state char table)
|
||||
(and table (get-next-state-helper char 0 (vector-length table) table)))
|
||||
|
||||
(define ((lexer-body start-state trans-table actions no-lookahead special-action
|
||||
has-special-comment-action? special-comment-action eof-action) ip)
|
||||
(define (lexer ip)
|
||||
(define first-pos (get-position ip))
|
||||
(define first-char (peek-char-or-special ip 0))
|
||||
;(printf "(peek-char-or-special port 0) = ~e\n" first-char)
|
||||
(cond
|
||||
[(eof-object? first-char)
|
||||
(do-match ip first-pos eof-action (read-char-or-special ip))]
|
||||
[(special-comment? first-char)
|
||||
(read-char-or-special ip)
|
||||
(cond
|
||||
(has-special-comment-action?
|
||||
(do-match ip first-pos special-comment-action #f))
|
||||
(else (lexer ip)))]
|
||||
[(not (char? first-char))
|
||||
(do-match ip first-pos special-action (read-char-or-special ip))]
|
||||
[else
|
||||
(let lexer-loop (
|
||||
;; current-state
|
||||
[state start-state]
|
||||
;; the character to transition on
|
||||
[char first-char]
|
||||
;; action for the longest match seen thus far
|
||||
;; including a match at the current state
|
||||
[longest-match-action
|
||||
(vector-ref actions start-state)]
|
||||
;; how many bytes precede char
|
||||
[length-bytes 0]
|
||||
;; how many characters have been read
|
||||
;; including the one just read
|
||||
[length-chars 1]
|
||||
;; how many characters are in the longest match
|
||||
[longest-match-length 0])
|
||||
(define next-state
|
||||
(cond
|
||||
[(not (char? char)) #f]
|
||||
[else (get-next-state (char->integer char)
|
||||
(vector-ref trans-table state))]))
|
||||
(cond
|
||||
[(not next-state)
|
||||
(check-match ip first-pos longest-match-length
|
||||
length-chars longest-match-action)]
|
||||
[(vector-ref no-lookahead next-state)
|
||||
(define act (vector-ref actions next-state))
|
||||
(check-match ip
|
||||
first-pos
|
||||
(if act length-chars longest-match-length)
|
||||
length-chars
|
||||
(if act act longest-match-action))]
|
||||
[else
|
||||
(define act (vector-ref actions next-state))
|
||||
(define next-length-bytes (+ (char-utf-8-length char) length-bytes))
|
||||
(define next-char (peek-char-or-special ip next-length-bytes))
|
||||
#;(printf "(peek-char-or-special port ~e) = ~e\n"
|
||||
next-length-bytes next-char)
|
||||
(lexer-loop next-state
|
||||
next-char
|
||||
(if act
|
||||
act
|
||||
longest-match-action)
|
||||
next-length-bytes
|
||||
(add1 length-chars)
|
||||
(if act
|
||||
length-chars
|
||||
longest-match-length))]))]))
|
||||
(unless (input-port? ip)
|
||||
(raise-argument-error 'lexer "input-port?" 0 ip))
|
||||
(lexer ip))
|
||||
|
||||
(define (check-match lb first-pos longest-match-length length longest-match-action)
|
||||
(unless longest-match-action
|
||||
(define match (read-string length lb))
|
||||
(define end-pos (get-position lb))
|
||||
(raise-read-error
|
||||
(format "lexer: No match found in input starting with: ~v" match)
|
||||
(file-path)
|
||||
(position-line first-pos)
|
||||
(position-col first-pos)
|
||||
(position-offset first-pos)
|
||||
(- (position-offset end-pos) (position-offset first-pos))))
|
||||
(define match (read-string longest-match-length lb))
|
||||
;(printf "(read-string ~e port) = ~e\n" longest-match-length match)
|
||||
(do-match lb first-pos longest-match-action match))
|
||||
|
||||
(define file-path (make-parameter #f))
|
||||
(define lexer-file-path file-path)
|
||||
|
||||
(define (do-match ip first-pos action value)
|
||||
#;(printf "(action ~a ~a ~a ~a)\n"
|
||||
(position-offset first-pos) (position-offset (get-position ip)) value ip)
|
||||
(action first-pos (get-position ip) value ip))
|
||||
|
||||
(define (get-position ip)
|
||||
(define-values (line col off) (port-next-location ip))
|
||||
(make-position off line col))
|
||||
|
||||
(define-syntax (create-unicode-abbrevs stx)
|
||||
(syntax-case stx ()
|
||||
[(_ CTXT)
|
||||
(with-syntax ([(RANGES ...) (for/list ([range (in-list (list (force alphabetic-ranges)
|
||||
(force lower-case-ranges)
|
||||
(force upper-case-ranges)
|
||||
(force title-case-ranges)
|
||||
(force numeric-ranges)
|
||||
(force symbolic-ranges)
|
||||
(force punctuation-ranges)
|
||||
(force graphic-ranges)
|
||||
(force whitespace-ranges)
|
||||
(force blank-ranges)
|
||||
(force iso-control-ranges)))])
|
||||
`(union ,@(map (λ (x)
|
||||
`(char-range ,(integer->char (car x))
|
||||
,(integer->char (cdr x))))
|
||||
range)))]
|
||||
[(NAMES ...) (for/list ([sym (in-list '(alphabetic
|
||||
lower-case
|
||||
upper-case
|
||||
title-case
|
||||
numeric
|
||||
symbolic
|
||||
punctuation
|
||||
graphic
|
||||
whitespace
|
||||
blank
|
||||
iso-control))])
|
||||
(datum->syntax #'CTXT sym #f))])
|
||||
#'(define-lex-abbrevs (NAMES RANGES) ...))]))
|
||||
|
||||
(define-lex-abbrev any-char (char-complement (union)))
|
||||
(define-lex-abbrev any-string (intersection))
|
||||
(define-lex-abbrev nothing (union))
|
||||
(create-unicode-abbrevs #'here)
|
||||
|
||||
(define-lex-trans (char-set stx)
|
||||
(syntax-case stx ()
|
||||
[(_ STR)
|
||||
(string? (syntax-e #'STR))
|
||||
(with-syntax ([(CHAR ...) (string->list (syntax-e #'STR))])
|
||||
#'(union CHAR ...))]))
|
||||
|
||||
(define-syntax provide-lex-keyword
|
||||
(syntax-rules ()
|
||||
[(_ ID ...)
|
||||
(begin
|
||||
(define-syntax-parameter ID
|
||||
(make-set!-transformer
|
||||
(λ (stx)
|
||||
(raise-syntax-error
|
||||
'provide-lex-keyword
|
||||
(format "use of a lexer keyword (~a) is not in an appropriate lexer action" 'ID)
|
||||
stx))))
|
||||
...
|
||||
(provide ID ...))]))
|
||||
|
||||
(provide-lex-keyword start-pos end-pos lexeme lexeme-srcloc input-port return-without-pos return-without-srcloc)
|
@ -1,333 +0,0 @@
|
||||
#lang racket/base
|
||||
(require racket/list
|
||||
(prefix-in is: data/integer-set)
|
||||
"re.rkt"
|
||||
"util.rkt")
|
||||
|
||||
(provide build-dfa print-dfa (struct-out dfa))
|
||||
|
||||
(define e (build-epsilon))
|
||||
(define z (build-zero))
|
||||
|
||||
|
||||
;; Don't do anything with this one but extract the chars
|
||||
(define all-chars (->re `(char-complement (union)) (make-cache)))
|
||||
|
||||
;; get-char-groups : re bool -> (list-of char-setR?)
|
||||
;; Collects the char-setRs in r that could be used in
|
||||
;; taking the derivative of r.
|
||||
(define (get-char-groups r found-negation)
|
||||
(cond
|
||||
[(or (eq? r e) (eq? r z)) null]
|
||||
[(char-setR? r) (list r)]
|
||||
[(concatR? r)
|
||||
(if (re-nullable? (concatR-re1 r))
|
||||
(append (get-char-groups (concatR-re1 r) found-negation)
|
||||
(get-char-groups (concatR-re2 r) found-negation))
|
||||
(get-char-groups (concatR-re1 r) found-negation))]
|
||||
[(repeatR? r)
|
||||
(get-char-groups (repeatR-re r) found-negation)]
|
||||
[(orR? r)
|
||||
(apply append (map (λ (x) (get-char-groups x found-negation)) (orR-res r)))]
|
||||
[(andR? r)
|
||||
(apply append (map (λ (x) (get-char-groups x found-negation)) (andR-res r)))]
|
||||
[(negR? r)
|
||||
(if found-negation
|
||||
(get-char-groups (negR-re r) #t)
|
||||
(cons all-chars (get-char-groups (negR-re r) #t)))]))
|
||||
|
||||
(test-block ((c (make-cache))
|
||||
(r1 (->re #\1 c))
|
||||
(r2 (->re #\2 c)))
|
||||
((get-char-groups e #f) null)
|
||||
((get-char-groups z #f) null)
|
||||
((get-char-groups r1 #f) (list r1))
|
||||
((get-char-groups (->re `(concatenation ,r1 ,r2) c) #f)
|
||||
(list r1))
|
||||
((get-char-groups (->re `(concatenation ,e ,r2) c) #f)
|
||||
(list r2))
|
||||
((get-char-groups (->re `(concatenation (repetition 0 +inf.0 ,r1) ,r2) c) #f)
|
||||
(list r1 r2))
|
||||
((get-char-groups (->re `(repetition 0 +inf.0 ,r1) c) #f)
|
||||
(list r1))
|
||||
((get-char-groups
|
||||
(->re `(union (repetition 0 +inf.0 ,r1)
|
||||
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
|
||||
(list r1 r2 (->re "3" c) (->re "4" c)))
|
||||
((get-char-groups (->re `(complement ,r1) c) #f)
|
||||
(list all-chars r1))
|
||||
((get-char-groups
|
||||
(->re `(intersection (repetition 0 +inf.0 ,r1)
|
||||
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
|
||||
(list r1 r2 (->re "3" c) (->re "4" c)))
|
||||
)
|
||||
(define loc:member? is:member?)
|
||||
|
||||
;; deriveR : re char cache -> re
|
||||
(define (deriveR r c cache)
|
||||
(cond
|
||||
[(or (eq? r e) (eq? r z)) z]
|
||||
[(char-setR? r)
|
||||
(if (loc:member? c (char-setR-chars r)) e z)]
|
||||
[(concatR? r)
|
||||
(define r1 (concatR-re1 r))
|
||||
(define r2 (concatR-re2 r))
|
||||
(define d (build-concat (deriveR r1 c cache) r2 cache))
|
||||
(if (re-nullable? r1)
|
||||
(build-or (list d (deriveR r2 c cache)) cache)
|
||||
d)]
|
||||
[(repeatR? r)
|
||||
(build-concat (deriveR (repeatR-re r) c cache)
|
||||
(build-repeat (sub1 (repeatR-low r))
|
||||
(sub1 (repeatR-high r))
|
||||
(repeatR-re r) cache)
|
||||
cache)]
|
||||
[(orR? r)
|
||||
(build-or (map (λ (x) (deriveR x c cache))
|
||||
(orR-res r))
|
||||
cache)]
|
||||
[(andR? r)
|
||||
(build-and (map (λ (x) (deriveR x c cache))
|
||||
(andR-res r))
|
||||
cache)]
|
||||
[(negR? r)
|
||||
(build-neg (deriveR (negR-re r) c cache) cache)]))
|
||||
|
||||
(test-block ((c (make-cache))
|
||||
(a (char->integer #\a))
|
||||
(b (char->integer #\b))
|
||||
(r1 (->re #\a c))
|
||||
(r2 (->re `(repetition 0 +inf.0 #\a) c))
|
||||
(r3 (->re `(repetition 0 +inf.0 ,r2) c))
|
||||
(r4 (->re `(concatenation #\a ,r2) c))
|
||||
(r5 (->re `(repetition 0 +inf.0 ,r4) c))
|
||||
(r6 (->re `(union ,r5 #\a) c))
|
||||
(r7 (->re `(concatenation ,r2 ,r2) c))
|
||||
(r8 (->re `(complement ,r4) c))
|
||||
(r9 (->re `(intersection ,r2 ,r4) c)))
|
||||
((deriveR e a c) z)
|
||||
((deriveR z a c) z)
|
||||
((deriveR r1 b c) z)
|
||||
((deriveR r1 a c) e)
|
||||
((deriveR r2 a c) r2)
|
||||
((deriveR r2 b c) z)
|
||||
((deriveR r3 a c) r2)
|
||||
((deriveR r3 b c) z)
|
||||
((deriveR r4 a c) r2)
|
||||
((deriveR r4 b c) z)
|
||||
((deriveR r5 a c) (->re `(concatenation ,r2 ,r5) c))
|
||||
((deriveR r5 b c) z)
|
||||
((deriveR r6 a c) (->re `(union (concatenation ,r2 ,r5) "") c))
|
||||
((deriveR r6 b c) z)
|
||||
((deriveR r7 a c) (->re `(union (concatenation ,r2 ,r2) ,r2) c))
|
||||
((deriveR r7 b c) z)
|
||||
((deriveR r8 a c) (->re `(complement, r2) c))
|
||||
((deriveR r8 b c) (->re `(complement ,z) c))
|
||||
((deriveR r9 a c) r2)
|
||||
((deriveR r9 b c) z)
|
||||
((deriveR (->re `(repetition 1 2 "ab") c) a c)
|
||||
(->re `(concatenation "b" (repetition 0 1 "ab")) c)))
|
||||
|
||||
;; An re-action is (cons re action)
|
||||
|
||||
;; derive : (list-of re-action) char cache -> (union (list-of re-action) #f)
|
||||
;; applies deriveR to all the re-actions's re parts.
|
||||
;; Returns #f if the derived state is equivalent to z.
|
||||
(define (derive r c cache)
|
||||
(define new-r (for/list ([ra (in-list r)])
|
||||
(cons (deriveR (car ra) c cache) (cdr ra))))
|
||||
(if (andmap (λ (x) (eq? z (car x))) new-r)
|
||||
#f
|
||||
new-r))
|
||||
|
||||
(test-block ((c (make-cache))
|
||||
(r1 (->re #\1 c))
|
||||
(r2 (->re #\2 c)))
|
||||
((derive null (char->integer #\1) c) #f)
|
||||
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c)
|
||||
(list (cons e 1) (cons z 2)))
|
||||
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f))
|
||||
|
||||
|
||||
;; get-final : (list-of re-action) -> (union #f syntax-object)
|
||||
;; An re that accepts e represents a final state. Return the
|
||||
;; action from the first final state or #f if there is none.
|
||||
(define (get-final res)
|
||||
(cond
|
||||
[(null? res) #f]
|
||||
[(re-nullable? (caar res)) (cdar res)]
|
||||
[else (get-final (cdr res))]))
|
||||
|
||||
(test-block ((c->i char->integer)
|
||||
(c (make-cache))
|
||||
(r1 (->re #\a c))
|
||||
(r2 (->re #\b c))
|
||||
(b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5)))
|
||||
(a (list (cons r1 1) (cons r2 2))))
|
||||
((derive null (c->i #\a) c) #f)
|
||||
((derive a (c->i #\a) c) (list (cons e 1) (cons z 2)))
|
||||
((derive a (c->i #\b) c) (list (cons z 1) (cons e 2)))
|
||||
((derive a (c->i #\c) c) #f)
|
||||
((derive (list (cons (->re `(union " " "\n" ",") c) 1)
|
||||
(cons (->re `(concatenation (repetition 0 1 "-")
|
||||
(repetition 1 +inf.0 (char-range "0" "9"))) c) 2)
|
||||
(cons (->re `(concatenation "-" (repetition 1 +inf.0 "-")) c) 3)
|
||||
(cons (->re "[" c) 4)
|
||||
(cons (->re "]" c) 5)) (c->i #\[) c)
|
||||
b)
|
||||
((get-final a) #f)
|
||||
((get-final (list (cons e 1) (cons e 2))) 1)
|
||||
((get-final b) 4))
|
||||
|
||||
|
||||
;; A state is (make-state (list-of re-action) nat)
|
||||
(define-struct state (spec index))
|
||||
|
||||
;; get->key : re-action -> (list-of nat)
|
||||
;; states are indexed by the list of indexes of their res
|
||||
(define (get-key s)
|
||||
(map (λ (x) (re-index (car x))) s))
|
||||
|
||||
(define loc:partition is:partition)
|
||||
|
||||
;; compute-chars : (list-of state) -> (list-of char-set)
|
||||
;; Computed the sets of equivalent characters for taking the
|
||||
;; derivative of the car of st. Only one derivative per set need to be taken.
|
||||
(define (compute-chars st)
|
||||
(cond
|
||||
[(null? st) null]
|
||||
[else
|
||||
(loc:partition (map char-setR-chars
|
||||
(apply append (map (λ (x) (get-char-groups (car x) #f))
|
||||
(state-spec (car st))))))]))
|
||||
|
||||
(test-block ((c (make-cache))
|
||||
(c->i char->integer)
|
||||
(r1 (->re `(char-range #\1 #\4) c))
|
||||
(r2 (->re `(char-range #\2 #\3) c)))
|
||||
((compute-chars null) null)
|
||||
((compute-chars (list (make-state null 1))) null)
|
||||
((map is:integer-set-contents
|
||||
(compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2))))
|
||||
(list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3)))
|
||||
(is:integer-set-contents (is:union (is:make-range (c->i #\1))
|
||||
(is:make-range (c->i #\4)))))))
|
||||
|
||||
|
||||
;; A dfa is (make-dfa int int
|
||||
;; (list-of (cons int syntax-object))
|
||||
;; (list-of (cons int (list-of (cons char-set int)))))
|
||||
;; Each transitions is a state and a list of chars with the state to transition to.
|
||||
;; The finals and transitions are sorted by state number, and duplicate free.
|
||||
(define-struct dfa (num-states start-state final-states/actions transitions) #:inspector (make-inspector))
|
||||
|
||||
(define loc:get-integer is:get-integer)
|
||||
|
||||
;; build-dfa : (list-of re-action) cache -> dfa
|
||||
(define (build-dfa rs cache)
|
||||
(let* ([transitions (make-hash)]
|
||||
[get-state-number (make-counter)]
|
||||
[start (make-state rs (get-state-number))])
|
||||
(cache (cons 'state (get-key rs)) (λ () start))
|
||||
(let loop ([old-states (list start)]
|
||||
[new-states null]
|
||||
[all-states (list start)]
|
||||
[cs (compute-chars (list start))])
|
||||
(cond
|
||||
[(and (null? old-states) (null? new-states))
|
||||
(make-dfa (get-state-number) (state-index start)
|
||||
(sort (for*/list ([state (in-list all-states)]
|
||||
[val (in-value (cons (state-index state) (get-final (state-spec state))))]
|
||||
#:when (cdr val))
|
||||
val)
|
||||
< #:key car)
|
||||
(sort (hash-map transitions
|
||||
(λ (state trans)
|
||||
(cons (state-index state)
|
||||
(for/list ([t (in-list trans)])
|
||||
(cons (car t)
|
||||
(state-index (cdr t)))))))
|
||||
< #:key car))]
|
||||
[(null? old-states)
|
||||
(loop new-states null all-states (compute-chars new-states))]
|
||||
[(null? cs)
|
||||
(loop (cdr old-states) new-states all-states (compute-chars (cdr old-states)))]
|
||||
[else
|
||||
(define state (car old-states))
|
||||
(define c (car cs))
|
||||
(define new-re (derive (state-spec state) (loc:get-integer c) cache))
|
||||
(cond
|
||||
[new-re
|
||||
(let* ([new-state? #f]
|
||||
[new-state (cache (cons 'state (get-key new-re))
|
||||
(λ ()
|
||||
(set! new-state? #t)
|
||||
(make-state new-re (get-state-number))))]
|
||||
[new-all-states (if new-state? (cons new-state all-states) all-states)])
|
||||
(hash-set! transitions
|
||||
state
|
||||
(cons (cons c new-state)
|
||||
(hash-ref transitions state
|
||||
(λ () null))))
|
||||
(cond
|
||||
[new-state?
|
||||
(loop old-states (cons new-state new-states) new-all-states (cdr cs))]
|
||||
[else
|
||||
(loop old-states new-states new-all-states (cdr cs))]))]
|
||||
[else (loop old-states new-states all-states (cdr cs))])]))))
|
||||
|
||||
(define (print-dfa x)
|
||||
(printf "number of states: ~a\n" (dfa-num-states x))
|
||||
(printf "start state: ~a\n" (dfa-start-state x))
|
||||
(printf "final states: ~a\n" (map car (dfa-final-states/actions x)))
|
||||
(for-each (λ (trans)
|
||||
(printf "state: ~a\n" (car trans))
|
||||
(for-each (λ (rule)
|
||||
(printf " -~a-> ~a\n"
|
||||
(is:integer-set-contents (car rule))
|
||||
(cdr rule)))
|
||||
(cdr trans)))
|
||||
(dfa-transitions x)))
|
||||
|
||||
(define (build-test-dfa rs)
|
||||
(define c (make-cache))
|
||||
(build-dfa (map (λ (x) (cons (->re x c) 'action)) rs) c))
|
||||
|
||||
|
||||
#|
|
||||
(define t1 (build-test-dfa null))
|
||||
(define t2 (build-test-dfa `(#\a)))
|
||||
(define t3 (build-test-dfa `(#\a #\b)))
|
||||
(define t4 (build-test-dfa `((repetition 0 +inf.0 #\a)
|
||||
(repetition 0 +inf.0 (concatenation #\a #\b)))))
|
||||
(define t5 (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\0 #\1)) #\1))))
|
||||
(define t6 (build-test-dfa `((repetition 0 +inf.0 (repetition 0 +inf.0 #\a))
|
||||
(repetition 0 +inf.0 (concatenation #\b (repetition 1 +inf.0 #\b))))))
|
||||
(define t7 (build-test-dfa `((concatenation (repetition 0 +inf.0 #\a) (repetition 0 +inf.0 #\b)
|
||||
(repetition 0 +inf.0 #\c) (repetition 0 +inf.0 #\d)
|
||||
(repetition 0 +inf.0 #\e)))))
|
||||
(define t8
|
||||
(build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\a #\b)) #\a (union #\a #\b)
|
||||
(union #\a #\b) (union #\a #\b) (union #\a #\b)))))
|
||||
(define t9 (build-test-dfa `((concatenation "/*"
|
||||
(complement (concatenation (intersection) "*/" (intersection)))
|
||||
"*/"))))
|
||||
(define t11 (build-test-dfa `((complement "1"))))
|
||||
(define t12 (build-test-dfa `((concatenation (intersection (concatenation (repetition 0 +inf.0 "a") "b")
|
||||
(concatenation "a" (repetition 0 +inf.0 "b")))
|
||||
"ab"))))
|
||||
(define x (build-test-dfa `((union " " "\n" ",")
|
||||
(concatenation (repetition 0 1 "-") (repetition 1 +inf.0 (char-range "0" "9")))
|
||||
(concatenation "-" (repetition 1 +inf.0 "-"))
|
||||
"["
|
||||
"]")))
|
||||
(define y (build-test-dfa
|
||||
`((repetition 1 +inf.0
|
||||
(union (concatenation "|" (repetition 0 +inf.0 (char-complement "|")) "|")
|
||||
(concatenation "|" (repetition 0 +inf.0 (char-complement "|"))))))))
|
||||
(define t13 (build-test-dfa `((intersection (concatenation (intersection) "111" (intersection))
|
||||
(complement (union (concatenation (intersection) "01")
|
||||
(repetition 1 +inf.0 "1")))))))
|
||||
(define t14 (build-test-dfa `((complement "1")))))
|
||||
|
||||
|#
|
@ -1,159 +0,0 @@
|
||||
#lang racket/base
|
||||
(require racket/base
|
||||
racket/match
|
||||
(prefix-in is: data/integer-set)
|
||||
racket/list
|
||||
syntax/stx
|
||||
"util.rkt"
|
||||
"stx.rkt"
|
||||
"re.rkt"
|
||||
"deriv.rkt")
|
||||
|
||||
(provide build-lexer)
|
||||
|
||||
(define-syntax time-label
|
||||
(syntax-rules ()
|
||||
((_ l e ...)
|
||||
(begin
|
||||
(printf "~a: " l)
|
||||
(time (begin e ...))))))
|
||||
|
||||
;; A table is either
|
||||
;; - (vector-of (union #f nat))
|
||||
;; - (vector-of (vector-of (vector nat nat nat)))
|
||||
|
||||
(define loc:integer-set-contents is:integer-set-contents)
|
||||
|
||||
;; dfa->1d-table : dfa -> (same as build-lexer)
|
||||
(define (dfa->1d-table dfa)
|
||||
(define state-table (make-vector (dfa-num-states dfa) #f))
|
||||
(define transition-cache (make-hasheq))
|
||||
(for ([trans (in-list (dfa-transitions dfa))])
|
||||
(match-define (cons from-state all-chars/to) trans)
|
||||
(define flat-all-chars/to
|
||||
(sort
|
||||
(for*/list ([chars/to (in-list all-chars/to)]
|
||||
[char-ranges (in-value (loc:integer-set-contents (car chars/to)))]
|
||||
[to (in-value (cdr chars/to))]
|
||||
[char-range (in-list char-ranges)])
|
||||
(define entry (vector (car char-range) (cdr char-range) to))
|
||||
(hash-ref transition-cache entry (λ ()
|
||||
(hash-set! transition-cache
|
||||
entry
|
||||
entry)
|
||||
entry)))
|
||||
< #:key (λ (v) (vector-ref v 0))))
|
||||
(vector-set! state-table from-state (list->vector flat-all-chars/to)))
|
||||
state-table)
|
||||
|
||||
|
||||
(define loc:foldr is:foldr)
|
||||
|
||||
;; dfa->2d-table : dfa -> (same as build-lexer)
|
||||
(define (dfa->2d-table dfa)
|
||||
;; char-table : (vector-of (union #f nat))
|
||||
;; The lexer table, one entry per state per char.
|
||||
;; Each entry specifies a state to transition to.
|
||||
;; #f indicates no transition
|
||||
(define char-table (make-vector (* 256 (dfa-num-states dfa)) #f))
|
||||
;; Fill the char-table vector
|
||||
(for* ([trans (in-list (dfa-transitions dfa))]
|
||||
[chars/to (in-list (cdr trans))])
|
||||
(define from-state (car trans))
|
||||
(define to-state (cdr chars/to))
|
||||
(loc:foldr (λ (char _)
|
||||
(vector-set! char-table
|
||||
(bitwise-ior
|
||||
char
|
||||
(arithmetic-shift from-state 8))
|
||||
to-state))
|
||||
(void)
|
||||
(car chars/to)))
|
||||
char-table)
|
||||
|
||||
|
||||
;; dfa->actions : dfa -> (vector-of (union #f syntax-object))
|
||||
;; The action for each final state, #f if the state isn't final
|
||||
(define (dfa->actions dfa)
|
||||
(define actions (make-vector (dfa-num-states dfa) #f))
|
||||
(for ([state/action (in-list (dfa-final-states/actions dfa))])
|
||||
(vector-set! actions (car state/action) (cdr state/action)))
|
||||
actions)
|
||||
|
||||
;; dfa->no-look : dfa -> (vector-of bool)
|
||||
;; For each state whether the lexer can ignore the next input.
|
||||
;; It can do this only if there are no transitions out of the
|
||||
;; current state.
|
||||
(define (dfa->no-look dfa)
|
||||
(define no-look (make-vector (dfa-num-states dfa) #t))
|
||||
(for ([trans (in-list (dfa-transitions dfa))])
|
||||
(vector-set! no-look (car trans) #f))
|
||||
no-look)
|
||||
|
||||
(test-block ((d1 (make-dfa 1 1 (list) (list)))
|
||||
(d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
|
||||
(list (cons 1 (list (cons (is:make-range 49 50) 1)
|
||||
(cons (is:make-range 51) 2)))
|
||||
(cons 2 (list (cons (is:make-range 49) 3))))))
|
||||
(d3 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
|
||||
(list (cons 1 (list (cons (is:make-range 100 200) 0)
|
||||
(cons (is:make-range 49 50) 1)
|
||||
(cons (is:make-range 51) 2)))
|
||||
(cons 2 (list (cons (is:make-range 49) 3)))))))
|
||||
((dfa->2d-table d1) (make-vector 256 #f))
|
||||
((dfa->2d-table d2) (let ((v (make-vector 1024 #f)))
|
||||
(vector-set! v 305 1)
|
||||
(vector-set! v 306 1)
|
||||
(vector-set! v 307 2)
|
||||
(vector-set! v 561 3)
|
||||
v))
|
||||
((dfa->1d-table d1) (make-vector 1 #f))
|
||||
((dfa->1d-table d2) #(#f
|
||||
#(#(49 50 1) #(51 51 2))
|
||||
#(#(49 49 3))
|
||||
#f))
|
||||
((dfa->1d-table d3) #(#f
|
||||
#(#(49 50 1) #(51 51 2) #(100 200 0))
|
||||
#(#(49 49 3))
|
||||
#f))
|
||||
((dfa->actions d1) (vector #f))
|
||||
((dfa->actions d2) (vector #f #f 2 3))
|
||||
((dfa->no-look d1) (vector #t))
|
||||
((dfa->no-look d2) (vector #t #f #f #t)))
|
||||
|
||||
;; build-lexer : syntax-object list ->
|
||||
;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object))
|
||||
;; each syntax object has the form (re action)
|
||||
(define (build-lexer sos)
|
||||
(define s-re-acts (for/list ([so (in-list sos)])
|
||||
(cons (parse (stx-car so))
|
||||
(stx-car (stx-cdr so)))))
|
||||
(define cache (make-cache))
|
||||
(define re-acts (for/list ([s-re-act (in-list s-re-acts)])
|
||||
(cons (->re (car s-re-act) cache)
|
||||
(cdr s-re-act))))
|
||||
(define dfa (build-dfa re-acts cache))
|
||||
(define table (dfa->1d-table dfa))
|
||||
;(print-dfa dfa)
|
||||
#;(let ((num-states (vector-length table))
|
||||
(num-vectors (length (filter values (vector->list table))))
|
||||
(num-entries (apply + (map
|
||||
(λ (x) (if x (vector-length x) 0))
|
||||
(vector->list table))))
|
||||
(num-different-entries
|
||||
(let ((ht (make-hash)))
|
||||
(for-each
|
||||
(λ (x)
|
||||
(when x
|
||||
(for-each
|
||||
(λ (y)
|
||||
(hash-set! ht y #t))
|
||||
(vector->list x))))
|
||||
(vector->list table))
|
||||
(length (hash-table-map ht cons)))))
|
||||
(printf "~a states, ~aKB\n"
|
||||
num-states
|
||||
(/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries
|
||||
(* 5 num-different-entries))) 1024)))
|
||||
(values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa)))
|
||||
|
@ -1,384 +0,0 @@
|
||||
#lang racket/base
|
||||
(require racket/list
|
||||
racket/match
|
||||
(prefix-in is: data/integer-set)
|
||||
"util.rkt")
|
||||
|
||||
(provide ->re build-epsilon build-zero build-char-set build-concat
|
||||
build-repeat build-or build-and build-neg
|
||||
epsilonR? zeroR? char-setR? concatR? repeatR? orR? andR? negR?
|
||||
char-setR-chars concatR-re1 concatR-re2 repeatR-re repeatR-low repeatR-high
|
||||
orR-res andR-res negR-re
|
||||
re-nullable? re-index)
|
||||
|
||||
;; get-index : -> nat
|
||||
(define get-index (make-counter))
|
||||
|
||||
;; An re is either
|
||||
;; - (make-epsilonR bool nat)
|
||||
;; - (make-zeroR bool nat)
|
||||
;; - (make-char-setR bool nat char-set)
|
||||
;; - (make-concatR bool nat re re)
|
||||
;; - (make-repeatR bool nat nat nat-or-+inf.0 re)
|
||||
;; - (make-orR bool nat (list-of re)) Must not directly contain any orRs
|
||||
;; - (make-andR bool nat (list-of re)) Must not directly contain any andRs
|
||||
;; - (make-negR bool nat re)
|
||||
;;
|
||||
;; Every re must have an index field globally different from all
|
||||
;; other re index fields.
|
||||
(define-struct re (nullable? index) #:inspector (make-inspector))
|
||||
(define-struct (epsilonR re) () #:inspector (make-inspector))
|
||||
(define-struct (zeroR re) () #:inspector (make-inspector))
|
||||
(define-struct (char-setR re) (chars) #:inspector (make-inspector))
|
||||
(define-struct (concatR re) (re1 re2) #:inspector (make-inspector))
|
||||
(define-struct (repeatR re) (low high re) #:inspector (make-inspector))
|
||||
(define-struct (orR re) (res) #:inspector (make-inspector))
|
||||
(define-struct (andR re) (res) #:inspector (make-inspector))
|
||||
(define-struct (negR re) (re) #:inspector (make-inspector))
|
||||
|
||||
;; e : re
|
||||
;; The unique epsilon re
|
||||
(define e (make-epsilonR #t (get-index)))
|
||||
|
||||
;; z : re
|
||||
;; The unique zero re
|
||||
(define z (make-zeroR #f (get-index)))
|
||||
|
||||
|
||||
;; s-re = char constant
|
||||
;; | string constant (sequence of characters)
|
||||
;; | re a precompiled re
|
||||
;; | (repetition low high s-re) repetition between low and high times (inclusive)
|
||||
;; | (union s-re ...)
|
||||
;; | (intersection s-re ...)
|
||||
;; | (complement s-re)
|
||||
;; | (concatenation s-re ...)
|
||||
;; | (char-range rng rng) match any character between two (inclusive)
|
||||
;; | (char-complement char-set) match any character not listed
|
||||
;; low = natural-number
|
||||
;; high = natural-number or +inf.0
|
||||
;; rng = char or string with length 1
|
||||
;; (concatenation) (repetition 0 0 x), and "" match the empty string.
|
||||
;; (union) matches no strings.
|
||||
;; (intersection) matches any string.
|
||||
|
||||
(define loc:make-range is:make-range)
|
||||
(define loc:union is:union)
|
||||
(define loc:split is:split)
|
||||
(define loc:complement is:complement)
|
||||
|
||||
;; ->re : s-re cache -> re
|
||||
(define (->re exp cache)
|
||||
(match exp
|
||||
[(? char?) (build-char-set (loc:make-range (char->integer exp)) cache)]
|
||||
[(? string?) (->re `(concatenation ,@(string->list exp)) cache)]
|
||||
[(? re?) exp]
|
||||
[`(repetition ,low ,high ,r)
|
||||
(build-repeat low high (->re r cache) cache)]
|
||||
[`(union ,rs ...)
|
||||
(build-or (flatten-res (map (λ (r) (->re r cache)) rs)
|
||||
orR? orR-res loc:union cache)
|
||||
cache)]
|
||||
[`(intersection ,rs ...)
|
||||
(build-and (flatten-res (map (λ (r) (->re r cache)) rs)
|
||||
andR? andR-res (λ (a b)
|
||||
(let-values (((i _ __) (loc:split a b))) i))
|
||||
cache)
|
||||
cache)]
|
||||
[`(complement ,r) (build-neg (->re r cache) cache)]
|
||||
[`(concatenation ,rs ...)
|
||||
(foldr (λ (x y)
|
||||
(build-concat (->re x cache) y cache))
|
||||
e
|
||||
rs)]
|
||||
[`(char-range ,c1 ,c2)
|
||||
(let ([i1 (char->integer (if (string? c1) (string-ref c1 0) c1))]
|
||||
[i2 (char->integer (if (string? c2) (string-ref c2 0) c2))])
|
||||
(if (<= i1 i2)
|
||||
(build-char-set (loc:make-range i1 i2) cache)
|
||||
z))]
|
||||
[`(char-complement ,crs ...)
|
||||
(let ([cs (->re `(union ,@crs) cache)])
|
||||
(cond
|
||||
[(zeroR? cs) (build-char-set (loc:make-range 0 max-char-num) cache)]
|
||||
[(char-setR? cs)
|
||||
(build-char-set (loc:complement (char-setR-chars cs) 0 max-char-num) cache)]
|
||||
[else z]))]))
|
||||
|
||||
|
||||
|
||||
|
||||
;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re))
|
||||
;; (char-set char-set -> char-set) cache -> (list-of re)
|
||||
;; Takes all the char-sets in l and combines them into one char-set using the combine function.
|
||||
;; Flattens out the values of type?. get-res only needs to function on things type? returns
|
||||
;; true for.
|
||||
(define (flatten-res l type? get-res combine cache)
|
||||
(let loop ([res l]
|
||||
;; chars : (union #f char-set)
|
||||
[chars #f]
|
||||
[no-chars null])
|
||||
(cond
|
||||
[(null? res)
|
||||
(if chars
|
||||
(cons (build-char-set chars cache) no-chars)
|
||||
no-chars)]
|
||||
[(char-setR? (car res))
|
||||
(if chars
|
||||
(loop (cdr res) (combine (char-setR-chars (car res)) chars) no-chars)
|
||||
(loop (cdr res) (char-setR-chars (car res)) no-chars))]
|
||||
[(type? (car res))
|
||||
(loop (append (get-res (car res)) (cdr res)) chars no-chars)]
|
||||
[else (loop (cdr res) chars (cons (car res) no-chars))])))
|
||||
|
||||
;; build-epsilon : -> re
|
||||
(define (build-epsilon) e)
|
||||
|
||||
(define (build-zero) z)
|
||||
|
||||
(define loc:integer-set-contents is:integer-set-contents)
|
||||
|
||||
;; build-char-set : char-set cache -> re
|
||||
(define (build-char-set cs cache)
|
||||
(define l (loc:integer-set-contents cs))
|
||||
(cond
|
||||
[(null? l) z]
|
||||
[else
|
||||
(cache l
|
||||
(λ ()
|
||||
(make-char-setR #f (get-index) cs)))]))
|
||||
|
||||
|
||||
|
||||
;; build-concat : re re cache -> re
|
||||
(define (build-concat r1 r2 cache)
|
||||
(cond
|
||||
[(eq? e r1) r2]
|
||||
[(eq? e r2) r1]
|
||||
[(or (eq? z r1) (eq? z r2)) z]
|
||||
[else
|
||||
(cache (cons 'concat (cons (re-index r1) (re-index r2)))
|
||||
(λ ()
|
||||
(make-concatR (and (re-nullable? r1) (re-nullable? r2))
|
||||
(get-index)
|
||||
r1 r2)))]))
|
||||
|
||||
;; build-repeat : nat nat-or-+inf.0 re cache -> re
|
||||
(define (build-repeat low high r cache)
|
||||
(let ([low (if (< low 0) 0 low)])
|
||||
(cond
|
||||
[(eq? r e) e]
|
||||
[(and (= 0 low) (or (= 0 high) (eq? z r))) e]
|
||||
[(and (= 1 low) (= 1 high)) r]
|
||||
[(and (repeatR? r)
|
||||
(eqv? (repeatR-high r) +inf.0)
|
||||
(or (= 0 (repeatR-low r))
|
||||
(= 1 (repeatR-low r))))
|
||||
(build-repeat (* low (repeatR-low r))
|
||||
+inf.0
|
||||
(repeatR-re r)
|
||||
cache)]
|
||||
[else
|
||||
(cache (cons 'repeat (cons low (cons high (re-index r))))
|
||||
(λ ()
|
||||
(make-repeatR (or (re-nullable? r) (= 0 low)) (get-index) low high r)))])))
|
||||
|
||||
|
||||
;; build-or : (list-of re) cache -> re
|
||||
(define (build-or rs cache)
|
||||
(let ([rs
|
||||
(filter
|
||||
(λ (x) (not (eq? x z)))
|
||||
(do-simple-equiv (replace rs orR? orR-res null) re-index))])
|
||||
(cond
|
||||
[(null? rs) z]
|
||||
[(null? (cdr rs)) (car rs)]
|
||||
[(memq (build-neg z cache) rs) (build-neg z cache)]
|
||||
[else
|
||||
(cache (cons 'or (map re-index rs))
|
||||
(λ ()
|
||||
(make-orR (ormap re-nullable? rs) (get-index) rs)))])))
|
||||
|
||||
;; build-and : (list-of re) cache -> re
|
||||
(define (build-and rs cache)
|
||||
(let ([rs (do-simple-equiv (replace rs andR? andR-res null) re-index)])
|
||||
(cond
|
||||
[(null? rs) (build-neg z cache)]
|
||||
[(null? (cdr rs)) (car rs)]
|
||||
[(memq z rs) z]
|
||||
[else
|
||||
(cache (cons 'and (map re-index rs))
|
||||
(λ ()
|
||||
(make-andR (andmap re-nullable? rs) (get-index) rs)))])))
|
||||
|
||||
;; build-neg : re cache -> re
|
||||
(define (build-neg r cache)
|
||||
(cond
|
||||
[(negR? r) (negR-re r)]
|
||||
[else
|
||||
(cache (cons 'neg (re-index r))
|
||||
(λ ()
|
||||
(make-negR (not (re-nullable? r)) (get-index) r)))]))
|
||||
|
||||
;; Tests for the build-functions
|
||||
(test-block ((c (make-cache))
|
||||
(isc is:integer-set-contents)
|
||||
(r1 (build-char-set (is:make-range (char->integer #\1)) c))
|
||||
(r2 (build-char-set (is:make-range (char->integer #\2)) c))
|
||||
(r3 (build-char-set (is:make-range (char->integer #\3)) c))
|
||||
(rc (build-concat r1 r2 c))
|
||||
(rc2 (build-concat r2 r1 c))
|
||||
(rr (build-repeat 0 +inf.0 rc c))
|
||||
(ro (build-or `(,rr ,rc ,rr) c))
|
||||
(ro2 (build-or `(,rc ,rr ,z) c))
|
||||
(ro3 (build-or `(,rr ,rc) c))
|
||||
(ro4 (build-or `(,(build-or `(,r1 ,r2) c)
|
||||
,(build-or `(,r2 ,r3) c)) c))
|
||||
(ra (build-and `(,rr ,rc ,rr) c))
|
||||
(ra2 (build-and `(,rc ,rr) c))
|
||||
(ra3 (build-and `(,rr ,rc) c))
|
||||
(ra4 (build-and `(,(build-and `(,r3 ,r2) c)
|
||||
,(build-and `(,r2 ,r1) c)) c))
|
||||
(rn (build-neg z c))
|
||||
(rn2 (build-neg r1 c)))
|
||||
|
||||
((isc (char-setR-chars r1)) (isc (is:make-range (char->integer #\1))))
|
||||
((isc (char-setR-chars r2)) (isc (is:make-range (char->integer #\2))))
|
||||
((isc (char-setR-chars r3)) (isc (is:make-range (char->integer #\3))))
|
||||
((build-char-set (is:make-range) c) z)
|
||||
((build-concat r1 e c) r1)
|
||||
((build-concat e r1 c) r1)
|
||||
((build-concat r1 z c) z)
|
||||
((build-concat z r1 c) z)
|
||||
((build-concat r1 r2 c) rc)
|
||||
((concatR-re1 rc) r1)
|
||||
((concatR-re2 rc) r2)
|
||||
((concatR-re1 rc2) r2)
|
||||
((concatR-re2 rc2) r1)
|
||||
(ro ro2)
|
||||
(ro ro3)
|
||||
(ro4 (build-or `(,r1 ,r2 ,r3) c))
|
||||
((orR-res ro) (list rc rr))
|
||||
((orR-res ro4) (list r1 r2 r3))
|
||||
((build-or null c) z)
|
||||
((build-or `(,r1 ,z) c) r1)
|
||||
((build-repeat 0 +inf.0 rc c) rr)
|
||||
((build-repeat 0 1 z c) e)
|
||||
((build-repeat 0 0 rc c) e)
|
||||
((build-repeat 0 +inf.0 z c) e)
|
||||
((build-repeat -1 +inf.0 z c) e)
|
||||
((build-repeat 0 +inf.0 (build-repeat 0 +inf.0 rc c) c)
|
||||
(build-repeat 0 +inf.0 rc c))
|
||||
((build-repeat 20 20 (build-repeat 0 +inf.0 rc c) c)
|
||||
(build-repeat 0 +inf.0 rc c))
|
||||
((build-repeat 20 20 (build-repeat 1 +inf.0 rc c) c)
|
||||
(build-repeat 20 +inf.0 rc c))
|
||||
((build-repeat 1 1 rc c) rc)
|
||||
((repeatR-re rr) rc)
|
||||
(ra ra2)
|
||||
(ra ra3)
|
||||
(ra4 (build-and `(,r1 ,r2 ,r3) c))
|
||||
((andR-res ra) (list rc rr))
|
||||
((andR-res ra4) (list r1 r2 r3))
|
||||
((build-and null c) (build-neg z c))
|
||||
((build-and `(,r1 ,z) c) z)
|
||||
((build-and `(,r1) c) r1)
|
||||
((build-neg r1 c) (build-neg r1 c))
|
||||
((build-neg (build-neg r1 c) c) r1)
|
||||
((negR-re (build-neg r2 c)) r2)
|
||||
((re-nullable? r1) #f)
|
||||
((re-nullable? rc) #f)
|
||||
((re-nullable? (build-concat rr rr c)) #t)
|
||||
((re-nullable? rr) #t)
|
||||
((re-nullable? (build-repeat 0 1 rc c)) #t)
|
||||
((re-nullable? (build-repeat 1 2 rc c)) #f)
|
||||
((re-nullable? (build-repeat 1 2 (build-or (list e r1) c) c)) #t)
|
||||
((re-nullable? ro) #t)
|
||||
((re-nullable? (build-or `(,r1 ,r2) c)) #f)
|
||||
((re-nullable? (build-and `(,r1 ,e) c)) #f)
|
||||
((re-nullable? (build-and `(,rr ,e) c)) #t)
|
||||
((re-nullable? (build-neg r1 c)) #t)
|
||||
((re-nullable? (build-neg rr c)) #f))
|
||||
|
||||
(test-block ((c (make-cache))
|
||||
(isc is:integer-set-contents)
|
||||
(r1 (->re #\1 c))
|
||||
(r2 (->re #\2 c))
|
||||
(r3-5 (->re '(char-range #\3 #\5) c))
|
||||
(r4 (build-or `(,r1 ,r2) c))
|
||||
(r5 (->re `(union ,r3-5 #\7) c))
|
||||
(r6 (->re #\6 c)))
|
||||
((flatten-res null orR? orR-res is:union c) null)
|
||||
((isc (char-setR-chars (car (flatten-res `(,r1) orR? orR-res is:union c))))
|
||||
(isc (is:make-range (char->integer #\1))))
|
||||
((isc (char-setR-chars (car (flatten-res `(,r4) orR? orR-res is:union c))))
|
||||
(isc (is:make-range (char->integer #\1) (char->integer #\2))))
|
||||
((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1)
|
||||
orR? orR-res is:union c))))
|
||||
(isc (is:make-range (char->integer #\1) (char->integer #\7))))
|
||||
((flatten-res `(,r1 ,r2) andR? andR-res (λ (x y)
|
||||
(let-values (((i _ __)
|
||||
(is:split x y)))
|
||||
i))
|
||||
c)
|
||||
(list z)))
|
||||
|
||||
;; ->re
|
||||
(test-block ((c (make-cache))
|
||||
(isc is:integer-set-contents)
|
||||
(r (->re #\a c))
|
||||
(rr (->re `(concatenation ,r ,r) c))
|
||||
(rrr (->re `(concatenation ,r ,rr) c))
|
||||
(rrr* (->re `(repetition 0 +inf.0 ,rrr) c)))
|
||||
((isc (char-setR-chars r)) (isc (is:make-range (char->integer #\a))))
|
||||
((->re "" c) e)
|
||||
((->re "asdf" c) (->re `(concatenation #\a #\s #\d #\f) c))
|
||||
((->re r c) r)
|
||||
((->re `(repetition 0 +inf.0 ,r) c) (build-repeat 0 +inf.0 r c))
|
||||
((->re `(repetition 1 +inf.0 ,r) c) (build-repeat 1 +inf.0 r c))
|
||||
((->re `(repetition 0 1 ,r) c) (build-repeat 0 1 r c))
|
||||
((->re `(repetition 0 1 ,rrr*) c) rrr*)
|
||||
((->re `(union (union (char-range #\a #\c)
|
||||
(char-complement (char-range #\000 #\110)
|
||||
(char-range #\112 ,(integer->char max-char-num))))
|
||||
(union (repetition 0 +inf.0 #\2))) c)
|
||||
(build-or (list (build-char-set (is:union (is:make-range 73)
|
||||
(is:make-range 97 99))
|
||||
c)
|
||||
(build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c))
|
||||
c))
|
||||
((->re `(union ,rr ,rrr) c) (build-or (list rr rrr) c))
|
||||
((->re `(union ,r) c) r)
|
||||
((->re `(union) c) z)
|
||||
((->re `(intersection (intersection #\111
|
||||
(char-complement (char-range #\000 #\110)
|
||||
(char-range #\112 ,(integer->char max-char-num))))
|
||||
(intersection (repetition 0 +inf.0 #\2))) c)
|
||||
(build-and (list (build-char-set (is:make-range 73) c)
|
||||
(build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c))
|
||||
c))
|
||||
((->re `(intersection (intersection #\000 (char-complement (char-range #\000 #\110)
|
||||
(char-range #\112 ,(integer->char max-char-num))))
|
||||
(intersection (repetition 0 +inf.0 #\2))) c)
|
||||
z)
|
||||
((->re `(intersection ,rr ,rrr) c) (build-and (list rr rrr) c))
|
||||
((->re `(intersection ,r) c) r)
|
||||
((->re `(intersection) c) (build-neg z c))
|
||||
((->re `(complement ,r) c) (build-neg r c))
|
||||
((->re `(concatenation) c) e)
|
||||
((->re `(concatenation ,rrr*) c) rrr*)
|
||||
(rr (build-concat r r c))
|
||||
((->re `(concatenation ,r ,rr ,rrr) c)
|
||||
(build-concat r (build-concat rr rrr c) c))
|
||||
((isc (char-setR-chars (->re `(char-range #\1 #\1) c))) (isc (is:make-range 49)))
|
||||
((isc (char-setR-chars (->re `(char-range #\1 #\9) c))) (isc (is:make-range 49 57)))
|
||||
((isc (char-setR-chars (->re `(char-range "1" "1") c))) (isc (is:make-range 49)))
|
||||
((isc (char-setR-chars (->re `(char-range "1" "9") c))) (isc (is:make-range 49 57)))
|
||||
((->re `(char-range "9" "1") c) z)
|
||||
((isc (char-setR-chars (->re `(char-complement) c)))
|
||||
(isc (char-setR-chars (->re `(char-range #\000 ,(integer->char max-char-num)) c))))
|
||||
((isc (char-setR-chars (->re `(char-complement #\001 (char-range #\002 ,(integer->char max-char-num))) c)))
|
||||
(isc (is:make-range 0)))
|
||||
)
|
||||
|
||||
|
@ -1,183 +0,0 @@
|
||||
#lang racket/base
|
||||
(require "util.rkt" syntax/id-table racket/syntax)
|
||||
(provide parse)
|
||||
|
||||
(define (bad-args stx num)
|
||||
(raise-syntax-error #f (format "incorrect number of arguments (should have ~a)" num) stx))
|
||||
|
||||
;; char-range-arg: syntax-object syntax-object -> nat
|
||||
;; If c contains is a character or length 1 string, returns the integer
|
||||
;; for the character. Otherwise raises a syntax error.
|
||||
(define (char-range-arg stx containing-stx)
|
||||
(define c (syntax-e stx))
|
||||
(cond
|
||||
[(char? c) (char->integer c)]
|
||||
[(and (string? c) (= (string-length c) 1))
|
||||
(char->integer (string-ref c 0))]
|
||||
[else
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"not a char or single-char string"
|
||||
containing-stx stx)]))
|
||||
|
||||
(module+ test
|
||||
(check-equal? (char-range-arg #'#\1 #'here) (char->integer #\1))
|
||||
(check-equal? (char-range-arg #'"1" #'here) (char->integer #\1)))
|
||||
|
||||
(define orig-insp (variable-reference->module-declaration-inspector
|
||||
(#%variable-reference)))
|
||||
(define (disarm stx)
|
||||
(syntax-disarm stx orig-insp))
|
||||
|
||||
;; parse : syntax-object (box (list-of syntax-object)) -> s-re (see re.rkt)
|
||||
;; checks for errors and generates the plain s-exp form for s
|
||||
;; Expands lex-abbrevs and applies lex-trans.
|
||||
(define (parse stx)
|
||||
(let loop ([stx stx]
|
||||
;; seen-lex-abbrevs: id-table
|
||||
[seen-lex-abbrevs (make-immutable-free-id-table)])
|
||||
(let ([recur (λ (s)
|
||||
(loop (syntax-rearm s stx)
|
||||
seen-lex-abbrevs))]
|
||||
[recur/abbrev (λ (s id)
|
||||
(loop (syntax-rearm s stx)
|
||||
(free-id-table-set seen-lex-abbrevs id id)))])
|
||||
(syntax-case (disarm stx) (repetition union intersection complement concatenation
|
||||
char-range char-complement)
|
||||
[_
|
||||
(identifier? stx)
|
||||
(let ([expansion (syntax-local-value/record stx (λ (v) #t))])
|
||||
(unless (lex-abbrev? expansion)
|
||||
(raise-syntax-error 'regular-expression
|
||||
"undefined abbreviation"
|
||||
stx))
|
||||
;; Check for cycles.
|
||||
(when (free-id-table-ref seen-lex-abbrevs stx (λ () #f))
|
||||
(raise-syntax-error 'regular-expression
|
||||
"illegal lex-abbrev cycle detected"
|
||||
stx
|
||||
#f
|
||||
(list (free-id-table-ref seen-lex-abbrevs stx))))
|
||||
(recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx))]
|
||||
[_
|
||||
(or (char? (syntax-e stx)) (string? (syntax-e stx)))
|
||||
(syntax-e stx)]
|
||||
[(repetition ARG ...)
|
||||
(let ([arg-list (syntax->list #'(ARG ...))])
|
||||
(unless (= 3 (length arg-list))
|
||||
(bad-args stx 2))
|
||||
(define low (syntax-e (car arg-list)))
|
||||
(define high (syntax-e (cadr arg-list)))
|
||||
(define re (caddr arg-list))
|
||||
(unless (and (number? low) (exact? low) (integer? low) (>= low 0))
|
||||
(raise-syntax-error #f "not a non-negative exact integer" stx (car arg-list)))
|
||||
(unless (or (and (number? high) (exact? high) (integer? high) (>= high 0))
|
||||
(eqv? high +inf.0))
|
||||
(raise-syntax-error #f "not a non-negative exact integer or +inf.0" stx (cadr arg-list)))
|
||||
(unless (<= low high)
|
||||
(raise-syntax-error #f "the first argument is not less than or equal to the second argument" stx))
|
||||
`(repetition ,low ,high ,(recur re)))]
|
||||
[(union RE ...)
|
||||
`(union ,@(map recur (syntax->list #'(RE ...))))]
|
||||
[(intersection RE ...)
|
||||
`(intersection ,@(map recur (syntax->list #'(RE ...))))]
|
||||
[(complement RE ...)
|
||||
(let ([re-list (syntax->list #'(RE ...))])
|
||||
(unless (= 1 (length re-list))
|
||||
(bad-args stx 1))
|
||||
`(complement ,(recur (car re-list))))]
|
||||
[(concatenation RE ...)
|
||||
`(concatenation ,@(map recur (syntax->list #'(RE ...))))]
|
||||
[(char-range ARG ...)
|
||||
(let ((arg-list (syntax->list #'(ARG ...))))
|
||||
(unless (= 2 (length arg-list))
|
||||
(bad-args stx 2))
|
||||
(let ([i1 (char-range-arg (car arg-list) stx)]
|
||||
[i2 (char-range-arg (cadr arg-list) stx)])
|
||||
(if (<= i1 i2)
|
||||
`(char-range ,(integer->char i1) ,(integer->char i2))
|
||||
(raise-syntax-error #f "the first argument does not precede or equal second argument" stx))))]
|
||||
[(char-complement ARG ...)
|
||||
(let ([arg-list (syntax->list #'(ARG ...))])
|
||||
(unless (= 1 (length arg-list))
|
||||
(bad-args stx 1))
|
||||
(define parsed (recur (car arg-list)))
|
||||
(unless (char-set? parsed)
|
||||
(raise-syntax-error #f "not a character set" stx (car arg-list)))
|
||||
`(char-complement ,parsed))]
|
||||
((OP form ...)
|
||||
(identifier? #'OP)
|
||||
(let* ([expansion (syntax-local-value/record #'OP (λ (v) #t))])
|
||||
(cond
|
||||
[(lex-trans? expansion)
|
||||
(recur ((lex-trans-f expansion) (disarm stx)))]
|
||||
[expansion
|
||||
(raise-syntax-error 'regular-expression "not a lex-trans" stx)]
|
||||
[else
|
||||
(raise-syntax-error 'regular-expression "undefined operator" stx)])))
|
||||
[_ (raise-syntax-error 'regular-expression "not a char, string, identifier, or (op args ...)" stx)]))))
|
||||
|
||||
|
||||
|
||||
;; char-set? : s-re -> bool
|
||||
;; A char-set is an re that matches only strings of length 1.
|
||||
;; char-set? is conservative.
|
||||
(define (char-set? s-re)
|
||||
(cond
|
||||
[(char? s-re)]
|
||||
[(string? s-re) (= (string-length s-re) 1)]
|
||||
[(list? s-re) (case (car s-re)
|
||||
[(union intersection) (andmap char-set? (cdr s-re))]
|
||||
[(char-range char-complement) #t]
|
||||
[(repetition) (and (= 1 (cadr s-re) (caddr s-re)) (char-set? (cadddr s-re)))]
|
||||
[(concatenation) (and (= 2 (length s-re)) (char-set? (cadr s-re)))]
|
||||
(else #f))]
|
||||
[else #f]))
|
||||
|
||||
(module+ test
|
||||
(require rackunit)
|
||||
(check-equal? (char-set? #\a) #t)
|
||||
(check-equal? (char-set? "12") #f)
|
||||
(check-equal? (char-set? "1") #t)
|
||||
(check-equal? (char-set? '(repetition 1 2 #\1)) #f)
|
||||
(check-equal? (char-set? '(repetition 1 1 "12")) #f)
|
||||
(check-equal? (char-set? '(repetition 1 1 "1")) #t)
|
||||
(check-equal? (char-set? '(repetition 6 6 "1")) #f)
|
||||
(check-equal? (char-set? '(union "1" "2" "3")) #t)
|
||||
(check-equal? (char-set? '(union "1" "" "3")) #f)
|
||||
(check-equal? (char-set? '(intersection "1" "2" (union "3" "4"))) #t)
|
||||
(check-equal? (char-set? '(intersection "1" "")) #f)
|
||||
(check-equal? (char-set? '(complement "1")) #f)
|
||||
(check-equal? (char-set? '(concatenation "1" "2")) #f)
|
||||
(check-equal? (char-set? '(concatenation "" "2")) #f)
|
||||
(check-equal? (char-set? '(concatenation "1")) #t)
|
||||
(check-equal? (char-set? '(concatenation "12")) #f)
|
||||
(check-equal? (char-set? '(char-range #\1 #\2)) #t)
|
||||
(check-equal? (char-set? '(char-complement #\1)) #t))
|
||||
|
||||
;; yikes... these test cases all have the wrong arity, now.
|
||||
;; and by "now", I mean it's been broken since before we
|
||||
;; moved to git.
|
||||
(module+ test
|
||||
(check-equal? (parse #'#\a) #\a)
|
||||
(check-equal? (parse #'"1") "1")
|
||||
(check-equal? (parse #'(repetition 1 1 #\1))
|
||||
'(repetition 1 1 #\1))
|
||||
(check-equal? (parse #'(repetition 0 +inf.0 #\1)) '(repetition 0 +inf.0 #\1))
|
||||
(check-equal? (parse #'(union #\1 (union "2") (union)))
|
||||
'(union #\1 (union "2") (union)))
|
||||
(check-equal? (parse #'(intersection #\1 (intersection "2") (intersection)))
|
||||
'(intersection #\1 (intersection "2") (intersection)))
|
||||
(check-equal? (parse #'(complement (union #\1 #\2)))
|
||||
'(complement (union #\1 #\2)))
|
||||
(check-equal? (parse #'(concatenation "1" "2" (concatenation)))
|
||||
'(concatenation "1" "2" (concatenation)))
|
||||
(check-equal? (parse #'(char-range "1" #\1)) '(char-range #\1 #\1))
|
||||
(check-equal? (parse #'(char-range #\1 "1")) '(char-range #\1 #\1))
|
||||
(check-equal? (parse #'(char-range "1" "3")) '(char-range #\1 #\3))
|
||||
(check-equal? (parse #'(char-complement (union "1" "2")))
|
||||
'(char-complement (union "1" "2")))
|
||||
(check-equal? (parse #'(char-complement (repetition 1 1 "1")))
|
||||
'(char-complement (repetition 1 1 "1")))
|
||||
(check-exn #rx"not a character set"
|
||||
(λ () (parse #'(char-complement (repetition 6 6 "1"))))))
|
@ -1,7 +0,0 @@
|
||||
#lang racket/base
|
||||
(provide make-terminals-def terminals-def-t terminals-def?
|
||||
make-e-terminals-def e-terminals-def-t e-terminals-def?)
|
||||
|
||||
;; The things needed at compile time to handle definition of tokens
|
||||
(define-struct terminals-def (t))
|
||||
(define-struct e-terminals-def (t))
|
@ -1,80 +0,0 @@
|
||||
#lang racket/base
|
||||
(require (for-syntax racket/base "token-syntax.rkt"))
|
||||
|
||||
;; Defining tokens
|
||||
|
||||
(provide define-tokens define-empty-tokens make-token token?
|
||||
(protect-out (rename-out [token-name real-token-name]))
|
||||
(protect-out (rename-out [token-value real-token-value]))
|
||||
(rename-out [token-name* token-name][token-value* token-value])
|
||||
(struct-out position)
|
||||
(struct-out position-token)
|
||||
(struct-out srcloc-token))
|
||||
|
||||
|
||||
;; A token is either
|
||||
;; - symbol
|
||||
;; - (make-token symbol any)
|
||||
(define-struct token (name value) #:inspector (make-inspector))
|
||||
|
||||
;; token-name*: token -> symbol
|
||||
(define (token-name* t)
|
||||
(cond
|
||||
[(symbol? t) t]
|
||||
[(token? t) (token-name t)]
|
||||
[else (raise-type-error 'token-name "symbol or struct:token" 0 t)]))
|
||||
|
||||
;; token-value*: token -> any
|
||||
(define (token-value* t)
|
||||
(cond
|
||||
[(symbol? t) #f]
|
||||
[(token? t) (token-value t)]
|
||||
[else (raise-type-error 'token-value "symbol or struct:token" 0 t)]))
|
||||
|
||||
(define-for-syntax (make-ctor-name n)
|
||||
(datum->syntax n
|
||||
(string->symbol (format "token-~a" (syntax-e n)))
|
||||
n
|
||||
n))
|
||||
|
||||
(define-for-syntax ((make-define-tokens empty?) stx)
|
||||
(syntax-case stx ()
|
||||
[(_ NAME (TOKEN ...))
|
||||
(andmap identifier? (syntax->list #'(TOKEN ...)))
|
||||
(with-syntax (((marked-token ...)
|
||||
(map values #;(make-syntax-introducer)
|
||||
(syntax->list #'(TOKEN ...)))))
|
||||
(quasisyntax/loc stx
|
||||
(begin
|
||||
(define-syntax NAME
|
||||
#,(if empty?
|
||||
#'(make-e-terminals-def (quote-syntax (marked-token ...)))
|
||||
#'(make-terminals-def (quote-syntax (marked-token ...)))))
|
||||
#,@(map
|
||||
(λ (n)
|
||||
(when (eq? (syntax-e n) 'error)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"Cannot define a token named error."
|
||||
stx))
|
||||
(if empty?
|
||||
#`(define (#,(make-ctor-name n))
|
||||
'#,n)
|
||||
#`(define (#,(make-ctor-name n) x)
|
||||
(make-token '#,n x))))
|
||||
(syntax->list #'(TOKEN ...)))
|
||||
#;(define marked-token #f) #;...)))]
|
||||
[(_ ...)
|
||||
(raise-syntax-error #f
|
||||
"must have the form (define-tokens name (identifier ...)) or (define-empty-tokens name (identifier ...))"
|
||||
stx)]))
|
||||
|
||||
(define-syntax define-tokens (make-define-tokens #f))
|
||||
(define-syntax define-empty-tokens (make-define-tokens #t))
|
||||
|
||||
(define-struct position (offset line col) #:inspector #f)
|
||||
(define-struct position-token (token start-pos end-pos) #:inspector #f)
|
||||
|
||||
(define-struct srcloc-token (token srcloc) #:inspector #f)
|
||||
|
||||
|
@ -1,250 +0,0 @@
|
||||
#lang racket/base
|
||||
;; Constructs to create and access grammars, the internal
|
||||
;; representation of the input to the parser generator.
|
||||
|
||||
(require racket/class
|
||||
(except-in racket/list remove-duplicates)
|
||||
"yacc-helper.rkt"
|
||||
racket/contract)
|
||||
|
||||
;; Each production has a unique index 0 <= index <= number of productions
|
||||
(define-struct prod (lhs rhs index prec action) #:inspector (make-inspector) #:mutable)
|
||||
|
||||
;; The dot-pos field is the index of the element in the rhs
|
||||
;; of prod that the dot immediately precedes.
|
||||
;; Thus 0 <= dot-pos <= (vector-length rhs).
|
||||
(define-struct item (prod dot-pos) #:inspector (make-inspector))
|
||||
|
||||
;; gram-sym = (union term? non-term?)
|
||||
;; Each term has a unique index 0 <= index < number of terms
|
||||
;; Each non-term has a unique index 0 <= index < number of non-terms
|
||||
(define-struct term (sym index prec) #:inspector (make-inspector) #:mutable)
|
||||
(define-struct non-term (sym index) #:inspector (make-inspector) #:mutable)
|
||||
|
||||
;; a precedence declaration.
|
||||
(define-struct prec (num assoc) #:inspector (make-inspector))
|
||||
|
||||
(provide/contract
|
||||
[make-item (prod? (or/c #f natural-number/c) . -> . item?)]
|
||||
[make-term (symbol? (or/c #f natural-number/c) (or/c prec? #f) . -> . term?)]
|
||||
[make-non-term (symbol? (or/c #f natural-number/c) . -> . non-term?)]
|
||||
[make-prec (natural-number/c (or/c 'left 'right 'nonassoc) . -> . prec?)]
|
||||
[make-prod (non-term? (vectorof (or/c non-term? term?))
|
||||
(or/c #f natural-number/c) (or/c #f prec?) syntax? . -> . prod?)])
|
||||
|
||||
(provide
|
||||
;; Things that work on items
|
||||
start-item? item-prod item->string
|
||||
sym-at-dot move-dot-right item<? item-dot-pos
|
||||
|
||||
;; Things that operate on grammar symbols
|
||||
gram-sym-symbol gram-sym-index term-prec gram-sym->string
|
||||
non-term? term? non-term<? term<?
|
||||
term-list->bit-vector term-index non-term-index
|
||||
|
||||
;; Things that work on precs
|
||||
prec-num prec-assoc
|
||||
|
||||
grammar%
|
||||
|
||||
;; Things that work on productions
|
||||
prod-index prod-prec prod-rhs prod-lhs prod-action)
|
||||
|
||||
|
||||
;;---------------------- LR items --------------------------
|
||||
|
||||
;; item<?: LR-item * LR-item -> bool
|
||||
;; Lexicographic comparison on two items.
|
||||
(define (item<? i1 i2)
|
||||
(define p1 (prod-index (item-prod i1)))
|
||||
(define p2 (prod-index (item-prod i2)))
|
||||
(or (< p1 p2)
|
||||
(and (= p1 p2)
|
||||
(< (item-dot-pos i1) (item-dot-pos i2)))))
|
||||
|
||||
;; start-item?: LR-item -> bool
|
||||
;; The start production always has index 0
|
||||
(define (start-item? i)
|
||||
(zero? (non-term-index (prod-lhs (item-prod i)))))
|
||||
|
||||
|
||||
;; move-dot-right: LR-item -> LR-item | #f
|
||||
;; moves the dot to the right in the item, unless it is at its
|
||||
;; rightmost, then it returns false
|
||||
(define (move-dot-right i)
|
||||
(cond
|
||||
[(= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f]
|
||||
[else (make-item (item-prod i)
|
||||
(add1 (item-dot-pos i)))]))
|
||||
|
||||
;; sym-at-dot: LR-item -> gram-sym | #f
|
||||
;; returns the symbol after the dot in the item or #f if there is none
|
||||
(define (sym-at-dot i)
|
||||
(define dp (item-dot-pos i))
|
||||
(define rhs (prod-rhs (item-prod i)))
|
||||
(cond
|
||||
[(= dp (vector-length rhs)) #f]
|
||||
[else (vector-ref rhs dp)]))
|
||||
|
||||
|
||||
;; print-item: LR-item ->
|
||||
(define (item->string it)
|
||||
(define print-sym (λ (i)
|
||||
(let ((gs (vector-ref (prod-rhs (item-prod it)) i)))
|
||||
(cond
|
||||
((term? gs) (format "~a " (term-sym gs)))
|
||||
(else (format "~a " (non-term-sym gs)))))))
|
||||
(string-append
|
||||
(format "~a -> " (non-term-sym (prod-lhs (item-prod it))))
|
||||
(let loop ((i 0))
|
||||
(cond
|
||||
[(= i (vector-length (prod-rhs (item-prod it))))
|
||||
(if (= i (item-dot-pos it))
|
||||
". "
|
||||
"")]
|
||||
[(= i (item-dot-pos it))
|
||||
(string-append ". " (print-sym i) (loop (add1 i)))]
|
||||
[else (string-append (print-sym i) (loop (add1 i)))]))))
|
||||
|
||||
;; --------------------- Grammar Symbols --------------------------
|
||||
|
||||
(define (non-term<? nt1 nt2)
|
||||
(< (non-term-index nt1) (non-term-index nt2)))
|
||||
|
||||
(define (term<? nt1 nt2)
|
||||
(< (term-index nt1) (term-index nt2)))
|
||||
|
||||
(define (gram-sym-index gs)
|
||||
(if (term? gs)
|
||||
(term-index gs)
|
||||
(non-term-index gs)))
|
||||
|
||||
(define (gram-sym-symbol gs)
|
||||
(if (term? gs)
|
||||
(term-sym gs)
|
||||
(non-term-sym gs)))
|
||||
|
||||
(define (gram-sym->string gs)
|
||||
(symbol->string (gram-sym-symbol gs)))
|
||||
|
||||
;; term-list->bit-vector: term list -> int
|
||||
;; Creates a number where the nth bit is 1 if the term with index n is in
|
||||
;; the list, and whose nth bit is 0 otherwise
|
||||
(define (term-list->bit-vector terms)
|
||||
(if (null? terms)
|
||||
0
|
||||
(bitwise-ior (arithmetic-shift 1 (term-index (car terms)))
|
||||
(term-list->bit-vector (cdr terms)))))
|
||||
|
||||
|
||||
;; ------------------------- Grammar ------------------------------
|
||||
|
||||
(define grammar%
|
||||
(class object%
|
||||
(super-instantiate ())
|
||||
;; prods: production list list
|
||||
;; where there is one production list per non-term
|
||||
(init prods)
|
||||
;; init-prods: production list
|
||||
;; The productions parsing can start from
|
||||
;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable
|
||||
(init-field init-prods terms non-terms end-terms)
|
||||
|
||||
;; list of all productions
|
||||
(define all-prods (apply append prods))
|
||||
(define num-prods (length all-prods))
|
||||
(define num-terms (length terms))
|
||||
(define num-non-terms (length non-terms))
|
||||
|
||||
(for ([(nt count) (in-indexed non-terms)])
|
||||
(set-non-term-index! nt count))
|
||||
|
||||
(for ([(t count) (in-indexed terms)])
|
||||
(set-term-index! t count))
|
||||
|
||||
(for ([(prod count) (in-indexed all-prods)])
|
||||
(set-prod-index! prod count))
|
||||
|
||||
;; indexed by the index of the non-term - contains the list of productions for that non-term
|
||||
(define nt->prods
|
||||
(let ((v (make-vector (length prods) #f)))
|
||||
(for ([prods (in-list prods)])
|
||||
(vector-set! v (non-term-index (prod-lhs (car prods))) prods))
|
||||
v))
|
||||
|
||||
(define nullable-non-terms
|
||||
(nullable all-prods num-non-terms))
|
||||
|
||||
(define/public (get-num-terms) num-terms)
|
||||
(define/public (get-num-non-terms) num-non-terms)
|
||||
|
||||
(define/public (get-prods-for-non-term nt)
|
||||
(vector-ref nt->prods (non-term-index nt)))
|
||||
(define/public (get-prods) all-prods)
|
||||
(define/public (get-init-prods) init-prods)
|
||||
|
||||
(define/public (get-terms) terms)
|
||||
(define/public (get-non-terms) non-terms)
|
||||
|
||||
(define/public (get-num-prods) num-prods)
|
||||
(define/public (get-end-terms) end-terms)
|
||||
|
||||
(define/public (nullable-non-term? nt)
|
||||
(vector-ref nullable-non-terms (non-term-index nt)))
|
||||
|
||||
(define/public (nullable-after-dot? item)
|
||||
(define rhs (prod-rhs (item-prod item)))
|
||||
(define prod-length (vector-length rhs))
|
||||
(let loop ((i (item-dot-pos item)))
|
||||
(cond
|
||||
[(< i prod-length)
|
||||
(and (non-term? (vector-ref rhs i))
|
||||
(nullable-non-term? (vector-ref rhs i))
|
||||
(loop (add1 i)))]
|
||||
[(= i prod-length)])))
|
||||
|
||||
(define/public (nullable-non-term-thunk)
|
||||
(λ (nt) (nullable-non-term? nt)))
|
||||
(define/public (nullable-after-dot?-thunk)
|
||||
(λ (item) (nullable-after-dot? item)))))
|
||||
|
||||
|
||||
;; nullable: production list * int -> non-term set
|
||||
;; determines which non-terminals can derive epsilon
|
||||
(define (nullable prods num-nts)
|
||||
(define nullable (make-vector num-nts #f))
|
||||
(define added #f)
|
||||
|
||||
;; possible-nullable: producion list -> production list
|
||||
;; Removes all productions that have a terminal
|
||||
(define (possible-nullable prods)
|
||||
(for/list ([prod (in-list prods)]
|
||||
#:when (vector-andmap non-term? (prod-rhs prod)))
|
||||
prod))
|
||||
|
||||
;; set-nullables: production list -> production list
|
||||
;; makes one pass through the productions, adding the ones
|
||||
;; known to be nullable now to nullable and returning a list
|
||||
;; of productions that we don't know about yet.
|
||||
(define (set-nullables prods)
|
||||
(cond
|
||||
[(null? prods) null]
|
||||
[(vector-ref nullable (gram-sym-index (prod-lhs (car prods))))
|
||||
(set-nullables (cdr prods))]
|
||||
[(vector-andmap (λ (nt) (vector-ref nullable (gram-sym-index nt))) (prod-rhs (car prods)))
|
||||
(vector-set! nullable (gram-sym-index (prod-lhs (car prods))) #t)
|
||||
(set! added #t)
|
||||
(set-nullables (cdr prods))]
|
||||
[else (cons (car prods) (set-nullables (cdr prods)))]))
|
||||
(let loop ((P (possible-nullable prods)))
|
||||
(cond
|
||||
[(null? P) nullable]
|
||||
[else
|
||||
(set! added #f)
|
||||
(define new-P (set-nullables P))
|
||||
(if added
|
||||
(loop new-P)
|
||||
nullable)])))
|
||||
|
||||
|
||||
|
@ -1,53 +0,0 @@
|
||||
#lang racket/base
|
||||
(provide digraph)
|
||||
|
||||
(define (zero-thunk) 0)
|
||||
|
||||
;; digraph:
|
||||
;; ('a list) * ('a -> 'a list) * ('a -> 'b) * ('b * 'b -> 'b) * (-> 'b)
|
||||
;; -> ('a -> 'b)
|
||||
;; DeRemer and Pennello 1982
|
||||
;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)}
|
||||
;; We use a hash-table to represent the result function 'a -> 'b set, so
|
||||
;; the values of type 'a must be comparable with eq?.
|
||||
|
||||
(define (digraph nodes edges f- union fail)
|
||||
(define results (make-hasheq))
|
||||
(define (f x) (hash-ref results x fail))
|
||||
;; Maps elements of 'a to integers.
|
||||
(define N (make-hasheq))
|
||||
(define (get-N x) (hash-ref N x zero-thunk))
|
||||
(define (set-N x d) (hash-set! N x d))
|
||||
(define stack null)
|
||||
(define (push x) (set! stack (cons x stack)))
|
||||
(define (pop) (begin0
|
||||
(car stack)
|
||||
(set! stack (cdr stack))))
|
||||
(define (depth) (length stack))
|
||||
|
||||
;; traverse: 'a ->
|
||||
(define (traverse x)
|
||||
(push x)
|
||||
(define d (depth))
|
||||
(set-N x d)
|
||||
(hash-set! results x (f- x))
|
||||
(for-each (λ (y)
|
||||
(when (= 0 (get-N y))
|
||||
(traverse y))
|
||||
(hash-set! results
|
||||
x
|
||||
(union (f x) (f y)))
|
||||
(set-N x (min (get-N x) (get-N y))))
|
||||
(edges x))
|
||||
(when (= d (get-N x))
|
||||
(let loop ([p (pop)])
|
||||
(set-N p +inf.0)
|
||||
(hash-set! results p (f x))
|
||||
(when (not (eq? x p))
|
||||
(loop (pop))))))
|
||||
;; Will map elements of 'a to 'b sets
|
||||
(for ([x (in-list nodes)]
|
||||
#:when (zero? (get-N x)))
|
||||
(traverse x))
|
||||
f)
|
||||
|
@ -1,297 +0,0 @@
|
||||
#lang racket/base
|
||||
(require "yacc-helper.rkt"
|
||||
"../private-lex/token-syntax.rkt"
|
||||
"grammar.rkt"
|
||||
racket/class
|
||||
racket/contract
|
||||
(for-template racket/base))
|
||||
|
||||
;; routines for parsing the input to the parser generator and producing a
|
||||
;; grammar (See grammar.rkt)
|
||||
|
||||
(define (is-a-grammar%? x) (is-a? x grammar%))
|
||||
(provide/contract
|
||||
[parse-input ((listof identifier?) (listof identifier?) (listof identifier?)
|
||||
(or/c #f syntax?) syntax? any/c . -> . is-a-grammar%?)]
|
||||
[get-term-list ((listof identifier?) . -> . (listof identifier?))])
|
||||
|
||||
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
|
||||
|
||||
;; get-args: ??? -> (values (listof syntax) (or/c #f (cons integer? stx)))
|
||||
(define (get-args i rhs src-pos term-defs)
|
||||
(define empty-table (make-hasheq))
|
||||
(define biggest-pos #f)
|
||||
(hash-set! empty-table 'error #t)
|
||||
(for* ([td (in-list term-defs)]
|
||||
[v (in-value (syntax-local-value td))]
|
||||
#:when (e-terminals-def? v)
|
||||
[s (in-list (syntax->list (e-terminals-def-t v)))])
|
||||
(hash-set! empty-table (syntax->datum s) #t))
|
||||
(define args
|
||||
(let get-args ([i i][rhs rhs])
|
||||
(cond
|
||||
[(null? rhs) null]
|
||||
[else
|
||||
(define b (car rhs))
|
||||
(define name (if (hash-ref empty-table (syntax->datum (car rhs)) #f)
|
||||
(gensym)
|
||||
(string->symbol (format "$~a" i))))
|
||||
(cond
|
||||
[src-pos
|
||||
(define start-pos-id
|
||||
(datum->syntax b (string->symbol (format "$~a-start-pos" i)) b stx-for-original-property))
|
||||
(define end-pos-id
|
||||
(datum->syntax b (string->symbol (format "$~a-end-pos" i)) b stx-for-original-property))
|
||||
(set! biggest-pos (cons start-pos-id end-pos-id))
|
||||
(list* (datum->syntax b name b stx-for-original-property)
|
||||
start-pos-id
|
||||
end-pos-id
|
||||
(get-args (add1 i) (cdr rhs)))]
|
||||
[else
|
||||
(list* (datum->syntax b name b stx-for-original-property)
|
||||
(get-args (add1 i) (cdr rhs)))])])))
|
||||
(values args biggest-pos))
|
||||
|
||||
;; Given the list of terminal symbols and the precedence/associativity definitions,
|
||||
;; builds terminal structures (See grammar.rkt)
|
||||
;; build-terms: symbol list * symbol list list -> term list
|
||||
(define (build-terms term-list precs)
|
||||
(define counter 0)
|
||||
;;(term-list (cons (gensym) term-list))
|
||||
;; Will map a terminal symbol to its precedence/associativity
|
||||
(define prec-table (make-hasheq))
|
||||
|
||||
;; Fill the prec table
|
||||
(for ([p-decl (in-list precs)])
|
||||
(define assoc (car p-decl))
|
||||
(for ([term-sym (in-list (cdr p-decl))])
|
||||
(hash-set! prec-table term-sym (make-prec counter assoc)))
|
||||
(set! counter (add1 counter)))
|
||||
|
||||
;; Build the terminal structures
|
||||
(for/list ([term-sym (in-list term-list)])
|
||||
(make-term term-sym
|
||||
#f
|
||||
(hash-ref prec-table term-sym (λ () #f)))))
|
||||
|
||||
;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.rkt)
|
||||
;; get-terms-from-def: identifier? -> (listof identifier?)
|
||||
(define (get-terms-from-def term-syn)
|
||||
(define t (syntax-local-value term-syn #f))
|
||||
(cond
|
||||
[(terminals-def? t) (syntax->list (terminals-def-t t))]
|
||||
[(e-terminals-def? t) (syntax->list (e-terminals-def-t t))]
|
||||
[else
|
||||
(raise-syntax-error
|
||||
'parser-tokens
|
||||
"undefined token group"
|
||||
term-syn)]))
|
||||
|
||||
(define (get-term-list term-group-names)
|
||||
(remove-duplicates
|
||||
(cons (datum->syntax #f 'error)
|
||||
(apply append (map get-terms-from-def term-group-names)))))
|
||||
|
||||
(define (parse-input term-defs start ends prec-decls prods src-pos)
|
||||
(define start-syms (map syntax-e start))
|
||||
(define list-of-terms (map syntax-e (get-term-list term-defs)))
|
||||
(define end-terms
|
||||
(for/list ([end (in-list ends)])
|
||||
(unless (memq (syntax-e end) list-of-terms)
|
||||
(raise-syntax-error
|
||||
'parser-end-tokens
|
||||
(format "End token ~a not defined as a token"
|
||||
(syntax-e end))
|
||||
end))
|
||||
(syntax-e end)))
|
||||
;; Get the list of terminals out of input-terms
|
||||
(define list-of-non-terms
|
||||
(syntax-case prods ()
|
||||
[((NON-TERM PRODUCTION ...) ...)
|
||||
(begin
|
||||
(for ([nts (in-list (syntax->list #'(NON-TERM ...)))]
|
||||
#:when (memq (syntax->datum nts) list-of-terms))
|
||||
(raise-syntax-error
|
||||
'parser-non-terminals
|
||||
(format "~a used as both token and non-terminal" (syntax->datum nts))
|
||||
nts))
|
||||
(let ([dup (duplicate-list? (syntax->datum #'(NON-TERM ...)))])
|
||||
(when dup
|
||||
(raise-syntax-error
|
||||
'parser-non-terminals
|
||||
(format "non-terminal ~a defined multiple times" dup)
|
||||
prods)))
|
||||
(syntax->datum #'(NON-TERM ...)))]
|
||||
[_ (raise-syntax-error
|
||||
'parser-grammar
|
||||
"Grammar must be of the form (grammar (non-terminal productions ...) ...)"
|
||||
prods)]))
|
||||
;; Check the precedence declarations for errors and turn them into data
|
||||
(define precs
|
||||
(syntax-case prec-decls ()
|
||||
[((TYPE TERM ...) ...)
|
||||
(let ([p-terms (syntax->datum #'(TERM ... ...))])
|
||||
(cond
|
||||
[(duplicate-list? p-terms) =>
|
||||
(λ (d)
|
||||
(raise-syntax-error
|
||||
'parser-precedences
|
||||
(format "duplicate precedence declaration for token ~a" d)
|
||||
prec-decls))]
|
||||
[else (for ([t (in-list (syntax->list #'(TERM ... ...)))]
|
||||
#:when (not (memq (syntax->datum t) list-of-terms)))
|
||||
(raise-syntax-error
|
||||
'parser-precedences
|
||||
(format "Precedence declared for non-token ~a" (syntax->datum t))
|
||||
t))
|
||||
(for ([type (in-list (syntax->list #'(TYPE ...)))]
|
||||
#:unless (memq (syntax->datum type) `(left right nonassoc)))
|
||||
(raise-syntax-error
|
||||
'parser-precedences
|
||||
"Associativity must be left, right or nonassoc"
|
||||
type))
|
||||
(syntax->datum prec-decls)]))]
|
||||
[#f null]
|
||||
[_ (raise-syntax-error
|
||||
'parser-precedences
|
||||
"Precedence declaration must be of the form (precs (assoc term ...) ...) where assoc is left, right or nonassoc"
|
||||
prec-decls)]))
|
||||
|
||||
(define terms (build-terms list-of-terms precs))
|
||||
(define non-terms (map (λ (non-term) (make-non-term non-term #f))
|
||||
list-of-non-terms))
|
||||
(define term-table (make-hasheq))
|
||||
(define non-term-table (make-hasheq))
|
||||
|
||||
(for ([t (in-list terms)])
|
||||
(hash-set! term-table (gram-sym-symbol t) t))
|
||||
|
||||
(for ([nt (in-list non-terms)])
|
||||
(hash-set! non-term-table (gram-sym-symbol nt) nt))
|
||||
|
||||
;; parse-prod: syntax-object -> gram-sym vector
|
||||
(define (parse-prod prod-so)
|
||||
(syntax-case prod-so ()
|
||||
[(PROD-RHS-SYM ...)
|
||||
(andmap identifier? (syntax->list prod-so))
|
||||
(begin
|
||||
(for ([t (in-list (syntax->list prod-so))]
|
||||
#:when (memq (syntax->datum t) end-terms))
|
||||
(raise-syntax-error
|
||||
'parser-production-rhs
|
||||
(format "~a is an end token and cannot be used in a production" (syntax->datum t))
|
||||
t))
|
||||
(for/vector ([s (in-list (syntax->list prod-so))])
|
||||
(cond
|
||||
[(hash-ref term-table (syntax->datum s) #f)]
|
||||
[(hash-ref non-term-table (syntax->datum s) #f)]
|
||||
[else (raise-syntax-error
|
||||
'parser-production-rhs
|
||||
(format "~a is not declared as a terminal or non-terminal" (syntax->datum s))
|
||||
s)])))]
|
||||
[_ (raise-syntax-error
|
||||
'parser-production-rhs
|
||||
"production right-hand-side must have form (symbol ...)"
|
||||
prod-so)]))
|
||||
|
||||
;; parse-action: syntax-object * syntax-object -> syntax-object
|
||||
(define (parse-action rhs act-in)
|
||||
(define-values (args biggest) (get-args 1 (syntax->list rhs) src-pos term-defs))
|
||||
(define act
|
||||
(if biggest
|
||||
(with-syntax ([(CAR-BIGGEST . CDR-BIGGEST) biggest]
|
||||
[$N-START-POS (datum->syntax (car biggest) '$n-start-pos)]
|
||||
[$N-END-POS (datum->syntax (cdr biggest) '$n-end-pos)]
|
||||
[ACT-IN act-in])
|
||||
#'(let ([$N-START-POS CAR-BIGGEST]
|
||||
[$N-END-POS CDR-BIGGEST])
|
||||
ACT-IN))
|
||||
act-in))
|
||||
(with-syntax ([ARGS args][ACT act])
|
||||
(syntax/loc #'ACT (λ ARGS ACT))))
|
||||
|
||||
;; parse-prod+action: non-term * syntax-object -> production
|
||||
(define (parse-prod+action nt prod-so)
|
||||
(syntax-case prod-so ()
|
||||
[(PROD-RHS ACTION)
|
||||
(let ([p (parse-prod #'PROD-RHS)])
|
||||
(make-prod
|
||||
nt
|
||||
p
|
||||
#f
|
||||
(let loop ([i (sub1 (vector-length p))])
|
||||
(if (>= i 0)
|
||||
(let ([gs (vector-ref p i)])
|
||||
(if (term? gs)
|
||||
(term-prec gs)
|
||||
(loop (sub1 i))))
|
||||
#f))
|
||||
(parse-action #'PROD-RHS #'ACTION)))]
|
||||
[(PROD-RHS (PREC TERM) ACTION)
|
||||
(identifier? #'TERM)
|
||||
(let ([p (parse-prod #'PROD-RHS)])
|
||||
(make-prod
|
||||
nt
|
||||
p
|
||||
#f
|
||||
(term-prec
|
||||
(cond
|
||||
[(hash-ref term-table (syntax->datum #'TERM) #f)]
|
||||
[else (raise-syntax-error
|
||||
'parser-production-rhs
|
||||
(format
|
||||
"unrecognized terminal ~a in precedence declaration"
|
||||
(syntax->datum #'TERM))
|
||||
#'TERM)]))
|
||||
(parse-action #'PROD-RHS #'ACTION)))]
|
||||
[_ (raise-syntax-error
|
||||
'parser-production-rhs
|
||||
"production must have form [(symbol ...) expression] or [(symbol ...) (prec symbol) expression]"
|
||||
prod-so)]))
|
||||
|
||||
;; parse-prod-for-nt: syntax-object -> production list
|
||||
(define (parse-prods-for-nt prods-so)
|
||||
(syntax-case prods-so ()
|
||||
[(NT PRODUCTIONS ...)
|
||||
(positive? (length (syntax->list #'(PRODUCTIONS ...))))
|
||||
(let ([nt (hash-ref non-term-table (syntax->datum #'NT))])
|
||||
(map (λ (p) (parse-prod+action nt p)) (syntax->list #'(PRODUCTIONS ...))))]
|
||||
[_ (raise-syntax-error
|
||||
'parser-productions
|
||||
"A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side"
|
||||
prods-so)]))
|
||||
|
||||
(for ([sstx (in-list start)]
|
||||
[ssym (in-list start-syms)]
|
||||
#:unless (memq ssym list-of-non-terms))
|
||||
(raise-syntax-error
|
||||
'parser-start
|
||||
(format "Start symbol ~a not defined as a non-terminal" ssym)
|
||||
sstx))
|
||||
|
||||
(define starts (map (λ (x) (make-non-term (gensym) #f)) start-syms))
|
||||
(define end-non-terms (map (λ (x) (make-non-term (gensym) #f)) start-syms))
|
||||
(define parsed-prods (map parse-prods-for-nt (syntax->list prods)))
|
||||
(define start-prods (for/list ([start (in-list starts)]
|
||||
[end-non-term (in-list end-non-terms)])
|
||||
(list (make-prod start (vector end-non-term) #f #f #'values))))
|
||||
(define new-prods
|
||||
(append start-prods
|
||||
(for/list ([end-nt (in-list end-non-terms)]
|
||||
[start-sym (in-list start-syms)])
|
||||
(for/list ([end (in-list end-terms)])
|
||||
(make-prod end-nt
|
||||
(vector
|
||||
(hash-ref non-term-table start-sym)
|
||||
(hash-ref term-table end))
|
||||
#f
|
||||
#f
|
||||
#'values)))
|
||||
parsed-prods))
|
||||
|
||||
(make-object grammar%
|
||||
new-prods
|
||||
(map car start-prods)
|
||||
terms
|
||||
(append starts (append end-non-terms non-terms))
|
||||
(map (λ (term-name) (hash-ref term-table term-name)) end-terms)))
|
@ -1,252 +0,0 @@
|
||||
#lang racket/base
|
||||
(require "lr0.rkt"
|
||||
"grammar.rkt"
|
||||
racket/list
|
||||
racket/class)
|
||||
|
||||
;; Compute LALR lookaheads from DeRemer and Pennello 1982
|
||||
|
||||
(provide compute-LA)
|
||||
|
||||
;; compute-DR: LR0-automaton * grammar -> (trans-key -> term set)
|
||||
;; computes for each state, non-term transition pair, the terminals
|
||||
;; which can transition out of the resulting state
|
||||
;; output term set is represented in bit-vector form
|
||||
(define ((compute-DR a g) tk)
|
||||
(define r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))
|
||||
(term-list->bit-vector
|
||||
(filter (λ (term) (send a run-automaton r term)) (send g get-terms))))
|
||||
|
||||
;; compute-reads:
|
||||
;; LR0-automaton * grammar -> (trans-key -> trans-key list)
|
||||
(define (compute-reads a g)
|
||||
(define nullable-non-terms (filter (λ (nt) (send g nullable-non-term? nt)) (send g get-non-terms)))
|
||||
(λ (tk)
|
||||
(define r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))
|
||||
(for/list ([non-term (in-list nullable-non-terms)]
|
||||
#:when (send a run-automaton r non-term))
|
||||
(make-trans-key r non-term))))
|
||||
|
||||
;; compute-read: LR0-automaton * grammar -> (trans-key -> term set)
|
||||
;; output term set is represented in bit-vector form
|
||||
(define (compute-read a g)
|
||||
(define dr (compute-DR a g))
|
||||
(define reads (compute-reads a g))
|
||||
(digraph-tk->terml (send a get-mapped-non-term-keys)
|
||||
reads
|
||||
dr
|
||||
(send a get-num-states)))
|
||||
;; returns the list of all k such that state k transitions to state start on the
|
||||
;; transitions in rhs (in order)
|
||||
(define (run-lr0-backward a rhs dot-pos start num-states)
|
||||
(let loop ([states (list start)]
|
||||
[i (sub1 dot-pos)])
|
||||
(cond
|
||||
[(< i 0) states]
|
||||
[else (loop (send a run-automaton-back states (vector-ref rhs i))
|
||||
(sub1 i))])))
|
||||
|
||||
;; prod->items-for-include: grammar * prod * non-term -> lr0-item list
|
||||
;; returns the list of all (B -> beta . nt gamma) such that prod = (B -> beta nt gamma)
|
||||
;; and gamma =>* epsilon
|
||||
(define (prod->items-for-include g prod nt)
|
||||
(define rhs (prod-rhs prod))
|
||||
(define rhs-l (vector-length rhs))
|
||||
(append (if (and (> rhs-l 0) (eq? nt (vector-ref rhs (sub1 rhs-l))))
|
||||
(list (make-item prod (sub1 rhs-l)))
|
||||
null)
|
||||
(let loop ([i (sub1 rhs-l)])
|
||||
(cond
|
||||
[(and (> i 0)
|
||||
(non-term? (vector-ref rhs i))
|
||||
(send g nullable-non-term? (vector-ref rhs i)))
|
||||
(if (eq? nt (vector-ref rhs (sub1 i)))
|
||||
(cons (make-item prod (sub1 i))
|
||||
(loop (sub1 i)))
|
||||
(loop (sub1 i)))]
|
||||
[else null]))))
|
||||
|
||||
;; prod-list->items-for-include: grammar * prod list * non-term -> lr0-item list
|
||||
;; return the list of all (B -> beta . nt gamma) such that (B -> beta nt gamma) in prod-list
|
||||
;; and gamma =>* epsilon
|
||||
(define (prod-list->items-for-include g prod-list nt)
|
||||
(apply append (map (λ (prod) (prod->items-for-include g prod nt)) prod-list)))
|
||||
|
||||
;; comput-includes: lr0-automaton * grammar -> (trans-key -> trans-key list)
|
||||
(define (compute-includes a g)
|
||||
(define num-states (send a get-num-states))
|
||||
(define items-for-input-nt (make-vector (send g get-num-non-terms) null))
|
||||
(for ([input-nt (in-list (send g get-non-terms))])
|
||||
(vector-set! items-for-input-nt (non-term-index input-nt)
|
||||
(prod-list->items-for-include g (send g get-prods) input-nt)))
|
||||
(λ (tk)
|
||||
(define goal-state (trans-key-st tk))
|
||||
(define non-term (trans-key-gs tk))
|
||||
(define items (vector-ref items-for-input-nt (non-term-index non-term)))
|
||||
(trans-key-list-remove-dups
|
||||
(apply append
|
||||
(for/list ([item (in-list items)])
|
||||
(define prod (item-prod item))
|
||||
(define rhs (prod-rhs prod))
|
||||
(define lhs (prod-lhs prod))
|
||||
(map (λ (state) (make-trans-key state lhs))
|
||||
(run-lr0-backward a
|
||||
rhs
|
||||
(item-dot-pos item)
|
||||
goal-state
|
||||
num-states)))))))
|
||||
|
||||
;; compute-lookback: lr0-automaton * grammar -> (kernel * proc -> trans-key list)
|
||||
(define (compute-lookback a g)
|
||||
(define num-states (send a get-num-states))
|
||||
(λ (state prod)
|
||||
(map (λ (k) (make-trans-key k (prod-lhs prod)))
|
||||
(run-lr0-backward a (prod-rhs prod) (vector-length (prod-rhs prod)) state num-states))))
|
||||
|
||||
;; compute-follow: LR0-automaton * grammar -> (trans-key -> term set)
|
||||
;; output term set is represented in bit-vector form
|
||||
(define (compute-follow a g includes)
|
||||
(define read (compute-read a g))
|
||||
(digraph-tk->terml (send a get-mapped-non-term-keys)
|
||||
includes
|
||||
read
|
||||
(send a get-num-states)))
|
||||
|
||||
;; compute-LA: LR0-automaton * grammar -> kernel * prod -> term set
|
||||
;; output term set is represented in bit-vector form
|
||||
(define (compute-LA a g)
|
||||
(define includes (compute-includes a g))
|
||||
(define lookback (compute-lookback a g))
|
||||
(define follow (compute-follow a g includes))
|
||||
(λ (k p)
|
||||
(define l (lookback k p))
|
||||
(define f (map follow l))
|
||||
(apply bitwise-ior (cons 0 f))))
|
||||
|
||||
|
||||
(define (print-DR dr a g)
|
||||
(print-input-st-sym dr "DR" a g print-output-terms))
|
||||
(define (print-Read Read a g)
|
||||
(print-input-st-sym Read "Read" a g print-output-terms))
|
||||
(define (print-includes i a g)
|
||||
(print-input-st-sym i "includes" a g print-output-st-nt))
|
||||
(define (print-lookback l a g)
|
||||
(print-input-st-prod l "lookback" a g print-output-st-nt))
|
||||
(define (print-follow f a g)
|
||||
(print-input-st-sym f "follow" a g print-output-terms))
|
||||
(define (print-LA l a g)
|
||||
(print-input-st-prod l "LA" a g print-output-terms))
|
||||
|
||||
(define (print-input-st-sym f name a g print-output)
|
||||
(printf "~a:\n" name)
|
||||
(send a for-each-state
|
||||
(λ (state)
|
||||
(for-each
|
||||
(λ (non-term)
|
||||
(let ([res (f (make-trans-key state non-term))])
|
||||
(when (not (null? res))
|
||||
(printf "~a(~a, ~a) = ~a\n"
|
||||
name
|
||||
state
|
||||
(gram-sym-symbol non-term)
|
||||
(print-output res)))))
|
||||
(send g get-non-terms))))
|
||||
(newline))
|
||||
|
||||
(define (print-input-st-prod f name a g print-output)
|
||||
(printf "~a:\n" name)
|
||||
(send a for-each-state
|
||||
(λ (state)
|
||||
(for-each
|
||||
(λ (non-term)
|
||||
(for-each
|
||||
(λ (prod)
|
||||
(let ([res (f state prod)])
|
||||
(when (not (null? res))
|
||||
(printf "~a(~a, ~a) = ~a\n"
|
||||
name
|
||||
(kernel-index state)
|
||||
(prod-index prod)
|
||||
(print-output res)))))
|
||||
(send g get-prods-for-non-term non-term)))
|
||||
(send g get-non-terms)))))
|
||||
|
||||
(define (print-output-terms r)
|
||||
(map gram-sym-symbol r))
|
||||
|
||||
(define (print-output-st-nt r)
|
||||
(map (λ (p) (list (kernel-index (trans-key-st p)) (gram-sym-symbol (trans-key-gs p)))) r))
|
||||
|
||||
;; init-tk-map : int -> (vectorof hashtable?)
|
||||
(define (init-tk-map n)
|
||||
(define v (make-vector n #f))
|
||||
(let loop ([i (sub1 (vector-length v))])
|
||||
(when (>= i 0)
|
||||
(vector-set! v i (make-hasheq))
|
||||
(loop (sub1 i))))
|
||||
v)
|
||||
|
||||
;; lookup-tk-map : (vectorof (symbol? int hashtable)) -> trans-key? -> int
|
||||
(define ((lookup-tk-map map) tk)
|
||||
(define st (trans-key-st tk))
|
||||
(define gs (trans-key-gs tk))
|
||||
(hash-ref (vector-ref map (kernel-index st))
|
||||
(gram-sym-symbol gs)
|
||||
(λ () 0)))
|
||||
|
||||
;; add-tk-map : (vectorof (symbol? int hashtable)) -> trans-key int ->
|
||||
(define ((add-tk-map map) tk v)
|
||||
(define st (trans-key-st tk))
|
||||
(define gs (trans-key-gs tk))
|
||||
(hash-set! (vector-ref map (kernel-index st))
|
||||
(gram-sym-symbol gs)
|
||||
v))
|
||||
|
||||
;; digraph-tk->terml:
|
||||
;; (trans-key list) * (trans-key -> trans-key list) * (trans-key -> term list) * int * int * int
|
||||
;; -> (trans-key -> term list)
|
||||
;; DeRemer and Pennello 1982
|
||||
;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)}
|
||||
;; A specialization of digraph in the file graph.rkt
|
||||
(define (digraph-tk->terml nodes edges f- num-states)
|
||||
;; Will map elements of trans-key to term sets represented as bit vectors
|
||||
(define results (init-tk-map num-states))
|
||||
|
||||
;; Maps elements of trans-keys to integers.
|
||||
(define N (init-tk-map num-states))
|
||||
|
||||
(define get-N (lookup-tk-map N))
|
||||
(define set-N (add-tk-map N))
|
||||
(define get-f (lookup-tk-map results))
|
||||
(define set-f (add-tk-map results))
|
||||
|
||||
(define stack null)
|
||||
(define (push x) (set! stack (cons x stack)))
|
||||
(define (pop) (begin0
|
||||
(car stack)
|
||||
(set! stack (cdr stack))))
|
||||
(define (depth) (length stack))
|
||||
|
||||
;; traverse: 'a ->
|
||||
(define (traverse x)
|
||||
(push x)
|
||||
(let ([d (depth)])
|
||||
(set-N x d)
|
||||
(set-f x (f- x))
|
||||
(for-each (λ (y)
|
||||
(when (= 0 (get-N y))
|
||||
(traverse y))
|
||||
(set-f x (bitwise-ior (get-f x) (get-f y)))
|
||||
(set-N x (min (get-N x) (get-N y))))
|
||||
(edges x))
|
||||
(when (= d (get-N x))
|
||||
(let loop ([p (pop)])
|
||||
(set-N p +inf.0)
|
||||
(set-f p (get-f x))
|
||||
(unless (equal? x p)
|
||||
(loop (pop)))))))
|
||||
|
||||
(for ([x (in-list nodes)]
|
||||
#:when (zero? (get-N x)))
|
||||
(traverse x))
|
||||
get-f)
|
@ -1,54 +0,0 @@
|
||||
#lang racket/base
|
||||
(require "grammar.rkt")
|
||||
(provide (except-out (all-defined-out) make-reduce make-reduce*)
|
||||
(rename-out [make-reduce* make-reduce]))
|
||||
|
||||
;; An action is
|
||||
;; - (make-shift int)
|
||||
;; - (make-reduce prod runtime-action)
|
||||
;; - (make-accept)
|
||||
;; - (make-goto int)
|
||||
;; - (no-action)
|
||||
;; A reduce contains a runtime-reduce so that sharing of the reduces can
|
||||
;; be easily transferred to sharing of runtime-reduces.
|
||||
|
||||
(define-struct action () #:inspector (make-inspector))
|
||||
(define-struct (shift action) (state) #:inspector (make-inspector))
|
||||
(define-struct (reduce action) (prod runtime-reduce) #:inspector (make-inspector))
|
||||
(define-struct (accept action) () #:inspector (make-inspector))
|
||||
(define-struct (goto action) (state) #:inspector (make-inspector))
|
||||
(define-struct (no-action action) () #:inspector (make-inspector))
|
||||
|
||||
(define (make-reduce* p)
|
||||
(make-reduce p
|
||||
(vector (prod-index p)
|
||||
(gram-sym-symbol (prod-lhs p))
|
||||
(vector-length (prod-rhs p)))))
|
||||
|
||||
;; A runtime-action is
|
||||
;; non-negative-int (shift)
|
||||
;; (vector int symbol int) (reduce)
|
||||
;; 'accept (accept)
|
||||
;; negative-int (goto)
|
||||
;; #f (no-action)
|
||||
|
||||
(define (action->runtime-action a)
|
||||
(cond
|
||||
[(shift? a) (shift-state a)]
|
||||
[(reduce? a) (reduce-runtime-reduce a)]
|
||||
[(accept? a) 'accept]
|
||||
[(goto? a) (- (+ (goto-state a) 1))]
|
||||
[(no-action? a) #f]))
|
||||
|
||||
(define (runtime-shift? x) (and (integer? x) (>= x 0)))
|
||||
(define runtime-reduce? vector?)
|
||||
(define (runtime-accept? x) (eq? x 'accept))
|
||||
(define (runtime-goto? x) (and (integer? x) (< x 0)))
|
||||
|
||||
(define runtime-shift-state values)
|
||||
(define (runtime-reduce-prod-num x) (vector-ref x 0))
|
||||
(define (runtime-reduce-lhs x) (vector-ref x 1))
|
||||
(define (runtime-reduce-rhs-length x) (vector-ref x 2))
|
||||
(define (runtime-goto-state x) (- (+ x 1)))
|
||||
|
||||
|
@ -1,103 +0,0 @@
|
||||
#lang racket/base
|
||||
(require "input-file-parser.rkt"
|
||||
"grammar.rkt"
|
||||
"table.rkt"
|
||||
racket/class
|
||||
racket/contract)
|
||||
(require (for-template racket/base))
|
||||
|
||||
(provide/contract [build-parser (-> string? any/c any/c
|
||||
(listof identifier?)
|
||||
(listof identifier?)
|
||||
(listof identifier?)
|
||||
(or/c syntax? #f)
|
||||
syntax?
|
||||
(values any/c any/c any/c any/c))])
|
||||
|
||||
;; fix-check-syntax : (listof identifier?) (listof identifier?) (listof identifier?)
|
||||
;; (union syntax? false/c) syntax?) -> syntax?
|
||||
(define (fix-check-syntax input-terms start ends assocs prods)
|
||||
(define term-binders (get-term-list input-terms))
|
||||
(define get-term-binder
|
||||
(let ([t (make-hasheq)])
|
||||
(for ([term (in-list term-binders)])
|
||||
(hash-set! t (syntax-e term) term))
|
||||
(λ (x)
|
||||
(define r (hash-ref t (syntax-e x) (λ () #f)))
|
||||
(if r
|
||||
(syntax-local-introduce (datum->syntax r (syntax-e x) x x))
|
||||
x))))
|
||||
(define rhs-list (syntax-case prods ()
|
||||
[((_ RHS ...) ...) (syntax->list #'(RHS ... ...))]))
|
||||
(with-syntax ([(TMP ...) (map syntax-local-introduce term-binders)]
|
||||
[(TERM-GROUP ...)
|
||||
(map (λ (tg)
|
||||
(syntax-property
|
||||
(datum->syntax tg #f)
|
||||
'disappeared-use
|
||||
tg))
|
||||
input-terms)]
|
||||
[(END ...) (map get-term-binder ends)]
|
||||
[(START ...) (map get-term-binder start)]
|
||||
[(BIND ...) (syntax-case prods ()
|
||||
(((BIND _ ...) ...)
|
||||
(syntax->list #'(BIND ...))))]
|
||||
[((BOUND ...) ...)
|
||||
(map (λ (rhs)
|
||||
(syntax-case rhs ()
|
||||
[((BOUND ...) (_ PBOUND) __)
|
||||
(map get-term-binder
|
||||
(cons #'PBOUND (syntax->list #'(BOUND ...))))]
|
||||
[((BOUND ...) _)
|
||||
(map get-term-binder
|
||||
(syntax->list #'(BOUND ...)))]))
|
||||
rhs-list)]
|
||||
[(PREC ...)
|
||||
(if assocs
|
||||
(map get-term-binder
|
||||
(syntax-case assocs ()
|
||||
(((__ TERM ...) ...)
|
||||
(syntax->list #'(TERM ... ...)))))
|
||||
null)])
|
||||
#`(when #f
|
||||
(let ((BIND void) ... (TMP void) ...)
|
||||
(void BOUND ... ... TERM-GROUP ... START ... END ... PREC ...)))))
|
||||
|
||||
(require racket/list "parser-actions.rkt")
|
||||
|
||||
(define (build-parser filename src-pos suppress input-terms start end assocs prods)
|
||||
(define grammar (parse-input input-terms start end assocs prods src-pos))
|
||||
(define table (build-table grammar filename suppress))
|
||||
(define all-tokens (make-hasheq))
|
||||
(define actions-code `(vector ,@(map prod-action (send grammar get-prods))))
|
||||
|
||||
(for ([term (in-list (send grammar get-terms))])
|
||||
(hash-set! all-tokens (gram-sym-symbol term) #t))
|
||||
|
||||
#;(let ((num-states (vector-length table))
|
||||
(num-gram-syms (+ (send grammar get-num-terms)
|
||||
(send grammar get-num-non-terms)))
|
||||
(num-ht-entries (apply + (map length (vector->list table))))
|
||||
(num-reduces
|
||||
(let ((ht (make-hasheq)))
|
||||
(for-each
|
||||
(λ (x)
|
||||
(when (reduce? x)
|
||||
(hash-set! ht x #t)))
|
||||
(map cdr (apply append (vector->list table))))
|
||||
(length (hash-table-map ht void)))))
|
||||
(printf "~a states, ~a grammar symbols, ~a hash-table entries, ~a reduces\n"
|
||||
num-states num-gram-syms num-ht-entries num-reduces)
|
||||
(printf "~a -- ~aKB, previously ~aKB\n"
|
||||
(/ (+ 2 num-states
|
||||
(* 4 num-states) (* 2 1.5 num-ht-entries)
|
||||
(* 5 num-reduces)) 256.0)
|
||||
(/ (+ 2 num-states
|
||||
(* 4 num-states) (* 2 2.3 num-ht-entries)
|
||||
(* 5 num-reduces)) 256.0)
|
||||
(/ (+ 2 (* num-states num-gram-syms) (* 5 num-reduces)) 256.0)))
|
||||
(values table
|
||||
all-tokens
|
||||
actions-code
|
||||
(fix-check-syntax input-terms start end assocs prods)))
|
||||
|
@ -1,264 +0,0 @@
|
||||
#lang racket/base
|
||||
(require "grammar.rkt"
|
||||
"lr0.rkt"
|
||||
"lalr.rkt"
|
||||
"parser-actions.rkt"
|
||||
racket/contract
|
||||
racket/list
|
||||
racket/class)
|
||||
|
||||
;; Routine to build the LALR table
|
||||
|
||||
|
||||
(define (is-a-grammar%? x) (is-a? x grammar%))
|
||||
(provide/contract
|
||||
(build-table (-> is-a-grammar%? string? any/c
|
||||
(vectorof (listof (cons/c (or/c term? non-term?) action?))))))
|
||||
|
||||
;; A parse-table is (vectorof (listof (cons/c gram-sym? action)))
|
||||
;; A grouped-parse-table is (vectorof (listof (cons/c gram-sym? (listof action))))
|
||||
|
||||
;; make-parse-table : int -> parse-table
|
||||
(define (make-parse-table num-states)
|
||||
(make-vector num-states null))
|
||||
|
||||
;; table-add!: parse-table nat symbol action ->
|
||||
(define (table-add! table state-index symbol val)
|
||||
(vector-set! table state-index (cons (cons symbol val)
|
||||
(vector-ref table state-index))))
|
||||
|
||||
;; group-table : parse-table -> grouped-parse-table
|
||||
(define (group-table table)
|
||||
(list->vector
|
||||
(for/list ([state-entry (in-list (vector->list table))])
|
||||
(define ht (make-hasheq))
|
||||
(for* ([gs/actions (in-list state-entry)]
|
||||
[group (in-value (hash-ref ht (car gs/actions) (λ () null)))]
|
||||
#:unless (member (cdr gs/actions) group))
|
||||
(hash-set! ht (car gs/actions) (cons (cdr gs/actions) group)))
|
||||
(hash-map ht cons))))
|
||||
|
||||
;; table-map : (vectorof (listof (cons/c gram-sym? X))) (gram-sym? X -> Y) ->
|
||||
;; (vectorof (listof (cons/c gram-sym? Y)))
|
||||
(define (table-map f table)
|
||||
(list->vector
|
||||
(for/list ([state-entry (in-list (vector->list table))])
|
||||
(for/list ([gs/X (in-list state-entry)])
|
||||
(cons (car gs/X) (f (car gs/X) (cdr gs/X)))))))
|
||||
|
||||
(define (bit-vector-for-each f bv)
|
||||
(let loop ([bv bv] [number 0])
|
||||
(cond
|
||||
[(zero? bv) (void)]
|
||||
[(= 1 (bitwise-and 1 bv))
|
||||
(f number)
|
||||
(loop (arithmetic-shift bv -1) (add1 number))]
|
||||
[else (loop (arithmetic-shift bv -1) (add1 number))])))
|
||||
|
||||
|
||||
;; print-entry: symbol action output-port ->
|
||||
;; prints the action a for lookahead sym to the given port
|
||||
(define (print-entry sym a port)
|
||||
(define s "\t~a\t\t\t\t\t~a\t~a\n")
|
||||
(cond
|
||||
[(shift? a) (fprintf port s sym "shift" (shift-state a))]
|
||||
[(reduce? a) (fprintf port s sym "reduce" (prod-index (reduce-prod a)))]
|
||||
[(accept? a) (fprintf port s sym "accept" "")]
|
||||
[(goto? a) (fprintf port s sym "goto" (goto-state a))]))
|
||||
|
||||
|
||||
;; count: ('a -> bool) * 'a list -> num
|
||||
;; counts the number of elements in list that satisfy pred
|
||||
(define (count pred list)
|
||||
(cond
|
||||
[(null? list) 0]
|
||||
[(pred (car list)) (+ 1 (count pred (cdr list)))]
|
||||
[else (count pred (cdr list))]))
|
||||
|
||||
;; display-parser: LR0-automaton grouped-parse-table (listof prod?) output-port ->
|
||||
;; Prints out the parser given by table.
|
||||
(define (display-parser a grouped-table prods port)
|
||||
(define SR-conflicts 0)
|
||||
(define RR-conflicts 0)
|
||||
(for ([prod (in-list prods)])
|
||||
(fprintf port
|
||||
"~a\t~a\t=\t~a\n"
|
||||
(prod-index prod)
|
||||
(gram-sym-symbol (prod-lhs prod))
|
||||
(map gram-sym-symbol (vector->list (prod-rhs prod)))))
|
||||
|
||||
(send a for-each-state
|
||||
(λ (state)
|
||||
(fprintf port "State ~a\n" (kernel-index state))
|
||||
(for ([item (in-list (kernel-items state))])
|
||||
(fprintf port "\t~a\n" (item->string item)))
|
||||
(newline port)
|
||||
(for ([gs/action (in-list (vector-ref grouped-table (kernel-index state)))])
|
||||
(define sym (gram-sym-symbol (car gs/action)))
|
||||
(define act (cdr gs/action))
|
||||
(cond
|
||||
[(null? act) (void)]
|
||||
[(null? (cdr act))
|
||||
(print-entry sym (car act) port)]
|
||||
[else
|
||||
(fprintf port "begin conflict:\n")
|
||||
(when (> (count reduce? act) 1)
|
||||
(set! RR-conflicts (add1 RR-conflicts)))
|
||||
(when (> (count shift? act) 0)
|
||||
(set! SR-conflicts (add1 SR-conflicts)))
|
||||
(map (λ (x) (print-entry sym x port)) act)
|
||||
(fprintf port "end conflict\n")]))
|
||||
(newline port)))
|
||||
|
||||
(when (> SR-conflicts 0)
|
||||
(fprintf port "~a shift/reduce conflict~a\n"
|
||||
SR-conflicts
|
||||
(if (= SR-conflicts 1) "" "s")))
|
||||
(when (> RR-conflicts 0)
|
||||
(fprintf port "~a reduce/reduce conflict~a\n"
|
||||
RR-conflicts
|
||||
(if (= RR-conflicts 1) "" "s"))))
|
||||
|
||||
;; resolve-conflict : (listof action?) -> action? bool bool
|
||||
(define (resolve-conflict actions)
|
||||
(cond
|
||||
[(null? actions) (values (make-no-action) #f #f)]
|
||||
[(null? (cdr actions)) (values (car actions) #f #f)]
|
||||
[else
|
||||
(define SR-conflict? (> (count shift? actions) 0))
|
||||
(define RR-conflict? (> (count reduce? actions) 1))
|
||||
(let loop ((current-guess #f)
|
||||
(rest actions))
|
||||
(cond
|
||||
[(null? rest) (values current-guess SR-conflict? RR-conflict?)]
|
||||
[(shift? (car rest)) (values (car rest) SR-conflict? RR-conflict?)]
|
||||
[(not current-guess) (loop (car rest) (cdr rest))]
|
||||
[(and (reduce? (car rest))
|
||||
(< (prod-index (reduce-prod (car rest)))
|
||||
(prod-index (reduce-prod current-guess))))
|
||||
(loop (car rest) (cdr rest))]
|
||||
[(accept? (car rest))
|
||||
(eprintf "accept/reduce or accept/shift conflicts. Check the grammar for useless cycles of productions\n")
|
||||
(loop current-guess (cdr rest))]
|
||||
[else (loop current-guess (cdr rest))]))]))
|
||||
|
||||
;; resolve-conflicts : grouped-parse-table bool -> parse-table
|
||||
(define (resolve-conflicts grouped-table suppress)
|
||||
(define SR-conflicts 0)
|
||||
(define RR-conflicts 0)
|
||||
(define table (table-map
|
||||
(λ (gs actions)
|
||||
(let-values ([(action SR? RR?)
|
||||
(resolve-conflict actions)])
|
||||
(when SR?
|
||||
(set! SR-conflicts (add1 SR-conflicts)))
|
||||
(when RR?
|
||||
(set! RR-conflicts (add1 RR-conflicts)))
|
||||
action))
|
||||
grouped-table))
|
||||
(unless suppress
|
||||
(when (> SR-conflicts 0)
|
||||
(eprintf "~a shift/reduce conflict~a\n"
|
||||
SR-conflicts
|
||||
(if (= SR-conflicts 1) "" "s")))
|
||||
(when (> RR-conflicts 0)
|
||||
(eprintf "~a reduce/reduce conflict~a\n"
|
||||
RR-conflicts
|
||||
(if (= RR-conflicts 1) "" "s"))))
|
||||
table)
|
||||
|
||||
|
||||
;; resolve-sr-conflict : (listof action) (union int #f) -> (listof action)
|
||||
;; Resolves a single shift-reduce conflict, if precedences are in place.
|
||||
(define (resolve-sr-conflict/prec actions shift-prec)
|
||||
(define shift (if (shift? (car actions))
|
||||
(car actions)
|
||||
(cadr actions)))
|
||||
(define reduce (if (shift? (car actions))
|
||||
(cadr actions)
|
||||
(car actions)))
|
||||
(define reduce-prec (prod-prec (reduce-prod reduce)))
|
||||
(cond
|
||||
[(and shift-prec reduce-prec)
|
||||
(cond
|
||||
[(< (prec-num shift-prec) (prec-num reduce-prec))
|
||||
(list reduce)]
|
||||
[(> (prec-num shift-prec) (prec-num reduce-prec))
|
||||
(list shift)]
|
||||
[(eq? 'left (prec-assoc shift-prec))
|
||||
(list reduce)]
|
||||
[(eq? 'right (prec-assoc shift-prec))
|
||||
(list shift)]
|
||||
[else null])]
|
||||
[else actions]))
|
||||
|
||||
|
||||
;; resolve-prec-conflicts : parse-table -> grouped-parse-table
|
||||
(define (resolve-prec-conflicts table)
|
||||
(table-map
|
||||
(λ (gs actions)
|
||||
(cond
|
||||
[(and (term? gs)
|
||||
(= 2 (length actions))
|
||||
(or (shift? (car actions))
|
||||
(shift? (cadr actions))))
|
||||
(resolve-sr-conflict/prec actions (term-prec gs))]
|
||||
[else actions]))
|
||||
(group-table table)))
|
||||
|
||||
;; build-table: grammar string bool -> parse-table
|
||||
(define (build-table g file suppress)
|
||||
(define a (build-lr0-automaton g))
|
||||
(define term-vector (list->vector (send g get-terms)))
|
||||
(define end-terms (send g get-end-terms))
|
||||
(define table (make-parse-table (send a get-num-states)))
|
||||
(define get-lookahead (compute-LA a g))
|
||||
(define reduce-cache (make-hash))
|
||||
(for ([trans-key/state (in-list (send a get-transitions))])
|
||||
(define from-state-index (kernel-index (trans-key-st (car trans-key/state))))
|
||||
(define gs (trans-key-gs (car trans-key/state)))
|
||||
(define to-state (cdr trans-key/state))
|
||||
|
||||
(table-add! table from-state-index gs
|
||||
(cond
|
||||
((non-term? gs)
|
||||
(make-goto (kernel-index to-state)))
|
||||
((member gs end-terms)
|
||||
(make-accept))
|
||||
(else
|
||||
(make-shift
|
||||
(kernel-index to-state))))))
|
||||
(send a for-each-state
|
||||
(λ (state)
|
||||
(for ([item (in-list (append (hash-ref (send a get-epsilon-trans) state (λ () null))
|
||||
(filter (λ (item)
|
||||
(not (move-dot-right item)))
|
||||
(kernel-items state))))])
|
||||
(let ([item-prod (item-prod item)])
|
||||
(bit-vector-for-each
|
||||
(λ (term-index)
|
||||
(unless (start-item? item)
|
||||
(let ((r (hash-ref reduce-cache item-prod
|
||||
(λ ()
|
||||
(let ((r (make-reduce item-prod)))
|
||||
(hash-set! reduce-cache item-prod r)
|
||||
r)))))
|
||||
(table-add! table
|
||||
(kernel-index state)
|
||||
(vector-ref term-vector term-index)
|
||||
r))))
|
||||
(get-lookahead state item-prod))))))
|
||||
|
||||
(define grouped-table (resolve-prec-conflicts table))
|
||||
(unless (string=? file "")
|
||||
(with-handlers [(exn:fail:filesystem?
|
||||
(λ (e)
|
||||
(eprintf
|
||||
"Cannot write debug output to file \"~a\": ~a\n"
|
||||
file
|
||||
(exn-message e))))]
|
||||
(call-with-output-file file
|
||||
(λ (port)
|
||||
(display-parser a grouped-table (send g get-prods) port))
|
||||
#:exists 'truncate)))
|
||||
(resolve-conflicts grouped-table suppress))
|
@ -1,71 +0,0 @@
|
||||
#lang racket/base
|
||||
(require (prefix-in rl: racket/list)
|
||||
"../private-lex/token-syntax.rkt")
|
||||
|
||||
;; General helper routines
|
||||
(provide duplicate-list? remove-duplicates overlap? vector-andmap display-yacc)
|
||||
|
||||
(define (vector-andmap pred vec)
|
||||
(for/and ([item (in-vector vec)])
|
||||
(pred vec)))
|
||||
|
||||
;; duplicate-list?: symbol list -> #f | symbol
|
||||
;; returns a symbol that exists twice in l, or false if no such symbol
|
||||
;; exists
|
||||
(define (duplicate-list? syms)
|
||||
(rl:check-duplicates syms eq?))
|
||||
|
||||
;; remove-duplicates: syntax-object list -> syntax-object list
|
||||
;; removes the duplicates from the lists
|
||||
(define (remove-duplicates syms)
|
||||
(rl:remove-duplicates syms equal? #:key syntax->datum))
|
||||
|
||||
;; overlap?: symbol list * symbol list -> #f | symbol
|
||||
;; Returns an symbol in l1 intersect l2, or #f is no such symbol exists
|
||||
(define (overlap? syms1 syms2)
|
||||
(for/first ([sym1 (in-list syms1)]
|
||||
#:when (memq sym1 syms2))
|
||||
sym1))
|
||||
|
||||
|
||||
(define (display-yacc grammar tokens start precs port)
|
||||
(let-syntax ([p (syntax-rules ()
|
||||
((_ args ...) (fprintf port args ...)))])
|
||||
(let* ([tokens (map syntax-local-value tokens)]
|
||||
[eterms (filter e-terminals-def? tokens)]
|
||||
[terms (filter terminals-def? tokens)]
|
||||
[term-table (make-hasheq)]
|
||||
[display-rhs
|
||||
(λ (rhs)
|
||||
(for ([sym (in-list (car rhs))])
|
||||
(p "~a " (hash-ref term-table sym (λ () sym))))
|
||||
(when (= 3 (length rhs))
|
||||
(p "%prec ~a" (cadadr rhs)))
|
||||
(p "\n"))])
|
||||
(for* ([t (in-list eterms)]
|
||||
[t (in-list (syntax->datum (e-terminals-def-t t)))])
|
||||
(hash-set! term-table t (format "'~a'" t)))
|
||||
(for* ([t (in-list terms)]
|
||||
[t (in-list (syntax->datum (terminals-def-t t)))])
|
||||
(p "%token ~a\n" t)
|
||||
(hash-set! term-table t (format "~a" t)))
|
||||
(when precs
|
||||
(for ([prec (in-list precs)])
|
||||
(p "%~a " (car prec))
|
||||
(for ([tok (in-list (cdr prec))])
|
||||
(p " ~a" (hash-ref term-table tok)))
|
||||
(p "\n")))
|
||||
(p "%start ~a\n" start)
|
||||
(p "%%\n")
|
||||
(for ([prod (in-list grammar)])
|
||||
(define nt (car prod))
|
||||
(p "~a: " nt)
|
||||
(display-rhs (cadr prod))
|
||||
(for ([rhs (in-list (cddr prod))])
|
||||
(p "| ")
|
||||
(display-rhs rhs))
|
||||
(p ";\n"))
|
||||
(p "%%\n"))))
|
||||
|
||||
|
||||
|
@ -1,130 +0,0 @@
|
||||
#lang racket/base
|
||||
(require br-parser-tools/lex
|
||||
(prefix-in : br-parser-tools/lex-sre)
|
||||
br-parser-tools/yacc
|
||||
syntax/readerr
|
||||
racket/list)
|
||||
(provide trans)
|
||||
|
||||
(define match-double-string
|
||||
(lexer
|
||||
[(:+ (:~ #\" #\\)) (append (string->list lexeme)
|
||||
(match-double-string input-port))]
|
||||
[(:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port))]
|
||||
[#\" null]))
|
||||
|
||||
(define match-single-string
|
||||
(lexer
|
||||
[(:+ (:~ #\' #\\)) (append (string->list lexeme)
|
||||
(match-single-string input-port))]
|
||||
[(:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port))]
|
||||
[#\' null]))
|
||||
|
||||
(define-lex-abbrevs
|
||||
[letter (:or (:/ "a" "z") (:/ "A" "Z"))]
|
||||
[digit (:/ "0" "9")]
|
||||
[initial (:or letter (char-set "!$%&*/<=>?^_~@"))]
|
||||
[subsequent (:or initial digit (char-set "+-.@"))]
|
||||
[comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/")])
|
||||
|
||||
(define-empty-tokens x (EOF PIPE |:| SEMI |%%| %prec))
|
||||
(define-tokens y (SYM STRING))
|
||||
|
||||
(define get-token-grammar
|
||||
(lexer-src-pos
|
||||
["%%" '|%%|]
|
||||
[":" (string->symbol lexeme)]
|
||||
["%prec" (string->symbol lexeme)]
|
||||
[#\| 'PIPE]
|
||||
[(:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}")))
|
||||
(return-without-pos (get-token-grammar input-port))]
|
||||
[#\; 'SEMI]
|
||||
[#\' (token-STRING (string->symbol (list->string (match-single-string input-port))))]
|
||||
[#\" (token-STRING (string->symbol (list->string (match-double-string input-port))))]
|
||||
[(:: initial (:* subsequent)) (token-SYM (string->symbol lexeme))]))
|
||||
|
||||
(define (parse-grammar enter-term enter-empty-term enter-non-term)
|
||||
(parser
|
||||
(tokens x y)
|
||||
(src-pos)
|
||||
(error (λ (tok-ok tok-name tok-value start-pos end-pos)
|
||||
(raise-read-error
|
||||
(format "Error Parsing YACC grammar at token: ~a with value: ~a" tok-name tok-value)
|
||||
(file-path)
|
||||
(position-line start-pos)
|
||||
(position-col start-pos)
|
||||
(position-offset start-pos)
|
||||
(- (position-offset end-pos) (position-offset start-pos)))))
|
||||
|
||||
(end |%%|)
|
||||
(start gram)
|
||||
(grammar
|
||||
(gram
|
||||
((production) (list $1))
|
||||
((production gram) (cons $1 $2)))
|
||||
(production
|
||||
((SYM |:| prods SEMI)
|
||||
(begin
|
||||
(enter-non-term $1)
|
||||
(cons $1 $3))))
|
||||
(prods
|
||||
((rhs) (list `(,$1 #f)))
|
||||
((rhs prec) (list `(,$1 ,$2 #f)))
|
||||
((rhs PIPE prods) (cons `(,$1 #f) $3))
|
||||
((rhs prec PIPE prods) (cons `(,$1 ,$2 #f) $4)))
|
||||
(prec
|
||||
((%prec SYM)
|
||||
(begin
|
||||
(enter-term $2)
|
||||
(list 'prec $2)))
|
||||
((%prec STRING)
|
||||
(begin
|
||||
(enter-empty-term $2)
|
||||
(list 'prec $2))))
|
||||
(rhs
|
||||
(() null)
|
||||
((SYM rhs)
|
||||
(begin
|
||||
(enter-term $1)
|
||||
(cons $1 $2)))
|
||||
((STRING rhs)
|
||||
(begin
|
||||
(enter-empty-term $1)
|
||||
(cons $1 $2)))))))
|
||||
|
||||
(define (symbol<? a b)
|
||||
(string<? (symbol->string a) (symbol->string b)))
|
||||
|
||||
(define (trans filename)
|
||||
(define i (open-input-file filename))
|
||||
(define terms (make-hasheq))
|
||||
(define eterms (make-hasheq))
|
||||
(define nterms (make-hasheq))
|
||||
(define (enter-term s)
|
||||
(when (not (hash-ref nterms s (λ () #f)))
|
||||
(hash-set! terms s #t)))
|
||||
(define (enter-empty-term s)
|
||||
(when (not (hash-ref nterms s (λ () #f)))
|
||||
(hash-set! eterms s #t)))
|
||||
(define (enter-non-term s)
|
||||
(hash-remove! terms s)
|
||||
(hash-remove! eterms s)
|
||||
(hash-set! nterms s #t))
|
||||
(port-count-lines! i)
|
||||
(file-path filename)
|
||||
(regexp-match "%%" i)
|
||||
(begin0
|
||||
(let ([gram ((parse-grammar enter-term enter-empty-term enter-non-term)
|
||||
(λ ()
|
||||
(let ((t (get-token-grammar i)))
|
||||
t)))])
|
||||
`(begin
|
||||
(define-tokens t ,(sort (hash-map terms (λ (k v) k)) symbol<?))
|
||||
(define-empty-tokens et ,(sort (hash-map eterms (λ (k v) k)) symbol<?))
|
||||
(parser
|
||||
(start ___)
|
||||
(end ___)
|
||||
(error ___)
|
||||
(tokens t et)
|
||||
(grammar ,@gram))))
|
||||
(close-input-port i)))
|
@ -1,334 +0,0 @@
|
||||
#lang racket/base
|
||||
(require (for-syntax racket/base
|
||||
"private-yacc/parser-builder.rkt"
|
||||
"private-yacc/grammar.rkt"
|
||||
"private-yacc/yacc-helper.rkt"
|
||||
"private-yacc/parser-actions.rkt")
|
||||
"private-lex/token.rkt"
|
||||
"private-yacc/parser-actions.rkt"
|
||||
racket/local
|
||||
racket/pretty
|
||||
syntax/readerr)
|
||||
|
||||
(provide parser)
|
||||
|
||||
|
||||
;; convert-parse-table : (vectorof (listof (cons/c gram-sym? action?))) ->
|
||||
;; (vectorof (symbol runtime-action hashtable))
|
||||
(define-for-syntax (convert-parse-table table)
|
||||
(for/vector ([state-entry (in-vector table)])
|
||||
(let ([ht (make-hasheq)])
|
||||
(for ([gs/action (in-list state-entry)])
|
||||
(hash-set! ht
|
||||
(gram-sym-symbol (car gs/action))
|
||||
(action->runtime-action (cdr gs/action))))
|
||||
ht)))
|
||||
|
||||
(define-syntax (parser stx)
|
||||
(syntax-case stx ()
|
||||
[(_ ARGS ...)
|
||||
(let ([arg-list (syntax->list #'(ARGS ...))]
|
||||
[src-pos #f]
|
||||
[debug #f]
|
||||
[error #f]
|
||||
[tokens #f]
|
||||
[start #f]
|
||||
[end #f]
|
||||
[precs #f]
|
||||
[suppress #f]
|
||||
[grammar #f]
|
||||
[yacc-output #f])
|
||||
(for ([arg (in-list (syntax->list #'(ARGS ...)))])
|
||||
(syntax-case* arg (debug error tokens start end precs grammar
|
||||
suppress src-pos yacc-output)
|
||||
(λ (a b) (eq? (syntax-e a) (syntax-e b)))
|
||||
[(debug FILENAME)
|
||||
(cond
|
||||
[(not (string? (syntax-e #'FILENAME)))
|
||||
(raise-syntax-error #f "Debugging filename must be a string" stx #'FILENAME)]
|
||||
[debug (raise-syntax-error #f "Multiple debug declarations" stx)]
|
||||
[else (set! debug (syntax-e #'FILENAME))])]
|
||||
[(suppress) (set! suppress #t)]
|
||||
[(src-pos) (set! src-pos #t)]
|
||||
[(error EXPRESSION)
|
||||
(if error
|
||||
(raise-syntax-error #f "Multiple error declarations" stx)
|
||||
(set! error #'EXPRESSION))]
|
||||
[(tokens DEF ...)
|
||||
(begin
|
||||
(when tokens
|
||||
(raise-syntax-error #f "Multiple tokens declarations" stx))
|
||||
(let ((defs (syntax->list #'(DEF ...))))
|
||||
(for ([d (in-list defs)]
|
||||
#:unless (identifier? d))
|
||||
(raise-syntax-error #f "Token-group name must be an identifier" stx d))
|
||||
(set! tokens defs)))]
|
||||
[(start symbol ...)
|
||||
(let ([symbols (syntax->list #'(symbol ...))])
|
||||
(for ([sym (in-list symbols)]
|
||||
#:unless (identifier? sym))
|
||||
(raise-syntax-error #f "Start symbol must be a symbol" stx sym))
|
||||
(when start
|
||||
(raise-syntax-error #f "Multiple start declarations" stx))
|
||||
(when (null? symbols)
|
||||
(raise-syntax-error #f "Missing start symbol" stx arg))
|
||||
(set! start symbols))]
|
||||
[(end SYMBOLS ...)
|
||||
(let ((symbols (syntax->list #'(SYMBOLS ...))))
|
||||
(for ([sym (in-list symbols)]
|
||||
#:unless (identifier? sym))
|
||||
(raise-syntax-error #f "End token must be a symbol" stx sym))
|
||||
(let ([d (duplicate-list? (map syntax-e symbols))])
|
||||
(when d
|
||||
(raise-syntax-error #f (format "Duplicate end token definition for ~a" d) stx arg))
|
||||
(when (null? symbols)
|
||||
(raise-syntax-error #f "end declaration must contain at least 1 token" stx arg))
|
||||
(when end
|
||||
(raise-syntax-error #f "Multiple end declarations" stx))
|
||||
(set! end symbols)))]
|
||||
[(precs DECLS ...)
|
||||
(if precs
|
||||
(raise-syntax-error #f "Multiple precs declarations" stx)
|
||||
(set! precs (syntax/loc arg (DECLS ...))))]
|
||||
[(grammar PRODS ...)
|
||||
(if grammar
|
||||
(raise-syntax-error #f "Multiple grammar declarations" stx)
|
||||
(set! grammar (syntax/loc arg (PRODS ...))))]
|
||||
[(yacc-output FILENAME)
|
||||
(cond
|
||||
[(not (string? (syntax-e #'FILENAME)))
|
||||
(raise-syntax-error #f "Yacc-output filename must be a string" stx #'FILENAME)]
|
||||
[yacc-output
|
||||
(raise-syntax-error #f "Multiple yacc-output declarations" stx)]
|
||||
[else
|
||||
(set! yacc-output (syntax-e #'FILENAME))])]
|
||||
[_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg)]))
|
||||
(unless tokens
|
||||
(raise-syntax-error #f "missing tokens declaration" stx))
|
||||
(unless error
|
||||
(raise-syntax-error #f "missing error declaration" stx))
|
||||
(unless grammar
|
||||
(raise-syntax-error #f "missing grammar declaration" stx))
|
||||
(unless end
|
||||
(raise-syntax-error #f "missing end declaration" stx))
|
||||
(unless start
|
||||
(raise-syntax-error #f "missing start declaration" stx))
|
||||
(let-values ([(table all-term-syms actions check-syntax-fix)
|
||||
(build-parser (if debug debug "")
|
||||
src-pos
|
||||
suppress
|
||||
tokens
|
||||
start
|
||||
end
|
||||
precs
|
||||
grammar)])
|
||||
(when (and yacc-output (not (string=? yacc-output "")))
|
||||
(with-handlers [(exn:fail:filesystem?
|
||||
(λ (e) (eprintf "Cannot write yacc-output to file \"~a\"\n" yacc-output)))]
|
||||
(call-with-output-file yacc-output
|
||||
(λ (port)
|
||||
(display-yacc (syntax->datum grammar)
|
||||
tokens
|
||||
(map syntax->datum start)
|
||||
(and precs (syntax->datum precs))
|
||||
port))
|
||||
#:exists 'truncate)))
|
||||
(with-syntax ([check-syntax-fix check-syntax-fix]
|
||||
[err error]
|
||||
[ends end]
|
||||
[starts start]
|
||||
[debug debug]
|
||||
[table (convert-parse-table table)]
|
||||
[all-term-syms all-term-syms]
|
||||
[actions actions]
|
||||
[src-pos src-pos])
|
||||
#'(begin
|
||||
check-syntax-fix
|
||||
(parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos)))))]
|
||||
[_ (raise-syntax-error #f "parser must have the form (parser args ...)" stx)]))
|
||||
|
||||
(define (reduce-stack stack num ret-vals src-pos)
|
||||
(cond
|
||||
[(positive? num)
|
||||
(define top-frame (car stack))
|
||||
(let ([ret-vals (if src-pos
|
||||
(cons (stack-frame-value top-frame)
|
||||
(cons (stack-frame-start-pos top-frame)
|
||||
(cons (stack-frame-end-pos top-frame)
|
||||
ret-vals)))
|
||||
(cons (stack-frame-value top-frame) ret-vals))])
|
||||
(reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))]
|
||||
[else (values stack ret-vals)]))
|
||||
|
||||
;; extract-helper : (symbol or make-token) any any -> symbol any any any
|
||||
(define (extract-helper tok v1 v2)
|
||||
(cond
|
||||
[(symbol? tok) (values tok #f v1 v2)]
|
||||
[(token? tok) (values (real-token-name tok) (real-token-value tok) v1 v2)]
|
||||
[else (raise-argument-error 'parser "(or/c symbol? token?)" 0 tok)]))
|
||||
|
||||
;; well-formed-position-token?: any -> boolean
|
||||
;; Returns true if pt is a position token whose position-token-token
|
||||
;; is itself a token or a symbol.
|
||||
;; This is meant to help raise more precise error messages when
|
||||
;; a tokenizer produces an erroneous position-token wrapped twice.
|
||||
;; (as often happens when omitting return-without-pos).
|
||||
(define (well-formed-token-field? t)
|
||||
(or (symbol? t) (token? t)))
|
||||
|
||||
(define (well-formed-position-token? pt)
|
||||
(and (position-token? pt)
|
||||
(well-formed-token-field? (position-token-token pt))))
|
||||
|
||||
(define (well-formed-srcloc-token? st)
|
||||
(and (srcloc-token? st)
|
||||
(well-formed-token-field? (srcloc-token-token st))))
|
||||
|
||||
;; extract-src-pos : position-token -> symbol any any any
|
||||
(define (extract-src-pos ip)
|
||||
(unless (well-formed-position-token? ip)
|
||||
(raise-argument-error 'parser "well-formed-position-token?" 0 ip))
|
||||
(extract-helper (position-token-token ip)
|
||||
(position-token-start-pos ip)
|
||||
(position-token-end-pos ip)))
|
||||
|
||||
(define (extract-srcloc ip)
|
||||
(unless (well-formed-srcloc-token? ip)
|
||||
(raise-argument-error 'parser "well-formed-srcloc-token?" 0 ip))
|
||||
(define loc (srcloc-token-srcloc ip))
|
||||
(extract-helper (srcloc-token-token ip)
|
||||
(position-token (srcloc-position loc) (srcloc-line loc) (srcloc-column loc))
|
||||
(position-token (+ (srcloc-position loc) (srcloc-span loc)) #f #f)))
|
||||
|
||||
|
||||
;; extract-no-src-pos : (symbol or make-token) -> symbol any any any
|
||||
(define (extract-no-src-pos ip)
|
||||
(extract-helper ip #f #f))
|
||||
|
||||
(define-struct stack-frame (state value start-pos end-pos) #:inspector (make-inspector))
|
||||
|
||||
(define (make-empty-stack i) (list (make-stack-frame i #f #f #f)))
|
||||
|
||||
|
||||
;; The table is a vector that maps each state to a hash-table that maps a
|
||||
;; terminal symbol to either an accept, shift, reduce, or goto structure.
|
||||
; We encode the structures according to the runtime-action data definition in
|
||||
;; parser-actions.rkt
|
||||
(define (parser-body debug? err starts ends table all-term-syms actions src-pos)
|
||||
(local ((define extract
|
||||
(if src-pos
|
||||
extract-src-pos
|
||||
extract-no-src-pos))
|
||||
|
||||
(define (fix-error stack tok val start-pos end-pos get-token)
|
||||
(when debug? (pretty-print stack))
|
||||
(local ((define (remove-input tok val start-pos end-pos)
|
||||
(if (memq tok ends)
|
||||
(raise-read-error "parser: Cannot continue after error"
|
||||
#f #f #f #f #f)
|
||||
(let ([a (find-action stack tok val start-pos end-pos)])
|
||||
(cond
|
||||
[(runtime-shift? a)
|
||||
;; (printf "shift:~a\n" (runtime-shift-state a))
|
||||
(cons (make-stack-frame (runtime-shift-state a)
|
||||
val
|
||||
start-pos
|
||||
end-pos)
|
||||
stack)]
|
||||
[else
|
||||
;; (printf "discard input:~a\n" tok)
|
||||
(let-values ([(tok val start-pos end-pos)
|
||||
(extract (get-token))])
|
||||
(remove-input tok val start-pos end-pos))])))))
|
||||
(let remove-states ()
|
||||
(let ([a (find-action stack 'error #f start-pos end-pos)])
|
||||
(cond
|
||||
[(runtime-shift? a)
|
||||
;; (printf "shift:~a\n" (runtime-shift-state a))
|
||||
(set! stack
|
||||
(cons
|
||||
(make-stack-frame (runtime-shift-state a)
|
||||
#f
|
||||
start-pos
|
||||
end-pos)
|
||||
stack))
|
||||
(remove-input tok val start-pos end-pos)]
|
||||
[else
|
||||
;; (printf "discard state:~a\n" (car stack))
|
||||
(cond
|
||||
[(< (length stack) 2)
|
||||
(raise-read-error "parser: Cannot continue after error"
|
||||
#f #f #f #f #f)]
|
||||
[else
|
||||
(set! stack (cdr stack))
|
||||
(remove-states)])])))))
|
||||
|
||||
(define (find-action stack tok val start-pos end-pos)
|
||||
(unless (hash-ref all-term-syms tok #f)
|
||||
(if src-pos
|
||||
(err #f tok val start-pos end-pos)
|
||||
(err #f tok val))
|
||||
(raise-read-error (format "parser: got token of unknown type ~a" tok)
|
||||
#f #f #f #f #f))
|
||||
(hash-ref (vector-ref table (stack-frame-state (car stack))) tok #f))
|
||||
|
||||
(define ((make-parser start-number) get-token)
|
||||
(unless (and (procedure? get-token)
|
||||
(procedure-arity-includes? get-token 0))
|
||||
(error 'get-token "expected a nullary procedure, got ~e" get-token))
|
||||
(let parsing-loop ([stack (make-empty-stack start-number)]
|
||||
[ip (get-token)])
|
||||
(let-values ([(tok val start-pos end-pos) (extract ip)])
|
||||
(let ([action (find-action stack tok val start-pos end-pos)])
|
||||
(cond
|
||||
[(runtime-shift? action)
|
||||
;; (printf "shift:~a\n" (runtime-shift-state action))
|
||||
(parsing-loop (cons (make-stack-frame (runtime-shift-state action)
|
||||
val
|
||||
start-pos
|
||||
end-pos)
|
||||
stack)
|
||||
(get-token))]
|
||||
[(runtime-reduce? action)
|
||||
;; (printf "reduce:~a\n" (runtime-reduce-prod-num action))
|
||||
(let-values ([(new-stack args)
|
||||
(reduce-stack stack
|
||||
(runtime-reduce-rhs-length action)
|
||||
null
|
||||
src-pos)])
|
||||
(let ([goto
|
||||
(runtime-goto-state
|
||||
(hash-ref
|
||||
(vector-ref table (stack-frame-state (car new-stack)))
|
||||
(runtime-reduce-lhs action)))])
|
||||
(parsing-loop
|
||||
(cons
|
||||
(if src-pos
|
||||
(make-stack-frame
|
||||
goto
|
||||
(apply (vector-ref actions (runtime-reduce-prod-num action)) args)
|
||||
(if (null? args) start-pos (cadr args))
|
||||
(if (null? args)
|
||||
end-pos
|
||||
(list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1))))
|
||||
(make-stack-frame
|
||||
goto
|
||||
(apply (vector-ref actions (runtime-reduce-prod-num action)) args)
|
||||
#f
|
||||
#f))
|
||||
new-stack)
|
||||
ip)))]
|
||||
[(runtime-accept? action)
|
||||
;; (printf "accept\n")
|
||||
(stack-frame-value (car stack))]
|
||||
[else
|
||||
(if src-pos
|
||||
(err #t tok val start-pos end-pos)
|
||||
(err #t tok val))
|
||||
(parsing-loop (fix-error stack tok val start-pos end-pos get-token)
|
||||
(get-token))]))))))
|
||||
(cond
|
||||
[(null? (cdr starts)) (make-parser 0)]
|
||||
[else
|
||||
(for/list ([(l i) (in-indexed starts)])
|
||||
(make-parser i))])))
|
@ -1,12 +0,0 @@
|
||||
#lang info
|
||||
|
||||
(define collection 'multi)
|
||||
|
||||
(define deps '("br-parser-tools-lib"
|
||||
"br-parser-tools-doc"))
|
||||
(define implies '("br-parser-tools-lib"
|
||||
"br-parser-tools-doc"))
|
||||
|
||||
(define pkg-desc "Lex- and Yacc-style parsing tools")
|
||||
|
||||
(define pkg-authors '(mflatt))
|
@ -0,0 +1,3 @@
|
||||
#lang info
|
||||
|
||||
(define scribblings '(("parser-tools.scrbl" (multi-page) (parsing-library))))
|
@ -0,0 +1,242 @@
|
||||
;; This implements the equivalent of racket's read-syntax for R5RS scheme.
|
||||
;; It has not been thoroughly tested. Also it will read an entire file into a
|
||||
;; list of syntax objects, instead of returning one syntax object at a time
|
||||
|
||||
(module read mzscheme
|
||||
|
||||
(require parser-tools/lex
|
||||
(prefix : parser-tools/lex-sre)
|
||||
parser-tools/yacc
|
||||
syntax/readerr)
|
||||
|
||||
(define-tokens data (DATUM))
|
||||
(define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF))
|
||||
|
||||
(define scheme-lexer
|
||||
(lexer-src-pos
|
||||
|
||||
;; Skip comments, without accumulating extra position information
|
||||
[(:or scheme-whitespace comment) (return-without-pos (scheme-lexer input-port))]
|
||||
|
||||
["#t" (token-DATUM #t)]
|
||||
["#f" (token-DATUM #f)]
|
||||
[(:: "#\\" any-char) (token-DATUM (caddr (string->list lexeme)))]
|
||||
["#\\space" (token-DATUM #\space)]
|
||||
["#\\newline" (token-DATUM #\newline)]
|
||||
[(:or (:: initial (:* subsequent)) "+" "-" "...") (token-DATUM (string->symbol lexeme))]
|
||||
[#\" (token-DATUM (list->string (get-string-token input-port)))]
|
||||
[#\( 'OP]
|
||||
[#\) 'CP]
|
||||
[#\[ 'OP]
|
||||
[#\] 'CP]
|
||||
["#(" 'HASHOP]
|
||||
[num2 (token-DATUM (string->number lexeme 2))]
|
||||
[num8 (token-DATUM (string->number lexeme 8))]
|
||||
[num10 (token-DATUM (string->number lexeme 10))]
|
||||
[num16 (token-DATUM (string->number lexeme 16))]
|
||||
["'" 'QUOTE]
|
||||
["`" 'QUASIQUOTE]
|
||||
["," 'UNQUOTE]
|
||||
[",@" 'UNQUOTE-SPLICING]
|
||||
["." 'DOT]
|
||||
[(eof) 'EOF]))
|
||||
|
||||
(define get-string-token
|
||||
(lexer
|
||||
[(:~ #\" #\\) (cons (car (string->list lexeme))
|
||||
(get-string-token input-port))]
|
||||
[(:: #\\ #\\) (cons #\\ (get-string-token input-port))]
|
||||
[(:: #\\ #\") (cons #\" (get-string-token input-port))]
|
||||
[#\" null]))
|
||||
|
||||
|
||||
(define-lex-abbrevs
|
||||
[letter (:or (:/ "a" "z") (:/ #\A #\Z))]
|
||||
[digit (:/ #\0 #\9)]
|
||||
[scheme-whitespace (:or #\newline #\return #\tab #\space #\vtab)]
|
||||
[initial (:or letter (char-set "!$%&*/:<=>?^_~@"))]
|
||||
[subsequent (:or initial digit (char-set "+-.@"))]
|
||||
[comment (:: #\; (:* (:~ #\newline)) #\newline)]
|
||||
|
||||
|
||||
;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of
|
||||
;; using regexp macros to avoid the cut and paste.
|
||||
; [numR (:: prefixR complexR)]
|
||||
; [complexR (:or realR
|
||||
; (:: realR "@" realR)
|
||||
; (:: realR "+" urealR "i")
|
||||
; (:: realR "-" urealR "i")
|
||||
; (:: realR "+i")
|
||||
; (:: realR "-i")
|
||||
; (:: "+" urealR "i")
|
||||
; (:: "-" urealR "i")
|
||||
; (:: "+i")
|
||||
; (:: "-i"))]
|
||||
; [realR (:: sign urealR)]
|
||||
; [urealR (:or uintegerR (:: uintegerR "/" uintegerR) decimalR)]
|
||||
; [uintegerR (:: (:+ digitR) (:* #\#))]
|
||||
; [prefixR (:or (:: radixR exactness)
|
||||
; (:: exactness radixR))]
|
||||
|
||||
[num2 (:: prefix2 complex2)]
|
||||
[complex2 (:or real2
|
||||
(:: real2 "@" real2)
|
||||
(:: real2 "+" ureal2 "i")
|
||||
(:: real2 "-" ureal2 "i")
|
||||
(:: real2 "+i")
|
||||
(:: real2 "-i")
|
||||
(:: "+" ureal2 "i")
|
||||
(:: "-" ureal2 "i")
|
||||
(:: "+i")
|
||||
(:: "-i"))]
|
||||
[real2 (:: sign ureal2)]
|
||||
[ureal2 (:or uinteger2 (:: uinteger2 "/" uinteger2))]
|
||||
[uinteger2 (:: (:+ digit2) (:* #\#))]
|
||||
[prefix2 (:or (:: radix2 exactness)
|
||||
(:: exactness radix2))]
|
||||
[radix2 "#b"]
|
||||
[digit2 (:or "0" "1")]
|
||||
[num8 (:: prefix8 complex8)]
|
||||
[complex8 (:or real8
|
||||
(:: real8 "@" real8)
|
||||
(:: real8 "+" ureal8 "i")
|
||||
(:: real8 "-" ureal8 "i")
|
||||
(:: real8 "+i")
|
||||
(:: real8 "-i")
|
||||
(:: "+" ureal8 "i")
|
||||
(:: "-" ureal8 "i")
|
||||
(:: "+i")
|
||||
(:: "-i"))]
|
||||
[real8 (:: sign ureal8)]
|
||||
[ureal8 (:or uinteger8 (:: uinteger8 "/" uinteger8))]
|
||||
[uinteger8 (:: (:+ digit8) (:* #\#))]
|
||||
[prefix8 (:or (:: radix8 exactness)
|
||||
(:: exactness radix8))]
|
||||
[radix8 "#o"]
|
||||
[digit8 (:/ "0" "7")]
|
||||
|
||||
[num10 (:: prefix10 complex10)]
|
||||
[complex10 (:or real10
|
||||
(:: real10 "@" real10)
|
||||
(:: real10 "+" ureal10 "i")
|
||||
(:: real10 "-" ureal10 "i")
|
||||
(:: real10 "+i")
|
||||
(:: real10 "-i")
|
||||
(:: "+" ureal10 "i")
|
||||
(:: "-" ureal10 "i")
|
||||
(:: "+i")
|
||||
(:: "-i"))]
|
||||
[real10 (:: sign ureal10)]
|
||||
[ureal10 (:or uinteger10 (:: uinteger10 "/" uinteger10) decimal10)]
|
||||
[uinteger10 (:: (:+ digit10) (:* #\#))]
|
||||
[prefix10 (:or (:: radix10 exactness)
|
||||
(:: exactness radix10))]
|
||||
[radix10 (:? "#d")]
|
||||
[digit10 digit]
|
||||
[decimal10 (:or (:: uinteger10 suffix)
|
||||
(:: #\. (:+ digit10) (:* #\#) suffix)
|
||||
(:: (:+ digit10) #\. (:* digit10) (:* #\#) suffix)
|
||||
(:: (:+ digit10) (:+ #\#) #\. (:* #\#) suffix))]
|
||||
|
||||
[num16 (:: prefix16 complex16)]
|
||||
[complex16 (:or real16
|
||||
(:: real16 "@" real16)
|
||||
(:: real16 "+" ureal16 "i")
|
||||
(:: real16 "-" ureal16 "i")
|
||||
(:: real16 "+i")
|
||||
(:: real16 "-i")
|
||||
(:: "+" ureal16 "i")
|
||||
(:: "-" ureal16 "i")
|
||||
"+i"
|
||||
"-i")]
|
||||
[real16 (:: sign ureal16)]
|
||||
[ureal16 (:or uinteger16 (:: uinteger16 "/" uinteger16))]
|
||||
[uinteger16 (:: (:+ digit16) (:* #\#))]
|
||||
[prefix16 (:or (:: radix16 exactness)
|
||||
(:: exactness radix16))]
|
||||
[radix16 "#x"]
|
||||
[digit16 (:or digit (:/ #\a #\f) (:/ #\A #\F))]
|
||||
|
||||
|
||||
[suffix (:or "" (:: exponent-marker sign (:+ digit10)))]
|
||||
[exponent-marker (:or "e" "s" "f" "d" "l")]
|
||||
[sign (:or "" "+" "-")]
|
||||
[exactness (:or "" "#i" "#e")])
|
||||
|
||||
|
||||
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
|
||||
|
||||
;; A macro to build the syntax object
|
||||
(define-syntax (build-so stx)
|
||||
(syntax-case stx ()
|
||||
((_ value start end)
|
||||
(with-syntax ((start-pos (datum->syntax-object
|
||||
(syntax end)
|
||||
(string->symbol
|
||||
(format "$~a-start-pos"
|
||||
(syntax-object->datum (syntax start))))))
|
||||
(end-pos (datum->syntax-object
|
||||
(syntax end)
|
||||
(string->symbol
|
||||
(format "$~a-end-pos"
|
||||
(syntax-object->datum (syntax end))))))
|
||||
(source (datum->syntax-object
|
||||
(syntax end)
|
||||
'source-name)))
|
||||
(syntax
|
||||
(datum->syntax-object
|
||||
#f
|
||||
value
|
||||
(list source
|
||||
(position-line start-pos)
|
||||
(position-col start-pos)
|
||||
(position-offset start-pos)
|
||||
(- (position-offset end-pos)
|
||||
(position-offset start-pos)))
|
||||
stx-for-original-property))))))
|
||||
|
||||
(define (scheme-parser source-name)
|
||||
(parser
|
||||
(src-pos)
|
||||
|
||||
(start s)
|
||||
(end EOF)
|
||||
(error (lambda (a name val start end)
|
||||
(raise-read-error
|
||||
"read-error"
|
||||
source-name
|
||||
(position-line start)
|
||||
(position-col start)
|
||||
(position-offset start)
|
||||
(- (position-offset end)
|
||||
(position-offset start)))))
|
||||
(tokens data delim)
|
||||
|
||||
|
||||
(grammar
|
||||
|
||||
(s [(sexp-list) (reverse $1)])
|
||||
|
||||
(sexp [(DATUM) (build-so $1 1 1)]
|
||||
[(OP sexp-list CP) (build-so (reverse $2) 1 3)]
|
||||
[(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)]
|
||||
[(QUOTE sexp) (build-so (list 'quote $2) 1 2)]
|
||||
[(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)]
|
||||
[(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)]
|
||||
[(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)]
|
||||
[(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)])
|
||||
|
||||
(sexp-list [() null]
|
||||
[(sexp-list sexp) (cons $2 $1)]))))
|
||||
|
||||
(define (rs sn ip)
|
||||
(port-count-lines! ip)
|
||||
((scheme-parser sn) (lambda () (scheme-lexer ip))))
|
||||
|
||||
(define readsyntax
|
||||
(case-lambda ((sn) (rs sn (current-input-port)))
|
||||
((sn ip) (rs sn ip))))
|
||||
|
||||
(provide (rename readsyntax read-syntax))
|
||||
|
||||
)
|
@ -0,0 +1,24 @@
|
||||
(module lex-plt-v200 mzscheme
|
||||
(require parser-tools/lex
|
||||
(prefix : parser-tools/lex-sre))
|
||||
|
||||
(provide epsilon
|
||||
~
|
||||
(rename :* *)
|
||||
(rename :+ +)
|
||||
(rename :? ?)
|
||||
(rename :or :)
|
||||
(rename :& &)
|
||||
(rename :: @)
|
||||
(rename :~ ^)
|
||||
(rename :/ -))
|
||||
|
||||
(define-lex-trans epsilon
|
||||
(syntax-rules ()
|
||||
((_) "")))
|
||||
|
||||
(define-lex-trans ~
|
||||
(syntax-rules ()
|
||||
((_ re) (complement re)))))
|
||||
|
||||
|
@ -0,0 +1,119 @@
|
||||
(module lex-sre mzscheme
|
||||
(require parser-tools/lex)
|
||||
|
||||
(provide (rename sre-* *)
|
||||
(rename sre-+ +)
|
||||
?
|
||||
(rename sre-= =)
|
||||
(rename sre->= >=)
|
||||
**
|
||||
(rename sre-or or)
|
||||
:
|
||||
seq
|
||||
&
|
||||
~
|
||||
(rename sre-- -)
|
||||
(rename sre-/ /)
|
||||
/-only-chars)
|
||||
|
||||
(define-lex-trans sre-*
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(repetition 0 +inf.0 (union re ...)))))
|
||||
|
||||
(define-lex-trans sre-+
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(repetition 1 +inf.0 (union re ...)))))
|
||||
|
||||
(define-lex-trans ?
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(repetition 0 1 (union re ...)))))
|
||||
|
||||
(define-lex-trans sre-=
|
||||
(syntax-rules ()
|
||||
((_ n re ...)
|
||||
(repetition n n (union re ...)))))
|
||||
|
||||
(define-lex-trans sre->=
|
||||
(syntax-rules ()
|
||||
((_ n re ...)
|
||||
(repetition n +inf.0 (union re ...)))))
|
||||
|
||||
(define-lex-trans **
|
||||
(syntax-rules ()
|
||||
((_ low #f re ...)
|
||||
(** low +inf.0 re ...))
|
||||
((_ low high re ...)
|
||||
(repetition low high (union re ...)))))
|
||||
|
||||
(define-lex-trans sre-or
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(union re ...))))
|
||||
|
||||
(define-lex-trans :
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(concatenation re ...))))
|
||||
|
||||
(define-lex-trans seq
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(concatenation re ...))))
|
||||
|
||||
(define-lex-trans &
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(intersection re ...))))
|
||||
|
||||
(define-lex-trans ~
|
||||
(syntax-rules ()
|
||||
((_ re ...)
|
||||
(char-complement (union re ...)))))
|
||||
|
||||
;; set difference
|
||||
(define-lex-trans (sre-- stx)
|
||||
(syntax-case stx ()
|
||||
((_)
|
||||
(raise-syntax-error #f
|
||||
"must have at least one argument"
|
||||
stx))
|
||||
((_ big-re re ...)
|
||||
(syntax (& big-re (complement (union re ...)))))))
|
||||
|
||||
(define-lex-trans (sre-/ stx)
|
||||
(syntax-case stx ()
|
||||
((_ range ...)
|
||||
(let ((chars
|
||||
(apply append (map (lambda (r)
|
||||
(let ((x (syntax-e r)))
|
||||
(cond
|
||||
((char? x) (list x))
|
||||
((string? x) (string->list x))
|
||||
(else
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"not a char or string"
|
||||
stx
|
||||
r)))))
|
||||
(syntax->list (syntax (range ...)))))))
|
||||
(unless (even? (length chars))
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"not given an even number of characters"
|
||||
stx))
|
||||
#`(/-only-chars #,@chars)))))
|
||||
|
||||
(define-lex-trans /-only-chars
|
||||
(syntax-rules ()
|
||||
((_ c1 c2)
|
||||
(char-range c1 c2))
|
||||
((_ c1 c2 c ...)
|
||||
(union (char-range c1 c2)
|
||||
(/-only-chars c ...)))))
|
||||
|
||||
)
|
||||
|
||||
|
@ -0,0 +1,393 @@
|
||||
(module lex mzscheme
|
||||
|
||||
;; Provides the syntax used to create lexers and the functions needed to
|
||||
;; create and use the buffer that the lexer reads from. See docs.
|
||||
|
||||
(require-for-syntax mzlib/list
|
||||
syntax/stx
|
||||
syntax/define
|
||||
syntax/boundmap
|
||||
"private-lex/util.rkt"
|
||||
"private-lex/actions.rkt"
|
||||
"private-lex/front.rkt"
|
||||
"private-lex/unicode-chars.rkt")
|
||||
|
||||
(require mzlib/stxparam
|
||||
syntax/readerr
|
||||
"private-lex/token.rkt")
|
||||
|
||||
(provide lexer lexer-src-pos define-lex-abbrev define-lex-abbrevs define-lex-trans
|
||||
|
||||
;; Dealing with tokens and related structures
|
||||
define-tokens define-empty-tokens token-name token-value token?
|
||||
(struct position (offset line col))
|
||||
(struct position-token (token start-pos end-pos))
|
||||
|
||||
;; File path for highlighting errors while lexing
|
||||
file-path
|
||||
|
||||
;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4.
|
||||
any-char any-string nothing alphabetic lower-case upper-case title-case
|
||||
numeric symbolic punctuation graphic whitespace blank iso-control
|
||||
|
||||
;; A regular expression operator
|
||||
char-set)
|
||||
|
||||
;; wrap-action: syntax-object src-pos? -> syntax-object
|
||||
(define-for-syntax (wrap-action action src-pos?)
|
||||
(with-syntax ((action-stx
|
||||
(if src-pos?
|
||||
#`(let/ec ret
|
||||
(syntax-parameterize
|
||||
((return-without-pos (make-rename-transformer #'ret)))
|
||||
(make-position-token #,action start-pos end-pos)))
|
||||
action)))
|
||||
(syntax/loc action
|
||||
(lambda (start-pos-p end-pos-p lexeme-p input-port-p)
|
||||
(syntax-parameterize
|
||||
((start-pos (make-rename-transformer #'start-pos-p))
|
||||
(end-pos (make-rename-transformer #'end-pos-p))
|
||||
(lexeme (make-rename-transformer #'lexeme-p))
|
||||
(input-port (make-rename-transformer #'input-port-p)))
|
||||
action-stx)))))
|
||||
|
||||
(define-for-syntax (make-lexer-trans src-pos?)
|
||||
(lambda (stx)
|
||||
(syntax-case stx ()
|
||||
((_ re-act ...)
|
||||
(begin
|
||||
(for-each
|
||||
(lambda (x)
|
||||
(syntax-case x ()
|
||||
((re act) (void))
|
||||
(_ (raise-syntax-error #f
|
||||
"not a regular expression / action pair"
|
||||
stx
|
||||
x))))
|
||||
(syntax->list (syntax (re-act ...))))
|
||||
(let* ((spec/re-act-lst
|
||||
(syntax->list (syntax (re-act ...))))
|
||||
(eof-act
|
||||
(get-special-action spec/re-act-lst #'eof #''eof))
|
||||
(spec-act
|
||||
(get-special-action spec/re-act-lst #'special #'(void)))
|
||||
(spec-comment-act
|
||||
(get-special-action spec/re-act-lst #'special-comment #'#f))
|
||||
(ids (list #'special #'special-comment #'eof))
|
||||
(re-act-lst
|
||||
(filter
|
||||
(lambda (spec/re-act)
|
||||
(syntax-case spec/re-act ()
|
||||
(((special) act)
|
||||
(not (ormap
|
||||
(lambda (x)
|
||||
(and (identifier? #'special)
|
||||
(module-or-top-identifier=? (syntax special) x)))
|
||||
ids)))
|
||||
(_ #t)))
|
||||
spec/re-act-lst))
|
||||
(name-lst (map (lambda (x) (datum->syntax-object #f (gensym))) re-act-lst))
|
||||
(act-lst (map (lambda (x) (stx-car (stx-cdr x))) re-act-lst))
|
||||
(re-actname-lst (map (lambda (re-act name)
|
||||
(list (stx-car re-act)
|
||||
name))
|
||||
re-act-lst
|
||||
name-lst)))
|
||||
(when (null? spec/re-act-lst)
|
||||
(raise-syntax-error (if src-pos? 'lexer/src-pos 'lexer) "expected at least one action" stx))
|
||||
(let-values (((trans start action-names no-look disappeared-uses)
|
||||
(build-lexer re-actname-lst)))
|
||||
(when (vector-ref action-names start) ;; Start state is final
|
||||
(unless (and
|
||||
;; All the successor states are final
|
||||
(andmap (lambda (x) (vector-ref action-names (vector-ref x 2)))
|
||||
(vector->list (vector-ref trans start)))
|
||||
;; Each character has a successor state
|
||||
(let loop ((check 0)
|
||||
(nexts (vector->list (vector-ref trans start))))
|
||||
(cond
|
||||
((null? nexts) #f)
|
||||
(else
|
||||
(let ((next (car nexts)))
|
||||
(and (= (vector-ref next 0) check)
|
||||
(let ((next-check (vector-ref next 1)))
|
||||
(or (>= next-check max-char-num)
|
||||
(loop (add1 next-check) (cdr nexts))))))))))
|
||||
(eprintf "Warning: lexer at ~a can accept the empty string.\n" stx)))
|
||||
(with-syntax ((start-state-stx start)
|
||||
(trans-table-stx trans)
|
||||
(no-lookahead-stx no-look)
|
||||
((name ...) name-lst)
|
||||
((act ...) (map (lambda (a)
|
||||
(wrap-action a src-pos?))
|
||||
act-lst))
|
||||
((act-name ...) (vector->list action-names))
|
||||
(spec-act-stx
|
||||
(wrap-action spec-act src-pos?))
|
||||
(has-comment-act?-stx
|
||||
(if (syntax-e spec-comment-act) #t #f))
|
||||
(spec-comment-act-stx
|
||||
(wrap-action spec-comment-act src-pos?))
|
||||
(eof-act-stx (wrap-action eof-act src-pos?)))
|
||||
(syntax-property
|
||||
(syntax/loc stx
|
||||
(let ([name act] ...)
|
||||
(let ([proc
|
||||
(lexer-body start-state-stx
|
||||
trans-table-stx
|
||||
(vector act-name ...)
|
||||
no-lookahead-stx
|
||||
spec-act-stx
|
||||
has-comment-act?-stx
|
||||
spec-comment-act-stx
|
||||
eof-act-stx)])
|
||||
;; reverse eta to get named procedures:
|
||||
(lambda (port) (proc port)))))
|
||||
'disappeared-use
|
||||
disappeared-uses)))))))))
|
||||
|
||||
(define-syntax lexer (make-lexer-trans #f))
|
||||
(define-syntax lexer-src-pos (make-lexer-trans #t))
|
||||
|
||||
(define-syntax (define-lex-abbrev stx)
|
||||
(syntax-case stx ()
|
||||
((_ name re)
|
||||
(identifier? (syntax name))
|
||||
(syntax/loc stx
|
||||
(define-syntax name
|
||||
(make-lex-abbrev (lambda () (quote-syntax re))))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"form should be (define-lex-abbrev name re)"
|
||||
stx))))
|
||||
|
||||
(define-syntax (define-lex-abbrevs stx)
|
||||
(syntax-case stx ()
|
||||
((_ x ...)
|
||||
(with-syntax (((abbrev ...)
|
||||
(map
|
||||
(lambda (a)
|
||||
(syntax-case a ()
|
||||
((name re)
|
||||
(identifier? (syntax name))
|
||||
(syntax/loc a (define-lex-abbrev name re)))
|
||||
(_ (raise-syntax-error
|
||||
#f
|
||||
"form should be (define-lex-abbrevs (name re) ...)"
|
||||
stx
|
||||
a))))
|
||||
(syntax->list (syntax (x ...))))))
|
||||
(syntax/loc stx (begin abbrev ...))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"form should be (define-lex-abbrevs (name re) ...)"
|
||||
stx))))
|
||||
|
||||
(define-syntax (define-lex-trans stx)
|
||||
(syntax-case stx ()
|
||||
((_ name-form body-form)
|
||||
(let-values (((name body)
|
||||
(normalize-definition (syntax (define-syntax name-form body-form)) #'lambda)))
|
||||
|
||||
#`(define-syntax #,name
|
||||
(let ((func #,body))
|
||||
(unless (procedure? func)
|
||||
(raise-syntax-error 'define-lex-trans "expected a procedure as the transformer, got ~e" func))
|
||||
(unless (procedure-arity-includes? func 1)
|
||||
(raise-syntax-error 'define-lex-trans "expected a procedure that accepts 1 argument as the transformer, got ~e" func))
|
||||
(make-lex-trans func)))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"form should be (define-lex-trans name transformer)"
|
||||
stx))))
|
||||
|
||||
|
||||
(define (get-next-state-helper char min max table)
|
||||
(if (>= min max)
|
||||
#f
|
||||
(let* ((try (quotient (+ min max) 2))
|
||||
(el (vector-ref table try))
|
||||
(r1 (vector-ref el 0))
|
||||
(r2 (vector-ref el 1)))
|
||||
(cond
|
||||
((and (>= char r1) (<= char r2)) (vector-ref el 2))
|
||||
((< char r1) (get-next-state-helper char min try table))
|
||||
(else (get-next-state-helper char (add1 try) max table))))))
|
||||
|
||||
|
||||
|
||||
|
||||
(define (get-next-state char table)
|
||||
(if table
|
||||
(get-next-state-helper char 0 (vector-length table) table)
|
||||
#f))
|
||||
|
||||
(define (lexer-body start-state trans-table actions no-lookahead special-action
|
||||
has-special-comment-action? special-comment-action eof-action)
|
||||
(letrec ((lexer
|
||||
(lambda (ip)
|
||||
(let ((first-pos (get-position ip))
|
||||
(first-char (peek-char-or-special ip 0)))
|
||||
;(printf "(peek-char-or-special port 0) = ~e\n" first-char)
|
||||
(cond
|
||||
((eof-object? first-char)
|
||||
(do-match ip first-pos eof-action (read-char-or-special ip)))
|
||||
((special-comment? first-char)
|
||||
(read-char-or-special ip)
|
||||
(cond
|
||||
(has-special-comment-action?
|
||||
(do-match ip first-pos special-comment-action #f))
|
||||
(else (lexer ip))))
|
||||
((not (char? first-char))
|
||||
(do-match ip first-pos special-action (read-char-or-special ip)))
|
||||
(else
|
||||
(let lexer-loop (
|
||||
;; current-state
|
||||
(state start-state)
|
||||
;; the character to transition on
|
||||
(char first-char)
|
||||
;; action for the longest match seen thus far
|
||||
;; including a match at the current state
|
||||
(longest-match-action
|
||||
(vector-ref actions start-state))
|
||||
;; how many bytes precede char
|
||||
(length-bytes 0)
|
||||
;; how many characters have been read
|
||||
;; including the one just read
|
||||
(length-chars 1)
|
||||
;; how many characters are in the longest match
|
||||
(longest-match-length 0))
|
||||
(let ((next-state
|
||||
(cond
|
||||
((not (char? char)) #f)
|
||||
(else (get-next-state (char->integer char)
|
||||
(vector-ref trans-table state))))))
|
||||
(cond
|
||||
((not next-state)
|
||||
(check-match ip first-pos longest-match-length
|
||||
length-chars longest-match-action))
|
||||
((vector-ref no-lookahead next-state)
|
||||
(let ((act (vector-ref actions next-state)))
|
||||
(check-match ip
|
||||
first-pos
|
||||
(if act length-chars longest-match-length)
|
||||
length-chars
|
||||
(if act act longest-match-action))))
|
||||
(else
|
||||
(let* ((act (vector-ref actions next-state))
|
||||
(next-length-bytes (+ (char-utf-8-length char) length-bytes))
|
||||
(next-char (peek-char-or-special ip next-length-bytes)))
|
||||
#;(printf "(peek-char-or-special port ~e) = ~e\n"
|
||||
next-length-bytes next-char)
|
||||
(lexer-loop next-state
|
||||
next-char
|
||||
(if act
|
||||
act
|
||||
longest-match-action)
|
||||
next-length-bytes
|
||||
(add1 length-chars)
|
||||
(if act
|
||||
length-chars
|
||||
longest-match-length)))))))))))))
|
||||
(lambda (ip)
|
||||
(unless (input-port? ip)
|
||||
(raise-argument-error
|
||||
'lexer
|
||||
"input-port?"
|
||||
0
|
||||
ip))
|
||||
(lexer ip))))
|
||||
|
||||
(define (check-match lb first-pos longest-match-length length longest-match-action)
|
||||
(unless longest-match-action
|
||||
(let* ((match (read-string length lb))
|
||||
(end-pos (get-position lb)))
|
||||
(raise-read-error
|
||||
(format "lexer: No match found in input starting with: ~a" match)
|
||||
(file-path)
|
||||
(position-line first-pos)
|
||||
(position-col first-pos)
|
||||
(position-offset first-pos)
|
||||
(- (position-offset end-pos) (position-offset first-pos)))))
|
||||
(let ((match (read-string longest-match-length lb)))
|
||||
;(printf "(read-string ~e port) = ~e\n" longest-match-length match)
|
||||
(do-match lb first-pos longest-match-action match)))
|
||||
|
||||
(define file-path (make-parameter #f))
|
||||
|
||||
(define (do-match ip first-pos action value)
|
||||
#;(printf "(action ~a ~a ~a ~a)\n"
|
||||
(position-offset first-pos) (position-offset (get-position ip)) value ip)
|
||||
(action first-pos (get-position ip) value ip))
|
||||
|
||||
(define (get-position ip)
|
||||
(let-values (((line col off) (port-next-location ip)))
|
||||
(make-position off line col)))
|
||||
|
||||
(define-syntax (create-unicode-abbrevs stx)
|
||||
(syntax-case stx ()
|
||||
((_ ctxt)
|
||||
(with-syntax (((ranges ...) (map (lambda (range)
|
||||
`(union ,@(map (lambda (x)
|
||||
`(char-range ,(integer->char (car x))
|
||||
,(integer->char (cdr x))))
|
||||
range)))
|
||||
(list (force alphabetic-ranges)
|
||||
(force lower-case-ranges)
|
||||
(force upper-case-ranges)
|
||||
(force title-case-ranges)
|
||||
(force numeric-ranges)
|
||||
(force symbolic-ranges)
|
||||
(force punctuation-ranges)
|
||||
(force graphic-ranges)
|
||||
(force whitespace-ranges)
|
||||
(force blank-ranges)
|
||||
(force iso-control-ranges))))
|
||||
((names ...) (map (lambda (sym)
|
||||
(datum->syntax-object (syntax ctxt) sym #f))
|
||||
'(alphabetic
|
||||
lower-case
|
||||
upper-case
|
||||
title-case
|
||||
numeric
|
||||
symbolic
|
||||
punctuation
|
||||
graphic
|
||||
whitespace
|
||||
blank
|
||||
iso-control))))
|
||||
(syntax (define-lex-abbrevs (names ranges) ...))))))
|
||||
|
||||
(define-lex-abbrev any-char (char-complement (union)))
|
||||
(define-lex-abbrev any-string (intersection))
|
||||
(define-lex-abbrev nothing (union))
|
||||
(create-unicode-abbrevs #'here)
|
||||
|
||||
(define-lex-trans (char-set stx)
|
||||
(syntax-case stx ()
|
||||
((_ str)
|
||||
(string? (syntax-e (syntax str)))
|
||||
(with-syntax (((char ...) (string->list (syntax-e (syntax str)))))
|
||||
(syntax (union char ...))))))
|
||||
|
||||
(define-syntax provide-lex-keyword
|
||||
(syntax-rules ()
|
||||
[(_ id ...)
|
||||
(begin
|
||||
(define-syntax-parameter id
|
||||
(make-set!-transformer
|
||||
(lambda (stx)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
(format "use of a lexer keyword (~a) is not in an appropriate lexer action"
|
||||
'id)
|
||||
stx))))
|
||||
...
|
||||
(provide id ...))]))
|
||||
|
||||
(provide-lex-keyword start-pos end-pos lexeme input-port return-without-pos)
|
||||
|
||||
)
|
@ -0,0 +1,339 @@
|
||||
(module deriv mzscheme
|
||||
|
||||
(require mzlib/list
|
||||
(prefix is: mzlib/integer-set)
|
||||
"re.rkt"
|
||||
"util.rkt")
|
||||
|
||||
(provide build-dfa print-dfa (struct dfa (num-states start-state final-states/actions transitions)))
|
||||
|
||||
(define e (build-epsilon))
|
||||
(define z (build-zero))
|
||||
|
||||
|
||||
;; Don't do anything with this one but extract the chars
|
||||
(define all-chars (->re `(char-complement (union)) (make-cache)))
|
||||
|
||||
;; get-char-groups : re bool -> (list-of char-setR?)
|
||||
;; Collects the char-setRs in r that could be used in
|
||||
;; taking the derivative of r.
|
||||
(define (get-char-groups r found-negation)
|
||||
(cond
|
||||
((or (eq? r e) (eq? r z)) null)
|
||||
((char-setR? r) (list r))
|
||||
((concatR? r)
|
||||
(if (re-nullable? (concatR-re1 r))
|
||||
(append (get-char-groups (concatR-re1 r) found-negation)
|
||||
(get-char-groups (concatR-re2 r) found-negation))
|
||||
(get-char-groups (concatR-re1 r) found-negation)))
|
||||
((repeatR? r)
|
||||
(get-char-groups (repeatR-re r) found-negation))
|
||||
((orR? r)
|
||||
(apply append (map (lambda (x) (get-char-groups x found-negation)) (orR-res r))))
|
||||
((andR? r)
|
||||
(apply append (map (lambda (x) (get-char-groups x found-negation)) (andR-res r))))
|
||||
((negR? r)
|
||||
(if found-negation
|
||||
(get-char-groups (negR-re r) #t)
|
||||
(cons all-chars (get-char-groups (negR-re r) #t))))))
|
||||
|
||||
(test-block ((c (make-cache))
|
||||
(r1 (->re #\1 c))
|
||||
(r2 (->re #\2 c)))
|
||||
((get-char-groups e #f) null)
|
||||
((get-char-groups z #f) null)
|
||||
((get-char-groups r1 #f) (list r1))
|
||||
((get-char-groups (->re `(concatenation ,r1 ,r2) c) #f)
|
||||
(list r1))
|
||||
((get-char-groups (->re `(concatenation ,e ,r2) c) #f)
|
||||
(list r2))
|
||||
((get-char-groups (->re `(concatenation (repetition 0 +inf.0 ,r1) ,r2) c) #f)
|
||||
(list r1 r2))
|
||||
((get-char-groups (->re `(repetition 0 +inf.0 ,r1) c) #f)
|
||||
(list r1))
|
||||
((get-char-groups
|
||||
(->re `(union (repetition 0 +inf.0 ,r1)
|
||||
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
|
||||
(list r1 r2 (->re "3" c) (->re "4" c)))
|
||||
((get-char-groups (->re `(complement ,r1) c) #f)
|
||||
(list all-chars r1))
|
||||
((get-char-groups
|
||||
(->re `(intersection (repetition 0 +inf.0 ,r1)
|
||||
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
|
||||
(list r1 r2 (->re "3" c) (->re "4" c)))
|
||||
)
|
||||
(define loc:member? is:member?)
|
||||
|
||||
;; deriveR : re char cache -> re
|
||||
(define (deriveR r c cache)
|
||||
(cond
|
||||
((or (eq? r e) (eq? r z)) z)
|
||||
((char-setR? r)
|
||||
(if (loc:member? c (char-setR-chars r)) e z))
|
||||
((concatR? r)
|
||||
(let* ((r1 (concatR-re1 r))
|
||||
(r2 (concatR-re2 r))
|
||||
(d (build-concat (deriveR r1 c cache) r2 cache)))
|
||||
(if (re-nullable? r1)
|
||||
(build-or (list d (deriveR r2 c cache)) cache)
|
||||
d)))
|
||||
((repeatR? r)
|
||||
(build-concat (deriveR (repeatR-re r) c cache)
|
||||
(build-repeat (sub1 (repeatR-low r))
|
||||
(sub1 (repeatR-high r))
|
||||
(repeatR-re r) cache)
|
||||
cache))
|
||||
((orR? r)
|
||||
(build-or (map (lambda (x) (deriveR x c cache))
|
||||
(orR-res r))
|
||||
cache))
|
||||
((andR? r)
|
||||
(build-and (map (lambda (x) (deriveR x c cache))
|
||||
(andR-res r))
|
||||
cache))
|
||||
((negR? r)
|
||||
(build-neg (deriveR (negR-re r) c cache) cache))))
|
||||
|
||||
(test-block ((c (make-cache))
|
||||
(a (char->integer #\a))
|
||||
(b (char->integer #\b))
|
||||
(r1 (->re #\a c))
|
||||
(r2 (->re `(repetition 0 +inf.0 #\a) c))
|
||||
(r3 (->re `(repetition 0 +inf.0 ,r2) c))
|
||||
(r4 (->re `(concatenation #\a ,r2) c))
|
||||
(r5 (->re `(repetition 0 +inf.0 ,r4) c))
|
||||
(r6 (->re `(union ,r5 #\a) c))
|
||||
(r7 (->re `(concatenation ,r2 ,r2) c))
|
||||
(r8 (->re `(complement ,r4) c))
|
||||
(r9 (->re `(intersection ,r2 ,r4) c)))
|
||||
((deriveR e a c) z)
|
||||
((deriveR z a c) z)
|
||||
((deriveR r1 b c) z)
|
||||
((deriveR r1 a c) e)
|
||||
((deriveR r2 a c) r2)
|
||||
((deriveR r2 b c) z)
|
||||
((deriveR r3 a c) r2)
|
||||
((deriveR r3 b c) z)
|
||||
((deriveR r4 a c) r2)
|
||||
((deriveR r4 b c) z)
|
||||
((deriveR r5 a c) (->re `(concatenation ,r2 ,r5) c))
|
||||
((deriveR r5 b c) z)
|
||||
((deriveR r6 a c) (->re `(union (concatenation ,r2 ,r5) "") c))
|
||||
((deriveR r6 b c) z)
|
||||
((deriveR r7 a c) (->re `(union (concatenation ,r2 ,r2) ,r2) c))
|
||||
((deriveR r7 b c) z)
|
||||
((deriveR r8 a c) (->re `(complement, r2) c))
|
||||
((deriveR r8 b c) (->re `(complement ,z) c))
|
||||
((deriveR r9 a c) r2)
|
||||
((deriveR r9 b c) z)
|
||||
((deriveR (->re `(repetition 1 2 "ab") c) a c)
|
||||
(->re `(concatenation "b" (repetition 0 1 "ab")) c)))
|
||||
|
||||
;; An re-action is (cons re action)
|
||||
|
||||
;; derive : (list-of re-action) char cache -> (union (list-of re-action) #f)
|
||||
;; applies deriveR to all the re-actions's re parts.
|
||||
;; Returns #f if the derived state is equivalent to z.
|
||||
(define (derive r c cache)
|
||||
(let ((new-r (map (lambda (ra)
|
||||
(cons (deriveR (car ra) c cache) (cdr ra)))
|
||||
r)))
|
||||
(if (andmap (lambda (x) (eq? z (car x)))
|
||||
new-r)
|
||||
#f
|
||||
new-r)))
|
||||
|
||||
(test-block ((c (make-cache))
|
||||
(r1 (->re #\1 c))
|
||||
(r2 (->re #\2 c)))
|
||||
((derive null (char->integer #\1) c) #f)
|
||||
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c)
|
||||
(list (cons e 1) (cons z 2)))
|
||||
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f))
|
||||
|
||||
|
||||
;; get-final : (list-of re-action) -> (union #f syntax-object)
|
||||
;; An re that accepts e represents a final state. Return the
|
||||
;; action from the first final state or #f if there is none.
|
||||
(define (get-final res)
|
||||
(cond
|
||||
((null? res) #f)
|
||||
((re-nullable? (caar res)) (cdar res))
|
||||
(else (get-final (cdr res)))))
|
||||
|
||||
(test-block ((c->i char->integer)
|
||||
(c (make-cache))
|
||||
(r1 (->re #\a c))
|
||||
(r2 (->re #\b c))
|
||||
(b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5)))
|
||||
(a (list (cons r1 1) (cons r2 2))))
|
||||
((derive null (c->i #\a) c) #f)
|
||||
((derive a (c->i #\a) c) (list (cons e 1) (cons z 2)))
|
||||
((derive a (c->i #\b) c) (list (cons z 1) (cons e 2)))
|
||||
((derive a (c->i #\c) c) #f)
|
||||
((derive (list (cons (->re `(union " " "\n" ",") c) 1)
|
||||
(cons (->re `(concatenation (repetition 0 1 "-")
|
||||
(repetition 1 +inf.0 (char-range "0" "9"))) c) 2)
|
||||
(cons (->re `(concatenation "-" (repetition 1 +inf.0 "-")) c) 3)
|
||||
(cons (->re "[" c) 4)
|
||||
(cons (->re "]" c) 5)) (c->i #\[) c)
|
||||
b)
|
||||
((get-final a) #f)
|
||||
((get-final (list (cons e 1) (cons e 2))) 1)
|
||||
((get-final b) 4))
|
||||
|
||||
|
||||
;; A state is (make-state (list-of re-action) nat)
|
||||
(define-struct state (spec index))
|
||||
|
||||
;; get->key : re-action -> (list-of nat)
|
||||
;; states are indexed by the list of indexes of their res
|
||||
(define (get-key s)
|
||||
(map (lambda (x) (re-index (car x))) s))
|
||||
|
||||
(define loc:partition is:partition)
|
||||
|
||||
;; compute-chars : (list-of state) -> (list-of char-set)
|
||||
;; Computed the sets of equivalent characters for taking the
|
||||
;; derivative of the car of st. Only one derivative per set need to be taken.
|
||||
(define (compute-chars st)
|
||||
(cond
|
||||
((null? st) null)
|
||||
(else
|
||||
(loc:partition (map char-setR-chars
|
||||
(apply append (map (lambda (x) (get-char-groups (car x) #f))
|
||||
(state-spec (car st)))))))))
|
||||
|
||||
(test-block ((c (make-cache))
|
||||
(c->i char->integer)
|
||||
(r1 (->re `(char-range #\1 #\4) c))
|
||||
(r2 (->re `(char-range #\2 #\3) c)))
|
||||
((compute-chars null) null)
|
||||
((compute-chars (list (make-state null 1))) null)
|
||||
((map is:integer-set-contents
|
||||
(compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2))))
|
||||
(list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3)))
|
||||
(is:integer-set-contents (is:union (is:make-range (c->i #\1))
|
||||
(is:make-range (c->i #\4)))))))
|
||||
|
||||
|
||||
;; A dfa is (make-dfa int int
|
||||
;; (list-of (cons int syntax-object))
|
||||
;; (list-of (cons int (list-of (cons char-set int)))))
|
||||
;; Each transitions is a state and a list of chars with the state to transition to.
|
||||
;; The finals and transitions are sorted by state number, and duplicate free.
|
||||
(define-struct dfa (num-states start-state final-states/actions transitions) (make-inspector))
|
||||
|
||||
(define loc:get-integer is:get-integer)
|
||||
|
||||
;; build-dfa : (list-of re-action) cache -> dfa
|
||||
(define (build-dfa rs cache)
|
||||
(let* ((transitions (make-hash-table))
|
||||
(get-state-number (make-counter))
|
||||
(start (make-state rs (get-state-number))))
|
||||
(cache (cons 'state (get-key rs)) (lambda () start))
|
||||
(let loop ((old-states (list start))
|
||||
(new-states null)
|
||||
(all-states (list start))
|
||||
(cs (compute-chars (list start))))
|
||||
(cond
|
||||
((and (null? old-states) (null? new-states))
|
||||
(make-dfa (get-state-number) (state-index start)
|
||||
(sort (filter (lambda (x) (cdr x))
|
||||
(map (lambda (state)
|
||||
(cons (state-index state) (get-final (state-spec state))))
|
||||
all-states))
|
||||
(lambda (a b) (< (car a) (car b))))
|
||||
(sort (hash-table-map transitions
|
||||
(lambda (state trans)
|
||||
(cons (state-index state)
|
||||
(map (lambda (t)
|
||||
(cons (car t)
|
||||
(state-index (cdr t))))
|
||||
trans))))
|
||||
(lambda (a b) (< (car a) (car b))))))
|
||||
((null? old-states)
|
||||
(loop new-states null all-states (compute-chars new-states)))
|
||||
((null? cs)
|
||||
(loop (cdr old-states) new-states all-states (compute-chars (cdr old-states))))
|
||||
(else
|
||||
(let* ((state (car old-states))
|
||||
(c (car cs))
|
||||
(new-re (derive (state-spec state) (loc:get-integer c) cache)))
|
||||
(cond
|
||||
(new-re
|
||||
(let* ((new-state? #f)
|
||||
(new-state (cache (cons 'state (get-key new-re))
|
||||
(lambda ()
|
||||
(set! new-state? #t)
|
||||
(make-state new-re (get-state-number)))))
|
||||
(new-all-states (if new-state? (cons new-state all-states) all-states)))
|
||||
(hash-table-put! transitions
|
||||
state
|
||||
(cons (cons c new-state)
|
||||
(hash-table-get transitions state
|
||||
(lambda () null))))
|
||||
(cond
|
||||
(new-state?
|
||||
(loop old-states (cons new-state new-states) new-all-states (cdr cs)))
|
||||
(else
|
||||
(loop old-states new-states new-all-states (cdr cs))))))
|
||||
(else (loop old-states new-states all-states (cdr cs))))))))))
|
||||
|
||||
(define (print-dfa x)
|
||||
(printf "number of states: ~a\n" (dfa-num-states x))
|
||||
(printf "start state: ~a\n" (dfa-start-state x))
|
||||
(printf "final states: ~a\n" (map car (dfa-final-states/actions x)))
|
||||
(for-each (lambda (trans)
|
||||
(printf "state: ~a\n" (car trans))
|
||||
(for-each (lambda (rule)
|
||||
(printf " -~a-> ~a\n"
|
||||
(is:integer-set-contents (car rule))
|
||||
(cdr rule)))
|
||||
(cdr trans)))
|
||||
(dfa-transitions x)))
|
||||
|
||||
(define (build-test-dfa rs)
|
||||
(let ((c (make-cache)))
|
||||
(build-dfa (map (lambda (x) (cons (->re x c) 'action))
|
||||
rs)
|
||||
c)))
|
||||
|
||||
|
||||
#|
|
||||
(define t1 (build-test-dfa null))
|
||||
(define t2 (build-test-dfa `(#\a)))
|
||||
(define t3 (build-test-dfa `(#\a #\b)))
|
||||
(define t4 (build-test-dfa `((repetition 0 +inf.0 #\a)
|
||||
(repetition 0 +inf.0 (concatenation #\a #\b)))))
|
||||
(define t5 (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\0 #\1)) #\1))))
|
||||
(define t6 (build-test-dfa `((repetition 0 +inf.0 (repetition 0 +inf.0 #\a))
|
||||
(repetition 0 +inf.0 (concatenation #\b (repetition 1 +inf.0 #\b))))))
|
||||
(define t7 (build-test-dfa `((concatenation (repetition 0 +inf.0 #\a) (repetition 0 +inf.0 #\b)
|
||||
(repetition 0 +inf.0 #\c) (repetition 0 +inf.0 #\d)
|
||||
(repetition 0 +inf.0 #\e)))))
|
||||
(define t8
|
||||
(build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\a #\b)) #\a (union #\a #\b)
|
||||
(union #\a #\b) (union #\a #\b) (union #\a #\b)))))
|
||||
(define t9 (build-test-dfa `((concatenation "/*"
|
||||
(complement (concatenation (intersection) "*/" (intersection)))
|
||||
"*/"))))
|
||||
(define t11 (build-test-dfa `((complement "1"))))
|
||||
(define t12 (build-test-dfa `((concatenation (intersection (concatenation (repetition 0 +inf.0 "a") "b")
|
||||
(concatenation "a" (repetition 0 +inf.0 "b")))
|
||||
"ab"))))
|
||||
(define x (build-test-dfa `((union " " "\n" ",")
|
||||
(concatenation (repetition 0 1 "-") (repetition 1 +inf.0 (char-range "0" "9")))
|
||||
(concatenation "-" (repetition 1 +inf.0 "-"))
|
||||
"["
|
||||
"]")))
|
||||
(define y (build-test-dfa
|
||||
`((repetition 1 +inf.0
|
||||
(union (concatenation "|" (repetition 0 +inf.0 (char-complement "|")) "|")
|
||||
(concatenation "|" (repetition 0 +inf.0 (char-complement "|"))))))))
|
||||
(define t13 (build-test-dfa `((intersection (concatenation (intersection) "111" (intersection))
|
||||
(complement (union (concatenation (intersection) "01")
|
||||
(repetition 1 +inf.0 "1")))))))
|
||||
(define t14 (build-test-dfa `((complement "1"))))
|
||||
|#
|
||||
)
|
@ -1,5 +1,5 @@
|
||||
#lang racket/base
|
||||
(require (for-syntax racket/base)
|
||||
#lang scheme/base
|
||||
(require (for-syntax scheme/base)
|
||||
"../lex.rkt"
|
||||
rackunit)
|
||||
|
@ -0,0 +1,179 @@
|
||||
(module front mzscheme
|
||||
(require (prefix is: mzlib/integer-set)
|
||||
mzlib/list
|
||||
syntax/stx
|
||||
"util.rkt"
|
||||
"stx.rkt"
|
||||
"re.rkt"
|
||||
"deriv.rkt")
|
||||
|
||||
(provide build-lexer)
|
||||
|
||||
(define-syntax time-label
|
||||
(syntax-rules ()
|
||||
((_ l e ...)
|
||||
(begin
|
||||
(printf "~a: " l)
|
||||
(time (begin e ...))))))
|
||||
|
||||
;; A table is either
|
||||
;; - (vector-of (union #f nat))
|
||||
;; - (vector-of (vector-of (vector nat nat nat)))
|
||||
|
||||
(define loc:integer-set-contents is:integer-set-contents)
|
||||
|
||||
;; dfa->1d-table : dfa -> (same as build-lexer)
|
||||
(define (dfa->1d-table dfa)
|
||||
(let ((state-table (make-vector (dfa-num-states dfa) #f))
|
||||
(transition-cache (make-hash-table 'equal)))
|
||||
(for-each
|
||||
(lambda (trans)
|
||||
(let* ((from-state (car trans))
|
||||
(all-chars/to (cdr trans))
|
||||
(flat-all-chars/to
|
||||
(sort
|
||||
(apply append
|
||||
(map (lambda (chars/to)
|
||||
(let ((char-ranges (loc:integer-set-contents (car chars/to)))
|
||||
(to (cdr chars/to)))
|
||||
(map (lambda (char-range)
|
||||
(let ((entry (vector (car char-range) (cdr char-range) to)))
|
||||
(hash-table-get transition-cache entry
|
||||
(lambda ()
|
||||
(hash-table-put! transition-cache
|
||||
entry
|
||||
entry)
|
||||
entry))))
|
||||
char-ranges)))
|
||||
all-chars/to))
|
||||
(lambda (a b)
|
||||
(< (vector-ref a 0) (vector-ref b 0))))))
|
||||
(vector-set! state-table from-state (list->vector flat-all-chars/to))))
|
||||
(dfa-transitions dfa))
|
||||
state-table))
|
||||
|
||||
|
||||
(define loc:foldr is:foldr)
|
||||
|
||||
;; dfa->2d-table : dfa -> (same as build-lexer)
|
||||
(define (dfa->2d-table dfa)
|
||||
(let (
|
||||
;; char-table : (vector-of (union #f nat))
|
||||
;; The lexer table, one entry per state per char.
|
||||
;; Each entry specifies a state to transition to.
|
||||
;; #f indicates no transition
|
||||
(char-table (make-vector (* 256 (dfa-num-states dfa)) #f)))
|
||||
|
||||
;; Fill the char-table vector
|
||||
(for-each
|
||||
(lambda (trans)
|
||||
(let ((from-state (car trans)))
|
||||
(for-each (lambda (chars/to)
|
||||
(let ((to-state (cdr chars/to)))
|
||||
(loc:foldr (lambda (char _)
|
||||
(vector-set! char-table
|
||||
(bitwise-ior
|
||||
char
|
||||
(arithmetic-shift from-state 8))
|
||||
to-state))
|
||||
(void)
|
||||
(car chars/to))))
|
||||
(cdr trans))))
|
||||
(dfa-transitions dfa))
|
||||
char-table))
|
||||
|
||||
|
||||
;; dfa->actions : dfa -> (vector-of (union #f syntax-object))
|
||||
;; The action for each final state, #f if the state isn't final
|
||||
(define (dfa->actions dfa)
|
||||
(let ((actions (make-vector (dfa-num-states dfa) #f)))
|
||||
(for-each (lambda (state/action)
|
||||
(vector-set! actions (car state/action) (cdr state/action)))
|
||||
(dfa-final-states/actions dfa))
|
||||
actions))
|
||||
|
||||
;; dfa->no-look : dfa -> (vector-of bool)
|
||||
;; For each state whether the lexer can ignore the next input.
|
||||
;; It can do this only if there are no transitions out of the
|
||||
;; current state.
|
||||
(define (dfa->no-look dfa)
|
||||
(let ((no-look (make-vector (dfa-num-states dfa) #t)))
|
||||
(for-each (lambda (trans)
|
||||
(vector-set! no-look (car trans) #f))
|
||||
(dfa-transitions dfa))
|
||||
no-look))
|
||||
|
||||
(test-block ((d1 (make-dfa 1 1 (list) (list)))
|
||||
(d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
|
||||
(list (cons 1 (list (cons (is:make-range 49 50) 1)
|
||||
(cons (is:make-range 51) 2)))
|
||||
(cons 2 (list (cons (is:make-range 49) 3))))))
|
||||
(d3 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
|
||||
(list (cons 1 (list (cons (is:make-range 100 200) 0)
|
||||
(cons (is:make-range 49 50) 1)
|
||||
(cons (is:make-range 51) 2)))
|
||||
(cons 2 (list (cons (is:make-range 49) 3)))))))
|
||||
((dfa->2d-table d1) (make-vector 256 #f))
|
||||
((dfa->2d-table d2) (let ((v (make-vector 1024 #f)))
|
||||
(vector-set! v 305 1)
|
||||
(vector-set! v 306 1)
|
||||
(vector-set! v 307 2)
|
||||
(vector-set! v 561 3)
|
||||
v))
|
||||
((dfa->1d-table d1) (make-vector 1 #f))
|
||||
((dfa->1d-table d2) #(#f
|
||||
#(#(49 50 1) #(51 51 2))
|
||||
#(#(49 49 3))
|
||||
#f))
|
||||
((dfa->1d-table d3) #(#f
|
||||
#(#(49 50 1) #(51 51 2) #(100 200 0))
|
||||
#(#(49 49 3))
|
||||
#f))
|
||||
((dfa->actions d1) (vector #f))
|
||||
((dfa->actions d2) (vector #f #f 2 3))
|
||||
((dfa->no-look d1) (vector #t))
|
||||
((dfa->no-look d2) (vector #t #f #f #t)))
|
||||
|
||||
;; build-lexer : syntax-object list ->
|
||||
;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object))
|
||||
;; each syntax object has the form (re action)
|
||||
(define (build-lexer sos)
|
||||
(let* ((disappeared-uses (box null))
|
||||
(s-re-acts (map (lambda (so)
|
||||
(cons (parse (stx-car so) disappeared-uses)
|
||||
(stx-car (stx-cdr so))))
|
||||
sos))
|
||||
|
||||
(cache (make-cache))
|
||||
|
||||
(re-acts (map (lambda (s-re-act)
|
||||
(cons (->re (car s-re-act) cache)
|
||||
(cdr s-re-act)))
|
||||
s-re-acts))
|
||||
|
||||
(dfa (build-dfa re-acts cache))
|
||||
(table (dfa->1d-table dfa)))
|
||||
;(print-dfa dfa)
|
||||
#;(let ((num-states (vector-length table))
|
||||
(num-vectors (length (filter values (vector->list table))))
|
||||
(num-entries (apply + (map
|
||||
(lambda (x) (if x (vector-length x) 0))
|
||||
(vector->list table))))
|
||||
(num-different-entries
|
||||
(let ((ht (make-hash-table)))
|
||||
(for-each
|
||||
(lambda (x)
|
||||
(when x
|
||||
(for-each
|
||||
(lambda (y)
|
||||
(hash-table-put! ht y #t))
|
||||
(vector->list x))))
|
||||
(vector->list table))
|
||||
(length (hash-table-map ht cons)))))
|
||||
(printf "~a states, ~aKB\n"
|
||||
num-states
|
||||
(/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries
|
||||
(* 5 num-different-entries))) 1024)))
|
||||
(values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa)
|
||||
(unbox disappeared-uses))))
|
||||
)
|
@ -0,0 +1,385 @@
|
||||
(module re mzscheme
|
||||
(require mzlib/list
|
||||
scheme/match
|
||||
(prefix is: mzlib/integer-set)
|
||||
"util.rkt")
|
||||
|
||||
(provide ->re build-epsilon build-zero build-char-set build-concat
|
||||
build-repeat build-or build-and build-neg
|
||||
epsilonR? zeroR? char-setR? concatR? repeatR? orR? andR? negR?
|
||||
char-setR-chars concatR-re1 concatR-re2 repeatR-re repeatR-low repeatR-high
|
||||
orR-res andR-res negR-re
|
||||
re-nullable? re-index)
|
||||
|
||||
;; get-index : -> nat
|
||||
(define get-index (make-counter))
|
||||
|
||||
;; An re is either
|
||||
;; - (make-epsilonR bool nat)
|
||||
;; - (make-zeroR bool nat)
|
||||
;; - (make-char-setR bool nat char-set)
|
||||
;; - (make-concatR bool nat re re)
|
||||
;; - (make-repeatR bool nat nat nat-or-+inf.0 re)
|
||||
;; - (make-orR bool nat (list-of re)) Must not directly contain any orRs
|
||||
;; - (make-andR bool nat (list-of re)) Must not directly contain any andRs
|
||||
;; - (make-negR bool nat re)
|
||||
;;
|
||||
;; Every re must have an index field globally different from all
|
||||
;; other re index fields.
|
||||
(define-struct re (nullable? index) (make-inspector))
|
||||
(define-struct (epsilonR re) () (make-inspector))
|
||||
(define-struct (zeroR re) () (make-inspector))
|
||||
(define-struct (char-setR re) (chars) (make-inspector))
|
||||
(define-struct (concatR re) (re1 re2) (make-inspector))
|
||||
(define-struct (repeatR re) (low high re) (make-inspector))
|
||||
(define-struct (orR re) (res) (make-inspector))
|
||||
(define-struct (andR re) (res) (make-inspector))
|
||||
(define-struct (negR re) (re) (make-inspector))
|
||||
|
||||
;; e : re
|
||||
;; The unique epsilon re
|
||||
(define e (make-epsilonR #t (get-index)))
|
||||
|
||||
;; z : re
|
||||
;; The unique zero re
|
||||
(define z (make-zeroR #f (get-index)))
|
||||
|
||||
|
||||
;; s-re = char constant
|
||||
;; | string constant (sequence of characters)
|
||||
;; | re a precompiled re
|
||||
;; | (repetition low high s-re) repetition between low and high times (inclusive)
|
||||
;; | (union s-re ...)
|
||||
;; | (intersection s-re ...)
|
||||
;; | (complement s-re)
|
||||
;; | (concatenation s-re ...)
|
||||
;; | (char-range rng rng) match any character between two (inclusive)
|
||||
;; | (char-complement char-set) match any character not listed
|
||||
;; low = natural-number
|
||||
;; high = natural-number or +inf.0
|
||||
;; rng = char or string with length 1
|
||||
;; (concatenation) (repetition 0 0 x), and "" match the empty string.
|
||||
;; (union) matches no strings.
|
||||
;; (intersection) matches any string.
|
||||
|
||||
(define loc:make-range is:make-range)
|
||||
(define loc:union is:union)
|
||||
(define loc:split is:split)
|
||||
(define loc:complement is:complement)
|
||||
|
||||
;; ->re : s-re cache -> re
|
||||
(define (->re exp cache)
|
||||
(match exp
|
||||
((? char?) (build-char-set (loc:make-range (char->integer exp)) cache))
|
||||
((? string?) (->re `(concatenation ,@(string->list exp)) cache))
|
||||
((? re?) exp)
|
||||
(`(repetition ,low ,high ,r)
|
||||
(build-repeat low high (->re r cache) cache))
|
||||
(`(union ,rs ...)
|
||||
(build-or (flatten-res (map (lambda (r) (->re r cache)) rs)
|
||||
orR? orR-res loc:union cache)
|
||||
cache))
|
||||
(`(intersection ,rs ...)
|
||||
(build-and (flatten-res (map (lambda (r) (->re r cache)) rs)
|
||||
andR? andR-res (lambda (a b)
|
||||
(let-values (((i _ __) (loc:split a b))) i))
|
||||
cache)
|
||||
cache))
|
||||
(`(complement ,r)
|
||||
(build-neg (->re r cache) cache))
|
||||
(`(concatenation ,rs ...)
|
||||
(foldr (lambda (x y)
|
||||
(build-concat (->re x cache) y cache))
|
||||
e
|
||||
rs))
|
||||
(`(char-range ,c1 ,c2)
|
||||
(let ((i1 (char->integer (if (string? c1) (string-ref c1 0) c1)))
|
||||
(i2 (char->integer (if (string? c2) (string-ref c2 0) c2))))
|
||||
(if (<= i1 i2)
|
||||
(build-char-set (loc:make-range i1 i2) cache)
|
||||
z)))
|
||||
(`(char-complement ,crs ...)
|
||||
(let ((cs (->re `(union ,@crs) cache)))
|
||||
(cond
|
||||
((zeroR? cs) (build-char-set (loc:make-range 0 max-char-num) cache))
|
||||
((char-setR? cs)
|
||||
(build-char-set (loc:complement (char-setR-chars cs) 0 max-char-num) cache))
|
||||
(else z))))))
|
||||
|
||||
|
||||
|
||||
|
||||
;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re))
|
||||
;; (char-set char-set -> char-set) cache -> (list-of re)
|
||||
;; Takes all the char-sets in l and combines them into one char-set using the combine function.
|
||||
;; Flattens out the values of type?. get-res only needs to function on things type? returns
|
||||
;; true for.
|
||||
(define (flatten-res l type? get-res combine cache)
|
||||
(let loop ((res l)
|
||||
;; chars : (union #f char-set)
|
||||
(chars #f)
|
||||
(no-chars null))
|
||||
(cond
|
||||
((null? res)
|
||||
(if chars
|
||||
(cons (build-char-set chars cache) no-chars)
|
||||
no-chars))
|
||||
((char-setR? (car res))
|
||||
(if chars
|
||||
(loop (cdr res) (combine (char-setR-chars (car res)) chars) no-chars)
|
||||
(loop (cdr res) (char-setR-chars (car res)) no-chars)))
|
||||
((type? (car res))
|
||||
(loop (append (get-res (car res)) (cdr res)) chars no-chars))
|
||||
(else (loop (cdr res) chars (cons (car res) no-chars))))))
|
||||
|
||||
;; build-epsilon : -> re
|
||||
(define (build-epsilon) e)
|
||||
|
||||
(define (build-zero) z)
|
||||
|
||||
(define loc:integer-set-contents is:integer-set-contents)
|
||||
|
||||
;; build-char-set : char-set cache -> re
|
||||
(define (build-char-set cs cache)
|
||||
(let ((l (loc:integer-set-contents cs)))
|
||||
(cond
|
||||
((null? l) z)
|
||||
(else
|
||||
(cache l
|
||||
(lambda ()
|
||||
(make-char-setR #f (get-index) cs)))))))
|
||||
|
||||
|
||||
|
||||
;; build-concat : re re cache -> re
|
||||
(define (build-concat r1 r2 cache)
|
||||
(cond
|
||||
((eq? e r1) r2)
|
||||
((eq? e r2) r1)
|
||||
((or (eq? z r1) (eq? z r2)) z)
|
||||
(else
|
||||
(cache (cons 'concat (cons (re-index r1) (re-index r2)))
|
||||
(lambda ()
|
||||
(make-concatR (and (re-nullable? r1) (re-nullable? r2))
|
||||
(get-index)
|
||||
r1 r2))))))
|
||||
|
||||
;; build-repeat : nat nat-or-+inf.0 re cache -> re
|
||||
(define (build-repeat low high r cache)
|
||||
(let ((low (if (< low 0) 0 low)))
|
||||
(cond
|
||||
((eq? r e) e)
|
||||
((and (= 0 low) (or (= 0 high) (eq? z r))) e)
|
||||
((and (= 1 low) (= 1 high)) r)
|
||||
((and (repeatR? r)
|
||||
(eq? (repeatR-high r) +inf.0)
|
||||
(or (= 0 (repeatR-low r))
|
||||
(= 1 (repeatR-low r))))
|
||||
(build-repeat (* low (repeatR-low r))
|
||||
+inf.0
|
||||
(repeatR-re r)
|
||||
cache))
|
||||
(else
|
||||
(cache (cons 'repeat (cons low (cons high (re-index r))))
|
||||
(lambda ()
|
||||
(make-repeatR (or (re-nullable? r) (= 0 low)) (get-index) low high r)))))))
|
||||
|
||||
|
||||
;; build-or : (list-of re) cache -> re
|
||||
(define (build-or rs cache)
|
||||
(let ((rs
|
||||
(filter
|
||||
(lambda (x) (not (eq? x z)))
|
||||
(do-simple-equiv (replace rs orR? orR-res null) re-index))))
|
||||
(cond
|
||||
((null? rs) z)
|
||||
((null? (cdr rs)) (car rs))
|
||||
((memq (build-neg z cache) rs) (build-neg z cache))
|
||||
(else
|
||||
(cache (cons 'or (map re-index rs))
|
||||
(lambda ()
|
||||
(make-orR (ormap re-nullable? rs) (get-index) rs)))))))
|
||||
|
||||
;; build-and : (list-of re) cache -> re
|
||||
(define (build-and rs cache)
|
||||
(let ((rs (do-simple-equiv (replace rs andR? andR-res null) re-index)))
|
||||
(cond
|
||||
((null? rs) (build-neg z cache))
|
||||
((null? (cdr rs)) (car rs))
|
||||
((memq z rs) z)
|
||||
(else
|
||||
(cache (cons 'and (map re-index rs))
|
||||
(lambda ()
|
||||
(make-andR (andmap re-nullable? rs) (get-index) rs)))))))
|
||||
|
||||
;; build-neg : re cache -> re
|
||||
(define (build-neg r cache)
|
||||
(cond
|
||||
((negR? r) (negR-re r))
|
||||
(else
|
||||
(cache (cons 'neg (re-index r))
|
||||
(lambda ()
|
||||
(make-negR (not (re-nullable? r)) (get-index) r))))))
|
||||
|
||||
;; Tests for the build-functions
|
||||
(test-block ((c (make-cache))
|
||||
(isc is:integer-set-contents)
|
||||
(r1 (build-char-set (is:make-range (char->integer #\1)) c))
|
||||
(r2 (build-char-set (is:make-range (char->integer #\2)) c))
|
||||
(r3 (build-char-set (is:make-range (char->integer #\3)) c))
|
||||
(rc (build-concat r1 r2 c))
|
||||
(rc2 (build-concat r2 r1 c))
|
||||
(rr (build-repeat 0 +inf.0 rc c))
|
||||
(ro (build-or `(,rr ,rc ,rr) c))
|
||||
(ro2 (build-or `(,rc ,rr ,z) c))
|
||||
(ro3 (build-or `(,rr ,rc) c))
|
||||
(ro4 (build-or `(,(build-or `(,r1 ,r2) c)
|
||||
,(build-or `(,r2 ,r3) c)) c))
|
||||
(ra (build-and `(,rr ,rc ,rr) c))
|
||||
(ra2 (build-and `(,rc ,rr) c))
|
||||
(ra3 (build-and `(,rr ,rc) c))
|
||||
(ra4 (build-and `(,(build-and `(,r3 ,r2) c)
|
||||
,(build-and `(,r2 ,r1) c)) c))
|
||||
(rn (build-neg z c))
|
||||
(rn2 (build-neg r1 c)))
|
||||
|
||||
((isc (char-setR-chars r1)) (isc (is:make-range (char->integer #\1))))
|
||||
((isc (char-setR-chars r2)) (isc (is:make-range (char->integer #\2))))
|
||||
((isc (char-setR-chars r3)) (isc (is:make-range (char->integer #\3))))
|
||||
((build-char-set (is:make-range) c) z)
|
||||
((build-concat r1 e c) r1)
|
||||
((build-concat e r1 c) r1)
|
||||
((build-concat r1 z c) z)
|
||||
((build-concat z r1 c) z)
|
||||
((build-concat r1 r2 c) rc)
|
||||
((concatR-re1 rc) r1)
|
||||
((concatR-re2 rc) r2)
|
||||
((concatR-re1 rc2) r2)
|
||||
((concatR-re2 rc2) r1)
|
||||
(ro ro2)
|
||||
(ro ro3)
|
||||
(ro4 (build-or `(,r1 ,r2 ,r3) c))
|
||||
((orR-res ro) (list rc rr))
|
||||
((orR-res ro4) (list r1 r2 r3))
|
||||
((build-or null c) z)
|
||||
((build-or `(,r1 ,z) c) r1)
|
||||
((build-repeat 0 +inf.0 rc c) rr)
|
||||
((build-repeat 0 1 z c) e)
|
||||
((build-repeat 0 0 rc c) e)
|
||||
((build-repeat 0 +inf.0 z c) e)
|
||||
((build-repeat -1 +inf.0 z c) e)
|
||||
((build-repeat 0 +inf.0 (build-repeat 0 +inf.0 rc c) c)
|
||||
(build-repeat 0 +inf.0 rc c))
|
||||
((build-repeat 20 20 (build-repeat 0 +inf.0 rc c) c)
|
||||
(build-repeat 0 +inf.0 rc c))
|
||||
((build-repeat 20 20 (build-repeat 1 +inf.0 rc c) c)
|
||||
(build-repeat 20 +inf.0 rc c))
|
||||
((build-repeat 1 1 rc c) rc)
|
||||
((repeatR-re rr) rc)
|
||||
(ra ra2)
|
||||
(ra ra3)
|
||||
(ra4 (build-and `(,r1 ,r2 ,r3) c))
|
||||
((andR-res ra) (list rc rr))
|
||||
((andR-res ra4) (list r1 r2 r3))
|
||||
((build-and null c) (build-neg z c))
|
||||
((build-and `(,r1 ,z) c) z)
|
||||
((build-and `(,r1) c) r1)
|
||||
((build-neg r1 c) (build-neg r1 c))
|
||||
((build-neg (build-neg r1 c) c) r1)
|
||||
((negR-re (build-neg r2 c)) r2)
|
||||
((re-nullable? r1) #f)
|
||||
((re-nullable? rc) #f)
|
||||
((re-nullable? (build-concat rr rr c)) #t)
|
||||
((re-nullable? rr) #t)
|
||||
((re-nullable? (build-repeat 0 1 rc c)) #t)
|
||||
((re-nullable? (build-repeat 1 2 rc c)) #f)
|
||||
((re-nullable? (build-repeat 1 2 (build-or (list e r1) c) c)) #t)
|
||||
((re-nullable? ro) #t)
|
||||
((re-nullable? (build-or `(,r1 ,r2) c)) #f)
|
||||
((re-nullable? (build-and `(,r1 ,e) c)) #f)
|
||||
((re-nullable? (build-and `(,rr ,e) c)) #t)
|
||||
((re-nullable? (build-neg r1 c)) #t)
|
||||
((re-nullable? (build-neg rr c)) #f))
|
||||
|
||||
(test-block ((c (make-cache))
|
||||
(isc is:integer-set-contents)
|
||||
(r1 (->re #\1 c))
|
||||
(r2 (->re #\2 c))
|
||||
(r3-5 (->re '(char-range #\3 #\5) c))
|
||||
(r4 (build-or `(,r1 ,r2) c))
|
||||
(r5 (->re `(union ,r3-5 #\7) c))
|
||||
(r6 (->re #\6 c)))
|
||||
((flatten-res null orR? orR-res is:union c) null)
|
||||
((isc (char-setR-chars (car (flatten-res `(,r1) orR? orR-res is:union c))))
|
||||
(isc (is:make-range (char->integer #\1))))
|
||||
((isc (char-setR-chars (car (flatten-res `(,r4) orR? orR-res is:union c))))
|
||||
(isc (is:make-range (char->integer #\1) (char->integer #\2))))
|
||||
((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1)
|
||||
orR? orR-res is:union c))))
|
||||
(isc (is:make-range (char->integer #\1) (char->integer #\7))))
|
||||
((flatten-res `(,r1 ,r2) andR? andR-res (lambda (x y)
|
||||
(let-values (((i _ __)
|
||||
(is:split x y)))
|
||||
i))
|
||||
c)
|
||||
(list z)))
|
||||
|
||||
;; ->re
|
||||
(test-block ((c (make-cache))
|
||||
(isc is:integer-set-contents)
|
||||
(r (->re #\a c))
|
||||
(rr (->re `(concatenation ,r ,r) c))
|
||||
(rrr (->re `(concatenation ,r ,rr) c))
|
||||
(rrr* (->re `(repetition 0 +inf.0 ,rrr) c)))
|
||||
((isc (char-setR-chars r)) (isc (is:make-range (char->integer #\a))))
|
||||
((->re "" c) e)
|
||||
((->re "asdf" c) (->re `(concatenation #\a #\s #\d #\f) c))
|
||||
((->re r c) r)
|
||||
((->re `(repetition 0 +inf.0 ,r) c) (build-repeat 0 +inf.0 r c))
|
||||
((->re `(repetition 1 +inf.0 ,r) c) (build-repeat 1 +inf.0 r c))
|
||||
((->re `(repetition 0 1 ,r) c) (build-repeat 0 1 r c))
|
||||
((->re `(repetition 0 1 ,rrr*) c) rrr*)
|
||||
((->re `(union (union (char-range #\a #\c)
|
||||
(char-complement (char-range #\000 #\110)
|
||||
(char-range #\112 ,(integer->char max-char-num))))
|
||||
(union (repetition 0 +inf.0 #\2))) c)
|
||||
(build-or (list (build-char-set (is:union (is:make-range 73)
|
||||
(is:make-range 97 99))
|
||||
c)
|
||||
(build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c))
|
||||
c))
|
||||
((->re `(union ,rr ,rrr) c) (build-or (list rr rrr) c))
|
||||
((->re `(union ,r) c) r)
|
||||
((->re `(union) c) z)
|
||||
((->re `(intersection (intersection #\111
|
||||
(char-complement (char-range #\000 #\110)
|
||||
(char-range #\112 ,(integer->char max-char-num))))
|
||||
(intersection (repetition 0 +inf.0 #\2))) c)
|
||||
(build-and (list (build-char-set (is:make-range 73) c)
|
||||
(build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c))
|
||||
c))
|
||||
((->re `(intersection (intersection #\000 (char-complement (char-range #\000 #\110)
|
||||
(char-range #\112 ,(integer->char max-char-num))))
|
||||
(intersection (repetition 0 +inf.0 #\2))) c)
|
||||
z)
|
||||
((->re `(intersection ,rr ,rrr) c) (build-and (list rr rrr) c))
|
||||
((->re `(intersection ,r) c) r)
|
||||
((->re `(intersection) c) (build-neg z c))
|
||||
((->re `(complement ,r) c) (build-neg r c))
|
||||
((->re `(concatenation) c) e)
|
||||
((->re `(concatenation ,rrr*) c) rrr*)
|
||||
(rr (build-concat r r c))
|
||||
((->re `(concatenation ,r ,rr ,rrr) c)
|
||||
(build-concat r (build-concat rr rrr c) c))
|
||||
((isc (char-setR-chars (->re `(char-range #\1 #\1) c))) (isc (is:make-range 49)))
|
||||
((isc (char-setR-chars (->re `(char-range #\1 #\9) c))) (isc (is:make-range 49 57)))
|
||||
((isc (char-setR-chars (->re `(char-range "1" "1") c))) (isc (is:make-range 49)))
|
||||
((isc (char-setR-chars (->re `(char-range "1" "9") c))) (isc (is:make-range 49 57)))
|
||||
((->re `(char-range "9" "1") c) z)
|
||||
((isc (char-setR-chars (->re `(char-complement) c)))
|
||||
(isc (char-setR-chars (->re `(char-range #\000 ,(integer->char max-char-num)) c))))
|
||||
((isc (char-setR-chars (->re `(char-complement #\001 (char-range #\002 ,(integer->char max-char-num))) c)))
|
||||
(isc (is:make-range 0)))
|
||||
)
|
||||
|
||||
)
|
@ -0,0 +1,220 @@
|
||||
#lang racket
|
||||
|
||||
(require "util.rkt"
|
||||
syntax/id-table)
|
||||
|
||||
(provide parse)
|
||||
|
||||
(define (bad-args stx num)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
(format "incorrect number of arguments (should have ~a)" num)
|
||||
stx))
|
||||
|
||||
;; char-range-arg: syntax-object syntax-object -> nat
|
||||
;; If c contains is a character or length 1 string, returns the integer
|
||||
;; for the character. Otherwise raises a syntax error.
|
||||
(define (char-range-arg stx containing-stx)
|
||||
(let ((c (syntax-e stx)))
|
||||
(cond
|
||||
((char? c) (char->integer c))
|
||||
((and (string? c) (= (string-length c) 1))
|
||||
(char->integer (string-ref c 0)))
|
||||
(else
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"not a char or single-char string"
|
||||
containing-stx stx)))))
|
||||
(module+ test
|
||||
(check-equal? (char-range-arg #'#\1 #'here) (char->integer #\1))
|
||||
(check-equal? (char-range-arg #'"1" #'here) (char->integer #\1)))
|
||||
|
||||
(define orig-insp (variable-reference->module-declaration-inspector
|
||||
(#%variable-reference)))
|
||||
(define (disarm stx)
|
||||
(syntax-disarm stx orig-insp))
|
||||
|
||||
;; parse : syntax-object (box (list-of syntax-object)) -> s-re (see re.rkt)
|
||||
;; checks for errors and generates the plain s-exp form for s
|
||||
;; Expands lex-abbrevs and applies lex-trans.
|
||||
(define (parse stx disappeared-uses)
|
||||
(let loop ([stx stx]
|
||||
[disappeared-uses disappeared-uses]
|
||||
;; seen-lex-abbrevs: id-table
|
||||
[seen-lex-abbrevs (make-immutable-free-id-table)])
|
||||
(let ([recur (lambda (s)
|
||||
(loop (syntax-rearm s stx)
|
||||
disappeared-uses
|
||||
seen-lex-abbrevs))]
|
||||
[recur/abbrev (lambda (s id)
|
||||
(loop (syntax-rearm s stx)
|
||||
disappeared-uses
|
||||
(free-id-table-set seen-lex-abbrevs id id)))])
|
||||
(syntax-case (disarm stx) (repetition union intersection complement concatenation
|
||||
char-range char-complement)
|
||||
(_
|
||||
(identifier? stx)
|
||||
(let ((expansion (syntax-local-value stx (lambda () #f))))
|
||||
(unless (lex-abbrev? expansion)
|
||||
(raise-syntax-error 'regular-expression
|
||||
"undefined abbreviation"
|
||||
stx))
|
||||
;; Check for cycles.
|
||||
(when (free-id-table-ref seen-lex-abbrevs stx (lambda () #f))
|
||||
(raise-syntax-error 'regular-expression
|
||||
"illegal lex-abbrev cycle detected"
|
||||
stx
|
||||
#f
|
||||
(list (free-id-table-ref seen-lex-abbrevs stx))))
|
||||
(set-box! disappeared-uses (cons stx (unbox disappeared-uses)))
|
||||
(recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx)))
|
||||
(_
|
||||
(or (char? (syntax-e stx)) (string? (syntax-e stx)))
|
||||
(syntax-e stx))
|
||||
((repetition arg ...)
|
||||
(let ((arg-list (syntax->list (syntax (arg ...)))))
|
||||
(unless (= 3 (length arg-list))
|
||||
(bad-args stx 2))
|
||||
(let ((low (syntax-e (car arg-list)))
|
||||
(high (syntax-e (cadr arg-list)))
|
||||
(re (caddr arg-list)))
|
||||
(unless (and (number? low) (exact? low) (integer? low) (>= low 0))
|
||||
(raise-syntax-error #f
|
||||
"not a non-negative exact integer"
|
||||
stx
|
||||
(car arg-list)))
|
||||
(unless (or (and (number? high) (exact? high) (integer? high) (>= high 0))
|
||||
(eq? high +inf.0))
|
||||
(raise-syntax-error #f
|
||||
"not a non-negative exact integer or +inf.0"
|
||||
stx
|
||||
(cadr arg-list)))
|
||||
(unless (<= low high)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"the first argument is not less than or equal to the second argument"
|
||||
stx))
|
||||
`(repetition ,low ,high ,(recur re)))))
|
||||
((union re ...)
|
||||
`(union ,@(map recur (syntax->list (syntax (re ...))))))
|
||||
((intersection re ...)
|
||||
`(intersection ,@(map recur (syntax->list (syntax (re ...))))))
|
||||
((complement re ...)
|
||||
(let ((re-list (syntax->list (syntax (re ...)))))
|
||||
(unless (= 1 (length re-list))
|
||||
(bad-args stx 1))
|
||||
`(complement ,(recur (car re-list)))))
|
||||
((concatenation re ...)
|
||||
`(concatenation ,@(map recur (syntax->list (syntax (re ...))))))
|
||||
((char-range arg ...)
|
||||
(let ((arg-list (syntax->list (syntax (arg ...)))))
|
||||
(unless (= 2 (length arg-list))
|
||||
(bad-args stx 2))
|
||||
(let ((i1 (char-range-arg (car arg-list) stx))
|
||||
(i2 (char-range-arg (cadr arg-list) stx)))
|
||||
(if (<= i1 i2)
|
||||
`(char-range ,(integer->char i1) ,(integer->char i2))
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"the first argument does not precede or equal second argument"
|
||||
stx)))))
|
||||
((char-complement arg ...)
|
||||
(let ((arg-list (syntax->list (syntax (arg ...)))))
|
||||
(unless (= 1 (length arg-list))
|
||||
(bad-args stx 1))
|
||||
(let ((parsed (recur (car arg-list))))
|
||||
(unless (char-set? parsed)
|
||||
(raise-syntax-error #f
|
||||
"not a character set"
|
||||
stx
|
||||
(car arg-list)))
|
||||
`(char-complement ,parsed))))
|
||||
((op form ...)
|
||||
(identifier? (syntax op))
|
||||
(let* ((o (syntax op))
|
||||
(expansion (syntax-local-value o (lambda () #f))))
|
||||
(set-box! disappeared-uses (cons o (unbox disappeared-uses)))
|
||||
(cond
|
||||
((lex-trans? expansion)
|
||||
(recur ((lex-trans-f expansion) (disarm stx))))
|
||||
(expansion
|
||||
(raise-syntax-error 'regular-expression
|
||||
"not a lex-trans"
|
||||
stx))
|
||||
(else
|
||||
(raise-syntax-error 'regular-expression
|
||||
"undefined operator"
|
||||
stx)))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
'regular-expression
|
||||
"not a char, string, identifier, or (op args ...)"
|
||||
stx))))))
|
||||
|
||||
|
||||
|
||||
;; char-set? : s-re -> bool
|
||||
;; A char-set is an re that matches only strings of length 1.
|
||||
;; char-set? is conservative.
|
||||
(define (char-set? s-re)
|
||||
(cond
|
||||
((char? s-re) #t)
|
||||
((string? s-re) (= (string-length s-re) 1))
|
||||
((list? s-re)
|
||||
(let ((op (car s-re)))
|
||||
(case op
|
||||
((union intersection) (andmap char-set? (cdr s-re)))
|
||||
((char-range char-complement) #t)
|
||||
((repetition)
|
||||
(and (= (cadr s-re) (caddr s-re)) (char-set? (cadddr s-re))))
|
||||
((concatenation)
|
||||
(and (= 2 (length s-re)) (char-set? (cadr s-re))))
|
||||
(else #f))))
|
||||
(else #f)))
|
||||
|
||||
(module+ test
|
||||
(require rackunit))
|
||||
(module+ test
|
||||
(check-equal? (char-set? #\a) #t)
|
||||
(check-equal? (char-set? "12") #f)
|
||||
(check-equal? (char-set? "1") #t)
|
||||
(check-equal? (char-set? '(repetition 1 2 #\1)) #f)
|
||||
(check-equal? (char-set? '(repetition 1 1 "12")) #f)
|
||||
(check-equal? (char-set? '(repetition 1 1 "1")) #t)
|
||||
(check-equal? (char-set? '(union "1" "2" "3")) #t)
|
||||
(check-equal? (char-set? '(union "1" "" "3")) #f)
|
||||
(check-equal? (char-set? '(intersection "1" "2" (union "3" "4"))) #t)
|
||||
(check-equal? (char-set? '(intersection "1" "")) #f)
|
||||
(check-equal? (char-set? '(complement "1")) #f)
|
||||
(check-equal? (char-set? '(concatenation "1" "2")) #f)
|
||||
(check-equal? (char-set? '(concatenation "" "2")) #f)
|
||||
(check-equal? (char-set? '(concatenation "1")) #t)
|
||||
(check-equal? (char-set? '(concatenation "12")) #f)
|
||||
(check-equal? (char-set? '(char-range #\1 #\2)) #t)
|
||||
(check-equal? (char-set? '(char-complement #\1)) #t))
|
||||
|
||||
;; yikes... these test cases all have the wrong arity, now.
|
||||
;; and by "now", I mean it's been broken since before we
|
||||
;; moved to git.
|
||||
(module+ test
|
||||
(check-equal? (parse #'#\a null) #\a)
|
||||
(check-equal? (parse #'"1" null) "1")
|
||||
(check-equal? (parse #'(repetition 1 1 #\1) null)
|
||||
'(repetition 1 1 #\1))
|
||||
(check-equal? (parse #'(repetition 0 +inf.0 #\1) null) '(repetition 0 +inf.0 #\1))
|
||||
(check-equal? (parse #'(union #\1 (union "2") (union)) null)
|
||||
'(union #\1 (union "2") (union)))
|
||||
(check-equal? (parse #'(intersection #\1 (intersection "2") (intersection))
|
||||
null)
|
||||
'(intersection #\1 (intersection "2") (intersection)))
|
||||
(check-equal? (parse #'(complement (union #\1 #\2))
|
||||
null)
|
||||
'(complement (union #\1 #\2)))
|
||||
(check-equal? (parse #'(concatenation "1" "2" (concatenation)) null)
|
||||
'(concatenation "1" "2" (concatenation)))
|
||||
(check-equal? (parse #'(char-range "1" #\1) null) '(char-range #\1 #\1))
|
||||
(check-equal? (parse #'(char-range #\1 "1") null) '(char-range #\1 #\1))
|
||||
(check-equal? (parse #'(char-range "1" "3") null) '(char-range #\1 #\3))
|
||||
(check-equal? (parse #'(char-complement (union "1" "2")) null)
|
||||
'(char-complement (union "1" "2"))))
|
||||
; )
|
@ -0,0 +1,9 @@
|
||||
(module token-syntax mzscheme
|
||||
|
||||
;; The things needed at compile time to handle definition of tokens
|
||||
|
||||
(provide make-terminals-def terminals-def-t terminals-def?
|
||||
make-e-terminals-def e-terminals-def-t e-terminals-def?)
|
||||
(define-struct terminals-def (t))
|
||||
(define-struct e-terminals-def (t))
|
||||
)
|
@ -0,0 +1,89 @@
|
||||
(module token mzscheme
|
||||
|
||||
(require-for-syntax "token-syntax.rkt")
|
||||
|
||||
;; Defining tokens
|
||||
|
||||
(provide define-tokens define-empty-tokens make-token token?
|
||||
(protect (rename token-name real-token-name))
|
||||
(protect (rename token-value real-token-value))
|
||||
(rename token-name* token-name)
|
||||
(rename token-value* token-value)
|
||||
(struct position (offset line col))
|
||||
(struct position-token (token start-pos end-pos)))
|
||||
|
||||
|
||||
;; A token is either
|
||||
;; - symbol
|
||||
;; - (make-token symbol any)
|
||||
(define-struct token (name value) (make-inspector))
|
||||
|
||||
;; token-name*: token -> symbol
|
||||
(define (token-name* t)
|
||||
(cond
|
||||
((symbol? t) t)
|
||||
((token? t) (token-name t))
|
||||
(else (raise-type-error
|
||||
'token-name
|
||||
"symbol or struct:token"
|
||||
0
|
||||
t))))
|
||||
|
||||
;; token-value*: token -> any
|
||||
(define (token-value* t)
|
||||
(cond
|
||||
((symbol? t) #f)
|
||||
((token? t) (token-value t))
|
||||
(else (raise-type-error
|
||||
'token-value
|
||||
"symbol or struct:token"
|
||||
0
|
||||
t))))
|
||||
|
||||
(define-for-syntax (make-ctor-name n)
|
||||
(datum->syntax-object n
|
||||
(string->symbol (format "token-~a" (syntax-e n)))
|
||||
n
|
||||
n))
|
||||
|
||||
(define-for-syntax (make-define-tokens empty?)
|
||||
(lambda (stx)
|
||||
(syntax-case stx ()
|
||||
((_ name (token ...))
|
||||
(andmap identifier? (syntax->list (syntax (token ...))))
|
||||
(with-syntax (((marked-token ...)
|
||||
(map values #;(make-syntax-introducer)
|
||||
(syntax->list (syntax (token ...))))))
|
||||
(quasisyntax/loc stx
|
||||
(begin
|
||||
(define-syntax name
|
||||
#,(if empty?
|
||||
#'(make-e-terminals-def (quote-syntax (marked-token ...)))
|
||||
#'(make-terminals-def (quote-syntax (marked-token ...)))))
|
||||
#,@(map
|
||||
(lambda (n)
|
||||
(when (eq? (syntax-e n) 'error)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"Cannot define a token named error."
|
||||
stx))
|
||||
(if empty?
|
||||
#`(define (#,(make-ctor-name n))
|
||||
'#,n)
|
||||
#`(define (#,(make-ctor-name n) x)
|
||||
(make-token '#,n x))))
|
||||
(syntax->list (syntax (token ...))))
|
||||
#;(define marked-token #f) #;...))))
|
||||
((_ ...)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"must have the form (define-tokens name (identifier ...)) or (define-empty-tokens name (identifier ...))"
|
||||
stx)))))
|
||||
|
||||
(define-syntax define-tokens (make-define-tokens #f))
|
||||
(define-syntax define-empty-tokens (make-define-tokens #t))
|
||||
|
||||
(define-struct position (offset line col) #f)
|
||||
(define-struct position-token (token start-pos end-pos) #f)
|
||||
)
|
||||
|
@ -0,0 +1,280 @@
|
||||
;; Constructs to create and access grammars, the internal
|
||||
;; representation of the input to the parser generator.
|
||||
|
||||
(module grammar mzscheme
|
||||
|
||||
(require mzlib/class
|
||||
mzlib/list
|
||||
"yacc-helper.rkt"
|
||||
racket/contract)
|
||||
|
||||
;; Each production has a unique index 0 <= index <= number of productions
|
||||
(define-struct prod (lhs rhs index prec action) (make-inspector))
|
||||
|
||||
;; The dot-pos field is the index of the element in the rhs
|
||||
;; of prod that the dot immediately precedes.
|
||||
;; Thus 0 <= dot-pos <= (vector-length rhs).
|
||||
(define-struct item (prod dot-pos) (make-inspector))
|
||||
|
||||
;; gram-sym = (union term? non-term?)
|
||||
;; Each term has a unique index 0 <= index < number of terms
|
||||
;; Each non-term has a unique index 0 <= index < number of non-terms
|
||||
(define-struct term (sym index prec) (make-inspector))
|
||||
(define-struct non-term (sym index) (make-inspector))
|
||||
|
||||
;; a precedence declaration.
|
||||
(define-struct prec (num assoc) (make-inspector))
|
||||
|
||||
(provide/contract
|
||||
(make-item (prod? (or/c #f natural-number/c) . -> . item?))
|
||||
(make-term (symbol? (or/c #f natural-number/c) (or/c prec? #f) . -> . term?))
|
||||
(make-non-term (symbol? (or/c #f natural-number/c) . -> . non-term?))
|
||||
(make-prec (natural-number/c (or/c 'left 'right 'nonassoc) . -> . prec?))
|
||||
(make-prod (non-term? (vectorof (or/c non-term? term?))
|
||||
(or/c #f natural-number/c) (or/c #f prec?) syntax? . -> . prod?)))
|
||||
|
||||
(provide
|
||||
|
||||
|
||||
;; Things that work on items
|
||||
start-item? item-prod item->string
|
||||
sym-at-dot move-dot-right item<? item-dot-pos
|
||||
|
||||
;; Things that operate on grammar symbols
|
||||
gram-sym-symbol gram-sym-index term-prec gram-sym->string
|
||||
non-term? term? non-term<? term<?
|
||||
term-list->bit-vector term-index non-term-index
|
||||
|
||||
;; Things that work on precs
|
||||
prec-num prec-assoc
|
||||
|
||||
grammar%
|
||||
|
||||
;; Things that work on productions
|
||||
prod-index prod-prec prod-rhs prod-lhs prod-action)
|
||||
|
||||
|
||||
;;---------------------- LR items --------------------------
|
||||
|
||||
;; item<?: LR-item * LR-item -> bool
|
||||
;; Lexicographic comparison on two items.
|
||||
(define (item<? i1 i2)
|
||||
(let ((p1 (prod-index (item-prod i1)))
|
||||
(p2 (prod-index (item-prod i2))))
|
||||
(or (< p1 p2)
|
||||
(and (= p1 p2)
|
||||
(let ((d1 (item-dot-pos i1))
|
||||
(d2 (item-dot-pos i2)))
|
||||
(< d1 d2))))))
|
||||
|
||||
;; start-item?: LR-item -> bool
|
||||
;; The start production always has index 0
|
||||
(define (start-item? i)
|
||||
(= 0 (non-term-index (prod-lhs (item-prod i)))))
|
||||
|
||||
|
||||
;; move-dot-right: LR-item -> LR-item | #f
|
||||
;; moves the dot to the right in the item, unless it is at its
|
||||
;; rightmost, then it returns false
|
||||
(define (move-dot-right i)
|
||||
(cond
|
||||
((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f)
|
||||
(else (make-item (item-prod i)
|
||||
(add1 (item-dot-pos i))))))
|
||||
|
||||
;; sym-at-dot: LR-item -> gram-sym | #f
|
||||
;; returns the symbol after the dot in the item or #f if there is none
|
||||
(define (sym-at-dot i)
|
||||
(let ((dp (item-dot-pos i))
|
||||
(rhs (prod-rhs (item-prod i))))
|
||||
(cond
|
||||
((= dp (vector-length rhs)) #f)
|
||||
(else (vector-ref rhs dp)))))
|
||||
|
||||
|
||||
;; print-item: LR-item ->
|
||||
(define (item->string it)
|
||||
(let ((print-sym (lambda (i)
|
||||
(let ((gs (vector-ref (prod-rhs (item-prod it)) i)))
|
||||
(cond
|
||||
((term? gs) (format "~a " (term-sym gs)))
|
||||
(else (format "~a " (non-term-sym gs))))))))
|
||||
(string-append
|
||||
(format "~a -> " (non-term-sym (prod-lhs (item-prod it))))
|
||||
(let loop ((i 0))
|
||||
(cond
|
||||
((= i (vector-length (prod-rhs (item-prod it))))
|
||||
(if (= i (item-dot-pos it))
|
||||
". "
|
||||
""))
|
||||
((= i (item-dot-pos it))
|
||||
(string-append ". " (print-sym i) (loop (add1 i))))
|
||||
(else (string-append (print-sym i) (loop (add1 i)))))))))
|
||||
|
||||
;; --------------------- Grammar Symbols --------------------------
|
||||
|
||||
(define (non-term<? nt1 nt2)
|
||||
(< (non-term-index nt1) (non-term-index nt2)))
|
||||
|
||||
(define (term<? nt1 nt2)
|
||||
(< (term-index nt1) (term-index nt2)))
|
||||
|
||||
(define (gram-sym-index gs)
|
||||
(cond
|
||||
((term? gs) (term-index gs))
|
||||
(else (non-term-index gs))))
|
||||
|
||||
(define (gram-sym-symbol gs)
|
||||
(cond
|
||||
((term? gs) (term-sym gs))
|
||||
(else (non-term-sym gs))))
|
||||
|
||||
(define (gram-sym->string gs)
|
||||
(symbol->string (gram-sym-symbol gs)))
|
||||
|
||||
;; term-list->bit-vector: term list -> int
|
||||
;; Creates a number where the nth bit is 1 if the term with index n is in
|
||||
;; the list, and whose nth bit is 0 otherwise
|
||||
(define (term-list->bit-vector terms)
|
||||
(cond
|
||||
((null? terms) 0)
|
||||
(else
|
||||
(bitwise-ior (arithmetic-shift 1 (term-index (car terms))) (term-list->bit-vector (cdr terms))))))
|
||||
|
||||
|
||||
;; ------------------------- Grammar ------------------------------
|
||||
|
||||
(define grammar%
|
||||
(class object%
|
||||
(super-instantiate ())
|
||||
;; prods: production list list
|
||||
;; where there is one production list per non-term
|
||||
(init prods)
|
||||
;; init-prods: production list
|
||||
;; The productions parsing can start from
|
||||
;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable
|
||||
(init-field init-prods terms non-terms end-terms)
|
||||
|
||||
;; list of all productions
|
||||
(define all-prods (apply append prods))
|
||||
(define num-prods (length all-prods))
|
||||
(define num-terms (length terms))
|
||||
(define num-non-terms (length non-terms))
|
||||
|
||||
(let ((count 0))
|
||||
(for-each
|
||||
(lambda (nt)
|
||||
(set-non-term-index! nt count)
|
||||
(set! count (add1 count)))
|
||||
non-terms))
|
||||
|
||||
(let ((count 0))
|
||||
(for-each
|
||||
(lambda (t)
|
||||
(set-term-index! t count)
|
||||
(set! count (add1 count)))
|
||||
terms))
|
||||
|
||||
(let ((count 0))
|
||||
(for-each
|
||||
(lambda (prod)
|
||||
(set-prod-index! prod count)
|
||||
(set! count (add1 count)))
|
||||
all-prods))
|
||||
|
||||
;; indexed by the index of the non-term - contains the list of productions for that non-term
|
||||
(define nt->prods
|
||||
(let ((v (make-vector (length prods) #f)))
|
||||
(for-each (lambda (prods)
|
||||
(vector-set! v (non-term-index (prod-lhs (car prods))) prods))
|
||||
prods)
|
||||
v))
|
||||
|
||||
(define nullable-non-terms
|
||||
(nullable all-prods num-non-terms))
|
||||
|
||||
(define/public (get-num-terms) num-terms)
|
||||
(define/public (get-num-non-terms) num-non-terms)
|
||||
|
||||
(define/public (get-prods-for-non-term nt)
|
||||
(vector-ref nt->prods (non-term-index nt)))
|
||||
(define/public (get-prods) all-prods)
|
||||
(define/public (get-init-prods) init-prods)
|
||||
|
||||
(define/public (get-terms) terms)
|
||||
(define/public (get-non-terms) non-terms)
|
||||
|
||||
(define/public (get-num-prods) num-prods)
|
||||
(define/public (get-end-terms) end-terms)
|
||||
|
||||
(define/public (nullable-non-term? nt)
|
||||
(vector-ref nullable-non-terms (non-term-index nt)))
|
||||
|
||||
(define/public (nullable-after-dot? item)
|
||||
(let* ((rhs (prod-rhs (item-prod item)))
|
||||
(prod-length (vector-length rhs)))
|
||||
(let loop ((i (item-dot-pos item)))
|
||||
(cond
|
||||
((< i prod-length)
|
||||
(if (and (non-term? (vector-ref rhs i)) (nullable-non-term? (vector-ref rhs i)))
|
||||
(loop (add1 i))
|
||||
#f))
|
||||
((= i prod-length) #t)))))
|
||||
|
||||
(define/public (nullable-non-term-thunk)
|
||||
(lambda (nt)
|
||||
(nullable-non-term? nt)))
|
||||
(define/public (nullable-after-dot?-thunk)
|
||||
(lambda (item)
|
||||
(nullable-after-dot? item)))))
|
||||
|
||||
|
||||
;; nullable: production list * int -> non-term set
|
||||
;; determines which non-terminals can derive epsilon
|
||||
(define (nullable prods num-nts)
|
||||
(letrec ((nullable (make-vector num-nts #f))
|
||||
(added #f)
|
||||
|
||||
;; possible-nullable: producion list -> production list
|
||||
;; Removes all productions that have a terminal
|
||||
(possible-nullable
|
||||
(lambda (prods)
|
||||
(filter (lambda (prod)
|
||||
(vector-andmap non-term? (prod-rhs prod)))
|
||||
prods)))
|
||||
|
||||
;; set-nullables: production list -> production list
|
||||
;; makes one pass through the productions, adding the ones
|
||||
;; known to be nullable now to nullable and returning a list
|
||||
;; of productions that we don't know about yet.
|
||||
(set-nullables
|
||||
(lambda (prods)
|
||||
(cond
|
||||
((null? prods) null)
|
||||
((vector-ref nullable
|
||||
(gram-sym-index (prod-lhs (car prods))))
|
||||
(set-nullables (cdr prods)))
|
||||
((vector-andmap (lambda (nt)
|
||||
(vector-ref nullable (gram-sym-index nt)))
|
||||
(prod-rhs (car prods)))
|
||||
(vector-set! nullable
|
||||
(gram-sym-index (prod-lhs (car prods)))
|
||||
#t)
|
||||
(set! added #t)
|
||||
(set-nullables (cdr prods)))
|
||||
(else
|
||||
(cons (car prods)
|
||||
(set-nullables (cdr prods))))))))
|
||||
|
||||
(let loop ((P (possible-nullable prods)))
|
||||
(cond
|
||||
((null? P) nullable)
|
||||
(else
|
||||
(set! added #f)
|
||||
(let ((new-P (set-nullables P)))
|
||||
(if added
|
||||
(loop new-P)
|
||||
nullable)))))))
|
||||
|
||||
|
||||
)
|
@ -0,0 +1,61 @@
|
||||
(module graph mzscheme
|
||||
|
||||
(provide digraph)
|
||||
|
||||
(define (zero-thunk) 0)
|
||||
|
||||
;; digraph:
|
||||
;; ('a list) * ('a -> 'a list) * ('a -> 'b) * ('b * 'b -> 'b) * (-> 'b)
|
||||
;; -> ('a -> 'b)
|
||||
;; DeRemer and Pennello 1982
|
||||
;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)}
|
||||
;; We use a hash-table to represent the result function 'a -> 'b set, so
|
||||
;; the values of type 'a must be comparable with eq?.
|
||||
(define (digraph nodes edges f- union fail)
|
||||
(letrec [
|
||||
;; Will map elements of 'a to 'b sets
|
||||
(results (make-hash-table))
|
||||
(f (lambda (x) (hash-table-get results x fail)))
|
||||
|
||||
;; Maps elements of 'a to integers.
|
||||
(N (make-hash-table))
|
||||
(get-N (lambda (x) (hash-table-get N x zero-thunk)))
|
||||
(set-N (lambda (x d) (hash-table-put! N x d)))
|
||||
|
||||
(stack null)
|
||||
(push (lambda (x)
|
||||
(set! stack (cons x stack))))
|
||||
(pop (lambda ()
|
||||
(begin0
|
||||
(car stack)
|
||||
(set! stack (cdr stack)))))
|
||||
(depth (lambda () (length stack)))
|
||||
|
||||
;; traverse: 'a ->
|
||||
(traverse
|
||||
(lambda (x)
|
||||
(push x)
|
||||
(let ((d (depth)))
|
||||
(set-N x d)
|
||||
(hash-table-put! results x (f- x))
|
||||
(for-each (lambda (y)
|
||||
(if (= 0 (get-N y))
|
||||
(traverse y))
|
||||
(hash-table-put! results
|
||||
x
|
||||
(union (f x) (f y)))
|
||||
(set-N x (min (get-N x) (get-N y))))
|
||||
(edges x))
|
||||
(if (= d (get-N x))
|
||||
(let loop ((p (pop)))
|
||||
(set-N p +inf.0)
|
||||
(hash-table-put! results p (f x))
|
||||
(if (not (eq? x p))
|
||||
(loop (pop))))))))]
|
||||
(for-each (lambda (x)
|
||||
(if (= 0 (get-N x))
|
||||
(traverse x)))
|
||||
nodes)
|
||||
f))
|
||||
|
||||
)
|
@ -0,0 +1,374 @@
|
||||
(module input-file-parser mzscheme
|
||||
|
||||
;; routines for parsing the input to the parser generator and producing a
|
||||
;; grammar (See grammar.rkt)
|
||||
|
||||
(require "yacc-helper.rkt"
|
||||
"../private-lex/token-syntax.rkt"
|
||||
"grammar.rkt"
|
||||
mzlib/class
|
||||
racket/contract)
|
||||
(require-for-template mzscheme)
|
||||
|
||||
(define (is-a-grammar%? x) (is-a? x grammar%))
|
||||
(provide/contract
|
||||
(parse-input ((listof identifier?) (listof identifier?) (listof identifier?)
|
||||
(or/c #f syntax?) syntax? any/c . -> . is-a-grammar%?))
|
||||
(get-term-list ((listof identifier?) . -> . (listof identifier?))))
|
||||
|
||||
(define stx-for-original-property (read-syntax #f (open-input-string "original")))
|
||||
|
||||
;; get-args: ??? -> (values (listof syntax) (or/c #f (cons integer? stx)))
|
||||
(define (get-args i rhs src-pos term-defs)
|
||||
(let ((empty-table (make-hash-table))
|
||||
(biggest-pos #f))
|
||||
(hash-table-put! empty-table 'error #t)
|
||||
(for-each (lambda (td)
|
||||
(let ((v (syntax-local-value td)))
|
||||
(if (e-terminals-def? v)
|
||||
(for-each (lambda (s)
|
||||
(hash-table-put! empty-table (syntax-object->datum s) #t))
|
||||
(syntax->list (e-terminals-def-t v))))))
|
||||
term-defs)
|
||||
(let ([args
|
||||
(let get-args ((i i)
|
||||
(rhs rhs))
|
||||
(cond
|
||||
((null? rhs) null)
|
||||
(else
|
||||
(let ((b (car rhs))
|
||||
(name (if (hash-table-get empty-table (syntax-object->datum (car rhs)) (lambda () #f))
|
||||
(gensym)
|
||||
(string->symbol (format "$~a" i)))))
|
||||
(cond
|
||||
(src-pos
|
||||
(let ([start-pos-id
|
||||
(datum->syntax-object b (string->symbol (format "$~a-start-pos" i)) b stx-for-original-property)]
|
||||
[end-pos-id
|
||||
(datum->syntax-object b (string->symbol (format "$~a-end-pos" i)) b stx-for-original-property)])
|
||||
(set! biggest-pos (cons start-pos-id end-pos-id))
|
||||
`(,(datum->syntax-object b name b stx-for-original-property)
|
||||
,start-pos-id
|
||||
,end-pos-id
|
||||
,@(get-args (add1 i) (cdr rhs)))))
|
||||
(else
|
||||
`(,(datum->syntax-object b name b stx-for-original-property)
|
||||
,@(get-args (add1 i) (cdr rhs)))))))))])
|
||||
(values args biggest-pos))))
|
||||
|
||||
;; Given the list of terminal symbols and the precedence/associativity definitions,
|
||||
;; builds terminal structures (See grammar.rkt)
|
||||
;; build-terms: symbol list * symbol list list -> term list
|
||||
(define (build-terms term-list precs)
|
||||
(let ((counter 0)
|
||||
|
||||
;;(term-list (cons (gensym) term-list))
|
||||
|
||||
;; Will map a terminal symbol to its precedence/associativity
|
||||
(prec-table (make-hash-table)))
|
||||
|
||||
;; Fill the prec table
|
||||
(for-each
|
||||
(lambda (p-decl)
|
||||
(begin0
|
||||
(let ((assoc (car p-decl)))
|
||||
(for-each
|
||||
(lambda (term-sym)
|
||||
(hash-table-put! prec-table term-sym (make-prec counter assoc)))
|
||||
(cdr p-decl)))
|
||||
(set! counter (add1 counter))))
|
||||
precs)
|
||||
|
||||
;; Build the terminal structures
|
||||
(map
|
||||
(lambda (term-sym)
|
||||
(make-term term-sym
|
||||
#f
|
||||
(hash-table-get prec-table term-sym (lambda () #f))))
|
||||
term-list)))
|
||||
|
||||
;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.rkt)
|
||||
;; get-terms-from-def: identifier? -> (listof identifier?)
|
||||
(define (get-terms-from-def term-syn)
|
||||
(let ((t (syntax-local-value term-syn (lambda () #f))))
|
||||
(cond
|
||||
((terminals-def? t) (syntax->list (terminals-def-t t)))
|
||||
((e-terminals-def? t) (syntax->list (e-terminals-def-t t)))
|
||||
(else
|
||||
(raise-syntax-error
|
||||
'parser-tokens
|
||||
"undefined token group"
|
||||
term-syn)))))
|
||||
|
||||
(define (get-term-list term-group-names)
|
||||
(remove-duplicates
|
||||
(cons (datum->syntax-object #f 'error)
|
||||
(apply append
|
||||
(map get-terms-from-def term-group-names)))))
|
||||
|
||||
(define (parse-input term-defs start ends prec-decls prods src-pos)
|
||||
(let* ((start-syms (map syntax-e start))
|
||||
|
||||
(list-of-terms (map syntax-e (get-term-list term-defs)))
|
||||
|
||||
(end-terms
|
||||
(map
|
||||
(lambda (end)
|
||||
(unless (memq (syntax-e end) list-of-terms)
|
||||
(raise-syntax-error
|
||||
'parser-end-tokens
|
||||
(format "End token ~a not defined as a token"
|
||||
(syntax-e end))
|
||||
end))
|
||||
(syntax-e end))
|
||||
ends))
|
||||
|
||||
;; Get the list of terminals out of input-terms
|
||||
|
||||
(list-of-non-terms
|
||||
(syntax-case prods ()
|
||||
(((non-term production ...) ...)
|
||||
(begin
|
||||
(for-each
|
||||
(lambda (nts)
|
||||
(if (memq (syntax-object->datum nts) list-of-terms)
|
||||
(raise-syntax-error
|
||||
'parser-non-terminals
|
||||
(format "~a used as both token and non-terminal"
|
||||
(syntax-object->datum nts))
|
||||
nts)))
|
||||
(syntax->list (syntax (non-term ...))))
|
||||
|
||||
(let ((dup (duplicate-list? (syntax-object->datum
|
||||
(syntax (non-term ...))))))
|
||||
(if dup
|
||||
(raise-syntax-error
|
||||
'parser-non-terminals
|
||||
(format "non-terminal ~a defined multiple times"
|
||||
dup)
|
||||
prods)))
|
||||
|
||||
(syntax-object->datum (syntax (non-term ...)))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
'parser-grammar
|
||||
"Grammar must be of the form (grammar (non-terminal productions ...) ...)"
|
||||
prods))))
|
||||
|
||||
;; Check the precedence declarations for errors and turn them into data
|
||||
(precs
|
||||
(syntax-case prec-decls ()
|
||||
(((type term ...) ...)
|
||||
(let ((p-terms
|
||||
(syntax-object->datum (syntax (term ... ...)))))
|
||||
(cond
|
||||
((duplicate-list? p-terms) =>
|
||||
(lambda (d)
|
||||
(raise-syntax-error
|
||||
'parser-precedences
|
||||
(format "duplicate precedence declaration for token ~a"
|
||||
d)
|
||||
prec-decls)))
|
||||
(else
|
||||
(for-each
|
||||
(lambda (a)
|
||||
(for-each
|
||||
(lambda (t)
|
||||
(if (not (memq (syntax-object->datum t)
|
||||
list-of-terms))
|
||||
(raise-syntax-error
|
||||
'parser-precedences
|
||||
(format
|
||||
"Precedence declared for non-token ~a"
|
||||
(syntax-object->datum t))
|
||||
t)))
|
||||
(syntax->list a)))
|
||||
(syntax->list (syntax ((term ...) ...))))
|
||||
(for-each
|
||||
(lambda (type)
|
||||
(if (not (memq (syntax-object->datum type)
|
||||
`(left right nonassoc)))
|
||||
(raise-syntax-error
|
||||
'parser-precedences
|
||||
"Associativity must be left, right or nonassoc"
|
||||
type)))
|
||||
(syntax->list (syntax (type ...))))
|
||||
(syntax-object->datum prec-decls)))))
|
||||
(#f null)
|
||||
(_
|
||||
(raise-syntax-error
|
||||
'parser-precedences
|
||||
"Precedence declaration must be of the form (precs (assoc term ...) ...) where assoc is left, right or nonassoc"
|
||||
prec-decls))))
|
||||
|
||||
(terms (build-terms list-of-terms precs))
|
||||
|
||||
(non-terms (map (lambda (non-term) (make-non-term non-term #f))
|
||||
list-of-non-terms))
|
||||
(term-table (make-hash-table))
|
||||
(non-term-table (make-hash-table)))
|
||||
|
||||
(for-each (lambda (t)
|
||||
(hash-table-put! term-table (gram-sym-symbol t) t))
|
||||
terms)
|
||||
|
||||
(for-each (lambda (nt)
|
||||
(hash-table-put! non-term-table (gram-sym-symbol nt) nt))
|
||||
non-terms)
|
||||
|
||||
(let* (
|
||||
;; parse-prod: syntax-object -> gram-sym vector
|
||||
(parse-prod
|
||||
(lambda (prod-so)
|
||||
(syntax-case prod-so ()
|
||||
((prod-rhs-sym ...)
|
||||
(andmap identifier? (syntax->list prod-so))
|
||||
(begin
|
||||
(for-each (lambda (t)
|
||||
(if (memq (syntax-object->datum t) end-terms)
|
||||
(raise-syntax-error
|
||||
'parser-production-rhs
|
||||
(format "~a is an end token and cannot be used in a production"
|
||||
(syntax-object->datum t))
|
||||
t)))
|
||||
(syntax->list prod-so))
|
||||
(list->vector
|
||||
(map (lambda (s)
|
||||
(hash-table-get
|
||||
term-table
|
||||
(syntax-object->datum s)
|
||||
(lambda ()
|
||||
(hash-table-get
|
||||
non-term-table
|
||||
(syntax-object->datum s)
|
||||
(lambda ()
|
||||
(raise-syntax-error
|
||||
'parser-production-rhs
|
||||
(format
|
||||
"~a is not declared as a terminal or non-terminal"
|
||||
(syntax-object->datum s))
|
||||
s))))))
|
||||
(syntax->list prod-so)))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
'parser-production-rhs
|
||||
"production right-hand-side must have form (symbol ...)"
|
||||
prod-so)))))
|
||||
|
||||
;; parse-action: syntax-object * syntax-object -> syntax-object
|
||||
(parse-action
|
||||
(lambda (rhs act)
|
||||
(let-values ([(args biggest) (get-args 1 (syntax->list rhs) src-pos term-defs)])
|
||||
(let ([act
|
||||
(if biggest
|
||||
(with-syntax ([$n-start-pos (datum->syntax-object (car biggest) '$n-start-pos)]
|
||||
[$n-end-pos (datum->syntax-object (cdr biggest) '$n-end-pos)])
|
||||
#`(let ([$n-start-pos #,(car biggest)]
|
||||
[$n-end-pos #,(cdr biggest)])
|
||||
#,act))
|
||||
act)])
|
||||
(quasisyntax/loc act
|
||||
(lambda #,args
|
||||
#,act))))))
|
||||
|
||||
;; parse-prod+action: non-term * syntax-object -> production
|
||||
(parse-prod+action
|
||||
(lambda (nt prod-so)
|
||||
(syntax-case prod-so ()
|
||||
((prod-rhs action)
|
||||
(let ((p (parse-prod (syntax prod-rhs))))
|
||||
(make-prod
|
||||
nt
|
||||
p
|
||||
#f
|
||||
(let loop ((i (sub1 (vector-length p))))
|
||||
(if (>= i 0)
|
||||
(let ((gs (vector-ref p i)))
|
||||
(if (term? gs)
|
||||
(term-prec gs)
|
||||
(loop (sub1 i))))
|
||||
#f))
|
||||
(parse-action (syntax prod-rhs) (syntax action)))))
|
||||
((prod-rhs (prec term) action)
|
||||
(identifier? (syntax term))
|
||||
(let ((p (parse-prod (syntax prod-rhs))))
|
||||
(make-prod
|
||||
nt
|
||||
p
|
||||
#f
|
||||
(term-prec
|
||||
(hash-table-get
|
||||
term-table
|
||||
(syntax-object->datum (syntax term))
|
||||
(lambda ()
|
||||
(raise-syntax-error
|
||||
'parser-production-rhs
|
||||
(format
|
||||
"unrecognized terminal ~a in precedence declaration"
|
||||
(syntax-object->datum (syntax term)))
|
||||
(syntax term)))))
|
||||
(parse-action (syntax prod-rhs) (syntax action)))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
'parser-production-rhs
|
||||
"production must have form [(symbol ...) expression] or [(symbol ...) (prec symbol) expression]"
|
||||
prod-so)))))
|
||||
|
||||
;; parse-prod-for-nt: syntax-object -> production list
|
||||
(parse-prods-for-nt
|
||||
(lambda (prods-so)
|
||||
(syntax-case prods-so ()
|
||||
((nt productions ...)
|
||||
(> (length (syntax->list (syntax (productions ...)))) 0)
|
||||
(let ((nt (hash-table-get non-term-table
|
||||
(syntax-object->datum (syntax nt)))))
|
||||
(map (lambda (p) (parse-prod+action nt p))
|
||||
(syntax->list (syntax (productions ...))))))
|
||||
(_
|
||||
(raise-syntax-error
|
||||
'parser-productions
|
||||
"A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side"
|
||||
prods-so))))))
|
||||
|
||||
(for-each
|
||||
(lambda (sstx ssym)
|
||||
(unless (memq ssym list-of-non-terms)
|
||||
(raise-syntax-error
|
||||
'parser-start
|
||||
(format "Start symbol ~a not defined as a non-terminal" ssym)
|
||||
sstx)))
|
||||
start start-syms)
|
||||
|
||||
(let* ((starts (map (lambda (x) (make-non-term (gensym) #f)) start-syms))
|
||||
(end-non-terms (map (lambda (x) (make-non-term (gensym) #f)) start-syms))
|
||||
(parsed-prods (map parse-prods-for-nt (syntax->list prods)))
|
||||
(start-prods
|
||||
(map (lambda (start end-non-term)
|
||||
(list (make-prod start (vector end-non-term) #f #f
|
||||
(syntax (lambda (x) x)))))
|
||||
starts end-non-terms))
|
||||
(prods
|
||||
`(,@start-prods
|
||||
,@(map
|
||||
(lambda (end-nt start-sym)
|
||||
(map
|
||||
(lambda (end)
|
||||
(make-prod end-nt
|
||||
(vector
|
||||
(hash-table-get non-term-table start-sym)
|
||||
(hash-table-get term-table end))
|
||||
#f
|
||||
#f
|
||||
(syntax (lambda (x) x))))
|
||||
end-terms))
|
||||
end-non-terms start-syms)
|
||||
,@parsed-prods)))
|
||||
|
||||
(make-object grammar%
|
||||
prods
|
||||
(map car start-prods)
|
||||
terms
|
||||
(append starts (append end-non-terms non-terms))
|
||||
(map (lambda (term-name)
|
||||
(hash-table-get term-table term-name))
|
||||
end-terms)))))))
|
@ -0,0 +1,277 @@
|
||||
(module lalr mzscheme
|
||||
|
||||
;; Compute LALR lookaheads from DeRemer and Pennello 1982
|
||||
|
||||
(require "lr0.rkt"
|
||||
"grammar.rkt"
|
||||
mzlib/list
|
||||
mzlib/class)
|
||||
|
||||
(provide compute-LA)
|
||||
|
||||
;; compute-DR: LR0-automaton * grammar -> (trans-key -> term set)
|
||||
;; computes for each state, non-term transition pair, the terminals
|
||||
;; which can transition out of the resulting state
|
||||
;; output term set is represented in bit-vector form
|
||||
(define (compute-DR a g)
|
||||
(lambda (tk)
|
||||
(let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk))))
|
||||
(term-list->bit-vector
|
||||
(filter
|
||||
(lambda (term)
|
||||
(send a run-automaton r term))
|
||||
(send g get-terms))))))
|
||||
|
||||
;; compute-reads:
|
||||
;; LR0-automaton * grammar -> (trans-key -> trans-key list)
|
||||
(define (compute-reads a g)
|
||||
(let ((nullable-non-terms
|
||||
(filter (lambda (nt) (send g nullable-non-term? nt))
|
||||
(send g get-non-terms))))
|
||||
(lambda (tk)
|
||||
(let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk))))
|
||||
(map (lambda (x) (make-trans-key r x))
|
||||
(filter (lambda (non-term) (send a run-automaton r non-term))
|
||||
nullable-non-terms))))))
|
||||
|
||||
;; compute-read: LR0-automaton * grammar -> (trans-key -> term set)
|
||||
;; output term set is represented in bit-vector form
|
||||
(define (compute-read a g)
|
||||
(let* ((dr (compute-DR a g))
|
||||
(reads (compute-reads a g)))
|
||||
(digraph-tk->terml (send a get-mapped-non-term-keys)
|
||||
reads
|
||||
dr
|
||||
(send a get-num-states))))
|
||||
;; returns the list of all k such that state k transitions to state start on the
|
||||
;; transitions in rhs (in order)
|
||||
(define (run-lr0-backward a rhs dot-pos start num-states)
|
||||
(let loop ((states (list start))
|
||||
(i (sub1 dot-pos)))
|
||||
(cond
|
||||
((< i 0) states)
|
||||
(else (loop (send a run-automaton-back states (vector-ref rhs i))
|
||||
(sub1 i))))))
|
||||
|
||||
;; prod->items-for-include: grammar * prod * non-term -> lr0-item list
|
||||
;; returns the list of all (B -> beta . nt gamma) such that prod = (B -> beta nt gamma)
|
||||
;; and gamma =>* epsilon
|
||||
(define (prod->items-for-include g prod nt)
|
||||
(let* ((rhs (prod-rhs prod))
|
||||
(rhs-l (vector-length rhs)))
|
||||
(append (if (and (> rhs-l 0) (eq? nt (vector-ref rhs (sub1 rhs-l))))
|
||||
(list (make-item prod (sub1 rhs-l)))
|
||||
null)
|
||||
(let loop ((i (sub1 rhs-l)))
|
||||
(cond
|
||||
((and (> i 0)
|
||||
(non-term? (vector-ref rhs i))
|
||||
(send g nullable-non-term? (vector-ref rhs i)))
|
||||
(if (eq? nt (vector-ref rhs (sub1 i)))
|
||||
(cons (make-item prod (sub1 i))
|
||||
(loop (sub1 i)))
|
||||
(loop (sub1 i))))
|
||||
(else null))))))
|
||||
|
||||
;; prod-list->items-for-include: grammar * prod list * non-term -> lr0-item list
|
||||
;; return the list of all (B -> beta . nt gamma) such that (B -> beta nt gamma) in prod-list
|
||||
;; and gamma =>* epsilon
|
||||
(define (prod-list->items-for-include g prod-list nt)
|
||||
(apply append (map (lambda (prod) (prod->items-for-include g prod nt)) prod-list)))
|
||||
|
||||
;; comput-includes: lr0-automaton * grammar -> (trans-key -> trans-key list)
|
||||
(define (compute-includes a g)
|
||||
(let ((num-states (send a get-num-states))
|
||||
(items-for-input-nt (make-vector (send g get-num-non-terms) null)))
|
||||
(for-each
|
||||
(lambda (input-nt)
|
||||
(vector-set! items-for-input-nt (non-term-index input-nt)
|
||||
(prod-list->items-for-include g (send g get-prods) input-nt)))
|
||||
(send g get-non-terms))
|
||||
(lambda (tk)
|
||||
(let* ((goal-state (trans-key-st tk))
|
||||
(non-term (trans-key-gs tk))
|
||||
(items (vector-ref items-for-input-nt (non-term-index non-term))))
|
||||
(trans-key-list-remove-dups
|
||||
(apply append
|
||||
(map (lambda (item)
|
||||
(let* ((prod (item-prod item))
|
||||
(rhs (prod-rhs prod))
|
||||
(lhs (prod-lhs prod)))
|
||||
(map (lambda (state)
|
||||
(make-trans-key state lhs))
|
||||
(run-lr0-backward a
|
||||
rhs
|
||||
(item-dot-pos item)
|
||||
goal-state
|
||||
num-states))))
|
||||
items)))))))
|
||||
|
||||
;; compute-lookback: lr0-automaton * grammar -> (kernel * proc -> trans-key list)
|
||||
(define (compute-lookback a g)
|
||||
(let ((num-states (send a get-num-states)))
|
||||
(lambda (state prod)
|
||||
(map (lambda (k) (make-trans-key k (prod-lhs prod)))
|
||||
(run-lr0-backward a (prod-rhs prod) (vector-length (prod-rhs prod)) state num-states)))))
|
||||
|
||||
;; compute-follow: LR0-automaton * grammar -> (trans-key -> term set)
|
||||
;; output term set is represented in bit-vector form
|
||||
(define (compute-follow a g includes)
|
||||
(let ((read (compute-read a g)))
|
||||
(digraph-tk->terml (send a get-mapped-non-term-keys)
|
||||
includes
|
||||
read
|
||||
(send a get-num-states))))
|
||||
|
||||
;; compute-LA: LR0-automaton * grammar -> kernel * prod -> term set
|
||||
;; output term set is represented in bit-vector form
|
||||
(define (compute-LA a g)
|
||||
(let* ((includes (compute-includes a g))
|
||||
(lookback (compute-lookback a g))
|
||||
(follow (compute-follow a g includes)))
|
||||
(lambda (k p)
|
||||
(let* ((l (lookback k p))
|
||||
(f (map follow l)))
|
||||
(apply bitwise-ior (cons 0 f))))))
|
||||
|
||||
(define (print-DR dr a g)
|
||||
(print-input-st-sym dr "DR" a g print-output-terms))
|
||||
(define (print-Read Read a g)
|
||||
(print-input-st-sym Read "Read" a g print-output-terms))
|
||||
(define (print-includes i a g)
|
||||
(print-input-st-sym i "includes" a g print-output-st-nt))
|
||||
(define (print-lookback l a g)
|
||||
(print-input-st-prod l "lookback" a g print-output-st-nt))
|
||||
(define (print-follow f a g)
|
||||
(print-input-st-sym f "follow" a g print-output-terms))
|
||||
(define (print-LA l a g)
|
||||
(print-input-st-prod l "LA" a g print-output-terms))
|
||||
|
||||
(define (print-input-st-sym f name a g print-output)
|
||||
(printf "~a:\n" name)
|
||||
(send a for-each-state
|
||||
(lambda (state)
|
||||
(for-each
|
||||
(lambda (non-term)
|
||||
(let ((res (f (make-trans-key state non-term))))
|
||||
(if (not (null? res))
|
||||
(printf "~a(~a, ~a) = ~a\n"
|
||||
name
|
||||
state
|
||||
(gram-sym-symbol non-term)
|
||||
(print-output res)))))
|
||||
(send g get-non-terms))))
|
||||
(newline))
|
||||
|
||||
(define (print-input-st-prod f name a g print-output)
|
||||
(printf "~a:\n" name)
|
||||
(send a for-each-state
|
||||
(lambda (state)
|
||||
(for-each
|
||||
(lambda (non-term)
|
||||
(for-each
|
||||
(lambda (prod)
|
||||
(let ((res (f state prod)))
|
||||
(if (not (null? res))
|
||||
(printf "~a(~a, ~a) = ~a\n"
|
||||
name
|
||||
(kernel-index state)
|
||||
(prod-index prod)
|
||||
(print-output res)))))
|
||||
(send g get-prods-for-non-term non-term)))
|
||||
(send g get-non-terms)))))
|
||||
|
||||
(define (print-output-terms r)
|
||||
(map
|
||||
(lambda (p)
|
||||
(gram-sym-symbol p))
|
||||
r))
|
||||
|
||||
(define (print-output-st-nt r)
|
||||
(map
|
||||
(lambda (p)
|
||||
(list
|
||||
(kernel-index (trans-key-st p))
|
||||
(gram-sym-symbol (trans-key-gs p))))
|
||||
r))
|
||||
|
||||
;; init-tk-map : int -> (vectorof hashtable?)
|
||||
(define (init-tk-map n)
|
||||
(let ((v (make-vector n #f)))
|
||||
(let loop ((i (sub1 (vector-length v))))
|
||||
(when (>= i 0)
|
||||
(vector-set! v i (make-hash-table))
|
||||
(loop (sub1 i))))
|
||||
v))
|
||||
|
||||
;; lookup-tk-map : (vectorof (symbol? int hashtable)) -> trans-key? -> int
|
||||
(define (lookup-tk-map map)
|
||||
(lambda (tk)
|
||||
(let ((st (trans-key-st tk))
|
||||
(gs (trans-key-gs tk)))
|
||||
(hash-table-get (vector-ref map (kernel-index st))
|
||||
(gram-sym-symbol gs)
|
||||
(lambda () 0)))))
|
||||
|
||||
;; add-tk-map : (vectorof (symbol? int hashtable)) -> trans-key int ->
|
||||
(define (add-tk-map map)
|
||||
(lambda (tk v)
|
||||
(let ((st (trans-key-st tk))
|
||||
(gs (trans-key-gs tk)))
|
||||
(hash-table-put! (vector-ref map (kernel-index st))
|
||||
(gram-sym-symbol gs)
|
||||
v))))
|
||||
|
||||
;; digraph-tk->terml:
|
||||
;; (trans-key list) * (trans-key -> trans-key list) * (trans-key -> term list) * int * int * int
|
||||
;; -> (trans-key -> term list)
|
||||
;; DeRemer and Pennello 1982
|
||||
;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)}
|
||||
;; A specialization of digraph in the file graph.rkt
|
||||
(define (digraph-tk->terml nodes edges f- num-states)
|
||||
(letrec [
|
||||
;; Will map elements of trans-key to term sets represented as bit vectors
|
||||
(results (init-tk-map num-states))
|
||||
|
||||
;; Maps elements of trans-keys to integers.
|
||||
(N (init-tk-map num-states))
|
||||
|
||||
(get-N (lookup-tk-map N))
|
||||
(set-N (add-tk-map N))
|
||||
(get-f (lookup-tk-map results))
|
||||
(set-f (add-tk-map results))
|
||||
|
||||
(stack null)
|
||||
(push (lambda (x)
|
||||
(set! stack (cons x stack))))
|
||||
(pop (lambda ()
|
||||
(begin0
|
||||
(car stack)
|
||||
(set! stack (cdr stack)))))
|
||||
(depth (lambda () (length stack)))
|
||||
|
||||
;; traverse: 'a ->
|
||||
(traverse
|
||||
(lambda (x)
|
||||
(push x)
|
||||
(let ((d (depth)))
|
||||
(set-N x d)
|
||||
(set-f x (f- x))
|
||||
(for-each (lambda (y)
|
||||
(when (= 0 (get-N y))
|
||||
(traverse y))
|
||||
(set-f x (bitwise-ior (get-f x) (get-f y)))
|
||||
(set-N x (min (get-N x) (get-N y))))
|
||||
(edges x))
|
||||
(when (= d (get-N x))
|
||||
(let loop ((p (pop)))
|
||||
(set-N p +inf.0)
|
||||
(set-f p (get-f x))
|
||||
(unless (equal? x p)
|
||||
(loop (pop))))))))]
|
||||
(for-each (lambda (x)
|
||||
(when (= 0 (get-N x))
|
||||
(traverse x)))
|
||||
nodes)
|
||||
get-f))
|
||||
)
|
@ -0,0 +1,372 @@
|
||||
(module lr0 mzscheme
|
||||
|
||||
;; Handle the LR0 automaton
|
||||
|
||||
(require "grammar.rkt"
|
||||
"graph.rkt"
|
||||
mzlib/list
|
||||
mzlib/class)
|
||||
|
||||
(provide build-lr0-automaton lr0%
|
||||
(struct trans-key (st gs)) trans-key-list-remove-dups
|
||||
kernel-items kernel-index)
|
||||
|
||||
;; kernel = (make-kernel (LR1-item list) index)
|
||||
;; the list must be kept sorted according to item<? so that equal? can
|
||||
;; be used to compare kernels
|
||||
;; Each kernel is assigned a unique index, 0 <= index < number of states
|
||||
;; trans-key = (make-trans-key kernel gram-sym)
|
||||
(define-struct kernel (items index) (make-inspector))
|
||||
(define-struct trans-key (st gs) (make-inspector))
|
||||
|
||||
(define (trans-key<? a b)
|
||||
(let ((kia (kernel-index (trans-key-st a)))
|
||||
(kib (kernel-index (trans-key-st b))))
|
||||
(or (< kia kib)
|
||||
(and (= kia kib)
|
||||
(< (non-term-index (trans-key-gs a))
|
||||
(non-term-index (trans-key-gs b)))))))
|
||||
|
||||
(define (trans-key-list-remove-dups tkl)
|
||||
(let loop ((sorted (sort tkl trans-key<?)))
|
||||
(cond
|
||||
((null? sorted) null)
|
||||
((null? (cdr sorted)) sorted)
|
||||
(else
|
||||
(if (and (= (non-term-index (trans-key-gs (car sorted)))
|
||||
(non-term-index (trans-key-gs (cadr sorted))))
|
||||
(= (kernel-index (trans-key-st (car sorted)))
|
||||
(kernel-index (trans-key-st (cadr sorted)))))
|
||||
(loop (cdr sorted))
|
||||
(cons (car sorted) (loop (cdr sorted))))))))
|
||||
|
||||
|
||||
;; build-transition-table : int (listof (cons/c trans-key X) ->
|
||||
;; (vectorof (symbol X hashtable))
|
||||
(define (build-transition-table num-states assoc)
|
||||
(let ((transitions (make-vector num-states #f)))
|
||||
(let loop ((i (sub1 (vector-length transitions))))
|
||||
(when (>= i 0)
|
||||
(vector-set! transitions i (make-hash-table))
|
||||
(loop (sub1 i))))
|
||||
(for-each
|
||||
(lambda (trans-key/kernel)
|
||||
(let ((tk (car trans-key/kernel)))
|
||||
(hash-table-put! (vector-ref transitions (kernel-index (trans-key-st tk)))
|
||||
(gram-sym-symbol (trans-key-gs tk))
|
||||
(cdr trans-key/kernel))))
|
||||
assoc)
|
||||
transitions))
|
||||
|
||||
;; reverse-assoc : (listof (cons/c trans-key? kernel?)) ->
|
||||
;; (listof (cons/c trans-key? (listof kernel?)))
|
||||
(define (reverse-assoc assoc)
|
||||
(let ((reverse-hash (make-hash-table 'equal))
|
||||
(hash-table-add!
|
||||
(lambda (ht k v)
|
||||
(hash-table-put! ht k (cons v (hash-table-get ht k (lambda () null)))))))
|
||||
(for-each
|
||||
(lambda (trans-key/kernel)
|
||||
(let ((tk (car trans-key/kernel)))
|
||||
(hash-table-add! reverse-hash
|
||||
(make-trans-key (cdr trans-key/kernel)
|
||||
(trans-key-gs tk))
|
||||
(trans-key-st tk))))
|
||||
assoc)
|
||||
(hash-table-map reverse-hash cons)))
|
||||
|
||||
|
||||
;; kernel-list-remove-duplicates
|
||||
;; LR0-automaton = object of class lr0%
|
||||
(define lr0%
|
||||
(class object%
|
||||
(super-instantiate ())
|
||||
;; term-assoc : (listof (cons/c trans-key? kernel?))
|
||||
;; non-term-assoc : (listof (cons/c trans-key? kernel?))
|
||||
;; states : (vectorof kernel?)
|
||||
;; epsilons : ???
|
||||
(init-field term-assoc non-term-assoc states epsilons)
|
||||
|
||||
(define transitions (build-transition-table (vector-length states)
|
||||
(append term-assoc non-term-assoc)))
|
||||
|
||||
(define reverse-term-assoc (reverse-assoc term-assoc))
|
||||
(define reverse-non-term-assoc (reverse-assoc non-term-assoc))
|
||||
(define reverse-transitions
|
||||
(build-transition-table (vector-length states)
|
||||
(append reverse-term-assoc reverse-non-term-assoc)))
|
||||
|
||||
(define mapped-non-terms (map car non-term-assoc))
|
||||
|
||||
(define/public (get-mapped-non-term-keys)
|
||||
mapped-non-terms)
|
||||
|
||||
(define/public (get-num-states)
|
||||
(vector-length states))
|
||||
|
||||
(define/public (get-epsilon-trans)
|
||||
epsilons)
|
||||
|
||||
(define/public (get-transitions)
|
||||
(append term-assoc non-term-assoc))
|
||||
|
||||
;; for-each-state : (state ->) ->
|
||||
;; Iteration over the states in an automaton
|
||||
(define/public (for-each-state f)
|
||||
(let ((num-states (vector-length states)))
|
||||
(let loop ((i 0))
|
||||
(if (< i num-states)
|
||||
(begin
|
||||
(f (vector-ref states i))
|
||||
(loop (add1 i)))))))
|
||||
|
||||
;; run-automaton: kernel? gram-sym? -> (union kernel #f)
|
||||
;; returns the state reached from state k on input s, or #f when k
|
||||
;; has no transition on s
|
||||
(define/public (run-automaton k s)
|
||||
(hash-table-get (vector-ref transitions (kernel-index k))
|
||||
(gram-sym-symbol s)
|
||||
(lambda () #f)))
|
||||
|
||||
;; run-automaton-back : (listof kernel?) gram-sym? -> (listof kernel)
|
||||
;; returns the list of states that can reach k by transitioning on s.
|
||||
(define/public (run-automaton-back k s)
|
||||
(apply append
|
||||
(map
|
||||
(lambda (k)
|
||||
(hash-table-get (vector-ref reverse-transitions (kernel-index k))
|
||||
(gram-sym-symbol s)
|
||||
(lambda () null)))
|
||||
k)))))
|
||||
|
||||
(define (union comp<?)
|
||||
(letrec ((union
|
||||
(lambda (l1 l2)
|
||||
(cond
|
||||
((null? l1) l2)
|
||||
((null? l2) l1)
|
||||
(else (let ((c1 (car l1))
|
||||
(c2 (car l2)))
|
||||
(cond
|
||||
((comp<? c1 c2)
|
||||
(cons c1 (union (cdr l1) l2)))
|
||||
((comp<? c2 c1)
|
||||
(cons c2 (union l1 (cdr l2))))
|
||||
(else (union (cdr l1) l2)))))))))
|
||||
union))
|
||||
|
||||
|
||||
;; The kernels in the automaton are represented cannonically.
|
||||
;; That is (equal? a b) <=> (eq? a b)
|
||||
(define (kernel->string k)
|
||||
(apply string-append
|
||||
`("{" ,@(map (lambda (i) (string-append (item->string i) ", "))
|
||||
(kernel-items k))
|
||||
"}")))
|
||||
|
||||
;; build-LR0-automaton: grammar -> LR0-automaton
|
||||
;; Constructs the kernels of the sets of LR(0) items of g
|
||||
(define (build-lr0-automaton grammar)
|
||||
; (printf "LR(0) automaton:\n")
|
||||
(letrec (
|
||||
(epsilons (make-hash-table 'equal))
|
||||
(grammar-symbols (append (send grammar get-non-terms)
|
||||
(send grammar get-terms)))
|
||||
;; first-non-term: non-term -> non-term list
|
||||
;; given a non-terminal symbol C, return those non-terminal
|
||||
;; symbols A s.t. C -> An for some string of terminals and
|
||||
;; non-terminals n where -> means a rightmost derivation in many
|
||||
;; steps. Assumes that each non-term can be reduced to a string
|
||||
;; of terms.
|
||||
(first-non-term
|
||||
(digraph (send grammar get-non-terms)
|
||||
(lambda (nt)
|
||||
(filter non-term?
|
||||
(map (lambda (prod)
|
||||
(sym-at-dot (make-item prod 0)))
|
||||
(send grammar get-prods-for-non-term nt))))
|
||||
(lambda (nt) (list nt))
|
||||
(union non-term<?)
|
||||
(lambda () null)))
|
||||
|
||||
;; closure: LR1-item list -> LR1-item list
|
||||
;; Creates a set of items containing i s.t. if A -> n.Xm is in it,
|
||||
;; X -> .o is in it too.
|
||||
(LR0-closure
|
||||
(lambda (i)
|
||||
(cond
|
||||
((null? i) null)
|
||||
(else
|
||||
(let ((next-gsym (sym-at-dot (car i))))
|
||||
(cond
|
||||
((non-term? next-gsym)
|
||||
(cons (car i)
|
||||
(append
|
||||
(apply append
|
||||
(map (lambda (non-term)
|
||||
(map (lambda (x)
|
||||
(make-item x 0))
|
||||
(send grammar
|
||||
get-prods-for-non-term
|
||||
non-term)))
|
||||
(first-non-term next-gsym)))
|
||||
(LR0-closure (cdr i)))))
|
||||
(else
|
||||
(cons (car i) (LR0-closure (cdr i))))))))))
|
||||
|
||||
|
||||
;; maps trans-keys to kernels
|
||||
(automaton-term null)
|
||||
(automaton-non-term null)
|
||||
|
||||
;; keeps the kernels we have seen, so we can have a unique
|
||||
;; list for each kernel
|
||||
(kernels (make-hash-table 'equal))
|
||||
|
||||
(counter 0)
|
||||
|
||||
;; goto: LR1-item list -> LR1-item list list
|
||||
;; creates new kernels by moving the dot in each item in the
|
||||
;; LR0-closure of kernel to the right, and grouping them by
|
||||
;; the term/non-term moved over. Returns the kernels not
|
||||
;; yet seen, and places the trans-keys into automaton
|
||||
(goto
|
||||
(lambda (kernel)
|
||||
(let (
|
||||
;; maps a gram-syms to a list of items
|
||||
(table (make-hash-table))
|
||||
|
||||
;; add-item!:
|
||||
;; (symbol (listof item) hashtable) item? ->
|
||||
;; adds i into the table grouped with the grammar
|
||||
;; symbol following its dot
|
||||
(add-item!
|
||||
(lambda (table i)
|
||||
(let ((gs (sym-at-dot i)))
|
||||
(cond
|
||||
(gs
|
||||
(let ((already
|
||||
(hash-table-get table
|
||||
(gram-sym-symbol gs)
|
||||
(lambda () null))))
|
||||
(unless (member i already)
|
||||
(hash-table-put! table
|
||||
(gram-sym-symbol gs)
|
||||
(cons i already)))))
|
||||
((= 0 (vector-length (prod-rhs (item-prod i))))
|
||||
(let ((current (hash-table-get epsilons
|
||||
kernel
|
||||
(lambda () null))))
|
||||
(hash-table-put! epsilons
|
||||
kernel
|
||||
(cons i current)))))))))
|
||||
|
||||
;; Group the items of the LR0 closure of the kernel
|
||||
;; by the character after the dot
|
||||
(for-each (lambda (item)
|
||||
(add-item! table item))
|
||||
(LR0-closure (kernel-items kernel)))
|
||||
|
||||
;; each group is a new kernel, with the dot advanced.
|
||||
;; sorts the items in a kernel so kernels can be compared
|
||||
;; with equal? for using the table kernels to make sure
|
||||
;; only one representitive of each kernel is created
|
||||
(filter
|
||||
(lambda (x) x)
|
||||
(map
|
||||
(lambda (i)
|
||||
(let* ((gs (car i))
|
||||
(items (cadr i))
|
||||
(new #f)
|
||||
(new-kernel (sort
|
||||
(filter (lambda (x) x)
|
||||
(map move-dot-right items))
|
||||
item<?))
|
||||
(unique-kernel (hash-table-get
|
||||
kernels
|
||||
new-kernel
|
||||
(lambda ()
|
||||
(let ((k (make-kernel
|
||||
new-kernel
|
||||
counter)))
|
||||
(set! new #t)
|
||||
(set! counter (add1 counter))
|
||||
(hash-table-put! kernels
|
||||
new-kernel
|
||||
k)
|
||||
k)))))
|
||||
(cond
|
||||
((term? gs)
|
||||
(set! automaton-term (cons (cons (make-trans-key kernel gs)
|
||||
unique-kernel)
|
||||
automaton-term)))
|
||||
(else
|
||||
(set! automaton-non-term (cons (cons (make-trans-key kernel gs)
|
||||
unique-kernel)
|
||||
automaton-non-term))))
|
||||
#;(printf "~a -> ~a on ~a\n"
|
||||
(kernel->string kernel)
|
||||
(kernel->string unique-kernel)
|
||||
(gram-sym-symbol gs))
|
||||
(if new
|
||||
unique-kernel
|
||||
#f)))
|
||||
(let loop ((gsyms grammar-symbols))
|
||||
(cond
|
||||
((null? gsyms) null)
|
||||
(else
|
||||
(let ((items (hash-table-get table
|
||||
(gram-sym-symbol (car gsyms))
|
||||
(lambda () null))))
|
||||
(cond
|
||||
((null? items) (loop (cdr gsyms)))
|
||||
(else
|
||||
(cons (list (car gsyms) items)
|
||||
(loop (cdr gsyms))))))))))))))
|
||||
|
||||
(starts
|
||||
(map (lambda (init-prod) (list (make-item init-prod 0)))
|
||||
(send grammar get-init-prods)))
|
||||
(startk
|
||||
(map (lambda (start)
|
||||
(let ((k (make-kernel start counter)))
|
||||
(hash-table-put! kernels start k)
|
||||
(set! counter (add1 counter))
|
||||
k))
|
||||
starts))
|
||||
(new-kernels (make-queue)))
|
||||
|
||||
(let loop ((old-kernels startk)
|
||||
(seen-kernels null))
|
||||
(cond
|
||||
((and (empty-queue? new-kernels) (null? old-kernels))
|
||||
(make-object lr0%
|
||||
automaton-term
|
||||
automaton-non-term
|
||||
(list->vector (reverse seen-kernels))
|
||||
epsilons))
|
||||
((null? old-kernels)
|
||||
(loop (deq! new-kernels) seen-kernels))
|
||||
(else
|
||||
(enq! new-kernels (goto (car old-kernels)))
|
||||
(loop (cdr old-kernels) (cons (car old-kernels) seen-kernels)))))))
|
||||
|
||||
(define-struct q (f l) (make-inspector))
|
||||
(define (empty-queue? q)
|
||||
(null? (q-f q)))
|
||||
(define (make-queue)
|
||||
(make-q null null))
|
||||
(define (enq! q i)
|
||||
(if (empty-queue? q)
|
||||
(let ((i (mcons i null)))
|
||||
(set-q-l! q i)
|
||||
(set-q-f! q i))
|
||||
(begin
|
||||
(set-mcdr! (q-l q) (mcons i null))
|
||||
(set-q-l! q (mcdr (q-l q))))))
|
||||
(define (deq! q)
|
||||
(begin0
|
||||
(mcar (q-f q))
|
||||
(set-q-f! q (mcdr (q-f q)))))
|
||||
|
||||
)
|
@ -0,0 +1,54 @@
|
||||
(module parser-actions mzscheme
|
||||
(require "grammar.rkt")
|
||||
(provide (all-defined-except make-reduce make-reduce*)
|
||||
(rename make-reduce* make-reduce))
|
||||
|
||||
;; An action is
|
||||
;; - (make-shift int)
|
||||
;; - (make-reduce prod runtime-action)
|
||||
;; - (make-accept)
|
||||
;; - (make-goto int)
|
||||
;; - (no-action)
|
||||
;; A reduce contains a runtime-reduce so that sharing of the reduces can
|
||||
;; be easily transferred to sharing of runtime-reduces.
|
||||
|
||||
(define-struct action () (make-inspector))
|
||||
(define-struct (shift action) (state) (make-inspector))
|
||||
(define-struct (reduce action) (prod runtime-reduce) (make-inspector))
|
||||
(define-struct (accept action) () (make-inspector))
|
||||
(define-struct (goto action) (state) (make-inspector))
|
||||
(define-struct (no-action action) () (make-inspector))
|
||||
|
||||
(define (make-reduce* p)
|
||||
(make-reduce p
|
||||
(vector (prod-index p)
|
||||
(gram-sym-symbol (prod-lhs p))
|
||||
(vector-length (prod-rhs p)))))
|
||||
|
||||
;; A runtime-action is
|
||||
;; non-negative-int (shift)
|
||||
;; (vector int symbol int) (reduce)
|
||||
;; 'accept (accept)
|
||||
;; negative-int (goto)
|
||||
;; #f (no-action)
|
||||
|
||||
(define (action->runtime-action a)
|
||||
(cond
|
||||
((shift? a) (shift-state a))
|
||||
((reduce? a) (reduce-runtime-reduce a))
|
||||
((accept? a) 'accept)
|
||||
((goto? a) (- (+ (goto-state a) 1)))
|
||||
((no-action? a) #f)))
|
||||
|
||||
(define (runtime-shift? x) (and (integer? x) (>= x 0)))
|
||||
(define runtime-reduce? vector?)
|
||||
(define (runtime-accept? x) (eq? x 'accept))
|
||||
(define (runtime-goto? x) (and (integer? x) (< x 0)))
|
||||
|
||||
(define runtime-shift-state values)
|
||||
(define (runtime-reduce-prod-num x) (vector-ref x 0))
|
||||
(define (runtime-reduce-lhs x) (vector-ref x 1))
|
||||
(define (runtime-reduce-rhs-length x) (vector-ref x 2))
|
||||
(define (runtime-goto-state x) (- (+ x 1)))
|
||||
|
||||
)
|
@ -0,0 +1,113 @@
|
||||
(module parser-builder mzscheme
|
||||
|
||||
(require "input-file-parser.rkt"
|
||||
"grammar.rkt"
|
||||
"table.rkt"
|
||||
mzlib/class
|
||||
racket/contract)
|
||||
(require-for-template mzscheme)
|
||||
|
||||
(provide/contract
|
||||
(build-parser (-> string? any/c any/c
|
||||
(listof identifier?)
|
||||
(listof identifier?)
|
||||
(listof identifier?)
|
||||
(or/c syntax? #f)
|
||||
syntax?
|
||||
(values any/c any/c any/c any/c))))
|
||||
|
||||
;; fix-check-syntax : (listof identifier?) (listof identifier?) (listof identifier?)
|
||||
;; (union syntax? false/c) syntax?) -> syntax?
|
||||
(define (fix-check-syntax input-terms start ends assocs prods)
|
||||
(let* ((term-binders (get-term-list input-terms))
|
||||
(get-term-binder
|
||||
(let ((t (make-hash-table)))
|
||||
(for-each
|
||||
(lambda (term)
|
||||
(hash-table-put! t (syntax-e term) term))
|
||||
term-binders)
|
||||
(lambda (x)
|
||||
(let ((r (hash-table-get t (syntax-e x) (lambda () #f))))
|
||||
(if r
|
||||
(syntax-local-introduce (datum->syntax-object r (syntax-e x) x x))
|
||||
x)))))
|
||||
(rhs-list
|
||||
(syntax-case prods ()
|
||||
(((_ rhs ...) ...)
|
||||
(syntax->list (syntax (rhs ... ...)))))))
|
||||
(with-syntax (((tmp ...) (map syntax-local-introduce term-binders))
|
||||
((term-group ...)
|
||||
(map (lambda (tg)
|
||||
(syntax-property
|
||||
(datum->syntax-object tg #f)
|
||||
'disappeared-use
|
||||
tg))
|
||||
input-terms))
|
||||
((end ...)
|
||||
(map get-term-binder ends))
|
||||
((start ...)
|
||||
(map get-term-binder start))
|
||||
((bind ...)
|
||||
(syntax-case prods ()
|
||||
(((bind _ ...) ...)
|
||||
(syntax->list (syntax (bind ...))))))
|
||||
(((bound ...) ...)
|
||||
(map
|
||||
(lambda (rhs)
|
||||
(syntax-case rhs ()
|
||||
(((bound ...) (_ pbound) __)
|
||||
(map get-term-binder
|
||||
(cons (syntax pbound)
|
||||
(syntax->list (syntax (bound ...))))))
|
||||
(((bound ...) _)
|
||||
(map get-term-binder
|
||||
(syntax->list (syntax (bound ...)))))))
|
||||
rhs-list))
|
||||
((prec ...)
|
||||
(if assocs
|
||||
(map get-term-binder
|
||||
(syntax-case assocs ()
|
||||
(((__ term ...) ...)
|
||||
(syntax->list (syntax (term ... ...))))))
|
||||
null)))
|
||||
#`(when #f
|
||||
(let ((bind void) ... (tmp void) ...)
|
||||
(void bound ... ... term-group ... start ... end ... prec ...))))))
|
||||
(require mzlib/list "parser-actions.rkt")
|
||||
(define (build-parser filename src-pos suppress input-terms start end assocs prods)
|
||||
(let* ((grammar (parse-input input-terms start end assocs prods src-pos))
|
||||
(table (build-table grammar filename suppress))
|
||||
(all-tokens (make-hash-table))
|
||||
(actions-code
|
||||
`(vector ,@(map prod-action (send grammar get-prods)))))
|
||||
(for-each (lambda (term)
|
||||
(hash-table-put! all-tokens (gram-sym-symbol term) #t))
|
||||
(send grammar get-terms))
|
||||
#;(let ((num-states (vector-length table))
|
||||
(num-gram-syms (+ (send grammar get-num-terms)
|
||||
(send grammar get-num-non-terms)))
|
||||
(num-ht-entries (apply + (map length (vector->list table))))
|
||||
(num-reduces
|
||||
(let ((ht (make-hash-table)))
|
||||
(for-each
|
||||
(lambda (x)
|
||||
(when (reduce? x)
|
||||
(hash-table-put! ht x #t)))
|
||||
(map cdr (apply append (vector->list table))))
|
||||
(length (hash-table-map ht void)))))
|
||||
(printf "~a states, ~a grammar symbols, ~a hash-table entries, ~a reduces\n"
|
||||
num-states num-gram-syms num-ht-entries num-reduces)
|
||||
(printf "~a -- ~aKB, previously ~aKB\n"
|
||||
(/ (+ 2 num-states
|
||||
(* 4 num-states) (* 2 1.5 num-ht-entries)
|
||||
(* 5 num-reduces)) 256.0)
|
||||
(/ (+ 2 num-states
|
||||
(* 4 num-states) (* 2 2.3 num-ht-entries)
|
||||
(* 5 num-reduces)) 256.0)
|
||||
(/ (+ 2 (* num-states num-gram-syms) (* 5 num-reduces)) 256.0)))
|
||||
(values table
|
||||
all-tokens
|
||||
actions-code
|
||||
(fix-check-syntax input-terms start end assocs prods))))
|
||||
|
||||
)
|
@ -0,0 +1,290 @@
|
||||
#lang scheme/base
|
||||
|
||||
;; Routine to build the LALR table
|
||||
|
||||
(require "grammar.rkt"
|
||||
"lr0.rkt"
|
||||
"lalr.rkt"
|
||||
"parser-actions.rkt"
|
||||
racket/contract
|
||||
mzlib/list
|
||||
mzlib/class)
|
||||
|
||||
(define (is-a-grammar%? x) (is-a? x grammar%))
|
||||
(provide/contract
|
||||
(build-table (-> is-a-grammar%? string? any/c
|
||||
(vectorof (listof (cons/c (or/c term? non-term?) action?))))))
|
||||
|
||||
;; A parse-table is (vectorof (listof (cons/c gram-sym? action)))
|
||||
;; A grouped-parse-table is (vectorof (listof (cons/c gram-sym? (listof action))))
|
||||
|
||||
;; make-parse-table : int -> parse-table
|
||||
(define (make-parse-table num-states)
|
||||
(make-vector num-states null))
|
||||
|
||||
;; table-add!: parse-table nat symbol action ->
|
||||
(define (table-add! table state-index symbol val)
|
||||
(vector-set! table state-index (cons (cons symbol val)
|
||||
(vector-ref table state-index))))
|
||||
|
||||
;; group-table : parse-table -> grouped-parse-table
|
||||
(define (group-table table)
|
||||
(list->vector
|
||||
(map
|
||||
(lambda (state-entry)
|
||||
(let ((ht (make-hash)))
|
||||
(for-each
|
||||
(lambda (gs/actions)
|
||||
(let ((group (hash-ref ht (car gs/actions) (lambda () null))))
|
||||
(unless (member (cdr gs/actions) group)
|
||||
(hash-set! ht (car gs/actions) (cons (cdr gs/actions) group)))))
|
||||
state-entry)
|
||||
(hash-map ht cons)))
|
||||
(vector->list table))))
|
||||
|
||||
;; table-map : (vectorof (listof (cons/c gram-sym? X))) (gram-sym? X -> Y) ->
|
||||
;; (vectorof (listof (cons/c gram-sym? Y)))
|
||||
(define (table-map f table)
|
||||
(list->vector
|
||||
(map
|
||||
(lambda (state-entry)
|
||||
(map
|
||||
(lambda (gs/X)
|
||||
(cons (car gs/X) (f (car gs/X) (cdr gs/X))))
|
||||
state-entry))
|
||||
(vector->list table))))
|
||||
|
||||
|
||||
(define (bit-vector-for-each f bv)
|
||||
(letrec ((for-each
|
||||
(lambda (bv number)
|
||||
(cond
|
||||
((= 0 bv) (void))
|
||||
((= 1 (bitwise-and 1 bv))
|
||||
(f number)
|
||||
(for-each (arithmetic-shift bv -1) (add1 number)))
|
||||
(else (for-each (arithmetic-shift bv -1) (add1 number)))))))
|
||||
(for-each bv 0)))
|
||||
|
||||
|
||||
;; print-entry: symbol action output-port ->
|
||||
;; prints the action a for lookahead sym to the given port
|
||||
(define (print-entry sym a port)
|
||||
(let ((s "\t~a\t\t\t\t\t~a\t~a\n"))
|
||||
(cond
|
||||
((shift? a)
|
||||
(fprintf port s sym "shift" (shift-state a)))
|
||||
((reduce? a)
|
||||
(fprintf port s sym "reduce" (prod-index (reduce-prod a))))
|
||||
((accept? a)
|
||||
(fprintf port s sym "accept" ""))
|
||||
((goto? a)
|
||||
(fprintf port s sym "goto" (goto-state a))))))
|
||||
|
||||
|
||||
;; count: ('a -> bool) * 'a list -> num
|
||||
;; counts the number of elements in list that satisfy pred
|
||||
(define (count pred list)
|
||||
(cond
|
||||
((null? list) 0)
|
||||
((pred (car list)) (+ 1 (count pred (cdr list))))
|
||||
(else (count pred (cdr list)))))
|
||||
|
||||
;; display-parser: LR0-automaton grouped-parse-table (listof prod?) output-port ->
|
||||
;; Prints out the parser given by table.
|
||||
(define (display-parser a grouped-table prods port)
|
||||
(let* ((SR-conflicts 0)
|
||||
(RR-conflicts 0))
|
||||
(for-each
|
||||
(lambda (prod)
|
||||
(fprintf port
|
||||
"~a\t~a\t=\t~a\n"
|
||||
(prod-index prod)
|
||||
(gram-sym-symbol (prod-lhs prod))
|
||||
(map gram-sym-symbol (vector->list (prod-rhs prod)))))
|
||||
prods)
|
||||
(send a for-each-state
|
||||
(lambda (state)
|
||||
(fprintf port "State ~a\n" (kernel-index state))
|
||||
(for-each (lambda (item)
|
||||
(fprintf port "\t~a\n" (item->string item)))
|
||||
(kernel-items state))
|
||||
(newline port)
|
||||
(for-each
|
||||
(lambda (gs/action)
|
||||
(let ((sym (gram-sym-symbol (car gs/action)))
|
||||
(act (cdr gs/action)))
|
||||
(cond
|
||||
((null? act) (void))
|
||||
((null? (cdr act))
|
||||
(print-entry sym (car act) port))
|
||||
(else
|
||||
(fprintf port "begin conflict:\n")
|
||||
(when (> (count reduce? act) 1)
|
||||
(set! RR-conflicts (add1 RR-conflicts)))
|
||||
(when (> (count shift? act) 0)
|
||||
(set! SR-conflicts (add1 SR-conflicts)))
|
||||
(map (lambda (x) (print-entry sym x port)) act)
|
||||
(fprintf port "end conflict\n")))))
|
||||
(vector-ref grouped-table (kernel-index state)))
|
||||
(newline port)))
|
||||
|
||||
(when (> SR-conflicts 0)
|
||||
(fprintf port "~a shift/reduce conflict~a\n"
|
||||
SR-conflicts
|
||||
(if (= SR-conflicts 1) "" "s")))
|
||||
(when (> RR-conflicts 0)
|
||||
(fprintf port "~a reduce/reduce conflict~a\n"
|
||||
RR-conflicts
|
||||
(if (= RR-conflicts 1) "" "s")))))
|
||||
|
||||
;; resolve-conflict : (listof action?) -> action? bool bool
|
||||
(define (resolve-conflict actions)
|
||||
(cond
|
||||
((null? actions) (values (make-no-action) #f #f))
|
||||
((null? (cdr actions))
|
||||
(values (car actions) #f #f))
|
||||
(else
|
||||
(let ((SR-conflict? (> (count shift? actions) 0))
|
||||
(RR-conflict? (> (count reduce? actions) 1)))
|
||||
(let loop ((current-guess #f)
|
||||
(rest actions))
|
||||
(cond
|
||||
((null? rest) (values current-guess SR-conflict? RR-conflict?))
|
||||
((shift? (car rest)) (values (car rest) SR-conflict? RR-conflict?))
|
||||
((not current-guess)
|
||||
(loop (car rest) (cdr rest)))
|
||||
((and (reduce? (car rest))
|
||||
(< (prod-index (reduce-prod (car rest)))
|
||||
(prod-index (reduce-prod current-guess))))
|
||||
(loop (car rest) (cdr rest)))
|
||||
((accept? (car rest))
|
||||
(eprintf "accept/reduce or accept/shift conflicts. Check the grammar for useless cycles of productions\n")
|
||||
(loop current-guess (cdr rest)))
|
||||
(else (loop current-guess (cdr rest)))))))))
|
||||
|
||||
;; resolve-conflicts : grouped-parse-table bool -> parse-table
|
||||
(define (resolve-conflicts grouped-table suppress)
|
||||
(let* ((SR-conflicts 0)
|
||||
(RR-conflicts 0)
|
||||
(table (table-map
|
||||
(lambda (gs actions)
|
||||
(let-values (((action SR? RR?)
|
||||
(resolve-conflict actions)))
|
||||
(when SR?
|
||||
(set! SR-conflicts (add1 SR-conflicts)))
|
||||
(when RR?
|
||||
(set! RR-conflicts (add1 RR-conflicts)))
|
||||
action))
|
||||
grouped-table)))
|
||||
(unless suppress
|
||||
(when (> SR-conflicts 0)
|
||||
(eprintf "~a shift/reduce conflict~a\n"
|
||||
SR-conflicts
|
||||
(if (= SR-conflicts 1) "" "s")))
|
||||
(when (> RR-conflicts 0)
|
||||
(eprintf "~a reduce/reduce conflict~a\n"
|
||||
RR-conflicts
|
||||
(if (= RR-conflicts 1) "" "s"))))
|
||||
table))
|
||||
|
||||
|
||||
;; resolve-sr-conflict : (listof action) (union int #f) -> (listof action)
|
||||
;; Resolves a single shift-reduce conflict, if precedences are in place.
|
||||
(define (resolve-sr-conflict/prec actions shift-prec)
|
||||
(let* ((shift (if (shift? (car actions))
|
||||
(car actions)
|
||||
(cadr actions)))
|
||||
(reduce (if (shift? (car actions))
|
||||
(cadr actions)
|
||||
(car actions)))
|
||||
(reduce-prec (prod-prec (reduce-prod reduce))))
|
||||
(cond
|
||||
((and shift-prec reduce-prec)
|
||||
(cond
|
||||
((< (prec-num shift-prec) (prec-num reduce-prec))
|
||||
(list reduce))
|
||||
((> (prec-num shift-prec) (prec-num reduce-prec))
|
||||
(list shift))
|
||||
((eq? 'left (prec-assoc shift-prec))
|
||||
(list reduce))
|
||||
((eq? 'right (prec-assoc shift-prec))
|
||||
(list shift))
|
||||
(else null)))
|
||||
(else actions))))
|
||||
|
||||
|
||||
;; resolve-prec-conflicts : parse-table -> grouped-parse-table
|
||||
(define (resolve-prec-conflicts table)
|
||||
(table-map
|
||||
(lambda (gs actions)
|
||||
(cond
|
||||
((and (term? gs)
|
||||
(= 2 (length actions))
|
||||
(or (shift? (car actions))
|
||||
(shift? (cadr actions))))
|
||||
(resolve-sr-conflict/prec actions (term-prec gs)))
|
||||
(else actions)))
|
||||
(group-table table)))
|
||||
|
||||
;; build-table: grammar string bool -> parse-table
|
||||
(define (build-table g file suppress)
|
||||
(let* ((a (build-lr0-automaton g))
|
||||
(term-vector (list->vector (send g get-terms)))
|
||||
(end-terms (send g get-end-terms))
|
||||
(table (make-parse-table (send a get-num-states)))
|
||||
(get-lookahead (compute-LA a g))
|
||||
(reduce-cache (make-hash)))
|
||||
|
||||
(for-each
|
||||
(lambda (trans-key/state)
|
||||
(let ((from-state-index (kernel-index (trans-key-st (car trans-key/state))))
|
||||
(gs (trans-key-gs (car trans-key/state)))
|
||||
(to-state (cdr trans-key/state)))
|
||||
(table-add! table from-state-index gs
|
||||
(cond
|
||||
((non-term? gs)
|
||||
(make-goto (kernel-index to-state)))
|
||||
((member gs end-terms)
|
||||
(make-accept))
|
||||
(else
|
||||
(make-shift
|
||||
(kernel-index to-state)))))))
|
||||
(send a get-transitions))
|
||||
|
||||
(send a for-each-state
|
||||
(lambda (state)
|
||||
(for-each
|
||||
(lambda (item)
|
||||
(let ((item-prod (item-prod item)))
|
||||
(bit-vector-for-each
|
||||
(lambda (term-index)
|
||||
(unless (start-item? item)
|
||||
(let ((r (hash-ref reduce-cache item-prod
|
||||
(lambda ()
|
||||
(let ((r (make-reduce item-prod)))
|
||||
(hash-set! reduce-cache item-prod r)
|
||||
r)))))
|
||||
(table-add! table
|
||||
(kernel-index state)
|
||||
(vector-ref term-vector term-index)
|
||||
r))))
|
||||
(get-lookahead state item-prod))))
|
||||
(append (hash-ref (send a get-epsilon-trans) state (lambda () null))
|
||||
(filter (lambda (item)
|
||||
(not (move-dot-right item)))
|
||||
(kernel-items state))))))
|
||||
|
||||
(let ((grouped-table (resolve-prec-conflicts table)))
|
||||
(unless (string=? file "")
|
||||
(with-handlers [(exn:fail:filesystem?
|
||||
(lambda (e)
|
||||
(eprintf
|
||||
"Cannot write debug output to file \"~a\": ~a\n"
|
||||
file
|
||||
(exn-message e))))]
|
||||
(call-with-output-file file
|
||||
(lambda (port)
|
||||
(display-parser a grouped-table (send g get-prods) port))
|
||||
#:exists 'truncate)))
|
||||
(resolve-conflicts grouped-table suppress))))
|
@ -0,0 +1,118 @@
|
||||
(module yacc-helper mzscheme
|
||||
|
||||
(require mzlib/list
|
||||
"../private-lex/token-syntax.rkt")
|
||||
|
||||
;; General helper routines
|
||||
|
||||
(provide duplicate-list? remove-duplicates overlap? vector-andmap display-yacc)
|
||||
|
||||
(define (vector-andmap f v)
|
||||
(let loop ((i 0))
|
||||
(cond
|
||||
((= i (vector-length v)) #t)
|
||||
(else (if (f (vector-ref v i))
|
||||
(loop (add1 i))
|
||||
#f)))))
|
||||
|
||||
;; duplicate-list?: symbol list -> #f | symbol
|
||||
;; returns a symbol that exists twice in l, or false if no such symbol
|
||||
;; exists
|
||||
(define (duplicate-list? l)
|
||||
(letrec ((t (make-hash-table))
|
||||
(dl? (lambda (l)
|
||||
(cond
|
||||
((null? l) #f)
|
||||
((hash-table-get t (car l) (lambda () #f)) =>
|
||||
(lambda (x) x))
|
||||
(else
|
||||
(hash-table-put! t (car l) (car l))
|
||||
(dl? (cdr l)))))))
|
||||
(dl? l)))
|
||||
|
||||
;; remove-duplicates: syntax-object list -> syntax-object list
|
||||
;; removes the duplicates from the lists
|
||||
(define (remove-duplicates sl)
|
||||
(let ((t (make-hash-table)))
|
||||
(letrec ((x
|
||||
(lambda (sl)
|
||||
(cond
|
||||
((null? sl) sl)
|
||||
((hash-table-get t (syntax-object->datum (car sl)) (lambda () #f))
|
||||
(x (cdr sl)))
|
||||
(else
|
||||
(hash-table-put! t (syntax-object->datum (car sl)) #t)
|
||||
(cons (car sl) (x (cdr sl))))))))
|
||||
(x sl))))
|
||||
|
||||
;; overlap?: symbol list * symbol list -> #f | symbol
|
||||
;; Returns an symbol in l1 intersect l2, or #f is no such symbol exists
|
||||
(define (overlap? l1 l2)
|
||||
(let/ec ret
|
||||
(let ((t (make-hash-table)))
|
||||
(for-each (lambda (s1)
|
||||
(hash-table-put! t s1 s1))
|
||||
l1)
|
||||
(for-each (lambda (s2)
|
||||
(cond
|
||||
((hash-table-get t s2 (lambda () #f)) =>
|
||||
(lambda (o) (ret o)))))
|
||||
l2)
|
||||
#f)))
|
||||
|
||||
|
||||
(define (display-yacc grammar tokens start precs port)
|
||||
(let-syntax ((p (syntax-rules ()
|
||||
((_ args ...) (fprintf port args ...)))))
|
||||
(let* ((tokens (map syntax-local-value tokens))
|
||||
(eterms (filter e-terminals-def? tokens))
|
||||
(terms (filter terminals-def? tokens))
|
||||
(term-table (make-hash-table))
|
||||
(display-rhs
|
||||
(lambda (rhs)
|
||||
(for-each (lambda (sym) (p "~a " (hash-table-get term-table sym (lambda () sym))))
|
||||
(car rhs))
|
||||
(if (= 3 (length rhs))
|
||||
(p "%prec ~a" (cadadr rhs)))
|
||||
(p "\n"))))
|
||||
(for-each
|
||||
(lambda (t)
|
||||
(for-each
|
||||
(lambda (t)
|
||||
(hash-table-put! term-table t (format "'~a'" t)))
|
||||
(syntax-object->datum (e-terminals-def-t t))))
|
||||
eterms)
|
||||
(for-each
|
||||
(lambda (t)
|
||||
(for-each
|
||||
(lambda (t)
|
||||
(p "%token ~a\n" t)
|
||||
(hash-table-put! term-table t (format "~a" t)))
|
||||
(syntax-object->datum (terminals-def-t t))))
|
||||
terms)
|
||||
(if precs
|
||||
(for-each (lambda (prec)
|
||||
(p "%~a " (car prec))
|
||||
(for-each (lambda (tok)
|
||||
(p " ~a" (hash-table-get term-table tok)))
|
||||
(cdr prec))
|
||||
(p "\n"))
|
||||
precs))
|
||||
(p "%start ~a\n" start)
|
||||
(p "%%\n")
|
||||
|
||||
(for-each (lambda (prod)
|
||||
(let ((nt (car prod)))
|
||||
(p "~a: " nt)
|
||||
(display-rhs (cadr prod))
|
||||
(for-each (lambda (rhs)
|
||||
(p "| ")
|
||||
(display-rhs rhs))
|
||||
(cddr prod))
|
||||
(p ";\n")))
|
||||
grammar)
|
||||
(p "%%\n"))))
|
||||
|
||||
|
||||
)
|
||||
|
@ -0,0 +1,135 @@
|
||||
(module yacc-to-scheme mzscheme
|
||||
(require parser-tools/lex
|
||||
(prefix : parser-tools/lex-sre)
|
||||
parser-tools/yacc
|
||||
syntax/readerr
|
||||
mzlib/list)
|
||||
(provide trans)
|
||||
|
||||
(define match-double-string
|
||||
(lexer
|
||||
((:+ (:~ #\" #\\)) (append (string->list lexeme)
|
||||
(match-double-string input-port)))
|
||||
((:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port)))
|
||||
(#\" null)))
|
||||
|
||||
(define match-single-string
|
||||
(lexer
|
||||
((:+ (:~ #\' #\\)) (append (string->list lexeme)
|
||||
(match-single-string input-port)))
|
||||
((:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port)))
|
||||
(#\' null)))
|
||||
|
||||
(define-lex-abbrevs
|
||||
(letter (:or (:/ "a" "z") (:/ "A" "Z")))
|
||||
(digit (:/ "0" "9"))
|
||||
(initial (:or letter (char-set "!$%&*/<=>?^_~@")))
|
||||
(subsequent (:or initial digit (char-set "+-.@")))
|
||||
(comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/")))
|
||||
|
||||
(define-empty-tokens x
|
||||
(EOF PIPE |:| SEMI |%%| %prec))
|
||||
(define-tokens y
|
||||
(SYM STRING))
|
||||
|
||||
(define get-token-grammar
|
||||
(lexer-src-pos
|
||||
("%%" '|%%|)
|
||||
(":" (string->symbol lexeme))
|
||||
("%prec" (string->symbol lexeme))
|
||||
(#\| 'PIPE)
|
||||
((:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}")))
|
||||
(return-without-pos (get-token-grammar input-port)))
|
||||
(#\; 'SEMI)
|
||||
(#\' (token-STRING (string->symbol (list->string (match-single-string input-port)))))
|
||||
(#\" (token-STRING (string->symbol (list->string (match-double-string input-port)))))
|
||||
((:: initial (:* subsequent)) (token-SYM (string->symbol lexeme)))))
|
||||
|
||||
(define (parse-grammar enter-term enter-empty-term enter-non-term)
|
||||
(parser
|
||||
(tokens x y)
|
||||
(src-pos)
|
||||
(error (lambda (tok-ok tok-name tok-value start-pos end-pos)
|
||||
(raise-read-error
|
||||
(format "Error Parsing YACC grammar at token: ~a with value: ~a" tok-name tok-value)
|
||||
(file-path)
|
||||
(position-line start-pos)
|
||||
(position-col start-pos)
|
||||
(position-offset start-pos)
|
||||
(- (position-offset end-pos) (position-offset start-pos)))))
|
||||
|
||||
(end |%%|)
|
||||
(start gram)
|
||||
(grammar
|
||||
(gram
|
||||
((production) (list $1))
|
||||
((production gram) (cons $1 $2)))
|
||||
(production
|
||||
((SYM |:| prods SEMI)
|
||||
(begin
|
||||
(enter-non-term $1)
|
||||
(cons $1 $3))))
|
||||
(prods
|
||||
((rhs) (list `(,$1 #f)))
|
||||
((rhs prec) (list `(,$1 ,$2 #f)))
|
||||
((rhs PIPE prods) (cons `(,$1 #f) $3))
|
||||
((rhs prec PIPE prods) (cons `(,$1 ,$2 #f) $4)))
|
||||
(prec
|
||||
((%prec SYM)
|
||||
(begin
|
||||
(enter-term $2)
|
||||
(list 'prec $2)))
|
||||
((%prec STRING)
|
||||
(begin
|
||||
(enter-empty-term $2)
|
||||
(list 'prec $2))))
|
||||
(rhs
|
||||
(() null)
|
||||
((SYM rhs)
|
||||
(begin
|
||||
(enter-term $1)
|
||||
(cons $1 $2)))
|
||||
((STRING rhs)
|
||||
(begin
|
||||
(enter-empty-term $1)
|
||||
(cons $1 $2)))))))
|
||||
|
||||
(define (symbol<? a b)
|
||||
(string<? (symbol->string a) (symbol->string b)))
|
||||
|
||||
(define (trans filename)
|
||||
(let* ((i (open-input-file filename))
|
||||
(terms (make-hash-table))
|
||||
(eterms (make-hash-table))
|
||||
(nterms (make-hash-table))
|
||||
(enter-term
|
||||
(lambda (s)
|
||||
(if (not (hash-table-get nterms s (lambda () #f)))
|
||||
(hash-table-put! terms s #t))))
|
||||
(enter-empty-term
|
||||
(lambda (s)
|
||||
(if (not (hash-table-get nterms s (lambda () #f)))
|
||||
(hash-table-put! eterms s #t))))
|
||||
(enter-non-term
|
||||
(lambda (s)
|
||||
(hash-table-remove! terms s)
|
||||
(hash-table-remove! eterms s)
|
||||
(hash-table-put! nterms s #t))))
|
||||
(port-count-lines! i)
|
||||
(file-path filename)
|
||||
(regexp-match "%%" i)
|
||||
(begin0
|
||||
(let ((gram ((parse-grammar enter-term enter-empty-term enter-non-term)
|
||||
(lambda ()
|
||||
(let ((t (get-token-grammar i)))
|
||||
t)))))
|
||||
`(begin
|
||||
(define-tokens t ,(sort (hash-table-map terms (lambda (k v) k)) symbol<?))
|
||||
(define-empty-tokens et ,(sort (hash-table-map eterms (lambda (k v) k)) symbol<?))
|
||||
(parser
|
||||
(start ___)
|
||||
(end ___)
|
||||
(error ___)
|
||||
(tokens t et)
|
||||
(grammar ,@gram))))
|
||||
(close-input-port i)))))
|
@ -0,0 +1,396 @@
|
||||
#lang scheme/base
|
||||
|
||||
(require (for-syntax scheme/base
|
||||
"private-yacc/parser-builder.rkt"
|
||||
"private-yacc/grammar.rkt"
|
||||
"private-yacc/yacc-helper.rkt"
|
||||
"private-yacc/parser-actions.rkt"))
|
||||
(require "private-lex/token.rkt"
|
||||
"private-yacc/parser-actions.rkt"
|
||||
mzlib/etc
|
||||
mzlib/pretty
|
||||
syntax/readerr)
|
||||
|
||||
(provide parser)
|
||||
|
||||
|
||||
;; convert-parse-table : (vectorof (listof (cons/c gram-sym? action?))) ->
|
||||
;; (vectorof (symbol runtime-action hashtable))
|
||||
(define-for-syntax (convert-parse-table table)
|
||||
(list->vector
|
||||
(map
|
||||
(lambda (state-entry)
|
||||
(let ((ht (make-hasheq)))
|
||||
(for-each
|
||||
(lambda (gs/action)
|
||||
(hash-set! ht
|
||||
(gram-sym-symbol (car gs/action))
|
||||
(action->runtime-action (cdr gs/action))))
|
||||
state-entry)
|
||||
ht))
|
||||
(vector->list table))))
|
||||
|
||||
(define-syntax (parser stx)
|
||||
(syntax-case stx ()
|
||||
((_ args ...)
|
||||
(let ((arg-list (syntax->list (syntax (args ...))))
|
||||
(src-pos #f)
|
||||
(debug #f)
|
||||
(error #f)
|
||||
(tokens #f)
|
||||
(start #f)
|
||||
(end #f)
|
||||
(precs #f)
|
||||
(suppress #f)
|
||||
(grammar #f)
|
||||
(yacc-output #f))
|
||||
(for-each
|
||||
(lambda (arg)
|
||||
(syntax-case* arg (debug error tokens start end precs grammar
|
||||
suppress src-pos yacc-output)
|
||||
(lambda (a b)
|
||||
(eq? (syntax-e a) (syntax-e b)))
|
||||
((debug filename)
|
||||
(cond
|
||||
((not (string? (syntax-e (syntax filename))))
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"Debugging filename must be a string"
|
||||
stx
|
||||
(syntax filename)))
|
||||
(debug
|
||||
(raise-syntax-error #f "Multiple debug declarations" stx))
|
||||
(else
|
||||
(set! debug (syntax-e (syntax filename))))))
|
||||
((suppress)
|
||||
(set! suppress #t))
|
||||
((src-pos)
|
||||
(set! src-pos #t))
|
||||
((error expression)
|
||||
(if error
|
||||
(raise-syntax-error #f "Multiple error declarations" stx)
|
||||
(set! error (syntax expression))))
|
||||
((tokens def ...)
|
||||
(begin
|
||||
(when tokens
|
||||
(raise-syntax-error #f "Multiple tokens declarations" stx))
|
||||
(let ((defs (syntax->list (syntax (def ...)))))
|
||||
(for-each
|
||||
(lambda (d)
|
||||
(unless (identifier? d)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"Token-group name must be an identifier"
|
||||
stx
|
||||
d)))
|
||||
defs)
|
||||
(set! tokens defs))))
|
||||
((start symbol ...)
|
||||
(let ((symbols (syntax->list (syntax (symbol ...)))))
|
||||
(for-each
|
||||
(lambda (sym)
|
||||
(unless (identifier? sym)
|
||||
(raise-syntax-error #f
|
||||
"Start symbol must be a symbol"
|
||||
stx
|
||||
sym)))
|
||||
symbols)
|
||||
(when start
|
||||
(raise-syntax-error #f "Multiple start declarations" stx))
|
||||
(when (null? symbols)
|
||||
(raise-syntax-error #f
|
||||
"Missing start symbol"
|
||||
stx
|
||||
arg))
|
||||
(set! start symbols)))
|
||||
((end symbols ...)
|
||||
(let ((symbols (syntax->list (syntax (symbols ...)))))
|
||||
(for-each
|
||||
(lambda (sym)
|
||||
(unless (identifier? sym)
|
||||
(raise-syntax-error #f
|
||||
"End token must be a symbol"
|
||||
stx
|
||||
sym)))
|
||||
symbols)
|
||||
(let ((d (duplicate-list? (map syntax-e symbols))))
|
||||
(when d
|
||||
(raise-syntax-error
|
||||
#f
|
||||
(format "Duplicate end token definition for ~a" d)
|
||||
stx
|
||||
arg))
|
||||
(when (null? symbols)
|
||||
(raise-syntax-error
|
||||
#f
|
||||
"end declaration must contain at least 1 token"
|
||||
stx
|
||||
arg))
|
||||
(when end
|
||||
(raise-syntax-error #f "Multiple end declarations" stx))
|
||||
(set! end symbols))))
|
||||
((precs decls ...)
|
||||
(if precs
|
||||
(raise-syntax-error #f "Multiple precs declarations" stx)
|
||||
(set! precs (syntax/loc arg (decls ...)))))
|
||||
((grammar prods ...)
|
||||
(if grammar
|
||||
(raise-syntax-error #f "Multiple grammar declarations" stx)
|
||||
(set! grammar (syntax/loc arg (prods ...)))))
|
||||
((yacc-output filename)
|
||||
(cond
|
||||
((not (string? (syntax-e (syntax filename))))
|
||||
(raise-syntax-error #f
|
||||
"Yacc-output filename must be a string"
|
||||
stx
|
||||
(syntax filename)))
|
||||
(yacc-output
|
||||
(raise-syntax-error #f "Multiple yacc-output declarations" stx))
|
||||
(else
|
||||
(set! yacc-output (syntax-e (syntax filename))))))
|
||||
(_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg))))
|
||||
(syntax->list (syntax (args ...))))
|
||||
(unless tokens
|
||||
(raise-syntax-error #f "missing tokens declaration" stx))
|
||||
(unless error
|
||||
(raise-syntax-error #f "missing error declaration" stx))
|
||||
(unless grammar
|
||||
(raise-syntax-error #f "missing grammar declaration" stx))
|
||||
(unless end
|
||||
(raise-syntax-error #f "missing end declaration" stx))
|
||||
(unless start
|
||||
(raise-syntax-error #f "missing start declaration" stx))
|
||||
(let-values (((table all-term-syms actions check-syntax-fix)
|
||||
(build-parser (if debug debug "")
|
||||
src-pos
|
||||
suppress
|
||||
tokens
|
||||
start
|
||||
end
|
||||
precs
|
||||
grammar)))
|
||||
(when (and yacc-output (not (string=? yacc-output "")))
|
||||
(with-handlers [(exn:fail:filesystem?
|
||||
(lambda (e)
|
||||
(eprintf
|
||||
"Cannot write yacc-output to file \"~a\"\n"
|
||||
yacc-output)))]
|
||||
(call-with-output-file yacc-output
|
||||
(lambda (port)
|
||||
(display-yacc (syntax->datum grammar)
|
||||
tokens
|
||||
(map syntax->datum start)
|
||||
(if precs
|
||||
(syntax->datum precs)
|
||||
#f)
|
||||
port))
|
||||
#:exists 'truncate)))
|
||||
(with-syntax ((check-syntax-fix check-syntax-fix)
|
||||
(err error)
|
||||
(ends end)
|
||||
(starts start)
|
||||
(debug debug)
|
||||
(table (convert-parse-table table))
|
||||
(all-term-syms all-term-syms)
|
||||
(actions actions)
|
||||
(src-pos src-pos))
|
||||
(syntax
|
||||
(begin
|
||||
check-syntax-fix
|
||||
(parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos)))))))
|
||||
(_
|
||||
(raise-syntax-error #f
|
||||
"parser must have the form (parser args ...)"
|
||||
stx))))
|
||||
|
||||
(define (reduce-stack stack num ret-vals src-pos)
|
||||
(cond
|
||||
((> num 0)
|
||||
(let* ((top-frame (car stack))
|
||||
(ret-vals
|
||||
(if src-pos
|
||||
(cons (stack-frame-value top-frame)
|
||||
(cons (stack-frame-start-pos top-frame)
|
||||
(cons (stack-frame-end-pos top-frame)
|
||||
ret-vals)))
|
||||
(cons (stack-frame-value top-frame) ret-vals))))
|
||||
(reduce-stack (cdr stack) (sub1 num) ret-vals src-pos)))
|
||||
(else (values stack ret-vals))))
|
||||
|
||||
;; extract-helper : (symbol or make-token) any any -> symbol any any any
|
||||
(define (extract-helper tok v1 v2)
|
||||
(cond
|
||||
((symbol? tok)
|
||||
(values tok #f v1 v2))
|
||||
((token? tok)
|
||||
(values (real-token-name tok) (real-token-value tok) v1 v2))
|
||||
(else (raise-argument-error 'parser
|
||||
"(or/c symbol? token?)"
|
||||
0
|
||||
tok))))
|
||||
|
||||
;; well-formed-position-token?: any -> boolean
|
||||
;; Returns true if pt is a position token whose position-token-token
|
||||
;; is itself a token or a symbol.
|
||||
;; This is meant to help raise more precise error messages when
|
||||
;; a tokenizer produces an erroneous position-token wrapped twice.
|
||||
;; (as often happens when omitting return-without-pos).
|
||||
(define (well-formed-position-token? pt)
|
||||
(and (position-token? pt)
|
||||
(let ([t (position-token-token pt)])
|
||||
(or (symbol? t)
|
||||
(token? t)))))
|
||||
|
||||
;; extract-src-pos : position-token -> symbol any any any
|
||||
(define (extract-src-pos ip)
|
||||
(cond
|
||||
((well-formed-position-token? ip)
|
||||
(extract-helper (position-token-token ip)
|
||||
(position-token-start-pos ip)
|
||||
(position-token-end-pos ip)))
|
||||
(else
|
||||
(raise-argument-error 'parser
|
||||
"well-formed-position-token?"
|
||||
0
|
||||
ip))))
|
||||
|
||||
;; extract-no-src-pos : (symbol or make-token) -> symbol any any any
|
||||
(define (extract-no-src-pos ip)
|
||||
(extract-helper ip #f #f))
|
||||
|
||||
(define-struct stack-frame (state value start-pos end-pos) #:inspector (make-inspector))
|
||||
|
||||
(define (make-empty-stack i) (list (make-stack-frame i #f #f #f)))
|
||||
|
||||
|
||||
;; The table is a vector that maps each state to a hash-table that maps a
|
||||
;; terminal symbol to either an accept, shift, reduce, or goto structure.
|
||||
; We encode the structures according to the runtime-action data definition in
|
||||
;; parser-actions.rkt
|
||||
(define (parser-body debug? err starts ends table all-term-syms actions src-pos)
|
||||
(local ((define extract
|
||||
(if src-pos
|
||||
extract-src-pos
|
||||
extract-no-src-pos))
|
||||
|
||||
(define (fix-error stack tok val start-pos end-pos get-token)
|
||||
(when debug? (pretty-print stack))
|
||||
(local ((define (remove-input tok val start-pos end-pos)
|
||||
(if (memq tok ends)
|
||||
(raise-read-error "parser: Cannot continue after error"
|
||||
#f #f #f #f #f)
|
||||
(let ((a (find-action stack tok val start-pos end-pos)))
|
||||
(cond
|
||||
((runtime-shift? a)
|
||||
;; (printf "shift:~a\n" (runtime-shift-state a))
|
||||
(cons (make-stack-frame (runtime-shift-state a)
|
||||
val
|
||||
start-pos
|
||||
end-pos)
|
||||
stack))
|
||||
(else
|
||||
;; (printf "discard input:~a\n" tok)
|
||||
(let-values (((tok val start-pos end-pos)
|
||||
(extract (get-token))))
|
||||
(remove-input tok val start-pos end-pos))))))))
|
||||
(let remove-states ()
|
||||
(let ((a (find-action stack 'error #f start-pos end-pos)))
|
||||
(cond
|
||||
((runtime-shift? a)
|
||||
;; (printf "shift:~a\n" (runtime-shift-state a))
|
||||
(set! stack
|
||||
(cons
|
||||
(make-stack-frame (runtime-shift-state a)
|
||||
#f
|
||||
start-pos
|
||||
end-pos)
|
||||
stack))
|
||||
(remove-input tok val start-pos end-pos))
|
||||
(else
|
||||
;; (printf "discard state:~a\n" (car stack))
|
||||
(cond
|
||||
((< (length stack) 2)
|
||||
(raise-read-error "parser: Cannot continue after error"
|
||||
#f #f #f #f #f))
|
||||
(else
|
||||
(set! stack (cdr stack))
|
||||
(remove-states)))))))))
|
||||
|
||||
(define (find-action stack tok val start-pos end-pos)
|
||||
(unless (hash-ref all-term-syms
|
||||
tok
|
||||
#f)
|
||||
(if src-pos
|
||||
(err #f tok val start-pos end-pos)
|
||||
(err #f tok val))
|
||||
(raise-read-error (format "parser: got token of unknown type ~a" tok)
|
||||
#f #f #f #f #f))
|
||||
(hash-ref (vector-ref table (stack-frame-state (car stack)))
|
||||
tok
|
||||
#f))
|
||||
|
||||
(define (make-parser start-number)
|
||||
(lambda (get-token)
|
||||
(unless (and (procedure? get-token)
|
||||
(procedure-arity-includes? get-token 0))
|
||||
(error 'get-token "expected a nullary procedure, got ~e" get-token))
|
||||
(let parsing-loop ((stack (make-empty-stack start-number))
|
||||
(ip (get-token)))
|
||||
(let-values (((tok val start-pos end-pos)
|
||||
(extract ip)))
|
||||
(let ((action (find-action stack tok val start-pos end-pos)))
|
||||
(cond
|
||||
((runtime-shift? action)
|
||||
;; (printf "shift:~a\n" (runtime-shift-state action))
|
||||
(parsing-loop (cons (make-stack-frame (runtime-shift-state action)
|
||||
val
|
||||
start-pos
|
||||
end-pos)
|
||||
stack)
|
||||
(get-token)))
|
||||
((runtime-reduce? action)
|
||||
;; (printf "reduce:~a\n" (runtime-reduce-prod-num action))
|
||||
(let-values (((new-stack args)
|
||||
(reduce-stack stack
|
||||
(runtime-reduce-rhs-length action)
|
||||
null
|
||||
src-pos)))
|
||||
(let ((goto
|
||||
(runtime-goto-state
|
||||
(hash-ref
|
||||
(vector-ref table (stack-frame-state (car new-stack)))
|
||||
(runtime-reduce-lhs action)))))
|
||||
(parsing-loop
|
||||
(cons
|
||||
(if src-pos
|
||||
(make-stack-frame
|
||||
goto
|
||||
(apply (vector-ref actions (runtime-reduce-prod-num action)) args)
|
||||
(if (null? args) start-pos (cadr args))
|
||||
(if (null? args)
|
||||
end-pos
|
||||
(list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1))))
|
||||
(make-stack-frame
|
||||
goto
|
||||
(apply (vector-ref actions (runtime-reduce-prod-num action)) args)
|
||||
#f
|
||||
#f))
|
||||
new-stack)
|
||||
ip))))
|
||||
((runtime-accept? action)
|
||||
;; (printf "accept\n")
|
||||
(stack-frame-value (car stack)))
|
||||
(else
|
||||
(if src-pos
|
||||
(err #t tok val start-pos end-pos)
|
||||
(err #t tok val))
|
||||
(parsing-loop (fix-error stack tok val start-pos end-pos get-token)
|
||||
(get-token))))))))))
|
||||
(cond
|
||||
((null? (cdr starts)) (make-parser 0))
|
||||
(else
|
||||
(let loop ((l starts)
|
||||
(i 0))
|
||||
(cond
|
||||
((null? l) null)
|
||||
(else (cons (make-parser i) (loop (cdr l) (add1 i))))))))))
|
@ -0,0 +1,12 @@
|
||||
#lang info
|
||||
|
||||
(define collection 'multi)
|
||||
|
||||
(define deps '("parser-tools-lib"
|
||||
"parser-tools-doc"))
|
||||
(define implies '("parser-tools-lib"
|
||||
"parser-tools-doc"))
|
||||
|
||||
(define pkg-desc "Lex- and Yacc-style parsing tools")
|
||||
|
||||
(define pkg-authors '(mflatt))
|
Loading…
Reference in New Issue