You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
333 lines
14 KiB
Racket
333 lines
14 KiB
Racket
#lang racket/base
|
|
(require (prefix-in is: data/integer-set)
|
|
yaragg/parser-tools/private-lex/re
|
|
yaragg/parser-tools/private-lex/util)
|
|
|
|
(provide build-dfa print-dfa (struct-out dfa))
|
|
|
|
(define e (build-epsilon))
|
|
(define z (build-zero))
|
|
|
|
|
|
;; Don't do anything with this one but extract the chars
|
|
(define all-chars (->re `(char-complement (union)) (make-cache)))
|
|
|
|
;; get-char-groups : re bool -> (list-of char-setR?)
|
|
;; Collects the char-setRs in r that could be used in
|
|
;; taking the derivative of r.
|
|
(define (get-char-groups r found-negation)
|
|
(cond
|
|
[(or (eq? r e) (eq? r z)) null]
|
|
[(char-setR? r) (list r)]
|
|
[(concatR? r)
|
|
(if (re-nullable? (concatR-re1 r))
|
|
(append (get-char-groups (concatR-re1 r) found-negation)
|
|
(get-char-groups (concatR-re2 r) found-negation))
|
|
(get-char-groups (concatR-re1 r) found-negation))]
|
|
[(repeatR? r)
|
|
(get-char-groups (repeatR-re r) found-negation)]
|
|
[(orR? r)
|
|
(apply append (map (λ (x) (get-char-groups x found-negation)) (orR-res r)))]
|
|
[(andR? r)
|
|
(apply append (map (λ (x) (get-char-groups x found-negation)) (andR-res r)))]
|
|
[(negR? r)
|
|
(if found-negation
|
|
(get-char-groups (negR-re r) #t)
|
|
(cons all-chars (get-char-groups (negR-re r) #t)))]))
|
|
|
|
(test-block ((c (make-cache))
|
|
(r1 (->re #\1 c))
|
|
(r2 (->re #\2 c)))
|
|
((get-char-groups e #f) null)
|
|
((get-char-groups z #f) null)
|
|
((get-char-groups r1 #f) (list r1))
|
|
((get-char-groups (->re `(concatenation ,r1 ,r2) c) #f)
|
|
(list r1))
|
|
((get-char-groups (->re `(concatenation ,e ,r2) c) #f)
|
|
(list r2))
|
|
((get-char-groups (->re `(concatenation (repetition 0 +inf.0 ,r1) ,r2) c) #f)
|
|
(list r1 r2))
|
|
((get-char-groups (->re `(repetition 0 +inf.0 ,r1) c) #f)
|
|
(list r1))
|
|
((get-char-groups
|
|
(->re `(union (repetition 0 +inf.0 ,r1)
|
|
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
|
|
(list r1 r2 (->re "3" c) (->re "4" c)))
|
|
((get-char-groups (->re `(complement ,r1) c) #f)
|
|
(list all-chars r1))
|
|
((get-char-groups
|
|
(->re `(intersection (repetition 0 +inf.0 ,r1)
|
|
(concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f)
|
|
(list r1 r2 (->re "3" c) (->re "4" c)))
|
|
)
|
|
(define loc:member? is:member?)
|
|
|
|
;; deriveR : re char cache -> re
|
|
(define (deriveR r c cache)
|
|
(cond
|
|
[(or (eq? r e) (eq? r z)) z]
|
|
[(char-setR? r)
|
|
(if (loc:member? c (char-setR-chars r)) e z)]
|
|
[(concatR? r)
|
|
(define r1 (concatR-re1 r))
|
|
(define r2 (concatR-re2 r))
|
|
(define d (build-concat (deriveR r1 c cache) r2 cache))
|
|
(if (re-nullable? r1)
|
|
(build-or (list d (deriveR r2 c cache)) cache)
|
|
d)]
|
|
[(repeatR? r)
|
|
(build-concat (deriveR (repeatR-re r) c cache)
|
|
(build-repeat (sub1 (repeatR-low r))
|
|
(sub1 (repeatR-high r))
|
|
(repeatR-re r) cache)
|
|
cache)]
|
|
[(orR? r)
|
|
(build-or (map (λ (x) (deriveR x c cache))
|
|
(orR-res r))
|
|
cache)]
|
|
[(andR? r)
|
|
(build-and (map (λ (x) (deriveR x c cache))
|
|
(andR-res r))
|
|
cache)]
|
|
[(negR? r)
|
|
(build-neg (deriveR (negR-re r) c cache) cache)]))
|
|
|
|
(test-block ((c (make-cache))
|
|
(a (char->integer #\a))
|
|
(b (char->integer #\b))
|
|
(r1 (->re #\a c))
|
|
(r2 (->re `(repetition 0 +inf.0 #\a) c))
|
|
(r3 (->re `(repetition 0 +inf.0 ,r2) c))
|
|
(r4 (->re `(concatenation #\a ,r2) c))
|
|
(r5 (->re `(repetition 0 +inf.0 ,r4) c))
|
|
(r6 (->re `(union ,r5 #\a) c))
|
|
(r7 (->re `(concatenation ,r2 ,r2) c))
|
|
(r8 (->re `(complement ,r4) c))
|
|
(r9 (->re `(intersection ,r2 ,r4) c)))
|
|
((deriveR e a c) z)
|
|
((deriveR z a c) z)
|
|
((deriveR r1 b c) z)
|
|
((deriveR r1 a c) e)
|
|
((deriveR r2 a c) r2)
|
|
((deriveR r2 b c) z)
|
|
((deriveR r3 a c) r2)
|
|
((deriveR r3 b c) z)
|
|
((deriveR r4 a c) r2)
|
|
((deriveR r4 b c) z)
|
|
((deriveR r5 a c) (->re `(concatenation ,r2 ,r5) c))
|
|
((deriveR r5 b c) z)
|
|
((deriveR r6 a c) (->re `(union (concatenation ,r2 ,r5) "") c))
|
|
((deriveR r6 b c) z)
|
|
((deriveR r7 a c) (->re `(union (concatenation ,r2 ,r2) ,r2) c))
|
|
((deriveR r7 b c) z)
|
|
((deriveR r8 a c) (->re `(complement, r2) c))
|
|
((deriveR r8 b c) (->re `(complement ,z) c))
|
|
((deriveR r9 a c) r2)
|
|
((deriveR r9 b c) z)
|
|
((deriveR (->re `(repetition 1 2 "ab") c) a c)
|
|
(->re `(concatenation "b" (repetition 0 1 "ab")) c)))
|
|
|
|
;; An re-action is (cons re action)
|
|
|
|
;; derive : (list-of re-action) char cache -> (union (list-of re-action) #f)
|
|
;; applies deriveR to all the re-actions's re parts.
|
|
;; Returns #f if the derived state is equivalent to z.
|
|
(define (derive r c cache)
|
|
(define new-r (for/list ([ra (in-list r)])
|
|
(cons (deriveR (car ra) c cache) (cdr ra))))
|
|
(if (andmap (λ (x) (eq? z (car x))) new-r)
|
|
#f
|
|
new-r))
|
|
|
|
(test-block ((c (make-cache))
|
|
(r1 (->re #\1 c))
|
|
(r2 (->re #\2 c)))
|
|
((derive null (char->integer #\1) c) #f)
|
|
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c)
|
|
(list (cons e 1) (cons z 2)))
|
|
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f))
|
|
|
|
|
|
;; get-final : (list-of re-action) -> (union #f syntax-object)
|
|
;; An re that accepts e represents a final state. Return the
|
|
;; action from the first final state or #f if there is none.
|
|
(define (get-final res)
|
|
(cond
|
|
[(null? res) #f]
|
|
[(re-nullable? (caar res)) (cdar res)]
|
|
[else (get-final (cdr res))]))
|
|
|
|
(test-block ((c->i char->integer)
|
|
(c (make-cache))
|
|
(r1 (->re #\a c))
|
|
(r2 (->re #\b c))
|
|
(b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5)))
|
|
(a (list (cons r1 1) (cons r2 2))))
|
|
((derive null (c->i #\a) c) #f)
|
|
((derive a (c->i #\a) c) (list (cons e 1) (cons z 2)))
|
|
((derive a (c->i #\b) c) (list (cons z 1) (cons e 2)))
|
|
((derive a (c->i #\c) c) #f)
|
|
((derive (list (cons (->re `(union " " "\n" ",") c) 1)
|
|
(cons (->re `(concatenation (repetition 0 1 "-")
|
|
(repetition 1 +inf.0 (char-range "0" "9"))) c) 2)
|
|
(cons (->re `(concatenation "-" (repetition 1 +inf.0 "-")) c) 3)
|
|
(cons (->re "[" c) 4)
|
|
(cons (->re "]" c) 5)) (c->i #\[) c)
|
|
b)
|
|
((get-final a) #f)
|
|
((get-final (list (cons e 1) (cons e 2))) 1)
|
|
((get-final b) 4))
|
|
|
|
|
|
;; A state is (make-state (list-of re-action) nat)
|
|
(define-struct state (spec index))
|
|
|
|
;; get->key : re-action -> (list-of nat)
|
|
;; states are indexed by the list of indexes of their res
|
|
(define (get-key s)
|
|
(map (λ (x) (re-index (car x))) s))
|
|
|
|
(define loc:partition is:partition)
|
|
|
|
;; compute-chars : (list-of state) -> (list-of char-set)
|
|
;; Computed the sets of equivalent characters for taking the
|
|
;; derivative of the car of st. Only one derivative per set need to be taken.
|
|
(define (compute-chars st)
|
|
(cond
|
|
[(null? st) null]
|
|
[else
|
|
(loc:partition (map char-setR-chars
|
|
(apply append (map (λ (x) (get-char-groups (car x) #f))
|
|
(state-spec (car st))))))]))
|
|
|
|
(test-block ((c (make-cache))
|
|
(c->i char->integer)
|
|
(r1 (->re `(char-range #\1 #\4) c))
|
|
(r2 (->re `(char-range #\2 #\3) c)))
|
|
((compute-chars null) null)
|
|
((compute-chars (list (make-state null 1))) null)
|
|
((map is:integer-set-contents
|
|
(compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2))))
|
|
(list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3)))
|
|
(is:integer-set-contents (is:union (is:make-range (c->i #\1))
|
|
(is:make-range (c->i #\4)))))))
|
|
|
|
|
|
;; A dfa is (make-dfa int int
|
|
;; (list-of (cons int syntax-object))
|
|
;; (list-of (cons int (list-of (cons char-set int)))))
|
|
;; Each transitions is a state and a list of chars with the state to transition to.
|
|
;; The finals and transitions are sorted by state number, and duplicate free.
|
|
(define-struct dfa (num-states start-state final-states/actions transitions) #:inspector (make-inspector))
|
|
|
|
(define loc:get-integer is:get-integer)
|
|
|
|
;; build-dfa : (list-of re-action) cache -> dfa
|
|
(define (build-dfa rs cache)
|
|
(let* ([transitions (make-hash)]
|
|
[get-state-number (make-counter)]
|
|
[start (make-state rs (get-state-number))])
|
|
(cache (cons 'state (get-key rs)) (λ () start))
|
|
(let loop ([old-states (list start)]
|
|
[new-states null]
|
|
[all-states (list start)]
|
|
[cs (compute-chars (list start))])
|
|
(cond
|
|
[(and (null? old-states) (null? new-states))
|
|
(make-dfa (get-state-number) (state-index start)
|
|
(sort (for*/list ([state (in-list all-states)]
|
|
[val (in-value (cons (state-index state) (get-final (state-spec state))))]
|
|
#:when (cdr val))
|
|
val)
|
|
< #:key car)
|
|
(sort (hash-map transitions
|
|
(λ (state trans)
|
|
(cons (state-index state)
|
|
(for/list ([t (in-list trans)])
|
|
(cons (car t)
|
|
(state-index (cdr t)))))))
|
|
< #:key car))]
|
|
[(null? old-states)
|
|
(loop new-states null all-states (compute-chars new-states))]
|
|
[(null? cs)
|
|
(loop (cdr old-states) new-states all-states (compute-chars (cdr old-states)))]
|
|
[else
|
|
(define state (car old-states))
|
|
(define c (car cs))
|
|
(define new-re (derive (state-spec state) (loc:get-integer c) cache))
|
|
(cond
|
|
[new-re
|
|
(let* ([new-state? #f]
|
|
[new-state (cache (cons 'state (get-key new-re))
|
|
(λ ()
|
|
(set! new-state? #t)
|
|
(make-state new-re (get-state-number))))]
|
|
[new-all-states (if new-state? (cons new-state all-states) all-states)])
|
|
(hash-set! transitions
|
|
state
|
|
(cons (cons c new-state)
|
|
(hash-ref transitions state
|
|
(λ () null))))
|
|
(cond
|
|
[new-state?
|
|
(loop old-states (cons new-state new-states) new-all-states (cdr cs))]
|
|
[else
|
|
(loop old-states new-states new-all-states (cdr cs))]))]
|
|
[else (loop old-states new-states all-states (cdr cs))])]))))
|
|
|
|
(define (print-dfa x)
|
|
(printf "number of states: ~a\n" (dfa-num-states x))
|
|
(printf "start state: ~a\n" (dfa-start-state x))
|
|
(printf "final states: ~a\n" (map car (dfa-final-states/actions x)))
|
|
(for-each (λ (trans)
|
|
(printf "state: ~a\n" (car trans))
|
|
(for-each (λ (rule)
|
|
(printf " -~a-> ~a\n"
|
|
(is:integer-set-contents (car rule))
|
|
(cdr rule)))
|
|
(cdr trans)))
|
|
(dfa-transitions x)))
|
|
|
|
(define (build-test-dfa rs)
|
|
(define c (make-cache))
|
|
(build-dfa (map (λ (x) (cons (->re x c) 'action)) rs) c))
|
|
|
|
|
|
#|
|
|
(define t1 (build-test-dfa null))
|
|
(define t2 (build-test-dfa `(#\a)))
|
|
(define t3 (build-test-dfa `(#\a #\b)))
|
|
(define t4 (build-test-dfa `((repetition 0 +inf.0 #\a)
|
|
(repetition 0 +inf.0 (concatenation #\a #\b)))))
|
|
(define t5 (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\0 #\1)) #\1))))
|
|
(define t6 (build-test-dfa `((repetition 0 +inf.0 (repetition 0 +inf.0 #\a))
|
|
(repetition 0 +inf.0 (concatenation #\b (repetition 1 +inf.0 #\b))))))
|
|
(define t7 (build-test-dfa `((concatenation (repetition 0 +inf.0 #\a) (repetition 0 +inf.0 #\b)
|
|
(repetition 0 +inf.0 #\c) (repetition 0 +inf.0 #\d)
|
|
(repetition 0 +inf.0 #\e)))))
|
|
(define t8
|
|
(build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\a #\b)) #\a (union #\a #\b)
|
|
(union #\a #\b) (union #\a #\b) (union #\a #\b)))))
|
|
(define t9 (build-test-dfa `((concatenation "/*"
|
|
(complement (concatenation (intersection) "*/" (intersection)))
|
|
"*/"))))
|
|
(define t11 (build-test-dfa `((complement "1"))))
|
|
(define t12 (build-test-dfa `((concatenation (intersection (concatenation (repetition 0 +inf.0 "a") "b")
|
|
(concatenation "a" (repetition 0 +inf.0 "b")))
|
|
"ab"))))
|
|
(define x (build-test-dfa `((union " " "\n" ",")
|
|
(concatenation (repetition 0 1 "-") (repetition 1 +inf.0 (char-range "0" "9")))
|
|
(concatenation "-" (repetition 1 +inf.0 "-"))
|
|
"["
|
|
"]")))
|
|
(define y (build-test-dfa
|
|
`((repetition 1 +inf.0
|
|
(union (concatenation "|" (repetition 0 +inf.0 (char-complement "|")) "|")
|
|
(concatenation "|" (repetition 0 +inf.0 (char-complement "|"))))))))
|
|
(define t13 (build-test-dfa `((intersection (concatenation (intersection) "111" (intersection))
|
|
(complement (union (concatenation (intersection) "01")
|
|
(repetition 1 +inf.0 "1")))))))
|
|
(define t14 (build-test-dfa `((complement "1")))))
|
|
|
|
|#
|