*** empty log message ***

original commit: e3f6144096066ad767c54dd1b427ee0b171b4913
tokens
Scott Owens 21 years ago
parent 7e603958b5
commit e6af00242c

@ -9,38 +9,52 @@
(define e (build-epsilon)) (define e (build-epsilon))
(define z (build-zero)) (define z (build-zero))
;; get-char-groups : re -> (list-of char-setR?)
;; Don't do anything with this one but extract the chars
(define all-chars (->re `(^) (make-cache)))
;; get-char-groups : re bool -> (list-of char-setR?)
;; Collects the char-setRs in r that could be used in ;; Collects the char-setRs in r that could be used in
;; taking the derivative of r. ;; taking the derivative of r.
(define (get-char-groups r) (define (get-char-groups r found-negation)
(cond (cond
((or (eq? r e) (eq? r z)) null) ((or (eq? r e) (eq? r z)) null)
((char-setR? r) (list r)) ((char-setR? r) (list r))
((concatR? r) ((concatR? r)
(if (re-nullable? (concatR-re1 r)) (if (re-nullable? (concatR-re1 r))
(append (get-char-groups (concatR-re1 r)) (append (get-char-groups (concatR-re1 r) found-negation)
(get-char-groups (concatR-re2 r))) (get-char-groups (concatR-re2 r) found-negation))
(get-char-groups (concatR-re1 r)))) (get-char-groups (concatR-re1 r) found-negation)))
((repeatR? r) ((repeatR? r)
(get-char-groups (repeatR-re r))) (get-char-groups (repeatR-re r) found-negation))
((orR? r) ((orR? r)
(apply append (map get-char-groups (orR-res r)))))) (apply append (map (lambda (x) (get-char-groups x found-negation)) (orR-res r))))
((andR? r)
(apply append (map (lambda (x) (get-char-groups x found-negation)) (andR-res r))))
((negR? r)
(if found-negation
(get-char-groups (negR-re r) #t)
(cons all-chars (get-char-groups (negR-re r) #t))))))
(test-block ((c (make-cache)) (test-block ((c (make-cache))
(r1 (->re #\1 c)) (r1 (->re #\1 c))
(r2 (->re #\2 c))) (r2 (->re #\2 c)))
((get-char-groups e) null) ((get-char-groups e #f) null)
((get-char-groups z) null) ((get-char-groups z #f) null)
((get-char-groups r1) (list r1)) ((get-char-groups r1 #f) (list r1))
((get-char-groups (->re `(@ ,r1 ,r2) c)) ((get-char-groups (->re `(@ ,r1 ,r2) c) #f)
(list r1)) (list r1))
((get-char-groups (->re `(@ ,e ,r2) c)) ((get-char-groups (->re `(@ ,e ,r2) c) #f)
(list r2)) (list r2))
((get-char-groups (->re `(@ (* ,r1) ,r2) c)) ((get-char-groups (->re `(@ (* ,r1) ,r2) c) #f)
(list r1 r2)) (list r1 r2))
((get-char-groups (->re `(* ,r1) c)) ((get-char-groups (->re `(* ,r1) c) #f)
(list r1)) (list r1))
((get-char-groups (->re `(: (* ,r1) (@ (* ,r2) "3") "4") c)) ((get-char-groups (->re `(: (* ,r1) (@ (* ,r2) "3") "4") c) #f)
(list r1 r2 (->re "3" c) (->re "4" c)))
((get-char-groups (->re `(~ ,r1) c) #f)
(list all-chars r1))
((get-char-groups (->re `(& (* ,r1) (@ (* ,r2) "3") "4") c) #f)
(list r1 r2 (->re "3" c) (->re "4" c))) (list r1 r2 (->re "3" c) (->re "4" c)))
) )
@ -130,7 +144,9 @@
(r4 (->re `(@ #\a ,r2) c)) (r4 (->re `(@ #\a ,r2) c))
(r5 (->re `(* ,r4) c)) (r5 (->re `(* ,r4) c))
(r6 (->re `(: ,r5 #\a) c)) (r6 (->re `(: ,r5 #\a) c))
(r7 (->re `(@ ,r2 ,r2) c))) (r7 (->re `(@ ,r2 ,r2) c))
(r8 (->re `(~ ,r4) c))
(r9 (->re `(& ,r2 ,r3) c)))
((deriveR e #\a c) z) ((deriveR e #\a c) z)
((deriveR z #\a c) z) ((deriveR z #\a c) z)
((deriveR r1 #\b c) z) ((deriveR r1 #\b c) z)
@ -146,8 +162,11 @@
((deriveR r6 #\a c) (->re `(: (@ ,r2 ,r5) (epsilon)) c)) ((deriveR r6 #\a c) (->re `(: (@ ,r2 ,r5) (epsilon)) c))
((deriveR r6 #\b c) z) ((deriveR r6 #\b c) z)
((deriveR r7 #\a c) (->re `(: (@ ,r2 ,r2) ,r2) c)) ((deriveR r7 #\a c) (->re `(: (@ ,r2 ,r2) ,r2) c))
((deriveR r7 #\b c) z)) ((deriveR r7 #\b c) z)
((deriveR r8 #\a c) (->re `(~, r2) c))
((deriveR r8 #\b c) (->re `(~ ,z) c))
((deriveR r9 #\a c) (->re `(& ,r2 (@ ,r2 ,r3)) c))
((deriveR r9 #\b c) z))
;; An re-action is (cons re action) ;; An re-action is (cons re action)
@ -217,7 +236,7 @@
((null? st) null) ((null? st) null)
(else (else
(partition (map char-setR-chars (partition (map char-setR-chars
(apply append (map (lambda (x) (get-char-groups (car x))) (apply append (map (lambda (x) (get-char-groups (car x) #f))
(state-spec (car st))))))))) (state-spec (car st)))))))))
(test-block ((c (make-cache)) (test-block ((c (make-cache))
@ -322,6 +341,13 @@
(define t7 (build-test-dfa `((@ (* #\a) (* #\b) (* #\c) (* #\d) (* #\e))))) (define t7 (build-test-dfa `((@ (* #\a) (* #\b) (* #\c) (* #\d) (* #\e)))))
(define t8 (define t8
(build-test-dfa `((@ (* (: #\a #\b)) #\a (: #\a #\b) (: #\a #\b) (: #\a #\b) (: #\a #\b))))) (build-test-dfa `((@ (* (: #\a #\b)) #\a (: #\a #\b) (: #\a #\b) (: #\a #\b) (: #\a #\b)))))
(define t9 (build-test-dfa `((@ "/*"
(~ (@ (&) "*/" (&)))
"*/"))))
(define t11 (build-test-dfa `((~ "1"))))
(define t12 (build-test-dfa `((@ (& (@ (* "a") "b")
(@ "a" (* "b")))
"ab"))))
(define x (build-test-dfa `((: " " "\n" ",") (define x (build-test-dfa `((: " " "\n" ",")
(@ (? "-") (+ (- "0" "9"))) (@ (? "-") (+ (- "0" "9")))
(@ "-" (+ "-")) (@ "-" (+ "-"))
@ -329,5 +355,8 @@
"]"))) "]")))
(define y (build-test-dfa `((+ (: (@ "|" (* (^ "|")) "|") (define y (build-test-dfa `((+ (: (@ "|" (* (^ "|")) "|")
(@ "|" (* (^ "|")))))))) (@ "|" (* (^ "|"))))))))
(define t13 (build-test-dfa `((& (@ (&) "111" (&))
(~ (: (@ (&) "01")
(+ "1")))))))
|# |#
) )

@ -32,8 +32,8 @@
(define-struct (concatR re) (re1 re2) (make-inspector)) (define-struct (concatR re) (re1 re2) (make-inspector))
(define-struct (repeatR re) (re)) (define-struct (repeatR re) (re))
(define-struct (orR re) (res) (make-inspector)) (define-struct (orR re) (res) (make-inspector))
(define-struct (andR re) (res)) (define-struct (andR re) (res) (make-inspector))
(define-struct (negR re) (re)) (define-struct (negR re) (re) (make-inspector))
;; e : re ;; e : re
;; The unique epsilon re ;; The unique epsilon re
@ -52,12 +52,14 @@
;; | (+ s-re) match 1 or more ;; | (+ s-re) match 1 or more
;; | (? s-re) match 0 or 1 ;; | (? s-re) match 0 or 1
;; | (: s-re ...) match one of the sub-expressions ;; | (: s-re ...) match one of the sub-expressions
;; | (& s-re ...) match iff all sub-expressions match
;; | (~ s-re) match iff s-re doesn't match
;; | (@ s-re ...) match each sub-expression in succession ;; | (@ s-re ...) match each sub-expression in succession
;; | (- char char) match any character between two (inclusive) ;; | (- char char) match any character between two (inclusive)
;; | (^ char_or_range ...1) match any character not listed ;; | (^ char_or_range ...) match any character not listed
;; (the null concatenation `(@) means epsilon as does "". ;; (The null concatenation `(@) means epsilon as does "".
;; the null or `(:) means match nothing. The null carat `(^) means match ;; The null or `(:) means match nothing. The null carat `(^) means match
;; any character.) ;; any character. The null intersection `(&) means match string.)
;; ->re : s-re cache -> re ;; ->re : s-re cache -> re
(define (->re exp cache) (define (->re exp cache)
@ -76,8 +78,17 @@
c c
(build-or (list e c) cache)))) (build-or (list e c) cache))))
(`(: ,rs ...) (`(: ,rs ...)
(build-or (flatten-res (map (lambda (r) (->re r cache)) rs) cache) (build-or (flatten-res (map (lambda (r) (->re r cache)) rs)
orR? orR-res merge cache)
cache)) cache))
(`(& ,rs ...)
(build-and (flatten-res (map (lambda (r) (->re r cache)) rs)
andR? andR-res (lambda (a b)
(let-values (((i _ __) (split a b))) i))
cache)
cache))
(`(~ ,r)
(build-neg (->re r cache) cache))
(`(@ ,rs ...) (`(@ ,rs ...)
(foldr (lambda (x y) (foldr (lambda (x y)
(build-concat (->re x cache) y cache)) (build-concat (->re x cache) y cache))
@ -111,22 +122,26 @@
;; flatten-res: (list-of re) cache -> (list-of re) ;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re))
;; Takes all the char-sets in l and combines them into one element. ;; ((list-of char) (list-of char) -> (list-of char)) cache -> (list-of re)
;; Removes orRs too, ;; Takes all the char-sets in l and combines them into one element using the combine function.
(define (flatten-res l cache) ;; Flattens out the values of type?. get-res only needs to function on things type? returns
;; true for.
(define (flatten-res l type? get-res combine cache)
(let loop ((res l) (let loop ((res l)
(chars null) (chars #f)
(no-chars null)) (no-chars null))
(cond (cond
((null? res) ((null? res)
(if (null? chars) (if chars
no-chars (cons (build-char-set (mergesort chars char<?) cache) no-chars)
(cons (build-char-set (mergesort chars char<?) cache) no-chars))) no-chars))
((char-setR? (car res)) ((char-setR? (car res))
(loop (cdr res) (merge (char-setR-chars (car res)) chars) no-chars)) (if chars
((orR? (car res)) (loop (cdr res) (combine (char-setR-chars (car res)) chars) no-chars)
(loop (append (orR-res (car res)) (cdr res)) chars no-chars)) (loop (cdr res) (char-setR-chars (car res)) no-chars)))
((type? (car res))
(loop (append (get-res (car res)) (cdr res)) chars no-chars))
(else (loop (cdr res) chars (cons (car res) no-chars)))))) (else (loop (cdr res) chars (cons (car res) no-chars))))))
;; build-epsilon : -> re ;; build-epsilon : -> re
@ -181,6 +196,7 @@
(cond (cond
((null? rs) z) ((null? rs) z)
((null? (cdr rs)) (car rs)) ((null? (cdr rs)) (car rs))
((memq (build-neg z cache) rs) (build-neg z cache))
(else (else
(cache (cons 'or (map re-index rs)) (cache (cons 'or (map re-index rs))
(lambda () (lambda ()
@ -188,9 +204,11 @@
;; build-and : (list-of re) cache -> re ;; build-and : (list-of re) cache -> re
(define (build-and rs cache) (define (build-and rs cache)
(let ((rs (do-simple-equiv (replace rs andR? andR-res null) rs))) (let ((rs (do-simple-equiv (replace rs andR? andR-res null) re-index)))
(cond (cond
((ormap (lambda (x) (eq? x z)) rs) z) ((null? rs) (build-neg z cache))
((null? (cdr rs)) (car rs))
((memq z rs) z)
(else (else
(cache (cons 'and (map re-index rs)) (cache (cons 'and (map re-index rs))
(lambda () (lambda ()
@ -198,57 +216,77 @@
;; build-neg : re cache -> re ;; build-neg : re cache -> re
(define (build-neg r cache) (define (build-neg r cache)
(cond
((negR? r) (negR-re r))
(else
(cache (cons 'neg (re-index r)) (cache (cons 'neg (re-index r))
(lambda () (lambda ()
(make-negR (not (re-nullable? r)) (get-index) r)))) (make-negR (not (re-nullable? r)) (get-index) r))))))
;; Tests for the build-functions
(test-block ((c (make-cache)) (test-block ((c (make-cache))
(r1 (->re #\1 c)) (r1 (build-char-set `(#\1) c))
(r2 (->re #\2 c)) (r2 (build-char-set `(#\2) c))
(r3 (->re #\3 c)) (r3 (build-char-set `(#\3) c))
(rc (->re `(@ ,r1 ,r2) c)) (rc (build-concat r1 r2 c))
(rc2 (->re `(@ ,r2 ,r1) c)) (rc2 (build-concat r2 r1 c))
(rc3 (->re `(@ ,r1 ,r2 ,r3) c)) (rr (build-repeat rc c))
(rr (->re `(* ,rc) c)) (ro (build-or `(,rr ,rc ,rr) c))
(ro (->re `(: ,rr ,rc ,rr) c)) (ro2 (build-or `(,rc ,rr ,z) c))
(ro2 (->re `(: ,rc ,rr ,z) c)) (ro3 (build-or `(,rr ,rc) c))
(ro3 (->re `(: ,rr ,rc) c)) (ro4 (build-or `(,(build-or `(,r1 ,r2) c)
(real1 (->re `(+ (: (@ "|" (* (^ "|")) "|") ,(build-or `(,r2 ,r3) c)) c))
(@ "|" (* (^ "|"))))) c))) (ra (build-and `(,rr ,rc ,rr) c))
(ra2 (build-and `(,rc ,rr) c))
(ra3 (build-and `(,rr ,rc) c))
(ra4 (build-and `(,(build-and `(,r3 ,r2) c)
,(build-and `(,r2 ,r1) c)) c))
(rn (build-neg z c))
(rn2 (build-neg r1 c)))
((char-setR-chars r1) `(#\1)) ((char-setR-chars r1) `(#\1))
((char-setR-chars r2) `(#\2)) ((char-setR-chars r2) `(#\2))
((char-setR-chars r3) `(#\3)) ((char-setR-chars r3) `(#\3))
((build-char-set null c) z)
((build-concat r1 e c) r1)
((build-concat e r1 c) r1)
((build-concat r1 z c) z)
((build-concat z r1 c) z)
((build-concat r1 r2 c) rc)
((concatR-re1 rc) r1)
((concatR-re2 rc) r2)
((concatR-re1 rc2) r2)
((concatR-re2 rc2) r1)
(ro ro2) (ro ro2)
(ro ro3) (ro ro3)
((->re `(* ,rc) c) rr) (ro4 (build-or `(,r1 ,r2 ,r3) c))
((build-char-set null c) z)
((->re `(@) c) e)
((->re `(:) c) z)
((->re `(@ ,r1 (epsilon)) c) r1)
((->re `(@ (epsilon) ,r1) c) r1)
((->re `(@ ,r1 ,z) c) z)
((->re `(@ ,z ,r1) c) z)
((->re `(@ ,z (epsilon)) c) z)
((->re `(@ (epsilon) ,z) c) z)
((->re `(:) c) z)
((->re `(: ,rr) c) rr)
((build-or `(,z ,r1 ,z) c) r1)
((build-or (list
(build-or (list r1 r2) c)
(build-or (list rc rr) c))
c)
(build-or (list r1 r2 rc rr) c))
((concatR-re1 rc3) r1)
((concatR-re1 (concatR-re2 rc3)) r2)
((concatR-re2 (concatR-re2 rc3)) r3)
((orR-res ro) (list rc rr)) ((orR-res ro) (list rc rr))
((orR-res ro4) (list r1 r2 r3))
((build-or null c) z)
((build-or `(,r1 ,z) c) r1)
((build-repeat rc c) rr)
((repeatR-re rr) rc) ((repeatR-re rr) rc)
(ra ra2)
(ra ra3)
(ra4 (build-and `(,r1 ,r2 ,r3) c))
((andR-res ra) (list rc rr))
((andR-res ra4) (list r1 r2 r3))
((build-and null c) (build-neg z c))
((build-and `(,r1 ,z) c) z)
((build-and `(,r1) c) r1)
((build-neg r1 c) (build-neg r1 c))
((build-neg (build-neg r1 c) c) r1)
((negR-re (build-neg r2 c)) r2)
((re-nullable? r1) #f) ((re-nullable? r1) #f)
((re-nullable? rc) #f) ((re-nullable? rc) #f)
((re-nullable? (->re `(@ ,rr ,rr) c)) #t) ((re-nullable? (build-concat rr rr c)) #t)
((re-nullable? rr) #t) ((re-nullable? rr) #t)
((re-nullable? ro) #t) ((re-nullable? ro) #t)
((re-nullable? (->re `(: ,r1 ,r2) c)) #f)) ((re-nullable? (build-or `(,r1 ,r2) c)) #f)
((re-nullable? (build-and `(,r1 ,e) c)) #f)
((re-nullable? (build-and `(,rr ,e) c)) #t)
((re-nullable? (build-neg r1 c)) #t)
((re-nullable? (build-neg rr c)) #f))
(test-block ((c (make-cache)) (test-block ((c (make-cache))
(r1 (->re #\1 c)) (r1 (->re #\1 c))
@ -257,12 +295,19 @@
(r4 (build-or `(,r1 ,r2) c)) (r4 (build-or `(,r1 ,r2) c))
(r5 (->re `(: ,r3-5 #\7) c)) (r5 (->re `(: ,r3-5 #\7) c))
(r6 (->re #\6 c))) (r6 (->re #\6 c)))
((flatten-res null c) null) ((flatten-res null orR? orR-res merge c) null)
((char-setR-chars (car (flatten-res `(,r1) c))) '(#\1)) ((char-setR-chars (car (flatten-res `(,r1) orR? orR-res merge c))) '(#\1))
((char-setR-chars (car (flatten-res `(,r4) c))) '(#\1 #\2)) ((char-setR-chars (car (flatten-res `(,r4) orR? orR-res merge c))) '(#\1 #\2))
((char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1) c))) ((char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1) orR? orR-res merge c)))
(string->list "1234567"))) (string->list "1234567"))
((flatten-res `(,r1 ,r2) andR? andR-res (lambda (x y)
(let-values (((i _ __)
(split x y)))
i))
c)
(list z)))
;; ->re
(test-block ((c (make-cache)) (test-block ((c (make-cache))
(r (->re #\a c)) (r (->re #\a c))
(rr (->re `(@ ,r ,r) c)) (rr (->re `(@ ,r ,r) c))
@ -285,6 +330,18 @@
((->re `(: ,rr ,rrr) c) (build-or (list rr rrr) c)) ((->re `(: ,rr ,rrr) c) (build-or (list rr rrr) c))
((->re `(: ,r) c) r) ((->re `(: ,r) c) r)
((->re `(:) c) z) ((->re `(:) c) z)
((->re `(& (& #\111 (^ (- #\000 #\110) (- #\112 #\377)))
(& (* #\2))) c)
(build-and (list (build-char-set '(#\111) c)
(build-repeat (build-char-set '(#\2) c) c))
c))
((->re `(& (& #\000 (^ (- #\000 #\110) (- #\112 #\377)))
(& (* #\2))) c)
z)
((->re `(& ,rr ,rrr) c) (build-and (list rr rrr) c))
((->re `(& ,r) c) r)
((->re `(&) c) (build-neg z c))
((->re `(~ ,r) c) (build-neg r c))
((->re `(@) c) e) ((->re `(@) c) e)
((->re `(@ ,rrr*) c) rrr*) ((->re `(@ ,rrr*) c) rrr*)
(rr (build-concat r r c)) (rr (build-concat r r c))

@ -43,7 +43,12 @@
(unless (= num-args 1) (unless (= num-args 1)
(num-arg-err s 1 num-args)) (num-arg-err s 1 num-args))
`(? ,(parse (car ar)))) `(? ,(parse (car ar))))
((~)
(unless (= num-args 1)
(num-arg-err s 1 num-args))
`(~ ,(parse (car ar))))
((:) `(: ,@(map parse ar))) ((:) `(: ,@(map parse ar)))
((&) `(& ,@(map parse ar)))
((@) `(@ ,@(map parse ar))) ((@) `(@ ,@(map parse ar)))
((-) ((-)
(unless (= num-args 2) (unless (= num-args 2)

Loading…
Cancel
Save