*** empty log message ***

original commit: 0cbf61bf22c7e31fdb3a7bfafd56ec219eba186a
tokens
Scott Owens 21 years ago
parent 1bab38b019
commit c8ffdb88cc

@ -59,8 +59,6 @@
)
;; A char-set is a (list-of char) that is sorted and duplicate-free
;; partition : (list-of char-set) -> (list-of char-set)
;; The coarsest refinment r of sets such that the char-sets in r
;; are pairwise disjoint.
@ -89,16 +87,18 @@
(else
(cons i (cons s2 rest))))))))))
(test-block ((sl string->list))
(test-block ((sl (lambda (str)
(foldr (lambda (c cs)
(merge (make-range (char->integer c) (char->integer c))
cs))
null
(string->list str)))))
((partition null) null)
((partition (list (sl "1234"))) (list (sl "1234")))
((partition (list (sl "1234") (sl "0235")))
(list (sl "23") (sl "05") (sl "14")))
((partition (list (sl "12349") (sl "02359") (sl "67") (sl "29")))
(list (sl "29") (sl "67") (sl "3") (sl "05") (sl "14")))
)
(test-block ((sl string->list))
((partition1 (sl "bcdjw") null) (list (sl "bcdjw")))
((partition1 null null) null)
((partition1 null (list (sl "a") (sl "b") (sl "1")))
@ -111,12 +111,12 @@
;; deriveR : re * char cache -> re
;; deriveR : re char cache -> re
(define (deriveR r c cache)
(cond
((or (eq? r e) (eq? r z)) z)
((char-setR? r)
(if (memq c (char-setR-chars r)) e z))
(if (char-in-set? c (char-setR-chars r)) e z))
((concatR? r)
(let* ((r1 (concatR-re1 r))
(r2 (concatR-re2 r))
@ -138,6 +138,8 @@
(build-neg (deriveR (negR-re r) c cache) cache))))
(test-block ((c (make-cache))
(a (char->integer #\a))
(b (char->integer #\b))
(r1 (->re #\a c))
(r2 (->re `(* #\a) c))
(r3 (->re `(* ,r2) c))
@ -146,27 +148,27 @@
(r6 (->re `(: ,r5 #\a) c))
(r7 (->re `(@ ,r2 ,r2) c))
(r8 (->re `(~ ,r4) c))
(r9 (->re `(& ,r2 ,r3) c)))
((deriveR e #\a c) z)
((deriveR z #\a c) z)
((deriveR r1 #\b c) z)
((deriveR r1 #\a c) e)
((deriveR r2 #\a c) r2)
((deriveR r2 #\b c) z)
((deriveR r3 #\a c) (->re `(@ ,r2 ,r3) c))
((deriveR r3 #\b c) z)
((deriveR r4 #\a c) r2)
((deriveR r4 #\b c) z)
((deriveR r5 #\a c) (->re `(@ ,r2 ,r5) c))
((deriveR r5 #\b c) z)
((deriveR r6 #\a c) (->re `(: (@ ,r2 ,r5) (epsilon)) c))
((deriveR r6 #\b c) z)
((deriveR r7 #\a c) (->re `(: (@ ,r2 ,r2) ,r2) c))
((deriveR r7 #\b c) z)
((deriveR r8 #\a c) (->re `(~, r2) c))
((deriveR r8 #\b c) (->re `(~ ,z) c))
((deriveR r9 #\a c) (->re `(& ,r2 (@ ,r2 ,r3)) c))
((deriveR r9 #\b c) z))
(r9 (->re `(& ,r2 ,r4) c)))
((deriveR e a c) z)
((deriveR z a c) z)
((deriveR r1 b c) z)
((deriveR r1 a c) e)
((deriveR r2 a c) r2)
((deriveR r2 b c) z)
((deriveR r3 a c) r2)
((deriveR r3 b c) z)
((deriveR r4 a c) r2)
((deriveR r4 b c) z)
((deriveR r5 a c) (->re `(@ ,r2 ,r5) c))
((deriveR r5 b c) z)
((deriveR r6 a c) (->re `(: (@ ,r2 ,r5) (epsilon)) c))
((deriveR r6 b c) z)
((deriveR r7 a c) (->re `(: (@ ,r2 ,r2) ,r2) c))
((deriveR r7 b c) z)
((deriveR r8 a c) (->re `(~, r2) c))
((deriveR r8 b c) (->re `(~ ,z) c))
((deriveR r9 a c) r2)
((deriveR r9 b c) z))
;; An re-action is (cons re action)
@ -185,10 +187,10 @@
(test-block ((c (make-cache))
(r1 (->re #\1 c))
(r2 (->re #\2 c)))
((derive null #\1 c) #f)
((derive (list (cons r1 1) (cons r2 2)) #\1 c)
((derive null (char->integer #\1) c) #f)
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c)
(list (cons e 1) (cons z 2)))
((derive (list (cons r1 1) (cons r2 2)) #\3 c) #f))
((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f))
;; get-final : (list-of re-action) -> (union #f syntax-object)
@ -200,20 +202,21 @@
((re-nullable? (caar res)) (cdar res))
(else (get-final (cdr res)))))
(print-struct #t)
(test-block ((c (make-cache))
(test-block ((c->i char->integer)
(c (make-cache))
(r1 (->re #\a c))
(r2 (->re #\b c))
(b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5)))
(a (list (cons r1 1) (cons r2 2))))
((derive null #\a c) #f)
((derive a #\a c) (list (cons e 1) (cons z 2)))
((derive a #\b c) (list (cons z 1) (cons e 2)))
((derive a #\c c) #f)
((derive null (c->i #\a) c) #f)
((derive a (c->i #\a) c) (list (cons e 1) (cons z 2)))
((derive a (c->i #\b) c) (list (cons z 1) (cons e 2)))
((derive a (c->i #\c) c) #f)
((derive (list (cons (->re `(: " " "\n" ",") c) 1)
(cons (->re `(@ (? "-") (+ (- "0" "9"))) c) 2)
(cons (->re `(@ "-" (+ "-")) c) 3)
(cons (->re "[" c) 4)
(cons (->re "]" c) 5)) #\[ c)
(cons (->re "]" c) 5)) (c->i #\[) c)
b)
((get-final a) #f)
((get-final (list (cons e 1) (cons e 2))) 1)
@ -240,20 +243,22 @@
(state-spec (car st)))))))))
(test-block ((c (make-cache))
(c->i char->integer)
(r1 (->re `(- #\1 #\4) c))
(r2 (->re `(- #\2 #\3) c)))
((compute-chars null) null)
((compute-chars (list (make-state null 1))) null)
((compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2)))
(list (list #\2 #\3) (list #\1 #\4))))
(list (make-range (c->i #\2) (c->i #\3)) (append (make-range (c->i #\1) (c->i #\1))
(make-range (c->i #\4) (c->i #\4))))))
;; A dfa is (make-dfa int int
;; (list-of (cons int syntax-object))
;; (list-of (cons int (list-of (cons (list-of char) int)))))
;; (list-of (cons int (list-of (cons char-set int)))))
;; Each transitions is a state and a list of chars with the state to transition to.
;; The finals and transitions are sorted by state number, and duplicate free.
(define-struct dfa (num-states start-state final-states/actions transitions))
(define-struct dfa (num-states start-state final-states/actions transitions) (make-inspector))
;; build-dfa : (list-of re-action) cache -> dfa
(define (build-dfa rs cache)
@ -288,7 +293,7 @@
(else
(let* ((state (car old-states))
(c (car cs))
(new-re (derive (state-spec state) (car c) cache)))
(new-re (derive (state-spec state) (get-a-char (car c)) cache)))
(cond
(new-re
(let* ((new-state? #f)
@ -317,7 +322,7 @@
(printf "state: ~a~n" (car trans))
(for-each (lambda (rule)
(printf " -~a-> ~a~n"
(car rule)
(char-set->string (car rule))
(cdr rule)))
(cdr trans)))
(dfa-transitions x)))

@ -39,10 +39,10 @@
(let ((from-state (car trans)))
(for-each (lambda (chars/to)
(let ((to-state (cdr chars/to)))
(for-each (lambda (char)
(char-set-for-each (lambda (char)
(vector-set! char-table
(bitwise-ior
(char->integer char)
char
(arithmetic-shift from-state 8))
to-state))
(car chars/to))))
@ -66,9 +66,9 @@
(list (make-vector 256 #f) 1 (vector #f) (make-vector 1 #t)))
((call-with-values (lambda ()
(dfa->table (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
(list (cons 1 (list (cons (list #\1 #\2) 1)
(cons (list #\3) 2)))
(cons 2 (list (cons (list #\1) 3)))))))
(list (cons 1 (list (cons (make-range 49 50) 1)
(cons (make-range 51 51) 2)))
(cons 2 (list (cons (make-range 49 49) 3)))))))
list)
(list (let ((v (make-vector 1024 #f)))
(vector-set! v 305 1)

@ -30,7 +30,7 @@
(define-struct (zeroR re) () (make-inspector))
(define-struct (char-setR re) (chars) (make-inspector))
(define-struct (concatR re) (re1 re2) (make-inspector))
(define-struct (repeatR re) (re))
(define-struct (repeatR re) (re) (make-inspector))
(define-struct (orR re) (res) (make-inspector))
(define-struct (andR re) (res) (make-inspector))
(define-struct (negR re) (re) (make-inspector))
@ -103,38 +103,28 @@
(`(^ ,crs ...)
(let ((cs (->re `(: ,@crs) cache)))
(cond
((zeroR? cs) (build-char-set (make-range 0 (sub1 (expt 2 32))) cache))
((zeroR? cs) (build-char-set (make-range 0 255) cache))
((char-setR? cs)
(build-char-set
(let loop ((bad-chars (map char->integer
(char-setR-chars cs)))
(i 0))
(cond
((> i 255) null)
((and (not (null? bad-chars))
(= i (car bad-chars)))
(loop (cdr bad-chars) (add1 i)))
(else
(cons (integer->char i) (loop bad-chars (add1 i))))))
cache))
(build-char-set (complement (char-setR-chars cs) 255) cache))
(else z))))))
;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re))
;; ((list-of char) (list-of char) -> (list-of char)) cache -> (list-of re)
;; Takes all the char-sets in l and combines them into one element using the combine function.
;; (char-set char-set -> char-set) cache -> (list-of re)
;; Takes all the char-sets in l and combines them into one char-set using the combine function.
;; Flattens out the values of type?. get-res only needs to function on things type? returns
;; true for.
(define (flatten-res l type? get-res combine cache)
(let loop ((res l)
;; chars : (union #f char-set)
(chars #f)
(no-chars null))
(cond
((null? res)
(if chars
(cons (build-char-set (mergesort chars char<?) cache) no-chars)
(cons (build-char-set chars cache) no-chars)
no-chars))
((char-setR? (car res))
(if chars
@ -149,14 +139,15 @@
(define (build-zero) z)
;; build-char-set : (list-of char) cache -> re
;; cs must be sorted
;; build-char-set : char-set cache -> re
#;(define (build-char-set cs cache)
(cond
((null? cs) z)
(else
(make-char-setR #f (get-index) cs))))
;; build-char-set : char-set cache -> re
(define (build-char-set cs cache)
(cond
((null? cs) z)
@ -182,9 +173,13 @@
;; build-repeat : re cache -> re
(define (build-repeat r cache)
(cond
((eq? z r) e)
((repeatR? r) r)
(else
(cache (cons 'repeat (re-index r))
(lambda ()
(make-repeatR #t (get-index) r))))
(make-repeatR #t (get-index) r))))))
;; build-or : (list-of re) cache -> re
@ -225,9 +220,9 @@
;; Tests for the build-functions
(test-block ((c (make-cache))
(r1 (build-char-set `(#\1) c))
(r2 (build-char-set `(#\2) c))
(r3 (build-char-set `(#\3) c))
(r1 (build-char-set (make-range (char->integer #\1) (char->integer #\1)) c))
(r2 (build-char-set (make-range (char->integer #\2) (char->integer #\2)) c))
(r3 (build-char-set (make-range (char->integer #\3) (char->integer #\3)) c))
(rc (build-concat r1 r2 c))
(rc2 (build-concat r2 r1 c))
(rr (build-repeat rc c))
@ -244,9 +239,9 @@
(rn (build-neg z c))
(rn2 (build-neg r1 c)))
((char-setR-chars r1) `(#\1))
((char-setR-chars r2) `(#\2))
((char-setR-chars r3) `(#\3))
((char-setR-chars r1) (make-range (char->integer #\1) (char->integer #\1)))
((char-setR-chars r2) (make-range (char->integer #\2) (char->integer #\2)))
((char-setR-chars r3) (make-range (char->integer #\3) (char->integer #\3)))
((build-char-set null c) z)
((build-concat r1 e c) r1)
((build-concat e r1 c) r1)
@ -265,6 +260,8 @@
((build-or null c) z)
((build-or `(,r1 ,z) c) r1)
((build-repeat rc c) rr)
((build-repeat z c) e)
((build-repeat (build-repeat rc c) c) (build-repeat rc c))
((repeatR-re rr) rc)
(ra ra2)
(ra ra3)
@ -296,10 +293,12 @@
(r5 (->re `(: ,r3-5 #\7) c))
(r6 (->re #\6 c)))
((flatten-res null orR? orR-res merge c) null)
((char-setR-chars (car (flatten-res `(,r1) orR? orR-res merge c))) '(#\1))
((char-setR-chars (car (flatten-res `(,r4) orR? orR-res merge c))) '(#\1 #\2))
((char-setR-chars (car (flatten-res `(,r1) orR? orR-res merge c)))
(make-range (char->integer #\1) (char->integer #\1)))
((char-setR-chars (car (flatten-res `(,r4) orR? orR-res merge c)))
(make-range (char->integer #\1) (char->integer #\2)))
((char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1) orR? orR-res merge c)))
(string->list "1234567"))
(make-range (char->integer #\1) (char->integer #\7)))
((flatten-res `(,r1 ,r2) andR? andR-res (lambda (x y)
(let-values (((i _ __)
(split x y)))
@ -313,7 +312,7 @@
(rr (->re `(@ ,r ,r) c))
(rrr (->re `(@ ,r ,rr) c))
(rrr* (->re `(* ,rrr) c)))
((char-setR-chars r) '(#\a))
((char-setR-chars r) (make-range (char->integer #\a) (char->integer #\a)))
((->re "" c) e)
((->re "asdf" c) (->re `(@ #\a #\s #\d #\f) c))
((->re r c) r)
@ -324,16 +323,18 @@
((->re `(? ,rrr*) c) rrr*)
((->re `(: (: (- #\a #\c) (^ (- #\000 #\110) (- #\112 #\377)))
(: (* #\2))) c)
(build-or (list (build-char-set (list #\111 #\a #\b #\c) c)
(build-repeat (build-char-set '(#\2) c) c))
(build-or (list (build-char-set (append (make-range 73 73)
(make-range 97 99))
c)
(build-repeat (build-char-set (make-range 50 50) c) c))
c))
((->re `(: ,rr ,rrr) c) (build-or (list rr rrr) c))
((->re `(: ,r) c) r)
((->re `(:) c) z)
((->re `(& (& #\111 (^ (- #\000 #\110) (- #\112 #\377)))
(& (* #\2))) c)
(build-and (list (build-char-set '(#\111) c)
(build-repeat (build-char-set '(#\2) c) c))
(build-and (list (build-char-set (make-range 73 73) c)
(build-repeat (build-char-set (make-range 50 50) c) c))
c))
((->re `(& (& #\000 (^ (- #\000 #\110) (- #\112 #\377)))
(& (* #\2))) c)
@ -347,14 +348,14 @@
(rr (build-concat r r c))
((->re `(@ ,r ,rr ,rrr) c)
(build-concat r (build-concat rr rrr c) c))
((char-setR-chars (->re `(- #\1 #\1) c)) '(#\1))
((char-setR-chars (->re `(- #\1 #\9) c)) (string->list "123456789"))
((char-setR-chars (->re `(- "1" "1") c)) '(#\1))
((char-setR-chars (->re `(- "1" "9") c)) (string->list "123456789"))
((char-setR-chars (->re `(- #\1 #\1) c)) (make-range 49 49))
((char-setR-chars (->re `(- #\1 #\9) c)) (make-range 49 57))
((char-setR-chars (->re `(- "1" "1") c)) (make-range 49 49))
((char-setR-chars (->re `(- "1" "9") c)) (make-range 49 57))
((->re `(- "9" "1") c) z)
((char-setR-chars (->re `(^) c))
(char-setR-chars (->re `(- #\000 #\377) c)))
((char-setR-chars (->re `(^ #\001 (- #\002 #\377)) c)) `(#\000))
((char-setR-chars (->re `(^ #\001 (- #\002 #\377)) c)) (make-range 0 0))
)
)

@ -1,7 +1,7 @@
(module util mzscheme
(require (lib "list.ss"))
(provide (all-defined-except split-acc))
(provide (all-defined-except split-acc complement-acc))
(define-struct lex-abbrev (abbrev))
@ -307,4 +307,75 @@
((6 . 6) (50 . 60) (201 . 220))
((1 . 1) (9 . 9) (100 . 100) (500 . 600) (600 . 700))))
)
;; complement-acc : char-set nat nat -> char-set
;; As complement. The current-nat accumulator keeps track of where the
;; next range in the complement should start.
(define (complement-acc s current-nat max)
(cond
((null? s) (if (<= current-nat max)
(list (cons current-nat max))
null))
(else
(let ((s-car (car s)))
(cond
((< current-nat (car s-car))
(cons (cons current-nat (sub1 (car s-car)))
(complement-acc (cdr s) (add1 (cdr s-car)) max)))
((<= current-nat (cdr s-car))
(complement-acc (cdr s) (add1 (cdr s-car)) max))
(else
(complement-acc (cdr s) current-nat max)))))))
;; complement : char-set nat -> char-set
;; A set of all the nats not in s, up to and including max.
;; (cdr (last-pair s)) <= max
(define (complement s max)
(complement-acc s 0 max))
(test-block ()
((complement null 255) '((0 . 255)))
((complement '((1 . 5) (7 . 7) (10 . 200)) 255)
'((0 . 0) (6 . 6) (8 . 9) (201 . 255)))
((complement '((0 . 254)) 255) '((255 . 255)))
((complement '((1 . 255)) 255) '((0 . 0)))
((complement '((0 . 255)) 255) null))
;; char-in-set? : nat char-set -> bool
(define (char-in-set? c cs)
(and
(pair? cs)
(or (<= (caar cs) c (cdar cs))
(char-in-set? c (cdr cs)))))
(test-block ()
((char-in-set? 1 null) #f)
((char-in-set? 19 '((1 . 18) (20 . 21))) #f)
((char-in-set? 19 '((1 . 2) (19 . 19) (20 . 21))) #t))
(define get-a-char car)
(define (char-set->string cs)
(cond
((null? cs) "")
(else
(string-append (format "~a(~a)-~a(~a) "
(caar cs) (integer->char (caar cs))
(cdar cs) (integer->char (cdar cs)))
(char-set->string (cdr cs))))))
(define (char-for-each-acc f start stop cs)
(cond
((and (> start stop) (null? cs)) (void))
((> start stop)
(char-for-each-acc f (caar cs) (cdar cs) (cdr cs)))
(else
(f start)
(char-for-each-acc f (add1 start) stop cs))))
(define (char-set-for-each f cs)
(cond
((null? cs) (void))
(else
(char-for-each-acc f (caar cs) (cdar cs) (cdr cs)))))
)
Loading…
Cancel
Save