*** empty log message ***

original commit: c98860b8ff15485b0a2d415a0b7a03c1a32e1099
tokens
Scott Owens 21 years ago
parent 578ee09436
commit 51f67183a2

@ -1,6 +1,7 @@
(module deriv mzscheme
(require (lib "list.ss")
(prefix is: (lib "integer-set.ss"))
"re.ss"
"util.ss")
@ -57,58 +58,6 @@
((get-char-groups (->re `(& (* ,r1) (@ (* ,r2) "3") "4") c) #f)
(list r1 r2 (->re "3" c) (->re "4" c)))
)
;; partition : (list-of char-set) -> (list-of char-set)
;; The coarsest refinment r of sets such that the char-sets in r
;; are pairwise disjoint.
(define (partition sets)
(cond
((null? sets) null)
(else
(partition1 (car sets) (partition (cdr sets))))))
;; partition1 : char-set (list-of char-set) -> (list-of char-set)
;; All the char-sets in sets must be pairwise disjoint. Splits set
;; against each element in sets.
(define (partition1 set sets)
(cond
((null? set) sets)
((null? sets) (list set))
(else
(let ((set2 (car sets)))
(let-values (((i s1 s2) (split set set2)))
(let ((rest (partition1 s1 (cdr sets))))
(cond
((null? i)
(cons s2 rest))
((null? s2)
(cons i rest))
(else
(cons i (cons s2 rest))))))))))
(test-block ((sl (lambda (str)
(foldr (lambda (c cs)
(merge (make-range (char->integer c) (char->integer c))
cs))
null
(string->list str)))))
((partition null) null)
((partition (list (sl "1234"))) (list (sl "1234")))
((partition (list (sl "1234") (sl "0235")))
(list (sl "23") (sl "05") (sl "14")))
((partition (list (sl "12349") (sl "02359") (sl "67") (sl "29")))
(list (sl "29") (sl "67") (sl "3") (sl "05") (sl "14")))
((partition1 (sl "bcdjw") null) (list (sl "bcdjw")))
((partition1 null null) null)
((partition1 null (list (sl "a") (sl "b") (sl "1")))
(list (sl "a") (sl "b") (sl "1")))
((partition1 (sl "bcdjw")
(list (sl "z")
(sl "ab")
(sl "dj")))
(list (sl "z") (sl "b") (sl "a") (sl "dj") (sl "cw"))))
;; deriveR : re char cache -> re
@ -116,7 +65,7 @@
(cond
((or (eq? r e) (eq? r z)) z)
((char-setR? r)
(if (char-in-set? c (char-setR-chars r)) e z))
(if (is:member? c (char-setR-chars r)) e z))
((concatR? r)
(let* ((r1 (concatR-re1 r))
(r2 (concatR-re2 r))
@ -238,9 +187,9 @@
(cond
((null? st) null)
(else
(partition (map char-setR-chars
(apply append (map (lambda (x) (get-char-groups (car x) #f))
(state-spec (car st)))))))))
(is:partition (map char-setR-chars
(apply append (map (lambda (x) (get-char-groups (car x) #f))
(state-spec (car st)))))))))
(test-block ((c (make-cache))
(c->i char->integer)
@ -248,9 +197,10 @@
(r2 (->re `(- #\2 #\3) c)))
((compute-chars null) null)
((compute-chars (list (make-state null 1))) null)
((compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2)))
(list (make-range (c->i #\2) (c->i #\3)) (append (make-range (c->i #\1) (c->i #\1))
(make-range (c->i #\4) (c->i #\4))))))
((map is:integer-set-contents (compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2))))
(list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3)))
(is:integer-set-contents (is:union (is:make-range (c->i #\1))
(is:make-range (c->i #\4)))))))
;; A dfa is (make-dfa int int
@ -293,7 +243,7 @@
(else
(let* ((state (car old-states))
(c (car cs))
(new-re (derive (state-spec state) (get-a-char (car c)) cache)))
(new-re (derive (state-spec state) (is:get-integer c) cache)))
(cond
(new-re
(let* ((new-state? #f)
@ -322,7 +272,7 @@
(printf "state: ~a~n" (car trans))
(for-each (lambda (rule)
(printf " -~a-> ~a~n"
(char-set->string (car rule))
(is:integer-set-contents (car rule))
(cdr rule)))
(cdr trans)))
(dfa-transitions x)))

@ -1,5 +1,6 @@
(module front mzscheme
(require "util.ss"
(require (prefix is: (lib "integer-set.ss"))
"util.ss"
"stx.ss"
"re.ss"
"deriv.ss")
@ -13,20 +14,18 @@
(printf "~a: " l)
(time (begin e ...))))))
;; A table is either
;; - (vector-of (union #f nat))
;; - (vector-of (vector-of (cons (cons nat nat) nat)))
;; dfa->table : dfa -> (same as build-lexer)
(define (dfa->table dfa)
;; dfa->1d-table : dfa -> (same as build-lexer)
; (define (dfa->1d-table dfa)
; (let (
;; dfa->2d-table : dfa -> (same as build-lexer)
(define (dfa->2d-table dfa)
(let (
;; no-look : (vector-of bool)
;; For each state whether the lexer can ignore the next input.
;; It can do this only if there are no transitions out of the
;; current state.
(no-look (make-vector (dfa-num-states dfa) #t))
;; actions : (vector-of (union #f syntax-object))
;; The action for each final state, #f if the state isn't final
(actions (make-vector (dfa-num-states dfa) #f))
;; char-table : (vector-of (union #f nat))
;; The lexer table, one entry per state per char.
;; Each entry specifies a state to transition to.
@ -39,48 +38,57 @@
(let ((from-state (car trans)))
(for-each (lambda (chars/to)
(let ((to-state (cdr chars/to)))
(char-set-for-each (lambda (char)
(vector-set! char-table
(bitwise-ior
char
(arithmetic-shift from-state 8))
to-state))
(car chars/to))))
(is:foldr (lambda (char _)
(vector-set! char-table
(bitwise-ior
char
(arithmetic-shift from-state 8))
to-state))
(void)
(car chars/to))))
(cdr trans))))
(dfa-transitions dfa))
char-table))
(for-each (lambda (trans)
(vector-set! no-look (car trans) #f))
(dfa-transitions dfa))
;; dfa->actions : dfa -> (vector-of (union #f syntax-object))
;; The action for each final state, #f if the state isn't final
(define (dfa->actions dfa)
(let ((actions (make-vector (dfa-num-states dfa) #f)))
(for-each (lambda (state/action)
(vector-set! actions (car state/action) (cdr state/action)))
(dfa-final-states/actions dfa))
actions))
(values char-table (dfa-start-state dfa) actions no-look)))
(test-block ()
((call-with-values (lambda ()
(dfa->table (make-dfa 1 1 (list) (list))))
list)
(list (make-vector 256 #f) 1 (vector #f) (make-vector 1 #t)))
((call-with-values (lambda ()
(dfa->table (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
(list (cons 1 (list (cons (make-range 49 50) 1)
(cons (make-range 51 51) 2)))
(cons 2 (list (cons (make-range 49 49) 3)))))))
list)
(list (let ((v (make-vector 1024 #f)))
(vector-set! v 305 1)
(vector-set! v 306 1)
(vector-set! v 307 2)
(vector-set! v 561 3)
v)
1
(vector #f #f 2 3)
(vector #t #f #f #t))))
;; build-lexer : syntax-object list -> (values (vector-of (union #f nat)) nat (vector-of (union #f syntax-object)) (vector-of bool))
;; dfa->no-look : dfa -> (vector-of bool)
;; For each state whether the lexer can ignore the next input.
;; It can do this only if there are no transitions out of the
;; current state.
(define (dfa->no-look dfa)
(let ((no-look (make-vector (dfa-num-states dfa) #t)))
(for-each (lambda (trans)
(vector-set! no-look (car trans) #f))
(dfa-transitions dfa))
no-look))
(test-block ((d1 (make-dfa 1 1 (list) (list)))
(d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3))
(list (cons 1 (list (cons (is:make-range 49 50) 1)
(cons (is:make-range 51) 2)))
(cons 2 (list (cons (is:make-range 49) 3)))))))
((dfa->2d-table d1) (make-vector 256 #f))
((dfa->2d-table d2) (let ((v (make-vector 1024 #f)))
(vector-set! v 305 1)
(vector-set! v 306 1)
(vector-set! v 307 2)
(vector-set! v 561 3)
v))
((dfa->actions d1) (vector #f))
((dfa->actions d2) (vector #f #f 2 3))
((dfa->no-look d1) (vector #t))
((dfa->no-look d2) (vector #t #f #f #t)))
;; build-lexer : syntax-object list -> (values table nat (vector-of (union #f syntax-object)) (vector-of bool))
;; each syntax object has the form (re action)
(define (build-lexer sos)
(let* ((s-re-acts (map (lambda (so)
@ -98,5 +106,5 @@
(dfa (build-dfa re-acts cache)))
;(print-dfa dfa)
;(printf "states: ~a~n" (dfa-num-states dfa))
(dfa->table dfa)))
(values (dfa->2d-table dfa) (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa))))
)

@ -1,6 +1,7 @@
(module re mzscheme
(require (lib "match.ss")
(lib "list.ss")
(prefix is: (lib "integer-set.ss"))
"util.ss")
(provide ->re build-epsilon build-zero build-char-set build-concat
@ -64,7 +65,7 @@
;; ->re : s-re cache -> re
(define (->re exp cache)
(match exp
((? char?) (build-char-set (make-range (char->integer exp) (char->integer exp)) cache))
((? char?) (build-char-set (is:make-range (char->integer exp)) cache))
((? string?) (->re `(@ ,@(string->list exp)) cache))
((? re?) exp)
(`(epsilon) (build-epsilon))
@ -79,12 +80,12 @@
(build-or (list e c) cache))))
(`(: ,rs ...)
(build-or (flatten-res (map (lambda (r) (->re r cache)) rs)
orR? orR-res merge cache)
orR? orR-res is:union cache)
cache))
(`(& ,rs ...)
(build-and (flatten-res (map (lambda (r) (->re r cache)) rs)
andR? andR-res (lambda (a b)
(let-values (((i _ __) (split a b))) i))
(let-values (((i _ __) (is:split a b))) i))
cache)
cache))
(`(~ ,r)
@ -98,14 +99,14 @@
(let ((i1 (char->integer (if (string? c1) (string-ref c1 0) c1)))
(i2 (char->integer (if (string? c2) (string-ref c2 0) c2))))
(if (<= i1 i2)
(build-char-set (make-range i1 i2) cache)
(build-char-set (is:make-range i1 i2) cache)
z)))
(`(^ ,crs ...)
(let ((cs (->re `(: ,@crs) cache)))
(cond
((zeroR? cs) (build-char-set (make-range 0 255) cache))
((zeroR? cs) (build-char-set (is:make-range 0 255) cache))
((char-setR? cs)
(build-char-set (complement (char-setR-chars cs) 255) cache))
(build-char-set (is:complement (char-setR-chars cs) 0 255) cache))
(else z))))))
@ -140,21 +141,14 @@
(define (build-zero) z)
;; build-char-set : char-set cache -> re
#;(define (build-char-set cs cache)
(define (build-char-set cs cache)
(let ((l (is:integer-set-contents cs)))
(cond
((null? cs) z)
((null? l) z)
(else
(make-char-setR #f (get-index) cs))))
;; build-char-set : char-set cache -> re
(define (build-char-set cs cache)
(cond
((null? cs) z)
(else
(cache cs
(lambda ()
(make-char-setR #f (get-index) cs))))))
(cache l
(lambda ()
(make-char-setR #f (get-index) cs)))))))
@ -220,9 +214,10 @@
;; Tests for the build-functions
(test-block ((c (make-cache))
(r1 (build-char-set (make-range (char->integer #\1) (char->integer #\1)) c))
(r2 (build-char-set (make-range (char->integer #\2) (char->integer #\2)) c))
(r3 (build-char-set (make-range (char->integer #\3) (char->integer #\3)) c))
(isc is:integer-set-contents)
(r1 (build-char-set (is:make-range (char->integer #\1)) c))
(r2 (build-char-set (is:make-range (char->integer #\2)) c))
(r3 (build-char-set (is:make-range (char->integer #\3)) c))
(rc (build-concat r1 r2 c))
(rc2 (build-concat r2 r1 c))
(rr (build-repeat rc c))
@ -239,10 +234,10 @@
(rn (build-neg z c))
(rn2 (build-neg r1 c)))
((char-setR-chars r1) (make-range (char->integer #\1) (char->integer #\1)))
((char-setR-chars r2) (make-range (char->integer #\2) (char->integer #\2)))
((char-setR-chars r3) (make-range (char->integer #\3) (char->integer #\3)))
((build-char-set null c) z)
((isc (char-setR-chars r1)) (isc (is:make-range (char->integer #\1))))
((isc (char-setR-chars r2)) (isc (is:make-range (char->integer #\2))))
((isc (char-setR-chars r3)) (isc (is:make-range (char->integer #\3))))
((build-char-set (is:make-range) c) z)
((build-concat r1 e c) r1)
((build-concat e r1 c) r1)
((build-concat r1 z c) z)
@ -286,33 +281,35 @@
((re-nullable? (build-neg rr c)) #f))
(test-block ((c (make-cache))
(isc is:integer-set-contents)
(r1 (->re #\1 c))
(r2 (->re #\2 c))
(r3-5 (->re '(- #\3 #\5) c))
(r4 (build-or `(,r1 ,r2) c))
(r5 (->re `(: ,r3-5 #\7) c))
(r6 (->re #\6 c)))
((flatten-res null orR? orR-res merge c) null)
((char-setR-chars (car (flatten-res `(,r1) orR? orR-res merge c)))
(make-range (char->integer #\1) (char->integer #\1)))
((char-setR-chars (car (flatten-res `(,r4) orR? orR-res merge c)))
(make-range (char->integer #\1) (char->integer #\2)))
((char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1) orR? orR-res merge c)))
(make-range (char->integer #\1) (char->integer #\7)))
((flatten-res null orR? orR-res is:union c) null)
((isc (char-setR-chars (car (flatten-res `(,r1) orR? orR-res is:union c))))
(isc (is:make-range (char->integer #\1))))
((isc (char-setR-chars (car (flatten-res `(,r4) orR? orR-res is:union c))))
(isc (is:make-range (char->integer #\1) (char->integer #\2))))
((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1) orR? orR-res is:union c))))
(isc (is:make-range (char->integer #\1) (char->integer #\7))))
((flatten-res `(,r1 ,r2) andR? andR-res (lambda (x y)
(let-values (((i _ __)
(split x y)))
(is:split x y)))
i))
c)
(list z)))
;; ->re
(test-block ((c (make-cache))
(isc is:integer-set-contents)
(r (->re #\a c))
(rr (->re `(@ ,r ,r) c))
(rrr (->re `(@ ,r ,rr) c))
(rrr* (->re `(* ,rrr) c)))
((char-setR-chars r) (make-range (char->integer #\a) (char->integer #\a)))
((isc (char-setR-chars r)) (isc (is:make-range (char->integer #\a))))
((->re "" c) e)
((->re "asdf" c) (->re `(@ #\a #\s #\d #\f) c))
((->re r c) r)
@ -323,18 +320,18 @@
((->re `(? ,rrr*) c) rrr*)
((->re `(: (: (- #\a #\c) (^ (- #\000 #\110) (- #\112 #\377)))
(: (* #\2))) c)
(build-or (list (build-char-set (append (make-range 73 73)
(make-range 97 99))
(build-or (list (build-char-set (is:union (is:make-range 73)
(is:make-range 97 99))
c)
(build-repeat (build-char-set (make-range 50 50) c) c))
(build-repeat (build-char-set (is:make-range 50) c) c))
c))
((->re `(: ,rr ,rrr) c) (build-or (list rr rrr) c))
((->re `(: ,r) c) r)
((->re `(:) c) z)
((->re `(& (& #\111 (^ (- #\000 #\110) (- #\112 #\377)))
(& (* #\2))) c)
(build-and (list (build-char-set (make-range 73 73) c)
(build-repeat (build-char-set (make-range 50 50) c) c))
(build-and (list (build-char-set (is:make-range 73) c)
(build-repeat (build-char-set (is:make-range 50) c) c))
c))
((->re `(& (& #\000 (^ (- #\000 #\110) (- #\112 #\377)))
(& (* #\2))) c)
@ -348,14 +345,14 @@
(rr (build-concat r r c))
((->re `(@ ,r ,rr ,rrr) c)
(build-concat r (build-concat rr rrr c) c))
((char-setR-chars (->re `(- #\1 #\1) c)) (make-range 49 49))
((char-setR-chars (->re `(- #\1 #\9) c)) (make-range 49 57))
((char-setR-chars (->re `(- "1" "1") c)) (make-range 49 49))
((char-setR-chars (->re `(- "1" "9") c)) (make-range 49 57))
((isc (char-setR-chars (->re `(- #\1 #\1) c))) (isc (is:make-range 49)))
((isc (char-setR-chars (->re `(- #\1 #\9) c))) (isc (is:make-range 49 57)))
((isc (char-setR-chars (->re `(- "1" "1") c))) (isc (is:make-range 49)))
((isc (char-setR-chars (->re `(- "1" "9") c))) (isc (is:make-range 49 57)))
((->re `(- "9" "1") c) z)
((char-setR-chars (->re `(^) c))
(char-setR-chars (->re `(- #\000 #\377) c)))
((char-setR-chars (->re `(^ #\001 (- #\002 #\377)) c)) (make-range 0 0))
((isc (char-setR-chars (->re `(^) c)))
(isc (char-setR-chars (->re `(- #\000 #\377) c))))
((isc (char-setR-chars (->re `(^ #\001 (- #\002 #\377)) c))) (isc (is:make-range 0)))
)
)

@ -1,7 +1,7 @@
(module util mzscheme
(require (lib "list.ss"))
(provide (all-defined-except split-acc complement-acc))
(provide (all-defined))
(define-struct lex-abbrev (abbrev))
@ -106,276 +106,6 @@
null)
'(5 1 2 3 4 1 2 3 2 1)))
;; A char-set is (list-of (cons nat nat))
;; Each cons represents a range of characters, and the entire
;; set is the union of the ranges. The ranges must be disjoint and
;; increasing. Further, adjacent ranges must have at least
;; one number between them.
(define (nat? x)
(and (integer? x) (exact? x) (>= x 0)))
;; char-set? : X -> bool
(define (char-set? x)
(let loop ((set x)
(current-num -2))
(or
(null? set)
(and (pair? set)
(pair? (car set))
(nat? (caar set))
(nat? (cdar set))
(< (add1 current-num) (caar set))
(<= (caar set) (cdar set))
(loop (cdr set) (cdar set))))))
(test-block ()
((char-set? '((0 . 4) (7 . 9))) #t)
((char-set? '((-1 . 4))) #f)
((char-set? '((11 . 10))) #f)
((char-set? '((0 . 10) (8 . 12))) #f)
((char-set? '((10 . 20) (1 . 2))) #f)
((char-set? '((1 . 1))) #t)
((char-set? '((1 . 1) (2 . 3))) #f)
((char-set? '((1 . 1) (3 . 3))) #t)
((char-set? null) #t))
;; make-range : int * int -> char-set
;; creates a set of chars between i and j. i <= j
(define (make-range i j)
(list (cons i j)))
(test-block ()
((make-range 97 110) '((97 . 110)))
((make-range 111 111) '((111 . 111))))
;; sub-range? : (cons int int) (cons int int) -> bool
;; true iff the interval [(car r1), (cdr r1)] is a subset of
;; [(car r2), (cdr r2)]
(define (sub-range? r1 r2)
(and (>= (car r1) (car r2))
(<= (cdr r1) (cdr r2))))
;; overlap? : (cons int int) (cons int int) -> bool
;; true iff the intervals [(car r1), (cdr r1)] and [(car r2), (cdr r2)]
;; have non-empty intersections and (car r1) >= (car r2)
(define (overlap? r1 r2)
(and (>= (car r1) (car r2))
(>= (cdr r1) (cdr r2))
(<= (car r1) (cdr r2))))
;; merge : char-set char-set -> char-set
;; unions 2 char-sets
(define (merge s1 s2)
(cond
((null? s2) s1)
((null? s1) s2)
(else
(let ((r1 (car s1))
(r2 (car s2)))
(cond
((sub-range? r1 r2) (merge (cdr s1) s2))
((sub-range? r2 r1) (merge s1 (cdr s2)))
((or (overlap? r1 r2) (= (car r1) (add1 (cdr r2))))
(merge (cons (cons (car r2) (cdr r1)) (cdr s1)) (cdr s2)))
((or (overlap? r2 r1) (= (car r2) (add1 (cdr r1))))
(merge (cdr s1) (cons (cons (car r1) (cdr r2)) (cdr s2))))
((< (car r1) (car r2))
(cons r1 (merge (cdr s1) s2)))
(else
(cons r2 (merge s1 (cdr s2)))))))))
(test-block ()
((merge null null) null)
((merge null '((1 . 10))) '((1 . 10)))
((merge '((1 . 10)) null) '((1 . 10)))
;; r1 in r2
((merge '((5 . 10)) '((5 . 10))) '((5 . 10)))
((merge '((6 . 9)) '((5 . 10))) '((5 . 10)))
((merge '((7 . 7)) '((5 . 10))) '((5 . 10)))
;; r2 in r1
((merge '((5 . 10)) '((5 . 10))) '((5 . 10)))
((merge '((5 . 10)) '((6 . 9))) '((5 . 10)))
((merge '((5 . 10)) '((7 . 7))) '((5 . 10)))
;; r2 and r1 are disjoint
((merge '((5 . 10)) '((12 . 14))) '((5 . 10) (12 . 14)))
((merge '((12 . 14)) '((5 . 10))) '((5 . 10) (12 . 14)))
;; r1 and r1 are adjacent
((merge '((5 . 10)) '((11 . 13))) '((5 . 13)))
((merge '((11 . 13)) '((5 . 10))) '((5 . 13)))
;; r1 and r2 overlap
((merge '((5 . 10)) '((7 . 14))) '((5 . 14)))
((merge '((7 . 14)) '((5 . 10))) '((5 . 14)))
((merge '((5 . 10)) '((10 . 14))) '((5 . 14)))
((merge '((7 . 10)) '((5 . 7))) '((5 . 10)))
;; with lists
((merge '((1 . 1) (3 . 3) (5 . 10) (100 . 200))
'((2 . 2) (10 . 12) (300 . 300)))
'((1 . 3) (5 . 12) (100 . 200) (300 . 300)))
((merge '((1 . 1) (3 . 3) (5 . 5) (8 . 8) (10 . 10) (12 . 12))
'((2 . 2) (4 . 4) (6 . 7) (9 . 9) (11 . 11)))
'((1 . 12)))
((merge '((2 . 2) (4 . 4) (6 . 7) (9 . 9) (11 . 11))
'((1 . 1) (3 . 3) (5 . 5) (8 . 8) (10 . 10) (12 . 12)))
'((1 . 12))))
;; split-sub-range : (cons int int) (cons int int) -> char-set
;; (subrange? r1 r2) must hold.
;; returns [(car r2), (cdr r2)] - ([(car r1), (cdr r1)] intersect [(car r2), (cdr r2)]).
(define (split-sub-range r1 r2)
(let ((r1-car (car r1))
(r1-cdr (cdr r1))
(r2-car (car r2))
(r2-cdr (cdr r2)))
(cond
((and (= r1-car r2-car) (= r1-cdr r2-cdr)) null)
((= r1-car r2-car) (list (cons (add1 r1-cdr) r2-cdr)))
((= r1-cdr r2-cdr) (list (cons r2-car (sub1 r1-car))))
(else
(list (cons r2-car (sub1 r1-car)) (cons (add1 r1-cdr) r2-cdr))))))
(test-block ()
((split-sub-range '(1 . 10) '(1 . 10)) '())
((split-sub-range '(1 . 5) '(1 . 10)) '((6 . 10)))
((split-sub-range '(2 . 10) '(1 . 10)) '((1 . 1)))
((split-sub-range '(2 . 5) '(1 . 10)) '((1 . 1) (6 . 10))))
(define (split-acc s1 s2 i s1-i s2-i)
(cond
((null? s1) (values (reverse! i) (reverse! s1-i) (reverse! (append! (reverse s2) s2-i))))
((null? s2) (values (reverse! i) (reverse! (append! (reverse s1) s1-i)) (reverse! s2-i)))
(else
(let ((r1 (car s1))
(r2 (car s2)))
(cond
((sub-range? r1 r2)
(split-acc (cdr s1) (append (split-sub-range r1 r2) (cdr s2))
(cons r1 i) s1-i s2-i))
((sub-range? r2 r1)
(split-acc (append (split-sub-range r2 r1) (cdr s1)) (cdr s2)
(cons r2 i) s1-i s2-i))
((overlap? r1 r2)
(split-acc (cons (cons (add1 (cdr r2)) (cdr r1)) (cdr s1))
(cdr s2)
(cons (cons (car r1) (cdr r2)) i)
s1-i
(cons (cons (car r2) (sub1 (car r1))) s2-i)))
((overlap? r2 r1)
(split-acc (cdr s1)
(cons (cons (add1 (cdr r1)) (cdr r2)) (cdr s2))
(cons (cons (car r2) (cdr r1)) i)
(cons (cons (car r1) (sub1 (car r2)))s1-i )
s2-i))
((< (car r1) (car r2))
(split-acc (cdr s1) s2 i (cons r1 s1-i) s2-i))
(else
(split-acc s1 (cdr s2) i s1-i (cons r2 s2-i))))))))
;; split : char-set -> char-set char-set char-set
;; returns (l1 intersect l2), l1 - (l1 intersect l2) and l2 - (l1 intersect l2)
(define (split s1 s2)
(split-acc s1 s2 null null null))
(test-block ((s (lambda (s1 s2)
(call-with-values (lambda () (split s1 s2)) list))))
((s null null) '(() () ()))
((s '((1 . 10)) null) '(() ((1 . 10)) ()))
((s null '((1 . 10))) '(() () ((1 . 10))))
((s '((1 . 10)) null) '(() ((1 . 10)) ()))
((s '((1 . 10)) '((1 . 10))) '(((1 . 10)) () ()))
((s '((1 . 10)) '((2 . 5))) '(((2 . 5)) ((1 . 1) (6 . 10)) ()))
((s '((2 . 5)) '((1 . 10))) '(((2 . 5)) () ((1 . 1) (6 . 10))))
((s '((2 . 5)) '((5 . 10))) '(((5 . 5)) ((2 . 4)) ((6 . 10))))
((s '((5 . 10)) '((2 . 5))) '(((5 . 5)) ((6 . 10)) ((2 . 4))))
((s '((2 . 10)) '((5 . 14))) '(((5 . 10)) ((2 . 4)) ((11 . 14))))
((s '((5 . 14)) '((2 . 10))) '(((5 . 10)) ((11 . 14)) ((2 . 4))))
((s '((10 . 20)) '((30 . 50))) '(() ((10 . 20)) ((30 . 50))))
((s '((100 . 200)) '((30 . 50))) '(() ((100 . 200)) ((30 . 50))))
((s '((1 . 5) (7 . 9) (100 . 200) (500 . 600) (600 . 700))
'((2 . 8) (50 . 60) (101 . 104) (105 . 220)))
'(((2 . 5) (7 . 8) (101 . 104) (105 . 200))
((1 . 1) (9 . 9) (100 . 100) (500 . 600) (600 . 700))
((6 . 6) (50 . 60) (201 . 220))))
((s '((2 . 8) (50 . 60) (101 . 104) (105 . 220))
'((1 . 5) (7 . 9) (100 . 200) (500 . 600) (600 . 700)))
'(((2 . 5) (7 . 8) (101 . 104) (105 . 200))
((6 . 6) (50 . 60) (201 . 220))
((1 . 1) (9 . 9) (100 . 100) (500 . 600) (600 . 700))))
)
;; complement-acc : char-set nat nat -> char-set
;; As complement. The current-nat accumulator keeps track of where the
;; next range in the complement should start.
(define (complement-acc s current-nat max)
(cond
((null? s) (if (<= current-nat max)
(list (cons current-nat max))
null))
(else
(let ((s-car (car s)))
(cond
((< current-nat (car s-car))
(cons (cons current-nat (sub1 (car s-car)))
(complement-acc (cdr s) (add1 (cdr s-car)) max)))
((<= current-nat (cdr s-car))
(complement-acc (cdr s) (add1 (cdr s-car)) max))
(else
(complement-acc (cdr s) current-nat max)))))))
;; complement : char-set nat -> char-set
;; A set of all the nats not in s, up to and including max.
;; (cdr (last-pair s)) <= max
(define (complement s max)
(complement-acc s 0 max))
(test-block ()
((complement null 255) '((0 . 255)))
((complement '((1 . 5) (7 . 7) (10 . 200)) 255)
'((0 . 0) (6 . 6) (8 . 9) (201 . 255)))
((complement '((0 . 254)) 255) '((255 . 255)))
((complement '((1 . 255)) 255) '((0 . 0)))
((complement '((0 . 255)) 255) null))
;; char-in-set? : nat char-set -> bool
(define (char-in-set? c cs)
(and
(pair? cs)
(or (<= (caar cs) c (cdar cs))
(char-in-set? c (cdr cs)))))
(test-block ()
((char-in-set? 1 null) #f)
((char-in-set? 19 '((1 . 18) (20 . 21))) #f)
((char-in-set? 19 '((1 . 2) (19 . 19) (20 . 21))) #t))
(define get-a-char car)
(define (char-set->string cs)
(cond
((null? cs) "")
(else
(string-append (format "~a(~a)-~a(~a) "
(caar cs) (integer->char (caar cs))
(cdar cs) (integer->char (cdar cs)))
(char-set->string (cdr cs))))))
(define (char-for-each-acc f start stop cs)
(cond
((and (> start stop) (null? cs)) (void))
((> start stop)
(char-for-each-acc f (caar cs) (cdar cs) (cdr cs)))
(else
(f start)
(char-for-each-acc f (add1 start) stop cs))))
(define (char-set-for-each f cs)
(cond
((null? cs) (void))
(else
(char-for-each-acc f (caar cs) (cdar cs) (cdr cs)))))
)

Loading…
Cancel
Save