diff --git a/collects/parser-tools/private-lex/deriv.ss b/collects/parser-tools/private-lex/deriv.ss index 7777290..566f919 100644 --- a/collects/parser-tools/private-lex/deriv.ss +++ b/collects/parser-tools/private-lex/deriv.ss @@ -1,6 +1,7 @@ (module deriv mzscheme (require (lib "list.ss") + (prefix is: (lib "integer-set.ss")) "re.ss" "util.ss") @@ -57,58 +58,6 @@ ((get-char-groups (->re `(& (* ,r1) (@ (* ,r2) "3") "4") c) #f) (list r1 r2 (->re "3" c) (->re "4" c))) ) - - - ;; partition : (list-of char-set) -> (list-of char-set) - ;; The coarsest refinment r of sets such that the char-sets in r - ;; are pairwise disjoint. - (define (partition sets) - (cond - ((null? sets) null) - (else - (partition1 (car sets) (partition (cdr sets)))))) - - ;; partition1 : char-set (list-of char-set) -> (list-of char-set) - ;; All the char-sets in sets must be pairwise disjoint. Splits set - ;; against each element in sets. - (define (partition1 set sets) - (cond - ((null? set) sets) - ((null? sets) (list set)) - (else - (let ((set2 (car sets))) - (let-values (((i s1 s2) (split set set2))) - (let ((rest (partition1 s1 (cdr sets)))) - (cond - ((null? i) - (cons s2 rest)) - ((null? s2) - (cons i rest)) - (else - (cons i (cons s2 rest)))))))))) - - (test-block ((sl (lambda (str) - (foldr (lambda (c cs) - (merge (make-range (char->integer c) (char->integer c)) - cs)) - null - (string->list str))))) - ((partition null) null) - ((partition (list (sl "1234"))) (list (sl "1234"))) - ((partition (list (sl "1234") (sl "0235"))) - (list (sl "23") (sl "05") (sl "14"))) - ((partition (list (sl "12349") (sl "02359") (sl "67") (sl "29"))) - (list (sl "29") (sl "67") (sl "3") (sl "05") (sl "14"))) - ((partition1 (sl "bcdjw") null) (list (sl "bcdjw"))) - ((partition1 null null) null) - ((partition1 null (list (sl "a") (sl "b") (sl "1"))) - (list (sl "a") (sl "b") (sl "1"))) - ((partition1 (sl "bcdjw") - (list (sl "z") - (sl "ab") - (sl "dj"))) - (list (sl "z") (sl "b") (sl "a") (sl "dj") (sl "cw")))) - ;; deriveR : re char cache -> re @@ -116,7 +65,7 @@ (cond ((or (eq? r e) (eq? r z)) z) ((char-setR? r) - (if (char-in-set? c (char-setR-chars r)) e z)) + (if (is:member? c (char-setR-chars r)) e z)) ((concatR? r) (let* ((r1 (concatR-re1 r)) (r2 (concatR-re2 r)) @@ -238,9 +187,9 @@ (cond ((null? st) null) (else - (partition (map char-setR-chars - (apply append (map (lambda (x) (get-char-groups (car x) #f)) - (state-spec (car st))))))))) + (is:partition (map char-setR-chars + (apply append (map (lambda (x) (get-char-groups (car x) #f)) + (state-spec (car st))))))))) (test-block ((c (make-cache)) (c->i char->integer) @@ -248,9 +197,10 @@ (r2 (->re `(- #\2 #\3) c))) ((compute-chars null) null) ((compute-chars (list (make-state null 1))) null) - ((compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2))) - (list (make-range (c->i #\2) (c->i #\3)) (append (make-range (c->i #\1) (c->i #\1)) - (make-range (c->i #\4) (c->i #\4)))))) + ((map is:integer-set-contents (compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2)))) + (list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3))) + (is:integer-set-contents (is:union (is:make-range (c->i #\1)) + (is:make-range (c->i #\4))))))) ;; A dfa is (make-dfa int int @@ -293,7 +243,7 @@ (else (let* ((state (car old-states)) (c (car cs)) - (new-re (derive (state-spec state) (get-a-char (car c)) cache))) + (new-re (derive (state-spec state) (is:get-integer c) cache))) (cond (new-re (let* ((new-state? #f) @@ -322,7 +272,7 @@ (printf "state: ~a~n" (car trans)) (for-each (lambda (rule) (printf " -~a-> ~a~n" - (char-set->string (car rule)) + (is:integer-set-contents (car rule)) (cdr rule))) (cdr trans))) (dfa-transitions x))) diff --git a/collects/parser-tools/private-lex/front.ss b/collects/parser-tools/private-lex/front.ss index 1123fab..97ea07e 100644 --- a/collects/parser-tools/private-lex/front.ss +++ b/collects/parser-tools/private-lex/front.ss @@ -1,5 +1,6 @@ (module front mzscheme - (require "util.ss" + (require (prefix is: (lib "integer-set.ss")) + "util.ss" "stx.ss" "re.ss" "deriv.ss") @@ -13,20 +14,18 @@ (printf "~a: " l) (time (begin e ...)))))) + ;; A table is either + ;; - (vector-of (union #f nat)) + ;; - (vector-of (vector-of (cons (cons nat nat) nat))) - ;; dfa->table : dfa -> (same as build-lexer) - (define (dfa->table dfa) + ;; dfa->1d-table : dfa -> (same as build-lexer) +; (define (dfa->1d-table dfa) +; (let ( + + + ;; dfa->2d-table : dfa -> (same as build-lexer) + (define (dfa->2d-table dfa) (let ( - ;; no-look : (vector-of bool) - ;; For each state whether the lexer can ignore the next input. - ;; It can do this only if there are no transitions out of the - ;; current state. - (no-look (make-vector (dfa-num-states dfa) #t)) - - ;; actions : (vector-of (union #f syntax-object)) - ;; The action for each final state, #f if the state isn't final - (actions (make-vector (dfa-num-states dfa) #f)) - ;; char-table : (vector-of (union #f nat)) ;; The lexer table, one entry per state per char. ;; Each entry specifies a state to transition to. @@ -39,48 +38,57 @@ (let ((from-state (car trans))) (for-each (lambda (chars/to) (let ((to-state (cdr chars/to))) - (char-set-for-each (lambda (char) - (vector-set! char-table - (bitwise-ior - char - (arithmetic-shift from-state 8)) - to-state)) - (car chars/to)))) + (is:foldr (lambda (char _) + (vector-set! char-table + (bitwise-ior + char + (arithmetic-shift from-state 8)) + to-state)) + (void) + (car chars/to)))) (cdr trans)))) (dfa-transitions dfa)) + char-table)) - (for-each (lambda (trans) - (vector-set! no-look (car trans) #f)) - (dfa-transitions dfa)) + ;; dfa->actions : dfa -> (vector-of (union #f syntax-object)) + ;; The action for each final state, #f if the state isn't final + (define (dfa->actions dfa) + (let ((actions (make-vector (dfa-num-states dfa) #f))) (for-each (lambda (state/action) (vector-set! actions (car state/action) (cdr state/action))) (dfa-final-states/actions dfa)) + actions)) - (values char-table (dfa-start-state dfa) actions no-look))) - - (test-block () - ((call-with-values (lambda () - (dfa->table (make-dfa 1 1 (list) (list)))) - list) - (list (make-vector 256 #f) 1 (vector #f) (make-vector 1 #t))) - ((call-with-values (lambda () - (dfa->table (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) - (list (cons 1 (list (cons (make-range 49 50) 1) - (cons (make-range 51 51) 2))) - (cons 2 (list (cons (make-range 49 49) 3))))))) - list) - (list (let ((v (make-vector 1024 #f))) - (vector-set! v 305 1) - (vector-set! v 306 1) - (vector-set! v 307 2) - (vector-set! v 561 3) - v) - 1 - (vector #f #f 2 3) - (vector #t #f #f #t)))) - - ;; build-lexer : syntax-object list -> (values (vector-of (union #f nat)) nat (vector-of (union #f syntax-object)) (vector-of bool)) + ;; dfa->no-look : dfa -> (vector-of bool) + ;; For each state whether the lexer can ignore the next input. + ;; It can do this only if there are no transitions out of the + ;; current state. + (define (dfa->no-look dfa) + (let ((no-look (make-vector (dfa-num-states dfa) #t))) + (for-each (lambda (trans) + (vector-set! no-look (car trans) #f)) + (dfa-transitions dfa)) + no-look)) + + (test-block ((d1 (make-dfa 1 1 (list) (list))) + (d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) + (list (cons 1 (list (cons (is:make-range 49 50) 1) + (cons (is:make-range 51) 2))) + (cons 2 (list (cons (is:make-range 49) 3))))))) + ((dfa->2d-table d1) (make-vector 256 #f)) + ((dfa->2d-table d2) (let ((v (make-vector 1024 #f))) + (vector-set! v 305 1) + (vector-set! v 306 1) + (vector-set! v 307 2) + (vector-set! v 561 3) + v)) + ((dfa->actions d1) (vector #f)) + ((dfa->actions d2) (vector #f #f 2 3)) + ((dfa->no-look d1) (vector #t)) + ((dfa->no-look d2) (vector #t #f #f #t))) + + ;; build-lexer : syntax-object list -> (values table nat (vector-of (union #f syntax-object)) (vector-of bool)) ;; each syntax object has the form (re action) (define (build-lexer sos) (let* ((s-re-acts (map (lambda (so) @@ -98,5 +106,5 @@ (dfa (build-dfa re-acts cache))) ;(print-dfa dfa) ;(printf "states: ~a~n" (dfa-num-states dfa)) - (dfa->table dfa))) + (values (dfa->2d-table dfa) (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa)))) ) diff --git a/collects/parser-tools/private-lex/re.ss b/collects/parser-tools/private-lex/re.ss index 2c04359..cdae42d 100644 --- a/collects/parser-tools/private-lex/re.ss +++ b/collects/parser-tools/private-lex/re.ss @@ -1,6 +1,7 @@ (module re mzscheme (require (lib "match.ss") (lib "list.ss") + (prefix is: (lib "integer-set.ss")) "util.ss") (provide ->re build-epsilon build-zero build-char-set build-concat @@ -64,7 +65,7 @@ ;; ->re : s-re cache -> re (define (->re exp cache) (match exp - ((? char?) (build-char-set (make-range (char->integer exp) (char->integer exp)) cache)) + ((? char?) (build-char-set (is:make-range (char->integer exp)) cache)) ((? string?) (->re `(@ ,@(string->list exp)) cache)) ((? re?) exp) (`(epsilon) (build-epsilon)) @@ -79,12 +80,12 @@ (build-or (list e c) cache)))) (`(: ,rs ...) (build-or (flatten-res (map (lambda (r) (->re r cache)) rs) - orR? orR-res merge cache) + orR? orR-res is:union cache) cache)) (`(& ,rs ...) (build-and (flatten-res (map (lambda (r) (->re r cache)) rs) andR? andR-res (lambda (a b) - (let-values (((i _ __) (split a b))) i)) + (let-values (((i _ __) (is:split a b))) i)) cache) cache)) (`(~ ,r) @@ -98,14 +99,14 @@ (let ((i1 (char->integer (if (string? c1) (string-ref c1 0) c1))) (i2 (char->integer (if (string? c2) (string-ref c2 0) c2)))) (if (<= i1 i2) - (build-char-set (make-range i1 i2) cache) + (build-char-set (is:make-range i1 i2) cache) z))) (`(^ ,crs ...) (let ((cs (->re `(: ,@crs) cache))) (cond - ((zeroR? cs) (build-char-set (make-range 0 255) cache)) + ((zeroR? cs) (build-char-set (is:make-range 0 255) cache)) ((char-setR? cs) - (build-char-set (complement (char-setR-chars cs) 255) cache)) + (build-char-set (is:complement (char-setR-chars cs) 0 255) cache)) (else z)))))) @@ -140,21 +141,14 @@ (define (build-zero) z) ;; build-char-set : char-set cache -> re - #;(define (build-char-set cs cache) + (define (build-char-set cs cache) + (let ((l (is:integer-set-contents cs))) (cond - ((null? cs) z) + ((null? l) z) (else - (make-char-setR #f (get-index) cs)))) - - - ;; build-char-set : char-set cache -> re - (define (build-char-set cs cache) - (cond - ((null? cs) z) - (else - (cache cs - (lambda () - (make-char-setR #f (get-index) cs)))))) + (cache l + (lambda () + (make-char-setR #f (get-index) cs))))))) @@ -220,9 +214,10 @@ ;; Tests for the build-functions (test-block ((c (make-cache)) - (r1 (build-char-set (make-range (char->integer #\1) (char->integer #\1)) c)) - (r2 (build-char-set (make-range (char->integer #\2) (char->integer #\2)) c)) - (r3 (build-char-set (make-range (char->integer #\3) (char->integer #\3)) c)) + (isc is:integer-set-contents) + (r1 (build-char-set (is:make-range (char->integer #\1)) c)) + (r2 (build-char-set (is:make-range (char->integer #\2)) c)) + (r3 (build-char-set (is:make-range (char->integer #\3)) c)) (rc (build-concat r1 r2 c)) (rc2 (build-concat r2 r1 c)) (rr (build-repeat rc c)) @@ -239,10 +234,10 @@ (rn (build-neg z c)) (rn2 (build-neg r1 c))) - ((char-setR-chars r1) (make-range (char->integer #\1) (char->integer #\1))) - ((char-setR-chars r2) (make-range (char->integer #\2) (char->integer #\2))) - ((char-setR-chars r3) (make-range (char->integer #\3) (char->integer #\3))) - ((build-char-set null c) z) + ((isc (char-setR-chars r1)) (isc (is:make-range (char->integer #\1)))) + ((isc (char-setR-chars r2)) (isc (is:make-range (char->integer #\2)))) + ((isc (char-setR-chars r3)) (isc (is:make-range (char->integer #\3)))) + ((build-char-set (is:make-range) c) z) ((build-concat r1 e c) r1) ((build-concat e r1 c) r1) ((build-concat r1 z c) z) @@ -286,33 +281,35 @@ ((re-nullable? (build-neg rr c)) #f)) (test-block ((c (make-cache)) + (isc is:integer-set-contents) (r1 (->re #\1 c)) (r2 (->re #\2 c)) (r3-5 (->re '(- #\3 #\5) c)) (r4 (build-or `(,r1 ,r2) c)) (r5 (->re `(: ,r3-5 #\7) c)) (r6 (->re #\6 c))) - ((flatten-res null orR? orR-res merge c) null) - ((char-setR-chars (car (flatten-res `(,r1) orR? orR-res merge c))) - (make-range (char->integer #\1) (char->integer #\1))) - ((char-setR-chars (car (flatten-res `(,r4) orR? orR-res merge c))) - (make-range (char->integer #\1) (char->integer #\2))) - ((char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1) orR? orR-res merge c))) - (make-range (char->integer #\1) (char->integer #\7))) + ((flatten-res null orR? orR-res is:union c) null) + ((isc (char-setR-chars (car (flatten-res `(,r1) orR? orR-res is:union c)))) + (isc (is:make-range (char->integer #\1)))) + ((isc (char-setR-chars (car (flatten-res `(,r4) orR? orR-res is:union c)))) + (isc (is:make-range (char->integer #\1) (char->integer #\2)))) + ((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1) orR? orR-res is:union c)))) + (isc (is:make-range (char->integer #\1) (char->integer #\7)))) ((flatten-res `(,r1 ,r2) andR? andR-res (lambda (x y) (let-values (((i _ __) - (split x y))) + (is:split x y))) i)) c) (list z))) ;; ->re (test-block ((c (make-cache)) + (isc is:integer-set-contents) (r (->re #\a c)) (rr (->re `(@ ,r ,r) c)) (rrr (->re `(@ ,r ,rr) c)) (rrr* (->re `(* ,rrr) c))) - ((char-setR-chars r) (make-range (char->integer #\a) (char->integer #\a))) + ((isc (char-setR-chars r)) (isc (is:make-range (char->integer #\a)))) ((->re "" c) e) ((->re "asdf" c) (->re `(@ #\a #\s #\d #\f) c)) ((->re r c) r) @@ -323,18 +320,18 @@ ((->re `(? ,rrr*) c) rrr*) ((->re `(: (: (- #\a #\c) (^ (- #\000 #\110) (- #\112 #\377))) (: (* #\2))) c) - (build-or (list (build-char-set (append (make-range 73 73) - (make-range 97 99)) + (build-or (list (build-char-set (is:union (is:make-range 73) + (is:make-range 97 99)) c) - (build-repeat (build-char-set (make-range 50 50) c) c)) + (build-repeat (build-char-set (is:make-range 50) c) c)) c)) ((->re `(: ,rr ,rrr) c) (build-or (list rr rrr) c)) ((->re `(: ,r) c) r) ((->re `(:) c) z) ((->re `(& (& #\111 (^ (- #\000 #\110) (- #\112 #\377))) (& (* #\2))) c) - (build-and (list (build-char-set (make-range 73 73) c) - (build-repeat (build-char-set (make-range 50 50) c) c)) + (build-and (list (build-char-set (is:make-range 73) c) + (build-repeat (build-char-set (is:make-range 50) c) c)) c)) ((->re `(& (& #\000 (^ (- #\000 #\110) (- #\112 #\377))) (& (* #\2))) c) @@ -348,14 +345,14 @@ (rr (build-concat r r c)) ((->re `(@ ,r ,rr ,rrr) c) (build-concat r (build-concat rr rrr c) c)) - ((char-setR-chars (->re `(- #\1 #\1) c)) (make-range 49 49)) - ((char-setR-chars (->re `(- #\1 #\9) c)) (make-range 49 57)) - ((char-setR-chars (->re `(- "1" "1") c)) (make-range 49 49)) - ((char-setR-chars (->re `(- "1" "9") c)) (make-range 49 57)) + ((isc (char-setR-chars (->re `(- #\1 #\1) c))) (isc (is:make-range 49))) + ((isc (char-setR-chars (->re `(- #\1 #\9) c))) (isc (is:make-range 49 57))) + ((isc (char-setR-chars (->re `(- "1" "1") c))) (isc (is:make-range 49))) + ((isc (char-setR-chars (->re `(- "1" "9") c))) (isc (is:make-range 49 57))) ((->re `(- "9" "1") c) z) - ((char-setR-chars (->re `(^) c)) - (char-setR-chars (->re `(- #\000 #\377) c))) - ((char-setR-chars (->re `(^ #\001 (- #\002 #\377)) c)) (make-range 0 0)) + ((isc (char-setR-chars (->re `(^) c))) + (isc (char-setR-chars (->re `(- #\000 #\377) c)))) + ((isc (char-setR-chars (->re `(^ #\001 (- #\002 #\377)) c))) (isc (is:make-range 0))) ) ) \ No newline at end of file diff --git a/collects/parser-tools/private-lex/util.ss b/collects/parser-tools/private-lex/util.ss index c27400a..721ce0e 100644 --- a/collects/parser-tools/private-lex/util.ss +++ b/collects/parser-tools/private-lex/util.ss @@ -1,7 +1,7 @@ (module util mzscheme (require (lib "list.ss")) - (provide (all-defined-except split-acc complement-acc)) + (provide (all-defined)) (define-struct lex-abbrev (abbrev)) @@ -106,276 +106,6 @@ null) '(5 1 2 3 4 1 2 3 2 1))) - - ;; A char-set is (list-of (cons nat nat)) - ;; Each cons represents a range of characters, and the entire - ;; set is the union of the ranges. The ranges must be disjoint and - ;; increasing. Further, adjacent ranges must have at least - ;; one number between them. - - - (define (nat? x) - (and (integer? x) (exact? x) (>= x 0))) - - ;; char-set? : X -> bool - (define (char-set? x) - (let loop ((set x) - (current-num -2)) - (or - (null? set) - (and (pair? set) - (pair? (car set)) - (nat? (caar set)) - (nat? (cdar set)) - (< (add1 current-num) (caar set)) - (<= (caar set) (cdar set)) - (loop (cdr set) (cdar set)))))) - (test-block () - ((char-set? '((0 . 4) (7 . 9))) #t) - ((char-set? '((-1 . 4))) #f) - ((char-set? '((11 . 10))) #f) - ((char-set? '((0 . 10) (8 . 12))) #f) - ((char-set? '((10 . 20) (1 . 2))) #f) - ((char-set? '((1 . 1))) #t) - ((char-set? '((1 . 1) (2 . 3))) #f) - ((char-set? '((1 . 1) (3 . 3))) #t) - ((char-set? null) #t)) - - - - ;; make-range : int * int -> char-set - ;; creates a set of chars between i and j. i <= j - (define (make-range i j) - (list (cons i j))) - (test-block () - ((make-range 97 110) '((97 . 110))) - ((make-range 111 111) '((111 . 111)))) - - - ;; sub-range? : (cons int int) (cons int int) -> bool - ;; true iff the interval [(car r1), (cdr r1)] is a subset of - ;; [(car r2), (cdr r2)] - (define (sub-range? r1 r2) - (and (>= (car r1) (car r2)) - (<= (cdr r1) (cdr r2)))) - - ;; overlap? : (cons int int) (cons int int) -> bool - ;; true iff the intervals [(car r1), (cdr r1)] and [(car r2), (cdr r2)] - ;; have non-empty intersections and (car r1) >= (car r2) - (define (overlap? r1 r2) - (and (>= (car r1) (car r2)) - (>= (cdr r1) (cdr r2)) - (<= (car r1) (cdr r2)))) - - - ;; merge : char-set char-set -> char-set - ;; unions 2 char-sets - (define (merge s1 s2) - (cond - ((null? s2) s1) - ((null? s1) s2) - (else - (let ((r1 (car s1)) - (r2 (car s2))) - (cond - ((sub-range? r1 r2) (merge (cdr s1) s2)) - ((sub-range? r2 r1) (merge s1 (cdr s2))) - ((or (overlap? r1 r2) (= (car r1) (add1 (cdr r2)))) - (merge (cons (cons (car r2) (cdr r1)) (cdr s1)) (cdr s2))) - ((or (overlap? r2 r1) (= (car r2) (add1 (cdr r1)))) - (merge (cdr s1) (cons (cons (car r1) (cdr r2)) (cdr s2)))) - ((< (car r1) (car r2)) - (cons r1 (merge (cdr s1) s2))) - (else - (cons r2 (merge s1 (cdr s2))))))))) - (test-block () - ((merge null null) null) - ((merge null '((1 . 10))) '((1 . 10))) - ((merge '((1 . 10)) null) '((1 . 10))) - ;; r1 in r2 - ((merge '((5 . 10)) '((5 . 10))) '((5 . 10))) - ((merge '((6 . 9)) '((5 . 10))) '((5 . 10))) - ((merge '((7 . 7)) '((5 . 10))) '((5 . 10))) - ;; r2 in r1 - ((merge '((5 . 10)) '((5 . 10))) '((5 . 10))) - ((merge '((5 . 10)) '((6 . 9))) '((5 . 10))) - ((merge '((5 . 10)) '((7 . 7))) '((5 . 10))) - ;; r2 and r1 are disjoint - ((merge '((5 . 10)) '((12 . 14))) '((5 . 10) (12 . 14))) - ((merge '((12 . 14)) '((5 . 10))) '((5 . 10) (12 . 14))) - ;; r1 and r1 are adjacent - ((merge '((5 . 10)) '((11 . 13))) '((5 . 13))) - ((merge '((11 . 13)) '((5 . 10))) '((5 . 13))) - ;; r1 and r2 overlap - ((merge '((5 . 10)) '((7 . 14))) '((5 . 14))) - ((merge '((7 . 14)) '((5 . 10))) '((5 . 14))) - ((merge '((5 . 10)) '((10 . 14))) '((5 . 14))) - ((merge '((7 . 10)) '((5 . 7))) '((5 . 10))) - ;; with lists - ((merge '((1 . 1) (3 . 3) (5 . 10) (100 . 200)) - '((2 . 2) (10 . 12) (300 . 300))) - '((1 . 3) (5 . 12) (100 . 200) (300 . 300))) - ((merge '((1 . 1) (3 . 3) (5 . 5) (8 . 8) (10 . 10) (12 . 12)) - '((2 . 2) (4 . 4) (6 . 7) (9 . 9) (11 . 11))) - '((1 . 12))) - ((merge '((2 . 2) (4 . 4) (6 . 7) (9 . 9) (11 . 11)) - '((1 . 1) (3 . 3) (5 . 5) (8 . 8) (10 . 10) (12 . 12))) - '((1 . 12)))) - - - ;; split-sub-range : (cons int int) (cons int int) -> char-set - ;; (subrange? r1 r2) must hold. - ;; returns [(car r2), (cdr r2)] - ([(car r1), (cdr r1)] intersect [(car r2), (cdr r2)]). - (define (split-sub-range r1 r2) - (let ((r1-car (car r1)) - (r1-cdr (cdr r1)) - (r2-car (car r2)) - (r2-cdr (cdr r2))) - (cond - ((and (= r1-car r2-car) (= r1-cdr r2-cdr)) null) - ((= r1-car r2-car) (list (cons (add1 r1-cdr) r2-cdr))) - ((= r1-cdr r2-cdr) (list (cons r2-car (sub1 r1-car)))) - (else - (list (cons r2-car (sub1 r1-car)) (cons (add1 r1-cdr) r2-cdr)))))) - (test-block () - ((split-sub-range '(1 . 10) '(1 . 10)) '()) - ((split-sub-range '(1 . 5) '(1 . 10)) '((6 . 10))) - ((split-sub-range '(2 . 10) '(1 . 10)) '((1 . 1))) - ((split-sub-range '(2 . 5) '(1 . 10)) '((1 . 1) (6 . 10)))) - - - (define (split-acc s1 s2 i s1-i s2-i) - (cond - ((null? s1) (values (reverse! i) (reverse! s1-i) (reverse! (append! (reverse s2) s2-i)))) - ((null? s2) (values (reverse! i) (reverse! (append! (reverse s1) s1-i)) (reverse! s2-i))) - (else - (let ((r1 (car s1)) - (r2 (car s2))) - (cond - ((sub-range? r1 r2) - (split-acc (cdr s1) (append (split-sub-range r1 r2) (cdr s2)) - (cons r1 i) s1-i s2-i)) - ((sub-range? r2 r1) - (split-acc (append (split-sub-range r2 r1) (cdr s1)) (cdr s2) - (cons r2 i) s1-i s2-i)) - ((overlap? r1 r2) - (split-acc (cons (cons (add1 (cdr r2)) (cdr r1)) (cdr s1)) - (cdr s2) - (cons (cons (car r1) (cdr r2)) i) - s1-i - (cons (cons (car r2) (sub1 (car r1))) s2-i))) - ((overlap? r2 r1) - (split-acc (cdr s1) - (cons (cons (add1 (cdr r1)) (cdr r2)) (cdr s2)) - (cons (cons (car r2) (cdr r1)) i) - (cons (cons (car r1) (sub1 (car r2)))s1-i ) - s2-i)) - ((< (car r1) (car r2)) - (split-acc (cdr s1) s2 i (cons r1 s1-i) s2-i)) - (else - (split-acc s1 (cdr s2) i s1-i (cons r2 s2-i)))))))) - - ;; split : char-set -> char-set char-set char-set - ;; returns (l1 intersect l2), l1 - (l1 intersect l2) and l2 - (l1 intersect l2) - (define (split s1 s2) - (split-acc s1 s2 null null null)) - - (test-block ((s (lambda (s1 s2) - (call-with-values (lambda () (split s1 s2)) list)))) - ((s null null) '(() () ())) - ((s '((1 . 10)) null) '(() ((1 . 10)) ())) - ((s null '((1 . 10))) '(() () ((1 . 10)))) - ((s '((1 . 10)) null) '(() ((1 . 10)) ())) - ((s '((1 . 10)) '((1 . 10))) '(((1 . 10)) () ())) - ((s '((1 . 10)) '((2 . 5))) '(((2 . 5)) ((1 . 1) (6 . 10)) ())) - ((s '((2 . 5)) '((1 . 10))) '(((2 . 5)) () ((1 . 1) (6 . 10)))) - ((s '((2 . 5)) '((5 . 10))) '(((5 . 5)) ((2 . 4)) ((6 . 10)))) - ((s '((5 . 10)) '((2 . 5))) '(((5 . 5)) ((6 . 10)) ((2 . 4)))) - ((s '((2 . 10)) '((5 . 14))) '(((5 . 10)) ((2 . 4)) ((11 . 14)))) - ((s '((5 . 14)) '((2 . 10))) '(((5 . 10)) ((11 . 14)) ((2 . 4)))) - ((s '((10 . 20)) '((30 . 50))) '(() ((10 . 20)) ((30 . 50)))) - ((s '((100 . 200)) '((30 . 50))) '(() ((100 . 200)) ((30 . 50)))) - ((s '((1 . 5) (7 . 9) (100 . 200) (500 . 600) (600 . 700)) - '((2 . 8) (50 . 60) (101 . 104) (105 . 220))) - '(((2 . 5) (7 . 8) (101 . 104) (105 . 200)) - ((1 . 1) (9 . 9) (100 . 100) (500 . 600) (600 . 700)) - ((6 . 6) (50 . 60) (201 . 220)))) - ((s '((2 . 8) (50 . 60) (101 . 104) (105 . 220)) - '((1 . 5) (7 . 9) (100 . 200) (500 . 600) (600 . 700))) - '(((2 . 5) (7 . 8) (101 . 104) (105 . 200)) - ((6 . 6) (50 . 60) (201 . 220)) - ((1 . 1) (9 . 9) (100 . 100) (500 . 600) (600 . 700)))) - ) - - ;; complement-acc : char-set nat nat -> char-set - ;; As complement. The current-nat accumulator keeps track of where the - ;; next range in the complement should start. - (define (complement-acc s current-nat max) - (cond - ((null? s) (if (<= current-nat max) - (list (cons current-nat max)) - null)) - (else - (let ((s-car (car s))) - (cond - ((< current-nat (car s-car)) - (cons (cons current-nat (sub1 (car s-car))) - (complement-acc (cdr s) (add1 (cdr s-car)) max))) - ((<= current-nat (cdr s-car)) - (complement-acc (cdr s) (add1 (cdr s-car)) max)) - (else - (complement-acc (cdr s) current-nat max))))))) - - - ;; complement : char-set nat -> char-set - ;; A set of all the nats not in s, up to and including max. - ;; (cdr (last-pair s)) <= max - (define (complement s max) - (complement-acc s 0 max)) - (test-block () - ((complement null 255) '((0 . 255))) - ((complement '((1 . 5) (7 . 7) (10 . 200)) 255) - '((0 . 0) (6 . 6) (8 . 9) (201 . 255))) - ((complement '((0 . 254)) 255) '((255 . 255))) - ((complement '((1 . 255)) 255) '((0 . 0))) - ((complement '((0 . 255)) 255) null)) - - ;; char-in-set? : nat char-set -> bool - (define (char-in-set? c cs) - (and - (pair? cs) - (or (<= (caar cs) c (cdar cs)) - (char-in-set? c (cdr cs))))) - (test-block () - ((char-in-set? 1 null) #f) - ((char-in-set? 19 '((1 . 18) (20 . 21))) #f) - ((char-in-set? 19 '((1 . 2) (19 . 19) (20 . 21))) #t)) - - (define get-a-char car) - - (define (char-set->string cs) - (cond - ((null? cs) "") - (else - (string-append (format "~a(~a)-~a(~a) " - (caar cs) (integer->char (caar cs)) - (cdar cs) (integer->char (cdar cs))) - (char-set->string (cdr cs)))))) - - (define (char-for-each-acc f start stop cs) - (cond - ((and (> start stop) (null? cs)) (void)) - ((> start stop) - (char-for-each-acc f (caar cs) (cdar cs) (cdr cs))) - (else - (f start) - (char-for-each-acc f (add1 start) stop cs)))) - - (define (char-set-for-each f cs) - (cond - ((null? cs) (void)) - (else - (char-for-each-acc f (caar cs) (cdar cs) (cdr cs))))) )