*** empty log message ***

original commit: 5da545dd89283fd1586b58262df29697900d37ff
tokens
Scott Owens 20 years ago
parent e6af00242c
commit 1bab38b019

@ -16,7 +16,7 @@
;; An re is either ;; An re is either
;; - (make-epsilonR bool nat) ;; - (make-epsilonR bool nat)
;; - (make-zeroR bool nat) ;; - (make-zeroR bool nat)
;; - (make-char-setR bool nat (list-of char)) The list must be sorted ;; - (make-char-setR bool nat char-set)
;; - (make-concatR bool nat re re) ;; - (make-concatR bool nat re re)
;; - (make-repeatR bool nat re) ;; - (make-repeatR bool nat re)
;; - (make-orR bool nat (list-of re)) Must not directly contain any orRs ;; - (make-orR bool nat (list-of re)) Must not directly contain any orRs
@ -64,7 +64,7 @@
;; ->re : s-re cache -> re ;; ->re : s-re cache -> re
(define (->re exp cache) (define (->re exp cache)
(match exp (match exp
((? char?) (build-char-set (list exp) cache)) ((? char?) (build-char-set (make-range (char->integer exp) (char->integer exp)) cache))
((? string?) (->re `(@ ,@(string->list exp)) cache)) ((? string?) (->re `(@ ,@(string->list exp)) cache))
((? re?) exp) ((? re?) exp)
(`(epsilon) (build-epsilon)) (`(epsilon) (build-epsilon))
@ -103,7 +103,7 @@
(`(^ ,crs ...) (`(^ ,crs ...)
(let ((cs (->re `(: ,@crs) cache))) (let ((cs (->re `(: ,@crs) cache)))
(cond (cond
((zeroR? cs) (build-char-set (make-range 0 255) cache)) ((zeroR? cs) (build-char-set (make-range 0 (sub1 (expt 2 32))) cache))
((char-setR? cs) ((char-setR? cs)
(build-char-set (build-char-set
(let loop ((bad-chars (map char->integer (let loop ((bad-chars (map char->integer

@ -106,75 +106,205 @@
null) null)
'(5 1 2 3 4 1 2 3 2 1))) '(5 1 2 3 4 1 2 3 2 1)))
;; make-range : int * int -> char list
;; creates a list of all chars between i and j. i <= j ;; A char-set is (list-of (cons nat nat))
;; Each cons represents a range of characters, and the entire
;; set is the union of the ranges. The ranges must be disjoint and
;; increasing. Further, adjacent ranges must have at least
;; one number between them.
(define (nat? x)
(and (integer? x) (exact? x) (>= x 0)))
;; char-set? : X -> bool
(define (char-set? x)
(let loop ((set x)
(current-num -2))
(or
(null? set)
(and (pair? set)
(pair? (car set))
(nat? (caar set))
(nat? (cdar set))
(< (add1 current-num) (caar set))
(<= (caar set) (cdar set))
(loop (cdr set) (cdar set))))))
(test-block ()
((char-set? '((0 . 4) (7 . 9))) #t)
((char-set? '((-1 . 4))) #f)
((char-set? '((11 . 10))) #f)
((char-set? '((0 . 10) (8 . 12))) #f)
((char-set? '((10 . 20) (1 . 2))) #f)
((char-set? '((1 . 1))) #t)
((char-set? '((1 . 1) (2 . 3))) #f)
((char-set? '((1 . 1) (3 . 3))) #t)
((char-set? null) #t))
;; make-range : int * int -> char-set
;; creates a set of chars between i and j. i <= j
(define (make-range i j) (define (make-range i j)
(letrec ((make-range (list (cons i j)))
(lambda (i j)
(cond
((= i j) (list (integer->char i)))
(else
(cons (integer->char i) (make-range (add1 i) j)))))))
(make-range i j)))
(test-block () (test-block ()
((make-range 97 110) (string->list "abcdefghijklmn")) ((make-range 97 110) '((97 . 110)))
((make-range 111 111) '(#\o))) ((make-range 111 111) '((111 . 111))))
;; sub-range? : (cons int int) (cons int int) -> bool
;; true iff the interval [(car r1), (cdr r1)] is a subset of
;; [(car r2), (cdr r2)]
(define (sub-range? r1 r2)
(and (>= (car r1) (car r2))
(<= (cdr r1) (cdr r2))))
;; overlap? : (cons int int) (cons int int) -> bool
;; true iff the intervals [(car r1), (cdr r1)] and [(car r2), (cdr r2)]
;; have non-empty intersections and (car r1) >= (car r2)
(define (overlap? r1 r2)
(and (>= (car r1) (car r2))
(>= (cdr r1) (cdr r2))
(<= (car r1) (cdr r2))))
;; merge : (list-of char) (list-of char) -> (list-of char)
;; Combines 2 sorted, duplicate-free lists into 1, removing duplicates. ;; merge : char-set char-set -> char-set
(define (merge l1 l2) ;; unions 2 char-sets
(define (merge s1 s2)
(cond (cond
((null? l2) l1) ((null? s2) s1)
((null? l1) l2) ((null? s1) s2)
(else (let ((cl1 (car l1)) (else
(cl2 (car l2))) (let ((r1 (car s1))
(cond (r2 (car s2)))
((> (char->integer cl1) (char->integer cl2)) (cond
(cons cl2 (merge l1 (cdr l2)))) ((sub-range? r1 r2) (merge (cdr s1) s2))
((< (char->integer cl1) (char->integer cl2)) ((sub-range? r2 r1) (merge s1 (cdr s2)))
(cons cl1 (merge (cdr l1) l2))) ((or (overlap? r1 r2) (= (car r1) (add1 (cdr r2))))
(else (merge (cdr l1) l2))))))) (merge (cons (cons (car r2) (cdr r1)) (cdr s1)) (cdr s2)))
((or (overlap? r2 r1) (= (car r2) (add1 (cdr r1))))
(merge (cdr s1) (cons (cons (car r1) (cdr r2)) (cdr s2))))
((< (car r1) (car r2))
(cons r1 (merge (cdr s1) s2)))
(else
(cons r2 (merge s1 (cdr s2)))))))))
(test-block () (test-block ()
((merge (string->list "abcd")
(string->list "abde"))
(string->list "abcde"))
((merge null null) null) ((merge null null) null)
((merge null '(#\1)) '(#\1)) ((merge null '((1 . 10))) '((1 . 10)))
((merge '(#\1) null) '(#\1))) ((merge '((1 . 10)) null) '((1 . 10)))
;; r1 in r2
((merge '((5 . 10)) '((5 . 10))) '((5 . 10)))
((merge '((6 . 9)) '((5 . 10))) '((5 . 10)))
((merge '((7 . 7)) '((5 . 10))) '((5 . 10)))
;; r2 in r1
((merge '((5 . 10)) '((5 . 10))) '((5 . 10)))
((merge '((5 . 10)) '((6 . 9))) '((5 . 10)))
((merge '((5 . 10)) '((7 . 7))) '((5 . 10)))
;; r2 and r1 are disjoint
((merge '((5 . 10)) '((12 . 14))) '((5 . 10) (12 . 14)))
((merge '((12 . 14)) '((5 . 10))) '((5 . 10) (12 . 14)))
;; r1 and r1 are adjacent
((merge '((5 . 10)) '((11 . 13))) '((5 . 13)))
((merge '((11 . 13)) '((5 . 10))) '((5 . 13)))
;; r1 and r2 overlap
((merge '((5 . 10)) '((7 . 14))) '((5 . 14)))
((merge '((7 . 14)) '((5 . 10))) '((5 . 14)))
((merge '((5 . 10)) '((10 . 14))) '((5 . 14)))
((merge '((7 . 10)) '((5 . 7))) '((5 . 10)))
;; with lists
((merge '((1 . 1) (3 . 3) (5 . 10) (100 . 200))
'((2 . 2) (10 . 12) (300 . 300)))
'((1 . 3) (5 . 12) (100 . 200) (300 . 300)))
((merge '((1 . 1) (3 . 3) (5 . 5) (8 . 8) (10 . 10) (12 . 12))
'((2 . 2) (4 . 4) (6 . 7) (9 . 9) (11 . 11)))
'((1 . 12)))
((merge '((2 . 2) (4 . 4) (6 . 7) (9 . 9) (11 . 11))
'((1 . 1) (3 . 3) (5 . 5) (8 . 8) (10 . 10) (12 . 12)))
'((1 . 12))))
(define (split-acc l1 l2 i l1-i l2-i)
(cond ;; split-sub-range : (cons int int) (cons int int) -> char-set
((null? l1) (values (reverse! i) (reverse! l1-i) (reverse! (append! (reverse l2) l2-i)))) ;; (subrange? r1 r2) must hold.
((null? l2) (values (reverse! i) (reverse! (append! (reverse l1) l1-i)) (reverse! l2-i))) ;; returns [(car r2), (cdr r2)] - ([(car r1), (cdr r1)] intersect [(car r2), (cdr r2)]).
(else (let ((cl1 (car l1)) (define (split-sub-range r1 r2)
(cl2 (car l2))) (let ((r1-car (car r1))
(cond (r1-cdr (cdr r1))
((> (char->integer cl1) (char->integer cl2)) (r2-car (car r2))
(split-acc l1 (cdr l2) i l1-i (cons cl2 l2-i))) (r2-cdr (cdr r2)))
((< (char->integer cl1) (char->integer cl2)) (cond
(split-acc (cdr l1) l2 i (cons cl1 l1-i) l2-i)) ((and (= r1-car r2-car) (= r1-cdr r2-cdr)) null)
(else ((= r1-car r2-car) (list (cons (add1 r1-cdr) r2-cdr)))
(split-acc (cdr l1) (cdr l2) (cons cl1 i) l1-i l2-i))))))) ((= r1-cdr r2-cdr) (list (cons r2-car (sub1 r1-car))))
(else
;; split : (list-of char) (list-of char) -> (list-of char) (list-of char) (list-of char) (list (cons r2-car (sub1 r1-car)) (cons (add1 r1-cdr) r2-cdr))))))
;; Takes sorted, duplicate-free l1 and l2 and returns (l1 intersect l2),
;; l1 - (l1 intersect l2) and l2 - (l1 intersect l2)
(define (split l1 l2)
(split-acc l1 l2 null null null))
(test-block () (test-block ()
((let-values (((a b c) ((split-sub-range '(1 . 10) '(1 . 10)) '())
(split (string->list "abcdghjkl") ((split-sub-range '(1 . 5) '(1 . 10)) '((6 . 10)))
(string->list "abdeijmn")))) ((split-sub-range '(2 . 10) '(1 . 10)) '((1 . 1)))
(list a b c)) ((split-sub-range '(2 . 5) '(1 . 10)) '((1 . 1) (6 . 10))))
(list (string->list "abdj") (string->list "cghkl") (string->list "eimn")))
((let-values (((a b c) (split null null)))
(list a b c)) (list null null null)) (define (split-acc s1 s2 i s1-i s2-i)
((let-values (((a b c) (split '(#\1) null))) (cond
(list a b c)) (list null '(#\1) null)) ((null? s1) (values (reverse! i) (reverse! s1-i) (reverse! (append! (reverse s2) s2-i))))
((let-values (((a b c) (split null '(#\1)))) ((null? s2) (values (reverse! i) (reverse! (append! (reverse s1) s1-i)) (reverse! s2-i)))
(list a b c)) (list null null '(#\1)))) (else
(let ((r1 (car s1))
(r2 (car s2)))
(cond
((sub-range? r1 r2)
(split-acc (cdr s1) (append (split-sub-range r1 r2) (cdr s2))
(cons r1 i) s1-i s2-i))
((sub-range? r2 r1)
(split-acc (append (split-sub-range r2 r1) (cdr s1)) (cdr s2)
(cons r2 i) s1-i s2-i))
((overlap? r1 r2)
(split-acc (cons (cons (add1 (cdr r2)) (cdr r1)) (cdr s1))
(cdr s2)
(cons (cons (car r1) (cdr r2)) i)
s1-i
(cons (cons (car r2) (sub1 (car r1))) s2-i)))
((overlap? r2 r1)
(split-acc (cdr s1)
(cons (cons (add1 (cdr r1)) (cdr r2)) (cdr s2))
(cons (cons (car r2) (cdr r1)) i)
(cons (cons (car r1) (sub1 (car r2)))s1-i )
s2-i))
((< (car r1) (car r2))
(split-acc (cdr s1) s2 i (cons r1 s1-i) s2-i))
(else
(split-acc s1 (cdr s2) i s1-i (cons r2 s2-i))))))))
;; split : char-set -> char-set char-set char-set
;; returns (l1 intersect l2), l1 - (l1 intersect l2) and l2 - (l1 intersect l2)
(define (split s1 s2)
(split-acc s1 s2 null null null))
(test-block ((s (lambda (s1 s2)
(call-with-values (lambda () (split s1 s2)) list))))
((s null null) '(() () ()))
((s '((1 . 10)) null) '(() ((1 . 10)) ()))
((s null '((1 . 10))) '(() () ((1 . 10))))
((s '((1 . 10)) null) '(() ((1 . 10)) ()))
((s '((1 . 10)) '((1 . 10))) '(((1 . 10)) () ()))
((s '((1 . 10)) '((2 . 5))) '(((2 . 5)) ((1 . 1) (6 . 10)) ()))
((s '((2 . 5)) '((1 . 10))) '(((2 . 5)) () ((1 . 1) (6 . 10))))
((s '((2 . 5)) '((5 . 10))) '(((5 . 5)) ((2 . 4)) ((6 . 10))))
((s '((5 . 10)) '((2 . 5))) '(((5 . 5)) ((6 . 10)) ((2 . 4))))
((s '((2 . 10)) '((5 . 14))) '(((5 . 10)) ((2 . 4)) ((11 . 14))))
((s '((5 . 14)) '((2 . 10))) '(((5 . 10)) ((11 . 14)) ((2 . 4))))
((s '((10 . 20)) '((30 . 50))) '(() ((10 . 20)) ((30 . 50))))
((s '((100 . 200)) '((30 . 50))) '(() ((100 . 200)) ((30 . 50))))
((s '((1 . 5) (7 . 9) (100 . 200) (500 . 600) (600 . 700))
'((2 . 8) (50 . 60) (101 . 104) (105 . 220)))
'(((2 . 5) (7 . 8) (101 . 104) (105 . 200))
((1 . 1) (9 . 9) (100 . 100) (500 . 600) (600 . 700))
((6 . 6) (50 . 60) (201 . 220))))
((s '((2 . 8) (50 . 60) (101 . 104) (105 . 220))
'((1 . 5) (7 . 9) (100 . 200) (500 . 600) (600 . 700)))
'(((2 . 5) (7 . 8) (101 . 104) (105 . 200))
((6 . 6) (50 . 60) (201 . 220))
((1 . 1) (9 . 9) (100 . 100) (500 . 600) (600 . 700))))
)
) )
Loading…
Cancel
Save