From e6af00242c53a74255d23a98acc46cf0ebd0a612 Mon Sep 17 00:00:00 2001 From: Scott Owens Date: Tue, 13 Apr 2004 19:08:07 +0000 Subject: [PATCH] *** empty log message *** original commit: e3f6144096066ad767c54dd1b427ee0b171b4913 --- collects/parser-tools/private-lex/deriv.ss | 67 +++++--- collects/parser-tools/private-lex/re.ss | 183 ++++++++++++++------- collects/parser-tools/private-lex/stx.ss | 5 + 3 files changed, 173 insertions(+), 82 deletions(-) diff --git a/collects/parser-tools/private-lex/deriv.ss b/collects/parser-tools/private-lex/deriv.ss index 3c34078..321bcce 100644 --- a/collects/parser-tools/private-lex/deriv.ss +++ b/collects/parser-tools/private-lex/deriv.ss @@ -9,38 +9,52 @@ (define e (build-epsilon)) (define z (build-zero)) - ;; get-char-groups : re -> (list-of char-setR?) + + ;; Don't do anything with this one but extract the chars + (define all-chars (->re `(^) (make-cache))) + + ;; get-char-groups : re bool -> (list-of char-setR?) ;; Collects the char-setRs in r that could be used in ;; taking the derivative of r. - (define (get-char-groups r) + (define (get-char-groups r found-negation) (cond ((or (eq? r e) (eq? r z)) null) ((char-setR? r) (list r)) ((concatR? r) (if (re-nullable? (concatR-re1 r)) - (append (get-char-groups (concatR-re1 r)) - (get-char-groups (concatR-re2 r))) - (get-char-groups (concatR-re1 r)))) + (append (get-char-groups (concatR-re1 r) found-negation) + (get-char-groups (concatR-re2 r) found-negation)) + (get-char-groups (concatR-re1 r) found-negation))) ((repeatR? r) - (get-char-groups (repeatR-re r))) + (get-char-groups (repeatR-re r) found-negation)) ((orR? r) - (apply append (map get-char-groups (orR-res r)))))) + (apply append (map (lambda (x) (get-char-groups x found-negation)) (orR-res r)))) + ((andR? r) + (apply append (map (lambda (x) (get-char-groups x found-negation)) (andR-res r)))) + ((negR? r) + (if found-negation + (get-char-groups (negR-re r) #t) + (cons all-chars (get-char-groups (negR-re r) #t)))))) (test-block ((c (make-cache)) (r1 (->re #\1 c)) (r2 (->re #\2 c))) - ((get-char-groups e) null) - ((get-char-groups z) null) - ((get-char-groups r1) (list r1)) - ((get-char-groups (->re `(@ ,r1 ,r2) c)) + ((get-char-groups e #f) null) + ((get-char-groups z #f) null) + ((get-char-groups r1 #f) (list r1)) + ((get-char-groups (->re `(@ ,r1 ,r2) c) #f) (list r1)) - ((get-char-groups (->re `(@ ,e ,r2) c)) + ((get-char-groups (->re `(@ ,e ,r2) c) #f) (list r2)) - ((get-char-groups (->re `(@ (* ,r1) ,r2) c)) + ((get-char-groups (->re `(@ (* ,r1) ,r2) c) #f) (list r1 r2)) - ((get-char-groups (->re `(* ,r1) c)) + ((get-char-groups (->re `(* ,r1) c) #f) (list r1)) - ((get-char-groups (->re `(: (* ,r1) (@ (* ,r2) "3") "4") c)) + ((get-char-groups (->re `(: (* ,r1) (@ (* ,r2) "3") "4") c) #f) + (list r1 r2 (->re "3" c) (->re "4" c))) + ((get-char-groups (->re `(~ ,r1) c) #f) + (list all-chars r1)) + ((get-char-groups (->re `(& (* ,r1) (@ (* ,r2) "3") "4") c) #f) (list r1 r2 (->re "3" c) (->re "4" c))) ) @@ -130,7 +144,9 @@ (r4 (->re `(@ #\a ,r2) c)) (r5 (->re `(* ,r4) c)) (r6 (->re `(: ,r5 #\a) c)) - (r7 (->re `(@ ,r2 ,r2) c))) + (r7 (->re `(@ ,r2 ,r2) c)) + (r8 (->re `(~ ,r4) c)) + (r9 (->re `(& ,r2 ,r3) c))) ((deriveR e #\a c) z) ((deriveR z #\a c) z) ((deriveR r1 #\b c) z) @@ -146,8 +162,11 @@ ((deriveR r6 #\a c) (->re `(: (@ ,r2 ,r5) (epsilon)) c)) ((deriveR r6 #\b c) z) ((deriveR r7 #\a c) (->re `(: (@ ,r2 ,r2) ,r2) c)) - ((deriveR r7 #\b c) z)) - + ((deriveR r7 #\b c) z) + ((deriveR r8 #\a c) (->re `(~, r2) c)) + ((deriveR r8 #\b c) (->re `(~ ,z) c)) + ((deriveR r9 #\a c) (->re `(& ,r2 (@ ,r2 ,r3)) c)) + ((deriveR r9 #\b c) z)) ;; An re-action is (cons re action) @@ -217,7 +236,7 @@ ((null? st) null) (else (partition (map char-setR-chars - (apply append (map (lambda (x) (get-char-groups (car x))) + (apply append (map (lambda (x) (get-char-groups (car x) #f)) (state-spec (car st))))))))) (test-block ((c (make-cache)) @@ -322,6 +341,13 @@ (define t7 (build-test-dfa `((@ (* #\a) (* #\b) (* #\c) (* #\d) (* #\e))))) (define t8 (build-test-dfa `((@ (* (: #\a #\b)) #\a (: #\a #\b) (: #\a #\b) (: #\a #\b) (: #\a #\b))))) + (define t9 (build-test-dfa `((@ "/*" + (~ (@ (&) "*/" (&))) + "*/")))) + (define t11 (build-test-dfa `((~ "1")))) + (define t12 (build-test-dfa `((@ (& (@ (* "a") "b") + (@ "a" (* "b"))) + "ab")))) (define x (build-test-dfa `((: " " "\n" ",") (@ (? "-") (+ (- "0" "9"))) (@ "-" (+ "-")) @@ -329,5 +355,8 @@ "]"))) (define y (build-test-dfa `((+ (: (@ "|" (* (^ "|")) "|") (@ "|" (* (^ "|")))))))) + (define t13 (build-test-dfa `((& (@ (&) "111" (&)) + (~ (: (@ (&) "01") + (+ "1"))))))) |# ) \ No newline at end of file diff --git a/collects/parser-tools/private-lex/re.ss b/collects/parser-tools/private-lex/re.ss index d90a59e..f64bf46 100644 --- a/collects/parser-tools/private-lex/re.ss +++ b/collects/parser-tools/private-lex/re.ss @@ -32,8 +32,8 @@ (define-struct (concatR re) (re1 re2) (make-inspector)) (define-struct (repeatR re) (re)) (define-struct (orR re) (res) (make-inspector)) - (define-struct (andR re) (res)) - (define-struct (negR re) (re)) + (define-struct (andR re) (res) (make-inspector)) + (define-struct (negR re) (re) (make-inspector)) ;; e : re ;; The unique epsilon re @@ -52,12 +52,14 @@ ;; | (+ s-re) match 1 or more ;; | (? s-re) match 0 or 1 ;; | (: s-re ...) match one of the sub-expressions + ;; | (& s-re ...) match iff all sub-expressions match + ;; | (~ s-re) match iff s-re doesn't match ;; | (@ s-re ...) match each sub-expression in succession ;; | (- char char) match any character between two (inclusive) - ;; | (^ char_or_range ...1) match any character not listed - ;; (the null concatenation `(@) means epsilon as does "". - ;; the null or `(:) means match nothing. The null carat `(^) means match - ;; any character.) + ;; | (^ char_or_range ...) match any character not listed + ;; (The null concatenation `(@) means epsilon as does "". + ;; The null or `(:) means match nothing. The null carat `(^) means match + ;; any character. The null intersection `(&) means match string.) ;; ->re : s-re cache -> re (define (->re exp cache) @@ -76,8 +78,17 @@ c (build-or (list e c) cache)))) (`(: ,rs ...) - (build-or (flatten-res (map (lambda (r) (->re r cache)) rs) cache) + (build-or (flatten-res (map (lambda (r) (->re r cache)) rs) + orR? orR-res merge cache) cache)) + (`(& ,rs ...) + (build-and (flatten-res (map (lambda (r) (->re r cache)) rs) + andR? andR-res (lambda (a b) + (let-values (((i _ __) (split a b))) i)) + cache) + cache)) + (`(~ ,r) + (build-neg (->re r cache) cache)) (`(@ ,rs ...) (foldr (lambda (x y) (build-concat (->re x cache) y cache)) @@ -111,22 +122,26 @@ - ;; flatten-res: (list-of re) cache -> (list-of re) - ;; Takes all the char-sets in l and combines them into one element. - ;; Removes orRs too, - (define (flatten-res l cache) + ;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re)) + ;; ((list-of char) (list-of char) -> (list-of char)) cache -> (list-of re) + ;; Takes all the char-sets in l and combines them into one element using the combine function. + ;; Flattens out the values of type?. get-res only needs to function on things type? returns + ;; true for. + (define (flatten-res l type? get-res combine cache) (let loop ((res l) - (chars null) + (chars #f) (no-chars null)) (cond ((null? res) - (if (null? chars) - no-chars - (cons (build-char-set (mergesort chars char re @@ -181,6 +196,7 @@ (cond ((null? rs) z) ((null? (cdr rs)) (car rs)) + ((memq (build-neg z cache) rs) (build-neg z cache)) (else (cache (cons 'or (map re-index rs)) (lambda () @@ -188,9 +204,11 @@ ;; build-and : (list-of re) cache -> re (define (build-and rs cache) - (let ((rs (do-simple-equiv (replace rs andR? andR-res null) rs))) + (let ((rs (do-simple-equiv (replace rs andR? andR-res null) re-index))) (cond - ((ormap (lambda (x) (eq? x z)) rs) z) + ((null? rs) (build-neg z cache)) + ((null? (cdr rs)) (car rs)) + ((memq z rs) z) (else (cache (cons 'and (map re-index rs)) (lambda () @@ -198,57 +216,77 @@ ;; build-neg : re cache -> re (define (build-neg r cache) - (cache (cons 'neg (re-index r)) - (lambda () - (make-negR (not (re-nullable? r)) (get-index) r)))) + (cond + ((negR? r) (negR-re r)) + (else + (cache (cons 'neg (re-index r)) + (lambda () + (make-negR (not (re-nullable? r)) (get-index) r)))))) + ;; Tests for the build-functions (test-block ((c (make-cache)) - (r1 (->re #\1 c)) - (r2 (->re #\2 c)) - (r3 (->re #\3 c)) - (rc (->re `(@ ,r1 ,r2) c)) - (rc2 (->re `(@ ,r2 ,r1) c)) - (rc3 (->re `(@ ,r1 ,r2 ,r3) c)) - (rr (->re `(* ,rc) c)) - (ro (->re `(: ,rr ,rc ,rr) c)) - (ro2 (->re `(: ,rc ,rr ,z) c)) - (ro3 (->re `(: ,rr ,rc) c)) - (real1 (->re `(+ (: (@ "|" (* (^ "|")) "|") - (@ "|" (* (^ "|"))))) c))) + (r1 (build-char-set `(#\1) c)) + (r2 (build-char-set `(#\2) c)) + (r3 (build-char-set `(#\3) c)) + (rc (build-concat r1 r2 c)) + (rc2 (build-concat r2 r1 c)) + (rr (build-repeat rc c)) + (ro (build-or `(,rr ,rc ,rr) c)) + (ro2 (build-or `(,rc ,rr ,z) c)) + (ro3 (build-or `(,rr ,rc) c)) + (ro4 (build-or `(,(build-or `(,r1 ,r2) c) + ,(build-or `(,r2 ,r3) c)) c)) + (ra (build-and `(,rr ,rc ,rr) c)) + (ra2 (build-and `(,rc ,rr) c)) + (ra3 (build-and `(,rr ,rc) c)) + (ra4 (build-and `(,(build-and `(,r3 ,r2) c) + ,(build-and `(,r2 ,r1) c)) c)) + (rn (build-neg z c)) + (rn2 (build-neg r1 c))) + ((char-setR-chars r1) `(#\1)) ((char-setR-chars r2) `(#\2)) ((char-setR-chars r3) `(#\3)) + ((build-char-set null c) z) + ((build-concat r1 e c) r1) + ((build-concat e r1 c) r1) + ((build-concat r1 z c) z) + ((build-concat z r1 c) z) + ((build-concat r1 r2 c) rc) + ((concatR-re1 rc) r1) + ((concatR-re2 rc) r2) + ((concatR-re1 rc2) r2) + ((concatR-re2 rc2) r1) (ro ro2) (ro ro3) - ((->re `(* ,rc) c) rr) - ((build-char-set null c) z) - ((->re `(@) c) e) - ((->re `(:) c) z) - ((->re `(@ ,r1 (epsilon)) c) r1) - ((->re `(@ (epsilon) ,r1) c) r1) - ((->re `(@ ,r1 ,z) c) z) - ((->re `(@ ,z ,r1) c) z) - ((->re `(@ ,z (epsilon)) c) z) - ((->re `(@ (epsilon) ,z) c) z) - ((->re `(:) c) z) - ((->re `(: ,rr) c) rr) - ((build-or `(,z ,r1 ,z) c) r1) - ((build-or (list - (build-or (list r1 r2) c) - (build-or (list rc rr) c)) - c) - (build-or (list r1 r2 rc rr) c)) - ((concatR-re1 rc3) r1) - ((concatR-re1 (concatR-re2 rc3)) r2) - ((concatR-re2 (concatR-re2 rc3)) r3) + (ro4 (build-or `(,r1 ,r2 ,r3) c)) ((orR-res ro) (list rc rr)) + ((orR-res ro4) (list r1 r2 r3)) + ((build-or null c) z) + ((build-or `(,r1 ,z) c) r1) + ((build-repeat rc c) rr) ((repeatR-re rr) rc) + (ra ra2) + (ra ra3) + (ra4 (build-and `(,r1 ,r2 ,r3) c)) + ((andR-res ra) (list rc rr)) + ((andR-res ra4) (list r1 r2 r3)) + ((build-and null c) (build-neg z c)) + ((build-and `(,r1 ,z) c) z) + ((build-and `(,r1) c) r1) + ((build-neg r1 c) (build-neg r1 c)) + ((build-neg (build-neg r1 c) c) r1) + ((negR-re (build-neg r2 c)) r2) ((re-nullable? r1) #f) ((re-nullable? rc) #f) - ((re-nullable? (->re `(@ ,rr ,rr) c)) #t) + ((re-nullable? (build-concat rr rr c)) #t) ((re-nullable? rr) #t) ((re-nullable? ro) #t) - ((re-nullable? (->re `(: ,r1 ,r2) c)) #f)) + ((re-nullable? (build-or `(,r1 ,r2) c)) #f) + ((re-nullable? (build-and `(,r1 ,e) c)) #f) + ((re-nullable? (build-and `(,rr ,e) c)) #t) + ((re-nullable? (build-neg r1 c)) #t) + ((re-nullable? (build-neg rr c)) #f)) (test-block ((c (make-cache)) (r1 (->re #\1 c)) @@ -257,12 +295,19 @@ (r4 (build-or `(,r1 ,r2) c)) (r5 (->re `(: ,r3-5 #\7) c)) (r6 (->re #\6 c))) - ((flatten-res null c) null) - ((char-setR-chars (car (flatten-res `(,r1) c))) '(#\1)) - ((char-setR-chars (car (flatten-res `(,r4) c))) '(#\1 #\2)) - ((char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1) c))) - (string->list "1234567"))) + ((flatten-res null orR? orR-res merge c) null) + ((char-setR-chars (car (flatten-res `(,r1) orR? orR-res merge c))) '(#\1)) + ((char-setR-chars (car (flatten-res `(,r4) orR? orR-res merge c))) '(#\1 #\2)) + ((char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1) orR? orR-res merge c))) + (string->list "1234567")) + ((flatten-res `(,r1 ,r2) andR? andR-res (lambda (x y) + (let-values (((i _ __) + (split x y))) + i)) + c) + (list z))) + ;; ->re (test-block ((c (make-cache)) (r (->re #\a c)) (rr (->re `(@ ,r ,r) c)) @@ -285,6 +330,18 @@ ((->re `(: ,rr ,rrr) c) (build-or (list rr rrr) c)) ((->re `(: ,r) c) r) ((->re `(:) c) z) + ((->re `(& (& #\111 (^ (- #\000 #\110) (- #\112 #\377))) + (& (* #\2))) c) + (build-and (list (build-char-set '(#\111) c) + (build-repeat (build-char-set '(#\2) c) c)) + c)) + ((->re `(& (& #\000 (^ (- #\000 #\110) (- #\112 #\377))) + (& (* #\2))) c) + z) + ((->re `(& ,rr ,rrr) c) (build-and (list rr rrr) c)) + ((->re `(& ,r) c) r) + ((->re `(&) c) (build-neg z c)) + ((->re `(~ ,r) c) (build-neg r c)) ((->re `(@) c) e) ((->re `(@ ,rrr*) c) rrr*) (rr (build-concat r r c)) diff --git a/collects/parser-tools/private-lex/stx.ss b/collects/parser-tools/private-lex/stx.ss index 7f8fd4b..08f82e7 100644 --- a/collects/parser-tools/private-lex/stx.ss +++ b/collects/parser-tools/private-lex/stx.ss @@ -43,7 +43,12 @@ (unless (= num-args 1) (num-arg-err s 1 num-args)) `(? ,(parse (car ar)))) + ((~) + (unless (= num-args 1) + (num-arg-err s 1 num-args)) + `(~ ,(parse (car ar)))) ((:) `(: ,@(map parse ar))) + ((&) `(& ,@(map parse ar))) ((@) `(@ ,@(map parse ar))) ((-) (unless (= num-args 2)