From 37f150fe211179d1f0164e125d45bb63a8c9e3d4 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Thu, 8 Feb 2018 08:58:55 -0800 Subject: [PATCH] stricter atomizing --- quad/quad/atomize.rkt | 55 ++++++++++++++++++++++++++++++------------- quad/quad/quad.rkt | 16 ++++++++----- 2 files changed, 49 insertions(+), 22 deletions(-) diff --git a/quad/quad/atomize.rkt b/quad/quad/atomize.rkt index 2c215aa9..d6bf6bce 100644 --- a/quad/quad/atomize.rkt +++ b/quad/quad/atomize.rkt @@ -1,7 +1,8 @@ -#lang sugar/debug racket/base -(require racket/contract racket/match racket/list txexpr racket/dict +#lang debug racket/base +(require racket/contract racket/match racket/list txexpr racket/dict sugar/list racket/function "quad.rkt" "qexpr.rkt" "param.rkt") (provide (all-defined-out)) +(module+ test (require rackunit)) (define (update-with base-hash . update-hashes) ;; starting with base-hash, add or update keys found in update-hashes @@ -12,26 +13,48 @@ ((hasheq 'foo "bar" 'zim "zam") . update-with . (hasheq 'zim "BANG") (hasheq 'toe "jam") (hasheq 'foo "zay")) '#hasheq((zim . "BANG") (foo . "zay") (toe . "jam")))) +(define (merge-whitespace aqs) + ;; collapse each sequence of whitespace aqs to the first one, and make it a space + ;; also drop leading & trailing whitespaces + ;; (same behavior as web browsers) + (define (white-aq? aq) (char-whitespace? (car (qe aq)))) + (let loop ([acc null][aqs aqs]) + (if (null? aqs) + (trimf (flatten acc) white-aq?) + (let*-values ([(ws rest) (splitf-at aqs white-aq?)] + [(bs rest) (splitf-at rest (negate white-aq?))]) + (loop (list acc (match ws + [(list ($quad attrs elems) rest ...) (break attrs #\space)] + [else null]) bs) rest))))) + +(module+ test + (check-equal? (merge-whitespace (list (q #\space) (q #\newline) (q #\H) (q #\space) (q #\newline) (q #\space) (q #\i) (q #\newline))) + (list (q #\H) (b #\space) (q #\i)))) + (define/contract (atomize qx) ;; normalize a quad by reducing it to one-character quads. ;; propagate attrs downward. (quad? . -> . (listof atomic-quad?)) - (let loop ([x qx][attrs (current-default-attrs)]) - (match x - [(? char? c) (list (q attrs c))] - [(? string?) (append* (for/list ([c (in-string x)]) ;; strings are exploded - (loop c attrs)))] - [($quad this-attrs elems) ;; qexprs with attributes are recursed - (define merged-attrs (attrs . update-with . this-attrs)) - (append* (for/list ([elem (in-list elems)]) - (loop elem merged-attrs)))] - [else (raise-argument-error 'atomize "valid item" x)]))) + (define atomic-quads + (let loop ([x qx][attrs (current-default-attrs)]) + (match x + [(? char? c) (list (q attrs c))] + [(? string?) (append* (for/list ([c (in-string x)]) ;; strings are exploded + (loop c attrs)))] + [($quad this-attrs elems) ;; qexprs with attributes are recursed + (define merged-attrs (attrs . update-with . this-attrs)) + (append* (for/list ([elem (in-list elems)]) + (loop elem merged-attrs)))] + [else (raise-argument-error 'atomize "valid item" x)]))) + (merge-whitespace atomic-quads)) (module+ test (require rackunit) (check-equal? (atomize (q "Hi")) (list (q #\H) (q #\i))) - (check-equal? (atomize (q "Hi " (q "You"))) (list (q #\H) (q #\i) (q #\space) (q #\Y) (q #\o) (q #\u))) + (check-equal? (atomize (q "Hi " (q "You"))) (list (q #\H) (q #\i) (b #\space) (q #\Y) (q #\o) (q #\u))) (check-exn exn:fail:contract? (λ () (atomize #t))) + (check-equal? (atomize (q "H i")) (list (q #\H) (b #\space) (q #\i))) + (check-equal? (atomize (q "H \n\n i")) (list (q #\H) (b #\space) (q #\i))) ;; collapse whitespace to single ;; with attributes (check-equal? (atomize (q (hasheq 'k "v") "Hi")) (list (q (hasheq 'k "v") #\H) (q (hasheq 'k "v") #\i))) @@ -39,15 +62,15 @@ (list ($quad '#hasheq((k . "v")) '(#\H)) ($quad '#hasheq((k . "v")) '(#\i)) - ($quad '#hasheq((k . "v")) '(#\space)) + ($break '#hasheq((k . "v")) '(#\space)) ($quad '#hasheq((k . "v")) '(#\Y)) ($quad '#hasheq((k . "v")) '(#\o)) ($quad '#hasheq((k . "v")) '(#\u)))) - (check-equal? (atomize (q (hasheq 'k1 "v1" 'k2 42) "Hi " (q (hasheq 'k1 "v2" 'k3 "foo") "You"))) + (check-equal? (atomize (q (hasheq 'k1 "v1" 'k2 42) "Hi \n\n" (q (hasheq 'k1 "v2" 'k3 "foo") "\n \nYou"))) (list ($quad '#hasheq((k1 . "v1") (k2 . 42)) '(#\H)) ($quad '#hasheq((k1 . "v1") (k2 . 42)) '(#\i)) - ($quad '#hasheq((k1 . "v1") (k2 . 42)) '(#\space)) + ($break '#hasheq((k1 . "v1") (k2 . 42)) '(#\space)) ($quad '#hasheq((k1 . "v2") (k2 . 42) (k3 . "foo")) '(#\Y)) ($quad '#hasheq((k1 . "v2") (k2 . 42) (k3 . "foo")) '(#\o)) ($quad '#hasheq((k1 . "v2") (k2 . 42) (k3 . "foo")) '(#\u))))) \ No newline at end of file diff --git a/quad/quad/quad.rkt b/quad/quad/quad.rkt index f343b051..0802a28c 100644 --- a/quad/quad/quad.rkt +++ b/quad/quad/quad.rkt @@ -8,16 +8,16 @@ (define (quad-attrs? x) (and (hash? x) (hash-eq? x))) (define (quad-elem? x) (or (char? x) (string? x) ($quad? x))) (define (quad-elems? xs) (and (pair? xs) (andmap quad-elem? xs))) -(define (quad . xs) +(define (quad #:type [type $quad] . xs) (match xs - [(list #f xs ...) (apply quad (hasheq) xs)] - [(list (? quad-attrs? attrs) (? quad-elem? elems) ...) ($quad attrs elems)] - [(list (? quad-elem? elems) ...) (apply quad #f elems)] + [(list #f xs ...) (apply quad #:type type (hasheq) xs)] + [(list (? quad-attrs? attrs) (? quad-elem? elems) ...) (type attrs elems)] + [(list (? quad-elem? elems) ...) (apply quad #:type type #f elems)] [else (error 'bad-quad-input)])) (define (quads? xs) (and (pair? xs) (andmap quad? xs))) (define (atomic-quad? x) (and (quad? x) (match (qe x) - [(list (? char?)) #t] - [else #f]))) + [(list (? char?)) #t] + [else #f]))) (define (atomic-quads? xs) (and (pair? xs) (andmap atomic-quad? xs))) (module+ test (check-true (atomic-quad? ($quad '#hasheq() '(#\H)))) @@ -31,3 +31,7 @@ (define qs? quads?) (define qa quad-attrs) (define qe quad-elems) + +(struct $break $quad () #:transparent) +(define (break . xs) (apply quad #:type $break xs)) +(define b break)