From b5df2c48c99a0b03db2b89c14303099df58f4d5b Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Sun, 11 Aug 2013 15:12:37 -0700 Subject: [PATCH] put in more match --- tests/pollen-lang-test.p | 2 +- tests/requires/include-me.rkt | 213 +++++++++++++++++++++++++++++----- 2 files changed, 182 insertions(+), 33 deletions(-) diff --git a/tests/pollen-lang-test.p b/tests/pollen-lang-test.p index eda4c51..b0f9548 100644 --- a/tests/pollen-lang-test.p +++ b/tests/pollen-lang-test.p @@ -7,7 +7,7 @@ "Hello" world, aren't you --- yes, you — about 1--2 inches tall? -◊em{Love} +We ◊fooble{"Love"} Goodnight moon diff --git a/tests/requires/include-me.rkt b/tests/requires/include-me.rkt index 5678b75..33496eb 100644 --- a/tests/requires/include-me.rkt +++ b/tests/requires/include-me.rkt @@ -6,9 +6,13 @@ (module+ test (require rackunit)) -(register-block-name 'boqi) -;; todo: contracts & unit tests +;; register custom block tags +(register-block-name 'bloq) +(register-block-name 'fooble) + + +;; handle meta tags (define/contract (meta-proc meta) (meta-xexpr? . -> . named-xexpr?) `(meta ((name ,(second meta))(content ,(third meta))))) @@ -17,55 +21,200 @@ (check-equal? (meta-proc '(meta "key" "value")) '(meta ((name "key")(content "value"))))) ;; is x a paragraph break? -(define/contract (paragraph-break? x) - (any/c . -> . boolean?) - ;; paragraph break = a string of two or more newlines - (and (string? x) (>= (len x) 2) (equal? x (make-string (len x) #\newline)))) +(define/contract (paragraph-break? x #:pattern [paragraph-pattern #px"^\n\n+$"]) + ((any/c) (#:pattern pregexp?) . ->* . boolean?) + (and (string? x) (->boolean (regexp-match paragraph-pattern x)))) (module+ test (check-false (paragraph-break? "foo")) (check-false (paragraph-break? "\n")) (check-false (paragraph-break? "\n \n")) + (check-true (paragraph-break? "\n \n" #:pattern #px"^\n \n$")) (check-true (paragraph-break? "\n\n")) (check-true (paragraph-break? "\n\n\n"))) -;; convert single newline to br tag -;; only if neither adjacent tag is a block -;; otherwise delete -;; todo: contracts & unit tests -(define (convert-linebreaks x) ; x is list +(define/contract (convert-linebreaks xc #:newline [newline "\n"]) + ((xexpr-content?) (#:newline string?) . ->* . xexpr-content?) + ;; todo: should this test be not block + not whitespace? + (define not-block? (λ(i) (not (block-xexpr? i)))) (filter-not empty? - (for/list ([i (len x)]) - (cond - [(equal? (get x i) "\n") ; todo: don't hardcode this - (if (andmap (λ(i) (not (block-xexpr? i))) (list (get x (sub1 i)) (get x (add1 i)))) - '(br) - '())] - [else (get x i)])))) - -;; todo: contracts & unit tests -(define (prep-paragraph-flow x) - (convert-linebreaks (merge-newlines (trim x whitespace?)))) + (for/list ([i (len xc)]) + (let ([item (get xc i)]) + (cond + ;; skip first and last + [(or (= i 0) (= i (sub1 (len xc)))) item] + [(equal? item newline) + (match (get xc (- i 1) (+ i 2)) ; a three-element slice with x[i] in the middle + ;; only convert if neither adjacent tag is a block + ;; (because blocks automatically force a newline before & after) + [(list (? not-block?) newline (? not-block?)) '(br)] + [else empty])] ; otherwise delete + [else item]))))) + +(module+ test + (check-equal? (convert-linebreaks '("foo" "\n" "bar")) '("foo" (br) "bar")) + (check-equal? (convert-linebreaks '("\n" "foo" "\n" "bar" "\n")) '("\n" "foo" (br) "bar" "\n")) + (check-equal? (convert-linebreaks '((p "foo") "\n" (p "bar"))) '((p "foo") (p "bar"))) + (check-equal? (convert-linebreaks '("foo" "\n" (p "bar"))) '("foo" (p "bar"))) + (check-equal? (convert-linebreaks '("foo" "moo" "bar")) '("foo" "moo" "bar")) + (check-equal? (convert-linebreaks '("foo" "moo" "bar") #:newline "moo") '("foo" (br) "bar")) + (check-equal? (convert-linebreaks '("foo" "\n\n" "bar")) '("foo" "\n\n" "bar"))) + +;; prepare content for paragraph testing +(define/contract (prep-paragraph-flow xc) + (xexpr-content? . -> . xexpr-content?) + (convert-linebreaks (merge-newlines (trim xc whitespace?)))) (module+ test (check-equal? (prep-paragraph-flow '("\n" "foo" "\n" "\n" "bar" "\n" "ino" "\n")) '("foo" "\n\n" "bar" (br) "ino"))) -;; todo: contracts & unit tests -(define (wrap-paragraph x) ; x is a list containing paragraph pieces - ; if paragraph is just one block-level xexpr - (if (and (= (length x) 1) (block-xexpr? (car x))) - (car x) ; leave it - `(p ,@x))) ; otherwise wrap in p tag +;; apply paragraph tag +(define/contract (wrap-paragraph xc #:tag [tag 'p]) + ((xexpr-content?) (#:tag symbol?) . ->* . block-xexpr?) + (match xc + [(list (? block-xexpr? bx)) bx] ; leave a single block xexpr alone + [else (make-named-xexpr tag empty xc)])) ; otherwise wrap in p tag + +(module+ test + (check-equal? (wrap-paragraph '("foo" "bar")) '(p "foo" "bar")) + (check-equal? (begin (register-block-name 'para) (wrap-paragraph #:tag 'para '("foo" "bar"))) + '(para "foo" "bar")) + (check-equal? (wrap-paragraph '((p "bar" "foo"))) '(p "bar" "foo")) + (check-equal? (wrap-paragraph '((div "bar" "foo") "Hi" )) '(p (div "bar" "foo") "Hi"))) + -;; todo: contracts & unit tests -(define (xexpr-content-proc content) +;; detect paragraphs +;; todo: unit tests +(define/contract (xexpr-content-proc content) + (xexpr-content? . -> . xexpr-content?) (let ([content (prep-paragraph-flow content)]) (if (ormap paragraph-break? content) ; need this condition to prevent infinite recursion (map wrap-paragraph (splitf-at* content paragraph-break?)) ; split into ¶¶ content))) + +;; insert nbsp between last two words +(define/contract (nonbreaking-last-space x #:nbsp-char [nbsp #\ ]) + ((named-xexpr?) (#:nbsp-char char?) . ->* . named-xexpr?) + (define minimum-word-length (add1 5)) ; add1 to account for final punctuation + ; todo: parameterize this, as it will be different for each project + (define tags-to-pay-attention-to '(p aside)) ; only apply to paragraphs + + (define (replace-last-space str) + (if (#\space . in . str) + (let ([reversed-str-list (reverse (string->list str))]) + (define-values (last-word-chars other-chars) + (splitf-at reversed-str-list (λ(i) (not (eq? i #\space))))) + (list->string (reverse (append last-word-chars + ; OK for long words to be on their own line. + (if (< (len last-word-chars) minimum-word-length) + ; first char of other-chars will be the space, so use cdr + (cons nbsp (cdr other-chars)) + other-chars))))) + str)) + + (define (find-last-word-space x) ; recursively traverse xexpr + (cond + [(string? x) (replace-last-space x)] + [(named-xexpr? x) + (let-values([(name attr content) (break-named-xexpr x)]) + (if (> (length content) 0) ; content is list of xexprs + (let-values ([(all-but-last last) (split-at content (sub1 (length content)))]) + (make-named-xexpr name attr `(,@all-but-last ,(find-last-word-space (car last))))) + x))] + [else x])) + + (if ((car x) . in . tags-to-pay-attention-to) + (find-last-word-space x) + x)) + +;; todo: make some tougher tests, it gets flaky with edge cases +(module+ test + (check-equal? (nonbreaking-last-space '(p "Hi there")) '(p "Hi there")) ; nbsp in between last two words + (check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp-char #\Ø) '(p "HiØthere")) ; but let's make it visible + (check-equal? (nonbreaking-last-space '(p "Hi here" (em "ho there")) #:nbsp-char #\Ø) '(p "Hi here" (em "hoØthere")))) + + +; wrap initial quotes for hanging punctuation +; todo: improve this +; does not handle

thing properly +(define/contract (wrap-hanging-quotes nx) + (named-xexpr? . -> . named-xexpr?) + (define two-char-string? (λ(i) (and (string? i) (>= (len i) 2)))) + (define-values (name attr content) (break-named-xexpr nx)) + (define new-car-content + (match (car content) + [(? two-char-string? str) + (define str-first (get str 0)) + (define str-rest (get str 1 'end)) + (cond + [(str-first . in . '("\"" "“")) + ;; can wrap with any inline tag + ;; so that linebreak detection etc still works + `(hang-double-quote ,(->string #\“) ,str-rest)] + [(str-first . in . '("\'" "‘")) + `(hang-single-quote ,(->string #\‘) ,str-rest)] + [else str])] + [(? named-xexpr? nx) (wrap-hanging-quotes nx)] + [else (car content)])) + (make-named-xexpr name attr (cons new-car-content (cdr content)))) + + +(module+ test + (check-equal? (wrap-hanging-quotes '(p "\"Hi\" there")) '(p (hang-double-quote "“" "Hi\" there"))) + (check-equal? (wrap-hanging-quotes '(p "'Hi' there")) '(p (hang-single-quote "‘" "Hi' there")))) + + +(define (block-xexpr-proc bx) + (named-xexpr? . -> . named-xexpr?) + (wrap-hanging-quotes (nonbreaking-last-space bx))) + + +;; insert typographic niceties +;; ligatures are handled in css +(define (typogrify str) + (string? . -> . string?) + ;; make set of functions for replacers + (define (make-replacer query replacement) + (λ(str) (regexp-replace* query str replacement))) + + ;; just store the query strings + replacement strings + (define dashes + ;; fix em dashes first, else they'll be mistaken for en dashes + ;; [\\s ] is whitespace + nonbreaking space + '((#px"[\\s ]*(---|—)[\\s ]*" "—") ; em dash + (#px"[\\s ]*(--|–)[\\s ]*" "–"))) ; en dash + + (define smart-quotes + '((#px"(?<=\\w)'(?=\\w)" "’") ; apostrophe + (#px"(? . string?) + (typogrify str)) + + (define (root . items) (named-xexpr? . -> . named-xexpr?) (decode (cons 'root items) @@ -73,9 +222,9 @@ ; #:xexpr-name-proc [xexpr-name-proc (λ(x)x)] ; #:xexpr-attr-proc [xexpr-attr-proc (λ(x)x)] #:xexpr-content-proc xexpr-content-proc - ; #:block-xexpr-proc [block-xexpr-proc (λ(x)x)] + #:block-xexpr-proc block-xexpr-proc ; #:inline-xexpr-proc [inline-xexpr-proc (λ(x)x)] - ; #:string-proc string-proc + #:string-proc string-proc #:meta-proc meta-proc ))