|
|
#lang racket/base
|
|
|
(require xml txexpr racket/string racket/match racket/list (prefix-in html: pollen/html) sugar/list sugar/container sugar/len sugar/define sugar/coerce sugar/test)
|
|
|
(require "debug.rkt" "world.rkt")
|
|
|
|
|
|
|
|
|
;; symbols? : any/c -> boolean?
;; True only for a proper list in which every member is a symbol.
;; The empty list qualifies, since it has no non-symbol member.
(define (symbols? x)
  (if (list? x)
      (for/and ([item (in-list x)])
        (symbol? item))
      #f))
|
|
|
|
|
|
;; Convert x to a string for the decoder.
;;   string               -> returned unchanged
;;   '() or #<void>       -> ""
;;   symbol, number, path -> symbol->string / number->string / path->string
;;   char                 -> one-character string
;;   list, hash, vector   -> printed form, as produced by the ~v format directive
;; Any other value (e.g. a procedure), or any failure during conversion,
;; raises exn:fail with "Pollen decoder: can't convert <value> to string".
(define+provide (to-string x)
  ;; The one place that builds the user-facing conversion error.
  (define (raise-conversion-error)
    (error (format "Pollen decoder: can't convert ~v to ~a" x 'string)))
  (if (string? x)
      x ; fast exit for strings
      ;; The handler gives every failure inside the cond the same message.
      (with-handlers ([exn:fail? (λ (exn) (raise-conversion-error))])
        (cond
          [(equal? '() x) ""]
          [(symbol? x) (symbol->string x)]
          [(number? x) (number->string x)]
          [(path? x) (path->string x)]
          [(char? x) (string x)]
          [(void? x) ""]
          ;; todo: guard against unusual contents, like lists of procedures
          [(or (list? x) (hash? x) (vector? x)) (format "~v" x)] ; ok to convert datatypes
          ;; things like procedures are rejected explicitly, rather than
          ;; by provoking an arity error with a bare (error) call
          [else (raise-conversion-error)]))))
|
|
|
|
|
|
;; Contract for what a decode processing proc may return:
;; a single X-expression, or a non-empty list of X-expressions.
(define decode-proc-output-contract
  (or/c xexpr/c
        (non-empty-listof xexpr/c)))
|
|
|
|
|
|
|
|
|
;; Like ->list, with one special case: a lone txexpr is itself a list,
;; but here it is wrapped in a one-element list instead of being returned as-is.
;; That lets append-map splice it in as a single item.
(define (->list/tx x)
  (cond
    [(txexpr? x) (list x)]
    [else (->list x)]))
|
|
|
|
|
|
|
|
|
;; decoder wireframe
;;
;; Walks the X-expression tx-in and hands each piece to the matching proc:
;;   txexpr  -> tag, attrs and (recursively decoded) elements go through
;;              txexpr-tag-proc, txexpr-attrs-proc and txexpr-elements-proc;
;;              the rebuilt txexpr then goes to block-txexpr-proc if it is a
;;              block-txexpr?, otherwise to inline-txexpr-proc
;;   string  -> string-proc
;;   symbol or valid-char -> entity-proc
;;   cdata   -> cdata-proc
;; A txexpr whose tag is in excluded-tags, or that carries an attribute found in
;; excluded-attrs, is returned untouched (its children are not visited).
;; Every proc defaults to the identity function.
;; Any other kind of value raises an error ("decode: can't decode").
(define+provide/contract (decode tx-in
                                 #:txexpr-tag-proc [txexpr-tag-proc (λ(x)x)]
                                 #:txexpr-attrs-proc [txexpr-attrs-proc (λ(x)x)]
                                 #:txexpr-elements-proc [txexpr-elements-proc (λ(x)x)]
                                 #:block-txexpr-proc [block-txexpr-proc (λ(x)x)]
                                 #:inline-txexpr-proc [inline-txexpr-proc (λ(x)x)]
                                 #:string-proc [string-proc (λ(x)x)]
                                 #:entity-proc [entity-proc (λ(x)x)]
                                 #:cdata-proc [cdata-proc (λ(x)x)]
                                 #:exclude-tags [excluded-tags '()]
                                 #:exclude-attrs [excluded-attrs '()])
  ((xexpr/c)
   (#:txexpr-tag-proc (txexpr-tag? . -> . txexpr-tag?)
    #:txexpr-attrs-proc (txexpr-attrs? . -> . txexpr-attrs?)
    #:txexpr-elements-proc (txexpr-elements? . -> . txexpr-elements?)
    #:block-txexpr-proc (block-txexpr? . -> . decode-proc-output-contract)
    #:inline-txexpr-proc (txexpr? . -> . decode-proc-output-contract)
    #:string-proc (string? . -> . decode-proc-output-contract)
    #:entity-proc ((or/c symbol? valid-char?) . -> . decode-proc-output-contract)
    #:cdata-proc (cdata? . -> . decode-proc-output-contract)
    #:exclude-tags (listof txexpr-tag?)
    #:exclude-attrs txexpr-attrs?) . ->* . decode-proc-output-contract)
  (let loop ([x tx-in])
    (cond
      [(txexpr? x)
       (let-values ([(tag attrs elements) (txexpr->values x)])
         (if (or (member tag excluded-tags)
                 (ormap (λ(attr) (member attr excluded-attrs)) attrs))
             x ; because it's excluded
             ;; we apply processing here rather than do recursive descent on the pieces
             ;; because if we send them back through loop, certain element types are ambiguous
             ;; e.g., ((p "foo")) tests out as both txexpr-attrs and txexpr-elements
             (let ([decoded-txexpr
                    (make-txexpr (txexpr-tag-proc tag)
                                 (txexpr-attrs-proc attrs)
                                 ;; a proc may return one item or a list of items;
                                 ;; ->list/tx normalizes both so append-map can splice them
                                 (txexpr-elements-proc (append-map (compose1 ->list/tx loop) elements)))])
               ((if (block-txexpr? decoded-txexpr)
                    block-txexpr-proc
                    inline-txexpr-proc) decoded-txexpr))))]
      [(string? x) (string-proc x)]
      [(or (symbol? x) (valid-char? x)) (entity-proc x)]
      [(cdata? x) (cdata-proc x)]
      [else (error "decode: can't decode" x)])))
|
|
|
|
|
|
;; Tests for decode and decode-elements: each proc keyword is exercised with
;; identity (no change expected) and with a proc that duplicates its input.
(module-test-external
 (require racket/list txexpr racket/function)
 (define (doubler x) (list x x))
 (check-equal? (decode #:txexpr-elements-proc identity '(p "foo")) '(p "foo"))
 ;; can't use doubler on txexpr-elements because it needs a list, not list of lists
 (check-equal? (decode #:txexpr-elements-proc (λ(elems) (append elems elems)) '(p "foo")) '(p "foo" "foo"))
 (check-equal? (decode #:block-txexpr-proc identity '(p "foo")) '(p "foo"))
 (check-equal? (decode #:block-txexpr-proc doubler '(p "foo")) (list '(p "foo") '(p "foo")))
 (check-equal? (decode #:inline-txexpr-proc identity '(p (span "foo"))) '(p (span "foo")))
 (check-equal? (decode #:inline-txexpr-proc doubler '(p (span "foo"))) '(p (span "foo") (span "foo")))
 (check-equal? (decode #:string-proc identity '(p (span "foo"))) '(p (span "foo")))
 (check-equal? (decode #:string-proc doubler '(p (span "foo"))) '(p (span "foo" "foo")))
 ;; the entity is a bare symbol: inside a quoted datum, 'amp would read as
 ;; (quote amp), i.e. a txexpr tagged `quote`, and no entity would be tested
 (check-equal? (decode #:entity-proc identity '(p (span "foo" amp))) '(p (span "foo" amp)))
 (check-equal? (decode #:entity-proc identity '(p 42)) '(p 42))
 (check-equal? (decode #:entity-proc doubler '(p 42)) '(p 42 42))
 (check-equal? (decode #:entity-proc identity '(p amp)) '(p amp))
 ;; next test doesn't work because list of symbol elements is ambiguous with tagged X-expression
 ;; is there a general patch for this? maybe, but for now it's better to not patch selectively
 ;; otherwise ambiguous expressions will have erratic misbehavior (instead of merely consistent misbehavior)
 ;;(check-equal? (decode #:entity-proc doubler '(p amp)) '(p amp amp))
 (check-equal? (decode-elements #:string-proc identity '("foo")) '("foo"))
 (check-equal? (decode-elements #:string-proc doubler '("foo")) '("foo" "foo")))
|
|
|
|
|
|
;; it would be nice to not repeat this, but with all the keywords, it's simpler to repeat than do a macro
;;
;; Decode a bare list of elements: wrap them in a temporary txexpr, run decode
;; with the same keyword arguments, then return the elements of the result.
;; The wrapper tag comes from gensym, so it cannot collide with a real tag,
;; with excluded-tags, or with a registered block tag.
;; NOTE(review): decode treats the wrapper like any other txexpr, so the
;; tag / attrs / block-or-inline procs also see it; get-elements then needs
;; the proc applied to the wrapper to return a single txexpr — confirm callers.
(define+provide/contract (decode-elements elements
                                          #:txexpr-tag-proc [txexpr-tag-proc (λ(x)x)]
                                          #:txexpr-attrs-proc [txexpr-attrs-proc (λ(x)x)]
                                          #:txexpr-elements-proc [txexpr-elements-proc (λ(x)x)]
                                          #:block-txexpr-proc [block-txexpr-proc (λ(x)x)]
                                          #:inline-txexpr-proc [inline-txexpr-proc (λ(x)x)]
                                          #:string-proc [string-proc (λ(x)x)]
                                          #:entity-proc [entity-proc (λ(x)x)]
                                          #:cdata-proc [cdata-proc (λ(x)x)]
                                          #:exclude-tags [excluded-tags '()]
                                          #:exclude-attrs [excluded-attrs '()])
  ((txexpr-elements?)
   (#:txexpr-tag-proc (txexpr-tag? . -> . txexpr-tag?)
    #:txexpr-attrs-proc (txexpr-attrs? . -> . txexpr-attrs?)
    #:txexpr-elements-proc (txexpr-elements? . -> . txexpr-elements?)
    #:block-txexpr-proc (block-txexpr? . -> . decode-proc-output-contract)
    #:inline-txexpr-proc (txexpr? . -> . decode-proc-output-contract)
    #:string-proc (string? . -> . decode-proc-output-contract)
    #:entity-proc ((or/c symbol? valid-char?) . -> . decode-proc-output-contract)
    #:cdata-proc (cdata? . -> . decode-proc-output-contract)
    #:exclude-tags (listof txexpr-tag?)
    ;; txexpr-elements? is accepted so that an empty element list, which is
    ;; neither an xexpr nor a non-empty list, is a legal result
    #:exclude-attrs txexpr-attrs?) . ->* . (or/c txexpr-elements? decode-proc-output-contract))

  (define temp-tag (gensym "temp-tag"))
  ;; ,temp-tag must be unquoted: a bare temp-tag inside the quasiquote
  ;; would be the literal symbol, not the gensym bound above
  (define decode-result (decode `(,temp-tag ,@elements)
                                #:txexpr-tag-proc txexpr-tag-proc
                                #:txexpr-attrs-proc txexpr-attrs-proc
                                #:txexpr-elements-proc txexpr-elements-proc
                                #:block-txexpr-proc block-txexpr-proc
                                #:inline-txexpr-proc inline-txexpr-proc
                                #:string-proc string-proc
                                #:entity-proc entity-proc
                                #:cdata-proc cdata-proc
                                #:exclude-tags excluded-tags
                                #:exclude-attrs excluded-attrs))
  (get-elements decode-result))
|
|
|
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
;; Blocks
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
;; Parameter holding the list of tags treated as block-level.
;; initial set of block tags: from html (html:block-tags)
(define+provide project-block-tags (make-parameter html:block-tags))
|
|
|
|
|
|
|
|
|
;; tags are inline unless they're registered as block tags.
;; Returns #t when x is a txexpr whose tag is a member of (project-block-tags),
;; and #f for everything else.
(define+provide/contract (block-txexpr? x)
  (any/c . -> . boolean?)
  (cond
    [(not (txexpr? x)) #f]
    [(member (get-tag x) (project-block-tags)) #t]
    [else #f]))
|
|
|
|
|
|
|
|
|
;; Put tag at the front of the list held by the project-block-tags parameter.
(define+provide/contract (register-block-tag tag)
  (txexpr-tag? . -> . void?)
  (let ([current-tags (project-block-tags)])
    (project-block-tags (cons tag current-tags))))
|
|
|
|
|
|
;; A freshly registered tag must make its txexprs count as blocks.
(module-test-external
 (register-block-tag 'barfoo)
 (check-true (block-txexpr? '(barfoo "foo"))))
|
|
|
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
;; Typography
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
|
|
|
|
;; Build a string -> string procedure from a list of (query replacement) pairs.
;; The returned procedure runs regexp-replace* once per pair, feeding each
;; result into the next; the first pair in the list is applied first.
(define (make-replacer query+replacement)
  ;; one single-purpose replacer per pair, kept in list order
  (define steps
    (for/list ([pair (in-list query+replacement)])
      (define query (car pair))
      (define replacement (second pair))
      (λ (str) (regexp-replace* query str replacement))))
  (λ (str)
    (for/fold ([current str])
              ([step (in-list steps)])
      (step current))))
|
|
|
|
|
|
;; Replace typewriter dashes with typographic ones: --- (or —) becomes an em
;; dash and -- (or –) becomes an en dash. Whitespace and nonbreaking spaces
;; on either side of the dash are absorbed.
(define+provide/contract (smart-dashes str)
  (string? . -> . string?)
  (define dashes
    ;; fix em dashes first, else they'll be mistaken for en dashes
    ;; [\\s\u00A0] is whitespace or a nonbreaking space (\\s alone does not
    ;; match U+00A0). There is deliberately no # in the class: inside a
    ;; regexp, # is a literal character and would delete hash signs next to a dash.
    '((#px"[\\s\u00A0]*(---|—)[\\s\u00A0]*" "—") ; em dash
      (#px"[\\s\u00A0]*(--|–)[\\s\u00A0]*" "–"))) ; en dash
  ((make-replacer dashes) str))
|
|
|
|
|
|
;; Tests for smart-dashes and smart-quotes.
(module-test-external
 ;; spaced hyphen runs become unspaced em / en dashes; a lone hyphen stays
 (let ([typed "I had --- maybe 13 -- 20 --- hob-nobs."]
       [typeset "I had—maybe 13–20—hob-nobs."])
   (check-equal? (smart-dashes typed) typeset))
 ;; straight quotes curl, apostrophes curl, an existing ‘ is left alone
 (let ([typed "\"Why,\" she could've asked, \"are we in O‘ahu watching 'Mame'?\""]
       [typeset "“Why,” she could’ve asked, “are we in O‘ahu watching ‘Mame’?”"])
   (check-equal? (smart-quotes typed) typeset))
 ;; single quotes nested directly inside double quotes
 (let ([typed "\"\'Impossible.\' Yes.\""]
       [typeset "“‘Impossible.’ Yes.”"])
   (check-equal? (smart-quotes typed) typeset)))
|
|
|
|
|
|
|
|
|
;; Replace straight quotes in str with curly ones.
;; The patterns run in the order listed, each on the output of the one before.
(define+provide/contract (smart-quotes str)
  (string? . -> . string?)
  (define curl-quotes
    (make-replacer
     (list (list #px"(?<=\\w)'(?=\\w)" "’")       ; apostrophe
           (list #px"(?<!\\w)'(?=\\S)" "‘")       ; single_at_beginning
           (list #px"(?<=\\S)'(?!\\w)" "’")       ; single_at_end
           (list #px"(?<!\\w)\"(?=\\S)" "“")      ; double_at_beginning
           (list #px"(?<=\\S)\"(?!\\w)" "”"))))   ; double_at_end
  (curl-quotes str))
|
|
|
|
|
|
|
|
|
|
|
|
;; insert nbsp between last two words
;;
;; Applies only to txexprs tagged p or aside; any other txexpr is returned as-is.
;; Follows the last element of x, recursively, until it reaches a string.
;; That string is split at its last space into a front part and a last word.
;; If the last word is shorter than minimum-word-length, the space is replaced
;; by nbsp; otherwise the plain space is kept.
;; The last word is passed through last-word-proc and stays a separate element.
(define+provide/contract (nonbreaking-last-space x #:nbsp [nbsp (->string #\u00A0)]
                                                 #:minimum-word-length [minimum-word-length 6]
                                                 #:last-word-proc [last-word-proc (λ(x) x)])
  ((txexpr?) (#:nbsp string? #:minimum-word-length integer? #:last-word-proc procedure?) . ->* . txexpr?)

  ;; todo: parameterize this, as it will be different for each project
  (define tags-to-pay-attention-to '(p aside)) ; only apply to paragraphs

  ;; string -> list of replacement elements (one element if str has no space)
  (define (replace-last-space str)
    (if (#\space . in? . str)
        (let ([reversed-str-list (reverse (string->list str))])
          ;; working on the reversed string, the last word comes first
          (define-values (last-word-chars other-chars)
            (splitf-at reversed-str-list (λ(i) (not (char=? i #\space)))))
          (define front-chars (if (< (len last-word-chars) minimum-word-length) ; OK for long words to be on their own line
                                  ; first char of other-chars will be the space, so use cdr
                                  (string-append (list->string (reverse (cdr other-chars))) (->string nbsp))
                                  (list->string (reverse other-chars))))
          (define last-word (list->string (reverse last-word-chars)))
          `(,front-chars ,(last-word-proc last-word))) ; don't concatenate last word bc last-word-proc might be a txexpr wrapper
        (list str)))

  (define (find-last-word-space x) ; recursively traverse xexpr
    (cond
      [(string? x) (replace-last-space x)] ; todo: this assumes a paragraph only has one string in it.
      [(txexpr? x)
       (let-values ([(tag attr elements) (txexpr->values x)])
         (if (> (length elements) 0) ; elements is list of xexprs
             (let-values ([(all-but-last last) (split-at elements (sub1 (length elements)))])
               (define result (find-last-word-space (car last)))
               ;; A list that is not a txexpr is a list of new elements to splice in.
               ;; Anything else (a txexpr, or an untouched entity / cdata) is a single
               ;; element and must be wrapped, or the splice would build an improper list.
               (define result-items (if (and (list? result) (not (txexpr? result)))
                                        result
                                        (list result)))
               (make-txexpr tag attr `(,@all-but-last ,@result-items)))
             x))]
      [else x]))

  (if ((get-tag x) . in? . tags-to-pay-attention-to)
      (find-last-word-space x)
      x))
|
|
|
|
|
|
;; Tests for nonbreaking-last-space.
(module-test-external
 ;; todo: make some tougher tests, it gets flaky with edge cases
 ;; the nonbreaking space is written as \u00A0 so it can't be mistaken for (or
 ;; silently normalized into) an ordinary space
 (check-equal? (nonbreaking-last-space '(p "Hi there")) '(p "Hi\u00A0" "there")) ; nbsp in between last two words
 (check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "Ø") '(p "HiØ" "there")) ; but let's make it visible
 (check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "_up_") '(p "Hi_up_" "there"))
 ;; "there" is not shorter than 3 characters, so the ordinary space is kept
 (check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "_up_" #:minimum-word-length 3)
               '(p "Hi " "there"))
 (check-equal? (nonbreaking-last-space '(p "Hi here" (em "ho there")) #:nbsp "Ø") '(p "Hi here" (em "hoØ" "there"))))
|
|
|
|
|
|
; wrap initial quotes for hanging punctuation
; todo: improve this
; does not handle <p>“<em>thing</em> properly
;
; Looks only at the first element of nx:
;  - a string of two or more characters that starts with a double (or single)
;    quote is replaced by a list made of double-pp (or single-pp), the curly
;    opening quote, and the rest of the string
;  - a txexpr is processed recursively, with the same prepend options
;  - anything else is left alone
; The remaining elements are unchanged.
(define+provide/contract (wrap-hanging-quotes nx
                                              #:single-prepend [single-pp '(squo)]
                                              #:double-prepend [double-pp '(dquo)])
  ((txexpr?) (#:single-prepend list? #:double-prepend list?) . ->* . txexpr?)

  (define two-or-more-char-string? (λ(i) (and (string? i) (>= (len i) 2))))
  (define-values (tag attr elements) (txexpr->values nx))
  (make-txexpr tag attr
               (if (and (list? elements) (not (empty? elements)))
                   (let ([new-car-elements (match (car elements)
                                             [(? two-or-more-char-string? tcs)
                                              (define str-first (get tcs 0))
                                              (define str-rest (get tcs 1 (len tcs)))
                                              (cond
                                                [(str-first . in? . '("\"" "“"))
                                                 ;; can wrap with any inline tag
                                                 ;; so that linebreak detection etc still works
                                                 `(,@double-pp ,(->string #\“) ,str-rest)]
                                                [(str-first . in? . '("\'" "‘"))
                                                 `(,@single-pp ,(->string #\‘) ,str-rest)]
                                                [else tcs])]
                                             ;; pass the caller's prepends down, so nested
                                             ;; quotes are wrapped the same way as top-level ones
                                             [(? txexpr? child-tx)
                                              (wrap-hanging-quotes child-tx
                                                                   #:single-prepend single-pp
                                                                   #:double-prepend double-pp)]
                                             [_ (car elements)])])
                     (cons new-car-elements (cdr elements)))
                   elements)))
|
|
|
|
|
|
;; Tests for wrap-hanging-quotes.
(module-test-external
 ;; default wrappers: dquo for double quotes, squo for single quotes
 (let ([double-quoted '(p "\"Hi\" there")]
       [single-quoted '(p "'Hi' there")])
   (check-equal? (wrap-hanging-quotes double-quoted) '(p (dquo "“" "Hi\" there")))
   (check-equal? (wrap-hanging-quotes single-quoted) '(p (squo "‘" "Hi' there")))
   ;; a custom prepend may carry attributes as well as a tag
   (check-equal? (wrap-hanging-quotes single-quoted #:single-prepend '(foo ((bar "ino"))))
                 '(p (foo ((bar "ino")) "‘" "Hi' there"))))

 ;; make sure txexpr without elements passes through unscathed
 (let ([no-elements '(div ((style "height:2em")))])
   (check-equal? (wrap-hanging-quotes no-elements) '(div ((style "height:2em"))))))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
;; Lines, blocks, paragraphs
|
|
|
|
|
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
|
|
|
|
|
|
|
;; turn the right items into <br> tags
;;
;; Every interior item of xc that is equal to the separator is either
;;  - replaced by linebreak, when neither neighbor is a block txexpr, or
;;  - deleted, when at least one neighbor is a block
;;    (because blocks automatically force a newline before & after).
;; The first and last items are never converted, even if they equal the separator.
;; The list is walked once, carrying the previous item along, rather than
;; looked up by index at each position.
(define+provide/contract (detect-linebreaks xc
                                            #:separator [newline (world:current-linebreak-separator)]
                                            #:insert [linebreak '(br)])
  ((txexpr-elements?) (#:separator string? #:insert xexpr?) . ->* . txexpr-elements?)
  ;; todo: should this test be not block + not whitespace?
  (define not-block? (λ(i) (not (block-txexpr? i))))

  ;; what a separator becomes, given the original items on either side of it
  (define (convert-or-delete prev next)
    (if (and (not-block? prev) (not-block? next))
        linebreak
        empty)) ; otherwise delete: empty is filtered out below

  (filter-not empty?
              (if (< (length xc) 3)
                  xc ; no interior items, so nothing to convert
                  (let loop ([prev (car xc)]                 ; original item just before `remaining`
                             [remaining (cdr xc)]            ; always has at least one item
                             [reversed-acc (list (car xc))]) ; skip first
                    (match remaining
                      [(list last-item) (reverse (cons last-item reversed-acc))] ; skip last
                      [(list-rest item next _)
                       (loop item
                             (cdr remaining)
                             (cons (if (equal? item newline)
                                       (convert-or-delete prev next)
                                       item)
                                   reversed-acc))])))))
|
|
|
|
|
|
;; Tests for detect-linebreaks.
(module-test-external
 ;; each example is (input expected), run with the default keyword arguments
 (for ([example (in-list '((("foo" "\n" "bar") ("foo" (br) "bar"))
                           (("\n" "foo" "\n" "bar" "\n") ("\n" "foo" (br) "bar" "\n"))
                           (((p "foo") "\n" (p "bar")) ((p "foo") (p "bar")))
                           (("foo" "\n" (p "bar")) ("foo" (p "bar")))
                           (("foo" "moo" "bar") ("foo" "moo" "bar"))
                           (("foo" "\n\n" "bar") ("foo" "\n\n" "bar"))))])
   (check-equal? (detect-linebreaks (car example)) (cadr example)))
 ;; a custom #:insert changes nothing when no item equals the separator
 (check-equal? (detect-linebreaks '("foo" "moo" "bar") #:insert "moo") '("foo" "moo" "bar")))
|
|
|
|
|
|
|
|
|
;; Is x made entirely of whitespace?
;;   ""               -> #t (empty string is deemed whitespace)
;;   string or symbol -> true when every character matches \s, or is a
;;                       nonbreaking space when nbsp? is true
;;   list or vector   -> true when non-empty and every member is whitespace
;;                       (tested recursively, with the same nbsp? setting)
;;   anything else    -> #f
(define+provide/contract (whitespace? x [nbsp? #f])
  ((any/c)(boolean?) . ->* . coerce/boolean?)
  (define pat (pregexp (format "^[\\s~a]+$" (if nbsp? #\u00A0 ""))))
  (cond
    [(equal? "" x) #t] ; empty string is deemed whitespace
    [(or (string? x) (symbol? x)) (regexp-match pat (->string x))]
    [(or (list? x) (vector? x))
     ;; emptiness is tested on the converted list, so an empty vector is
     ;; rejected just like an empty list (andmap returns #t for empty lists)
     (let ([items (->list x)])
       (and (not (empty? items))
            (andmap (λ(i) (whitespace? i nbsp?)) items)))]
    [else #f]))
|
|
|
|
|
|
;; Tests for whitespace? and whitespace/nbsp?.
(module-test-external
 (require racket/format)
 (define nbsp-only (~a #\u00A0))
 (define nbsp-between-spaces (format " ~a " #\u00A0))
 (check-true (whitespace? " "))
 ;; a nonbreaking space counts only for the /nbsp variant
 (check-false (whitespace? nbsp-only))
 (check-true (whitespace/nbsp? nbsp-only))
 (check-true (whitespace/nbsp? (vector nbsp-only)))
 (check-false (whitespace? nbsp-between-spaces))
 (check-true (whitespace/nbsp? nbsp-between-spaces)))
|
|
|
|
|
|
|
|
|
;; Same test as whitespace?, but nonbreaking spaces also count as whitespace.
(define+provide/contract (whitespace/nbsp? x)
  (any/c . -> . coerce/boolean?)
  (let ([count-nbsp-as-whitespace #t])
    (whitespace? x count-nbsp-as-whitespace)))
|
|
|
|
|
|
|
|
|
;; is x a paragraph break?
;;
;; True when x is a string that matches the pattern "^<sep>+$" in full.
;; sep is inserted into the pattern as regexp text, so it is normally a string.
;; A pregexp is still accepted for backward compatibility; its source text is
;; used, since formatting the regexp object itself would put its printed form
;; (with the #px prefix and quotes) into the pattern.
;; NOTE(review): the + binds only to the last character of sep, so a "\n\n"
;; separator matches two or more newlines — confirm that is what's wanted.
;; NOTE(review): the default comes from world:current-paragraph-separator and is
;; presumably a string — confirm against world.rkt.
(define+provide/contract (paragraph-break? x #:separator [sep (world:current-paragraph-separator)])
  ((any/c) (#:separator (or/c string? pregexp?)) . ->* . coerce/boolean?)
  (define sep-source (if (pregexp? sep) (object-name sep) sep))
  (define paragraph-pattern (pregexp (format "^~a+$" sep-source)))
  (and (string? x) (regexp-match paragraph-pattern x)))
|
|
|
|
|
|
|
|
|
|
|
|
;; True when x is a string equal to the value of (world:current-newline).
(define (newline? x)
  (if (string? x)
      (equal? (world:current-newline) x)
      #f))
|
|
|
;; Logical opposite of newline?.
(define (not-newline? x)
  (if (newline? x) #f #t))
|
|
|
|
|
|
;; Merge each run of adjacent newline items in xs into one string.
;; All other items are kept, in their original order.
;; acc holds items already processed; the result is acc followed by the
;; merged items of xs.
;; The output is built in reverse and flipped once at the end, so each step
;; conses onto the front instead of appending to the tail.
(define (do-merge xs [acc '()])
  (let loop ([remaining xs]
             [reversed-acc (reverse acc)])
    (if (empty? remaining)
        (reverse reversed-acc)
        ;; Try to peel the newlines off the front.
        (let-values ([(leading-newlines remainder) (splitf-at remaining newline?)])
          (if (not (empty? leading-newlines)) ; if you got newlines ...
              ;; combine them into a string and add it to the accumulator,
              ;; and continue with the rest
              (loop remainder
                    (cons (apply string-append leading-newlines) reversed-acc))
              ;; otherwise peel off elements up to the next newline, add them to the
              ;; accumulator, and continue with the rest.
              ;; `remainder` starts with a non-newline here, so the run is non-empty.
              (let-values ([(non-newline-run after-run) (splitf-at remainder not-newline?)])
                (loop after-run
                      (append (reverse non-newline-run) reversed-acc))))))))
|
|
|
|
|
|
|
|
|
;; Find adjacent newline characters in a list and merge them into one item.
;; Scribble, by default, makes each newline a separate list item.
;; In practice, that separation is worthless.
;; A list is processed recursively: every member is merged first, then runs of
;; newlines in the list itself. A non-list value is returned unchanged.
(define+provide/contract (merge-newlines x)
  (txexpr-elements? . -> . txexpr-elements?)
  (if (list? x)
      (let ([merged-members (map merge-newlines x)])
        (do-merge merged-members))
      x))
|
|
|
|
|
|
;; Runs of newlines merge at every nesting level; single newlines stay as they are.
(module-test-external
 (let ([scattered '(p "\n" "foo" "\n" "\n" "bar" (em "\n" "\n" "\n"))]
       [merged '(p "\n" "foo" "\n\n" "bar" (em "\n\n\n"))])
   (check-equal? (merge-newlines scattered) merged)))
|
|
|
|
|
|
|
|
|
|
|
|
;; detect paragraphs
;; todo: unit tests
;;
;; Steps:
;;  1. trim whitespace items from the ends of elements, merge newline runs, and
;;     run linebreak-proc over the result
;;  2. if any item is a paragraph break (per sep) or a block txexpr, slice the
;;     list at block txexprs, split the slices at paragraph breaks, and wrap
;;     each piece in tag — except a piece made only of block txexprs, which is
;;     left as it is
;;  3. otherwise return the prepared elements unchanged, unless force? is true,
;;     in which case they are wrapped anyway
(define+provide/contract (detect-paragraphs elements #:tag [tag 'p]
                                            #:separator [sep (world:current-paragraph-separator)]
                                            #:linebreak-proc [linebreak-proc detect-linebreaks]
                                            #:force? [force-paragraph #f])
  ((txexpr-elements?) (#:tag symbol? #:separator string? #:linebreak-proc (txexpr-elements? . -> . txexpr-elements?) #:force? boolean?)
                      . ->* . txexpr-elements?)

  ;; explicit break: a string matching the separator (forced to a plain boolean)
  (define (separator-break? x)
    (and (paragraph-break? x #:separator sep) #t))

  ;; explicit break, or the implicit break that a block txexpr causes
  (define (explicit-or-implicit-break? x)
    (or (separator-break? x) (block-txexpr? x)))

  (define (split-at-separators es)
    (filter-split es separator-break?))

  ;; returns a list, so that append-map can splice in zero or more results
  (define (wrap-paragraph elems)
    (match elems
      [(list (? block-txexpr? bxs) ...) bxs] ; leave a series of block xexprs alone
      [_ (list (make-txexpr tag empty elems))])) ; otherwise wrap in p tag

  ;; prepare elements for paragraph testing
  (define prepared-elements
    (linebreak-proc (merge-newlines (trimf elements whitespace?))))

  (cond
    ;; need this condition to prevent infinite recursion
    [(ormap explicit-or-implicit-break? prepared-elements)
     ;; split into ¶¶, using both implied and explicit paragraph breaks
     (append-map wrap-paragraph
                 (append-map split-at-separators
                             (slicef prepared-elements block-txexpr?)))]
    ;; upconverts non-block elements to paragraphs
    [force-paragraph
     (append-map wrap-paragraph (slicef prepared-elements block-txexpr?))]
    [else prepared-elements]))
|
|
|
|
|
|
;; Tests for detect-paragraphs.
(module-test-external
 ;; default keyword arguments; each example is (input expected)
 (for ([example (in-list
                 '((("First para" "\n\n" "Second para")
                    ((p "First para") (p "Second para")))
                   (("First para" "\n\n" "Second para" "\n" "Second line")
                    ((p "First para") (p "Second para" (br) "Second line")))
                   (("First para" "\n\n" (div "Second block"))
                    ((p "First para") (div "Second block")))
                   (((div "First block") "\n\n" (div "Second block"))
                    ((div "First block") (div "Second block")))
                   (("foo" "\n\n" (div "bar") (div "zam"))
                    ((p "foo") (div "bar") (div "zam")))
                   (("foo" "\n\n" (div "bar") "\n\n" (div "zam"))
                    ((p "foo") (div "bar") (div "zam")))
                   (("foo")
                    ("foo"))
                   (((div "foo"))
                    ((div "foo")))
                   (("foo" "\n\n" (div "bar"))
                    ((p "foo") (div "bar")))
                   (("foo" (div "bar"))
                    ((p "foo") (div "bar")))
                   (("foo" (div "bar") "zam")
                    ((p "foo") (div "bar") (p "zam")))
                   (("foo" (span "zing") (div "bar") "zam")
                    ((p "foo" (span "zing")) (div "bar") (p "zam")))))])
   (check-equal? (detect-paragraphs (car example)) (cadr example)))

 ;; #:force? #t wraps loose elements even when no paragraph break is present
 (for ([example (in-list
                 '((("foo")
                    ((p "foo")))
                   (((div "foo"))
                    ((div "foo")))
                   (("foo" (div "bar"))
                    ((p "foo") (div "bar")))
                   (("foo" (span "zing") (div "bar") "zam")
                    ((p "foo" (span "zing")) (div "bar") (p "zam")))))])
   (check-equal? (detect-paragraphs (car example) #:force? #t) (cadr example)))

 ;; custom paragraph tag
 (check-equal? (detect-paragraphs '("First para" "\n\n" "Second para") #:tag 'ns:p)
               '((ns:p "First para") (ns:p "Second para")))

 ;; custom linebreak proc
 (check-equal? (detect-paragraphs '("First para" "\n\n" "Second para" "\n" "Second line")
                                  #:linebreak-proc (λ(x) (detect-linebreaks x #:insert '(newline))))
               '((p "First para") (p "Second para" (newline) "Second line"))))