diff --git a/decode.rkt b/decode.rkt index c4d2e59..2687b04 100644 --- a/decode.rkt +++ b/decode.rkt @@ -1,60 +1,11 @@ #lang racket/base -(require racket/contract racket/list racket/string racket/match) -(require (only-in racket/format ~a)) -(require (only-in racket/bool nor)) +(require racket/contract) (require (only-in xml xexpr/c)) -(module+ test (require rackunit)) - (require "tools.rkt") -(provide (all-defined-out)) - - -;; Find adjacent newline characters in a list and merge them into one item -;; Scribble, by default, makes each newline a separate list item -;; In practice, this is worthless. -(define/contract (merge-newlines x) - (list? . -> . list?) - (define (newline? x) - (and (string? x) (equal? "\n" x))) - (define (not-newline? x) - (not (newline? x))) - - (define (really-merge-newlines xs [acc '()]) - (if (empty? xs) - acc - ;; Try to peel the newlines off the front. - (let-values ([(leading-newlines remainder) (splitf-at xs newline?)]) - (if (not (empty? leading-newlines)) ; if you got newlines ... - ;; combine them into a string and append them to the accumulator, - ;; and recurse on the rest - (really-merge-newlines remainder (append acc (list (string-join leading-newlines "")))) - ;; otherwise peel off elements up to the next newline, append them to accumulator, - ;; and recurse on the rest - (really-merge-newlines (dropf remainder not-newline?) - (append acc (takef remainder not-newline?))))))) - - (cond - [(list? x) (really-merge-newlines (map merge-newlines x))] - [else x])) - -(module+ test - (check-equal? (merge-newlines '(p "\n" "foo" "\n" "\n" "bar" (em "\n" "\n" "\n"))) - '(p "\n" "foo" "\n\n" "bar" (em "\n\n\n")))) +(module+ test (require rackunit)) - -;; todo: add native support for list-xexpr -;; decode triple newlines to list items - - -;; convert numbers to strings -;; maybe this isn't necessary -(define (stringify x) - (map-tree (λ(i) (if (number? i) (->string i) i)) x)) - -(module+ test - (check-equal? (stringify '(p 1 2 "foo" (em 4 "bar"))) '(p "1" "2" "foo" (em "4" "bar")))) - +(provide (all-defined-out)) ;; decoder wireframe @@ -66,15 +17,19 @@ #:block-xexpr-proc [block-xexpr-proc (λ(x)x)] #:inline-xexpr-proc [inline-xexpr-proc (λ(x)x)] #:string-proc [string-proc (λ(x)x)]) - ;; use xexpr/c for contract because it gives better error messages - ((xexpr/c) (#:exclude-xexpr-tags list? - #:xexpr-tag-proc procedure? - #:xexpr-attr-proc procedure? - #:xexpr-elements-proc procedure? - #:block-xexpr-proc procedure? - #:inline-xexpr-proc procedure? - #:string-proc procedure?) - . ->* . tagged-xexpr?) + ((xexpr/c) ;; use xexpr/c for contract on nx because it gives better error messages + + ;; todo: how to write more specific contracts for these procedures? + ;; e.g., string-proc should be restricted to procs that accept a string as input + ;; and return a string as output + (#:exclude-xexpr-tags list? + #:xexpr-tag-proc procedure? + #:xexpr-attr-proc procedure? + #:xexpr-elements-proc procedure? + #:block-xexpr-proc procedure? + #:inline-xexpr-proc procedure? + #:string-proc procedure?) + . ->* . tagged-xexpr?) (when (not (tagged-xexpr? nx)) (error (format "decode: ~v not a full tagged-xexpr" nx))) @@ -85,7 +40,7 @@ (if (tag . in? . excluded-xexpr-tags) x ; let x pass through untouched (let ([decoded-xexpr (apply make-tagged-xexpr - (map &decode (list tag attr elements)))]) + (map &decode (list tag attr elements)))]) ((if (block-xexpr? decoded-xexpr) block-xexpr-proc inline-xexpr-proc) decoded-xexpr))))] diff --git a/library/decode-tools.rkt b/library/decode-tools.rkt new file mode 100644 index 0000000..f6e4065 --- /dev/null +++ b/library/decode-tools.rkt @@ -0,0 +1,259 @@ +#lang racket/base +(require racket/contract racket/list racket/string racket/match) +(require "../readability.rkt" "../predicates.rkt" "../tools.rkt") + +(module+ test (require rackunit)) + +(provide (all-defined-out)) + +;; This module is a library of functions to be used in building pollen decoders. + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Typography + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + + +;; insert typographic niceties +;; ligatures are handled in css +(define (typogrify str) + (string? . -> . string?) + ;; make set of functions for replacers + (define (make-replacer query replacement) + (λ(str) (regexp-replace* query str replacement))) + + ;; just store the query strings + replacement strings + (define dashes + ;; fix em dashes first, else they'll be mistaken for en dashes + ;; [\\s ] is whitespace + nonbreaking space + '((#px"[\\s ]*(---|—)[\\s ]*" "—") ; em dash + (#px"[\\s ]*(--|–)[\\s ]*" "–"))) ; en dash + + (define smart-quotes + '((#px"(?<=\\w)'(?=\\w)" "’") ; apostrophe + (#px"(?string #\u00A0)] + #:minimum-word-length [minimum-word-length 6]) + ((tagged-xexpr?) (#:nbsp string? #:minimum-word-length integer?) . ->* . tagged-xexpr?) + ;; todo: parameterize this, as it will be different for each project + (define tags-to-pay-attention-to '(p aside)) ; only apply to paragraphs + + (define (replace-last-space str) + (if (#\space . in? . str) + (let ([reversed-str-list (reverse (string->list str))] + [reversed-nbsp (reverse (string->list nbsp))]) + (define-values (last-word-chars other-chars) + (splitf-at reversed-str-list (λ(i) (not (eq? i #\space))))) + (list->string (reverse (append last-word-chars + ; OK for long words to be on their own line. + (if (< (len last-word-chars) minimum-word-length) + ; first char of other-chars will be the space, so use cdr + (append reversed-nbsp (cdr other-chars)) + other-chars))))) + str)) + + (define (find-last-word-space x) ; recursively traverse xexpr + (cond + [(string? x) (replace-last-space x)] + [(tagged-xexpr? x) + (let-values([(tag attr elements) (break-tagged-xexpr x)]) + (if (> (length elements) 0) ; elements is list of xexprs + (let-values ([(all-but-last last) (split-at elements (sub1 (length elements)))]) + (make-tagged-xexpr tag attr `(,@all-but-last ,(find-last-word-space (car last))))) + x))] + [else x])) + + (if ((car x) . in? . tags-to-pay-attention-to) + (find-last-word-space x) + x)) + +;; todo: make some tougher tests, it gets flaky with edge cases +(module+ test + (check-equal? (nonbreaking-last-space '(p "Hi there")) '(p "Hi there")) ; nbsp in between last two words + (check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "Ø") '(p "HiØthere")) ; but let's make it visible + (check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "_up_") '(p "Hi_up_there")) + (check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "_up_" #:minimum-word-length 3) + '(p "Hi there")) + (check-equal? (nonbreaking-last-space '(p "Hi here" (em "ho there")) #:nbsp "Ø") '(p "Hi here" (em "hoØthere")))) + + +; wrap initial quotes for hanging punctuation +; todo: improve this +; does not handle
“thing properly
+(define/contract (wrap-hanging-quotes nx
+ #:single-prepend [single-pp '(squo)]
+ #:double-prepend [double-pp '(dquo)])
+ ((tagged-xexpr?) (#:single-prepend list? #:double-prepend list?) . ->* . tagged-xexpr?)
+ (define two-char-string? (λ(i) (and (string? i) (>= (len i) 2))))
+ (define-values (tag attr elements) (break-tagged-xexpr nx))
+ (define new-car-elements
+ (match (car elements)
+ [(? two-char-string? tcs)
+ (define str-first (get tcs 0))
+ (define str-rest (get tcs 1 'end))
+ (cond
+ [(str-first . in? . '("\"" "“"))
+ ;; can wrap with any inline tag
+ ;; so that linebreak detection etc still works
+ `(,@double-pp ,(->string #\“) ,str-rest)]
+ [(str-first . in? . '("\'" "‘"))
+ `(,@single-pp ,(->string #\‘) ,str-rest)]
+ [else tcs])]
+ [(? tagged-xexpr? nx) (wrap-hanging-quotes nx)]
+ [else (car elements)]))
+ (make-tagged-xexpr tag attr (cons new-car-elements (cdr elements))))
+
+
+(module+ test
+ (check-equal? (wrap-hanging-quotes '(p "\"Hi\" there")) '(p (dquo "“" "Hi\" there")))
+ (check-equal? (wrap-hanging-quotes '(p "'Hi' there")) '(p (squo "‘" "Hi' there")))
+ (check-equal? (wrap-hanging-quotes '(p "'Hi' there") #:single-prepend '(foo ((bar "ino"))))
+ '(p (foo ((bar "ino")) "‘" "Hi' there"))))
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Lines, blocks, paragraphs
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+;; turn the right items into
tags
+(define/contract (convert-linebreaks xc #:newline [newline "\n"])
+ ((xexpr-elements?) (#:newline string?) . ->* . xexpr-elements?)
+ ;; todo: should this test be not block + not whitespace?
+ (define not-block? (λ(i) (not (block-xexpr? i))))
+ (filter-not empty?
+ (for/list ([i (len xc)])
+ (let ([item (get xc i)])
+ (cond
+ ;; skip first and last
+ [(or (= i 0) (= i (sub1 (len xc)))) item]
+ [(equal? item newline)
+ (match (get xc (- i 1) (+ i 2)) ; a three-element slice with x[i] in the middle
+ ;; only convert if neither adjacent tag is a block
+ ;; (because blocks automatically force a newline before & after)
+ [(list (? not-block?) newline (? not-block?)) '(br)]
+ [else empty])] ; otherwise delete
+ [else item])))))
+
+(module+ test
+ (check-equal? (convert-linebreaks '("foo" "\n" "bar")) '("foo" (br) "bar"))
+ (check-equal? (convert-linebreaks '("\n" "foo" "\n" "bar" "\n")) '("\n" "foo" (br) "bar" "\n"))
+ (check-equal? (convert-linebreaks '((p "foo") "\n" (p "bar"))) '((p "foo") (p "bar")))
+ (check-equal? (convert-linebreaks '("foo" "\n" (p "bar"))) '("foo" (p "bar")))
+ (check-equal? (convert-linebreaks '("foo" "moo" "bar")) '("foo" "moo" "bar"))
+ (check-equal? (convert-linebreaks '("foo" "moo" "bar") #:newline "moo") '("foo" (br) "bar"))
+ (check-equal? (convert-linebreaks '("foo" "\n\n" "bar")) '("foo" "\n\n" "bar")))
+
+
+
+;; is x a paragraph break?
+(define/contract (paragraph-break? x #:pattern [paragraph-pattern #px"^\n\n+$"])
+ ((any/c) (#:pattern pregexp?) . ->* . boolean?)
+ (and (string? x) (->boolean (regexp-match paragraph-pattern x))))
+
+(module+ test
+ (check-false (paragraph-break? "foo"))
+ (check-false (paragraph-break? "\n"))
+ (check-false (paragraph-break? "\n \n"))
+ (check-true (paragraph-break? "\n \n" #:pattern #px"^\n \n$"))
+ (check-true (paragraph-break? "\n\n"))
+ (check-true (paragraph-break? "\n\n\n")))
+
+
+
+;; Find adjacent newline characters in a list and merge them into one item
+;; Scribble, by default, makes each newline a separate list item
+;; In practice, this is worthless.
+(define/contract (merge-newlines x)
+ (list? . -> . list?)
+ (define (newline? x)
+ (and (string? x) (equal? "\n" x)))
+ (define (not-newline? x)
+ (not (newline? x)))
+
+ (define (really-merge-newlines xs [acc '()])
+ (if (empty? xs)
+ acc
+ ;; Try to peel the newlines off the front.
+ (let-values ([(leading-newlines remainder) (splitf-at xs newline?)])
+ (if (not (empty? leading-newlines)) ; if you got newlines ...
+ ;; combine them into a string and append them to the accumulator,
+ ;; and recurse on the rest
+ (really-merge-newlines remainder (append acc (list (string-join leading-newlines ""))))
+ ;; otherwise peel off elements up to the next newline, append them to accumulator,
+ ;; and recurse on the rest
+ (really-merge-newlines (dropf remainder not-newline?)
+ (append acc (takef remainder not-newline?)))))))
+
+ (cond
+ [(list? x) (really-merge-newlines (map merge-newlines x))]
+ [else x]))
+
+(module+ test
+ (check-equal? (merge-newlines '(p "\n" "foo" "\n" "\n" "bar" (em "\n" "\n" "\n")))
+ '(p "\n" "foo" "\n\n" "bar" (em "\n\n\n"))))
+
+
+
+;; todo: add native support for list-xexpr
+;; decode triple newlines to list items
+
+
+;; prepare elements for paragraph testing
+(define/contract (prep-paragraph-flow xc)
+ (xexpr-elements? . -> . xexpr-elements?)
+ (convert-linebreaks (merge-newlines (trim xc whitespace?))))
+
+(module+ test
+ (check-equal? (prep-paragraph-flow '("\n" "foo" "\n" "\n" "bar" "\n" "ino" "\n"))
+ '("foo" "\n\n" "bar" (br) "ino")))
+
+;; apply paragraph tag
+(define/contract (wrap-paragraph xc #:tag [tag 'p])
+ ((xexpr-elements?) (#:tag symbol?) . ->* . block-xexpr?)
+ (match xc
+ [(list (? block-xexpr? bx)) bx] ; leave a single block xexpr alone
+ [else (make-tagged-xexpr tag empty xc)])) ; otherwise wrap in p tag
+
+(module+ test
+ (check-equal? (wrap-paragraph '("foo" "bar")) '(p "foo" "bar"))
+ (check-equal? (begin (register-block-tag 'para) (wrap-paragraph #:tag 'para '("foo" "bar")))
+ '(para "foo" "bar"))
+ (check-equal? (wrap-paragraph '((p "bar" "foo"))) '(p "bar" "foo"))
+ (check-equal? (wrap-paragraph '((div "bar" "foo") "Hi" )) '(p (div "bar" "foo") "Hi")))
+
+
+;; detect paragraphs
+;; todo: unit tests
+(define/contract (detect-paragraphs elements)
+ (xexpr-elements? . -> . xexpr-elements?)
+ (let ([elements (prep-paragraph-flow elements)])
+ (if (ormap paragraph-break? elements) ; need this condition to prevent infinite recursion
+ (map wrap-paragraph (splitf-at* elements paragraph-break?)) ; split into ¶¶
+ elements)))
\ No newline at end of file
diff --git a/main-helper.rkt b/main-helper.rkt
index ee943d2..0bcaefc 100644
--- a/main-helper.rkt
+++ b/main-helper.rkt
@@ -13,26 +13,23 @@
;; Look for an EXTRAS_DIR directory local to the source file.
;; and require all the .rkt files therein.
;; optionally provide them.
-(define-syntax (require-extras stx #:provide [provide #f])
+
+(define-syntax (require-and-provide-extras stx)
+ (cond
+ [(directory-exists? EXTRAS_DIR)
+ (let ([files-in-require-form (make-files-in-require-form EXTRAS_DIR)])
+ (datum->syntax stx `(begin
+ (require ,@files-in-require-form)
+ (provide (all-from-out ,@files-in-require-form)))))]
+ ; if no files to import, do nothing
+ [else #'(begin)]))
+
+(define-syntax (require-extras stx)
(cond
[(directory-exists? EXTRAS_DIR)
- ;; This will be resolved in the context of current-directory.
- ;; So when called from outside the project directory,
- ;; current-directory must be properly set with 'parameterize'
- (define (make-complete-path path)
- ;; todo: document why this function is necessary (it definitely is, but I forgot why)
- (define-values (start_dir name _ignore) (split-path (path->complete-path path)))
- (build-path start_dir EXTRAS_DIR name))
- (define files (map make-complete-path (filter (λ(i) (has-ext? i 'rkt)) (directory-list EXTRAS_DIR))))
- (define files-in-require-form
- (map (λ(file) `(file ,(->string file))) files))
- (datum->syntax stx
- (if provide
- `(begin
- (require ,@files-in-require-form)
- (provide (all-from-out ,@files-in-require-form)))
- `(begin
- (require ,@files-in-require-form))))]
+ (let ([files-in-require-form (make-files-in-require-form EXTRAS_DIR)])
+ (datum->syntax stx `(begin
+ (require ,@files-in-require-form))))]
; if no files to import, do nothing
[else #'(begin)]))
diff --git a/main-pre.rkt b/main-pre.rkt
index 3a7a15c..bdea615 100644
--- a/main-pre.rkt
+++ b/main-pre.rkt
@@ -1,7 +1,7 @@
#lang racket/base
(require (only-in (planet mb/pollen/readability) ->list)
- (only-in (planet mb/pollen/decode) trim)
+ (only-in (planet mb/pollen/tools) trim)
(only-in (planet mb/pollen/predicates) whitespace?))
(provide (except-out (all-from-out racket/base) #%module-begin)
@@ -21,7 +21,7 @@
; helpful because it collects & exports content via 'doc
(module pollen-inner (planet mb/pollen/lang/doclang2_raw)
(require (planet mb/pollen/tools) (planet mb/pollen/main-helper))
- (require-extras #:provide #t) ; brings in the project require files
+ (require-and-provide-extras) ; brings in the project require files
expr ...) ; body of module
diff --git a/main.rkt b/main.rkt
index efb86ee..4160cfd 100644
--- a/main.rkt
+++ b/main.rkt
@@ -28,7 +28,7 @@
;; use same requires as top of main.rkt
;; (can't import them from surrounding module due to submodule rules)
(require (planet mb/pollen/tools) (planet mb/pollen/main-helper))
- (require-extras #:provide #t) ; brings in the project require files
+ (require-and-provide-extras) ; brings in the project require files
;; #%top binding catches ids that aren't defined
;; here, convert them to basic xexpr
@@ -63,7 +63,7 @@
[(string? doc) (list doc)]
[(tagged-xexpr? doc) (list doc)] ; if it's a single nx, just leave it
[(list? doc) doc]))) ; if it's nx content, splice it in
-
+
;; split out the metas now (in raw form)
(define-values (metas-raw main-raw)
@@ -76,14 +76,15 @@
;; Unlike Scribble, which insists on decoding,
;; Pollen just passes through the minimally processed data.
;; one exception: if file extension marks it as pmap, send it to the pmap decoder instead.
- (define pmap-source?
+
;; this tests inner-here (which is always the file name)
- ;; rather than (get metas 'here) which might have been overridden.
- ;; Because if it's overridden to something other than *.pmap,
- ;; pmap processing will fail.
- ;; This defeats rule that pmap file suffix triggers pmap decoding.
- ((->path inner-here) . has-ext? . POLLEN_MAP_EXT))
- (define main (apply (if pmap-source?
+ ;; rather than (get metas 'here) which might have been overridden.
+ ;; Because if it's overridden to something other than *.pmap,
+ ;; pmap processing will fail.
+ ;; This defeats rule that pmap file suffix triggers pmap decoding.
+ (define here-is-pmap? (pmap-source? (->path inner-here)))
+
+ (define main (apply (if here-is-pmap?
;; pmap source files will go this way,
pmap-source-decode
;; ... but other files, including pollen, will go this way.
@@ -98,12 +99,12 @@
(module+ main
(displayln ";-------------------------")
- (displayln (string-append "; pollen decoded 'main" (if source-is-pmap? " (as pmap)" "")))
+ (displayln (string-append "; pollen decoded 'main" (if here-is-pmap? " (as pmap)" "")))
(displayln ";-------------------------")
main
(displayln "")
- (if source-is-pmap?
+ (if here-is-pmap?
(displayln (format "(pmap? main) ~a" (pmap? main)))
(displayln (format "(tagged-xexpr? main) ~a" (tagged-xexpr? main))))
(displayln "")
diff --git a/regenerate.rkt b/regenerate.rkt
index 77d8dfa..119b56b 100644
--- a/regenerate.rkt
+++ b/regenerate.rkt
@@ -289,8 +289,10 @@
[current-directory source-dir]
[current-output-port (open-output-nowhere)])
(namespace-require 'racket) ; use namespace-require for FIRST require, then eval after
+ ;; for include-template (used below)
(eval '(require web-server/templates) (current-namespace))
- (eval '(require (planet mb/pollen/pmap)) (current-namespace))
+ ;; for pmap navigation functions, and template commands
+ (eval '(require (planet mb/pollen/pmap)(planet mb/pollen/template)) (current-namespace))
;; import source into eval space. This sets up main & metas
(eval `(require ,(path->string source-name)) (current-namespace))
(eval `(include-template #:command-char ,TEMPLATE_FIELD_DELIMITER ,(->string template-name)) (current-namespace))))
diff --git a/tests/requires/include-me.rkt b/tests/requires/include-me.rkt
index cc57eb1..191943e 100644
--- a/tests/requires/include-me.rkt
+++ b/tests/requires/include-me.rkt
@@ -1,83 +1,17 @@
#lang racket/base
(require racket/contract racket/list racket/match)
-(require (planet mb/pollen/tools) (planet mb/pollen/decode))
+(require (planet mb/pollen/tools) (planet mb/pollen/decode)
+ (planet mb/pollen/library/decode-tools))
(provide (all-defined-out))
(module+ test (require rackunit))
-
;; register custom block tags
(register-block-tag 'bloq)
(register-block-tag 'fooble)
-
-;; is x a paragraph break?
-(define/contract (paragraph-break? x #:pattern [paragraph-pattern #px"^\n\n+$"])
- ((any/c) (#:pattern pregexp?) . ->* . boolean?)
- (and (string? x) (->boolean (regexp-match paragraph-pattern x))))
-
-(module+ test
- (check-false (paragraph-break? "foo"))
- (check-false (paragraph-break? "\n"))
- (check-false (paragraph-break? "\n \n"))
- (check-true (paragraph-break? "\n \n" #:pattern #px"^\n \n$"))
- (check-true (paragraph-break? "\n\n"))
- (check-true (paragraph-break? "\n\n\n")))
-
-
-(define/contract (convert-linebreaks xc #:newline [newline "\n"])
- ((xexpr-elements?) (#:newline string?) . ->* . xexpr-elements?)
- ;; todo: should this test be not block + not whitespace?
- (define not-block? (λ(i) (not (block-xexpr? i))))
- (filter-not empty?
- (for/list ([i (len xc)])
- (let ([item (get xc i)])
- (cond
- ;; skip first and last
- [(or (= i 0) (= i (sub1 (len xc)))) item]
- [(equal? item newline)
- (match (get xc (- i 1) (+ i 2)) ; a three-element slice with x[i] in the middle
- ;; only convert if neither adjacent tag is a block
- ;; (because blocks automatically force a newline before & after)
- [(list (? not-block?) newline (? not-block?)) '(br)]
- [else empty])] ; otherwise delete
- [else item])))))
-
-(module+ test
- (check-equal? (convert-linebreaks '("foo" "\n" "bar")) '("foo" (br) "bar"))
- (check-equal? (convert-linebreaks '("\n" "foo" "\n" "bar" "\n")) '("\n" "foo" (br) "bar" "\n"))
- (check-equal? (convert-linebreaks '((p "foo") "\n" (p "bar"))) '((p "foo") (p "bar")))
- (check-equal? (convert-linebreaks '("foo" "\n" (p "bar"))) '("foo" (p "bar")))
- (check-equal? (convert-linebreaks '("foo" "moo" "bar")) '("foo" "moo" "bar"))
- (check-equal? (convert-linebreaks '("foo" "moo" "bar") #:newline "moo") '("foo" (br) "bar"))
- (check-equal? (convert-linebreaks '("foo" "\n\n" "bar")) '("foo" "\n\n" "bar")))
-
-;; prepare elements for paragraph testing
-(define/contract (prep-paragraph-flow xc)
- (xexpr-elements? . -> . xexpr-elements?)
- (convert-linebreaks (merge-newlines (trim xc whitespace?))))
-
-(module+ test
- (check-equal? (prep-paragraph-flow '("\n" "foo" "\n" "\n" "bar" "\n" "ino" "\n"))
- '("foo" "\n\n" "bar" (br) "ino")))
-
-;; apply paragraph tag
-(define/contract (wrap-paragraph xc #:tag [tag 'p])
- ((xexpr-elements?) (#:tag symbol?) . ->* . block-xexpr?)
- (match xc
- [(list (? block-xexpr? bx)) bx] ; leave a single block xexpr alone
- [else (make-tagged-xexpr tag empty xc)])) ; otherwise wrap in p tag
-
-(module+ test
- (check-equal? (wrap-paragraph '("foo" "bar")) '(p "foo" "bar"))
- (check-equal? (begin (register-block-tag 'para) (wrap-paragraph #:tag 'para '("foo" "bar")))
- '(para "foo" "bar"))
- (check-equal? (wrap-paragraph '((p "bar" "foo"))) '(p "bar" "foo"))
- (check-equal? (wrap-paragraph '((div "bar" "foo") "Hi" )) '(p (div "bar" "foo") "Hi")))
-
-
;; detect paragraphs
;; todo: unit tests
(define/contract (xexpr-elements-proc elements)
@@ -88,129 +22,11 @@
elements)))
-;; insert nbsp between last two words
-(define/contract (nonbreaking-last-space x
- #:nbsp [nbsp (->string #\u00A0)]
- #:minimum-word-length [minimum-word-length 6])
- ((tagged-xexpr?) (#:nbsp string? #:minimum-word-length integer?) . ->* . tagged-xexpr?)
- ;; todo: parameterize this, as it will be different for each project
- (define tags-to-pay-attention-to '(p aside)) ; only apply to paragraphs
-
- (define (replace-last-space str)
- (if (#\space . in? . str)
- (let ([reversed-str-list (reverse (string->list str))]
- [reversed-nbsp (reverse (string->list nbsp))])
- (define-values (last-word-chars other-chars)
- (splitf-at reversed-str-list (λ(i) (not (eq? i #\space)))))
- (list->string (reverse (append last-word-chars
- ; OK for long words to be on their own line.
- (if (< (len last-word-chars) minimum-word-length)
- ; first char of other-chars will be the space, so use cdr
- (append reversed-nbsp (cdr other-chars))
- other-chars)))))
- str))
-
- (define (find-last-word-space x) ; recursively traverse xexpr
- (cond
- [(string? x) (replace-last-space x)]
- [(tagged-xexpr? x)
- (let-values([(tag attr elements) (break-tagged-xexpr x)])
- (if (> (length elements) 0) ; elements is list of xexprs
- (let-values ([(all-but-last last) (split-at elements (sub1 (length elements)))])
- (make-tagged-xexpr tag attr `(,@all-but-last ,(find-last-word-space (car last)))))
- x))]
- [else x]))
-
- (if ((car x) . in? . tags-to-pay-attention-to)
- (find-last-word-space x)
- x))
-
-;; todo: make some tougher tests, it gets flaky with edge cases
-(module+ test
- (check-equal? (nonbreaking-last-space '(p "Hi there")) '(p "Hi there")) ; nbsp in between last two words
- (check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "Ø") '(p "HiØthere")) ; but let's make it visible
- (check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "_up_") '(p "Hi_up_there"))
- (check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "_up_" #:minimum-word-length 3)
- '(p "Hi there"))
- (check-equal? (nonbreaking-last-space '(p "Hi here" (em "ho there")) #:nbsp "Ø") '(p "Hi here" (em "hoØthere"))))
-
-
-; wrap initial quotes for hanging punctuation
-; todo: improve this
-; does not handle
“thing properly -(define/contract (wrap-hanging-quotes nx - #:single-prepend [single-pp '(squo)] - #:double-prepend [double-pp '(dquo)]) - ((tagged-xexpr?) (#:single-prepend list? #:double-prepend list?) . ->* . tagged-xexpr?) - (define two-char-string? (λ(i) (and (string? i) (>= (len i) 2)))) - (define-values (tag attr elements) (break-tagged-xexpr nx)) - (define new-car-elements - (match (car elements) - [(? two-char-string? tcs) - (define str-first (get tcs 0)) - (define str-rest (get tcs 1 'end)) - (cond - [(str-first . in? . '("\"" "“")) - ;; can wrap with any inline tag - ;; so that linebreak detection etc still works - `(,@double-pp ,(->string #\“) ,str-rest)] - [(str-first . in? . '("\'" "‘")) - `(,@single-pp ,(->string #\‘) ,str-rest)] - [else tcs])] - [(? tagged-xexpr? nx) (wrap-hanging-quotes nx)] - [else (car elements)])) - (make-tagged-xexpr tag attr (cons new-car-elements (cdr elements)))) - - -(module+ test - (check-equal? (wrap-hanging-quotes '(p "\"Hi\" there")) '(p (dquo "“" "Hi\" there"))) - (check-equal? (wrap-hanging-quotes '(p "'Hi' there")) '(p (squo "‘" "Hi' there"))) - (check-equal? (wrap-hanging-quotes '(p "'Hi' there") #:single-prepend '(foo ((bar "ino")))) - '(p (foo ((bar "ino")) "‘" "Hi' there")))) - - -(define (block-xexpr-proc bx) +(define/contract (block-xexpr-proc bx) (tagged-xexpr? . -> . tagged-xexpr?) (wrap-hanging-quotes (nonbreaking-last-space bx))) -;; insert typographic niceties -;; ligatures are handled in css -(define (typogrify str) - (string? . -> . string?) - ;; make set of functions for replacers - (define (make-replacer query replacement) - (λ(str) (regexp-replace* query str replacement))) - - ;; just store the query strings + replacement strings - (define dashes - ;; fix em dashes first, else they'll be mistaken for en dashes - ;; [\\s ] is whitespace + nonbreaking space - '((#px"[\\s ]*(---|—)[\\s ]*" "—") ; em dash - (#px"[\\s ]*(--|–)[\\s ]*" "–"))) ; en dash - - (define smart-quotes - '((#px"(?<=\\w)'(?=\\w)" "’") ; apostrophe - (#px"(? . string?) diff --git a/tools.rkt b/tools.rkt index 2728a11..99fcd4e 100644 --- a/tools.rkt +++ b/tools.rkt @@ -11,6 +11,19 @@ ;; setup for test cases (module+ test (require rackunit)) +;; helper function for pollen/main and pollen/main-pre +(define (make-files-in-require-form file-directory) + ;; This will be resolved in the context of current-directory. + ;; So when called from outside the project directory, + ;; current-directory must be properly set with 'parameterize' + (define (make-complete-path path) + ;; todo: document why this function is necessary (it definitely is, but I forgot why) + (define-values (start_dir name _ignore) (split-path (path->complete-path path))) + (build-path start_dir file-directory name)) + (define files (map make-complete-path (filter (λ(i) (has-ext? i 'rkt)) (directory-list file-directory)))) + (define files-in-require-form + (map (λ(file) `(file ,(->string file))) files)) + files-in-require-form) ;; helper for comparison of values