working on fixing preprocessor

pull/9/head
Matthew Butterick 11 years ago
parent b2ad19e327
commit 6183e58bcc

@ -1,60 +1,11 @@
#lang racket/base
(require racket/contract racket/list racket/string racket/match)
(require (only-in racket/format ~a))
(require (only-in racket/bool nor))
(require racket/contract)
(require (only-in xml xexpr/c))
(module+ test (require rackunit))
(require "tools.rkt")
(provide (all-defined-out))
;; Find adjacent newline characters in a list and merge them into one item
;; Scribble, by default, makes each newline a separate list item
;; In practice, this is worthless.
(define/contract (merge-newlines x)
(list? . -> . list?)
(define (newline? x)
(and (string? x) (equal? "\n" x)))
(define (not-newline? x)
(not (newline? x)))
(define (really-merge-newlines xs [acc '()])
(if (empty? xs)
acc
;; Try to peel the newlines off the front.
(let-values ([(leading-newlines remainder) (splitf-at xs newline?)])
(if (not (empty? leading-newlines)) ; if you got newlines ...
;; combine them into a string and append them to the accumulator,
;; and recurse on the rest
(really-merge-newlines remainder (append acc (list (string-join leading-newlines ""))))
;; otherwise peel off elements up to the next newline, append them to accumulator,
;; and recurse on the rest
(really-merge-newlines (dropf remainder not-newline?)
(append acc (takef remainder not-newline?)))))))
(cond
[(list? x) (really-merge-newlines (map merge-newlines x))]
[else x]))
(module+ test
(check-equal? (merge-newlines '(p "\n" "foo" "\n" "\n" "bar" (em "\n" "\n" "\n")))
'(p "\n" "foo" "\n\n" "bar" (em "\n\n\n"))))
(module+ test (require rackunit))
;; todo: add native support for list-xexpr
;; decode triple newlines to list items
;; convert numbers to strings
;; maybe this isn't necessary
(define (stringify x)
(map-tree (λ(i) (if (number? i) (->string i) i)) x))
(module+ test
(check-equal? (stringify '(p 1 2 "foo" (em 4 "bar"))) '(p "1" "2" "foo" (em "4" "bar"))))
(provide (all-defined-out))
;; decoder wireframe
@ -66,15 +17,19 @@
#:block-xexpr-proc [block-xexpr-proc (λ(x)x)]
#:inline-xexpr-proc [inline-xexpr-proc (λ(x)x)]
#:string-proc [string-proc (λ(x)x)])
;; use xexpr/c for contract because it gives better error messages
((xexpr/c) (#:exclude-xexpr-tags list?
#:xexpr-tag-proc procedure?
#:xexpr-attr-proc procedure?
#:xexpr-elements-proc procedure?
#:block-xexpr-proc procedure?
#:inline-xexpr-proc procedure?
#:string-proc procedure?)
. ->* . tagged-xexpr?)
((xexpr/c) ;; use xexpr/c for contract on nx because it gives better error messages
;; todo: how to write more specific contracts for these procedures?
;; e.g., string-proc should be restricted to procs that accept a string as input
;; and return a string as output
(#:exclude-xexpr-tags list?
#:xexpr-tag-proc procedure?
#:xexpr-attr-proc procedure?
#:xexpr-elements-proc procedure?
#:block-xexpr-proc procedure?
#:inline-xexpr-proc procedure?
#:string-proc procedure?)
. ->* . tagged-xexpr?)
(when (not (tagged-xexpr? nx))
(error (format "decode: ~v not a full tagged-xexpr" nx)))
@ -85,7 +40,7 @@
(if (tag . in? . excluded-xexpr-tags)
x ; let x pass through untouched
(let ([decoded-xexpr (apply make-tagged-xexpr
(map &decode (list tag attr elements)))])
(map &decode (list tag attr elements)))])
((if (block-xexpr? decoded-xexpr)
block-xexpr-proc
inline-xexpr-proc) decoded-xexpr))))]

@ -0,0 +1,259 @@
#lang racket/base
(require racket/contract racket/list racket/string racket/match)
(require "../readability.rkt" "../predicates.rkt" "../tools.rkt")
(module+ test (require rackunit))
(provide (all-defined-out))
;; This module is a library of functions to be used in building pollen decoders.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Typography
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; insert typographic niceties
;; ligatures are handled in css
(define (typogrify str)
(string? . -> . string?)
;; make set of functions for replacers
(define (make-replacer query replacement)
(λ(str) (regexp-replace* query str replacement)))
;; just store the query strings + replacement strings
(define dashes
;; fix em dashes first, else they'll be mistaken for en dashes
;; [\\s ] is whitespace + nonbreaking space
'((#px"[\\s ]*(---|—)[\\s ]*" "") ; em dash
(#px"[\\s ]*(--|)[\\s ]*" ""))) ; en dash
(define smart-quotes
'((#px"(?<=\\w)'(?=\\w)" "") ; apostrophe
(#px"(?<!\\w)'(?=\\w)" "") ; single_at_beginning
(#px"(?<=\\S)'(?!\\w)" "") ; single_at_end
(#px"(?<!\\w)\"(?=\\w)" "") ; double_at_beginning
(#px"(?<=\\S)\"(?!\\w)" ""))) ; double_at_end
;; put replacers in desired order here
(let* ([typogrifiers (append dashes smart-quotes)]
[queries (map first typogrifiers)]
[replacements (map second typogrifiers)])
(define replacers (map make-replacer queries replacements))
;; compose goes from last to first, so reverse order
((apply compose1 (reverse replacers)) str)))
(module+ test
(check-equal? (typogrify "I had --- maybe 13 -- 20 --- hob-nobs.") "I had—maybe 1320—hob-nobs.")
(check-equal? (typogrify "\"Why,\" she could've asked, \"are we in Oahu watching 'Mame'?\"")
"“Why,” she couldve asked, “are we in Oahu watching Mame?”"))
;; insert nbsp between last two words
(define/contract (nonbreaking-last-space x
#:nbsp [nbsp (->string #\u00A0)]
#:minimum-word-length [minimum-word-length 6])
((tagged-xexpr?) (#:nbsp string? #:minimum-word-length integer?) . ->* . tagged-xexpr?)
;; todo: parameterize this, as it will be different for each project
(define tags-to-pay-attention-to '(p aside)) ; only apply to paragraphs
(define (replace-last-space str)
(if (#\space . in? . str)
(let ([reversed-str-list (reverse (string->list str))]
[reversed-nbsp (reverse (string->list nbsp))])
(define-values (last-word-chars other-chars)
(splitf-at reversed-str-list (λ(i) (not (eq? i #\space)))))
(list->string (reverse (append last-word-chars
; OK for long words to be on their own line.
(if (< (len last-word-chars) minimum-word-length)
; first char of other-chars will be the space, so use cdr
(append reversed-nbsp (cdr other-chars))
other-chars)))))
str))
(define (find-last-word-space x) ; recursively traverse xexpr
(cond
[(string? x) (replace-last-space x)]
[(tagged-xexpr? x)
(let-values([(tag attr elements) (break-tagged-xexpr x)])
(if (> (length elements) 0) ; elements is list of xexprs
(let-values ([(all-but-last last) (split-at elements (sub1 (length elements)))])
(make-tagged-xexpr tag attr `(,@all-but-last ,(find-last-word-space (car last)))))
x))]
[else x]))
(if ((car x) . in? . tags-to-pay-attention-to)
(find-last-word-space x)
x))
;; todo: make some tougher tests, it gets flaky with edge cases
(module+ test
(check-equal? (nonbreaking-last-space '(p "Hi there")) '(p "Hi there")) ; nbsp in between last two words
(check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "Ø") '(p "HiØthere")) ; but let's make it visible
(check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "_up_") '(p "Hi_up_there"))
(check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "_up_" #:minimum-word-length 3)
'(p "Hi there"))
(check-equal? (nonbreaking-last-space '(p "Hi here" (em "ho there")) #:nbsp "Ø") '(p "Hi here" (em "hoØthere"))))
; wrap initial quotes for hanging punctuation
; todo: improve this
; does not handle <p>“<em>thing</em> properly
(define/contract (wrap-hanging-quotes nx
#:single-prepend [single-pp '(squo)]
#:double-prepend [double-pp '(dquo)])
((tagged-xexpr?) (#:single-prepend list? #:double-prepend list?) . ->* . tagged-xexpr?)
(define two-char-string? (λ(i) (and (string? i) (>= (len i) 2))))
(define-values (tag attr elements) (break-tagged-xexpr nx))
(define new-car-elements
(match (car elements)
[(? two-char-string? tcs)
(define str-first (get tcs 0))
(define str-rest (get tcs 1 'end))
(cond
[(str-first . in? . '("\"" ""))
;; can wrap with any inline tag
;; so that linebreak detection etc still works
`(,@double-pp ,(->string #\“) ,str-rest)]
[(str-first . in? . '("\'" ""))
`(,@single-pp ,(->string #\) ,str-rest)]
[else tcs])]
[(? tagged-xexpr? nx) (wrap-hanging-quotes nx)]
[else (car elements)]))
(make-tagged-xexpr tag attr (cons new-car-elements (cdr elements))))
(module+ test
(check-equal? (wrap-hanging-quotes '(p "\"Hi\" there")) '(p (dquo "" "Hi\" there")))
(check-equal? (wrap-hanging-quotes '(p "'Hi' there")) '(p (squo "" "Hi' there")))
(check-equal? (wrap-hanging-quotes '(p "'Hi' there") #:single-prepend '(foo ((bar "ino"))))
'(p (foo ((bar "ino")) "" "Hi' there"))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Lines, blocks, paragraphs
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; turn the right items into <br> tags
(define/contract (convert-linebreaks xc #:newline [newline "\n"])
((xexpr-elements?) (#:newline string?) . ->* . xexpr-elements?)
;; todo: should this test be not block + not whitespace?
(define not-block? (λ(i) (not (block-xexpr? i))))
(filter-not empty?
(for/list ([i (len xc)])
(let ([item (get xc i)])
(cond
;; skip first and last
[(or (= i 0) (= i (sub1 (len xc)))) item]
[(equal? item newline)
(match (get xc (- i 1) (+ i 2)) ; a three-element slice with x[i] in the middle
;; only convert if neither adjacent tag is a block
;; (because blocks automatically force a newline before & after)
[(list (? not-block?) newline (? not-block?)) '(br)]
[else empty])] ; otherwise delete
[else item])))))
(module+ test
(check-equal? (convert-linebreaks '("foo" "\n" "bar")) '("foo" (br) "bar"))
(check-equal? (convert-linebreaks '("\n" "foo" "\n" "bar" "\n")) '("\n" "foo" (br) "bar" "\n"))
(check-equal? (convert-linebreaks '((p "foo") "\n" (p "bar"))) '((p "foo") (p "bar")))
(check-equal? (convert-linebreaks '("foo" "\n" (p "bar"))) '("foo" (p "bar")))
(check-equal? (convert-linebreaks '("foo" "moo" "bar")) '("foo" "moo" "bar"))
(check-equal? (convert-linebreaks '("foo" "moo" "bar") #:newline "moo") '("foo" (br) "bar"))
(check-equal? (convert-linebreaks '("foo" "\n\n" "bar")) '("foo" "\n\n" "bar")))
;; is x a paragraph break?
(define/contract (paragraph-break? x #:pattern [paragraph-pattern #px"^\n\n+$"])
((any/c) (#:pattern pregexp?) . ->* . boolean?)
(and (string? x) (->boolean (regexp-match paragraph-pattern x))))
(module+ test
(check-false (paragraph-break? "foo"))
(check-false (paragraph-break? "\n"))
(check-false (paragraph-break? "\n \n"))
(check-true (paragraph-break? "\n \n" #:pattern #px"^\n \n$"))
(check-true (paragraph-break? "\n\n"))
(check-true (paragraph-break? "\n\n\n")))
;; Find adjacent newline characters in a list and merge them into one item
;; Scribble, by default, makes each newline a separate list item
;; In practice, this is worthless.
(define/contract (merge-newlines x)
(list? . -> . list?)
(define (newline? x)
(and (string? x) (equal? "\n" x)))
(define (not-newline? x)
(not (newline? x)))
(define (really-merge-newlines xs [acc '()])
(if (empty? xs)
acc
;; Try to peel the newlines off the front.
(let-values ([(leading-newlines remainder) (splitf-at xs newline?)])
(if (not (empty? leading-newlines)) ; if you got newlines ...
;; combine them into a string and append them to the accumulator,
;; and recurse on the rest
(really-merge-newlines remainder (append acc (list (string-join leading-newlines ""))))
;; otherwise peel off elements up to the next newline, append them to accumulator,
;; and recurse on the rest
(really-merge-newlines (dropf remainder not-newline?)
(append acc (takef remainder not-newline?)))))))
(cond
[(list? x) (really-merge-newlines (map merge-newlines x))]
[else x]))
(module+ test
(check-equal? (merge-newlines '(p "\n" "foo" "\n" "\n" "bar" (em "\n" "\n" "\n")))
'(p "\n" "foo" "\n\n" "bar" (em "\n\n\n"))))
;; todo: add native support for list-xexpr
;; decode triple newlines to list items
;; prepare elements for paragraph testing
(define/contract (prep-paragraph-flow xc)
(xexpr-elements? . -> . xexpr-elements?)
(convert-linebreaks (merge-newlines (trim xc whitespace?))))
(module+ test
(check-equal? (prep-paragraph-flow '("\n" "foo" "\n" "\n" "bar" "\n" "ino" "\n"))
'("foo" "\n\n" "bar" (br) "ino")))
;; apply paragraph tag
(define/contract (wrap-paragraph xc #:tag [tag 'p])
((xexpr-elements?) (#:tag symbol?) . ->* . block-xexpr?)
(match xc
[(list (? block-xexpr? bx)) bx] ; leave a single block xexpr alone
[else (make-tagged-xexpr tag empty xc)])) ; otherwise wrap in p tag
(module+ test
(check-equal? (wrap-paragraph '("foo" "bar")) '(p "foo" "bar"))
(check-equal? (begin (register-block-tag 'para) (wrap-paragraph #:tag 'para '("foo" "bar")))
'(para "foo" "bar"))
(check-equal? (wrap-paragraph '((p "bar" "foo"))) '(p "bar" "foo"))
(check-equal? (wrap-paragraph '((div "bar" "foo") "Hi" )) '(p (div "bar" "foo") "Hi")))
;; detect paragraphs
;; todo: unit tests
(define/contract (detect-paragraphs elements)
(xexpr-elements? . -> . xexpr-elements?)
(let ([elements (prep-paragraph-flow elements)])
(if (ormap paragraph-break? elements) ; need this condition to prevent infinite recursion
(map wrap-paragraph (splitf-at* elements paragraph-break?)) ; split into ¶¶
elements)))

@ -13,26 +13,23 @@
;; Look for an EXTRAS_DIR directory local to the source file.
;; and require all the .rkt files therein.
;; optionally provide them.
(define-syntax (require-extras stx #:provide [provide #f])
(define-syntax (require-and-provide-extras stx)
(cond
[(directory-exists? EXTRAS_DIR)
(let ([files-in-require-form (make-files-in-require-form EXTRAS_DIR)])
(datum->syntax stx `(begin
(require ,@files-in-require-form)
(provide (all-from-out ,@files-in-require-form)))))]
; if no files to import, do nothing
[else #'(begin)]))
(define-syntax (require-extras stx)
(cond
[(directory-exists? EXTRAS_DIR)
;; This will be resolved in the context of current-directory.
;; So when called from outside the project directory,
;; current-directory must be properly set with 'parameterize'
(define (make-complete-path path)
;; todo: document why this function is necessary (it definitely is, but I forgot why)
(define-values (start_dir name _ignore) (split-path (path->complete-path path)))
(build-path start_dir EXTRAS_DIR name))
(define files (map make-complete-path (filter (λ(i) (has-ext? i 'rkt)) (directory-list EXTRAS_DIR))))
(define files-in-require-form
(map (λ(file) `(file ,(->string file))) files))
(datum->syntax stx
(if provide
`(begin
(require ,@files-in-require-form)
(provide (all-from-out ,@files-in-require-form)))
`(begin
(require ,@files-in-require-form))))]
(let ([files-in-require-form (make-files-in-require-form EXTRAS_DIR)])
(datum->syntax stx `(begin
(require ,@files-in-require-form))))]
; if no files to import, do nothing
[else #'(begin)]))

@ -1,7 +1,7 @@
#lang racket/base
(require (only-in (planet mb/pollen/readability) ->list)
(only-in (planet mb/pollen/decode) trim)
(only-in (planet mb/pollen/tools) trim)
(only-in (planet mb/pollen/predicates) whitespace?))
(provide (except-out (all-from-out racket/base) #%module-begin)
@ -21,7 +21,7 @@
; helpful because it collects & exports content via 'doc
(module pollen-inner (planet mb/pollen/lang/doclang2_raw)
(require (planet mb/pollen/tools) (planet mb/pollen/main-helper))
(require-extras #:provide #t) ; brings in the project require files
(require-and-provide-extras) ; brings in the project require files
expr ...) ; body of module

@ -28,7 +28,7 @@
;; use same requires as top of main.rkt
;; (can't import them from surrounding module due to submodule rules)
(require (planet mb/pollen/tools) (planet mb/pollen/main-helper))
(require-extras #:provide #t) ; brings in the project require files
(require-and-provide-extras) ; brings in the project require files
;; #%top binding catches ids that aren't defined
;; here, convert them to basic xexpr
@ -76,14 +76,15 @@
;; Unlike Scribble, which insists on decoding,
;; Pollen just passes through the minimally processed data.
;; one exception: if file extension marks it as pmap, send it to the pmap decoder instead.
(define pmap-source?
;; this tests inner-here (which is always the file name)
;; rather than (get metas 'here) which might have been overridden.
;; Because if it's overridden to something other than *.pmap,
;; pmap processing will fail.
;; This defeats rule that pmap file suffix triggers pmap decoding.
((->path inner-here) . has-ext? . POLLEN_MAP_EXT))
(define main (apply (if pmap-source?
;; rather than (get metas 'here) which might have been overridden.
;; Because if it's overridden to something other than *.pmap,
;; pmap processing will fail.
;; This defeats rule that pmap file suffix triggers pmap decoding.
(define here-is-pmap? (pmap-source? (->path inner-here)))
(define main (apply (if here-is-pmap?
;; pmap source files will go this way,
pmap-source-decode
;; ... but other files, including pollen, will go this way.
@ -98,12 +99,12 @@
(module+ main
(displayln ";-------------------------")
(displayln (string-append "; pollen decoded 'main" (if source-is-pmap? " (as pmap)" "")))
(displayln (string-append "; pollen decoded 'main" (if here-is-pmap? " (as pmap)" "")))
(displayln ";-------------------------")
main
(displayln "")
(if source-is-pmap?
(if here-is-pmap?
(displayln (format "(pmap? main) ~a" (pmap? main)))
(displayln (format "(tagged-xexpr? main) ~a" (tagged-xexpr? main))))
(displayln "")

@ -289,8 +289,10 @@
[current-directory source-dir]
[current-output-port (open-output-nowhere)])
(namespace-require 'racket) ; use namespace-require for FIRST require, then eval after
;; for include-template (used below)
(eval '(require web-server/templates) (current-namespace))
(eval '(require (planet mb/pollen/pmap)) (current-namespace))
;; for pmap navigation functions, and template commands
(eval '(require (planet mb/pollen/pmap)(planet mb/pollen/template)) (current-namespace))
;; import source into eval space. This sets up main & metas
(eval `(require ,(path->string source-name)) (current-namespace))
(eval `(include-template #:command-char ,TEMPLATE_FIELD_DELIMITER ,(->string template-name)) (current-namespace))))

@ -1,83 +1,17 @@
#lang racket/base
(require racket/contract racket/list racket/match)
(require (planet mb/pollen/tools) (planet mb/pollen/decode))
(require (planet mb/pollen/tools) (planet mb/pollen/decode)
(planet mb/pollen/library/decode-tools))
(provide (all-defined-out))
(module+ test (require rackunit))
;; register custom block tags
(register-block-tag 'bloq)
(register-block-tag 'fooble)
;; is x a paragraph break?
(define/contract (paragraph-break? x #:pattern [paragraph-pattern #px"^\n\n+$"])
((any/c) (#:pattern pregexp?) . ->* . boolean?)
(and (string? x) (->boolean (regexp-match paragraph-pattern x))))
(module+ test
(check-false (paragraph-break? "foo"))
(check-false (paragraph-break? "\n"))
(check-false (paragraph-break? "\n \n"))
(check-true (paragraph-break? "\n \n" #:pattern #px"^\n \n$"))
(check-true (paragraph-break? "\n\n"))
(check-true (paragraph-break? "\n\n\n")))
(define/contract (convert-linebreaks xc #:newline [newline "\n"])
((xexpr-elements?) (#:newline string?) . ->* . xexpr-elements?)
;; todo: should this test be not block + not whitespace?
(define not-block? (λ(i) (not (block-xexpr? i))))
(filter-not empty?
(for/list ([i (len xc)])
(let ([item (get xc i)])
(cond
;; skip first and last
[(or (= i 0) (= i (sub1 (len xc)))) item]
[(equal? item newline)
(match (get xc (- i 1) (+ i 2)) ; a three-element slice with x[i] in the middle
;; only convert if neither adjacent tag is a block
;; (because blocks automatically force a newline before & after)
[(list (? not-block?) newline (? not-block?)) '(br)]
[else empty])] ; otherwise delete
[else item])))))
(module+ test
(check-equal? (convert-linebreaks '("foo" "\n" "bar")) '("foo" (br) "bar"))
(check-equal? (convert-linebreaks '("\n" "foo" "\n" "bar" "\n")) '("\n" "foo" (br) "bar" "\n"))
(check-equal? (convert-linebreaks '((p "foo") "\n" (p "bar"))) '((p "foo") (p "bar")))
(check-equal? (convert-linebreaks '("foo" "\n" (p "bar"))) '("foo" (p "bar")))
(check-equal? (convert-linebreaks '("foo" "moo" "bar")) '("foo" "moo" "bar"))
(check-equal? (convert-linebreaks '("foo" "moo" "bar") #:newline "moo") '("foo" (br) "bar"))
(check-equal? (convert-linebreaks '("foo" "\n\n" "bar")) '("foo" "\n\n" "bar")))
;; prepare elements for paragraph testing
(define/contract (prep-paragraph-flow xc)
(xexpr-elements? . -> . xexpr-elements?)
(convert-linebreaks (merge-newlines (trim xc whitespace?))))
(module+ test
(check-equal? (prep-paragraph-flow '("\n" "foo" "\n" "\n" "bar" "\n" "ino" "\n"))
'("foo" "\n\n" "bar" (br) "ino")))
;; apply paragraph tag
(define/contract (wrap-paragraph xc #:tag [tag 'p])
((xexpr-elements?) (#:tag symbol?) . ->* . block-xexpr?)
(match xc
[(list (? block-xexpr? bx)) bx] ; leave a single block xexpr alone
[else (make-tagged-xexpr tag empty xc)])) ; otherwise wrap in p tag
(module+ test
(check-equal? (wrap-paragraph '("foo" "bar")) '(p "foo" "bar"))
(check-equal? (begin (register-block-tag 'para) (wrap-paragraph #:tag 'para '("foo" "bar")))
'(para "foo" "bar"))
(check-equal? (wrap-paragraph '((p "bar" "foo"))) '(p "bar" "foo"))
(check-equal? (wrap-paragraph '((div "bar" "foo") "Hi" )) '(p (div "bar" "foo") "Hi")))
;; detect paragraphs
;; todo: unit tests
(define/contract (xexpr-elements-proc elements)
@ -88,129 +22,11 @@
elements)))
;; insert nbsp between last two words
(define/contract (nonbreaking-last-space x
#:nbsp [nbsp (->string #\u00A0)]
#:minimum-word-length [minimum-word-length 6])
((tagged-xexpr?) (#:nbsp string? #:minimum-word-length integer?) . ->* . tagged-xexpr?)
;; todo: parameterize this, as it will be different for each project
(define tags-to-pay-attention-to '(p aside)) ; only apply to paragraphs
(define (replace-last-space str)
(if (#\space . in? . str)
(let ([reversed-str-list (reverse (string->list str))]
[reversed-nbsp (reverse (string->list nbsp))])
(define-values (last-word-chars other-chars)
(splitf-at reversed-str-list (λ(i) (not (eq? i #\space)))))
(list->string (reverse (append last-word-chars
; OK for long words to be on their own line.
(if (< (len last-word-chars) minimum-word-length)
; first char of other-chars will be the space, so use cdr
(append reversed-nbsp (cdr other-chars))
other-chars)))))
str))
(define (find-last-word-space x) ; recursively traverse xexpr
(cond
[(string? x) (replace-last-space x)]
[(tagged-xexpr? x)
(let-values([(tag attr elements) (break-tagged-xexpr x)])
(if (> (length elements) 0) ; elements is list of xexprs
(let-values ([(all-but-last last) (split-at elements (sub1 (length elements)))])
(make-tagged-xexpr tag attr `(,@all-but-last ,(find-last-word-space (car last)))))
x))]
[else x]))
(if ((car x) . in? . tags-to-pay-attention-to)
(find-last-word-space x)
x))
;; todo: make some tougher tests, it gets flaky with edge cases
(module+ test
(check-equal? (nonbreaking-last-space '(p "Hi there")) '(p "Hi there")) ; nbsp in between last two words
(check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "Ø") '(p "HiØthere")) ; but let's make it visible
(check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "_up_") '(p "Hi_up_there"))
(check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "_up_" #:minimum-word-length 3)
'(p "Hi there"))
(check-equal? (nonbreaking-last-space '(p "Hi here" (em "ho there")) #:nbsp "Ø") '(p "Hi here" (em "hoØthere"))))
; wrap initial quotes for hanging punctuation
; todo: improve this
; does not handle <p>“<em>thing</em> properly
(define/contract (wrap-hanging-quotes nx
#:single-prepend [single-pp '(squo)]
#:double-prepend [double-pp '(dquo)])
((tagged-xexpr?) (#:single-prepend list? #:double-prepend list?) . ->* . tagged-xexpr?)
(define two-char-string? (λ(i) (and (string? i) (>= (len i) 2))))
(define-values (tag attr elements) (break-tagged-xexpr nx))
(define new-car-elements
(match (car elements)
[(? two-char-string? tcs)
(define str-first (get tcs 0))
(define str-rest (get tcs 1 'end))
(cond
[(str-first . in? . '("\"" ""))
;; can wrap with any inline tag
;; so that linebreak detection etc still works
`(,@double-pp ,(->string #\“) ,str-rest)]
[(str-first . in? . '("\'" ""))
`(,@single-pp ,(->string #\) ,str-rest)]
[else tcs])]
[(? tagged-xexpr? nx) (wrap-hanging-quotes nx)]
[else (car elements)]))
(make-tagged-xexpr tag attr (cons new-car-elements (cdr elements))))
(module+ test
(check-equal? (wrap-hanging-quotes '(p "\"Hi\" there")) '(p (dquo "" "Hi\" there")))
(check-equal? (wrap-hanging-quotes '(p "'Hi' there")) '(p (squo "" "Hi' there")))
(check-equal? (wrap-hanging-quotes '(p "'Hi' there") #:single-prepend '(foo ((bar "ino"))))
'(p (foo ((bar "ino")) "" "Hi' there"))))
(define (block-xexpr-proc bx)
(define/contract (block-xexpr-proc bx)
(tagged-xexpr? . -> . tagged-xexpr?)
(wrap-hanging-quotes (nonbreaking-last-space bx)))
;; insert typographic niceties
;; ligatures are handled in css
(define (typogrify str)
(string? . -> . string?)
;; make set of functions for replacers
(define (make-replacer query replacement)
(λ(str) (regexp-replace* query str replacement)))
;; just store the query strings + replacement strings
(define dashes
;; fix em dashes first, else they'll be mistaken for en dashes
;; [\\s ] is whitespace + nonbreaking space
'((#px"[\\s ]*(---|—)[\\s ]*" "") ; em dash
(#px"[\\s ]*(--|)[\\s ]*" ""))) ; en dash
(define smart-quotes
'((#px"(?<=\\w)'(?=\\w)" "") ; apostrophe
(#px"(?<!\\w)'(?=\\w)" "") ; single_at_beginning
(#px"(?<=\\S)'(?!\\w)" "") ; single_at_end
(#px"(?<!\\w)\"(?=\\w)" "") ; double_at_beginning
(#px"(?<=\\S)\"(?!\\w)" ""))) ; double_at_end
;; put replacers in desired order here
(let* ([typogrifiers (append dashes smart-quotes)]
[queries (map first typogrifiers)]
[replacements (map second typogrifiers)])
(define replacers (map make-replacer queries replacements))
;; compose goes from last to first, so reverse order
((apply compose1 (reverse replacers)) str)))
(module+ test
(check-equal? (typogrify "I had --- maybe 13 -- 20 --- hob-nobs.") "I had—maybe 1320—hob-nobs.")
(check-equal? (typogrify "\"Why,\" she could've asked, \"are we in Oahu watching 'Mame'?\"")
"“Why,” she couldve asked, “are we in Oahu watching Mame?”"))
(define (string-proc str)
(string? . -> . string?)

@ -11,6 +11,19 @@
;; setup for test cases
(module+ test (require rackunit))
;; helper function for pollen/main and pollen/main-pre
(define (make-files-in-require-form file-directory)
;; This will be resolved in the context of current-directory.
;; So when called from outside the project directory,
;; current-directory must be properly set with 'parameterize'
(define (make-complete-path path)
;; todo: document why this function is necessary (it definitely is, but I forgot why)
(define-values (start_dir name _ignore) (split-path (path->complete-path path)))
(build-path start_dir file-directory name))
(define files (map make-complete-path (filter (λ(i) (has-ext? i 'rkt)) (directory-list file-directory))))
(define files-in-require-form
(map (λ(file) `(file ,(->string file))) files))
files-in-require-form)
;; helper for comparison of values

Loading…
Cancel
Save