fold hyphenatef into hyphenate; expand #:omit options

main
Matthew Butterick 10 years ago
parent e950f59f06
commit efde5d3c8c

@ -151,24 +151,28 @@
(if (char? joiner) (format "~a" joiner) joiner))
;; helper function that applies proc to all strings found in xexpr input
(define (apply-xexpr-strings proc x [omit-test (λ(x) #f)])
; ((procedure? txexpr?) ((or/c null (listof txexpr-tag?))) . ->* . txexpr?)
(define (apply-xexpr-strings proc x [omit-string (λ(x) #f)] [omit-txexpr (λ(x) #f)])
; ((procedure? xexpr?) ((string? . -> . any/c) (txexpr? . -> . any/c)) . ->* . xexpr?)
(let loop ([x x])
(cond
[(and (string? x) (not (omit-test x))) (proc x)]
[(and (txexpr? x) (not (omit-test x))) (cons (car x) (map loop (cdr x)))]
[(and (string? x) (not (omit-string x))) (proc x)]
[(and (txexpr? x) (not (omit-txexpr x))) (cons (car x) (map loop (cdr x)))]
[else x])))
;; Hyphenate using a filter procedure.
(define+provide+safe (hyphenatef x proc [joiner default-joiner]
#:exceptions [extra-exceptions '()]
#:min-length [min-length default-min-length]
#:omit [omit-test (λ(x) #f)])
((xexpr? procedure?) ((or/c char? string?)
#:exceptions (listof exception-word?)
#:min-length (or/c integer? #f)
#:omit ((or/c string? txexpr?) . -> . any/c)) . ->* . xexpr/c)
(define+provide+safe (hyphenate x [joiner default-joiner]
#:exceptions [extra-exceptions '()]
#:min-length [min-length default-min-length]
#:omit-word [omit-word? (λ(x) #f)]
#:omit-string [omit-string? (λ(x) #f)]
#:omit-txexpr [omit-txexpr? (λ(x) #f)])
((xexpr?) ((or/c char? string?)
#:exceptions (listof exception-word?)
#:min-length (or/c integer? #f)
#:omit-word (string? . -> . any/c)
#:omit-string (string? . -> . any/c)
#:omit-txexpr (txexpr? . -> . any/c)) . ->* . xexpr/c)
;; set up module data
;; todo?: change set! to parameterize
@ -179,30 +183,23 @@
(define word-pattern #px"\\w+") ;; more restrictive than exception-word
;; todo?: connect this regexp pattern to the one used in word? predicate
(define (insert-hyphens text)
(regexp-replace* word-pattern text (λ(word) (if (proc word) (string-join (word->hyphenation-points word min-length) joiner-string) word))))
(regexp-replace* word-pattern text (λ(word) (if (not (omit-word? word)) (string-join (word->hyphenation-points word min-length) joiner-string) word))))
(apply-xexpr-strings insert-hyphens x omit-test))
(apply-xexpr-strings insert-hyphens x omit-string? omit-txexpr?))
;; Default hyphenate is a special case of hyphenatef.
(define+provide+safe (hyphenate x [joiner default-joiner]
#:exceptions [extra-exceptions '()]
#:min-length [min-length default-min-length]
#:omit [omit-test (λ(x) #f)])
((xexpr/c) ((or/c char? string?)
#:exceptions (listof exception-word?)
#:min-length (or/c integer? #f)
#:omit ((or/c string? txexpr?) . -> . any/c)) . ->* . xexpr/c)
(hyphenatef x (λ(x) #t) joiner #:exceptions extra-exceptions #:min-length min-length #:omit omit-test))
;; Remove hyphens.
(define+provide+safe (unhyphenate x [joiner default-joiner]
#:omit-word [omit-word? (λ(x) #f)]
#:omit-string [omit-string? (λ(x) #f)]
#:omit-txexpr [omit-txexpr? (λ(x) #f)])
((xexpr/c) ((or/c char? string?)
#:omit-word (string? . -> . any/c)
#:omit-string (string? . -> . any/c)
#:omit-txexpr (txexpr? . -> . any/c)) . ->* . xexpr/c)
(define (remove-hyphens text)
(string-replace text (joiner->string joiner) ""))
;; Remove hyphens.
(define+provide+safe (unhyphenate x [joiner default-joiner] #:omit [omit-test (λ(x) #f)])
((xexpr/c) ((or/c char? string?) #:omit ((or/c string? txexpr?) . -> . any/c)) . ->* . xexpr/c)
(define (remove-hyphens text)
(string-replace text (joiner->string joiner) ""))
(apply-xexpr-strings remove-hyphens x omit-test))
(apply-xexpr-strings remove-hyphens x omit-string? omit-txexpr?))

@ -40,7 +40,9 @@ Safe mode enables the function contracts documented below. Use safe mode by impo
[joiner (or/c char? string?) (integer->char #x00AD)]
[#:exceptions exceptions (listof string?) empty]
[#:min-length length (or/c integer? false?) 5]
[#:omit test ((or/c string? txexpr?) . -> . any/c) (λ(x) #f)])
[#:omit-word word-test (string? . -> . any/c) (λ(x) #f)]
[#:omit-string string-test (string? . -> . any/c) (λ(x) #f)]
[#:omit-txexpr txexpr-test (txexpr? . -> . any/c) (λ(x) #f)])
xexpr/c]
Hyphenate @racket[_xexpr] by calculating hyphenation points and inserting @racket[_joiner] at those points. By default, @racket[_joiner] is the soft hyphen (Unicode 00AD = decimal 173). Words shorter than @racket[#:min-length] @racket[_length] will not be hyphenated. To hyphenate words of any length, use @racket[#:min-length] @racket[#f].
@ -103,56 +105,54 @@ Don't send raw HTML or XML through @racket[hyphenate]. It can't distinguish tags
(xexpr->string (hyphenate (string->xexpr html) #\-))
]
If you're working with HTML, be careful not to include any @code{<script>} or @code{<style>} blocks, which contain non-hyphenatable data. You can protect that data by using the @racket[#:omit] keyword to provide a @racket[_test]. The @racket[_test] will be applied to all tagged X-expressions (see @racket[txexpr?]) and strings. When @racket[_test] evaluates to true, the item will be skipped.
If you're working with HTML, be careful not to include any @code{<script>} or @code{<style>} blocks, which contain non-hyphenatable data. You can protect that data by using the @racket[#:omit-txexpr] keyword to specify a @racket[_txexpr-test]. The test will be applied to all tagged X-expressions (see @racket[txexpr?]). When @racket[_txexpr-test] evaluates to true, the item will be skipped.
@examples[#:eval my-eval
(hyphenate '(body "processing" (script "no processing")) #\-)
(hyphenate '(body "processing" (script "no processing")) #\-
#:omit (λ(x) (and (txexpr? x) (member (get-tag x) '(script)))))
(hyphenate '(body "processing" (script "no processing")) #\-
#:omit (λ(x) (and (string? x) (regexp-match #rx"^no" x))))
]
#:omit-txexpr (λ(tx) (member (get-tag tx) '(script))))
]
You can also use @racket[#:omit-txexpr] to omit tagged X-expressions with particular attributes. This can be used to selectively suppress hyphenation at the markup level.
@defproc[
(hyphenatef
[xexpr xexpr/c]
[pred procedure?]
[joiner (or/c char? string?) (integer->char \#x00AD)]
[#:exceptions exceptions (listof string?) empty]
[#:min-length length (or/c integer? false?) 5]
[#:omit test ((or/c string? txexpr?) . -> . any/c) (λ(x) #f)])
xexpr/c]
Like @racket[hyphenate], but only words matching @racket[_pred] are hyphenated. Convenient if you want to prevent hyphenation of certain sets of words, like proper names:
@examples[#:eval my-eval
(hyphenate '(p (span "processing") (span [[klh "no"]] "processing")) #\-)
(hyphenate '(p (span "processing") (span [[klh "no"]] "processing")) #\-
#:omit-txexpr (λ(tx) (and (attrs-have-key? tx 'klh)
(equal? (attr-ref tx 'klh) "no"))))
]
Similarly, you can use the @racket[#:omit-word] argument to avoid words that match @racket[_word-test]. Convenient if you want to prevent hyphenation of certain sets of words, like proper names:
@examples[#:eval my-eval
(hyphenate "Brennan Huff likes fancy sauce" #\-)
(define uncapitalized? (λ(word) (let ([letter (substring word 0 1)])
(equal? letter (string-downcase letter)))))
(hyphenatef "Brennan Huff likes fancy sauce" uncapitalized? #\-)
(define capitalized? (λ(word) (let ([letter (substring word 0 1)])
(equal? letter (string-upcase letter)))))
(hyphenate "Brennan Huff likes fancy sauce" #:omit-word capitalized? #\-)
]
Sometimes you need @racket[hyphenatef] to prevent unintended consequences. For instance, if you're using ligatures in CSS, certain groups of characters (fi, fl, ffi, et al.) will be replaced by a single glyph. That looks snazzy, but adding soft hyphens between any of these pairs will defeat the ligature substitution, creating inconsistent results. With @racket[hyphenatef], you can skip these words:
Sometimes you need @racket[#:omit-word] to prevent unintended consequences. For instance, if you're using ligatures in CSS, certain groups of characters (fi, fl, ffi, et al.) will be replaced by a single glyph. That looks snazzy, but adding a soft hyphen inside any of these groups will defeat the ligature substitution, creating inconsistent results. With @racket[#:omit-word], you can skip the words that contain them:
@margin-note{``Wouldn't it be better to exclude certain pairs of letters rather than whole words?'' Yes. But for now, that's not supported.}
@examples[#:eval my-eval
(hyphenate "Hufflepuff golfing final on Tuesday" #\-)
(define (no-ligs? word)
(not (ormap (λ(lig) (regexp-match lig word))
'("ff" "fi" "fl" "ffi" "ffl"))))
(hyphenatef "Hufflepuff golfing final on Tuesday" no-ligs? #\-)
(define (ligs? word)
(ormap (λ(lig) (regexp-match lig word))
'("ff" "fi" "fl" "ffi" "ffl")))
(hyphenate "Hufflepuff golfing final on Tuesday" #:omit-word ligs? #\-)
]
It's possible to do fancier kinds of hyphenation restrictions that take account of context, like not hyphenating the last word of a paragraph. But the @racket[#:omit-word] test only sees one word at a time. So you'll have to write some fancier code. Separate out the words eligible for hyphenation, and then send them through good old @racket[hyphenate].
@defproc[
(unhyphenate
[xexpr xexpr/c]
[joiner (or/c char? string?) @(integer->char #x00AD)]
[#:omit test ((or/c string? txexpr?) . -> . any/c) (λ(x) #f)])
[#:omit-word word-test (string? . -> . any/c) (λ(x) #f)]
[#:omit-string string-test (string? . -> . any/c) (λ(x) #f)]
[#:omit-txexpr txexpr-test (txexpr? . -> . any/c) (λ(x) #f)])
xexpr/c]
Remove @racket[_joiner] from @racket[_xexpr]. Like @racket[hyphenate], it works on nested X-expressions.
Remove @racket[_joiner] from @racket[_xexpr]. Like @racket[hyphenate], it works on nested X-expressions, and offers the same @racket[#:omit-] options.
@examples[#:eval my-eval
(hyphenate '(p "strangely" (em "formatted" (strong "snowmen"))) #\-)

@ -7,7 +7,6 @@
(check-equal? (hyphenate "polymorphism" #:min-length 100) "polymorphism")
(check-equal? (hyphenate "ugly" #:min-length 1) "ug\u00ADly")
(check-equal? (unhyphenate "poly\u00ADmor\u00ADphism") "polymorphism")
(check-equal? (hyphenatef "polymorphism" (λ(x) #f)) "polymorphism")
(check-equal? (hyphenate "polymorphism" #\-) "poly-mor-phism")
(check-equal? (hyphenate "polymorphism" "foo") "polyfoomorfoophism")
(check-equal? (unhyphenate "polyfoomorfoophism" "foo") "polymorphism")
@ -27,27 +26,31 @@
;; omit certain tags
(check-equal? (hyphenate '(p "circular polymorphism" amp (em "squandering")) #:omit (λ(x) (and (txexpr? x) (member (car x) '(em)))))
(check-equal? (hyphenate '(p "circular polymorphism" amp (em "squandering")) #:omit-txexpr (λ(x) (member (car x) '(em))))
'(p "cir\u00ADcu\u00ADlar poly\u00ADmor\u00ADphism" amp (em "squandering")))
(check-equal? (hyphenate '(p "circular polymorphism" amp (em "squandering")) #:omit (λ(x) (and (txexpr? x) (member (car x) '(p)))))
(check-equal? (hyphenate '(p "circular polymorphism" amp (em "squandering")) #:omit-txexpr (λ(x) (member (car x) '(p))))
'(p "circular polymorphism" amp (em "squandering")))
(check-equal? (hyphenate '(p (foo "circular") (bar "circular") (zam "circular")) #:omit (λ(x) (and (txexpr? x) (member (car x) '(foo zam)))))
(check-equal? (hyphenate '(p (foo "circular") (bar "circular") (zam "circular")) #:omit-txexpr (λ(x) (member (car x) '(foo zam))))
'(p (foo "circular") (bar "cir\u00ADcu\u00ADlar") (zam "circular")))
(require txexpr)
; omit txexprs with an attribute
(check-equal? (hyphenate '(p (foo ((hyphens "no-thanks")) "circular") (foo "circular"))
#:omit (λ(x) (and (txexpr? x)
(with-handlers ([exn:fail? (λ(exn) #f)])
(equal? (attr-ref x 'hyphens) "no-thanks")))))
#:omit-txexpr (λ(x) (with-handlers ([exn:fail? (λ(exn) #f)])
(equal? (attr-ref x 'hyphens) "no-thanks"))))
'(p (foo ((hyphens "no-thanks")) "circular") (foo "cir\u00ADcu\u00ADlar")))
;; omit strings that end with "s"
(check-equal? (hyphenate '(p (foo "tailfeathers") (foo "tailfeather")) #:omit (λ(x) (and (string? x) (regexp-match #rx"s$" x))))
'(p (foo "tailfeathers") (foo "tail\u00ADfeath\u00ADer")))
(check-equal? (hyphenate '(p (foo "curses tailfeathers") (foo "curses tailfeather")) #:omit-string (λ(x) (regexp-match #rx"s$" x)))
'(p (foo "curses tailfeathers") (foo "curs\u00ADes tail\u00ADfeath\u00ADer")))
(check-equal? (unhyphenate '(p (script "tail-feathers") (em "tail-feathers")) #\- #:omit (λ(x) (and (txexpr? x) (member (car x) '(script)))))
;; omit words that end with "s"
(check-equal? (hyphenate '(p (foo "curses tailfeathers") (foo "curses tailfeather")) #:omit-word (λ(x) (regexp-match #rx"s$" x)))
'(p (foo "curses tailfeathers") (foo "curses tail\u00ADfeath\u00ADer")))
(check-equal? (unhyphenate '(p (script "tail-feathers") (em "tail-feathers")) #\- #:omit-txexpr (λ(x) (member (car x) '(script))))
'(p (script "tail-feathers") (em "tailfeathers")))
Loading…
Cancel
Save