add xexpr support

main
Matthew Butterick 10 years ago
parent 92741a90c7
commit 8f77d5e4e9

@ -1,4 +1,4 @@
#lang info
;; Collection name for this package.
(define collection "hyphenate")
;; Scribble source for the package documentation.
(define scribblings '(("scribblings/hyphenate.scrbl" ())))
;; Packages this one depends on (the main module requires txexpr).
(define deps '("txexpr"))

@ -1,9 +1,7 @@
#lang racket/base
(require (for-syntax racket/base))
(require racket/string racket/list racket/contract racket/vector)
(require "patterns.rkt" "exceptions.rkt" tagged-xexpr xml)
(module+ test (require rackunit))
(require "patterns.rkt" "exceptions.rkt" txexpr xml)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Hyphenate module
@ -25,10 +23,10 @@
(provide (contract-out [name contract]))
(define name body ...))]))
;; global data, define now but set! them later (because they're potentially big & slow)
;; module data, define now but set! them later (because they're potentially big & slow)
(define exceptions #f)
(define pattern-tree #f)
;; global default values
;; module default values
(define default-min-length 5)
(define default-joiner (integer->char #x00AD))
@ -43,18 +41,10 @@
(make-hash (map (λ(x) (cons (make-key x) (make-value x))) exn-strings)))
;; An exception-word is a string of word characters or hyphens.
;; Returns #t for such a string and #f for everything else. Non-string
;; values yield #f instead of raising from the regexp matcher, so the
;; predicate can be used directly as a flat contract, as in
;; (listof exception-word?).
(define (exception-word? x)
  (and (string? x)
       (regexp-match? #px"^[\\w-]+$" x)))
(module+ test
(check-true (exception-word? "Foobar"))
(check-true (exception-word? "foobar"))
(check-false (exception-word? "foobar!"))
(check-true (exception-word? "foo-bar"))
(check-false (exception-word? "foo bar")))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Helper functions
@ -157,6 +147,15 @@
;; Coerce a joiner to string form: a character becomes a one-character
;; string; any other value is handed back untouched.
(define (joiner->string joiner)
  (cond
    [(char? joiner) (string joiner)]
    [else joiner]))
;; helper macro that applies proc to all strings found in xexpr input
;; - a string is replaced by the result of (proc string)
;; - a txexpr is handed to map-elements, with this same loop as the mapper
;; - any other value is returned unchanged
;; The proc expression is evaluated exactly once, before the walk starts,
;; instead of being re-evaluated at every string that is visited.
(define-syntax (apply-xexpr-strings stx)
  (syntax-case stx ()
    [(_ proc val)
     #'(let ([string-proc proc])
         (let loop ([x val])
           (cond
             [(string? x) (string-proc x)]
             [(txexpr? x) (map-elements loop x)]
             [else x])))]))
;; Hyphenate using a filter procedure.
(define+provide/contract (hyphenatef x proc [joiner default-joiner]
#:exceptions [extra-exceptions '()]
@ -176,37 +175,26 @@
(define (insert-hyphens text)
(regexp-replace* word-pattern text (λ(word) (if (proc word) (string-join (word->hyphenation-points word min-length) joiner-string) word))))
(let &hyphenate ([x x])
(cond
[(string? x) (insert-hyphens x)]
[(tagged-xexpr? x) (map-elements &hyphenate x)]
[else x])))
(apply-xexpr-strings insert-hyphens x))
;; Default hyphenate function.
;; Default hyphenate is a special case of hyphenatef.
(define+provide/contract (hyphenate x [joiner default-joiner]
#:exceptions [extra-exceptions '()]
#:min-length [min-length default-min-length])
((xexpr/c) ((or/c char? string?)
#:exceptions (listof exception-word?)
#:min-length (or/c integer? #f)) . ->* . xexpr/c)
#:exceptions (listof exception-word?)
#:min-length (or/c integer? #f)) . ->* . xexpr/c)
(hyphenatef x (λ(x) #t) joiner #:exceptions extra-exceptions #:min-length min-length))
;; Remove hyphens.
(define+provide/contract (unhyphenate x [joiner default-joiner])
((xexpr/c) ((or/c char? string?)) . ->* . xexpr/c)
(define (remove-hyphens text)
(string-replace text (joiner->string joiner) ""))
(let &unhyphenate ([x x])
(cond
[(string? x) (remove-hyphens x)]
[(tagged-xexpr? x) (map-elements &unhyphenate x)]
[else x])))
(apply-xexpr-strings remove-hyphens x))

@ -36,7 +36,7 @@ After that, you can update the package like so:
xexpr/c]
Hyphenate @racket[_xexpr] by calculating hyphenation points and inserting @racket[_joiner] at those points. By default, @racket[_joiner] is the soft hyphen (Unicode 00AD = decimal 173). Words shorter than @racket[#:min-length] @racket[_length] will not be hyphenated. To hyphenate words of any length, use @racket[#:min-length] @racket[#f].
@margin-note{The REPL displays a soft hyphen as \u00AD. But in ordinary use, you'll only see a soft hyphen when it appears at the end of a line or page as part of a hyphenated word. Otherwise it's not displayed. In most of the examples here, I use a standard hyphen for clarity.}
@margin-note{The REPL displays a soft hyphen as @code{\u00AD}. But in ordinary use, you'll only see a soft hyphen when it appears at the end of a line or page as part of a hyphenated word. Otherwise it's not displayed. In most of the examples here, I use a standard hyphen for clarity (by adding @code{#\-} as an argument).}
@examples[#:eval my-eval
(hyphenate "ergo polymorphic")
@ -49,7 +49,7 @@ Because the hyphenation is based on an algorithm rather than a dictionary, it ma
@examples[#:eval my-eval
(hyphenate "scraunched strengths" #\-)
(hyphenate "Racketcon" #\-)
(hyphenate "RacketCon" #\-)
(hyphenate "supercalifragilisticexpialidocious" #\-)
]
@ -79,18 +79,20 @@ For this reason, certain words can't be hyphenated algorithmically, because the
This is the right result. If you used @italic{adder} to mean the machine, it would be hyphenated @italic{add-er}; if you meant the snake, it would be @italic{ad-der}. Better to avoid hyphenation than to hyphenate incorrectly.
You can send HTML-style X-expressions through @racket[hyphenate]. It will recursively hyphenate the text strings, while leaving the tags and attributes alone.
You can send HTML-style X-expressions through @racket[hyphenate]. It will recursively hyphenate the text strings, while leaving the tags and attributes alone, as well as non-hyphenatable material (like character entities and CDATA).
@examples[#:eval my-eval
(hyphenate '(p "polymorphically" (em "formatted" (strong "snowmen"))))
(hyphenate '(p "strangely" (em "formatted" (strong "snowmen"))) #\-)
(hyphenate '(headline [[class "headline"]] "headline") #\-)
(hyphenate '(div "The (span epsilon) entity:" epsilon) #\-)
]
Don't send raw HTML through @racket[hyphenate]. It can't distinguish HTML tags and attributes from textual content, so everything will be hyphenated, thus goofing up your file. But you can easily convert HTML to an X-expression, hyphenate it, and then convert back.
Don't send raw HTML or XML through @racket[hyphenate]. It can't distinguish tags and attributes from textual content, so everything will be hyphenated, thus goofing up your file. But you can easily convert HTML or XML to an X-expression, hyphenate it, and then convert back.
@examples[#:eval my-eval
(define html "<body style=\"background: yellow\">Hello snowman</body>")
(hyphenate html)
(xexpr->string (hyphenate (string->xexpr html)))
(define html "<body style=\"background: yellow\">Hello</body>")
(hyphenate html #\-)
(xexpr->string (hyphenate (string->xexpr html) #\-))
]
@ -116,11 +118,12 @@ Sometimes you need @racket[hyphenatef] to prevent unintended consequences. For i
@examples[#:eval my-eval
(hyphenate "Hufflepuff golfing final on Tuesday" #\-)
(define (no-ligs? word)
(not (ormap (λ(lig) (regexp-match lig word)) '("ff" "fi" "fl" "ffi" "ffl"))))
(not (ormap (λ(lig) (regexp-match lig word))
'("ff" "fi" "fl" "ffi" "ffl"))))
(hyphenatef "Hufflepuff golfing final on Tuesday" no-ligs? #\-)
]
@margin-note{``Wouldn't it be better to exclude certain pairs of letters rather than whole words?'' Yes. But for now, not supported.}
@margin-note{``Wouldn't it be better to exclude certain pairs of letters rather than whole words?'' Yes. But for now, that's not supported.}
It's possible to implement fancier hyphenation restrictions that take context into account, like not hyphenating the last word of a paragraph. But @racket[hyphenatef] only operates on individual words, so you'll have to write that extra logic yourself: separate out the words eligible for hyphenation, and then send them through good old @racket[hyphenate].
@ -129,7 +132,12 @@ It's possible to do fancier kinds of hyphenation restrictions that take account
[xexpr xexpr/c]
[joiner (or/c char? string?) @(integer->char #x00AD)])
xexpr/c]
Remove @racket[_joiner] from @racket[_xexpr].
Remove @racket[_joiner] from @racket[_xexpr]. Like @racket[hyphenate], it works on nested X-expressions.
@examples[#:eval my-eval
(hyphenate '(p "strangely" (em "formatted" (strong "snowmen"))) #\-)
(unhyphenate '(p "strange-ly" (em "for-mat-ted" (strong "snow-men"))) #\-)
]
A side effect of using @racket[hyphenate] is that soft hyphens (or whatever the @racket[_joiner] is) will be embedded in the output text. If you need to support copying of text, for instance in a GUI application, you'll probably want to strip out the hyphenation before the copied text is moved to the clipboard.
@ -145,7 +153,7 @@ Use this function cautiously — if @racket[_joiner] appeared in the original in
(unhyphenate (hyphenate "ribbon-cutting ceremony" #\-) #\-)
]
It's also possible that soft hyphens could appear in your input string. Certain word processors allow users to @link["http://practicaltypography.com/optional-hyphens.html"]{insert soft hyphens} in their text.
Keep in mind that soft hyphens could appear in your input string. Certain word processors allow users to @link["http://practicaltypography.com/optional-hyphens.html"]{insert soft hyphens} in their text.
@examples[#:eval my-eval
(hyphenate "True\u00ADType typefaces")

@ -3,7 +3,7 @@
(require rackunit)
(require/expose "main.rkt" (word->hyphenation-points))
(require/expose "main.rkt" (word->hyphenation-points exception-word?))
(check-equal? (hyphenate "polymorphism") "poly\u00ADmor\u00ADphism")
(check-equal? (hyphenate "polymorphism" #:min-length 100) "polymorphism")
@ -21,3 +21,8 @@
(check-equal? (word->hyphenation-points "polymorphism") '("poly" "mor" "phism"))
(check-equal? (word->hyphenation-points "present") '("present")) ; exception word
(check-true (exception-word? "Foobar"))
(check-true (exception-word? "foobar"))
(check-false (exception-word? "foobar!"))
(check-true (exception-word? "foo-bar"))
(check-false (exception-word? "foo bar"))
Loading…
Cancel
Save