diff --git a/hyphenate/info.rkt b/hyphenate/info.rkt index 669b1011..329e1fdc 100644 --- a/hyphenate/info.rkt +++ b/hyphenate/info.rkt @@ -1,4 +1,4 @@ #lang info (define collection "hyphenate") - (define scribblings '(("scribblings/hyphenate.scrbl" ()))) +(define deps '("base" "txexpr")) \ No newline at end of file diff --git a/hyphenate/main.rkt b/hyphenate/main.rkt index 3d7d2289..c10259a2 100644 --- a/hyphenate/main.rkt +++ b/hyphenate/main.rkt @@ -1,6 +1,7 @@ #lang racket/base +(require (for-syntax racket/base)) (require racket/string racket/list racket/contract racket/vector) -(require "patterns.rkt" "exceptions.rkt") +(require "patterns.rkt" "exceptions.rkt" txexpr xml) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Hyphenate module @@ -12,20 +13,20 @@ ;;; (also in the public domain) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(provide (contract-out - [hyphenate - ((string?) ((or/c char? string?) #:exceptions (listof exception-word?) #:min-length (or/c integer? #f)) . ->* . string?)]) - (contract-out - [hyphenatef - ((string? procedure?) ((or/c char? string?) #:exceptions (listof exception-word?) #:min-length (or/c integer? #f)) . ->* . string?)]) - (contract-out - [unhyphenate - ((string?) ((or/c char? string?)) . ->* . string?)])) - -;; global data, define now but set! them later (because they're potentially big & slow) +(define-syntax (define+provide/contract stx) ; defines name and also provides it through contract-out under the given contract; the function-header form is rewritten into the name-and-lambda form + (syntax-case stx () + [(_ (proc arg ... . rest-arg) contract body ...) + #'(define+provide/contract proc contract + (λ(arg ... . rest-arg) body ...))] + [(_ name contract body ...) + #'(begin + (provide (contract-out [name contract])) + (define name body ...))])) + +;; module data, define now but set!
them later (because they're potentially big & slow) (define exceptions #f) (define pattern-tree #f) -;; global default values +;; module default values (define default-min-length 5) (define default-joiner (integer->char #x00AD)) @@ -40,8 +41,6 @@ (make-hash (map (λ(x) (cons (make-key x) (make-value x))) exn-strings))) - - ;; An exception-word is a string of word characters or hyphens. (define (exception-word? x) (if (regexp-match #px"^[\\w-]+$" x) #t #f)) @@ -147,11 +146,22 @@ (define (joiner->string joiner) (if (char? joiner) (format "~a" joiner) joiner)) +;; helper macro: applies proc to every string in an xexpr, recursing into txexprs; any other value is returned unchanged +(define-syntax (apply-xexpr-strings stx) + (syntax-case stx () + [(_ proc val) #'(let loop ([x val]) + (cond + [(string? x) (proc x)] + [(txexpr? x) (map-elements loop x)] + [else x]))])) + ;; Hyphenate using a filter procedure. -;; Theoretically possible to do this externally, -;; but it would just mean doing the regexp-replace twice. -(define (hyphenatef text proc [joiner default-joiner] #:exceptions [extra-exceptions '()] #:min-length [min-length default-min-length]) - +(define+provide/contract (hyphenatef x proc [joiner default-joiner] + #:exceptions [extra-exceptions '()] + #:min-length [min-length default-min-length]) + ((xexpr/c procedure?) ((or/c char? string?) + #:exceptions (listof exception-word?) + #:min-length (or/c integer? #f)) . ->* . xexpr/c) ;; set up module data ;; todo?: change set! to parameterize @@ -161,15 +171,29 @@ (define joiner-string (joiner->string joiner)) (define word-pattern #px"\\w+") ;; more restrictive than exception-word ;; todo?: connect this regexp pattern to the one used in word?
predicate - (regexp-replace* word-pattern text (λ(word) (if (proc word) (string-join (word->hyphenation-points word min-length) joiner-string) word)))) + (define (insert-hyphens text) + (regexp-replace* word-pattern text (λ(word) (if (proc word) (string-join (word->hyphenation-points word min-length) joiner-string) word)))) + + (apply-xexpr-strings insert-hyphens x)) -;; Default hyphenate function. -(define (hyphenate text [joiner default-joiner] #:exceptions [extra-exceptions '()] #:min-length [min-length default-min-length]) - (hyphenatef text (λ(x) #t) joiner #:exceptions extra-exceptions #:min-length min-length)) +;; Default hyphenate is a special case of hyphenatef whose predicate accepts every word. +(define+provide/contract (hyphenate x [joiner default-joiner] + #:exceptions [extra-exceptions '()] + #:min-length [min-length default-min-length]) + ((xexpr/c) ((or/c char? string?) + #:exceptions (listof exception-word?) + #:min-length (or/c integer? #f)) . ->* . xexpr/c) + (hyphenatef x (λ(x) #t) joiner #:exceptions extra-exceptions #:min-length min-length)) -(define (unhyphenate text [joiner default-joiner]) - (string-replace text (joiner->string joiner) "")) +;; Remove every occurrence of joiner from the strings of x, including strings nested inside tagged X-expressions. +(define+provide/contract (unhyphenate x [joiner default-joiner]) + ((xexpr/c) ((or/c char? string?)) . ->* . 
xexpr/c) + + (define (remove-hyphens text) + (string-replace text (joiner->string joiner) "")) + + (apply-xexpr-strings remove-hyphens x)) diff --git a/hyphenate/scribblings/hyphenate.scrbl b/hyphenate/scribblings/hyphenate.scrbl index e40c0cce..832fffa0 100644 --- a/hyphenate/scribblings/hyphenate.scrbl +++ b/hyphenate/scribblings/hyphenate.scrbl @@ -1,9 +1,9 @@ #lang scribble/manual -@(require scribble/eval (for-label racket "../main.rkt")) +@(require scribble/eval (for-label racket "../main.rkt" xml)) @(define my-eval (make-base-eval)) -@(my-eval `(require hyphenate)) +@(my-eval `(require hyphenate xml)) @title{Hyphenate} @@ -14,12 +14,12 @@ A simple hyphenation engine that uses the Knuth–Liang hyphenation algorithm or I originally put together this module to handle hyphenation for my web-based book @link["http://practicaltypography.com"]{Butterick's Practical Typography} (which I made with Racket & Scribble). Though support for CSS-based hyphenation in web browsers is @link["http://caniuse.com/#search=hyphen"]{still iffy}, soft hyphens work reliably well. But putting them into the text manually is a drag. Thus a module was born. -@section{Installation & updates} +@section{Installation} At the command line: @verbatim{raco pkg install hyphenate} -After that, you can update the package from the command line: +After that, you can update the package like so: @verbatim{raco pkg update hyphenate} @@ -27,17 +27,16 @@ After that, you can update the package from the command line: @defmodule[hyphenate] - @defproc[ (hyphenate -[text string?] +[xexpr xexpr/c] [joiner (or/c char? string?) (integer->char #x00AD)] [#:exceptions exceptions (listof string?) empty] [#:min-length length (or/c integer? false?) 5]) -string?] -Hyphenate @racket[_text] by calculating hyphenation points and inserting @racket[_joiner] at those points. By default, @racket[_joiner] is the soft hyphen (Unicode 00AD = decimal 173). 
Words shorter than @racket[#:min-length] @racket[_length] will not be hyphenated. To hyphenate words of any length, use @racket[#:min-length] @racket[#f]. +xexpr/c] +Hyphenate @racket[_xexpr] by calculating hyphenation points and inserting @racket[_joiner] at those points. By default, @racket[_joiner] is the soft hyphen (Unicode 00AD = decimal 173). Words shorter than @racket[#:min-length] @racket[_length] will not be hyphenated. To hyphenate words of any length, use @racket[#:min-length] @racket[#f]. -@margin-note{The REPL displays a soft hyphen as \u00AD. But in ordinary use, you'll only see a soft hyphen when it appears at the end of a line or page as part of a hyphenated word. Otherwise it's not displayed. In most of the examples here, I use a standard hyphen for clarity.} +@margin-note{The REPL displays a soft hyphen as @code{\u00AD}. But in ordinary use, you'll only see a soft hyphen when it appears at the end of a line or page as part of a hyphenated word. Otherwise it's not displayed. In most of the examples here, I use a standard hyphen for clarity (by adding @code{#\-} as an argument).} @examples[#:eval my-eval (hyphenate "ergo polymorphic") @@ -50,7 +49,7 @@ Because the hyphenation is based on an algorithm rather than a dictionary, it ma @examples[#:eval my-eval (hyphenate "scraunched strengths" #\-) - (hyphenate "Racketcon" #\-) + (hyphenate "RacketCon" #\-) (hyphenate "supercalifragilisticexpialidocious" #\-) ] @@ -80,23 +79,31 @@ For this reason, certain words can't be hyphenated algorithmically, because the This is the right result. If you used @italic{adder} to mean the machine, it would be hyphenated @italic{add-er}; if you meant the snake, it would be @italic{ad-der}. Better to avoid hyphenation than to hyphenate incorrectly. +You can send HTML-style X-expressions through @racket[hyphenate]. 
It will recursively hyphenate the text strings, while leaving the tags and attributes alone, as well as non-hyphenatable material (like character entities and CDATA). + +@examples[#:eval my-eval + (hyphenate '(p "strangely" (em "formatted" (strong "snowmen"))) #\-) + (hyphenate '(headline [[class "headline"]] "headline") #\-) + (hyphenate '(div "The (span epsilon) entity:" epsilon) #\-) + ] -Don't send raw HTML through @racket[hyphenate]. It can't distinguish HTML tags and attributes from textual content, so it will hyphenate everything, which will goof up your file. +Don't send raw HTML or XML through @racket[hyphenate]. It can't distinguish tags and attributes from textual content, so everything will be hyphenated, thus goofing up your file. But you can easily convert HTML or XML to an X-expression, hyphenate it, and then convert back. @examples[#:eval my-eval - (hyphenate "Hello world") + (define html "<body style=\"background-color:yellow\">Hello</body>") + (hyphenate html #\-) + (xexpr->string (hyphenate (string->xexpr html) #\-)) ] -Instead, send your textual content through @racket[hyphenate] @italic{before} you put it into your HTML template. Or convert your HTML to an X-expression and process it selectively (e.g., with @racket[match]). @defproc[ (hyphenatef -[text string?] +[xexpr xexpr/c] [pred procedure?] [joiner (or/c char? string?) (integer->char \#x00AD)] [#:exceptions exceptions (listof string?) empty] [#:min-length length (or/c integer? false?) 5]) -string?] +xexpr/c] Like @racket[hyphenate], but only words matching @racket[_pred] are hyphenated. Convenient if you want to prevent hyphenation of certain sets of words, like proper names: @examples[#:eval my-eval @@ -118,15 +125,21 @@ Sometimes you need @racket[hyphenatef] to prevent unintended consequences. For i (hyphenatef "Hufflepuff golfing final on Tuesday" no-ligs? #\-) ] +@margin-note{``Wouldn't it be better to exclude certain pairs of letters rather than whole words?'' Yes. 
But for now, that's not supported.} It's possible to do fancier kinds of hyphenation restrictions that take account of context, like not hyphenating the last word of a paragraph. But @racket[hyphenatef] only operates on words. So you'll have to write some fancier code. Separate out the words eligible for hyphenation, and then send them through good old @racket[hyphenate]. @defproc[ (unhyphenate -[text string?] +[xexpr xexpr/c] [joiner (or/c char? string?) @(integer->char #x00AD)]) -string?] -Remove @racket[_joiner] from @racket[_text] using @racket[string-replace]. +xexpr/c] +Remove @racket[_joiner] from @racket[_xexpr]. Like @racket[hyphenate], it works on nested X-expressions. + +@examples[#:eval my-eval + (hyphenate '(p "strangely" (em "formatted" (strong "snowmen"))) #\-) + (unhyphenate '(p "strange-ly" (em "for-mat-ted" (strong "snow-men"))) #\-) +] A side effect of using @racket[hyphenate] is that soft hyphens (or whatever the @racket[_joiner] is) will be embedded in the output text. If you need to support copying of text, for instance in a GUI application, you'll probably want to strip out the hyphenation before the copied text is moved to the clipboard. @@ -142,7 +155,7 @@ Use this function cautiously — if @racket[_joiner] appeared in the original in (unhyphenate (hyphenate "ribbon-cutting ceremony" #\-) #\-) ] -It's also possible that soft hyphens could appear in your input string. Certain word processors allow users to @link["http://practicaltypography.com/optional-hyphens.html"]{insert soft hyphens} in their text. +Keep in mind that soft hyphens could appear in your input string. Certain word processors allow users to @link["http://practicaltypography.com/optional-hyphens.html"]{insert soft hyphens} in their text. 
@examples[#:eval my-eval (hyphenate "True\u00ADType typefaces") diff --git a/hyphenate/tests.rkt b/hyphenate/tests.rkt index 37aeeede..6e5a4aad 100644 --- a/hyphenate/tests.rkt +++ b/hyphenate/tests.rkt @@ -1,16 +1,7 @@ #lang racket/base -(require rackunit) +(require "main.rkt" rackunit) -(require "main.rkt") - -(require/expose "main.rkt" (word->hyphenation-points)) -(require/expose "main.rkt" (exception-word?)) - -(check-true (exception-word? "Foobar")) -(check-true (exception-word? "foobar")) -(check-false (exception-word? "foobar!")) -(check-true (exception-word? "foo-bar")) -(check-false (exception-word? "foo bar")) +(require/expose "main.rkt" (word->hyphenation-points exception-word?)) (check-equal? (hyphenate "polymorphism") "poly\u00ADmor\u00ADphism") (check-equal? (hyphenate "polymorphism" #:min-length 100) "polymorphism") @@ -22,7 +13,14 @@ (check-equal? (unhyphenate "polyfoomorfoophism" "foo") "polymorphism") (check-equal? (hyphenate "polymorphism" #\* #:exceptions '("polymo-rphism")) "polymo*rphism") (check-equal? (hyphenate "circular polymorphism squandering") "cir\u00ADcu\u00ADlar poly\u00ADmor\u00ADphism squan\u00ADder\u00ADing") +(check-equal? (hyphenate '(p "circular polymorphism" amp (em "squandering"))) '(p "cir\u00ADcu\u00ADlar poly\u00ADmor\u00ADphism" amp (em "squan\u00ADder\u00ADing"))) ; xexpr input: strings are hyphenated at every nesting level, tag names and the amp symbol are untouched (check-equal? (hyphenate "present project") "present project") ; exception words ;; test these last so exceptions have been set up already (check-equal? (word->hyphenation-points "polymorphism") '("poly" "mor" "phism")) (check-equal? (word->hyphenation-points "present") '("present")) ; exception word + +(check-true (exception-word? "Foobar")) +(check-true (exception-word? "foobar")) +(check-false (exception-word? "foobar!")) ; punctuation is rejected +(check-true (exception-word? "foo-bar")) +(check-false (exception-word? "foo bar")) ; whitespace is rejected