add support for multiple languages (incl French); remove typed/hyphenate support (for lack of interest)

In safe mode (with contracts): In safe mode (with contracts):
(require (submod hyphenate safe)) (require (submod hyphenate safe))
Or in typed mode:
(require typed/hyphenate)
And enjoy: And enjoy:

#lang racket/base
(require (for-syntax racket/base syntax/strip-context))
(provide build-main)
;; (build-main dir)
;; Expands into the body of a language-specific hyphenation module.
;; `dir` names the directory holding that language's "patterns.rkt" and
;; "exceptions.rkt"; both paths are built with `format` and required by the
;; generated code. The whole template is re-contexted onto the use site with
;; `replace-context`, so the generated `require`s and definitions bind in the
;; module that invoked the macro.
;;
;; The generated module:
;; - re-exports everything from hyphenate/params;
;; - defines `exception-word?` / `exception-words?` in a `safe` submodule for
;;   use by the contracts below;
;; - defines and provides `hyphenate` and `unhyphenate` (with contracts in the
;;   `safe` submodule, via `define+provide+safe`) as keyword-procedure wrappers
;;   around the `core:` implementations.
(define-syntax (build-main stx)
  (syntax-case stx ()
    [(_ dir)
     (with-syntax ([patterns-path (datum->syntax stx (format "~a/patterns.rkt" (syntax->datum #'dir)))]
                   [exceptions-path (datum->syntax stx (format "~a/exceptions.rkt" (syntax->datum #'dir)))])
       (replace-context
        stx
        ;; The template must be a single syntax object, hence the `begin` wrapper.
        #'(begin
            (require txexpr sugar/define (only-in xml xexpr/c)
                     (prefix-in core: hyphenate/core) hyphenate/params patterns-path exceptions-path)
            (provide (all-from-out hyphenate/params))
            (module+ safe
              ;; An exception-word is a string of word characters or hyphens.
              (define (exception-word? x)
                (and (string? x) (regexp-match #px"^[\\w-]+$" x) #t))
              (define (exception-words? xs)
                (and (list? xs) (andmap exception-word? xs))))
            (define+provide+safe hyphenate
              ((xexpr?) ((or/c char? string?)
                         #:exceptions exception-words?
                         #:min-length (or/c integer? #f)
                         #:omit-word (string? . -> . any/c)
                         #:omit-string (string? . -> . any/c)
                         #:omit-txexpr (txexpr? . -> . any/c)
                         #:min-left-length (or/c (and/c integer? positive?) #f)
                         #:min-right-length (or/c (and/c integer? positive?) #f)) . ->* . xexpr/c)
              (make-keyword-procedure
               (λ (kws kw-args . rest)
                 ;; Each call runs the core with a fresh word cache and with this
                 ;; language's pattern and exception tables installed.
                 (parameterize ([current-word-cache (make-hash)]
                                [current-patterns patterns]
                                [current-exceptions exceptions])
                   (keyword-apply core:hyphenate kws kw-args rest)))))
            (define+provide+safe unhyphenate
              ((xexpr/c) ((or/c char? string?)
                          #:omit-word (string? . -> . any/c)
                          #:omit-string (string? . -> . any/c)
                          #:omit-txexpr (txexpr? . -> . any/c)) . ->* . xexpr/c)
              (make-keyword-procedure
               (λ (kws kw-args . rest)
                 (keyword-apply core:unhyphenate kws kw-args rest)))))))]))

#lang racket/base
(require txexpr racket/string racket/list "params.rkt")
(provide hyphenate unhyphenate word->hyphenation-points convert-exception-word string->hashpair)
;; module default values
(define default-min-length 5)
(define default-min-left-length 2)
(define default-min-right-length 2)
(define default-joiner #\u00AD)
;; Store one entry in the current word cache.
;; `pat` is a pair: its car is used as the key and its cdr as the stored value.
(define (cache-word pat)
  (let ([key (car pat)]
        [value (cdr pat)])
    (hash-set! (current-word-cache) key value)))
;; Convert the hyphenated pattern into a point array for use later.
;; Convert a hyphen-marked exception word (e.g. "snow-man") into a
;; two-element list (key points):
;; - key is the word lowercased, with hyphens removed and wrapped in "."
;;   boundary markers, e.g. ".snowman.";
;; - points is a list of 0/1 weights, one more than the length of key, with a
;;   1 wherever the exception had a hyphen.
;; The word is lowercased because make-points looks words up in lowercase, and
;; it is split on every non-hyphen character (not just a-z) so uppercase,
;; digits and accented letters each count as one position.
(define (convert-exception-word exception)
  (define normalized (string-downcase exception))
  (define key (format ".~a." (string-replace normalized "-" "")))
  ;; Splitting on each non-hyphen character leaves the text *between*
  ;; characters: "-" where a break was marked, "" elsewhere.
  (define gaps (regexp-split #px"[^-]" normalized))
  (define points
    `(0 ,@(map (λ (gap) (if (equal? gap "-") 1 0)) gaps) 0))
  (list key points))
;; Register `word` (a hyphen-marked exception such as "snow-man") by
;; functionally extending the table held in the `current-exceptions`
;; parameter and storing the extended table back into the parameter.
(define (add-exception-word word)
  (let* ([table (current-exceptions)]
         [key+points (convert-exception-word word)]
         [extended (apply hash-set table key+points)])
    (current-exceptions extended)))
;; Parse string `i` as a natural number.
;; Returns the exact nonnegative integer it denotes, or #f when the string is
;; not numeric, is negative, or is not an integer. Inexact integers such as
;; "1.0" are converted to exact. The `rational?` guard keeps `inexact->exact`
;; from being applied to values it rejects (e.g. +inf.0, +nan.0).
(define (string->natural i)
  (let* ([result (string->number i)]
         [result (and (rational? result) (inexact->exact result))]
         [result (and (exact-nonnegative-integer? result) result)])
    result))
;; Turn one hyphenation pattern string (letters, "." boundary markers and
;; single-digit weights, e.g. ".ach4") into a two-element list (key value):
;; - key is the pattern with the digits removed;
;; - value is the list of weights, with a 0 supplied wherever the pattern has
;;   no digit: before a leading non-digit, after a trailing non-digit, and
;;   between two adjacent non-digits.
(define (string->hashpair pat)
  ;; One token per character; characters that parse as naturals become numbers.
  (define tokens
    (for/list ([ch (in-list (regexp-match* #rx"." pat))])
      (or (string->natural ch) ch)))
  ;; Make sure the token list both starts and ends with a weight.
  (define front-padded
    (if (string? (car tokens)) (cons 0 tokens) tokens))
  (define padded
    (if (string? (last front-padded))
        (append front-padded (list 0))
        front-padded))
  ;; Collect the weights in order, emitting an implicit 0 for every
  ;; non-digit token that is directly followed by another non-digit token.
  (define value
    (let walk ([remaining padded])
      (cond
        [(null? remaining) null]
        [(string? (car remaining))
         (if (and (pair? (cdr remaining)) (string? (cadr remaining)))
             (cons 0 (walk (cdr remaining)))
             (walk (cdr remaining)))]
        [else (cons (car remaining) (walk (cdr remaining)))])))
  (define key (apply string-append (filter string? padded)))
  (list key value))
;; Compute the hyphenation weights for `word`.
;; Returns a list with one number per character of `word`; word->hyphenation-points
;; treats an odd number as "a break may follow this character".
;; Lookup order: the current word cache, then the current exceptions table,
;; then the current patterns table. The merged result is stored in the word
;; cache before returning.
(define (make-points word)
  ;; walk through all the substrings and see if there's a matching pattern.
  ;; if so, pad it out to full length (so the lists can be merged position by position)
  (define word-with-dots (format ".~a." (string-downcase word)))
  (define word-length (string-length word-with-dots))
  (define matching-patterns
    (cond
      [(hash-has-key? (current-word-cache) word-with-dots)
       (list (hash-ref (current-word-cache) word-with-dots))]
      [(hash-has-key? (current-exceptions) word-with-dots)
       (list (hash-ref (current-exceptions) word-with-dots))]
      [else
       ;; ensures there's at least one (all-zero) element in return value
       (let ([starting-value (make-list (add1 word-length) 0)])
         (reverse
          (for*/fold ([acc (cons starting-value null)])
                     ([len (in-range word-length)]
                      [index (in-range (- word-length len))])
            (define candidate (substring word-with-dots index (+ index len 1)))
            (cond
              [(hash-has-key? (current-patterns) candidate)
               (define value (hash-ref (current-patterns) candidate))
               ;; put together head padding + value + tail padding
               (define pattern-to-add
                 (append (make-list index 0)
                         value
                         (make-list (- (add1 word-length) (length value) index) 0)))
               (cons pattern-to-add acc)]
              [else acc]))))]))
  ;; Position-wise maximum over a non-empty list of equal-length lists.
  (define (apply-map-max xss)
    (if (ormap empty? (list xss (car xss)))
        empty
        (cons (apply max (map car xss))
              (apply-map-max (map cdr xss)))))
  (define max-value-pattern (apply-map-max matching-patterns))
  (cache-word (cons word-with-dots max-value-pattern))
  ;; for point list,
  ;; drop first two elements because they represent hyphenation weight
  ;; before the starting "." and between "." and the first letter.
  ;; drop last element because it represents hyphen after last "."
  ;; after you drop these, each number corresponds to
  ;; whether a hyphen goes after that letter.
  (drop-right (drop max-value-pattern 2) 1))
;; Find hyphenation points in a word. This is not quite synonymous with syllables.
;; Split `word` into the list of pieces between its hyphenation points.
;; - min-length: words shorter than this are returned whole, as (list word);
;;   #f disables the check.
;; - min-left-length / min-right-length: size of the no-hyphen zone at the
;;   start / end of the word; #f falls back to the module defaults.
;; Returns a list of strings.
(define (word->hyphenation-points word
                                  [min-length default-min-length]
                                  [min-left-length default-min-left-length]
                                  [min-right-length default-min-right-length])
  #;((string?) ((or/c #f exact-nonnegative-integer?)(or/c #f exact-nonnegative-integer?)(or/c #f exact-nonnegative-integer?)) . ->* . (listof string?))
  (define (add-no-hyphen-zone points)
    ;; points is a list corresponding to the letters of the word.
    ;; to create a no-hyphenation zone of length n, zero out the first n-1 points
    ;; and the last n points (because the last value in points is always superfluous)
    (let* ([min-left-length (min (or min-left-length default-min-left-length) (length points))]
           [min-right-length (min (or min-right-length default-min-right-length) (length points))]
           ;; clamp at 0 so an empty point list or a zero/negative length
           ;; cannot reach make-list / drop with a negative count
           [left-zeroes (max 0 (sub1 min-left-length))]
           [right-zeroes (max 0 min-right-length)])
      (define points-with-zeroes-on-left
        (append (make-list left-zeroes 0) (drop points left-zeroes)))
      (define points-with-zeroes-on-left-and-right
        (append (drop-right points-with-zeroes-on-left right-zeroes) (make-list right-zeroes 0)))
      points-with-zeroes-on-left-and-right))
  (define (make-pieces word)
    (define-values (word-pieces last-piece)
      (for/fold ([word-pieces empty]
                 [current-piece empty])
                ([str (in-list (regexp-match* #rx"." word))] ; explodes word into list of one-character strings (char list is slower)
                 [point (in-list (add-no-hyphen-zone (make-points word)))])
        (define updated-current-piece (cons str current-piece))
        (if (even? point)
            (values word-pieces updated-current-piece) ; even point: keep accumulating the current piece
            (values (cons (string-join (reverse updated-current-piece) "") word-pieces) empty)))) ; odd point: close the piece after this character
    (reverse (cons (string-join (reverse last-piece) "") word-pieces)))
  (if (and min-length (< (string-length word) min-length))
      (list word)
      (make-pieces word)))
;; joiner contract allows char or string; this coerces to string.
;; Coerce a joiner (a char or a string) to a string by writing it in
;; `display` form into a string port.
(define (joiner->string joiner)
  (let ([out (open-output-string)])
    (display joiner out)
    (get-output-string out)))
;; Apply `proc` to every string inside `x`, recursing through tagged
;; X-expressions.
;; - A string is passed to `proc` unless `omit-string` returns true for it.
;; - A txexpr is rebuilt with the same tag and attributes and with its
;;   elements processed recursively, unless `omit-txexpr` returns true for it.
;; - Anything else (including omitted strings and txexprs) is returned unchanged.
(define (apply-proc proc x [omit-string (λ(x) #f)] [omit-txexpr (λ(x) #f)])
  (let loop ([x x])
    (cond
      [(and (string? x) (not (omit-string x))) (proc x)]
      [(and (txexpr? x) (not (omit-txexpr x)))
       (make-txexpr (get-tag x) (get-attrs x) (map loop (get-elements x)))]
      [else x])))
(require sugar/debug)
;; Insert `joiner` at the hyphenation points of every word in `x`.
;; - x: a string or tagged X-expression; strings inside txexprs are processed
;;   recursively by apply-proc.
;; - joiner: char or string placed between word pieces.
;; - #:exceptions: hyphen-marked words added to the current exceptions table
;;   before processing.
;; - #:min-length, #:min-left-length, #:min-right-length: passed through to
;;   word->hyphenation-points.
;; - #:omit-word, #:omit-string, #:omit-txexpr: predicates; a true result
;;   leaves that word / string / txexpr untouched.
(define (hyphenate x [joiner default-joiner]
                   #:exceptions [extra-exceptions empty]
                   #:min-length [min-length default-min-length]
                   #:min-left-length [min-left-length default-min-left-length]
                   #:min-right-length [min-right-length default-min-right-length]
                   #:omit-word [omit-word? (λ(x) #f)]
                   #:omit-string [omit-string? (λ(x) #f)]
                   #:omit-txexpr [omit-txexpr? (λ(x) #f)])
  (for-each add-exception-word extra-exceptions)
  ;; todo?: connect this regexp pattern to the one used in word? predicate
  ;; \w alone covers only ASCII letters, digits and underscore; \p{L} adds
  ;; every Unicode letter so accented words are matched as one word instead
  ;; of being cut at each accented character.
  (define word-pattern #px"(?:\\w|\\p{L})+") ;; no hyphens, so more restrictive than exception-word
  (define (replacer word . words)
    (if (not (omit-word? word))
        (string-join (word->hyphenation-points word min-length min-left-length min-right-length) (joiner->string joiner))
        word)) ; omitted words pass through unchanged
  (define (insert-hyphens text)
    (regexp-replace* word-pattern text replacer))
  (apply-proc insert-hyphens x omit-string? omit-txexpr?))
;; Remove every occurrence of `joiner` from the words in `x`.
;; - x: a string or tagged X-expression; strings inside txexprs are processed
;;   recursively by apply-proc.
;; - joiner: char or string to strip out.
;; - #:omit-word, #:omit-string, #:omit-txexpr: predicates; a true result
;;   leaves that word / string / txexpr untouched.
(define (unhyphenate x [joiner default-joiner]
                     #:omit-word [omit-word? (λ(x) #f)]
                     #:omit-string [omit-string? (λ(x) #f)]
                     #:omit-txexpr [omit-txexpr? (λ(x) #f)])
  (define joiner-string (joiner->string joiner))
  (define joiner-chars (string->list joiner-string))
  ;; The joiner's characters are spliced into a character class, so the ones
  ;; that are special there must be neutralized: "]" and "\" are
  ;; backslash-escaped, and any "-" is moved to the very end of the class,
  ;; where it is literal.
  (define class-body
    (string-append
     (apply string-append
            (for/list ([c (in-list joiner-chars)]
                       #:unless (char=? c #\-))
              (if (memv c '(#\] #\\))
                  (string #\\ c)
                  (string c))))
     (if (memv #\- joiner-chars) "-" "")))
  ;; A "word" is a run of Unicode letters, ASCII word characters and joiner
  ;; characters; \p{L} keeps accented words in one piece.
  (define word-pattern (pregexp (format "(?:\\p{L}|[\\w~a])+" class-body)))
  (define (replacer word . words)
    (if (not (omit-word? word))
        (string-replace word joiner-string "")
        word)) ; omitted words pass through unchanged
  (define (remove-hyphens text)
    (regexp-replace* word-pattern text replacer))
  (apply-proc remove-hyphens x omit-string? omit-txexpr?))
#;(module+ main
(report (current-word-cache))
(hyphenate "snowman" "-")
(parameterize ([current-word-cache (make-hash)]
[current-exceptions '("snow-man")])
(report (current-patterns))
(hyphenate "snowman" "-"))
(report (current-word-cache))
(hyphenate "snowman" "-" )
#;(define t "supercalifragilisticexpialidocious")
#;(hyphenate t "-"))

#lang racket/base
;; Reader for `#lang hyphenate/exception-prep`.
;; The entire body of the source file is read as one string of
;; whitespace-separated, hyphen-marked exception words. The reader produces a
;; module that provides `exceptions`: an immutable hash built by applying
;; `convert-exception-word` (from hyphenate/core) to each word.
(module reader racket/base
  (require racket/port syntax/strip-context)
  (provide (rename-out [exception-prep-read read]
                       [exception-prep-read-syntax read-syntax]))
  ;; read: the datum form of whatever read-syntax produces.
  (define (exception-prep-read in)
    (syntax->datum (exception-prep-read-syntax #f in)))
  ;; read-syntax: `src` is accepted for the reader protocol but not used.
  (define (exception-prep-read-syntax src in)
    (with-syntax ([str (port->string in)])
      ;; strip-context removes this reader module's lexical context so the
      ;; generated module's identifiers are bound by its own language and requires.
      (strip-context
       #'(module exception-prep racket/base
           (require racket/string racket/list hyphenate/core)
           (provide exceptions)
           (define exceptions (apply hash (append-map convert-exception-word (string-split str)))))))))

#lang racket
(require hyphenate/fr rackunit)
(hyphenate "formidable")

#lang racket/base
(require hyphenate/bootstrap)
(build-main fr)

#lang hyphenate/exception-prep

#lang racket/base
(require sugar/define)
(require-via-wormhole "../typed/hyphenate/core-predicates.rkt")

#lang racket/base
(require sugar/define)
(require-via-wormhole "../typed/hyphenate/exceptions.rkt")
(provide default-exceptions)

#lang info
(define scribblings '(("scribblings/hyphenate.scrbl" ())))
(define compile-omit-paths '("tests.rkt"))

#lang racket/base
(require (for-syntax racket/base sugar/string sugar/coerce racket/syntax))
(require sugar/list txexpr)

#lang racket/base
(require sugar/define txexpr (only-in xml xexpr/c))
(require-via-wormhole "../typed/hyphenate/main.rkt")
[hyphenate ((xexpr?)
((or/c char? string?)
#:exceptions exception-words?
#:min-length (or/c integer? #f)
#:omit-word (string? . -> . any/c)
#:omit-string (string? . -> . any/c)
#:omit-txexpr (txexpr? . -> . any/c)
#:min-left-length (or/c (and/c integer? positive?) #f)
#:min-right-length (or/c (and/c integer? positive?) #f)) . ->* . xexpr/c)]
[unhyphenate ((xexpr/c)
((or/c char? string?)
#:omit-word (string? . -> . any/c)
#:omit-string (string? . -> . any/c)
#:omit-txexpr (txexpr? . -> . any/c)) . ->* . xexpr/c)]
[word->hyphenation-points ((string?) ((or/c #f exact-nonnegative-integer?)(or/c #f exact-nonnegative-integer?)(or/c #f exact-nonnegative-integer?)) . ->* . (listof string?))]
[exception-word? (string? . -> . boolean?)])

#lang racket/base
(require sugar/define)
(require-via-wormhole "../typed/hyphenate/patterns-hashed.rkt")
(provide hashed-patterns)

#lang racket/base
(require sugar/define)
(require-via-wormhole "../typed/hyphenate/patterns.rkt")

#lang racket/base
(require (for-syntax racket/base racket/syntax))
(define-syntax (eval-as-untyped stx)
(syntax-case stx ()
[(_ exprs ...)
(with-syntax ([sym (syntax-e (generate-temporary))])
(datum->syntax stx `(begin
(module ,(syntax->datum #'sym) typed/racket/base/no-check
(require rackunit "main.rkt" txexpr)
,@(syntax->datum #'(exprs ...)))
(require ',(syntax->datum #'sym))) stx))]))
(define-syntax (eval-as-untyped-safe stx)
(syntax-case stx ()
[(_ exprs ...)
(with-syntax ([sym (syntax-e (generate-temporary))])
(datum->syntax stx `(begin
(module ,(syntax->datum #'sym) typed/racket/base/no-check
(require rackunit (submod "main.rkt" safe) txexpr)
,@(syntax->datum #'(exprs ...)))
(require ',(syntax->datum #'sym))) stx))]))
(define-syntax (eval-as-typed stx)
(syntax-case stx ()
[(_ exprs ...)
(with-syntax ([sym (syntax-e (generate-temporary))])
(datum->syntax stx `(begin
(module ,(syntax->datum #'sym) typed/racket
(require typed/rackunit "../typed/hyphenate/main.rkt" typed/txexpr)
,@(syntax->datum #'(exprs ...)))
(require ',(syntax->datum #'sym))) stx))]))
(define-syntax-rule (eval-as-typed-and-untyped exprs ...)
(eval-as-untyped exprs ...)
(eval-as-untyped-safe exprs ...)
(eval-as-typed exprs ...)))
(define omit-em-tag (λ:([x : Txexpr]) (member (car x) '(em))))
(define omit-p-tag (λ:([x : Txexpr]) (member (car x) '(p))))
(define omit-foo-zam-tag (λ:([x : Txexpr]) (member (car x) '(foo zam))))
(define ends-with-s (λ:([x : String]) (regexp-match #rx"s$" x)))
(define omit-script-tag (λ:([x : Txexpr]) (member (car x) '(script))))
(define tx-with-attr (λ:([x : Txexpr]) (with-handlers ([exn:fail? (λ(exn) #f)])
(equal? (attr-ref x 'hyphens) "no-thanks"))))
(check-equal? (hyphenate "edges") "edges") ;; word without matching patterns
(check-equal? (hyphenate "polymorphism") "poly\u00ADmor\u00ADphism")
(check-equal? (hyphenate "POLYmorPHISM") "POLY\u00ADmor\u00ADPHISM")
(check-equal? (hyphenate "polymorphism" #:min-length 100) "polymorphism")
(check-equal? (hyphenate "ugly" #:min-length 1) "ug\u00ADly")
(check-equal? (unhyphenate "poly\u00ADmor\u00ADphism") "polymorphism")
(check-equal? (hyphenate "polymorphism" #\-) "poly-mor-phism")
(check-equal? (hyphenate "polymorphism" "foo") "polyfoomorfoophism")
(check-equal? (unhyphenate "polyfoomorfoophism" "foo") "polymorphism")
(check-equal? (hyphenate "circular polymorphism squandering") "cir\u00ADcu\u00ADlar poly\u00ADmor\u00ADphism squan\u00ADder\u00ADing")
(check-equal? (hyphenate '(p "circular polymorphism" amp (em "squandering"))) '(p "cir\u00ADcu\u00ADlar poly\u00ADmor\u00ADphism" amp (em "squan\u00ADder\u00ADing")))
(check-equal? (hyphenate "present project") "present project") ; exception words
;; test these last so exceptions have been set up already
(check-equal? (word->hyphenation-points "polymorphism") '("poly" "mor" "phism"))
(check-equal? (word->hyphenation-points "present") '("present")) ; exception word
(check-true (exception-word? "Foobar"))
(check-true (exception-word? "foobar"))
(check-false (exception-word? "foobar!"))
(check-true (exception-word? "foo-bar"))
(check-false (exception-word? "foo bar"))
;; omit certain tags
(check-equal? (hyphenate '(p "circular polymorphism" amp (em "squandering")) #:omit-txexpr omit-em-tag)
'(p "cir\u00ADcu\u00ADlar poly\u00ADmor\u00ADphism" amp (em "squandering")))
(check-equal? (hyphenate '(p "circular polymorphism" amp (em "squandering")) #:omit-txexpr omit-p-tag)
'(p "circular polymorphism" amp (em "squandering")))
(check-equal? (hyphenate '(p (foo "circular") (bar "circular") (zam "circular")) #:omit-txexpr omit-foo-zam-tag)
'(p (foo "circular") (bar "cir\u00ADcu\u00ADlar") (zam "circular")))
; omit txexprs with an attribute
(check-equal? (hyphenate '(p (foo ((hyphens "no-thanks")) "circular") (foo "circular"))
#:omit-txexpr tx-with-attr)
'(p (foo ((hyphens "no-thanks")) "circular") (foo "cir\u00ADcu\u00ADlar")))
;; omit strings that end with "s"
(check-equal? (hyphenate '(p (foo "curses tailfeathers") (foo "curses tailfeather")) #:omit-string ends-with-s)
'(p (foo "curses tailfeathers") (foo "curs\u00ADes tail\u00ADfeath\u00ADer")))
;; omit words that end with "s"
(check-equal? (hyphenate '(p (foo "curses tailfeathers") (foo "curses tailfeather")) #:omit-word ends-with-s)
'(p (foo "curses tailfeathers") (foo "curses tail\u00ADfeath\u00ADer")))
(check-equal? (unhyphenate '(p (script "tail-feathers") (em "tail-feathers")) #\- #:omit-txexpr omit-script-tag)
'(p (script "tail-feathers") (em "tailfeathers")))
(check-equal? (unhyphenate '(p "cir-cu-lar poly-mor-phism" "cir-cu-lar poly-mor-phisms") #\- #:omit-string ends-with-s)
'(p "circular polymorphism" "cir-cu-lar poly-mor-phisms"))
(check-equal? (hyphenate "polymorphism" #\- #:min-left-length 5 #:min-right-length 5) "polymor-phism")
(check-equal? (hyphenate "polymorphism" #\- #:min-left-length 3 #:min-right-length 7) "poly-morphism")
(check-equal? (hyphenate "polymorphism" #\- #:min-left-length 7 #:min-right-length 7) "polymorphism")
(check-equal? (hyphenate "polymorphism" #\* #:exceptions '("polymo-rphism")) "polymo*rphism"))

#lang info #lang info
(define collection "hyphenate")
(define deps '("base" "sugar" "txexpr" "rackunit-lib"))
(define update-implies '("txexpr" "sugar")) (define update-implies '("txexpr" "sugar"))
(define build-deps '("scribble-lib" "racket-doc"))
(define scribblings '(("scribblings/hyphenate.scrbl" ())))
(define compile-omit-paths '("tests.rkt"))

#lang racket
(require "us.rkt")
(provide (all-from-out "us.rkt"))
(module+ safe
(require (submod "us.rkt" safe))
(provide (all-from-out (submod "us.rkt" safe))))

#lang racket/base
(provide (all-defined-out))
;; Parameters shared by the hyphenation core.
;; current-patterns: pattern table; the core only reads it
;; (hash-has-key? / hash-ref).
(define current-patterns (make-parameter (make-hash)))
;; current-exceptions: exception table. The core extends it with the
;; functional `hash-set`, which only accepts immutable hashes, so the default
;; must be immutable.
(define current-exceptions (make-parameter (hash)))
;; current-word-cache: memo table; the core updates it in place with
;; `hash-set!`, so it must be mutable.
(define current-word-cache (make-parameter (make-hash)))

#lang racket/base
;; Reader for `#lang hyphenate/pattern-prep`.
;; The entire body of the source file is read as one string of
;; whitespace-separated hyphenation patterns. The reader produces a module
;; that provides `patterns`: an immutable hash built by applying
;; `string->hashpair` (from hyphenate/core) to each pattern.
(module reader racket/base
  (require racket/port syntax/strip-context)
  (provide (rename-out [pattern-prep-read read]
                       [pattern-prep-read-syntax read-syntax]))
  ;; read: the datum form of whatever read-syntax produces.
  (define (pattern-prep-read in)
    (syntax->datum (pattern-prep-read-syntax #f in)))
  ;; read-syntax: `src` is accepted for the reader protocol but not used.
  (define (pattern-prep-read-syntax src in)
    (with-syntax ([str (port->string in)])
      ;; strip-context removes this reader module's lexical context so the
      ;; generated module's identifiers are bound by its own language and requires.
      (strip-context
       #'(module pattern-prep racket/base
           (require hyphenate/core racket/list racket/string)
           (provide patterns)
           (define patterns (apply hash (append-map string->hashpair (string-split str)))))))))

@author[(author+email "Matthew Butterick" "")] @author[(author+email "Matthew Butterick" "")]
@defmodule[#:multi (hyphenate (submod hyphenate safe) typed/hyphenate)] @defmodule[#:multi (hyphenate (submod hyphenate safe))]
A simple hyphenation engine that uses the Knuth–Liang hyphenation algorithm originally developed for TeX. I have added little to their work. Accordingly, I take little credit. A simple hyphenation engine that uses the Knuth–Liang hyphenation algorithm originally developed for TeX. I have added little to their work. Accordingly, I take little credit.
I originally put together this module to handle hyphenation for my web-based book @link[""]{Butterick's Practical Typography} (which I made with Racket & Scribble). Though support for CSS-based hyphenation in web browsers is @link[""]{still iffy}, soft hyphens work reliably well. But putting them into the text manually is a drag. Thus a module was born.
I thank Benjamin Greenman and Alexander Knauth for helpful suggestions on the typed version.
@section{Installation} @section{Installation}
At the command line: At the command line:
@ -29,17 +25,12 @@ After that, you can update the package like so:
@section{Importing the module} @section{Importing the module}
The module can be invoked three ways: fast, safe, and typed. The module can be invoked two ways: fast or safe.
Fast mode is the default, which you get by importing the module in the usual way: @code{(require hyphenate)}. Fast mode is the default, which you get by importing the module in the usual way: @code{(require hyphenate)}.
Safe mode enables the function contracts documented below. Use safe mode by importing the module as @code{(require (submod hyphenate safe))}. Safe mode enables the function contracts documented below. Use safe mode by importing the module as @code{(require (submod hyphenate safe))}.
The typed version is invoked as @code{(require typed/hyphenate)}. The typed version is implemented ``natively'' in the sense that it is compiled separately with type annotations. It is not a @racket[require/typed] wrapper around the untyped code. This avoids the contract barrier that is otherwise automatically imposed between typed and untyped code.
@margin-note{I explain more about this cross-compiling technique in @link[""]{Making a dual typed / untyped Racket library}.}
@section{Interface} @section{Interface}
@defproc[ @defproc[
@ -203,6 +194,33 @@ Keep in mind that soft hyphens could appear in your input string. Certain word p
] ]
@defmodule[#:multi (hyphenate/fr (submod hyphenate/fr safe))]
French hyphenation is available by importing the module as @racketmodname[hyphenate/fr] or @racketmodname[(submod hyphenate/fr safe)] and using the @racket[hyphenate] function normally. Below, notice that the word ``formidable'' hyphenates differently in French.
@examples[#:eval my-eval
(hyphenate "formidable" #\-)
(module fr racket/base
(require hyphenate/fr)
(hyphenate "formidable" #\-))
(require 'fr)
]
The two languages live in separate modules for performance reasons. That way, each language can maintain its own cache of hyphenated words.
There is no way to use @racket[hyphenate] in ``polyglot'' mode, where English and French are detected automatically. It is possible, however, to mix both the English and French @racket[hyphenate] functions in a single file, and apply them as needed. To avoid a name conflict between the two @racket[hyphenate] functions, you'll need to use @racket[prefix-in]:
@examples[#:eval my-eval
(require (prefix-in fr: hyphenate/fr))
(hyphenate "formidable" #\-)
(fr:hyphenate "formidable" #\-)
]
@section{License & source code} @section{License & source code}
This module is licensed under the LGPL. This module is licensed under the LGPL.

p, .SIntrapara {
display: block;
margin: 0 0 1em 0;
line-height: 1.4;
.compact {
padding: 0 0 1em 0;
li {
list-style-position: outside;
margin-left: 1.2em;
h1, h2, h3, h4, h5, h6, h7, h8 {
font-family: 'Fira';
font-weight: 300;
font-size: 1.6rem;
color: #333;
margin-top: inherit;
margin-bottom: 1rem;
line-height: 1.25;
-moz-font-feature-settings: 'tnum=1';
-moz-font-feature-settings: 'tnum' 1;
-webkit-font-feature-settings: 'tnum' 1;
-o-font-feature-settings: 'tnum' 1;
-ms-font-feature-settings: 'tnum' 1;
font-feature-settings: 'tnum' 1;
h3, h4, h5, h6, h7, h8 {
border-top: 1px solid black;
h2 { /* per-page main title */
font-family: 'Miso';
font-weight: bold;
margin-top: 4rem;
font-size: 3rem;
line-height: 1.1;
width: 90%;
h3, h4, h5, h6, h7, h8 {
margin-top: 2em;
padding-top: 0.1em;
margin-bottom: 0.75em;
/* ---------------------------------------- */
/* Main */
body {
color: black;
background-color: white;
.maincolumn {
width: auto;
margin-top: 4rem;
margin-left: 17rem;
margin-right: 2rem;
margin-bottom: 10rem; /* to avoid fixed bottom nav bar */
max-width: 700px;
min-width: 370px; /* below this size, code samples don't fit */
a {
text-decoration: inherit;
a, .toclink, .toptoclink, .tocviewlink, .tocviewselflink, .tocviewtoggle, .plainlink,
.techinside, .techoutside:hover, .techinside:hover {
color: #07A;
a:hover {
text-decoration: underline;
/* ---------------------------------------- */
/* Navigation */
.navsettop, .navsetbottom {
left: 0;
width: 15rem;
height: 6rem;
font-family: 'Fira';
font-size: 0.9rem;
border-bottom: 0px solid hsl(216, 15%, 70%);
background-color: inherit;
padding: 0;
.navsettop {
position: absolute;
top: 0;
left: 0;
margin-bottom: 0;
border-bottom: 0;
.navsettop a, .navsetbottom a {
color: black;
.navsettop a:hover, .navsetbottom a:hover {
background: hsl(216, 78%, 95%);
text-decoration: none;
.navleft, .navright {
position: static;
float: none;
margin: 0;
white-space: normal;
.navleft a {
display: inline-block;
.navright a {
display: inline-block;
text-align: center;
.navleft a, .navright a, .navright span {
display: inline-block;
padding: 0.5rem;
min-width: 1rem;
.navright {
height: 2rem;
white-space: nowrap;
.navsetbottom {
display: none;
.nonavigation {
color: #889;
.searchform {
display: block;
margin: 0;
padding: 0;
border-bottom: 1px solid #eee;
height: 4rem;
.nosearchform {
margin: 0;
padding: 0;
height: 4rem;
.searchbox {
font-size: 1rem;
width: 12rem;
margin: 1rem;
padding: 0.25rem;
vertical-align: middle;
background-color: white;
#search_box {
font-size: 0.8rem;
/* ---------------------------------------- */
/* Version */
.versionbox {
position: absolute;
float: none;
top: 0.25rem;
left: 17rem;
z-index: 11000;
height: 2em;
font-size: 70%;
font-weight: lighter;
width: inherit;
margin: 0;
.version, .versionNoNav {
font-size: inherit;
.version:before, .versionNoNav:before {
content: "v.";
/* ---------------------------------------- */
/* Margin notes */
/* cancel scribble.css styles: */
.refpara, .refelem {
position: static;
float: none;
height: auto;
width: auto;
margin: 0;
.refcolumn {
position: static;
display: block;
width: auto;
font-size: inherit;
margin: 2rem;
margin-left: 2rem;
padding: 0.5em;
padding-left: 0.75em;
padding-right: 1em;
background: hsl(60, 29%, 94%);
border: 1px solid #ccb;
border-left: 0.4rem solid #ccb;
/* slightly different handling for margin-note* on narrow screens */
@media all and (max-width:1260px) {
span.refcolumn {
float: right;
width: 50%;
margin-left: 1rem;
margin-bottom: 0.8rem;
margin-top: 1.2rem;
.refcontent, .refcontent p {
line-height: 1.5;
margin: 0;
.refcontent p + p {
margin-top: 1em;
.refcontent a {
font-weight: 400;
.refpara, .refparaleft {
top: -1em;
@media all and (max-width:600px) {
.refcolumn {
margin-left: 0;
margin-right: 0;
@media all and (min-width:1260px) {
.refcolumn {
position: absolute;
left: 66rem; right: 3em;
margin: 0;
float: right;
max-width: 18rem;
.refcontent {
font-family: 'Fira';
font-size: 1rem;
line-height: 1.6;
margin: 0 0 0 0;
.refparaleft, .refelemleft {
position: relative;
float: left;
right: 2em;
height: 0em;
width: 13em;
margin: 0em 0em 0em -13em;
.refcolumnleft {
background-color: hsl(60, 29%, 94%);
display: block;
position: relative;
width: 13em;
font-size: 85%;
border: 0.5em solid hsl(60, 29%, 94%);
margin: 0 0 0 0;
/* ---------------------------------------- */
/* Table of contents, left margin */
.tocset {
position: absolute;
float: none;
left: 0;
top: 0rem;
width: 14rem;
padding: 7rem 0.5rem 0.5rem 0.5rem;
background-color: hsl(216, 15%, 70%);
margin: 0;
.tocset td {
vertical-align: text-top;
padding-bottom: 0.4rem;
padding-left: 0.2rem;
line-height: 1.1;
font-family: 'Fira';
-moz-font-feature-settings: 'tnum=1';
-moz-font-feature-settings: 'tnum' 1;
-webkit-font-feature-settings: 'tnum' 1;
-o-font-feature-settings: 'tnum' 1;
-ms-font-feature-settings: 'tnum' 1;
font-feature-settings: 'tnum' 1;
.tocset td a {
color: black;
font-weight: 400;
.tocview {
text-align: left;
background-color: inherit;
.tocview td, .tocsub td {
line-height: 1.3;
.tocview table, .tocsub table {
width: 90%;
.tocset td a.tocviewselflink {
font-weight: lighter;
font-size: 110%; /* monospaced styles below don't need to enlarge */
color: white;
.tocviewselflink {
text-decoration: none;
.tocsub {
text-align: left;
margin-top: 0.5em;
background-color: inherit;
.tocviewlist, .tocsublist {
margin-left: 0.2em;
margin-right: 0.2em;
padding-top: 0.2em;
padding-bottom: 0.2em;
.tocviewlist table {
font-size: 82%;
.tocviewlisttopspace {
margin-bottom: 1em;
.tocviewsublist, .tocviewsublistonly, .tocviewsublisttop, .tocviewsublistbottom {
margin-left: 0.4em;
border-left: 1px solid #99a;
padding-left: 0.8em;
.tocviewsublist {
margin-bottom: 1em;
.tocviewsublist table,
.tocviewsublistonly table,
.tocviewsublisttop table,
.tocviewsublistbottom table,
table.tocsublist {
font-size: 1rem;
.tocviewsublist td, .tocviewsublistbottom td, .tocviewsublisttop td, .tocsub td,
.tocviewsublistonly td {
font-size: 90%;
.tocviewtoggle {
font-size: 75%; /* looks better, and avoids bounce when toggling sub-sections due to font alignments */
.tocsublist td {
padding-left: 0.5rem;
padding-top: 0.25rem;
text-indent: 0;
.tocsublinknumber {
font-size: 100%;
.tocsublink {
font-size: 82%;
text-decoration: none;
.tocsubseclink {
font-size: 100%;
text-decoration: none;
.tocsubnonseclink {
font-size: 82%;
text-decoration: none;
margin-left: 1rem;
padding-left: 0;
display: inline-block;
/* the label "on this page" */
.tocsubtitle {
display: block;
font-size: 62%;
font-family: 'Fira';
font-weight: bolder;
font-style: normal;
letter-spacing: 2px;
text-transform: uppercase;
margin: 0.5em;
.toptoclink {
font-weight: bold;
font-size: 110%;
margin-bottom: 0.5rem;
margin-top: 1.5rem;
display: inline-block;
.toclink {
font-size: inherit;
/* ---------------------------------------- */
/* Some inline styles */
.indexlink {
text-decoration: none;
pre {
margin-left: 2em;
blockquote {
margin-left: 2em;
margin-right: 2em;
margin-bottom: 1em;
.SCodeFlow {
border-left: 1px dotted black;
padding-left: 1em;
padding-right: 1em;
margin-top: 1em;
margin-bottom: 1em;
margin-left: 0em;
margin-right: 2em;
white-space: nowrap;
line-height: 1.5;
.SCodeFlow img {
margin-top: 0.5em;
margin-bottom: 0.5em;
.boxed {
margin: 0;
margin-top: 2em;
padding: 0.25em;
padding-bottom: 0.5em;
background: #f3f3f3;
border-top: 1px solid #99b;
background: hsl(216, 78%, 95%);
background: -moz-linear-gradient(to bottom left, hsl(0, 0%, 99%) 0%, hsl(216, 78%, 95%) 100%);
background: -webkit-linear-gradient(to bottom left, hsl(0, 0%, 99%) 0%, hsl(216, 78%, 95%) 100%);
background: -o-linear-gradient(to bottom left, hsl(0, 0%, 99%) 0%, hsl(216, 78%, 95%) 100%);
background: -ms-linear-gradient(to bottom left, hsl(0, 0%, 99%) 0%, hsl(216, 78%, 95%) 100%);
background: linear-gradient(to bottom left, hsl(0, 0%, 99%) 0%, hsl(216, 78%, 95%) 100%);
blockquote > blockquote.SVInsetFlow {
/* resolves issue in e.g. /reference/notation.html */
margin-top: 0em;
.leftindent .SVInsetFlow { /* see e.g. section 4.5 of Racket Guide */
margin-top: 1em;
margin-bottom: 1em;
.SVInsetFlow a, .SCodeFlow a {
color: #07A;
font-weight: 500;
.SubFlow {
display: block;
margin: 0em;
.boxed {
width: 100%;
background-color: inherit;
.techoutside { text-decoration: none; }
.SAuthorListBox {
position: static;
float: none;
font-family: 'Fira';
font-weight: 300;
font-size: 110%;
margin-top: 1rem;
margin-bottom: 3rem;
width: 30rem;
height: auto;
.author > a { /* email links within author block */
font-weight: inherit;
color: inherit;
.SAuthorList {
font-size: 82%;
.SAuthorList:before {
content: "by ";
.author {
display: inline;
white-space: nowrap;
/* phone + tablet styles */
@media all and (max-width:720px){
@media all and (max-width:720px){
@media all {html {font-size: 15px;}}
@media all and (max-width:700px){html {font-size: 14px;}}
@media all and (max-width:630px){html {font-size: 13px;}}
@media all and (max-width:610px){html {font-size: 12px;}}
@media all and (max-width:550px){html {font-size: 11px;}}
@media all and (max-width:520px){html {font-size: 10px;}}
.navsettop, .navsetbottom {
display: block;
position: absolute;
width: 100%;
height: 4rem;
border: 0;
background-color: hsl(216, 15%, 70%);
.searchform {
display: inline;
border: 0;
.navright {
position: absolute;
right: 1.5rem;
margin-top: 1rem;
border: 0px solid red;
.navsetbottom {
display: block;
margin-top: 8rem;
.tocset {
display: none;
.tocset table, .tocset tbody, .tocset tr, .tocset td {
display: inline;
.tocview {
display: none;
.tocsub .tocsubtitle {
display: none;
.versionbox {
top: 4.5rem;
left: 1rem; /* same distance as main-column */
z-index: 11000;
height: 2em;
font-size: 70%;
font-weight: lighter;
.maincolumn {
margin-left: 1em;
margin-top: 7rem;
margin-bottom: 0rem;
/* print styles : hide the navigation elements */
@media print {
/* The bottom navigation bar is not printed. */
.navsetbottom { display: none; }
/* Main column: automatic width, flush left, with a 13em right margin. */
.maincolumn {
width: auto;
margin-right: 13em;
margin-left: 0;

#lang racket/base
(require (submod hyphenate safe) txexpr rackunit)
;; Predicates handed to the #:omit-* keyword arguments in the checks below.
;; Each tag predicate looks at the first element of its argument and returns
;; the matching tail of the tag list (a true value), or #f when it is absent.
(define (omit-em-tag tx)
  (member (car tx) '(em)))
(define (omit-p-tag tx)
  (member (car tx) '(p)))
(define (omit-foo-zam-tag tx)
  (member (car tx) '(foo zam)))
;; True (a match list) when the string ends with a lowercase "s", else #f.
(define (ends-with-s str)
  (regexp-match #rx"s$" str))
(define (omit-script-tag tx)
  (member (car tx) '(script)))
;; True when the txexpr carries a `hyphens` attribute whose value is "no-thanks".
;; Any exn:fail raised while looking the attribute up is treated as "no match".
(define (tx-with-attr tx)
  (with-handlers ([exn:fail? (λ (_) #f)])
    (equal? (attr-ref tx 'hyphens) "no-thanks")))
;; plain strings, default joiner (the expected results use the soft hyphen U+00AD)
(check-equal? (hyphenate "edges") "edges") ;; word without matching patterns
(check-equal? (hyphenate "polymorphism") "poly\u00ADmor\u00ADphism")
;; the original letter case is kept in the output
(check-equal? (hyphenate "POLYmorPHISM") "POLY\u00ADmor\u00ADPHISM")
;; #:min-length sets how long a word must be before it is hyphenated
(check-equal? (hyphenate "polymorphism" #:min-length 100) "polymorphism")
(check-equal? (hyphenate "ugly" #:min-length 1) "ug\u00ADly")
;; unhyphenate removes the joiner again
(check-equal? (unhyphenate "poly\u00ADmor\u00ADphism") "polymorphism")
;; the joiner may be given as a char or as a string
(check-equal? (hyphenate "polymorphism" #\-) "poly-mor-phism")
(check-equal? (hyphenate "polymorphism" "foo") "polyfoomorfoophism")
(check-equal? (unhyphenate "polyfoomorfoophism" "foo") "polymorphism")
;; several words in one string; then strings nested inside a txexpr
(check-equal? (hyphenate "circular polymorphism squandering") "cir\u00ADcu\u00ADlar poly\u00ADmor\u00ADphism squan\u00ADder\u00ADing")
(check-equal? (hyphenate '(p "circular polymorphism" amp (em "squandering"))) '(p "cir\u00ADcu\u00ADlar poly\u00ADmor\u00ADphism" amp (em "squan\u00ADder\u00ADing")))
(check-equal? (hyphenate "present project") "present project") ; exception words
;; The checks below are disabled (commented out). They were meant to run last, after the exceptions had been set up.
;(check-equal? (word->hyphenation-points "polymorphism") '("poly" "mor" "phism"))
;(check-equal? (word->hyphenation-points "present") '("present")) ; exception word
;(check-true (exception-word? "Foobar"))
;(check-true (exception-word? "foobar"))
;(check-false (exception-word? "foobar!"))
;(check-true (exception-word? "foo-bar"))
;(check-false (exception-word? "foo bar"))
;; omit certain tags: a txexpr for which the #:omit-txexpr predicate is true is left untouched
(check-equal? (hyphenate '(p "circular polymorphism" amp (em "squandering")) #:omit-txexpr omit-em-tag)
'(p "cir\u00ADcu\u00ADlar poly\u00ADmor\u00ADphism" amp (em "squandering")))
;; omitting the outer tag leaves everything inside it untouched as well
(check-equal? (hyphenate '(p "circular polymorphism" amp (em "squandering")) #:omit-txexpr omit-p-tag)
'(p "circular polymorphism" amp (em "squandering")))
(check-equal? (hyphenate '(p (foo "circular") (bar "circular") (zam "circular")) #:omit-txexpr omit-foo-zam-tag)
'(p (foo "circular") (bar "cir\u00ADcu\u00ADlar") (zam "circular")))
;; omit txexprs with an attribute (here: hyphens="no-thanks")
(check-equal? (hyphenate '(p (foo ((hyphens "no-thanks")) "circular") (foo "circular"))
#:omit-txexpr tx-with-attr)
'(p (foo ((hyphens "no-thanks")) "circular") (foo "cir\u00ADcu\u00ADlar")))
;; omit strings that end with "s" (the whole string is skipped)
(check-equal? (hyphenate '(p (foo "curses tailfeathers") (foo "curses tailfeather")) #:omit-string ends-with-s)
'(p (foo "curses tailfeathers") (foo "curs\u00ADes tail\u00ADfeath\u00ADer")))
;; omit words that end with "s" (only the matching words are skipped)
(check-equal? (hyphenate '(p (foo "curses tailfeathers") (foo "curses tailfeather")) #:omit-word ends-with-s)
'(p (foo "curses tailfeathers") (foo "curses tail\u00ADfeath\u00ADer")))
;; the same omit options apply to unhyphenate: by tag ...
(check-equal? (unhyphenate '(p (script "tail-feathers") (em "tail-feathers")) #\- #:omit-txexpr omit-script-tag)
'(p (script "tail-feathers") (em "tailfeathers")))
;; ... and by string
(check-equal? (unhyphenate '(p "cir-cu-lar poly-mor-phism" "cir-cu-lar poly-mor-phisms") #\- #:omit-string ends-with-s)
'(p "circular polymorphism" "cir-cu-lar poly-mor-phisms"))
;; #:min-left-length / #:min-right-length suppress break points near either end of a word
(check-equal? (hyphenate "polymorphism" #\- #:min-left-length 5 #:min-right-length 5) "polymor-phism")
(check-equal? (hyphenate "polymorphism" #\- #:min-left-length 3 #:min-right-length 7) "poly-morphism")
(check-equal? (hyphenate "polymorphism" #\- #:min-left-length 7 #:min-right-length 7) "polymorphism")
;; a caller-supplied exception overrides the pattern-derived break points
(check-equal? (hyphenate "polymorphism" #\* #:exceptions '("polymo-rphism")) "polymo*rphism")
;; the same word through the default module ...
(check-equal? (hyphenate "formidable" #\-) "for-mi-da-ble")
;; ... and through hyphenate/fr, loaded in a submodule so its `hyphenate`
;; does not clash with the one required at the top of this file
(module french racket/base
(require (submod hyphenate/fr safe) rackunit)
(check-equal? (hyphenate "formidable" #\-) "for-mi-dable")) ; hyphenates differently in French
;; instantiate the submodule so its check actually runs
(require 'french)

#lang typed/racket/base
(require "hyphenate/main.rkt")
(provide hyphenate unhyphenate)

#lang typed/racket/base
(provide (all-defined-out))
;; Type aliases for the hyphenation data.
;; A pattern and a list of patterns are plain strings / lists of strings.
(define-type Pattern String)
(define-type Patterns (Listof String))
;; Pattern tables map a pattern string (the key) to a list of natural numbers (the value).
(define-type Pattern-Hash-Key Pattern)
(define-type Pattern-Hash-Value (Listof Natural))
(define-type Pattern-Hash (HashTable Pattern-Hash-Key Pattern-Hash-Value))
;; One table entry as a (key . value) pair.
(define-type Pattern-Hash-Pair (Pairof Pattern-Hash-Key Pattern-Hash-Value))
;; An exception word is represented as a string.
(define-type Exception-Word String)

#lang typed/racket/base
(require "core-predicates.rkt")
(provide default-exceptions)
;; Knuth and Liang's original exception patterns from classic TeX.
;; In the public domain.
;; Words are written as symbols (hyphens mark the permitted breaks) and
;; converted to strings when the module is instantiated.
(define kl-exceptions : Patterns
  (map symbol->string
       '(as-so-ciate as-so-ciates dec-li-na-tion oblig-a-tory phil-an-thropic
         present presents project projects reci-procity re-cog-ni-zance
         ref-or-ma-tion ret-ri-bu-tion ta-ble)))

;; A second list of exception words, kept separate from the Knuth/Liang list.
(define mb-exceptions : Patterns
  (map symbol->string
       '(real-ly law-yer law-yers law-yered law-yer-ing law-yer-ly oki-na oki-nas)))

;; The exported default: the Knuth/Liang words followed by the second list.
(define default-exceptions : Patterns
  (append kl-exceptions mb-exceptions))

#lang typed/racket/base
(require (for-syntax typed/racket/base typed/sugar/string typed/sugar/coerce racket/syntax))
(require typed/sugar/define typed/sugar/list typed/txexpr)

#lang typed/racket/base
(require sugar/include)
(include-without-lang-line "main-helper.rkt")
(require typed/sugar/define racket/string racket/list racket/bool)
(require "patterns-hashed.rkt" "exceptions.rkt" "core-predicates.rkt")
(provide hyphenate unhyphenate reset-patterns word->hyphenation-points exception-word? exception-words?)
;; Module-level tables. Both start out empty and are assigned with set! later,
;; because the real data is potentially big and slow to build.
(define patterns : Pattern-Hash (make-hash))
(define pattern-cache : Pattern-Hash (make-hash))
;; Default argument values used by the functions in this module.
(define default-min-length : Natural 5)
(define default-min-left-length : Natural 2)
(define default-min-right-length : Natural 2)
;; U+00AD is the soft hyphen.
(define default-joiner : Char #\u00AD)
;; Store one (key . points) pair in the module-level pattern-cache,
;; replacing any entry already stored under that key.
(define/typed (add-pattern-to-cache pat)
  (Pattern-Hash-Pair -> Void)
  (let ([key (car pat)]
        [points (cdr pat)])
    (hash-set! pattern-cache key points)))
;; Convert the hyphenated pattern into a point array for use later.
;; The result is stored in pattern-cache via add-pattern-to-cache, keyed by the
;; de-hyphenated word wrapped in dots.
(define/typed (add-exception exception)
(Pattern -> Void)
;; key: remove the hyphens and wrap the word in "." on both sides, e.g. "law-yer" -> ".lawyer."
(define/typed (make-key x)
(Pattern -> Pattern-Hash-Key)
(format ".~a." (string-replace x "-" "")))
;; value: split the word at every lowercase letter; each piece that is exactly "-"
;; becomes 1 and every other piece becomes 0, then a 0 is added at each end
;; (presumably for the two boundary dots added by make-key).
;; NOTE(review): the split only recognises a-z, so an exception containing uppercase
;; letters or digits would produce a differently shaped list -- confirm whether that can occur.
(define/typed (make-value x)
(Pattern -> Pattern-Hash-Value)
`(0 ,@(map (λ(x) (if (equal? x "-") 1 0)) (regexp-split #px"[a-z]" x)) 0))
(add-pattern-to-cache (cons (make-key exception) (make-value exception)))
;; A hash table is empty when it holds no entries.
(define-syntax-rule (hash-empty? table)
  (= 0 (hash-count table)))
;; Fill the two module-level tables, but only those that are currently empty;
;; a table that already has entries is left as it is.
(define/typed (initialize-patterns)
  (-> Void)
  ;; seed the cache with every default exception word
  (when (hash-empty? pattern-cache)
    (for ([exception-word (in-list default-exceptions)])
      (add-exception exception-word)))
  ;; install the precomputed pattern table
  (when (hash-empty? patterns)
    (set! patterns hashed-patterns)))
;; Replace both module-level tables (pattern-cache and patterns) with empty hashes.
(define/typed (reset-patterns)
(-> Void)
(define: blank : Pattern-Hash (make-hash))
;; each table gets its own copy, so the two never share one mutable hash
(set! pattern-cache (hash-copy blank))
(set! patterns (hash-copy blank))
;; An exception-word is a non-empty string made up solely of word characters and hyphens.
;; Always answers with a real boolean, never the regexp match list.
(define/typed (exception-word? x)
  (Any -> Boolean)
  (cond
    [(not (string? x)) #f]
    [(regexp-match #px"^[\\w-]+$" x) #t]
    [else #f]))
;; True when xs is a list and every element satisfies exception-word?
;; (so the empty list qualifies).
(define/typed (exception-words? xs)
  (Any -> Boolean)
  (if (list? xs)
      (andmap exception-word? xs)
      #f))
;; Try to read a string as a natural number. Each step of the binding chain turns
;; into #f as soon as a check fails: not parseable as a number, or (after
;; conversion to exact) not an exact nonnegative integer.
(define/typed (string->natural i)
(String -> (Option Natural))
(let* ([result (string->number i)]
[result (and (number? result) (inexact->exact result))]
[result (and (exact-nonnegative-integer? result) result)])
;; NOTE(review): the let* body and closing parens are not present in this view;
;; presumably the final `result` is returned -- confirm.
;; Turn one textual pattern (letters with optional interleaved digits) into a
;; (key . value) pair: the key is the pattern's non-digit characters joined into a
;; string, the value is the list of numbers, with 0 filled in wherever the
;; pattern has no digit. For example "a1bc" should give ("abc" . (0 1 0 0)).
(define/typed (string->hashpair pat)
(String -> Pattern-Hash-Pair)
(define boundary-name ".")
;; first convert the pattern to a list of alternating letters and numbers.
;; insert zeroes where there isn't a number in the pattern.
(define new-pat
(let*: ([pat : (Listof String) (regexp-match* #rx"." pat)] ; convert to list
[pat : (Listof (U String Natural)) ((inst map (U String Natural) String) (λ(i) (or (string->natural i) i)) pat)] ; convert numbers
[pat : (Listof (U String Natural)) (if (string? (car pat)) (cons 0 pat) pat)] ; add zeroes to front where needed
[pat : (Listof (U String Natural)) (if (string? (car (reverse pat))) (reverse (cons 0 (reverse pat))) pat)]) ; and back
;; the fold builds one small chunk per element (consed in reverse order);
;; the chunks are then put back in order and flattened
(apply append
(reverse (for/fold: ([acc : (Listof (Listof (U String Natural))) null])
([current (in-list pat)][i (in-naturals)])
;; the last element has no successor to compare against
(if (= i (sub1 (length pat)))
(cons (reverse (list current)) acc)
(let ([next (list-ref pat (add1 i))])
;; insert zeroes where there isn't a number
;; (two strings in a row means the digit between them was omitted).
;; A chunk holds at most one string and one number, so reversing it
;; does not disturb the order seen by the two filters below.
(cons (reverse (if (and (or (equal? current boundary-name) (string? current)) (string? next))
(list current 0)
(list current))) acc))))))))
;; then slice out the string & numerical parts to be a key / value pair.
(define value (filter exact-nonnegative-integer? new-pat))
(define key (filter string? new-pat))
(cons (apply string-append key) value))
;; Compute the list of hyphenation weights for a word: one number per letter.
;; The full-length weight list is also stored in pattern-cache under the
;; dot-wrapped, downcased word.
;; NOTE(review): the bracketed clauses below imply enclosing `cond` forms (and an
;; empty-list branch in apply-map-max) whose lines are not present in this view.
(define/typed (make-points word)
(String -> Pattern-Hash-Value)
;; walk through all the substrings and see if there's a matching pattern.
;; if so, pad it out to full length (so we can (apply map max ...) later on)
(define: word-with-dots : String (format ".~a." (string-downcase word)))
(define: matching-patterns : (Listof Pattern-Hash-Value)
;; a cached entry (exception word or earlier result) is used as the only candidate
[(hash-has-key? pattern-cache word-with-dots) (list (hash-ref pattern-cache word-with-dots))]
(let ([word-as-list (string->list word-with-dots)])
;; ensures there's at least one (null) element in return value
(define starting-value (make-list (add1 (length word-as-list)) 0))
;; len + 1 is the substring length, index its start position:
;; together they enumerate every substring of the dotted word
(reverse (for*/fold: ([acc : (Listof Pattern-Hash-Value) (cons starting-value null)])
([len (in-range (length word-as-list))]
[index (in-range (- (length word-as-list) len))])
(define substring (list->string (take (drop word-as-list index) (add1 len))))
[(hash-has-key? patterns substring)
(define value (hash-ref patterns substring))
;; put together head padding + value + tail padding
(define pattern-to-add (append (make-list index 0) value (make-list (- (add1 (length word-as-list)) (length value) index) 0)))
(cons pattern-to-add acc)]
[else acc]))))]))
;; position-wise maximum across all candidate lists: takes the max of the heads,
;; then recurs on the tails
(define/typed (apply-map-max xss)
((Listof Pattern-Hash-Value) -> Pattern-Hash-Value)
(if (ormap empty? (list xss (car xss)))
(cons (apply max ((inst map Natural Pattern-Hash-Value) car xss))
(apply-map-max ((inst map Pattern-Hash-Value Pattern-Hash-Value) cdr xss)))))
(define: max-value-pattern : Pattern-Hash-Value (apply-map-max matching-patterns))
;; remember the result so the same word is not recomputed
(add-pattern-to-cache (cons word-with-dots max-value-pattern))
;; for point list,
;; drop first two elements because they represent hyphenation weight
;; before the starting "." and between "." and the first letter.
;; drop last element because it represents hyphen after last "."
;; after you drop these two, then each number corresponds to
;; whether a hyphen goes after that letter.
(drop-right (drop max-value-pattern 2) 1))
;; Find hyphenation points in a word. This is not quite synonymous with syllables.
;; Returns the word cut into pieces at the permitted break points. A word shorter
;; than min-length comes back as a single piece; passing #f for min-length turns
;; that check off.
(define/typed (word->hyphenation-points word
[min-length default-min-length]
[min-left-length default-min-left-length]
[min-right-length default-min-right-length])
(case-> (String -> (Listof String))
(String (Option Natural) -> (Listof String))
(String (Option Natural)(Option Natural) -> (Listof String))
(String (Option Natural)(Option Natural)(Option Natural) -> (Listof String)))
(define/typed (add-no-hyphen-zone points)
((Listof Natural) -> (Listof Natural))
;; points is a list corresponding to the letters of the word.
;; to create a no-hyphenation zone of length n, zero out the first n-1 points
;; and the last n points (because the last value in points is always superfluous)
;; #f for either length falls back to the module default; both are capped at the word length.
;; NOTE(review): a left length of 0 would make (sub1 min-left-length) negative --
;; confirm whether 0 can reach this point.
;; NOTE(review): the expression that returns the result of this helper is not present in this view.
(let* ([min-left-length (min (or min-left-length default-min-left-length) (length points))]
[min-right-length (min (or min-right-length default-min-right-length) (length points))])
(define points-with-zeroes-on-left
(append (make-list (sub1 min-left-length) 0) (drop points (sub1 min-left-length))))
(define points-with-zeroes-on-left-and-right
(append (drop-right points-with-zeroes-on-left min-right-length) (make-list min-right-length 0)))
;; walk the characters and their points in parallel, collecting characters into the
;; current piece and closing the piece after any character whose point is odd
(define/typed (make-pieces word)
(String -> (Listof String))
(define-values (word-pieces last-piece)
(for/fold: ([word-pieces : (Listof String) empty]
[current-piece : (Listof String) empty])
([str (in-list (regexp-match* #rx"." word))] ; explodes word into list of one-character strings (char list is slower)
[point (in-list (add-no-hyphen-zone (make-points word)))])
(define updated-current-piece (cons str current-piece))
(if (even? point)
(values word-pieces updated-current-piece) ; even point denotes character
(values (cons (string-join (reverse updated-current-piece) "") word-pieces) empty)))) ; odd point denotes char + syllable
;; whatever is left after the last break becomes the final piece
(reverse (cons (string-join (reverse last-piece) "") word-pieces)))
(if (and min-length (< (string-length word) min-length))
(list word)
(make-pieces word)))
;; The joiner may arrive as a char or as a string; callers downstream want a string.
;; Either way the result is a freshly allocated string with the joiner's text.
(define/typed (joiner->string joiner)
  ((U Char String) -> String)
  (cond
    [(char? joiner) (string joiner)]
    [else (string-copy joiner)]))
;; Apply proc to every string inside x, recursing through txexpr elements.
;; A string for which omit-string is true is returned unchanged; a txexpr for which
;; omit-txexpr is true is returned unchanged and not descended into. Anything that
;; is neither a string nor a txexpr is passed through as is.
;; Both omit predicates default to "never omit".
;; NOTE(review): the bracketed clauses imply a `cond` (and the three signatures a
;; `case->`) whose opening lines are not present in this view.
(define/typed (apply-proc proc x [omit-string (λ(x) #f)] [omit-txexpr (λ(x) #f)])
((String -> String) Xexpr -> Xexpr)
((String -> String) Xexpr (String -> Any) -> Xexpr)
((String -> String) Xexpr (String -> Any) (Txexpr -> Any) -> Xexpr))
(let loop ([x x])
[(and (string? x) (not (omit-string x))) (proc x)]
[(and (txexpr? x) (not (omit-txexpr x)))
;; rebuild the txexpr with the same tag and attributes and processed elements
(make-txexpr (get-tag x) (get-attrs x) ((inst map Txexpr-Element Txexpr-Element) loop (get-elements x)))]
[else x])))
;; Insert joiner at the hyphenation points of every word found in x (a string or txexpr).
;;   joiner              char or string placed at each break (default: default-joiner)
;;   #:exceptions        extra exception words, added before processing
;;   #:min-length        words shorter than this are left whole (#f disables the check)
;;   #:min-left-length / #:min-right-length
;;                       passed on to word->hyphenation-points
;;   #:omit-word / #:omit-string / #:omit-txexpr
;;                       predicates; a true result leaves that word / string / txexpr untouched
;; NOTE(review): the type signature below starts mid-form; its opening line(s) are not
;; present in this view.
(define/typed (hyphenate x [joiner default-joiner]
#:exceptions [extra-exceptions empty]
#:min-length [min-length default-min-length]
#:min-left-length [min-left-length default-min-left-length]
#:min-right-length [min-right-length default-min-right-length]
#:omit-word [omit-word? (λ(x) #f)]
#:omit-string [omit-string? (λ(x) #f)]
#:omit-txexpr [omit-txexpr? (λ(x) #f)])
[#:exceptions (Listof String)]
[#:min-length (Option Natural)]
[#:min-left-length (Option Natural)]
[#:min-right-length (Option Natural)]
[#:omit-word (String -> Any)]
[#:omit-string (String -> Any)]
[#:omit-txexpr (Txexpr -> Any)] -> Xexpr)
(Xexpr (U Char String)
[#:exceptions (Listof String)]
[#:min-length (Option Natural)]
[#:min-left-length (Option Natural)]
[#:min-right-length (Option Natural)]
[#:omit-word (String -> Any)]
[#:omit-string (String -> Any)]
[#:omit-txexpr (Txexpr -> Any)] -> Xexpr))
(initialize-patterns) ; fills the pattern table and exception cache only when they are empty
;; the extra exceptions go into the module-level pattern-cache.
;; NOTE(review): nothing here removes them afterwards, so they appear to persist
;; across calls until reset-patterns is used -- confirm that this is intended.
(for-each add-exception extra-exceptions)
;; todo?: connect this regexp pattern to the one used in word? predicate
(define word-pattern #px"\\w+") ;; more restrictive than exception-word
;; called by regexp-replace* for each match; `words` receives any further match groups (unused)
(define/typed (replacer word . words)
(String String * -> String)
(if (not (omit-word? word))
(string-join (word->hyphenation-points word min-length min-left-length min-right-length) (joiner->string joiner))
(define/typed (insert-hyphens text)
(String -> String)
(regexp-replace* word-pattern text replacer))
(apply-proc insert-hyphens x omit-string? omit-txexpr?))
;; Remove every occurrence of joiner from the words found in x (a string or txexpr).
;;   joiner              char or string to remove (default: default-joiner)
;;   #:omit-word / #:omit-string / #:omit-txexpr
;;                       predicates; a true result leaves that word / string / txexpr untouched
;; NOTE(review): the type signature below starts mid-form; its opening line(s) are not
;; present in this view.
(define/typed (unhyphenate x [joiner default-joiner]
#:omit-word [omit-word? (λ(x) #f)]
#:omit-string [omit-string? (λ(x) #f)]
#:omit-txexpr [omit-txexpr? (λ(x) #f)])
[#:omit-word (String -> Any)]
[#:omit-string (String -> Any)]
[#:omit-txexpr (Txexpr -> Any)] -> Xexpr)
(Xexpr (U Char String)
[#:omit-word (String -> Any)]
[#:omit-string (String -> Any)]
[#:omit-txexpr (Txexpr -> Any)] -> Xexpr))
;; a "word" here is a run of word characters and joiner characters.
;; NOTE(review): the joiner is spliced into the character class unescaped, so a joiner
;; containing regexp-special characters (such as "]" or "^") would change the pattern -- confirm.
(define word-pattern (pregexp (format "[\\w~a]+" joiner)))
;; called by regexp-replace* for each match; `words` receives any further match groups (unused)
(define/typed (replacer word . words)
(String String * -> String)
(if (not (omit-word? word))
(string-replace word (joiner->string joiner) "")
(define/typed (remove-hyphens text)
(String -> String)
(regexp-replace* word-pattern text replacer))
(apply-proc remove-hyphens x omit-string? omit-txexpr?))
;; Submodule run when this file is executed as a program:
;; hyphenates one long word using "-" as the joiner.
(module+ main
(hyphenate "supercalifragilisticexpialidocious" "-")
;; the two forms below are disabled with #; (datum comments)
#;(define t "supercalifragilisticexpialidocious")
#;(hyphenate t "-"))

#lang racket/base
(require hyphenate/bootstrap)
;; Generate this language's hyphenate/unhyphenate module with the build-main macro
;; from hyphenate/bootstrap; the macro derives the data module paths
;; "us/patterns.rkt" and "us/exceptions.rkt" from the argument.
(build-main us)

@ -0,0 +1,23 @@
