updating hyphenate; moving shit around

pull/9/head
Matthew Butterick 11 years ago
parent 5c7777ab6a
commit 5ba9b92c77

@ -9,7 +9,7 @@
(let* ([args (current-command-line-arguments)]
[arg (if (> (len args) 0) (get args 0) "")])
(case arg
[("serve") `(require "server.rkt")]
[("start") `(require "server.rkt")]
[("regenerate") `(begin
;; todo: take extensions off the comand line
(displayln "Regenerate preproc & pmap files ...")

@ -8,6 +8,13 @@
; todo: contracts, tests, docs
(require (prefix-in williams: (planet williams/describe/describe)))
(define (describe x)
(williams:describe x)
x)
; debug utilities
(define (message . items)
(define (zero-fill str count)

@ -8,31 +8,6 @@
(require "tools.rkt")
(provide (all-defined-out))
;; split list into list of sublists using test-proc
(define/contract (splitf-at* xs split-test)
;; todo: better error message when split-test is not a predicate
(list? predicate/c . -> . (listof list?))
(define (&splitf-at* xs [acc '()]) ; use acc for tail recursion
(if (empty? xs)
;; reverse because accumulation is happening backward
;; (because I'm using cons to push latest match onto front of list)
(reverse acc)
(let-values ([(item rest)
;; drop matching elements from front
;; then split on nonmatching
;; = nonmatching item + other elements (which will start with matching)
(splitf-at (dropf xs split-test) (compose1 not split-test))])
;; recurse, and store new item in accumulator
(&splitf-at* rest (cons item acc)))))
;; trim off elements matching split-test
(&splitf-at* (trim xs split-test)))
(module+ test
(check-equal? (splitf-at* '(1 2 3 4 5 6) even?) '((1)(3)(5)))
(check-equal? (splitf-at* '("foo" " " "bar" "\n" "\n" "ino") whitespace?) '(("foo")("bar")("ino"))))
;; Find adjacent newline characters in a list and merge them into one item
;; Scribble, by default, makes each newline a separate list item
@ -81,15 +56,6 @@
(check-equal? (stringify '(p 1 2 "foo" (em 4 "bar"))) '(p "1" "2" "foo" (em "4" "bar"))))
;; trim from beginning & end of list
(define (trim items test-proc)
(list? procedure? . -> . list?)
(dropf-right (dropf items test-proc) test-proc))
(module+ test
(check-equal? (trim (list "\n" " " 1 2 3 "\n") whitespace?) '(1 2 3))
(check-equal? (trim (list 1 3 2 4 5 6 8 9 13) odd?) '(2 4 5 6 8)))
;; decoder wireframe
(define/contract (decode nx

@ -21,10 +21,10 @@
; general function for creating groups of css properties
; with browser prefixes and one value
(define (map-suffix suffix prefixes)
(map (ƒ(prefix) (string-append prefix suffix)) prefixes))
(map (λ(prefix) (string-append prefix suffix)) prefixes))
(define (join-css-prop-and-value p v)
(string-join (list (str p) (str v)) ": "))
(string-join (list (->string p) (->string v)) ": "))
(define properties (map-suffix property-suffix property-prefixes))
@ -75,12 +75,12 @@
; use single quotes in the formatter because css string might be used in an inline tag
; with form style="[string]" so double quotes are irritating
(define feature-tag-string (string-join (map (ƒ(tag value) (format "'~a' ~a" tag value)) feature-tags feature-values) ", "))
(define feature-tag-string (string-join (map (λ(tag value) (format "'~a' ~a" tag value)) feature-tags feature-values) ", "))
; I hate accommodating old browsers but I'll make an exception because OT support is
; critical to most MB projects
; if this comes before new-style -moz- declaration, it will work for all.
(define feature-tag-string-old-firefox (string-join (map (ƒ(tag value) (format "'~a=~a'" tag value)) feature-tags feature-values) ", "))
(define feature-tag-string-old-firefox (string-join (map (λ(tag value) (format "'~a=~a'" tag value)) feature-tags feature-values) ", "))
(define feature-tag-property "font-feature-settings")
@ -103,12 +103,12 @@
(when (not stops) ; distribute colors evenly between 0 and 100
; new-stops is range of steps incremented properly and rounded to int, then append 100 to end
(let ([new-stops `(,@(map int (range 0 100 (/ 100 (sub1 (len colors))))) 100)])
(let ([new-stops `(,@(map ->int (range 0 100 (/ 100 (sub1 (len colors))))) 100)])
; convert to list of percentages
(set! stops (map (ƒ(x) (format "~a%" x)) new-stops))))
(set! stops (map (λ(x) (format "~a%" x)) new-stops))))
; color / percentage pairs separated by commas
(define color-stop-string (string-join (map (ƒ(color stop) (format "~a ~a" color stop)) colors stops) ", "))
(define color-stop-string (string-join (map (λ(color stop) (format "~a ~a" color stop)) colors stops) ", "))
; set up gradient options
(define gradient-type (if radial "radial" "linear"))
@ -116,7 +116,7 @@
; can't use standard make-css-strings in this case because the prefixes appear in the value,
; not in the property (which is always "background")
(define gradient-strings (map (ƒ(prefix) (format "background: ~a~a-gradient(~a, ~a)" prefix gradient-type gradient-direction color-stop-string)) css-property-prefixes))
(define gradient-strings (map (λ(prefix) (format "background: ~a~a-gradient(~a, ~a)" prefix gradient-type gradient-direction color-stop-string)) css-property-prefixes))
; just fill with the last color if gradient not available
(define fallback-string (format "background: ~a" (last colors)))

@ -1,8 +1,10 @@
#lang racket/base
(require racket/string racket/list)
(require (planet mb/pollen/hyphenation-data))
(require (planet mb/pollen/readability))
(require (planet mb/pollen/tools))
(require racket/string racket/list racket/contract)
(require "hyphenation-data.rkt")
(require "../readability.rkt")
(require "../tools.rkt")
(module+ test (require rackunit))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Hyphenate.rkt
@ -25,10 +27,10 @@
(string-replace x "-" ""))
(define (make-value x)
(list->vector (cons 0 (map (ƒ(x) (int (=str x "-"))) (regexp-split #px"[a-z]" x)))))
(list->vector (cons 0 (map (λ(x) (if (equal? x "-") 1 0)) (regexp-split #px"[a-z]" x)))))
(make-hash
(map (ƒ(x) (cons (make-key x) (make-value x))) exception-data)))
(map (λ(x) (cons (make-key x) (make-value x))) exception-data)))
; global data, so this only needs to be defined once
(define exceptions (make-exceptions exception-data))
@ -41,10 +43,11 @@
(define tree (make-hash))
(define (insert-pattern pat)
(let* ([chars (regexp-replace* #px"[0-9]" pat "")]
[points (map (λ(x) (int x)) (regexp-split #px"[.a-z]" pat))]
;; regexp returns list of strings
[points (map (λ(x) (if (> (len x) 0) (string->number x) 0)) (regexp-split #px"[.a-z]" pat))]
[tree tree])
(for ([char chars])
(when (not (in? tree char))
(when (not (char . in? . tree))
(change tree char (make-hash)))
(set! tree (get tree char)))
(change tree empty points)))
@ -60,21 +63,21 @@
; controls hyphenation zone from edges of word
; todo: parameterize this setting
; todo: does this count end-of-word punctuation? it shouldn't.
(map (ƒ(i) (change points i 0)) (list 1 2 (- (len points) 2) (- (len points) 3)))
(map (λ(i) (change points i 0)) (list 1 2 (- (len points) 2) (- (len points) 3)))
points)
(let* ([word (to-lc word)]
(let* ([word (string-downcase word)]
[points
(if (in? exceptions word)
(if (word . in? . exceptions)
(get exceptions word)
(let* ([work (str "." word ".")]
(let* ([work (string-append "." (->string word) ".")]
[points (make-vector (add1 (len work)) 0)])
(for ([i (len work)])
(let ([tree pattern-tree])
(for ([char (get work i 'end)]
#:break (not (in? tree char)))
#:break (not (char . in? . tree)))
(set! tree (get tree char))
(when (in? tree empty)
(when (empty . in? . tree)
(let ([point (get tree empty)])
(for ([j (len point)])
(change points (+ i j) (max (get points (+ i j)) (get point j)))))))))
@ -92,40 +95,47 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (hyphenate-word word #:filter [filter (λ(x)x)])
(define/contract (word->hyphenated-pieces word #:omit [omit? (λ(x) #f)])
((string?) (#:omit procedure?) . ->* . (listof string?))
; Given a word, returns a list of pieces,
; broken at the possible hyphenation points.
(if (or (<= (len word) 4) (filter word))
; Short words aren't hyphenated.
(as-list word)
(if (or (<= (len word) 4) (omit? word))
;; boundary conditions:
;; Short words aren't hyphenated, nor omitted words
(->list word)
; Examine the points to build the pieces list.
(string-split ; split on whitespace
(list->string ; concatenate chars
(flatten ; get rid of cons pairs
(for/list ([char word] [point (make-points word)])
(for/list ([char word]
[point (make-points word)])
(if (even? point)
char ; even point denotes character
(cons char #\ )))))))) ; odd point denotes char + syllable
(define (hyphenate-string text #:joiner [joiner (integer->char #x00AD)] #:filter [filter (λ(x)x)])
(regexp-replace* #px"\\w+" text (ƒ(word) (string-join (hyphenate-word word #:filter filter) (as-string joiner)))))
(define (hyphenate-string text #:joiner [joiner (integer->char #x00AD)] #:omit [omit? (λ(x)#f)])
(regexp-replace* #px"\\w+" text (λ(word) (string-join (word->hyphenated-pieces word #:omit omit?) (->string joiner)))))
(define (capitalized? word)
; match property = \\p
; match unicode uppercase = {Lu}
(regexp-match #px"\\p{Lu}" (get word 0)))
(define (ligated? word)
(ormap (λ(lig) (regexp-match lig word)) '("ff" "fi" "fl" "ffi" "ffl")))
(define (hyphenate x #:only [only-proc (ƒ(x) x)]) ; recursively hyphenate strings within xexpr
(define exclusions '(style script)) ; omit these from ever being hyphenated
(define (capitalized-or-ligated? word)
; filter function for hyphenate
; filtering ligatable words because once the soft hyphens go in,
; the browser won't automatically substitute the ligs.
; so it looks weird, because some are ligated and some not.
; not ideal, because it removes hyphenation options but ... whatever
(or (capitalized? word) (any (ƒ(lig) (regexp-match lig word)) '("ff" "fi" "fl" "ffi" "ffl"))))
(or (capitalized? word) (ligated? word)))
(define (hyphenate x #:only [only-proc (λ(x) x)]) ; recursively hyphenate strings within xexpr
(define exclusions '(style script)) ; omit these from ever being hyphenated
(cond
; todo: the only-proc semantics are illogical.
@ -135,30 +145,14 @@
; Won't it make hyphenation naturally overinclusive?
; Problem with opt-in: conceals a lot of tags that naturally live inside other tags
; only reaches text at the "root level" of the tag.
[(named-xexpr? x) (if (and (only-proc x) (not (in? exclusions (car x))))
(map-xexpr-content hyphenate x)
(map-xexpr-content hyphenate x #:only named-xexpr?))] ; only process subxexprs
[(string? x)
; hyphenate everything but last word
; todo: problem here is that it's string-based, not paragraph based.
; meaning, the last word of every STRING gets exempted,
; even if that word doesn't fall at the end of a block.
; should work the way nonbreak spacer works.
; todo: question - should hyphenator ignore possible ligature pairs, like fi?
; because auto ligatures will skip combos with a soft hyphen between
; regexp matches everything up to last word, and allows trailing whitespace
; parenthesized matches become series of lambda arguments. Arity must match
; [^\\s\u00A0] = characters that are neither whitespace nor nbsp (which is not included in \s)
; +\\s*$ = catches trailing whitespace up to end
(regexp-replace #px"(.*?)([^\\s\u00A0]+\\s*$)"
x
; by default, filter out capitalized words and words with ligatable combos
; m0 m1 m2 are the match groups from regexp-replace
(ƒ(m0 m1 m2) (string-append (hyphenate-string m1 #:filter capitalized-or-ligated?) m2)))]
[(tagged-xexpr? x) (if (and (only-proc x) (not ((car x) . in? . exclusions)))
(map-xexpr-elements hyphenate x)
(map-xexpr-elements (λ(x) (if (tagged-xexpr? x) (hyphenate x) x)) x))] ; only process subxexprs
[(string? x) (hyphenate-string x)]
[else x]))
(module+ main
(hyphenate '(p "circular firing squad") #:only (ƒ(xexpr) (in? '(p) (first xexpr)))))
(module+ test
(check-equal? (word->hyphenated-pieces "polymorphism") '("poly" "mor" "phism"))
(check-equal? (hyphenate "circular polymorphism squandering") "cir\u00ADcu\u00ADlar poly\u00ADmor\u00ADphism squan\u00ADder\u00ADing")
(check-equal? (hyphenate '(p "circular polymorphism")) '(p "cir\u00ADcu\u00ADlar poly\u00ADmor\u00ADphism")))

@ -65,15 +65,17 @@
;; and can be made relative by the caller (or otherwise altered).
(->string here-path)))))
(module+ test
(check-equal? (get-here) "main-helper.rkt"))
;; todo: update tests
;(module+ test
; (check-equal? (get-here) "main-helper.rkt"))
; Second step: apply a separate syntax transform to the identifier itself
; We can't do this in one step, because if the macro goes from identifier to function definition,
; The macro processor will evaluate the body at compile-time, not at runtime.
(define-syntax here (λ(stx) (datum->syntax stx '(get-here))))
(module+ test
(check-equal? here "main-helper.rkt"))
;; todo: update test
;(module+ test
; (check-equal? here "main-helper.rkt"))

@ -6,21 +6,27 @@
(provide (all-defined-out))
; get the values out of the file, or make them up
;; get the values out of the file, or make them up
(define pmap-file (build-path START_DIR DEFAULT_POLLEN_MAP))
(define pmap-main empty)
;; todo next: why doesn't this line work?
(report (dynamic-require pmap-file 'main))
(error 'stop)
;; todo: this ain't a function
(if (file-exists? pmap-file)
; load it, or ...
; load it ....
(set! pmap-main (dynamic-require pmap-file POLLEN_ROOT))
; ... synthesize it
; ... or else synthesize it
(let ([files (directory-list START_DIR)])
(set! files (map remove-ext (filter (λ(x) (has-ext? x POLLEN_SOURCE_EXT)) files)))
(set! pmap-main (make-tagged-xexpr 'pmap-root empty (map path->string files)))))
;; recursively processes map, converting map locations & their parents into xexprs of this shape:
;; '(location ((parent "parent")))
(define/contract (add-parents x [parent empty])

@ -1,6 +1,6 @@
#lang racket/base
(require racket/contract)
(require (only-in racket/list empty? range))
(require (only-in racket/list empty? range splitf-at dropf dropf-right))
(require (only-in racket/format ~a))
(require (only-in racket/string string-join))
(require (only-in racket/vector vector-member))
@ -10,6 +10,19 @@
(provide (all-defined-out))
;; general way of coercing to integer
(define/contract (->int x)
(any/c . -> . integer?)
(cond
[(integer? x) x]
[(real? x) (floor x)]
[(and (string? x) (> (len x) 0)) (->int (string->number x))]
[(symbol? x) (->int (->string x))]
[(char? x) (char->integer x)]
[(has-length? x) (len x)]
[else (error "Can't convert to integer:" x)]))
;; general way of coercing to string
(define/contract (->string x)
(any/c . -> . string?)
@ -127,6 +140,16 @@
(ormap (λ(proc) (proc x)) (list sliceable-container? hash?)))
;; general way of setting an item in a mutable container
(define/contract (change x i value)
((or/c vector? hash?) any/c any/c . -> . void?)
; general-purpose mutable data object setter
(cond
[(vector? x) (vector-set! x i value)]
[(hash? x) (hash-set! x i value)]
[else (error "Can't set this datatype using change")]))
;; general way of fetching an item from a container
(define/contract (get container start [end #f])
((gettable-container? any/c) ((λ(i)(or (integer? i) (and (symbol? i) (equal? i 'end)))))
@ -229,3 +252,39 @@
(check-true ("foobar" . ends-with? . "r"))
(check-true ("foobar" . ends-with? . "foobar"))
(check-false ("foobar" . ends-with? . "foo")))
;; trim from beginning & end of list
(define (trim items test-proc)
(list? procedure? . -> . list?)
(dropf-right (dropf items test-proc) test-proc))
(module+ test
; (check-equal? (trim (list "\n" " " 1 2 3 "\n") whitespace?) '(1 2 3))
(check-equal? (trim (list 1 3 2 4 5 6 8 9 13) odd?) '(2 4 5 6 8)))
;; split list into list of sublists using test-proc
(define/contract (splitf-at* xs split-test)
;; todo: better error message when split-test is not a predicate
(list? predicate/c . -> . (listof list?))
(define (&splitf-at* xs [acc '()]) ; use acc for tail recursion
(if (empty? xs)
;; reverse because accumulation is happening backward
;; (because I'm using cons to push latest match onto front of list)
(reverse acc)
(let-values ([(item rest)
;; drop matching elements from front
;; then split on nonmatching
;; = nonmatching item + other elements (which will start with matching)
(splitf-at (dropf xs split-test) (compose1 not split-test))])
;; recurse, and store new item in accumulator
(&splitf-at* rest (cons item acc)))))
;; trim off elements matching split-test
(&splitf-at* (trim xs split-test)))
(module+ test
; (check-equal? (splitf-at* '("foo" " " "bar" "\n" "\n" "ino") whitespace?) '(("foo")("bar")("ino")))
(check-equal? (splitf-at* '(1 2 3 4 5 6) even?) '((1)(3)(5))))

@ -44,7 +44,7 @@
(check-equal? (format-as-code '(p "foo")) '(div ((style "white-space:pre-wrap;font-family:AlixFB,monospaced")) (p "foo"))))
;; server routes
;; these all produce an xexpr, which is handled upstream by (response/xexpr x)
;; these all produce an xexpr, which is handled upstream by response/xexpr
;; server route that returns html
;; todo: what is this for?
@ -105,12 +105,13 @@
;; Utility function for making file rows
(define (make-file-row file routes)
;; Utility function for making cells
(define (make-link-cell type)
(letrec ([source (add-ext (remove-ext file) POLLEN_SOURCE_EXT)]
(let* ([source (add-ext (remove-ext file) POLLEN_SOURCE_EXT)]
[preproc-source (add-ext file POLLEN_PREPROC_EXT)]
[file-string (path->string file)]
[file-string (->string file)]
[name (case type
['direct (->string file-string)]
['direct file-string]
['preproc-source "source"]
[else (->string type)])]
[target (case type
@ -128,12 +129,13 @@
(style ((type "text/css")) "td a { display: block; width: 100%; height: 100%; padding: 8px; }"
"td:hover {background: #eee}")
(table ((style "font-family:Concourse T3;font-size:115%"))
; options for pmap files and template files
;; options for pmap files and template files
,@(map (λ(file) (make-file-row file '(raw))) (append pmap-files template-files))
; options for pollen files
;; options for pollen files
,@(map (λ(file) (make-file-row file '(raw source xexpr force))) post-pollen-files)
; options for preproc files
; branching in λ is needed so these files can be interleaved on the list
;; options for preproc files
;; branching in λ is needed so these files can be interleaved on the list
,@(map (λ(file) (make-file-row file '(raw preproc-source))) post-preproc-files)))))
@ -141,9 +143,8 @@
; query is parsed as list of pairs, key is symbol, value is string
; '((key . "value") ... )
(let ([result (memf (λ(x) (equal? (car x) key)) (url-query url))])
(if result
(cdar result) ; second value of first result
result)))
(and result (cdar result)))) ; second value of first result
; default route w/preproc support
(define (route-preproc path #:force force-value)

@ -35,15 +35,15 @@
(define/contract (find-in px query)
(define/contract (find px query)
(puttable-item? query-key? . -> . (or/c xexpr-elements? false?))
(or (find-in-metas px query) (find-in-main px query)))
(module+ test
(parameterize ([current-directory "tests/template"])
(check-false (find-in "put" "nonexistent-key"))
(check-equal? (find-in "put" "foo") (list "bar"))
(check-equal? (find-in "put" "em") (list "One" "paragraph"))))
(check-false (find "put" "nonexistent-key"))
(check-equal? (find "put" "foo") (list "bar"))
(check-equal? (find "put" "em") (list "One" "paragraph"))))
(define/contract (find-in-metas px key)
(puttable-item? query-key? . -> . (or/c xexpr-elements? false?))

@ -173,6 +173,18 @@
(check-equal? (map-tree (λ(i) (if (symbol? i) 'foo i)) '(p 1 2 3 (em 4 5))) '(foo 1 2 3 (foo 4 5))))
(define/contract (map-xexpr-elements proc tx)
(procedure? tagged-xexpr? . -> . tagged-xexpr?)
(define-values (tag attr elements) (break-tagged-xexpr tx))
(make-tagged-xexpr tag attr (map proc elements)))
(module+ test
(check-equal? (map-xexpr-elements (λ(x) (if (string? x) "boing" x))
'(p "foo" "bar" (em "square")))
'(p "boing" "boing" (em "square"))))
;; function to split tag out of tagged-xexpr
(define/contract (split-tag-from-xexpr tag tx)

@ -43,4 +43,5 @@
(string->path ".")
dir)))
(provide (all-defined-out))
Loading…
Cancel
Save