From 9719e6354bc76db95e271107a535ce0a4917fa0b Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Thu, 13 Feb 2014 10:07:05 -0800 Subject: [PATCH] delete readability file --- hyphenate/main.rkt | 64 +++++++++++----- hyphenate/readability.rkt | 153 -------------------------------------- 2 files changed, 46 insertions(+), 171 deletions(-) delete mode 100644 hyphenate/readability.rkt diff --git a/hyphenate/main.rkt b/hyphenate/main.rkt index 92a97f4c..3b7a5121 100644 --- a/hyphenate/main.rkt +++ b/hyphenate/main.rkt @@ -1,11 +1,11 @@ #lang racket/base (require racket/string racket/list racket/contract racket/vector racket/bool) -(require "data.rkt" "readability.rkt") +(require "hyphenation-patterns.rkt") (module+ test (require rackunit)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; Hyphenate.rkt +;;; Hyphenate module ;;; Racket port of Ned Batchelder's hyphenate.py ;;; http://nedbatchelder.com/code/modules/hyphenate.html ;;; (in the public domain) @@ -16,13 +16,13 @@ (provide (contract-out [hyphenate - ((string?) ((or/c char? string?) #:exceptions (listof word?) #:min-length (or/c integer? false?)) . ->* . string?)]) + ((string?) ((or/c char? string?) #:exceptions (listof word?) #:min-length (or/c integer? false?)) . ->* . string?)]) (contract-out [hyphenatef - ((string? procedure?) ((or/c char? string?) #:exceptions (listof word?) #:min-length (or/c integer? false?)) . ->* . string?)]) + ((string? procedure?) ((or/c char? string?) #:exceptions (listof word?) #:min-length (or/c integer? false?)) . ->* . string?)]) (contract-out [unhyphenate - ((string?) ((or/c char? string?)) . ->* . string?)])) + ((string?) ((or/c char? string?)) . ->* . string?)])) ;; global data, define now but set! them later (because they're potentially big & slow) (define exceptions #f) @@ -46,7 +46,7 @@ ;; A word is a string without whitespace. (define (word? x) - (->boolean (regexp-match #px"^\\S+$" x))) + (if (regexp-match #px"^\\S+$" x) #t #f)) (module+ test (check-true (word? "Foobar")) @@ -69,7 +69,7 @@ (define (insert-pattern pat) (let* ([chars (regexp-replace* #px"[0-9]" pat "")] ;; regexp returns list of strings - [points (map (λ(x) (if (> (len x) 0) (string->number x) 0)) (regexp-split #px"[.a-z]" pat))] + [points (map (λ(x) (if (> (string-length x) 0) (string->number x) 0)) (regexp-split #px"[.a-z]" pat))] [tree tree]) (for ([char chars]) (when (not (hash-has-key? tree char)) @@ -87,23 +87,23 @@ ; controls hyphenation zone from edges of word ; todo: parameterize this setting ; todo: does this count end-of-word punctuation? it shouldn't. - (map (λ(i) (vector-set! points i 0)) (list 1 2 (- (len points) 2) (- (len points) 3))) + (map (λ(i) (vector-set! points i 0)) (list 1 2 (- (vector-length points) 2) (- (vector-length points) 3))) points) (let* ([word (string-downcase word)] [points (if (hash-has-key? exceptions word) (hash-ref exceptions word) - (let* ([work (string-append "." (->string word) ".")] - [points (make-vector (add1 (len work)) 0)]) - (for ([i (len work)]) + (let* ([work (string-append "." word ".")] + [points (make-vector (add1 (string-length work)) 0)]) + (for ([i (string-length work)]) (let ([tree pattern-tree]) - (for ([char (substring work i (len work))] + (for ([char (substring work i (string-length work))] #:break (not (hash-has-key? tree char))) (set! tree (hash-ref tree char)) (when (hash-has-key? tree empty) (let ([point (hash-ref tree empty)]) - (for ([j (len point)]) + (for ([j (length point)]) (vector-set! points (+ i j) (max (vector-ref points (+ i j)) (list-ref point j))))))))) points))]) @@ -112,6 +112,28 @@ (vector-drop (make-zeroes points) 2))) +;; helpful extension of splitf-at +(define (splitf-at* xs split-test) + + (define (trim items test-proc) + (dropf-right (dropf items test-proc) test-proc)) + + (define (&splitf-at* xs [acc '()]) + (if (empty? xs) + ;; reverse because accumulation is happening backward + ;; (because I'm using cons to push latest match onto front of list) + (reverse acc) + (let-values ([(item rest) + ;; drop matching elements from front + ;; then split on nonmatching + ;; = nonmatching item + other elements (which will start with matching) + (splitf-at (dropf xs split-test) (compose1 not split-test))]) + ;; recurse, and store new item in accumulator + (&splitf-at* rest (cons item acc))))) + + ;; trim off elements matching split-test + (&splitf-at* (trim xs split-test))) + ;; Find hyphenatable pieces of a word. This is not quite synonymous with syllables. (define (word->pieces word [min-length default-min-length]) @@ -124,11 +146,15 @@ (cons char 'syllable))))) ; odd point denotes char + syllable (map list->string (splitf-at* word-dissected symbol?))) - (if (and min-length (< (len word) min-length)) + (if (and min-length (< (string-length word) min-length)) (list word) (make-pieces word))) +;; joiner contract allows char or string; this coerces to string. +(define (joiner->string joiner) + (if (char? joiner) (format "~a" joiner) joiner)) + ;; Hyphenate using a filter procedure. ;; Theoretically possible to do this externally, ;; but it would just mean doing the regexp-replace twice. @@ -136,18 +162,20 @@ ;; set up module data - (set! exceptions (list->exceptions (append default-exceptions (map ->string extra-exceptions)))) + (set! exceptions (list->exceptions (append default-exceptions extra-exceptions))) (when (not pattern-tree) (set! pattern-tree (make-pattern-tree default-patterns))) - (regexp-replace* #px"\\w+" text (λ(word) (if (proc word) (string-join (word->pieces word min-length) (->string joiner)) word)))) + (define joiner-string (joiner->string joiner)) + (regexp-replace* #px"\\w+" text (λ(word) (if (proc word) (string-join (word->pieces word min-length) joiner-string) word)))) ;; Default hyphenate function. (define (hyphenate text [joiner default-joiner] #:exceptions [extra-exceptions '()] #:min-length [min-length default-min-length]) (hyphenatef text (λ(x) #t) joiner #:exceptions extra-exceptions #:min-length min-length)) -(define (unhyphenate text [joiner default-joiner]) - (string-replace text (->string joiner) "")) +(define (unhyphenate text [joiner default-joiner]) + (define joiner-string (joiner->string joiner)) + (string-replace text joiner-string "")) (module+ test diff --git a/hyphenate/readability.rkt b/hyphenate/readability.rkt deleted file mode 100644 index f4ed8b7a..00000000 --- a/hyphenate/readability.rkt +++ /dev/null @@ -1,153 +0,0 @@ -#lang racket/base -(require racket/contract net/url xml) -(require (only-in racket/list empty? range splitf-at dropf dropf-right)) -(require (only-in racket/format ~a)) -(require (only-in racket/string string-join)) -(require (only-in racket/vector vector-member)) -(require (only-in racket/set set set->list set?)) -(module+ test (require rackunit)) - -(provide (all-defined-out)) - - - -;; general way of coercing to string -(define/contract (->string x) - (any/c . -> . string?) - (cond - [(string? x) x] - [(empty? x) ""] - [(symbol? x) (symbol->string x)] - [(number? x) (number->string x)] - [(path? x) (path->string x)] - [(char? x) (~a x)] - [(xexpr? x) (xexpr->string x)] ; put this last so other xexprish things don't get caught - [else (error (format "Can't make ~a into string" x))])) - -(module+ test - (check-equal? (->string "foo") "foo") - (check-equal? (->string '()) "") - (check-equal? (->string 'foo) "foo") - (check-equal? (->string 123) "123") - (define file-name-as-text "foo.txt") - (check-equal? (->string (string->path file-name-as-text)) file-name-as-text) - (check-equal? (->string #\¶) "¶") - (check-equal? (->string '(foo "bar")) "bar")) - - - - - - - -;; general way of coercing to a list -(define/contract (->list x) - (any/c . -> . list?) - (cond - [(list? x) x] - [(vector? x) (vector->list x)] - [(set? x) (set->list x)] - [else (list x)])) - -(module+ test - (check-equal? (->list '(1 2 3)) '(1 2 3)) - (check-equal? (->list (list->vector '(1 2 3))) '(1 2 3)) - (check-equal? (->list (set 1 2 3)) '(3 2 1)) - (check-equal? (->list "foo") (list "foo"))) - -;; general way of coercing to vector -(define (->vector x) - (any/c . -> . vector?) - ; todo: on bad input, it will pop a list error rather than vector error - (cond - [(vector? x) x] - [else (list->vector (->list x))])) - - - -;; general way of coercing to boolean -(define/contract (->boolean x) - (any/c . -> . boolean?) - ;; in Racket, everything but #f is true - (if x #t #f)) - -(module+ test - (check-true (->boolean #t)) - (check-false (->boolean #f)) - (check-true (->boolean "#f")) - (check-true (->boolean "foo")) - (check-true (->boolean '())) - (check-true (->boolean '(1 2 3)))) - - -(define/contract (has-length? x) - (any/c . -> . boolean?) - (ormap (λ(proc) (proc x)) (list list? string? symbol? vector? hash? set?))) - -;; general way of asking for length -(define/contract (len x) - (has-length? . -> . integer?) - (cond - [(list? x) (length x)] - [(string? x) (string-length x)] - [(symbol? x) (len (->string x))] - [(vector? x) (len (->list x))] - [(hash? x) (len (hash-keys x))] - [(set? x) (len (->list x))] - [else #f])) - -(module+ test - (check-equal? (len '(1 2 3)) 3) - (check-not-equal? (len '(1 2)) 3) ; len 2 - (check-equal? (len "foo") 3) - (check-not-equal? (len "fo") 3) ; len 2 - (check-equal? (len 'foo) 3) - (check-not-equal? (len 'fo) 3) ; len 2 - (check-equal? (len (list->vector '(1 2 3))) 3) - (check-not-equal? (len (list->vector '(1 2))) 3) ; len 2 - (check-equal? (len (set 1 2 3)) 3) - (check-not-equal? (len (set 1 2)) 3) ; len 2 - (check-equal? (len (make-hash '((a . 1) (b . 2) (c . 3)))) 3) - (check-not-equal? (len (make-hash '((a . 1) (b . 2)))) 3)) ; len 2 - - - - - - -;; trim from beginning & end of list -(define (trim items test-proc) - (list? procedure? . -> . list?) - (dropf-right (dropf items test-proc) test-proc)) - -(module+ test - ; (check-equal? (trim (list "\n" " " 1 2 3 "\n") whitespace?) '(1 2 3)) - (check-equal? (trim (list 1 3 2 4 5 6 8 9 13) odd?) '(2 4 5 6 8))) - - - -;; split list into list of sublists using test-proc -(define/contract (splitf-at* xs split-test) - ;; todo: better error message when split-test is not a predicate - (list? predicate/c . -> . (listof list?)) - (define (&splitf-at* xs [acc '()]) ; use acc for tail recursion - (if (empty? xs) - ;; reverse because accumulation is happening backward - ;; (because I'm using cons to push latest match onto front of list) - (reverse acc) - (let-values ([(item rest) - ;; drop matching elements from front - ;; then split on nonmatching - ;; = nonmatching item + other elements (which will start with matching) - (splitf-at (dropf xs split-test) (compose1 not split-test))]) - ;; recurse, and store new item in accumulator - (&splitf-at* rest (cons item acc))))) - - ;; trim off elements matching split-test - (&splitf-at* (trim xs split-test))) - -(module+ test - ; (check-equal? (splitf-at* '("foo" " " "bar" "\n" "\n" "ino") whitespace?) '(("foo")("bar")("ino"))) - (check-equal? (splitf-at* '(1 2 3 4 5 6) even?) '((1)(3)(5)))) - -