From 7c138d29157ad2788c9cc36f4bf74afe74ebd9f4 Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Sun, 2 Mar 2014 14:38:21 -0800 Subject: [PATCH] streamline decode --- decode.rkt | 89 +++++++++++++------ decode/fast.rkt | 63 ------------- .../{typography-fast.rkt => typography.rkt} | 13 ++- 3 files changed, 71 insertions(+), 94 deletions(-) delete mode 100644 decode/fast.rkt rename decode/{typography-fast.rkt => typography.rkt} (91%) diff --git a/decode.rkt b/decode.rkt index 4ef68d2..ebf1ed0 100644 --- a/decode.rkt +++ b/decode.rkt @@ -1,30 +1,61 @@ #lang racket/base -(require racket/contract xml txexpr) -(require "decode/fast.rkt" "predicates.rkt" "decode/typography-fast.rkt") - -(provide to-string (contract-out [register-block-tag (symbol? . -> . void?)] - [decode ((xexpr/c) ;; use xexpr/c for contract on nx because it gives better error messages - - ;; todo: how to write more specific contracts for these procedures? - ;; e.g., string-proc should be restricted to procs that accept a string as input - ;; and return a string as output - (#:exclude-xexpr-tags list? - #:xexpr-tag-proc procedure? - #:xexpr-attrs-proc procedure? - #:xexpr-elements-proc procedure? - #:block-xexpr-proc procedure? - #:inline-xexpr-proc procedure? - #:string-proc procedure?) - . ->* . txexpr?)] - - [typogrify (string? . -> . string?)] - [nonbreaking-last-space ((txexpr?) (#:nbsp string? #:minimum-word-length integer?) . ->* . txexpr?)] - [wrap-hanging-quotes ((txexpr?) (#:single-prepend list? #:double-prepend list?) . ->* . txexpr?)] - [convert-linebreaks ((txexpr-elements?) (#:newline string?) . ->* . txexpr-elements?)] - [whitespace? (any/c . -> . boolean?)] - [paragraph-break? ((any/c) (#:pattern pregexp?) . ->* . boolean?)] - [merge-newlines (list? . -> . list?)] - [prep-paragraph-flow (txexpr-elements? . -> . txexpr-elements?)] - [wrap-paragraph ((txexpr-elements?) (#:tag symbol?) . ->* . block-xexpr?)] - [detect-paragraphs (txexpr-elements? . -> . txexpr-elements?)] - )) \ No newline at end of file +(require xml txexpr sugar/define) +(require "predicates.rkt" "decode/typography.rkt") + +(provide (all-from-out "decode/typography.rkt")) + + +(define+provide (to-string x) + (if (string? x) + x ; fast exit for strings + (with-handlers ([exn:fail? (λ(exn) (error (format "Pollen parser: can't convert ~v to ~a" x 'string)))]) + (cond + [(equal? '() x) ""] + [(symbol? x) (symbol->string x)] + [(number? x) (number->string x)] + [(path? x) (path->string x)] + [(char? x) (format "~a" x)] + [else (error)])))) ; put this last so other xexprish things don't get caught + +;; add a block tag to the list +(define+provide/contract (register-block-tag tag) + (symbol? . -> . void?) + (append-block-tag tag)) + + +;; decoder wireframe +(define+provide/contract (decode nx + #:exclude-xexpr-tags [excluded-xexpr-tags '()] + #:xexpr-tag-proc [xexpr-tag-proc (λ(x)x)] + #:xexpr-attrs-proc [xexpr-attrs-proc (λ(x)x)] + #:xexpr-elements-proc [xexpr-elements-proc (λ(x)x)] + #:block-xexpr-proc [block-xexpr-proc (λ(x)x)] + #:inline-xexpr-proc [inline-xexpr-proc (λ(x)x)] + #:string-proc [string-proc (λ(x)x)]) + ((xexpr/c) + (#:exclude-xexpr-tags list? + #:xexpr-tag-proc procedure? + #:xexpr-attrs-proc procedure? + #:xexpr-elements-proc procedure? + #:block-xexpr-proc procedure? + #:inline-xexpr-proc procedure? + #:string-proc procedure?) . ->* . txexpr?) + + + (let loop ([x (validate-txexpr? nx)]) + (cond + [(txexpr? x) (let-values([(tag attr elements) (txexpr->values x)]) + (if (member tag excluded-xexpr-tags) + x ; let x pass through untouched + (let ([decoded-xexpr (apply make-txexpr (map loop (list tag attr elements)))]) + ((if (block-xexpr? decoded-xexpr) + block-xexpr-proc + inline-xexpr-proc) decoded-xexpr))))] + [(txexpr-tag? x) (xexpr-tag-proc x)] + [(txexpr-attrs? x) (xexpr-attrs-proc x)] + ;; need this for operations that may depend on context in list + [(txexpr-elements? x) (map loop (xexpr-elements-proc x))] + [(string? x) (string-proc x)] + ;; if something has made it through undecoded, that's a problem + [else (error "decode: can't decode" x)]))) + diff --git a/decode/fast.rkt b/decode/fast.rkt deleted file mode 100644 index e8825fa..0000000 --- a/decode/fast.rkt +++ /dev/null @@ -1,63 +0,0 @@ -#lang racket/base -(require racket/match xml) -(require "../tools.rkt" "../predicates.rkt" txexpr "typography-fast.rkt") - - -(provide (all-defined-out) (all-from-out "typography-fast.rkt")) - -;; general way of coercing to string -(define (to-string x) - (if (string? x) - x ; fast exit for strings - (with-handlers ([exn:fail? (λ(exn) (error (format "Pollen parser: can't convert ~v to ~a" x 'string)))]) - (cond - [(equal? '() x) ""] - [(symbol? x) (symbol->string x)] - [(number? x) (number->string x)] - [(path? x) (path->string x)] - [(char? x) (format "~a" x)] - [else (error)])))) ; put this last so other xexprish things don't get caught - - -;; add a block tag to the list -;; this function is among the predicates because it alters a predicate globally. -(define (register-block-tag tag) - (append-block-tag tag)) - - - -;; decoder wireframe -(define (decode nx - #:exclude-xexpr-tags [excluded-xexpr-tags '()] - #:xexpr-tag-proc [xexpr-tag-proc (λ(x)x)] - #:xexpr-attrs-proc [xexpr-attrs-proc (λ(x)x)] - #:xexpr-elements-proc [xexpr-elements-proc (λ(x)x)] - #:block-xexpr-proc [block-xexpr-proc (λ(x)x)] - #:inline-xexpr-proc [inline-xexpr-proc (λ(x)x)] - #:string-proc [string-proc (λ(x)x)]) - - (when (not (txexpr? nx)) - (error (format "decode: ~v not a full txexpr" nx))) - - - (define (&decode x) - (cond - [(txexpr? x) (let-values([(tag attr elements) (txexpr->values x)]) - (if (member tag excluded-xexpr-tags) - x ; let x pass through untouched - (let ([decoded-xexpr (apply make-txexpr - (map &decode (list tag attr elements)))]) - ((if (block-xexpr? decoded-xexpr) - block-xexpr-proc - inline-xexpr-proc) decoded-xexpr))))] - [(txexpr-tag? x) (xexpr-tag-proc x)] - [(txexpr-attrs? x) (xexpr-attrs-proc x)] - ;; need this for operations that may depend on context in list - [(txexpr-elements? x) (map &decode (xexpr-elements-proc x))] - [(string? x) (string-proc x)] - ;; if something has made it through undecoded, that's a problem - [else (error "Can't decode" x)])) - - - (&decode nx)) - diff --git a/decode/typography-fast.rkt b/decode/typography.rkt similarity index 91% rename from decode/typography-fast.rkt rename to decode/typography.rkt index 03b416d..ae8341f 100644 --- a/decode/typography-fast.rkt +++ b/decode/typography.rkt @@ -3,8 +3,17 @@ (require "../tools.rkt" "../predicates.rkt" sugar txexpr) -(provide (all-defined-out)) - +(provide (contract-out + [typogrify (string? . -> . string?)] + [nonbreaking-last-space ((txexpr?) (#:nbsp string? #:minimum-word-length integer?) . ->* . txexpr?)] + [wrap-hanging-quotes ((txexpr?) (#:single-prepend list? #:double-prepend list?) . ->* . txexpr?)] + [convert-linebreaks ((txexpr-elements?) (#:newline string?) . ->* . txexpr-elements?)] + [whitespace? (any/c . -> . boolean?)] + [paragraph-break? ((any/c) (#:pattern pregexp?) . ->* . boolean?)] + [merge-newlines (list? . -> . list?)] + [prep-paragraph-flow (txexpr-elements? . -> . txexpr-elements?)] + [wrap-paragraph ((txexpr-elements?) (#:tag symbol?) . ->* . block-xexpr?)] + [detect-paragraphs (txexpr-elements? . -> . txexpr-elements?)])) ;; This module is a library of functions to be used in building pollen decoders.