pollen/decode.rkt

#lang racket/base
(require xml txexpr sugar racket/match racket/list (prefix-in html: pollen/html))
(require "debug.rkt" "world.rkt")


(define (symbols? x) (and (list? x) (andmap symbol? x)))

(define+provide (to-string x)
  (if (string? x)
      x ; fast exit for strings
      (with-handlers  ([exn:fail? (λ(exn) (error (format "Pollen decoder: can't convert ~v to ~a" x 'string)))])
        (cond
          [(equal? '() x) ""]
          [(symbol? x) (symbol->string x)]
          [(number? x) (number->string x)]
          [(path? x) (path->string x)]
          [(char? x) (format "~a" x)]
          [(void? x) ""]
          ;; todo: guard against weird shit like lists of procedures
          [(or (list? x) (hash? x) (vector? x)) (format "~v" x)] ; ok to convert datatypes
          [else (error)])))) ; but things like procedures should throw an error


;; decoder wireframe
(define+provide/contract (decode txexpr
                                 #:txexpr-tag-proc [txexpr-tag-proc (λ(x)x)]
                                 #:txexpr-attrs-proc [txexpr-attrs-proc (λ(x)x)]
                                 #:txexpr-elements-proc [txexpr-elements-proc (λ(x)x)]
                                 #:block-txexpr-proc [block-txexpr-proc (λ(x)x)]
                                 #:inline-txexpr-proc [inline-txexpr-proc (λ(x)x)]
                                 #:string-proc [string-proc (λ(x)x)]
                                 #:symbol-proc [symbol-proc (λ(x)x)]
                                 #:valid-char-proc [valid-char-proc (λ(x)x)]
                                 #:cdata-proc [cdata-proc (λ(x)x)]
                                 #:exclude-tags [excluded-tags '()]
                                 #:exclude-attrs [excluded-attrs '()])
  ((xexpr/c)  
   (#:txexpr-tag-proc (txexpr-tag? . -> . txexpr-tag?)
                      #:txexpr-attrs-proc (txexpr-attrs? . -> . txexpr-attrs?)
                      #:txexpr-elements-proc (txexpr-elements? . -> . txexpr-elements?)
                      #:block-txexpr-proc (block-txexpr? . -> . xexpr?)
                      #:inline-txexpr-proc (txexpr? . -> . xexpr?)
                      #:string-proc (string? . -> . xexpr?)
                      #:symbol-proc (symbol? . -> . xexpr?)
                      #:valid-char-proc (valid-char? . -> . xexpr?)
                      #:cdata-proc (cdata? . -> . xexpr?)
                      #:exclude-tags (listof txexpr-tag?)
                      #:exclude-attrs txexpr-attrs?) . ->* . txexpr?)
  
  
  (let loop ([x txexpr])
    (cond
      [(txexpr? x) (let-values([(tag attrs elements) (txexpr->values x)]) 
                     (if (or (member tag excluded-tags) (ormap (λ(attr) (member attr excluded-attrs)) attrs)) 
                         x ; because it's excluded
                         ;; we apply processing here rather than do recursive descent on the pieces
                         ;; because if we send them back through loop, certain element types are ambiguous
                         ;; e.g., ((p "foo")) tests out as both txexpr-attrs and txexpr-elements
                         (let ([decoded-txexpr 
                                (apply make-txexpr (list (txexpr-tag-proc tag) 
                                                         (txexpr-attrs-proc attrs) 
                                                         (map loop (txexpr-elements-proc elements))))])
                           ((if (block-txexpr? decoded-txexpr)
                                block-txexpr-proc
                                inline-txexpr-proc) decoded-txexpr))))]
      [(string? x) (string-proc x)]
      [(symbol? x) (symbol-proc x)]
      [(valid-char? x) (valid-char-proc x)]
      [(cdata? x) (cdata-proc x)]
      [else (error "decode: can't decode" x)])))


(define+provide/contract (decode-elements elements
                                          #:txexpr-tag-proc [txexpr-tag-proc (λ(x)x)]
                                          #:txexpr-attrs-proc [txexpr-attrs-proc (λ(x)x)]
                                          #:txexpr-elements-proc [txexpr-elements-proc (λ(x)x)]
                                          #:block-txexpr-proc [block-txexpr-proc (λ(x)x)]
                                          #:inline-txexpr-proc [inline-txexpr-proc (λ(x)x)]
                                          #:string-proc [string-proc (λ(x)x)]
                                          #:symbol-proc [symbol-proc (λ(x)x)]
                                          #:valid-char-proc [valid-char-proc (λ(x)x)]
                                          #:cdata-proc [cdata-proc (λ(x)x)]
                                          #:exclude-tags [excluded-tags '()]
                                          #:exclude-attrs [excluded-attrs '()])
  ((txexpr-elements?)  
   (#:txexpr-tag-proc (txexpr-tag? . -> . txexpr-tag?)
                      #:txexpr-attrs-proc (txexpr-attrs? . -> . txexpr-attrs?)
                      #:txexpr-elements-proc (txexpr-elements? . -> . txexpr-elements?)
                      #:block-txexpr-proc (block-txexpr? . -> . xexpr?)
                      #:inline-txexpr-proc (txexpr? . -> . xexpr?)
                      #:string-proc (string? . -> . xexpr?)
                      #:symbol-proc (symbol? . -> . xexpr?)
                      #:valid-char-proc (valid-char? . -> . xexpr?)
                      #:cdata-proc (cdata? . -> . xexpr?)
                      #:exclude-tags (listof txexpr-tag?)
                      #:exclude-attrs txexpr-attrs?) . ->* . txexpr-elements?)
  
  (define temp-tag (gensym "temp-tag"))
  (define decode-result (decode `(temp-tag ,@elements)
                                #:txexpr-tag-proc txexpr-tag-proc
                                #:txexpr-attrs-proc txexpr-attrs-proc
                                #:txexpr-elements-proc txexpr-elements-proc
                                #:block-txexpr-proc block-txexpr-proc
                                #:inline-txexpr-proc inline-txexpr-proc
                                #:string-proc string-proc
                                #:symbol-proc symbol-proc
                                #:valid-char-proc valid-char-proc
                                #:cdata-proc cdata-proc
                                #:exclude-tags excluded-tags
                                #:exclude-attrs excluded-attrs))
  (get-elements decode-result))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Blocks

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; initial set of block tags: from html
(define+provide project-block-tags 
  (make-parameter html:block-tags))


;; tags are inline unless they're registered as block tags.
(define+provide/contract (block-txexpr? x)
  (any/c . -> . boolean?)
  (and (txexpr? x) (member (get-tag x) (project-block-tags)) #t))


(define+provide/contract (register-block-tag tag)
  (txexpr-tag? . -> . void?)
  (project-block-tags (cons tag (project-block-tags))))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Typography

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


(define (make-replacer query+replacement)
  (let ([queries (map car query+replacement)]
        [replacements (map second query+replacement)])
    ;; reverse because first in list should be first applied to str (and compose1 works right-to-left)
    (apply compose1 (reverse (map (λ(query replacement) (λ(str) (regexp-replace* query str replacement))) queries replacements)))))

(define+provide/contract (smart-dashes str)
  (string? . -> . string?)
  
  (define dashes 
    ;; fix em dashes first, else they'll be mistaken for en dashes
    ;; \\s is whitespace + #\u00A0 is nonbreaking space
    '((#px"[\\s#\u00A0]*(---|—)[\\s#\u00A0]*" "—") ; em dash
      (#px"[\\s#\u00A0]*(--|–)[\\s#\u00A0]*" "–"))) ; en dash
  
  
  ((make-replacer dashes) str))


(define+provide/contract (smart-quotes str)
  (string? . -> . string?)
  
  (define quotes
    '((#px"(?<=\\w)'(?=\\w)" "’") ; apostrophe
      (#px"(?<!\\w)'(?=\\S)" "‘") ; single_at_beginning
      (#px"(?<=\\S)'(?!\\w)" "’") ; single_at_end
      (#px"(?<!\\w)\"(?=\\S)" "“") ; double_at_beginning
      (#px"(?<=\\S)\"(?!\\w)" "”"))) ; double_at_end
  
  ((make-replacer quotes) str))


;; insert nbsp between last two words
(define+provide/contract (nonbreaking-last-space x #:nbsp [nbsp (->string #\u00A0)] 
                                                 #:minimum-word-length [minimum-word-length 6]
                                                 #:last-word-proc [last-word-proc (λ(x) x)])
  ((txexpr?) (#:nbsp string? #:minimum-word-length integer? #:last-word-proc procedure?) . ->* . txexpr?)
  
  ;; todo: parameterize this, as it will be different for each project
  (define tags-to-pay-attention-to '(p aside)) ; only apply to paragraphs
  
  (define (replace-last-space str)
    (if (#\space . in? . str)
        (let ([reversed-str-list (reverse (string->list str))]
              [reversed-nbsp (reverse (string->list (->string nbsp)))])
          (define-values (last-word-chars other-chars) 
            (splitf-at reversed-str-list (λ(i) (not (eq? i #\space)))))
          
          (define front-chars (if (< (len last-word-chars) minimum-word-length) ; OK for long words to be on their own line
                                  ; first char of other-chars will be the space, so use cdr
                                  (string-append (list->string (reverse (cdr other-chars))) (->string nbsp))
                                  (list->string (reverse other-chars))))
          (define last-word (list->string (reverse last-word-chars)))
          `(,front-chars ,(last-word-proc last-word))) ; don't concatenate last word bc last-word-proc might be a txexpr wrapper
        (list str)))
  
  (define (find-last-word-space x) ; recursively traverse xexpr
    (cond
      [(string? x) (replace-last-space x)] ; todo: this assumes a paragraph only has one string in it.
      [(txexpr? x)
       (let-values([(tag attr elements) (txexpr->values x)])
         (if (> (length elements) 0) ; elements is list of xexprs
             (let-values ([(all-but-last last) (split-at elements (sub1 (length elements)))]) 
               (define result (find-last-word-space (car last)))
               (define result-items (if (txexpr? result) (list result) result)) ; might be txexpr, or list of new elements
               (make-txexpr tag attr `(,@all-but-last ,@result-items)))
             x))]
      [else x]))
  
  (if ((car x) . in? . tags-to-pay-attention-to)
      (find-last-word-space x)
      x))


; wrap initial quotes for hanging punctuation
; todo: improve this
; does not handle <p>“<em>thing</em> properly
(define+provide/contract (wrap-hanging-quotes nx 
                                              #:single-prepend [single-pp '(squo)]
                                              #:double-prepend  [double-pp '(dquo)])
  ((txexpr?) (#:single-prepend list? #:double-prepend list?) . ->* . txexpr?)
  
  (define two-or-more-char-string? (λ(i) (and (string? i) (>= (len i) 2))))
  (define-values (tag attr elements) (txexpr->values nx))
  (make-txexpr tag attr
               (if (and (list? elements) (not (empty? elements)))
                   (let ([new-car-elements  (match (car elements)
                                              [(? two-or-more-char-string? tcs)
                                               (define str-first (get tcs 0))
                                               (define str-rest (get tcs 1 (len tcs)))
                                               (cond
                                                 [(str-first . in? . '("\"" "“"))
                                                  ;; can wrap with any inline tag
                                                  ;; so that linebreak detection etc still works
                                                  `(,@double-pp ,(->string #\“) ,str-rest)]
                                                 [(str-first . in? . '("\'" "‘")) 
                                                  `(,@single-pp ,(->string #\‘) ,str-rest)]
                                                 [else tcs])]
                                              [(? txexpr? nx) (wrap-hanging-quotes nx)]
                                              [else (car elements)])])
                     (cons new-car-elements (cdr elements)))
                   elements)))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Lines, blocks, paragraphs

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


;; turn the right items into <br> tags
(define+provide/contract (detect-linebreaks xc 
                                            #:separator [newline world:linebreak-separator]
                                            #:insert [linebreak '(br)])
  ((txexpr-elements?) (#:separator string? #:insert xexpr?) . ->* . txexpr-elements?)
  ;; todo: should this test be not block + not whitespace?
  (define not-block? (λ(i) (not (block-txexpr? i))))
  (filter-not empty?
              (for/list ([i (in-range (len xc))])
                (let ([item (get xc i)])
                  (cond
                    ;; skip first and last
                    [(or (= i 0) (= i (sub1 (len xc)))) item]
                    [(equal? item newline)                   
                     (match (get xc (- i 1) (+ i 2)) ; a three-element slice with x[i] in the middle
                       ;; only convert if neither adjacent tag is a block
                       ;; (because blocks automatically force a newline before & after)
                       [(list (? not-block?) newline (? not-block?)) linebreak]
                       [else empty])] ; otherwise delete
                    [else item])))))


(define+provide/contract (whitespace? x [nbsp? #f])
  ((any/c)(boolean?) . ->* . coerce/boolean?)
  (define pat (pregexp (format "^[\\s~a]+$" (if nbsp? #\u00A0 ""))))
  (cond
    [(equal? "" x) #t] ; empty string is deemed whitespace
    [(or (string? x) (symbol? x)) (regexp-match pat (->string x))]
    [(or (list? x) (vector? x)) (and (not (empty? x)) (andmap (λ(i) (whitespace? i nbsp?)) (->list x)))] ; andmap returns #t for empty lists
    [else #f]))

(define+provide/contract (whitespace/nbsp? x)
  (any/c . -> . coerce/boolean?)
  (whitespace? x #t))


;; is x a paragraph break?
(define+provide/contract (paragraph-break? x #:separator [sep world:paragraph-separator])
  ((any/c) (#:separator pregexp?) . ->* . coerce/boolean?)
  (define paragraph-pattern (pregexp (format "^~a+$" sep)))
  (and (string? x) (regexp-match paragraph-pattern x)))


(define (newline? x)
  (and (string? x) (equal? world:newline x)))
(define (not-newline? x)
  (not (newline? x)))

(define (do-merge xs [acc '()])
  (if (empty? xs)
      acc
      ;; Try to peel the newlines off the front.
      (let-values ([(leading-newlines remainder) (splitf-at xs newline?)])
        (if (not (empty? leading-newlines)) ; if you got newlines ...
            ;; combine them into a string and append them to the accumulator, 
            ;; and recurse on the rest
            (do-merge remainder (append acc (list (apply string-append leading-newlines))))
            ;; otherwise peel off elements up to the next newline, append them to accumulator,
            ;; and recurse on the rest
            (do-merge (dropf remainder not-newline?) 
                      (append acc (takef remainder not-newline?)))))))


;; Find adjacent newline characters in a list and merge them into one item
;; Scribble, by default, makes each newline a separate list item
;; In practice, this is worthless.
(define+provide/contract (merge-newlines x)
  (txexpr-elements? . -> . txexpr-elements?)  
  (cond
    [(list? x) (do-merge (map merge-newlines x))]
    [else x]))


;; detect paragraphs
;; todo: unit tests
(define+provide/contract (detect-paragraphs elements #:tag [tag 'p]
                                            #:separator [sep world:paragraph-separator]
                                            #:linebreak-proc [linebreak-proc detect-linebreaks]
                                            #:force? [force-paragraph #f])
  ((txexpr-elements?) (#:tag symbol? #:separator string? #:linebreak-proc (txexpr-elements? . -> . txexpr-elements?) #:force? boolean?) 
                      . ->* . txexpr-elements?)
  
  ;; prepare elements for paragraph testing
  (define (prep-paragraph-flow elems)
    (linebreak-proc (merge-newlines (trimf elems whitespace?))))
  
  (define my-paragraph-break? (λ(x) (and (paragraph-break? x #:separator sep) #t)))
  
  (define (wrap-paragraph elems) 
    (match elems
      [(list (? block-txexpr? bxs) ...) bxs] ; leave a series of block xexprs alone
      [else (list (make-txexpr tag empty elems))])) ; otherwise wrap in p tag
  
  (let ([elements (prep-paragraph-flow elements)])
    (define explicit-or-implicit-paragraph-break? (λ(x) (or (my-paragraph-break? x) (block-txexpr? x))))
    (if (ormap explicit-or-implicit-paragraph-break? elements) ; need this condition to prevent infinite recursion
        ;; use append-map on wrap-paragraph rather than map to permit return of multiple elements
        (append-map wrap-paragraph (append-map (λ(es) (filter-split es my-paragraph-break?)) (slicef elements block-txexpr?))) ; split into ¶¶, using both implied and explicit paragraph breaks
        (if force-paragraph
            (append-map wrap-paragraph (slicef elements block-txexpr?)) ; upconverts non-block elements to paragraphs
            elements))))
-												sun eve

											
										
										
											11 years ago
+								#lang racket/base
-												remove dependencies on css-tools and hyphenate

											
										
										
											11 years ago
+								(require xml txexpr sugar racket/match racket/list (prefix-in html: pollen/html))
-												consolidate decode modules

											
										
										
											11 years ago
+								(require "debug.rkt" "world.rkt")
-												streamline decode

											
										
										
											11 years ago
-												remove listof contracts

											
										
										
											10 years ago
+								(define (symbols? x) (and (list? x) (andmap symbol? x)))
-												streamline decode

											
										
										
											11 years ago
 								(define+provide (to-string x)
 								  (if (string? x)
 								      x ; fast exit for strings
-												more liberal text conversion in decode

											
										
										
											11 years ago
+								      (with-handlers  ([exn:fail? (λ(exn) (error (format "Pollen decoder: can't convert ~v to ~a" x 'string)))])
-												streamline decode

											
										
										
											11 years ago
+								        (cond
 								          [(equal? '() x) ""]
 								          [(symbol? x) (symbol->string x)]
 								          [(number? x) (number->string x)]
 								          [(path? x) (path->string x)]
 								          [(char? x) (format "~a" x)]
-												adding void to to-string

											
										
										
											10 years ago
+								          [(void? x) ""]
-												updates

											
										
										
											11 years ago
+								          ;; todo: guard against weird shit like lists of procedures
 								          [(or (list? x) (hash? x) (vector? x)) (format "~v" x)] ; ok to convert datatypes
 								          [else (error)])))) ; but things like procedures should throw an error
-												streamline decode

											
										
										
											11 years ago
 								;; decoder wireframe
-												oo that was a bad bug

											
										
										
											11 years ago
+								(define+provide/contract (decode txexpr
-												decode improvements: contracts on decode arguments, new decode/block module

											
										
										
											11 years ago
+								                                 #:txexpr-tag-proc [txexpr-tag-proc (λ(x)x)]
 								                                 #:txexpr-attrs-proc [txexpr-attrs-proc (λ(x)x)]
 								                                 #:txexpr-elements-proc [txexpr-elements-proc (λ(x)x)]
 								                                 #:block-txexpr-proc [block-txexpr-proc (λ(x)x)]
 								                                 #:inline-txexpr-proc [inline-txexpr-proc (λ(x)x)]
-												oo that was a bad bug

											
										
										
											11 years ago
+								                                 #:string-proc [string-proc (λ(x)x)]
 								                                 #:symbol-proc [symbol-proc (λ(x)x)]
 								                                 #:valid-char-proc [valid-char-proc (λ(x)x)]
-												decode improvements: contracts on decode arguments, new decode/block module

											
										
										
											11 years ago
+								                                 #:cdata-proc [cdata-proc (λ(x)x)]
-												add #:exclude-attrs to `decode`

											
										
										
											10 years ago
+								                                 #:exclude-tags [excluded-tags '()]
 								                                 #:exclude-attrs [excluded-attrs '()])
-												streamline decode

											
										
										
											11 years ago
+								  ((xexpr/c)
-												decode improvements: contracts on decode arguments, new decode/block module

											
										
										
											11 years ago
+								   (#:txexpr-tag-proc (txexpr-tag? . -> . txexpr-tag?)
-												updates

											
										
										
											11 years ago
+								                      #:txexpr-attrs-proc (txexpr-attrs? . -> . txexpr-attrs?)
 								                      #:txexpr-elements-proc (txexpr-elements? . -> . txexpr-elements?)
 								                      #:block-txexpr-proc (block-txexpr? . -> . xexpr?)
 								                      #:inline-txexpr-proc (txexpr? . -> . xexpr?)
 								                      #:string-proc (string? . -> . xexpr?)
 								                      #:symbol-proc (symbol? . -> . xexpr?)
 								                      #:valid-char-proc (valid-char? . -> . xexpr?)
 								                      #:cdata-proc (cdata? . -> . xexpr?)
-												add #:exclude-attrs to `decode`

											
										
										
											10 years ago
+								                      #:exclude-tags (listof txexpr-tag?)
 								                      #:exclude-attrs txexpr-attrs?) . ->* . txexpr?)
-												updates

											
										
										
											11 years ago
-												streamline decode

											
										
										
											11 years ago
-												oo that was a bad bug

											
										
										
											11 years ago
+								  (let loop ([x txexpr])
-												streamline decode

											
										
										
											11 years ago
+								    (cond
-												oo that was a bad bug

											
										
										
											11 years ago
+								      [(txexpr? x) (let-values([(tag attrs elements) (txexpr->values x)])
-												add #:exclude-attrs to `decode`

											
										
										
											10 years ago
+								                     (if (or (member tag excluded-tags) (ormap (λ(attr) (member attr excluded-attrs)) attrs))
-												oo that was a bad bug

											
										
										
											11 years ago
+								                         x ; because it's excluded
 								                         ;; we apply processing here rather than do recursive descent on the pieces
 								                         ;; because if we send them back through loop, certain element types are ambiguous
 								                         ;; e.g., ((p "foo")) tests out as both txexpr-attrs and txexpr-elements
-												decode improvements: contracts on decode arguments, new decode/block module

											
										
										
											11 years ago
+								                         (let ([decoded-txexpr
 								                                (apply make-txexpr (list (txexpr-tag-proc tag)
 								                                                         (txexpr-attrs-proc attrs)
 								                                                         (map loop (txexpr-elements-proc elements))))])
 								                           ((if (block-txexpr? decoded-txexpr)
 								                                block-txexpr-proc
 								                                inline-txexpr-proc) decoded-txexpr))))]
-												streamline decode

											
										
										
											11 years ago
+								      [(string? x) (string-proc x)]
-												oo that was a bad bug

											
										
										
											11 years ago
+								      [(symbol? x) (symbol-proc x)]
 								      [(valid-char? x) (valid-char-proc x)]
 								      [(cdata? x) (cdata-proc x)]
-												streamline decode

											
										
										
											11 years ago
+								      [else (error "decode: can't decode" x)])))
-												decode improvements: contracts on decode arguments, new decode/block module

											
										
										
											11 years ago
-												add `decode-elements`

											
										
										
											10 years ago
+								(define+provide/contract (decode-elements elements
-												add #:force? option to `detect-paragraphs`

											
										
										
											10 years ago
+								                                          #:txexpr-tag-proc [txexpr-tag-proc (λ(x)x)]
 								                                          #:txexpr-attrs-proc [txexpr-attrs-proc (λ(x)x)]
 								                                          #:txexpr-elements-proc [txexpr-elements-proc (λ(x)x)]
 								                                          #:block-txexpr-proc [block-txexpr-proc (λ(x)x)]
 								                                          #:inline-txexpr-proc [inline-txexpr-proc (λ(x)x)]
 								                                          #:string-proc [string-proc (λ(x)x)]
 								                                          #:symbol-proc [symbol-proc (λ(x)x)]
 								                                          #:valid-char-proc [valid-char-proc (λ(x)x)]
 								                                          #:cdata-proc [cdata-proc (λ(x)x)]
-												add #:exclude-attrs to `decode`

											
										
										
											10 years ago
+								                                          #:exclude-tags [excluded-tags '()]
 								                                          #:exclude-attrs [excluded-attrs '()])
-												add `decode-elements`

											
										
										
											10 years ago
+								  ((txexpr-elements?)
 								   (#:txexpr-tag-proc (txexpr-tag? . -> . txexpr-tag?)
 								                      #:txexpr-attrs-proc (txexpr-attrs? . -> . txexpr-attrs?)
 								                      #:txexpr-elements-proc (txexpr-elements? . -> . txexpr-elements?)
 								                      #:block-txexpr-proc (block-txexpr? . -> . xexpr?)
 								                      #:inline-txexpr-proc (txexpr? . -> . xexpr?)
 								                      #:string-proc (string? . -> . xexpr?)
 								                      #:symbol-proc (symbol? . -> . xexpr?)
 								                      #:valid-char-proc (valid-char? . -> . xexpr?)
 								                      #:cdata-proc (cdata? . -> . xexpr?)
-												add #:exclude-attrs to `decode`

											
										
										
											10 years ago
+								                      #:exclude-tags (listof txexpr-tag?)
 								                      #:exclude-attrs txexpr-attrs?) . ->* . txexpr-elements?)
-												add `decode-elements`

											
										
										
											10 years ago
 								  (define temp-tag (gensym "temp-tag"))
 								  (define decode-result (decode `(temp-tag ,@elements)
 								                                #:txexpr-tag-proc txexpr-tag-proc
-												add #:force? option to `detect-paragraphs`

											
										
										
											10 years ago
+								                                #:txexpr-attrs-proc txexpr-attrs-proc
 								                                #:txexpr-elements-proc txexpr-elements-proc
 								                                #:block-txexpr-proc block-txexpr-proc
 								                                #:inline-txexpr-proc inline-txexpr-proc
 								                                #:string-proc string-proc
 								                                #:symbol-proc symbol-proc
 								                                #:valid-char-proc valid-char-proc
 								                                #:cdata-proc cdata-proc
-												add #:exclude-attrs to `decode`

											
										
										
											10 years ago
+								                                #:exclude-tags excluded-tags
 								                                #:exclude-attrs excluded-attrs))
-												add `decode-elements`

											
										
										
											10 years ago
+								  (get-elements decode-result))
-												add #:force? option to `detect-paragraphs`

											
										
										
											10 years ago
-												decode improvements: contracts on decode arguments, new decode/block module

											
										
										
											11 years ago
-												consolidate decode modules

											
										
										
											11 years ago
+								;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-												decode improvements: contracts on decode arguments, new decode/block module

											
										
										
											11 years ago
-												consolidate decode modules

											
										
										
											11 years ago
+								;; Blocks
-												decode improvements: contracts on decode arguments, new decode/block module

											
										
										
											11 years ago
-												consolidate decode modules

											
										
										
											11 years ago
+								;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-												decode improvements: contracts on decode arguments, new decode/block module

											
										
										
											11 years ago
-												consolidate decode modules

											
										
										
											11 years ago
+								;; initial set of block tags: from html
 								(define+provide project-block-tags
 								  (make-parameter html:block-tags))
 								;; tags are inline unless they're registered as block tags.
 								(define+provide/contract (block-txexpr? x)
 								  (any/c . -> . boolean?)
 								  (and (txexpr? x) (member (get-tag x) (project-block-tags)) #t))
 								(define+provide/contract (register-block-tag tag)
 								  (txexpr-tag? . -> . void?)
 								  (project-block-tags (cons tag (project-block-tags))))
 								;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 								;; Typography
 								;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 								(define (make-replacer query+replacement)
 								  (let ([queries (map car query+replacement)]
 								        [replacements (map second query+replacement)])
 								    ;; reverse because first in list should be first applied to str (and compose1 works right-to-left)
 								    (apply compose1 (reverse (map (λ(query replacement) (λ(str) (regexp-replace* query str replacement))) queries replacements)))))
 								(define+provide/contract (smart-dashes str)
 								  (string? . -> . string?)
 								  (define dashes
 								    ;; fix em dashes first, else they'll be mistaken for en dashes
 								    ;; \\s is whitespace + #\u00A0 is nonbreaking space
 								    '((#px"[\\s#\u00A0]*(---|—)[\\s#\u00A0]*" "—") ; em dash
 								      (#px"[\\s#\u00A0]*(--|–)[\\s#\u00A0]*" "–"))) ; en dash
 								  ((make-replacer dashes) str))
 								(define+provide/contract (smart-quotes str)
 								  (string? . -> . string?)
 								  (define quotes
 								    '((#px"(?<=\\w)'(?=\\w)" "’") ; apostrophe
-												small fixes to smart-quotes and wrap-hanging-quotes

											
										
										
											10 years ago
+								      (#px"(?<!\\w)'(?=\\S)" "‘") ; single_at_beginning
-												consolidate decode modules

											
										
										
											11 years ago
+								      (#px"(?<=\\S)'(?!\\w)" "’") ; single_at_end
-												small fixes to smart-quotes and wrap-hanging-quotes

											
										
										
											10 years ago
+								      (#px"(?<!\\w)\"(?=\\S)" "“") ; double_at_beginning
-												consolidate decode modules

											
										
										
											11 years ago
+								      (#px"(?<=\\S)\"(?!\\w)" "”"))) ; double_at_end
 								  ((make-replacer quotes) str))
 								;; insert nbsp between last two words
 								(define+provide/contract (nonbreaking-last-space x #:nbsp [nbsp (->string #\u00A0)]
-												updates

											
										
										
											11 years ago
+								                                                 #:minimum-word-length [minimum-word-length 6]
 								                                                 #:last-word-proc [last-word-proc (λ(x) x)])
 								  ((txexpr?) (#:nbsp string? #:minimum-word-length integer? #:last-word-proc procedure?) . ->* . txexpr?)
-												consolidate decode modules

											
										
										
											11 years ago
 								  ;; todo: parameterize this, as it will be different for each project
 								  (define tags-to-pay-attention-to '(p aside)) ; only apply to paragraphs
 								  (define (replace-last-space str)
 								    (if (#\space . in? . str)
 								        (let ([reversed-str-list (reverse (string->list str))]
 								              [reversed-nbsp (reverse (string->list (->string nbsp)))])
 								          (define-values (last-word-chars other-chars)
 								            (splitf-at reversed-str-list (λ(i) (not (eq? i #\space)))))
 								          (define front-chars (if (< (len last-word-chars) minimum-word-length) ; OK for long words to be on their own line
 								                                  ; first char of other-chars will be the space, so use cdr
 								                                  (string-append (list->string (reverse (cdr other-chars))) (->string nbsp))
 								                                  (list->string (reverse other-chars))))
-												updates

											
										
										
											11 years ago
+								          (define last-word (list->string (reverse last-word-chars)))
 								          `(,front-chars ,(last-word-proc last-word))) ; don't concatenate last word bc last-word-proc might be a txexpr wrapper
-												consolidate decode modules

											
										
										
											11 years ago
+								        (list str)))
 								  (define (find-last-word-space x) ; recursively traverse xexpr
 								    (cond
 								      [(string? x) (replace-last-space x)] ; todo: this assumes a paragraph only has one string in it.
 								      [(txexpr? x)
 								       (let-values([(tag attr elements) (txexpr->values x)])
 								         (if (> (length elements) 0) ; elements is list of xexprs
 								             (let-values ([(all-but-last last) (split-at elements (sub1 (length elements)))])
 								               (define result (find-last-word-space (car last)))
 								               (define result-items (if (txexpr? result) (list result) result)) ; might be txexpr, or list of new elements
 								               (make-txexpr tag attr `(,@all-but-last ,@result-items)))
 								             x))]
 								      [else x]))
 								  (if ((car x) . in? . tags-to-pay-attention-to)
 								      (find-last-word-space x)
 								      x))
 								; wrap initial quotes for hanging punctuation
 								; todo: improve this
 								; does not handle <p>“<em>thing</em> properly
 								(define+provide/contract (wrap-hanging-quotes nx
-												small fixes to smart-quotes and wrap-hanging-quotes

											
										
										
											10 years ago
+								                                              #:single-prepend [single-pp '(squo)]
 								                                              #:double-prepend  [double-pp '(dquo)])
-												consolidate decode modules

											
										
										
											11 years ago
+								  ((txexpr?) (#:single-prepend list? #:double-prepend list?) . ->* . txexpr?)
 								  (define two-or-more-char-string? (λ(i) (and (string? i) (>= (len i) 2))))
 								  (define-values (tag attr elements) (txexpr->values nx))
 								  (make-txexpr tag attr
 								               (if (and (list? elements) (not (empty? elements)))
 								                   (let ([new-car-elements  (match (car elements)
 								                                              [(? two-or-more-char-string? tcs)
 								                                               (define str-first (get tcs 0))
-												update use of 'get'

											
										
										
											10 years ago
+								                                               (define str-rest (get tcs 1 (len tcs)))
-												consolidate decode modules

											
										
										
											11 years ago
+								                                               (cond
 								                                                 [(str-first . in? . '("\"" "“"))
 								                                                  ;; can wrap with any inline tag
 								                                                  ;; so that linebreak detection etc still works
-												small fixes to smart-quotes and wrap-hanging-quotes

											
										
										
											10 years ago
+								                                                  `(,@double-pp ,(->string #\“) ,str-rest)]
-												consolidate decode modules

											
										
										
											11 years ago
+								                                                 [(str-first . in? . '("\'" "‘"))
-												small fixes to smart-quotes and wrap-hanging-quotes

											
										
										
											10 years ago
+								                                                  `(,@single-pp ,(->string #\‘) ,str-rest)]
-												consolidate decode modules

											
										
										
											11 years ago
+								                                                 [else tcs])]
 								                                              [(? txexpr? nx) (wrap-hanging-quotes nx)]
 								                                              [else (car elements)])])
 								                     (cons new-car-elements (cdr elements)))
 								                   elements)))
 								;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 								;; Lines, blocks, paragraphs
 								;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 								;; turn the right items into <br> tags
 								(define+provide/contract (detect-linebreaks xc
-												updates

											
										
										
											11 years ago
+								                                            #:separator [newline world:linebreak-separator]
 								                                            #:insert [linebreak '(br)])
-												consolidate decode modules

											
										
										
											11 years ago
+								  ((txexpr-elements?) (#:separator string? #:insert xexpr?) . ->* . txexpr-elements?)
 								  ;; todo: should this test be not block + not whitespace?
 								  (define not-block? (λ(i) (not (block-txexpr? i))))
 								  (filter-not empty?
-												add in- iterators

											
										
										
											10 years ago
+								              (for/list ([i (in-range (len xc))])
-												consolidate decode modules

											
										
										
											11 years ago
+								                (let ([item (get xc i)])
 								                  (cond
 								                    ;; skip first and last
 								                    [(or (= i 0) (= i (sub1 (len xc)))) item]
 								                    [(equal? item newline)
 								                     (match (get xc (- i 1) (+ i 2)) ; a three-element slice with x[i] in the middle
 								                       ;; only convert if neither adjacent tag is a block
 								                       ;; (because blocks automatically force a newline before & after)
 								                       [(list (? not-block?) newline (? not-block?)) linebreak]
 								                       [else empty])] ; otherwise delete
 								                    [else item])))))
-												fix whitespace/nbsp?

											
										
										
											10 years ago
+								(define+provide/contract (whitespace? x [nbsp? #f])
 								  ((any/c)(boolean?) . ->* . coerce/boolean?)
 								  (define pat (pregexp (format "^[\\s~a]+$" (if nbsp? #\u00A0 ""))))
-												consolidate decode modules

											
										
										
											11 years ago
+								  (cond
 								    [(equal? "" x) #t] ; empty string is deemed whitespace
-												fix whitespace/nbsp?

											
										
										
											10 years ago
+								    [(or (string? x) (symbol? x)) (regexp-match pat (->string x))]
 								    [(or (list? x) (vector? x)) (and (not (empty? x)) (andmap (λ(i) (whitespace? i nbsp?)) (->list x)))] ; andmap returns #t for empty lists
-												consolidate decode modules

											
										
										
											11 years ago
+								    [else #f]))
-												fix whitespace/nbsp?

											
										
										
											10 years ago
+								(define+provide/contract (whitespace/nbsp? x)
-												consolidate decode modules

											
										
										
											11 years ago
+								  (any/c . -> . coerce/boolean?)
-												fix whitespace/nbsp?

											
										
										
											10 years ago
+								  (whitespace? x #t))
-												consolidate decode modules

											
										
										
											11 years ago
 								;; is x a paragraph break?
 								(define+provide/contract (paragraph-break? x #:separator [sep world:paragraph-separator])
 								  ((any/c) (#:separator pregexp?) . ->* . coerce/boolean?)
 								  (define paragraph-pattern (pregexp (format "^~a+$" sep)))
 								  (and (string? x) (regexp-match paragraph-pattern x)))
 								(define (newline? x)
 								  (and (string? x) (equal? world:newline x)))
 								(define (not-newline? x)
 								  (not (newline? x)))
 								(define (do-merge xs [acc '()])
 								  (if (empty? xs)
 								      acc
 								      ;; Try to peel the newlines off the front.
 								      (let-values ([(leading-newlines remainder) (splitf-at xs newline?)])
 								        (if (not (empty? leading-newlines)) ; if you got newlines ...
 								            ;; combine them into a string and append them to the accumulator,
 								            ;; and recurse on the rest
 								            (do-merge remainder (append acc (list (apply string-append leading-newlines))))
 								            ;; otherwise peel off elements up to the next newline, append them to accumulator,
 								            ;; and recurse on the rest
 								            (do-merge (dropf remainder not-newline?)
 								                      (append acc (takef remainder not-newline?)))))))
 								;; Find adjacent newline characters in a list and merge them into one item
 								;; Scribble, by default, makes each newline a separate list item
 								;; In practice, this is worthless.
 								(define+provide/contract (merge-newlines x)
 								  (txexpr-elements? . -> . txexpr-elements?)
 								  (cond
 								    [(list? x) (do-merge (map merge-newlines x))]
 								    [else x]))
 								;; detect paragraphs
 								;; todo: unit tests
 								(define+provide/contract (detect-paragraphs elements #:tag [tag 'p]
 								                                            #:separator [sep world:paragraph-separator]
-												add #:force? option to `detect-paragraphs`

											
										
										
											10 years ago
+								                                            #:linebreak-proc [linebreak-proc detect-linebreaks]
 								                                            #:force? [force-paragraph #f])
 								  ((txexpr-elements?) (#:tag symbol? #:separator string? #:linebreak-proc (txexpr-elements? . -> . txexpr-elements?) #:force? boolean?)
-												updates

											
										
										
											11 years ago
+								                      . ->* . txexpr-elements?)
-												consolidate decode modules

											
										
										
											11 years ago
 								  ;; prepare elements for paragraph testing
-												add #:force? option to `detect-paragraphs`

											
										
										
											10 years ago
+								  (define (prep-paragraph-flow elems)
 								    (linebreak-proc (merge-newlines (trimf elems whitespace?))))
-												consolidate decode modules

											
										
										
											11 years ago
 								  (define my-paragraph-break? (λ(x) (and (paragraph-break? x #:separator sep) #t)))
-												add #:force? option to `detect-paragraphs`

											
										
										
											10 years ago
+								  (define (wrap-paragraph elems)
 								    (match elems
-												correct `detect-paragraphs` behavior with series of blocks

											
										
										
											10 years ago
+								      [(list (? block-txexpr? bxs) ...) bxs] ; leave a series of block xexprs alone
-												add #:force? option to `detect-paragraphs`

											
										
										
											10 years ago
+								      [else (list (make-txexpr tag empty elems))])) ; otherwise wrap in p tag
-												consolidate decode modules

											
										
										
											11 years ago
-												make `detect-paragraphs` work more intuitively

											
										
										
											10 years ago
+								  (let ([elements (prep-paragraph-flow elements)])
 								    (define explicit-or-implicit-paragraph-break? (λ(x) (or (my-paragraph-break? x) (block-txexpr? x))))
 								    (if (ormap explicit-or-implicit-paragraph-break? elements) ; need this condition to prevent infinite recursion
 								        ;; use append-map on wrap-paragraph rather than map to permit return of multiple elements
 								        (append-map wrap-paragraph (append-map (λ(es) (filter-split es my-paragraph-break?)) (slicef elements block-txexpr?))) ; split into ¶¶, using both implied and explicit paragraph breaks
-												add #:force? option to `detect-paragraphs`

											
										
										
											10 years ago
+								        (if force-paragraph
 								            (append-map wrap-paragraph (slicef elements block-txexpr?)) ; upconverts non-block elements to paragraphs
 								            elements))))