You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
pollen/pollen 130801/tools.rkt

599 lines
20 KiB
Racket

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#lang racket/base
(require racket/list)
(require xml)
(require (only-in racket/function thunk))
(require racket/string)
(require racket/file)
(require xml/path)
(require (only-in racket/format ~a ~s ~v))
(require (prefix-in scribble: (only-in scribble/decode whitespace?)))
(require (only-in racket/path filename-extension))
(require (only-in (planet mb/pollen/library/html) inline-tags))
(require (planet mb/pollen/world))
(require (planet mb/pollen/readability))
;(require (planet mb/pollen/hyphenate))
(define nbsp " ") ; use this for readability in code
(define lozenge "") ; use this instead of escape syntax
(provide (all-defined-out)
describe whitespace? xexpr->string xexpr? filter-not flatten
(all-from-out (planet mb/pollen/readability)))
(define (hash-ref-or hash key [default #f])
(if (in? hash key)
(get hash key)
default))
(define (make-meta-hash x)
(define keys (se-path*/list '(meta #:name) x))
(define values (se-path*/list '(meta #:content) x))
(define meta-hash (make-hash))
;todo: convert this to for/list because map does not guarantee ordering
; probably want to keep it in sequence
(map (ƒ(key value) (change meta-hash (as-symbol key) (as-string value))) keys values)
meta-hash)
(define (magic-directory? path)
(and (directory-exists? path)
(or (ends-with? (path->string path) "requires")
(ends-with? (path->string path) "compiled")
)))
(define (filename-of path)
(let-values ([(dir filename ignored) (split-path path)])
filename))
(define (pollen-script? path)
(let ([path-string (path->string (filename-of path))])
(or (starts-with? path-string "pollen_") (starts-with? path-string "pollen-"))))
(define (racket-file? path)
(has-ext? path 'rkt))
(define (pmap-source? path)
(has-ext? path POLLEN_MAP_EXT))
(define (template-source? path)
(starts-with? (path->string (filename-of path)) (~a TEMPLATE_FILE_PREFIX)))
(define (preproc-source? path)
(has-ext? path POLLEN_PREPROC_EXT))
(define (make-preproc-in-path path)
(add-ext path POLLEN_PREPROC_EXT))
(define (make-preproc-out-path path)
(remove-ext path))
(define (has-preproc-source? path)
(file-exists? (make-preproc-in-path path)))
(define (pollen-source? path)
(has-ext? path POLLEN_SOURCE_EXT))
(define (make-pollen-source-path thing)
(add-ext (remove-ext (as-path thing)) POLLEN_SOURCE_EXT))
(define (has-pollen-source? path)
(file-exists? (make-pollen-source-path path)))
(define (insert-subdir path [subdir-in OUTPUT_SUBDIR])
(let-values ([(dir filename ignored) (split-path path)])
(when (equal? dir 'relative)
(set! dir (string->path ".")))
(letrec ([subdir-name (string->path (~a subdir-in))]
[subdir (build-path dir subdir-name)])
(when (not (directory-exists? subdir))
(make-directory subdir))
(build-path subdir filename))))
;;;;;;;;;;;;;;
; Moved from template.rkt
;;;;;;;;;;;;;;
; All from* functions should return a named-xexpr
(define (from x query)
; cache x
(let ([x (put x)])
; try finding it in metas, if not, find it in main, if not then return false
(or (from-metas x query) (from-main x query))))
(define (from-main x query) ; this used to be plain from
; check results first
(let* ([x (put x)]
[results (se-path*/list (list query) x)])
; if results exist, send back xexpr as output
(if (not (empty? results))
`(,query ,@results) ; todo: why use query as tag?
#f)))
(define (from-metas x key)
(let* ([x (put x)]
[meta-hash (make-meta-hash x)]
[key (as-symbol key)])
(if (in? meta-hash key)
`(value ,(get meta-hash key)) ;todo: why use value as tag?
#f)))
(define (put x)
; handles either xexpr or pollen file as input
(cond
; pass through xexpr as is
; put is optional for xexprs.
; it's only here to make the idiom smooth.
[(named-xexpr? x) x]
; todo: how to externalize pollen main tag into world name?
[(file-exists? (as-path x)) (dynamic-require x 'main)]
; also try adding pollen file extension
; this makes put compatible with map references
[(let ([x (make-pollen-source-path x)])
(when (file-exists? x)
(put x)))]
[else (error "put: need named xexpr or pollen file, but got" x)]))
(define (merge x)
(cond
[(named-xexpr? x)
; return content of xexpr.
; pollen language rules will splice these into the main flow.
(if (empty? x)
""
(let-values([(name attr content) (xexplode x)])
content))]
[(string? x) (list x)]))
#|(define (merge-strings x)
(when (empty? x) (error "merge-strings got empty x"))
;todo: filter metas?
; leaning toward no. Simplest behavior.
; function is not intended to be used with whole pollen body anyhow.
(let ([x (merge x)])
(string-join (filter string? (flatten x)) " ")))|#
(define (merge-strings x)
(string-join (filter string? (flatten x)) " "))
(define (make-html x)
(if (named-xexpr? x)
(xexpr->string x)
(let ([x (as-list x)])
(when (all xexpr? x)
(string-join (map xexpr->string x) "")))))
; generate *-as-html versions of functions
(define-values (put-as-html merge-as-html merge-strings-as-html)
(apply values (map (ƒ(proc) (ƒ(x) (make-html (proc x)))) (list put merge merge-strings))))
(define (as-literal x)
(set! x (flatten (list x))) ; coerce text or list to new list
(merge `(literal-thing ,@x)))
(define (make-url x)
(if (exists? x)
(str x ".html")
"#")) ; funny null url that means "stay here"
;;;;;;;;;;;;;;;;;;;;;;;;;
; make these independent of local includes
(define (map-topic topic . subtopics)
`(,(string->symbol topic) ,@(filter-not whitespace? subtopics)))
(define (meta key value)
`(meta ((name ,(as-string key))(content ,(as-string value)))))
; scribble's whitespace function misses trailing spaces wrapped in a list
(define (whitespace? x)
(cond
[(list? x) (all scribble:whitespace? x)]
[else (scribble:whitespace? x)]))
; remove empty elements
(define (remove-empty x)
(cond
[(list? x) (map remove-empty (filter-not empty? x))]
[else x]))
(define (remove-void x)
(cond
[(list? x) (map remove-void (filter-not void? x))]
[else x]))
; common idiom with lists:
; if list is empty, return empty
; otherwise do procedure
(define (empty/else thing proc)
(if (empty? thing)
empty
(proc thing)))
; common idiom with files:
; if the file exists, do procedure with it
(define (file-exists?/do path proc)
(if (file-exists? path)
(proc path)
#f))
; simple timer
(define-syntax-rule (time expr)
(begin
(define start-time (current-inexact-milliseconds))
(define result expr)
(define stop-time (current-inexact-milliseconds))
(message "Time for" 'expr "=" (- stop-time start-time))
result))
; utilities for working with file extensions
(define (.+ x) (format ".~a" x))
(define (get-ext path)
(bytes->string/utf-8 (filename-extension path)))
(define (has-ext? path ext)
(let ([path-ext (filename-extension path)])
; returns true if f-ext exists, and equals ext, otherwise false
(and path-ext (equal? (bytes->string/utf-8 path-ext) (~a ext)))))
(define (remove-ext path)
(path-replace-suffix path ""))
(define (add-ext path ext)
(string->path (string-append (path->string path) (.+ ext))))
; find all xexpr names within another xexpr
(define (gather-xexpr-names x)
(cond
[(named-xexpr? x)
(let-values([(name attr content) (xexplode x)])
(flatten (cons name (map gather-xexpr-names content))))]
[else empty]))
; shorthand for define + dynamic require
(define-syntax-rule (define-from module symbol)
(define symbol (dynamic-require module 'symbol)))
; dynamic require or return false if not found
; allows constructs like:
; (or (require-now module symbol) "default value")
(define (require-now module symbol)
(dynamic-require module symbol (thunk #f)))
; define & provide in one easy step
(define-syntax-rule (define/provide name expr ...)
(begin (define name expr ...)(provide name)))
; xexpr->html
(define (xexpr->html x)
; (string-join (map xexpr->string x)))
(xexpr->string x))
(define (html->xexpr . stuff)
(string->xexpr (string-join stuff "")))
; do a set of actions on same item
(define ((tee . procs) thing)
(apply values (map (ƒ(proc)(proc thing)) procs)))
; python-style try/except syntax
(define-syntax-rule (try body (except tests ...))
(with-handlers (tests ...) body))
; xexpr shortcut; map tag across items
(define (map-tag tag-name xs)
(map (ƒ(x) (list tag-name x)) xs))
; trim from beginning & end of list
(define (trim things test)
(dropf-right (dropf things test) test))
; trim whitespace from beginning & end of list
(define (trim-whitespace things)
(trim things whitespace?))
; ----------------------------
; DECODER
; ----------------------------
(define (splice-xexpr-content x [acc '()])
; takes a list and splices top-level sublists into main list
; used by merge function
(cond
[(empty? x) acc]
[(and (xexpr-content? (car x)) (not (named-xexpr? (car x)))) (splice-xexpr-content (cdr x) `(,@acc ,@(car x)))]
[else (splice-xexpr-content (cdr x) `(,@acc ,(car x)))]))
(define (named-xexpr? x)
; meets basic xexpr contract, and is also a list starting with a symbol
; todo: rewrite this using match?
; todo: rewrite this recurively so errors can be pinpointed (for debugging)
(and (xexpr? x) (list? x) (symbol? (car x))))
(define (xexpr-attr-list? x)
(define (attr-pair? x)
; list with two elements: first element is a symbol, second is a string
(and (list? x) (= (length x) 2) (symbol? (car x)) (string? (second x))))
; a list where elements are attr pairs
(and (list? x) (all attr-pair? x)))
(define (xexpr-content? x)
; it's a list whose elements meet xexpr contract
(and (list? x) (all xexpr? x)))
(define (xexpr-has-attrs? x)
(and (named-xexpr? x) (> (length x) 1) (xexpr-attr-list? (second x))))
(define (make-xexpr name (attr empty) (content empty))
(when (not (symbol? name)) (error "make-xexpr: need a name, dude"))
(when (not (xexpr-attr-list? attr))
(error "make-xexpr: attr must be list of attr pairs"))
; todo: fix xexpr-content? test so I can use it here
; (when (not (xexpr-content? content)) content)
(when (not (list? content)) (error "make-xexpr: content must be a list"))
(define xexpr `(,name))
(when (exists? attr) (set! xexpr `(,@xexpr ,attr)))
(when (exists? content) (set! xexpr `(,@xexpr ,@content)))
xexpr)
(define (xexplode x)
(when (not (named-xexpr? x)) (error (format "xexplode: ~v not a named-xexpr" x)))
(define-values (name attr content) (values (car x) empty empty))
(if (xexpr-has-attrs? x)
(set!-values (attr content) (values (second x) (cddr x))) ; attr comes back as a list of lists
(set! content (cdr x))) ; content always comes back as a list
(values name attr content))
; block is a named expression that's not on the inline list
; todo: bear in mind that browsers take the opposite view:
; that only elements on the block list are blocks
; and otherwise are treated as inline
(define (block-xexpr? x)
(and (named-xexpr? x) (not (in? inline-tags (car x)))))
(define (wrap-paragraph x) ; x is a list containing paragraph pieces
; if paragraph is just one block-level xexpr
(if (and (= (length x) 1) (block-xexpr? (car x)))
(car x) ; leave it
`(p ,@x))) ; otherwise wrap in p tag
; wrap initial quotes for hanging punctuation
; todo: improve this
; does not handle <p>“<em>thing</em> properly
(define (wrap-hanging-quotes x) ; x is one paragraph
(define-values (name attr content) (xexplode x))
(cond
[(and (not (empty? content))
(string? (car content))
(> (string-length (car content)) 1))
(let ([new-car
(letrec ([x (car content)]
[first (get x 0)]
[rest (get x 1 'end)])
(cond
[(member first '("\"" ""))
; this has to be span so that it's explicitly
; an inline element. If not,
; things like linebreak detection won't work.
`(span ((class "dquo")) ,(~a #\“) ,rest)]
[(member first '("\'" ""))
`(span ((class "squo")) ,(~a #\) ,rest)]
[else x]))])
(make-xexpr name attr (cons new-car (cdr content))))]
[(and (exists? content) (named-xexpr? (car content)))
(make-xexpr name attr (cons (wrap-hanging-quotes (car content)) (cdr content)))]
[else x]))
; how a list-item break is denoted: three or more newlines
(define (list-item-break? x)
(and (string? x) (regexp-match #rx"^\n\n\n+$" x)))
; how a paragraph break is denoted: two or more newlines
(define (paragraph-break? x)
; (equal? x PARAGRAPH_BREAK) ; obsolete: two newlines only
(and (string? x) (regexp-match #rx"^\n\n+$" x)))
; convert single newline to br tag
; only if neither adjacent tag is a block
; otherwise delete
(define (convert-linebreaks x) ; x is list
(remove-empty
(for/list ([i (len x)])
(cond
[(equal? (get x i) LINE_BREAK)
(if (none block-xexpr? (list (get x (sub1 i)) (get x (add1 i))))
'(br)
'())]
[else (get x i)]))))
; find two or more adjacent newlines and bring them together
; works on any number of newlines
(define (merge-newlines x)
(define (newline? x)
(and (string? x) (equal? "\n" x)))
(define (not-newline? x)
(not (newline? x)))
(define (merge-newlines-inner x [acc '()]) ; x is list
(if (empty? x)
acc
(let-values ([(leading-newlines remainder) (splitf-at x newline?)])
(if (not (empty? leading-newlines))
(merge-newlines-inner remainder `(,@acc ,(string-join leading-newlines "")))
(merge-newlines-inner (dropf remainder not-newline?) `(,@acc ,@(takef remainder not-newline?)))))))
(cond
((list? x) (merge-newlines-inner (map merge-newlines x)))
(else x)))
(define (typogrify string)
; make set of functions for replacers
(define (make-replacers query+subs)
(map (ƒ(q+s) (ƒ(str) (regexp-replace* (first q+s) str (second q+s)))) query+subs))
; just store the query strings + replacement strings
(define dashes
; fix em dashes first, else they'll be mistaken for en dashes
; [\\s ] is whitespace + nonbreaking space
'((#px"[\\s ]*(---|—)[\\s ]*" "") ; em dash
(#px"[\\s ]*(--|)[\\s ]*" ""))) ; en dash
(define smart-quotes
'((#px"(?<=\\w)'(?=\\w)" "") ; apostrophe
(#px"(?<!\\w)'(?=\\w)" "") ; single_at_beginning
(#px"(?<=\\S)'(?!\\w)" "") ; single_at_end
(#px"(?<!\\w)\"(?=\\w)" "") ; double_at_beginning
(#px"(?<=\\S)\"(?!\\w)" ""))) ; double_at_end
; todo: is this transformation obsolete due to css ligatures?
; maybe not, because soft hyphens mess up css ligature function.
; \u00AD is a soft hyphen, which might appear in between letters
(define ligatures
'((#px"f\u00AD?i" "")
(#px"f\u00AD?f" "")
(#px"f\u00AD?l" "")
(#px"f\u00AD?f\u00AD?i" "")
(#px"f\u00AD?f\u00AD?l" "")))
; put replacers in desired order here
(define replacers (make-replacers (append dashes smart-quotes)))
; compose goes from last to first, so reverse order
; ((apply compose1 hyphenate-text-soft (reverse replacers)) string))
((apply compose1 (reverse replacers)) string))
; find the last word space and replace it with a nonbreaking space
; doesn't work on weirdo cases that need backtracking, like:
; (define t4 '(p "hello from all the freaks" "at the " (em "factory.")))
; but cures enough problems to be worthwhile.
(define (nonbreaking-last-space x)
(define nbsp #\ ) ; use an Ø if you want to make the results visible
(define minimum-word-length (add1 5)) ; add1 to account for final punctuation
; todo: parameterize this, as it will be different for each project
(define tags-to-pay-attention-to '(p aside)) ; only apply to paragraphs
(define (replace-last-space str)
(if (in? str #\space)
(let ([reversed-str-list (reverse (string->list str))])
(define-values (last-word-chars other-chars)
(splitf-at reversed-str-list (λ(i) (not (eq? i #\space)))))
(list->string (reverse (append last-word-chars
; OK for long words to be on their own line.
(if (< (len last-word-chars) minimum-word-length)
; first char of other-chars will be the space, so use cdr
(cons nbsp (cdr other-chars))
other-chars)))))
str))
(define (find-last-word-space x) ; recursively traverse xexpr
(cond
[(string? x) (replace-last-space x)]
[(named-xexpr? x)
(let-values([(name attr content) (xexplode x)])
(if (> (length content) 0) ; content is list of xexprs
(let-values ([(all-but-last last) (split-at content (sub1 (length content)))])
(make-xexpr name attr `(,@all-but-last ,(find-last-word-space (car last)))))
x))]
[else x]))
(if (in? tags-to-pay-attention-to (car x))
(find-last-word-space x)
x))
(define (prep-paragraph-flow x)
(convert-linebreaks (merge-newlines (trim-whitespace x))))
(define (map-xexpr-content proc x #:only [only-proc (ƒ(x) x)])
; why map and not apply? Because map guarantees a list of the same length.
; whereas apply does not. So it works as an implied constraint.
(if (named-xexpr? x)
(let-values([(name attr content) (xexplode x)])
(make-xexpr name attr (map (ƒ(x) (if (only-proc x) (proc x) x)) content)))
(error "map-xexpr-content: Input is not a named xexpr and has no content:" x)))
; Main decode function
(define (decode x)
(define (&decode x)
(cond
[(named-xexpr? x)
(let-values([(name attr content) (xexplode x)])
(define decoded-x (make-xexpr name attr (&decode content)))
(if (block-xexpr? decoded-x)
; add nonbreaking-last-space to the next line when ready
(wrap-hanging-quotes (nonbreaking-last-space decoded-x)) ; do special processing for block xexprs
decoded-x))]
[(xexpr-content? x) ; a list of xexprs
(let ([x (prep-paragraph-flow x)])
(map &decode (if (any paragraph-break? x) ; need this condition to prevent infinite recursion
(map wrap-paragraph (splitf-at* x paragraph-break?)) ; split into ¶¶
x)))]
[(string? x) (typogrify x)]
[else x]))
(define (stringify x) ; convert numbers to strings
(cond
[(list? x) (map stringify x)]
[(number? x) (~a x)]
[else x]))
(let* ([x (stringify x)]
[x (trim-whitespace x)])
(if (named-xexpr? x)
(&decode x)
;todo: improve this error message, more specific location
; now, it just spits out the whole defective content
(error (format "decode: ~v not a full named-xexpr" x)))))
(define (splitf-at* pieces test)
; split list into list of sublists using test
(define (splitf-at*-inner pieces [acc '()]) ; use acc for tail recursion
(if (empty? pieces)
acc
(let-values ([(item rest)
(splitf-at (dropf pieces test) (compose1 not test))])
(splitf-at*-inner rest `(,@acc ,item)))))
(splitf-at*-inner (trim pieces test)))
(define (make-missing-source-files map-xexpr)
; use cdr to omit body tag
(define source-names (map (ƒ(x) (add-ext (string->path (as-string x)) POLLEN_SOURCE_EXT)) (flatten (cdr map-xexpr))))
(define (make-source-if-missing x)
(if (not (file-exists? x))
(begin
(display-to-file MISSING_FILE_BOILERPLATE x)
(format "Created file: ~a" x))
(format "Already exists: ~a" x)))
(display (string-join (map make-source-if-missing source-names) "\n")))