cleaned up pmap parsing

pull/9/head
Matthew Butterick 11 years ago
parent dae519a907
commit edb0c49f46

@ -2,6 +2,7 @@
(require racket/list)
(require (planet mb/pollen/tools) (planet mb/pollen/main-helper))
(require (only-in (planet mb/pollen/pmap) pmap-decode))
(require (only-in (planet mb/pollen/predicates) pmap?))
(require (only-in (planet mb/pollen/pollen-file-tools) has-ext?))
(require (only-in (planet mb/pollen/world) POLLEN_MAP_EXT))
(provide (except-out (all-from-out racket/base) #%module-begin)
@ -10,30 +11,35 @@
(define-syntax-rule (module-begin expr ...)
(#%module-begin
; this is here only so that dynamic-rerequire of a pollen module
; transitively reloads the extras also.
; if this isn't here, then dynamic-rerequire can't see them
; and thus they are not tracked for changes.
;; this is here only so that dynamic-rerequire of a pollen module
;; transitively reloads the extras also.
;; if this isn't here, then dynamic-rerequire can't see them
;; and thus they are not tracked for changes.
(require-extras)
; We want our module language to support require & provide
; which are only supported at the module level, so ...
; create a submodule to contain the input
; and export as needed
;; We want our module language to support require & provide
;; which are only supported at the module level, so ...
;; create a submodule to contain the input
;; and export as needed
; doclang2_raw is a clone of scribble/doclang2 with decode disabled
; helpful because it collects & exports content via 'doc
;; doclang2_raw is a clone of scribble/doclang2 with decode disabled
;; helpful because it collects & exports content via 'doc
(module pollen-inner (planet mb/pollen/lang/doclang2_raw)
; use same requires as top of main.rkt
; (can't import them from surrounding module due to submodule rules)
;; use same requires as top of main.rkt
;; (can't import them from surrounding module due to submodule rules)
(require (planet mb/pollen/tools) (planet mb/pollen/main-helper))
(require (only-in (planet mb/pollen/pmap) pmap-subtopic))
(require-extras #:provide #t) ; brings in the project require files
; #%top binding catches ids that aren't defined
; here, convert them to basic xexpr
; #%top is a syntax transformer that returns a function
; λ x captures all the args (vs. λ(x), which only catches one)
; and id is not spliced because it's syntax, not a true variable
;; #%top binding catches ids that aren't defined
;; here, convert them to basic xexpr
;; #%top is a syntax transformer that returns a function
;; λ x captures all the args (vs. λ(x), which only catches one)
;; and id is not spliced because it's syntax, not a true variable
;; WARNING! This is convenient for writing pollen documents
;; (which is why it works this way)
;; but it makes debugging tricky, because an undefined (symbol item ...)
;; is just treated as a valid tagged-xexpr, not an undefined function.
(define-syntax-rule (#%top . id)
(λ x `(id ,@x)))
@ -48,25 +54,35 @@
;; prepare the elements, and append inner-here as meta.
(define all-elements (append
;; append inner-here as meta
;; put it first so it can be overridden by custom meta later on
(list `(meta "here" ,inner-here))
(cond
;; doc is probably a list, but might be a single string
[(string? doc) (list doc)]
[(tagged-xexpr? doc) (list doc)] ; if it's a single nx, just leave it
[(list? doc) doc]) ; if it's nx content, splice it in
(list `(meta "here" ,inner-here)))) ; append inner-here as meta
[(list? doc) doc]))) ; if it's nx content, splice it in
;; split out the metas now (in raw form)
(define-values (main-raw metas-raw)
(extract-tag-from-xexpr 'meta (make-tagged-xexpr 'irrelevant-tag empty all-elements)))
(define-values (metas-raw main-raw)
(split-tag-from-xexpr 'meta (make-tagged-xexpr 'irrelevant-tag empty all-elements)))
(define metas (make-meta-hash metas-raw))
;; Policy: here in the core lang, do as little to main as possible.
;; The point is just to set it up for further processing.
;; One of the annoyances of Scribble is its insistence on decoding.
;; Better just to pass through the minimally processed data.
;; Unlike Scribble, which insists on decoding,
;; Pollen just passes through the minimally processed data.
;; one exception: if file extension marks it as pmap, send it to the pmap decoder instead.
(define main (apply (if ((->path (get metas "here")) . has-ext? . POLLEN_MAP_EXT)
(define source-is-pmap?
;; this tests inner-here (which is always the file name)
;; rather than (get metas 'here) which might have been overridden.
;; Because if it's overridden to something other than *.pmap,
;; pmap processing will fail.
;; This defeats rule that pmap file suffix triggers pmap decoding.
((->path inner-here) . has-ext? . POLLEN_MAP_EXT))
(define main (apply (if source-is-pmap?
pmap-decode
;; most files will go this way.
;; Root is treated as a function.
@ -80,11 +96,14 @@
(module+ main
(displayln ";-------------------------")
(displayln "; pollen decoded 'main")
(displayln (string-append "; pollen decoded 'main" (if source-is-pmap? " (as pmap)" "")))
(displayln ";-------------------------")
main
(displayln "")
(displayln (format "(tagged-xexpr? main) ~a" (tagged-xexpr? main)))
(if source-is-pmap?
(displayln (format "(pmap? main) ~a" (pmap? main)))
(displayln (format "(tagged-xexpr? main) ~a" (tagged-xexpr? main))))
(displayln "")
(displayln ";-------------------------")
(displayln "; pollen 'metas")

@ -10,10 +10,12 @@
(define pmap-file (build-path START_DIR DEFAULT_MAP))
(define pmap-main empty)
; make these independent of local includes
(define (pmap-subtopic topic . subtopics)
(make-tagged-xexpr (->symbol topic) empty (filter-not whitespace? subtopics)))
;; handle pmap-subtopics
(define/contract (pmap-subtopic topic . subtopics)
((string?) #:rest (listof xexpr-element?) . ->* . tagged-xexpr?)
(make-tagged-xexpr (->symbol topic) empty subtopics))
;; todo: tests for pmap-subtopics
;; todo: this ain't a function
@ -25,16 +27,6 @@
(set! files (map remove-ext (filter (λ(x) (has-ext? x POLLEN_SOURCE_EXT)) files)))
(set! pmap-main (make-tagged-xexpr 'map-main empty (map path->string files)))))
;; todo: restrict this test
;; all names must be unique
(define/contract (pmap? x)
(any/c . -> . boolean?)
(and (tagged-xexpr? x)
;; all locations must be unique. Check this by converting x to a list of strings ...
(let ([locations (map ->string (flatten (remove-attrs x)))])
;; and then coercing to set (because set impliedly enforces uniqueness)
;; If set has same number of elements as original, all are unique.
(= (len (apply set locations)) (len locations)))))
;; recursively processes map, converting map locations & their parents into xexprs of this shape:
;; '(location ((parent "parent")))
@ -54,9 +46,21 @@
(module+ test
(define test-pmap-main `(pmap-main "foo" "bar" ,(pmap-subtopic "one" (pmap-subtopic "two" "three"))))
(check-equal? (add-parents test-pmap-main)
(check-equal? (main->pmap test-pmap-main)
'(pmap-main ((parent "")) (foo ((parent "pmap-main"))) (bar ((parent "pmap-main"))) (one ((parent "pmap-main")) (two ((parent "one")) (three ((parent "two"))))))))
;; this sets default input for following functions
(define/contract (main->pmap tx)
(tagged-xexpr? . -> . pmap?)
(add-parents tx))
(define pmap (main->pmap pmap-main))
;; remove parents from map (i.e., just remove attrs)
;; is not the inverse of add-parents, i.e., you do not get back your original input.
(define/contract (remove-parents mt)
@ -69,27 +73,15 @@
(one ((parent "")) (two ((parent "one")) (three ((parent "two")))))))
'(pmap-main (foo) (bar) (one (two (three))))))
;; todo: what is this for?
(define/contract (main->pmap main)
(tagged-xexpr? . -> . pmap?)
(let-values ([(nx metas) (extract-tag-from-xexpr 'meta main)])
(add-parents nx)))
(module+ test
(define mt-pmap `(pmap-main "foo" "bar" ,(pmap-subtopic "one" (pmap-subtopic "two" "three")) (meta "foo" "bar")))
(check-equal? (main->pmap mt-pmap)
'(pmap-main ((parent "")) (foo ((parent "pmap-main"))) (bar ((parent "pmap-main"))) (one ((parent "pmap-main")) (two ((parent "one")) (three ((parent "two"))))))))
(define sample-main `(pmap-root "foo" "bar" ,(pmap-subtopic "one" (pmap-subtopic "two" "three"))))
(check-equal? (main->pmap sample-main)
'(pmap-root ((parent "")) (foo ((parent "pmap-root"))) (bar ((parent "pmap-root"))) (one ((parent "pmap-root")) (two ((parent "one")) (three ((parent "two"))))))))
;; todo: what is this for? to have default input?
(define pmap (main->pmap pmap-main))
(define/contract (pmap-key? x)
(any/c . -> . boolean?)
;; OK for map-key to be #f
(or (symbol? x) (string? x) (eq? x #f)))
;; return the parent of a given name
(define/contract (parent element [pmap pmap])
((pmap-key?) (pmap?) . ->* . (or/c string? boolean?))
@ -255,13 +247,33 @@
(check-equal? (next-page 'one test-pmap) "two")
(check-false (next-page 'three test-pmap)))
(module+ test
#|(module+ test
;; need to parameterize current-directory
;; because pollen main depends on it to find the include functions
(define pm (parameterize ([current-directory "./tests/"])
(main->pmap (dynamic-require "test-pmap.p" 'main))))
(main->pmap (dynamic-require "test.pmap" 'main))))
(check-equal? (previous-page (parent 'printers-and-paper pm) pm) "ligatures"))
|#
(define/contract (pmap-decode . elements)
(() #:rest xexpr-elements? . ->* . any/c)
"hello, this is pmap-decode")
(() #:rest (and/c
;; todo: how to put these contracts under a let?
;; all elements must be valid pmap keys
(flat-named-contract 'valid-pmap-keys
(λ(e) (andmap (λ(x) (pmap-key? #:loud #t x))
(filter-not whitespace? (flatten e)))))
;; they must also be unique
(flat-named-contract 'unique-pmap-keys
(λ(e) (elements-unique? #:loud #t
(map ->string ; to make keys comparable
(filter-not whitespace? (flatten e)))))))
. ->* . pmap?)
(main->pmap (decode (cons 'pmap-root elements)
; #:exclude-xexpr-tags 'em
; #:xexpr-tag-proc [xexpr-tag-proc (λ(x)x)]
; #:xexpr-attr-proc [xexpr-attr-proc (λ(x)x)]
#:xexpr-elements-proc (λ(xs) (filter-not whitespace? xs))
; #:block-xexpr-proc block-xexpr-proc
; #:inline-xexpr-proc [inline-xexpr-proc (λ(x)x)]
)))

@ -1,11 +1,12 @@
#lang racket/base
(require racket/contract racket/match racket/list xml)
(require racket/contract racket/match racket/list xml racket/set)
(require (prefix-in scribble: (only-in scribble/decode whitespace?)))
(require (prefix-in html: "library/html.rkt"))
(require "world.rkt" "readability.rkt" "pollen-file-tools.rkt")
(module+ test (require rackunit))
(provide (all-defined-out)
(all-from-out "pollen-file-tools.rkt"))
@ -112,18 +113,80 @@
(check-false (meta-xexpr? '(meta "key" "value" "foo")))
(check-false (meta-xexpr? '(meta))))
;; exploit uniqueness constraint of set data structure
(define/contract (elements-unique? x #:loud [loud #f])
((any/c) (#:loud boolean?) . ->* . boolean?)
(define result
(cond
[(list? x) (= (len (apply set x)) (len x))]
[(vector? x) (elements-unique? (->list x))]
[(string? x) (elements-unique? (string->list x))]
[else #t]))
(if (and (not result) loud)
;; todo: calculate nonunique keys
(error "Not unique keys:" x)
result))
(module+ test
(check-true (elements-unique? '(1 2 3)))
(check-false (elements-unique? '(1 2 2)))
(check-true (elements-unique? (->vector '(1 2 3))))
(check-false (elements-unique? (->vector '(1 2 2))))
(check-true (elements-unique? "fob"))
(check-false (elements-unique? "foo")))
;; todo: how to restrict this test?
;; pmap requirements are enforced at compile-time.
;; (such as pmap-keys must be unique).
;; (and every element must have a parent attr).
;; otherwise this becomes a rather expensive contract
;; because every function in pmap.rkt uses it
(define/contract (pmap? x)
(any/c . -> . boolean?)
(tagged-xexpr? x))
;; pmap location must represent a possible valid filename
(define/contract (pmap-key? x #:loud [loud #f])
((any/c) (#:loud boolean?) . ->* . boolean?)
;; todo: how to express the fact that the pmap-location must be
;; a valid base name for a file?
;; however, don't restrict it to existing files
;; (author may want to use pmap as wireframe)
(define result
(or (eq? x #f) ; OK for map-key to be #f
(and (or (symbol? x) (string? x))
(not (= (len x) 0)) ; not empty
; no whitespace
(andmap (compose not whitespace?) (map ->string (string->list (->string x)))))))
(if (and (not result) loud)
(error "Not a valid pmap key:" x)
result))
(module+ test
(check-true (pmap-key? #f))
(check-true (pmap-key? "foo-bar"))
(check-true (pmap-key? 'foo-bar))
(check-false (pmap-key? ""))
(check-false (pmap-key? " ")))
;; recursive whitespace test
;; Scribble's version misses whitespace in a list
(define (whitespace? x)
(define/contract (whitespace? x)
(any/c . -> . boolean?)
(cond
[(list? x) (andmap whitespace? x)]
[else (scribble:whitespace? x)]))
[(or (vector? x) (list? x) (set? x)) (andmap whitespace? (->list x))]
[(or (symbol? x) (string? x)) (->boolean (regexp-match #px"^\\s+$" (->string x)))]
[else #f]))
(module+ test
(check-true (whitespace? " "))
(check-false (whitespace? "foo"))
(check-false (whitespace? " ")) ; a nonbreaking space
(check-false (whitespace? 'foo))
(check-false (whitespace? #\Ø))
(check-false (whitespace? " ")) ; a nonbreaking space. todo: why is this so?
(check-true (whitespace? "\n \n"))
(check-true (whitespace? (list "\n" " " "\n")))
(check-true (whitespace? (list "\n" " " "\n" (list "\n" "\n")))))

@ -61,6 +61,15 @@
(check-equal? (->list (set 1 2 3)) '(1 2 3))
(check-equal? (->list "foo") (list "foo")))
;; general way of coercing to vector
(define (->vector x)
(any/c . -> . vector?)
; todo: on bad input, it will pop a list error rather than vector error
(cond
[(vector? x) x]
[else (list->vector (->list x))]))
;; general way of coercing to boolean
(define/contract (->boolean x)

@ -1,26 +0,0 @@
#lang planet mb/pollen
pmap-topic{index
typography-in-ten-minutes
summary-of-key-rules
foreword
introduction
how-to-use
how-to-pay-for-this-book
pmap-topic{why-typography-matters
what-is-typography
where-do-the-rules-come-from}
pmap-topic{type-composition
straight-and-curly-quotes
one-space-between-sentences
trademark-and-copyright-symbols
ligatures}
pmap-topic{appendix
printers-and-paper
how-to-make-a-pdf
typewriter-habits
common-accented-characters
identifying-fonts
bibliography
charter
mb-lectures-and-articles}}

@ -23,4 +23,5 @@ how-to-pay-for-this-book
identifying-fonts
bibliography
charter
mb-lectures-and-articles}
mb-lectures-and-articles}

@ -5,7 +5,6 @@
(require (only-in racket/string string-join))
(require (only-in xml xexpr? xexpr/c))
(require "readability.rkt" "debug.rkt" "predicates.rkt")
(provide (all-defined-out) (all-from-out "readability.rkt" "debug.rkt" "predicates.rkt"))
@ -174,9 +173,10 @@
(check-equal? (map-tree (λ(i) (if (symbol? i) 'foo i)) '(p 1 2 3 (em 4 5))) '(foo 1 2 3 (foo 4 5))))
;; function to strip metas (or any tag)
(define/contract (extract-tag-from-xexpr tag nx)
(xexpr-tag? tagged-xexpr? . -> . (values tagged-xexpr? xexpr-elements?))
;; function to split tag out of tagged-xexpr
(define/contract (split-tag-from-xexpr tag tx)
(xexpr-tag? tagged-xexpr? . -> . (values xexpr-elements? tagged-xexpr? ))
(define matches '())
(define (extract-tag x)
(cond
@ -189,25 +189,28 @@
(make-tagged-xexpr tag attr (extract-tag body)))]
[(xexpr-elements? x) (filter-not empty? (map extract-tag x))]
[else x]))
(values (extract-tag nx) (reverse matches)))
(define tx-extracted (extract-tag tx)) ;; do this first to fill matches
(values (reverse matches) tx-extracted))
(module+ test
(define x '(root (meta "foo" "bar") "hello" "world" (meta "foo2" "bar2")
(define xx '(root (meta "foo" "bar") "hello" "world" (meta "foo2" "bar2")
(em "goodnight" "moon" (meta "foo3" "bar3"))))
(check-equal? (values->list (extract-tag-from-xexpr 'meta x))
(list '(root "hello" "world" (em "goodnight" "moon"))
'((meta "foo" "bar") (meta "foo2" "bar2") (meta "foo3" "bar3")))))
(check-equal? (values->list (split-tag-from-xexpr 'meta xx))
(list '((meta "foo" "bar") (meta "foo2" "bar2") (meta "foo3" "bar3"))
'(root "hello" "world" (em "goodnight" "moon")))))
;; convert list of meta tags to a hash for export from pollen document.
;; every meta is form (meta "key" "value") (enforced by contract)
;; later metas with the same name will override earlier ones.
(define/contract (make-meta-hash mxs)
((listof meta-xexpr?) . -> . hash?)
(apply hash (append-map tagged-xexpr-elements mxs)))
(module+ test
(check-equal? (make-meta-hash '((meta "foo" "bar")(meta "hee" "haw")))
(hash "foo" "bar" "hee" "haw")))
(hash "foo" "bar" "hee" "haw"))
(check-equal? (make-meta-hash '((meta "foo" "bar")(meta "foo" "haw")))
(hash "foo" "haw")))

Loading…
Cancel
Save