cleaned up pmap parsing

pull/9/head
Matthew Butterick 11 years ago
parent dae519a907
commit edb0c49f46

@ -2,6 +2,7 @@
(require racket/list) (require racket/list)
(require (planet mb/pollen/tools) (planet mb/pollen/main-helper)) (require (planet mb/pollen/tools) (planet mb/pollen/main-helper))
(require (only-in (planet mb/pollen/pmap) pmap-decode)) (require (only-in (planet mb/pollen/pmap) pmap-decode))
(require (only-in (planet mb/pollen/predicates) pmap?))
(require (only-in (planet mb/pollen/pollen-file-tools) has-ext?)) (require (only-in (planet mb/pollen/pollen-file-tools) has-ext?))
(require (only-in (planet mb/pollen/world) POLLEN_MAP_EXT)) (require (only-in (planet mb/pollen/world) POLLEN_MAP_EXT))
(provide (except-out (all-from-out racket/base) #%module-begin) (provide (except-out (all-from-out racket/base) #%module-begin)
@ -10,30 +11,35 @@
(define-syntax-rule (module-begin expr ...) (define-syntax-rule (module-begin expr ...)
(#%module-begin (#%module-begin
; this is here only so that dynamic-rerequire of a pollen module ;; this is here only so that dynamic-rerequire of a pollen module
; transitively reloads the extras also. ;; transitively reloads the extras also.
; if this isn't here, then dynamic-rerequire can't see them ;; if this isn't here, then dynamic-rerequire can't see them
; and thus they are not tracked for changes. ;; and thus they are not tracked for changes.
(require-extras) (require-extras)
; We want our module language to support require & provide ;; We want our module language to support require & provide
; which are only supported at the module level, so ... ;; which are only supported at the module level, so ...
; create a submodule to contain the input ;; create a submodule to contain the input
; and export as needed ;; and export as needed
; doclang2_raw is a clone of scribble/doclang2 with decode disabled ;; doclang2_raw is a clone of scribble/doclang2 with decode disabled
; helpful because it collects & exports content via 'doc ;; helpful because it collects & exports content via 'doc
(module pollen-inner (planet mb/pollen/lang/doclang2_raw) (module pollen-inner (planet mb/pollen/lang/doclang2_raw)
; use same requires as top of main.rkt ;; use same requires as top of main.rkt
; (can't import them from surrounding module due to submodule rules) ;; (can't import them from surrounding module due to submodule rules)
(require (planet mb/pollen/tools) (planet mb/pollen/main-helper)) (require (planet mb/pollen/tools) (planet mb/pollen/main-helper))
(require (only-in (planet mb/pollen/pmap) pmap-subtopic))
(require-extras #:provide #t) ; brings in the project require files (require-extras #:provide #t) ; brings in the project require files
; #%top binding catches ids that aren't defined ;; #%top binding catches ids that aren't defined
; here, convert them to basic xexpr ;; here, convert them to basic xexpr
; #%top is a syntax transformer that returns a function ;; #%top is a syntax transformer that returns a function
; λ x captures all the args (vs. λ(x), which only catches one) ;; λ x captures all the args (vs. λ(x), which only catches one)
; and id is not spliced because it's syntax, not a true variable ;; and id is not spliced because it's syntax, not a true variable
;; WARNING! This is convenient for writing pollen documents
;; (which is why it works this way)
;; but it makes debugging tricky, because an undefined (symbol item ...)
;; is just treated as a valid tagged-xexpr, not an undefined function.
(define-syntax-rule (#%top . id) (define-syntax-rule (#%top . id)
(λ x `(id ,@x))) (λ x `(id ,@x)))
@ -48,25 +54,35 @@
;; prepare the elements, and append inner-here as meta. ;; prepare the elements, and append inner-here as meta.
(define all-elements (append (define all-elements (append
;; append inner-here as meta
;; put it first so it can be overridden by custom meta later on
(list `(meta "here" ,inner-here))
(cond (cond
;; doc is probably a list, but might be a single string ;; doc is probably a list, but might be a single string
[(string? doc) (list doc)] [(string? doc) (list doc)]
[(tagged-xexpr? doc) (list doc)] ; if it's a single nx, just leave it [(tagged-xexpr? doc) (list doc)] ; if it's a single nx, just leave it
[(list? doc) doc]) ; if it's nx content, splice it in [(list? doc) doc]))) ; if it's nx content, splice it in
(list `(meta "here" ,inner-here)))) ; append inner-here as meta
;; split out the metas now (in raw form) ;; split out the metas now (in raw form)
(define-values (main-raw metas-raw) (define-values (metas-raw main-raw)
(extract-tag-from-xexpr 'meta (make-tagged-xexpr 'irrelevant-tag empty all-elements))) (split-tag-from-xexpr 'meta (make-tagged-xexpr 'irrelevant-tag empty all-elements)))
(define metas (make-meta-hash metas-raw)) (define metas (make-meta-hash metas-raw))
;; Policy: here in the core lang, do as little to main as possible. ;; Policy: here in the core lang, do as little to main as possible.
;; The point is just to set it up for further processing. ;; The point is just to set it up for further processing.
;; One of the annoyances of Scribble is its insistence on decoding. ;; Unlike Scribble, which insists on decoding,
;; Better just to pass through the minimally processed data. ;; Pollen just passes through the minimally processed data.
;; one exception: if file extension marks it as pmap, send it to the pmap decoder instead. ;; one exception: if file extension marks it as pmap, send it to the pmap decoder instead.
(define main (apply (if ((->path (get metas "here")) . has-ext? . POLLEN_MAP_EXT) (define source-is-pmap?
;; this tests inner-here (which is always the file name)
;; rather than (get metas 'here) which might have been overridden.
;; Because if it's overridden to something other than *.pmap,
;; pmap processing will fail.
;; This defeats rule that pmap file suffix triggers pmap decoding.
((->path inner-here) . has-ext? . POLLEN_MAP_EXT))
(define main (apply (if source-is-pmap?
pmap-decode pmap-decode
;; most files will go this way. ;; most files will go this way.
;; Root is treated as a function. ;; Root is treated as a function.
@ -80,11 +96,14 @@
(module+ main (module+ main
(displayln ";-------------------------") (displayln ";-------------------------")
(displayln "; pollen decoded 'main") (displayln (string-append "; pollen decoded 'main" (if source-is-pmap? " (as pmap)" "")))
(displayln ";-------------------------") (displayln ";-------------------------")
main main
(displayln "") (displayln "")
(displayln (format "(tagged-xexpr? main) ~a" (tagged-xexpr? main)))
(if source-is-pmap?
(displayln (format "(pmap? main) ~a" (pmap? main)))
(displayln (format "(tagged-xexpr? main) ~a" (tagged-xexpr? main))))
(displayln "") (displayln "")
(displayln ";-------------------------") (displayln ";-------------------------")
(displayln "; pollen 'metas") (displayln "; pollen 'metas")

@ -10,10 +10,12 @@
(define pmap-file (build-path START_DIR DEFAULT_MAP)) (define pmap-file (build-path START_DIR DEFAULT_MAP))
(define pmap-main empty) (define pmap-main empty)
; make these independent of local includes ;; handle pmap-subtopics
(define (pmap-subtopic topic . subtopics) (define/contract (pmap-subtopic topic . subtopics)
(make-tagged-xexpr (->symbol topic) empty (filter-not whitespace? subtopics))) ((string?) #:rest (listof xexpr-element?) . ->* . tagged-xexpr?)
(make-tagged-xexpr (->symbol topic) empty subtopics))
;; todo: tests for pmap-subtopics
;; todo: this ain't a function ;; todo: this ain't a function
@ -25,16 +27,6 @@
(set! files (map remove-ext (filter (λ(x) (has-ext? x POLLEN_SOURCE_EXT)) files))) (set! files (map remove-ext (filter (λ(x) (has-ext? x POLLEN_SOURCE_EXT)) files)))
(set! pmap-main (make-tagged-xexpr 'map-main empty (map path->string files))))) (set! pmap-main (make-tagged-xexpr 'map-main empty (map path->string files)))))
;; todo: restrict this test
;; all names must be unique
(define/contract (pmap? x)
(any/c . -> . boolean?)
(and (tagged-xexpr? x)
;; all locations must be unique. Check this by converting x to a list of strings ...
(let ([locations (map ->string (flatten (remove-attrs x)))])
;; and then coercing to set (because set impliedly enforces uniqueness)
;; If set has same number of elements as original, all are unique.
(= (len (apply set locations)) (len locations)))))
;; recursively processes map, converting map locations & their parents into xexprs of this shape: ;; recursively processes map, converting map locations & their parents into xexprs of this shape:
;; '(location ((parent "parent"))) ;; '(location ((parent "parent")))
@ -54,9 +46,21 @@
(module+ test (module+ test
(define test-pmap-main `(pmap-main "foo" "bar" ,(pmap-subtopic "one" (pmap-subtopic "two" "three")))) (define test-pmap-main `(pmap-main "foo" "bar" ,(pmap-subtopic "one" (pmap-subtopic "two" "three"))))
(check-equal? (add-parents test-pmap-main) (check-equal? (main->pmap test-pmap-main)
'(pmap-main ((parent "")) (foo ((parent "pmap-main"))) (bar ((parent "pmap-main"))) (one ((parent "pmap-main")) (two ((parent "one")) (three ((parent "two")))))))) '(pmap-main ((parent "")) (foo ((parent "pmap-main"))) (bar ((parent "pmap-main"))) (one ((parent "pmap-main")) (two ((parent "one")) (three ((parent "two"))))))))
;; this sets default input for following functions
(define/contract (main->pmap tx)
(tagged-xexpr? . -> . pmap?)
(add-parents tx))
(define pmap (main->pmap pmap-main))
;; remove parents from map (i.e., just remove attrs) ;; remove parents from map (i.e., just remove attrs)
;; is not the inverse of add-parents, i.e., you do not get back your original input. ;; is not the inverse of add-parents, i.e., you do not get back your original input.
(define/contract (remove-parents mt) (define/contract (remove-parents mt)
@ -69,27 +73,15 @@
(one ((parent "")) (two ((parent "one")) (three ((parent "two"))))))) (one ((parent "")) (two ((parent "one")) (three ((parent "two")))))))
'(pmap-main (foo) (bar) (one (two (three)))))) '(pmap-main (foo) (bar) (one (two (three))))))
;; todo: what is this for?
(define/contract (main->pmap main)
(tagged-xexpr? . -> . pmap?)
(let-values ([(nx metas) (extract-tag-from-xexpr 'meta main)])
(add-parents nx)))
(module+ test (module+ test
(define mt-pmap `(pmap-main "foo" "bar" ,(pmap-subtopic "one" (pmap-subtopic "two" "three")) (meta "foo" "bar"))) (define sample-main `(pmap-root "foo" "bar" ,(pmap-subtopic "one" (pmap-subtopic "two" "three"))))
(check-equal? (main->pmap mt-pmap) (check-equal? (main->pmap sample-main)
'(pmap-main ((parent "")) (foo ((parent "pmap-main"))) (bar ((parent "pmap-main"))) (one ((parent "pmap-main")) (two ((parent "one")) (three ((parent "two")))))))) '(pmap-root ((parent "")) (foo ((parent "pmap-root"))) (bar ((parent "pmap-root"))) (one ((parent "pmap-root")) (two ((parent "one")) (three ((parent "two"))))))))
;; todo: what is this for? to have default input?
(define pmap (main->pmap pmap-main))
(define/contract (pmap-key? x)
(any/c . -> . boolean?)
;; OK for map-key to be #f
(or (symbol? x) (string? x) (eq? x #f)))
;; return the parent of a given name ;; return the parent of a given name
(define/contract (parent element [pmap pmap]) (define/contract (parent element [pmap pmap])
((pmap-key?) (pmap?) . ->* . (or/c string? boolean?)) ((pmap-key?) (pmap?) . ->* . (or/c string? boolean?))
@ -255,13 +247,33 @@
(check-equal? (next-page 'one test-pmap) "two") (check-equal? (next-page 'one test-pmap) "two")
(check-false (next-page 'three test-pmap))) (check-false (next-page 'three test-pmap)))
(module+ test #|(module+ test
;; need to parameterize current-directory ;; need to parameterize current-directory
;; because pollen main depends on it to find the include functions ;; because pollen main depends on it to find the include functions
(define pm (parameterize ([current-directory "./tests/"]) (define pm (parameterize ([current-directory "./tests/"])
(main->pmap (dynamic-require "test-pmap.p" 'main)))) (main->pmap (dynamic-require "test.pmap" 'main))))
(check-equal? (previous-page (parent 'printers-and-paper pm) pm) "ligatures")) (check-equal? (previous-page (parent 'printers-and-paper pm) pm) "ligatures"))
|#
(define/contract (pmap-decode . elements) (define/contract (pmap-decode . elements)
(() #:rest xexpr-elements? . ->* . any/c) (() #:rest (and/c
"hello, this is pmap-decode") ;; todo: how to put these contracts under a let?
;; all elements must be valid pmap keys
(flat-named-contract 'valid-pmap-keys
(λ(e) (andmap (λ(x) (pmap-key? #:loud #t x))
(filter-not whitespace? (flatten e)))))
;; they must also be unique
(flat-named-contract 'unique-pmap-keys
(λ(e) (elements-unique? #:loud #t
(map ->string ; to make keys comparable
(filter-not whitespace? (flatten e)))))))
. ->* . pmap?)
(main->pmap (decode (cons 'pmap-root elements)
; #:exclude-xexpr-tags 'em
; #:xexpr-tag-proc [xexpr-tag-proc (λ(x)x)]
; #:xexpr-attr-proc [xexpr-attr-proc (λ(x)x)]
#:xexpr-elements-proc (λ(xs) (filter-not whitespace? xs))
; #:block-xexpr-proc block-xexpr-proc
; #:inline-xexpr-proc [inline-xexpr-proc (λ(x)x)]
)))

@ -1,11 +1,12 @@
#lang racket/base #lang racket/base
(require racket/contract racket/match racket/list xml) (require racket/contract racket/match racket/list xml racket/set)
(require (prefix-in scribble: (only-in scribble/decode whitespace?))) (require (prefix-in scribble: (only-in scribble/decode whitespace?)))
(require (prefix-in html: "library/html.rkt")) (require (prefix-in html: "library/html.rkt"))
(require "world.rkt" "readability.rkt" "pollen-file-tools.rkt") (require "world.rkt" "readability.rkt" "pollen-file-tools.rkt")
(module+ test (require rackunit)) (module+ test (require rackunit))
(provide (all-defined-out) (provide (all-defined-out)
(all-from-out "pollen-file-tools.rkt")) (all-from-out "pollen-file-tools.rkt"))
@ -112,18 +113,80 @@
(check-false (meta-xexpr? '(meta "key" "value" "foo"))) (check-false (meta-xexpr? '(meta "key" "value" "foo")))
(check-false (meta-xexpr? '(meta)))) (check-false (meta-xexpr? '(meta))))
;; exploit uniqueness constraint of set data structure
(define/contract (elements-unique? x #:loud [loud #f])
((any/c) (#:loud boolean?) . ->* . boolean?)
(define result
(cond
[(list? x) (= (len (apply set x)) (len x))]
[(vector? x) (elements-unique? (->list x))]
[(string? x) (elements-unique? (string->list x))]
[else #t]))
(if (and (not result) loud)
;; todo: calculate nonunique keys
(error "Not unique keys:" x)
result))
(module+ test
(check-true (elements-unique? '(1 2 3)))
(check-false (elements-unique? '(1 2 2)))
(check-true (elements-unique? (->vector '(1 2 3))))
(check-false (elements-unique? (->vector '(1 2 2))))
(check-true (elements-unique? "fob"))
(check-false (elements-unique? "foo")))
;; todo: how to restrict this test?
;; pmap requirements are enforced at compile-time.
;; (such as pmap-keys must be unique).
;; (and every element must have a parent attr).
;; otherwise this becomes a rather expensive contract
;; because every function in pmap.rkt uses it
(define/contract (pmap? x)
(any/c . -> . boolean?)
(tagged-xexpr? x))
;; pmap location must represent a possible valid filename
(define/contract (pmap-key? x #:loud [loud #f])
((any/c) (#:loud boolean?) . ->* . boolean?)
;; todo: how to express the fact that the pmap-location must be
;; a valid base name for a file?
;; however, don't restrict it to existing files
;; (author may want to use pmap as wireframe)
(define result
(or (eq? x #f) ; OK for map-key to be #f
(and (or (symbol? x) (string? x))
(not (= (len x) 0)) ; not empty
; no whitespace
(andmap (compose not whitespace?) (map ->string (string->list (->string x)))))))
(if (and (not result) loud)
(error "Not a valid pmap key:" x)
result))
(module+ test
(check-true (pmap-key? #f))
(check-true (pmap-key? "foo-bar"))
(check-true (pmap-key? 'foo-bar))
(check-false (pmap-key? ""))
(check-false (pmap-key? " ")))
;; recursive whitespace test ;; recursive whitespace test
;; Scribble's version misses whitespace in a list ;; Scribble's version misses whitespace in a list
(define (whitespace? x) (define/contract (whitespace? x)
(any/c . -> . boolean?)
(cond (cond
[(list? x) (andmap whitespace? x)] [(or (vector? x) (list? x) (set? x)) (andmap whitespace? (->list x))]
[else (scribble:whitespace? x)])) [(or (symbol? x) (string? x)) (->boolean (regexp-match #px"^\\s+$" (->string x)))]
[else #f]))
(module+ test (module+ test
(check-true (whitespace? " ")) (check-true (whitespace? " "))
(check-false (whitespace? "foo")) (check-false (whitespace? "foo"))
(check-false (whitespace? " ")) ; a nonbreaking space (check-false (whitespace? 'foo))
(check-false (whitespace? #\Ø))
(check-false (whitespace? " ")) ; a nonbreaking space. todo: why is this so?
(check-true (whitespace? "\n \n")) (check-true (whitespace? "\n \n"))
(check-true (whitespace? (list "\n" " " "\n"))) (check-true (whitespace? (list "\n" " " "\n")))
(check-true (whitespace? (list "\n" " " "\n" (list "\n" "\n"))))) (check-true (whitespace? (list "\n" " " "\n" (list "\n" "\n")))))

@ -61,6 +61,15 @@
(check-equal? (->list (set 1 2 3)) '(1 2 3)) (check-equal? (->list (set 1 2 3)) '(1 2 3))
(check-equal? (->list "foo") (list "foo"))) (check-equal? (->list "foo") (list "foo")))
;; general way of coercing to vector
(define (->vector x)
(any/c . -> . vector?)
; todo: on bad input, it will pop a list error rather than vector error
(cond
[(vector? x) x]
[else (list->vector (->list x))]))
;; general way of coercing to boolean ;; general way of coercing to boolean
(define/contract (->boolean x) (define/contract (->boolean x)

@ -1,26 +0,0 @@
#lang planet mb/pollen
pmap-topic{index
typography-in-ten-minutes
summary-of-key-rules
foreword
introduction
how-to-use
how-to-pay-for-this-book
pmap-topic{why-typography-matters
what-is-typography
where-do-the-rules-come-from}
pmap-topic{type-composition
straight-and-curly-quotes
one-space-between-sentences
trademark-and-copyright-symbols
ligatures}
pmap-topic{appendix
printers-and-paper
how-to-make-a-pdf
typewriter-habits
common-accented-characters
identifying-fonts
bibliography
charter
mb-lectures-and-articles}}

@ -24,3 +24,4 @@ how-to-pay-for-this-book
bibliography bibliography
charter charter
mb-lectures-and-articles} mb-lectures-and-articles}

@ -5,7 +5,6 @@
(require (only-in racket/string string-join)) (require (only-in racket/string string-join))
(require (only-in xml xexpr? xexpr/c)) (require (only-in xml xexpr? xexpr/c))
(require "readability.rkt" "debug.rkt" "predicates.rkt") (require "readability.rkt" "debug.rkt" "predicates.rkt")
(provide (all-defined-out) (all-from-out "readability.rkt" "debug.rkt" "predicates.rkt")) (provide (all-defined-out) (all-from-out "readability.rkt" "debug.rkt" "predicates.rkt"))
@ -174,9 +173,10 @@
(check-equal? (map-tree (λ(i) (if (symbol? i) 'foo i)) '(p 1 2 3 (em 4 5))) '(foo 1 2 3 (foo 4 5)))) (check-equal? (map-tree (λ(i) (if (symbol? i) 'foo i)) '(p 1 2 3 (em 4 5))) '(foo 1 2 3 (foo 4 5))))
;; function to strip metas (or any tag)
(define/contract (extract-tag-from-xexpr tag nx) ;; function to split tag out of tagged-xexpr
(xexpr-tag? tagged-xexpr? . -> . (values tagged-xexpr? xexpr-elements?)) (define/contract (split-tag-from-xexpr tag tx)
(xexpr-tag? tagged-xexpr? . -> . (values xexpr-elements? tagged-xexpr? ))
(define matches '()) (define matches '())
(define (extract-tag x) (define (extract-tag x)
(cond (cond
@ -189,25 +189,28 @@
(make-tagged-xexpr tag attr (extract-tag body)))] (make-tagged-xexpr tag attr (extract-tag body)))]
[(xexpr-elements? x) (filter-not empty? (map extract-tag x))] [(xexpr-elements? x) (filter-not empty? (map extract-tag x))]
[else x])) [else x]))
(values (extract-tag nx) (reverse matches))) (define tx-extracted (extract-tag tx)) ;; do this first to fill matches
(values (reverse matches) tx-extracted))
(module+ test (module+ test
(define x '(root (meta "foo" "bar") "hello" "world" (meta "foo2" "bar2") (define xx '(root (meta "foo" "bar") "hello" "world" (meta "foo2" "bar2")
(em "goodnight" "moon" (meta "foo3" "bar3")))) (em "goodnight" "moon" (meta "foo3" "bar3"))))
(check-equal? (values->list (extract-tag-from-xexpr 'meta x)) (check-equal? (values->list (split-tag-from-xexpr 'meta xx))
(list '(root "hello" "world" (em "goodnight" "moon")) (list '((meta "foo" "bar") (meta "foo2" "bar2") (meta "foo3" "bar3"))
'((meta "foo" "bar") (meta "foo2" "bar2") (meta "foo3" "bar3"))))) '(root "hello" "world" (em "goodnight" "moon")))))
;; convert list of meta tags to a hash for export from pollen document. ;; convert list of meta tags to a hash for export from pollen document.
;; every meta is form (meta "key" "value") (enforced by contract) ;; every meta is form (meta "key" "value") (enforced by contract)
;; later metas with the same name will override earlier ones.
(define/contract (make-meta-hash mxs) (define/contract (make-meta-hash mxs)
((listof meta-xexpr?) . -> . hash?) ((listof meta-xexpr?) . -> . hash?)
(apply hash (append-map tagged-xexpr-elements mxs))) (apply hash (append-map tagged-xexpr-elements mxs)))
(module+ test (module+ test
(check-equal? (make-meta-hash '((meta "foo" "bar")(meta "hee" "haw"))) (check-equal? (make-meta-hash '((meta "foo" "bar")(meta "hee" "haw")))
(hash "foo" "bar" "hee" "haw"))) (hash "foo" "bar" "hee" "haw"))
(check-equal? (make-meta-hash '((meta "foo" "bar")(meta "foo" "haw")))
(hash "foo" "haw")))

Loading…
Cancel
Save