renamings: convert named-xexpr to tagged-xexpr, name to tag, content to elements

pull/9/head
Matthew Butterick 11 years ago
parent b5df2c48c9
commit 7cce7ae59f

@ -6,7 +6,8 @@
(require (prefix-in scribble: (only-in scribble/decode whitespace?)))
(module+ test (require rackunit))
(require "tools.rkt" "library/html.rkt")
(require "tools.rkt")
(require (prefix-in html: "library/html.rkt"))
(provide (all-defined-out))
@ -55,14 +56,14 @@
'(p "\n" "foo" "\n\n" "bar" (em "\n\n\n"))))
(define block-names block-tags)
(define (register-block-name tag)
(set! block-names (cons tag block-names)))
(define block-tags html:block-tags)
(define (register-block-tag tag)
(set! block-tags (cons tag block-tags)))
;; todo: add native support for list-xexpr
;; decode triple newlines to list items
;; is the named-xexpr a block element (as opposed to inline)
;; is the tagged-xexpr a block element (as opposed to inline)
(define/contract (block-xexpr? x)
(any/c . -> . boolean?)
;; this is a change in behavior since first pollen
@ -70,7 +71,7 @@
;; todo: make sure this is what I want.
;; this is, however, more consistent with browser behavior
;; (browsers assume that tags are inline by default)
((named-xexpr? x) . and . (->boolean ((named-xexpr-name x) . in . block-names))))
((tagged-xexpr? x) . and . (->boolean ((tagged-xexpr-tag x) . in . block-tags))))
(module+ test
(check-true (block-xexpr? '(p "foo")))
@ -129,18 +130,18 @@
;; function to strip metas (or any tag)
(define/contract (extract-tag-from-xexpr tag nx)
(xexpr-name? named-xexpr? . -> . (values named-xexpr? xexpr-content?))
(xexpr-tag? tagged-xexpr? . -> . (values tagged-xexpr? xexpr-elements?))
(define matches '())
(define (extract-tag x)
(cond
[(and (named-xexpr? x) (equal? tag (car x)))
[(and (tagged-xexpr? x) (equal? tag (car x)))
; stash matched tag but return empty value
(begin
(set! matches (cons x matches))
empty)]
[(named-xexpr? x) (let-values([(name attr body) (break-named-xexpr x)])
(make-named-xexpr name attr (extract-tag body)))]
[(xexpr-content? x) (filter-not empty? (map extract-tag x))]
[(tagged-xexpr? x) (let-values([(tag attr body) (break-tagged-xexpr x)])
(make-tagged-xexpr tag attr (extract-tag body)))]
[(xexpr-elements? x) (filter-not empty? (map extract-tag x))]
[else x]))
(values (extract-tag nx) (reverse matches)))
@ -154,42 +155,42 @@
'((meta "foo" "bar") (meta "foo2" "bar2") (meta "foo3" "bar3")))))
;; decoder wireframe
(define/contract (decode nx
#:exclude-xexpr-names [excluded-xexpr-names '()]
#:xexpr-name-proc [xexpr-name-proc (λ(x)x)]
#:exclude-xexpr-tags [excluded-xexpr-tags '()]
#:xexpr-tag-proc [xexpr-tag-proc (λ(x)x)]
#:xexpr-attr-proc [xexpr-attr-proc (λ(x)x)]
#:xexpr-content-proc [xexpr-content-proc (λ(x)x)]
#:xexpr-elements-proc [xexpr-elements-proc (λ(x)x)]
#:block-xexpr-proc [block-xexpr-proc (λ(x)x)]
#:inline-xexpr-proc [inline-xexpr-proc (λ(x)x)]
#:string-proc [string-proc (λ(x)x)]
#:meta-proc [meta-proc (λ(x)x)])
;; use xexpr/c for contract because it gives better error messages
((xexpr/c) (#:exclude-xexpr-names (λ(i) (or (symbol? i) (list? i)))
#:xexpr-name-proc procedure?
((xexpr/c) (#:exclude-xexpr-tags (λ(i) (or (symbol? i) (list? i)))
#:xexpr-tag-proc procedure?
#:xexpr-attr-proc procedure?
#:xexpr-content-proc procedure?
#:xexpr-elements-proc procedure?
#:block-xexpr-proc procedure?
#:inline-xexpr-proc procedure?
#:string-proc procedure?
#:meta-proc procedure?)
. ->* . named-xexpr?)
(when (not (named-xexpr? nx))
(error (format "decode: ~v not a full named-xexpr" nx)))
. ->* . tagged-xexpr?)
(when (not (tagged-xexpr? nx))
(error (format "decode: ~v not a full tagged-xexpr" nx)))
(define metas (list))
(define (&decode x)
(cond
[(named-xexpr? x) (let-values([(name attr content) (break-named-xexpr x)])
(if (name . in . (->list excluded-xexpr-names))
[(tagged-xexpr? x) (let-values([(tag attr elements) (break-tagged-xexpr x)])
(if (tag . in . (->list excluded-xexpr-tags))
x
(let ([decoded-xexpr
(apply make-named-xexpr (map &decode (list name attr content)))])
(apply make-tagged-xexpr (map &decode (list tag attr elements)))])
((if (block-xexpr? decoded-xexpr)
block-xexpr-proc
inline-xexpr-proc) decoded-xexpr))))]
[(xexpr-name? x) (xexpr-name-proc x)]
[(xexpr-tag? x) (xexpr-tag-proc x)]
[(xexpr-attr? x) (xexpr-attr-proc x)]
[(xexpr-content? x) (map &decode (xexpr-content-proc x))]
[(xexpr-elements? x) (map &decode (xexpr-elements-proc x))]
[(string? x) (string-proc x)]
[else x]))

@ -51,12 +51,12 @@
;; One of the annoyances of Scribble is its insistence on decoding.
;; Better just to pass through the minimally processed data.
;; Root is treated as a function.
;; If it's not defined elsewhere, it just hits #%top and becomes a named-xexpr.
;; If it's not defined elsewhere, it just hits #%top and becomes a tagged-xexpr.
(define main (apply root
(append
(cond
[(string? doc) (list doc)] ; doc is probably a list, but might be a single string
[(named-xexpr? doc) (list doc)] ; if it's a single nx, just leave it
[(tagged-xexpr? doc) (list doc)] ; if it's a single nx, just leave it
[(list? doc) doc]) ; if it's nx content, splice it in
(list `(meta "here" ,inner-here))))) ; append inner-here as meta
@ -67,4 +67,4 @@
(module+ main
(print main)
(displayln "")
(displayln (format "named-xexpr? ~a" (named-xexpr? main))))))
(displayln (format "tagged-xexpr? ~a" (tagged-xexpr? main))))))

@ -8,13 +8,13 @@
;; register custom block tags
(register-block-name 'bloq)
(register-block-name 'fooble)
(register-block-tag 'bloq)
(register-block-tag 'fooble)
;; handle meta tags
(define/contract (meta-proc meta)
(meta-xexpr? . -> . named-xexpr?)
(meta-xexpr? . -> . tagged-xexpr?)
`(meta ((name ,(second meta))(content ,(third meta)))))
(module+ test
@ -35,7 +35,7 @@
(define/contract (convert-linebreaks xc #:newline [newline "\n"])
((xexpr-content?) (#:newline string?) . ->* . xexpr-content?)
((xexpr-elements?) (#:newline string?) . ->* . xexpr-elements?)
;; todo: should this test be not block + not whitespace?
(define not-block? (λ(i) (not (block-xexpr? i))))
(filter-not empty?
@ -61,9 +61,9 @@
(check-equal? (convert-linebreaks '("foo" "moo" "bar") #:newline "moo") '("foo" (br) "bar"))
(check-equal? (convert-linebreaks '("foo" "\n\n" "bar")) '("foo" "\n\n" "bar")))
;; prepare content for paragraph testing
;; prepare elements for paragraph testing
(define/contract (prep-paragraph-flow xc)
(xexpr-content? . -> . xexpr-content?)
(xexpr-elements? . -> . xexpr-elements?)
(convert-linebreaks (merge-newlines (trim xc whitespace?))))
(module+ test
@ -72,14 +72,14 @@
;; apply paragraph tag
(define/contract (wrap-paragraph xc #:tag [tag 'p])
((xexpr-content?) (#:tag symbol?) . ->* . block-xexpr?)
((xexpr-elements?) (#:tag symbol?) . ->* . block-xexpr?)
(match xc
[(list (? block-xexpr? bx)) bx] ; leave a single block xexpr alone
[else (make-named-xexpr tag empty xc)])) ; otherwise wrap in p tag
[else (make-tagged-xexpr tag empty xc)])) ; otherwise wrap in p tag
(module+ test
(check-equal? (wrap-paragraph '("foo" "bar")) '(p "foo" "bar"))
(check-equal? (begin (register-block-name 'para) (wrap-paragraph #:tag 'para '("foo" "bar")))
(check-equal? (begin (register-block-tag 'para) (wrap-paragraph #:tag 'para '("foo" "bar")))
'(para "foo" "bar"))
(check-equal? (wrap-paragraph '((p "bar" "foo"))) '(p "bar" "foo"))
(check-equal? (wrap-paragraph '((div "bar" "foo") "Hi" )) '(p (div "bar" "foo") "Hi")))
@ -87,42 +87,44 @@
;; detect paragraphs
;; todo: unit tests
(define/contract (xexpr-content-proc content)
(xexpr-content? . -> . xexpr-content?)
(let ([content (prep-paragraph-flow content)])
(if (ormap paragraph-break? content) ; need this condition to prevent infinite recursion
(map wrap-paragraph (splitf-at* content paragraph-break?)) ; split into ¶¶
content)))
(define/contract (xexpr-elements-proc elements)
(xexpr-elements? . -> . xexpr-elements?)
(let ([elements (prep-paragraph-flow elements)])
(if (ormap paragraph-break? elements) ; need this condition to prevent infinite recursion
(map wrap-paragraph (splitf-at* elements paragraph-break?)) ; split into ¶¶
elements)))
;; insert nbsp between last two words
(define/contract (nonbreaking-last-space x #:nbsp-char [nbsp #\ ])
((named-xexpr?) (#:nbsp-char char?) . ->* . named-xexpr?)
(define minimum-word-length (add1 5)) ; add1 to account for final punctuation
; todo: parameterize this, as it will be different for each project
(define/contract (nonbreaking-last-space x
#:nbsp [nbsp (->string #\u00A0)]
#:minimum-word-length [minimum-word-length 6])
((tagged-xexpr?) (#:nbsp string? #:minimum-word-length integer?) . ->* . tagged-xexpr?)
;; todo: parameterize this, as it will be different for each project
(define tags-to-pay-attention-to '(p aside)) ; only apply to paragraphs
(define (replace-last-space str)
(if (#\space . in . str)
(let ([reversed-str-list (reverse (string->list str))])
(let ([reversed-str-list (reverse (string->list str))]
[reversed-nbsp (reverse (string->list nbsp))])
(define-values (last-word-chars other-chars)
(splitf-at reversed-str-list (λ(i) (not (eq? i #\space)))))
(list->string (reverse (append last-word-chars
; OK for long words to be on their own line.
(if (< (len last-word-chars) minimum-word-length)
; first char of other-chars will be the space, so use cdr
(cons nbsp (cdr other-chars))
(append reversed-nbsp (cdr other-chars))
other-chars)))))
str))
(define (find-last-word-space x) ; recursively traverse xexpr
(cond
[(string? x) (replace-last-space x)]
[(named-xexpr? x)
(let-values([(name attr content) (break-named-xexpr x)])
(if (> (length content) 0) ; content is list of xexprs
(let-values ([(all-but-last last) (split-at content (sub1 (length content)))])
(make-named-xexpr name attr `(,@all-but-last ,(find-last-word-space (car last)))))
[(tagged-xexpr? x)
(let-values([(tag attr elements) (break-tagged-xexpr x)])
(if (> (length elements) 0) ; elements is list of xexprs
(let-values ([(all-but-last last) (split-at elements (sub1 (length elements)))])
(make-tagged-xexpr tag attr `(,@all-but-last ,(find-last-word-space (car last)))))
x))]
[else x]))
@ -132,23 +134,26 @@
;; todo: make some tougher tests, it gets flaky with edge cases
(module+ test
(check-equal? (nonbreaking-last-space '(p "Hi there")) '(p "Hi there")) ; nbsp in between last two words
(check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp-char #\Ø) '(p "HiØthere")) ; but let's make it visible
(check-equal? (nonbreaking-last-space '(p "Hi here" (em "ho there")) #:nbsp-char #\Ø) '(p "Hi here" (em "hoØthere"))))
(check-equal? (nonbreaking-last-space '(p "Hi there")) '(p "Hi there")) ; nbsp in between last two words
(check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "Ø") '(p "HiØthere")) ; but let's make it visible
(check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "_up_") '(p "Hi_up_there"))
(check-equal? (nonbreaking-last-space '(p "Hi there") #:nbsp "_up_" #:minimum-word-length 3)
'(p "Hi there"))
(check-equal? (nonbreaking-last-space '(p "Hi here" (em "ho there")) #:nbsp "Ø") '(p "Hi here" (em "hoØthere"))))
; wrap initial quotes for hanging punctuation
; todo: improve this
; does not handle <p>“<em>thing</em> properly
(define/contract (wrap-hanging-quotes nx)
(named-xexpr? . -> . named-xexpr?)
(tagged-xexpr? . -> . tagged-xexpr?)
(define two-char-string? (λ(i) (and (string? i) (>= (len i) 2))))
(define-values (name attr content) (break-named-xexpr nx))
(define new-car-content
(match (car content)
[(? two-char-string? str)
(define str-first (get str 0))
(define str-rest (get str 1 'end))
(define-values (tag attr elements) (break-tagged-xexpr nx))
(define new-car-elements
(match (car elements)
[(? two-char-string? tcs)
(define str-first (get tcs 0))
(define str-rest (get tcs 1 'end))
(cond
[(str-first . in . '("\"" ""))
;; can wrap with any inline tag
@ -156,10 +161,10 @@
`(hang-double-quote ,(->string #\“) ,str-rest)]
[(str-first . in . '("\'" ""))
`(hang-single-quote ,(->string #\) ,str-rest)]
[else str])]
[(? named-xexpr? nx) (wrap-hanging-quotes nx)]
[else (car content)]))
(make-named-xexpr name attr (cons new-car-content (cdr content))))
[else tcs])]
[(? tagged-xexpr? nx) (wrap-hanging-quotes nx)]
[else (car elements)]))
(make-tagged-xexpr tag attr (cons new-car-elements (cdr elements))))
(module+ test
@ -168,7 +173,7 @@
(define (block-xexpr-proc bx)
(named-xexpr? . -> . named-xexpr?)
(tagged-xexpr? . -> . tagged-xexpr?)
(wrap-hanging-quotes (nonbreaking-last-space bx)))
@ -216,12 +221,12 @@
(define (root . items)
(named-xexpr? . -> . named-xexpr?)
(tagged-xexpr? . -> . tagged-xexpr?)
(decode (cons 'root items)
; #:exclude-xexpr-names 'em
; #:xexpr-name-proc [xexpr-name-proc (λ(x)x)]
; #:exclude-xexpr-tags 'em
; #:xexpr-tag-proc [xexpr-tag-proc (λ(x)x)]
; #:xexpr-attr-proc [xexpr-attr-proc (λ(x)x)]
#:xexpr-content-proc xexpr-content-proc
#:xexpr-elements-proc xexpr-elements-proc
#:block-xexpr-proc block-xexpr-proc
; #:inline-xexpr-proc [inline-xexpr-proc (λ(x)x)]
#:string-proc string-proc

@ -58,8 +58,8 @@
(check-not-equal? (remove-all-ext foo.bar.txt-path) foo.bar-path) ; removes more than one ext
(check-equal? (remove-all-ext foo.bar.txt-path) foo-path))
;; is it an xexpr name?
(define/contract (xexpr-name? x)
;; is it an xexpr tag?
(define/contract (xexpr-tag? x)
(any/c . -> . boolean?)
(symbol? x))
@ -81,41 +81,41 @@
;; is it xexpr content?
(define/contract (xexpr-content? x)
(define/contract (xexpr-elements? x)
(any/c . -> . boolean?)
(match x
;; this is more strict than xexpr definition in xml module
;; don't allow symbols or numbers to be part of content
[(list elem ...) (andmap (λ(i) (or (string? i) (named-xexpr? i))) elem)]
[(list elem ...) (andmap (λ(i) (or (string? i) (tagged-xexpr? i))) elem)]
[else #f]))
(module+ test
(check-true (xexpr-content? '("p" "foo" "123")))
(check-false (xexpr-content? "foo")) ; not a list
(check-false (xexpr-content? '("p" "foo" 123))) ; includes number
(check-false (xexpr-content? '(p "foo" "123"))) ; includes symbol
(check-false (xexpr-content? '(((key "value")) "foo" "bar"))) ; includes attr
(check-false (xexpr-content? '("foo" "bar" ((key "value")))))) ; malformed
(check-true (xexpr-elements? '("p" "foo" "123")))
(check-false (xexpr-elements? "foo")) ; not a list
(check-false (xexpr-elements? '("p" "foo" 123))) ; includes number
(check-false (xexpr-elements? '(p "foo" "123"))) ; includes symbol
(check-false (xexpr-elements? '(((key "value")) "foo" "bar"))) ; includes attr
(check-false (xexpr-elements? '("foo" "bar" ((key "value")))))) ; malformed
;; is it a named x-expression?
;; todo: rewrite this recurively so errors can be pinpointed (for debugging)
(define/contract (named-xexpr? x)
(define/contract (tagged-xexpr? x)
(any/c . -> . boolean?)
(and (xexpr? x) ; meets basic xexpr contract
(match x
[(list (? symbol? name) rest ...) ; is a list starting with a symbol
(or (xexpr-content? rest) ; the rest is content or ...
(and (xexpr-attr? (car rest)) (xexpr-content? (cdr rest))))] ; attr + content
(or (xexpr-elements? rest) ; the rest is content or ...
(and (xexpr-attr? (car rest)) (xexpr-elements? (cdr rest))))] ; attr + content
[else #f])))
(module+ test
(check-true (named-xexpr? '(p "foo" "bar")))
(check-true (named-xexpr? '(p ((key "value")) "foo" "bar")))
(check-false (named-xexpr? "foo")) ; not a list with symbol
(check-false (named-xexpr? '(p "foo" "bar" ((key "value"))))) ; malformed
(check-false (named-xexpr? '("p" "foo" "bar"))) ; no name
(check-false (named-xexpr? '(p 123)))) ; content is a number
(check-true (tagged-xexpr? '(p "foo" "bar")))
(check-true (tagged-xexpr? '(p ((key "value")) "foo" "bar")))
(check-false (tagged-xexpr? "foo")) ; not a list with symbol
(check-false (tagged-xexpr? '(p "foo" "bar" ((key "value"))))) ; malformed
(check-false (tagged-xexpr? '("p" "foo" "bar"))) ; no name
(check-false (tagged-xexpr? '(p 123)))) ; content is a number
@ -125,61 +125,61 @@
(call-with-values (λ() vs) list))
;; create named-xexpr from parts (opposite of break-named-xexpr)
(define/contract (make-named-xexpr name [attr empty] [content empty])
((symbol?) (xexpr-attr? xexpr-content?) . ->* . named-xexpr?)
;; create tagged-xexpr from parts (opposite of break-tagged-xexpr)
(define/contract (make-tagged-xexpr name [attr empty] [content empty])
((symbol?) (xexpr-attr? xexpr-elements?) . ->* . tagged-xexpr?)
(filter-not empty? `(,name ,attr ,@content)))
(module+ test
(check-equal? (make-named-xexpr 'p) '(p))
(check-equal? (make-named-xexpr 'p '((key "value"))) '(p ((key "value"))))
(check-equal? (make-named-xexpr 'p empty '("foo" "bar")) '(p "foo" "bar"))
(check-equal? (make-named-xexpr 'p '((key "value")) (list "foo" "bar"))
(check-equal? (make-tagged-xexpr 'p) '(p))
(check-equal? (make-tagged-xexpr 'p '((key "value"))) '(p ((key "value"))))
(check-equal? (make-tagged-xexpr 'p empty '("foo" "bar")) '(p "foo" "bar"))
(check-equal? (make-tagged-xexpr 'p '((key "value")) (list "foo" "bar"))
'(p ((key "value")) "foo" "bar")))
;; decompose named-xexpr into parts (opposite of make-named-xexpr)
(define/contract (break-named-xexpr nx)
(named-xexpr? . -> . (values symbol? xexpr-attr? xexpr-content?))
;; decompose tagged-xexpr into parts (opposite of make-tagged-xexpr)
(define/contract (break-tagged-xexpr nx)
(tagged-xexpr? . -> . (values symbol? xexpr-attr? xexpr-elements?))
(match
; named-xexpr may or may not have attr
; tagged-xexpr may or may not have attr
; if not, add empty attr so that decomposition only handles one case
(match nx
[(list _ (? xexpr-attr?) _ ...) nx]
[else `(,(car nx) ,empty ,@(cdr nx))])
[(list name attr content ...) (values name attr content)]))
[(list tag attr content ...) (values tag attr content)]))
(module+ test
(check-equal? (values->list (break-named-xexpr '(p)))
(check-equal? (values->list (break-tagged-xexpr '(p)))
(values->list (values 'p empty empty)))
(check-equal? (values->list (break-named-xexpr '(p "foo")))
(check-equal? (values->list (break-tagged-xexpr '(p "foo")))
(values->list (values 'p empty '("foo"))))
(check-equal? (values->list (break-named-xexpr '(p ((key "value")))))
(check-equal? (values->list (break-tagged-xexpr '(p ((key "value")))))
(values->list (values 'p '((key "value")) empty)))
(check-equal? (values->list (break-named-xexpr '(p ((key "value")) "foo")))
(check-equal? (values->list (break-tagged-xexpr '(p ((key "value")) "foo")))
(values->list (values 'p '((key "value")) '("foo")))))
;; convenience functions to retrieve only one part of named-xexpr
(define (named-xexpr-name nx)
(named-xexpr? . -> . symbol?)
(define-values (name attr content) (break-named-xexpr nx))
name)
;; convenience functions to retrieve only one part of tagged-xexpr
(define (tagged-xexpr-tag nx)
(tagged-xexpr? . -> . xexpr-tag?)
(define-values (tag attr content) (break-tagged-xexpr nx))
tag)
(define (named-xexpr-attr nx)
(named-xexpr? . -> . xexpr-attr?)
(define-values (name attr content) (break-named-xexpr nx))
(define (tagged-xexpr-attr nx)
(tagged-xexpr? . -> . xexpr-attr?)
(define-values (tag attr content) (break-tagged-xexpr nx))
attr)
(define (named-xexpr-content nx)
(named-xexpr? . -> . xexpr-content?)
(define-values (name attr content) (break-named-xexpr nx))
content)
(define (tagged-xexpr-elements nx)
(tagged-xexpr? . -> . xexpr-elements?)
(define-values (tag attrt elements) (break-tagged-xexpr nx))
elements)
(module+ test
(check-equal? (named-xexpr-name '(p ((key "value"))"foo" "bar" (em "square"))) 'p)
(check-equal? (named-xexpr-attr '(p ((key "value"))"foo" "bar" (em "square"))) '((key "value")))
(check-equal? (named-xexpr-content '(p ((key "value"))"foo" "bar" (em "square")))
(check-equal? (tagged-xexpr-tag '(p ((key "value"))"foo" "bar" (em "square"))) 'p)
(check-equal? (tagged-xexpr-attr '(p ((key "value"))"foo" "bar" (em "square"))) '((key "value")))
(check-equal? (tagged-xexpr-elements '(p ((key "value"))"foo" "bar" (em "square")))
'("foo" "bar" (em "square"))))
@ -214,7 +214,7 @@
(check-equal? (filter-not-tree string? '(p)) '(p))
(check-equal? (filter-not-tree string? '(p "foo" "bar")) '(p))
(check-equal? (filter-not-tree string? '(p "foo" (p "bar"))) '(p (p)))
;(check-equal? (filter-tree (λ(i) (and (named-xexpr? i) (equal? 'em (car i)))) '(p "foo" (em "bar"))) '(p "foo"))
;(check-equal? (filter-tree (λ(i) (and (tagged-xexpr? i) (equal? 'em (car i)))) '(p "foo" (em "bar"))) '(p "foo"))
)

Loading…
Cancel
Save