allow `decode` procedures to return lists of values (which get spliced)

pull/84/head
Matthew Butterick 9 years ago
parent 8d1d0712d3
commit 998a617a38

@ -1,5 +1,5 @@
#lang racket/base #lang racket/base
(require xml txexpr sugar racket/match racket/list (prefix-in html: pollen/html) sugar/test) (require xml txexpr racket/string racket/match racket/list (prefix-in html: pollen/html) sugar/list sugar/container sugar/len sugar/define sugar/coerce sugar/test)
(require "debug.rkt" "world.rkt") (require "debug.rkt" "world.rkt")
@ -20,17 +20,32 @@
[(or (list? x) (hash? x) (vector? x)) (format "~v" x)] ; ok to convert datatypes [(or (list? x) (hash? x) (vector? x)) (format "~v" x)] ; ok to convert datatypes
[else (error)])))) ; but things like procedures should throw an error [else (error)])))) ; but things like procedures should throw an error
(define decode-proc-output-contract (or/c xexpr? (non-empty-listof xexpr?)))
(define multiple-entity-signal (gensym "multiple-entity"))
(define (->list/tx x)
;; same as ->list but catches special case of single txexpr,
;; which is itself a list, but in this case should be wrapped into a list,
;; for use with append-map.
(cond
;; use multiple-entity-signal to distinguish list of entities from txexprs
;; ambiguous example '(copy copy)
;; but with signal, it's '(multiple-entity-signal copy copy)
[(and (pair? x) (eq? (car x) multiple-entity-signal)) (cdr x)]
[(txexpr? x) (list x)]
[else (->list x)]))
;; decoder wireframe ;; decoder wireframe
(define+provide/contract (decode txexpr (define+provide/contract (decode tx-in
#:txexpr-tag-proc [txexpr-tag-proc (λ(x)x)] #:txexpr-tag-proc [txexpr-tag-proc (λ(x)x)]
#:txexpr-attrs-proc [txexpr-attrs-proc (λ(x)x)] #:txexpr-attrs-proc [txexpr-attrs-proc (λ(x)x)]
#:txexpr-elements-proc [txexpr-elements-proc (λ(x)x)] #:txexpr-elements-proc [txexpr-elements-proc (λ(x)x)]
#:block-txexpr-proc [block-txexpr-proc (λ(x)x)] #:block-txexpr-proc [block-txexpr-proc (λ(x)x)]
#:inline-txexpr-proc [inline-txexpr-proc (λ(x)x)] #:inline-txexpr-proc [inline-txexpr-proc (λ(x)x)]
#:string-proc [string-proc (λ(x)x)] #:string-proc [string-proc (λ(x)x)]
#:symbol-proc [symbol-proc (λ(x)x)] #:entity-proc [entity-proc (λ(x)x)]
#:valid-char-proc [valid-char-proc (λ(x)x)]
#:cdata-proc [cdata-proc (λ(x)x)] #:cdata-proc [cdata-proc (λ(x)x)]
#:exclude-tags [excluded-tags '()] #:exclude-tags [excluded-tags '()]
#:exclude-attrs [excluded-attrs '()]) #:exclude-attrs [excluded-attrs '()])
@ -38,17 +53,14 @@
(#:txexpr-tag-proc (txexpr-tag? . -> . txexpr-tag?) (#:txexpr-tag-proc (txexpr-tag? . -> . txexpr-tag?)
#:txexpr-attrs-proc (txexpr-attrs? . -> . txexpr-attrs?) #:txexpr-attrs-proc (txexpr-attrs? . -> . txexpr-attrs?)
#:txexpr-elements-proc (txexpr-elements? . -> . txexpr-elements?) #:txexpr-elements-proc (txexpr-elements? . -> . txexpr-elements?)
#:block-txexpr-proc (block-txexpr? . -> . xexpr?) #:block-txexpr-proc (block-txexpr? . -> . decode-proc-output-contract)
#:inline-txexpr-proc (txexpr? . -> . xexpr?) #:inline-txexpr-proc (txexpr? . -> . decode-proc-output-contract)
#:string-proc (string? . -> . xexpr?) #:string-proc (string? . -> . decode-proc-output-contract)
#:symbol-proc (symbol? . -> . xexpr?) #:entity-proc ((or/c symbol? valid-char?) . -> . decode-proc-output-contract)
#:valid-char-proc (valid-char? . -> . xexpr?) #:cdata-proc (cdata? . -> . decode-proc-output-contract)
#:cdata-proc (cdata? . -> . xexpr?)
#:exclude-tags (listof txexpr-tag?) #:exclude-tags (listof txexpr-tag?)
#:exclude-attrs txexpr-attrs?) . ->* . txexpr?) #:exclude-attrs txexpr-attrs?) . ->* . txexpr?)
(let loop ([x tx-in])
(let loop ([x txexpr])
(cond (cond
[(txexpr? x) (let-values([(tag attrs elements) (txexpr->values x)]) [(txexpr? x) (let-values([(tag attrs elements) (txexpr->values x)])
(if (or (member tag excluded-tags) (ormap (λ(attr) (member attr excluded-attrs)) attrs)) (if (or (member tag excluded-tags) (ormap (λ(attr) (member attr excluded-attrs)) attrs))
@ -59,17 +71,43 @@
(let ([decoded-txexpr (let ([decoded-txexpr
(apply make-txexpr (list (txexpr-tag-proc tag) (apply make-txexpr (list (txexpr-tag-proc tag)
(txexpr-attrs-proc attrs) (txexpr-attrs-proc attrs)
(map loop (txexpr-elements-proc elements))))]) (txexpr-elements-proc (append-map (compose1 ->list/tx loop) elements))))])
((if (block-txexpr? decoded-txexpr) ((if (block-txexpr? decoded-txexpr)
block-txexpr-proc block-txexpr-proc
inline-txexpr-proc) decoded-txexpr))))] inline-txexpr-proc) decoded-txexpr))))]
[(string? x) (string-proc x)] [(string? x) (string-proc x)]
[(symbol? x) (symbol-proc x)] [(or (symbol? x) (valid-char? x))
[(valid-char? x) (valid-char-proc x)] (define result (entity-proc x))
(if (list? result)
;; add a signal to list of multiple entities to avoid downstream ambiguity with txexpr
;; for instance '(copy copy) is a list of entities, but also a txexpr
;; stick a signal on the front, which will be picked up later
(cons multiple-entity-signal result)
result)]
[(cdata? x) (cdata-proc x)] [(cdata? x) (cdata-proc x)]
[else (error "decode: can't decode" x)]))) [else (error "decode: can't decode" x)])))
(module-test-external
(require racket/list txexpr racket/function)
(define (doubler x) (list x x))
(check-equal? (decode #:txexpr-elements-proc identity '(p "foo")) '(p "foo"))
;; can't use doubler on txexpr-elements because it eneds a list, not list of lists
(check-equal? (decode #:txexpr-elements-proc (λ(elems) (append elems elems)) '(p "foo")) '(p "foo" "foo"))
(check-equal? (decode #:block-txexpr-proc identity '(p "foo")) '(p "foo"))
(check-equal? (decode #:block-txexpr-proc doubler '(p "foo")) (list '(p "foo") '(p "foo")))
(check-equal? (decode #:inline-txexpr-proc identity '(p (span "foo"))) '(p (span "foo")))
(check-equal? (decode #:inline-txexpr-proc doubler '(p (span "foo"))) '(p (span "foo") (span "foo")))
(check-equal? (decode #:string-proc identity '(p (span "foo"))) '(p (span "foo")))
(check-equal? (decode #:string-proc doubler '(p (span "foo"))) '(p (span "foo" "foo")))
(check-equal? (decode #:entity-proc identity '(p (span "foo" 'amp))) '(p (span "foo" 'amp)))
(check-equal? (decode #:entity-proc identity '(p 42)) '(p 42))
(check-equal? (decode #:entity-proc doubler '(p 42)) '(p 42 42))
(check-equal? (decode #:entity-proc identity '(p amp)) '(p amp))
(check-equal? (decode #:entity-proc doubler '(p amp)) '(p amp amp))
(check-equal? (decode-elements #:string-proc identity '("foo")) '("foo"))
(check-equal? (decode-elements #:string-proc doubler '("foo")) '("foo" "foo")))
;; it would be nice to not repeat this, but with all the keywords, it's simpler to repeat than do a macro
(define+provide/contract (decode-elements elements (define+provide/contract (decode-elements elements
#:txexpr-tag-proc [txexpr-tag-proc (λ(x)x)] #:txexpr-tag-proc [txexpr-tag-proc (λ(x)x)]
#:txexpr-attrs-proc [txexpr-attrs-proc (λ(x)x)] #:txexpr-attrs-proc [txexpr-attrs-proc (λ(x)x)]
@ -77,8 +115,7 @@
#:block-txexpr-proc [block-txexpr-proc (λ(x)x)] #:block-txexpr-proc [block-txexpr-proc (λ(x)x)]
#:inline-txexpr-proc [inline-txexpr-proc (λ(x)x)] #:inline-txexpr-proc [inline-txexpr-proc (λ(x)x)]
#:string-proc [string-proc (λ(x)x)] #:string-proc [string-proc (λ(x)x)]
#:symbol-proc [symbol-proc (λ(x)x)] #:entity-proc [entity-proc (λ(x)x)]
#:valid-char-proc [valid-char-proc (λ(x)x)]
#:cdata-proc [cdata-proc (λ(x)x)] #:cdata-proc [cdata-proc (λ(x)x)]
#:exclude-tags [excluded-tags '()] #:exclude-tags [excluded-tags '()]
#:exclude-attrs [excluded-attrs '()]) #:exclude-attrs [excluded-attrs '()])
@ -86,14 +123,13 @@
(#:txexpr-tag-proc (txexpr-tag? . -> . txexpr-tag?) (#:txexpr-tag-proc (txexpr-tag? . -> . txexpr-tag?)
#:txexpr-attrs-proc (txexpr-attrs? . -> . txexpr-attrs?) #:txexpr-attrs-proc (txexpr-attrs? . -> . txexpr-attrs?)
#:txexpr-elements-proc (txexpr-elements? . -> . txexpr-elements?) #:txexpr-elements-proc (txexpr-elements? . -> . txexpr-elements?)
#:block-txexpr-proc (block-txexpr? . -> . xexpr?) #:block-txexpr-proc (block-txexpr? . -> . decode-proc-output-contract)
#:inline-txexpr-proc (txexpr? . -> . xexpr?) #:inline-txexpr-proc (txexpr? . -> . decode-proc-output-contract)
#:string-proc (string? . -> . xexpr?) #:string-proc (string? . -> . decode-proc-output-contract)
#:symbol-proc (symbol? . -> . xexpr?) #:entity-proc ((or/c symbol? valid-char?) . -> . decode-proc-output-contract)
#:valid-char-proc (valid-char? . -> . xexpr?) #:cdata-proc (cdata? . -> . decode-proc-output-contract)
#:cdata-proc (cdata? . -> . xexpr?)
#:exclude-tags (listof txexpr-tag?) #:exclude-tags (listof txexpr-tag?)
#:exclude-attrs txexpr-attrs?) . ->* . txexpr-elements?) #:exclude-attrs txexpr-attrs?) . ->* . txexpr?)
(define temp-tag (gensym "temp-tag")) (define temp-tag (gensym "temp-tag"))
(define decode-result (decode `(temp-tag ,@elements) (define decode-result (decode `(temp-tag ,@elements)
@ -103,8 +139,7 @@
#:block-txexpr-proc block-txexpr-proc #:block-txexpr-proc block-txexpr-proc
#:inline-txexpr-proc inline-txexpr-proc #:inline-txexpr-proc inline-txexpr-proc
#:string-proc string-proc #:string-proc string-proc
#:symbol-proc symbol-proc #:entity-proc entity-proc
#:valid-char-proc valid-char-proc
#:cdata-proc cdata-proc #:cdata-proc cdata-proc
#:exclude-tags excluded-tags #:exclude-tags excluded-tags
#:exclude-attrs excluded-attrs)) #:exclude-attrs excluded-attrs))

@ -28,12 +28,11 @@ Another example is conversion of output into a particular data format. Most Poll
[#:txexpr-tag-proc txexpr-tag-proc (txexpr-tag? . -> . txexpr-tag?) (λ(tag) tag)] [#:txexpr-tag-proc txexpr-tag-proc (txexpr-tag? . -> . txexpr-tag?) (λ(tag) tag)]
[#:txexpr-attrs-proc txexpr-attrs-proc (txexpr-attrs? . -> . txexpr-attrs?) (λ(attrs) attrs)] [#:txexpr-attrs-proc txexpr-attrs-proc (txexpr-attrs? . -> . txexpr-attrs?) (λ(attrs) attrs)]
[#:txexpr-elements-proc txexpr-elements-proc (txexpr-elements? . -> . txexpr-elements?) (λ(elements) elements)] [#:txexpr-elements-proc txexpr-elements-proc (txexpr-elements? . -> . txexpr-elements?) (λ(elements) elements)]
[#:block-txexpr-proc block-txexpr-proc (block-txexpr? . -> . xexpr?) (λ(tx) tx)] [#:block-txexpr-proc block-txexpr-proc (block-txexpr? . -> . (or/c xexpr? (non-empty-listof xexpr?))) (λ(tx) tx)]
[#:inline-txexpr-proc inline-txexpr-proc (txexpr? . -> . xexpr?) (λ(tx) tx)] [#:inline-txexpr-proc inline-txexpr-proc (txexpr? . -> . (or/c xexpr? (non-empty-listof xexpr?))) (λ(tx) tx)]
[#:string-proc string-proc (string? . -> . xexpr?) (λ(str) str)] [#:string-proc string-proc (string? . -> . (or/c xexpr? (non-empty-listof xexpr?))) (λ(str) str)]
[#:symbol-proc symbol-proc (symbol? . -> . xexpr?) (λ(sym) sym)] [#:entity-proc entity-proc ((or/c symbol? valid-char?) . -> . (or/c xexpr? (non-empty-listof xexpr?))) (λ(ent) ent)]
[#:valid-char-proc valid-char-proc (valid-char? . -> . xexpr?) (λ(vc) vc)] [#:cdata-proc cdata-proc (cdata? . -> . (or/c xexpr? (non-empty-listof xexpr?))) (λ(cdata) cdata)]
[#:cdata-proc cdata-proc (cdata? . -> . xexpr?) (λ(cdata) cdata)]
[#:exclude-tags tags-to-exclude (listof txexpr-tag?) null] [#:exclude-tags tags-to-exclude (listof txexpr-tag?) null]
[#:exclude-attrs attrs-to-exclude txexpr-attrs? null] [#:exclude-attrs attrs-to-exclude txexpr-attrs? null]
) )
@ -97,20 +96,18 @@ Note that @racket[_txexpr-attrs-proc] will change the attributes of every tagged
] ]
The @racket[_txexpr-elements-proc] argument is a procedure that operates on the list of elements that represents the content of each tagged X-expression. Note that each element of an X-expression is subject to two passes through the decoder: once now, as a member of the list of elements, and also later, through its type-specific decoder (i.e., @racket[_string-proc], @racket[_symbol-proc], and so on). The @racket[_txexpr-elements-proc] argument is a procedure that operates on the list of elements that represents the content of each tagged X-expression. Note that each element of an X-expression is subject to two passes through the decoder: once now, as a member of the list of elements, and also later, through its type-specific decoder (i.e., @racket[_string-proc], @racket[_entity-proc], and so on).
@examples[#:eval my-eval @examples[#:eval my-eval
(define tx '(div "Double" "\n" "toil" amp "trouble")) (define tx '(div "Double" "\n" "toil" amp "trouble"))
(code:comment @#,t{Every element gets doubled ...}) (code:comment @#,t{Every element gets doubled ...})
(decode tx #:txexpr-elements-proc (λ(es) (append-map (λ(e) `(,e ,e)) es))) (decode tx #:txexpr-elements-proc (λ(es) (append-map (λ(e) (list e e)) es)))
(code:comment @#,t{... but only strings get capitalized}) (code:comment @#,t{... but only strings get capitalized})
(decode tx #:txexpr-elements-proc (λ(es) (append-map (λ(e) `(,e ,e)) es)) (decode tx #:txexpr-elements-proc (λ(es) (append-map (λ(e) (list e e)) es))
#:string-proc (λ(s) (string-upcase s))) #:string-proc (λ(s) (string-upcase s)))
] ]
So why do you need @racket[_txexpr-elements-proc]? Because some types of element decoding depend on context, thus it's necessary to handle the elements as a group. For instance, the doubling function above, though useless, requires handling the element list as a whole, because elements are being added. So why do you need @racket[_txexpr-elements-proc]? Because some types of element decoding depend on context, thus it's necessary to handle the elements as a group. For instance, paragraph detection. The behavior is not merely a @racket[map] across each element, because elements are being removed and altered contextually:
A more useful example: paragraph detection. The behavior is not merely a @racket[map] across each element:
@examples[#:eval my-eval @examples[#:eval my-eval
(define (paras tx) (decode tx #:txexpr-elements-proc detect-paragraphs)) (define (paras tx) (decode tx #:txexpr-elements-proc detect-paragraphs))
@ -141,21 +138,58 @@ Of course, if you want block and inline elements to be handled the same way, you
(decode tx #:block-txexpr-proc add-ns #:inline-txexpr-proc add-ns) (decode tx #:block-txexpr-proc add-ns #:inline-txexpr-proc add-ns)
] ]
The @racket[_string-proc], @racket[_symbol-proc], @racket[_valid-char-proc], and @racket[_cdata-proc] arguments are procedures that operate on X-expressions that are strings, symbols, valid-chars, and CDATA, respectively. Deliberately, the output contracts for these procedures accept any kind of X-expression (meaning, the procedure can change the X-expression type). The @racket[_string-proc], @racket[_entity-proc], and @racket[_cdata-proc] arguments are procedures that operate on X-expressions that are strings, entities, and CDATA, respectively. Deliberately, the output contracts for these procedures accept any kind of X-expression (meaning, the procedure can change the X-expression type).
@examples[#:eval my-eval @examples[#:eval my-eval
(code:comment @#,t{A div with string, entity, character, and cdata elements}) (code:comment @#,t{A div with string, entity, and cdata elements})
(define tx `(div "Moe" amp 62 ,(cdata #f #f "3 > 2;"))) (define tx `(div "Moe" amp 62 ,(cdata #f #f "3 > 2;")))
(define rulify (λ(x) '(hr))) (define rulify (λ(x) '(hr)))
(code:comment @#,t{The rulify function is selectively applied to each}) (code:comment @#,t{The rulify function is selectively applied to each})
(print (decode tx #:string-proc rulify)) (print (decode tx #:string-proc rulify))
(print (decode tx #:symbol-proc rulify)) (print (decode tx #:entity-proc rulify))
(print (decode tx #:valid-char-proc rulify))
(print (decode tx #:cdata-proc rulify)) (print (decode tx #:cdata-proc rulify))
] ]
Note that entities come in two flavors — symbolic and numeric — and @racket[_entity-proc] affects both. If you only want to affect one or the other, you can add a test within @racket[_entity-proc]. Symbolic entities can be detected with @racket[symbol?], and numeric entities with @racket[valid-char?]:
@examples[#:eval my-eval
(define tx `(div amp 62))
(define symbolic-detonate (λ(x) (if (symbol? x) 'BOOM x)))
(print (decode tx #:entity-proc symbolic-detonate))
(define numeric-detonate (λ(x) (if (valid-char? x) 'BOOM x)))
(print (decode tx #:entity-proc numeric-detonate))
]
The five previous procedures — @racket[_block-txexpr-proc], @racket[_inline-txexpr-proc], @racket[_string-proc], @racket[_entity-proc], and @racket[_cdata-proc] — can return either a single X-expression, or a list of X-expressions, which will be spliced into the parent at the same point.
For instance, earlier we saw how to double elements by using @racket[_txexpr-elements-proc]. But you can accomplish the same thing on a case-by-case basis by returning a list of values:
@examples[#:eval my-eval
(code:comment @#,t{A div with string, entity, and inline-txexpr elements})
(define tx `(div "Axl" amp (span "Slash")))
(define doubler (λ(x) (list x x)))
(code:comment @#,t{The doubler function is selectively applied to each type of element})
(print (decode tx #:string-proc doubler))
(print (decode tx #:entity-proc doubler))
(print (decode tx #:inline-txexpr-proc doubler))
]
Caution: when returning list values, it's possible to trip over the unavoidable ambiguity between a @racket[txexpr?] and a list of @racket[txexpr-element?]s that happens to begin with a symbolic entity:
@examples[#:eval my-eval
(code:comment @#,t{An ambiguous expression})
(define amb '(guitar "player-name"))
(and (txexpr-elements? amb) (txexpr? amb))
(code:comment @#,t{Ambiguity in context})
(define x '(gnr "Izzy" "Slash"))
(define rockit (λ(str) (list 'guitar str)))
(code:comment @#,t{Expecting '(gnr guitar "Izzy" guitar "Slash") from next line,
but return value will be treated as tagged X-expression})
(decode x #:string-proc rockit)
(code:comment @#,t{Changing the order makes it unambiguous})
(define rockit2 (λ(str) (list str 'guitar)))
(decode x #:string-proc rockit2)
]
The @racket[_tags-to-exclude] argument is a list of tags that will be exempted from decoding. Though you could get the same result by testing the input within the individual decoding functions, that's tedious and potentially slower. The @racket[_tags-to-exclude] argument is a list of tags that will be exempted from decoding. Though you could get the same result by testing the input within the individual decoding functions, that's tedious and potentially slower.
@ -189,12 +223,11 @@ Finally, the @racket[_attrs-to-exclude] argument works the same way as @racket[_
[#:txexpr-tag-proc txexpr-tag-proc (txexpr-tag? . -> . txexpr-tag?) (λ(tag) tag)] [#:txexpr-tag-proc txexpr-tag-proc (txexpr-tag? . -> . txexpr-tag?) (λ(tag) tag)]
[#:txexpr-attrs-proc txexpr-attrs-proc (txexpr-attrs? . -> . txexpr-attrs?) (λ(attrs) attrs)] [#:txexpr-attrs-proc txexpr-attrs-proc (txexpr-attrs? . -> . txexpr-attrs?) (λ(attrs) attrs)]
[#:txexpr-elements-proc txexpr-elements-proc (txexpr-elements? . -> . txexpr-elements?) (λ(elements) elements)] [#:txexpr-elements-proc txexpr-elements-proc (txexpr-elements? . -> . txexpr-elements?) (λ(elements) elements)]
[#:block-txexpr-proc block-txexpr-proc (block-txexpr? . -> . xexpr?) (λ(tx) tx)] [#:block-txexpr-proc block-txexpr-proc (block-txexpr? . -> . (or/c xexpr? (non-empty-listof xexpr?))) (λ(tx) tx)]
[#:inline-txexpr-proc inline-txexpr-proc (txexpr? . -> . xexpr?) (λ(tx) tx)] [#:inline-txexpr-proc inline-txexpr-proc (txexpr? . -> . (or/c xexpr? (non-empty-listof xexpr?))) (λ(tx) tx)]
[#:string-proc string-proc (string? . -> . xexpr?) (λ(str) str)] [#:string-proc string-proc (string? . -> . (or/c xexpr? (non-empty-listof xexpr?))) (λ(str) str)]
[#:symbol-proc symbol-proc (symbol? . -> . xexpr?) (λ(sym) sym)] [#:entity-proc entity-proc ((or/c symbol? valid-char?) . -> . (or/c xexpr? (non-empty-listof xexpr?))) (λ(ent) ent)]
[#:valid-char-proc valid-char-proc (valid-char? . -> . xexpr?) (λ(vc) vc)] [#:cdata-proc cdata-proc (cdata? . -> . (or/c xexpr? (non-empty-listof xexpr?))) (λ(cdata) cdata)]
[#:cdata-proc cdata-proc (cdata? . -> . xexpr?) (λ(cdata) cdata)]
[#:exclude-tags tags-to-exclude (listof txexpr-tag?) null] [#:exclude-tags tags-to-exclude (listof txexpr-tag?) null]
[#:exclude-attrs attrs-to-exclude txexpr-attrs? null] [#:exclude-attrs attrs-to-exclude txexpr-attrs? null]
) )

Loading…
Cancel
Save