diff --git a/info.rkt b/info.rkt
index 4980e32..4ec478d 100644
--- a/info.rkt
+++ b/info.rkt
@@ -1,4 +1,4 @@
#lang info
-(define collection "tagged-xexpr")
+(define collection "txexpr")
-(define scribblings '(("scribblings/tagged-xexpr.scrbl" ())))
+(define scribblings '(("scribblings/txexpr.scrbl" ())))
diff --git a/main.rkt b/main.rkt
index a19b140..b0cf7b4 100644
--- a/main.rkt
+++ b/main.rkt
@@ -12,139 +12,184 @@
(provide (contract-out [name contract]))
(define name body ...))]))
-;; a tagged-xexpr consists of a tag, optional attributes, and then elements.
-
-(define+provide/contract (tagged-xexpr-tag? x)
+(define+provide/contract (txexpr-tag? x)
(any/c . -> . boolean?)
(symbol? x))
-
-(define+provide/contract (tagged-xexpr-attr? x)
+(define+provide/contract (txexpr-attr? x)
(any/c . -> . boolean?)
(match x
[(list (? symbol?) (? string?)) #t]
[else #f]))
-(define+provide/contract (tagged-xexpr-attrs? x)
+(define+provide/contract (txexpr-attrs? x)
(any/c . -> . boolean?)
(match x
- [(list (? tagged-xexpr-attr?) ...) #t]
+ [(list (? txexpr-attr?) ...) #t]
[else #f]))
-(define+provide/contract (tagged-xexpr-element? x)
+(define+provide/contract (txexpr-element? x)
(any/c . -> . boolean?)
- (or (string? x) (tagged-xexpr? x) (symbol? x)
+ (or (string? x) (txexpr? x) (symbol? x)
(valid-char? x) (cdata? x)))
-(define+provide/contract (tagged-xexpr-elements? x)
+(define+provide/contract (txexpr-elements? x)
(any/c . -> . boolean?)
(match x
- [(list elem ...) (andmap tagged-xexpr-element? elem)]
+ [(list elem ...) (andmap txexpr-element? elem)]
[else #f]))
;; is it a named x-expression?
;; todo: rewrite this recurively so errors can be pinpointed (for debugging)
-(define+provide/contract (tagged-xexpr? x)
+(define+provide/contract (txexpr? x)
(any/c . -> . boolean?)
(and (xexpr? x) ; meets basic xexpr contract
(match x
- ;; is a list starting with a symbol
- [(list (? symbol? name) rest ...)
- ;; the rest is content or ...
- (or (andmap tagged-xexpr-element? rest)
- ;; attr + content
- (and (tagged-xexpr-attrs? (car rest)) (andmap tagged-xexpr-element? (cdr rest))))]
+ [(list (? symbol? name) rest ...) ;; is a list starting with a symbol
+ (or (empty? rest)
+ (andmap txexpr-element? rest) ;; the rest is content or ...
+ (and (txexpr-attrs? (car rest)) (andmap txexpr-element? (cdr rest))))] ;; attr + content
[else #f])))
-
-(define+provide/contract (make-tagged-xexpr tag [attrs empty] [elements empty])
+(define+provide/contract (make-txexpr tag [attrs empty] [elements empty])
;; todo?: use xexpr/c provides a nicer error message
- ((symbol?) (tagged-xexpr-attrs? (listof tagged-xexpr-element?))
- . ->* . tagged-xexpr?)
+ ((symbol?) (txexpr-attrs? (listof txexpr-element?))
+ . ->* . txexpr?)
(filter-not empty? `(,tag ,attrs ,@elements)))
-(define+provide/contract (tagged-xexpr->values x)
- (tagged-xexpr? . -> .
- (values symbol? tagged-xexpr-attrs? (listof tagged-xexpr-element?)))
+(define+provide/contract (txexpr->values x)
+ (txexpr? . -> .
+ (values symbol? txexpr-attrs? (listof txexpr-element?)))
(match
- ; tagged-xexpr may or may not have attr
+ ; txexpr may or may not have attr
; if not, add empty attr so that decomposition only handles one case
(match x
- [(list _ (? tagged-xexpr-attrs?) _ ...) x]
+ [(list _ (? txexpr-attrs?) _ ...) x]
[else `(,(car x) ,empty ,@(cdr x))])
[(list tag attr content ...) (values tag attr content)]))
-(define+provide/contract (tagged-xexpr->list x)
- (tagged-xexpr? . -> . list?)
- (define-values (tag attrs content) (tagged-xexpr->values x))
+(define+provide/contract (txexpr->list x)
+ (txexpr? . -> . list?)
+ (define-values (tag attrs content) (txexpr->values x))
(list tag attrs content))
-;; convenience functions to retrieve only one part of tagged-xexpr
-(define+provide/contract (tagged-xexpr-tag x)
- (tagged-xexpr? . -> . tagged-xexpr-tag?)
+;; convenience functions to retrieve only one part of txexpr
+(define+provide/contract (get-tag x)
+ (txexpr? . -> . txexpr-tag?)
(car x))
-(define+provide/contract (tagged-xexpr-attrs x)
- (tagged-xexpr? . -> . tagged-xexpr-attrs?)
- (define-values (tag attrs content) (tagged-xexpr->values x))
+(define+provide/contract (get-attrs x)
+ (txexpr? . -> . txexpr-attrs?)
+ (define-values (tag attrs content) (txexpr->values x))
attrs)
-(define+provide/contract (tagged-xexpr-elements x)
- (tagged-xexpr? . -> . (listof tagged-xexpr-element?))
- (define-values (tag attrs elements) (tagged-xexpr->values x))
+(define+provide/contract (get-elements x)
+ (txexpr? . -> . (listof txexpr-element?))
+ (define-values (tag attrs elements) (txexpr->values x))
elements)
;; helpers. we are getting a string or symbol
-(define (->symbol x)
+(define+provide/contract (->txexpr-attr-key x)
+ (can-be-txexpr-attr-key? . -> . txexpr-attr-key?)
(if (string? x) (string->symbol x) x))
-(define (->string x)
+(define+provide/contract (->txexpr-attr-value x)
+ (can-be-txexpr-attr-value? . -> . txexpr-attr-value?)
+ (->string x))
+
+(define+provide/contract (txexpr-attr-key? x)
+ (any/c . -> . boolean?)
+ (symbol? x))
+
+(define+provide/contract (can-be-txexpr-attr-key? x)
+ (any/c . -> . boolean?)
+ (or (symbol? x) (string? x)))
+
+(define+provide/contract (txexpr-attr-value? x)
+ (any/c . -> . boolean?)
+ (string? x))
+
+(define+provide/contract (can-be-txexpr-attr-value? x)
+ (any/c . -> . boolean?)
+ (can-be-txexpr-attr-key? x))
+
+(define+provide/contract (->string x)
+ (any/c . -> . string?)
(if (symbol? x) (symbol->string x) x))
+(define+provide/contract (can-be-txexpr-attrs? x)
+ (any/c . -> . boolean?)
+ ((or/c txexpr-attr? txexpr-attrs? can-be-txexpr-attr-key? can-be-txexpr-attr-value?) x))
-;; convert list of alternating keys & values to attr
-(define+provide/contract (merge-attrs . items)
- (() #:rest (listof (or/c tagged-xexpr-attr? tagged-xexpr-attrs? symbol? string?)) . ->* . tagged-xexpr-attrs?)
-
- ;; need this function to make sure that 'foo and "foo" are treated as the same hash key
- (define (make-attr-list items)
+(define+provide/contract (attrs->hash . items)
+ (() #:rest (listof can-be-txexpr-attrs?) . ->* . hash?)
+ ;; can be liberal with input because they're all just nested key/value pairs
+ ;; but still need this function to make sure that 'foo and "foo" are treated as the same hash key
+ (define (make-key-value-list items)
(if (empty? items)
empty
- (let ([key (->symbol (first items))]
- [value (->string (second items))]
+ (let ([key (->txexpr-attr-key (first items))]
+ [value (->txexpr-attr-value (second items))]
[rest (drop items 2)])
- (append (list key value) (make-attr-list rest)))))
-
- ;; use hash to ensure keys are unique (later values will overwrite earlier)
- (define attr-hash (apply hash (make-attr-list (flatten items))))
- `(,@(map (λ(k) (list k (hash-ref attr-hash k)))
- ;; sort needed for predictable results for unit tests
- (sort (hash-keys attr-hash) (λ(a b) (string (->string a) (->string b)))))))
+ (cons (cons key value) (make-key-value-list rest)))))
+ (make-immutable-hash (make-key-value-list (flatten items))))
+
+(define+provide/contract (hash->attrs hash)
+ (hash? . -> . txexpr-attrs?)
+ (hash-map hash list))
+
+(define+provide/contract (attr-set tx key value)
+ (txexpr? can-be-txexpr-attr-key? can-be-txexpr-attr-value? . -> . txexpr?)
+ (define new-attrs
+ (hash->attrs (hash-set (attrs->hash (get-attrs tx)) (->txexpr-attr-key key) (->txexpr-attr-value value))))
+ (make-txexpr (get-tag tx) new-attrs (get-elements tx)))
+
+
+(define+provide/contract (attr-ref tx key)
+ (txexpr? can-be-txexpr-attr-key? . -> . txexpr-attr-value?)
+ (with-handlers ([exn:fail? (λ(e) (error (format "attr-ref: no value found for key ~v" key)))])
+ (hash-ref (attrs->hash (get-attrs tx)) key)))
+
+;; convert list of alternating keys & values to attr
+(define+provide/contract (merge-attrs . items)
+ (() #:rest (listof can-be-txexpr-attrs?) . ->* . txexpr-attrs?)
+ (define attrs-hash (apply attrs->hash items))
+ ;; sort needed for predictable results for unit tests
+ (define sorted-hash-keys (sort (hash-keys attrs-hash) (λ(a b) (string (->string a) (->string b)))))
+ `(,@(map (λ(key) (list key (hash-ref attrs-hash key))) sorted-hash-keys)))
(define+provide/contract (remove-attrs x)
- (tagged-xexpr? . -> . tagged-xexpr?)
- (match x
- [(? tagged-xexpr?) (let-values ([(tag attr elements) (tagged-xexpr->values x)])
- (make-tagged-xexpr tag empty (remove-attrs elements)))]
- [(? list?) (map remove-attrs x)]
+ (txexpr? . -> . txexpr?)
+ (cond
+ [(txexpr? x) (let-values ([(tag attr elements) (txexpr->values x)])
+ (make-txexpr tag empty (remove-attrs elements)))]
+ [(txexpr-elements? x) (map remove-attrs x)]
[else x]))
+;; todo: exclude-proc will keep things out, but is there a way to keep things in?
+(define+provide/contract (map-elements/exclude proc x exclude-test)
+ (procedure? txexpr? procedure? . -> . txexpr?)
+ (cond
+ [(txexpr? x)
+ (if (exclude-test x)
+ x
+ (let-values ([(tag attr elements) (txexpr->values x)])
+ (make-txexpr tag attr
+ (map (λ(x)(map-elements/exclude proc x exclude-test)) elements))))]
+ ;; externally the function only accepts txexpr,
+ ;; but internally we don't care
+ [else (proc x)]))
+
(define+provide/contract (map-elements proc x)
- (procedure? tagged-xexpr? . -> . tagged-xexpr?)
- (define-values (tag attr elements) (tagged-xexpr->values x))
- (define recursive-proc
- (λ(x)
- (cond
- [(tagged-xexpr? x) (map-elements proc x)]
- [else (proc x)])))
- (make-tagged-xexpr tag attr (map recursive-proc elements)))
+ (procedure? txexpr? . -> . txexpr?)
+ (map-elements/exclude proc x (λ(x) #f)))
diff --git a/scribblings/tagged-xexpr.scrbl b/scribblings/tagged-xexpr.scrbl
deleted file mode 100644
index 7fe577c..0000000
--- a/scribblings/tagged-xexpr.scrbl
+++ /dev/null
@@ -1,264 +0,0 @@
-#lang scribble/manual
-
-@(require scribble/eval (for-label racket "../main.rkt" xml))
-
-@(define my-eval (make-base-eval))
-@(my-eval `(require tagged-xexpr xml))
-
-
-@title{tagged-xexpr}
-
-@author[(author+email "Matthew Butterick" "mb@mbtype.com")]
-
-A set of small but handy functions for improving the readability and reliability of programs that operate on tagged X-expressions (aka tagged-xexprs).
-
-@section{Installation}
-
-At the command line:
-@verbatim{raco pkg install tagged-xexpr}
-
-After that, you can update the package from the command line:
-@verbatim{raco pkg update tagged-xexpr}
-
-@section{What’s a tagged-xexpr?}
-
-It's an X-expression with the following grammar:
-
-@racketgrammar*[
-#:literals (cons list symbol? string? xexpr?)
-[tagged-xexpr (list tag (list attr ...) element ...)
- (cons tag (list element ...))]
-[tag symbol?]
-[attr (list symbol? string?)]
-[element xexpr?]
-]
-
-
-A tagged X-expression is a list with a symbol in the first position — the @italic{tag} — followed by a series of @italic{elements}, which are other X-expressions. Optionally, a tagged X-expression can have a list of @italic{attributes} in the second position.
-
-@examples[#:eval my-eval
-(tagged-xexpr? '(span "Brennan" "Dale"))
-(tagged-xexpr? '(span "Brennan" (em "Richard") "Dale"))
-(tagged-xexpr? '(span [[class "hidden"][id "names"]] "Brennan" "Dale"))
-(tagged-xexpr? '(span lt gt amp))
-(tagged-xexpr? '("We really" "should have" "a tag"))
-(tagged-xexpr? '(span [[class not-quoted]] "Brennan"))
-(tagged-xexpr? '(span [class "hidden"] "Brennan" "Dale"))
- ]
-
-The last one is a common mistake. Because the key–value pair is not enclosed in a @racket[list], it's interpreted as a nested tagged-xexpr within the first tagged-xexpr, as you may not find out until you try to read its attributes:
-
-@margin-note{There's no way of eliminating this ambiguity, short of always requiring an attribute list — empty if necessary — in your tagged-xexpr. See also @racket[xexpr-drop-empty-attributes].}
-
-@examples[#:eval my-eval
-(tagged-xexpr-attrs '(span [class "hidden"] "Brennan" "Dale"))
-(tagged-xexpr-elements '(span [class "hidden"] "Brennan" "Dale"))
-]
-
-Tagged X-expressions are most commonly found in HTML & XML documents. Though the notation is different in Racket, the data structure is identical:
-
-@examples[#:eval my-eval
-(xexpr->string '(span [[id "names"]] "Brennan" (em "Richard") "Dale"))
-(string->xexpr "BrennanRichardDale")
- ]
-
-After converting to and from HTML, we get back the original X-expression. Well, almost. The brackets turned into parentheses — no big deal, since they mean the same thing in Racket. Also, per its usual practice, @racket[string->xexpr] added an empty attribute list after @racket[em]. This is also benign.
-
-@section{Why not just use @exec{match}, @exec{quasiquote}, and so on?}
-
-If you prefer those, please do. But I've found two benefits to using module functions:
-
-@bold{Readability.} In code that already has a lot of matching and quasiquoting going on, these functions make it easy to see where & how tagged-xexprs are being used.
-
-@bold{Reliability.} Because tagged-xexprs come in two close but not quite equal forms, careful coders will always have to take both cases into account.
-
-The programming is trivial, but the annoyance is real.
-
-@section{Interface}
-
-@defmodule[tagged-xexpr]
-
-@deftogether[(
-@defproc[
-(tagged-xexpr?
-[v any/c])
-boolean?]
-
-@defproc[
-(tagged-xexpr-tag?
-[v any/c])
-boolean?]
-
-@defproc[
-(tagged-xexpr-attr?
-[v any/c])
-boolean?]
-
-@defproc[
-(tagged-xexpr-element?
-[v any/c])
-boolean?]
-
-)]
-Predicates for @racket[_tagged-xexpr]s that implement this grammar:
-
-@racketgrammar*[
-#:literals (cons list symbol? string? xexpr?)
-[tagged-xexpr (list tag (list attr ...) element ...)
- (cons tag (list element ...))]
-[tag symbol?]
-[attr (list symbol? string?)]
-[element xexpr?]
-]
-
-@deftogether[(
-
-@defproc[
-(tagged-xexpr-attrs?
-[v any/c])
-boolean?]
-
-@defproc[
-(tagged-xexpr-elements?
-[v any/c])
-boolean?]
-)]
-Shorthand for @code{(listof tagged-xexpr-attr?)} and @code{(listof tagged-xexpr-element?)}.
-
-
-@defproc[
-(tagged-xexpr->values
-[tx tagged-xexpr?])
-(values tagged-xexpr-tag? tagged-xexpr-attrs? tagged-xexpr-elements?)]
-Dissolves a @racket[_tagged-xexpr] into its components and returns all three.
-
-@examples[#:eval my-eval
-(tagged-xexpr->values '(div))
-(tagged-xexpr->values '(div "Hello" (p "World")))
-(tagged-xexpr->values '(div [[id "top"]] "Hello" (p "World")))
-
-]
-
-@defproc[
-(tagged-xexpr->list
-[tx tagged-xexpr?])
-(list tagged-xexpr-tag?
-tagged-xexpr-attrs?
-tagged-xexpr-elements?)]
-Like @racket[tagged-xexpr->values], but returns the three components in a list.
-
-@examples[#:eval my-eval
-(tagged-xexpr->list '(div))
-(tagged-xexpr->list '(div "Hello" (p "World")))
-(tagged-xexpr->list '(div [[id "top"]] "Hello" (p "World")))
-]
-
-@deftogether[(
-@defproc[
-(tagged-xexpr-tag
-[tx tagged-xexpr?])
-tagged-xexpr-tag?]
-
-@defproc[
-(tagged-xexpr-attrs
-[tx tagged-xexpr?])
-tagged-xexpr-attr?]
-
-@defproc[
-(tagged-xexpr-elements
-[tx tagged-xexpr?])
-(listof tagged-xexpr-element?)]
-)]
-Accessor functions for the individual pieces of a @racket[_tagged-xexpr].
-
-@examples[#:eval my-eval
-(tagged-xexpr-tag '(div [[id "top"]] "Hello" (p "World")))
-(tagged-xexpr-attrs '(div [[id "top"]] "Hello" (p "World")))
-(tagged-xexpr-elements '(div [[id "top"]] "Hello" (p "World")))
-]
-
-@defproc[
-(make-tagged-xexpr
-[tag tagged-xexpr-tag?]
-[attrs tagged-xexpr-attrs? @empty]
-[elements tagged-xexpr-elements? @empty])
-tagged-xexpr?]
-Assemble a @racket[_tagged-xexpr] from its parts. If you don't have attributes, but you do have elements, you'll need to pass @racket[empty] as the second argument. Note that unlike @racket[xml->xexpr], if the attribute list is empty, it's not included in the resulting expression.
-
-@examples[#:eval my-eval
-(make-tagged-xexpr 'div)
-(make-tagged-xexpr 'div '() '("Hello" (p "World")))
-(make-tagged-xexpr 'div '[[id "top"]])
-(make-tagged-xexpr 'div '[[id "top"]] '("Hello" (p "World")))
-(define tx '(div [[id "top"]] "Hello" (p "World")))
-(make-tagged-xexpr (tagged-xexpr-tag tx)
-(tagged-xexpr-attrs tx) (tagged-xexpr-elements tx))
-]
-
-@defproc[
-(merge-attrs
-[attrs (listof (or/c tagged-xexpr-attr? tagged-xexpr-attrs?
-symbol? string?))] ...)
-tagged-xexpr-attrs?]
-Combine a series of attributes into a single @racket[_tagged-xexpr-attrs] item. This function addresses three annoyances that surface in working with tagged-xexpr attributes.
-
-@itemlist[#:style 'ordered
-@item{You can pass the attributes in multiple forms. The list of arguments can include single @racket[_xexpr-attr]s, lists of @racket[_xexpr-attr]s (i.e., what you get from @racket[tagged-xexpr-attrs]), or interleaved symbols and strings (each pair will be concatenated into a single @racket[_xexpr-attr]).}
-
-@item{Attributes with the same name are merged, with the later value taking precedence (i.e., @racket[hash] behavior). }
-
-@item{Attributes are sorted in alphabetical order.}]
-
-@examples[#:eval my-eval
-(define tx '(div [[id "top"][class "red"]] "Hello" (p "World")))
-(define tx-attrs (tagged-xexpr-attrs tx))
-tx-attrs
-(merge-attrs tx-attrs 'editable "true")
-(merge-attrs tx-attrs 'id "override-value")
-(define my-attr '(id "another-override"))
-(merge-attrs tx-attrs my-attr)
-(merge-attrs my-attr tx-attrs)
-]
-
-@defproc[
-(remove-attrs
-[tx tagged-xexpr?])
-tagged-xexpr?]
-Recursively remove all attributes.
-
-@examples[#:eval my-eval
-(define tx '(div [[id "top"]] "Hello" (p [[id "lower"]] "World")))
-(remove-attrs tx)
-]
-
-@defproc[
-(map-elements
-[proc procedure?]
-[tx tagged-xexpr?])
-tagged-xexpr?]
-Recursively apply @racket[_proc] to all elements, leaving tags and attributes alone. Using plain @racket[map] will only process elements at the top level of the current @racket[_tagged-xexpr]. Usually that's not what you want.
-
-@examples[#:eval my-eval
-(define tx '(div "Hello!" (p "Welcome to" (strong "Mars"))))
-(define upcaser (λ(x) (if (string? x) (string-upcase x) x)))
-(map upcaser tx)
-(map-elements upcaser tx)
-]
-
-In practice, most @racket[_xexpr-element]s are strings. But woe befalls those who pass string procedures to @racket[map-elements], because an @racket[_xexpr-element] can be any kind of @racket[xexpr?], and an @racket[xexpr?] is not necessarily a string.
-
-@examples[#:eval my-eval
-(define tx '(p "Welcome to" (strong "Mars" amp "Sons")))
-(map-elements string-upcase tx)
-(define upcaser (λ(x) (if (string? x) (string-upcase x) x)))
-(map-elements upcaser tx)
-]
-
-
-
-@section{License & source code}
-
-This module is licensed under the LGPL.
-
-Source repository at @link["http://github.com/mbutterick/tagged-xexpr"]{http://github.com/mbutterick/tagged-xexpr}. Suggestions & corrections welcome.
-
diff --git a/scribblings/txexpr.scrbl b/scribblings/txexpr.scrbl
new file mode 100644
index 0000000..190e116
--- /dev/null
+++ b/scribblings/txexpr.scrbl
@@ -0,0 +1,384 @@
+#lang scribble/manual
+
+@(require scribble/eval (for-label racket "../main.rkt" xml))
+
+@(define my-eval (make-base-eval))
+@(my-eval `(require txexpr xml))
+
+
+@title{txexpr: Tagged X-expressions}
+
+@author[(author+email "Matthew Butterick" "mb@mbtype.com")]
+
+A set of small but handy functions for improving the readability and reliability of programs that operate on tagged X-expressions (for short, @italic{txexpr}s).
+
+@section{Installation}
+
+At the command line:
+@verbatim{raco pkg install txexpr}
+
+After that, you can update the package from the command line:
+@verbatim{raco pkg update txexpr}
+
+@section{What’s a txexpr?}
+
+It's an X-expression with the following grammar:
+
+@racketgrammar*[
+#:literals (cons list symbol? string? xexpr?)
+[txexpr (list tag (list attr ...) element ...)
+ (cons tag (list element ...))]
+[tag symbol?]
+[attr (list key value)]
+[key symbol?]
+[value string?]
+[element xexpr?]
+]
+
+A txexpr is a list with a symbol in the first position — the @italic{tag} — followed by a series of @italic{elements}, which are other X-expressions. Optionally, a txexpr can have a list of @italic{attributes} in the second position.
+
+@examples[#:eval my-eval
+(txexpr? '(span "Brennan" "Dale"))
+(txexpr? '(span "Brennan" (em "Richard") "Dale"))
+(txexpr? '(span [[class "hidden"][id "names"]] "Brennan" "Dale"))
+(txexpr? '(span lt gt amp))
+(txexpr? '("We really" "should have" "a tag"))
+(txexpr? '(span [[class not-quoted]] "Brennan"))
+(txexpr? '(span [class "hidden"] "Brennan" "Dale"))
+ ]
+
+The last one is a common mistake. Because the key–value pair is not enclosed in a @racket[list], it's interpreted as a nested txexpr within the first txexpr, as you may not find out until you try to read its attributes:
+
+@margin-note{There's no way of eliminating this ambiguity, short of always requiring an attribute list — empty if necessary — in your txexpr. See also @racket[xexpr-drop-empty-attributes].}
+
+@examples[#:eval my-eval
+(get-attrs '(span [class "hidden"] "Brennan" "Dale"))
+(get-elements '(span [class "hidden"] "Brennan" "Dale"))
+]
+
+Tagged X-expressions are most commonly found in HTML & XML documents. Though the notation is different in Racket, the data structure is identical:
+
+@examples[#:eval my-eval
+(xexpr->string '(span [[id "names"]] "Brennan" (em "Richard") "Dale"))
+(string->xexpr "BrennanRichardDale")
+]
+
+After converting to and from HTML, we get back the original X-expression. Well, almost. The brackets turned into parentheses — no big deal, since they mean the same thing in Racket. Also, per its usual practice, @racket[string->xexpr] added an empty attribute list after @racket[em]. This is also benign.
+
+@section{Why not just use @exec{match}, @exec{quasiquote}, and so on?}
+
+If you prefer those, please do. But I've found two benefits to using module functions:
+
+@bold{Readability.} In code that already has a lot of matching and quasiquoting going on, these functions make it easy to see where & how txexprs are being used.
+
+@bold{Reliability.} Because txexprs come in two close but not quite equal forms, careful coders will always have to take both cases into account.
+
+The programming is trivial, but the annoyance is real.
+
+@section{Interface}
+
+@defmodule[txexpr]
+
+@deftogether[(
+@defproc[
+(txexpr?
+[v any/c])
+boolean?]
+
+@defproc[
+(txexpr-tag?
+[v any/c])
+boolean?]
+
+@defproc[
+(txexpr-attr?
+[v any/c])
+boolean?]
+
+@defproc[
+(txexpr-attr-key?
+[v any/c])
+boolean?]
+
+@defproc[
+(txexpr-attr-value?
+[v any/c])
+boolean?]
+
+@defproc[
+(txexpr-element?
+[v any/c])
+boolean?]
+
+)]
+Predicates for @racket[_txexpr]s that implement this grammar:
+
+@racketgrammar*[
+#:literals (cons list symbol? string? xexpr?)
+[txexpr (list tag (list attr ...) element ...)
+ (cons tag (list element ...))]
+[tag symbol?]
+[attr (list key value)]
+[key symbol?]
+[value string?]
+[element xexpr?]
+]
+
+@deftogether[(
+
+@defproc[
+(txexpr-attrs?
+[v any/c])
+boolean?]
+
+@defproc[
+(txexpr-elements?
+[v any/c])
+boolean?]
+)]
+Shorthand for @code{(listof txexpr-attr?)} and @code{(listof txexpr-element?)}.
+
+
+@deftogether[(
+
+@defproc[
+(can-be-txexpr-attr-key?
+[v any/c])
+boolean?]
+
+@defproc[
+(can-be-txexpr-attr-value?
+[v any/c])
+boolean?]
+)]
+Predicates for input arguments that are trivially converted to an attribute @racket[_key] or @racket[_value]…
+
+
+@deftogether[(
+
+@defproc[
+(->txexpr-attr-key
+[v can-be-txexpr-attr-key?])
+txexpr-attr-key?]
+
+@defproc[
+(->txexpr-attr-value
+[v can-be-txexpr-attr-value?])
+txexpr-attr-value?]
+)]
+… with these conversion functions.
+
+
+@defproc[
+(txexpr->values
+[tx txexpr?])
+(values txexpr-tag? txexpr-attrs? txexpr-elements?)]
+Dissolves a @racket[_txexpr] into its components and returns all three.
+
+@examples[#:eval my-eval
+(txexpr->values '(div))
+(txexpr->values '(div "Hello" (p "World")))
+(txexpr->values '(div [[id "top"]] "Hello" (p "World")))
+
+]
+
+@defproc[
+(txexpr->list
+[tx txexpr?])
+(list txexpr-tag?
+txexpr-attrs?
+txexpr-elements?)]
+Like @racket[txexpr->values], but returns the three components in a list.
+
+@examples[#:eval my-eval
+(txexpr->list '(div))
+(txexpr->list '(div "Hello" (p "World")))
+(txexpr->list '(div [[id "top"]] "Hello" (p "World")))
+]
+
+@deftogether[(
+@defproc[
+(get-tag
+[tx txexpr?])
+txexpr-tag?]
+
+@defproc[
+(get-attrs
+[tx txexpr?])
+txexpr-attr?]
+
+@defproc[
+(get-elements
+[tx txexpr?])
+(listof txexpr-element?)]
+)]
+Accessor functions for the individual pieces of a @racket[_txexpr].
+
+@examples[#:eval my-eval
+(get-tag '(div [[id "top"]] "Hello" (p "World")))
+(get-attrs '(div [[id "top"]] "Hello" (p "World")))
+(get-elements '(div [[id "top"]] "Hello" (p "World")))
+]
+
+@defproc[
+(make-txexpr
+[tag txexpr-tag?]
+[attrs txexpr-attrs? @empty]
+[elements txexpr-elements? @empty])
+txexpr?]
+Assemble a @racket[_txexpr] from its parts. If you don't have attributes, but you do have elements, you'll need to pass @racket[empty] as the second argument. Note that unlike @racket[xml->xexpr], if the attribute list is empty, it's not included in the resulting expression.
+
+@examples[#:eval my-eval
+(make-txexpr 'div)
+(make-txexpr 'div '() '("Hello" (p "World")))
+(make-txexpr 'div '[[id "top"]])
+(make-txexpr 'div '[[id "top"]] '("Hello" (p "World")))
+(define tx '(div [[id "top"]] "Hello" (p "World")))
+(make-txexpr (get-tag tx)
+(get-attrs tx) (get-elements tx))
+]
+
+@defproc[
+(can-be-txexpr-attrs?
+[v any/c])
+boolean?]
+Predicate for functions that handle @racket[_txexpr-attrs]. Covers values that are easily converted into pairs of @racket[_attr-key] and @racket[_attr-value]. Namely: single @racket[_xexpr-attr]s, lists of @racket[_xexpr-attr]s (i.e., what you get from @racket[get-attrs]), or interleaved symbols and strings (each pair will be concatenated into a single @racket[_xexpr-attr]).
+
+@deftogether[(
+@defproc[
+(attrs->hash [x can-be-txexpr-attrs?] ...)
+hash?]
+
+@defproc[
+(hash->attrs
+[h hash?])
+txexpr-attrs?]
+
+)]
+Convert @racket[_attrs] to an immutable hash, and back again.
+
+@examples[#:eval my-eval
+(define tx '(div [[id "top"][class "red"]] "Hello" (p "World")))
+(attrs->hash (get-attrs tx))
+(hash->attrs '#hash((class . "red") (id . "top")))
+]
+
+@defproc[
+(attr-ref
+[tx txexpr?]
+[key can-be-txexpr-attr-key?])
+txexpr-attr-value?]
+Given a @racket[_key], look up the corresponding @racket[_value] in the attributes of a @racket[_txexpr]. Asking for a nonexistent key produces an error.
+
+@examples[#:eval my-eval
+(attr-ref tx 'class)
+(attr-ref tx 'id)
+(attr-ref tx 'nonexistent-key)
+]
+
+
+@defproc[
+(attr-set
+[tx txexpr?]
+[key can-be-txexpr-attr-key?]
+[value txexpr-attr-value?])
+txexpr?]
+Given a @racket[_txexpr], set the value of attribute @racket[_key] to @racket[_value]. The function returns the updated @racket[_txexpr].
+
+@examples[#:eval my-eval
+(define tx '(div [[class "red"][id "top"]] "Hello" (p "World")))
+(attr-set tx 'id "bottom")
+(attr-set tx 'class "blue")
+(attr-set (attr-set tx 'id "bottom") 'class "blue")
+]
+
+
+@defproc[
+(merge-attrs
+[attrs (listof can-be-txexpr-attrs?)] ...)
+txexpr-attrs?]
+Combine a series of attributes into a single @racket[_txexpr-attrs] item. This function addresses three annoyances that surface in working with txexpr attributes.
+
+@itemlist[#:style 'ordered
+@item{You can pass the attributes in multiple forms. See @racket[can-be-txexpr-attrs?] for further details.}
+
+@item{Attributes with the same name are merged, with the later value taking precedence (i.e., @racket[hash] behavior). }
+
+@item{Attributes are sorted in alphabetical order.}]
+
+@examples[#:eval my-eval
+(define tx '(div [[id "top"][class "red"]] "Hello" (p "World")))
+(define tx-attrs (get-attrs tx))
+tx-attrs
+(merge-attrs tx-attrs 'editable "true")
+(merge-attrs tx-attrs 'id "override-value")
+(define my-attr '(id "another-override"))
+(merge-attrs tx-attrs my-attr)
+(merge-attrs my-attr tx-attrs)
+]
+
+@defproc[
+(remove-attrs
+[tx txexpr?])
+txexpr?]
+Recursively remove all attributes.
+
+@examples[#:eval my-eval
+(define tx '(div [[id "top"]] "Hello" (p [[id "lower"]] "World")))
+(remove-attrs tx)
+]
+
+@defproc[
+(map-elements
+[proc procedure?]
+[tx txexpr?])
+txexpr?]
+Recursively apply @racket[_proc] to all elements, leaving tags and attributes alone. Using plain @racket[map] will only process elements at the top level of the current @racket[_txexpr]. Usually that's not what you want.
+
+@examples[#:eval my-eval
+(define tx '(div "Hello!" (p "Welcome to" (strong "Mars"))))
+(define upcaser (λ(x) (if (string? x) (string-upcase x) x)))
+(map upcaser tx)
+(map-elements upcaser tx)
+]
+
+In practice, most @racket[_xexpr-element]s are strings. But woe befalls those who pass string procedures to @racket[map-elements], because an @racket[_xexpr-element] can be any kind of @racket[xexpr?], and an @racket[xexpr?] is not necessarily a string.
+
+@examples[#:eval my-eval
+(define tx '(p "Welcome to" (strong "Mars" amp "Sons")))
+(map-elements string-upcase tx)
+(define upcaser (λ(x) (if (string? x) (string-upcase x) x)))
+(map-elements upcaser tx)
+]
+
+@defproc[
+(map-elements/exclude
+[proc procedure?]
+[tx txexpr?]
+[exclude-test (txexpr? . -> . boolean?)])
+txexpr?]
+Like @racket[map-elements], but skips any @racket[_txexprs] that evaluate to @racket[#t] under @racket[_exclude-test]. The @racket[_exclude-test] gets a whole txexpr as input, so it can test any of its parts.
+
+@examples[#:eval my-eval
+(define tx '(div "Hello!" (p "Welcome to" (strong "Mars"))))
+(define upcaser (λ(x) (if (string? x) (string-upcase x) x)))
+(map-elements upcaser tx)
+(map-elements/exclude upcaser tx (λ(x) (equal? (get-tag x) 'strong)))
+]
+
+Be careful with the wider consequences of exclusion tests. When @racket[_exclude-test] is true, the @racket[_txexpr] is excluded, but so is everything underneath that @racket[_txexpr]. In other words, there is no way to re-include (un-exclude?) elements nested under an excluded element.
+
+@examples[#:eval my-eval
+(define tx '(div "Hello!" (p "Welcome to" (strong "Mars"))))
+(define upcaser (λ(x) (if (string? x) (string-upcase x) x)))
+(map-elements upcaser tx)
+(map-elements/exclude upcaser tx (λ(x) (equal? (get-tag x) 'p)))
+(map-elements/exclude upcaser tx (λ(x) (equal? (get-tag x) 'div)))
+]
+
+@section{License & source code}
+
+This module is licensed under the LGPL.
+
+Source repository at @link["http://github.com/mbutterick/txexpr"]{http://github.com/mbutterick/txexpr}. Suggestions & corrections welcome.
+
diff --git a/tests.rkt b/tests.rkt
index 600b086..94e2207 100644
--- a/tests.rkt
+++ b/tests.rkt
@@ -1,79 +1,113 @@
#lang racket/base
+(require (for-syntax racket/base))
(require "main.rkt" rackunit)
+
+(define-syntax (values->list stx)
+ (syntax-case stx ()
+ [(_ values-expr) #'(call-with-values (λ () values-expr) list)]))
+
(define empty '())
-;; helper for comparison of values
-;; normal function won't work for this. Has to be syntax-rule
-(define-syntax-rule (values->list vs)
- (call-with-values (λ() vs) list))
-
-(check-true (tagged-xexpr-attrs? '()))
-(check-true (tagged-xexpr-attrs? '((key "value"))))
-(check-true (tagged-xexpr-attrs? '((key "value") (foo "bar"))))
-(check-false (tagged-xexpr-attrs? '((key "value") "foo" "bar"))) ; content, not attr
-(check-false (tagged-xexpr-attrs? '(key "value"))) ; not a nested list
-(check-false (tagged-xexpr-attrs? '(("key" "value")))) ; two strings
-(check-false (tagged-xexpr-attrs? '((key value)))) ; two symbols
-
-(check-true (tagged-xexpr-elements? '("p" "foo" "123")))
-(check-true (tagged-xexpr-elements? '("p" "foo" 123))) ; includes number
-(check-true (tagged-xexpr-elements? '(p "foo" "123"))) ; includes symbol
-(check-false (tagged-xexpr-elements? "foo")) ; not a list
-(check-false (tagged-xexpr-elements? '(((key "value")) "foo" "bar"))) ; includes attr
-(check-false (tagged-xexpr-elements? '("foo" "bar" ((key "value"))))) ; malformed
-
-
-(check-true (tagged-xexpr? '(p "foo" "bar")))
-(check-true (tagged-xexpr? '(p ((key "value")) "foo" "bar")))
-(check-true (tagged-xexpr? '(p 123))) ; content is a number
-(check-false (tagged-xexpr? "foo")) ; not a list with symbol
-(check-false (tagged-xexpr? '(p "foo" "bar" ((key "value"))))) ; malformed
-(check-false (tagged-xexpr? '("p" "foo" "bar"))) ; no name
+(check-true (txexpr-tag?'foo))
+(check-false (txexpr-tag? "foo"))
+(check-false (txexpr-tag? 3))
-(check-equal? (merge-attrs 'foo "bar") '((foo "bar")))
-(check-equal? (merge-attrs '(foo "bar")) '((foo "bar")))
-(check-equal? (merge-attrs '((foo "bar"))) '((foo "bar")))
-(check-equal? (merge-attrs "foo" 'bar) '((foo "bar")))
-(check-equal? (merge-attrs "foo" "bar" "goo" "gar") '((foo "bar")(goo "gar")))
-(check-equal? (merge-attrs (merge-attrs "foo" "bar" "goo" "gar") "hee" "haw")
- '((foo "bar")(goo "gar")(hee "haw")))
-(check-equal? (merge-attrs '((foo "bar")(goo "gar")) "foo" "haw") '((foo "haw")(goo "gar")))
+(check-true (txexpr-attr? '(key "value")))
+(check-false (txexpr-attr? '(key "value" "another")))
+(check-false (txexpr-attr? '(key 0 "value")))
+
+(check-true (txexpr-attrs? '()))
+(check-true (txexpr-attrs? '((key "value"))))
+(check-true (txexpr-attrs? '((key "value") (foo "bar"))))
+(check-false (txexpr-attrs? '((key "value") "foo" "bar"))) ; content, not attr
+(check-false (txexpr-attrs? '(key "value"))) ; not a nested list
+(check-false (txexpr-attrs? '(("key" "value")))) ; two strings
+(check-false (txexpr-attrs? '((key value)))) ; two symbols
+
+(check-true (txexpr-element? "string"))
+(check-true (txexpr-element? 'amp))
+(check-true (txexpr-element? '(p "string")))
+(check-true (txexpr-element? 65)) ;; a valid-char
+(check-false (txexpr-element? 0)) ;; not a valid-char
+
+(check-true (txexpr-elements? '("p" "foo" "123")))
+(check-true (txexpr-elements? '("p" "foo" 123))) ; includes number
+(check-true (txexpr-elements? '(p "foo" "123"))) ; includes symbol
+(check-false (txexpr-elements? "foo")) ; not a list
+(check-false (txexpr-elements? '(((key "value")) "foo" "bar"))) ; includes attr
+(check-false (txexpr-elements? '("foo" "bar" ((key "value"))))) ; malformed
+(check-true (txexpr? '(p)))
+(check-true (txexpr? '(p "foo" "bar")))
+(check-true (txexpr? '(p ((key "value")) "foo" "bar")))
+(check-true (txexpr? '(p 123))) ; content is a number
+(check-false (txexpr? "foo")) ; not a list with symbol
+(check-false (txexpr? '(p "foo" "bar" ((key "value"))))) ; malformed
+(check-false (txexpr? '("p" "foo" "bar"))) ; no name
-(check-equal? (make-tagged-xexpr 'p) '(p))
-(check-equal? (make-tagged-xexpr 'p '((key "value"))) '(p ((key "value"))))
-(check-equal? (make-tagged-xexpr 'p empty '("foo" "bar")) '(p "foo" "bar"))
-(check-equal? (make-tagged-xexpr 'p '((key "value")) (list "foo" "bar"))
+(check-equal? (make-txexpr 'p) '(p))
+(check-equal? (make-txexpr 'p '((key "value"))) '(p ((key "value"))))
+(check-equal? (make-txexpr 'p empty '("foo" "bar")) '(p "foo" "bar"))
+(check-equal? (make-txexpr 'p '((key "value")) (list "foo" "bar"))
'(p ((key "value")) "foo" "bar"))
-(check-equal? (values->list (tagged-xexpr->values '(p)))
+(check-equal? (values->list (txexpr->values '(p)))
(values->list (values 'p empty empty)))
-(check-equal? (values->list (tagged-xexpr->values '(p "foo")))
+(check-equal? (values->list (txexpr->values '(p "foo")))
(values->list (values 'p empty '("foo"))))
-(check-equal? (values->list (tagged-xexpr->values '(p ((key "value")))))
+(check-equal? (values->list (txexpr->values '(p ((key "value")))))
(values->list (values 'p '((key "value")) empty)))
-(check-equal? (values->list (tagged-xexpr->values '(p ((key "value")) "foo")))
+(check-equal? (values->list (txexpr->values '(p ((key "value")) "foo")))
(values->list (values 'p '((key "value")) '("foo"))))
+(check-equal? (values->list (txexpr->values '(p)))
+ (txexpr->list '(p)))
+(check-equal? (values->list (txexpr->values '(p "foo")))
+ (txexpr->list '(p "foo")))
+(check-equal? (values->list (txexpr->values '(p ((key "value")))))
+ (txexpr->list '(p ((key "value")))))
+(check-equal? (values->list (txexpr->values '(p ((key "value")) "foo")))
+ (txexpr->list '(p ((key "value")) "foo")))
-(check-equal? (values->list (tagged-xexpr->values '(p)))
- (tagged-xexpr->list '(p)))
-(check-equal? (values->list (tagged-xexpr->values '(p "foo")))
- (tagged-xexpr->list '(p "foo")))
-(check-equal? (values->list (tagged-xexpr->values '(p ((key "value")))))
- (tagged-xexpr->list '(p ((key "value")))))
-(check-equal? (values->list (tagged-xexpr->values '(p ((key "value")) "foo")))
- (tagged-xexpr->list '(p ((key "value")) "foo")))
-
-(check-equal? (tagged-xexpr-tag '(p ((key "value"))"foo" "bar" (em "square"))) 'p)
-(check-equal? (tagged-xexpr-attrs '(p ((key "value"))"foo" "bar" (em "square"))) '((key "value")))
-(check-equal? (tagged-xexpr-elements '(p ((key "value"))"foo" "bar" (em "square")))
+(check-equal? (get-tag '(p ((key "value"))"foo" "bar" (em "square"))) 'p)
+(check-equal? (get-attrs '(p ((key "value"))"foo" "bar" (em "square"))) '((key "value")))
+(check-equal? (get-elements '(p ((key "value"))"foo" "bar" (em "square")))
'("foo" "bar" (em "square")))
+
+(check-equal? (->txexpr-attr-key "foo") 'foo)
+(check-equal? (->txexpr-attr-key 'foo) 'foo)
+
+(check-equal? (->txexpr-attr-value "foo") "foo")
+(check-equal? (->txexpr-attr-value 'foo) "foo")
+
+(check-equal? (attrs->hash '((foo "bar"))) '#hash((foo . "bar")))
+(check-equal? (attrs->hash '((foo "bar")) '(foo "fraw")) '#hash((foo . "fraw")))
+(check-equal? (attrs->hash '((foo "bar")) '(foo "fraw") 'foo "dog") '#hash((foo . "dog")))
+
+(check-equal? (hash->attrs '#hash((foo . "bar")(hee . "haw"))) '((foo "bar")(hee "haw")))
+
+(check-equal? (attr-ref '(p ((foo "bar"))) 'foo) "bar")
+(check-equal? (attr-set '(p ((foo "bar"))) 'foo "fraw") '(p ((foo "fraw"))))
+
+(check-equal? (merge-attrs 'foo "bar") '((foo "bar")))
+(check-equal? (merge-attrs '(foo "bar")) '((foo "bar")))
+(check-equal? (merge-attrs '((foo "bar"))) '((foo "bar")))
+(check-equal? (merge-attrs "foo" 'bar) '((foo "bar")))
+(check-equal? (merge-attrs "foo" "bar" "goo" "gar") '((foo "bar")(goo "gar")))
+(check-equal? (merge-attrs (merge-attrs "foo" "bar" "goo" "gar") "hee" "haw")
+ '((foo "bar")(goo "gar")(hee "haw")))
+(check-equal? (merge-attrs '((foo "bar")(goo "gar")) "foo" "haw") '((foo "haw")(goo "gar")))
+
+
(check-equal? (remove-attrs '(p ((foo "bar")) "hi")) '(p "hi"))
(check-equal? (remove-attrs '(p ((foo "bar")) "hi" (p ((foo "bar")) "hi"))) '(p "hi" (p "hi")))
+
(check-equal? (map-elements (λ(x) (if (string? x) "boing" x))
'(p "foo" "bar" (em "square")))
'(p "boing" "boing" (em "boing")))
+
+
+