v6.0-exception
Matthew Butterick 6 years ago
parent b137554658
commit ed1ce5ba4d

@ -1,9 +1,8 @@
#lang racket/base
(require sugar/define sugar/coerce racket/string racket/list xml)
(require racket/match racket/function racket/format sugar/define sugar/list sugar/coerce racket/string racket/list xml)
(provide cdata? cdata valid-char? xexpr->string xexpr?) ; from xml
(provide empty) ; from racket/list
;; Section 2.2 of XML 1.1
;; (XML 1.0 is slightly different and more restrictive)
;; make private version of my-valid-char to get consistent results with Racket 6.0
@ -13,76 +12,56 @@
(<= #xE000 i #xFFFD)
(<= #x10000 i #x10FFFF))))
(define (my-xexpr? x)
(or (txexpr? x) (xexpr? x) (my-valid-char? x)))
((disjoin txexpr? xexpr? my-valid-char?) x))
(define+provide+safe (txexpr? x [short-only #f])
predicate/c
(and (pair? x)
(txexpr-tag? (car x))
(let ([result (or (and (empty? (cdr x)) 'short)
;; separate the my-xexpr? tail match from the rest.
;; as a recursive operation, it's potentially time-consuming.
(and (andmap my-xexpr? (cddr x))
(cond
[(txexpr-attrs? (cadr x)) #t]
[(my-xexpr? (cadr x)) 'short]
[else #f])))])
(and result (if short-only
(eq? result 'short)
#t)))))
(match x
[(list (? txexpr-tag?) (? my-xexpr?) ...) #true]
[(list (? txexpr-tag?) (? txexpr-attrs?) (? my-xexpr?) ...) #:when (not short-only) #true]
[_ #false]))
(define+provide+safe (txexpr-short? x)
predicate/c
(txexpr? x 'short-only))
(define+provide+safe (txexpr-tag? x)
predicate/c
(symbol? x))
(define+provide+safe (txexpr-attr? x)
predicate/c
(and (list? x)
(= 2 (length x))
(txexpr-attr-key? (first x))
(txexpr-attr-value? (second x))))
(match x
[(list (? txexpr-attr-key?) (? txexpr-attr-value?)) #true]
[_ #false]))
(define+provide+safe (txexpr-element? x)
predicate/c
(my-xexpr? x))
(define+provide+safe (txexpr-attr-key? x)
predicate/c
(symbol? x))
(define+provide+safe (can-be-txexpr-attr-key? x)
predicate/c
(symbolish? x))
(define+provide+safe (txexpr-attr-value? x)
predicate/c
(string? x))
(define+provide+safe (can-be-txexpr-attr-value? x)
predicate/c
(stringish? x))
(define-syntax-rule (define-plural plural-id pred)
(define+provide+safe (plural-id x)
predicate/c
(and (list? x) (andmap pred x))))
(match x
[(list (? pred) (... ...)) #true]
[_ #false])))
(define-plural txexpr-tags? txexpr-tag?)
(define-plural txexpr-attrs? txexpr-attr?)
@ -90,64 +69,42 @@
(define-plural txexpr-attr-values? txexpr-attr-value?)
(define-plural list-of-can-be-txexpr-attrs? can-be-txexpr-attrs?)
(define+provide+safe (can-be-txexpr-attrs? x)
predicate/c
(for/or ([test (in-list (list txexpr-attr?
txexpr-attrs?
can-be-txexpr-attr-key?
can-be-txexpr-attr-value?))])
(test x)))
((disjoin txexpr-attr? txexpr-attrs? can-be-txexpr-attr-key? can-be-txexpr-attr-value?) x))
(define (validate-txexpr-attrs x #:context [txexpr-context #f])
(define (make-reason)
(if (not (list? x))
(format "because ~v is not a list" x)
(let ([bad-attrs (filter (λ (i) (not (txexpr-attr? i))) x)])
(format "because ~a ~a" (string-join (map (λ (ba) (format "~v" ba)) bad-attrs) " and ")
(if (> (length bad-attrs) 1)
"are not valid attributes"
"is not in the form '(symbol \"string\")")))))
(cond
[(and (list? x) (positive? (length x)) (andmap txexpr-attr? x)) x]
[else (error (string-append "validate-txexpr-attrs: "
(if txexpr-context (format "in ~v, " txexpr-context) "")
(format "~v is not a valid list of attributes ~a" x
(make-reason))))]))
(match x
[(list (? txexpr-attr? x) ...) x]
[_ (raise-argument-error 'validate-txexpr-attrs
(string-append
(if txexpr-context (format "in ~v, " txexpr-context) "")
(format "list of attributes, each in the form '(symbol \"string\")")) x)]))
(define (validate-txexpr-element x #:context [txexpr-context #f])
(cond
[(or (string? x) (txexpr? x) (symbol? x)
(valid-char? x) (cdata? x)) x]
[else (error (string-append "validate-txexpr-element: "
(if txexpr-context (format "in ~v, " txexpr-context) "")
(format "~v is not a valid element (must be txexpr, string, symbol, XML char, or cdata)" x)))]))
(match x
[(? (disjoin string? txexpr? symbol? valid-char? cdata?)) x]
[_ (raise-argument-error 'validate-txexpr-element
(string-append
(if txexpr-context (format "in ~v, " txexpr-context) "")
"valid element (= txexpr, string, symbol, XML char, or cdata)") x)]))
;; is it a named x-expression?
;; todo: rewrite this recurively so errors can be pinpointed (for debugging)
;; todo: rewrite this recursively so errors can be pinpointed (for debugging)
(define+provide+safe (validate-txexpr x)
(any/c . -> . txexpr?)
(cond
[(txexpr-short? x) x]
[(txexpr? x) (and
(validate-txexpr-attrs (get-attrs x) #:context x)
(andmap (λ (e) (validate-txexpr-element e #:context x)) (get-elements x)) x)]
[(and (list? x) (symbol? (car x)))
(match x
[(cons (? txexpr-tag?) _)
(and
(andmap (λ (e) (validate-txexpr-element e #:context x)) (get-elements x))
(validate-txexpr-attrs (get-attrs x) #:context x))]
[(list? x) (error 'validate-txexpr (format "~v is a list but it doesn't start with a symbol" x))]
[else (error 'validate-txexpr (format "~v: not an X-expression" x))]))
(validate-txexpr-attrs (get-attrs x) #:context x)
(andmap (λ (e) (validate-txexpr-element e #:context x)) (get-elements x)) x)]
[_ (raise-argument-error 'validate-txexpr "valid X-expression" x)]))
(define (txexpr-unsafe tag attrs elements)
(cons tag (if (empty? attrs)
elements
(cons attrs elements))))
(cons tag (match attrs
[(== empty) elements]
[_ (cons attrs elements)])))
(define (txexpr-base func-name tag attrs elements)
(unless (txexpr-tag? tag)
@ -158,51 +115,42 @@
(raise-argument-error func-name "txexpr-elements?" elements))
(txexpr-unsafe tag attrs elements))
(define+provide+safe (txexpr tag [attrs null] [elements null])
((txexpr-tag?) (txexpr-attrs? txexpr-elements?) . ->* . txexpr?)
(txexpr-base 'txexpr tag attrs elements))
(define+provide+safe (txexpr* tag [attrs null] . elements)
((txexpr-tag?) (txexpr-attrs?) #:rest txexpr-elements? . ->* . txexpr?)
(txexpr-base 'txexpr* tag attrs elements))
(define make-txexpr txexpr) ; for backward compatability
(provide+safe make-txexpr)
(define+provide+safe (txexpr->values x)
(txexpr? . -> . (values txexpr-tag? txexpr-attrs? txexpr-elements?))
(if (txexpr-short? x)
(values (car x) '() (cdr x))
(values (car x) (cadr x) (cddr x))))
(match x
[(? txexpr-short?) (values (car x) '() (cdr x))]
[_ (values (car x) (cadr x) (cddr x))]))
(define+provide+safe (txexpr->list x)
(txexpr? . -> . list?)
(call-with-values (λ () (txexpr->values x)) list))
;; convenience functions to retrieve only one part of txexpr
(define+provide+safe (get-tag x)
(txexpr? . -> . txexpr-tag?)
(car x))
(define+provide+safe (get-attrs x)
(txexpr? . -> . txexpr-attrs?)
(define-values (tag attrs elements) (txexpr->values x))
attrs)
(define+provide+safe (get-elements x)
(txexpr? . -> . txexpr-elements?)
(define-values (tag attrs elements) (txexpr->values x))
elements)
;; helpers. we are getting a string or symbol
(define+provide+safe (->txexpr-attr-key x)
(can-be-txexpr-attr-key? . -> . txexpr-attr-key?)
@ -210,15 +158,16 @@
(raise-argument-error '->txexpr-attr-key "can-be-txexpr-attr-key?" x))
(->symbol x))
(define+provide+safe (->txexpr-attr-value x)
(can-be-txexpr-attr-value? . -> . txexpr-attr-value?)
(unless (can-be-txexpr-attr-value? x)
(raise-argument-error '->txexpr-attr-value "can-be-txexpr-attr-value?" x))
(->string x))
(define (sublist->attr sublist)
(match sublist
[(list key value) (list (->txexpr-attr-key key) (->txexpr-attr-value value))]))
(define identity (λ (x) x))
(define+provide+safe (attrs->hash #:hash-style? [hash-style-priority #f] . items-in)
(() (#:hash-style? boolean?) #:rest (listof can-be-txexpr-attrs?) . ->* . hash-eq?)
;; can be liberal with input because they're all just nested key/value pairs
@ -231,88 +180,66 @@
;; thus reverse the pairs.
;; priority-inverted will defeat this assumption, and allow later attributes to overwrite earlier.
(for/hasheq ([sublist (in-list ((if hash-style-priority
identity
reverse) (for/list (#:when (pair? items)
[(k ki) (in-indexed items)]
[v (in-list (cdr items))]
#:when (even? ki))
(list k v))))])
(let ([key (first sublist)]
[value (second sublist)])
(values (->txexpr-attr-key key) (->txexpr-attr-value value)))))
values
reverse) (slice-at items 2)))])
(apply values (sublist->attr sublist))))
(define+provide+safe (hash->attrs attr-hash)
(hash? . -> . txexpr-attrs?)
(map flatten (hash->list attr-hash)))
(match (hash->list attr-hash)
[(list (cons ks vs) ...) (map list ks vs)]))
(define+provide+safe (attrs-have-key? x key)
((or/c txexpr-attrs? txexpr?) can-be-txexpr-attr-key? . -> . boolean?)
(define attrs (if (txexpr-attrs? x) x (get-attrs x)))
(and (assq (->txexpr-attr-key key) attrs) #t))
(match x
[(? txexpr? tx) (and (attr-ref tx key #false) #true)]
[_ (attrs-have-key? (txexpr-unsafe '_ x null) key)]))
(define+provide+safe (attr-set tx key value)
(txexpr? can-be-txexpr-attr-key? can-be-txexpr-attr-value? . -> . txexpr?)
(attr-set* tx key value))
(define+provide+safe (attr-set* tx . kvs)
((txexpr?) #:rest (listof (or/c can-be-txexpr-attr-key? can-be-txexpr-attr-value?)) . ->* . txexpr?)
;; unlike others, this uses hash operations to guarantee that your attr-set
;; is the only one remaining.
(unless (even? (length kvs))
(raise-argument-error 'attr-set* "even number of arguments" kvs))
(define new-attrs
(hash->attrs
(apply hash-set* (attrs->hash (get-attrs tx))
(append-map (λ (sublist)
(list (->txexpr-attr-key (first sublist))
(->txexpr-attr-value (second sublist))))
(let ([items kvs])
(for/list (#:when (pair? items)
[(k ki) (in-indexed items)]
[v (in-list (cdr items))]
#:when (even? ki))
(list k v)))))))
(define new-attrs (hash->attrs
(apply hash-set* (attrs->hash (get-attrs tx))
(append-map sublist->attr (slice-at kvs 2)))))
(txexpr-base 'attr-set* (get-tag tx) new-attrs (get-elements tx)))
(define+provide+safe (attr-join tx key value)
(txexpr? can-be-txexpr-attr-key? can-be-txexpr-attr-value? . -> . txexpr?)
(define starting-values (string-split (if (attrs-have-key? tx key)
(attr-ref tx key)
"")))
(define starting-values (string-split (attr-ref tx key "")))
(attr-set tx key (string-join (append starting-values (list value)) " ")))
(define+provide+safe (attr-ref tx key [failure-result (λ _ (raise (make-exn:fail:contract (format "attr-ref: no value found for key ~v" key) (current-continuation-marks))))])
(define no-failure-result (gensym)) ; failure-result might be #false
(define+provide+safe (attr-ref tx key [failure-result no-failure-result])
((txexpr? can-be-txexpr-attr-key?) (any/c) . ->* . any)
(define result (assq (->txexpr-attr-key key) (get-attrs tx)))
(if result
(second result)
(if (procedure? failure-result)
(failure-result)
failure-result)))
(match (assq (->txexpr-attr-key key) (get-attrs tx))
[(list _ value) value]
[_ (match failure-result
[(? procedure?) (failure-result)]
[(== no-failure-result) (raise-argument-error 'attr-ref "key that exists in attr list" key)]
[_ failure-result])]))
(define+provide+safe (remove-attrs x)
(txexpr? . -> . txexpr?)
(let loop ([x x])
(if (txexpr? x)
(let-values ([(tag attrs elements) (txexpr->values x)])
(cons tag (map loop elements)))
(txexpr-unsafe tag null (map loop elements)))
x)))
(define+provide+safe (map-elements proc x)
(procedure? txexpr? . -> . txexpr?)
(proc (if (txexpr? x)
(let-values ([(tag attrs elements) (txexpr->values x)])
(txexpr-unsafe tag attrs (map (λ (e)(map-elements proc e)) elements)))
x)))
(let loop ([x x])
(proc (if (txexpr? x)
(let-values ([(tag attrs elements) (txexpr->values x)])
(txexpr-unsafe tag attrs (map loop elements)))
x))))
;; function to split tag out of txexpr
(define deleted-signal (gensym))
@ -322,35 +249,29 @@
(raise-argument-error 'splitf-txexpr "txexpr?" tx))
(define matches null)
(define (extract! x)
(cond
[(pred x) ;; store matched item and return processed value
(match x
[(? pred) ;; store matched item and return processed value
(set! matches (cons x matches))
(proc x)]
[(txexpr? x) (let-values([(tag attrs elements) (txexpr->values x)])
(txexpr tag attrs (filter (λ (e) (not (eq? e deleted-signal)))
(map extract! elements))))]
[else x]))
[(? txexpr?) (let-values ([(tag attrs elements) (txexpr->values x)])
(txexpr-unsafe tag attrs (filter-not (λ (e) (eq? e deleted-signal))
(map extract! elements))))]
[_ x]))
(define tx-extracted (extract! tx)) ;; do this first to fill matches
(values tx-extracted (reverse matches)))
(define+provide+safe (findf*-txexpr tx pred)
(txexpr? procedure? . -> . (or/c #f txexpr-elements?))
(define-values (_ matches) (splitf-txexpr tx pred))
(and (pair? matches) matches))
(define+provide+safe (findf-txexpr tx pred)
(txexpr? procedure? . -> . (or/c #f txexpr-element?))
(define matches (findf*-txexpr tx pred))
(and matches (car matches)))
;; don't use "![CDATA[...]]" wrapper in HTML, it's not consistent with the spec
(define (->cdata x)
(if (string? x)
(cdata #f #f x)
x))
(define (->cdata x) (if (string? x) (cdata #f #f x) x))
;; but treat CDATA strings correctly anyhow, because that's friendly
(define (cdata-string? x)
@ -360,13 +281,13 @@
(xexpr? . -> . string?)
(xexpr->string
(let loop ([x x])
(cond
[(txexpr? x)
(match x
[(? txexpr?)
(let*-values ([(tag attrs elements) (txexpr->values x)]
[(proc) (if (memq tag '(script style))
->cdata
loop)])
;; a little faster than `txexpr` since we know the pieces are valid
(txexpr-unsafe tag attrs (map proc elements)))]
[(cdata-string? x) (->cdata x)]
[else x]))))
[(? cdata-string?) (->cdata x)]
[_ x]))))

@ -1,5 +1,16 @@
#lang racket/base
(require sugar/define "base.rkt" rackunit)
(require sugar/define racket/match "base.rkt" rackunit)
(define (stringify-attr attr)
(match attr
[(list key val) (string-append (symbol->string key) val)]))
(define (sort-attrs x)
(match x
[(? txexpr?)
(let-values ([(tag attr elements) (txexpr->values x)])
(txexpr tag (sort attr #:key stringify-attr #:cache-keys? #t string<?) (map sort-attrs elements)))]
[_ x]))
(define (txexprs-equal? tx1 tx2)
;; txexprs are deemed equal if they differ only in the ordering of attributes.
@ -10,21 +21,11 @@
;; so the whole attr is converted into a single string for sorting, which lets the attr value act as a tiebreaker.
;; it doesn't matter that this sort may not be correct (in the sense of a desirable ordering)
;; it just needs to be stable (e.g., a certain set of attrs will always sort the same way)
(letrec ([stringify-attr (λ (attr) (string-append (symbol->string (car attr)) (cadr attr)))]
[sort-attrs (λ (x)
(if (txexpr? x)
(let-values ([(tag attr elements) (txexpr->values x)])
(txexpr tag (sort attr #:key stringify-attr #:cache-keys? #t string<?) (map sort-attrs elements)))
x))])
(equal? (sort-attrs tx1) (sort-attrs tx2))))
(equal? (sort-attrs tx1) (sort-attrs tx2)))
(define+provide+safe (attrs-equal? x1 x2)
((or/c txexpr-attrs? txexpr?) (or/c txexpr-attrs? txexpr?) . -> . boolean?)
(define attrs-tx1 (if (txexpr-attrs? x1) x1 (get-attrs x1)))
(define attrs-tx2 (if (txexpr-attrs? x2) x2 (get-attrs x2)))
(txexprs-equal? `(div ,attrs-tx1) `(div ,attrs-tx2)))
(apply txexprs-equal? (map (λ (x) `(_ ,(if (txexpr-attrs? x) x (get-attrs x)))) (list x1 x2))))
(provide+safe check-txexprs-equal?)
(define-simple-check (check-txexprs-equal? tx1 tx2)

Loading…
Cancel
Save