You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
395 lines
14 KiB
Racket
395 lines
14 KiB
Racket
#lang racket/base
|
|
(require sugar/define sugar/coerce sugar/list racket/string racket/list racket/match xml rackunit)
|
|
(provide cdata? cdata valid-char? xexpr->string xexpr?) ; from xml
|
|
(provide empty) ; from racket/list
|
|
|
|
;; Section 2.2 of XML 1.1
|
|
;; (XML 1.0 is slightly different and more restrictive)
|
|
;; make private version of my-valid-char to get consistent results with Racket 6.0
|
|
(define (my-valid-char? i)
|
|
(and (exact-nonnegative-integer? i)
|
|
(or (<= #x1 i #xD7FF)
|
|
(<= #xE000 i #xFFFD)
|
|
(<= #x10000 i #x10FFFF))))
|
|
|
|
|
|
(define (my-xexpr? x)
|
|
(or (txexpr? x) (xexpr? x) (my-valid-char? x)))
|
|
|
|
|
|
(define+provide+safe (txexpr? x #:short-only? [short-only #f])
|
|
predicate/c
|
|
(define short-sym 'short)
|
|
(and (pair? x)
|
|
(txexpr-tag? (car x))
|
|
(let ([result (or (and (empty? (cdr x)) short-sym)
|
|
;; separate the my-xexpr? tail match from the rest.
|
|
;; as a recursive operation, it's potentially time-consuming.
|
|
(and (andmap my-xexpr? (cddr x))
|
|
(match (cadr x)
|
|
[(list (? txexpr-attr?) ...) #t]
|
|
[(? my-xexpr?) short-sym]
|
|
[else #f])))])
|
|
(and result (if short-only
|
|
(eq? result short-sym)
|
|
#t)))))
|
|
|
|
|
|
(define+provide+safe (txexpr-short? x)
|
|
predicate/c
|
|
(txexpr? x #:short-only? #t))
|
|
|
|
|
|
(define+provide+safe (txexpr-tag? x)
|
|
predicate/c
|
|
(symbol? x))
|
|
|
|
|
|
(define+provide+safe (txexpr-tags? x)
|
|
predicate/c
|
|
(and (list? x) (andmap txexpr-tag? x)))
|
|
|
|
|
|
(define+provide+safe (txexpr-attr? x)
|
|
predicate/c
|
|
(match x
|
|
[(list (? symbol?) (? string?)) #t]
|
|
[else #f]))
|
|
|
|
|
|
(define+provide+safe (txexpr-element? x)
|
|
predicate/c
|
|
(my-xexpr? x))
|
|
|
|
|
|
(define+provide+safe (txexpr-attr-key? x)
|
|
predicate/c
|
|
(symbol? x))
|
|
|
|
|
|
(define+provide+safe (can-be-txexpr-attr-key? x)
|
|
predicate/c
|
|
(or (symbol? x) (string? x)))
|
|
|
|
|
|
(define+provide+safe (txexpr-attr-value? x)
|
|
predicate/c
|
|
(string? x))
|
|
|
|
|
|
(define+provide+safe (can-be-txexpr-attr-value? x)
|
|
predicate/c
|
|
(or (symbol? x) (string? x)))
|
|
|
|
|
|
(define-syntax-rule (define-plural plural-id pred)
|
|
(define+provide+safe (plural-id x)
|
|
predicate/c
|
|
(and (list? x) (andmap pred x))))
|
|
|
|
(define-plural txexpr-attrs? txexpr-attr?)
|
|
(define-plural txexpr-elements? txexpr-element?)
|
|
(define-plural txexpr-attr-values? txexpr-attr-value?)
|
|
(define-plural list-of-can-be-txexpr-attrs? can-be-txexpr-attrs?)
|
|
|
|
|
|
(define+provide+safe (can-be-txexpr-attrs? x)
|
|
predicate/c
|
|
(ormap (λ(test) (test x)) (list txexpr-attr?
|
|
txexpr-attrs?
|
|
can-be-txexpr-attr-key?
|
|
can-be-txexpr-attr-value?)))
|
|
|
|
|
|
(define (validate-txexpr-attrs x #:context [txexpr-context #f])
|
|
(define (make-reason)
|
|
(if (not (list? x))
|
|
(format "because ~v is not a list" x)
|
|
(let ([bad-attrs (filter (λ(i) (not (txexpr-attr? i))) x)])
|
|
(format "because ~a ~a" (string-join (map (λ(ba) (format "~v" ba)) bad-attrs) " and ")
|
|
(if (> (length bad-attrs) 1)
|
|
"are not valid attributes"
|
|
"is not in the form '(symbol \"string\")")))))
|
|
(cond
|
|
[(and (list? x) (> (length x) 0) (andmap txexpr-attr? x)) x]
|
|
[else (error (string-append "validate-txexpr-attrs: "
|
|
(if txexpr-context (format "in ~v, " txexpr-context) "")
|
|
(format "~v is not a valid list of attributes ~a" x
|
|
(make-reason))))]))
|
|
|
|
|
|
(define (validate-txexpr-element x #:context [txexpr-context #f])
|
|
(cond
|
|
[(or (string? x) (txexpr? x) (symbol? x)
|
|
(valid-char? x) (cdata? x)) x]
|
|
[else (error (string-append "validate-txexpr-element: "
|
|
(if txexpr-context (format "in ~v, " txexpr-context) "")
|
|
(format "~v is not a valid element (must be txexpr, string, symbol, XML char, or cdata)" x)))]))
|
|
|
|
|
|
;; is it a named x-expression?
|
|
;; todo: rewrite this recurively so errors can be pinpointed (for debugging)
|
|
(define+provide+safe (validate-txexpr x)
|
|
(any/c . -> . txexpr?)
|
|
(define-syntax-rule (validate-txexpr-attrs-with-context e)
|
|
(validate-txexpr-attrs e #:context x))
|
|
(define-syntax-rule (validate-txexpr-element-with-context e)
|
|
(validate-txexpr-element e #:context x))
|
|
(cond
|
|
[(txexpr-short? x) x]
|
|
[(txexpr? x) (and
|
|
(validate-txexpr-attrs-with-context (get-attrs x))
|
|
(andmap (λ(e) (validate-txexpr-element-with-context e)) (get-elements x)) x)]
|
|
[(and (list? x) (symbol? (car x)))
|
|
(and
|
|
(andmap (λ(e) (validate-txexpr-element-with-context e)) (get-elements x))
|
|
(validate-txexpr-attrs-with-context (get-attrs x)))]
|
|
[(list? x) (error 'validate-txexpr (format "~v is a list but it doesn't start with a symbol" x))]
|
|
[else (error 'validate-txexpr (format "~v: not an X-expression" x))]))
|
|
|
|
|
|
(define+provide+safe (txexpr tag [attrs null] [elements null])
|
|
((symbol?) (txexpr-attrs? txexpr-elements?) . ->* . txexpr?)
|
|
(define result (cons tag (append (if (empty? attrs) empty (list attrs)) elements)))
|
|
(if (txexpr? result)
|
|
result
|
|
(error 'txexpr
|
|
(cond
|
|
[(not (txexpr-tag? tag))
|
|
(format "This is not a txexpr-tag: ~v" tag)]
|
|
[(not (txexpr-attrs? attrs))
|
|
(format "This is not a list of txexpr-attrs: ~v" attrs)]
|
|
[(not (txexpr-elements? elements))
|
|
(format "This is not a list of txexpr-elements: ~v" elements)]
|
|
[else ""]))))
|
|
|
|
|
|
(define make-txexpr txexpr) ; for backward compatability
|
|
(provide+safe make-txexpr)
|
|
|
|
|
|
(define+provide+safe (txexpr->values x)
|
|
(txexpr? . -> . (values symbol? txexpr-attrs? txexpr-elements?))
|
|
(if (txexpr-short? x)
|
|
(values (car x) '() (cdr x))
|
|
(values (car x) (cadr x) (cddr x))))
|
|
|
|
|
|
(define+provide+safe (txexpr->list x)
|
|
(txexpr? . -> . list?)
|
|
(define-values (tag attrs content) (txexpr->values x))
|
|
(list tag attrs content))
|
|
|
|
|
|
;; convenience functions to retrieve only one part of txexpr
|
|
(define+provide+safe (get-tag x)
|
|
(txexpr? . -> . txexpr-tag?)
|
|
(car x))
|
|
|
|
|
|
(define+provide+safe (get-attrs x)
|
|
(txexpr? . -> . txexpr-attrs?)
|
|
(define-values (tag attrs content) (txexpr->values x))
|
|
attrs)
|
|
|
|
|
|
(define+provide+safe (get-elements x)
|
|
(txexpr? . -> . txexpr-elements?)
|
|
(define-values (tag attrs elements) (txexpr->values x))
|
|
elements)
|
|
|
|
|
|
;; helpers. we are getting a string or symbol
|
|
(define+provide+safe (->txexpr-attr-key x)
|
|
(can-be-txexpr-attr-key? . -> . txexpr-attr-key?)
|
|
(->symbol x))
|
|
|
|
|
|
(define+provide+safe (->txexpr-attr-value x)
|
|
(can-be-txexpr-attr-value? . -> . txexpr-attr-value?)
|
|
(->string x))
|
|
|
|
(define identity (λ (x) x))
|
|
(define+provide+safe (attrs->hash #:hash-style? [hash-style-priority #f] . items-in)
|
|
(() (#:hash-style? boolean?) #:rest (listof can-be-txexpr-attrs?) . ->* . hash-eq?)
|
|
;; can be liberal with input because they're all just nested key/value pairs
|
|
;; but still need this function to make sure that 'foo and "foo" are treated as the same hash key
|
|
(define items (flatten items-in))
|
|
(unless (even? (length items))
|
|
(raise-argument-error 'attrs->hash "even number of arguments" items-in))
|
|
;; hasheq loop will overwrite earlier values with later.
|
|
;; but earlier attributes need priority (see https://www.w3.org/TR/xml/#attdecls)
|
|
;; thus reverse the pairs.
|
|
;; priority-inverted will defeat this assumption, and allow later attributes to overwrite earlier.
|
|
(for/hasheq ([sublist (in-list ((if hash-style-priority
|
|
identity
|
|
reverse) (slice-at items 2)))])
|
|
(let ([key (->txexpr-attr-key (first sublist))]
|
|
[value (->txexpr-attr-value (second sublist))])
|
|
(values key value))))
|
|
|
|
|
|
(define+provide+safe (hash->attrs attr-hash)
|
|
(hash? . -> . txexpr-attrs?)
|
|
(map flatten (hash->list attr-hash)))
|
|
|
|
|
|
(define+provide+safe (attrs-have-key? x key)
|
|
((or/c txexpr-attrs? txexpr?) can-be-txexpr-attr-key? . -> . boolean?)
|
|
(define attrs (if (txexpr-attrs? x) x (get-attrs x)))
|
|
(and (assq (->txexpr-attr-key key) attrs) #t))
|
|
|
|
|
|
(define+provide+safe (attr-set tx key value)
|
|
(txexpr? can-be-txexpr-attr-key? can-be-txexpr-attr-value? . -> . txexpr?)
|
|
(attr-set* tx key value))
|
|
|
|
(define+provide+safe (attr-set* tx . kvs)
|
|
((txexpr?) #:rest (listof (or/c can-be-txexpr-attr-key? can-be-txexpr-attr-value?)) . ->* . txexpr?)
|
|
;; unlike others, this uses hash operations to guarantee that your attr-set
|
|
;; is the only one remaining.
|
|
(unless (even? (length kvs))
|
|
(raise-argument-error 'attr-set* "even number of arguments" kvs))
|
|
(define new-attrs
|
|
(hash->attrs
|
|
(apply hash-set* (attrs->hash (get-attrs tx))
|
|
(append-map (λ(sublist)
|
|
(list (->txexpr-attr-key (first sublist))
|
|
(->txexpr-attr-value (second sublist)))) (slice-at kvs 2)))))
|
|
(txexpr (get-tag tx) new-attrs (get-elements tx)))
|
|
|
|
|
|
|
|
(define+provide+safe (attr-join tx key value)
|
|
(txexpr? can-be-txexpr-attr-key? can-be-txexpr-attr-value? . -> . txexpr?)
|
|
(define starting-values (string-split (if (attrs-have-key? tx key)
|
|
(attr-ref tx key)
|
|
"")))
|
|
(attr-set tx key (string-join `(,@starting-values ,value) " ")))
|
|
|
|
|
|
|
|
(define+provide+safe (attr-ref tx key [failure-result (λ _ (raise (make-exn:fail:contract (format "attr-ref: no value found for key ~v" key) (current-continuation-marks))))])
|
|
((txexpr? can-be-txexpr-attr-key?) (any/c) . ->* . any)
|
|
(define result (assq (->txexpr-attr-key key) (get-attrs tx)))
|
|
(if result
|
|
(second result)
|
|
(if (procedure? failure-result)
|
|
(failure-result)
|
|
failure-result)))
|
|
|
|
|
|
|
|
(define+provide+safe (remove-attrs x)
|
|
(txexpr? . -> . txexpr?)
|
|
(let loop ([x x])
|
|
(if (txexpr? x)
|
|
(let-values ([(tag attr elements) (txexpr->values x)])
|
|
(txexpr tag null (map loop elements)))
|
|
x)))
|
|
|
|
|
|
(define (map-elements/exclude proc x exclude-test)
|
|
(procedure? txexpr? procedure? . -> . txexpr?)
|
|
(cond
|
|
[(txexpr? x)
|
|
(if (exclude-test x)
|
|
x
|
|
(let-values ([(tag attr elements) (txexpr->values x)])
|
|
(txexpr tag attr
|
|
(map (λ(x)(map-elements/exclude proc x exclude-test)) elements))))]
|
|
;; externally the function only accepts txexpr,
|
|
;; but internally we don't care
|
|
[else (proc x)]))
|
|
|
|
|
|
(define+provide+safe (map-elements proc x)
|
|
(procedure? txexpr? . -> . txexpr?)
|
|
(map-elements/exclude proc x (λ _ #f)))
|
|
|
|
|
|
;; function to split tag out of txexpr
|
|
(define deleted-signal (gensym))
|
|
(define+provide+safe (splitf-txexpr tx pred [proc (λ(x) deleted-signal)])
|
|
((txexpr? procedure?) (procedure?) . ->* . (values txexpr? txexpr-elements?))
|
|
(define matches null)
|
|
(define (do-extraction x)
|
|
(cond
|
|
[(pred x) (begin ; store matched item and return processed value
|
|
(set! matches (cons x matches))
|
|
(proc x))]
|
|
[(txexpr? x) (let-values([(tag attr elements) (txexpr->values x)])
|
|
(txexpr tag attr (filter (λ(e) (not (equal? e deleted-signal)))
|
|
(map do-extraction elements))))]
|
|
[else x]))
|
|
(define tx-extracted (do-extraction tx)) ;; do this first to fill matches
|
|
(unless (txexpr? tx-extracted)
|
|
(error 'splitf-txexpr "Bad input"))
|
|
(values tx-extracted (reverse matches)))
|
|
|
|
|
|
(define+provide+safe (findf*-txexpr tx pred)
|
|
(txexpr? procedure? . -> . (or/c #f txexpr-elements?))
|
|
(define-values (_ matches) (splitf-txexpr tx pred))
|
|
(and (pair? matches) matches))
|
|
|
|
|
|
(define+provide+safe (findf-txexpr tx pred)
|
|
(txexpr? procedure? . -> . (or/c #f txexpr-element?))
|
|
(define matches (findf*-txexpr tx pred))
|
|
(and matches (car matches)))
|
|
|
|
|
|
(define+provide+safe (xexpr->html x)
|
|
(xexpr? . -> . string?)
|
|
(define (->cdata x)
|
|
(cond
|
|
[(cdata? x) x]
|
|
; don't use "![CDATA[...]]" wrapper in HTML, it's not consistent with the spec
|
|
[(string? x) (cdata #f #f x)]
|
|
[else x]))
|
|
(xexpr->string (let loop ([x x])
|
|
(cond
|
|
[(txexpr? x) (if (member (get-tag x) '(script style))
|
|
(txexpr (get-tag x) (get-attrs x)
|
|
(map ->cdata (get-elements x)))
|
|
(txexpr (get-tag x) (get-attrs x)
|
|
(map loop (get-elements x))))]
|
|
[else x]))))
|
|
|
|
|
|
(define (txexprs-equal? tx1 tx2)
|
|
;; txexprs are deemed equal if they differ only in the ordering of attributes.
|
|
;; therefore, to check them, 1) sort their attributes, 2) straight list comparison.
|
|
;; use letrec because `define-simple-check` wants an expression in <=6.2
|
|
|
|
;; `stringify-attr` is needed because comparing attr keys won't work if there are two attrs with same key.
|
|
;; so the whole attr is converted into a single string for sorting, which lets the attr value act as a tiebreaker.
|
|
;; it doesn't matter that this sort may not be correct (in the sense of a desirable ordering)
|
|
;; it just needs to be stable (e.g., a certain set of attrs will always sort the same way)
|
|
(letrec ([stringify-attr (λ(attr) (string-append (symbol->string (car attr)) (cadr attr)))]
|
|
[sort-attrs (λ(x)
|
|
(if (txexpr? x)
|
|
(let-values ([(tag attr elements) (txexpr->values x)])
|
|
(txexpr tag (sort attr #:key stringify-attr #:cache-keys? #t string<?) (map sort-attrs elements)))
|
|
x))])
|
|
(equal? (sort-attrs tx1) (sort-attrs tx2))))
|
|
|
|
|
|
(define+provide+safe (attrs-equal? x1 x2)
|
|
((or/c txexpr-attrs? txexpr?) (or/c txexpr-attrs? txexpr?) . -> . boolean?)
|
|
(define attrs-tx1 (if (txexpr-attrs? x1) x1 (get-attrs x1)))
|
|
(define attrs-tx2 (if (txexpr-attrs? x2) x2 (get-attrs x2)))
|
|
(txexprs-equal? `(div ,attrs-tx1) `(div ,attrs-tx2)))
|
|
|
|
|
|
(provide+safe check-txexprs-equal?)
|
|
(define-simple-check (check-txexprs-equal? tx1 tx2)
|
|
(txexprs-equal? tx1 tx2))
|
|
|
|
(module+ test
|
|
(check-txexprs-equal? '(p ((b "foo")(a "bar")) (span ((d "foo")(c "bar"))))
|
|
'(p ((a "bar")(b "foo")) (span ((c "bar")(d "foo")))))
|
|
;; two attrs with same key
|
|
(check-txexprs-equal? '(p ((a "foo")(a "bar")))
|
|
'(p ((a "bar")(a "foo"))))) |