pull/9/head
Matthew Butterick 12 years ago
parent b9439d96fc
commit 539878f61d

@ -3,7 +3,6 @@
(require racket/string)
(require racket/format)
(require "readability.rkt")
(provide (all-defined-out))

@ -2,10 +2,12 @@
(require racket/contract)
(require racket/list)
(require racket/string)
(require (only-in racket/format ~a))
(require (only-in racket/bool nor))
(require (only-in xml xexpr/c))
(module+ test (require rackunit))
(require "tools.rkt")
(require "tools.rkt" "library/html.rkt")
(provide (all-defined-out))
;; Find adjacent newline characters in a list and merge them into one item
@ -41,18 +43,35 @@
'(p "\n" "foo" "\n\n" "bar" (em "\n\n\n"))))
; Mon Aug 5: start here
;; is the named-xexpr a block element (as opposed to inline)
(define/contract (block-xexpr? nx)
(named-xexpr? . -> . boolean?)
;; this is a change in behavior since first pollen
;; blocks are only the ones on the html block tag list.
;; todo: make sure this is what I want.
;; this is, however, more consistent with browser behavior
;; (browsers assume that tags are inline by default)
(as-boolean (in block-tags (named-xexpr-name nx))))
(module+ test
(check-true (block-xexpr? '(p "foo")))
(check-true (block-xexpr? '(div "foo")))
(check-false (block-xexpr? '(em "foo")))
(check-false (block-xexpr? '(barfoo "foo"))))
; A block-xexpr is a named expression that's not on the inline list
; todo: bear in mind that browsers take the opposite view:
; that only elements on the block list are blocks
; and otherwise are treated as inline
(define (block-xexpr? x)
(and (named-xexpr? x) (not (in? inline-tags (car x)))))
;; start here Tues 6
;; default content decoder for pollen
(define/contract (decode nx)
;; use xexpr/c for contact because it gives better error messages
(xexpr/c . -> . named-xexpr?)
nx
)
;(decode `(p ((key "value")) ,decode))
; default content decoder for pollen
#|
;; default content decoder for pollen
(define/contract (decode x)
(named-xexpr? . -> . named-xexpr?)
@ -67,7 +86,7 @@
decoded-x))]
[(xexpr-content? x) ; a list of xexprs
(let ([x (prep-paragraph-flow x)])
(map &decode (if (any paragraph-break? x) ; need this condition to prevent infinite recursion
(map &decode (if (ormap paragraph-break? x) ; need this condition to prevent infinite recursion
(map wrap-paragraph (splitf-at* x paragraph-break?)) ; split into ¶¶
x)))]
[(string? x) (typogrify x)]
@ -86,3 +105,4 @@
;todo: improve this error message, more specific location
; now, it just spits out the whole defective content
(error (format "decode: ~v not a full named-xexpr" x)))))
|#

@ -1,14 +1,17 @@
#lang racket
#lang racket/base
(require (planet mb/pollen/syntax))
(require "../syntax.rkt")
; for now, body is deemed a block, not inline
;todo: is this legit? Why is body inline?
;; for now, body is deemed a block, not inline
;; todo: is this legit? Why is body inline?
(define block-tags
'(address article aside audio blockquote body canvas dd div dl fieldset figcaption figure footer form h1 h2 h3 h4 h5 h6 header hgroup noscript ol output p pre section table tfoot ul video))
; for now, map is omitted because it's a Racket keyword
; for now, style, script, and link are omitted because they shouldn't be wrapped
;; for now, map is omitted because it's a Racket keyword
;; for now, style, script, and link are omitted because they shouldn't be wrapped
;; todo: figure out how to resolve this. should be able to use html version of map.
;; possibly prefixed? Probably a good idea so it doesn't collide with user namespace
(define inline-tags
'(a abbr acronym applet area b base basefont bdo big br button caption center cite code col colgroup del dir dfn dt em embed font frame framesethead hr html i iframe img input ins isindex kbd label legend li menu meta noframes object optgroup option param q s samp select small span strike strong sub sup tbody td textarea th thead title tr tt u var xmp))

@ -1,8 +1,11 @@
#lang racket/base
(require racket/contract)
(require (only-in racket/list empty?))
(require (only-in racket/list empty? range))
(require (only-in racket/format ~a))
(require (only-in racket/string string-join))
(require (only-in racket/vector vector-member))
(module+ test (require rackunit))
(require "debug.rkt")
(provide (all-defined-out))
@ -10,6 +13,7 @@
(define/contract (as-string x)
(any/c . -> . string?)
(cond
[(string? x) x]
[(empty? x) ""]
[(symbol? x) (symbol->string x)]
[(number? x) (number->string x)]
@ -18,6 +22,7 @@
[else (error (format "Can't make ~a into string" x))]))
(module+ test
(check-equal? (as-string "foo") "foo")
(check-equal? (as-string '()) "")
(check-equal? (as-string 'foo) "foo")
(check-equal? (as-string 123) "123")
@ -26,6 +31,12 @@
(check-equal? (as-string #\¶) ""))
;; general way of coercing to symbol
(define (as-symbol thing)
; todo: on bad input, it will pop a string error rather than symbol error
(string->symbol (as-string thing)))
;; general way of coercing to a list
(define (as-list x)
(any/c . -> . list?)
@ -39,6 +50,23 @@
(check-equal? (as-list (list->vector '(1 2 3))) '(1 2 3))
(check-equal? (as-list "foo") (list "foo")))
;; general way of coercing to boolean
(define (as-boolean x)
(any/c . -> . boolean?)
;; in Racket, everything but #f is true
(if x #t #f))
(module+ test
(check-true (as-boolean #t))
(check-false (as-boolean #f))
(check-true (as-boolean "#f"))
(check-true (as-boolean "foo"))
(check-true (as-boolean '()))
(check-true (as-boolean '(1 2 3))))
;; general way of asking for length
(define (len x)
(any/c . -> . integer?)
@ -59,5 +87,93 @@
(check-not-equal? (len 'fo) 3) ; len 2
(check-equal? (len (list->vector '(1 2 3))) 3)
(check-not-equal? (len (list->vector '(1 2))) 3) ; len 2
(check-equal? (len (make-hash '((a 1) (b 2) (c 3)))) 3)
(check-not-equal? (len (make-hash '((a 1) (b 2) (b 3)))) 3)) ; len 2
(check-equal? (len (make-hash '((a . 1) (b . 2) (c . 3)))) 3)
(check-not-equal? (len (make-hash '((a . 1) (b . 2)))) 3)) ; len 2
;; general way of fetching an item from a container
(define/contract (get container item [up-to #f])
((any/c any/c) ((λ(i)(or (integer? i) (and (symbol? i) (equal? i 'end)))))
. ->* . any/c)
(define (sliceable-container? container)
(ormap (λ(proc) (proc container)) (list list? string? vector?)))
(when (sliceable-container? container)
(set! up-to
(cond
;; treat negative lengths as offset from end (Python style)
[(and (integer? up-to) (< up-to 0)) (+ (len container) up-to)]
;; 'end slices to the end
[(equal? up-to 'end) (len container)]
;; default to slice length of 1 (i.e, single-item retrieval)
[(equal? up-to #f) (add1 item)]
[else up-to])))
(define result (cond
;; for sliceable containers, make a slice
[(list? container) (for/list ([i (range item up-to)])
(list-ref container i))]
[(vector? container) (for/vector ([i (range item up-to)])
(vector-ref container i))]
[(string? container) (substring container item up-to)]
[(symbol? container) (as-symbol (get (as-string container) item up-to))]
;; for hash, just get item
[(hash? container) (hash-ref container item)]
[else #f]))
;; don't return single-item results inside a list
(if (and (sliceable-container? result) (= (len result) 1))
(car (as-list result))
result))
(module+ test
(check-equal? (get '(0 1 2 3 4 5) 2) 2)
(check-equal? (get '(0 1 2 3 4 5) 0 2) '(0 1))
(check-equal? (get '(0 1 2 3 4 5) 2 -1) '(2 3 4))
(check-equal? (get '(0 1 2 3 4 5) 2 'end) '(2 3 4 5))
(check-equal? (get (list->vector '(0 1 2 3 4 5)) 2) 2)
(check-equal? (get (list->vector'(0 1 2 3 4 5)) 0 2) (list->vector '(0 1)))
(check-equal? (get (list->vector'(0 1 2 3 4 5)) 2 -1) (list->vector '(2 3 4)))
(check-equal? (get (list->vector'(0 1 2 3 4 5)) 2 'end) (list->vector '(2 3 4 5)))
(check-equal? (get "purple" 2) "r")
(check-equal? (get "purple" 0 2) "pu")
(check-equal? (get "purple" 2 -1) "rpl")
(check-equal? (get "purple" 2 'end) "rple")
(check-equal? (get 'purple 2) 'r)
(check-equal? (get 'purple 0 2) 'pu)
(check-equal? (get 'purple 2 -1) 'rpl)
(check-equal? (get 'purple 2 'end) 'rple)
(check-equal? (get (make-hash '((a . 1) (b . 2) (c . 3))) 'a) 1))
;; general way of testing for membership (à la Python 'in')
(define/contract (in container item)
(any/c any/c . -> . any/c)
(cond
[(list? container) (member item container)] ; returns #f or sublist beginning with item
[(vector? container) (vector-member item container)] ; returns #f or zero-based item index
[(hash? container)
(and (hash-has-key? container item) (get container item))] ; returns #f or hash value
[(string? container) (let ([result (in (map as-string (string->list container)) (as-string item))])
(if result
(string-join result "")
#f))] ; returns #f or substring beginning with item
[(symbol? container) (let ([result (in (as-string container) (as-string item))])
(if result
(as-symbol result)
result))] ; returns #f or subsymbol (?!) beginning with item
[else #f]))
(module+ test
(check-equal? (in '(1 2 3) 2) '(2 3))
(check-false (in '(1 2 3) 4))
(check-equal? (in (list->vector '(1 2 3)) 2) 1)
(check-false (in (list->vector '(1 2 3)) 4))
(check-equal? (in (make-hash '((a . 1) (b . 2) (c . 3))) 'a) 1)
(check-false (in (make-hash '((a . 1) (b . 2) (c . 3))) 'x))
(check-equal? (in "foobar" "o") "oobar")
(check-false (in "foobar" "z"))
(check-equal? (in 'foobar 'o) 'oobar)
(check-false (in 'foobar 'z))
(check-false (in #\F "F")))

@ -4,7 +4,7 @@
(require (only-in racket/format ~a))
(require (only-in racket/list empty empty? second filter-not splitf-at takef dropf))
(require (only-in racket/string string-join))
(require (only-in xml xexpr?))
(require (only-in xml xexpr? xexpr/c))
(require "readability.rkt" "debug.rkt")
(provide (all-defined-out) (all-from-out "readability.rkt" "debug.rkt"))
@ -113,6 +113,8 @@
(check-false (named-xexpr? '("p" "foo" "bar"))) ; no name
(check-false (named-xexpr? '(p 123)))) ; content is a number
;; helper for comparison of values
;; normal function won't work for this. Has to be syntax-rule
(define-syntax-rule (values->list vs)
@ -154,6 +156,29 @@
(values->list (values 'p '((key "value")) '("foo")))))
;; convenience functions to retrieve only one part of named-xexpr
(define (named-xexpr-name nx)
(named-xexpr? . -> . symbol?)
(define-values (name attr content) (break-named-xexpr nx))
name)
(define (named-xexpr-attr nx)
(named-xexpr? . -> . xexpr-attr?)
(define-values (name attr content) (break-named-xexpr nx))
attr)
(define (named-xexpr-content nx)
(named-xexpr? . -> . xexpr-content?)
(define-values (name attr content) (break-named-xexpr nx))
content)
(module+ test
(check-equal? (named-xexpr-name '(p ((key "value"))"foo" "bar" (em "square"))) 'p)
(check-equal? (named-xexpr-attr '(p ((key "value"))"foo" "bar" (em "square"))) '((key "value")))
(check-equal? (named-xexpr-content '(p ((key "value"))"foo" "bar" (em "square")))
'("foo" "bar" (em "square"))))
;; apply filter proc recursively
(define/contract (filter-tree proc tree)
(procedure? list? . -> . list?)

Loading…
Cancel
Save