pull/9/head
Matthew Butterick 11 years ago
parent b9439d96fc
commit 539878f61d

@ -3,7 +3,6 @@
(require racket/string) (require racket/string)
(require racket/format) (require racket/format)
(require "readability.rkt")
(provide (all-defined-out)) (provide (all-defined-out))

@ -2,10 +2,12 @@
(require racket/contract) (require racket/contract)
(require racket/list) (require racket/list)
(require racket/string) (require racket/string)
(require (only-in racket/format ~a))
(require (only-in racket/bool nor))
(require (only-in xml xexpr/c))
(module+ test (require rackunit)) (module+ test (require rackunit))
(require "tools.rkt") (require "tools.rkt" "library/html.rkt")
(provide (all-defined-out)) (provide (all-defined-out))
;; Find adjacent newline characters in a list and merge them into one item ;; Find adjacent newline characters in a list and merge them into one item
@ -30,7 +32,7 @@
;; otherwise peel off elements up to the next newline, append them to accumulator, ;; otherwise peel off elements up to the next newline, append them to accumulator,
;; and recurse on the rest ;; and recurse on the rest
(really-merge-newlines (dropf remainder not-newline?) (really-merge-newlines (dropf remainder not-newline?)
(append acc (takef remainder not-newline?))))))) (append acc (takef remainder not-newline?)))))))
(cond (cond
[(list? x) (really-merge-newlines (map merge-newlines x))] [(list? x) (really-merge-newlines (map merge-newlines x))]
@ -41,18 +43,35 @@
'(p "\n" "foo" "\n\n" "bar" (em "\n\n\n")))) '(p "\n" "foo" "\n\n" "bar" (em "\n\n\n"))))
; Mon Aug 5: start here ;; is the named-xexpr a block element (as opposed to inline)
(define/contract (block-xexpr? nx)
(named-xexpr? . -> . boolean?)
;; this is a change in behavior since first pollen
;; blocks are only the ones on the html block tag list.
;; todo: make sure this is what I want.
;; this is, however, more consistent with browser behavior
;; (browsers assume that tags are inline by default)
(as-boolean (in block-tags (named-xexpr-name nx))))
; A block-xexpr is a named expression that's not on the inline list (module+ test
; todo: bear in mind that browsers take the opposite view: (check-true (block-xexpr? '(p "foo")))
; that only elements on the block list are blocks (check-true (block-xexpr? '(div "foo")))
; and otherwise are treated as inline (check-false (block-xexpr? '(em "foo")))
(define (block-xexpr? x) (check-false (block-xexpr? '(barfoo "foo"))))
(and (named-xexpr? x) (not (in? inline-tags (car x)))))
;; start here Tues 6
;; default content decoder for pollen
(define/contract (decode nx)
;; use xexpr/c for contact because it gives better error messages
(xexpr/c . -> . named-xexpr?)
nx
)
;(decode `(p ((key "value")) ,decode))
; default content decoder for pollen #|
;; default content decoder for pollen
(define/contract (decode x) (define/contract (decode x)
(named-xexpr? . -> . named-xexpr?) (named-xexpr? . -> . named-xexpr?)
@ -67,7 +86,7 @@
decoded-x))] decoded-x))]
[(xexpr-content? x) ; a list of xexprs [(xexpr-content? x) ; a list of xexprs
(let ([x (prep-paragraph-flow x)]) (let ([x (prep-paragraph-flow x)])
(map &decode (if (any paragraph-break? x) ; need this condition to prevent infinite recursion (map &decode (if (ormap paragraph-break? x) ; need this condition to prevent infinite recursion
(map wrap-paragraph (splitf-at* x paragraph-break?)) ; split into ¶¶ (map wrap-paragraph (splitf-at* x paragraph-break?)) ; split into ¶¶
x)))] x)))]
[(string? x) (typogrify x)] [(string? x) (typogrify x)]
@ -86,3 +105,4 @@
;todo: improve this error message, more specific location ;todo: improve this error message, more specific location
; now, it just spits out the whole defective content ; now, it just spits out the whole defective content
(error (format "decode: ~v not a full named-xexpr" x))))) (error (format "decode: ~v not a full named-xexpr" x)))))
|#

@ -1,14 +1,17 @@
#lang racket #lang racket/base
(require (planet mb/pollen/syntax)) (require "../syntax.rkt")
; for now, body is deemed a block, not inline ;; for now, body is deemed a block, not inline
;todo: is this legit? Why is body inline? ;; todo: is this legit? Why is body inline?
(define block-tags (define block-tags
'(address article aside audio blockquote body canvas dd div dl fieldset figcaption figure footer form h1 h2 h3 h4 h5 h6 header hgroup noscript ol output p pre section table tfoot ul video)) '(address article aside audio blockquote body canvas dd div dl fieldset figcaption figure footer form h1 h2 h3 h4 h5 h6 header hgroup noscript ol output p pre section table tfoot ul video))
; for now, map is omitted because it's a Racket keyword
; for now, style, script, and link are omitted because they shouldn't be wrapped ;; for now, map is omitted because it's a Racket keyword
;; for now, style, script, and link are omitted because they shouldn't be wrapped
;; todo: figure out how to resolve this. should be able to use html version of map.
;; possibly prefixed? Probably a good idea so it doesn't collide with user namespace
(define inline-tags (define inline-tags
'(a abbr acronym applet area b base basefont bdo big br button caption center cite code col colgroup del dir dfn dt em embed font frame framesethead hr html i iframe img input ins isindex kbd label legend li menu meta noframes object optgroup option param q s samp select small span strike strong sub sup tbody td textarea th thead title tr tt u var xmp)) '(a abbr acronym applet area b base basefont bdo big br button caption center cite code col colgroup del dir dfn dt em embed font frame framesethead hr html i iframe img input ins isindex kbd label legend li menu meta noframes object optgroup option param q s samp select small span strike strong sub sup tbody td textarea th thead title tr tt u var xmp))

@ -1,8 +1,11 @@
#lang racket/base #lang racket/base
(require racket/contract) (require racket/contract)
(require (only-in racket/list empty?)) (require (only-in racket/list empty? range))
(require (only-in racket/format ~a)) (require (only-in racket/format ~a))
(require (only-in racket/string string-join))
(require (only-in racket/vector vector-member))
(module+ test (require rackunit)) (module+ test (require rackunit))
(require "debug.rkt")
(provide (all-defined-out)) (provide (all-defined-out))
@ -10,6 +13,7 @@
(define/contract (as-string x) (define/contract (as-string x)
(any/c . -> . string?) (any/c . -> . string?)
(cond (cond
[(string? x) x]
[(empty? x) ""] [(empty? x) ""]
[(symbol? x) (symbol->string x)] [(symbol? x) (symbol->string x)]
[(number? x) (number->string x)] [(number? x) (number->string x)]
@ -18,6 +22,7 @@
[else (error (format "Can't make ~a into string" x))])) [else (error (format "Can't make ~a into string" x))]))
(module+ test (module+ test
(check-equal? (as-string "foo") "foo")
(check-equal? (as-string '()) "") (check-equal? (as-string '()) "")
(check-equal? (as-string 'foo) "foo") (check-equal? (as-string 'foo) "foo")
(check-equal? (as-string 123) "123") (check-equal? (as-string 123) "123")
@ -26,6 +31,12 @@
(check-equal? (as-string #\¶) "")) (check-equal? (as-string #\¶) ""))
;; general way of coercing to symbol
(define (as-symbol thing)
; todo: on bad input, it will pop a string error rather than symbol error
(string->symbol (as-string thing)))
;; general way of coercing to a list ;; general way of coercing to a list
(define (as-list x) (define (as-list x)
(any/c . -> . list?) (any/c . -> . list?)
@ -39,6 +50,23 @@
(check-equal? (as-list (list->vector '(1 2 3))) '(1 2 3)) (check-equal? (as-list (list->vector '(1 2 3))) '(1 2 3))
(check-equal? (as-list "foo") (list "foo"))) (check-equal? (as-list "foo") (list "foo")))
;; general way of coercing to boolean
(define (as-boolean x)
(any/c . -> . boolean?)
;; in Racket, everything but #f is true
(if x #t #f))
(module+ test
(check-true (as-boolean #t))
(check-false (as-boolean #f))
(check-true (as-boolean "#f"))
(check-true (as-boolean "foo"))
(check-true (as-boolean '()))
(check-true (as-boolean '(1 2 3))))
;; general way of asking for length ;; general way of asking for length
(define (len x) (define (len x)
(any/c . -> . integer?) (any/c . -> . integer?)
@ -59,5 +87,93 @@
(check-not-equal? (len 'fo) 3) ; len 2 (check-not-equal? (len 'fo) 3) ; len 2
(check-equal? (len (list->vector '(1 2 3))) 3) (check-equal? (len (list->vector '(1 2 3))) 3)
(check-not-equal? (len (list->vector '(1 2))) 3) ; len 2 (check-not-equal? (len (list->vector '(1 2))) 3) ; len 2
(check-equal? (len (make-hash '((a 1) (b 2) (c 3)))) 3) (check-equal? (len (make-hash '((a . 1) (b . 2) (c . 3)))) 3)
(check-not-equal? (len (make-hash '((a 1) (b 2) (b 3)))) 3)) ; len 2 (check-not-equal? (len (make-hash '((a . 1) (b . 2)))) 3)) ; len 2
;; general way of fetching an item from a container
(define/contract (get container item [up-to #f])
((any/c any/c) ((λ(i)(or (integer? i) (and (symbol? i) (equal? i 'end)))))
. ->* . any/c)
(define (sliceable-container? container)
(ormap (λ(proc) (proc container)) (list list? string? vector?)))
(when (sliceable-container? container)
(set! up-to
(cond
;; treat negative lengths as offset from end (Python style)
[(and (integer? up-to) (< up-to 0)) (+ (len container) up-to)]
;; 'end slices to the end
[(equal? up-to 'end) (len container)]
;; default to slice length of 1 (i.e, single-item retrieval)
[(equal? up-to #f) (add1 item)]
[else up-to])))
(define result (cond
;; for sliceable containers, make a slice
[(list? container) (for/list ([i (range item up-to)])
(list-ref container i))]
[(vector? container) (for/vector ([i (range item up-to)])
(vector-ref container i))]
[(string? container) (substring container item up-to)]
[(symbol? container) (as-symbol (get (as-string container) item up-to))]
;; for hash, just get item
[(hash? container) (hash-ref container item)]
[else #f]))
;; don't return single-item results inside a list
(if (and (sliceable-container? result) (= (len result) 1))
(car (as-list result))
result))
(module+ test
(check-equal? (get '(0 1 2 3 4 5) 2) 2)
(check-equal? (get '(0 1 2 3 4 5) 0 2) '(0 1))
(check-equal? (get '(0 1 2 3 4 5) 2 -1) '(2 3 4))
(check-equal? (get '(0 1 2 3 4 5) 2 'end) '(2 3 4 5))
(check-equal? (get (list->vector '(0 1 2 3 4 5)) 2) 2)
(check-equal? (get (list->vector'(0 1 2 3 4 5)) 0 2) (list->vector '(0 1)))
(check-equal? (get (list->vector'(0 1 2 3 4 5)) 2 -1) (list->vector '(2 3 4)))
(check-equal? (get (list->vector'(0 1 2 3 4 5)) 2 'end) (list->vector '(2 3 4 5)))
(check-equal? (get "purple" 2) "r")
(check-equal? (get "purple" 0 2) "pu")
(check-equal? (get "purple" 2 -1) "rpl")
(check-equal? (get "purple" 2 'end) "rple")
(check-equal? (get 'purple 2) 'r)
(check-equal? (get 'purple 0 2) 'pu)
(check-equal? (get 'purple 2 -1) 'rpl)
(check-equal? (get 'purple 2 'end) 'rple)
(check-equal? (get (make-hash '((a . 1) (b . 2) (c . 3))) 'a) 1))
;; general way of testing for membership (à la Python 'in')
(define/contract (in container item)
(any/c any/c . -> . any/c)
(cond
[(list? container) (member item container)] ; returns #f or sublist beginning with item
[(vector? container) (vector-member item container)] ; returns #f or zero-based item index
[(hash? container)
(and (hash-has-key? container item) (get container item))] ; returns #f or hash value
[(string? container) (let ([result (in (map as-string (string->list container)) (as-string item))])
(if result
(string-join result "")
#f))] ; returns #f or substring beginning with item
[(symbol? container) (let ([result (in (as-string container) (as-string item))])
(if result
(as-symbol result)
result))] ; returns #f or subsymbol (?!) beginning with item
[else #f]))
(module+ test
(check-equal? (in '(1 2 3) 2) '(2 3))
(check-false (in '(1 2 3) 4))
(check-equal? (in (list->vector '(1 2 3)) 2) 1)
(check-false (in (list->vector '(1 2 3)) 4))
(check-equal? (in (make-hash '((a . 1) (b . 2) (c . 3))) 'a) 1)
(check-false (in (make-hash '((a . 1) (b . 2) (c . 3))) 'x))
(check-equal? (in "foobar" "o") "oobar")
(check-false (in "foobar" "z"))
(check-equal? (in 'foobar 'o) 'oobar)
(check-false (in 'foobar 'z))
(check-false (in #\F "F")))

@ -4,7 +4,7 @@
(require (only-in racket/format ~a)) (require (only-in racket/format ~a))
(require (only-in racket/list empty empty? second filter-not splitf-at takef dropf)) (require (only-in racket/list empty empty? second filter-not splitf-at takef dropf))
(require (only-in racket/string string-join)) (require (only-in racket/string string-join))
(require (only-in xml xexpr?)) (require (only-in xml xexpr? xexpr/c))
(require "readability.rkt" "debug.rkt") (require "readability.rkt" "debug.rkt")
(provide (all-defined-out) (all-from-out "readability.rkt" "debug.rkt")) (provide (all-defined-out) (all-from-out "readability.rkt" "debug.rkt"))
@ -113,6 +113,8 @@
(check-false (named-xexpr? '("p" "foo" "bar"))) ; no name (check-false (named-xexpr? '("p" "foo" "bar"))) ; no name
(check-false (named-xexpr? '(p 123)))) ; content is a number (check-false (named-xexpr? '(p 123)))) ; content is a number
;; helper for comparison of values ;; helper for comparison of values
;; normal function won't work for this. Has to be syntax-rule ;; normal function won't work for this. Has to be syntax-rule
(define-syntax-rule (values->list vs) (define-syntax-rule (values->list vs)
@ -154,6 +156,29 @@
(values->list (values 'p '((key "value")) '("foo"))))) (values->list (values 'p '((key "value")) '("foo")))))
;; convenience functions to retrieve only one part of named-xexpr
(define (named-xexpr-name nx)
(named-xexpr? . -> . symbol?)
(define-values (name attr content) (break-named-xexpr nx))
name)
(define (named-xexpr-attr nx)
(named-xexpr? . -> . xexpr-attr?)
(define-values (name attr content) (break-named-xexpr nx))
attr)
(define (named-xexpr-content nx)
(named-xexpr? . -> . xexpr-content?)
(define-values (name attr content) (break-named-xexpr nx))
content)
(module+ test
(check-equal? (named-xexpr-name '(p ((key "value"))"foo" "bar" (em "square"))) 'p)
(check-equal? (named-xexpr-attr '(p ((key "value"))"foo" "bar" (em "square"))) '((key "value")))
(check-equal? (named-xexpr-content '(p ((key "value"))"foo" "bar" (em "square")))
'("foo" "bar" (em "square"))))
;; apply filter proc recursively ;; apply filter proc recursively
(define/contract (filter-tree proc tree) (define/contract (filter-tree proc tree)
(procedure? list? . -> . list?) (procedure? list? . -> . list?)

Loading…
Cancel
Save