From 9ab47b1c652e547f0f8d28d983b0444bf05bf89c Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Tue, 21 Jun 2016 12:13:24 -0700 Subject: [PATCH] add overflow error handling --- quad/quad/atomize.rkt | 40 +++++++++++--------- quad/quad/dev.rkt | 4 +- quad/quad/error.rkt | 9 +++++ quad/quad/kafka.rkt | 28 -------------- quad/quad/main.rkt | 11 ++++-- quad/quad/measure.rkt | 2 +- quad/quad/quadlang-test.rkt | 5 +-- quad/quad/quads.rkt | 74 +++++++++++++++++-------------------- quad/quad/render.rkt | 6 +-- quad/quad/typeset.rkt | 42 ++++++++++++--------- 10 files changed, 104 insertions(+), 117 deletions(-) create mode 100644 quad/quad/error.rkt delete mode 100644 quad/quad/kafka.rkt diff --git a/quad/quad/atomize.rkt b/quad/quad/atomize.rkt index b9d533b0..6162a1ac 100644 --- a/quad/quad/atomize.rkt +++ b/quad/quad/atomize.rkt @@ -1,5 +1,5 @@ #lang quad/dev -(require racket/string) +(require racket/string hyphenate) (provide (all-defined-out)) (define (atomize x) @@ -7,23 +7,29 @@ (apply vector-immutable (flatten - (let loop ([x x][loop-attrs default-attrs]) - (cond - [(symbol? x) ($hard empty-attrs x #f)] - [(string? x) - ;; consolidate consecutive whitespaces into single word space - ;; todo: hyphenate here? then they are in the quad stream - (for/list ([c (in-string x)]) - (cons ($hard empty-attrs #f #f) - ;; todo: is it feasible to box or otherwise object-ize a char - ;; so that all the quads with that char share that object - ;; and thus the measurement can be shared too? - ;; (object would have to be packaged with other typographic specs) - ((if (or (char=? c #\space) (char=? c #\-)) $soft $black) loop-attrs #f c)))] - [else - (map (λ(xi) (loop xi ((quad-attrs x) . override-with . loop-attrs))) (quad-val x))]))))) + (cons + (let loop ([x x][loop-attrs default-attrs]) + (cond + [($shim? x) x] + [(string? x) + ;; consolidate consecutive whitespaces into single word space + ;; todo: hyphenate here? then they are in the quad stream + (for/list ([c (in-string x)]) + (cons ($shim empty-attrs #f #f) + ;; todo: is it feasible to box or otherwise object-ize a char + ;; so that all the quads with that char share that object + ;; and thus the measurement can be shared too? + ;; (object would have to be packaged with other typographic specs) + ((casev c + [(#\space) $space] + [(#\-) $hyphen] + [(#\u00AD) $shy] + [else $black]) loop-attrs #f c)))] + [else + (map (λ(xi) (loop xi ((quad-attrs x) . override-with . loop-attrs))) (quad-val x))])) + ($eof empty-attrs #f #f))))) ; add eof so any in-vector loop consumes all the input vals (module+ test (require rackunit) (atomize (quad (make-attrs #:size 10 #:font "Eq") "ba" (line-break) "r" (quad (make-attrs #:size 8) "zam") "q\tux")) - (atomize (quad #f "Meg is-an ally."))) + (atomize (quad #f "snowman"))) diff --git a/quad/quad/dev.rkt b/quad/quad/dev.rkt index 1d87a808..26ed44d2 100644 --- a/quad/quad/dev.rkt +++ b/quad/quad/dev.rkt @@ -1,7 +1,7 @@ #lang racket/base -(require (for-syntax racket/base) racket/list sugar/debug "quads.rkt") +(require (for-syntax racket/base) racket/list sugar/debug "quads.rkt" "error.rkt") (provide (except-out (all-from-out racket/base) #%module-begin) - (all-from-out racket/list sugar/debug "quads.rkt") + (all-from-out racket/list sugar/debug "quads.rkt" "error.rkt") (rename-out [~module-begin #%module-begin]) (for-syntax (all-from-out racket/base))) diff --git a/quad/quad/error.rkt b/quad/quad/error.rkt new file mode 100644 index 00000000..df83fd37 --- /dev/null +++ b/quad/quad/error.rkt @@ -0,0 +1,9 @@ +#lang racket/base +(provide (all-defined-out)) + +(struct exn:quad-overflow exn:fail ()) +(define (raise-overflow-error) + (raise + (exn:quad-overflow + "overflow error: No breakpoint available. Increase line width" + (current-continuation-marks)))) \ No newline at end of file diff --git a/quad/quad/kafka.rkt b/quad/quad/kafka.rkt deleted file mode 100644 index 38c0f1e7..00000000 --- a/quad/quad/kafka.rkt +++ /dev/null @@ -1,28 +0,0 @@ -#lang quad/text #:fit 300 150 -Produces a list of three-element lists, where each three-element list represents a set of consecutive code points for which the Unicode standard specifies character properties. Each three-element list contains two integers and a boolean; the first integer is a starting code-point value (inclusive), the second integer is an ending code-point value (inclusive), and the boolean is #t when all characters in the code-point range have identical results for all of the character predicates above. The three-element lists are ordered in the overall result list such that later lists represent larger code-point values, and all three-element lists are separated from every other by at least one code-point value that is not specified by Unicode. - -@quad[(make-attrs #:size 7)]{One morning, when Gregor Samsa woke from troubled dreams, he found -himself transformed in his bed into a horrible vermin. He lay on -his armour-like back, and if he lifted his head a little he could -see his brown belly, slightly domed and divided by arches into stiff -sections. The bedding was hardly able to cover it and seemed ready -to slide off any moment. His many legs, pitifully thin compared -with the size of the rest of him, waved about helplessly as he -looked.} - -"What's happened to me?" he thought. It wasn't a dream. His room, -a proper human room although a little too small, lay peacefully -between its four familiar walls. A collection of textile samples -lay spread out on the table - Samsa was a travelling salesman - and -above it there hung a picture that he had recently cut out of an -illustrated magazine and housed in a nice, gilded frame. It showed -a lady fitted out with a fur hat and fur boa who sat upright, -raising a heavy fur muff that covered the whole of her lower arm -towards the viewer. - -Gregor then turned to look out the window at the dull weather. -Drops of rain could be heard hitting the pane, which made him feel -quite sad. "How about if I sleep a little bit longer and forget all -this nonsense", he thought, but that was something he was unable to -do because he was used to sleeping on his right, and in his present -state couldn't get into that position. However hard he threw \ No newline at end of file diff --git a/quad/quad/main.rkt b/quad/quad/main.rkt index 067584ac..7f4ef75d 100644 --- a/quad/quad/main.rkt +++ b/quad/quad/main.rkt @@ -8,10 +8,13 @@ (define main-quad (apply quad #f (add-between (list . args) "\n"))) ; at-reader splits lines, but we want one contiguous run ;; branch on config-arg to allow debug / inspection options on #lang line (define config-pieces (string-split (string-trim lang-line-config-arg))) - (case (car config-pieces) - [("#:atoms") (atomize main-quad)] - [("#:fit") (time (debug-render (apply typeset-fit (atomize main-quad) (map string->number (cdr config-pieces)))))] - [else (typeset-fit (atomize main-quad))]))) + (and (pair? config-pieces) + (let ([config-args (map string->number (cdr config-pieces))]) + (case (car config-pieces) + [("in") (atomize main-quad)] + [("out") (time (apply fit (atomize main-quad) config-args))] + [("test") (time (debug-render (apply fit (atomize main-quad) config-args)))] + [else (fit (atomize main-quad))]))))) (module reader syntax/module-reader quad/main) \ No newline at end of file diff --git a/quad/quad/measure.rkt b/quad/quad/measure.rkt index fc951912..09bcdc2c 100644 --- a/quad/quad/measure.rkt +++ b/quad/quad/measure.rkt @@ -5,7 +5,7 @@ (define (measure! q) (quad-dim-set! q (cond - [(or ($black? q) ($soft? q)) + [(quad-printable? q) (* (measure-char (quad-font q) (quad-val q)) (quad-font-size q))] [else 0]))) diff --git a/quad/quad/quadlang-test.rkt b/quad/quad/quadlang-test.rkt index 27c238b0..434a0f56 100644 --- a/quad/quad/quadlang-test.rkt +++ b/quad/quad/quadlang-test.rkt @@ -1,3 +1,2 @@ -#lang quad/text - -Meg is an ally. @;{@(line-break) Meg is an ally.} \ No newline at end of file +#lang quad/text test 300 +Produces a list of three-element lists, where each three-element list represents a set of consecutive code points for which the Unicode standard specifies character properties. Each three-element list contains two integers and a boolean; the first integer is a starting code-point value (inclusive), the second integer is an ending code-point value (inclusive), and the boolean is #t when all characters in the code-point range have identical results for all of the character predicates above. The three-element lists are ordered in the overall result list such that later lists represent larger code-point values, and all three-element lists are separated from every other by at least one code-point value that is not specified by Unicode. \ No newline at end of file diff --git a/quad/quad/quads.rkt b/quad/quad/quads.rkt index b3050016..367fae7f 100644 --- a/quad/quad/quads.rkt +++ b/quad/quad/quads.rkt @@ -4,8 +4,13 @@ (struct $quad (attrs dim val) #:transparent #:mutable) (struct $black $quad () #:transparent) -(struct $soft $quad () #:transparent) -(struct $hard $quad () #:transparent) +(struct $space $quad () #:transparent) +(struct $hyphen $black () #:transparent) ; hyphen should be treated as black in measure & render ops +(struct $shy $quad () #:transparent) +(struct $shim $quad () #:transparent) +(struct $eof $quad () #:transparent) + +(define (quad-printable? x) (or ($black? x) ($space? x) ($hyphen? x))) (define quad? $quad?) @@ -30,8 +35,8 @@ measure (line width) (define (munge-whitespace str) ;; reduce multiple whitespace to single - ;; trim remaining - (string-trim (regexp-replace* #px"\\s+" str " "))) + ;; trim remaining (? maybe not) + (regexp-replace* #px"\\s+" str " ")) (define (merge-strings xs) ;; merge consecutive strings @@ -47,30 +52,6 @@ measure (line width) empty (list (munge-whitespace (string-append* strs)))) nonstrs (loop restrest))]))) -#;(define (merge-strings1 xs) - ;; merge consecutive strings - (define-values (last-list list-of-lists last-negating) - (for/fold ([current-list empty] - [list-of-lists empty] - [negating? #f]) - ([x (in-list xs)]) - (define current-pred (if negating? (λ (x) (not (string? x))) string?)) - (if (current-pred x) - (values (cons x current-list) list-of-lists negating?) - (values (cons x null) (if (not (empty? current-list)) - (cons (reverse current-list) list-of-lists) - list-of-lists) (not negating?))))) - (append-map (λ(xs) (if (string? (car xs)) - (list (munge-whitespace (string-append* xs))) - xs)) - (reverse (cons (reverse last-list) list-of-lists)))) - -#;(require sugar/list) -#;(define (merge-strings xs) - (append-map (λ(xis) (if (string? (car xis)) - (list (munge-whitespace (string-append* xis))) - xis)) (slicef xs string?))) - (struct $attrs (size font) #:transparent) (define (make-attrs #:size [size #f] @@ -102,25 +83,36 @@ measure (line width) (require (for-syntax sugar/debug)) (define-syntax-rule (define-break name) - (define (name) (quad #f 'name))) + (define (name) ($shim (make-attrs) 'name #f))) (define-break page-break) (define-break column-break) (define-break block-break) (define-break line-break) - -(define-syntax (caseq stx) - ;; like case but strictly uses `eq?` comparison (as opposed to `equal?`) +(define-syntax (define-case-macro stx) (syntax-case stx () - [(_ test-val [(match-val ...) . result] ... [else . else-result]) - #'(cond - [(memq test-val '(match-val ...)) . result] ... - [else . else-result])] - [(_ test-val [(match-val ...) . result] ...) - #'(caseq test-val - [(match-val ...) . result] ... - [else (error 'caseq "no match")])])) + [(_ ID PRED) + #'(define-syntax (ID stx) + (syntax-case stx () + [(_ test-val + [(match-val0 . match-vals) . result] (... ...) + [else . else-result]) + #'(cond + [(PRED test-val '(match-val0 . match-vals)) . result] (... ...) + [else . else-result])] + [(_ test-val + match-clause (... ...)) + #'(ID test-val + match-clause (... ...) + [else (error 'ID "no match")])]))])) + +;; like case but strictly uses `eq?` comparison (as opposed to `equal?`) +(define-case-macro caseq memq) + +;; `eqv?` is OK for chars (same as `char=?`) +(define-case-macro casev memv) + (module+ test (require rackunit) @@ -129,4 +121,4 @@ measure (line width) (check-false (quad? 42)) (check-equal? (quad-attrs q) (make-attrs)) (check-equal? (quad-val q) '("bar")) - (check-equal? (merge-strings '(50 " foo " " bar " 42 " zam")) '(50 "foo bar" 42 "zam"))) \ No newline at end of file + #;(check-equal? (merge-strings '(50 " foo " " bar " 42 " zam")) '(50 "foo bar" 42 "zam"))) \ No newline at end of file diff --git a/quad/quad/render.rkt b/quad/quad/render.rkt index 96c11ddf..2740744d 100644 --- a/quad/quad/render.rkt +++ b/quad/quad/render.rkt @@ -19,12 +19,12 @@ (for ([q (in-vector qs)]) (define qd (quad-dim q)) (cond - [(symbol? qd) - (case qd + [(symbol? qd) ; symbol indicates a break + (caseq qd [(line-break) (line-counter-increment!)] [(column-break) (line-counter-reset!) (printf "\n--col--")] [(page-break) (printf "\n\n==page==\n")]) (print-line-counter)] - [(or ($black? q) ($soft? q)) (printf "~a" (quad-val q))] + [(quad-printable? q) (printf "~a" (quad-val q))] [else (void)])) (printf "\n\n")) \ No newline at end of file diff --git a/quad/quad/typeset.rkt b/quad/quad/typeset.rkt index 9a2d079c..95f6c550 100644 --- a/quad/quad/typeset.rkt +++ b/quad/quad/typeset.rkt @@ -2,10 +2,14 @@ (provide (all-defined-out)) (require "measure.rkt") -(define last-breakpoint-k (λ _ (error 'typeset-fit "No breakpoint available. Increase line width"))) +(define last-breakpoint-k raise-overflow-error) + (define (set-breakpoint-k-here!) (let/cc k (set! last-breakpoint-k k) #f)) +(define (already-breakpoint-type? q type) + (eq? (quad-dim q) type)) + (define char-width 6) (define line-width (* 60 char-width)) ; 50 chars, each 6 pts wide (define line-height 12) @@ -15,12 +19,12 @@ ;; posn-page : horiz position of column within page ;; posn-col : vert position of line within column ;; posn-line : horiz position of char within line -(struct posn (page col line)) +(struct posn (page col line) #:transparent) (define (make-posn [page 0] [col 0] [line 0]) (posn page col line)) (define page-start-position (make-posn)) -(define (typeset-fit qs [line-width line-width] [col-height col-height]) +(define (fit qs [line-width line-width] [col-height col-height]) (define (handle-break val [current-posn #f]) (caseq val ; test in order of frequency @@ -29,38 +33,40 @@ [(page-break) page-start-position] [else current-posn])) - (for/fold ([current-posn (handle-break 'page-break)]) + (for/fold ([current-posn (handle-break 'page-break)]) ; moves to page start position ([q (in-vector qs)]) (unless (quad-dim q) (measure! q)) (cond - ;; hard may contain an imperative break. Test for this first because it makes the rest irrelevant. - ;; todo: how to suppress spaces adjacent to imperative breaks? - [($hard? q) (handle-break (quad-dim q) current-posn)] + ;; shim may contain an imperative break. + [($shim? q) (handle-break (quad-dim q) current-posn)] ;; test for overset (before a new bp-k gets set). - ;; order is precedence: test bigger breaks first ;; send break type back through continuation - ;; test page-horiz with >= because one column impliedly exists at the start - ;; (we could also make this explicit with page-start-position but it seems clearer to use zeroes there) - [(>= (posn-page current-posn) page-width) (last-breakpoint-k 'page-break)] + ;; test tp-horiz with > because no characters exist in the line at the start + [(> (posn-line current-posn) line-width) (last-breakpoint-k 'line-break)] ;; test tp-vert with >= because one column impliedly exists at the start [(>= (posn-col current-posn) col-height) (last-breakpoint-k 'column-break)] - ;; but test tp-horiz with > because no characters exist in the line at the start - [(> (posn-line current-posn) line-width) (last-breakpoint-k 'line-break)] + ;; test page-horiz with >= because one column impliedly exists at the start + [(>= (posn-page current-posn) page-width) (last-breakpoint-k 'page-break)] ;; set a new bp-k, or resume after invoking a bp-k - [(and ($soft? q) (set-breakpoint-k-here!)) - => ; grabs the end value of the conditional, which is the arg passed when breakpoint-k was invoked + [(and ($space? q) (set-breakpoint-k-here!)) + => ; grabs the value of the condition: the arg passed to breakpoint-k (λ (breakpoint-k-result) + (when (already-breakpoint-type? q breakpoint-k-result) + ;; it means we're caught in an overflow loop, so + (raise-overflow-error)) ;; convert the white, thereby consuming it. todo: don't consume hyphens - (quad-dim-set! q breakpoint-k-result) + (quad-dim-set! q breakpoint-k-result) (handle-break breakpoint-k-result current-posn))] + [else (posn (posn-page current-posn) (posn-col current-posn) (+ (posn-line current-posn) (quad-dim q)))])) qs) (module+ test (require "atomize.rkt" "render.rkt") - (define q (quad #f "One morning, when Gregor " (line-break) " and his old hizn himself")) - (time (debug-render (typeset-fit (atomize q))))) \ No newline at end of file + ;; todo: preserve space between black quads + (define q (quad #f "One morning " (quad #f "and himself"))) + (time (debug-render (fit (atomize q))))) \ No newline at end of file