You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
typesetting/quad/quads-typed.rkt

289 lines
11 KiB
Racket

#lang typed/racket/base
(require (for-syntax typed/racket/base racket/syntax racket/string))
(require "lib-typed.rkt")
;; note to self: a require/typed function with proper typing
;; is faster than a generic function + type assertion at location of call
(require/typed racket/list
[flatten ((Listof QuadAttrPair) . -> . HashableList)])
(require/typed sugar/list [trimf (All (A) ((Listof A) (A . -> . Boolean) -> (Listof A)))]
[filter-split (All (A) ((Listof A) (A . -> . Boolean) -> (Listof (Listof A))))])
(require/typed racket/string [string-append* ((Listof String) . -> . String)])
(require/typed sugar/string [ends-with? (String String . -> . Boolean)])
(require sugar/debug)
(provide (all-defined-out))
(define-syntax (define/typed stx)
(syntax-case stx ()
[(_ (proc-name arg ... . rest-arg) type-expr body ...)
#'(define/typed proc-name type-expr
(λ(arg ... . rest-arg) body ...))]
[(_ proc-name type-expr body ...)
#'(begin
(: proc-name type-expr)
(define proc-name body ...))]))
(define-syntax (define/typed+provide stx)
(syntax-case stx ()
[(_ (proc-name arg ... . rest-arg) type-expr body ...)
#'(begin
(provide proc-name)
(define/typed proc-name type-expr
(λ(arg ... . rest-arg) body ...)))]
[(_ proc-name type-expr body ...)
#'(begin
(provide proc-name)
(begin
(: proc-name type-expr)
(define proc-name body ...)))]))
(define-syntax-rule (even-members xs)
(for/list : (Listof Any) ([(x i) (in-indexed xs)] #:when (even? i))
x))
(define-type QuadName Symbol)
(define-predicate QuadName? QuadName)
(define-type QuadAttrKey Symbol)
(define-predicate QuadAttrKey? QuadAttrKey)
(define-type QuadAttrValue Any)
(define-predicate QuadAttrValue? QuadAttrValue)
(define-type QuadAttrs (HashTable QuadAttrKey QuadAttrValue))
;;(define-predicate QuadAttrs? QuadAttrs) ;; won't work because it generates a chaperone contract
(define-type HashableList (Rec duo (U Null (List* QuadAttrKey Any duo))))
(provide HashableList?)
(define-predicate HashableList? HashableList)
(: quad-attrs? (Any . -> . Boolean))
(define (quad-attrs? x)
(and (hash? x) (andmap QuadAttrKey? (hash-keys x))))
(define-type QuadListItem (U Quad String))
(define-predicate QuadListItem? QuadListItem)
(define-type QuadList (Listof QuadListItem))
(define-predicate QuadList? QuadList)
(define-type (Treeof A) (Rec as (U A (Listof as))))
(struct quad ([name : QuadName] [attrs : QuadAttrs] [list : QuadList]) #:transparent)
(define-type Quad quad)
(define-predicate Quad? Quad)
(define quad-attr-ref
(case-lambda
[([q : Quad] [key : QuadAttrKey])
(hash-ref (quad-attrs q) key)]
[([q : Quad] [key : QuadAttrKey] [default : QuadAttrValue])
(hash-ref (quad-attrs q) key (λ() default))]))
(define-syntax (quad-attr-ref/parameter stx)
(syntax-case stx ()
[(_ q key)
(with-syntax ([world:key-default (format-id stx "~a-default" (string-trim (symbol->string (syntax->datum #'key)) "-key"))])
#'(quad-attr-ref q key (world:key-default)))]))
(define cannot-be-common-attrs '(width x y page))
(define attr-missing (gensym))
(define-type QuadAttrPair (Pairof QuadAttrKey QuadAttrValue))
(: quad-ends-with? (Quad String . -> . Boolean))
(define (quad-ends-with? q str)
(cond
[(not (empty? (quad-list q)))
(define last-item (list-ref (quad-list q) (length (quad-list q))))
(cond
[(string? last-item) (ends-with? last-item str)]
[(quad? last-item) (quad-ends-with? last-item str)]
[else #f])]
[else #f]))
(: quad-append (Quad QuadListItem . -> . Quad))
(define (quad-append q new-item)
(quad (quad-name q) (quad-attrs q) (append (quad-list q) (list new-item))))
(: quad->string (Quad . -> . String))
(define (quad->string x)
(let loop : String ([x : (U Quad String) x])
(cond
[(string? x) x]
;; else branch relies on fact that x is either Quad or String
[else (string-append* ((inst map String QuadListItem) loop (quad-list x)))])))
(define/typed+provide (gather-common-attrs qs)
((Listof Quad) . -> . (Option HashableList))
(: check-cap (Quad QuadAttrPair . -> . Boolean))
(define (check-cap q cap) ; cap = candidate-attr-pair
(equal? (quad-attr-ref q (car cap) attr-missing) (cdr cap)))
(and (not (null? qs))
(let loop
([qs qs]
;; start with the set of pairs in the first quad, then filter it down
[candidate-attr-pairs : (Listof QuadAttrPair) (let ([first-attrs (quad-attrs (car qs))])
(if first-attrs
(for/fold ([kvps null]) ([k (in-list (hash-keys first-attrs))])
(if (member k cannot-be-common-attrs)
kvps
(cons (cons k (hash-ref first-attrs k)) kvps)))
null))])
(cond
[(null? candidate-attr-pairs) #f] ; ran out of possible pairs, so return #f
[(null? qs) (flatten candidate-attr-pairs)] ; ran out of quads, so return common-attr-pairs
;; todo: reconsider type interface between output of this function and input to quadattrs
[else (loop (cdr qs) (filter (λ([cap : QuadAttrPair]) (check-cap (car qs) cap)) candidate-attr-pairs))]))))
(define/typed (make-quadattrs xs)
;; no point typing the input as (U QuadAttrKey QuadAttrValue)
;; because QuadAttrValue is Any, so that's the same as plain Any
((Listof Any) . -> . QuadAttrs)
(let-values ([(ks vs even?) (for/fold
([ks : (Listof QuadAttrKey) null][vs : (Listof QuadAttrValue) null][even? : Boolean #t])
([x (in-list xs)])
(if even?
;; todo: how to avoid cast here by using HashableList typing?
(values (cons (cast x QuadAttrKey) ks) vs #f)
(values ks (cons (cast x QuadAttrValue) vs) #t)))])
(when (not even?) (error 'quadattrs "odd number of elements in ~a" xs))
(for/hash : QuadAttrs ([k (in-list ks)][v (in-list vs)])
(values k v))))
(define-syntax (define-quad-type stx)
(syntax-case stx ()
[(_ id)
(with-syntax ([id? (format-id #'id "~a?" #'id)]
[IdQuad (format-id #'id "~aQuad" (string-titlecase (symbol->string (syntax->datum #'id))))]
[quads->id (format-id #'id "quads->~a" #'id)])
#'(begin
;; quad converter
(define/typed (quads->id qs)
((Listof Quad) . -> . Quad)
(apply id (gather-common-attrs qs) qs))
;; v1 of quad maker
#;(define/typed (id [attrs #f] . xs)
(case->
(-> Quad)
(((U False QuadAttrs HashableList)) #:rest QuadListItem . ->* . Quad))
(quad 'id (cond
;; need this cast because no predicate can be made for QuadAttrs
[(quad-attrs? attrs) (cast attrs QuadAttrs)]
[(list? attrs)
(if (HashableList? attrs)
(quadattrs attrs)
(error 'id "got non-hashable list ~a" attrs))]
[else (quadattrs '())]) (assert xs QuadList?)))
;; v2: much slower than v1 ... why?
#;(define/typed id
(case->
(((U False QuadAttrs HashableList)) #:rest QuadListItem . ->* . Quad)
(-> Quad))
(case-lambda
[(attrs . xs)
(quad 'id (if attrs
(if (list? attrs)
(quadattrs attrs)
attrs)
(quadattrs null)) xs)]
[() (quad 'id (quadattrs null) null)]))
(struct IdQuad quad () #:transparent)
;; version 3
;; dummy kw arg is needed to typecheck correctly
(define/typed (id [attrs #f] #:zzz [zzz 0] . xs)
(() ((U False QuadAttrs HashableList) #:zzz Zero) #:rest QuadListItem . ->* . Quad)
(IdQuad 'id (if attrs
(if (list? attrs)
(make-quadattrs attrs)
attrs)
(make-quadattrs null)) xs))
(: id? (Any . -> . Boolean))
(define (id? x)
(and (quad? x) (equal? (quad-name x) 'id)))))]))
(define/typed (whitespace? x [nbsp? #f])
((Any) (Boolean) . ->* . Boolean)
(cond
[(quad? x) (whitespace? (quad-list x) nbsp?)]
[(string? x) (or (and (regexp-match #px"\\p{Zs}" x) ; Zs = unicode whitespace category
(or nbsp? (not (regexp-match #px"\u00a0" x)))))] ; 00a0: nbsp
[(list? x) (and (not (empty? x)) (andmap (λ(x) (whitespace? x nbsp?)) x))] ; andmap returns #t for empty lists
[else #f]))
(define (whitespace/nbsp? x)
(whitespace? x #t))
(define-syntax (define-break-type stx)
(syntax-case stx ()
[(_ id)
(with-syntax ([split-on-id-breaks (format-id #'id "split-on-~a-breaks" #'id)]
[id-break (format-id #'id "~a-break" #'id)]
[id-break? (format-id #'id "~a-break?" #'id)]
[multi-id (format-id #'id "multi~a" #'id)]
[multi-id? (format-id #'id "multi~a?" #'id)]
[quads->multi-id (format-id #'id "quads->multi~a" #'id)])
#'(begin
(define-quad-type id)
(define-quad-type id-break)
(define-quad-type multi-id)
;; breaker
(: split-on-id-breaks ((Listof Quad) . -> . (Listof (Listof Quad))))
(define (split-on-id-breaks xs)
;; omit leading & trailing whitespace, because they're superfluous next to a break
(map (λ([xs : (Listof Quad)]) (trimf xs whitespace?)) (filter-split xs id-break?)))))]))
(define quad= equal?)
(define/typed (quad-car q)
(Quad . -> . QuadListItem)
(define ql (quad-list q))
(if (not (empty? ql))
((inst car QuadListItem QuadList) ql)
(error 'quad-car "quad-list empty")))
(define/typed (quad-cdr q)
(Quad . -> . QuadList)
(define ql (quad-list q))
(if (not (empty? ql))
((inst cdr QuadListItem QuadList) ql)
(error 'quad-car "quad-list empty")))
(: quad-has-attr? (Quad QuadAttrKey . -> . Boolean))
(define (quad-has-attr? q key)
(hash-has-key? (quad-attrs q) key))
(define-quad-type box)
(define-quad-type spacer)
(define-quad-type kern)
(define-quad-type optical-kern)
(define-quad-type flag)
(define-quad-type doc)
(define-quad-type input)
(define-quad-type piece)
(define-quad-type run)
(define-break-type word)
(define/typed (word-string c)
(Quad . -> . String)
(define ql (quad-list c))
(if (and (not (null? ql)) (string? (car ql)))
(car ql)
""))
(define-break-type page)
(define-break-type column)
(define-break-type block)
(define-break-type line)