You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
typesetting/quad/quadwriter/query.rkt

204 lines
10 KiB
Racket

#lang debug racket
(require quad/base "struct.rkt" "param.rkt")
(provide (all-defined-out))
(define (make-query-index q)
  ;; Flatten the quad tree rooted at `q` into a vector.
  ;; Order is preorder: each quad appears before the quads nested in its
  ;; elems, and siblings keep the order they have in `quad-elems`.
  ;; Elems that are not quads (as judged by `quad?`) are left out.
  (define (flatten-subtree node)
    (define child-quads (filter quad? (quad-elems node)))
    (cons node (append-map flatten-subtree child-quads)))
  (list->vector (flatten-subtree q)))
(define (string->key str)
  ;; Translate a level name, or one of its abbreviations, into the
  ;; canonical key symbol. Anything that is not a known name falls
  ;; through every clause, so `match` raises its no-matching-clause error.
  (define (named . aliases)
    (λ (candidate) (member candidate aliases)))
  (match str
    [(? (named "doc")) 'doc]
    [(? (named "section" "sec" "s")) 'section]
    [(? (named "page" "pg" "p")) 'page]
    [(? (named "column" "col" "c")) 'column]
    [(? (named "block" "b")) 'block]
    [(? (named "line" "ln" "l")) 'line]))
;; Immutable `eq?`-keyed table from the canonical key symbols produced by
;; `string->key` to the predicate that recognizes that kind of quad.
;; The predicates themselves are not defined in this file.
(define preds
  (make-immutable-hasheq
   (list (cons 'doc doc-quad?)
         (cons 'section section-quad?)
         (cons 'page page-quad?)
         (cons 'column column-quad?)
         (cons 'block block-quad?)
         (cons 'line line-quad?))))
(define (parse-query str)
  ;; Split a query string on ":" and turn every piece into a pair.
  ;;
  ;; A piece shaped like "name[arg]" becomes (predicate . subscript), where
  ;; the predicate is looked up in `preds` and the subscript is
  ;;   - a list of numbers (or #f for unparseable parts) when arg contains "..",
  ;;   - a number when arg parses as one,
  ;;   - otherwise a symbol made from arg.
  ;;
  ;; A piece with no bracketed part becomes (key-symbol . #false). Note that
  ;; in this case the car is the key symbol from `string->key`, not a predicate.
  (define (parse-subscript arg)
    (if (string-contains? arg "..")
        (map string->number (string-split arg ".."))
        (or (string->number arg)
            (string->symbol arg))))
  (define (parse-piece piece)
    (define parts (regexp-match #px"^(.*)\\[(.*?)\\]$" piece))
    (if parts
        (cons (hash-ref preds (string->key (cadr parts)))
              (parse-subscript (caddr parts)))
        (cons (string->key piece) #false)))
  (map parse-piece (string-split str ":")))
(define (find-inclusive vec pred start-arg end-arg [count 1])
  ;; Return the index of the `count`-th element of `vec` satisfying `pred`
  ;; when scanning from `start-arg` to `end-arg`, both ends inclusive.
  ;; If `start-arg` is greater than `end-arg` the scan runs backward.
  ;; Returns #false when fewer than `count` elements match, and also when
  ;; `count` is not positive (there is no "zeroth" match; without this guard
  ;; the fold below would run zero times and hand back its seed value, which
  ;; is an index just outside the range rather than a real result).
  (define-values (start end step)
    (if (> start-arg end-arg)
        ;; backward scan: `end` is exclusive for in-range, so go one past
        (values start-arg (sub1 end-arg) -1)
        (values start-arg (add1 end-arg) 1)))
  (and (positive? count)
       ;; seed is one step *before* start, because every round resumes
       ;; one step after the previously found index
       (for/fold ([adjusted-start (- start step)])
                 ([seen (in-range count)]
                  ;; stop early once a round found nothing
                  #:break (not adjusted-start))
         ;; resume after the last hit so it is not found again
         (for/first ([idx (in-range (+ adjusted-start step) end step)]
                     #:when (pred (vector-ref vec idx)))
           idx))))
(define (query-one vec pred subscript this-idx maxidx)
  ;; Resolve a single-result subscript to an index in `vec`, or #false.
  ;; `this-idx` is the index of the querying quad and `maxidx` the far
  ;; boundary of the search domain.
  (define (search from to n)
    (find-inclusive vec pred from to n))
  ;; numeric subscripts: negative counts scan backward from the boundary,
  ;; everything else scans forward from the querying quad
  (define (by-count n)
    (if (negative? n)
        (search maxidx this-idx (abs n))
        (search this-idx maxidx n)))
  (cond
    ;; start at the querying quad and take the first match going back
    [(eq? subscript 'this) (search this-idx 0 1)]
    [(eq? subscript 'last) (by-count -1)]
    ;; second match going back, whether or not the current quad matches
    [(eq? subscript 'prev) (search this-idx 0 2)]
    ;; first match ahead; when the current quad itself matches it is the
    ;; first hit, so ask for the second
    [(eq? subscript 'next)
     (search this-idx maxidx (if (pred (vector-ref vec this-idx)) 2 1))]
    [(number? subscript) (by-count subscript)]
    [else #false]))
(define (query quad-or-index query-str [query-q #false])
  ;; Evaluate `query-str` against a quad tree.
  ;;
  ;; quad-or-index : a quad (an index is built from it with `make-query-index`)
  ;;                 or an already built index vector.
  ;; query-str     : colon-separated pieces of the form "name[subscript]",
  ;;                 parsed by `parse-query`. Each piece narrows the result
  ;;                 of the previous one.
  ;; query-q       : optional quad the search starts from; must be a quad,
  ;;                 otherwise an argument error is raised.
  ;;
  ;; Single-result subscripts (this, last, prev, next, or a number) are
  ;; resolved by `query-one`. Multi-result subscripts (all, *, rest, even,
  ;; odd, or a range "a..b") switch the query into multi mode, in which the
  ;; result is a list of quads. Otherwise the result is a single quad.
  ;; Returns #false when the query string has no pieces or nothing matches.
  (define vec (match quad-or-index
                [(? quad? q) (make-query-index q)]
                [idx idx]))
  (when query-q
    (unless (quad? query-q)
      (raise-argument-error 'query "quad" query-q)))
  (define all-query-pieces (parse-query query-str))
  (and (pair? all-query-pieces)
       ;; initial subtree is whole tree
       ;; each subtree is a pair of start idx (initial node) + end idx (boundary of subtree)
       (let loop ([subtrees (list (cons (if query-q (vector-memq query-q vec) 0)
                                        (sub1 (vector-length vec))))]
                  [query-pieces all-query-pieces]
                  [multimode-query-seen? #false])
         (cond
           ;; if subtrees are null, we have eliminated all searchable domains
           [(null? subtrees) #false]
           [(null? query-pieces)
            (match (for/list ([(idx _) (in-dict subtrees)])
                     (vector-ref vec idx))
              [vals #:when multimode-query-seen? vals] ; in multi mode, return list
              [(list val) val] ; otherwise return single value
              [_ (error 'should-never-have-multiple-vals-in-single-mode)])]
           [else
            (match-define (cons (cons pred subscript) other-query-pieces) query-pieces)
            ;; a procedure that filters the list of matching indexes when the
            ;; subscript is a multi-result one, otherwise #false
            (define maybe-multimode-proc
              (match subscript
                [(or (== 'all eq?) (== '* eq?)) values]
                ;; drop the first match; an empty match list stays empty
                ;; (a bare `cdr` would raise on it)
                [(== 'rest eq?) (λ (xs) (if (pair? xs) (cdr xs) null))]
                [(or (== 'even eq?) (== 'odd eq?))
                 (λ (xs)
                   (define proc (if (eq? subscript 'even) even? odd?))
                   ;; positions are 1-based
                   (for/list ([(x idx) (in-indexed xs)]
                              #:when (proc (add1 idx)))
                     x))]
                ;; range "a..b": both ends must have parsed as real numbers;
                ;; a malformed range falls through and matches nothing
                [(list (? real? larg) (? real? rarg))
                 (λ (xs)
                   (define len (length xs))
                   ;; negative ends count from the end of the match list
                   (define lo (+ larg (if (negative? larg) (add1 len) 0)))
                   (define hi (min len (+ rarg (if (negative? rarg) (add1 len) 0))))
                   ;; accept the ends in either order
                   (define cmp (if (< lo hi) <= >=))
                   (for/list ([(x idx) (in-indexed xs)]
                              #:when (cmp lo (add1 idx) hi))
                     x))]
                [_ #false]))
            (define finish-proc
              ;; don't need to calculate end-idxs if we're at the end of the query
              (if (null? other-query-pieces)
                  void
                  (λ (idx end-idx)
                    (cond ;; otherwise calculate new end-idx
                      [(not idx) end-idx]
                      ;; idx is the last slot of the index, so nothing can
                      ;; follow it; searching from (add1 idx) would read past
                      ;; the end of the vector
                      [(>= (add1 idx) (vector-length vec)) end-idx]
                      ;; try searching for next occurrence after this one, up to max.
                      ;; NOTE(review): when idx equals end-idx, find-inclusive
                      ;; scans backward from (add1 idx), so the boundary it
                      ;; returns can be idx itself or the slot after it —
                      ;; confirm this is the intended boundary
                      [(find-inclusive vec pred (add1 idx) end-idx 1)]
                      [else end-idx]))))
            (loop
             (for*/list ([(start-idx end-idx) (in-dict subtrees)]
                         [idx (match maybe-multimode-proc
                                [#false (in-value (query-one vec pred subscript start-idx end-idx))]
                                [proc
                                 (in-list (proc (for*/list ([idx (in-range start-idx (add1 end-idx))]
                                                            #:when (pred (vector-ref vec idx)))
                                                  idx)))])]
                         #:when idx)
               (cons idx (finish-proc idx end-idx)))
             other-query-pieces
             (or maybe-multimode-proc multimode-query-seen?))]))))
(module+ test
(require rackunit)
;; Running counter shared by every `factory` expansion. It is bumped once for
;; each element a factory creates, before that element is built.
(define counter 0)
;; (factory type proc) builds a quad of `type` whose elems are three quads
;; obtained by calling `proc`. Each elem gets a tag (the quoted `proc`
;; expression, the counter, and a gensym) and a 'count attribute.
;; The 'count is read *after* `proc` returns, so when `proc` itself expands
;; `factory`, an elem's count is the counter value reached once all of its
;; descendants exist. With three elems per level, a line advances the counter
;; by 1, a block by 4, a column by 13, a page by 40 and a section by 121.
(define-syntax-rule (factory type proc)
(make-quad #:type type
#:elems (for/list ([i (in-range 3)])
(set! counter (add1 counter))
(define new-q (proc))
(quad-update! new-q
[tag (format "~a[~a]-~a" 'proc counter (gensym))])
(hash-set! (quad-attrs new-q) 'count counter)
new-q)))
;; One constructor per level; each level holds three quads of the level below.
(define (line) (make-quad #:type line-quad))
(define (block) (factory block-quad line))
(define (col) (factory column-quad block))
(define (page) (factory page-quad col))
(define (sec) (factory section-quad page))
;; 'count of a query result, or #false when the query found nothing.
(define (count q) (and q (quad-ref q 'count)))
;; Root of the fixture. It is built by `factory` directly, so only its
;; descendants receive a 'count attribute here.
(define doc (factory doc-quad sec))
;; numeric subscripts and chained pieces
(check-equal? (count (query doc "sec[2]")) 242)
(check-false (query doc "sec[102]:line[1]"))
(check-equal? (count (query doc "sec[2]:pg[1]")) 162)
(check-equal? (count (query doc "sec[2]:pg[1]:ln[3]")) 128)
;; queries relative to a starting quad (third argument)
(check-eq? (query doc "page[this]" (query doc "line[2]")) (query doc "page[1]"))
(check-equal? (count (query doc "page[this]:line[last]" (query doc "line[2]"))) 41)
(check-equal? (count (query doc "sec[next]" (query doc "sec[1]"))) (count (query doc "sec[2]")) )
(check-equal? (count (query doc "sec[prev]" (query doc "sec[2]"))) (count (query doc "sec[1]")))
(check-equal? (count (query doc "page[prev]" (query doc "page[2]:line[1]")))
(count (query doc "page[1]")))
(check-equal? (count (query doc "page[next]" (query doc "page[2]:line[1]")))
(count (query doc "page[3]")))
;; NOTE(review): this check repeats the previous one verbatim
(check-equal? (count (query doc "page[next]" (query doc "page[2]:line[1]")))
(count (query doc "page[3]")))
(check-equal? (count (query doc "page[next]:page[next]" (query doc "page[1]:line[1]")))
(count (query doc "page[3]")))
(check-equal? (count (query doc "page[next]:page[this]:page[prev]" (query doc "page[1]:line[1]")))
(count (query doc "page[1]")))
;; multi-result subscripts return lists
(check-equal? (map count (query doc "sec[*]")) '(121 242 363))
(check-equal? (map count (query doc "sec[*]:page[1]")) '(41 162 283))
(check-equal? (map count (query doc "sec[2]:page[1]:line[*]"))
'(126 127 128 130 131 132 134 135 136 139 140 141 143 144 145 147 148 149 152 153 154 156 157 158 160 161 162))
(check-equal? (map count (query doc "sec[rest]")) '(242 363))
;; ranges: ends past the match count are clamped, ends may be given in
;; either order, and negative ends count from the end
(check-equal? (map count (query doc "sec[1..2]")) '(121 242))
(check-equal? (map count (query doc "sec[2..3]")) '(242 363))
(check-equal? (map count (query doc "sec[1..3]")) '(121 242 363))
(check-equal? (map count (query doc "sec[1..4]")) '(121 242 363))
(check-equal? (map count (query doc "sec[4..1]")) '(121 242 363))
(check-equal? (map count (query doc "sec[1..-1]")) '(121 242 363))
(check-equal? (map count (query doc "sec[-3..-1]")) '(121 242 363))
(check-equal? (map count (query doc "sec[-1..-3]")) '(121 242 363))
(check-equal? (map count (query doc "sec[1..-2]")) '(121 242))
;; odd / even select by 1-based position among the matches
(check-equal? (map count (query doc "sec[odd]")) '(121 363))
(check-equal? (map count (query doc "sec[even]")) '(242))
)