You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
61 lines
2.6 KiB
Racket
61 lines
2.6 KiB
Racket
10 years ago
|
#lang racket/base
|
||
|
(require racket/list sugar)
|
||
|
(require "samples.rkt" "quads.rkt" "utils.rkt")
|
||
|
|
||
|
(define ti (block '(measure 54 leading 18) "Meg is " (box '(foo 42)) " ally."))
|
||
|
|
||
|
;ti
|
||
|
|
||
|
|
||
|
(define (tokenize-quad0 q)
|
||
|
(define-values (all-tokens last-tidx)
|
||
|
(let loop ([q q][starting-tidx 0])
|
||
|
(for/fold ([token-list empty][tidx starting-tidx])
|
||
|
([item (in-list (quad-list q))])
|
||
|
(cond
|
||
|
[(quad? item)
|
||
|
(define-values (sub-token-list sub-last-tidx) (loop item tidx))
|
||
|
(values (cons sub-token-list token-list) sub-last-tidx)]
|
||
|
[(string? item)
|
||
|
(define atoms (regexp-match* #rx"." item))
|
||
|
(values (cons atoms token-list) (+ tidx (length atoms)))]
|
||
|
[else (values (cons item token-list) (+ tidx 1))]))))
|
||
|
(values (list->vector (flatten (reverse all-tokens))) last-tidx))
|
||
|
|
||
|
|
||
|
(define (tokenize-quad quad-in)
|
||
|
(define-values (all-tokens all-attrs last-tidx)
|
||
|
(let loop ([current-quad quad-in][attr-acc empty][starting-tidx 0])
|
||
|
(cond
|
||
|
[(empty? (quad-list current-quad)) ; no subelements, so treat this quad as single token
|
||
|
(values (quad (quad-name current-quad) #f empty)
|
||
|
(if (quad-attrs current-quad)
|
||
|
(cons (vector (quad-attrs current-quad) starting-tidx (add1 starting-tidx)) attr-acc)
|
||
|
attr-acc)
|
||
|
(add1 starting-tidx))]
|
||
|
[else ; replace quad with its tokens, exploded
|
||
|
(define-values (tokens-from-fold subattrs-from-fold last-tidx-from-fold)
|
||
|
(for/fold ([token-acc empty][subattr-acc empty][tidx starting-tidx])
|
||
|
([item (in-list (quad-list current-quad))])
|
||
|
(cond
|
||
|
[(quad? item)
|
||
|
(define-values (sub-tokens sub-attrs sub-last-tidx) (loop item attr-acc tidx))
|
||
|
(values (cons sub-tokens token-acc) (cons sub-attrs subattr-acc) sub-last-tidx)]
|
||
|
[(string? item)
|
||
|
(define atoms (regexp-match* #rx"." item))
|
||
|
(values (cons atoms token-acc) subattr-acc (+ tidx (length atoms)))]
|
||
|
[else
|
||
|
(values (cons item token-acc) subattr-acc (+ tidx 1))])))
|
||
|
(values tokens-from-fold
|
||
|
(if (quad-attrs current-quad)
|
||
|
(cons (vector (quad-attrs current-quad) starting-tidx last-tidx-from-fold) subattrs-from-fold)
|
||
|
subattrs-from-fold)
|
||
|
last-tidx-from-fold)])))
|
||
|
(values (list->vector (flatten (reverse all-tokens))) (flatten (reverse all-attrs))))
|
||
|
|
||
|
|
||
|
(define-values (tokens attrs) (tokenize-quad (ti2)))
|
||
|
tokens
|
||
|
attrs
|
||
|
(filter (λ(idx) (box? (vector-ref tokens idx))) (range (vector-length tokens)))
|