track render by source+output jobs (#240)

pull/243/head
Matthew Butterick 4 years ago committed by GitHub
parent 1f1bee90fd
commit 3160a46beb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -48,73 +48,74 @@
(define (mod-date-missing-or-changed? source-path template-path) (define (mod-date-missing-or-changed? source-path template-path)
(not (hash-has-key? mod-date-hash (paths->key 'output source-path template-path)))) (not (hash-has-key? mod-date-hash (paths->key 'output source-path template-path))))
(define (parallel-render source-paths-in output-paths-in job-count-arg) (struct $job (source output) #:transparent)
(struct $jobresult (job finished-successfully) #:transparent)
(define (parallel-render jobs-in worker-count-arg)
;; if jobs are already in the cache, pull them out before assigning workers ;; if jobs are already in the cache, pull them out before assigning workers
;; using worker to fetch from cache is slower ;; using worker to fetch from cache is slower
(define source-to-output-path-table (map cons source-paths-in output-paths-in)) (define-values (uncached-jobs previously-cached-jobs)
(define-values (uncached-source-paths previously-cached-jobs) (for/fold ([ujobs null]
(for/fold ([usps null] [pcjobs null])
[pcjs null]) ([job (in-list jobs-in)])
([source-path (in-list source-paths-in)]
[output-path (in-list output-paths-in)])
(match (let/ec exit (match (let/ec exit
(define template-path (define template-path
(cache-ref! (template-cache-key source-path output-path) (λ () (exit 'template-miss)))) (cache-ref! (template-cache-key ($job-source job) ($job-output job)) (λ () (exit 'template-miss))))
(render-to-file-if-needed source-path template-path output-path (λ () (exit 'render-miss)))) (render-to-file-if-needed ($job-source job) template-path ($job-output job) (λ () (exit 'render-miss))))
[(? symbol? sym) (values (cons source-path usps) pcjs)] [(? symbol? sym) (values (cons job ujobs) pcjobs)]
[_ (values usps (cons (cons source-path #true) pcjs))]))) [_ (values ujobs (cons ($jobresult job #true) pcjobs))])))
(define job-count (define worker-count
(min (min
(length uncached-source-paths) (length uncached-jobs)
(match job-count-arg (match worker-count-arg
[#true (processor-count)] [#true (processor-count)]
[(? exact-positive-integer? count) count] [(? exact-positive-integer? count) count]
[_ (raise-user-error 'render-batch "~a is not an exact positive integer or #true" job-count-arg)]))) [_ (raise-user-error 'render-batch "~a is not an exact positive integer or #true" worker-count-arg)])))
;; initialize the workers ;; initialize the workers
(define worker-evts (define worker-evts
(for/list ([wpidx (in-range job-count)]) (for/list ([wpidx (in-range worker-count)])
(define wp (place ch (define wp
(let loop () (place ch
(match-define (list path output-path poly-target) (let loop ()
(place-channel-put/get ch (list 'wants-job))) (match-define (list source-path output-path poly-target)
(parameterize ([current-poly-target poly-target]) (place-channel-put/get ch (list 'wants-job)))
(place-channel-put/get ch (list 'wants-lock output-path)) (parameterize ([current-poly-target poly-target])
;; trap any exceptions and pass them back as crashed jobs. (place-channel-put/get ch (list 'wants-lock output-path))
;; otherwise, a crashed rendering place can't recover, and the parallel job will be stuck. ;; trap any exceptions and pass them back as crashed jobs.
(with-handlers ([exn:fail? (λ (e) (place-channel-put ch (list 'crashed-job path #f)))]) ;; otherwise, a crashed rendering place can't recover, and the parallel job will be stuck.
(match-define-values (_ _ ms _) (with-handlers ([exn:fail? (λ (e) (place-channel-put ch (list 'crashed-job source-path output-path #f)))])
;; we don't use `render-to-file-if-needed` because we've already checked the render cache (match-define-values (_ _ ms _)
;; if we reached this point, we know we need a render ;; we don't use `render-to-file-if-needed` because we've already checked the render cache
(time-apply render-to-file (list path #f output-path))) ;; if we reached this point, we know we need a render
(place-channel-put ch (list 'finished-job path ms)))) (time-apply render-to-file (list source-path #f output-path)))
(loop)))) (place-channel-put ch (list 'finished-job source-path output-path ms))))
(handle-evt wp (λ (val) (list* wpidx wp val))))) (loop))))
(handle-evt wp (λ (val) (list* wpidx wp val)))))
(define poly-target (current-poly-target)) (define poly-target (current-poly-target))
;; `locks` and `blocks` are (listof (cons/c evt? path?)) (struct $lock (worker path) #:transparent)
(let loop ([source-paths (reverse uncached-source-paths)] ;; `locks` and `blocks` are (listof $lock)
(let loop ([jobs (reverse uncached-jobs)]
[locks-in null] [locks-in null]
[blocks-in null] [blocks-in null]
;; `completed-jobs` is (listof (cons/c path? boolean?)) [completed-job-results previously-cached-jobs] ; (listof jobresult)
[completed-jobs previously-cached-jobs]
[completed-job-count (length previously-cached-jobs)]) [completed-job-count (length previously-cached-jobs)])
;; try to unblock blocked workers ;; try to unblock blocked workers
(define-values (locks blocks) (define-values (locks blocks)
(for/fold ([locks locks-in] (for/fold ([locks locks-in]
[blocks null]) [blocks null])
([block (in-list blocks-in)]) ([block (in-list blocks-in)])
(match-define (cons wp path) block) (match-define ($lock wp path) block)
(cond (cond
[(member path (dict-values locks)) [(member path (map $lock-path locks))
(values locks (cons block blocks))] (values locks (cons block blocks))]
[else [else
(place-channel-put wp 'lock-approved) (place-channel-put wp 'lock-approved)
(values (cons block locks) blocks)]))) (values (cons block locks) blocks)])))
(cond (cond
[(eq? completed-job-count (length source-paths-in)) [(eq? completed-job-count (length jobs-in))
;; second bite at the apple for crashed jobs. ;; second bite at the apple for crashed jobs.
;; 1) many crashes that arise in parallel rendering are ;; 1) many crashes that arise in parallel rendering are
;; a result of concurrency issues (e.g. shared files not being readable at the right moment). ;; a result of concurrency issues (e.g. shared files not being readable at the right moment).
@ -126,42 +127,41 @@
;; if it was a concurrency-related error, it will disappear. ;; if it was a concurrency-related error, it will disappear.
;; if it was a legit error, the render will stop and print a trace. ;; if it was a legit error, the render will stop and print a trace.
;; crashed jobs are completed jobs that weren't finished ;; crashed jobs are completed jobs that weren't finished
(define failed-source-paths (for/list ([(path finished?) (in-dict completed-jobs)] (for/list ([jr (in-list completed-job-results)]
#:unless finished?) #:unless ($jobresult-finished-successfully jr))
path)) ($jobresult-job jr))]
(define failed-output-paths (for/list ([source-path (in-list failed-source-paths)])
(dict-ref source-to-output-path-table source-path)))
(list failed-source-paths failed-output-paths)]
[else [else
(match (apply sync worker-evts) (match (apply sync worker-evts)
[(list wpidx wp 'wants-job) [(list wpidx wp 'wants-job)
(match source-paths (match jobs
[(? null?) (loop null locks blocks completed-jobs completed-job-count)] [(? null?) (loop null locks blocks completed-job-results completed-job-count)]
[(cons source-path rest) [(cons ($job source-path output-path) rest)
(place-channel-put wp (list source-path (dict-ref source-to-output-path-table source-path) poly-target)) (place-channel-put wp (list source-path output-path poly-target))
(loop rest locks blocks completed-jobs completed-job-count)])] (loop rest locks blocks completed-job-results completed-job-count)])]
[(list wpidx wp (and (or 'finished-job 'crashed-job) tag) source-path ms) [(list wpidx wp (and (or 'finished-job 'crashed-job) tag) source-path output-path ms)
(match tag (match tag
['finished-job ['finished-job
(message (message
(format "rendered @ job ~a /~a ~a" (format "rendered @ job ~a /~a ~a"
(~r (add1 wpidx) #:min-width (string-length (~r job-count)) #:pad-string " ") (~r (add1 wpidx) #:min-width (string-length (~r worker-count)) #:pad-string " ")
(find-relative-path (current-project-root) (dict-ref source-to-output-path-table source-path)) (find-relative-path (current-project-root) output-path)
(if (< ms 1000) (format "(~a ms)" ms) (format "(~a s)" (/ ms 1000.0)))))] (if (< ms 1000) (format "(~a ms)" ms) (format "(~a s)" (/ ms 1000.0)))))]
[_ [_
(message (message
(format "render crash @ job ~a /~a (retry pending)" (format "render crash @ job ~a /~a (retry pending)"
(add1 wpidx) (add1 wpidx)
(find-relative-path (current-project-root) (dict-ref source-to-output-path-table source-path))))]) (find-relative-path (current-project-root) output-path)))])
(loop source-paths (loop jobs
(match (assoc wp locks) (match (findf (λ (lock) (equal? ($lock-worker lock) wp)) locks)
[#false locks] [#false locks]
[lock (remove lock locks)]) [lock (remove lock locks)])
blocks blocks
(cons (cons source-path (eq? tag 'finished-job)) completed-jobs) (let* ([job-finished? (eq? tag 'finished-job)]
[jr ($jobresult ($job source-path output-path) job-finished?)])
(cons jr completed-job-results))
(add1 completed-job-count))] (add1 completed-job-count))]
[(list wpidx wp 'wants-lock path) [(list wpidx wp 'wants-lock output-path)
(loop source-paths locks (append blocks (list (cons wp path))) completed-jobs completed-job-count)])]))) (loop jobs locks (append blocks (list ($lock wp output-path))) completed-job-results completed-job-count)])])))
(define current-null-output? (make-parameter #f)) (define current-null-output? (make-parameter #f))
@ -178,7 +178,7 @@
;; meaning, if source is "test.poly.pm" and we get `raco pollen render test.txt`, ;; meaning, if source is "test.poly.pm" and we get `raco pollen render test.txt`,
;; then the output path argument should force .txt rendering, regardless of `current-poly-target` setting ;; then the output path argument should force .txt rendering, regardless of `current-poly-target` setting
;; so the output path may contain information we need that we can't necessarily derive from the source path. ;; so the output path may contain information we need that we can't necessarily derive from the source path.
(define-values (expanded-source-paths expanded-output-paths) (define-values (expanded-source-paths expanded-output-paths)
;; we generate the output paths in parallel with the source paths ;; we generate the output paths in parallel with the source paths
;; rather than afterward, because ;; rather than afterward, because
@ -190,9 +190,15 @@
(let loop ([paths paths-in] [sps null] [ops null]) (let loop ([paths paths-in] [sps null] [ops null])
(match paths (match paths
[(? null?) [(? null?)
(define (cleanup paths) ;; it's possible that we have multiple output names for one poly file
(sort (remove-duplicates paths) string<? #:key path->string)) ;; so after we expand, we only remove duplicates where both the source and dest in the pair
(apply values (map cleanup (list sps ops)))] ;; are the same
(let* ([pairs (remove-duplicates (map cons sps ops))]
[pairs (sort pairs string<? #:key (compose1 path->string car))]
[pairs (sort pairs string<? #:key (compose1 path->string cdr))])
(for/lists (sps ops)
([pr (in-list pairs)])
(values (car pr) (cdr pr))))]
[(cons path rest) [(cons path rest)
(match (->complete-path path) (match (->complete-path path)
[(? pagetree-source? pt) [(? pagetree-source? pt)
@ -206,12 +212,14 @@
(cond (cond
[(null? expanded-source-paths) (message "[no paths to render]")] [(null? expanded-source-paths) (message "[no paths to render]")]
[(eq? special-output 'dry-run) (for-each message expanded-source-paths)] [(eq? special-output 'dry-run) (for-each message expanded-source-paths)]
[else (parameterize ([current-null-output? (eq? special-output 'null)]) [else
(apply for-each (λ (sp op) (render-to-file-if-needed sp #f op)) (define all-jobs (map $job expanded-source-paths expanded-output-paths))
(match wants-parallel-render? (parameterize ([current-null-output? (eq? special-output 'null)])
;; returns crashed jobs for serial rendering (for-each (λ (job) (render-to-file-if-needed ($job-source job) #f ($job-output job)))
[#false (list expanded-source-paths expanded-output-paths)] (match wants-parallel-render?
[jobs-arg (parallel-render expanded-source-paths expanded-output-paths jobs-arg)])))])) ;; returns crashed jobs for serial rendering
[#false all-jobs]
[worker-count-arg (parallel-render all-jobs worker-count-arg)])))]))
(define+provide/contract (render-pagenodes pagetree-or-path) (define+provide/contract (render-pagenodes pagetree-or-path)
((or/c pagetree? pathish?) . -> . void?) ((or/c pagetree? pathish?) . -> . void?)
@ -399,7 +407,7 @@
(define (file-exists-or-has-source? path) ; path could be #f (define (file-exists-or-has-source? path) ; path could be #f
(and path (for/first ([proc (in-list (list values ->preproc-source-path ->null-source-path))] (and path (for/first ([proc (in-list (list values ->preproc-source-path ->null-source-path))]
#:when (file-exists? (proc path))) #:when (file-exists? (proc path)))
path))) path)))
(define (get-template-from-metas source-path output-path-ext) (define (get-template-from-metas source-path output-path-ext)
(with-handlers ([exn:fail:contract? (λ (e) #f)]) ; in case source-path doesn't work with cached-require (with-handlers ([exn:fail:contract? (λ (e) #f)]) ; in case source-path doesn't work with cached-require
@ -443,7 +451,7 @@
(for/or ([proc (list get-template-from-metas (for/or ([proc (list get-template-from-metas
get-default-template get-default-template
get-fallback-template)]) get-fallback-template)])
(file-exists-or-has-source? (proc source-path output-path-ext)))] (file-exists-or-has-source? (proc source-path output-path-ext)))]
[_ #false])) [_ #false]))
(if (current-session-interactive?) (if (current-session-interactive?)
;; don't cache templates in interactive session, for fresher reloads ;; don't cache templates in interactive session, for fresher reloads

@ -2,11 +2,15 @@
(require rackunit (require rackunit
pollen/setup pollen/setup
racket/runtime-path racket/runtime-path
pollen/render) pollen/render
racket/file)
;; define-runtime-path only allowed at top level ;; define-runtime-path only allowed at top level
(define-runtime-path poly-dir "data/poly") (define-runtime-path poly-dir "data/poly")
(define-runtime-path poly-source "data/poly/test.poly.pm") (define-runtime-path poly-source "data/poly/test.poly.pm")
(define-runtime-path pollen-cache "data/poly/compiled")
(define-runtime-path test.txt "data/poly/test.txt")
(define-runtime-path test.html "data/poly/test.html")
(parameterize ([current-directory poly-dir] (parameterize ([current-directory poly-dir]
[current-project-root poly-dir] [current-project-root poly-dir]
@ -14,4 +18,20 @@
(parameterize ([current-poly-target 'txt]) (parameterize ([current-poly-target 'txt])
(check-equal? (render poly-source) "TITLE is **big**")) (check-equal? (render poly-source) "TITLE is **big**"))
(parameterize ([current-poly-target 'html]) (parameterize ([current-poly-target 'html])
(check-equal? (render poly-source) (format "~v" '(root (h2 "title") " is " (strong "big")))))) (check-equal? (render poly-source) (format "~v" '(root (h2 "title") " is " (strong "big"))))))
(parameterize ([current-output-port (open-output-string)]
[current-directory poly-dir]
[current-project-root poly-dir])
;; make sure that batch works with multiple output files
;; related to one poly sourc
;; or duplicate output files (which will only be rendered once)
(for ([parallel? (list #true #false)])
(render-batch #:parallel parallel? test.html test.txt test.html)
(check-equal? (file->string test.txt) "TITLE is **big**")
(check-equal? (file->string test.html) (format "~v" '(root (h2 "title") " is " (strong "big"))))
(delete-file test.txt)
(delete-file test.html)))
(delete-directory/files pollen-cache)
Loading…
Cancel
Save