From 939575d5500d560d40ec70f06f8f5f61756c8a3c Mon Sep 17 00:00:00 2001 From: Matthew Butterick Date: Fri, 28 Jun 2019 17:06:39 -0700 Subject: [PATCH] add --jobs switch to render and setup --- pollen/private/command.rkt | 54 +++++++++++++++--------- pollen/private/preheat-cache.rkt | 70 ++++++++++++++++++++------------ pollen/render.rkt | 14 +++++-- pollen/scribblings/raco.scrbl | 24 +++++++++-- 4 files changed, 108 insertions(+), 54 deletions(-) diff --git a/pollen/private/command.rkt b/pollen/private/command.rkt index 2948b43..6a8db76 100644 --- a/pollen/private/command.rkt +++ b/pollen/private/command.rkt @@ -28,22 +28,22 @@ (define (dispatch command-name) (with-logging-to-port - (current-error-port) - (λ () - (case command-name - [("test" "xyzzy") (handle-test)] - [(#f "help") (handle-help)] - [("start") (handle-start)] ; parses its own args - ;; "second" arg is actually third in command line args, so use cddr not cdr - [("render") (handle-render)] ; render parses its own args from current-command-line-arguments - [("version") (handle-version)] - [("reset") (handle-reset (get-first-arg-or-current-dir))] - [("setup") (handle-setup (get-first-arg-or-current-dir))] - [("clone" "publish") (handle-publish)] - [else (handle-unknown command-name)])) - #:logger pollen-logger - 'info - 'pollen)) + (current-error-port) + (λ () + (case command-name + [("test" "xyzzy") (handle-test)] + [(#f "help") (handle-help)] + [("start") (handle-start)] ; parses its own args + ;; "second" arg is actually third in command line args, so use cddr not cdr + [("render") (handle-render)] ; render parses its own args from current-command-line-arguments + [("version") (handle-version)] + [("reset") (handle-reset (get-first-arg-or-current-dir))] + [("setup") (handle-setup)] + [("clone" "publish") (handle-publish)] + [else (handle-unknown command-name)])) + #:logger pollen-logger + 'info + 'pollen)) (define (very-nice-path x) (path->complete-path (simplify-path (cleanse-path (->path x))))) @@ 
-73,9 +73,21 @@ version print the version" (current-server-port) (make-publish-di (message "resetting cache ...") ((dynamic-require 'pollen/cache 'reset-cache) directory-maybe)) -(define (handle-setup directory-maybe) +(define (handle-setup) (message "preheating cache ...") - ((dynamic-require 'pollen/private/preheat-cache 'preheat-cache) directory-maybe)) + (define setup-parallel? (make-parameter #false)) + (define parsed-args + (command-line #:program "raco pollen setup" + #:argv (vector-drop (current-command-line-arguments) 1) ; snip the 'setup' from the front + #:once-any + [("-p" "--parallel") "Setup in parallel using all cores" (setup-parallel? #true)] + [("-j" "--jobs") job-count "Setup in parallel using jobs" (setup-parallel? (or (string->number job-count) (raise-argument-error 'handle-setup "exact positive integer" job-count)))] + #:args other-args + other-args)) + (define starting-dir (match parsed-args + [(list dir) dir] + [_ (current-directory)])) + ((dynamic-require 'pollen/private/preheat-cache 'preheat-cache) starting-dir (setup-parallel?))) (define (handle-render) (define render-batch (dynamic-require 'pollen/render 'render-batch)) @@ -93,7 +105,9 @@ version print the version" (current-server-port) (make-publish-di [("-r" "--recursive") "Render subdirectories recursively" (render-with-subdirs? 'recursive)] [("-s" "--subdir") "Render subdirectories nonrecursively" (render-with-subdirs? 'include)] - [("-p" "--parallel") "Render in parallel" (render-parallel? #true)] + #:once-any + [("-p" "--parallel") "Render in parallel using all cores" (render-parallel? #true)] + [("-j" "--jobs") job-count "Render in parallel using jobs" (render-parallel? 
(or (string->number job-count) (raise-argument-error 'handle-render "exact positive integer" job-count)))] #:args other-args other-args)) (parameterize ([current-poly-target (render-target-wanted)]) ;; applies to both cases @@ -127,7 +141,7 @@ version print the version" (current-server-port) (make-publish-di (for ([path (in-list dirlist)] #:when (and (directory-exists? path) (not (omitted-path? path)))) - (render-one-dir (->complete-path path))))))] + (render-one-dir (->complete-path path))))))] [path-args ;; path mode (message (format "rendering ~a" (string-join (map ->string path-args) " "))) (apply render-batch (map very-nice-path path-args) #:parallel (render-parallel?))])))) diff --git a/pollen/private/preheat-cache.rkt b/pollen/private/preheat-cache.rkt index ca7e29f..e095935 100644 --- a/pollen/private/preheat-cache.rkt +++ b/pollen/private/preheat-cache.rkt @@ -18,7 +18,7 @@ (and (file-exists? cache-db-file) (hash-has-key? (file->value cache-db-file) (paths->key path)))) -(define (preheat-cache starting-dir) +(define (preheat-cache starting-dir [wants-parallel-setup #false]) (unless (and (path-string? starting-dir) (directory-exists? starting-dir)) (raise-argument-error 'preheat-cache "directory" starting-dir)) @@ -34,30 +34,46 @@ #:unless (path-cached? path)) path)) - (define worker-evts - (for/list ([wpidx (in-range (processor-count))]) - (define wp - (place ch - (let loop () - (define path (place-channel-put/get ch (list 'want-job))) - (place-channel-put ch (list 'job-finished path - (with-handlers ([exn:fail? (λ (e) #f)]) - (path->hash path)))) - (loop)))) - (handle-evt wp (λ (val) (list* wpidx wp val))))) + (cond + [(null? uncached-paths) + (message "all cached files are up to date")] + [wants-parallel-setup + + (define job-count + (match wants-parallel-setup + [#true (processor-count)] + [(? exact-positive-integer? 
count) count] + [_ (raise-argument-error 'preheat-cache "exact positive integer" wants-parallel-setup)])) + + (define worker-evts + (for/list ([wpidx (in-range job-count)]) + (define wp + (place ch + (let loop () + (define path (place-channel-put/get ch (list 'want-job))) + (place-channel-put ch (list 'job-finished path + (with-handlers ([exn:fail? (λ (e) #f)]) + (path->hash path)))) + (loop)))) + (handle-evt wp (λ (val) (list* wpidx wp val))))) - (let loop ([paths uncached-paths][actives null]) - (unless (and (null? paths) (null? actives)) - (match (apply sync worker-evts) - [(list wpidx wp 'want-job) - (match paths - [(? null?) (loop null actives)] - [(cons path rest) - (place-channel-put wp path) - (message (format "caching on core ~a: ~a" (add1 wpidx) (find-relative-path starting-dir path))) - (loop rest (cons wpidx actives))])] - [(list wpidx wp 'job-finished path result) - (if result - (cache-ref! (paths->key path) (λ () result)) - (message (format "caching failed on core ~a: ~a" (add1 wpidx) (find-relative-path starting-dir path)))) - (loop paths (remq wpidx actives))])))) \ No newline at end of file + (let loop ([paths uncached-paths][actives null]) + (unless (and (null? paths) (null? actives)) + (match (apply sync worker-evts) + [(list wpidx wp 'want-job) + (match paths + [(? null?) (loop null actives)] + [(cons path rest) + (place-channel-put wp path) + (message (format "caching @ job ~a: ~a" (add1 wpidx) (find-relative-path starting-dir path))) + (loop rest (cons wpidx actives))])] + [(list wpidx wp 'job-finished path result) + (if result + (cache-ref! (paths->key path) (λ () result)) + (message (format "caching failed on job ~a: ~a" (add1 wpidx) (find-relative-path starting-dir path)))) + (loop paths (remq wpidx actives))])))] + [else (for ([path (in-list uncached-paths)]) + (message (format "caching: ~a" (find-relative-path starting-dir path))) + (match (with-handlers ([exn:fail? 
(λ (e) #f)]) (path->hash path)) + [#false (message (format "caching failed: ~a" (find-relative-path starting-dir path)))] + [result (cache-ref! (paths->key path) (λ () result))]))])) \ No newline at end of file diff --git a/pollen/render.rkt b/pollen/render.rkt index c812437..5d30331 100644 --- a/pollen/render.rkt +++ b/pollen/render.rkt @@ -48,7 +48,7 @@ (define (list-of-pathish? x) (and (list? x) (andmap pathish? x))) -(define+provide/contract (render-batch #:parallel [parallel? #false] . paths) +(define+provide/contract (render-batch #:parallel [wants-parallel-render #false] . paths) ((#:parallel any/c) #:rest list-of-pathish? . ->* . void?) ;; Why not just (for-each render ...)? ;; Because certain files will pass through multiple times (e.g., templates) @@ -56,7 +56,7 @@ ;; Using reset-modification-dates is sort of like session control. (reset-mod-date-hash!) (cond - [parallel? + [wants-parallel-render (define source-paths (let () @@ -75,10 +75,16 @@ [maybe-source-path (in-value (->source-path p))] #:when (and maybe-source-path (file-exists? maybe-source-path))) maybe-source-path))) + + (define job-count + (match wants-parallel-render + [#true (processor-count)] + [(? exact-positive-integer? 
count) count] + [_ (raise-argument-error 'render-batch "exact positive integer" wants-parallel-render)])) ;; initialize the workers (define worker-evts - (for/list ([wpidx (in-range (processor-count))]) + (for/list ([wpidx (in-range job-count)]) (define wp (place ch (let loop () (match-define (cons path poly-target) @@ -120,7 +126,7 @@ (loop rest locks blocks)])] [(list wpidx wp 'finished-job path ms) (message - (format "rendered parallel on core ~a /~a ~a" + (format "rendered parallel @ job ~a /~a ~a" (add1 wpidx) (find-relative-path (current-project-root) (->output-path path)) (if (< ms 1000) (format "(~a ms)" ms) (format "(~a s)" (/ ms 1000.0))))) diff --git a/pollen/scribblings/raco.scrbl b/pollen/scribblings/raco.scrbl index 8982be3..56c7d5b 100644 --- a/pollen/scribblings/raco.scrbl +++ b/pollen/scribblings/raco.scrbl @@ -96,16 +96,22 @@ The optional @exec{--target} or @exec{-t} switch specifies the render target for See also @seclink["raco-pollen-render-poly"]. -The optional @exec{--parallel} or @exec{-p} switch creates a set of parallel rendering jobs. On a multi-core machine, this will usually make your rendering job finish faster. The order of rendering is not guaranteed, of course, so if your project depends on a certain order of rendering, don't use this option. +The optional @exec{--parallel} or @exec{-p} switch creates a set of parallel rendering jobs equal to the number of processing cores on the system. On a multi-core machine, this will usually make your rendering job finish faster. The order of rendering is not guaranteed, of course, so if your project depends on a certain order of rendering, don't use this option. @terminal{ > raco pollen render -p foo.html bar.html zam.css } +The alternative @exec{--jobs <count>} or @exec{-j <count>} switch does the same thing, but takes one argument that creates @racket[<count>] parallel jobs (which can be more or less than the number of processing cores). 
+ +@terminal{ +> raco pollen render -j 4 foo.html bar.html zam.css +} + As a rule of thumb, parallel rendering works best if you do @exec{raco setup} first, which updates Pollen's disk caches: @terminal{ -> raco setup +> raco setup -p > raco pollen render -p } @@ -115,7 +121,7 @@ As a rule of thumb, parallel rendering works best if you do @exec{raco setup} fi @bold{Directory mode}: @racket[raco pollen render _directory] renders all preprocessor source files and then all pagetree files found in the specified directory. If none of these files are found, a pagetree will be generated for the directory (which will include all source files) and then rendered. If the @racket[_directory] argument is omitted, the command defaults to the current directory. -In directory mode, this command can be invoked with two other optional arguments (in addition to the @exec{--target} and @exec{--parallel} switches mentioned above): +In directory mode, this command can be invoked with two other optional arguments (in addition to the @exec{--target}, @exec{--parallel}, and @exec{--jobs} switches mentioned above): The @exec{--subdir} or @exec{-s} switch also renders subdirectories. @racket[current-project-root] remains fixed at the initial directory, just as it would be in the project server after invoking @racket[raco pollen start]. @@ -149,6 +155,18 @@ Finds Pollen source files in the current directory, compiles them, and loads the Can also be invoked as @racket[raco pollen setup _directory], which will set up the files in @racket[_directory]. +The optional @exec{--parallel} or @exec{-p} switch creates a set of parallel setup jobs equal to the number of processing cores on the system. On a multi-core machine, this will usually make your setup finish faster. 
+ +@terminal{ +> raco pollen setup -p +} + +The alternative @exec{--jobs <count>} or @exec{-j <count>} switch does the same thing, but takes one argument that creates @racket[<count>] parallel jobs (which can be more or less than the number of processing cores). + +@terminal{ +> raco pollen setup -j 4 +} + @section{@exec{raco pollen reset}}