@ -2,6 +2,7 @@
( require xml
( require xml
txexpr/base
txexpr/base
racket/list
racket/list
racket/match
sugar/list
sugar/list
sugar/define
sugar/define
sugar/test
sugar/test
@ -48,27 +49,29 @@
#:exclude-tags txexpr-tags?
#:exclude-tags txexpr-tags?
#:exclude-attrs txexpr-attrs? ) . ->* . decode-proc-output-contract )
#:exclude-attrs txexpr-attrs? ) . ->* . decode-proc-output-contract )
( let loop ( [ x tx-in ] )
( let loop ( [ x tx-in ] )
( cond
( match x
[ ( txexpr? x ) ( define-values ( tag attrs elements ) ( txexpr->values x ) )
[ ( ? txexpr? )
( define-values ( tag attrs elements ) ( txexpr->values x ) )
( cond
( cond
[ ( or ( memq tag excluded-tags )
[ ( or ( memq tag excluded-tags )
( for/or ( [ attr ( in-list attrs ) ] )
( for/or ( [ attr ( in-list attrs ) ] )
( member attr excluded-attrs ) ) )
( member attr excluded-attrs ) ) ) x ] ; because it's excluded
x ] ; because it's excluded
[ else
[ else
;; we apply processing here rather than do recursive descent on the pieces
;; we apply processing here rather than do recursive descent on the pieces
;; because if we send them back through loop, certain element types are ambiguous
;; because if we send them back through loop, certain element types are ambiguous
;; e.g., ((p "foo")) tests out as both txexpr-attrs and txexpr-elements
;; e.g., ((p "foo")) tests out as both txexpr-attrs and txexpr-elements
( define decoded-txexpr ( make-txexpr ( txexpr-tag-proc tag )
( define decoded-txexpr
( make-txexpr ( txexpr-tag-proc tag )
( txexpr-attrs-proc attrs )
( txexpr-attrs-proc attrs )
( txexpr-elements-proc ( append-map ( λ ( x ) ( ->list/tx ( loop x ) ) ) elements ) ) ) )
( txexpr-elements-proc ( append-map ( λ ( x ) ( ->list/tx ( loop x ) ) ) elements ) ) ) )
( txexpr-proc ( ( if ( block-txexpr? decoded-txexpr )
( txexpr-proc ( ( if ( block-txexpr? decoded-txexpr )
block-txexpr-proc
block-txexpr-proc
inline-txexpr-proc ) decoded-txexpr ) ) ] ) ]
inline-txexpr-proc ) decoded-txexpr ) ) ] ) ]
[ ( string? x ) ( string-proc x ) ]
[ ( ? string? ) ( string-proc x ) ]
[ ( or ( symbol? x ) ( valid-char? x ) ) ( entity-proc x ) ]
[ ( ? symbol? ) ( entity-proc x ) ]
[ ( cdata? x ) ( cdata-proc x ) ]
[ ( ? valid-char? ) ( entity-proc x ) ]
[ else ( error " decode: can't decode " x ) ] ) ) )
[ ( ? cdata? ) ( cdata-proc x ) ]
[ else ( raise-argument-error ' decode " decodable thing " x ) ] ) ) )
( module-test-external
( module-test-external
( require racket/list txexpr racket/function )
( require racket/list txexpr racket/function )
@ -115,10 +118,8 @@
( make-keyword-procedure
( make-keyword-procedure
( λ ( kws kwargs . args )
( λ ( kws kwargs . args )
( define temp-tag ( gensym " temp-tag " ) )
( define temp-tag ( gensym " temp-tag " ) )
( define elements ( car args ) )
( define elements ( first args ) )
( define decode-result ( keyword-apply decode kws kwargs ( list ( cons temp-tag elements ) ) ) )
( get-elements ( keyword-apply decode kws kwargs ( list ( cons temp-tag elements ) ) ) ) ) ) )
( get-elements decode-result ) ) ) )
( define+provide/contract ( block-txexpr? x )
( define+provide/contract ( block-txexpr? x )
( any/c . -> . boolean? )
( any/c . -> . boolean? )
@ -129,17 +130,20 @@
( define+provide/contract ( decode-linebreaks elems [ maybe-linebreak-proc ' ( br ) ]
( define+provide/contract ( decode-linebreaks elems [ maybe-linebreak-proc ' ( br ) ]
#:separator [ newline ( setup:linebreak-separator ) ] )
#:separator [ newline ( setup:linebreak-separator ) ] )
( ( txexpr-elements? ) ( ( or/c #f txexpr-element? ( txexpr-element? txexpr-element? . -> . ( or/c #f txexpr-element? ) ) ) #:separator string? ) . ->* . txexpr-elements? )
( ( txexpr-elements? )
( ( or/c #f txexpr-element?
( txexpr-element? txexpr-element? . -> . ( or/c #f txexpr-element? ) ) ) #:separator string? )
. ->* . txexpr-elements? )
( unless ( string? newline )
( unless ( string? newline )
( raise-argument-error ' decode-linebreaks " string " newline ) )
( raise-argument-error ' decode-linebreaks " string " newline ) )
( define linebreak-proc ( if ( procedure? maybe-linebreak-proc )
( define linebreak-proc ( match maybe-linebreak-proc
maybe-linebreak-proc
[ ( ? procedure? proc ) proc ]
( λ ( e1 e2 ) maybe-linebreak-proc ) ) )
[ val ( λ ( e1 e2 ) val ) ] ) )
( define elems-vec ( list->vector elems ) )
( define elems-vec ( list->vector elems ) )
( filter values
( filter values
( for/list ( [ ( elem idx ) ( in-indexed elems-vec ) ] )
( for/list ( [ ( elem idx ) ( in-indexed elems-vec ) ] )
( cond
( cond
[ ( = idx 0 ) elem ] ; pass first item
[ ( zero? idx ) elem ] ; pass first item
[ ( = idx ( sub1 ( vector-length elems-vec ) ) ) elem ] ; pass through last item
[ ( = idx ( sub1 ( vector-length elems-vec ) ) ) elem ] ; pass through last item
[ ( equal? elem newline )
[ ( equal? elem newline )
( define prev ( vector-ref elems-vec ( sub1 idx ) ) )
( define prev ( vector-ref elems-vec ( sub1 idx ) ) )
@ -147,7 +151,7 @@
;; only convert if neither adjacent tag is a block
;; only convert if neither adjacent tag is a block
;; (because blocks automatically force a newline before & after)
;; (because blocks automatically force a newline before & after)
( if ( or ( block-txexpr? prev ) ( block-txexpr? next ) )
( if ( or ( block-txexpr? prev ) ( block-txexpr? next ) )
#f ; flag for filtering
#f alse ; flag for filtering
( linebreak-proc prev next ) ) ]
( linebreak-proc prev next ) ) ]
[ else elem ] ) ) ) )
[ else elem ] ) ) ) )
@ -169,17 +173,21 @@
( define+provide/contract ( merge-newlines x )
( define+provide/contract ( merge-newlines x )
( txexpr-elements? . -> . txexpr-elements? )
( txexpr-elements? . -> . txexpr-elements? )
( define newline-pat ( regexp ( format " ^~a+$ " ( setup:newline ) ) ) )
( define newline-pat ( regexp ( format " ^~a+$ " ( setup:newline ) ) ) )
( define ( newline? x ) ( and ( string? x ) ( regexp-match newline-pat x ) ) )
( define ( newline? x ) ( match x
[ ( regexp newline-pat ) #true ]
[ _ #false ] ) )
( define ( merge-newline-slice xs )
( define ( merge-newline-slice xs )
( if ( newline? ( car xs ) ) ; if first member of slice is newline, they all are
( match xs
( list ( apply string-append xs ) )
;; if first member of slice is newline, they all are
xs ) )
[ ( cons ( ? newline? ) _ ) ( list ( apply string-append xs ) ) ]
( define empty-string? ( λ ( x ) ( equal? x " " ) ) )
[ _ xs ] ) )
( define ( empty-string? x ) ( equal? x " " ) )
( let loop ( [ x x ] )
( let loop ( [ x x ] )
( if ( and ( pair? x ) ( not ( attrs? x ) ) )
( match x
( let ( [ xs ( map loop ( filter-not empty-string? x ) ) ] )
[ ( ? pair? x ) #:when ( not ( attrs? x ) )
( append-map merge-newline-slice ( slicef xs newline? ) ) )
( define xs ( map loop ( filter-not empty-string? x ) ) )
x ) ) )
( append-map merge-newline-slice ( slicef xs newline? ) ) ]
[ _ x ] ) ) )
( module-test-external
( module-test-external
( require racket/list )
( require racket/list )
@ -189,7 +197,6 @@
( check-equal? ( merge-newlines ' ( p " \n " " \n " " foo " " \n " " \n \n " " bar " ( em " \n " " \n " " \n " ) ) )
( check-equal? ( merge-newlines ' ( p " \n " " \n " " foo " " \n " " \n \n " " bar " ( em " \n " " \n " " \n " ) ) )
' ( p " \n \n " " foo " " \n \n \n " " bar " ( em " \n \n \n " ) ) ) )
' ( p " \n \n " " foo " " \n \n \n " " bar " ( em " \n \n \n " ) ) ) )
( define+provide/contract ( decode-paragraphs elements-in [ maybe-wrap-proc ' p ]
( define+provide/contract ( decode-paragraphs elements-in [ maybe-wrap-proc ' p ]
#:linebreak-proc [ linebreak-proc decode-linebreaks ]
#:linebreak-proc [ linebreak-proc decode-linebreaks ]
#:force? [ force-paragraph #f ] )
#:force? [ force-paragraph #f ] )
@ -206,26 +213,30 @@
( define ( paragraph-break? x )
( define ( paragraph-break? x )
( define paragraph-pattern ( pregexp ( format " ^~a+$ " paragraph-separator ) ) )
( define paragraph-pattern ( pregexp ( format " ^~a+$ " paragraph-separator ) ) )
( and ( string? x ) ( regexp-match paragraph-pattern x ) ) )
( match x
[ ( pregexp paragraph-pattern ) #true ]
[ _ #false ] ) )
( define ( explicit-or-implicit-paragraph-break? x )
( define ( explicit-or-implicit-paragraph-break? x )
( or ( paragraph-break? x ) ( block-txexpr? x ) ) )
( or ( paragraph-break? x ) ( block-txexpr? x ) ) )
( define wrap-proc ( if ( procedure? maybe-wrap-proc )
( define wrap-proc ( match maybe-wrap-proc
maybe-wrap-proc
[ ( ? procedure? proc ) proc ]
( λ ( elems ) ( list* maybe-wrap-proc elems ) ) ) )
[ _ ( λ ( elems ) ( list* maybe-wrap-proc elems ) ) ] ) )
( define ( wrap-paragraph elems )
( define ( wrap-paragraph elems )
( if ( andmap block-txexpr? elems )
( match elems
elems ; leave a series of block xexprs alone
[ ( list ( ? block-txexpr? ) ... ) elems ] ; leave a series of block xexprs alone
( list ( wrap-proc elems ) ) ) ) ; otherwise wrap in p tag
[ _ ( list ( wrap-proc elems ) ) ] ) ) ; otherwise wrap in p tag
( define elements ( prep-paragraph-flow elements-in ) )
( define elements ( prep-paragraph-flow elements-in ) )
( if ( ormap explicit-or-implicit-paragraph-break? elements ) ; need this condition to prevent infinite recursion
( if ( ormap explicit-or-implicit-paragraph-break? elements ) ; need this condition to prevent infinite recursion
;; use append-map on wrap-paragraph rather than map to permit return of multiple elements
;; use `append-map` on `wrap-paragraph` rather than `map` to permit return of multiple elements
( append-map wrap-paragraph ( append-map ( λ ( es ) ( filter-split es paragraph-break? ) ) ( slicef elements block-txexpr? ) ) ) ; split into ¶¶, using both implied and explicit paragraph breaks
( append-map wrap-paragraph
( append-map ( λ ( es ) ( filter-split es paragraph-break? ) ) ( slicef elements block-txexpr? ) ) ) ; split into ¶¶, using both implied and explicit paragraph breaks
( if force-paragraph
( if force-paragraph
( append-map wrap-paragraph ( slicef elements block-txexpr? ) ) ; upconverts non-block elements to paragraphs
;; upconverts non-block elements to paragraphs
( append-map wrap-paragraph ( slicef elements block-txexpr? ) )
elements ) ) )
elements ) ) )
( module-test-external
( module-test-external
@ -246,7 +257,6 @@
' ( ( p " foo " ) ( div " bar " ) ( div " zam " ) ) )
' ( ( p " foo " ) ( div " bar " ) ( div " zam " ) ) )
( check-equal? ( decode-paragraphs ' ( " foo " " \n \n " ( div " bar " ) " \n \n " ( div " zam " ) ) )
( check-equal? ( decode-paragraphs ' ( " foo " " \n \n " ( div " bar " ) " \n \n " ( div " zam " ) ) )
' ( ( p " foo " ) ( div " bar " ) ( div " zam " ) ) )
' ( ( p " foo " ) ( div " bar " ) ( div " zam " ) ) )
( check-equal? ( decode-paragraphs ' ( " foo " ) ) ' ( " foo " ) )
( check-equal? ( decode-paragraphs ' ( " foo " ) ) ' ( " foo " ) )
( check-equal? ( decode-paragraphs ' ( " foo " ) #:force? #t ) ' ( ( p " foo " ) ) )
( check-equal? ( decode-paragraphs ' ( " foo " ) #:force? #t ) ' ( ( p " foo " ) ) )
( check-equal? ( decode-paragraphs ' ( ( div " foo " ) ) ) ' ( ( div " foo " ) ) )
( check-equal? ( decode-paragraphs ' ( ( div " foo " ) ) ) ' ( ( div " foo " ) ) )