From a0a764502272b3a5303ddf439149a5bec6acb421 Mon Sep 17 00:00:00 2001 From: Uriel Date: Fri, 6 Feb 2009 20:42:15 +0100 Subject: [PATCH 1/3] Make fproc_cache take an optional second arg that can be a file or dir name, if provided that file/path will be used as input for the first argument removing the need for a temporary file. --- bin/fproc_cache.rc | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/bin/fproc_cache.rc b/bin/fproc_cache.rc index 11619e6..55096b9 100755 --- a/bin/fproc_cache.rc +++ b/bin/fproc_cache.rc @@ -1,19 +1,34 @@ #!/usr/bin/env rc #. 9.rc # Not really needed when calling from werc, only would be needed if you use fproc_cache.rc standalone -# TODO: Allow to provide two arguments, second arg is a file name, use filename+size+mtime for hashing. -# Useful when dealing with many big files (eg., thumb gallery generation). + +a=() +tmpf=() proc=$1 -tmpfile=/tmp/fmttmp.$pid -score=`{{tee $tmpfile || exit 1} | sha1sum} +shift +if(~ $#* 0) { + tmpf=/tmp/fmttmp.$pid + f=$tmpf + score=`{{tee $tmpf || exit 1} | sha1sum} +} +if not { + f=$1 + if(~ $f */) { + score=`{du -an $f | sha1sum || exit 1} # XXX using -n(bytes) instead of -t(lastmod) because sitemap proc touches files in tree. + a=$f + f=/dev/null + } + if not + score=`{sha1sum $f || exit 1} +} cachedir=/tmp/fproc_cache/$score mkdir -p $cachedir >[2]/dev/null if(test -s $cachedir/$proc) cat $cachedir/$proc if not - if($proc < $tmpfile | tee $cachedir/$pid) + if($proc $a < $f | tee $cachedir/$pid) mv $cachedir/$pid $cachedir/$proc -rm $tmpfile $cachedir/$pid >[2]/dev/null & +rm $tmpf $cachedir/$pid >[2]/dev/null & From 9ff545ec83cb90adeab414f886697a56dd332853 Mon Sep 17 00:00:00 2001 From: Uriel Date: Fri, 6 Feb 2009 20:43:11 +0100 Subject: [PATCH 2/3] Rewrite most sitemap generation code, much cleaner, modularize file title/desc extraction, take advantage of new fproc_cache to cache sitemap. 
--- lib/sitemap.tpl | 80 +++++++++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 42 deletions(-) diff --git a/lib/sitemap.tpl b/lib/sitemap.tpl index 3ade1e9..3818233 100644 --- a/lib/sitemap.tpl +++ b/lib/sitemap.tpl @@ -5,65 +5,61 @@ tmpfile=/tmp/werc_sitemap_$pid.txt echo '' > $tmpfile saveddf=$dirfilter -fn getMdDesc { - sed 's/^(.......................................................................................................[^ ]*).*$/\1/g; 1q' < $1 +fn get_md_title { + sed 's/^(................................................................[^ ]*).*$/\1/g; 1q' < $1 +} + +fn get_html_title { + # H1 is not reliable because htmlroff doesn't use it :( + #desc=`{cat $1 | sed 32q | grep '<[Hh]1>' | sed 's/<[Hh]1>(.*)(<\/[Hh]1>|$)/\1/;s/<[^>]*>//g;1q'} + # Pick the first line of body instead + desc=`{sed -n '/<[Bb][Oo][Dd][Yy]/,/./s/(<[^>]*>|$)//gp' < $1} + if(~ $#desc 0) + desc=`{sed 's/<[^>]*>//g; 1q' < $1} +} + +fn get_file_title { + + if(~ $1 */) { + if(test -f $1/index.md) + get_md_title $1/index.md + if not if(test -f $1/index.html) + get_html_title $1/index.html + } + if not if(~ $1 *.md) + get_md_title $1 + if not if(~ $1 *.html) + get_html_title $1 + if not + echo '' } -# XXX Instead of recursion should use du(1) or similar. fn listDir { d=$1 - if(~ $#d 0) - d='' dirfilter=$saveddf - blogDirs=() if(test -f $d/_werc/config) . ./$d/_werc/config echo '' } -cd $sitedir -listDir '' +fproc_cache.rc listDir $sitedir/ +#listDir $sitedir/ -cp $tmpfile ./sitemap.txt -rm $tmpfile +if(test -s $tmpfile) + mv $tmpfile $sitedir/sitemap.txt & %} From 2e1f0dd32f2ee4794044567ea9c3a8ef091694ef Mon Sep 17 00:00:00 2001 From: Uriel Date: Fri, 6 Feb 2009 22:09:32 +0100 Subject: [PATCH 3/3] Convert fltr_cache into a function. Enable output buffering which had been commented out for unknown reasons. 
--- bin/fltr_cache.rc | 35 +++++++++++++++++++++++++++++++++++ bin/fproc_cache.rc | 34 ---------------------------------- bin/werc.rc | 3 ++- etc/initrc | 12 ++++++------ lib/sitemap.tpl | 2 +- 5 files changed, 44 insertions(+), 42 deletions(-) create mode 100755 bin/fltr_cache.rc delete mode 100755 bin/fproc_cache.rc diff --git a/bin/fltr_cache.rc b/bin/fltr_cache.rc new file mode 100755 index 0000000..f09c963 --- /dev/null +++ b/bin/fltr_cache.rc @@ -0,0 +1,35 @@ +#!/usr/bin/env rc + +fn fltr_cache { + a=() + tmpf=() + + proc=$1 + shift + + if(~ $#* 0) { + tmpf=/tmp/fmttmp.$pid + f=$tmpf + score=`{{tee $tmpf || exit 1} | sha1sum} + } + if not { + f=$1 + if(~ $f */) { + score=`{du -an $f | sha1sum || exit 1} # XXX using -n(bytes) instead of -t(lastmod) because sitemap proc touches files in tree. + a=$f + f=/dev/null + } + if not + score=`{sha1sum $f || exit 1} + } + cachedir=/tmp/fltr_cache/$score + mkdir -p $cachedir >[2]/dev/null + + if(test -s $cachedir/$proc) + cat $cachedir/$proc + if not + if($proc $a < $f | tee $cachedir/$pid) + mv $cachedir/$pid $cachedir/$proc + + rm $tmpf $cachedir/$pid >[2]/dev/null & +} diff --git a/bin/fproc_cache.rc b/bin/fproc_cache.rc deleted file mode 100755 index 55096b9..0000000 --- a/bin/fproc_cache.rc +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env rc -#. 9.rc # Not really needed when calling from werc, only would be needed if you use fproc_cache.rc standalone - -a=() -tmpf=() - -proc=$1 -shift - -if(~ $#* 0) { - tmpf=/tmp/fmttmp.$pid - f=$tmpf - score=`{{tee $tmpf || exit 1} | sha1sum} -} -if not { - f=$1 - if(~ $f */) { - score=`{du -an $f | sha1sum || exit 1} # XXX using -n(bytes) instead of -t(lastmod) because sitemap proc touches files in tree. 
- a=$f - f=/dev/null - } - if not - score=`{sha1sum $f || exit 1} -} -cachedir=/tmp/fproc_cache/$score -mkdir -p $cachedir >[2]/dev/null - -if(test -s $cachedir/$proc) - cat $cachedir/$proc -if not - if($proc $a < $f | tee $cachedir/$pid) - mv $cachedir/$pid $cachedir/$proc - -rm $tmpf $cachedir/$pid >[2]/dev/null & diff --git a/bin/werc.rc b/bin/werc.rc index a274c7c..709d7b5 100755 --- a/bin/werc.rc +++ b/bin/werc.rc @@ -3,6 +3,7 @@ . ./werclib.rc . ./wercconf.rc . ./corehandlers.rc +. ./fltr_cache.rc cd .. forbidden_uri_chars='[^a-zA-Z0-9_+\-\/\.]' @@ -105,7 +106,7 @@ fn werc_exec_request { if(! ~ $#debug 0) dprint $"SERVER_NAME^$"REQUEST_URI - $"HTTP_USER_AGENT - $"REQUEST_METHOD - $"handler_body_main - $"master_template - template $headers $master_template #| awk_buffer + template $headers $master_template | awk_buffer echo $res_tail } diff --git a/etc/initrc b/etc/initrc index c02e0f2..d5c9f57 100644 --- a/etc/initrc +++ b/etc/initrc @@ -9,17 +9,17 @@ # Location of your Plan 9 from User Space installation (usually /usr/local/plan9) plan9port=$PLAN9 +#plan9port=/usr/local/plan9 # Path, make sure the plan9port /bin directory is included before /bin # Keep '.' in path! It is needed. path=($plan9port/bin/ . ./bin/ ./bin/contrib/ /bin/ /usr/bin/) -# Set this to your favorite markdown formatter, eg., markdown.pl (fproc_cache -# is a wrapper around a script it takes as an argument, in the default -# configuration markdown.pl, that caches output) -# Note that some werc components assume a markdown-like formatter, but all -# major functionality should should be formatter agnostic. -formatter=(fproc_cache.rc markdown.pl) +# Set this to your favorite markdown formatter, eg., markdown.pl (fltr_cache +# takes as an argument a filter, in the default configuration markdown.pl, that +# caches output) Note that some werc components assume a markdown-like +# formatter, but all major functionality should be formatter agnostic. 
+formatter=(fltr_cache markdown.pl) # Enable debugging, to disable set to () debug=true diff --git a/lib/sitemap.tpl b/lib/sitemap.tpl index 3818233..7967856 100644 --- a/lib/sitemap.tpl +++ b/lib/sitemap.tpl @@ -56,7 +56,7 @@ fn listDir { echo '' } -fproc_cache.rc listDir $sitedir/ +fltr_cache listDir $sitedir/ #listDir $sitedir/ if(test -s $tmpfile)