120 lines
3 KiB
Text
120 lines
3 KiB
Text
|
#!/bin/rc
|
||
|
# Parse XML RSS feeds into BARF blog posts for the specified site.
|
||
|
# If a post with a matching _link_ already exists, no new post will
|
||
|
# be created for that item. Tags will be created according to the rules
|
||
|
# defined in the get_tags() function.
|
||
|
#
|
||
|
# The file argument should point to a file containing one line per feed,
|
||
|
# with fields separated by the | character, in the following format:
|
||
|
#
|
||
|
# http://feeds.feedburner.com/ImNotReallyStanleyLieber|stanleylieber
|
||
|
#
|
||
|
# where the first field is the feed URL and each additional field is a tag.
|
||
|
#
|
||
|
# Requires 20h's xmlpull and rssread.
|
||
|
# Run in a private copy of the environment and name space.
rfork en

# The feed list must be given as an absolute path; anything else
# (including no argument at all) prints the usage line and exits.
if(! ~ $1 /*){
	echo 'Usage: gz file' >[1=2]
	exit usage
}
feeds=$1
site=read.stanleylieber.com
tags=()
|
||
|
|
||
|
# Scratch file that holds the flattened records of the feed being processed.
file=/tmp/gz.$pid
# Root of the werc tree; sites live under $werc/sites.
werc=/usr/sl/www/werc

# Command used to fetch a feed: hget when /boot/factotum exists
# (presumably a native Plan 9 system), curl -s everywhere else.
cmd='curl -s'
if(test -f /boot/factotum)
	cmd=hget
|
||
|
|
||
|
# cram: stdin-to-stdout filter that flattens a feed to one line per item.
# Every line starting with link: or title: is wrapped in HJDIVIDER
# markers, then all newlines are deleted, and finally a newline is put
# back in front of each title marker so that every output line begins
# with HJDIVIDERtitle:.
fn cram{
	ssam '
		,s/^(link|title):.*$/HJDIVIDER&HJDIVIDER/g
		,s/\n//g
		,s/HJDIVIDERtitle:/\n&/g
	'
}
|
||
|
|
||
|
# scape: stdin-to-stdout filter that decodes HTML character references
# found in feed text.  For each character the numeric form is handled
# first, then the named form where one exists.  The ampersand is decoded
# early, so a doubly encoded reference such as &amp;lt; is still reduced
# to < by the later commands.
#
# In the sam replacement text & means "the matched string", hence \& for
# a literal ampersand.  '' inside the quoted script is rc's spelling of a
# single quote.
#
# NOTE(review): the first command of the original script was not
# recoverable (it contained a raw line break, which would either break
# the script or delete the record separators produced by cram).  It is
# assumed to have removed an encoded line break; confirm against upstream.
fn scape{
	ssam '
		,s/\&#1[03];//g
		,s/\&#34;/\"/g
		,s/\&quot;/\"/g
		,s/\&#38;/\&/g
		,s/\&amp;/\&/g
		,s/\&#39;/''/g
		,s/\&#44;/,/g
		,s/\&#45;/-/g
		,s/\&#46;/\./g
		,s/\&#47;/\//g
		,s/\&#58;/:/g
		,s/\&#59;/;/g
		,s/\&#60;/</g
		,s/\&lt;/</g
		,s/\&#61;/=/g
		,s/\&#62;/>/g
		,s/\&gt;/>/g
		,s/\&#95;/_/g
		,s/\&#124;/\|/g
	'
}
|
||
|
|
||
|
# get_posts: fetch the feed named by $feed with the command in $cmd,
# convert it with rssread, flatten it with cram, decode entities with
# scape, and store the result in $file (overwriting earlier contents).
fn get_posts{
	# $cmd may carry a command and its flags in a single word
	# ('curl -s').  $"cmd always yields exactly one word, which rc
	# would then look up as a program name, so split the value into
	# a real argument list before running it.
	fetch=`{echo $cmd}
	$fetch $"feed | rssread | cram | scape >$file
	# cram strips every newline except the ones it inserts before a
	# record, so terminate the last record here.
	echo >>$file
}
|
||
|
|
||
|
# parse_posts: turn each record in $file into a post directory under
# $werc/sites/$site/src.
#
# Records are processed from the last line of $file to the first.  A
# record is skipped when its link is empty or when the md5 of its link is
# already listed in $werc/sites/$site/links.  A new post gets the next
# free numeric id and the files title, date, link and body, plus one empty
# file per element of $tags under tags/; each tag is also appended to
# $werc/sites/$site/tags.
#
# Reads: $file $werc $site $tags.  Sets (globally, as rc functions do):
# posts post postsrc postdir n j a_title a_date a_link a_body a_id.
fn parse_posts{
	# Split command output on newlines only: one record per line.
	ifs='
' {
		postsrc=$werc/sites/$site/src
		posts=`{cat $file}
		for(n in `{seq 1 $#posts | sort -nr}){
			post=$posts($n)
			if(! ~ $post ''){
				a_title=`{echo $post | sed 's/^.*HJDIVIDERtitle: //g; s/HJDIVIDER.*$//g'}
				a_date=`{date}
				a_link=`{echo $post | sed 's/^.*HJDIVIDERlink: //g; s/HJDIVIDER.*$//g'}
				# .* is greedy, so a single pass already strips
				# everything through the last marker.
				a_body=`{echo $post | sed 's/^.*HJDIVIDER//g'}

				# Next id: highest existing entry plus one, or 1 when
				# nothing could be computed; then skip any id whose
				# directory already exists.
				a_id=`{echo `{ls -p $postsrc | sort -n | tail -1}^+1 | bc}
				if(~ $#a_id 0)
					a_id=1
				while(test -d $postsrc/$a_id)
					a_id=`{echo $a_id^+1 | bc}
				postdir=$postsrc/$a_id

				if(! ~ $"a_link '' && ! grep -s `{echo $"a_link | md5sum} $werc/sites/$site/links){
					# big fat race: nothing stops another process from
					# claiming the same id between the test above and
					# this mkdir.
					mkdir -p $postdir/tags
					echo $"a_title >$postdir/title
					echo $"a_date >$postdir/date
					echo $a_link(1) >$postdir/link
					echo $a_link(1) | md5sum >>$werc/sites/$site/links
					# Links into staticflickr.com get an inline image
					# ahead of the body text.
					if(~ $a_link(1) *staticflickr.com*)
						echo '<img src="'$"a_link'">' >$postdir/body
					echo $"a_body '</a></li></ul>' >>$postdir/body
					for(j in $tags){
						>$postdir/tags/$j
						echo $a_id/tags/$j >>$werc/sites/$site/tags
					}
				}

				# $postdir exists only if the post was created just now.
				if(test -f /boot/factotum && ~ $site *.stanleylieber.com && test -d $postdir){
					chmod +t $postdir $postdir/*
					# With no tags the glob would not match and rc
					# would pass the literal pattern to chmod.
					if(! ~ $#tags 0)
						chmod +t $postdir/tags/*
				}
			}
		}
	}
}
|
||
|
|
||
|
# Start webfs when both /boot/factotum and /rc/bin/hget exist
# (presumably because that hget depends on webfs -- confirm).
if(test -f /boot/factotum && test -f /rc/bin/hget)
	webfs

# Process every non-comment line of the feed list.  Each line has the
# form url|tag|tag...: the part before the first | becomes $feed and the
# remaining fields become $tags.
for(i in `{grep -v -e '^#' $feeds}){
	feed=`{echo $"i | sed 's/\|.*$//g'}
	tags=`{echo $"i | sed 's/\|/ /g'}
	# Drop the first element, which is the feed URL.
	tags=$tags(2-)
	get_posts
	parse_posts
}

# Do not leave the per-run scratch file behind in /tmp.
rm -f $file
|