thirdculture/werc/apps/barf/bin/gz.tumblr

120 lines
3 KiB
Text
Raw Normal View History

2024-07-18 14:09:32 -04:00
#!/bin/rc
# Parse XML RSS feeds into BARF blog posts for the specified site.
# If a post with a matching _link_ already exists, no new post will
# be created for that item. Tags will be created according to the rules
# defined in the get_tags() function.
#
# The file argument should point to a file containing one line per feed,
# with fields separated by the | character, in the following format:
#
# http://feeds.feedburner.com/ImNotReallyStanleyLieber|stanleylieber
#
# where the first field is the feed URL and each addition field is a tag.
#
# Requires 20h's xmlpull and rssread.
rfork en
switch($1){
case /*
feeds=$1
site=tumblr.stanleylieber.com
tags=()
case *
echo 'Usage: gz file' >[1=2]
exit usage
}
file=/tmp/gz.$pid
werc=/usr/sl/www/werc
if(test -f /boot/factotum)
cmd=hget
if not
cmd='curl -s'
fn cram{
ssam '
,s/^link:.*$/HJDIVIDER&HJDIVIDER/g
,s/^title:.*$/HJDIVIDER&HJDIVIDER/g
,s/\n//g
,s/HJDIVIDERtitle:/\n&/g
'
}
fn scape{
ssam '
,s/ //g
,s/\"/\"/g
,s/\"/\"/g
,s/\&/\&/g
,s/\&/\&/g
,s/\'/''/g
,s/\,/,/g
,s/\-/-/g
,s/\./\./g
,s/\//\//g
,s/\:/:/g
,s/\&#59;/;/g
,s/\&lt;/</g
,s/\&#60;/</g
,s/\&#61;/=/g
,s/\&gt;/>/g
,s/\&#62;/>/g
,s/\&#95;/_/g
,s/\|/\&#124;/g
'
}
fn get_posts{
$"cmd $"feed | rssread | cram | scape >$file
echo >>$file
}
fn parse_posts{
ifs='
' {
posts=`{cat $file}
for(i in `{seq 1 $#posts | sort -nr}){
post=$posts($i)
if(! ~ $post ''){
a_title=`{echo $post | sed 's/^.*HJDIVIDERtitle: //g; s/HJDIVIDER.*$//g'}
a_date=`{date}
a_link=`{echo $post | sed 's/^.*HJDIVIDERlink: //g; s/HJDIVIDER.*$//g'}
a_body=`{echo $post | sed 's/^.*HJDIVIDER//g; s/^.*HJDIVIDER//g; s/^.*HJDIVIDER//g'}
a_id=`{echo `{ls -p $werc/sites/$site/src | sort -n | tail -1}^+1 | bc}
if(~ $#a_id 0)
a_id=1
while(test -d $werc/sites/$site/src/$a_id)
a_id=`{echo $a_id^+1 | bc}
if(! ~ $"a_link '' && ! grep -s `{echo $"a_link | md5sum} $werc/sites/$site/links){
mkdir -p $werc/sites/$site/src/$a_id/tags # big fat race
echo $"a_title >$werc/sites/$site/src/$a_id/title
echo $"a_date >$werc/sites/$site/src/$a_id/date
echo $a_link(1) >$werc/sites/$site/src/$a_id/link
echo $a_link(1) | md5sum >>$werc/sites/$site/links
if(~ $a_link(1) *staticflickr.com*)
echo '<img src="'$"a_link'">' >$werc/sites/$site/src/$a_id/body
echo $"a_body '</a></li></ul>' >>$werc/sites/$site/src/$a_id/body
ifs=' ' {
for(j in $tags){
>$werc/sites/$site/src/$a_id/tags/$j
echo $a_id/tags/$j >>$werc/sites/$site/tags
}
}
}
if(test -f /boot/factotum && ~ $site *.stanleylieber.com && test -d $werc/sites/$site/src/$a_id)
chmod +t $werc/sites/$site/src/$a_id $werc/sites/$site/src/$a_id $werc/sites/$site/src/$a_id $werc/sites/$site/src/$a_id/* $werc/sites/$site/src/$a_id $werc/sites/$site/src/$a_id/tags/*
}
}
}
}
if(test -f /boot/factotum && test -f /rc/bin/hget)
webfs
for(i in `{grep -v -e '^#' $feeds}){
feed=`{echo $"i | sed 's/\|.*$//g'}
tags=`{echo $"i | sed 's/\|/ /g'}
tags=$tags(2-)
get_posts
parse_posts
}