#!/bin/rc
# Parse XML RSS feeds into BARF blog posts for the configured site.
# If a post with a matching _link_ already exists, no new post will
# be created for that item. Tags will be created from the extra
# |-separated fields on each line of the feeds file.
#
# The file argument should point to a file containing one line per feed,
# with fields separated by the | character, in the following format:
#
#	http://feeds.feedburner.com/ImNotReallyStanleyLieber|stanleylieber
#
# where the first field is the feed URL and each additional field is a tag.
#
# Requires 20h's xmlpull and rssread.
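# Typical invocation (hypothetical path; the argument must be an
# absolute path, per the switch below):
#
#	gz /usr/sl/lib/feeds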
rfork en	# detach environment and namespace from the parent shell
switch($1){
case /*
	feeds=$1
	site=read.stanleylieber.com
	tags=()
case *
	echo 'Usage: gz file' >[1=2]
	exit usage
}

file=/tmp/gz.$pid
werc=/usr/sl/www/werc

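# /boot/factotum only exists on Plan 9, so its presence selects the
# native hget; elsewhere (e.g. plan9port) fall back to curl.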
if(test -f /boot/factotum)
	cmd=hget
if not
	cmd='curl -s'

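# cram flattens rssread's multi-line output into one line per item:
# the link: and title: lines are bracketed with the HJDIVIDER sentinel,
# all newlines are stripped, and a newline is reinserted before each
# title marker, so $ifs can split the result into one record per post.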
fn cram{
	ssam '
,s/^link:.*$/HJDIVIDER&HJDIVIDER/g
,s/^title:.*$/HJDIVIDER&HJDIVIDER/g
,s/\n//g
,s/HJDIVIDERtitle:/\n&/g
	'
}

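# scape maps HTML character references left in the feed text back to
# literal characters, handling both the numeric and the named spellings;
# the first expression drops encoded carriage returns.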
fn scape{
	ssam '
,s/&#13;//g
,s/\&#34;/\"/g
,s/\&quot;/\"/g
,s/\&#38;/\&/g
,s/\&amp;/\&/g
,s/\&#39;/''/g
,s/\&#44;/,/g
,s/\&#45;/-/g
,s/\&#46;/\./g
,s/\&#47;/\//g
,s/\&#58;/:/g
,s/\&#59;/;/g
,s/\&#60;/</g
,s/\&lt;/</g
,s/\&#61;/=/g
,s/\&#62;/>/g
,s/\&gt;/>/g
,s/\&#95;/_/g
,s/\&#124;/\|/g
	'
}

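# get_posts fetches one feed and leaves it in $file, one line per item;
# the trailing echo guarantees a final newline for the split below.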
fn get_posts{
	$"cmd $"feed | rssread | cram | scape >$file
	echo >>$file
}

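# parse_posts turns each line of $file into a BARF post: pick the next
# free numeric id under src/, write title, date, link and body as
# individual files, and log an md5 of the link so the same item is
# never imported twice.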
fn parse_posts{
	ifs='
' {
		posts=`{cat $file}
		# walk the items in reverse, so the oldest unseen item gets the lowest id
		for(i in `{seq 1 $#posts | sort -nr}){
			post=$posts($i)
			if(! ~ $post ''){
				a_title=`{echo $post | sed 's/^.*HJDIVIDERtitle: //g; s/HJDIVIDER.*$//g'}
				a_date=`{date}
				a_link=`{echo $post | sed 's/^.*HJDIVIDERlink:  //g; s/HJDIVIDER.*$//g'}
				a_body=`{echo $post | sed 's/^.*HJDIVIDER//g; s/^.*HJDIVIDER//g; s/^.*HJDIVIDER//g'}
				a_id=`{echo `{ls -p $werc/sites/$site/src | sort -n | tail -1}^+1 | bc}	# highest existing id + 1
				if(~ $#a_id 0)
					a_id=1
				while(test -d $werc/sites/$site/src/$a_id)
					a_id=`{echo $a_id^+1 | bc}
				# skip items whose link md5 is already on file
				if(! ~ $"a_link '' && ! grep -s `{echo $"a_link | md5sum} $werc/sites/$site/links){
					mkdir -p $werc/sites/$site/src/$a_id/tags # big fat race
					echo $"a_title >$werc/sites/$site/src/$a_id/title
					echo $"a_date >$werc/sites/$site/src/$a_id/date
					echo $a_link(1) >$werc/sites/$site/src/$a_id/link
					echo $a_link(1) | md5sum >>$werc/sites/$site/links
					# flickr links become inline images
					if(~ $a_link(1) *staticflickr.com*)
						echo '<img src="'$"a_link'">' >$werc/sites/$site/src/$a_id/body
					echo $"a_body '</a></li></ul>' >>$werc/sites/$site/src/$a_id/body
					ifs=' ' {
						for(j in $tags){
							>$werc/sites/$site/src/$a_id/tags/$j
							echo $a_id/tags/$j >>$werc/sites/$site/tags
						}
					}
				}
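				# chmod +t marks the new post temporary on Plan 9 (kept out
				# of the dump); applied only on *.stanleylieber.com sites.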
				if(test -f /boot/factotum && ~ $site *.stanleylieber.com && test -d $werc/sites/$site/src/$a_id)
					chmod +t $werc/sites/$site/src/$a_id $werc/sites/$site/src/$a_id/* $werc/sites/$site/src/$a_id/tags/*
			}
		}
	}
}

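# Main loop: on Plan 9, start webfs so hget can use it, then import
# every non-comment line of the feeds file.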
if(test -f /boot/factotum && test -f /rc/bin/hget)
	webfs
for(i in `{grep -v -e '^#' $feeds}){
	feed=`{echo $"i | sed 's/\|.*$//g'}	# field 1: the feed URL
	tags=`{echo $"i | sed 's/\|/ /g'}
	tags=$tags(2-)	# remaining fields: tags
	get_posts
	parse_posts
}