Rewrite most of the sitemap generation code to be much cleaner, modularize file title/desc extraction, and take advantage of the new fproc_cache to cache the sitemap.
This commit is contained in:
		
							parent
							
								
									a0a7645022
								
							
						
					
					
						commit
						9ff545ec83
					
				
					 1 changed files with 38 additions and 42 deletions
				
			
		|  | @ -5,65 +5,61 @@ tmpfile=/tmp/werc_sitemap_$pid.txt | ||||||
| echo '' > $tmpfile | echo '' > $tmpfile | ||||||
| saveddf=$dirfilter | saveddf=$dirfilter | ||||||
| 
 | 
 | ||||||
| fn getMdDesc { | fn get_md_title { | ||||||
|     sed 's/^(.......................................................................................................[^ ]*).*$/\1/g; 1q' < $1  |     sed 's/^(................................................................[^ ]*).*$/\1/g; 1q' < $1  | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | fn get_html_title { | ||||||
|  |     # H1 is not reliable because htmlroff doesn't use it :( | ||||||
|  |     #desc=`{cat $1 | sed 32q | grep '<[Hh]1>' | sed 's/<[Hh]1>(.*)(<\/[Hh]1>|$)/\1/;s/<[^>]*>//g;1q'} | ||||||
|  |     # Pick the first line of body  instead | ||||||
|  |     desc=`{sed -n '/<[Bb][Oo][Dd][Yy]/,/./s/(<[^>]*>|$)//gp' < $1} | ||||||
|  |     if(~ $#desc 0) | ||||||
|  |         desc=`{sed 's/<[^>]*>//g; 1q' < $1} | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | fn get_file_title { | ||||||
|  |          | ||||||
|  |     if(~ $1 */) { | ||||||
|  |         if(test -f $1/index.md) | ||||||
|  |             get_md_title $1/index.md | ||||||
|  |         if not if(test -f $1/index.html) | ||||||
|  |             get_html_title $1/index.html | ||||||
|  |     } | ||||||
|  |     if not if(~ $1 *.md) | ||||||
|  |         get_md_title $1 | ||||||
|  |     if not if(~ $1 *.html) | ||||||
|  |         get_html_title $1 | ||||||
|  |     if not | ||||||
|  |         echo '' | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| # XXX Instead of recursion should use du(1) or similar. |  | ||||||
| fn listDir { | fn listDir { | ||||||
|     d=$1 |     d=$1 | ||||||
|     if(~ $#d 0) |  | ||||||
|         d='' |  | ||||||
|     dirfilter=$saveddf |     dirfilter=$saveddf | ||||||
|     blogDirs=() |  | ||||||
|     if(test -f $d/_werc/config) |     if(test -f $d/_werc/config) | ||||||
|         . ./$d/_werc/config |         . ./$d/_werc/config | ||||||
| 
 | 
 | ||||||
|     echo '<ul class="sitemap-list">' |     echo '<ul class="sitemap-list">' | ||||||
| 
 | 
 | ||||||
|     # Don't hide blog dirs for now |     for(i in `{ls -dF $d^*/ $d^*.md $d^*.html $d^*.txt >[2]/dev/null | sed $dirfilter}) { | ||||||
|     #if(! ~ $#blogDirs 0 || ~ $1 */blog */Blog) |         desc=`{get_file_title $i} | ||||||
|     #    echo ''  |         u=`{echo $i|sed 's!'$sitedir'!!; '$dirclean's!/index$!/!; '} | ||||||
|     #if not  |  | ||||||
|     if(! ~ $#redirectPermanent 1) |  | ||||||
|     { |  | ||||||
| 
 |  | ||||||
|     for(i in `{ls -dF $d^*/ $d^*.md $d^*.html $d^*.txt >[2]/dev/null | sed $dirfilter$dirclean}) { |  | ||||||
|         desc='' |  | ||||||
|         if(test -f $i.md) |  | ||||||
|             desc=`{getMdDesc $i.md} |  | ||||||
|         if not if(~ $i */ && test -f $i/index.md) |  | ||||||
|             desc=`{getMdDesc $i/index.md} |  | ||||||
|         if not if(test -f $i.html) { |  | ||||||
|             # H1 is not reliable because htmlroff doesn't use it :( |  | ||||||
|             #desc = `{cat $i.html |sed 32q | grep '<[Hh]1>' |sed 's/<[Hh]1>(.*)(<\/[Hh]1>|$)/\1/;s/<[^>]*>//g;1q'} |  | ||||||
|             # Pick the first line of body  instead |  | ||||||
|             desc=`{sed -n '/<[Bb][Oo][Dd][Yy]/,/./s/(<[^>]*>|$)//gp' < $i.html} |  | ||||||
|             if(~ $#desc 0) |  | ||||||
|                 desc=`{sed 's/<[^>]*>//g; 1q' < $i.html} |  | ||||||
|             #desc=`{/bin/sed -e '0,/<[Bb][Oo][Dd][Yy]/d;s/<[^>]*>//g;/^$/d' < $i.html >[2]/dev/null | sed 1q} |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         if(! ~ $#desc 0 && ! ~ $desc '') |         if(! ~ $#desc 0 && ! ~ $desc '') | ||||||
|             desc=' - '$"desc |             desc=' - '$"desc | ||||||
|         tit=`{echo /$i|sed 's/_/ /g; s,.*/([^/]+)/?$,\1,'} |         n=`{echo /$u|sed 's/_/ /g; s,.*/([^/]+)/?$,\1,'} | ||||||
|         echo '<li><a href="/'$i'">'^$"tit^'</a>' $"desc '</li>'  |         echo '<li><a href="'$base_url$u'">'^$"n^'</a>' $"desc '</li>'  | ||||||
|         echo -n $base_url^$i >> $tmpfile |         echo $base_url^$u >> $tmpfile | ||||||
|         if(test -d $i) { |         if(test -d $i) | ||||||
|             echo / >> $tmpfile	 |  | ||||||
|             @{ listDir $i } |             @{ listDir $i } | ||||||
|         } |  | ||||||
|         if not |  | ||||||
|             echo >> $tmpfile |  | ||||||
|     } |  | ||||||
|     } |     } | ||||||
|     echo '</ul>' |     echo '</ul>' | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| cd $sitedir  | fproc_cache.rc listDir $sitedir/ | ||||||
| listDir '' | #listDir $sitedir/ | ||||||
| 
 | 
 | ||||||
| cp $tmpfile ./sitemap.txt | if(test -s $tmpfile) | ||||||
| rm $tmpfile |     mv $tmpfile $sitedir/sitemap.txt & | ||||||
| 
 | 
 | ||||||
| %} | %} | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Uriel
						Uriel