427 lines
		
	
	
	
		
			8.5 KiB
		
	
	
	
		
			Awk
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			427 lines
		
	
	
	
		
			8.5 KiB
		
	
	
	
		
			Awk
		
	
	
		
			Executable file
		
	
	
	
	
#!/bin/awk -f
#
# by: Jesus Galan (yiyus) 2009
#
# Usage: md2html.awk file.md > file.html
# See: http://4l77.com/src/md2html.awk
 | |
| 
 | |
# eschtml(t): return t with the HTML-significant characters "&" and "<"
# replaced by the entities "&amp;" and "&lt;".
function eschtml(t) {
	# "&" must go first, otherwise the ampersand of a freshly inserted
	# "&lt;" would be escaped a second time.  In a gsub() replacement a bare
	# "&" stands for the matched text, hence "\\&" for a literal ampersand.
	gsub(/&/, "\\&amp;", t);
	gsub(/</, "\\&lt;", t);
	return t;
}
 | |
| 
 | |
# oprint(t): emit one output line.  While nr is non-zero the line is
# appended to the otext buffer (separated by a newline) instead of being
# printed; otherwise it is printed immediately.
function oprint(t) {
	if (nr != 0) {
		otext = otext "\n" t;
		return;
	}
	print t;
}
 | |
| 
 | |
# subref(id): resolve the buffered placeholders for reference `id`.
#
# Placeholders are written into otext as  "<<" id  directly followed by the
# closing double quote of the src/href attribute.  Each one found is replaced
# by ref[id] and nr is decremented.  Once nr reaches zero the buffer is
# printed and cleared.
#
# needle, repl and pos are locals (extra parameters, not passed by callers).
function subref(id,    needle, repl, pos) {
	# Work on literal strings: the id may contain regex metacharacters and
	# the URL may contain "&", which sub() would expand to the matched text.
	# Including the closing quote keeps id "a" from matching "<<ab".
	needle = "<<" id "\"";
	repl = ref[id] "\"";
	while (nr > 0 && (pos = index(otext, needle)) > 0) {
		otext = substr(otext, 1, pos - 1) repl substr(otext, pos + length(needle));
		nr--;
	}
	if (nr == 0 && otext) {
		print otext;
		otext = "";
	}
}
 | |
| 
 | |
# nextil(t): convert inline markdown in t to HTML.
#
# Finds the first special character in t, converts the construct it starts
# and recurses on the rest of the string.  Returns the converted text; when t
# holds no special character it is returned unchanged.
#
# State shared with inline() and the rest of the script (all globals):
#   ilcode      - non-zero while inside a `code` span
#   ilcode2     - non-zero while inside a ``double backtick`` span
#   stag[1..ns] - stack of currently open emphasis markers
#   ref[], nr   - known references / number of unresolved placeholders
# t1, tag, t2, url, title, alt, id, r, linktext, pt2, ntag and n are global
# scratch variables as well; note that the recursive calls reassign them.
function nextil(t) {
	if(!match(t, /[`<&\[*_\\-]|(\!\[)/))
		return t;
	t1 = substr(t, 1, RSTART - 1);
	tag = substr(t, RSTART, RLENGTH);
	t2 = substr(t, RSTART + RLENGTH);
	# Inside a code span everything except a backtick is literal text.
	if(ilcode && tag != "`")
		return eschtml(t1 tag) nextil(t2);
	# Backslash escaping
	if(tag == "\\"){
		if(match(t2, /^[\\`*_{}\[\]()#+\-\.!]/)){
			tag = substr(t2, 1, 1);
			t2 = substr(t2, 2);
		}
		return t1 tag nextil(t2);
	}
	# Dashes: "--" becomes an em dash, a single "-" stays as it is.
	if(tag == "-"){
		if(sub(/^-/, "", t2))
			tag = "&mdash;";
		return t1 tag nextil(t2);
	}
	# Inline Code
	if(tag == "`"){
		if(sub(/^`/, "", t2)){
			# Two backticks with no closing pair later on: emit a
			# typographic double quote instead of opening a code span.
			if(!match(t2, /``/))
				return t1 "&#8221;" nextil(t2);
			ilcode2 = !ilcode2;
		}
		else if(ilcode2)
			# Single backtick inside a ``...`` span is literal.
			return t1 tag nextil(t2);
		tag = "<code>";
		if(ilcode){
			t1 = eschtml(t1);
			tag = "</code>";
		}
		ilcode = !ilcode;
		return t1 tag nextil(t2);
	}
	if(tag == "<"){
	# Autolinks
		if(match(t2, /^[^ 	]+[\.@][^ 	]+>/)){
			url = eschtml(substr(t2, 1, RLENGTH - 1));
			t2 = substr(t2, RLENGTH + 1);
			linktext = url;
			if(match(url, /@/) && !match(url, /^mailto:/))
				url = "mailto:" url;
			return t1 "<a href=\"" url "\">" linktext "</a>" nextil(t2);
		}
	# Html tags
		if(match(t2, /^[A-Za-z\/!][^>]*>/)){
			tag = tag substr(t2, RSTART, RLENGTH);
			t2 = substr(t2, RLENGTH + 1);
			return t1 tag nextil(t2);
		}
		# Neither an autolink nor a tag: a literal "<" must be escaped.
		return t1 "&lt;" nextil(t2);
	}
	# Html special entities
	if(tag == "&"){
		if(match(t2, /^#?[A-Za-z0-9]+;/)){
			tag = tag substr(t2, RSTART, RLENGTH);
			t2 = substr(t2, RLENGTH + 1);
			return t1 tag nextil(t2);
		}
		# Not an entity: a literal "&" must be escaped.
		return t1 "&amp;" nextil(t2);
	}
	# Images
	if(tag == "!["){
		if(!match(t2, /(\[.*\])|(\(.*\))/))
			return t1 tag nextil(t2);
		match(t2, /^[^\]]*/);
		alt = substr(t2, 1, RLENGTH);
		t2 = substr(t2, RLENGTH + 2);
		if(match(t2, /^\(/)){
			# Inline
			sub(/^\(/, "", t2);
			match(t2, /^[^\)]+/);
			url = eschtml(substr(t2, 1, RLENGTH));
			t2 = substr(t2, RLENGTH + 2);
			title = "";
			if(match(url, /[ 	]+\".*\"[ 	]*$/)) {
				title = substr(url, RSTART, RLENGTH);
				url = substr(url, 1, RSTART - 1);
				match(title, /\".*\"/);
				title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\"";
			}
			if(match(url, /^<.*>$/))
				url = substr(url, 2, RLENGTH - 2);
			return t1 "<img src=\"" url "\" alt=\"" alt "\"" title " />" nextil(t2);
		}
		else{
			# Referenced
			sub(/^ ?\[/, "", t2);
			id = alt;
			if(match(t2, /^[^\]]+/))
				id = substr(t2, 1, RLENGTH);
			t2 = substr(t2, RLENGTH + 2);
			if(ref[id])
				r = ref[id];
			else{
				# Not defined yet: leave a placeholder and count it.
				r = "<<" id;
				nr++;
			}
			return t1 "<img src=\"" r "\" alt=\"" alt "\" />" nextil(t2);
		}
	}
	# Links
	if(tag == "["){
		if(!match(t2, /(\[.*\])|(\(.*\))/))
			return t1 tag nextil(t2);
		match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/);
		linktext = substr(t2, 1, RLENGTH);
		t2 = substr(t2, RLENGTH + 2);
		if(match(t2, /^\(/)){
			# Inline
			match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/);
			url = substr(t2, 2, RLENGTH - 1);
			pt2 = substr(t2, RLENGTH + 2);
			title = "";
			if(match(url, /[ 	]+\".*\"[ 	]*$/)) {
				title = substr(url, RSTART, RLENGTH);
				url = substr(url, 1, RSTART - 1);
				match(title, /\".*\"/);
				title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\"";
			}
			if(match(url, /^<.*>$/))
				url = substr(url, 2, RLENGTH - 2);
			url = eschtml(url);
			return t1 "<a href=\"" url "\"" title ">" nextil(linktext) "</a>" nextil(pt2);
		}
		else{
			# Referenced
			sub(/^ ?\[/, "", t2);
			id = linktext;
			if(match(t2, /^[^\]]+/))
				id = substr(t2, 1, RLENGTH);
			t2 = substr(t2, RLENGTH + 2);
			if(ref[id])
				r = ref[id];
			else{
				# Not defined yet: leave a placeholder and count it.
				r = "<<" id;
				nr++;
			}
			pt2 = t2;
			# <a> is not a void element, so the start tag ends in ">"
			# rather than " />".
			return t1 "<a href=\"" r "\">" nextil(linktext) "</a>" nextil(pt2);
		}
	}
	# Emphasis
	if(match(tag, /[*_]/)){
		ntag = tag;
		# Literal comparison instead of the dynamic regex "^" tag, which
		# for "*" would be the non-portable pattern "^*".
		if(substr(t2, 1, 1) == tag){
			t2 = substr(t2, 2);
			if(stag[ns] == tag && substr(t2, 1, 1) == tag)
				t2 = tag t2;
			else
				ntag = tag tag;
		}
		n = length(ntag);
		tag = (n == 2) ? "strong" : "em";
		# A marker with a space on both sides is plain text: give back the
		# marker itself, not the element name.
		if(match(t1, / $/) && match(t2, /^ /))
			return t1 ntag nextil(t2);
		if(stag[ns] == ntag){
			tag = "/" tag;
			ns--;
		}
		else
			stag[++ns] = ntag;
		tag = "<" tag ">";
		return t1 tag nextil(t2);
	}
}
 | |
| 
 | |
# inline(t): convert the inline markdown of t to HTML.
# Clears the scanner state used by nextil() (emphasis stack depth and both
# code-span flags) and returns nextil(t).
function inline(t) {
	ns = 0;
	ilcode2 = 0;
	ilcode = 0;
	return nextil(t);
}
 | |
| 
 | |
# printp(tag): flush the accumulated paragraph held in the global `text`.
# Text that is empty or only spaces/tabs is discarded.  Otherwise it is run
# through inline() and handed to oprint(), wrapped in <tag>...</tag> unless
# tag is the empty string.  `text` is always empty afterwards.
function printp(tag) {
	if (match(text, /^[ 	]*$/)) {
		text = "";
		return;
	}
	text = inline(text);
	oprint((tag == "") ? text : "<" tag ">" text "</" tag ">");
	text = "";
}
 | |
| 
 | |
# Initial parser state.
BEGIN {
	# Flags and counters: pending blank line, inside code block, horizontal
	# rule, inside raw html block, block nesting depth, unresolved references.
	blank = code = hr = html = nl = nr = 0;
	# Buffered output and the paragraph being collected.
	otext = text = "";
	# Element used to wrap the next paragraph.
	par = "p";
}
 | |
| 
 | |
# References:  [id]: url "optional title"
# Stores  url" title="title  (or just the url) in ref[id], resolves any
# placeholders already buffered for that id, and consumes the line.
!code && /^ *\[[^\]]*\]:[ 	]+/ {
	sub(/^ *\[/, "");
	match($0, /\]/);
	id = substr($0, 1, RSTART - 1);
	# Drop the label by position rather than by interpolating id into a
	# regex, so ids containing metacharacters (".", "+", "*", ...) work.
	$0 = substr($0, RSTART + 1);
	sub(/^:[ 	]+/, "");
	title = "";
	if(match($0, /\".*\"$/))
		title = "\" title=\"" substr($0, RSTART + 1, RLENGTH - 2);
	sub(/[ 	]+\".*\"$/, "");
	url = eschtml($0);
	ref[id] = url title;

	subref(id);
	next;
}
 | |
| 
 | |
# html
# Raw block-level HTML is passed through untouched.  `html` is set while
# inside a block whose closing tag has not been seen yet; `hr` is set when
# that block is an <hr tag that did not end on its own line.
#
# The tag lists are kept on a single line: a regex literal continued with
# backslash-newline is not portable, and any indentation on the continuation
# line would become part of the pattern.
!html && /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/ {
	if(code){
		oprint("</pre></code>");
		# Reset the flag like every other place that closes a code block,
		# otherwise the closing tags are emitted again later.
		code = 0;
	}
	for(; !text && block[nl] == "blockquote"; nl--)
		oprint("</blockquote>");
	match($0, /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/);
	htag = substr($0, 2, RLENGTH - 1);
	# Stay in html mode unless the block is closed on this same line.
	if(!match($0, "(<\\/" htag ">)|((^<hr ?\\/?)|(--)>$)"))
		html = 1;
	if(html && match($0, /^<hr/))
		hr = 1;
	oprint($0);
	next;
}

# Closing tag, end of comment, or the ">" that ends a multi-line <hr.
html && (/(^<\/(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|isindex|menu|noframes|noscript|ol|p|pre|table|ul).*)|(--)>$/ || (hr && />$/)) {
	html = 0;
	hr = 0;
	oprint($0);
	next;
}

# Any other line inside an html block.
html {
	oprint($0);
	next;
}
 | |
| 
 | |
# List and quote blocks
#
# block[1..nl]   - blocks currently open, outermost first
# nblock[1..nnl] - blocks this line belongs to
# The rules below run in order on every line and strip the block markers
# from $0 as they go.

#   Remove indentation
#   Count in nnl how many of the open blocks this line continues: a list
#   level needs four spaces or a tab, a quote level needs ">".
{
	for (nnl = 0; nnl < nl; nnl++) {
		if (match(block[nnl + 1], /[ou]l/)) {
			if (!sub(/^(    |	)/, ""))
				break;
		}
		else if (block[nnl + 1] == "blockquote") {
			if (!sub(/^> ?/, ""))
				break;
		}
	}
}

#   A line that directly follows paragraph text and is not a list item
#   stays in all open blocks even without their markers.
nnl < nl && !blank && text && ! /^ ? ? ?([*+-]|([0-9]+\.)+)( +|	)/ {
	nnl = nl;
}

#   Quote blocks
{
	while (sub(/^> /, ""))
		nblock[++nnl] = "blockquote";
}

#   Horizontal rules
{
	hr = 0;
}

(blank || (!text && !code)) && /^ ? ? ?([-*_][ 	]*)([-*_][ 	]*)([-*_][ 	]*)+$/ {
	if (code) {
		oprint("</pre></code>");
		code = 0;
	}
	blank = 0;
	nnl = 0;
	hr = 1;
}

#   List items
#   A blank line inside a list is only remembered, not processed further.
block[nl] ~ /[ou]l/ && /^$/ {
	blank = 1;
	next;
}

{
	newli = 0;
}

!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +|	)/ {
	sub(/^ ? ? ?[*+-]( +|	)/, "");
	nblock[++nnl] = "ul";
	newli = 1;
}

(nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?([0-9]+\.)+( +|	)/ {
	sub(/^ ? ? ?([0-9]+\.)+( +|	)/, "");
	nblock[++nnl] = "ol";
	newli = 1;
}

#   A new item flushes the previous one; a blank line before it turns the
#   previous item's text into a paragraph.
newli {
	if (blank && nnl == nl && !par)
		par = "p";
	blank = 0;
	printp(par);
	if (nnl == nl && block[nl] == nblock[nl])
		oprint("</li><li>");
}

#   First non-empty line after a blank one ends the pending paragraph.
blank && ! /^$/ {
	if (match(block[nnl], /[ou]l/) && !par)
		par = "p";
	printp(par);
	par = "p";
	blank = 0;
}
 | |
| 		
 | |
# Close old blocks and open new ones

#   The block structure changes on this line: finish any code block and the
#   pending paragraph, then pick the wrapper for the text that follows
#   (none directly inside a list item, "p" otherwise).
nnl != nl || nblock[nl] != block[nl] {
	if (code) {
		oprint("</pre></code>");
		code = 0;
	}
	printp(par);
	if (nnl > nl)
		b = nblock[nnl];
	else
		b = block[nnl];
	par = match(b, /[ou]l/) ? "" : "p";
}

#   Close blocks.  pblock is never assigned in this file, so pblock[nl] is
#   empty and the second half of the loop condition holds whenever
#   nl == nnl and block[nl] is set.
nnl < nl || (nnl == nl && nblock[nl] != block[nl]) {
	while (nl > nnl || (nnl == nl && pblock[nl] != block[nl])) {
		if (match(block[nl], /[ou]l/))
			oprint("</li>");
		oprint("</" block[nl] ">");
		nl--;
	}
}

#   Open blocks up to the new depth; lists start with their first item.
nnl > nl {
	while (nl < nnl) {
		block[nl + 1] = nblock[nl + 1];
		oprint("<" block[nl + 1] ">");
		if (match(block[nl + 1], /[ou]l/))
			oprint("<li>");
		nl++;
	}
}

#   A horizontal rule line produces nothing else.
hr {
	oprint("<hr>");
	next;
}
 | |
| 
 | |
# Code blocks
#
# cblank counts blank lines seen inside the current code block that have not
# been written yet.  They are emitted only when another code line follows, so
# blank lines between code lines are preserved while trailing ones are not.
# (The generic `blank` flag cannot be used for this: it is cleared by an
# earlier rule before the code-line rule runs.)
code && /^$/ {
	cblank++;
	blank = 1;
	next;
}

#   Indented line outside a paragraph: open or continue a code block.
!text && sub(/^(	|    )/, "") {
	blank = 0;
	if(!code){
		oprint("<code><pre>");
		# Blank lines counted for an earlier block do not belong here.
		cblank = 0;
	}
	for(; cblank > 0; cblank--)
		oprint("");
	code = 1;
	$0 = eschtml($0);
	oprint($0);
	next;
}

#   Any other line ends the code block.
code {
	oprint("</pre></code>");
	code = 0;
}
 | |
| 
 | |
# Setext-style headers: a line of "=" or "-" under paragraph text.
text && /^=+$/ {
	printp("h1");
	next;
}
text && /^-+$/ {
	printp("h2");
	next;
}

# Atx-style headers: strip up to six leading "#" (and one trailing "#" per
# level) and use the count as the heading level.
/^#+/ && (!newli || par=="p" || /^##/) {
	n = 0;
	while (n < 6 && sub(/^# */, "")) {
		sub(/#$/, "");
		n++;
	}
	par = "h" n;
}

# Paragraph: an empty line ends the current one.
/^$/ {
	printp(par);
	par = "p";
	next;
}

# Add text: join the line to the paragraph being collected.
{
	if (text)
		text = text " " $0;
	else
		text = "" $0;
}
 | |
| 
 | |
# End of input: close whatever is still open and flush the output buffer.
END {
	if (code) {
		oprint("</pre></code>");
		code = 0;
	}
	printp(par);
	# Unwind the block stack, innermost first.
	while (nl > 0) {
		if (match(block[nl], /[ou]l/))
			oprint("</li>");
		oprint("</" block[nl] ">");
		nl--;
	}
	# References that were never defined: drop their placeholders.
	gsub(/<<[^\"]*/, "", otext);
	print(otext);
}
 | 
