1447 lines
		
	
	
	
		
			35 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			1447 lines
		
	
	
	
		
			35 KiB
		
	
	
	
		
			Perl
		
	
	
		
			Executable file
		
	
	
	
	
| #!/usr/bin/env perl
 | |
| #
 | |
| # Markdown -- A text-to-HTML conversion tool for web writers
 | |
| #
 | |
| # Copyright (c) 2004 John Gruber
 | |
| # <http://daringfireball.net/projects/markdown/>
 | |
| #
 | |
| package Markdown;
 | |
| require 5.006_000;
 | |
| use strict;
 | |
| use warnings;
 | |
| 
 | |
| use Digest::MD5 qw(md5_hex);
 | |
| use vars qw($VERSION);
 | |
| $VERSION = '1.0.1';
 | |
| # Tue 14 Dec 2004
 | |
| 
 | |
| ## Disabled; causes problems under Perl 5.6.1:
 | |
| # use utf8;
 | |
| # binmode( STDOUT, ":utf8" );  # c.f.: http://acis.openlib.org/dev/perl-unicode-struggle.html
 | |
| 
 | |
| 
 | |
| #
 | |
| # Global default settings:
 | |
| #
 | |
| my $g_empty_element_suffix = " />";     # Change to ">" for HTML output
 | |
| my $g_tab_width = 4;
 | |
| 
 | |
| 
 | |
| #
 | |
| # Globals:
 | |
| #
 | |
| 
 | |
| # Regex to match balanced [brackets]. See Friedl's
 | |
| # "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
 | |
| my $g_nested_brackets;
 | |
| $g_nested_brackets = qr{
 | |
| 	(?> 								# Atomic matching
 | |
| 	   [^\[\]]+							# Anything other than brackets
 | |
| 	 | 
 | |
| 	   \[
 | |
| 		 (??{ $g_nested_brackets })		# Recursive set of nested brackets
 | |
| 	   \]
 | |
| 	)*
 | |
| }x;
 | |
| 
 | |
| 
 | |
| # Table of hash values for escaped characters:
 | |
| my %g_escape_table;
 | |
| foreach my $char (split //, '\\`*_{}[]()>#+-.!') {
 | |
| 	$g_escape_table{$char} = md5_hex($char);
 | |
| }
 | |
| 
 | |
| 
 | |
| # Global hashes, used by various utility routines
 | |
| my %g_urls;
 | |
| my %g_titles;
 | |
| my %g_html_blocks;
 | |
| 
 | |
| # Used to track when we're inside an ordered or unordered list
 | |
| # (see _ProcessListItems() for details):
 | |
| my $g_list_level = 0;
 | |
| 
 | |
| 
 | |
| #### Blosxom plug-in interface ##########################################
 | |
| 
 | |
| # Set $g_blosxom_use_meta to 1 to use Blosxom's meta plug-in to determine
 | |
| # which posts Markdown should process, using a "meta-markup: markdown"
 | |
| # header. If it's set to 0 (the default), Markdown will process all
 | |
| # entries.
 | |
| my $g_blosxom_use_meta = 0;
 | |
| 
 | |
| sub start { 1; }
 | |
| sub story {
 | |
| 	my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
 | |
| 
 | |
| 	if ( (! $g_blosxom_use_meta) or
 | |
| 	     (defined($meta::markup) and ($meta::markup =~ /^\s*markdown\s*$/i))
 | |
| 	     ){
 | |
| 			$$body_ref  = Markdown($$body_ref);
 | |
|      }
 | |
|      1;
 | |
| }
 | |
| 
 | |
| 
 | |
| #### Movable Type plug-in interface #####################################
 | |
| eval {require MT};  # Test to see if we're running in MT.
 | |
| unless ($@) {
 | |
|     require MT;
 | |
|     import  MT;
 | |
|     require MT::Template::Context;
 | |
|     import  MT::Template::Context;
 | |
| 
 | |
| 	eval {require MT::Plugin};  # Test to see if we're running >= MT 3.0.
 | |
| 	unless ($@) {
 | |
| 		require MT::Plugin;
 | |
| 		import  MT::Plugin;
 | |
| 		my $plugin = new MT::Plugin({
 | |
| 			name => "Markdown",
 | |
| 			description => "A plain-text-to-HTML formatting plugin. (Version: $VERSION)",
 | |
| 			doc_link => 'http://daringfireball.net/projects/markdown/'
 | |
| 		});
 | |
| 		MT->add_plugin( $plugin );
 | |
| 	}
 | |
| 
 | |
| 	MT::Template::Context->add_container_tag(MarkdownOptions => sub {
 | |
| 		my $ctx	 = shift;
 | |
| 		my $args = shift;
 | |
| 		my $builder = $ctx->stash('builder');
 | |
| 		my $tokens = $ctx->stash('tokens');
 | |
| 
 | |
| 		if (defined ($args->{'output'}) ) {
 | |
| 			$ctx->stash('markdown_output', lc $args->{'output'});
 | |
| 		}
 | |
| 
 | |
| 		defined (my $str = $builder->build($ctx, $tokens) )
 | |
| 			or return $ctx->error($builder->errstr);
 | |
| 		$str;		# return value
 | |
| 	});
 | |
| 
 | |
| 	MT->add_text_filter('markdown' => {
 | |
| 		label     => 'Markdown',
 | |
| 		docs      => 'http://daringfireball.net/projects/markdown/',
 | |
| 		on_format => sub {
 | |
| 			my $text = shift;
 | |
| 			my $ctx  = shift;
 | |
| 			my $raw  = 0;
 | |
| 		    if (defined $ctx) {
 | |
| 		    	my $output = $ctx->stash('markdown_output'); 
 | |
| 				if (defined $output  &&  $output =~ m/^html/i) {
 | |
| 					$g_empty_element_suffix = ">";
 | |
| 					$ctx->stash('markdown_output', '');
 | |
| 				}
 | |
| 				elsif (defined $output  &&  $output eq 'raw') {
 | |
| 					$raw = 1;
 | |
| 					$ctx->stash('markdown_output', '');
 | |
| 				}
 | |
| 				else {
 | |
| 					$raw = 0;
 | |
| 					$g_empty_element_suffix = " />";
 | |
| 				}
 | |
| 			}
 | |
| 			$text = $raw ? $text : Markdown($text);
 | |
| 			$text;
 | |
| 		},
 | |
| 	});
 | |
| 
 | |
| 	# If SmartyPants is loaded, add a combo Markdown/SmartyPants text filter:
 | |
| 	my $smartypants;
 | |
| 
 | |
| 	{
 | |
| 		no warnings "once";
 | |
| 		$smartypants = $MT::Template::Context::Global_filters{'smarty_pants'};
 | |
| 	}
 | |
| 
 | |
| 	if ($smartypants) {
 | |
| 		MT->add_text_filter('markdown_with_smartypants' => {
 | |
| 			label     => 'Markdown With SmartyPants',
 | |
| 			docs      => 'http://daringfireball.net/projects/markdown/',
 | |
| 			on_format => sub {
 | |
| 				my $text = shift;
 | |
| 				my $ctx  = shift;
 | |
| 				if (defined $ctx) {
 | |
| 					my $output = $ctx->stash('markdown_output'); 
 | |
| 					if (defined $output  &&  $output eq 'html') {
 | |
| 						$g_empty_element_suffix = ">";
 | |
| 					}
 | |
| 					else {
 | |
| 						$g_empty_element_suffix = " />";
 | |
| 					}
 | |
| 				}
 | |
| 				$text = Markdown($text);
 | |
| 				$text = $smartypants->($text, '1');
 | |
| 			},
 | |
| 		});
 | |
| 	}
 | |
| }
 | |
| else {
 | |
| #### BBEdit/command-line text filter interface ##########################
 | |
| # Needs to be hidden from MT (and Blosxom when running in static mode).
 | |
| 
 | |
|     # We're only using $blosxom::version once; tell Perl not to warn us:
 | |
| 	no warnings 'once';
 | |
|     unless ( defined($blosxom::version) ) {
 | |
| 		use warnings;
 | |
| 
 | |
| 		#### Check for command-line switches: #################
 | |
| 		my %cli_opts;
 | |
| 		use Getopt::Long;
 | |
| 		Getopt::Long::Configure('pass_through');
 | |
| 		GetOptions(\%cli_opts,
 | |
| 			'version',
 | |
| 			'shortversion',
 | |
| 			'html4tags',
 | |
| 		);
 | |
| 		if ($cli_opts{'version'}) {		# Version info
 | |
| 			print "\nThis is Markdown, version $VERSION.\n";
 | |
| 			print "Copyright 2004 John Gruber\n";
 | |
| 			print "http://daringfireball.net/projects/markdown/\n\n";
 | |
| 			exit 0;
 | |
| 		}
 | |
| 		if ($cli_opts{'shortversion'}) {		# Just the version number string.
 | |
| 			print $VERSION;
 | |
| 			exit 0;
 | |
| 		}
 | |
| 		if ($cli_opts{'html4tags'}) {			# Use HTML tag style instead of XHTML
 | |
| 			$g_empty_element_suffix = ">";
 | |
| 		}
 | |
| 
 | |
| 
 | |
| 		#### Process incoming text: ###########################
 | |
| 		my $text;
 | |
| 		{
 | |
| 			local $/;               # Slurp the whole file
 | |
| 			$text = <>;
 | |
| 		}
 | |
|         print Markdown($text);
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| sub Markdown {
 | |
| #
 | |
| # Main function. The order in which other subs are called here is
 | |
| # essential. Link and image substitutions need to happen before
 | |
| # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
 | |
| # and <img> tags get encoded.
 | |
| #
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	# Clear the global hashes. If we don't clear these, you get conflicts
 | |
| 	# from other articles when generating a page which contains more than
 | |
| 	# one article (e.g. an index page that shows the N most recent
 | |
| 	# articles):
 | |
| 	%g_urls = ();
 | |
| 	%g_titles = ();
 | |
| 	%g_html_blocks = ();
 | |
| 
 | |
| 
 | |
| 	# Standardize line endings:
 | |
| 	$text =~ s{\r\n}{\n}g; 	# DOS to Unix
 | |
| 	$text =~ s{\r}{\n}g; 	# Mac to Unix
 | |
| 
 | |
| 	# Make sure $text ends with a couple of newlines:
 | |
| 	$text .= "\n\n";
 | |
| 
 | |
| 	# Convert all tabs to spaces.
 | |
| 	$text = _Detab($text);
 | |
| 
 | |
| 	# Strip any lines consisting only of spaces and tabs.
 | |
| 	# This makes subsequent regexen easier to write, because we can
 | |
| 	# match consecutive blank lines with /\n+/ instead of something
 | |
| 	# contorted like /[ \t]*\n+/ .
 | |
| 	$text =~ s/^[ \t]+$//mg;
 | |
| 
 | |
| 	# Turn block-level HTML blocks into hash entries
 | |
| 	$text = _HashHTMLBlocks($text);
 | |
| 
 | |
| 	# Strip link definitions, store in hashes.
 | |
| 	$text = _StripLinkDefinitions($text);
 | |
| 
 | |
| 	$text = _RunBlockGamut($text);
 | |
| 
 | |
| 	$text = _UnescapeSpecialChars($text);
 | |
| 
 | |
| 	return $text . "\n";
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _StripLinkDefinitions {
 | |
| #
 | |
| # Strips link definitions from text, stores the URLs and titles in
 | |
| # hash references.
 | |
| #
 | |
| 	my $text = shift;
 | |
| 	my $less_than_tab = $g_tab_width - 1;
 | |
| 
 | |
| 	# Link defs are in the form: ^[id]: url "optional title"
 | |
| 	while ($text =~ s{
 | |
| 						^[ ]{0,$less_than_tab}\[(.+)\]:	# id = $1
 | |
| 						  [ \t]*
 | |
| 						  \n?				# maybe *one* newline
 | |
| 						  [ \t]*
 | |
| 						<?(\S+?)>?			# url = $2
 | |
| 						  [ \t]*
 | |
| 						  \n?				# maybe one newline
 | |
| 						  [ \t]*
 | |
| 						(?:
 | |
| 							(?<=\s)			# lookbehind for whitespace
 | |
| 							["(]
 | |
| 							(.+?)			# title = $3
 | |
| 							[")]
 | |
| 							[ \t]*
 | |
| 						)?	# title is optional
 | |
| 						(?:\n+|\Z)
 | |
| 					}
 | |
| 					{}mx) {
 | |
| 		$g_urls{lc $1} = _EncodeAmpsAndAngles( $2 );	# Link IDs are case-insensitive
 | |
| 		if ($3) {
 | |
| 			$g_titles{lc $1} = $3;
 | |
| 			$g_titles{lc $1} =~ s/"/"/g;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _HashHTMLBlocks {
 | |
| 	my $text = shift;
 | |
| 	my $less_than_tab = $g_tab_width - 1;
 | |
| 
 | |
| 	# Hashify HTML blocks:
 | |
| 	# We only want to do this for block-level HTML tags, such as headers,
 | |
| 	# lists, and tables. That's because we still want to wrap <p>s around
 | |
| 	# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
 | |
| 	# phrase emphasis, and spans. The list of tags we're looking for is
 | |
| 	# hard-coded:
 | |
| 	my $block_tags_a = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del/;
 | |
| 	my $block_tags_b = qr/p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math/;
 | |
| 
 | |
| 	# First, look for nested blocks, e.g.:
 | |
| 	# 	<div>
 | |
| 	# 		<div>
 | |
| 	# 		tags for inner block must be indented.
 | |
| 	# 		</div>
 | |
| 	# 	</div>
 | |
| 	#
 | |
| 	# The outermost tags must start at the left margin for this to match, and
 | |
| 	# the inner nested divs must be indented.
 | |
| 	# We need to do this before the next, more liberal match, because the next
 | |
| 	# match will start at the first `<div>` and stop at the first `</div>`.
 | |
| 	$text =~ s{
 | |
| 				(						# save in $1
 | |
| 					^					# start of line  (with /m)
 | |
| 					<($block_tags_a)	# start tag = $2
 | |
| 					\b					# word break
 | |
| 					(.*\n)*?			# any number of lines, minimally matching
 | |
| 					</\2>				# the matching end tag
 | |
| 					[ \t]*				# trailing spaces/tabs
 | |
| 					(?=\n+|\Z)	# followed by a newline or end of document
 | |
| 				)
 | |
| 			}{
 | |
| 				my $key = md5_hex($1);
 | |
| 				$g_html_blocks{$key} = $1;
 | |
| 				"\n\n" . $key . "\n\n";
 | |
| 			}egmx;
 | |
| 
 | |
| 
 | |
| 	#
 | |
| 	# Now match more liberally, simply from `\n<tag>` to `</tag>\n`
 | |
| 	#
 | |
| 	$text =~ s{
 | |
| 				(						# save in $1
 | |
| 					^					# start of line  (with /m)
 | |
| 					<($block_tags_b)	# start tag = $2
 | |
| 					\b					# word break
 | |
| 					(.*\n)*?			# any number of lines, minimally matching
 | |
| 					.*</\2>				# the matching end tag
 | |
| 					[ \t]*				# trailing spaces/tabs
 | |
| 					(?=\n+|\Z)	# followed by a newline or end of document
 | |
| 				)
 | |
| 			}{
 | |
| 				my $key = md5_hex($1);
 | |
| 				$g_html_blocks{$key} = $1;
 | |
| 				"\n\n" . $key . "\n\n";
 | |
| 			}egmx;
 | |
| 	# Special case just for <hr />. It was easier to make a special case than
 | |
| 	# to make the other regex more complicated.	
 | |
| 	$text =~ s{
 | |
| 				(?:
 | |
| 					(?<=\n\n)		# Starting after a blank line
 | |
| 					|				# or
 | |
| 					\A\n?			# the beginning of the doc
 | |
| 				)
 | |
| 				(						# save in $1
 | |
| 					[ ]{0,$less_than_tab}
 | |
| 					<(hr)				# start tag = $2
 | |
| 					\b					# word break
 | |
| 					([^<>])*?			# 
 | |
| 					/?>					# the matching end tag
 | |
| 					[ \t]*
 | |
| 					(?=\n{2,}|\Z)		# followed by a blank line or end of document
 | |
| 				)
 | |
| 			}{
 | |
| 				my $key = md5_hex($1);
 | |
| 				$g_html_blocks{$key} = $1;
 | |
| 				"\n\n" . $key . "\n\n";
 | |
| 			}egx;
 | |
| 
 | |
| 	# Special case for standalone HTML comments:
 | |
| 	$text =~ s{
 | |
| 				(?:
 | |
| 					(?<=\n\n)		# Starting after a blank line
 | |
| 					|				# or
 | |
| 					\A\n?			# the beginning of the doc
 | |
| 				)
 | |
| 				(						# save in $1
 | |
| 					[ ]{0,$less_than_tab}
 | |
| 					(?s:
 | |
| 						<!
 | |
| 						(--.*?--\s*)+
 | |
| 						>
 | |
| 					)
 | |
| 					[ \t]*
 | |
| 					(?=\n{2,}|\Z)		# followed by a blank line or end of document
 | |
| 				)
 | |
| 			}{
 | |
| 				my $key = md5_hex($1);
 | |
| 				$g_html_blocks{$key} = $1;
 | |
| 				"\n\n" . $key . "\n\n";
 | |
| 			}egx;
 | |
| 
 | |
| 
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _RunBlockGamut {
 | |
| #
 | |
| # These are all the transformations that form block-level
 | |
| # tags like paragraphs, headers, and list items.
 | |
| #
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	$text = _DoHeaders($text);
 | |
| 
 | |
| 	# Do Horizontal Rules:
 | |
| 	$text =~ s{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
 | |
| 	$text =~ s{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
 | |
| 	$text =~ s{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}{\n<hr$g_empty_element_suffix\n}gmx;
 | |
| 
 | |
| 	$text = _DoLists($text);
 | |
| 
 | |
| 	$text = _DoCodeBlocks($text);
 | |
| 
 | |
| 	$text = _DoBlockQuotes($text);
 | |
| 
 | |
| 	# We already ran _HashHTMLBlocks() before, in Markdown(), but that
 | |
| 	# was to escape raw HTML in the original Markdown source. This time,
 | |
| 	# we're escaping the markup we've just created, so that we don't wrap
 | |
| 	# <p> tags around block-level tags.
 | |
| 	$text = _HashHTMLBlocks($text);
 | |
| 
 | |
| 	$text = _FormParagraphs($text);
 | |
| 
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _RunSpanGamut {
 | |
| #
 | |
| # These are all the transformations that occur *within* block-level
 | |
| # tags like paragraphs, headers, and list items.
 | |
| #
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	$text = _DoCodeSpans($text);
 | |
| 
 | |
| 	$text = _EscapeSpecialChars($text);
 | |
| 
 | |
| 	# Process anchor and image tags. Images must come first,
 | |
| 	# because ![foo][f] looks like an anchor.
 | |
| 	$text = _DoImages($text);
 | |
| 	$text = _DoAnchors($text);
 | |
| 
 | |
| 	# Make links out of things like `<http://example.com/>`
 | |
| 	# Must come after _DoAnchors(), because you can use < and >
 | |
| 	# delimiters in inline links like [this](<url>).
 | |
| 	$text = _DoAutoLinks($text);
 | |
| 
 | |
| 	$text = _EncodeAmpsAndAngles($text);
 | |
| 
 | |
| 	$text = _DoItalicsAndBold($text);
 | |
| 
 | |
| 	# Do hard breaks:
 | |
| 	$text =~ s/ {2,}\n/ <br$g_empty_element_suffix\n/g;
 | |
| 
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _EscapeSpecialChars {
 | |
| 	my $text = shift;
 | |
| 	my $tokens ||= _TokenizeHTML($text);
 | |
| 
 | |
| 	$text = '';   # rebuild $text from the tokens
 | |
| # 	my $in_pre = 0;	 # Keep track of when we're inside <pre> or <code> tags.
 | |
| # 	my $tags_to_skip = qr!<(/?)(?:pre|code|kbd|script|math)[\s>]!;
 | |
| 
 | |
| 	foreach my $cur_token (@$tokens) {
 | |
| 		if ($cur_token->[0] eq "tag") {
 | |
| 			# Within tags, encode * and _ so they don't conflict
 | |
| 			# with their use in Markdown for italics and strong.
 | |
| 			# We're replacing each such character with its
 | |
| 			# corresponding MD5 checksum value; this is likely
 | |
| 			# overkill, but it should prevent us from colliding
 | |
| 			# with the escape values by accident.
 | |
| 			$cur_token->[1] =~  s! \* !$g_escape_table{'*'}!gx;
 | |
| 			$cur_token->[1] =~  s! _  !$g_escape_table{'_'}!gx;
 | |
| 			$text .= $cur_token->[1];
 | |
| 		} else {
 | |
| 			my $t = $cur_token->[1];
 | |
| 			$t = _EncodeBackslashEscapes($t);
 | |
| 			$text .= $t;
 | |
| 		}
 | |
| 	}
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _DoAnchors {
 | |
| #
 | |
| # Turn Markdown link shortcuts into XHTML <a> tags.
 | |
| #
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	#
 | |
| 	# First, handle reference-style links: [link text] [id]
 | |
| 	#
 | |
| 	$text =~ s{
 | |
| 		(					# wrap whole match in $1
 | |
| 		  \[
 | |
| 		    ($g_nested_brackets)	# link text = $2
 | |
| 		  \]
 | |
| 
 | |
| 		  [ ]?				# one optional space
 | |
| 		  (?:\n[ ]*)?		# one optional newline followed by spaces
 | |
| 
 | |
| 		  \[
 | |
| 		    (.*?)		# id = $3
 | |
| 		  \]
 | |
| 		)
 | |
| 	}{
 | |
| 		my $result;
 | |
| 		my $whole_match = $1;
 | |
| 		my $link_text   = $2;
 | |
| 		my $link_id     = lc $3;
 | |
| 
 | |
| 		if ($link_id eq "") {
 | |
| 			$link_id = lc $link_text;     # for shortcut links like [this][].
 | |
| 		}
 | |
| 
 | |
| 		if (defined $g_urls{$link_id}) {
 | |
| 			my $url = $g_urls{$link_id};
 | |
| 			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
 | |
| 			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
 | |
| 			$result = "<a href=\"$url\"";
 | |
| 			if ( defined $g_titles{$link_id} ) {
 | |
| 				my $title = $g_titles{$link_id};
 | |
| 				$title =~ s! \* !$g_escape_table{'*'}!gx;
 | |
| 				$title =~ s!  _ !$g_escape_table{'_'}!gx;
 | |
| 				$result .=  " title=\"$title\"";
 | |
| 			}
 | |
| 			$result .= ">$link_text</a>";
 | |
| 		}
 | |
| 		else {
 | |
| 			$result = $whole_match;
 | |
| 		}
 | |
| 		$result;
 | |
| 	}xsge;
 | |
| 
 | |
| 	#
 | |
| 	# Next, inline-style links: [link text](url "optional title")
 | |
| 	#
 | |
| 	$text =~ s{
 | |
| 		(				# wrap whole match in $1
 | |
| 		  \[
 | |
| 		    ($g_nested_brackets)	# link text = $2
 | |
| 		  \]
 | |
| 		  \(			# literal paren
 | |
| 		  	[ \t]*
 | |
| 			<?(.*?)>?	# href = $3
 | |
| 		  	[ \t]*
 | |
| 			(			# $4
 | |
| 			  (['"])	# quote char = $5
 | |
| 			  (.*?)		# Title = $6
 | |
| 			  \5		# matching quote
 | |
| 			)?			# title is optional
 | |
| 		  \)
 | |
| 		)
 | |
| 	}{
 | |
| 		my $result;
 | |
| 		my $whole_match = $1;
 | |
| 		my $link_text   = $2;
 | |
| 		my $url	  		= $3;
 | |
| 		my $title		= $6;
 | |
| 
 | |
| 		$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
 | |
| 		$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
 | |
| 		$result = "<a href=\"$url\"";
 | |
| 
 | |
| 		if (defined $title) {
 | |
| 			$title =~ s/"/"/g;
 | |
| 			$title =~ s! \* !$g_escape_table{'*'}!gx;
 | |
| 			$title =~ s!  _ !$g_escape_table{'_'}!gx;
 | |
| 			$result .=  " title=\"$title\"";
 | |
| 		}
 | |
| 
 | |
| 		$result .= ">$link_text</a>";
 | |
| 
 | |
| 		$result;
 | |
| 	}xsge;
 | |
| 
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _DoImages {
 | |
| #
 | |
| # Turn Markdown image shortcuts into <img> tags.
 | |
| #
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	#
 | |
| 	# First, handle reference-style labeled images: ![alt text][id]
 | |
| 	#
 | |
| 	$text =~ s{
 | |
| 		(				# wrap whole match in $1
 | |
| 		  !\[
 | |
| 		    (.*?)		# alt text = $2
 | |
| 		  \]
 | |
| 
 | |
| 		  [ ]?				# one optional space
 | |
| 		  (?:\n[ ]*)?		# one optional newline followed by spaces
 | |
| 
 | |
| 		  \[
 | |
| 		    (.*?)		# id = $3
 | |
| 		  \]
 | |
| 
 | |
| 		)
 | |
| 	}{
 | |
| 		my $result;
 | |
| 		my $whole_match = $1;
 | |
| 		my $alt_text    = $2;
 | |
| 		my $link_id     = lc $3;
 | |
| 
 | |
| 		if ($link_id eq "") {
 | |
| 			$link_id = lc $alt_text;     # for shortcut links like ![this][].
 | |
| 		}
 | |
| 
 | |
| 		$alt_text =~ s/"/"/g;
 | |
| 		if (defined $g_urls{$link_id}) {
 | |
| 			my $url = $g_urls{$link_id};
 | |
| 			$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
 | |
| 			$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
 | |
| 			$result = "<img src=\"$url\" alt=\"$alt_text\"";
 | |
| 			if (defined $g_titles{$link_id}) {
 | |
| 				my $title = $g_titles{$link_id};
 | |
| 				$title =~ s! \* !$g_escape_table{'*'}!gx;
 | |
| 				$title =~ s!  _ !$g_escape_table{'_'}!gx;
 | |
| 				$result .=  " title=\"$title\"";
 | |
| 			}
 | |
| 			$result .= $g_empty_element_suffix;
 | |
| 		}
 | |
| 		else {
 | |
| 			# If there's no such link ID, leave intact:
 | |
| 			$result = $whole_match;
 | |
| 		}
 | |
| 
 | |
| 		$result;
 | |
| 	}xsge;
 | |
| 
 | |
| 	#
 | |
| 	# Next, handle inline images:  
 | |
| 	# Don't forget: encode * and _
 | |
| 
 | |
| 	$text =~ s{
 | |
| 		(				# wrap whole match in $1
 | |
| 		  !\[
 | |
| 		    (.*?)		# alt text = $2
 | |
| 		  \]
 | |
| 		  \(			# literal paren
 | |
| 		  	[ \t]*
 | |
| 			<?(\S+?)>?	# src url = $3
 | |
| 		  	[ \t]*
 | |
| 			(			# $4
 | |
| 			  (['"])	# quote char = $5
 | |
| 			  (.*?)		# title = $6
 | |
| 			  \5		# matching quote
 | |
| 			  [ \t]*
 | |
| 			)?			# title is optional
 | |
| 		  \)
 | |
| 		)
 | |
| 	}{
 | |
| 		my $result;
 | |
| 		my $whole_match = $1;
 | |
| 		my $alt_text    = $2;
 | |
| 		my $url	  		= $3;
 | |
| 		my $title		= '';
 | |
| 		if (defined($6)) {
 | |
| 			$title		= $6;
 | |
| 		}
 | |
| 
 | |
| 		$alt_text =~ s/"/"/g;
 | |
| 		$title    =~ s/"/"/g;
 | |
| 		$url =~ s! \* !$g_escape_table{'*'}!gx;		# We've got to encode these to avoid
 | |
| 		$url =~ s!  _ !$g_escape_table{'_'}!gx;		# conflicting with italics/bold.
 | |
| 		$result = "<img src=\"$url\" alt=\"$alt_text\"";
 | |
| 		if (defined $title) {
 | |
| 			$title =~ s! \* !$g_escape_table{'*'}!gx;
 | |
| 			$title =~ s!  _ !$g_escape_table{'_'}!gx;
 | |
| 			$result .=  " title=\"$title\"";
 | |
| 		}
 | |
| 		$result .= $g_empty_element_suffix;
 | |
| 
 | |
| 		$result;
 | |
| 	}xsge;
 | |
| 
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _DoHeaders {
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	# Setext-style headers:
 | |
| 	#	  Header 1
 | |
| 	#	  ========
 | |
| 	#  
 | |
| 	#	  Header 2
 | |
| 	#	  --------
 | |
| 	#
 | |
| 	$text =~ s{ ^(.+)[ \t]*\n=+[ \t]*\n+ }{
 | |
| 		"<h1>"  .  _RunSpanGamut($1)  .  "</h1>\n\n";
 | |
| 	}egmx;
 | |
| 
 | |
| 	$text =~ s{ ^(.+)[ \t]*\n-+[ \t]*\n+ }{
 | |
| 		"<h2>"  .  _RunSpanGamut($1)  .  "</h2>\n\n";
 | |
| 	}egmx;
 | |
| 
 | |
| 
 | |
| 	# atx-style headers:
 | |
| 	#	# Header 1
 | |
| 	#	## Header 2
 | |
| 	#	## Header 2 with closing hashes ##
 | |
| 	#	...
 | |
| 	#	###### Header 6
 | |
| 	#
 | |
| 	$text =~ s{
 | |
| 			^(\#{1,6})	# $1 = string of #'s
 | |
| 			[ \t]*
 | |
| 			(.+?)		# $2 = Header text
 | |
| 			[ \t]*
 | |
| 			\#*			# optional closing #'s (not counted)
 | |
| 			\n+
 | |
| 		}{
 | |
| 			my $h_level = length($1);
 | |
| 			"<h$h_level>"  .  _RunSpanGamut($2)  .  "</h$h_level>\n\n";
 | |
| 		}egmx;
 | |
| 
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _DoLists {
 | |
| #
 | |
| # Form HTML ordered (numbered) and unordered (bulleted) lists.
 | |
| #
 | |
| 	my $text = shift;
 | |
| 	my $less_than_tab = $g_tab_width - 1;
 | |
| 
 | |
| 	# Re-usable patterns to match list item bullets and number markers:
 | |
| 	my $marker_ul  = qr/[*+-]/;
 | |
| 	my $marker_ol  = qr/\d+[.]/;
 | |
| 	my $marker_any = qr/(?:$marker_ul|$marker_ol)/;
 | |
| 
 | |
| 	# Re-usable pattern to match any entirel ul or ol list:
 | |
| 	my $whole_list = qr{
 | |
| 		(								# $1 = whole list
 | |
| 		  (								# $2
 | |
| 			[ ]{0,$less_than_tab}
 | |
| 			(${marker_any})				# $3 = first list item marker
 | |
| 			[ \t]+
 | |
| 		  )
 | |
| 		  (?s:.+?)
 | |
| 		  (								# $4
 | |
| 			  \z
 | |
| 			|
 | |
| 			  \n{2,}
 | |
| 			  (?=\S)
 | |
| 			  (?!						# Negative lookahead for another list item marker
 | |
| 				[ \t]*
 | |
| 				${marker_any}[ \t]+
 | |
| 			  )
 | |
| 		  )
 | |
| 		)
 | |
| 	}mx;
 | |
| 
 | |
| 	# We use a different prefix before nested lists than top-level lists.
 | |
| 	# See extended comment in _ProcessListItems().
 | |
| 	#
 | |
| 	# Note: There's a bit of duplication here. My original implementation
 | |
| 	# created a scalar regex pattern as the conditional result of the test on
 | |
| 	# $g_list_level, and then only ran the $text =~ s{...}{...}egmx
 | |
| 	# substitution once, using the scalar as the pattern. This worked,
 | |
| 	# everywhere except when running under MT on my hosting account at Pair
 | |
| 	# Networks. There, this caused all rebuilds to be killed by the reaper (or
 | |
| 	# perhaps they crashed, but that seems incredibly unlikely given that the
 | |
| 	# same script on the same server ran fine *except* under MT. I've spent
 | |
| 	# more time trying to figure out why this is happening than I'd like to
 | |
| 	# admit. My only guess, backed up by the fact that this workaround works,
 | |
| 	# is that Perl optimizes the substition when it can figure out that the
 | |
| 	# pattern will never change, and when this optimization isn't on, we run
 | |
| 	# afoul of the reaper. Thus, the slightly redundant code to that uses two
 | |
| 	# static s/// patterns rather than one conditional pattern.
 | |
| 
 | |
| 	if ($g_list_level) {
 | |
| 		$text =~ s{
 | |
| 				^
 | |
| 				$whole_list
 | |
| 			}{
 | |
| 				my $list = $1;
 | |
| 				my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
 | |
| 				# Turn double returns into triple returns, so that we can make a
 | |
| 				# paragraph for the last item in a list, if necessary:
 | |
| 				$list =~ s/\n{2,}/\n\n\n/g;
 | |
| 				my $result = _ProcessListItems($list, $marker_any);
 | |
| 				$result = "<$list_type>\n" . $result . "</$list_type>\n";
 | |
| 				$result;
 | |
| 			}egmx;
 | |
| 	}
 | |
| 	else {
 | |
| 		$text =~ s{
 | |
| 				(?:(?<=\n\n)|\A\n?)
 | |
| 				$whole_list
 | |
| 			}{
 | |
| 				my $list = $1;
 | |
| 				my $list_type = ($3 =~ m/$marker_ul/) ? "ul" : "ol";
 | |
| 				# Turn double returns into triple returns, so that we can make a
 | |
| 				# paragraph for the last item in a list, if necessary:
 | |
| 				$list =~ s/\n{2,}/\n\n\n/g;
 | |
| 				my $result = _ProcessListItems($list, $marker_any);
 | |
| 				$result = "<$list_type>\n" . $result . "</$list_type>\n";
 | |
| 				$result;
 | |
| 			}egmx;
 | |
| 	}
 | |
| 
 | |
| 
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _ProcessListItems {
 | |
| #
 | |
| #	Process the contents of a single ordered or unordered list, splitting it
 | |
| #	into individual list items.
 | |
| #
 | |
| 
 | |
| 	my $list_str = shift;
 | |
| 	my $marker_any = shift;
 | |
| 
 | |
| 
 | |
| 	# The $g_list_level global keeps track of when we're inside a list.
 | |
| 	# Each time we enter a list, we increment it; when we leave a list,
 | |
| 	# we decrement. If it's zero, we're not in a list anymore.
 | |
| 	#
 | |
| 	# We do this because when we're not inside a list, we want to treat
 | |
| 	# something like this:
 | |
| 	#
 | |
| 	#		I recommend upgrading to version
 | |
| 	#		8. Oops, now this line is treated
 | |
| 	#		as a sub-list.
 | |
| 	#
 | |
| 	# As a single paragraph, despite the fact that the second line starts
 | |
| 	# with a digit-period-space sequence.
 | |
| 	#
 | |
| 	# Whereas when we're inside a list (or sub-list), that line will be
 | |
| 	# treated as the start of a sub-list. What a kludge, huh? This is
 | |
| 	# an aspect of Markdown's syntax that's hard to parse perfectly
 | |
| 	# without resorting to mind-reading. Perhaps the solution is to
 | |
| 	# change the syntax rules such that sub-lists must start with a
 | |
| 	# starting cardinal number; e.g. "1." or "a.".
 | |
| 
 | |
| 	$g_list_level++;
 | |
| 
 | |
| 	# trim trailing blank lines:
 | |
| 	$list_str =~ s/\n{2,}\z/\n/;
 | |
| 
 | |
| 
 | |
| 	$list_str =~ s{
 | |
| 		(\n)?							# leading line = $1
 | |
| 		(^[ \t]*)						# leading whitespace = $2
 | |
| 		($marker_any) [ \t]+			# list marker = $3
 | |
| 		((?s:.+?)						# list item text   = $4
 | |
| 		(\n{1,2}))
 | |
| 		(?= \n* (\z | \2 ($marker_any) [ \t]+))
 | |
| 	}{
 | |
| 		my $item = $4;
 | |
| 		my $leading_line = $1;
 | |
| 		my $leading_space = $2;
 | |
| 
 | |
| 		if ($leading_line or ($item =~ m/\n{2,}/)) {
 | |
| 			$item = _RunBlockGamut(_Outdent($item));
 | |
| 		}
 | |
| 		else {
 | |
| 			# Recursion for sub-lists:
 | |
| 			$item = _DoLists(_Outdent($item));
 | |
| 			chomp $item;
 | |
| 			$item = _RunSpanGamut($item);
 | |
| 		}
 | |
| 
 | |
| 		"<li>" . $item . "</li>\n";
 | |
| 	}egmx;
 | |
| 
 | |
| 	$g_list_level--;
 | |
| 	return $list_str;
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| sub _DoCodeBlocks {
 | |
| #
 | |
| #	Process Markdown `<pre><code>` blocks.
 | |
| #	
 | |
| 
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	$text =~ s{
 | |
| 			(?:\n\n|\A)
 | |
| 			(	            # $1 = the code block -- one or more lines, starting with a space/tab
 | |
| 			  (?:
 | |
| 			    (?:[ ]{$g_tab_width} | \t)  # Lines must start with a tab or a tab-width of spaces
 | |
| 			    .*\n+
 | |
| 			  )+
 | |
| 			)
 | |
| 			((?=^[ ]{0,$g_tab_width}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
 | |
| 		}{
 | |
| 			my $codeblock = $1;
 | |
| 			my $result; # return value
 | |
| 
 | |
| 			$codeblock = _EncodeCode(_Outdent($codeblock));
 | |
| 			$codeblock = _Detab($codeblock);
 | |
| 			$codeblock =~ s/\A\n+//; # trim leading newlines
 | |
| 			$codeblock =~ s/\s+\z//; # trim trailing whitespace
 | |
| 
 | |
| 			$result = "\n\n<pre><code>" . $codeblock . "\n</code></pre>\n\n";
 | |
| 
 | |
| 			$result;
 | |
| 		}egmx;
 | |
| 
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _DoCodeSpans {
 | |
| #
 | |
| # 	*	Backtick quotes are used for <code></code> spans.
 | |
| # 
 | |
| # 	*	You can use multiple backticks as the delimiters if you want to
 | |
| # 		include literal backticks in the code span. So, this input:
 | |
| #     
 | |
| #         Just type ``foo `bar` baz`` at the prompt.
 | |
| #     
 | |
| #     	Will translate to:
 | |
| #     
 | |
| #         <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
 | |
| #     
 | |
| #		There's no arbitrary limit to the number of backticks you
 | |
| #		can use as delimters. If you need three consecutive backticks
 | |
| #		in your code, use four for delimiters, etc.
 | |
| #
 | |
| #	*	You can use spaces to get literal backticks at the edges:
 | |
| #     
 | |
| #         ... type `` `bar` `` ...
 | |
| #     
 | |
| #     	Turns to:
 | |
| #     
 | |
| #         ... type <code>`bar`</code> ...
 | |
| #
 | |
| 
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	$text =~ s@
 | |
| 			(`+)		# $1 = Opening run of `
 | |
| 			(.+?)		# $2 = The code block
 | |
| 			(?<!`)
 | |
| 			\1			# Matching closer
 | |
| 			(?!`)
 | |
| 		@
 | |
|  			my $c = "$2";
 | |
|  			$c =~ s/^[ \t]*//g; # leading whitespace
 | |
|  			$c =~ s/[ \t]*$//g; # trailing whitespace
 | |
|  			$c = _EncodeCode($c);
 | |
| 			"<code>$c</code>";
 | |
| 		@egsx;
 | |
| 
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _EncodeCode {
 | |
| #
 | |
| # Encode/escape certain characters inside Markdown code runs.
 | |
| # The point is that in code, these characters are literals,
 | |
| # and lose their special Markdown meanings.
 | |
| #
 | |
|     local $_ = shift;
 | |
| 
 | |
| 	# Encode all ampersands; HTML entities are not
 | |
| 	# entities within a Markdown code span.
 | |
| 	s/&/&/g;
 | |
| 
 | |
| 	# Encode $'s, but only if we're running under Blosxom.
 | |
| 	# (Blosxom interpolates Perl variables in article bodies.)
 | |
| 	{
 | |
| 		no warnings 'once';
 | |
|     	if (defined($blosxom::version)) {
 | |
|     		s/\$/$/g;	
 | |
|     	}
 | |
|     }
 | |
| 
 | |
| 
 | |
| 	# Do the angle bracket song and dance:
 | |
| 	s! <  !<!gx;
 | |
| 	s! >  !>!gx;
 | |
| 
 | |
| 	# Now, escape characters that are magic in Markdown:
 | |
| 	s! \* !$g_escape_table{'*'}!gx;
 | |
| 	s! _  !$g_escape_table{'_'}!gx;
 | |
| 	s! {  !$g_escape_table{'{'}!gx;
 | |
| 	s! }  !$g_escape_table{'}'}!gx;
 | |
| 	s! \[ !$g_escape_table{'['}!gx;
 | |
| 	s! \] !$g_escape_table{']'}!gx;
 | |
| 	s! \\ !$g_escape_table{'\\'}!gx;
 | |
| 
 | |
| 	return $_;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _DoItalicsAndBold {
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	# <strong> must go first:
 | |
| 	$text =~ s{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }
 | |
| 		{<strong>$2</strong>}gsx;
 | |
| 
 | |
| 	$text =~ s{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }
 | |
| 		{<em>$2</em>}gsx;
 | |
| 
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _DoBlockQuotes {
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	$text =~ s{
 | |
| 		  (								# Wrap whole match in $1
 | |
| 			(
 | |
| 			  ^[ \t]*>[ \t]?			# '>' at the start of a line
 | |
| 			    .+\n					# rest of the first line
 | |
| 			  (.+\n)*					# subsequent consecutive lines
 | |
| 			  \n*						# blanks
 | |
| 			)+
 | |
| 		  )
 | |
| 		}{
 | |
| 			my $bq = $1;
 | |
| 			$bq =~ s/^[ \t]*>[ \t]?//gm;	# trim one level of quoting
 | |
| 			$bq =~ s/^[ \t]+$//mg;			# trim whitespace-only lines
 | |
| 			$bq = _RunBlockGamut($bq);		# recurse
 | |
| 
 | |
| 			$bq =~ s/^/  /g;
 | |
| 			# These leading spaces screw with <pre> content, so we need to fix that:
 | |
| 			$bq =~ s{
 | |
| 					(\s*<pre>.+?</pre>)
 | |
| 				}{
 | |
| 					my $pre = $1;
 | |
| 					$pre =~ s/^  //mg;
 | |
| 					$pre;
 | |
| 				}egsx;
 | |
| 
 | |
| 			"<blockquote>\n$bq\n</blockquote>\n\n";
 | |
| 		}egmx;
 | |
| 
 | |
| 
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _FormParagraphs {
 | |
| #
 | |
| #	Params:
 | |
| #		$text - string to process with html <p> tags
 | |
| #
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	# Strip leading and trailing lines:
 | |
| 	$text =~ s/\A\n+//;
 | |
| 	$text =~ s/\n+\z//;
 | |
| 
 | |
| 	my @grafs = split(/\n{2,}/, $text);
 | |
| 
 | |
| 	#
 | |
| 	# Wrap <p> tags.
 | |
| 	#
 | |
| 	foreach (@grafs) {
 | |
| 		unless (defined( $g_html_blocks{$_} )) {
 | |
| 			$_ = _RunSpanGamut($_);
 | |
| 			s/^([ \t]*)/<p>/;
 | |
| 			$_ .= "</p>";
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	#
 | |
| 	# Unhashify HTML blocks
 | |
| 	#
 | |
| 	foreach (@grafs) {
 | |
| 		if (defined( $g_html_blocks{$_} )) {
 | |
| 			$_ = $g_html_blocks{$_};
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	return join "\n\n", @grafs;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _EncodeAmpsAndAngles {
 | |
| # Smart processing for ampersands and angle brackets that need to be encoded.
 | |
| 
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	# Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
 | |
| 	#   http://bumppo.net/projects/amputator/
 | |
|  	$text =~ s/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/&/g;
 | |
| 
 | |
| 	# Encode naked <'s
 | |
|  	$text =~ s{<(?![a-z/?\$!])}{<}gi;
 | |
| 
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _EncodeBackslashEscapes {
 | |
| #
 | |
| #   Parameter:  String.
 | |
| #   Returns:    The string, with after processing the following backslash
 | |
| #               escape sequences.
 | |
| #
 | |
|     local $_ = shift;
 | |
| 
 | |
|     s! \\\\  !$g_escape_table{'\\'}!gx;		# Must process escaped backslashes first.
 | |
|     s! \\`   !$g_escape_table{'`'}!gx;
 | |
|     s! \\\*  !$g_escape_table{'*'}!gx;
 | |
|     s! \\_   !$g_escape_table{'_'}!gx;
 | |
|     s! \\\{  !$g_escape_table{'{'}!gx;
 | |
|     s! \\\}  !$g_escape_table{'}'}!gx;
 | |
|     s! \\\[  !$g_escape_table{'['}!gx;
 | |
|     s! \\\]  !$g_escape_table{']'}!gx;
 | |
|     s! \\\(  !$g_escape_table{'('}!gx;
 | |
|     s! \\\)  !$g_escape_table{')'}!gx;
 | |
|     s! \\>   !$g_escape_table{'>'}!gx;
 | |
|     s! \\\#  !$g_escape_table{'#'}!gx;
 | |
|     s! \\\+  !$g_escape_table{'+'}!gx;
 | |
|     s! \\\-  !$g_escape_table{'-'}!gx;
 | |
|     s! \\\.  !$g_escape_table{'.'}!gx;
 | |
|     s{ \\!  }{$g_escape_table{'!'}}gx;
 | |
| 
 | |
|     return $_;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _DoAutoLinks {
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	$text =~ s{<((https?|ftp):[^'">\s]+)>}{<a href="$1">$1</a>}gi;
 | |
| 
 | |
| 	# Email addresses: <address@domain.foo>
 | |
| 	$text =~ s{
 | |
| 		<
 | |
|         (?:mailto:)?
 | |
| 		(
 | |
| 			[-.\w]+
 | |
| 			\@
 | |
| 			[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
 | |
| 		)
 | |
| 		>
 | |
| 	}{
 | |
| 		_EncodeEmailAddress( _UnescapeSpecialChars($1) );
 | |
| 	}egix;
 | |
| 
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _EncodeEmailAddress {
 | |
| #
 | |
| #	Input: an email address, e.g. "foo@example.com"
 | |
| #
 | |
| #	Output: the email address as a mailto link, with each character
 | |
| #		of the address encoded as either a decimal or hex entity, in
 | |
| #		the hopes of foiling most address harvesting spam bots. E.g.:
 | |
| #
 | |
| #	  <a href="mailto:foo@e
 | |
| #       xample.com">foo
 | |
| #       @example.com</a>
 | |
| #
 | |
| #	Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
 | |
| #	mailing list: <http://tinyurl.com/yu7ue>
 | |
| #
 | |
| 
 | |
| 	my $addr = shift;
 | |
| 
 | |
| 	srand;
 | |
| 	my @encode = (
 | |
| 		sub { '&#' .                 ord(shift)   . ';' },
 | |
| 		sub { '&#x' . sprintf( "%X", ord(shift) ) . ';' },
 | |
| 		sub {                            shift          },
 | |
| 	);
 | |
| 
 | |
| 	$addr = "mailto:" . $addr;
 | |
| 
 | |
| 	$addr =~ s{(.)}{
 | |
| 		my $char = $1;
 | |
| 		if ( $char eq '@' ) {
 | |
| 			# this *must* be encoded. I insist.
 | |
| 			$char = $encode[int rand 1]->($char);
 | |
| 		} elsif ( $char ne ':' ) {
 | |
| 			# leave ':' alone (to spot mailto: later)
 | |
| 			my $r = rand;
 | |
| 			# roughly 10% raw, 45% hex, 45% dec
 | |
| 			$char = (
 | |
| 				$r > .9   ?  $encode[2]->($char)  :
 | |
| 				$r < .45  ?  $encode[1]->($char)  :
 | |
| 							 $encode[0]->($char)
 | |
| 			);
 | |
| 		}
 | |
| 		$char;
 | |
| 	}gex;
 | |
| 
 | |
| 	$addr = qq{<a href="$addr">$addr</a>};
 | |
| 	$addr =~ s{">.+?:}{">}; # strip the mailto: from the visible part
 | |
| 
 | |
| 	return $addr;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _UnescapeSpecialChars {
 | |
| #
 | |
| # Swap back in all the special characters we've hidden.
 | |
| #
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	while( my($char, $hash) = each(%g_escape_table) ) {
 | |
| 		$text =~ s/$hash/$char/g;
 | |
| 	}
 | |
|     return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _TokenizeHTML {
 | |
| #
 | |
| #   Parameter:  String containing HTML markup.
 | |
| #   Returns:    Reference to an array of the tokens comprising the input
 | |
| #               string. Each token is either a tag (possibly with nested,
 | |
| #               tags contained therein, such as <a href="<MTFoo>">, or a
 | |
| #               run of text between tags. Each element of the array is a
 | |
| #               two-element array; the first is either 'tag' or 'text';
 | |
| #               the second is the actual value.
 | |
| #
 | |
| #
 | |
| #   Derived from the _tokenize() subroutine from Brad Choate's MTRegex plugin.
 | |
| #       <http://www.bradchoate.com/past/mtregex.php>
 | |
| #
 | |
| 
 | |
|     my $str = shift;
 | |
|     my $pos = 0;
 | |
|     my $len = length $str;
 | |
|     my @tokens;
 | |
| 
 | |
|     my $depth = 6;
 | |
|     my $nested_tags = join('|', ('(?:<[a-z/!$](?:[^<>]') x $depth) . (')*>)' x  $depth);
 | |
|     my $match = qr/(?s: <! ( -- .*? -- \s* )+ > ) |  # comment
 | |
|                    (?s: <\? .*? \?> ) |              # processing instruction
 | |
|                    $nested_tags/ix;                   # nested tags
 | |
| 
 | |
|     while ($str =~ m/($match)/g) {
 | |
|         my $whole_tag = $1;
 | |
|         my $sec_start = pos $str;
 | |
|         my $tag_start = $sec_start - length $whole_tag;
 | |
|         if ($pos < $tag_start) {
 | |
|             push @tokens, ['text', substr($str, $pos, $tag_start - $pos)];
 | |
|         }
 | |
|         push @tokens, ['tag', $whole_tag];
 | |
|         $pos = pos $str;
 | |
|     }
 | |
|     push @tokens, ['text', substr($str, $pos, $len - $pos)] if $pos < $len;
 | |
|     \@tokens;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _Outdent {
 | |
| #
 | |
| # Remove one level of line-leading tabs or spaces
 | |
| #
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	$text =~ s/^(\t|[ ]{1,$g_tab_width})//gm;
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| sub _Detab {
 | |
| #
 | |
| # Cribbed from a post by Bart Lateur:
 | |
| # <http://www.nntp.perl.org/group/perl.macperl.anyperl/154>
 | |
| #
 | |
| 	my $text = shift;
 | |
| 
 | |
| 	$text =~ s{(.*?)\t}{$1.(' ' x ($g_tab_width - length($1) % $g_tab_width))}ge;
 | |
| 	return $text;
 | |
| }
 | |
| 
 | |
| 
 | |
| 1;
 | |
| 
 | |
| __END__
 | |
| 
 | |
| 
 | |
| =pod
 | |
| 
 | |
| =head1 NAME
 | |
| 
 | |
| B<Markdown>
 | |
| 
 | |
| 
 | |
| =head1 SYNOPSIS
 | |
| 
 | |
| B<Markdown.pl> [ B<--html4tags> ] [ B<--version> ] [ B<-shortversion> ]
 | |
|     [ I<file> ... ]
 | |
| 
 | |
| 
 | |
| =head1 DESCRIPTION
 | |
| 
 | |
| Markdown is a text-to-HTML filter; it translates an easy-to-read /
 | |
| easy-to-write structured text format into HTML. Markdown's text format
 | |
| is most similar to that of plain text email, and supports features such
 | |
| as headers, *emphasis*, code blocks, blockquotes, and links.
 | |
| 
 | |
| Markdown's syntax is designed not as a generic markup language, but
 | |
| specifically to serve as a front-end to (X)HTML. You can  use span-level
 | |
| HTML tags anywhere in a Markdown document, and you can use block level
 | |
| HTML tags (like <div> and <table> as well).
 | |
| 
 | |
| For more information about Markdown's syntax, see:
 | |
| 
 | |
|     http://daringfireball.net/projects/markdown/
 | |
| 
 | |
| 
 | |
| =head1 OPTIONS
 | |
| 
 | |
| Use "--" to end switch parsing. For example, to open a file named "-z", use:
 | |
| 
 | |
| 	Markdown.pl -- -z
 | |
| 
 | |
| =over 4
 | |
| 
 | |
| 
 | |
| =item B<--html4tags>
 | |
| 
 | |
| Use HTML 4 style for empty element tags, e.g.:
 | |
| 
 | |
|     <br>
 | |
| 
 | |
| instead of Markdown's default XHTML style tags, e.g.:
 | |
| 
 | |
|     <br />
 | |
| 
 | |
| 
 | |
| =item B<-v>, B<--version>
 | |
| 
 | |
| Display Markdown's version number and copyright information.
 | |
| 
 | |
| 
 | |
| =item B<-s>, B<--shortversion>
 | |
| 
 | |
| Display the short-form version number.
 | |
| 
 | |
| 
 | |
| =back
 | |
| 
 | |
| 
 | |
| 
 | |
| =head1 BUGS
 | |
| 
 | |
| To file bug reports or feature requests (other than topics listed in the
 | |
| Caveats section above) please send email to:
 | |
| 
 | |
|     support@daringfireball.net
 | |
| 
 | |
| Please include with your report: (1) the example input; (2) the output
 | |
| you expected; (3) the output Markdown actually produced.
 | |
| 
 | |
| 
 | |
| =head1 VERSION HISTORY
 | |
| 
 | |
| See the readme file for detailed release notes for this version.
 | |
| 
 | |
| 1.0.1 - 14 Dec 2004
 | |
| 
 | |
| 1.0 - 28 Aug 2004
 | |
| 
 | |
| 
 | |
| =head1 AUTHOR
 | |
| 
 | |
|     John Gruber
 | |
|     http://daringfireball.net
 | |
| 
 | |
|     PHP port and other contributions by Michel Fortin
 | |
|     http://michelf.com
 | |
| 
 | |
| 
 | |
| =head1 COPYRIGHT AND LICENSE
 | |
| 
 | |
| Copyright (c) 2003-2004 John Gruber   
 | |
| <http://daringfireball.net/>   
 | |
| All rights reserved.
 | |
| 
 | |
| Redistribution and use in source and binary forms, with or without
 | |
| modification, are permitted provided that the following conditions are
 | |
| met:
 | |
| 
 | |
| * Redistributions of source code must retain the above copyright notice,
 | |
|   this list of conditions and the following disclaimer.
 | |
| 
 | |
| * Redistributions in binary form must reproduce the above copyright
 | |
|   notice, this list of conditions and the following disclaimer in the
 | |
|   documentation and/or other materials provided with the distribution.
 | |
| 
 | |
| * Neither the name "Markdown" nor the names of its contributors may
 | |
|   be used to endorse or promote products derived from this software
 | |
|   without specific prior written permission.
 | |
| 
 | |
| This software is provided by the copyright holders and contributors "as
 | |
| is" and any express or implied warranties, including, but not limited
 | |
| to, the implied warranties of merchantability and fitness for a
 | |
| particular purpose are disclaimed. In no event shall the copyright owner
 | |
| or contributors be liable for any direct, indirect, incidental, special,
 | |
| exemplary, or consequential damages (including, but not limited to,
 | |
| procurement of substitute goods or services; loss of use, data, or
 | |
| profits; or business interruption) however caused and on any theory of
 | |
| liability, whether in contract, strict liability, or tort (including
 | |
| negligence or otherwise) arising in any way out of the use of this
 | |
| software, even if advised of the possibility of such damage.
 | |
| 
 | |
| =cut
 | 
