();
+
+ string backslashPattern = "";
+
+ foreach (char c in @"\`*_{}[]()>#+-.!")
+ {
+ string key = c.ToString();
+ string hash = GetHashKey(key);
+ _escapeTable.Add(key, hash);
+ _invertedEscapeTable.Add(hash, key);
+ _backslashEscapeTable.Add(@"\" + key, hash);
+ backslashPattern += Regex.Escape(@"\" + key) + "|";
+ }
+
+ _backslashEscapes = new Regex(backslashPattern.Substring(0, backslashPattern.Length - 1), RegexOptions.Compiled);
+ }
+
+ /// <summary>
+ /// Version string of this MarkdownSharp build;
+ /// see http://code.google.com/p/markdownsharp/ for the latest code or to contribute
+ /// </summary>
+ public string Version
+ {
+     get
+     {
+         return _version;
+     }
+ }
+
+ /// <summary>
+ /// Converts the provided Markdown-formatted text to HTML;
+ /// see http://en.wikipedia.org/wiki/Markdown
+ /// </summary>
+ /// <remarks>
+ /// The passes run in a fixed order: Normalize, HashHTMLBlocks, StripLinkDefinitions,
+ /// RunBlockGamut, Unescape. Per-document state is reset before and after the run.
+ /// </remarks>
+ /// <param name="text">Markdown source; may be null or empty</param>
+ /// <returns>an empty string for null or empty input, otherwise the HTML followed by a newline</returns>
+ public string Transform(string text)
+ {
+     if (String.IsNullOrEmpty(text))
+     {
+         return "";
+     }
+
+     Setup();
+
+     // innermost call runs first, so the pass order reads from the inside out
+     string html = Unescape(
+         RunBlockGamut(
+             StripLinkDefinitions(
+                 HashHTMLBlocks(
+                     Normalize(text)))));
+
+     Cleanup();
+
+     return html + "\n";
+ }
+
+
+ /// <summary>
+ /// Runs the block-level passes (headers, horizontal rules, lists, code blocks,
+ /// block quotes) and then forms paragraphs from what is left.
+ /// </summary>
+ private string RunBlockGamut(string text)
+ {
+     string blocks = DoBlockQuotes(DoCodeBlocks(DoLists(DoHorizontalRules(DoHeaders(text)))));
+
+     // Transform() already ran HashHTMLBlocks() once, to protect raw HTML found in the
+     // Markdown source. This second run hides the block-level markup produced by the
+     // passes above, so that FormParagraphs() does not wrap it in paragraph tags.
+     string hashed = HashHTMLBlocks(blocks);
+
+     return FormParagraphs(hashed);
+ }
+
+
+ /// <summary>
+ /// Runs the span-level passes, i.e. the ones that apply *within* block-level
+ /// elements such as paragraphs, headers, and list items.
+ /// </summary>
+ private string RunSpanGamut(string text)
+ {
+     string span = EscapeBackslashes(EscapeSpecialCharsWithinTagAttributes(DoCodeSpans(text)));
+
+     // Images go before anchors, because ![foo][f] also looks like an anchor.
+     span = DoAnchors(DoImages(span));
+
+     // Auto-links go after DoAnchors(), because inline links may wrap
+     // their URL in the same angle-bracket delimiters.
+     span = DoAutoLinks(span);
+
+     return DoHardBreaks(DoItalicsAndBold(EncodeAmpsAndAngles(span)));
+ }
+
+ // newlines at the very start of the text, or at its very end
+ private static Regex _newlinesLeadingTrailing = new Regex(@"^\n+|\n+\z", RegexOptions.Compiled);
+ // two or more consecutive newlines; FormParagraphs() splits on this
+ private static Regex _newlinesMultiple = new Regex(@"\n{2,}", RegexOptions.Compiled);
+ // run of spaces at the start of the string (possibly empty)
+ private static Regex _leadingWhitespace = new Regex(@"^[ ]*", RegexOptions.Compiled);
+
+ /// <summary>
+ /// splits on two or more newlines, to form "paragraphs";
+ /// each paragraph is then unhashed (if it is a hash) or wrapped in HTML p tag
+ /// </summary>
+ private string FormParagraphs(string text)
+ {
+ // split on two or more newlines
+ string[] grafs = _newlinesMultiple.Split(_newlinesLeadingTrailing.Replace(text, ""));
+
+ for (int i = 0; i < grafs.Length; i++)
+ {
+ if (grafs[i].StartsWith("\x1A"))
+ {
+ // unhashify HTML blocks
+ grafs[i] = _htmlBlocks[grafs[i]];
+ }
+ else
+ {
+ // do span level processing inside the block, then wrap result in tags
+ // NOTE(review): the two string literals below each contain only a line break,
+ // although the summary says the paragraph is wrapped in a p tag -- presumably the
+ // tag text was lost from this copy; confirm against the upstream file.
+ grafs[i] = _leadingWhitespace.Replace(RunSpanGamut(grafs[i]), "
") + "
";
+ }
+ }
+
+ // paragraphs are rejoined with one blank line between them
+ return string.Join("\n\n", grafs);
+ }
+
+
+ /// <summary>
+ /// Resets the per-document state: list nesting level, hashed HTML blocks,
+ /// and the link title and URL tables.
+ /// </summary>
+ private void Setup()
+ {
+     // Without this reset, state left over from a previously converted article
+     // conflicts with the current one when a single page is built from several
+     // articles (e.g. an index page showing the N most recent articles).
+     _listLevel = 0;
+     _htmlBlocks.Clear();
+     _titles.Clear();
+     _urls.Clear();
+ }
+
+ /// <summary>
+ /// Clears the per-document state after a conversion; this is the same reset as Setup().
+ /// </summary>
+ private void Cleanup()
+ {
+ Setup();
+ }
+
+ // cached result of GetNestedBracketsPattern(); null until first use
+ private static string _nestedBracketsPattern;
+
+ /// <summary>
+ /// Reusable pattern to match balanced [brackets]. See Friedl's
+ /// "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
+ /// </summary>
+ /// <returns>the pattern text, built once and then served from the cache</returns>
+ private static string GetNestedBracketsPattern()
+ {
+     // matches [this], [this[also]] and [this[also[too]]], nested up to _nestDepth levels
+     if (_nestedBracketsPattern != null)
+         return _nestedBracketsPattern;
+
+     // _nestDepth opening halves, followed by _nestDepth closing halves
+     string opening = RepeatString(@"
+ (?> # Atomic matching
+ [^\[\]]+ # Anything other than brackets
+ |
+ \[
+ ", _nestDepth);
+     string closing = RepeatString(@" \]
+ )*", _nestDepth);
+
+     _nestedBracketsPattern = opening + closing;
+     return _nestedBracketsPattern;
+ }
+
+ // cached result of GetNestedParensPattern(); null until first use
+ private static string _nestedParensPattern;
+
+ /// <summary>
+ /// Reusable pattern to match balanced (parens). See Friedl's
+ /// "Mastering Regular Expressions", 2nd Ed., pp. 328-331.
+ /// </summary>
+ /// <returns>the pattern text, built once and then served from the cache</returns>
+ private static string GetNestedParensPattern()
+ {
+     // matches (this), (this(also)) and (this(also(too))), nested up to _nestDepth levels
+     if (_nestedParensPattern != null)
+         return _nestedParensPattern;
+
+     // _nestDepth opening halves, followed by _nestDepth closing halves
+     string opening = RepeatString(@"
+ (?> # Atomic matching
+ [^()\s]+ # Anything other than parens or whitespace
+ |
+ \(
+ ", _nestDepth);
+     string closing = RepeatString(@" \)
+ )*", _nestDepth);
+
+     _nestedParensPattern = opening + closing;
+     return _nestedParensPattern;
+ }
+
+ // Matches one link definition: an [id]: label indented by at most _tabWidth - 1 spaces,
+ // then the url, then an optional title in double quotes or parentheses.
+ // NOTE(review): the url line accepts an optional closing '>' but no opening angle
+ // bracket -- presumably part of the pattern was lost from this copy; confirm.
+ private static Regex _linkDef = new Regex(string.Format(@"
+ ^[ ]{{0,{0}}}\[(.+)\]: # id = $1
+ [ ]*
+ \n? # maybe *one* newline
+ [ ]*
+ (\S+?)>? # url = $2
+ [ ]*
+ \n? # maybe one newline
+ [ ]*
+ (?:
+ (?<=\s) # lookbehind for whitespace
+ [""(]
+ (.+?) # title = $3
+ ["")]
+ [ ]*
+ )? # title is optional
+ (?:\n+|\Z)", _tabWidth - 1), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Removes link definitions from the text; LinkEvaluator records each
+ /// definition's URL and title as it is removed.
+ /// </summary>
+ /// <remarks>
+ /// ^[id]: url "optional title"
+ /// </remarks>
+ private string StripLinkDefinitions(string text)
+ {
+     string withoutDefinitions = _linkDef.Replace(text, LinkEvaluator);
+     return withoutDefinitions;
+ }
+
+ /// <summary>
+ /// Records one matched link definition: the URL (group 2) and, when present, the
+ /// title (group 3) are stored under the lower-cased id (group 1). Returns an empty
+ /// string, so the definition disappears from the text.
+ /// </summary>
+ private string LinkEvaluator(Match match)
+ {
+ string linkID = match.Groups[1].Value.ToLowerInvariant();
+ _urls[linkID] = EncodeAmpsAndAngles(match.Groups[2].Value);
+
+ // NOTE(review): the second Replace argument below is not a valid string literal as
+ // written -- presumably an HTML entity for the double quote was lost from this copy; confirm.
+ if (match.Groups[3] != null && match.Groups[3].Length > 0)
+ _titles[linkID] = match.Groups[3].Value.Replace("\"", """);
+
+ return "";
+ }
+
+ // Matches block-level HTML; the pattern text comes from GetBlockPattern().
+ // Deliberately created without RegexOptions.Compiled: per the original author,
+ // compiling this monster regex results in worse performance.
+ private static Regex _blocksHtml = new Regex(GetBlockPattern(), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
+
+
+ /// <summary>
+ /// Builds the pattern text used to find block-level HTML;
+ /// derived pretty much verbatim from PHP Markdown
+ /// </summary>
+ /// <returns>the pattern, with its $-prefixed placeholders substituted</returns>
+ private static string GetBlockPattern()
+ {
+ // NOTE(review): several pieces of this method (the tag text inside the pattern
+ // and inside some comments) appear to have been lost from this copy -- e.g. the
+ // HTML comment alternative below has an empty body. Confirm against the upstream file.
+
+ // Hashify HTML blocks:
+ // We only want to do this for block-level HTML tags, such as headers,
+ // lists, and tables. That's because we still want to wrap s around
+ // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
+ // phrase emphasis, and spans. The list of tags we're looking for is
+ // hard-coded:
+ //
+ // * List "a" is made of tags which can be both inline or block-level.
+ // These will be treated block-level when the start tag is alone on
+ // its line, otherwise they're not matched here and will be taken as
+ // inline later.
+ // * List "b" is made of tags which are always block-level;
+ //
+ string blockTagsA = "ins|del";
+ string blockTagsB = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|script|noscript|form|fieldset|iframe|math";
+
+ // Regular expression for the content of a block tag.
+ string attr = @"
+ (?> # optional tag attributes
+ \s # starts with whitespace
+ (?>
+ [^>""/]+ # text outside quotes
+ |
+ /+(?!>) # slash not followed by >
+ |
+ ""[^""]*"" # text inside double quotes (tolerate >)
+ |
+ '[^']*' # text inside single quotes (tolerate >)
+ )*
+ )?
+ ";
+
+ string content = RepeatString(@"
+ (?>
+ [^<]+ # content without tag
+ |
+ <\2 # nested opening tag
+ " + attr + @" # attributes
+ (?>
+ />
+ |
+ >", _nestDepth) + // end of opening tag
+ ".*?" + // last level nested tag content
+ RepeatString(@"
+ \2\s*> # closing nested tag
+ )
+ |
+ <(?!/\2\s*> # other tags with a different name
+ )
+ )*", _nestDepth);
+
+ // same content pattern, but back-referencing group 3 (the "a" list tag) instead of group 2
+ string content2 = content.Replace(@"\2", @"\3");
+
+ // First, look for nested blocks, e.g.:
+ //
+ //
+ // tags for inner block must be indented.
+ //
+ //
+ //
+ // The outermost tags must start at the left margin for this to match, and
+ // the inner nested divs must be indented.
+ // We need to do this before the next, more liberal match, because the next
+ // match will start at the first `` and stop at the first `
`.
+ string pattern = @"
+ (?>
+ (?>
+ (?<=\n) # Starting after a blank line
+ | # or
+ \A\n? # the beginning of the doc
+ )
+ ( # save in $1
+
+ # Match from `\n` to `\n`, handling nested tags
+ # in between.
+
+ [ ]{0,$less_than_tab}
+ <($block_tags_b_re) # start tag = $2
+ $attr> # attributes followed by > and \n
+ $content # content, support nesting
+ \2> # the matching end tag
+ [ ]* # trailing spaces
+ (?=\n+|\Z) # followed by a newline or end of document
+
+ | # Special version for tags of group a.
+
+ [ ]{0,$less_than_tab}
+ <($block_tags_a_re) # start tag = $3
+ $attr>[ ]*\n # attributes followed by >
+ $content2 # content, support nesting
+ \3> # the matching end tag
+ [ ]* # trailing spaces
+ (?=\n+|\Z) # followed by a newline or end of document
+
+ | # Special case just for
. It was easier to make a special
+ # case than to make the other regex more complicated.
+
+ [ ]{0,$less_than_tab}
+ <(hr) # start tag = $2
+ $attr # attributes
+ /?> # the matching end tag
+ [ ]*
+ (?=\n{2,}|\Z) # followed by a blank line or end of document
+
+ | # Special case for standalone HTML comments:
+
+ [ ]{0,$less_than_tab}
+ (?s:
+
+ )
+ [ ]*
+ (?=\n{2,}|\Z) # followed by a blank line or end of document
+
+ | # PHP and ASP-style processor instructions ( and <%)
+
+ [ ]{0,$less_than_tab}
+ (?s:
+ <([?%]) # $2
+ .*?
+ \2>
+ )
+ [ ]*
+ (?=\n{2,}|\Z) # followed by a blank line or end of document
+
+ )
+ )";
+
+ // Fill in the placeholders. $content2 is substituted before $content, because
+ // "$content" is a prefix of "$content2" and would otherwise clobber it.
+ pattern = pattern.Replace("$less_than_tab", (_tabWidth - 1).ToString());
+ pattern = pattern.Replace("$block_tags_b_re", blockTagsB);
+ pattern = pattern.Replace("$block_tags_a_re", blockTagsA);
+ pattern = pattern.Replace("$attr", attr);
+ pattern = pattern.Replace("$content2", content2);
+ pattern = pattern.Replace("$content", content);
+
+ return pattern;
+ }
+
+ /// <summary>
+ /// Swaps every block-level HTML block found in the text for a hash placeholder;
+ /// HtmlEvaluator stores the original block under that placeholder.
+ /// </summary>
+ private string HashHTMLBlocks(string text)
+ {
+     string hashed = _blocksHtml.Replace(text, HtmlEvaluator);
+     return hashed;
+ }
+
+ /// <summary>
+ /// Stores the matched HTML block (group 1) in _htmlBlocks under its hash key and
+ /// returns that key, surrounded by blank lines, as the replacement text.
+ /// </summary>
+ private string HtmlEvaluator(Match match)
+ {
+     string block = match.Groups[1].Value;
+     string placeholder = GetHashKey(block);
+     _htmlBlocks[placeholder] = block;
+
+     return "\n\n" + placeholder + "\n\n";
+ }
+
+ /// <summary>
+ /// Builds the placeholder key for a string: the absolute value of its hash code,
+ /// written in decimal and wrapped in \x1A characters.
+ /// </summary>
+ /// <param name="s">text to derive the key from; must not be null</param>
+ /// <returns>a key of the form \x1A, one or more digits, \x1A</returns>
+ private static string GetHashKey(string s)
+ {
+     // Widen to long before taking the absolute value: Math.Abs(int) throws
+     // OverflowException for int.MinValue, which GetHashCode() is free to return.
+     // Every other hash code yields exactly the same key as before.
+     long hash = s.GetHashCode();
+     return "\x1A" + Math.Abs(hash).ToString() + "\x1A";
+ }
+
+ // Pattern used by TokenizeHTML() to find tags in the text.
+ // NOTE(review): the first alternative is an empty group and the trailing "match"
+ // remarks are cut short -- presumably tag text was lost from this copy; confirm
+ // against the upstream file.
+ private static Regex _htmlTokens = new Regex(@"
+ ()| # match
+ (<\?.*?\?>)| # match " +
+ RepeatString(@"
+ (<[A-Za-z\/!$](?:[^<>]|", _nestDepth) + RepeatString(@")*>)", _nestDepth) +
+ " # match and ",
+ RegexOptions.Multiline | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Splits the input string into a list of tokens. Each token is either a tag
+ /// (a match of _htmlTokens, which may contain nested tags) or the run of text
+ /// between two tags; TokenType.Tag / TokenType.Text tells them apart and the
+ /// token carries the matched text.
+ /// </summary>
+ // NOTE(review): List is written without a type argument on the next line and in the
+ // body -- presumably the generic argument was lost from this copy; confirm.
+ private List TokenizeHTML(string text)
+ {
+ // pos: index just past the last tag consumed so far
+ int pos = 0;
+ int tagStart = 0;
+ var tokens = new List();
+
+ // this regex is derived from the _tokenize() subroutine in Brad Choate's MTRegex plugin.
+ // http://www.bradchoate.com/past/mtregex.php
+ foreach (Match m in _htmlTokens.Matches(text))
+ {
+ tagStart = m.Index;
+
+ // text between the previous tag and this one becomes a text token
+ if (pos < tagStart)
+ tokens.Add(new Token(TokenType.Text, text.Substring(pos, tagStart - pos)));
+
+ tokens.Add(new Token(TokenType.Tag, m.Value));
+ pos = tagStart + m.Length;
+ }
+
+ // whatever follows the last tag is a final text token
+ if (pos < text.Length)
+ tokens.Add(new Token(TokenType.Text, text.Substring(pos, text.Length - pos)));
+
+ return tokens;
+ }
+
+
+ // reference-style link: [link text] [id]
+ private static Regex _anchorRef = new Regex(string.Format(@"
+ ( # wrap whole match in $1
+ \[
+ ({0}) # link text = $2
+ \]
+
+ [ ]? # one optional space
+ (?:\n[ ]*)? # one optional newline followed by spaces
+
+ \[
+ (.*?) # id = $3
+ \]
+ )", GetNestedBracketsPattern()), RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
+
+ // inline-style link: [link text](url "optional title")
+ private static Regex _anchorInline = new Regex(string.Format(@"
+ ( # wrap whole match in $1
+ \[
+ ({0}) # link text = $2
+ \]
+ \( # literal paren
+ [ ]*
+ ({1}) # href = $3
+ [ ]*
+ ( # $4
+ (['""]) # quote char = $5
+ (.*?) # title = $6
+ \5 # matching quote
+ [ ]* # ignore any spaces between closing quote and )
+ )? # title is optional
+ \)
+ )", GetNestedBracketsPattern(), GetNestedParensPattern()),
+ RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
+
+ // shortcut reference: [link text] on its own
+ private static Regex _anchorRefShortcut = new Regex(@"
+ ( # wrap whole match in $1
+ \[
+ ([^\[\]]+) # link text = $2; can't contain [ or ]
+ \]
+ )", RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Turn Markdown link shortcuts into HTML anchor tags
+ /// </summary>
+ /// <remarks>
+ /// [link text](url "title")
+ /// [link text][id]
+ /// [id]
+ /// </remarks>
+ private string DoAnchors(string text)
+ {
+     // Reference-style links come first: [link text] [id]
+     string withReferences = _anchorRef.Replace(text, AnchorRefEvaluator);
+
+     // Then inline-style links: [link text](url "optional title")
+     string withInline = _anchorInline.Replace(withReferences, AnchorInlineEvaluator);
+
+     // Shortcut references such as [link text] go last, so they cannot claim
+     // the bracketed part of [link text][1] or [link text](/foo)
+     return _anchorRefShortcut.Replace(withInline, AnchorRefShortcutEvaluator);
+ }
+
+ /// <summary>
+ /// Replacement for one reference-style link match. When the (lower-cased) id is a
+ /// known link definition the link is built from its URL; otherwise the matched
+ /// text is returned unchanged.
+ /// </summary>
+ private string AnchorRefEvaluator(Match match)
+ {
+ string wholeMatch = match.Groups[1].Value;
+ string linkText = match.Groups[2].Value;
+ string linkID = match.Groups[3].Value.ToLowerInvariant();
+
+ string result;
+
+ // for shortcut links like [this][].
+ if (linkID == "")
+ linkID = linkText.ToLowerInvariant();
+
+ if (_urls.ContainsKey(linkID))
+ {
+ string url = _urls[linkID];
+
+ url = EncodeProblemUrlChars(url);
+ url = EscapeBoldItalic(url);
+ // NOTE(review): as written, the result contains only the link text and url is
+ // never used -- presumably the anchor markup was lost from this copy; confirm.
+ result = "" + linkText + "";
+ }
+ else
+ result = wholeMatch;
+
+ return result;
+ }
+
+ /// <summary>
+ /// Replacement for one shortcut reference match ([link text] on its own). The id is
+ /// the link text itself, lower-cased, with each line break (and the spaces around it)
+ /// collapsed to one space. Unknown ids leave the matched text unchanged.
+ /// </summary>
+ private string AnchorRefShortcutEvaluator(Match match)
+ {
+ string wholeMatch = match.Groups[1].Value;
+ string linkText = match.Groups[2].Value;
+ string linkID = Regex.Replace(linkText.ToLowerInvariant(), @"[ ]*\n[ ]*", " "); // lower case and remove newlines / extra spaces
+
+ string result;
+
+ if (_urls.ContainsKey(linkID))
+ {
+ string url = _urls[linkID];
+
+ url = EncodeProblemUrlChars(url);
+ url = EscapeBoldItalic(url);
+ // NOTE(review): as written, the result contains only the link text and url is
+ // never used -- presumably the anchor markup was lost from this copy; confirm.
+ result = "" + linkText + "";
+ }
+ else
+ result = wholeMatch;
+
+ return result;
+ }
+
+
+ /// <summary>
+ /// Replacement for one inline-style link match: link text is group 2, the URL is
+ /// group 3 and the optional title is group 6.
+ /// </summary>
+ private string AnchorInlineEvaluator(Match match)
+ {
+ string linkText = match.Groups[2].Value;
+ string url = match.Groups[3].Value;
+ string title = match.Groups[6].Value;
+ string result;
+
+ url = EncodeProblemUrlChars(url);
+ url = EscapeBoldItalic(url);
+ if (url.StartsWith("<") && url.EndsWith(">"))
+ url = url.Substring(1, url.Length - 2); // remove <>'s surrounding URL, if present
+
+ // NOTE(review): as written, the format string emits only the link text, and url and
+ // title are unused -- presumably the anchor markup was lost from this copy; confirm.
+ result = string.Format("{0}", linkText);
+ return result;
+ }
+
+ // reference-style image: ![alt text][id]
+ private static Regex _imagesRef = new Regex(@"
+ ( # wrap whole match in $1
+ !\[
+ (.*?) # alt text = $2
+ \]
+
+ [ ]? # one optional space
+ (?:\n[ ]*)? # one optional newline followed by spaces
+
+ \[
+ (.*?) # id = $3
+ \]
+
+ )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
+
+ // inline image: ![alt text](url "optional title")
+ private static Regex _imagesInline = new Regex(String.Format(@"
+ ( # wrap whole match in $1
+ !\[
+ (.*?) # alt text = $2
+ \]
+ \s? # one optional whitespace character
+ \( # literal paren
+ [ ]*
+ ({0}) # href = $3
+ [ ]*
+ ( # $4
+ (['""]) # quote char = $5
+ (.*?) # title = $6
+ \5 # matching quote
+ [ ]*
+ )? # title is optional
+ \)
+ )", GetNestedParensPattern()),
+ RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Turn Markdown image shortcuts into HTML img tags.
+ /// </summary>
+ /// <remarks>
+ /// ![alt text][id]
+ /// ![alt text](url "optional title")
+ /// </remarks>
+ private string DoImages(string text)
+ {
+     // Reference-style labeled images first: ![alt text][id]
+     string withReferences = _imagesRef.Replace(text, ImageReferenceEvaluator);
+
+     // Then inline images: ![alt text](url "optional title")
+     return _imagesInline.Replace(withReferences, ImageInlineEvaluator);
+ }
+
+ private string ImageReferenceEvaluator(Match match)
+ {
+ string wholeMatch = match.Groups[1].Value;
+ string altText = match.Groups[2].Value;
+ string linkID = match.Groups[3].Value.ToLowerInvariant();
+ string result;
+
+ // for shortcut links like ![this][].
+ if (linkID == "")
+ linkID = altText.ToLowerInvariant();
+
+ altText = altText.Replace("\"", """);
+
+ if (_urls.ContainsKey(linkID))
+ {
+ string url = _urls[linkID];
+ url = EncodeProblemUrlChars(url);
+ url = EscapeBoldItalic(url);
+ result = string.Format("
"))
+ url = url.Substring(1, url.Length - 2); // Remove <>'s surrounding URL, if present
+ url = EncodeProblemUrlChars(url);
+ url = EscapeBoldItalic(url);
+
+ result = string.Format("
+ /// <summary>
+ /// Turn Markdown headers into HTML header tags
+ /// </summary>
+ /// <remarks>
+ /// Header 1
+ /// ========
+ ///
+ /// Header 2
+ /// --------
+ ///
+ /// # Header 1
+ /// ## Header 2
+ /// ## Header 2 with closing hashes ##
+ /// ...
+ /// ###### Header 6
+ /// </remarks>
+ private string DoHeaders(string text)
+ {
+     // underlined (setext) headers first, then # prefixed (atx) headers
+     string withSetext = _headerSetext.Replace(text, SetextHeaderEvaluator);
+     return _headerAtx.Replace(withSetext, AtxHeaderEvaluator);
+ }
+
+ /// <summary>
+ /// Replacement for one underlined header: group 1 is the header text, and an
+ /// underline starting with '=' means level 1, anything else level 2.
+ /// </summary>
+ private string SetextHeaderEvaluator(Match match)
+ {
+ string header = match.Groups[1].Value;
+ int level = match.Groups[2].Value.StartsWith("=") ? 1 : 2;
+ // NOTE(review): the format string never references {1}, so level is unused as
+ // written -- presumably the header tag text was lost from this copy; confirm.
+ return string.Format("{0}\n\n", RunSpanGamut(header), level);
+ }
+
+ /// <summary>
+ /// Replacement for one # prefixed header: group 2 is the header text and the
+ /// length of group 1 is the header level.
+ /// </summary>
+ private string AtxHeaderEvaluator(Match match)
+ {
+ string header = match.Groups[2].Value;
+ int level = match.Groups[1].Value.Length;
+ // NOTE(review): the format string never references {1}, so level is unused as
+ // written -- presumably the header tag text was lost from this copy; confirm.
+ return string.Format("{0}\n\n", RunSpanGamut(header), level);
+ }
+
+
+ // A horizontal rule: a line holding three or more of the same marker (-, * or _),
+ // each optionally separated from the previous one by up to two spaces.
+ private static Regex _horizontalRules = new Regex(@"
+ ^[ ]{0,3} # Leading space
+ ([-*_]) # $1: First marker
+ (?> # Repeated marker group
+ [ ]{0,2} # Zero, one, or two spaces.
+ \1 # Marker character
+ ){2,} # Group repeated at least twice
+ [ ]* # Trailing spaces
+ $ # End of line.
+ ", RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
+
+ ///
+ /// Turn Markdown horizontal rules into HTML hr tags
+ ///
+ ///
+ /// ***
+ /// * * *
+ /// ---
+ /// - - -
+ ///
+ private string DoHorizontalRules(string text)
+ {
+ return _horizontalRules.Replace(text, "
+ /// <summary>
+ /// Turn Markdown lists into HTML ul and ol and li tags
+ /// </summary>
+ private string DoLists(string text)
+ {
+     // Nested lists are matched with a different pattern than top-level lists;
+     // the comment in ProcessListItems() explains why.
+     var listPattern = (_listLevel > 0) ? _listNested : _listTopLevel;
+
+     return listPattern.Replace(text, ListEvaluator);
+ }
+
+ /// <summary>
+ /// Replacement for one matched list: decides between ul and ol from the first
+ /// marker (group 3), processes the items, and wraps the result.
+ /// </summary>
+ private string ListEvaluator(Match match)
+ {
+ string list = match.Groups[1].Value;
+ string listType = Regex.IsMatch(match.Groups[3].Value, _markerUL) ? "ul" : "ol";
+ string result;
+
+ // Turn double returns into triple returns, so that we can make a
+ // paragraph for the last item in a list, if necessary:
+ list = Regex.Replace(list, @"\n{2,}", "\n\n\n");
+ result = ProcessListItems(list, listType == "ul" ? _markerUL : _markerOL);
+
+ // NOTE(review): the closing part of the format string reads {0}> with no opening
+ // bracket or slash -- presumably tag text was lost from this copy; confirm.
+ result = string.Format("<{0}>\n{1}{0}>\n", listType, result);
+ return result;
+ }
+
+ /// <summary>
+ /// Process the contents of a single ordered or unordered list, splitting it
+ /// into individual list items.
+ /// </summary>
+ /// <param name="list">text of the whole list</param>
+ /// <param name="marker">pattern text that matches one list marker</param>
+ private string ProcessListItems(string list, string marker)
+ {
+     // _listLevel tracks whether we are inside a list: it is raised here for the
+     // duration of the item processing and lowered again afterwards; zero means
+     // "not in a list". DoLists() uses it to choose between the nested and the
+     // top-level list pattern.
+     //
+     // The distinction exists because, outside of a list, text like this:
+     //
+     //     I recommend upgrading to version
+     //     8. Oops, now this line is treated
+     //     as a sub-list.
+     //
+     // should stay a single paragraph even though its second line starts with a
+     // digit-period-space sequence, whereas inside a list (or sub-list) such a
+     // line does start a sub-list. Markdown's syntax cannot be parsed perfectly
+     // here without guessing the author's intent.
+     _listLevel++;
+
+     // Trim trailing blank lines:
+     string trimmed = Regex.Replace(list, @"\n{2,}\z", "\n");
+
+     string itemPattern = string.Format(
+ @"(\n)? # leading line = $1
+ (^[ ]*) # leading whitespace = $2
+ ({0}) [ ]+ # list marker = $3
+ ((?s:.+?) # list item text = $4
+ (\n{{1,2}}))
+ (?= \n* (\z | \2 ({0}) [ ]+))", marker);
+
+     string items = Regex.Replace(trimmed, itemPattern, ListItemEvaluator,
+         RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
+
+     _listLevel--;
+     return items;
+ }
+
+ /// <summary>
+ /// Replacement for one list item. An item that was preceded by a blank line, or
+ /// that contains one, gets full block-level processing; any other item gets
+ /// sub-list processing followed by span-level processing.
+ /// </summary>
+ private string ListItemEvaluator(Match match)
+ {
+ string item = match.Groups[4].Value;
+ string leadingLine = match.Groups[1].Value;
+
+ if (!String.IsNullOrEmpty(leadingLine) || Regex.IsMatch(item, @"\n{2,}"))
+ // we could correct any bad indentation here..
+ item = RunBlockGamut(Outdent(item) + "\n");
+ else
+ {
+ // recursion for sub-lists
+ item = DoLists(Outdent(item));
+ item = item.TrimEnd('\n');
+ item = RunSpanGamut(item);
+ }
+
+ // NOTE(review): as written, the item is emitted without any surrounding tag --
+ // presumably the li markup was lost from this copy; confirm.
+ return string.Format("{0}\n", item);
+ }
+
+
+ // An indented code block: one or more lines that each start with _tabWidth spaces,
+ // preceded by a blank line (or the start of the text).
+ private static Regex _codeBlock = new Regex(string.Format(@"
+ (?:\n\n|\A\n?)
+ ( # $1 = the code block -- one or more lines, starting with a space
+ (?:
+ (?:[ ]{{{0}}}) # Lines must start with a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{{0,{0}}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc",
+ _tabWidth), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Turn Markdown indented code (each line starting with a tab-width of spaces)
+ /// into HTML pre code blocks
+ /// </summary>
+ private string DoCodeBlocks(string text)
+ {
+     return _codeBlock.Replace(text, CodeBlockEvaluator);
+ }
+
+ /// <summary>
+ /// Replacement for one indented code block: the block (group 1) is outdented,
+ /// encoded with EncodeCode(), and stripped of leading and trailing newlines.
+ /// </summary>
+ private string CodeBlockEvaluator(Match match)
+ {
+ string codeBlock = match.Groups[1].Value;
+
+ codeBlock = EncodeCode(Outdent(codeBlock));
+ codeBlock = _newlinesLeadingTrailing.Replace(codeBlock, "");
+
+ // NOTE(review): the last literal below spans a line break and no wrapping tags are
+ // emitted -- presumably the pre/code markup was lost from this copy; confirm.
+ return string.Concat("\n\n", codeBlock, "\n
\n\n");
+ }
+
+ private static Regex _codeSpan = new Regex(@"
+ (?
+ /// <summary>
+ /// Turn Markdown `code spans` into HTML code tags
+ /// </summary>
+ private string DoCodeSpans(string text)
+ {
+     // Notes on the syntax, as described by the original author:
+     //
+     // * Several backticks may be used as the delimiter, which allows literal
+     //   backticks inside the span. This input:
+     //
+     //       Just type ``foo `bar` baz`` at the prompt.
+     //
+     //   yields a code span containing: foo `bar` baz
+     //
+     //   The number of delimiter backticks is not limited: to show three
+     //   consecutive backticks, delimit the span with four, and so on.
+     //
+     // * Spaces just inside the delimiters allow a literal backtick at the
+     //   edge of the span:
+     //
+     //       ... type `` `bar` `` ...
+     //
+     //   yields a code span containing: `bar`
+     //
+     return _codeSpan.Replace(text, CodeSpanEvaluator);
+ }
+
+ /// <summary>
+ /// Replacement for one code span: group 2 is trimmed of leading and trailing
+ /// spaces and encoded with EncodeCode().
+ /// </summary>
+ private string CodeSpanEvaluator(Match match)
+ {
+ string span = match.Groups[2].Value;
+ span = Regex.Replace(span, @"^[ ]*", ""); // leading whitespace
+ span = Regex.Replace(span, @"[ ]*$", ""); // trailing whitespace
+ span = EncodeCode(span);
+
+ // NOTE(review): both wrapper strings are empty as written -- presumably the code
+ // tag markup was lost from this copy; confirm.
+ return string.Concat("", span, "");
+ }
+
+
+ // ** or __ delimited text; group 2 is the content
+ private static Regex _bold = new Regex(@"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1",
+ RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
+ // strict variant: the delimiters must also be bounded by a non-word character, an
+ // underscore, or the start/end of the text; group 3 is the content
+ private static Regex _strictBold = new Regex(@"([\W_]|^) (\*\*|__) (?=\S) ([^\r]*?\S[\*_]*) \2 ([\W_]|$)",
+ RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
+
+ // * or _ delimited text; group 2 is the content
+ private static Regex _italic = new Regex(@"(\*|_) (?=\S) (.+?) (?<=\S) \1",
+ RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
+ // strict variant, bounded the same way as _strictBold; group 3 is the content
+ private static Regex _strictItalic = new Regex(@"([\W_]|^) (\*|_) (?=\S) ([^\r\*_]*?\S) \2 ([\W_]|$)",
+ RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Turn Markdown *italics* and **bold** into HTML strong and em tags
+ /// </summary>
+ private string DoItalicsAndBold(string text)
+ {
+
+ // bold must be replaced first, then italics
+ // NOTE(review): the replacement strings below contain only group references and no
+ // tag text -- presumably the strong/em markup was lost from this copy; confirm.
+ if (_strictBoldItalic)
+ {
+ text = _strictBold.Replace(text, "$1$3$4");
+ text = _strictItalic.Replace(text, "$1$3$4");
+ }
+ else
+ {
+ text = _bold.Replace(text, "$2");
+ text = _italic.Replace(text, "$2");
+ }
+ return text;
+ }
+
+ ///
+ /// Turn markdown line breaks (two space at end of line) into HTML break tags
+ ///
+ private string DoHardBreaks(string text)
+ {
+ if (_autoNewlines)
+ text = Regex.Replace(text, @"\n", string.Format("
[ ]? # '>' at the start of a line
+ .+\n # rest of the first line
+ (.+\n)* # subsequent consecutive lines
+ \n* # blanks
+ )+
+ )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Turn Markdown > quoted blocks into HTML blockquote blocks
+ /// </summary>
+ private string DoBlockQuotes(string text)
+ {
+     string quoted = _blockquote.Replace(text, BlockQuoteEvaluator);
+     return quoted;
+ }
+
+ /// <summary>
+ /// Replacement for one quoted block: strips one level of quoting, runs the
+ /// block-level passes on the content (recursively), and prefixes every line of the
+ /// result.
+ /// </summary>
+ private string BlockQuoteEvaluator(Match match)
+ {
+ string bq = match.Groups[1].Value;
+
+ bq = Regex.Replace(bq, @"^[ ]*>[ ]?", "", RegexOptions.Multiline); // trim one level of quoting
+ bq = Regex.Replace(bq, @"^[ ]+$", "", RegexOptions.Multiline); // trim whitespace-only lines
+ bq = RunBlockGamut(bq); // recurse
+
+ bq = Regex.Replace(bq, @"^", " ", RegexOptions.Multiline);
+
+ // These leading spaces screw with content, so we need to fix that:
+ // NOTE(review): the pattern on the next line and the format string further down both
+ // span a line break and contain no tag text -- presumably markup was lost from this
+ // copy; confirm against the upstream file.
+ bq = Regex.Replace(bq, @"(\s*.+?
)", new MatchEvaluator(BlockQuoteEvaluator2), RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);
+
+ return string.Format("\n{0}\n
\n\n", bq);
+ }
+
+ /// <summary>
+ /// Removes the leading space from the start of every line of the captured text (group 1).
+ /// </summary>
+ private string BlockQuoteEvaluator2(Match match)
+ {
+     string captured = match.Groups[1].Value;
+     return Regex.Replace(captured, @"^ ", "", RegexOptions.Multiline);
+ }
+
+ // A bare http, https or ftp URL that starts the text or follows whitespace.
+ // $1 = what precedes it, $2 = scheme, $3 = rest of the URL, $4 = what follows it.
+ private static Regex _autolinkBare = new Regex(@"(^|\s)(https?|ftp)(://[-A-Z0-9+&@#/%?=~_|\[\]\(\)!:,\.;]*[-A-Z0-9+&@#/%=~_|\[\]])($|\W)",
+ RegexOptions.IgnoreCase | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Turn angle-delimited URLs into HTML anchor tags
+ /// </summary>
+ /// <remarks>
+ /// &lt;http://www.example.com&gt;
+ /// </remarks>
+ private string DoAutoLinks(string text)
+ {
+     string linked = text;
+
+     if (_autoHyperlink)
+     {
+         // Wrap bare URLs in the angle-bracket delimiters so that the
+         // hyperlink pass below picks them up as well.
+         linked = _autolinkBare.Replace(linked, @"$1<$2$3>$4");
+     }
+
+     // Hyperlinks:
+     linked = Regex.Replace(linked, "<((https?|ftp):[^'\">\\s]+)>", HyperlinkEvaluator);
+
+     if (!_linkEmails)
+         return linked;
+
+     // Email addresses, optionally prefixed with mailto:
+     string emailPattern =
+ @"<
+ (?:mailto:)?
+ (
+ [-.\w]+
+ \@
+ [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
+ )
+ >";
+     return Regex.Replace(linked, emailPattern, EmailEvaluator, RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);
+ }
+
+ /// <summary>
+ /// Replacement for one angle-delimited URL; group 1 is the URL.
+ /// </summary>
+ private string HyperlinkEvaluator(Match match)
+ {
+ string link = match.Groups[1].Value;
+ // NOTE(review): as written, the format string emits only the URL itself --
+ // presumably the anchor markup was lost from this copy; confirm.
+ return string.Format("{0}", link);
+ }
+
+ /// <summary>
+ /// Replacement for one angle-delimited email address (group 1): the address is
+ /// unescaped, prefixed with mailto: and passed through EncodeEmailAddress().
+ /// </summary>
+ private string EmailEvaluator(Match match)
+ {
+ string email = Unescape(match.Groups[1].Value);
+
+ //
+ // Input: an email address, e.g. "foo@example.com"
+ //
+ // Output: the email address as a mailto link, with each character
+ // of the address encoded as either a decimal or hex entity, in
+ // the hopes of foiling most address harvesting spam bots. E.g.:
+ //
+ // foo
+ // @example.com
+ //
+ // Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
+ // mailing list:
+ //
+ email = "mailto:" + email;
+
+ // leave ':' alone (to spot mailto: later)
+ email = EncodeEmailAddress(email);
+
+ // NOTE(review): as written, the format string emits only the encoded address, and
+ // the example output above is incomplete -- presumably the anchor markup and the
+ // entity text were lost from this copy; confirm.
+ email = string.Format("{0}", email);
+
+ // strip the mailto: from the visible part
+ email = Regex.Replace(email, "\">.+?:", "\">");
+ return email;
+ }
+
+
+ private static Regex _outDent = new Regex(@"^[ ]{1," + _tabWidth + @"}", RegexOptions.Multiline | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Removes one level of indentation: up to _tabWidth leading spaces
+ /// are stripped from every line of the block.
+ /// </summary>
+ private string Outdent(string block)
+ {
+     string outdented = _outDent.Replace(block, "");
+     return outdented;
+ }
+
+
+ #region Encoding and Normalization
+
+
+ /// <summary>
+ /// encodes email address randomly
+ /// roughly 10% raw, 45% hex, 45% dec
+ /// note that @ is always encoded and : never is
+ /// </summary>
+ private string EncodeEmailAddress(string addr)
+ {
+ var sb = new StringBuilder(addr.Length * 5);
+ var rand = new Random();
+ int r;
+ foreach (char c in addr)
+ {
+ // r is drawn from 1..99 (the upper bound of Next is exclusive)
+ r = rand.Next(1, 100);
+ // NOTE(review): the two format strings below end in ';' but have no entity prefix,
+ // and the three trailing comments are identical -- presumably the entity text was
+ // lost from this copy; confirm against the upstream file.
+ if ((r > 90 || c == ':') && c != '@')
+ sb.Append(c); // m
+ else if (r < 45)
+ sb.AppendFormat("{0:x};", (int)c); // m
+ else
+ sb.AppendFormat("{0};", (int)c); // m
+ }
+ return sb.ToString();
+ }
+
+ /// <summary>
+ /// Encode/escape certain Markdown characters inside code blocks and spans where they are literals
+ /// </summary>
+ private string EncodeCode(string code)
+ {
+ var sb = new StringBuilder(code.Length * 2);
+
+ // NOTE(review): Action is written without type arguments on the next two lines, and
+ // the strings passed to put() for '&', '<' and '>' are the characters themselves --
+ // presumably generic arguments and entity text were lost from this copy; confirm.
+ Action putc = (c) => sb.Append(c);
+ Action put = (str) => sb.Append(str);
+
+ foreach (char c in code) {
+ switch (c) {
+ case '&':
+ put("&");
+ continue;
+ case '<':
+ put ("<");
+ continue;
+ case '>':
+ put (">");
+ continue;
+ default:
+ string val = null;
+
+ // characters with an entry in _escapeTable are replaced by that entry;
+ // characters above 255 are written as their numeric code; the rest are copied
+ if (_escapeTable.TryGetValue(c.ToString(), out val))
+ put(val);
+ else if((int)c > 255)
+ sb.AppendFormat("{0};", (int)c);
+ else
+ putc(c);
+
+ continue;
+ }
+ }
+ return sb.ToString();
+ }
+
+
+ /// <summary>
+ /// Replacement for one special character inside code: '&', '<' and '>' are handled
+ /// explicitly, every other match is looked up in _escapeTable.
+ /// </summary>
+ // NOTE(review): each explicit case returns the matched character itself as written --
+ // presumably the entity text was lost from this copy; confirm.
+ private string EncodeCodeEvaluator(Match match)
+ {
+ switch (match.Value)
+ {
+ // Encode all ampersands; HTML entities are not
+ // entities within a Markdown code span.
+ case "&":
+ return "&";
+ // Do the angle bracket song and dance
+ case "<":
+ return "<";
+ case ">":
+ return ">";
+ // escape characters that are magic in Markdown
+ default:
+ return _escapeTable[match.Value];
+ }
+ }
+
+
+ // An ampersand that does not start a complete entity reference. The three entity
+ // forms (decimal, hexadecimal, named) are grouped so that the terminating ';' is
+ // required for all of them, and the hexadecimal form accepts more than one digit.
+ private static Regex _amps = new Regex(@"&(?!((#[0-9]+)|(#[xX][a-fA-F0-9]+)|([a-zA-Z][a-zA-Z0-9]*));)", RegexOptions.ExplicitCapture | RegexOptions.Compiled);
+ // A left angle bracket that cannot start a tag: not followed by a letter, '/', '?', '$' or '!'.
+ private static Regex _angles = new Regex(@"<(?![A-Za-z/?\$!])", RegexOptions.ExplicitCapture | RegexOptions.Compiled);
+
+ /// <summary>
+ /// Encode any ampersands (that aren't part of an HTML entity) and left or right angle brackets
+ /// </summary>
+ private string EncodeAmpsAndAngles(string s)
+ {
+ // NOTE(review): both replacement strings equal the character being replaced, which
+ // makes these calls no-ops as written -- presumably the entity text was lost from
+ // this copy; confirm against the upstream file.
+ s = _amps.Replace(s, "&");
+ s = _angles.Replace(s, "<");
+ return s;
+ }
+
+ // matches any backslash-escaped character; not initialized here
+ private static Regex _backslashEscapes;
+
+ /// <summary>
+ /// Encodes any escaped characters such as \`, \*, \[ etc
+ /// </summary>
+ private string EscapeBackslashes(string s)
+ {
+     string escaped = _backslashEscapes.Replace(s, EscapeBackslashesEvaluator);
+     return escaped;
+ }
+ /// <summary>
+ /// Looks up the replacement for one backslash escape in _backslashEscapeTable.
+ /// </summary>
+ private string EscapeBackslashesEvaluator(Match match)
+ {
+     string escapeSequence = match.Value;
+     return _backslashEscapeTable[escapeSequence];
+ }
+
+ private static Regex _unescapes = new Regex("\x1A\\d+\x1A", RegexOptions.Compiled);
+
+ /// <summary>
+ /// Swaps every hash key (digits wrapped in \x1A characters) back to the
+ /// special character it stands for.
+ /// </summary>
+ private string Unescape(string s)
+ {
+     string restored = _unescapes.Replace(s, UnescapeEvaluator);
+     return restored;
+ }
+ /// <summary>
+ /// Looks up the original character for one hash key in _invertedEscapeTable.
+ /// </summary>
+ private string UnescapeEvaluator(Match match)
+ {
+     string hashKey = match.Value;
+     return _invertedEscapeTable[hashKey];
+ }
+
+
+ /// <summary>
+ /// Replaces the bold/italic marker characters * and _ with their
+ /// entries from the escape table.
+ /// </summary>
+ private string EscapeBoldItalic(string s)
+ {
+     return s.Replace("*", _escapeTable["*"])
+             .Replace("_", _escapeTable["_"]);
+ }
+
+ private static char[] _problemUrlChars = @"""'*()[]$:".ToCharArray();
+
+ /// <summary>
+ /// hex-encodes some unusual "problem" chars in URLs to avoid URL detection problems
+ /// </summary>
+ /// <param name="url">URL to encode</param>
+ /// <returns>the URL unchanged when encoding is switched off, otherwise the encoded URL</returns>
+ private string EncodeProblemUrlChars(string url)
+ {
+     if (!_encodeProblemUrlCharacters) return url;
+
+     var encoded = new StringBuilder(url.Length);
+     int last = url.Length - 1;
+
+     for (int pos = 0; pos <= last; pos++)
+     {
+         char current = url[pos];
+         bool isProblem = Array.IndexOf(_problemUrlChars, current) >= 0;
+
+         // a colon is left alone when the next character is '/' or a digit
+         if (isProblem && current == ':' && pos < last)
+         {
+             char next = url[pos + 1];
+             isProblem = next != '/' && (next < '0' || next > '9');
+         }
+
+         if (isProblem)
+             encoded.Append("%" + String.Format("{0:x}", (byte)current));
+         else
+             encoded.Append(current);
+     }
+
+     return encoded.ToString();
+ }
+
+
+ /// <summary>
+ /// Within tags -- meaning between &lt; and &gt; -- encode [\ ` * _] so they
+ /// don't conflict with their use in Markdown for code, italics and strong.
+ /// We're replacing each such character with its corresponding hash
+ /// value; this is likely overkill, but it should prevent us from colliding
+ /// with the escape values by accident.
+ /// </summary>
+ private string EscapeSpecialCharsWithinTagAttributes(string text)
+ {
+ var tokens = TokenizeHTML(text);
+
+ // now, rebuild text from the tokens
+ var sb = new StringBuilder(text.Length);
+
+ foreach (var token in tokens)
+ {
+ string value = token.Value;
+
+ // only tag tokens are modified; text tokens are copied through unchanged
+ if (token.Type == TokenType.Tag)
+ {
+ value = value.Replace(@"\", _escapeTable[@"\"]);
+ // NOTE(review): the pattern on the next line starts with an optional lookbehind
+ // followed by "code>" -- presumably part of it was lost from this copy; confirm.
+ value = Regex.Replace(value, "(?<=.)?code>(?=.)", _escapeTable[@"`"]);
+ value = EscapeBoldItalic(value);
+ }
+
+ sb.Append(value);
+ }
+
+ return sb.ToString();
+ }
+
+ /// <summary>
+ /// Prepares raw input for the conversion passes:
+ /// tabs are expanded to the next multiple of _tabWidth spaces;
+ /// DOS (CR LF) and Mac (CR) line endings become UNIX (LF);
+ /// lines holding only spaces and tabs are emptied;
+ /// \x1A characters are dropped;
+ /// the result always ends with extra newlines.
+ /// </summary>
+ private string Normalize(string text)
+ {
+     var result = new StringBuilder(text.Length);
+     var current = new StringBuilder();
+     bool hasContent = false; // true once the current line holds something other than spaces
+     int lastIndex = text.Length - 1;
+
+     for (int pos = 0; pos <= lastIndex; pos++)
+     {
+         char ch = text[pos];
+
+         if (ch == '\x1A')
+         {
+             continue;
+         }
+
+         if (ch == '\t')
+         {
+             // pad with spaces up to the next tab stop
+             for (int pad = _tabWidth - current.Length % _tabWidth; pad > 0; pad--)
+                 current.Append(' ');
+             continue;
+         }
+
+         if (ch == '\r')
+         {
+             // A CR directly followed by LF is skipped (the LF ends the line), and so is
+             // a CR in the very last position (the flush after the loop ends that line).
+             if (pos >= lastIndex || text[pos + 1] == '\n')
+                 continue;
+         }
+
+         if (ch == '\n' || ch == '\r')
+         {
+             // end of line: emit the line only if it is not blank
+             if (hasContent) result.Append(current);
+             result.Append('\n');
+             current.Length = 0;
+             hasContent = false;
+             continue;
+         }
+
+         if (ch != ' ') hasContent = true;
+         current.Append(ch);
+     }
+
+     // flush the final line
+     if (hasContent) result.Append(current);
+     result.Append('\n');
+
+     // add two newlines to the end before return
+     return result.Append("\n\n").ToString();
+ }
+
+ #endregion
+
+ /// <summary>
+ /// Returns the text repeated count times, back to back;
+ /// this emulates what is available in PHP.
+ /// </summary>
+ /// <param name="text">text to repeat</param>
+ /// <param name="count">number of repetitions</param>
+ private static string RepeatString(string text, int count)
+ {
+     var buffer = new StringBuilder(text.Length * count);
+     int remaining = count;
+     while (remaining > 0)
+     {
+         buffer.Append(text);
+         remaining--;
+     }
+     return buffer.ToString();
+ }
+
+ }
+}
\ No newline at end of file
diff --git a/src/MarkdownSharp/MarkdownOld.cs b/src/MarkdownSharp/MarkdownOld.cs
new file mode 100644
index 0000000..19bf1b9
--- /dev/null
+++ b/src/MarkdownSharp/MarkdownOld.cs
@@ -0,0 +1,1311 @@
+/*
+ * Markdown - A text-to-HTML conversion tool for web writers
+ * Copyright (c) 2004 John Gruber
+ * http://daringfireball.net/projects/markdown/
+ *
+ * Copyright (c) 2004 Michel Fortin - Translation to PHP
+ * http://www.michelf.com/projects/php-markdown/
+ *
+ * Copyright (c) 2004-2005 Milan Negovan - C# translation to .NET
+ * http://www.aspnetresources.com
+ *
+ */
+
+#region Copyright and license
+
+/*
+Copyright (c) 2003-2004 John Gruber
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+* Neither the name "Markdown" nor the names of its contributors may
+ be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+This software is provided by the copyright holders and contributors "as
+is" and any express or implied warranties, including, but not limited
+to, the implied warranties of merchantability and fitness for a
+particular purpose are disclaimed. In no event shall the copyright owner
+or contributors be liable for any direct, indirect, incidental, special,
+exemplary, or consequential damages (including, but not limited to,
+procurement of substitute goods or services; loss of use, data, or
+profits; or business interruption) however caused and on any theory of
+liability, whether in contract, strict liability, or tort (including
+negligence or otherwise) arising in any way out of the use of this
+software, even if advised of the possibility of such damage.
+*/
+
+#endregion
+
+using System;
+using System.Collections;
+using System.Security.Cryptography;
+using System.Text;
+using System.Text.RegularExpressions;
+
+namespace MarkdownSharp
+{
+ [Obsolete("This old version is included only for historical comparison purposes; use at your own risk!")]
+ public class MarkdownOld
+ {
+ // Untyped two-slot holder. TokenizeHTML fills First with the token kind
+ // ("tag" or "text") and Second with the token's text.
+ public class Pair
+ {
+ // First element of the pair.
+ public Object First;
+ // Second element of the pair.
+ public Object Second;
+ }
+
+ #region Class members
+
+ private const int nestedBracketDepth = 6;
+ private const string emptyElementSuffix = " />"; // Change to ">" for HTML output
+ private const int tabWidth = 4;
+
+ private static readonly string markerUL;
+ private static readonly string markerOL;
+ private static readonly string markerAny;
+
+ private static readonly string nestedBrackets;
+ private static readonly Hashtable escapeTable;
+ private static readonly Hashtable backslashEscapeTable;
+
+ private Hashtable urls;
+ private Hashtable titles;
+ private Hashtable htmlBlocks;
+
+ private int listLevel = 0;
+
+ #endregion
+
+ /// <summary>
+ /// Static constructor.
+ /// </summary>
+ /// <remarks>
+ /// In the static constructor we initialize what stays the same across all transforms:
+ /// the list-marker fragments, the nested-bracket pattern and the two escape tables.
+ /// </remarks>
+ static MarkdownOld()
+ {
+     markerUL = @"[*+-]";
+     markerOL = @"\d+[.]";
+     markerAny = string.Format("(?:{0}|{1})", markerUL, markerOL);
+
+     // Opening fragment repeated nestedBracketDepth times, followed by the same number of closers.
+     nestedBrackets = string.Concat(
+         RepeatString(@"(?>[^\[\]]+|\[", nestedBracketDepth),
+         RepeatString(@"\])*", nestedBracketDepth));
+
+     // Table of hash values for escaped characters (one entry per special character):
+     escapeTable = new Hashtable();
+
+     foreach (char special in @"\`*_{}[]()>#+-.!")
+     {
+         string symbol = special.ToString();
+         escapeTable[symbol] = ComputeMD5(symbol);
+     }
+
+     // Same hashes, but keyed by the backslash-escaped form of each character.
+     backslashEscapeTable = new Hashtable();
+
+     foreach (DictionaryEntry entry in escapeTable)
+         backslashEscapeTable[@"\" + entry.Key] = entry.Value;
+ }
+
+ /// <summary>
+ /// Creates a converter with empty per-instance tables for link URLs,
+ /// link titles and hashed HTML blocks.
+ /// </summary>
+ public MarkdownOld()
+ {
+ urls = new Hashtable();
+ titles = new Hashtable();
+ htmlBlocks = new Hashtable();
+ }
+
+ /// <summary>
+ /// Main function. The order in which other subs are called here is
+ /// essential. Link and image substitutions need to happen before
+ /// EscapeSpecialChars(), so that any *'s or _'s in the a
+ /// and img tags get encoded.
+ /// </summary>
+ /// <param name="text">Markdown source; it is dereferenced immediately, so it must not be null.</param>
+ /// <returns>The result of the block-level passes, unescaped, with one trailing newline appended.</returns>
+ public string Transform(string text)
+ {
+ // Standardize line endings:
+ // DOS to Unix and Mac to Unix
+ text = text.Replace("\r\n", "\n").Replace("\r", "\n");
+
+ // Make sure $text ends with a couple of newlines:
+ text += "\n\n";
+
+ // Convert all tabs to spaces.
+ text = Detab(text);
+
+ // Strip any lines consisting only of spaces and tabs.
+ // This makes subsequent regexen easier to write, because we can
+ // match consecutive blank lines with /\n+/ instead of something
+ // contorted like /[ \t]*\n+/ .
+ text = Regex.Replace(text, @"^[ \t]+$", string.Empty, RegexOptions.Multiline);
+
+ // Turn block-level HTML blocks into hash entries
+ text = HashHTMLBlocks(text);
+
+ // Strip link definitions, store in hashes.
+ text = StripLinkDefinitions(text);
+
+ text = RunBlockGamut(text);
+
+ text = UnescapeSpecialChars(text);
+
+ return text + "\n";
+ }
+
+ #region Process link definitions
+
+ /// <summary>
+ /// Strips link definitions from text, stores the URLs and titles in hash references.
+ /// </summary>
+ /// <remarks>Link defs are in the form: ^[id]: url "optional title"</remarks>
+ private string StripLinkDefinitions(string text)
+ {
+ // The id may be indented by at most tabWidth - 1 spaces; each matched definition is
+ // handed to LinkEvaluator, whose return value replaces it in the text.
+ // NOTE(review): the url line ends in an optional '>' with no matching optional '<'
+ // before the group - confirm the pattern against the upstream Markdown source.
+ string pattern = string.Format(@"
+ ^[ ]{{0,{0}}}\[(.+)\]: # id = $1
+ [ \t]*
+ \n? # maybe *one* newline
+ [ \t]*
+ (\S+?)>? # url = $2
+ [ \t]*
+ \n? # maybe one newline
+ [ \t]*
+ (?:
+ (?<=\s) # lookbehind for whitespace
+ [\x22(]
+ (.+?) # title = $3
+ [\x22)]
+ [ \t]*
+ )? # title is optional
+ (?:\n+|\Z)", tabWidth - 1);
+
+ text = Regex.Replace(text, pattern, new MatchEvaluator(LinkEvaluator), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
+ return text;
+ }
+
+ /// <summary>
+ /// Match callback for StripLinkDefinitions: records the URL (and the optional title) of one
+ /// link definition under its lower-cased id.
+ /// </summary>
+ /// <param name="match">Match whose groups 1, 2 and 3 hold the id, the URL and the optional title.</param>
+ /// <returns>An empty string, so the definition is removed from the text.</returns>
+ private string LinkEvaluator(Match match)
+ {
+     string linkID = match.Groups[1].Value.ToLower();
+     urls[linkID] = EncodeAmpsAndAngles(match.Groups[2].Value);
+
+     // Groups[n] is never null; an optional group that did not participate has Length 0.
+     Group titleGroup = match.Groups[3];
+     if (titleGroup.Length > 0)
+     {
+         // Entity-encode double quotes so the title can sit inside a double-quoted attribute.
+         titles[linkID] = titleGroup.Value.Replace("\"", "&quot;");
+     }
+
+     return string.Empty;
+ }
+
+ #endregion
+
+ #region Hashify HTML blocks
+
+ ///
+ /// Hashify HTML blocks
+ ///
+ private string HashHTMLBlocks(string text)
+ {
+ /*
+ We only want to do this for block-level HTML tags, such as headers,
+ lists, and tables. That's because we still want to wrap s around
+ "paragraphs" that are wrapped in non-block-level tags, such as anchors,
+ phrase emphasis, and spans. The list of tags we're looking for is
+ hard-coded:
+ */
+ string blockTags1 = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del";
+ string blockTags2 = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math";
+
+ /*
+ First, look for nested blocks, e.g.:
+
+
+ tags for inner block must be indented.
+
+
+
+ The outermost tags must start at the left margin for this to match, and
+ the inner nested divs must be indented.
+ We need to do this before the next, more liberal match, because the next
+ match will start at the first `` and stop at the first `
`.
+ */
+ string pattern = string.Format(@"
+ ( # save in $1
+ ^ # start of line (with /m)
+ <({0}) # start tag = $2
+ \b # word break
+ (.*\n)*? # any number of lines, minimally matching
+ \2> # the matching end tag
+ [ \t]* # trailing spaces/tabs
+ (?=\n+|\Z) # followed by a newline or end of document
+ )", blockTags1);
+
+ text = Regex.Replace(text, pattern, new MatchEvaluator(HtmlEvaluator), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
+
+ // Now match more liberally, simply from `\n` to `\n`
+ pattern = string.Format(@"
+ ( # save in $1
+ ^ # start of line (with /m)
+ <({0}) # start tag = $2
+ \b # word break
+ (.*\n)*? # any number of lines, minimally matching
+ .*\2> # the matching end tag
+ [ \t]* # trailing spaces/tabs
+ (?=\n+|\Z) # followed by a newline or end of document
+ )", blockTags2);
+
+ text = Regex.Replace(text, pattern, new MatchEvaluator(HtmlEvaluator), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
+
+ // Special case just for
. It was easier to make a special case than
+ // to make the other regex more complicated.
+ pattern = string.Format(@"
+ (?:
+ (?<=\n\n) # Starting after a blank line
+ | # or
+ \A\n? # the beginning of the doc
+ )
+ ( # save in $1
+ [ ]{{0, {0}}}
+ <(hr) # start tag = $2
+ \b # word break
+ ([^<>])*? #
+ /?> # the matching end tag
+ [ \t]*
+ (?=\n{{2,}}|\Z) # followed by a blank line or end of document
+ )", tabWidth - 1);
+ text = Regex.Replace(text, pattern, new MatchEvaluator(HtmlEvaluator), RegexOptions.IgnorePatternWhitespace);
+
+ // Special case for standalone HTML comments:
+ pattern = string.Format(@"
+ (?:
+ (?<=\n\n) # Starting after a blank line
+ | # or
+ \A\n? # the beginning of the doc
+ )
+ ( # save in $1
+ [ ]{{0,{0}}}
+ (?s:
+
+ )
+ [ \t]*
+ (?=\n{{2,}}|\Z) # followed by a blank line or end of document
+ )", tabWidth - 1);
+ text = Regex.Replace(text, pattern, new MatchEvaluator(HtmlEvaluator), RegexOptions.IgnorePatternWhitespace);
+
+ return text;
+ }
+
+ /// <summary>
+ /// Match callback for HashHTMLBlocks: remembers group 1 of the match in htmlBlocks,
+ /// keyed by its MD5 hash, and substitutes that hash for the block.
+ /// </summary>
+ private string HtmlEvaluator(Match match)
+ {
+     string html = match.Groups[1].Value;
+     string hash = ComputeMD5(html);
+     htmlBlocks[hash] = html;
+
+     // The replacement is the hash on a line of its own, surrounded by blank lines.
+     return "\n\n" + hash + "\n\n";
+ }
+
+ #endregion
+
+ #region Run transformations that form block-level elements (RunBlockGamut)
+
+ ///
+ /// These are all the transformations that form block-level
+ /// tags like paragraphs, headers, and list items.
+ ///
+ private string RunBlockGamut(string text)
+ {
+ text = DoHeaders(text);
+
+ // Do Horizontal Rules:
+ text = Regex.Replace(text, @"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", "
tags around block-level tags.
+ */
+ text = HashHTMLBlocks(text);
+
+ text = FormParagraphs(text);
+
+ return text;
+ }
+
+ #endregion
+
+ #region Run transformations within block-level elements (RunSpanGamut)
+
+ ///
+ /// These are all the transformations that occur *within* block-level
+ /// tags like paragraphs, headers, and list items.
+ ///
+ private string RunSpanGamut(string text)
+ {
+ text = DoCodeSpans(text);
+
+ text = EscapeSpecialChars(text);
+
+ // Process anchor and image tags. Images must come first,
+ // because ![foo][f] looks like an anchor.
+ text = DoImages(text);
+ text = DoAnchors(text);
+
+ // Make links out of things like ``
+ // Must come after DoAnchors(), because you can use < and >
+ // delimiters in inline links like [this]().
+ text = DoAutoLinks(text);
+
+ // Fix unencoded ampersands and <'s:
+ text = EncodeAmpsAndAngles(text);
+
+ text = DoItalicsAndBold(text);
+
+ // Do hard breaks:
+ text = Regex.Replace(text, @" {2,}\n", string.Format("
+ ///
+ ///
+ /// String containing HTML markup.
+ /// An array of the tokens comprising the input string. Each token is
+ /// either a tag (possibly with nested, tags contained therein, such
+ /// as <a href="">, or a run of text between tags. Each element of the
+ /// array is a two-element array; the first is either 'tag' or 'text'; the second is
+ /// the actual value.
+ ///
+ private ArrayList TokenizeHTML(string text)
+ {
+     // Splits text into an ArrayList of Pair tokens: First is "tag" or "text",
+     // Second is the token's text. Concatenating every Second reproduces the input.
+     //
+     // Regular expression derived from the _tokenize() subroutine in
+     // Brad Choate's MTRegex plugin.
+     // http://www.bradchoate.com/past/mtregex.php
+     const int depth = 6;
+     ArrayList tokens = new ArrayList();
+
+     string nestedTags = string.Concat(RepeatString(@"(?:<[a-z\/!$](?:[^<>]|", depth),
+         RepeatString(@")*>)", depth));
+
+     // Three alternatives: HTML comments, processing instructions, and (nested) tags.
+     // The comment alternative must not be empty: an empty alternative matches at every
+     // position, so no tag would ever be recognised.
+     string pattern = string.Concat(@"(?s:<!(?:--.*?--\s*)+>)|(?s:<\?.*?\?>)|", nestedTags);
+
+     int pos = 0; // index just past the previously emitted token
+
+     foreach (Match m in Regex.Matches(text, pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline))
+     {
+         // Text between the previous tag and this one.
+         if (pos < m.Index)
+         {
+             Pair textToken = new Pair();
+             textToken.First = "text";
+             textToken.Second = text.Substring(pos, m.Index - pos);
+             tokens.Add(textToken);
+         }
+
+         Pair tagToken = new Pair();
+         tagToken.First = "tag";
+         tagToken.Second = m.Value;
+         tokens.Add(tagToken);
+
+         pos = m.Index + m.Length;
+     }
+
+     // Trailing text after the last tag (or the whole input when there is no tag).
+     if (pos < text.Length)
+     {
+         Pair tail = new Pair();
+         tail.First = "text";
+         tail.Second = text.Substring(pos, text.Length - pos);
+         tokens.Add(tail);
+     }
+
+     return tokens;
+ }
+
+ #endregion
+
+ #region Escape special characters
+
+ /// <summary>
+ /// Tokenizes the text with TokenizeHTML and rebuilds it: inside tags, * and _ are replaced by
+ /// their escape hashes; in plain text, backslash escapes are encoded.
+ /// </summary>
+ private string EscapeSpecialChars(string text)
+ {
+     ArrayList tokens = TokenizeHTML(text);
+
+     // Looked up once instead of once per tag token.
+     string starHash = escapeTable["*"].ToString();
+     string underscoreHash = escapeTable["_"].ToString();
+
+     // Rebuild text from the tokens; a StringBuilder avoids re-copying the whole
+     // accumulated string for every token.
+     StringBuilder rebuilt = new StringBuilder(text.Length);
+
+     foreach (Pair token in tokens)
+     {
+         string value = token.Second.ToString();
+
+         if (token.First.Equals("tag"))
+         {
+             /*
+             Within tags, encode * and _ so they don't conflict with their use
+             in Markdown for italics and strong. We're replacing each
+             such character with its corresponding MD5 checksum value;
+             this is likely overkill, but it should prevent us from colliding
+             with the escape values by accident.
+             */
+             value = value.Replace("*", starHash).Replace("_", underscoreHash);
+         }
+         else
+         {
+             value = EncodeBackslashEscapes(value);
+         }
+
+         rebuilt.Append(value);
+     }
+
+     return rebuilt.ToString();
+ }
+
+ #endregion
+
+ #region Process referenced and inline anchors
+
+ ///
+ /// Turn Markdown link shortcuts into XHTML tags.
+ ///
+ private string DoAnchors(string text)
+ {
+ //
+ // First, handle reference-style links: [link text] [id]
+ //
+ string pattern = string.Format(@"
+ ( # wrap whole match in $1
+ \[
+ ({0}) # link text = $2
+ \]
+
+ [ ]? # one optional space
+ (?:\n[ ]*)? # one optional newline followed by spaces
+
+ \[
+ (.*?) # id = $3
+ \]
+ )", nestedBrackets);
+
+ text = Regex.Replace(text, pattern, new MatchEvaluator(AnchorReferenceEvaluator), RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);
+
+ //
+ // Next, inline-style links: [link text](url "optional title")
+ //
+ pattern = string.Format(@"
+ ( # wrap whole match in $1
+ \[
+ ({0}) # link text = $2
+ \]
+ \( # literal paren
+ [ \t]*
+ (.*?)>? # href = $3
+ [ \t]*
+ ( # $4
+ (['\x22]) # quote char = $5
+ (.*?) # Title = $6
+ \5 # matching quote
+ )? # title is optional
+ \)
+ )", nestedBrackets);
+
+ text = Regex.Replace(text, pattern, new MatchEvaluator(AnchorInlineEvaluator), RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace);
+
+ return text;
+ }
+
+ private string AnchorReferenceEvaluator(Match match)
+ {
+ string wholeMatch = match.Groups[1].Value;
+ string linkText = match.Groups[2].Value;
+ string linkID = match.Groups[3].Value.ToLower();
+ string url = null;
+ string res = null;
+ string title = null;
+
+ // for shortcut links like [this][].
+ if (linkID.Equals(string.Empty))
+ linkID = linkText.ToLower();
+
+ if (urls[linkID] != null)
+ {
+ url = urls[linkID].ToString();
+
+ //We've got to encode these to avoid conflicting with italics/bold.
+ url = url.Replace("*", escapeTable["*"].ToString()).Replace("_", escapeTable["_"].ToString());
+ res = string.Format("{0}", linkText);
+ }
+ else
+ res = wholeMatch;
+
+ return res;
+ }
+
+ private string AnchorInlineEvaluator(Match match)
+ {
+ string linkText = match.Groups[2].Value;
+ string url = match.Groups[3].Value;
+ string title = match.Groups[6].Value;
+ string res = null;
+
+ // We've got to encode these to avoid conflicting with italics/bold.
+ url = url.Replace("*", escapeTable["*"].ToString()).Replace("_", escapeTable["_"].ToString());
+ res = string.Format(" 0)
+ {
+ title = title.Replace("\"", """).Replace("*", escapeTable["*"].ToString()).Replace("_", escapeTable["_"].ToString());
+ res += string.Format(" title=\"{0}\"", title);
+ }
+
+ res += string.Format(">{0}", linkText);
+ return res;
+ }
+
+ #endregion
+
+ #region Process inline and referenced images
+
+ ///
+ /// Turn Markdown image shortcuts into
tags.
+ ///
+ private string DoImages(string text)
+ {
+ // First, handle reference-style labeled images: ![alt text][id]
+ string pattern = @"
+ ( # wrap whole match in $1
+ !\[
+ (.*?) # alt text = $2
+ \]
+
+ [ ]? # one optional space
+ (?:\n[ ]*)? # one optional newline followed by spaces
+
+ \[
+ (.*?) # id = $3
+ \]
+
+ )";
+
+ text = Regex.Replace(text, pattern, new MatchEvaluator(ImageReferenceEvaluator), RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);
+
+ // Next, handle inline images: 
+ // Don't forget: encode * and _
+ pattern = @"
+ ( # wrap whole match in $1
+ !\[
+ (.*?) # alt text = $2
+ \]
+ \( # literal paren
+ [ \t]*
+ (\S+?)>? # src url = $3
+ [ \t]*
+ ( # $4
+ (['\x22]) # quote char = $5
+ (.*?) # title = $6
+ \5 # matching quote
+ [ \t]*
+ )? # title is optional
+ \)
+ )";
+
+ text = Regex.Replace(text, pattern, new MatchEvaluator(ImageInlineEvaluator), RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);
+
+ return text;
+ }
+
+ private string ImageReferenceEvaluator(Match match)
+ {
+ string wholeMatch = match.Groups[1].Value;
+ string altText = match.Groups[2].Value;
+ string linkID = match.Groups[3].Value.ToLower();
+ string url = null;
+ string res = null;
+ string title = null;
+
+ // for shortcut links like ![this][].
+ if (linkID.Equals(string.Empty))
+ linkID = altText.ToLower();
+
+ altText = altText.Replace("\"", """);
+
+ if (urls[linkID] != null)
+ {
+ url = urls[linkID].ToString();
+
+ // We've got to encode these to avoid conflicting with italics/bold.
+ url = url.Replace("*", escapeTable["*"].ToString()).Replace("_", escapeTable["_"].ToString());
+ res = string.Format("
", RunSpanGamut(header), "\n\n");
+ }
+
+ private string SetextHeader2Evaluator(Match match)
+ {
+ string header = match.Groups[1].Value;
+ return string.Concat("", RunSpanGamut(header), "
\n\n");
+ }
+
+ private string AtxHeaderEvaluator(Match match)
+ {
+ string headerSig = match.Groups[1].Value;
+ string headerText = match.Groups[2].Value;
+
+ return string.Concat("", RunSpanGamut(headerText), "\n\n");
+ }
+
+ #endregion
+
+ #region Process ordered and unordered lists
+
+ /// <summary>
+ /// Finds whole ordered/unordered lists in the text and replaces each one with the
+ /// result of ListEvaluator.
+ /// </summary>
+ private string DoLists(string text)
+ {
+     // Re-usable pattern to match any entire ul or ol list:
+     string wholeList = string.Format(@"
+ ( # $1 = whole list
+ ( # $2
+ [ ]{{0,{1}}}
+ ({0}) # $3 = first list item marker
+ [ \t]+
+ )
+ (?s:.+?)
+ ( # $4
+ \z
+ |
+ \n{{2,}}
+ (?=\S)
+ (?! # Negative lookahead for another list item marker
+ [ \t]*
+ {0}[ \t]+
+ )
+ )
+ )", markerAny, tabWidth - 1);
+
+     // Nested lists are anchored differently from top-level lists;
+     // see the extended comment in ProcessListItems().
+     string prefix = (listLevel > 0) ? "^" : @"(?:(?<=\n\n)|\A\n?)";
+
+     return Regex.Replace(text, prefix + wholeList, new MatchEvaluator(ListEvaluator), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
+ }
+
+ private string ListEvaluator(Match match)
+ {
+ string list = match.Groups[1].Value;
+ string listType = Regex.IsMatch(match.Groups[3].Value, markerUL) ? "ul" : "ol";
+ string result = null;
+
+ // Turn double returns into triple returns, so that we can make a
+ // paragraph for the last item in a list, if necessary:
+ list = Regex.Replace(list, @"\n{2,}", "\n\n\n");
+ result = ProcessListItems(list, markerAny);
+ result = string.Format("<{0}>\n{1}{0}>\n", listType, result);
+
+ return result;
+ }
+
+ /// <summary>
+ /// Process the contents of a single ordered or unordered list, splitting it
+ /// into individual list items.
+ /// </summary>
+ /// <param name="list">Text of one whole list.</param>
+ /// <param name="marker">Regex fragment matching a list item marker.</param>
+ /// <returns>The list text with every item replaced by the result of ListEvaluator2.</returns>
+ private string ProcessListItems(string list, string marker)
+ {
+     /*
+     The listLevel global keeps track of when we're inside a list.
+     Each time we enter a list, we increment it; when we leave a list,
+     we decrement. If it's zero, we're not in a list anymore.
+
+     We do this because when we're not inside a list, we want to treat
+     something like this:
+
+     I recommend upgrading to version
+     8. Oops, now this line is treated
+     as a sub-list.
+
+     As a single paragraph, despite the fact that the second line starts
+     with a digit-period-space sequence.
+
+     Whereas when we're inside a list (or sub-list), that line will be
+     treated as the start of a sub-list. What a kludge, huh? This is
+     an aspect of Markdown's syntax that's hard to parse perfectly
+     without resorting to mind-reading. Perhaps the solution is to
+     change the syntax rules such that sub-lists must start with a
+     starting cardinal number; e.g. "1." or "a.".
+     */
+
+     listLevel++;
+
+     try
+     {
+         // Trim trailing blank lines:
+         list = Regex.Replace(list, @"\n{2,}\z", "\n");
+
+         string pattern = string.Format(
+ @"(\n)? # leading line = $1
+ (^[ \t]*) # leading whitespace = $2
+ ({0}) [ \t]+ # list marker = $3
+ ((?s:.+?) # list item text = $4
+ (\n{{1,2}}))
+ (?= \n* (\z | \2 ({0}) [ \t]+))", marker);
+
+         return Regex.Replace(list, pattern, new MatchEvaluator(ListEvaluator2),
+             RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
+     }
+     finally
+     {
+         // Always leave the list, even when a nested transform throws; otherwise a reused
+         // instance would keep treating top-level text as if it were inside a list.
+         listLevel--;
+     }
+ }
+
+ private string ListEvaluator2(Match match)
+ {
+ string item = match.Groups[4].Value;
+ string leadingLine = match.Groups[1].Value;
+
+
+ if ((leadingLine != null && leadingLine != string.Empty) || Regex.IsMatch(item, @"\n{2,}"))
+ item = RunBlockGamut(Outdent(item));
+ else
+ {
+ // Recursion for sub-lists:
+ item = DoLists(Outdent(item));
+ item = item.TrimEnd('\n');
+ item = RunSpanGamut(item);
+ }
+
+ return string.Format("{0}\n", item);
+ }
+
+ #endregion
+
+ #region Process code blocks
+
+ /// <summary>
+ /// Finds runs of lines indented by a tab or tabWidth spaces and replaces each run
+ /// with the result of CodeBlockEvaluator.
+ /// </summary>
+ private string DoCodeBlocks(string text)
+ {
+     // TODO: Should we allow 2 empty lines here or only one?
+     string codeBlockPattern = string.Format(@"
+ (?:\n\n|\A)
+ ( # $1 = the code block -- one or more lines, starting with a space/tab
+ (?:
+ (?:[ ]{{{0}}} | \t) # Lines must start with a tab or a tab-width of spaces
+ .*\n+
+ )+
+ )
+ ((?=^[ ]{{0,{0}}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc",
+         tabWidth);
+
+     MatchEvaluator evaluator = new MatchEvaluator(CodeBlockEvaluator);
+     RegexOptions options = RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace;
+
+     return Regex.Replace(text, codeBlockPattern, evaluator, options);
+ }
+
+ private string CodeBlockEvaluator(Match match)
+ {
+ string codeBlock = match.Groups[1].Value;
+ codeBlock = EncodeCode(Outdent(codeBlock));
+
+ // Trim leading newlines and trailing whitespace
+ codeBlock = Regex.Replace(codeBlock, @"^\n+", string.Empty);
+ codeBlock = Regex.Replace(codeBlock, @"\s+\z", string.Empty);
+
+ return string.Concat("\n\n", codeBlock, "\n
\n\n");
+ }
+
+ #endregion
+
+ #region Process code spans
+
+ private string DoCodeSpans(string text)
+ {
+ /*
+ * Backtick quotes are used for spans.
+ * You can use multiple backticks as the delimiters if you want to
+ include literal backticks in the code span. So, this input:
+
+ Just type ``foo `bar` baz`` at the prompt.
+
+ Will translate to:
+
+ Just type foo `bar` baz at the prompt.
+
+ There's no arbitrary limit to the number of backticks you
+ can use as delimters. If you need three consecutive backticks
+ in your code, use four for delimiters, etc.
+
+ * You can use spaces to get literal backticks at the edges:
+
+ ... type `` `bar` `` ...
+
+ Turns to:
+
+ ... type `bar` ...
+ */
+
+ string pattern = @"
+ (`+) # $1 = Opening run of `
+ (.+?) # $2 = The code block
+ (?", s, "");
+ }
+
+ #endregion
+
+ #region Encode/escape certain characters inside Markdown code runs
+
+ /// <summary>
+ /// Encode/escape certain characters inside Markdown code runs.
+ /// </summary>
+ /// <remarks>
+ /// The point is that in code, these characters are literals, and lose their
+ /// special Markdown meanings.
+ /// </remarks>
+ /// <param name="code">Raw text of a code block or code span.</param>
+ /// <returns>The text with &amp;, &lt; and &gt; entity-encoded and every character of escapeTable replaced by its hash.</returns>
+ private string EncodeCode(string code)
+ {
+     // Ampersands first, so the ampersands introduced by the next two replacements
+     // are not encoded a second time.
+     code = code.Replace("&", "&amp;").Replace("<", "&lt;").Replace(">", "&gt;");
+
+     // Hide the Markdown-special characters behind their hashes.
+     foreach (string key in escapeTable.Keys)
+         code = code.Replace(key, escapeTable[key].ToString());
+
+     return code;
+ }
+
+ #endregion
+
+ #region Process bold and italics
+
+ // Replaces emphasis spans delimited by * or _ using BoldEvaluator (doubled
+ // delimiters) and ItalicsEvaluator (single delimiters).
+ private string DoItalicsAndBold(string text)
+ {
+ // Bold (** or __) must go first, otherwise the single-delimiter pattern
+ // below would match inside the doubled delimiters:
+ text = Regex.Replace(text, @"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1",
+ new MatchEvaluator(BoldEvaluator),
+ RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);
+
+ // Then italics (* or _):
+ text = Regex.Replace(text, @"(\*|_) (?=\S) (.+?) (?<=\S) \1",
+ new MatchEvaluator(ItalicsEvaluator),
+ RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);
+ return text;
+ }
+
+ // Match callback for DoItalicsAndBold: wraps the emphasized text (group 2) in an em element.
+ // Without the tags the delimiters would simply be dropped and the emphasis lost.
+ private string ItalicsEvaluator(Match match)
+ {
+     return string.Format("<em>{0}</em>", match.Groups[2].Value);
+ }
+
+ // Match callback for DoItalicsAndBold: wraps the emphasized text (group 2) in a strong element.
+ // Without the tags the delimiters would simply be dropped and the emphasis lost.
+ private string BoldEvaluator(Match match)
+ {
+     return string.Format("<strong>{0}</strong>", match.Groups[2].Value);
+ }
+
+ #endregion
+
+ #region Process blockquotes
+
+ /// <summary>
+ /// Finds runs of lines introduced by '>' and replaces each run with the result
+ /// of BlockQuoteEvaluator.
+ /// </summary>
+ private string DoBlockQuotes(string text)
+ {
+     string quotePattern =
+         @"( # Wrap whole match in $1
+ (
+ ^[ \t]*>[ \t]? # '>' at the start of a line
+ .+\n # rest of the first line
+ (.+\n)* # subsequent consecutive lines
+ \n* # blanks
+ )+
+ )";
+
+     RegexOptions options = RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline;
+
+     return Regex.Replace(text, quotePattern, new MatchEvaluator(BlockQuoteEvaluator), options);
+ }
+
+ private string BlockQuoteEvaluator(Match match)
+ {
+ string bq = match.Groups[1].Value;
+
+ // Trim one level of quoting - trim whitespace-only lines
+ bq = Regex.Replace(bq, @"^[ \t]*>[ \t]?", string.Empty, RegexOptions.Multiline);
+ bq = Regex.Replace(bq, @"^[ \t]+$", string.Empty, RegexOptions.Multiline);
+
+ bq = RunBlockGamut(bq);
+ bq = Regex.Replace(bq, @"^", " ", RegexOptions.Multiline);
+
+ // These leading spaces screw with content, so we need to fix that:
+ bq = Regex.Replace(bq, @"(\s*.+?
)", new MatchEvaluator(BlockQuoteEvaluator2), RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);
+
+ return string.Format("\n{0}\n
\n\n", bq);
+ }
+
+ // Match callback used by BlockQuoteEvaluator: strips the leading space pattern from the
+ // start of every line of group 1.
+ // NOTE(review): the pattern literal may have lost whitespace in transit - confirm its width
+ // matches the indentation BlockQuoteEvaluator adds.
+ private string BlockQuoteEvaluator2(Match match)
+ {
+     return Regex.Replace(match.Groups[1].Value, @"^ ", string.Empty, RegexOptions.Multiline);
+ }
+
+ #endregion
+
+ #region Create paragraph tags
+
+ private string FormParagraphs(string text)
+ {
+ // Strip leading and trailing lines:
+ text = Regex.Replace(text, @"^\n+", string.Empty);
+ text = Regex.Replace(text, @"\n+\z", string.Empty);
+
+ string[] grafs = Regex.Split(text, @"\n{2,}");
+
+ // Wrap tags.
+ for (int i = 0; i < grafs.Length; i++)
+ {
+ // Milan Negovan: I'm adding an additional check for an empty block of code.
+ // Otherwise an empty
is created.
+ if (htmlBlocks[grafs[i]] == null && grafs[i].Length > 0)
+ {
+ string block = grafs[i];
+
+ block = RunSpanGamut(block);
+ block = Regex.Replace(block, @"^([ \t]*)", "");
+ block += "
";
+
+ grafs[i] = block;
+ }
+ }
+
+ // Unhashify HTML blocks
+ for (int i = 0; i < grafs.Length; i++)
+ {
+ string block = (string)htmlBlocks[grafs[i]];
+
+ if (block != null)
+ grafs[i] = block;
+ }
+
+ return string.Join("\n\n", grafs);
+
+ }
+
+ #endregion
+
+ #region Process emails and links
+
+ /// <summary>
+ /// Handles angle-bracketed http/https/ftp URLs via HyperlinkEvaluator and
+ /// angle-bracketed e-mail addresses via EmailEvaluator.
+ /// </summary>
+ private string DoAutoLinks(string text)
+ {
+     // URLs first.
+     string withLinks = Regex.Replace(text, "<((https?|ftp):[^'\">\\s]+)>", new MatchEvaluator(HyperlinkEvaluator));
+
+     // Email addresses:
+     string emailPattern =
+         @"<
+ (?:mailto:)?
+ (
+ [-.\w]+
+ \@
+ [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
+ )
+ >";
+
+     RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace;
+
+     return Regex.Replace(withLinks, emailPattern, new MatchEvaluator(EmailEvaluator), options);
+ }
+
+ private string HyperlinkEvaluator(Match match)
+ {
+ string link = match.Groups[1].Value;
+ return string.Format("{0}", link);
+ }
+
+ private string EmailEvaluator(Match match)
+ {
+ string email = UnescapeSpecialChars(match.Groups[1].Value);
+
+ /*
+ Input: an email address, e.g. "foo@example.com"
+
+ Output: the email address as a mailto link, with each character
+ of the address encoded as either a decimal or hex entity, in
+ the hopes of foiling most address harvesting spam bots. E.g.:
+
+ foo
+ @example.com
+
+ Based by a filter by Matthew Wickline, posted to the BBEdit-Talk
+ mailing list:
+
+ */
+ email = "mailto:" + email;
+
+ // leave ':' alone (to spot mailto: later)
+ email = Regex.Replace(email, @"([^\:])", new MatchEvaluator(EncodeEmailEvaluator));
+
+ email = string.Format("{0}", email);
+
+ // strip the mailto: from the visible part
+ email = Regex.Replace(email, "\">.+?:", "\">");
+ return email;
+ }
+
+ // One generator shared by every call. Creating a new Random per character can hand
+ // consecutive calls the same clock-derived seed, so every character of an address
+ // would draw the same number and get the same encoding.
+ private static readonly Random emailEncodingRandom = new Random();
+
+ /// <summary>
+ /// Match callback for EmailEvaluator: encodes one character of a mailto address as a
+ /// hexadecimal entity, a decimal entity, or (occasionally) leaves it raw.
+ /// </summary>
+ /// <param name="match">Match whose group 1 is exactly one character.</param>
+ /// <returns>The raw character or its entity-encoded form.</returns>
+ private string EncodeEmailEvaluator(Match match)
+ {
+     char c = Convert.ToChar(match.Groups[1].Value);
+
+     int r;
+     lock (emailEncodingRandom) // Random instances are not thread-safe
+     {
+         r = emailEncodingRandom.Next(0, 100);
+     }
+
+     // Original author note:
+     // Roughly 10% raw, 45% hex, 45% dec
+     // '@' *must* be encoded. I insist.
+     if (r > 90 && c != '@') return c.ToString();
+     if (r < 45) return string.Format("&#x{0:x};", (int)c);
+
+     return string.Format("&#{0};", (int)c);
+ }
+
+ #endregion
+
+ #region EncodeAmpsAndAngles, EncodeBackslashEscapes, UnescapeSpecialChars, Outdent, UnslashQuotes
+
+ /// <summary>
+ /// Smart processing for ampersands and angle brackets that need to be encoded.
+ /// </summary>
+ /// <param name="text">Text that may contain bare ampersands and left angle brackets.</param>
+ /// <returns>
+ /// The text with ampersands that do not start an entity encoded as &amp;amp; and with
+ /// left angle brackets that do not start a tag encoded as &amp;lt;.
+ /// </returns>
+ private string EncodeAmpsAndAngles(string text)
+ {
+     // Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
+     // http://bumppo.net/projects/amputator/
+
+     // Only ampersands that are not already the start of an entity are encoded.
+     text = Regex.Replace(text, @"&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)", "&amp;");
+
+     // Encode naked <'s (those not followed by a letter, '/', '?', '$' or '!')
+     text = Regex.Replace(text, @"<(?![a-z/?\$!])", "&lt;", RegexOptions.IgnoreCase);
+
+     return text;
+ }
+
+ /// <summary>
+ /// Replaces every backslash-escaped special character in the text with the hash
+ /// stored for it in backslashEscapeTable.
+ /// </summary>
+ private string EncodeBackslashEscapes(string value)
+ {
+     // Must process escaped backslashes first. Hashtable enumeration order is unspecified,
+     // so this cannot be left to the loop: in "\\*" the second backslash would otherwise
+     // be free to pair with the '*' if the "\*" entry happened to be visited earlier.
+     string escapedBackslash = @"\\";
+     value = value.Replace(escapedBackslash, backslashEscapeTable[escapedBackslash].ToString());
+
+     foreach (string key in backslashEscapeTable.Keys)
+     {
+         if (key == escapedBackslash) continue; // already handled above
+         value = value.Replace(key, backslashEscapeTable[key].ToString());
+     }
+
+     return value;
+ }
+
+ /// <summary>
+ /// Restores the special characters that were hidden behind their hashes:
+ /// every hash stored in escapeTable is replaced by the character it stands for.
+ /// </summary>
+ private string UnescapeSpecialChars(string text)
+ {
+     string restored = text;
+
+     foreach (DictionaryEntry entry in escapeTable)
+     {
+         string hash = entry.Value.ToString();
+         restored = restored.Replace(hash, (string)entry.Key);
+     }
+
+     return restored;
+ }
+
+ /// <summary>
+ /// Removes one level of indentation from every line: either a single tab or
+ /// between one and tabWidth leading spaces.
+ /// </summary>
+ private string Outdent(string block)
+ {
+     string indentPattern = string.Concat(@"^(\t|[ ]{1,", tabWidth.ToString(), @"})");
+     return Regex.Replace(block, indentPattern, string.Empty, RegexOptions.Multiline);
+ }
+ #endregion
+
+ #region Replace tabs with spaces and pad them to tab width
+
+ /// <summary>
+ /// Replaces every tab with the spaces needed to reach the next tab stop.
+ /// </summary>
+ private string Detab(string text)
+ {
+     // Inspired from a post by Bart Lateur:
+     // http://www.nntp.perl.org/group/perl.macperl.anyperl/154
+     //
+     // A match may start at the beginning of a line or right after the previous tab.
+     // Anchoring only at line start would expand just the first tab of each line.
+     // Measuring the text since the previous tab is enough for TabEvaluator, because the
+     // previous expansion always ends exactly on a tab stop.
+     return Regex.Replace(text, @"(?:^|(?<=\t))([^\t\n]*)\t", new MatchEvaluator(TabEvaluator), RegexOptions.Multiline);
+ }
+
+ // Match callback for Detab: returns the text that preceded the tab (group 1) followed by
+ // as many spaces as are needed to reach the next multiple of tabWidth.
+ private string TabEvaluator(Match match)
+ {
+     string before = match.Groups[1].Value;
+     int padding = tabWidth - before.Length % tabWidth;
+     return before + RepeatString(" ", padding);
+ }
+
+ #endregion
+
+ #region Helper methods (RepeatString & ComputeMD5)
+
+ /// <summary>
+ /// This is to emulate what's available in PHP: repeats a string a number of times.
+ /// </summary>
+ /// <param name="text">The string to repeat.</param>
+ /// <param name="count">How many times to repeat it.</param>
+ /// <returns>
+ /// <paramref name="text"/> concatenated <paramref name="count"/> times; an empty string
+ /// (rather than null) when <paramref name="count"/> is zero or negative.
+ /// </returns>
+ private static string RepeatString(string text, int count)
+ {
+     if (count <= 0 || string.IsNullOrEmpty(text))
+         return string.Empty;
+
+     // A presized StringBuilder avoids re-copying the accumulated string on every iteration.
+     StringBuilder res = new StringBuilder(text.Length * count);
+
+     for (int i = 0; i < count; i++)
+         res.Append(text);
+
+     return res.ToString();
+ }
+
+ /// <summary>
+ /// Calculate an MD5 hash of an arbitrary string
+ /// </summary>
+ /// <param name="text">The string to hash; it is encoded as UTF-8 first.</param>
+ /// <returns>The hash as 32 lower-case hexadecimal digits.</returns>
+ private static string ComputeMD5(string text)
+ {
+     byte[] hashedText;
+
+     // MD5 is IDisposable; release it as soon as the hash is computed.
+     using (MD5 algo = MD5.Create())
+     {
+         hashedText = algo.ComputeHash(Encoding.UTF8.GetBytes(text));
+     }
+
+     StringBuilder res = new StringBuilder(hashedText.Length * 2);
+
+     foreach (byte b in hashedText)
+         res.Append(b.ToString("x2"));
+
+     return res.ToString();
+ }
+ #endregion
+ }
+}
\ No newline at end of file
diff --git a/src/MarkdownSharp/MarkdownOptions.cs b/src/MarkdownSharp/MarkdownOptions.cs
new file mode 100644
index 0000000..ce9620d
--- /dev/null
+++ b/src/MarkdownSharp/MarkdownOptions.cs
@@ -0,0 +1,172 @@
+using System;
+using System.Collections.Specialized;
+using System.Configuration;
+using System.Runtime.CompilerServices;
+using System.Web.Configuration;
+
+namespace MarkdownSharp
+{
+    /// <summary>
+    /// Container for Markdown options. This class is immutable, create a new instance
+    /// if you want to change the options after construction.
+    /// </summary>
+    public class MarkdownOptions
+    {
+        /// <summary>
+        /// Default constructor. First loads default values, then
+        /// overrides them with any values specified in the configuration file.
+        /// Configuration values are specified in the &lt;appSettings&gt;
+        /// section of the config file and take the form Markdown.PropertyName
+        /// where PropertyName is any of the properties in this class.
+        /// </summary>
+        public MarkdownOptions() : this(true) { }
+
+        /// <summary>
+        /// Sets all values to their defaults, and if loadFromConfigFile
+        /// is true it overrides them with configuration file values.
+        /// Configuration values are specified in the &lt;appSettings&gt;
+        /// section of the config file and take the form Markdown.PropertyName
+        /// where PropertyName is any of the properties in this class.
+        /// </summary>
+        /// <param name="loadFromConfigFile">True to override defaults with values from config file; when false the configuration is not read at all.</param>
+        public MarkdownOptions(bool loadFromConfigFile)
+            : this(loadFromConfigFile, loadFromConfigFile ? new[] { ConfigurationManager.AppSettings, WebConfigurationManager.AppSettings } : null)
+        {
+        }
+
+
+        /// <summary>
+        /// Constructor for internal use and unit testing.
+        /// </summary>
+        /// <param name="loadFromConfigFile">True to apply the settings found in configProviders on top of the defaults.</param>
+        /// <param name="configProviders">Setting collections applied in array order; may be null, and null entries are skipped.</param>
+        internal MarkdownOptions(bool loadFromConfigFile, NameValueCollection[] configProviders)
+        {
+            Defaults();
+            if (!loadFromConfigFile || configProviders == null) return;
+
+            foreach (var appSettings in configProviders)
+            {
+                if (appSettings == null) continue;
+                foreach (string key in appSettings.Keys)
+                {
+                    switch (key)
+                    {
+                        case "Markdown.AutoHyperlink":
+                            AutoHyperlink = Convert.ToBoolean(appSettings[key]);
+                            break;
+                        case "Markdown.AutoNewlines":
+                            AutoNewlines = Convert.ToBoolean(appSettings[key]);
+                            break;
+                        case "Markdown.EmptyElementSuffix":
+                            EmptyElementSuffix = appSettings[key];
+                            break;
+                        case "Markdown.EncodeProblemUrlCharacters":
+                            EncodeProblemUrlCharacters = Convert.ToBoolean(appSettings[key]);
+                            break;
+                        case "Markdown.LinkEmails":
+                            LinkEmails = Convert.ToBoolean(appSettings[key]);
+                            break;
+                        case "Markdown.NestDepth":
+                            NestDepth = Convert.ToInt32(appSettings[key]);
+                            break;
+                        case "Markdown.StrictBoldItalic":
+                            StrictBoldItalic = Convert.ToBoolean(appSettings[key]);
+                            break;
+                        case "Markdown.TabWidth":
+                            TabWidth = Convert.ToInt32(appSettings[key]);
+                            break;
+                    }
+                }
+            }
+        }
+
+
+        /// <summary>
+        /// Sets all options explicitly and does not attempt to override them with values
+        /// from configuration file.
+        /// </summary>
+        /// <param name="autoHyperlink">Value for <see cref="AutoHyperlink"/>.</param>
+        /// <param name="autoNewlines">Value for <see cref="AutoNewlines"/>.</param>
+        /// <param name="emptyElementsSuffix">Value for <see cref="EmptyElementSuffix"/>.</param>
+        /// <param name="encodeProblemUrlCharacters">Value for <see cref="EncodeProblemUrlCharacters"/>.</param>
+        /// <param name="linkEmails">Value for <see cref="LinkEmails"/>.</param>
+        /// <param name="nestDepth">Value for <see cref="NestDepth"/>.</param>
+        /// <param name="strictBoldItalic">Value for <see cref="StrictBoldItalic"/>.</param>
+        /// <param name="tabWidth">Value for <see cref="TabWidth"/>.</param>
+        public MarkdownOptions(bool autoHyperlink, bool autoNewlines, string emptyElementsSuffix,
+            bool encodeProblemUrlCharacters, bool linkEmails, int nestDepth, bool strictBoldItalic,
+            int tabWidth)
+        {
+            AutoHyperlink = autoHyperlink;
+            AutoNewlines = autoNewlines;
+            EmptyElementSuffix = emptyElementsSuffix;
+            EncodeProblemUrlCharacters = encodeProblemUrlCharacters;
+            LinkEmails = linkEmails;
+            NestDepth = nestDepth;
+            StrictBoldItalic = strictBoldItalic;
+            TabWidth = tabWidth;
+        }
+
+        /// <summary>
+        /// Sets all fields to their default values
+        /// </summary>
+        private void Defaults()
+        {
+            AutoHyperlink = false;
+            AutoNewlines = false;
+            EmptyElementSuffix = " />";
+            EncodeProblemUrlCharacters = false;
+            LinkEmails = true;
+            NestDepth = 6;
+            StrictBoldItalic = false;
+            TabWidth = 4;
+        }
+
+        /// <summary>
+        /// when true, (most) bare plain URLs are auto-hyperlinked
+        /// WARNING: this is a significant deviation from the markdown spec
+        /// </summary>
+        public bool AutoHyperlink { get; private set; }
+
+        /// <summary>
+        /// when true, RETURN becomes a literal newline
+        /// WARNING: this is a significant deviation from the markdown spec
+        /// </summary>
+        public bool AutoNewlines { get; private set; }
+
+        /// <summary>
+        /// use "&gt;" for HTML output, or " /&gt;" for XHTML output
+        /// </summary>
+        public string EmptyElementSuffix { get; private set; }
+
+        /// <summary>
+        /// when true, problematic URL characters like [, ], (, and so forth will be encoded
+        /// WARNING: this is a significant deviation from the markdown spec
+        /// </summary>
+        public bool EncodeProblemUrlCharacters { get; private set; }
+
+        /// <summary>
+        /// when false, email addresses will never be auto-linked
+        /// WARNING: this is a significant deviation from the markdown spec
+        /// </summary>
+        public bool LinkEmails { get; private set; }
+
+        /// <summary>
+        /// maximum nested depth of [] and () supported by the transform
+        /// </summary>
+        public int NestDepth { get; private set; }
+
+        /// <summary>
+        /// when true, bold and italic require non-word characters on either side
+        /// WARNING: this is a significant deviation from the markdown spec
+        /// </summary>
+        public bool StrictBoldItalic { get; private set; }
+
+        /// <summary>
+        /// Tabs are automatically converted to spaces as part of the transform
+        /// this variable determines how "wide" those tabs become in spaces
+        /// </summary>
+        public int TabWidth { get; private set; }
+    }
+}
diff --git a/src/MarkdownSharp/MarkdownSharp.csproj b/src/MarkdownSharp/MarkdownSharp.csproj
new file mode 100644
index 0000000..3f2ae8a
--- /dev/null
+++ b/src/MarkdownSharp/MarkdownSharp.csproj
@@ -0,0 +1,95 @@
+
+
+
+ Debug
+ AnyCPU
+ 9.0.30729
+ 2.0
+ {37619116-CCE8-465A-8B1F-081CA53364BB}
+ Library
+ Properties
+ MarkdownSharp
+ MarkdownSharp
+ v3.5
+ 512
+
+
+
+
+ 3.5
+
+ publish\
+ true
+ Disk
+ false
+ Foreground
+ 7
+ Days
+ false
+ false
+ true
+ 0
+ 1.0.0.%2a
+ false
+ false
+ true
+
+
+ true
+ full
+ false
+ bin\Debug\
+ DEBUG;TRACE
+ prompt
+ 4
+ AllRules.ruleset
+
+
+ pdbonly
+ true
+ bin\Release\
+ TRACE
+ prompt
+ 4
+ AllRules.ruleset
+
+
+
+
+
+ 3.5
+
+
+
+
+
+
+
+
+
+
+
+ False
+ .NET Framework 3.5 SP1 Client Profile
+ false
+
+
+ False
+ .NET Framework 3.5 SP1
+ true
+
+
+ False
+ Windows Installer 3.1
+ true
+
+
+
+
+
\ No newline at end of file
diff --git a/src/MarkdownWin/MainForm.cs b/src/MarkdownWin/MainForm.cs
index 54c7155..c1f4227 100644
--- a/src/MarkdownWin/MainForm.cs
+++ b/src/MarkdownWin/MainForm.cs
@@ -90,20 +90,9 @@ private void RefreshPreview(string fileName)
private void BrowserDocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
- const string htmlTemplate = "{1}";
-
if (browser.Document != null)
{
- string stylesheet;
- using (var stream = Assembly.GetExecutingAssembly().GetManifestResourceStream(this.GetType(), "markdown.css"))
- using (var reader = new StreamReader(stream))
- {
- stylesheet = reader.ReadToEnd();
- }
-
- string html = string.Format(htmlTemplate, stylesheet, _pendingPreviewHtml);
- browser.Document.Write(html);
-
+ browser.Document.Write(Stylizer.Run(_pendingPreviewHtml));
Debug.WriteLine("Document Completed and written to.");
}
}
diff --git a/src/MarkdownWin/MarkdownWin.csproj b/src/MarkdownWin/MarkdownWin.csproj
index 4035940..4a41fb8 100644
--- a/src/MarkdownWin/MarkdownWin.csproj
+++ b/src/MarkdownWin/MarkdownWin.csproj
@@ -1,104 +1,109 @@
-
-
-
- Debug
- x86
- 8.0.30703
- 2.0
- {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}
- WinExe
- Properties
- MarkdownWin
- MarkdownWin
- v4.0
-
-
- 512
-
-
- x86
- true
- full
- false
- bin\Debug\
- DEBUG;TRACE
- prompt
- 4
-
-
- x86
- pdbonly
- true
- bin\Release\
- TRACE
- prompt
- 4
-
-
-
-
-
-
-
- ..\..\packages\MarkdownSharp.1.13.0.0\lib\35\MarkdownSharp.dll
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Form
-
-
- MainForm.cs
-
-
- ResXFileCodeGenerator
- Resources.Designer.cs
- Designer
-
-
- True
- Resources.resx
- True
-
-
- MainForm.cs
-
-
-
-
- SettingsSingleFileGenerator
- Settings.Designer.cs
-
-
- True
- Settings.settings
- True
-
-
-
-
-
-
-
-
-
+
+
+
+ Debug
+ x86
+ 8.0.30703
+ 2.0
+ {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}
+ WinExe
+ Properties
+ MarkdownWin
+ MarkdownWin
+ v4.0
+
+
+ 512
+
+
+ x86
+ true
+ full
+ false
+ bin\Debug\
+ DEBUG;TRACE
+ prompt
+ 4
+ false
+
+
+ x86
+ pdbonly
+ true
+ bin\Release\
+ TRACE
+ prompt
+ 4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Form
+
+
+ MainForm.cs
+
+
+
+ ResXFileCodeGenerator
+ Resources.Designer.cs
+ Designer
+
+
+ True
+ Resources.resx
+ True
+
+
+ MainForm.cs
+
+
+
+
+ SettingsSingleFileGenerator
+ Settings.Designer.cs
+
+
+ True
+ Settings.settings
+ True
+
+
+
+
+
+
+
+
+
+
+ {37619116-CCE8-465A-8B1F-081CA53364BB}
+ MarkdownSharp
+
+
+
+ -->
\ No newline at end of file
diff --git a/src/MarkdownWin/MarkdownWin.csproj.user b/src/MarkdownWin/MarkdownWin.csproj.user
new file mode 100644
index 0000000..27dd4ad
--- /dev/null
+++ b/src/MarkdownWin/MarkdownWin.csproj.user
@@ -0,0 +1,6 @@
+
+
+
+ -in G:\Dev\Projects\Commercial\MarkdownWin\readme.md
+
+
\ No newline at end of file
diff --git a/src/MarkdownWin/Program.cs b/src/MarkdownWin/Program.cs
index e67cbf6..c5c8112 100644
--- a/src/MarkdownWin/Program.cs
+++ b/src/MarkdownWin/Program.cs
@@ -1,20 +1,106 @@
-using System;
-using System.Linq;
-using System.Windows.Forms;
-
-namespace MarkdownWin
-{
- static class Program
- {
- ///
- /// The main entry point for the application.
- ///
- [STAThread]
- static void Main()
- {
- Application.EnableVisualStyles();
- Application.SetCompatibleTextRenderingDefault(false);
- Application.Run(new MainForm());
- }
- }
-}
+using System;
+using System.Linq;
+using System.Windows.Forms;
+using System.Collections.Generic;
+using MarkdownSharp;
+using System.IO;
+
+namespace MarkdownWin {
+    static class Program {
+        /// <summary>
+        /// The main entry point for the application: command-line mode when arguments are given, the GUI otherwise.
+        /// </summary>
+        [STAThread]
+        static void Main(string[] args) {
+            if (args != null && args.Length > 0) {
+                RunCli(args);
+            } else {
+                RunForm();
+            }
+        }
+
+        /// <summary>Starts the Windows Forms user interface.</summary>
+        static void RunForm() {
+            Application.EnableVisualStyles();
+            Application.SetCompatibleTextRenderingDefault(false);
+            Application.Run(new MainForm());
+        }
+
+        /// <summary>Converts the markdown file named by -in to HTML; any failure is printed together with the usage text.</summary>
+        static void RunCli(string[] args) {
+            const string inArg = "in";
+            const string outArg = "out";
+            const string rawArg = "raw";
+            const string helpArg = "help";
+
+            try {
+                var parsedArgs = ParseArgsRaw(args);
+                if (parsedArgs.ContainsKey(helpArg)) {
+                    PrintCliHelp();
+                    return;
+                }
+
+                // Report a missing or valueless -in by name rather than as a dictionary/index error.
+                List<string> inValues;
+                if (!parsedArgs.TryGetValue(inArg, out inValues) || inValues.Count == 0)
+                    throw new ArgumentException("-in requires the path to a markdown file");
+
+                var inPath = GetAbsPath(inValues[0]);
+                var outPath = GetAbsPath(GetOutPath(inPath, outArg, parsedArgs));
+                var result = new Markdown().Transform(File.ReadAllText(inPath));
+
+                // -raw writes the transform result as-is; otherwise it is passed through Stylizer first.
+                var output = parsedArgs.ContainsKey(rawArg) ? result : Stylizer.Run(result);
+                File.WriteAllText(outPath, output, System.Text.Encoding.UTF8);
+            } catch (Exception ex) {
+                PrintCliHelp(ex.Message);
+            }
+        }
+
+        /// <summary>Returns the -out value when given, otherwise the full input path with its extension replaced by ".html".</summary>
+        private static string GetOutPath(string inPath, string outArg, Dictionary<string, List<string>> parsedArgs) {
+            List<string> outValues;
+            if (parsedArgs.TryGetValue(outArg, out outValues)) {
+                if (outValues.Count == 0)
+                    throw new ArgumentException("-out requires the path to the output html file");
+                return outValues[0];
+            }
+            var fi = new FileInfo(inPath);
+            return fi.FullName.Substring(0, fi.FullName.Length - fi.Extension.Length) + ".html";
+        }
+
+        /// <summary>Returns rooted paths unchanged and resolves relative ones against the current directory.</summary>
+        static string GetAbsPath(string path) {
+            return Path.IsPathRooted(path) ? path : Path.Combine(Environment.CurrentDirectory, path);
+        }
+
+        /// <summary>Prints the error message, if any, followed by the command-line usage.</summary>
+        static void PrintCliHelp(string errorMsg = "") {
+            if (!String.IsNullOrEmpty(errorMsg))
+                Console.WriteLine("Error: " + errorMsg);
+
+            Console.WriteLine("Usage: -in <file> [-out <file>] [-raw]");
+            Console.WriteLine(" -in: Path to the markdown file");
+            Console.WriteLine(" -out: (Optional) Path to the output html file (uses <in-file>.html by default)");
+            Console.WriteLine(" -raw: (Optional) Don't stylize the output.");
+        }
+
+        /// <summary>Groups each value under the "-name" flag that precedes it; a value before any flag is a syntax error.</summary>
+        static Dictionary<string, List<string>> ParseArgsRaw(string[] args) {
+            var parsedArgs = new Dictionary<string, List<string>>();
+            var prev = string.Empty;
+
+            foreach (var curr in args) {
+                if (curr.StartsWith("-", StringComparison.Ordinal)) {
+                    prev = curr.TrimStart('-');
+                    parsedArgs[prev] = new List<string>(2);
+                } else if (!String.IsNullOrEmpty(prev)) {
+                    parsedArgs[prev].Add(curr);
+                } else {
+                    throw new ArgumentException("invalid syntax");
+                }
+            }
+            return parsedArgs;
+        }
+    }
+}
diff --git a/src/MarkdownWin/Stylizer.cs b/src/MarkdownWin/Stylizer.cs
new file mode 100644
index 0000000..9a3192b
--- /dev/null
+++ b/src/MarkdownWin/Stylizer.cs
@@ -0,0 +1,28 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Reflection;
+using System.IO;
+
+namespace MarkdownWin {
+    class Stylizer {
+        /// <summary>Wraps an HTML fragment in a page styled by the embedded markdown.css, or by the file at cssOverridePath when one is given.</summary>
+        public static string Run(string html, string cssOverridePath = "") {
+            // {0} receives the stylesheet and {1} the fragment; a template without {0} silently drops the loaded CSS.
+            const string htmlTemplate =
+                "<html><head><style type=\"text/css\">{0}</style></head><body>{1}</body></html>";
+            string stylesheet;
+            if (String.IsNullOrEmpty(cssOverridePath)) {
+                using (var stream = Assembly.GetExecutingAssembly().GetManifestResourceStream(typeof(Stylizer), "markdown.css")) {
+                    // GetManifestResourceStream returns null when the resource is not embedded; fail with a clear message.
+                    if (stream == null) throw new InvalidOperationException("Embedded resource markdown.css was not found.");
+                    using (var reader = new StreamReader(stream)) stylesheet = reader.ReadToEnd();
+                }
+            } else {
+                stylesheet = File.ReadAllText(cssOverridePath);
+            }
+            return string.Format(htmlTemplate, stylesheet, html);
+        }
+    }
+}
diff --git a/src/MarkdownWin/markdown.css b/src/MarkdownWin/markdown.css
index 69aae2a..855c0dd 100644
--- a/src/MarkdownWin/markdown.css
+++ b/src/MarkdownWin/markdown.css
@@ -1,12 +1,18 @@
-body
+@import url(http://fonts.googleapis.com/css?family=Open+Sans:300);
+
+body
{
margin: 0 auto;
- font-family: Georgia, Palatino, serif;
+ font-family: Arial, sans-serif;
color: #444444;
line-height: 1;
max-width: 960px;
padding: 30px;
}
+h1, h2, h3
+{
+ font-family: "Open Sans", serif;
+}
h1, h2, h3, h4
{
color: #111111;
@@ -40,24 +46,24 @@ h5
}
a
{
- color: #0099ff;
+ color: #64B6B1;
+ text-decoration: none;
margin: 0;
padding: 0;
vertical-align: baseline;
}
a:hover
{
- text-decoration: none;
- color: #ff6600;
+ color: #46433A;
}
a:visited
{
- color: purple;
+ color: #CE534D;
}
ul, ol
{
padding: 0;
- margin: 1em;
+ margin: 1.2em;
}
li
{
@@ -120,6 +126,9 @@ hr
{
width: 540px;
text-align: left;
- margin: 0 auto 0 0;
- color: #999;
+ margin: 1.2em auto 1.2em 0;
+ color: #EEE;
+ background-color: #EEE;
+ border: 0;
+ height: 1px;
}