diff --git a/MarkdownWin.sln b/MarkdownWin.sln index 6b8cee8..6cfe7d8 100644 --- a/MarkdownWin.sln +++ b/MarkdownWin.sln @@ -1,20 +1,42 @@ - -Microsoft Visual Studio Solution File, Format Version 11.00 -# Visual Studio 2010 -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MarkdownWin", "src\MarkdownWin\MarkdownWin.csproj", "{F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}" -EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug|x86 = Debug|x86 - Release|x86 = Release|x86 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}.Debug|x86.ActiveCfg = Debug|x86 - {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}.Debug|x86.Build.0 = Debug|x86 - {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}.Release|x86.ActiveCfg = Release|x86 - {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}.Release|x86.Build.0 = Release|x86 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection -EndGlobal + +Microsoft Visual Studio Solution File, Format Version 11.00 +# Visual Studio 2010 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MarkdownWin", "src\MarkdownWin\MarkdownWin.csproj", "{F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MarkdownSharp", "src\MarkdownSharp\MarkdownSharp.csproj", "{37619116-CCE8-465A-8B1F-081CA53364BB}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|Mixed Platforms = Debug|Mixed Platforms + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|Mixed Platforms = Release|Mixed Platforms + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}.Debug|Any CPU.ActiveCfg = Debug|x86 + {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}.Debug|Mixed Platforms.ActiveCfg = Debug|x86 + 
{F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}.Debug|Mixed Platforms.Build.0 = Debug|x86 + {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}.Debug|x86.ActiveCfg = Debug|x86 + {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}.Debug|x86.Build.0 = Debug|x86 + {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}.Release|Any CPU.ActiveCfg = Release|x86 + {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}.Release|Mixed Platforms.ActiveCfg = Release|x86 + {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}.Release|Mixed Platforms.Build.0 = Release|x86 + {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}.Release|x86.ActiveCfg = Release|x86 + {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F}.Release|x86.Build.0 = Release|x86 + {37619116-CCE8-465A-8B1F-081CA53364BB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {37619116-CCE8-465A-8B1F-081CA53364BB}.Debug|Any CPU.Build.0 = Debug|Any CPU + {37619116-CCE8-465A-8B1F-081CA53364BB}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU + {37619116-CCE8-465A-8B1F-081CA53364BB}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU + {37619116-CCE8-465A-8B1F-081CA53364BB}.Debug|x86.ActiveCfg = Debug|Any CPU + {37619116-CCE8-465A-8B1F-081CA53364BB}.Release|Any CPU.ActiveCfg = Release|Any CPU + {37619116-CCE8-465A-8B1F-081CA53364BB}.Release|Any CPU.Build.0 = Release|Any CPU + {37619116-CCE8-465A-8B1F-081CA53364BB}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU + {37619116-CCE8-465A-8B1F-081CA53364BB}.Release|Mixed Platforms.Build.0 = Release|Any CPU + {37619116-CCE8-465A-8B1F-081CA53364BB}.Release|x86.ActiveCfg = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/dist/MarkdownWin.exe b/dist/MarkdownWin.exe index 8a06d8c..d45ca07 100644 Binary files a/dist/MarkdownWin.exe and b/dist/MarkdownWin.exe differ diff --git a/packages/MarkdownSharp.1.13.0.0/MarkdownSharp.1.13.0.0.nupkg b/packages/MarkdownSharp.1.13.0.0/MarkdownSharp.1.13.0.0.nupkg deleted file mode 100644 index c41f524..0000000 Binary files 
a/packages/MarkdownSharp.1.13.0.0/MarkdownSharp.1.13.0.0.nupkg and /dev/null differ diff --git a/packages/MarkdownSharp.1.13.0.0/lib/35/MarkdownSharp.dll b/packages/MarkdownSharp.1.13.0.0/lib/35/MarkdownSharp.dll deleted file mode 100644 index b779a46..0000000 Binary files a/packages/MarkdownSharp.1.13.0.0/lib/35/MarkdownSharp.dll and /dev/null differ diff --git a/packages/MarkdownSharp.1.13.0.0/lib/35/MarkdownSharp.pdb b/packages/MarkdownSharp.1.13.0.0/lib/35/MarkdownSharp.pdb deleted file mode 100644 index a6695c6..0000000 Binary files a/packages/MarkdownSharp.1.13.0.0/lib/35/MarkdownSharp.pdb and /dev/null differ diff --git a/packages/MarkdownSharp.1.13.0.0/lib/35/MarkdownSharp.xml b/packages/MarkdownSharp.1.13.0.0/lib/35/MarkdownSharp.xml deleted file mode 100644 index b0c7706..0000000 --- a/packages/MarkdownSharp.1.13.0.0/lib/35/MarkdownSharp.xml +++ /dev/null @@ -1,433 +0,0 @@ - - - - MarkdownSharp - - - - - when true, (most) bare plain URLs are auto-hyperlinked - WARNING: this is a significant deviation from the markdown spec - - - - - when true, RETURN becomes a literal newline - WARNING: this is a significant deviation from the markdown spec - - - - - use ">" for HTML output, or " />" for XHTML output - - - - - when true, problematic URL characters like [, ], (, and so forth will be encoded - WARNING: this is a significant deviation from the markdown spec - - - - - when false, email addresses will never be auto-linked - WARNING: this is a significant deviation from the markdown spec - - - - - when true, bold and italic require non-word characters on either side - WARNING: this is a significant deviation from the markdown spec - - - - - Markdown is a text-to-HTML conversion tool for web writers. - Markdown allows you to write using an easy-to-read, easy-to-write plain text format, - then convert it to structurally valid XHTML (or HTML). 
- - - - - maximum nested depth of [] and () supported by the transform; implementation detail - - - - - Tabs are automatically converted to spaces as part of the transform - this constant determines how "wide" those tabs become in spaces - - - - - Create a new Markdown instance using default options - - - - - Create a new Markdown instance and optionally load options from the supplied options parameter. - - - - - Create a new Markdown instance and optionally load options from a configuration - file. There they should be stored in the appSettings section, available options are: - - Markdown.StrictBoldItalic (true/false) - Markdown.EmptyElementSuffix (">" or " />" without the quotes) - Markdown.LinkEmails (true/false) - Markdown.AutoNewLines (true/false) - Markdown.AutoHyperlink (true/false) - Markdown.EncodeProblemUrlCharacters (true/false) - - - - - In the static constuctor we'll initialize what stays the same across all transforms. - - - - - Transforms the provided Markdown-formatted text to HTML; - see http://en.wikipedia.org/wiki/Markdown - - - The order in which other subs are called here is - essential. Link and image substitutions need to happen before - EscapeSpecialChars(), so that any *'s or _'s in the a - and img tags get encoded. - - - - - Perform transformations that form block-level tags like paragraphs, headers, and list items. - - - - - Perform transformations that occur *within* block-level tags like paragraphs, headers, and list items. - - - - - splits on two or more newlines, to form "paragraphs"; - each paragraph is then unhashed (if it is a hash) or wrapped in HTML p tag - - - - - Reusable pattern to match balanced [brackets]. See Friedl's - "Mastering Regular Expressions", 2nd Ed., pp. 328-331. - - - - - Reusable pattern to match balanced (parens). See Friedl's - "Mastering Regular Expressions", 2nd Ed., pp. 328-331. - - - - - Strips link definitions from text, stores the URLs and titles in hash references. 
- - - ^[id]: url "optional title" - - - - - derived pretty much verbatim from PHP Markdown - - - - - replaces any block-level HTML blocks with hash entries - - - - - returns an array of HTML tokens comprising the input string. Each token is - either a tag (possibly with nested, tags contained therein, such - as <a href="<MTFoo>">, or a run of text between tags. Each element of the - array is a two-element array; the first is either 'tag' or 'text'; the second is - the actual value. - - - - - Turn Markdown link shortcuts into HTML anchor tags - - - [link text](url "title") - [link text][id] - [id] - - - - - Turn Markdown image shortcuts into HTML img tags. - - - ![alt text][id] - ![alt text](url "optional title") - - - - - Turn Markdown headers into HTML header tags - - - Header 1 - ======== - - Header 2 - -------- - - # Header 1 - ## Header 2 - ## Header 2 with closing hashes ## - ... - ###### Header 6 - - - - - Turn Markdown horizontal rules into HTML hr tags - - - *** - * * * - --- - - - - - - - - - Turn Markdown lists into HTML ul and ol and li tags - - - - - Process the contents of a single ordered or unordered list, splitting it - into individual list items. 
- - - - - /// Turn Markdown 4-space indented code into HTML pre code blocks - - - - - Turn Markdown `code spans` into HTML code tags - - - - - Turn Markdown *italics* and **bold** into HTML strong and em tags - - - - - Turn markdown line breaks (two space at end of line) into HTML break tags - - - - - Turn Markdown > quoted blocks into HTML blockquote blocks - - - - - Turn angle-delimited URLs into HTML anchor tags - - - <http://www.example.com> - - - - - Remove one level of line-leading spaces - - - - - encodes email address randomly - roughly 10% raw, 45% hex, 45% dec - note that @ is always encoded and : never is - - - - - Encode/escape certain Markdown characters inside code blocks and spans where they are literals - - - - - Encode any ampersands (that aren't part of an HTML entity) and left or right angle brackets - - - - - Encodes any escaped characters such as \`, \*, \[ etc - - - - - swap back in all the special characters we've hidden - - - - - escapes Bold [ * ] and Italic [ _ ] characters - - - - - hex-encodes some unusual "problem" chars in URLs to avoid URL detection problems - - - - - Within tags -- meaning between < and > -- encode [\ ` * _] so they - don't conflict with their use in Markdown for code, italics and strong. - We're replacing each such character with its corresponding hash - value; this is likely overkill, but it should prevent us from colliding - with the escape values by accident. - - - - - convert all tabs to _tabWidth spaces; - standardizes line endings from DOS (CR LF) or Mac (CR) to UNIX (LF); - makes sure text ends with a couple of newlines; - removes any blank lines (only spaces) in the text - - - - - this is to emulate what's evailable in PHP - - - - - current version of MarkdownSharp; - see http://code.google.com/p/markdownsharp/ for the latest code or to contribute - - - - - Static constructor - - - In the static constuctor we'll initialize what stays the same across all transforms. 
- - - - - - Strips link definitions from text, stores the URLs and titles in hash references. - - Link defs are in the form: ^[id]: url "optional title" - - - - Hashify HTML blocks - - - - - These are all the transformations that form block-level - tags like paragraphs, headers, and list items. - - - - - These are all the transformations that occur *within* block-level - tags like paragraphs, headers, and list items. - - - - - - - - Process the contents of a single ordered or unordered list, splitting it - into individual list items. - - - - - Encode/escape certain characters inside Markdown code runs. - - - The point is that in code, these characters are literals, and lose their - special Markdown meanings. - - - - - Smart processing for ampersands and angle brackets that need to be encoded. - - - - - Swap back in all the special characters we've hidden. - - - - - Remove one level of line-leading tabs or spaces - - - - - This is to emulate what's evailable in PHP - - - - - - - - Calculate an MD5 hash of an arbitrary string - - - - - - - when true, (most) bare plain URLs are auto-hyperlinked - WARNING: this is a significant deviation from the markdown spec - - - - - when true, RETURN becomes a literal newline - WARNING: this is a significant deviation from the markdown spec - - - - - use ">" for HTML output, or " />" for XHTML output - - - - - when true, problematic URL characters like [, ], (, and so forth will be encoded - WARNING: this is a significant deviation from the markdown spec - - - - - when false, email addresses will never be auto-linked - WARNING: this is a significant deviation from the markdown spec - - - - - when true, bold and italic require non-word characters on either side - WARNING: this is a significant deviation from the markdown spec - - - - diff --git a/readme.html b/readme.html new file mode 100644 index 0000000..1983bf9 --- /dev/null +++ b/readme.html @@ -0,0 +1,179 @@ +

MarkdownWin

+ +
+ +

A Markdown editor and compiler with live-preview for Windows.

+ +

Download the binary by tapping right here.

+ +

I built this because MarkedApp isn't available for Windows. +Like MarkedApp, MarkdownWin will monitor a file for changes and keep the live preview +in sync with your file. You can use whatever editor you want.

+ +

We use MarkdownSharp to render the live preview.

+ +

Features

+ + + +
+ +

Screenshot

+ +

Dev Requirements
+VS 2010, NuGet

+ +

Run psake.bat to build for release and update the /dist folder.

+ +

Notes

+ +

The icon was borrowed without permission from dashkards.com.

+ +

ICO created with converticon.com.

+ \ No newline at end of file diff --git a/readme.md b/readme.md index 0898927..a87ca91 100644 --- a/readme.md +++ b/readme.md @@ -1,6 +1,8 @@ ## MarkdownWin -*A [Markdown](http://daringfireball.net/projects/markdown/) editor with live-preview for Windows.* +----------------------------------------------------- + +*A [Markdown](http://daringfireball.net/projects/markdown/) editor and compiler with live-preview for Windows.* **Download the binary by tapping [right here](https://github.com/jpoehls/MarkdownWin/raw/master/dist/MarkdownWin.exe).** @@ -10,14 +12,23 @@ in sync with your file. You can use whatever editor you want. We use [MarkdownSharp](http://code.google.com/p/markdownsharp/) to render the live preview. + ### Features * Preview your Markdown file live, while you edit it. * Copy the HTML source to your clipboard with `CTRL+C`. * Print it with `CTRL+P`. * Float the preview window on top of other applications for easy viewing. +* MarkdownWin can also be used as a Markdown build tool via its simple CLI interface: + + usage -in [-out ] [-raw] + -in: Path to the markdown file + -out: (Optional) Path to the output html file (uses .html by default) + -raw: (Optional) Don't stylize the output.
+ +--------------------------------------------------------- -![Screenshot](https://raw.github.com/jpoehls/MarkdownWin/master/screenshot.png) +![Screenshot](https://raw.github.com/cynosura/MarkdownWin/master/screenshot.png) **Dev Requirements** VS 2010, NuGet diff --git a/screenshot.png b/screenshot.png index c0083b2..b4c5922 100644 Binary files a/screenshot.png and b/screenshot.png differ diff --git a/src/MarkdownSharp/AssemblyInfo.cs b/src/MarkdownSharp/AssemblyInfo.cs new file mode 100644 index 0000000..c501d4a --- /dev/null +++ b/src/MarkdownSharp/AssemblyInfo.cs @@ -0,0 +1,36 @@ +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +// General Information about an assembly is controlled through the following +// set of attributes. Change these attribute values to modify the information +// associated with an assembly. +[assembly: AssemblyTitle("MarkdownSharp")] +[assembly: AssemblyDescription("")] +[assembly: AssemblyConfiguration("")] +[assembly: AssemblyCompany("")] +[assembly: AssemblyProduct("MarkdownSharp")] +[assembly: AssemblyCopyright("Copyright © 2010")] +[assembly: AssemblyTrademark("")] +[assembly: AssemblyCulture("")] + +// Setting ComVisible to false makes the types in this assembly not visible +// to COM components. If you need to access a type in this assembly from +// COM, set the ComVisible attribute to true on that type. 
+[assembly: ComVisible(false)] + +// The following GUID is for the ID of the typelib if this project is exposed to COM +[assembly: Guid("82206d27-3b6e-4e1a-a971-8d8ff12f6044")] + +// Version information for an assembly consists of the following four values: +// +// Major Version +// Minor Version +// Build Number +// Revision +// +[assembly: AssemblyVersion("1.007")] +[assembly: AssemblyFileVersion("1.007")] + +//For unit testing +[assembly: InternalsVisibleTo("MarkdownSharpTests")] \ No newline at end of file diff --git a/src/MarkdownSharp/Escapes.cs b/src/MarkdownSharp/Escapes.cs new file mode 100644 index 0000000..ba66f16 --- /dev/null +++ b/src/MarkdownSharp/Escapes.cs @@ -0,0 +1,164 @@ +using System; +using System.Collections.Generic; +using System.Text; +using System.Text.RegularExpressions; + +namespace MarkdownSharp +{ + internal static class Escapes + { + private const string _escapeCharacters = @"\`*_{}[]()>#+-.!"; + private static readonly KeyValuePair[] _escapeTable; + private static readonly Regex _hashFinder; + + static Escapes() + { + _escapeTable = new KeyValuePair[_escapeCharacters.Length]; + string pattern = ""; + for (int i = 0; i < _escapeCharacters.Length; ++i) + { + char c = _escapeCharacters[i]; + string hash = c.ToString().GetHashCode().ToString(); + _escapeTable[i] = new KeyValuePair(c, hash); + + if (pattern != "") pattern += "|(" + hash + ")"; + else pattern += "(" + hash + ")"; + } + _hashFinder = new Regex(pattern, RegexOptions.Compiled | RegexOptions.ExplicitCapture); + } + + /// + /// Gets the escape code for a single character + /// + public static string get(char c) + { + foreach(var pair in _escapeTable) + if (pair.Key == c) + return pair.Value; + throw new IndexOutOfRangeException("The requested character can not be escaped"); + } + + /// + /// Gets the character that a hash refers to + /// + private static char getInverse(string s) + { + foreach (var pair in _escapeTable) + if (pair.Value == s) + return pair.Key; + throw new 
IndexOutOfRangeException("The requested hash can not be found"); + } + + /// + /// Encodes any escaped characters such as \`, \*, \[ etc + /// + public static string BackslashEscapes(string text) + { + int len = text.Length, first = 0, i = 0; + var sb = new StringBuilder(len); + while (i < len) + { + if (text[i] == '\\' && i + 1 < len && Contains(_escapeCharacters, text[i + 1])) + { + sb.Append(text, first, i - first); + sb.Append(get(text[++i])); + first = ++i; + } + else ++i; + } + if (first == 0) return text; + sb.Append(text, first, i - first); + return sb.ToString(); + } + + /// + /// Encodes Bold [ * ] and Italic [ _ ] characters + /// + public static string BoldItalic(string text) + { + int len = text.Length, first = 0, i = 0; + var sb = new StringBuilder(len); + while (i < len) + { + if ('*' == text[i]) + { + sb.Append(text, first, i - first); + sb.Append(get('*')); + first = ++i; + } + else if ('_' == text[i]) + { + sb.Append(text, first, i - first); + sb.Append(get('_')); + first = ++i; + } + else ++i; + } + if (first == 0) return text; + sb.Append(text, first, i - first); + return sb.ToString(); + } + + /// + /// Encodes all chars of the second parameter. 
+ /// + public static string Escape(string text, string escapes) + { + int len = text.Length, first = 0, i = 0; + var sb = new StringBuilder(len); + while (i < len) + { + if (Contains(escapes, text[i])) + { + sb.Append(text, first, i - first); + sb.Append(get(text[i])); + first = ++i; + } + else ++i; + } + if (first == 0) return text; + sb.Append(text, first, i - first); + return sb.ToString(); + } + + /// + /// encodes problem characters in URLs, such as + /// * _ and optionally ' () [] : + /// this is to avoid problems with markup later + /// + public static string ProblemUrlChars(string url) + { + url = url.Replace("*", "%2A"); + url = url.Replace("_", "%5F"); + url = url.Replace("'", "%27"); + url = url.Replace("(", "%28"); + url = url.Replace(")", "%29"); + url = url.Replace("[", "%5B"); + url = url.Replace("]", "%5D"); + if (url.Length > 7 && Contains(url.Substring(7), ':')) + { + // replace any colons in the body of the URL that are NOT followed by 2 or more numbers + url = url.Substring(0, 7) + Regex.Replace(url.Substring(7), @":(?!\d{2,})", "%3A"); + } + + return url; + } + + private static bool Contains(string s, char c) + { + int len = s.Length; + for (int i = 0; i < len; ++i) + if (s[i] == c) + return true; + return false; + } + + /// + /// swap back in all the special characters we've hidden + /// + public static string Unescape(string text) + { + return _hashFinder.Replace(text, match => getInverse(match.Value).ToString()); + } + } +} \ No newline at end of file diff --git a/src/MarkdownSharp/Markdown.cs b/src/MarkdownSharp/Markdown.cs new file mode 100644 index 0000000..9133c54 --- /dev/null +++ b/src/MarkdownSharp/Markdown.cs @@ -0,0 +1,1754 @@ +/* + * MarkdownSharp + * ------------- + * a C# Markdown processor + * + * Markdown is a text-to-HTML conversion tool for web writers + * Copyright (c) 2004 John Gruber + * http://daringfireball.net/projects/markdown/ + * + * Markdown.NET + * Copyright (c) 2004-2009 Milan Negovan + * 
http://www.aspnetresources.com + * http://aspnetresources.com/blog/markdown_announced.aspx + * + * MarkdownSharp + * Copyright (c) 2009-2010 Jeff Atwood + * http://stackoverflow.com + * http://www.codinghorror.com/blog/ + * http://code.google.com/p/markdownsharp/ + * + * History: Milan ported the Markdown processor to C#. He granted license to me so I can open source it + * and let the community contribute to and improve MarkdownSharp. + * + */ + +#region Copyright and license + +/* + +Copyright (c) 2009 - 2010 Jeff Atwood + +http://www.opensource.org/licenses/mit-license.php + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +Copyright (c) 2003-2004 John Gruber + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. 
+*/ + +#endregion + +using System; +using System.Collections.Generic; +using System.Configuration; +using System.Text; +using System.Text.RegularExpressions; + +namespace MarkdownSharp +{ + + public class MarkdownOptions + { + /// + /// when true, (most) bare plain URLs are auto-hyperlinked + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool AutoHyperlink { get; set; } + /// + /// when true, RETURN becomes a literal newline + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool AutoNewlines { get; set; } + /// + /// use ">" for HTML output, or " />" for XHTML output + /// + public string EmptyElementSuffix { get; set; } + /// + /// when true, problematic URL characters like [, ], (, and so forth will be encoded + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool EncodeProblemUrlCharacters { get; set; } + /// + /// when false, email addresses will never be auto-linked + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool LinkEmails { get; set; } + /// + /// when true, bold and italic require non-word characters on either side + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool StrictBoldItalic { get; set; } + } + + + /// + /// Markdown is a text-to-HTML conversion tool for web writers. + /// Markdown allows you to write using an easy-to-read, easy-to-write plain text format, + /// then convert it to structurally valid XHTML (or HTML). + /// + public class Markdown + { + private const string _version = "1.13"; + + #region Constructors and Options + + /// + /// Create a new Markdown instance using default options + /// + public Markdown() : this(false) + { + } + + /// + /// Create a new Markdown instance and optionally load options from a configuration + /// file. 
There they should be stored in the appSettings section, available options are: + /// + /// Markdown.StrictBoldItalic (true/false) + /// Markdown.EmptyElementSuffix (">" or " />" without the quotes) + /// Markdown.LinkEmails (true/false) + /// Markdown.AutoNewlines (true/false) + /// Markdown.AutoHyperlink (true/false) + /// Markdown.EncodeProblemUrlCharacters (true/false) + /// + /// + public Markdown(bool loadOptionsFromConfigFile) + { + if (!loadOptionsFromConfigFile) return; + + var settings = ConfigurationManager.AppSettings; + foreach (string key in settings.Keys) + { + switch (key) + { + case "Markdown.AutoHyperlink": + _autoHyperlink = Convert.ToBoolean(settings[key]); + break; + case "Markdown.AutoNewlines": + _autoNewlines = Convert.ToBoolean(settings[key]); + break; + case "Markdown.EmptyElementSuffix": + _emptyElementSuffix = settings[key]; + break; + case "Markdown.EncodeProblemUrlCharacters": + _encodeProblemUrlCharacters = Convert.ToBoolean(settings[key]); + break; + case "Markdown.LinkEmails": + _linkEmails = Convert.ToBoolean(settings[key]); + break; + case "Markdown.StrictBoldItalic": + _strictBoldItalic = Convert.ToBoolean(settings[key]); + break; + } + } + } + + /// + /// Create a new Markdown instance and set the options from the MarkdownOptions object.
+ /// + public Markdown(MarkdownOptions options) + { + _autoHyperlink = options.AutoHyperlink; + _autoNewlines = options.AutoNewlines; + _emptyElementSuffix = options.EmptyElementSuffix; + _encodeProblemUrlCharacters = options.EncodeProblemUrlCharacters; + _linkEmails = options.LinkEmails; + _strictBoldItalic = options.StrictBoldItalic; + } + + + /// + /// use ">" for HTML output, or " />" for XHTML output + /// + public string EmptyElementSuffix + { + get { return _emptyElementSuffix; } + set { _emptyElementSuffix = value; } + } + private string _emptyElementSuffix = " />"; + + /// + /// when false, email addresses will never be auto-linked + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool LinkEmails + { + get { return _linkEmails; } + set { _linkEmails = value; } + } + private bool _linkEmails = true; + + /// + /// when true, bold and italic require non-word characters on either side + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool StrictBoldItalic + { + get { return _strictBoldItalic; } + set { _strictBoldItalic = value; } + } + private bool _strictBoldItalic = false; + + /// + /// when true, RETURN becomes a literal newline + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool AutoNewLines + { + get { return _autoNewlines; } + set { _autoNewlines = value; } + } + private bool _autoNewlines = false; + + /// + /// when true, (most) bare plain URLs are auto-hyperlinked + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool AutoHyperlink + { + get { return _autoHyperlink; } + set { _autoHyperlink = value; } + } + private bool _autoHyperlink = false; + + /// + /// when true, problematic URL characters like [, ], (, and so forth will be encoded + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool EncodeProblemUrlCharacters + { + get { return _encodeProblemUrlCharacters; } + set { 
_encodeProblemUrlCharacters = value; } + } + private bool _encodeProblemUrlCharacters = false; + + #endregion + + private enum TokenType { Text, Tag } + + private struct Token + { + public Token(TokenType type, string value) + { + this.Type = type; + this.Value = value; + } + public TokenType Type; + public string Value; + } + + /// + /// maximum nested depth of [] and () supported by the transform; implementation detail + /// + private const int _nestDepth = 6; + + /// + /// Tabs are automatically converted to spaces as part of the transform + /// this constant determines how "wide" those tabs become in spaces + /// + private const int _tabWidth = 4; + + private const string _markerUL = @"[*+-]"; + private const string _markerOL = @"\d+[.]"; + + private static readonly Dictionary _escapeTable; + private static readonly Dictionary _invertedEscapeTable; + private static readonly Dictionary _backslashEscapeTable; + + private readonly Dictionary _urls = new Dictionary(); + private readonly Dictionary _titles = new Dictionary(); + private readonly Dictionary _htmlBlocks = new Dictionary(); + + private int _listLevel; + + /// + /// In the static constuctor we'll initialize what stays the same across all transforms. 
+ /// + static Markdown() + { + // Table of hash values for escaped characters: + _escapeTable = new Dictionary(); + _invertedEscapeTable = new Dictionary(); + // Table of hash value for backslash escaped characters: + _backslashEscapeTable = new Dictionary(); + + string backslashPattern = ""; + + foreach (char c in @"\`*_{}[]()>#+-.!") + { + string key = c.ToString(); + string hash = GetHashKey(key); + _escapeTable.Add(key, hash); + _invertedEscapeTable.Add(hash, key); + _backslashEscapeTable.Add(@"\" + key, hash); + backslashPattern += Regex.Escape(@"\" + key) + "|"; + } + + _backslashEscapes = new Regex(backslashPattern.Substring(0, backslashPattern.Length - 1), RegexOptions.Compiled); + } + + /// + /// current version of MarkdownSharp; + /// see http://code.google.com/p/markdownsharp/ for the latest code or to contribute + /// + public string Version + { + get { return _version; } + } + + /// + /// Transforms the provided Markdown-formatted text to HTML; + /// see http://en.wikipedia.org/wiki/Markdown + /// + /// + /// The order in which other subs are called here is + /// essential. Link and image substitutions need to happen before + /// EscapeSpecialChars(), so that any *'s or _'s in the a + /// and img tags get encoded. + /// + public string Transform(string text) + { + if (String.IsNullOrEmpty(text)) return ""; + + Setup(); + + text = Normalize(text); + + text = HashHTMLBlocks(text); + text = StripLinkDefinitions(text); + text = RunBlockGamut(text); + text = Unescape(text); + + Cleanup(); + + return text + "\n"; + } + + + /// + /// Perform transformations that form block-level tags like paragraphs, headers, and list items. + /// + private string RunBlockGamut(string text) + { + text = DoHeaders(text); + text = DoHorizontalRules(text); + text = DoLists(text); + text = DoCodeBlocks(text); + text = DoBlockQuotes(text); + + // We already ran HashHTMLBlocks() before, in Markdown(), but that + // was to escape raw HTML in the original Markdown source. 
This time, + // we're escaping the markup we've just created, so that we don't wrap + //

tags around block-level tags. + text = HashHTMLBlocks(text); + + text = FormParagraphs(text); + + return text; + } + + + ///

+ /// Perform transformations that occur *within* block-level tags like paragraphs, headers, and list items. + /// + private string RunSpanGamut(string text) + { + text = DoCodeSpans(text); + text = EscapeSpecialCharsWithinTagAttributes(text); + text = EscapeBackslashes(text); + + // Images must come first, because ![foo][f] looks like an anchor. + text = DoImages(text); + text = DoAnchors(text); + + // Must come after DoAnchors(), because you can use < and > + // delimiters in inline links like [this](). + text = DoAutoLinks(text); + + text = EncodeAmpsAndAngles(text); + text = DoItalicsAndBold(text); + text = DoHardBreaks(text); + + return text; + } + + private static Regex _newlinesLeadingTrailing = new Regex(@"^\n+|\n+\z", RegexOptions.Compiled); + private static Regex _newlinesMultiple = new Regex(@"\n{2,}", RegexOptions.Compiled); + private static Regex _leadingWhitespace = new Regex(@"^[ ]*", RegexOptions.Compiled); + + /// + /// splits on two or more newlines, to form "paragraphs"; + /// each paragraph is then unhashed (if it is a hash) or wrapped in HTML p tag + /// + private string FormParagraphs(string text) + { + // split on two or more newlines + string[] grafs = _newlinesMultiple.Split(_newlinesLeadingTrailing.Replace(text, "")); + + for (int i = 0; i < grafs.Length; i++) + { + if (grafs[i].StartsWith("\x1A")) + { + // unhashify HTML blocks + grafs[i] = _htmlBlocks[grafs[i]]; + } + else + { + // do span level processing inside the block, then wrap result in

tags + grafs[i] = _leadingWhitespace.Replace(RunSpanGamut(grafs[i]), "

") + "

"; + } + } + + return string.Join("\n\n", grafs); + } + + + private void Setup() + { + // Clear the global hashes. If we don't clear these, you get conflicts + // from other articles when generating a page which contains more than + // one article (e.g. an index page that shows the N most recent + // articles): + _urls.Clear(); + _titles.Clear(); + _htmlBlocks.Clear(); + _listLevel = 0; + } + + private void Cleanup() + { + Setup(); + } + + private static string _nestedBracketsPattern; + + /// + /// Reusable pattern to match balanced [brackets]. See Friedl's + /// "Mastering Regular Expressions", 2nd Ed., pp. 328-331. + /// + private static string GetNestedBracketsPattern() + { + // in other words [this] and [this[also]] and [this[also[too]]] + // up to _nestDepth + if (_nestedBracketsPattern == null) + _nestedBracketsPattern = + RepeatString(@" + (?> # Atomic matching + [^\[\]]+ # Anything other than brackets + | + \[ + ", _nestDepth) + RepeatString( + @" \] + )*" + , _nestDepth); + return _nestedBracketsPattern; + } + + private static string _nestedParensPattern; + + /// + /// Reusable pattern to match balanced (parens). See Friedl's + /// "Mastering Regular Expressions", 2nd Ed., pp. 328-331. + /// + private static string GetNestedParensPattern() + { + // in other words (this) and (this(also)) and (this(also(too))) + // up to _nestDepth + if (_nestedParensPattern == null) + _nestedParensPattern = + RepeatString(@" + (?> # Atomic matching + [^()\s]+ # Anything other than parens or whitespace + | + \( + ", _nestDepth) + RepeatString( + @" \) + )*" + , _nestDepth); + return _nestedParensPattern; + } + + private static Regex _linkDef = new Regex(string.Format(@" + ^[ ]{{0,{0}}}\[(.+)\]: # id = $1 + [ ]* + \n? # maybe *one* newline + [ ]* + ? # url = $2 + [ ]* + \n? # maybe one newline + [ ]* + (?: + (?<=\s) # lookbehind for whitespace + [""(] + (.+?) # title = $3 + ["")] + [ ]* + )? 
# title is optional + (?:\n+|\Z)", _tabWidth - 1), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); + + /// + /// Strips link definitions from text, stores the URLs and titles in hash references. + /// + /// + /// ^[id]: url "optional title" + /// + private string StripLinkDefinitions(string text) + { + return _linkDef.Replace(text, new MatchEvaluator(LinkEvaluator)); + } + + private string LinkEvaluator(Match match) + { + string linkID = match.Groups[1].Value.ToLowerInvariant(); + _urls[linkID] = EncodeAmpsAndAngles(match.Groups[2].Value); + + if (match.Groups[3] != null && match.Groups[3].Length > 0) + _titles[linkID] = match.Groups[3].Value.Replace("\"", """); + + return ""; + } + + // compiling this monster regex results in worse performance. trust me. + private static Regex _blocksHtml = new Regex(GetBlockPattern(), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace); + + + /// + /// derived pretty much verbatim from PHP Markdown + /// + private static string GetBlockPattern() + { + + // Hashify HTML blocks: + // We only want to do this for block-level HTML tags, such as headers, + // lists, and tables. That's because we still want to wrap

s around + // "paragraphs" that are wrapped in non-block-level tags, such as anchors, + // phrase emphasis, and spans. The list of tags we're looking for is + // hard-coded: + // + // * List "a" is made of tags which can be both inline or block-level. + // These will be treated block-level when the start tag is alone on + // its line, otherwise they're not matched here and will be taken as + // inline later. + // * List "b" is made of tags which are always block-level; + // + string blockTagsA = "ins|del"; + string blockTagsB = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|script|noscript|form|fieldset|iframe|math"; + + // Regular expression for the content of a block tag. + string attr = @" + (?> # optional tag attributes + \s # starts with whitespace + (?> + [^>""/]+ # text outside quotes + | + /+(?!>) # slash not followed by > + | + ""[^""]*"" # text inside double quotes (tolerate >) + | + '[^']*' # text inside single quotes (tolerate >) + )* + )? + "; + + string content = RepeatString(@" + (?> + [^<]+ # content without tag + | + <\2 # nested opening tag + " + attr + @" # attributes + (?> + /> + | + >", _nestDepth) + // end of opening tag + ".*?" + // last level nested tag content + RepeatString(@" + # closing nested tag + ) + | + <(?!/\2\s*> # other tags with a different name + ) + )*", _nestDepth); + + string content2 = content.Replace(@"\2", @"\3"); + + // First, look for nested blocks, e.g.: + //

+ //
+ // tags for inner block must be indented. + //
+ //
+ // + // The outermost tags must start at the left margin for this to match, and + // the inner nested divs must be indented. + // We need to do this before the next, more liberal match, because the next + // match will start at the first `
` and stop at the first `
`. + string pattern = @" + (?> + (?> + (?<=\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + + # Match from `\n` to `\n`, handling nested tags + # in between. + + [ ]{0,$less_than_tab} + <($block_tags_b_re) # start tag = $2 + $attr> # attributes followed by > and \n + $content # content, support nesting + # the matching end tag + [ ]* # trailing spaces + (?=\n+|\Z) # followed by a newline or end of document + + | # Special version for tags of group a. + + [ ]{0,$less_than_tab} + <($block_tags_a_re) # start tag = $3 + $attr>[ ]*\n # attributes followed by > + $content2 # content, support nesting + # the matching end tag + [ ]* # trailing spaces + (?=\n+|\Z) # followed by a newline or end of document + + | # Special case just for
. It was easier to make a special + # case than to make the other regex more complicated. + + [ ]{0,$less_than_tab} + <(hr) # start tag = $2 + $attr # attributes + /?> # the matching end tag + [ ]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + + | # Special case for standalone HTML comments: + + [ ]{0,$less_than_tab} + (?s: + + ) + [ ]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + + | # PHP and ASP-style processor instructions ( + ) + [ ]* + (?=\n{2,}|\Z) # followed by a blank line or end of document + + ) + )"; + + pattern = pattern.Replace("$less_than_tab", (_tabWidth - 1).ToString()); + pattern = pattern.Replace("$block_tags_b_re", blockTagsB); + pattern = pattern.Replace("$block_tags_a_re", blockTagsA); + pattern = pattern.Replace("$attr", attr); + pattern = pattern.Replace("$content2", content2); + pattern = pattern.Replace("$content", content); + + return pattern; + } + + /// + /// replaces any block-level HTML blocks with hash entries + /// + private string HashHTMLBlocks(string text) + { + return _blocksHtml.Replace(text, new MatchEvaluator(HtmlEvaluator)); + } + + private string HtmlEvaluator(Match match) + { + string text = match.Groups[1].Value; + string key = GetHashKey(text); + _htmlBlocks[key] = text; + + return string.Concat("\n\n", key, "\n\n"); + } + + private static string GetHashKey(string s) + { + return "\x1A" + Math.Abs(s.GetHashCode()).ToString() + "\x1A"; + } + + private static Regex _htmlTokens = new Regex(@" + ()| # match + (<\?.*?\?>)| # match " + + RepeatString(@" + (<[A-Za-z\/!$](?:[^<>]|", _nestDepth) + RepeatString(@")*>)", _nestDepth) + + " # match and ", + RegexOptions.Multiline | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); + + /// + /// returns an array of HTML tokens comprising the input string. 
Each token is + /// either a tag (possibly with nested, tags contained therein, such + /// as <a href="<MTFoo>">, or a run of text between tags. Each element of the + /// array is a two-element array; the first is either 'tag' or 'text'; the second is + /// the actual value. + /// + private List TokenizeHTML(string text) + { + int pos = 0; + int tagStart = 0; + var tokens = new List(); + + // this regex is derived from the _tokenize() subroutine in Brad Choate's MTRegex plugin. + // http://www.bradchoate.com/past/mtregex.php + foreach (Match m in _htmlTokens.Matches(text)) + { + tagStart = m.Index; + + if (pos < tagStart) + tokens.Add(new Token(TokenType.Text, text.Substring(pos, tagStart - pos))); + + tokens.Add(new Token(TokenType.Tag, m.Value)); + pos = tagStart + m.Length; + } + + if (pos < text.Length) + tokens.Add(new Token(TokenType.Text, text.Substring(pos, text.Length - pos))); + + return tokens; + } + + + private static Regex _anchorRef = new Regex(string.Format(@" + ( # wrap whole match in $1 + \[ + ({0}) # link text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + )", GetNestedBracketsPattern()), RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); + + private static Regex _anchorInline = new Regex(string.Format(@" + ( # wrap whole match in $1 + \[ + ({0}) # link text = $2 + \] + \( # literal paren + [ ]* + ({1}) # href = $3 + [ ]* + ( # $4 + (['""]) # quote char = $5 + (.*?) # title = $6 + \5 # matching quote + [ ]* # ignore any spaces between closing quote and ) + )? 
# title is optional + \) + )", GetNestedBracketsPattern(), GetNestedParensPattern()), + RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); + + private static Regex _anchorRefShortcut = new Regex(@" + ( # wrap whole match in $1 + \[ + ([^\[\]]+) # link text = $2; can't contain [ or ] + \] + )", RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); + + /// + /// Turn Markdown link shortcuts into HTML anchor tags + /// + /// + /// [link text](url "title") + /// [link text][id] + /// [id] + /// + private string DoAnchors(string text) + { + // First, handle reference-style links: [link text] [id] + text = _anchorRef.Replace(text, new MatchEvaluator(AnchorRefEvaluator)); + + // Next, inline-style links: [link text](url "optional title") or [link text](url "optional title") + text = _anchorInline.Replace(text, new MatchEvaluator(AnchorInlineEvaluator)); + + // Last, handle reference-style shortcuts: [link text] + // These must come last in case you've also got [link test][1] + // or [link test](/foo) + text = _anchorRefShortcut.Replace(text, new MatchEvaluator(AnchorRefShortcutEvaluator)); + return text; + } + + private string AnchorRefEvaluator(Match match) + { + string wholeMatch = match.Groups[1].Value; + string linkText = match.Groups[2].Value; + string linkID = match.Groups[3].Value.ToLowerInvariant(); + + string result; + + // for shortcut links like [this][]. 
+ if (linkID == "") + linkID = linkText.ToLowerInvariant(); + + if (_urls.ContainsKey(linkID)) + { + string url = _urls[linkID]; + + url = EncodeProblemUrlChars(url); + url = EscapeBoldItalic(url); + result = ""; + } + else + result = wholeMatch; + + return result; + } + + private string AnchorRefShortcutEvaluator(Match match) + { + string wholeMatch = match.Groups[1].Value; + string linkText = match.Groups[2].Value; + string linkID = Regex.Replace(linkText.ToLowerInvariant(), @"[ ]*\n[ ]*", " "); // lower case and remove newlines / extra spaces + + string result; + + if (_urls.ContainsKey(linkID)) + { + string url = _urls[linkID]; + + url = EncodeProblemUrlChars(url); + url = EscapeBoldItalic(url); + result = ""; + } + else + result = wholeMatch; + + return result; + } + + + private string AnchorInlineEvaluator(Match match) + { + string linkText = match.Groups[2].Value; + string url = match.Groups[3].Value; + string title = match.Groups[6].Value; + string result; + + url = EncodeProblemUrlChars(url); + url = EscapeBoldItalic(url); + if (url.StartsWith("<") && url.EndsWith(">")) + url = url.Substring(1, url.Length - 2); // remove <>'s surrounding URL, if present + + result = string.Format("{0}", linkText); + return result; + } + + private static Regex _imagesRef = new Regex(@" + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + + )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); + + private static Regex _imagesInline = new Regex(String.Format(@" + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + \s? # one optional whitespace character + \( # literal paren + [ ]* + ({0}) # href = $3 + [ ]* + ( # $4 + (['""]) # quote char = $5 + (.*?) # title = $6 + \5 # matching quote + [ ]* + )? 
# title is optional + \) + )", GetNestedParensPattern()), + RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); + + /// + /// Turn Markdown image shortcuts into HTML img tags. + /// + /// + /// ![alt text][id] + /// ![alt text](url "optional title") + /// + private string DoImages(string text) + { + // First, handle reference-style labeled images: ![alt text][id] + text = _imagesRef.Replace(text, new MatchEvaluator(ImageReferenceEvaluator)); + + // Next, handle inline images: ![alt text](url "optional title") + // Don't forget: encode * and _ + text = _imagesInline.Replace(text, new MatchEvaluator(ImageInlineEvaluator)); + + return text; + } + + private string ImageReferenceEvaluator(Match match) + { + string wholeMatch = match.Groups[1].Value; + string altText = match.Groups[2].Value; + string linkID = match.Groups[3].Value.ToLowerInvariant(); + string result; + + // for shortcut links like ![this][]. + if (linkID == "") + linkID = altText.ToLowerInvariant(); + + altText = altText.Replace("\"", """); + + if (_urls.ContainsKey(linkID)) + { + string url = _urls[linkID]; + url = EncodeProblemUrlChars(url); + url = EscapeBoldItalic(url); + result = string.Format("\"{1}\"",")) + url = url.Substring(1, url.Length - 2); // Remove <>'s surrounding URL, if present + url = EncodeProblemUrlChars(url); + url = EscapeBoldItalic(url); + + result = string.Format("\"{1}\"", + /// Turn Markdown headers into HTML header tags + /// + /// + /// Header 1 + /// ======== + /// + /// Header 2 + /// -------- + /// + /// # Header 1 + /// ## Header 2 + /// ## Header 2 with closing hashes ## + /// ... 
+ /// ###### Header 6 + /// + private string DoHeaders(string text) + { + text = _headerSetext.Replace(text, new MatchEvaluator(SetextHeaderEvaluator)); + text = _headerAtx.Replace(text, new MatchEvaluator(AtxHeaderEvaluator)); + return text; + } + + private string SetextHeaderEvaluator(Match match) + { + string header = match.Groups[1].Value; + int level = match.Groups[2].Value.StartsWith("=") ? 1 : 2; + return string.Format("{0}\n\n", RunSpanGamut(header), level); + } + + private string AtxHeaderEvaluator(Match match) + { + string header = match.Groups[2].Value; + int level = match.Groups[1].Value.Length; + return string.Format("{0}\n\n", RunSpanGamut(header), level); + } + + + private static Regex _horizontalRules = new Regex(@" + ^[ ]{0,3} # Leading space + ([-*_]) # $1: First marker + (?> # Repeated marker group + [ ]{0,2} # Zero, one, or two spaces. + \1 # Marker character + ){2,} # Group repeated at least twice + [ ]* # Trailing spaces + $ # End of line. + ", RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); + + /// + /// Turn Markdown horizontal rules into HTML hr tags + /// + /// + /// *** + /// * * * + /// --- + /// - - - + /// + private string DoHorizontalRules(string text) + { + return _horizontalRules.Replace(text, " + /// Turn Markdown lists into HTML ul and ol and li tags + /// + private string DoLists(string text) + { + // We use a different prefix before nested lists than top-level lists. + // See extended comment in _ProcessListItems(). + if (_listLevel > 0) + text = _listNested.Replace(text, new MatchEvaluator(ListEvaluator)); + else + text = _listTopLevel.Replace(text, new MatchEvaluator(ListEvaluator)); + + return text; + } + + private string ListEvaluator(Match match) + { + string list = match.Groups[1].Value; + string listType = Regex.IsMatch(match.Groups[3].Value, _markerUL) ? 
"ul" : "ol"; + string result; + + // Turn double returns into triple returns, so that we can make a + // paragraph for the last item in a list, if necessary: + list = Regex.Replace(list, @"\n{2,}", "\n\n\n"); + result = ProcessListItems(list, listType == "ul" ? _markerUL : _markerOL); + + result = string.Format("<{0}>\n{1}\n", listType, result); + return result; + } + + /// + /// Process the contents of a single ordered or unordered list, splitting it + /// into individual list items. + /// + private string ProcessListItems(string list, string marker) + { + // The listLevel global keeps track of when we're inside a list. + // Each time we enter a list, we increment it; when we leave a list, + // we decrement. If it's zero, we're not in a list anymore. + + // We do this because when we're not inside a list, we want to treat + // something like this: + + // I recommend upgrading to version + // 8. Oops, now this line is treated + // as a sub-list. + + // As a single paragraph, despite the fact that the second line starts + // with a digit-period-space sequence. + + // Whereas when we're inside a list (or sub-list), that line will be + // treated as the start of a sub-list. What a kludge, huh? This is + // an aspect of Markdown's syntax that's hard to parse perfectly + // without resorting to mind-reading. Perhaps the solution is to + // change the syntax rules such that sub-lists must start with a + // starting cardinal number; e.g. "1." or "a.". + + _listLevel++; + + // Trim trailing blank lines: + list = Regex.Replace(list, @"\n{2,}\z", "\n"); + + string pattern = string.Format( + @"(\n)? # leading line = $1 + (^[ ]*) # leading whitespace = $2 + ({0}) [ ]+ # list marker = $3 + ((?s:.+?) 
# list item text = $4 + (\n{{1,2}})) + (?= \n* (\z | \2 ({0}) [ ]+))", marker); + + list = Regex.Replace(list, pattern, new MatchEvaluator(ListItemEvaluator), + RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline); + _listLevel--; + return list; + } + + private string ListItemEvaluator(Match match) + { + string item = match.Groups[4].Value; + string leadingLine = match.Groups[1].Value; + + if (!String.IsNullOrEmpty(leadingLine) || Regex.IsMatch(item, @"\n{2,}")) + // we could correct any bad indentation here.. + item = RunBlockGamut(Outdent(item) + "\n"); + else + { + // recursion for sub-lists + item = DoLists(Outdent(item)); + item = item.TrimEnd('\n'); + item = RunSpanGamut(item); + } + + return string.Format("
  • {0}
  • \n", item); + } + + + private static Regex _codeBlock = new Regex(string.Format(@" + (?:\n\n|\A\n?) + ( # $1 = the code block -- one or more lines, starting with a space + (?: + (?:[ ]{{{0}}}) # Lines must start with a tab-width of spaces + .*\n+ + )+ + ) + ((?=^[ ]{{0,{0}}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc", + _tabWidth), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled); + + /// + /// /// Turn Markdown 4-space indented code into HTML pre code blocks + /// + private string DoCodeBlocks(string text) + { + text = _codeBlock.Replace(text, new MatchEvaluator(CodeBlockEvaluator)); + return text; + } + + private string CodeBlockEvaluator(Match match) + { + string codeBlock = match.Groups[1].Value; + + codeBlock = EncodeCode(Outdent(codeBlock)); + codeBlock = _newlinesLeadingTrailing.Replace(codeBlock, ""); + + return string.Concat("\n\n
    ", codeBlock, "\n
    \n\n"); + } + + private static Regex _codeSpan = new Regex(@" + (? + /// Turn Markdown `code spans` into HTML code tags + /// + private string DoCodeSpans(string text) + { + // * You can use multiple backticks as the delimiters if you want to + // include literal backticks in the code span. So, this input: + // + // Just type ``foo `bar` baz`` at the prompt. + // + // Will translate to: + // + //

    Just type foo `bar` baz at the prompt.

    + // + // There's no arbitrary limit to the number of backticks you + // can use as delimters. If you need three consecutive backticks + // in your code, use four for delimiters, etc. + // + // * You can use spaces to get literal backticks at the edges: + // + // ... type `` `bar` `` ... + // + // Turns to: + // + // ... type `bar` ... + // + + return _codeSpan.Replace(text, new MatchEvaluator(CodeSpanEvaluator)); + } + + private string CodeSpanEvaluator(Match match) + { + string span = match.Groups[2].Value; + span = Regex.Replace(span, @"^[ ]*", ""); // leading whitespace + span = Regex.Replace(span, @"[ ]*$", ""); // trailing whitespace + span = EncodeCode(span); + + return string.Concat("", span, ""); + } + + + private static Regex _bold = new Regex(@"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1", + RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); + private static Regex _strictBold = new Regex(@"([\W_]|^) (\*\*|__) (?=\S) ([^\r]*?\S[\*_]*) \2 ([\W_]|$)", + RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); + + private static Regex _italic = new Regex(@"(\*|_) (?=\S) (.+?) 
(?<=\S) \1", + RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); + private static Regex _strictItalic = new Regex(@"([\W_]|^) (\*|_) (?=\S) ([^\r\*_]*?\S) \2 ([\W_]|$)", + RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); + + /// + /// Turn Markdown *italics* and **bold** into HTML strong and em tags + /// + private string DoItalicsAndBold(string text) + { + + // must go first, then + if (_strictBoldItalic) + { + text = _strictBold.Replace(text, "$1$3$4"); + text = _strictItalic.Replace(text, "$1$3$4"); + } + else + { + text = _bold.Replace(text, "$2"); + text = _italic.Replace(text, "$2"); + } + return text; + } + + /// + /// Turn markdown line breaks (two space at end of line) into HTML break tags + /// + private string DoHardBreaks(string text) + { + if (_autoNewlines) + text = Regex.Replace(text, @"\n", string.Format("[ ]? # '>' at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline | RegexOptions.Compiled); + + /// + /// Turn Markdown > quoted blocks into HTML blockquote blocks + /// + private string DoBlockQuotes(string text) + { + return _blockquote.Replace(text, new MatchEvaluator(BlockQuoteEvaluator)); + } + + private string BlockQuoteEvaluator(Match match) + { + string bq = match.Groups[1].Value; + + bq = Regex.Replace(bq, @"^[ ]*>[ ]?", "", RegexOptions.Multiline); // trim one level of quoting + bq = Regex.Replace(bq, @"^[ ]+$", "", RegexOptions.Multiline); // trim whitespace-only lines + bq = RunBlockGamut(bq); // recurse + + bq = Regex.Replace(bq, @"^", " ", RegexOptions.Multiline); + + // These leading spaces screw with
     content, so we need to fix that:
    +            bq = Regex.Replace(bq, @"(\s*
    .+?
    )", new MatchEvaluator(BlockQuoteEvaluator2), RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline); + + return string.Format("
    \n{0}\n
    \n\n", bq); + } + + private string BlockQuoteEvaluator2(Match match) + { + return Regex.Replace(match.Groups[1].Value, @"^ ", "", RegexOptions.Multiline); + } + + private static Regex _autolinkBare = new Regex(@"(^|\s)(https?|ftp)(://[-A-Z0-9+&@#/%?=~_|\[\]\(\)!:,\.;]*[-A-Z0-9+&@#/%=~_|\[\]])($|\W)", + RegexOptions.IgnoreCase | RegexOptions.Compiled); + + /// + /// Turn angle-delimited URLs into HTML anchor tags + /// + /// + /// <http://www.example.com> + /// + private string DoAutoLinks(string text) + { + + if (_autoHyperlink) + { + // fixup arbitrary URLs by adding Markdown < > so they get linked as well + // note that at this point, all other URL in the text are already hyperlinked as + // *except* for the case + text = _autolinkBare.Replace(text, @"$1<$2$3>$4"); + } + + // Hyperlinks: + text = Regex.Replace(text, "<((https?|ftp):[^'\">\\s]+)>", new MatchEvaluator(HyperlinkEvaluator)); + + if (_linkEmails) + { + // Email addresses: + string pattern = + @"< + (?:mailto:)? + ( + [-.\w]+ + \@ + [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ + ) + >"; + text = Regex.Replace(text, pattern, new MatchEvaluator(EmailEvaluator), RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace); + } + + return text; + } + + private string HyperlinkEvaluator(Match match) + { + string link = match.Groups[1].Value; + return string.Format("{0}", link); + } + + private string EmailEvaluator(Match match) + { + string email = Unescape(match.Groups[1].Value); + + // + // Input: an email address, e.g. "foo@example.com" + // + // Output: the email address as a mailto link, with each character + // of the address encoded as either a decimal or hex entity, in + // the hopes of foiling most address harvesting spam bots. 
E.g.: + // + // foo + // @example.com + // + // Based by a filter by Matthew Wickline, posted to the BBEdit-Talk + // mailing list: + // + email = "mailto:" + email; + + // leave ':' alone (to spot mailto: later) + email = EncodeEmailAddress(email); + + email = string.Format("{0}", email); + + // strip the mailto: from the visible part + email = Regex.Replace(email, "\">.+?:", "\">"); + return email; + } + + + private static Regex _outDent = new Regex(@"^[ ]{1," + _tabWidth + @"}", RegexOptions.Multiline | RegexOptions.Compiled); + + /// + /// Remove one level of line-leading spaces + /// + private string Outdent(string block) + { + return _outDent.Replace(block, ""); + } + + + #region Encoding and Normalization + + + /// + /// encodes email address randomly + /// roughly 10% raw, 45% hex, 45% dec + /// note that @ is always encoded and : never is + /// + private string EncodeEmailAddress(string addr) + { + var sb = new StringBuilder(addr.Length * 5); + var rand = new Random(); + int r; + foreach (char c in addr) + { + r = rand.Next(1, 100); + if ((r > 90 || c == ':') && c != '@') + sb.Append(c); // m + else if (r < 45) + sb.AppendFormat("&#x{0:x};", (int)c); // m + else + sb.AppendFormat("&#{0};", (int)c); // m + } + return sb.ToString(); + } + + /// + /// Encode/escape certain Markdown characters inside code blocks and spans where they are literals + /// + private string EncodeCode(string code) + { + var sb = new StringBuilder(code.Length * 2); + + Action putc = (c) => sb.Append(c); + Action put = (str) => sb.Append(str); + + foreach (char c in code) { + switch (c) { + case '&': + put("&"); + continue; + case '<': + put ("<"); + continue; + case '>': + put (">"); + continue; + default: + string val = null; + + if (_escapeTable.TryGetValue(c.ToString(), out val)) + put(val); + else if((int)c > 255) + sb.AppendFormat("&#{0};", (int)c); + else + putc(c); + + continue; + } + } + return sb.ToString(); + } + + + private string EncodeCodeEvaluator(Match match) + { + 
switch (match.Value) + { + // Encode all ampersands; HTML entities are not + // entities within a Markdown code span. + case "&": + return "&"; + // Do the angle bracket song and dance + case "<": + return "<"; + case ">": + return ">"; + // escape characters that are magic in Markdown + default: + return _escapeTable[match.Value]; + } + } + + + private static Regex _amps = new Regex(@"&(?!(#[0-9]+)|(#[xX][a-fA-F0-9])|([a-zA-Z][a-zA-Z0-9]*);)", RegexOptions.ExplicitCapture | RegexOptions.Compiled); + private static Regex _angles = new Regex(@"<(?![A-Za-z/?\$!])", RegexOptions.ExplicitCapture | RegexOptions.Compiled); + + /// + /// Encode any ampersands (that aren't part of an HTML entity) and left or right angle brackets + /// + private string EncodeAmpsAndAngles(string s) + { + s = _amps.Replace(s, "&"); + s = _angles.Replace(s, "<"); + return s; + } + + private static Regex _backslashEscapes; + + /// + /// Encodes any escaped characters such as \`, \*, \[ etc + /// + private string EscapeBackslashes(string s) + { + return _backslashEscapes.Replace(s, new MatchEvaluator(EscapeBackslashesEvaluator)); + } + private string EscapeBackslashesEvaluator(Match match) + { + return _backslashEscapeTable[match.Value]; + } + + private static Regex _unescapes = new Regex("\x1A\\d+\x1A", RegexOptions.Compiled); + + /// + /// swap back in all the special characters we've hidden + /// + private string Unescape(string s) + { + return _unescapes.Replace(s, new MatchEvaluator(UnescapeEvaluator)); + } + private string UnescapeEvaluator(Match match) + { + return _invertedEscapeTable[match.Value]; + } + + + /// + /// escapes Bold [ * ] and Italic [ _ ] characters + /// + private string EscapeBoldItalic(string s) + { + s = s.Replace("*", _escapeTable["*"]); + s = s.Replace("_", _escapeTable["_"]); + return s; + } + + private static char[] _problemUrlChars = @"""'*()[]$:".ToCharArray(); + + /// + /// hex-encodes some unusual "problem" chars in URLs to avoid URL detection problems + /// + 
private string EncodeProblemUrlChars(string url) + { + if (!_encodeProblemUrlCharacters) return url; + + var sb = new StringBuilder(url.Length); + bool encode; + char c; + + for (int i = 0; i < url.Length; i++) + { + c = url[i]; + encode = Array.IndexOf(_problemUrlChars, c) != -1; + if (encode && c == ':' && i < url.Length - 1) + encode = !(url[i + 1] == '/') && !(url[i + 1] >= '0' && url[i + 1] <= '9'); + + if (encode) + sb.Append("%" + String.Format("{0:x}", (byte)c)); + else + sb.Append(c); + } + + return sb.ToString(); + } + + + /// + /// Within tags -- meaning between < and > -- encode [\ ` * _] so they + /// don't conflict with their use in Markdown for code, italics and strong. + /// We're replacing each such character with its corresponding hash + /// value; this is likely overkill, but it should prevent us from colliding + /// with the escape values by accident. + /// + private string EscapeSpecialCharsWithinTagAttributes(string text) + { + var tokens = TokenizeHTML(text); + + // now, rebuild text from the tokens + var sb = new StringBuilder(text.Length); + + foreach (var token in tokens) + { + string value = token.Value; + + if (token.Type == TokenType.Tag) + { + value = value.Replace(@"\", _escapeTable[@"\"]); + value = Regex.Replace(value, "(?<=.)(?=.)", _escapeTable[@"`"]); + value = EscapeBoldItalic(value); + } + + sb.Append(value); + } + + return sb.ToString(); + } + + /// + /// convert all tabs to _tabWidth spaces; + /// standardizes line endings from DOS (CR LF) or Mac (CR) to UNIX (LF); + /// makes sure text ends with a couple of newlines; + /// removes any blank lines (only spaces) in the text + /// + private string Normalize(string text) + { + var output = new StringBuilder(text.Length); + var line = new StringBuilder(); + bool valid = false; + + for (int i = 0; i < text.Length; i++) + { + switch (text[i]) + { + case '\n': + if (valid) output.Append(line); + output.Append('\n'); + line.Length = 0; valid = false; + break; + case '\r': + if ((i < 
text.Length - 1) && (text[i + 1] != '\n')) + { + if (valid) output.Append(line); + output.Append('\n'); + line.Length = 0; valid = false; + } + break; + case '\t': + int width = (_tabWidth - line.Length % _tabWidth); + for (int k = 0; k < width; k++) + line.Append(' '); + break; + case '\x1A': + break; + default: + if (!valid && text[i] != ' ') valid = true; + line.Append(text[i]); + break; + } + } + + if (valid) output.Append(line); + output.Append('\n'); + + // add two newlines to the end before return + return output.Append("\n\n").ToString(); + } + + #endregion + + /// + /// this is to emulate what's evailable in PHP + /// + private static string RepeatString(string text, int count) + { + var sb = new StringBuilder(text.Length * count); + for (int i = 0; i < count; i++) + sb.Append(text); + return sb.ToString(); + } + + } +} \ No newline at end of file diff --git a/src/MarkdownSharp/MarkdownOld.cs b/src/MarkdownSharp/MarkdownOld.cs new file mode 100644 index 0000000..19bf1b9 --- /dev/null +++ b/src/MarkdownSharp/MarkdownOld.cs @@ -0,0 +1,1311 @@ +/* + * Markdown - A text-to-HTML conversion tool for web writers + * Copyright (c) 2004 John Gruber + * http://daringfireball.net/projects/markdown/ + * + * Copyright (c) 2004 Michel Fortin - Translation to PHP + * http://www.michelf.com/projects/php-markdown/ + * + * Copyright (c) 2004-2005 Milan Negovan - C# translation to .NET + * http://www.aspnetresources.com + * + */ + +#region Copyright and license + +/* +Copyright (c) 2003-2004 John Gruber + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name "Markdown" nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +This software is provided by the copyright holders and contributors "as +is" and any express or implied warranties, including, but not limited +to, the implied warranties of merchantability and fitness for a +particular purpose are disclaimed. In no event shall the copyright owner +or contributors be liable for any direct, indirect, incidental, special, +exemplary, or consequential damages (including, but not limited to, +procurement of substitute goods or services; loss of use, data, or +profits; or business interruption) however caused and on any theory of +liability, whether in contract, strict liability, or tort (including +negligence or otherwise) arising in any way out of the use of this +software, even if advised of the possibility of such damage. 
+*/ + +#endregion + +using System; +using System.Collections; +using System.Security.Cryptography; +using System.Text; +using System.Text.RegularExpressions; + +namespace MarkdownSharp +{ + [Obsolete("This old version is included only for historical comparison purposes; use at your own risk!")] + public class MarkdownOld + { + public class Pair + { + public Object First; + public Object Second; + } + + #region Class members + + private const int nestedBracketDepth = 6; + private const string emptyElementSuffix = " />"; // Change to ">" for HTML output + private const int tabWidth = 4; + + private static readonly string markerUL; + private static readonly string markerOL; + private static readonly string markerAny; + + private static readonly string nestedBrackets; + private static readonly Hashtable escapeTable; + private static readonly Hashtable backslashEscapeTable; + + private Hashtable urls; + private Hashtable titles; + private Hashtable htmlBlocks; + + private int listLevel = 0; + + #endregion + + /// + /// Static constructor + /// + /// + /// In the static constuctor we'll initialize what stays the same across all transforms. 
+ /// + static MarkdownOld() + { + nestedBrackets += RepeatString(@"(?>[^\[\]]+|\[", nestedBracketDepth); + nestedBrackets += RepeatString(@"\])*", nestedBracketDepth); + + markerUL = @"[*+-]"; + markerOL = @"\d+[.]"; + markerAny = string.Format("(?:{0}|{1})", markerUL, markerOL); + + // Table of hash values for escaped characters: + escapeTable = new Hashtable(); + + escapeTable[@"\"] = ComputeMD5(@"\"); + escapeTable["`"] = ComputeMD5("`"); + escapeTable["*"] = ComputeMD5("*"); + escapeTable["_"] = ComputeMD5("_"); + escapeTable["{"] = ComputeMD5("{"); + escapeTable["}"] = ComputeMD5("}"); + escapeTable["["] = ComputeMD5("["); + escapeTable["]"] = ComputeMD5("]"); + escapeTable["("] = ComputeMD5("("); + escapeTable[")"] = ComputeMD5(")"); + escapeTable[">"] = ComputeMD5(">"); + escapeTable["#"] = ComputeMD5("#"); + escapeTable["+"] = ComputeMD5("+"); + escapeTable["-"] = ComputeMD5("-"); + escapeTable["."] = ComputeMD5("."); + escapeTable["!"] = ComputeMD5("!"); + + // Create an identical table but for escaped characters. + backslashEscapeTable = new Hashtable(); + + foreach (string key in escapeTable.Keys) + backslashEscapeTable[@"\" + key] = escapeTable[key]; + } + + public MarkdownOld() + { + urls = new Hashtable(); + titles = new Hashtable(); + htmlBlocks = new Hashtable(); + } + + /// + /// Main function. The order in which other subs are called here is + /// essential. Link and image substitutions need to happen before + /// EscapeSpecialChars(), so that any *'s or _'s in the + /// and tags get encoded. + /// + public string Transform(string text) + { + // Standardize line endings: + // DOS to Unix and Mac to Unix + text = text.Replace("\r\n", "\n").Replace("\r", "\n"); + + // Make sure $text ends with a couple of newlines: + text += "\n\n"; + + // Convert all tabs to spaces. + text = Detab(text); + + // Strip any lines consisting only of spaces and tabs. 
+ // This makes subsequent regexen easier to write, because we can + // match consecutive blank lines with /\n+/ instead of something + // contorted like /[ \t]*\n+/ . + text = Regex.Replace(text, @"^[ \t]+$", string.Empty, RegexOptions.Multiline); + + // Turn block-level HTML blocks into hash entries + text = HashHTMLBlocks(text); + + // Strip link definitions, store in hashes. + text = StripLinkDefinitions(text); + + text = RunBlockGamut(text); + + text = UnescapeSpecialChars(text); + + return text + "\n"; + } + + #region Process link definitions + + /// + /// Strips link definitions from text, stores the URLs and titles in hash references. + /// + /// Link defs are in the form: ^[id]: url "optional title" + private string StripLinkDefinitions(string text) + { + string pattern = string.Format(@" + ^[ ]{{0,{0}}}\[(.+)\]: # id = $1 + [ \t]* + \n? # maybe *one* newline + [ \t]* + ? # url = $2 + [ \t]* + \n? # maybe one newline + [ \t]* + (?: + (?<=\s) # lookbehind for whitespace + [\x22(] + (.+?) # title = $3 + [\x22)] + [ \t]* + )? # title is optional + (?:\n+|\Z)", tabWidth - 1); + + text = Regex.Replace(text, pattern, new MatchEvaluator(LinkEvaluator), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace); + return text; + } + + private string LinkEvaluator(Match match) + { + string linkID = match.Groups[1].Value.ToLower(); + urls[linkID] = EncodeAmpsAndAngles(match.Groups[2].Value); + + if (match.Groups[3] != null && match.Groups[3].Length > 0) + titles[linkID] = match.Groups[3].Value.Replace("\"", """); + + return string.Empty; + } + + #endregion + + #region Hashify HTML blocks + + /// + /// Hashify HTML blocks + /// + private string HashHTMLBlocks(string text) + { + /* + We only want to do this for block-level HTML tags, such as headers, + lists, and tables. That's because we still want to wrap

    s around + "paragraphs" that are wrapped in non-block-level tags, such as anchors, + phrase emphasis, and spans. The list of tags we're looking for is + hard-coded: + */ + string blockTags1 = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math|ins|del"; + string blockTags2 = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|script|noscript|form|fieldset|iframe|math"; + + /* + First, look for nested blocks, e.g.: +

    +
    + tags for inner block must be indented. +
    +
    + + The outermost tags must start at the left margin for this to match, and + the inner nested divs must be indented. + We need to do this before the next, more liberal match, because the next + match will start at the first `
    ` and stop at the first `
    `. + */ + string pattern = string.Format(@" + ( # save in $1 + ^ # start of line (with /m) + <({0}) # start tag = $2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + )", blockTags1); + + text = Regex.Replace(text, pattern, new MatchEvaluator(HtmlEvaluator), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace); + + // Now match more liberally, simply from `\n` to `\n` + pattern = string.Format(@" + ( # save in $1 + ^ # start of line (with /m) + <({0}) # start tag = $2 + \b # word break + (.*\n)*? # any number of lines, minimally matching + .* # the matching end tag + [ \t]* # trailing spaces/tabs + (?=\n+|\Z) # followed by a newline or end of document + )", blockTags2); + + text = Regex.Replace(text, pattern, new MatchEvaluator(HtmlEvaluator), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace); + + // Special case just for
    . It was easier to make a special case than + // to make the other regex more complicated. + pattern = string.Format(@" + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{{0, {0}}} + <(hr) # start tag = $2 + \b # word break + ([^<>])*? # + /?> # the matching end tag + [ \t]* + (?=\n{{2,}}|\Z) # followed by a blank line or end of document + )", tabWidth - 1); + text = Regex.Replace(text, pattern, new MatchEvaluator(HtmlEvaluator), RegexOptions.IgnorePatternWhitespace); + + // Special case for standalone HTML comments: + pattern = string.Format(@" + (?: + (?<=\n\n) # Starting after a blank line + | # or + \A\n? # the beginning of the doc + ) + ( # save in $1 + [ ]{{0,{0}}} + (?s: + + ) + [ \t]* + (?=\n{{2,}}|\Z) # followed by a blank line or end of document + )", tabWidth - 1); + text = Regex.Replace(text, pattern, new MatchEvaluator(HtmlEvaluator), RegexOptions.IgnorePatternWhitespace); + + return text; + } + + private string HtmlEvaluator(Match match) + { + string text = match.Groups[1].Value; + string key = ComputeMD5(text); + htmlBlocks[key] = text; + + // # String that will replace the block + return string.Concat("\n\n", key, "\n\n"); + } + + #endregion + + #region Run transformations that form block-level elements (RunBlockGamut) + + /// + /// These are all the transformations that form block-level + /// tags like paragraphs, headers, and list items. + /// + private string RunBlockGamut(string text) + { + text = DoHeaders(text); + + // Do Horizontal Rules: + text = Regex.Replace(text, @"^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$", " tags around block-level tags. + */ + text = HashHTMLBlocks(text); + + text = FormParagraphs(text); + + return text; + } + + #endregion + + #region Run transformations within block-level elements (RunSpanGamut) + + /// + /// These are all the transformations that occur *within* block-level + /// tags like paragraphs, headers, and list items. 
+ /// + private string RunSpanGamut(string text) + { + text = DoCodeSpans(text); + + text = EscapeSpecialChars(text); + + // Process anchor and image tags. Images must come first, + // because ![foo][f] looks like an anchor. + text = DoImages(text); + text = DoAnchors(text); + + // Make links out of things like `` + // Must come after DoAnchors(), because you can use < and > + // delimiters in inline links like [this](). + text = DoAutoLinks(text); + + // Fix unencoded ampersands and <'s: + text = EncodeAmpsAndAngles(text); + + text = DoItalicsAndBold(text); + + // Do hard breaks: + text = Regex.Replace(text, @" {2,}\n", string.Format(" + /// + /// + /// String containing HTML markup. + /// An array of the tokens comprising the input string. Each token is + /// either a tag (possibly with nested, tags contained therein, such + /// as <a href="">, or a run of text between tags. Each element of the + /// array is a two-element array; the first is either 'tag' or 'text'; the second is + /// the actual value. + /// + private ArrayList TokenizeHTML(string text) + { + // Regular expression derived from the _tokenize() subroutine in + // Brad Choate's MTRegex plugin. 
+ // http://www.bradchoate.com/past/mtregex.php + int pos = 0; + int depth = 6; + ArrayList tokens = new ArrayList(); + + + string nestedTags = string.Concat(RepeatString(@"(?:<[a-z\/!$](?:[^<>]|", depth), + RepeatString(@")*>)", depth)); + string pattern = string.Concat(@"(?s:)|(?s:<\?.*?\?>)|", nestedTags); + + MatchCollection mc = Regex.Matches(text, pattern, RegexOptions.IgnoreCase | RegexOptions.Multiline); + + foreach (Match m in mc) + { + string wholeTag = m.Value; + int tagStart = m.Index; + Pair token = null; + + if (pos < tagStart) + { + token = new Pair(); + token.First = "text"; + token.Second = text.Substring(pos, tagStart - pos); + tokens.Add(token); + } + + token = new Pair(); + token.First = "tag"; + token.Second = wholeTag; + tokens.Add(token); + + pos = m.Index + m.Length; + } + + if (pos < text.Length) + { + Pair token = new Pair(); + token.First = "text"; + token.Second = text.Substring(pos, text.Length - pos); + tokens.Add(token); + } + + return tokens; + } + + #endregion + + #region Escape special characters + + private string EscapeSpecialChars(string text) + { + ArrayList tokens = TokenizeHTML(text); + + // Rebuild text from the tokens + text = string.Empty; + + foreach (Pair token in tokens) + { + string value = token.Second.ToString(); + + if (token.First.Equals("tag")) + /* + Within tags, encode * and _ so they don't conflict with their use + in Markdown for italics and strong. We're replacing each + such character with its corresponding MD5 checksum value; + this is likely overkill, but it should prevent us from colliding + with the escape values by accident. + */ + value = value.Replace("*", escapeTable["*"].ToString()).Replace("_", escapeTable["_"].ToString()); + else + value = EncodeBackslashEscapes(value); + + text += value; + } + + return text; + } + + #endregion + + #region Process referenced and inline anchors + + ///
    + /// Turn Markdown link shortcuts into XHTML tags. + /// + private string DoAnchors(string text) + { + // + // First, handle reference-style links: [link text] [id] + // + string pattern = string.Format(@" + ( # wrap whole match in $1 + \[ + ({0}) # link text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + )", nestedBrackets); + + text = Regex.Replace(text, pattern, new MatchEvaluator(AnchorReferenceEvaluator), RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace); + + // + // Next, inline-style links: [link text](url "optional title") + // + pattern = string.Format(@" + ( # wrap whole match in $1 + \[ + ({0}) # link text = $2 + \] + \( # literal paren + [ \t]* + ? # href = $3 + [ \t]* + ( # $4 + (['\x22]) # quote char = $5 + (.*?) # Title = $6 + \5 # matching quote + )? # title is optional + \) + )", nestedBrackets); + + text = Regex.Replace(text, pattern, new MatchEvaluator(AnchorInlineEvaluator), RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace); + + return text; + } + + private string AnchorReferenceEvaluator(Match match) + { + string wholeMatch = match.Groups[1].Value; + string linkText = match.Groups[2].Value; + string linkID = match.Groups[3].Value.ToLower(); + string url = null; + string res = null; + string title = null; + + // for shortcut links like [this][]. + if (linkID.Equals(string.Empty)) + linkID = linkText.ToLower(); + + if (urls[linkID] != null) + { + url = urls[linkID].ToString(); + + //We've got to encode these to avoid conflicting with italics/bold. 
+ url = url.Replace("*", escapeTable["*"].ToString()).Replace("_", escapeTable["_"].ToString()); + res = string.Format("{0}", linkText); + } + else + res = wholeMatch; + + return res; + } + + private string AnchorInlineEvaluator(Match match) + { + string linkText = match.Groups[2].Value; + string url = match.Groups[3].Value; + string title = match.Groups[6].Value; + string res = null; + + // We've got to encode these to avoid conflicting with italics/bold. + url = url.Replace("*", escapeTable["*"].ToString()).Replace("_", escapeTable["_"].ToString()); + res = string.Format(" 0) + { + title = title.Replace("\"", """).Replace("*", escapeTable["*"].ToString()).Replace("_", escapeTable["_"].ToString()); + res += string.Format(" title=\"{0}\"", title); + } + + res += string.Format(">{0}", linkText); + return res; + } + + #endregion + + #region Process inline and referenced images + + /// + /// Turn Markdown image shortcuts into tags. + /// + private string DoImages(string text) + { + // First, handle reference-style labeled images: ![alt text][id] + string pattern = @" + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + + [ ]? # one optional space + (?:\n[ ]*)? # one optional newline followed by spaces + + \[ + (.*?) # id = $3 + \] + + )"; + + text = Regex.Replace(text, pattern, new MatchEvaluator(ImageReferenceEvaluator), RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline); + + // Next, handle inline images: ![alt text](url "optional title") + // Don't forget: encode * and _ + pattern = @" + ( # wrap whole match in $1 + !\[ + (.*?) # alt text = $2 + \] + \( # literal paren + [ \t]* + ? # src url = $3 + [ \t]* + ( # $4 + (['\x22]) # quote char = $5 + (.*?) # title = $6 + \5 # matching quote + [ \t]* + )? 
# title is optional + \) + )"; + + text = Regex.Replace(text, pattern, new MatchEvaluator(ImageInlineEvaluator), RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline); + + return text; + } + + private string ImageReferenceEvaluator(Match match) + { + string wholeMatch = match.Groups[1].Value; + string altText = match.Groups[2].Value; + string linkID = match.Groups[3].Value.ToLower(); + string url = null; + string res = null; + string title = null; + + // for shortcut links like ![this][]. + if (linkID.Equals(string.Empty)) + linkID = altText.ToLower(); + + altText = altText.Replace("\"", """); + + if (urls[linkID] != null) + { + url = urls[linkID].ToString(); + + // We've got to encode these to avoid conflicting with italics/bold. + url = url.Replace("*", escapeTable["*"].ToString()).Replace("_", escapeTable["_"].ToString()); + res = string.Format("\"{1}\"",", RunSpanGamut(header), "\n\n"); + } + + private string SetextHeader2Evaluator(Match match) + { + string header = match.Groups[1].Value; + return string.Concat("

    ", RunSpanGamut(header), "

    \n\n"); + } + + private string AtxHeaderEvaluator(Match match) + { + string headerSig = match.Groups[1].Value; + string headerText = match.Groups[2].Value; + + return string.Concat("", RunSpanGamut(headerText), "\n\n"); + } + + #endregion + + #region Process ordered and unordered lists + + private string DoLists(string text) + { + // Re-usable pattern to match any entirel ul or ol list: + string pattern = null; + + string wholeList = string.Format(@" + ( # $1 = whole list + ( # $2 + [ ]{{0,{1}}} + ({0}) # $3 = first list item marker + [ \t]+ + ) + (?s:.+?) + ( # $4 + \z + | + \n{{2,}} + (?=\S) + (?! # Negative lookahead for another list item marker + [ \t]* + {0}[ \t]+ + ) + ) + )", markerAny, tabWidth - 1); + + // We use a different prefix before nested lists than top-level lists. + // See extended comment in _ProcessListItems(). + if (listLevel > 0) + { + pattern = "^" + wholeList; + text = Regex.Replace(text, pattern, new MatchEvaluator(ListEvaluator), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace); + } + else + { + pattern = @"(?:(?<=\n\n)|\A\n?)" + wholeList; + text = Regex.Replace(text, pattern, new MatchEvaluator(ListEvaluator), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace); + } + + return text; + } + + private string ListEvaluator(Match match) + { + string list = match.Groups[1].Value; + string listType = Regex.IsMatch(match.Groups[3].Value, markerUL) ? "ul" : "ol"; + string result = null; + + // Turn double returns into triple returns, so that we can make a + // paragraph for the last item in a list, if necessary: + list = Regex.Replace(list, @"\n{2,}", "\n\n\n"); + result = ProcessListItems(list, markerAny); + result = string.Format("<{0}>\n{1}\n", listType, result); + + return result; + } + + /// + /// Process the contents of a single ordered or unordered list, splitting it + /// into individual list items. 
+ /// + private string ProcessListItems(string list, string marker) + { + /* + The listLevel global keeps track of when we're inside a list. + Each time we enter a list, we increment it; when we leave a list, + we decrement. If it's zero, we're not in a list anymore. + + We do this because when we're not inside a list, we want to treat + something like this: + + I recommend upgrading to version + 8. Oops, now this line is treated + as a sub-list. + + As a single paragraph, despite the fact that the second line starts + with a digit-period-space sequence. + + Whereas when we're inside a list (or sub-list), that line will be + treated as the start of a sub-list. What a kludge, huh? This is + an aspect of Markdown's syntax that's hard to parse perfectly + without resorting to mind-reading. Perhaps the solution is to + change the syntax rules such that sub-lists must start with a + starting cardinal number; e.g. "1." or "a.". + */ + + listLevel++; + + // Trim trailing blank lines: + list = Regex.Replace(list, @"\n{2,}\z", "\n"); + + string pattern = string.Format( + @"(\n)? # leading line = $1 + (^[ \t]*) # leading whitespace = $2 + ({0}) [ \t]+ # list marker = $3 + ((?s:.+?) # list item text = $4 + (\n{{1,2}})) + (?= \n* (\z | \2 ({0}) [ \t]+))", marker); + + list = Regex.Replace(list, pattern, new MatchEvaluator(ListEvaluator2), + RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline); + listLevel--; + return list; + } + + private string ListEvaluator2(Match match) + { + string item = match.Groups[4].Value; + string leadingLine = match.Groups[1].Value; + + + if ((leadingLine != null && leadingLine != string.Empty) || Regex.IsMatch(item, @"\n{2,}")) + item = RunBlockGamut(Outdent(item)); + else + { + // Recursion for sub-lists: + item = DoLists(Outdent(item)); + item = item.TrimEnd('\n'); + item = RunSpanGamut(item); + } + + return string.Format("
  • {0}
  • \n", item); + } + + #endregion + + #region Process code blocks + + private string DoCodeBlocks(string text) + { + // TODO: Should we allow 2 empty lines here or only one? + string pattern = string.Format(@" + (?:\n\n|\A) + ( # $1 = the code block -- one or more lines, starting with a space/tab + (?: + (?:[ ]{{{0}}} | \t) # Lines must start with a tab or a tab-width of spaces + .*\n+ + )+ + ) + ((?=^[ ]{{0,{0}}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc", + tabWidth); + + text = Regex.Replace(text, pattern, + new MatchEvaluator(CodeBlockEvaluator), + RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace); + + return text; + } + + private string CodeBlockEvaluator(Match match) + { + string codeBlock = match.Groups[1].Value; + codeBlock = EncodeCode(Outdent(codeBlock)); + + // Trim leading newlines and trailing whitespace + codeBlock = Regex.Replace(codeBlock, @"^\n+", string.Empty); + codeBlock = Regex.Replace(codeBlock, @"\s+\z", string.Empty); + + return string.Concat("\n\n
    ", codeBlock, "\n
    \n\n"); + } + + #endregion + + #region Process code spans + + private string DoCodeSpans(string text) + { + /* + * Backtick quotes are used for spans. + * You can use multiple backticks as the delimiters if you want to + include literal backticks in the code span. So, this input: + + Just type ``foo `bar` baz`` at the prompt. + + Will translate to: + +

    Just type foo `bar` baz at the prompt.

    + + There's no arbitrary limit to the number of backticks you + can use as delimters. If you need three consecutive backticks + in your code, use four for delimiters, etc. + + * You can use spaces to get literal backticks at the edges: + + ... type `` `bar` `` ... + + Turns to: + + ... type `bar` ... + */ + + string pattern = @" + (`+) # $1 = Opening run of ` + (.+?) # $2 = The code block + (?", s, ""); + } + + #endregion + + #region Encode/escape certain characters inside Markdown code runs + + /// + /// Encode/escape certain characters inside Markdown code runs. + /// + /// + /// The point is that in code, these characters are literals, and lose their + /// special Markdown meanings. + /// + private string EncodeCode(string code) + { + code = code.Replace("&", "&").Replace("<", "<").Replace(">", ">"); + + foreach (string key in escapeTable.Keys) + code = code.Replace(key, escapeTable[key].ToString()); + + return code; + } + + #endregion + + #region Process bold and italics + + private string DoItalicsAndBold(string text) + { + // must go first: + text = Regex.Replace(text, @"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1", + new MatchEvaluator(BoldEvaluator), + RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline); + + // Then : + text = Regex.Replace(text, @"(\*|_) (?=\S) (.+?) (?<=\S) \1", + new MatchEvaluator(ItalicsEvaluator), + RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline); + return text; + } + + private string ItalicsEvaluator(Match match) + { + return string.Format("{0}", match.Groups[2].Value); + } + + private string BoldEvaluator(Match match) + { + return string.Format("{0}", match.Groups[2].Value); + } + + #endregion + + #region Process blockquotes + + private string DoBlockQuotes(string text) + { + string pattern = + @"( # Wrap whole match in $1 + ( + ^[ \t]*>[ \t]? 
# '>' at the start of a line + .+\n # rest of the first line + (.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + )"; + + text = Regex.Replace(text, pattern, new MatchEvaluator(BlockQuoteEvaluator), RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline); + return text; + } + + private string BlockQuoteEvaluator(Match match) + { + string bq = match.Groups[1].Value; + + // Trim one level of quoting - trim whitespace-only lines + bq = Regex.Replace(bq, @"^[ \t]*>[ \t]?", string.Empty, RegexOptions.Multiline); + bq = Regex.Replace(bq, @"^[ \t]+$", string.Empty, RegexOptions.Multiline); + + bq = RunBlockGamut(bq); + bq = Regex.Replace(bq, @"^", " ", RegexOptions.Multiline); + + // These leading spaces screw with
     content, so we need to fix that:
    +            bq = Regex.Replace(bq, @"(\s*
    .+?
    )", new MatchEvaluator(BlockQuoteEvaluator2), RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline); + + return string.Format("
    \n{0}\n
    \n\n", bq); + } + + private string BlockQuoteEvaluator2(Match match) + { + string pre = match.Groups[1].Value; + pre = Regex.Replace(pre, @"^ ", string.Empty, RegexOptions.Multiline); + + return pre; + } + + #endregion + + #region Create paragraph tags + + private string FormParagraphs(string text) + { + // Strip leading and trailing lines: + text = Regex.Replace(text, @"^\n+", string.Empty); + text = Regex.Replace(text, @"\n+\z", string.Empty); + + string[] grafs = Regex.Split(text, @"\n{2,}"); + + // Wrap

    tags. + for (int i = 0; i < grafs.Length; i++) + { + // Milan Negovan: I'm adding an additional check for an empty block of code. + // Otherwise an empty

    is created. + if (htmlBlocks[grafs[i]] == null && grafs[i].Length > 0) + { + string block = grafs[i]; + + block = RunSpanGamut(block); + block = Regex.Replace(block, @"^([ \t]*)", "

    "); + block += "

    "; + + grafs[i] = block; + } + } + + // Unhashify HTML blocks + for (int i = 0; i < grafs.Length; i++) + { + string block = (string)htmlBlocks[grafs[i]]; + + if (block != null) + grafs[i] = block; + } + + return string.Join("\n\n", grafs); + + } + + #endregion + + #region Process emails and links + + private string DoAutoLinks(string text) + { + text = Regex.Replace(text, "<((https?|ftp):[^'\">\\s]+)>", new MatchEvaluator(HyperlinkEvaluator)); + + // Email addresses: + string pattern = + @"< + (?:mailto:)? + ( + [-.\w]+ + \@ + [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ + ) + >"; + + text = Regex.Replace(text, pattern, new MatchEvaluator(EmailEvaluator), RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace); + + return text; + } + + private string HyperlinkEvaluator(Match match) + { + string link = match.Groups[1].Value; + return string.Format("{0}", link); + } + + private string EmailEvaluator(Match match) + { + string email = UnescapeSpecialChars(match.Groups[1].Value); + + /* + Input: an email address, e.g. "foo@example.com" + + Output: the email address as a mailto link, with each character + of the address encoded as either a decimal or hex entity, in + the hopes of foiling most address harvesting spam bots. E.g.: + + foo + @example.com + + Based by a filter by Matthew Wickline, posted to the BBEdit-Talk + mailing list: + + */ + email = "mailto:" + email; + + // leave ':' alone (to spot mailto: later) + email = Regex.Replace(email, @"([^\:])", new MatchEvaluator(EncodeEmailEvaluator)); + + email = string.Format("{0}", email); + + // strip the mailto: from the visible part + email = Regex.Replace(email, "\">.+?:", "\">"); + return email; + } + + private string EncodeEmailEvaluator(Match match) + { + char c = Convert.ToChar(match.Groups[1].Value); + + Random rnd = new Random(); + int r = rnd.Next(0, 100); + + // Original author note: + // Roughly 10% raw, 45% hex, 45% dec + // '@' *must* be encoded. I insist. 
+ if (r > 90 && c != '@') return c.ToString(); + if (r < 45) return string.Format("&#x{0:x};", (int)c); + + return string.Format("&#x{0:x};", (int)c); + } + + #endregion + + #region EncodeAmpsAndAngles, EncodeBackslashEscapes, UnescapeSpecialChars, Outdent, UnslashQuotes + + /// + /// Smart processing for ampersands and angle brackets that need to be encoded. + /// + private string EncodeAmpsAndAngles(string text) + { + // Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin: + // http://bumppo.net/projects/amputator/ + + text = Regex.Replace(text, @"&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)", "&"); + + // Encode naked <'s + text = Regex.Replace(text, @"<(?![a-z/?\$!])", "<", RegexOptions.IgnoreCase); + + return text; + } + + private string EncodeBackslashEscapes(string value) + { + // Must process escaped backslashes first. + foreach (string key in backslashEscapeTable.Keys) + value = value.Replace(key, backslashEscapeTable[key].ToString()); + + return value; + } + + /// + /// Swap back in all the special characters we've hidden. 
+ /// + private string UnescapeSpecialChars(string text) + { + foreach (string key in escapeTable.Keys) + text = text.Replace(escapeTable[key].ToString(), key); + + return text; + } + + /// + /// Remove one level of line-leading tabs or spaces + /// + private string Outdent(string block) + { + return Regex.Replace(block, @"^(\t|[ ]{1," + tabWidth.ToString() + @"})", string.Empty, RegexOptions.Multiline); + } + #endregion + + #region Replace tabs with spaces and pad them to tab width + + private string Detab(string text) + { + // Inspired from a post by Bart Lateur: + // http://www.nntp.perl.org/group/perl.macperl.anyperl/154 + return Regex.Replace(text, @"^(.*?)\t", new MatchEvaluator(TabEvaluator), RegexOptions.Multiline); + } + + private string TabEvaluator(Match match) + { + string leading = match.Groups[1].Value; + return string.Concat(leading, RepeatString(" ", tabWidth - leading.Length % tabWidth)); + } + + #endregion + + #region Helper methods (RepeatString & ComputeMD5) + + /// + /// This is to emulate what's evailable in PHP + /// + /// + /// + /// + private static string RepeatString(string text, int count) + { + string res = null; + + for (int i = 0; i < count; i++) + res += text; + + return res; + } + + /// + /// Calculate an MD5 hash of an arbitrary string + /// + /// + /// + private static string ComputeMD5(string text) + { + MD5 algo = MD5.Create(); + byte[] plainText = Encoding.UTF8.GetBytes(text); + byte[] hashedText = algo.ComputeHash(plainText); + string res = null; + + foreach (byte b in hashedText) + res += b.ToString("x2"); + + return res; + } + #endregion + } +} \ No newline at end of file diff --git a/src/MarkdownSharp/MarkdownOptions.cs b/src/MarkdownSharp/MarkdownOptions.cs new file mode 100644 index 0000000..ce9620d --- /dev/null +++ b/src/MarkdownSharp/MarkdownOptions.cs @@ -0,0 +1,172 @@ +using System; +using System.Collections.Specialized; +using System.Configuration; +using System.Runtime.CompilerServices; +using 
System.Web.Configuration; + +namespace MarkdownSharp +{ + /// + /// Container for Markdown options. This class is immutable, create a new instance + /// if you want to change the options after construction. + /// + public class MarkdownOptions + { + /// + /// Default constructor. First loads default values, then + /// overrides them with any values specified in the configuration file. + /// Configuration values are specified in the <appSettings> + /// section of the config file and take the form Markdown.PropertyName + /// where PropertyName is any of the properties in this class. + /// + public MarkdownOptions() : this(true) { } + + /// + /// Sets all values to their defaults, and if loadFromConfigFile + /// is true it overrides them with configuration file values. + /// Configuration values are specified in the <appSettings> + /// section of the config file and take the form Markdown.PropertyName + /// where PropertyName is any of the properties in this class. + /// + /// True to override defaults with values from config file. + public MarkdownOptions(bool loadFromConfigFile) + : this(loadFromConfigFile, new[] { ConfigurationManager.AppSettings, WebConfigurationManager.AppSettings }) + { + } + + + /// + /// Constructor for internal use and unit testing. 
+ /// + /// + /// + internal MarkdownOptions(bool loadFromConfigFile, NameValueCollection[] configProviders) + { + Defaults(); + if (!loadFromConfigFile) return; + + + foreach (var appSettings in configProviders) + { + foreach (string key in appSettings.Keys) + { + switch (key) + { + case "Markdown.AutoHyperlink": + AutoHyperlink = Convert.ToBoolean(appSettings[key]); + break; + case "Markdown.AutoNewlines": + AutoNewlines = Convert.ToBoolean(appSettings[key]); + break; + case "Markdown.EmptyElementSuffix": + EmptyElementSuffix = appSettings[key]; + break; + case "Markdown.EncodeProblemUrlCharacters": + EncodeProblemUrlCharacters = Convert.ToBoolean(appSettings[key]); + break; + case "Markdown.LinkEmails": + LinkEmails = Convert.ToBoolean(appSettings[key]); + break; + case "Markdown.NestDepth": + NestDepth = Convert.ToInt32(appSettings[key]); + break; + case "Markdown.StrictBoldItalic": + StrictBoldItalic = Convert.ToBoolean(appSettings[key]); + break; + case "Markdown.TabWidth": + TabWidth = Convert.ToInt32(appSettings[key]); + break; + } + } + } + } + + + /// + /// Sets all options explicitly and does not attempt to override them with values + /// from configuration file. 
+ /// + /// + /// + /// + /// + /// + /// + /// + /// + public MarkdownOptions(bool autoHyperlink, bool autoNewlines, string emptyElementsSuffix, + bool encodeProblemUrlCharacters, bool linkEmails, int nestDepth, bool strictBoldItalic, + int tabWidth) + { + AutoHyperlink = autoHyperlink; + AutoNewlines = autoNewlines; + EmptyElementSuffix = emptyElementsSuffix; + EncodeProblemUrlCharacters = encodeProblemUrlCharacters; + LinkEmails = linkEmails; + NestDepth = nestDepth; + StrictBoldItalic = strictBoldItalic; + TabWidth = tabWidth; + } + + /// + /// Sets all fields to their default values + /// + private void Defaults() + { + AutoHyperlink = false; + AutoNewlines = false; + EmptyElementSuffix = " />"; + EncodeProblemUrlCharacters = false; + LinkEmails = true; + NestDepth = 6; + StrictBoldItalic = false; + TabWidth = 4; + } + + /// + /// when true, (most) bare plain URLs are auto-hyperlinked + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool AutoHyperlink { get; private set; } + + /// + /// when true, RETURN becomes a literal newline + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool AutoNewlines { get; private set; } + + /// + /// use ">" for HTML output, or " />" for XHTML output + /// + public string EmptyElementSuffix { get; private set; } + + /// + /// when true, problematic URL characters like [, ], (, and so forth will be encoded + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool EncodeProblemUrlCharacters { get; private set; } + + /// + /// when false, email addresses will never be auto-linked + /// WARNING: this is a significant deviation from the markdown spec + /// + public bool LinkEmails { get; private set; } + + /// + /// maximum nested depth of [] and () supported by the transform + /// + public int NestDepth { get; private set; } + + /// + /// when true, bold and italic require non-word characters on either side + /// WARNING: this is a 
significant deviation from the markdown spec + /// + public bool StrictBoldItalic { get; private set; } + + /// + /// Tabs are automatically converted to spaces as part of the transform + /// this variable determines how "wide" those tabs become in spaces + /// + public int TabWidth { get; private set; } + } +} diff --git a/src/MarkdownSharp/MarkdownSharp.csproj b/src/MarkdownSharp/MarkdownSharp.csproj new file mode 100644 index 0000000..3f2ae8a --- /dev/null +++ b/src/MarkdownSharp/MarkdownSharp.csproj @@ -0,0 +1,95 @@ + + + + Debug + AnyCPU + 9.0.30729 + 2.0 + {37619116-CCE8-465A-8B1F-081CA53364BB} + Library + Properties + MarkdownSharp + MarkdownSharp + v3.5 + 512 + + + + + 3.5 + + publish\ + true + Disk + false + Foreground + 7 + Days + false + false + true + 0 + 1.0.0.%2a + false + false + true + + + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + AllRules.ruleset + + + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + AllRules.ruleset + + + + + + 3.5 + + + + + + + + + + + + False + .NET Framework 3.5 SP1 Client Profile + false + + + False + .NET Framework 3.5 SP1 + true + + + False + Windows Installer 3.1 + true + + + + + \ No newline at end of file diff --git a/src/MarkdownWin/MainForm.cs b/src/MarkdownWin/MainForm.cs index 54c7155..c1f4227 100644 --- a/src/MarkdownWin/MainForm.cs +++ b/src/MarkdownWin/MainForm.cs @@ -90,20 +90,9 @@ private void RefreshPreview(string fileName) private void BrowserDocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e) { - const string htmlTemplate = "{1}"; - if (browser.Document != null) { - string stylesheet; - using (var stream = Assembly.GetExecutingAssembly().GetManifestResourceStream(this.GetType(), "markdown.css")) - using (var reader = new StreamReader(stream)) - { - stylesheet = reader.ReadToEnd(); - } - - string html = string.Format(htmlTemplate, stylesheet, _pendingPreviewHtml); - browser.Document.Write(html); - + browser.Document.Write(Stylizer.Run(_pendingPreviewHtml)); 
Debug.WriteLine("Document Completed and written to."); } } diff --git a/src/MarkdownWin/MarkdownWin.csproj b/src/MarkdownWin/MarkdownWin.csproj index 4035940..4a41fb8 100644 --- a/src/MarkdownWin/MarkdownWin.csproj +++ b/src/MarkdownWin/MarkdownWin.csproj @@ -1,104 +1,109 @@ - - - - Debug - x86 - 8.0.30703 - 2.0 - {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F} - WinExe - Properties - MarkdownWin - MarkdownWin - v4.0 - - - 512 - - - x86 - true - full - false - bin\Debug\ - DEBUG;TRACE - prompt - 4 - - - x86 - pdbonly - true - bin\Release\ - TRACE - prompt - 4 - - - - - - - - ..\..\packages\MarkdownSharp.1.13.0.0\lib\35\MarkdownSharp.dll - - - - - - - - - - - - - - - - - Form - - - MainForm.cs - - - ResXFileCodeGenerator - Resources.Designer.cs - Designer - - - True - Resources.resx - True - - - MainForm.cs - - - - - SettingsSingleFileGenerator - Settings.Designer.cs - - - True - Settings.settings - True - - - - - - - - - + + + + Debug + x86 + 8.0.30703 + 2.0 + {F578EEC8-1C03-43B6-92A5-76C25D9A4F0F} + WinExe + Properties + MarkdownWin + MarkdownWin + v4.0 + + + 512 + + + x86 + true + full + false + bin\Debug\ + DEBUG;TRACE + prompt + 4 + false + + + x86 + pdbonly + true + bin\Release\ + TRACE + prompt + 4 + + + + + + + + + + + + + + + + + + + + + + Form + + + MainForm.cs + + + + ResXFileCodeGenerator + Resources.Designer.cs + Designer + + + True + Resources.resx + True + + + MainForm.cs + + + + + SettingsSingleFileGenerator + Settings.Designer.cs + + + True + Settings.settings + True + + + + + + + + + + + {37619116-CCE8-465A-8B1F-081CA53364BB} + MarkdownSharp + + + + --> \ No newline at end of file diff --git a/src/MarkdownWin/MarkdownWin.csproj.user b/src/MarkdownWin/MarkdownWin.csproj.user new file mode 100644 index 0000000..27dd4ad --- /dev/null +++ b/src/MarkdownWin/MarkdownWin.csproj.user @@ -0,0 +1,6 @@ + + + + -in G:\Dev\Projects\Commercial\MarkdownWin\readme.md + + \ No newline at end of file diff --git a/src/MarkdownWin/Program.cs b/src/MarkdownWin/Program.cs index 
e67cbf6..c5c8112 100644 --- a/src/MarkdownWin/Program.cs +++ b/src/MarkdownWin/Program.cs @@ -1,20 +1,106 @@ -using System; -using System.Linq; -using System.Windows.Forms; - -namespace MarkdownWin -{ - static class Program - { - /// - /// The main entry point for the application. - /// - [STAThread] - static void Main() - { - Application.EnableVisualStyles(); - Application.SetCompatibleTextRenderingDefault(false); - Application.Run(new MainForm()); - } - } -} +using System; +using System.Linq; +using System.Windows.Forms; +using System.Collections.Generic; +using MarkdownSharp; +using System.IO; + +namespace MarkdownWin { + static class Program { + /// + /// The main entry point for the application. + /// + [STAThread] + static void Main(string[] args) { + if (args != null && args.Length > 0) { + RunCli(args); + } else { + RunForm(); + } + } + + static void RunForm() { + Application.EnableVisualStyles(); + Application.SetCompatibleTextRenderingDefault(false); + Application.Run(new MainForm()); + } + + static void RunCli(string[] args) { + const string inArg = "in"; + const string outArg = "out"; + const string rawArg = "raw"; + const string helpArg = "help"; + + try { + var parsedArgs = ParseArgsRaw(args); + + if (parsedArgs.ContainsKey(helpArg)) + PrintCliHelp(); + else { + var inPath = GetAbsPath(parsedArgs[inArg][0]); + var outPath = GetAbsPath(GetOutPath(inPath, outArg, parsedArgs)); + + var mkDown = new Markdown(); + var result = mkDown.Transform(File.ReadAllText(inPath)); + + if (!parsedArgs.ContainsKey(rawArg)) + File.WriteAllText((outPath), Stylizer.Run(result), System.Text.Encoding.UTF8); + else + File.WriteAllText((outPath), result, System.Text.Encoding.UTF8); + } + } catch (Exception ex) { + PrintCliHelp(ex.Message); + } + } + + private static string GetOutPath(string inPath, string outArg, Dictionary> parsedArgs) { + if (parsedArgs.ContainsKey(outArg)) + return parsedArgs[outArg][0]; + else { + var fi = new FileInfo(inPath); + return 
fi.FullName.Substring(0, fi.FullName.Length - fi.Extension.Length) + ".html"; + } + } + + static string GetAbsPath(string path) { + if (Path.IsPathRooted(path)) + return path; + else + return Path.Combine(Environment.CurrentDirectory, path); + } + + static void PrintCliHelp(string errorMsg = "") { + if (errorMsg != "") + Console.WriteLine("Error: " + errorMsg); + + Console.WriteLine("Usage: -in [-out ] [-raw]"); + Console.WriteLine(" -in: Path to the markdown file"); + Console.WriteLine(" -out: (Optional) Path to the output html file (uses .html by default)"); + Console.WriteLine(" -raw: (Optional) Don't stylize the output."); + } + + static Dictionary> ParseArgsRaw(string[] args) { + var parsedArgs = new Dictionary>(); + + var prev = string.Empty; + var curr = string.Empty; + + for (int i = 0; i < args.Length; i++) { + curr = args[i]; + + if (curr.StartsWith("-")) { + var arg = curr.TrimStart('-'); + + parsedArgs[arg] = new List(2); + prev = arg; + } else if (!String.IsNullOrEmpty(prev)) { + parsedArgs[prev].Add(curr); + } else { + throw new ArgumentException("invalid syntax"); + } + } + + return parsedArgs; + } + } +} diff --git a/src/MarkdownWin/Stylizer.cs b/src/MarkdownWin/Stylizer.cs new file mode 100644 index 0000000..9a3192b --- /dev/null +++ b/src/MarkdownWin/Stylizer.cs @@ -0,0 +1,28 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Reflection; +using System.IO; + +namespace MarkdownWin { + class Stylizer { + public static string Run(string html, string cssOverridePath = "") { + const string htmlTemplate = + "{1}"; + + string stylesheet; + + if (String.IsNullOrEmpty(cssOverridePath)) { + using (var stream = Assembly.GetExecutingAssembly().GetManifestResourceStream(typeof(Stylizer), "markdown.css")) + using (var reader = new StreamReader(stream)) { + stylesheet = reader.ReadToEnd(); + } + } else { + stylesheet = File.ReadAllText(cssOverridePath); + } + + return string.Format(htmlTemplate, stylesheet, 
html); + } + } +} diff --git a/src/MarkdownWin/markdown.css b/src/MarkdownWin/markdown.css index 69aae2a..855c0dd 100644 --- a/src/MarkdownWin/markdown.css +++ b/src/MarkdownWin/markdown.css @@ -1,12 +1,18 @@ -body +@import url(http://fonts.googleapis.com/css?family=Open+Sans:300); + +body { margin: 0 auto; - font-family: Georgia, Palatino, serif; + font-family: Arial, sans-serif; color: #444444; line-height: 1; max-width: 960px; padding: 30px; } +h1, h2, h3 +{ + font-family: "Open Sans", serif; +} h1, h2, h3, h4 { color: #111111; @@ -40,24 +46,24 @@ h5 } a { - color: #0099ff; + color: #64B6B1; + text-decoration: none; margin: 0; padding: 0; vertical-align: baseline; } a:hover { - text-decoration: none; - color: #ff6600; + color: #46433A; } a:visited { - color: purple; + color: #CE534D; } ul, ol { padding: 0; - margin: 1em; + margin: 1.2em; } li { @@ -120,6 +126,9 @@ hr { width: 540px; text-align: left; - margin: 0 auto 0 0; - color: #999; + margin: 1.2em auto 1.2em 0; + color: #EEE; + background-color: #EEE; + border: 0; + height: 1px; }