As of January 12, 2008.
Consider: when this page renders, I'm sending TIME code through the TIME rendering (markup -> HTML) engine. This is, umm,
meta. In particular,
TIME syntax allows backslash escaping. When that escaping is applied to the TIME code itself, weird things happen: backslashes disappear from the listing. The backslash-escaping code is highlighted below; even
that isn't shown correctly, because the escaping has been applied to it as well.
#region Parsing
/// <summary>
/// Renders this.Data (wiki-style markup) as an HTML fragment by running it through a
/// fixed sequence of text replacements. The steps run in this order:
///   1. HTML-encode the raw text.
///   2. ParseSeparatedHyperlinks: rewrite reference-style links.
///   3. Quoted lines become blockquote elements; a leading ! adds class='important'.
///   4. Line breaks around bullets and around {{ / }} are collapsed.
///   5. ParseListMarkup: "- " bullets become nested lists.
///   6. Lines starting with 1..MaxHeading '=' become headings; the level is
///      MaxHeading - count + 1, so fewer '=' means a smaller heading.
///   7. [[target]] / [[target:text]] links: a scheme:// target becomes a plain anchor,
///      anything else goes through Urls.Item(...).RenderHyperlink(...).
///   8. Remaining bare scheme:// URLs are wrapped in anchors.
///   9. _text_ becomes em, *text* becomes strong.
///  10. "\n" becomes "<br />".
///  11. `text` becomes a code element.
///  12. {{ ... }} becomes a pre element: br/em/strong substitutions made by earlier
///      steps are undone inside it, lines starting with ! are wrapped in
///      code class='highlight', and an explicit width style is emitted once the
///      longest line reaches MinLenForExplicitWidth characters.
///  13. Escape markers in front of markup characters are removed.
/// </summary>
/// <returns>The generated HTML, or null when this.Data is null.</returns>
/// <remarks>
/// NOTE(review): several regex literals in this listing look garbled (backslashes
/// missing, a bare double quote inside a verbatim string) - confirm them against the
/// real source before relying on the exact patterns shown here.
/// </remarks>
public string ParseMarkup()
{
// Nothing to render.
if (this.Data == null)
return null;
// Encode first: every later step matches against, and inserts tags into, the encoded text.
string html = HttpUtility.HtmlEncode(this.Data);
// BUG: breaks if a [=]+ line has a URL reference in it
html = ParseSeparatedHyperlinks(html);
// whatever: -> bold
//html = Regex.Replace(html, @"^\w[a-zA-Z0-9_- ]+:(?!//)", "<strong>$0</strong>", RegexOptions.Multiline);
// quoted line -> <blockquote>; group 1 (the optional !) selects class='important'
html = Regex.Replace(html,
@"(?:\r?\n|^)\ *"(!)?((?:(?!"$).)+)"\ *(?:\r?\n|$)",
m => string.Format("<blockquote{0}>{1}</blockquote>", m.Groups[1].Success ? " class='important'" : "", m.Groups[2].Value),
RegexOptions.IgnoreCase);
// two linebreaks before/after bullet -> one linebreak
html = Regex.Replace(html,
@"(?<=\r\n)\r\n(?=- )|" +
@"(?<=^(?: )*- [^\r^\n]*)\r\n(?=\r\n)", ""); // (?<b>\r\n) taken out (right before the last \r\n)
html = Regex.Replace(html,
@"(?<!^(?: )*[-=][^\n\r]*)\r\n(?={{)|(?<=}})\r\n(?!(?: )*[-=])", "", RegexOptions.Multiline);
html = ParseListMarkup(html);
const int MaxHeading = 6;
html = Regex.Replace(
html, "(?:^|(?<=</ul>))(={1," + MaxHeading + @"})([^\r\n]+)(?=[\r\n]|$)(?:\r\n)?",
m => string.Format("<h{0}>{1}</h{0}>", MaxHeading - m.Groups[1].Length + 1, m.Groups[2].Value),
RegexOptions.Multiline);
// optional -> item url
html = Regex.Replace(html, @"(?x)
(?<!\)
[[
(?:
(\w+://(?:[^/]+:\d+)?[^\]]+?) # http://a.b.com:800 is ambiguous: is the description 800, or is that the port?
|
([a-zA-Z0-9][a-zA-Z0-9 -_#.:[\],]+)(/\d+)?
)
(?::([^:]*?))?
\]\]", (Match m) =>
{
Group http = m.Groups[1];
Group name = m.Groups[2];
Group pk = m.Groups[3];
Group display = m.Groups[4];
return !http.Success
?
Urls.Item(
pk.Success
? int.Parse(pk.Value.Substring("/".Length))
: -1,
Time.Item.EncodeName(name.Value)).RenderHyperlink(
display.Success
? display.Value
: name.Success
? name.Value
: pk.Value)
:
string.Format("<a href='{0}'>{1}</a>",
http.Value,
display.Success
? display.Value
: http.Value);
});
// hyperlink-ize
html = Regex.Replace(html,
@"(?<!<[^>]+href=[""'])\b\w+:// (?: (?<p>() | [\w#~!@#$%&-+=:;,./?] | (?(p)(?<-p>))) )+ (?<![.,:;!])",
"<a href='$0'>$0</a>", RegexOptions.IgnorePatternWhitespace);
// susceptible to <a href="http://www.breuer.com/">; test </a>,
// which has been turned into ..."breuer.com/">;"...
// emphasize
html = Regex.Replace(html, @"(?<!\)(?<=\s|^|>)_((?:[^_<>]|\_)+?)(?<!\)_", "<em>$1</em>");
// embolden
html = Regex.Replace(html, @"(?<!\)(?<=\s|^|>)*((?:[^_<>]|\*)+?)(?<!\)*", "<strong>$1</strong>");
// remove line break after }} //html = html.Replace("}}\r\n", "}}");
// linebreaks -> html linebreaks
html = html.Replace("\n", "<br />");
// single-line-code -> <code>single-line-code</code>
// (?<!\)(\\)* returns an uneven number of backslashes
html = Regex.Replace(html, @"
(?<!
(?<!\)\(?:\\)*
)
`
(
(?>
(?<!\)\(?:\\)*`
|
[^`]
)*
)
`", "<code>$1</code>", RegexOptions.IgnorePatternWhitespace);
// {{code}} -> <pre>code</pre>
html = Regex.Replace(html, @"(?x)
(?<!\)
{{
(
(?:
[^}]
|
(?:
}(?!})
|
\(?:}})+
)
)*
)
(?<!\)
}} (?!})", (Match m) =>
{
// HACK: undo em & strong inside code
string processed = Common.TabsToSpaces(m.Groups[1].Value.Replace("<br />", "\n")
.Replace("<em>", "_")
.Replace("</em>", "_")
.Replace("<strong>", "*")
.Replace("</strong>", "*"));
int maxLen = Functional.Maximum(Functional.Map(
processed.Split('\n'), s => Regex.Replace(Regex.Replace(s, @"&\w+;", " "), "<[^>]*>", "").TrimEnd().Length));
processed = Regex.Replace(processed, @"
^!
(
([ ])? # if indentation is in spaces, we add one to replace the !
[ \t]* # capture this to put it outside the <code> tags
)
([^\n\r]*)
([ \t]*) # capture this to put it outside the <code> tags
",
"$2$1<code class='highlight'>$3</code>$4",
RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
// MAGIC: boo yah!
const int MinLenForExplicitWidth = 81;
const double WidthDivisor = 1.77;
return string.Format("<pre{0}>{1}</pre>",
maxLen >= MinLenForExplicitWidth ? string.Format(" style='width:{0:0.00}em'", (maxLen + 0.8) / WidthDivisor) : "",
processed);
}, RegexOptions.Singleline | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
// remove backslashes in escapes
html = Regex.Replace(html, @"(?x)
! (
[[\`_*!]
|
\}\}
)", "$1");
return html;
}
/// <summary>
/// Rewrites reference-style links into inline [[target:text]] markup and then removes
/// the declaration lines. A "use" is a quoted piece of text immediately followed by a
/// reference ID; a "declaration" is a line that starts with a reference ID followed by
/// the link target (a scheme:// URL or an item name).
/// </summary>
/// <param name="html">
/// The markup to process. ParseMarkup calls this right after HTML-encoding the text.
/// </param>
/// <returns>
/// The text with every resolvable use replaced by [[target:text]] and all declaration
/// lines stripped. A use whose ID has no declaration positioned after it is left as is.
/// </returns>
/// <remarks>
/// NOTE(review): the pattern literals and the delimiter literal below look garbled in
/// this listing (escapes stripped, an entity apparently decoded to a bare quote). They
/// are reproduced exactly as found - confirm them against the real source.
/// </remarks>
private string ParseSeparatedHyperlinks(string html)
{
    // copied in part, with modifications, from ParseMarkup()
    const string DeclarationPattern = @"
(?:\r\n)? # optionally strip out the preceding newline
^([\d+\])\s* # our reference ID (allow any trailing spaces)
(
\w+:// (?: (?<p>() | [\w#~!@#$%&-+=:;,./?] | (?(p)(?<-p>))) )+ (?<![.,:;!])
|
[a-zA-Z0-9][a-zA-Z0-9 -_#.:[\],]+(?:/\d+)?
)
(?:\r\n)? # ditto the beginning";
    var declarations = Regex.Matches(html,
        DeclarationPattern,
        RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);

    // Group the declarations by reference ID; the same ID may be declared more than once.
    var decl = new Dictionary<string, List<Match>>();
    foreach (Match m in declarations)
    {
        List<Match> list;
        string key = m.Groups[1].Value;
        if (!decl.TryGetValue(key, out list))
            decl[key] = list = new List<Match>();
        list.Add(m);
    }

    // Order each group by position so Find() below yields the nearest following declaration.
    foreach (var kvp in decl)
        kvp.Value.Sort((a, b) => a.Index.CompareTo(b.Index));

    string replaced = Regex.Replace(html, @"("(?:(?!")[^\r\n])+")([\d+\])",
        use =>
        {
            List<Match> list;
            string key = use.Groups[2].Value;
            if (!decl.TryGetValue(key, out list))
                return use.Value;

            // List<T>.Find returns null when no declaration follows this use. Check
            // before touching dec: the previous code read dec.Groups first and threw
            // a NullReferenceException in that case.
            Match dec = list.Find(m => m.Index > use.Index);
            if (dec == null)
                return use.Value;

            int DelimiterLength = """.Length;
            // Drop the quote delimiter from both ends of the captured text.
            Converter<string, string> strip = s => s.Substring(DelimiterLength, s.Length - DelimiterLength * 2);
            string text = strip(use.Groups[1].Value);
            string link = dec.Groups[2].Value;
            // '_' -> ' ' required for valid names (see Time.Item.EncodeName)
            return string.Format("[[{0}:{1}]]", link.Contains("://") ? link : link.Replace('_', ' '), text);
        });

    // The declarations have served their purpose; remove them from the output.
    replaced = Regex.Replace(replaced, DeclarationPattern, "", RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
    return replaced;
}
/// <summary>
/// Converts bullet lines ("- item", nested by two leading spaces per level) into nested
/// ul/li markup. Lines that are not part of a list are copied through, each followed
/// by '\n'.
/// </summary>
/// <param name="html">Text to scan; it is split into lines on '\n'.</param>
/// <returns>
/// The text with bullet runs replaced by list markup. Because a trailing "\n" is added
/// before splitting, any list still open at the end of the input is closed.
/// </returns>
/// <remarks>
/// Lists are not closed while inside a {{ ... }} code block, and lines that follow an
/// opening {{ are emitted verbatim. A {{ preceded by a backslash does not open a block.
/// </remarks>
private static string ParseListMarkup(string html)
{
    int nestLevel = -1; // -1 means "not inside any list"
    StringBuilder sb = new StringBuilder();
    bool inCodeBlock = false;

    foreach (string line in (html + "\n").Split('\n'))
    {
        bool nestingIncreased = false;
        bool wasInCodeBlock = inCodeBlock;

        // An unescaped {{ opens a code block; any }} on the line closes it.
        // The lookbehind must be (?<!\\): with a single backslash the group is
        // never terminated and Regex.IsMatch throws on every call.
        inCodeBlock |= Regex.IsMatch(line, @"(?<!\\){{");
        inCodeBlock &= !line.Contains("}}");

        // Count the leading spaces.
        int spaces = 0;
        while (spaces < line.Length && line[spaces] == ' ')
        {
            spaces++;
        }

        // Not a bullet line -> -2, which halves to level -1 ("outside any list").
        if (line.Length < spaces + "- ".Length || line.Substring(spaces, 2) != "- ")
        {
            spaces = -2;
        }
        spaces /= 2; // two spaces of indentation per nesting level

        if (spaces > nestLevel)
        {
            // we're going up a nesting level (possibly several at once)
            for (int i = 0; i < spaces - nestLevel; i++)
            {
                sb.Append("<ul><li>");
            }
            nestLevel = spaces;
            nestingIncreased = true;
        }
        else if (nestLevel >= 0 && spaces < nestLevel && !inCodeBlock && !wasInCodeBlock)
        {
            // we're going down a nesting level (never while in a code block)
            for (int i = 0; i < nestLevel - spaces; i++)
            {
                sb.Append("</li></ul>");
            }
            nestLevel = spaces;
        }

        if (nestLevel >= 0 && !wasInCodeBlock)
        {
            // if nesting increased, we already emitted an <li>
            if (!nestingIncreased)
            {
                sb.Append("</li><li>");
            }
            // Skip the indentation and the "- " marker.
            sb.Append(line.Substring(spaces * 2 + "- ".Length));
        }
        else
        {
            sb.Append(line);
            sb.Append('\n');
        }
    }

    return sb.ToString();
}
#endregion