// Colorized this
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using System.Web;
namespace UB92Processor
{
class MeditechCodeProcessor
{
/// <summary>
/// Reads ---- ------'s extract file and tries to match the assignments to
/// the "variable" @S to fields in the data dictionary, then writes one
/// tab-separated row per matched field to <paramref name="outputFile"/>.
/// </summary>
/// <remarks>
/// Sometimes the comments have the supposed (but not always correct) sequence
/// number, sometimes not. Sometimes there are multiple lines of comments/code
/// preceding the assignment. Most of the time it is simple to find the
/// RECTYPE field (aka table mnemonic). It is very likely that the code below
/// will need modifying when ----- modifies his code.
/// </remarks>
/// <param name="inputFile">Path of the extract code listing to read.</param>
/// <param name="outputFile">Path of the tab-separated file to create or overwrite.</param>
public static void Process(string inputFile, string outputFile)
{
    // Remove page-break residue: the CONTINUED marker line plus the three lines after it.
    // NOTE(review): the parentheses below form a capture group; they do not match literal
    // '(' / ')' characters. Confirm that is what the listing requires.
    const string StripPagingPattern = @"(CONTINUED)\s+\n(.*\n){3}";
    string code = Regex.Replace(File.ReadAllText(inputFile), StripPagingPattern, "");

    // Each block starts at a "; account # key" comment; the zero-width lookahead keeps
    // that marker as part of the block it introduces.
    Regex splitter = new Regex(@"(?=; account # key)");
    var blocks = splitter.Split(code).Skip(1); // the first element is not needed

    // One field = up to seven comment lines (the second may carry " - <sequence>"),
    // up to three non-comment set-up lines, and the line that assigns to @S.
    Regex field = new Regex(@"
(?<comment>
(?:
^;\s*\n
)?
(?:
^;.*?
(?:
\ -\s*(?<sequence>\d+)
)?
\s*\n
)?
(?:
^;.*\s*\n
){0,5}
)
(?<setup>
^[^;]
(?:
.(?!@S)
)*
\s*\n
){0,3}
(?<assignment>
^[^;].*\^@S
)", RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);

    // The table mnemonic is either a quoted literal assigned to @S or the bare word RECTYPE.
    const string RectypePattern = @"
(?<=^\s*"")
[A-Z0-9]{2,}
(?=""\^@S)
|
^RECTYPE(?=\^@S)
";
    Regex rectype = new Regex(RectypePattern, RegexOptions.IgnorePatternWhitespace);

    // Materialize once: the result is consumed twice below (Single, then the writer loop),
    // and a deferred query would repeat all of the regex matching and grouping each time.
    var parsedBlocks = (
        from block in blocks
        from m in field.Matches(block).Cast<Match>().ToIndexed()
        let extract = (Func<string, string>)(key => m.Value.Groups[key].Value.TrimEnd())
        let f = new
        {
            Comment = extract("comment").RegexReplace(@"^; (.*?)\s*$", "$1"),
            CommentSequence = extract("sequence"),
            CodeSequence = m.Index + 1,
            Setup = extract("setup"),
            Assignment = extract("assignment")
        }
        group f by block into g
        select new
        {
            // Exactly one assignment per block is expected to name the mnemonic;
            // Single throws if a block has none or more than one.
            Mnemonic = g.Select(f => rectype.Match(f.Assignment).Value.Nullify()).Single(s => s != null),
            Fields = g.AsEnumerable(),
            Block = g.Key
        }).ToList();

    // The block whose mnemonic is the literal RECTYPE is emitted under two fixed mnemonics.
    var rectypeBlock = parsedBlocks.Single(b => b.Mnemonic == "RECTYPE");
    var outputBlocks = parsedBlocks.Where(b => b.Mnemonic != "RECTYPE").Concat(new[] {
        new { Mnemonic = "ABSCPTMOD", Fields = rectypeBlock.Fields, Block = rectypeBlock.Block },
        new { Mnemonic = "ABSCHGCPTMOD", Fields = rectypeBlock.Fields, Block = rectypeBlock.Block },
    });

    using (StreamWriter sw = new StreamWriter(outputFile))
    {
        // NOTE(review): "CodeSquenceNo" looks like a typo for "CodeSequenceNo"; it is left
        // unchanged in case downstream consumers key on the existing header text.
        sw.WriteLine(new[] { "Mnemonic", "CommentSequenceNo", "CodeSquenceNo", "Comment", "SetupCode", "AssignmentCode" }.Join("\t"));
        foreach (var block in outputBlocks.Distinct(b => b.Mnemonic))
        {
            foreach (var f in block.Fields)
            {
                // Tabs inside values are replaced with spaces so they cannot break the column layout.
                sw.WriteLine(new[] { block.Mnemonic, f.CommentSequence, f.CodeSequence.ToString(), f.Comment, f.Setup, f.Assignment }.Select(s => s.Replace('\t', ' ')).Join("\t"));
            }
        }
    }
}
// Matches a module heading: a line that begins with a word character and otherwise
// contains only letters, digits, '.', '_', space or ':', and that is immediately
// preceded by a line consisting solely of a long run of dashes (optionally followed
// by whitespace-only lines). The dash line sits in a lookbehind, so only the heading
// text itself is returned as the match.
const string ModulePattern = @"
(?<=
^-----------------------------------+$\n
(^\s*$\n)?
)
\w[a-z0-9._ :]*$";
// IgnorePatternWhitespace allows the multi-line layout above; Multiline makes ^ and $
// match at line boundaries; IgnoreCase lets [a-z] accept upper-case letters too.
const RegexOptions ModuleOptions = RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline;
// Matches a name that stands alone on a line (optionally indented, optionally followed
// by a ';' comment): a letter followed by one or more letters, digits or dots.
// The (?!S\W) lookahead rejects candidates that start with 'S' followed by a non-word
// character.
const string SubroutinePattern = @"(?<=^\s*)(?!S\W)[A-Z][A-Z0-9.]+(?=\s*(?:;.*)?$)";
// Multiline anchors ^ and $ to individual lines; IgnoreCase widens [A-Z] to lower case.
const RegexOptions SubroutineOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;
/// <summary>
/// Derives an HTML anchor name from <paramref name="name"/> by removing every
/// character that does not match <c>[a-z0-9_.]</c> (compared case-insensitively).
/// </summary>
/// <param name="name">The module or subroutine name to convert.</param>
/// <returns>The name with all other characters stripped out.</returns>
private static string NameToAnchor(string name)
{
    Regex disallowed = new Regex("[^a-z0-9_.]", RegexOptions.IgnoreCase);
    return disallowed.Replace(name, string.Empty);
}
/// <summary>
/// Converts the code listing in <paramref name="inputFile"/> into a single HTML page,
/// written to <paramref name="outputFile"/>, with a module index, anchors and links for
/// subroutines and macro modules, and colouring for comments, keywords, operators and strings.
/// </summary>
/// <param name="inputFile">Path of the plain-text code listing to read.</param>
/// <param name="outputFile">Path of the HTML file to create or overwrite.</param>
public static void CodeToHtml(string inputFile, string outputFile)
{
    // The listing is HTML-encoded up front, so every later pattern runs against encoded
    // text: a double quote appears as &quot;, and other characters may appear as named
    // or numeric entities (for example &amp; or &#39;).
    string encoded = HttpUtility.HtmlEncode(File.ReadAllText(inputFile)).Replace("\r", "");

    var modules = GetModules(encoded)
        .ToDictionary(
            kvp => kvp.Key,
            kvp =>
            {
                // Turn only the first occurrence of the module name (the heading) into the
                // <h1> anchor. Replacing every occurrence would also rewrite mentions of the
                // name inside the module body, giving duplicate anchors and stray headings.
                string heading = string.Format("<h1><a name='{0}'>{1}</a></h1>", NameToAnchor(kvp.Key), kvp.Key);
                int at = kvp.Value.IndexOf(kvp.Key, StringComparison.Ordinal);
                return at < 0
                    ? kvp.Value
                    : kvp.Value.Remove(at, kvp.Key.Length).Insert(at, heading);
            });
    var modulesWithSubroutines = modules
        .ToDictionary(
            kvp => kvp.Key,
            kvp => new { Text = kvp.Value, Subroutines = GetSubroutines(kvp.Value) });

    StringBuilder sb = new StringBuilder();
    // The file name ends up inside <title>, so it is encoded like any other text.
    sb.AppendFormat(@"
<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>
<html xmlns='http://www.w3.org/1999/xhtml'>
<head>
<title>----- ------- Meditech Code {0}</title>
<style type='text/css'>
a.subroutine {{ color: red; }}
span.comment {{ color: green; }}
span.keyword {{ color: blue; }}
span.operator {{ color: gray; font-weight: bold; }}
span.string {{ color: maroon; }}
h1 {{ font-family: monospace; }}
</style>
</head>
<body>".TrimLiteral(), HttpUtility.HtmlEncode(inputFile));

    // Index of all modules, each linking to its heading anchor.
    sb.AppendFormat("<ul>{0}</ul>", modules.Keys
        .Select(key => string.Format("<li><a href='#{0}'>{1}</a></li>", NameToAnchor(key), key))
        .Join("\n"));

    // Macro modules are referenced from code by the part of their name after this prefix.
    const string MacroModulePrefix = "BAR.PAT.zcus.ep.breuer.ub.ibill.download.M.";
    var macroNames = modules.Keys
        .Where(key => key.StartsWith(MacroModulePrefix, StringComparison.Ordinal))
        .Select(key => Regex.Escape(key.Substring(MacroModulePrefix.Length)))
        .ToList();

    // Matches NAME inside "@Macro(NAME)". The parentheses must be escaped to be literal;
    // unescaped, "()" and "(?=)" are empty constructs that match nothing useful.
    // With no macro modules the alternation would be empty and match the empty string,
    // so the linking step is skipped entirely in that case.
    string moduleLinkPattern = macroNames.Count > 0
        ? string.Format(@"(?<=@Macro\()({0})(?=\))", macroNames.Join("|"))
        : null;

    foreach (var module in modulesWithSubroutines)
    {
        string html = LinkSubroutines(module.Value.Text, module.Value.Subroutines);

        if (moduleLinkPattern != null)
        {
            html = Regex.Replace(html, moduleLinkPattern, m =>
                string.Format("<a href='#{0}'>{1}</a>", NameToAnchor(MacroModulePrefix + m.Value), m.Value));
        }

        // Whole-line comments: lines whose first non-blank character is ';'.
        html = Regex.Replace(html, @"^\s*;.*", "<span class='comment'>$0</span>", RegexOptions.Multiline);

        // DO / IF as stand-alone words (not embedded in a dotted or alphanumeric name).
        html = Regex.Replace(html, @"(?<![a-zA-Z0-9.])(DO|IF)(?![a-zA-Z0-9.])", "<span class='keyword'>$0</span>", RegexOptions.Multiline);

        // Braces and semicolons, except inside comment lines and except the ';' that
        // terminates an HTML entity. "#?" also protects numeric entities such as &#39;.
        html = Regex.Replace(html, @"(?<!(^|>)\s*;.*|&#?\w{2,5})[{};]", "<span class='operator'>$0</span>", RegexOptions.Multiline);

        // String literals outside comment lines. Quotes are &quot; here because the text
        // was HTML-encoded before any of this processing.
        html = Regex.Replace(html, @"(?<!(^|>)\s*;.*)&quot;(?:(?!&quot;).)*?&quot;", "<span class='string'>$0</span>", RegexOptions.Multiline);

        // <h1>'s cannot exist in <pre>'s, so the <pre> opens right after the heading.
        sb.AppendFormat("{0}</pre>", html.Replace("</h1>", "</h1><pre>"));
    }

    sb.Append(@"
</body>
</html>
".TrimLiteral());
    File.WriteAllText(outputFile, sb.ToString());
}
/// <summary>
/// Wraps each subroutine definition in <paramref name="text"/> in a named anchor and turns
/// every <c>@NAME</c> reference to one of those subroutines into a link to that anchor.
/// </summary>
/// <param name="text">The module text to process.</param>
/// <param name="subroutines">
/// The subroutine names found in <paramref name="text"/>, each with the character index at
/// which it occurs.
/// </param>
/// <returns>The text with anchors and links inserted; unchanged when there are no subroutines.</returns>
private static string LinkSubroutines(string text, HashSet<IndexedValue<string>> subroutines)
{
    // Without this guard the reference pattern below would contain an empty alternation,
    // match the empty string after every '@', and insert empty links there.
    if (subroutines.Count == 0)
    {
        return text;
    }

    // Each insertion lengthens the text, so later positions must be shifted by the growth
    // accumulated so far. That only works when definitions are visited in ascending
    // position, and a HashSet gives no ordering guarantee, hence the explicit OrderBy.
    // The result is materialized because it is enumerated twice below.
    // NOTE(review): this relies on SequentialAggregate exposing, for each item, the
    // aggregate of the items before it. Confirm against that helper.
    var anchoredSubroutines = subroutines
        .OrderBy(sub => sub.Index)
        .Select(sub => new {
            sub.Index,
            Anchor = string.Format("<a class='subroutine' name='{0}'>{1}</a>", NameToAnchor(sub.Value), sub.Value),
            Name = sub.Value })
        .SequentialAggregate(0, (offset, sub) => offset + sub.Anchor.Length - sub.Name.Length)
        .Select(sagg => new {
            Index = sagg.Value.Index + sagg.Aggregate,
            Anchor = sagg.Value.Anchor,
            Name = sagg.Value.Name })
        .ToList();

    // Replace each definition with its anchor at the shifted position.
    StringBuilder sb = new StringBuilder(text);
    foreach (var sub in anchoredSubroutines)
    {
        sb.Remove(sub.Index, sub.Name.Length).Insert(sub.Index, sub.Anchor);
    }

    // A reference is a known name directly after '@' that is not followed by another name
    // character. '<' is excluded as well, so a name followed by markup is left alone.
    string pattern = string.Format("(?<=@)({0})(?![<a-zA-Z0-9.])", anchoredSubroutines
        .Select(sub => Regex.Escape(sub.Name))
        .Join("|"));

    return Regex.Replace(sb.ToString(), pattern, m => string.Format("<a href='#{0}'>{1}</a>", NameToAnchor(m.Value), m.Value));
}
/// <summary>
/// Finds every subroutine name in <paramref name="code"/> using
/// <c>SubroutinePattern</c> and returns each name together with the character
/// index at which it was matched.
/// </summary>
/// <param name="code">The module text to scan.</param>
/// <returns>A set of (name, index) entries, one per match.</returns>
private static HashSet<IndexedValue<string>> GetSubroutines(string code)
{
    MatchCollection hits = Regex.Matches(code, SubroutinePattern, SubroutineOptions);

    var located =
        from Match hit in hits
        select new IndexedValue<string>(hit.Value, hit.Index);

    return located.ToHashSet();
}
/// <summary>
/// Splits <paramref name="original"/> into modules keyed by heading text. Each module's
/// text runs from the start of its heading (as matched by <c>ModulePattern</c>) up to the
/// start of the next heading, or to the end of the input for the last one.
/// </summary>
/// <param name="original">The complete listing text.</param>
/// <returns>A dictionary mapping each heading to the text of its module.</returns>
private static Dictionary<string, string> GetModules(string original)
{
    MatchCollection headings = Regex.Matches(original, ModulePattern, ModuleOptions);

    // NOTE(review): SelfJoinByOffset(1) presumably pairs each heading with the one that
    // follows it, leaving Second null for the final heading. Confirm against that helper.
    return headings
        .Cast<Match>()
        .SelfJoinByOffset(1)
        .ToDictionary(
            pair => pair.First.Value,
            pair =>
            {
                int start = pair.First.Index;
                int end = pair.Second == null ? original.Length : pair.Second.Index;
                return original.Substring(start, end - start);
            });
}
}
}