tsunami

log in
history

MeditechCodeProcessor

Luke Breuer
2008-03-21 22:44 UTC

Colorized this
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;
using System.Web;

namespace UB92Processor
{
    class MeditechCodeProcessor
    {
        /// <summary>
        /// Reads ---- ------'s extract file and tries to match the assignments to
        /// the "variable" @S to fields in the data dictionary, then writes one
        /// tab-separated row per matched field to <paramref name="outputFile"/>.
        /// </summary>
        /// <remarks>
        /// Sometimes the comments have the supposed (but not always correct) sequence 
        /// number, sometimes not.  Sometimes there are multiple lines of comments/code 
        /// preceding the assignment.  Most of the time it is simple to find the 
        /// RECTYPE field (aka table mnemonic).  It is very likely that the code below
        /// will need modifying when ----- modifies his code.
        /// </remarks>
        /// <param name="inputFile">Path of the extract code listing to parse.</param>
        /// <param name="outputFile">Path of the tab-separated report to write.</param>
        /// <exception cref="InvalidOperationException">
        /// A block does not contain exactly one recognizable mnemonic assignment, or
        /// the input does not contain exactly one block whose mnemonic is RECTYPE.
        /// </exception>
        public static void Process(string inputFile, string outputFile)
        {
            // NOTE(review): the parentheses below form a capture group, so this matches the
            // bare word CONTINUED; if the page footer literally reads "(CONTINUED)" they
            // should be escaped -- confirm against a real extract before changing.
            const string StripPagingPattern = @"(CONTINUED)\s+\n(.*\n){3}";
            string code = Regex.Replace(File.ReadAllText(inputFile), StripPagingPattern, "");

            // Each block starts at an "; account # key" comment; the zero-width lookahead
            // keeps that comment at the head of its block.
            Regex splitter = new Regex(@"(?=; account # key)");
            var blocks = splitter.Split(code).Skip(1); // the first element is not needed

            // One field = optional leading comment lines (with an optional " - <sequence>"
            // suffix), up to three non-comment setup lines that do not reference @S, and
            // the line that assigns to @S.  The setup lines are wrapped in a single
            // capturing group: a quantified group only retains its LAST capture, which
            // previously dropped all but the final setup line from the report.
            Regex field = new Regex(@"
                (?<comment>
                    (?:
                        ^;\s*\n
                    )?
                    (?:
                        ^;.*?
                        (?:
                            \ -\s*(?<sequence>\d+)
                        )?
                        \s*\n
                    )?
                    (?:
                        ^;.*\s*\n
                    ){0,5}
                )
                (?<setup>
                    (?:
                        ^[^;]
                        (?:
                            .(?!@S)
                        )*
                        \s*\n
                    ){0,3}
                )
                (?<assignment>
                    ^[^;].*\^@S
                )", RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);

            // The mnemonic is either a quoted literal assigned to @S or the RECTYPE variable.
            const string RectypePattern = @"
                (?<=^\s*"")
                [A-Z0-9]{2,}
                (?=""\^@S)
                |
                ^RECTYPE(?=\^@S)
                ";
            Regex rectype = new Regex(RectypePattern, RegexOptions.IgnorePatternWhitespace);

            // Materialize once: the result is consulted twice below and re-running the
            // deferred query would repeat all of the regex work.
            var parsed =
                (from block in blocks
                 from m in field.Matches(block).Cast<Match>().ToIndexed()
                 let extract = (Func<string, string>)(key => m.Value.Groups[key].Value.TrimEnd())
                 let f = new
                 {
                     Comment = extract("comment").RegexReplace(@"^; (.*?)\s*$", "$1"),
                     CommentSequence = extract("sequence"),
                     CodeSequence = m.Index + 1,
                     Setup = extract("setup"),
                     Assignment = extract("assignment")
                 }
                 group f by block into g
                 select new
                 {
                     Mnemonic = g.Select(f => rectype.Match(f.Assignment).Value.Nullify()).Single(s => s != null),
                     Fields = g.AsEnumerable(),
                     Block = g.Key
                 }).ToList();

            // The block keyed by the RECTYPE variable is emitted once for each of the two
            // mnemonics below instead of under the name "RECTYPE".
            var rectypeBlock = parsed.Single(b => b.Mnemonic == "RECTYPE");

            var output = parsed.Where(b => b.Mnemonic != "RECTYPE").Concat(new[] {
                new { Mnemonic = "ABSCPTMOD", Fields = rectypeBlock.Fields, Block = rectypeBlock.Block },
                new { Mnemonic = "ABSCHGCPTMOD", Fields = rectypeBlock.Fields, Block = rectypeBlock.Block },
                });

            using (StreamWriter sw = new StreamWriter(outputFile))
            {
                // NOTE(review): "CodeSquenceNo" is misspelt but kept as-is because consumers
                // of the report may key on the existing column name.
                sw.WriteLine(new[] { "Mnemonic", "CommentSequenceNo", "CodeSquenceNo", "Comment", "SetupCode", "AssignmentCode" }.Join("\t"));

                foreach (var block in output.Distinct(b => b.Mnemonic))
                {
                    foreach (var f in block.Fields)
                    {
                        // Tabs inside a value would break the column layout, so flatten them.
                        sw.WriteLine(new[] { block.Mnemonic, f.CommentSequence.ToString(), f.CodeSequence.ToString(), f.Comment, f.Setup, f.Assignment }.Select(s => s.Replace('\t', ' ')).Join("\t"));
                    }
                }
            }
        }

        // Matches a module heading: a line that begins with a word character followed by
        // letters, digits, '.', '_', ' ' or ':' and that comes directly after a ruler line
        // made up solely of dashes (optionally separated from it by one blank line).
        // The lookbehind uses "$\n", so the text is expected to use "\n" line endings.
        const string ModulePattern = @"
            (?<=
                ^-----------------------------------+$\n
                (^\s*$\n)?
            )
            \w[a-z0-9._ :]*$";
        // IgnorePatternWhitespace is required because ModulePattern is laid out over several lines.
        const RegexOptions ModuleOptions = RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline;

        // Matches a subroutine label: an identifier (a letter followed by one or more
        // letters, digits or periods) that stands alone on its line, allowing leading
        // whitespace and a trailing ';' comment.  The (?!S\W) guard rejects a lone "S"
        // followed by a non-word character.
        const string SubroutinePattern = @"(?<=^\s*)(?!S\W)[A-Z][A-Z0-9.]+(?=\s*(?:;.*)?$)";
        const RegexOptions SubroutineOptions = RegexOptions.IgnoreCase | RegexOptions.Multiline;

        /// <summary>
        /// Turns a module or subroutine name into the value used for HTML anchors by
        /// removing every character other than the letters a-z (matched
        /// case-insensitively), digits, underscores and periods.
        /// </summary>
        /// <param name="name">The name to convert.</param>
        /// <returns><paramref name="name"/> with all disallowed characters removed.</returns>
        private static string NameToAnchor(string name)
        {
            const string DisallowedCharacters = "[^a-z0-9_.]";

            Regex stripper = new Regex(DisallowedCharacters, RegexOptions.IgnoreCase);
            return stripper.Replace(name, string.Empty);
        }

        /// <summary>
        /// Renders the code listing in <paramref name="inputFile"/> as one HTML page:
        /// a table of contents of modules, followed by each module's text with
        /// subroutine labels anchored, subroutine and macro calls linked, and
        /// comments, keywords, operators and string literals colored.
        /// </summary>
        /// <param name="inputFile">Path of the code listing to read.</param>
        /// <param name="outputFile">Path of the HTML file to write.</param>
        public static void CodeToHtml(string inputFile, string outputFile)
        {
            // The text is HTML-encoded up front, so every pattern below works on encoded
            // text (e.g. quotes appear as &quot;).  Carriage returns are dropped because
            // the module/subroutine patterns anchor on "\n".
            var modules = GetModules(HttpUtility.HtmlEncode(File.ReadAllText(inputFile)).Replace("\r", ""))
                .ToDictionary(
                    kvp => kvp.Key,
                    kvp => WrapModuleHeading(kvp.Key, kvp.Value));
            var modulesWithSubroutines = modules
                .ToDictionary(
                    kvp => kvp.Key,
                    kvp => new { Text = kvp.Value, Subroutines = GetSubroutines(kvp.Value) });

            StringBuilder sb = new StringBuilder();

            sb.AppendFormat(@"
                <!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Transitional//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'>
                <html xmlns='http://www.w3.org/1999/xhtml'>
                <head>
                    <title>-----  ------- Meditech Code {0}</title>
                    <style type='text/css'>
                        a.subroutine {{ color: red; }}
                        span.comment {{ color: green; }}
                        span.keyword {{ color: blue; }}
                        span.operator {{ color: gray; font-weight: bold; }}
                        span.string {{ color: maroon; }}
                        h1 {{ font-family: monospace; }}
                    </style>
                </head>
                <body>".TrimLiteral(), inputFile);

            // Table of contents: one link per module heading.
            sb.AppendFormat("<ul>{0}</ul>", modules.Keys
                .Select(key => string.Format("<li><a href='#{0}'>{1}</a></li>", NameToAnchor(key), key))
                .Join("\n"));

            // Macro modules are referenced from code as @Macro(<name>), where <name> is the
            // part of the module name that follows this prefix.
            const string MacroModulePrefix = "BAR.PAT.zcus.ep.breuer.ub.ibill.download.M.";
            string[] macroNames = modules.Keys
                .Where(key => key.StartsWith(MacroModulePrefix, StringComparison.Ordinal))
                .Select(key => key.Substring(MacroModulePrefix.Length))
                .Where(name => name.Length > 0)
                .Select(name => Regex.Escape(name))
                .ToArray();

            // The parentheses of the call must be escaped; unescaped they turn the whole
            // expression into a single lookbehind that only ever matches the empty string.
            // With no macro modules there is nothing to link, so no pattern is built.
            string moduleLinkPattern = macroNames.Length > 0
                ? string.Format(@"(?<=@Macro\()({0})(?=\))", string.Join("|", macroNames))
                : null;

            foreach (var module in modulesWithSubroutines)
            {
                string locallyLinked = LinkSubroutines(module.Value.Text, module.Value.Subroutines);

                if (moduleLinkPattern != null)
                {
                    locallyLinked = Regex.Replace(locallyLinked, moduleLinkPattern, m =>
                        string.Format("<a href='#{0}'>{1}</a>", NameToAnchor(MacroModulePrefix + m.Value), m.Value));
                }

                // Order matters: later patterns rely on lookbehinds to skip text that is
                // already part of a comment line or of an HTML entity such as &quot;.
                locallyLinked = Regex.Replace(locallyLinked, @"^\s*;.*", "<span class='comment'>$0</span>", RegexOptions.Multiline);
                locallyLinked = Regex.Replace(locallyLinked, @"(?<![a-zA-Z0-9.])(DO|IF)(?![a-zA-Z0-9.])", "<span class='keyword'>$0</span>", RegexOptions.Multiline);
                locallyLinked = Regex.Replace(locallyLinked, @"(?<!(^|>)\s*;.*|&\w{2,5})[{};]", "<span class='operator'>$0</span>", RegexOptions.Multiline);
                locallyLinked = Regex.Replace(locallyLinked, @"(?<!(^|>)\s*;.*)&quot;(?:(?!&quot;).)*?&quot;", "<span class='string'>$0</span>", RegexOptions.Multiline);

                // <h1>'s cannot exist in <pre>'s
                sb.AppendFormat("{0}</pre>", locallyLinked.Replace("</h1>", "</h1><pre>"));
            }

            sb.Append(@"
                </body>
                </html>
                ".TrimLiteral());

            File.WriteAllText(outputFile, sb.ToString());
        }

        /// <summary>
        /// Wraps the first occurrence of <paramref name="moduleName"/> in
        /// <paramref name="moduleText"/> in an anchored &lt;h1&gt; heading.  Only the first
        /// occurrence is touched so that later mentions of the module name inside its
        /// own body are not turned into additional headings.
        /// </summary>
        private static string WrapModuleHeading(string moduleName, string moduleText)
        {
            int at = moduleText.IndexOf(moduleName, StringComparison.Ordinal);
            if (at < 0)
                return moduleText;

            string heading = string.Format("<h1><a name='{0}'>{1}</a></h1>", NameToAnchor(moduleName), moduleName);
            return moduleText.Substring(0, at) + heading + moduleText.Substring(at + moduleName.Length);
        }

        /// <summary>
        /// Turns every subroutine label in <paramref name="text"/> into a named anchor
        /// and every "@label" reference to one of those labels into a link to it.
        /// </summary>
        /// <param name="text">The module text the subroutine indices refer to.</param>
        /// <param name="subroutines">
        /// Label names together with the index at which each one starts in
        /// <paramref name="text"/>.
        /// </param>
        /// <returns>
        /// The linked text, or <paramref name="text"/> unchanged when there are no subroutines.
        /// </returns>
        private static string LinkSubroutines(string text, HashSet<IndexedValue<string>> subroutines)
        {
            // Without any labels the reference pattern would contain an empty alternation
            // and insert an empty link after every '@', so bail out early.
            if (subroutines.Count == 0)
                return text;

            StringBuilder sb = new StringBuilder(text);

            // A HashSet has no guaranteed order.  Replacing from the highest index down
            // keeps every not-yet-processed index valid without any offset bookkeeping.
            foreach (var sub in subroutines.OrderByDescending(s => s.Index))
            {
                string anchor = string.Format("<a class='subroutine' name='{0}'>{1}</a>", NameToAnchor(sub.Value), sub.Value);
                sb.Remove(sub.Index, sub.Value.Length).Insert(sub.Index, anchor);
            }

            // A reference is '@' followed by a known label that is not the prefix of a
            // longer identifier and is not immediately followed by markup.
            string alternatives = string.Join("|", subroutines
                .Select(s => Regex.Escape(s.Value))
                .Distinct()
                .ToArray());
            string pattern = string.Format("(?<=@)({0})(?![<a-zA-Z0-9.])", alternatives);

            return Regex.Replace(sb.ToString(), pattern, m => string.Format("<a href='#{0}'>{1}</a>", NameToAnchor(m.Value), m.Value));
        }

        /// <summary>
        /// Finds every subroutine label in <paramref name="code"/> using
        /// <c>SubroutinePattern</c>.
        /// </summary>
        /// <param name="code">The module text to scan.</param>
        /// <returns>Each label's text paired with the index at which it was matched.</returns>
        private static HashSet<IndexedValue<string>> GetSubroutines(string code)
        {
            var labels =
                from Match label in Regex.Matches(code, SubroutinePattern, SubroutineOptions)
                select new IndexedValue<string>(label.Value, label.Index);

            return labels.ToHashSet();
        }

        /// <summary>
        /// Splits <paramref name="original"/> into modules.  Each module runs from the
        /// start of its heading (as matched by <c>ModulePattern</c>) up to the start of
        /// the next heading, or to the end of the text for the last one.
        /// </summary>
        /// <param name="original">The complete listing.</param>
        /// <returns>Module text keyed by the module's heading text.</returns>
        /// <exception cref="ArgumentException">Two headings have the same text.</exception>
        private static Dictionary<string, string> GetModules(string original)
        {
            MatchCollection headings = Regex.Matches(original, ModulePattern, ModuleOptions);
            var modules = new Dictionary<string, string>();

            // SelfJoinByOffset(1) presumably pairs each heading with the one that follows it;
            // Second is null when there is no following heading.
            foreach (var pair in headings.Cast<Match>().SelfJoinByOffset(1))
            {
                int start = pair.First.Index;
                int end = pair.Second != null ? pair.Second.Index : original.Length;

                modules.Add(pair.First.Value, original.Substring(start, end - start));
            }

            return modules;
        }
    }
}