tsunami

log in
history

Different browser transformations of HTML

Luke Breuer
2010-10-02 05:11 UTC

introduction
The HTML accessible by javascript is not necessarily the same HTML served by the web server:
Firefox 3.6 and Safari 5
  • replaces <br /> with <br>
IE8
  • replaces <br /> with <br>
  • removes quotations around attributes; some examples:
    • a.target
    • a.rel
    • *.class
  • reorders attributes
    • <a rel target>
  • upper-cases attribute names
  • removes whitespace between inline* elements that would not be rendered

* that is, elements which are displayed inline by default, not elements which are merely styled as inline
seeing exactly what happens
table definition
-- One row per capture of a post's HTML as reported by a browser.
create table Web_Debug
(
    -- surrogate key; the analysis queries use it to pick the latest capture
    web_debug_pk int identity primary key,
    -- joined to Post.post_pk by the analysis queries (no FK constraint is declared)
    post_fk int not null,
    -- the HTML as posted by the browser
    text nvarchar(max) not null,
    -- the User-Agent of the browser that posted the HTML
    user_agent varchar(256) not null,
    -- insertion time, filled in by the server
    input_date smalldatetime not null default getdate()
)
go

-- insert-only access for the principal named "time"
grant insert on Web_Debug to time
.NET .ashx handler
debug.ashx.cs
using System.Web;
using System.Web.Services;
using System.Web.SessionState;

namespace Time
{
    /// <summary>
    /// HTTP handler that stores the HTML a browser reports for a post, together
    /// with the browser's User-Agent, so the browser's view can later be compared
    /// with the HTML held in the database.
    /// </summary>
    public class debug : IHttpHandler, IRequiresSessionState
    {
        /// <summary>
        /// Reads the form fields "pk" (post primary key) and "text" (the HTML)
        /// plus the request's User-Agent, and saves them through the data layer.
        /// Responds with status 412 when "pk" is missing, not an integer or -1,
        /// or when "text" or the User-Agent is absent.
        /// </summary>
        /// <param name="context">The current HTTP request/response context.</param>
        public void ProcessRequest(HttpContext context)
        {
            // NOTE(review): presumably rejects unauthenticated callers; defined elsewhere.
            Common.CheckThreadAuthentication();

            string text = context.Request.Form["text"];
            string userAgent = context.Request.UserAgent;

            // int.TryParse writes 0 to its out parameter on failure, so a sentinel
            // initial value cannot detect a bad "pk"; the return value must be used.
            int postPk;
            bool hasPostPk = int.TryParse(context.Request.Form["pk"], out postPk);

            // -1 is still rejected, as it was before this check used the return value.
            if (!hasPostPk || postPk == -1 || text == null || userAgent == null)
            {
                context.Response.StatusCode = 412;
                return;
            }

            using (var dal = Common.GetDataLayer())
            {
                dal.SaveDebugToDb(postPk, text, userAgent);
            }
        }

        /// <summary>Always false: the handler instance is not offered for reuse.</summary>
        public bool IsReusable { get { return false; } }
    }
}
DataLayer.SaveDebugToDb
        /// <summary>
        /// Inserts one row into Web_Debug recording the HTML a browser reported
        /// for a post.
        /// </summary>
        /// <param name="postPk">Primary key of the post the HTML belongs to.</param>
        /// <param name="text">The HTML as reported by the browser.</param>
        /// <param name="userAgent">
        /// The browser's User-Agent string; stored truncated to 256 characters.
        /// </param>
        internal void SaveDebugToDb(int postPk, string text, string userAgent)
        {
            // Web_Debug.user_agent is varchar(256). A longer value would make the
            // insert fail with a truncation error, so clamp it to the column width.
            const int MaxUserAgentLength = 256;
            if (userAgent != null && userAgent.Length > MaxUserAgentLength)
            {
                userAgent = userAgent.Substring(0, MaxUserAgentLength);
            }

            string sql = "insert Web_Debug (post_fk, text, user_agent) values (@post_fk, @text, @user_agent)";

            using (var cmd = this.Connection.NewSqlCommand(sql))
            {
                cmd.Parameters.AddWithValue("post_fk", postPk);
                cmd.Parameters.AddWithValue("text", text);
                cmd.Parameters.AddWithValue("user_agent", userAgent);

                cmd.ExecuteNonQuery();
            }
        }
Thread.aspx page with HTML to be measured
(uses http://jquery.com/)
    <input type="hidden" id="postSizes" runat="server" />
    <pre id="debug"></pre>
    <script type="text/javascript">
    /* <![CDATA[ */
// Run the size comparison once the DOM is ready.
$(document).ready(analyze_sizes);

/**
 * For every post listed in the hidden #postSizes field, posts the browser's
 * view of that post's HTML to /debug.ashx and, once the request succeeds,
 * appends "<browser length> : <DB length>" to the #debug element.
 */
function analyze_sizes()
{
    // Looks up the output element each time it is needed.
    function debugPane()
    {
        return $('#debug')[0];
    }

    // NOTE(review): eval is used because the field holds a JavaScript object
    // literal with unquoted keys, which is not valid JSON. The value is written
    // by the server-side page; it must never contain user-controlled text.
    var dbSizes = eval($('#postSizes')[0].value);

    debugPane().innerHTML = '';

    $.each(dbSizes, function(rowId, dbLength) {
        // The post HTML lives in one particular table cell of the row;
        // the other cells hold the post author, etc.
        var cell = $('#' + rowId + ' td.content')[0];

        // The row id is "post" + pk, so the first run of digits is the pk.
        var payload = { pk : /\d+/.exec(rowId)[0], text : cell.innerHTML };

        $.post('/debug.ashx', payload, function() {
            debugPane().innerHTML += cell.innerHTML.length + ' : ' + dbLength + '<br />';
        });
    });
}
    /* ]]> */
    </script>
Thread.aspx.cs
p is of type Post and has the property .Text, which contains the rendered HTML. We build a JavaScript object literal whose keys are "post" + post_pk and whose values are the length of the post HTML according to the DB. This isn't strictly needed, but it's nice to see the difference between DB and browser without having to run SQL.
// One "post<pk>:<length>" entry per post, where length is the character count
// of the post's HTML as held server-side.
var postSizeEntries = posts.Select(p => string.Format("post{0}:{1}", p.Pk, p.Text.Length));

// Wrap the comma-separated entries as "({...})" so the page script can
// evaluate the field value as an object literal.
postSizes.Value = string.Format("({{{0}}})", postSizeEntries.Join(","));
DB analysis
The following assumes that only three browsers are used, and no differing versions are tested.
-- Per post: byte length of the HTML in the DB next to the byte length of the
-- most recent capture from each of the three browsers.
with Simplified (pretty_name, user_agent) as
(
    -- display name paired with a LIKE pattern matched against the stored User-Agent
    select 'Firefox', '%Firefox%'
    union
    select 'IE', '%Trident%'
    union
    select 'Safari', '%Safari%'
),
Numbered as
(
    -- number the captures newest-first within each (user agent, post) pair
    select  *,
            row_number() over (partition by user_agent, post_fk
                               order by web_debug_pk desc) as [row_number]
    from    Web_Debug
),
Project as
(
    -- keep only the newest capture and reduce both texts to their lengths
    select  w.post_fk,
            datalength(p.text) as [DB],
            s.pretty_name,
            datalength(w.text) as [len]
    from    Numbered w
            inner join Simplified s
                on w.user_agent like s.user_agent
            inner join Post p
                on p.post_pk = w.post_fk
    where   w.row_number = 1
)
-- one column per browser
select  post_fk, DB, Firefox, IE, Safari
from    Project
pivot   (max(len) for pretty_name in (Firefox, IE, Safari)) p
PowerShell for extracting to files
Other than bcp.exe, which I couldn't seem to get to operate correctly with the queryout argument, this seems to be the easiest way to write the text out to files. Note that SSMS restricts the maximum text output length to 8192 characters, so longer HTML will not be returned in full.
# Writes the DB copy of one post's HTML, and the most recent copy captured from
# each browser, to sql.txt, Firefox.txt, IE.txt and Safari.txt in the current
# directory.
$cn = new-object System.Data.SqlClient.SqlConnection "server=asp3;database=time_database;Integrated Security=sspi"
try
{
    $cn.Open()
    $cmd = $cn.CreateCommand()
    # Simplified.user_agent holds LIKE patterns ('%Firefox%', ...), so the join
    # against the stored User-Agent must use LIKE; an equality join matches no rows.
    $cmd.CommandText = "
with Simplified(pretty_name, user_agent) as (
    select  'Firefox', '%Firefox%'
    union
    select  'IE', '%Trident%'
    union
    select  'Safari', '%Safari%'
), Numbered as (
    select  *,
            row_number = row_number() over (partition by user_agent, post_fk order by web_debug_pk desc)
    from    Web_Debug
), Project as (
    select  w.post_fk,
            db_text = p.text,
            s.pretty_name,
            browser_text = w.text
    from    Numbered w
    inner join Simplified s on w.user_agent like s.user_agent
    inner join Post p on p.post_pk = w.post_fk
    where   w.row_number = 1
)
select  post_fk,
        db_text,
        Firefox,
        IE,
        Safari
from    Project
pivot (max(browser_text) for pretty_name in (Firefox, IE, Safari))p
where   post_fk = 6169
"
    $dr = $cmd.ExecuteReader()
    try
    {
        # The query is filtered to a single post, so at most one row is expected.
        if ($dr.Read())
        {
            $dr["db_text"] > sql.txt;
            $dr["Firefox"] > Firefox.txt;
            $dr["IE"] > IE.txt;
            $dr["Safari"] > Safari.txt;
        }
    }
    finally
    {
        # Release the reader even if reading or writing a file fails.
        $dr.Dispose();
    }
}
finally
{
    # Dispose also closes the connection, and runs even when an earlier step throws.
    $cn.Dispose()
}