tsunami

log in
history

Word2BBCode

Luke Breuer
2010-12-28 20:15 UTC

introduction
Supports:
  • [b][/b]
  • [i][/i]
  • [u][/u]
  • [s][/s]
  • [super][/super]
  • [sub][/sub]
  • [fixed][/fixed]
  • [list][/list]
    • [list=A][/list=A], nested, with 1,A,a,I,i
  • [url=...][/url]
  • [quote=...][/quote]
    • no nested quotes, unfortunately
code
'Modified 11/22/10 by Luke Breuer:
'  - added support for quotations (use a style called "quote")
'  - fixed hyperlink support to include the # section of URLs
'  - revamped list support for 1,A,a,I,i, bullets, and implemented nesting
'  - added support for Consolas font -> [fixed][/fixed]
'  - added support for superscript, subscript
'  - partially fixed bugs due to having vertical tabs (\x0B) instead of
'    carriage returns (\x0D, or 13) at the end of "quote headers"
'  - fixed a bug where text with multiple formattings (e.g. bold + italic)
'    would be transformed into [b][i]text[/b][/i] instead of [b][i]text[/i][/b]
'  - extracted common code (the amount of original code is extremely small now)
'Word2BBCode-Converter v0.1, June 2, 2006
'Matthew Kruer
'Some parts adapted from
'Word2Wiki-Converter V0.4, May 28, 2006
'http://de.wikipedia.org/wiki/Wikipedia:Helferlein/Word2MediaWikiPlus
'Original Version by InfPro: http://www.infpro.com/downloads/downloads/wordmedia.htm
'Major improvements by Gunter Schmidt, Mail me: Word2MediaWikiPlus@beadsoft.de
'Works only with Word 2000 and above
'License: GPL: Feel free to use and modify. Keep the credits and do not sell.

Sub Word2BBCode()
   
    Application.ScreenUpdating = False
       
    ConvertHyperlinks
    ConvertItalic
    ConvertBold
    ConvertUnderline
    ConvertSuperscript
    ConvertSubscript
    ConvertStrikethrough
    ConvertMonospace
    ConvertLists
    ConvertQuotes
   
    ActiveDocument.Content.Copy
    
    Call Selection.Find.ClearFormatting
   
    Application.ScreenUpdating = True
End Sub

Private Sub SurroundSelectionWithTag(tagName As String)
    With Selection
        .InsertBefore "[" & tagName & "]"
        .ClearCharacterDirectFormatting
        .Collapse (wdCollapseEnd)
        .InsertAfter "[/" & tagName & "]"
        .ClearCharacterDirectFormatting
    End With
End Sub

Private Sub SetupFindObject()
    ActiveDocument.Select
    
    With Selection.Find
        .ClearFormatting
        '.Font.Name = "Consolas"  (or whatever else goes here)
        .Text = ""
       
        .Format = True
        .MatchCase = False
        .MatchWholeWord = False
        .MatchWildcards = False
        .MatchSoundsLike = False
        .MatchAllWordForms = False
        
        .Forward = True
        .Wrap = wdFindContinue
    End With
End Sub

Private Sub ConvertMonospace()
    Call SetupFindObject
    Selection.Find.Font.Name = "Consolas"
    
    Do While Selection.Find.Execute
        With Selection
            If InStr(1, .Text, vbCr) Then
                ' Just process the chunk before any newline characters
                ' We'll pick-up the rest with the next search
                .Font.Name = "Calibri"
                .Collapse
                .MoveEndUntil vbCr
            End If
                                   
            ' Don't bother to markup newline characters (prevents a loop, as well)
            If Not .Text = vbCr Then
                .Font.Name = "Calibri"
                SurroundSelectionWithTag ("fixed")
            End If
        End With
    Loop
End Sub
Private Sub ConvertBold()
    Call SetupFindObject
    Selection.Find.Font.Bold = True
    
    ' note the extra work done on Bold: that's because quote headers are defined
    ' by being bold, and there are more restrictions on what they can be like in
    ' order for the quoting code to be released; what is done here is a major
    ' HACK, but I didn't want to expend even more time figuring out the API
    Dim otherLf As String
    otherLf = Chr(11)
    
    Do While Selection.Find.Execute
        With Selection
            ' Just process the chunk before any newline characters
            ' We'll pick-up the rest with the next search
            If InStr(1, .Text, vbCr) Then
                .Font.Bold = False
                .Collapse
                .MoveEndUntil vbCr
            ElseIf InStr(1, .Text, otherLf) Then
                .Font.Bold = False
                .Collapse
                .MoveEndUntil otherLf
            End If
            
                                   
            ' Don't bother to markup newline characters (prevents a loop, as well)
            If Not .Text = vbCr And Not .Text = otherLf Then
                ' if it's bold until the end of the line, but not including the newline,
                ' the .InsertAfter statement will keep the current formatting, including
                ' bold and hyperlink, which screws up quoting functionality; doing the
                ' below fixes things, and only requires that the quote have content
                If .Style = "Quote" Then
                '    .MoveEnd wdCharacter, 1
                '    .MoveEnd wdCharacter, -1
                End If
                
                .Font.Bold = False
                SurroundSelectionWithTag ("b")
            End If
        End With
    Loop
End Sub
Private Sub ConvertItalic()
    Call SetupFindObject
    Selection.Find.Font.Italic = True
      
    Do While Selection.Find.Execute
        With Selection
            If InStr(1, .Text, vbCr) Then
                ' Just process the chunk before any newline characters
                ' We'll pick-up the rest with the next search
                .Font.Italic = False
                .Collapse
                .MoveEndUntil vbCr
            End If
                                   
            ' Don't bother to markup newline characters (prevents a loop, as well)
            If Not .Text = vbCr Then
                .Font.Italic = False
                SurroundSelectionWithTag ("i")
            End If
        End With
    Loop
End Sub
Private Sub ConvertUnderline()
    Call SetupFindObject
    Selection.Find.Font.Underline = True

    Do While Selection.Find.Execute
        With Selection
            If InStr(1, .Text, vbCr) Then
                ' Just process the chunk before any newline characters
                ' We'll pick-up the rest with the next search
                .Font.Underline = False
                .Collapse
                .MoveEndUntil vbCr
            End If
                                   
            ' Don't bother to markup newline characters (prevents a loop, as well)
            If Not .Text = vbCr And .Range.Hyperlinks.Count = 0 Then
                .Font.Underline = False
                SurroundSelectionWithTag ("u")
            End If
        End With
    Loop
End Sub
Private Sub ConvertSubscript()
    Call SetupFindObject
    Selection.Find.Font.Subscript = True
       
    Do While Selection.Find.Execute
        With Selection
            If InStr(1, .Text, vbCr) Then
                ' Just process the chunk before any newline characters
                ' We'll pick-up the rest with the next search
                .Font.Superscript = False
                .Collapse
                .MoveEndUntil vbCr
            End If
                                   
            ' Don't bother to markup newline characters (prevents a loop, as well)
            If Not .Text = vbCr Then
                .Font.Superscript = False
                SurroundSelectionWithTag ("sub")
            End If
        End With
    Loop
End Sub
Private Sub ConvertSuperscript()
    Call SetupFindObject
    Selection.Find.Font.Superscript = True
        
    Do While Selection.Find.Execute
        With Selection
            If InStr(1, .Text, vbCr) Then
                ' Just process the chunk before any newline characters
                ' We'll pick-up the rest with the next search
                .Font.Superscript = False
                .Collapse
                .MoveEndUntil vbCr
            End If
                                   
            ' Don't bother to markup newline characters (prevents a loop, as well)
            If Not .Text = vbCr Then
                .Font.Superscript = False
                SurroundSelectionWithTag ("super")
            End If
        End With
    Loop
End Sub
Private Sub ConvertStrikethrough()
    Call SetupFindObject
    Selection.Find.Font.StrikeThrough = True
        
    Do While Selection.Find.Execute
        With Selection
            If InStr(1, .Text, vbCr) Then
                ' Just process the chunk before any newline characters
                ' We'll pick-up the rest with the next search
                .Font.StrikeThrough = False
                .Collapse
                .MoveEndUntil vbCr
            End If
                                   
            ' Don't bother to markup newline characters (prevents a loop, as well)
            If Not .Text = vbCr Then
                .Font.StrikeThrough = False
                SurroundSelectionWithTag ("s")
            End If
        End With
    Loop
End Sub

Private Sub ConvertQuotes()
    Call EnsureBoundedByNormalStyle

    Dim r As RegExp
    Dim matches As MatchCollection
    Set r = New RegExp
    Dim paras As Collection
    
    Set paras = New Collection
    
    r.Pattern = "^(?:[b\]([^\r\n]+)[/b\][\r\n\x0B]+)?(.*?)[\r\n]+$"
    
    ActiveDocument.Select
    
    Call Selection.GoTo(wdGoToLine, wdGoToFirst)
    
    With Selection.Find
        Call SetupFindObject
        
        .Wrap = wdFindStop
        .Style = "Quote"
        
        Do While .Execute
            Call paras.Add(Selection.Range)
        Loop
    End With
    
    Call Selection.GoTo(wdGoToLine, wdGoToLast)
    
    Call paras.Add(Selection.Range)
    
    Dim quotes As Collection
    Dim rng As Range
    
    Set quotes = New Collection
    Dim qStart As Long
    Dim qEnd As Long
    Dim lastEnd As Long
    
    lastEnd = paras(1).Start
    
    For Each rng In paras
        'Debug.Print rng.Start & " - " & rng.End & " : " & qStart & " : " & qEnd
        
        If lastEnd = rng.Start And rng.Style = "Quote" Then
            ' continue a quote
            qEnd = rng.End
            
            If qStart = 0 Then qStart = rng.Start
        ElseIf qStart <> qEnd Then
            ' new quote
            'Debug.Print "adding " & qStart & " - " & qEnd
            
            Call quotes.Add(ActiveDocument.Range(qStart, qEnd))
            
            qStart = rng.Start
            qEnd = rng.End
        End If
        
        lastEnd = rng.End
    Next rng
    
    Dim i As Integer
    
    For i = quotes.Count To 1 Step -1
        Call Selection.SetRange(quotes(i).Start, quotes(i).End)
            
        With Selection
            Set matches = r.Execute(.Text)
                        
            Dim submatch As String
            
            If matches.Count > 0 Then
                submatch = matches(0).SubMatches(0)
            Else
                submatch = ""
            End If
                                    
            If submatch <> "" Then
                .Text = r.Replace(.Text, "[quote=""$1""]$2[/quote]")
            Else
                .Text = r.Replace(.Text, "[quote]$2[/quote]")
            End If
        End With
    Next i
End Sub

' The code below is kind of hacky, because Microsoft's list implementation
' leaves a lot to be desired.  Fortunately, it seems to work well, although
' it has not been tested with blank lines in lists.
'
' This code assumes that [list=X] needs to be terminated by [/list=X],
' instead of [/list].  Making the change would be trivial.
Private Sub ConvertLists()
    Dim l As List
    Dim p As Paragraph
    Dim curLevel As Integer
    Dim i As Integer
    Dim typeStack(10) As String ' hopefully lists aren't indented more than this
        
    curLevel = 1
        
    For Each l In ActiveDocument.Lists
        With l.Range
            For i = 1 To .ListParagraphs.Count
                Set p = .ListParagraphs(i)
                
                p.Range.InsertBefore "[*]"
                
                If p.Range.ListFormat.ListLevelNumber < curLevel Then
                    p.Range.InsertBefore "[/list" & typeStack(curLevel) & "]"
                    curLevel = curLevel - 1
                ElseIf p.Range.ListFormat.ListLevelNumber > curLevel Then
                    curLevel = curLevel + 1
                    typeStack(curLevel) = ExtractListSuffix(p.Range.ListFormat.ListString)
                    p.Range.InsertBefore "[list" & typeStack(curLevel) & "]"
                End If
            Next i
            
            Do While curLevel > 1
                .InsertAfter "[/list" & typeStack(curLevel) & "]"
                curLevel = curLevel - 1
            Loop
            
            Dim bullet As String
            
            bullet = ExtractListSuffix(l.Range.ListFormat.ListString)
            
            .InsertBefore "[list" & bullet & "]"
            .InsertAfter "[/list" & bullet & "]" & vbCrLf
            .ListFormat.RemoveNumbers
        End With
    Next l
End Sub

Public Function ExtractListSuffix(bullet As String) As String
    bullet = Left$(bullet, 1)
    
    If InStr(1, "AI1", bullet, vbTextCompare) > 0 Then
        ExtractListSuffix = "=" & bullet
    Else
        ExtractListSuffix = ""
    End If
End Function

Private Sub ConvertHyperlinks()
    '24-MAY-2006: only convert http..., mark others with error marker
    Dim hyperCount&
    Dim i&
    Dim addr$ ', title$

    hyperCount = ActiveDocument.Hyperlinks.Count

    For i = 1 To hyperCount

        With ActiveDocument.Hyperlinks(1) 'must be 1, since the delete changes count and position

            addr = .Address
            If Trim$(addr) = "" Then addr = "no hyperlink found"
            'title = .Range.Text
            
            If .SubAddress <> "" Then addr = addr & "#" & .SubAddress
           
            'http, ftp
            If LCase(Left$(addr, 4)) = "http" Or LCase(Left$(addr, 3)) = "ftp" Then
                .Delete 'hyperlink
                .Range.InsertBefore "[url=" & addr & "]"
                .Range.InsertAfter "[/url]"
               
                GoTo ConvertHyperlinks_Next
            End If
           
            'mailto:
            If LCase(Left$(addr, 7)) = "mailto:" Then
                .Delete 'hyperlink
                .Range.InsertBefore "[email]" & addr & " "
                .Range.InsertAfter "[/email]"
               
                GoTo ConvertHyperlinks_Next
            End If
           
            'file guess
            If Len(addr) > 4 Then 'the reason for not nice goto
                If Mid$(addr, Len(addr) - 3, 1) = "." Then
                    .Delete
                    .Range.InsertBefore "[file://"; & Replace(addr, " ", "_") & " "
                    .Range.InsertAfter "]"
                   
                    GoTo ConvertHyperlinks_Next
                End If
            End If
           
            'unidentified
            .Delete
            .Range.InsertBefore UnableToConvertMarker & "[" & addr & " "
            .Range.InsertAfter "]"

ConvertHyperlinks_Next:
        End With

    Next i
End Sub


' If the first or last section of the document is of Quote style, the ConvertQuotes
' routine won't process them; instead of making a change there, which is tricky with
' Microsoft Range objects and such, we just add non-formatted, non-styled whitespace
' if we need to.
Private Sub EnsureBoundedByNormalStyle()
    Selection.HomeKey Unit:=wdStory
    
    If Selection.Style = "Quote" Then
        Selection.TypeParagraph
        Selection.MoveUp Unit:=wdLine, Count:=1
        Selection.MoveRight Unit:=wdCharacter, Count:=1, Extend:=wdExtend
        Selection.Font.Reset
        Selection.Style = ActiveDocument.Styles("Normal")
    End If
    
    Selection.EndKey Unit:=wdStory
    
    If Selection.Style = "Quote" Then
        Selection.TypeParagraph
    End If
End Sub