Skip to content

Instantly share code, notes, and snippets.

@shasso
Last active January 10, 2025 16:27
Show Gist options
  • Save shasso/1238568f4e9ad677654b66beb877b99f to your computer and use it in GitHub Desktop.
Save shasso/1238568f4e9ad677654b66beb877b99f to your computer and use it in GitHub Desktop.
Office 365 VBA Scripts
Attribute VB_Name = "Module6"
Option Explicit
Sub toUnicodeMacro(fontName)
'
' toUnicode Macro
'
'
Selection.WholeStory
Selection.Find.ClearFormatting
Selection.Find.Replacement.ClearFormatting
With Selection.Find
.Forward = True
.Format = True
.MatchCase = False
.MatchWholeWord = False
.MatchKashida = True
.MatchDiacritics = True
.MatchAlefHamza = True
.MatchControl = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
.Font.NameBi = fontName
'.Text = ChrW(1604) & ChrW(1573): .Replacement.Text = ChrW(1824) & ChrW(1808): Selection.Find.Execute replace:=wdReplaceAll: .MatchDiacritics = True
.Text = ChrW(1604) & ChrW(1570): .MatchDiacritics = True: .Replacement.Text = ChrW(1852): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1604) & ChrW(1571): .MatchDiacritics = True: .Replacement.Text = ChrW(1855): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1617) & ChrW(1615): .MatchDiacritics = True: .Replacement.Text = ChrW(776): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1617) & ChrW(1611): .MatchDiacritics = True: .Replacement.Text = ChrW(1849): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1563): .Replacement.Text = ChrW(1563): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1566): .Replacement.Text = ChrW(1792): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1567): .Replacement.Text = ChrW(1567): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1569): .Replacement.Text = ChrW(1845): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1570): .MatchDiacritics = True: .Replacement.Text = ChrW(1815) & ChrW(1857): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1571): .Replacement.Text = ChrW(1808): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1572): .Replacement.Text = ChrW(1832): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1573): .Replacement.Text = ChrW(1808): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1574): .Replacement.Text = ChrW(1830) & ChrW(814): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1575): .Replacement.Text = ChrW(1808): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1576): .Replacement.Text = ChrW(1810): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1577): .Replacement.Text = ChrW(1836): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1578): .Replacement.Text = ChrW(1821) & ChrW(1852): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1579): .Replacement.Text = ChrW(1810) & ChrW(1858): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1580): .Replacement.Text = ChrW(1811) & ChrW(816): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1581): .Replacement.Text = ChrW(1818): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1582): .Replacement.Text = ChrW(1823) & ChrW(1858): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1583): .Replacement.Text = ChrW(1813): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1584): .Replacement.Text = ChrW(1834) & ChrW(776): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1585): .Replacement.Text = ChrW(1834): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1586): .Replacement.Text = ChrW(1817): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1587): .Replacement.Text = ChrW(1827): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1588): .Replacement.Text = ChrW(1835): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1589): .Replacement.Text = ChrW(1823) & ChrW(816): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1590): .Replacement.Text = ChrW(1835) & ChrW(816): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1591): .Replacement.Text = ChrW(1819): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
' .Text = ChrW(1592): .Replacement.Text = ChrW(1836) & ChrW(1808): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1593): .Replacement.Text = ChrW(1829): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1594): .Replacement.Text = ChrW(1811) & ChrW(1858): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1600): .Replacement.Text = ChrW(1600): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1601): .Replacement.Text = ChrW(1830): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1602): .Replacement.Text = ChrW(1833): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1603): .Replacement.Text = ChrW(1823): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1604): .Replacement.Text = ChrW(1824): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1605): .Replacement.Text = ChrW(1825): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1606): .Replacement.Text = ChrW(1826): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1607): .Replacement.Text = ChrW(1811): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1608): .Replacement.Text = ChrW(1816): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1609): .Replacement.Text = ChrW(1815): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1610): .Replacement.Text = ChrW(1821): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1611): .Replacement.Text = ChrW(1849): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1612): .Replacement.Text = ChrW(776) & ChrW(1849): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1613): .Replacement.Text = ChrW(1842): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1614): .Replacement.Text = ChrW(1848): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1615): .Replacement.Text = ChrW(776) & ChrW(1849): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1616): .Replacement.Text = ChrW(1845): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1617): .Replacement.Text = ChrW(1858): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1618): .Replacement.Text = ChrW(1863): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1632): .Replacement.Text = ChrW(1632): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1633): .Replacement.Text = ChrW(1633): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1634): .Replacement.Text = ChrW(1634): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1635): .Replacement.Text = ChrW(1635): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1636): .Replacement.Text = ChrW(1636): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1637): .Replacement.Text = ChrW(1637): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1638): .Replacement.Text = ChrW(1638): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1639): .Replacement.Text = ChrW(1639): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1640): .Replacement.Text = ChrW(1640): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1641): .Replacement.Text = ChrW(1641): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1642): .Replacement.Text = ChrW(1642): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1643): .Replacement.Text = ChrW(1643): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1644): .Replacement.Text = ChrW(1644): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1645): .Replacement.Text = ChrW(1805): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(47): .Replacement.Text = ChrW(1825) & ChrW(1858) & ChrW(1826): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(46): .Replacement.Text = ChrW(1793): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(42): .Replacement.Text = ChrW(1805): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1570): .Replacement.Text = ChrW(1815) & ChrW(775): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1817) & ChrW(1858) & ChrW(1842): .Replacement.Text = ChrW(1817) & ChrW(1842): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1817) & ChrW(1858) & ChrW(1848): .Replacement.Text = ChrW(1817) & ChrW(1848): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1832) & ChrW(1858) & ChrW(1842): .Replacement.Text = ChrW(1832) & ChrW(1842): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1832) & ChrW(1858) & ChrW(1848): .Replacement.Text = ChrW(1832) & ChrW(1848): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1858) & ChrW(776) & ChrW(1849): .Replacement.Text = ChrW(817): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1825) & ChrW(1858) & ChrW(1845): .Replacement.Text = ChrW(1825) & ChrW(775): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1836) & ChrW(1808) & ChrW(1808): .Replacement.Text = ChrW(1836) & ChrW(776) & ChrW(1845) & ChrW(1808): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1600): .Replacement.Text = "": .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1845) & ChrW(1845): .Replacement.Text = ChrW(1845): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1842) & ChrW(1842): .Replacement.Text = ChrW(1842): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
.Text = ChrW(1849) & ChrW(1849): .Replacement.Text = ChrW(1849): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
End With
End Sub
Sub toUnicodeMacro2(fontName)
Selection.WholeStory
Selection.Find.ClearFormatting
Selection.Find.Replacement.ClearFormatting
With Selection.Find
.Forward = True
.Format = True
.MatchCase = False
.MatchWholeWord = False
.MatchKashida = True
.MatchDiacritics = True
.MatchAlefHamza = True
.MatchControl = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
.Font.NameBi = fontName
'.Text = ChrW(1609): .Replacement.Text = ChrW(1815): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
' [('1615', '0x64f'), ('776', '0x308'), ('1849', '0x739')]
'.MatchWildcards = True: .Text = ("?") & ChrW(&H64F): .Replacement.Text = "\1" & ChrW(&H308) & ChrW(&H739): .Execute Replace:=wdReplaceAll
'.Text = ChrW(&H64F): .Replacement.Text = ChrW(&H308) & ChrW(&H739): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue: .MatchControl = True
' ('1592', '0x638'), ('1836', '0x72c'), ('1808', '0x710')]
.Text = ChrW(&H638): .Replacement.Text = ChrW(&H72C) & ChrW(&H308) & ChrW(&H735): .Execute Replace:=wdReplaceAll: .MatchDiacritics = True: .Wrap = wdFindContinue
End With
End Sub
Sub toUnicodeMacroV2(fontName As String)
Dim conversionMap As Object
Set conversionMap = CreateObject("Scripting.Dictionary")
' Define the conversion map (add entries as needed)
With conversionMap
.Add ChrW(1604) & ChrW(1570), ChrW(1852)
.Add ChrW(1604) & ChrW(1571), ChrW(1855)
.Add ChrW(1617) & ChrW(1615), ChrW(776)
.Add ChrW(1617) & ChrW(1611), ChrW(1849)
.Add ChrW(1563), ChrW(1563)
.Add ChrW(1566), ChrW(1792)
.Add ChrW(1570), ChrW(1815) & ChrW(1857)
.Add ChrW(1571), ChrW(1808)
' Add additional mappings as necessary
' .Add ChrW(<source_unicode>), ChrW(<target_unicode>)
' For multiple characters in the replacement, concatenate them:
' .Add ChrW(<source_unicode>), ChrW(<target_unicode1>) & ChrW(<target_unicode2>)
.Add ChrW(1572), ChrW(1832)
.Add ChrW(1573), ChrW(1808)
.Add ChrW(1574), ChrW(1830) & ChrW(814)
.Add ChrW(1575), ChrW(1808)
.Add ChrW(1576), ChrW(1810)
.Add ChrW(1577), ChrW(1836)
.Add ChrW(1578), ChrW(1821) & ChrW(1852)
.Add ChrW(1579), ChrW(1810) & ChrW(1858)
.Add ChrW(1580), ChrW(1811) & ChrW(816)
.Add ChrW(1581), ChrW(1818)
.Add ChrW(1582), ChrW(1823) & ChrW(1858)
.Add ChrW(1583), ChrW(1813)
.Add ChrW(1584), ChrW(1834) & ChrW(776)
.Add ChrW(1585), ChrW(1834)
.Add ChrW(1586), ChrW(1817)
.Add ChrW(1587), ChrW(1827)
.Add ChrW(1588), ChrW(1835)
.Add ChrW(1589), ChrW(1823) & ChrW(816)
.Add ChrW(1590), ChrW(1835) & ChrW(816)
.Add ChrW(1591), ChrW(1819)
' .Add ChrW(1592), ChrW(1836) & ChrW(1808)
.Add ChrW(1593), ChrW(1829)
.Add ChrW(1594), ChrW(1811) & ChrW(1858)
' 0640 ARABIC TATWEEL
.Add ChrW(1600), ""
.Add ChrW(1601), ChrW(1830)
.Add ChrW(1602), ChrW(1833)
.Add ChrW(1603), ChrW(1823)
.Add ChrW(1604), ChrW(1824)
.Add ChrW(1605), ChrW(1825)
.Add ChrW(1606), ChrW(1826)
.Add ChrW(1607), ChrW(1811)
.Add ChrW(1608), ChrW(1816)
.Add ChrW(1609), ChrW(1815)
.Add ChrW(1610), ChrW(1821)
.Add ChrW(1611), ChrW(1849)
.Add ChrW(1612), ChrW(1849) & ChrW(776)
.Add ChrW(1613), ChrW(1842)
.Add ChrW(1614), ChrW(1848)
.Add ChrW(1615), ChrW(776) & ChrW(1849)
.Add ChrW(1616), ChrW(1845)
.Add ChrW(1617), ChrW(1858)
.Add ChrW(1618), ChrW(1863)
.Add ChrW(1632), ChrW(1632)
.Add ChrW(1633), ChrW(1633)
.Add ChrW(1634), ChrW(1634)
.Add ChrW(1635), ChrW(1635)
.Add ChrW(1636), ChrW(1636)
.Add ChrW(1637), ChrW(1637)
.Add ChrW(1638), ChrW(1638)
.Add ChrW(1639), ChrW(1639)
.Add ChrW(1640), ChrW(1640)
.Add ChrW(1641), ChrW(1641)
.Add ChrW(1642), ChrW(1642)
.Add ChrW(1643), ChrW(1643)
.Add ChrW(1644), ChrW(1644)
.Add ChrW(1645), ChrW(1805)
.Add ChrW(47), ChrW(1825) & ChrW(1858) & ChrW(1826)
.Add ChrW(46), ChrW(1793)
.Add ChrW(42), ChrW(1805)
'.Add ChrW(1570), ChrW(1815) & ChrW(775)
.Add ChrW(1817) & ChrW(1858) & ChrW(1842), ChrW(1817) & ChrW(1842)
.Add ChrW(1817) & ChrW(1858) & ChrW(1848), ChrW(1817) & ChrW(1848)
.Add ChrW(1832) & ChrW(1858) & ChrW(1842), ChrW(1832) & ChrW(1842)
.Add ChrW(1832) & ChrW(1858) & ChrW(1848), ChrW(1832) & ChrW(1848)
.Add ChrW(1858) & ChrW(776) & ChrW(1849), ChrW(817)
.Add ChrW(1825) & ChrW(1858) & ChrW(1845), ChrW(1825) & ChrW(775)
.Add ChrW(1836) & ChrW(1808) & ChrW(1808), ChrW(1836) & ChrW(776) & ChrW(1845) & ChrW(1808)
.Add ChrW(1845) & ChrW(1845), ChrW(1845)
.Add ChrW(1842) & ChrW(1842), ChrW(1842)
.Add ChrW(1849) & ChrW(1849), ChrW(1849)
End With
' Configure selection
Selection.WholeStory
Selection.Find.ClearFormatting
Selection.Find.Replacement.ClearFormatting
Selection.Find.Font.NameBi = fontName
' Iterate through the conversion map and apply replacements
Dim key As Variant
For Each key In conversionMap.Keys
With Selection.Find
.Text = key
.Replacement.Text = conversionMap(key)
.Forward = True
.Format = True
.MatchCase = False
.MatchWholeWord = False
.MatchKashida = True
.MatchDiacritics = True
.MatchAlefHamza = True
.MatchControl = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
.Wrap = wdFindContinue
.Execute Replace:=wdReplaceAll
End With
Next key
End Sub
Sub PromptAndCallMacro()
Dim fontName As String
' Prompt the user to enter a font name
fontName = InputBox("Enter the font name:", "Font Input")
' Validate the input
If fontName = "" Then
MsgBox "No font name entered. Macro canceled.", vbExclamation
Exit Sub
End If
' Call the existing macro with the user-provided font name
toUnicodeMacroV2 fontName
End Sub
Sub ExportPagesToTextFilesUTF8()
Dim pubDoc As Document
Dim page As page
Dim outputFolder As String
Dim pageText As String
Dim filePath As String
Dim pageIndex As Integer
Dim startPageNumber As String
Dim currentPageNumber As Integer
Dim stream As Object ' ADODB.Stream
' Prompt the user to select a destination folder
With Application.FileDialog(msoFileDialogFolderPicker)
.Title = "Select Destination Folder for UTF-8 Text Files"
If .Show = -1 Then
outputFolder = .SelectedItems(1)
Else
MsgBox "Operation cancelled by user.", vbExclamation
Exit Sub
End If
End With
' Prompt the user to enter a starting page number for naming (optional)
startPageNumber = InputBox("Enter the starting page number for file naming (optional):", "Starting Page Number", "")
If IsNumeric(startPageNumber) And startPageNumber <> "" Then
currentPageNumber = CInt(startPageNumber)
Else
currentPageNumber = 1 ' Default to 1 if no input or invalid input
End If
' Get the active document
Set pubDoc = ActiveDocument
' Loop through all pages
For pageIndex = 1 To pubDoc.Pages.Count
Set page = pubDoc.Pages(pageIndex)
' Extract text content from the page
pageText = GetPageText(page)
' Format file name as "page_NNNN.txt" using current page number
filePath = outputFolder & Application.PathSeparator & "page_" & Format(currentPageNumber, "0000") & ".txt"
' Write the text to the file in UTF-8 encoding
Set stream = CreateObject("ADODB.Stream")
With stream
.Type = 2 ' Text mode
.Charset = "UTF-8" ' UTF-8 encoding
.Open
.WriteText pageText
.SaveToFile filePath, 2 ' Overwrite mode
.Close
End With
' Increment the current page number
currentPageNumber = currentPageNumber + 1
Next pageIndex
' Completion message
MsgBox "Pages have been exported as UTF-8 text files in: " & outputFolder, vbInformation, "Export Complete"
End Sub
' Function to extract all text from a given page
Function GetPageText(pg As page) As String
Dim shp As Shape
Dim textContent As String
textContent = ""
' Loop through all shapes on the page
For Each shp In pg.Shapes
If shp.HasTextFrame Then
If Not shp.TextFrame.TextRange Is Nothing Then
textContent = textContent & shp.TextFrame.TextRange.Text & vbCrLf
End If
End If
Next shp
GetPageText = textContent
End Function
Sub SplitDocumentByPageToUTF8Text_v2()
Dim doc As Document
Dim pageCount As Integer
Dim i As Integer
Dim pageRange As Range
Dim outputFolder As String
Dim fileName As String
Dim textContent As String
Dim stream As Object
' Get the active document
Set doc = ActiveDocument
' Determine the number of pages in the document
pageCount = doc.ComputeStatistics(wdStatisticPages)
' Prompt user for the output folder
With Application.FileDialog(msoFileDialogFolderPicker)
.Title = "Select Output Folder"
If .Show = -1 Then
outputFolder = .SelectedItems(1)
Else
MsgBox "No folder selected. Operation canceled.", vbExclamation
Exit Sub
End If
End With
' Loop through each page
For i = 1 To pageCount
' Define the range for the current page
Set pageRange = doc.GoTo(What:=wdGoToPage, Which:=wdGoToAbsolute, Count:=i)
If i < pageCount Then
pageRange.End = doc.GoTo(What:=wdGoToPage, Which:=wdGoToAbsolute, Count:=i + 1).Start
pageRange.End = pageRange.End - 1 ' Exclude the page break
Else
pageRange.End = doc.Content.End ' Include the last page
End If
' Get the text content of the current page
textContent = pageRange.Text
' Remove problematic characters if necessary
textContent = Replace(textContent, Chr(13), vbCrLf) ' Replace line breaks with standard format
' Define file name
fileName = outputFolder & "\page_" & i & ".txt"
' Write to UTF-8 text file using ADODB.Stream
Set stream = CreateObject("ADODB.Stream")
stream.Type = 2 ' Text data
stream.Charset = "UTF-8" ' Set to UTF-8 encoding
stream.Open
stream.WriteText textContent ' Write the text content
stream.SaveToFile fileName, 2 ' Save and overwrite if file exists
stream.Close
Next i
' Notify user of completion
MsgBox "Document successfully split into " & pageCount & " UTF-8 text files.", vbInformation
End Sub
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment