Last active
February 1, 2022 10:27
-
-
Save bungard/0d3f76a0216a2ac7c3d5 to your computer and use it in GitHub Desktop.
PowerShell script to split a Word document into separate pages. Assumes each page should become its own document and that the name for each document can be extracted from the text of that page.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Author: Matt Bungard / bungard at g-mail d com | |
# | |
#Pulled bits from various sources; if you've been excluded, let me know and I'll cite accordingly
#http://stackoverflow.com/questions/26737239/powershell-add-a-new-document-to-exisitng-word-file-with-page-number-of-2 | |
## -- Settings --
# Regex applied to the text of each extracted chunk; capture group 1 is used
# as the output file name (a "patternNotFound_<page>" name is used on a miss).
#$fileNamePattern = "ID #:\s+(\d+)"
$fileNamePattern = "Student ID #:\s+# (\d+)"
# Number of source pages that go into each output document.
$pageLength = 1
# Document to split.
$inputFile = "inputDoc.docx"
# Directory that receives the generated .docx files.
$outputPath = "outputDir\" #End the path with a slash
## -- End Settings
# Splits $inputFile into one .docx per $pageLength pages using Word COM
# automation. Each chunk is copied through the clipboard into a fresh
# document, the source margins are re-applied, and the file is named after
# capture group 1 of $fileNamePattern matched against the chunk's text.
# Reads: $fileNamePattern, $pageLength, $inputFile, $outputPath.
[ref]$SaveFormat = "microsoft.office.interop.word.WdSaveFormat" -as [type]

# Word resolves relative paths against its own working directory, not the
# PowerShell location, so hand it absolute paths only.
$inputFullPath = (Resolve-Path -LiteralPath $inputFile -ErrorAction Stop).ProviderPath
if(-not (Test-Path -LiteralPath $outputPath -PathType Container))
{
    [Void](New-Item -ItemType Directory -Path $outputPath -Force)
}
$outputFullPath = (Resolve-Path -LiteralPath $outputPath -ErrorAction Stop).ProviderPath

$word = New-Object -ComObject word.application
$doc = $null
$rngPage = $null
try
{
    $word.Visible = $true
    $doc = $word.Documents.Open($inputFullPath)
    $pages = $doc.ComputeStatistics([Microsoft.Office.Interop.Word.WdStatistic]::wdStatisticPages)
    $rngPage = $doc.Range()

    for($i=1; $i -le $pages; $i+=$pageLength)
    {
        # Jump to the first page of this chunk and anchor the range there.
        [Void]$word.Selection.GoTo([Microsoft.Office.Interop.Word.WdGoToItem]::wdGoToPage,
            [Microsoft.Office.Interop.Word.WdGoToDirection]::wdGoToAbsolute,
            $i #Starting Page
        )
        $chunkStart = $word.Selection.Start

        # Capture the margins while the selection is still inside the chunk
        # being copied, so they describe this chunk and not the one after it.
        $marginTop = $word.Selection.PageSetup.TopMargin
        $marginBottom = $word.Selection.PageSetup.BottomMargin
        $marginLeft = $word.Selection.PageSetup.LeftMargin
        $marginRight = $word.Selection.PageSetup.RightMargin

        $nextPage = $i + $pageLength
        if($nextPage -gt $pages)
        {
            # There is no page after the final chunk to jump to, so the chunk
            # simply runs to the end of the document.
            $chunkEnd = $doc.Content.End
        }
        else
        {
            [Void]$word.Selection.GoTo([Microsoft.Office.Interop.Word.WdGoToItem]::wdGoToPage,
                [Microsoft.Office.Interop.Word.WdGoToDirection]::wdGoToAbsolute,
                $nextPage #Next page Number
            )
            $chunkEnd = $word.Selection.Start
        }

        # Set End before Start: the previous iteration's End may lie before
        # the new Start, and Word would otherwise adjust the range itself.
        $rngPage.End = $chunkEnd
        $rngPage.Start = $chunkStart
        $rngPage.Copy()

        $newDoc = $word.Documents.Add()
        try
        {
            $word.Selection.PageSetup.TopMargin = $marginTop
            $word.Selection.PageSetup.BottomMargin = $marginBottom
            $word.Selection.PageSetup.LeftMargin = $marginLeft
            $word.Selection.PageSetup.RightMargin = $marginRight
            $word.Selection.Paste() # Now we have our new page on a new doc
            [Void]$word.Selection.EndKey(6,0) #Move to the end of the file
            $word.Selection.TypeBackspace() #Seems to grab an extra section/page break
            [Void]$word.Selection.Delete() #Now we have our doc down to size

            #Get Name
            $regex = [Regex]::Match($rngPage.Text, $fileNamePattern)
            if($regex.Success)
            {
                $id = $regex.Groups[1].Value
            }
            else
            {
                $id = "patternNotFound_" + $i
            }

            # Join-Path tolerates $outputPath with or without a trailing slash.
            $path = Join-Path $outputFullPath ($id + ".docx")
            $newDoc.saveas([ref] $path, [ref]$SaveFormat::wdFormatDocumentDefault)
        }
        finally
        {
            # Mark as saved so a failure part-way through cannot leave Word
            # waiting on a "save changes?" prompt.
            $newDoc.Saved = $true
            $newDoc.close()
        }
    }
}
finally
{
    if($doc -ne $null)
    {
        # The source is only read from; never offer to save it.
        $doc.Saved = $true
        $doc.Close()
    }
    $word.Quit()

    # Drop the COM references explicitly; the garbage collector cannot
    # reclaim them (and WINWORD.EXE lingers) while they are still held.
    foreach($comObject in @($rngPage, $doc, $word))
    {
        if($comObject -ne $null)
        {
            [Void][System.Runtime.InteropServices.Marshal]::ReleaseComObject($comObject)
        }
    }
    Remove-Variable rngPage, doc, word, comObject -ErrorAction SilentlyContinue
    [gc]::collect()
    [gc]::WaitForPendingFinalizers()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The header is expanding after the copy to a new document.
Other than that it is working properly.
Can you help me get the exact same header as in the original file?