Skip to content

Instantly share code, notes, and snippets.

@mdbraber
Last active August 8, 2025 13:08
Show Gist options
  • Save mdbraber/bf37df37967903ad0b0e4a6285533983 to your computer and use it in GitHub Desktop.
Save mdbraber/bf37df37967903ad0b0e4a6285533983 to your computer and use it in GitHub Desktop.
Save Safari website as PDF (with syncing cookies)
#!/usr/bin/swift
@preconcurrency import WebKit
@preconcurrency import Foundation
@preconcurrency import Darwin
// Disable everything written to stderr by reopening the stream on /dev/null.
// The redirect is process-wide, so it also discards anything this script
// itself writes to stderr after this point.
freopen("/dev/null", "w", stderr)
/// A single cookie as exchanged between the cookie readers in this file
/// and the web view's cookie store.
///
/// The declaration order of the fields defines the memberwise initializer
/// used by the readers, so it must not be reordered.
struct Cookie {
/// Domain the cookie belongs to, as stored in the cookie source.
var domain: String
/// URL path the cookie is scoped to.
var path: String
/// Whether the cookie is flagged as secure.
var secure: Bool
/// Expiry date; when `nil`, no expiry is passed on to the cookie store.
var expires: Date?
/// Cookie name.
var name: String
/// Cookie value.
var value: String
}
/// Bit masks that are tested against the flags word of a parsed cookie record.
struct CookieFlags {
    /// Bit 0: the cookie is marked secure.
    static let secure = 1 << 0
    /// Bit 2: the cookie is marked HTTP-only.
    static let httpOnly = 1 << 2
}
extension Data {
    /// Returns up to `length` bytes starting at index `location`.
    ///
    /// The request is clamped to the end of the data, so reading past the end
    /// yields a short (possibly empty) result instead of trapping.
    ///
    /// - Parameters:
    ///   - location: Index of the first byte to read.
    ///   - length: Maximum number of bytes to read.
    /// - Returns: A fresh `Data` value; empty when `location` is out of range
    ///   or `length` is not positive.
    func read(location: Int, length: Int) -> Data {
        guard length > 0, location >= startIndex, location < endIndex else {
            return Data()
        }
        // Clamp via the remaining byte count so `location + length` can never
        // overflow or run past `endIndex`.
        let available = endIndex - location
        let upperBound = location + Swift.min(length, available)
        return subdata(in: location..<upperBound)
    }

    /// Decodes the bytes as a string in the given encoding.
    ///
    /// - Returns: `nil` when the bytes are not valid for `encoding`.
    func string(encoding: String.Encoding) -> String? {
        return String(data: self, encoding: encoding)
    }

    /// Returns the bytes from `startIndex` up to, but not including, the next
    /// zero byte.
    ///
    /// - Parameter startIndex: Index of the first byte to include.
    /// - Returns: The bytes before the terminator, or the remainder of the
    ///   data when no zero byte follows. Empty when `startIndex` is out of
    ///   range.
    func readUntilZero(from startIndex: Int) -> Data {
        // The parameter shadows the `startIndex` property, hence `self.`.
        guard startIndex >= self.startIndex, startIndex < endIndex else {
            return Data()
        }
        let tail = self[startIndex...]
        let terminator = tail.firstIndex(of: 0) ?? tail.endIndex
        // Copy into a fresh value so the result always starts at index 0.
        return Data(tail[..<terminator])
    }
}
/// Parser for Safari's `Cookies.binarycookies` file.
///
/// Layout as read by this parser:
/// - file: the ASCII signature `cook`, a big-endian page count, one
///   big-endian size per page, then the pages back to back;
/// - page: a big-endian header word `0x00000100`, a little-endian cookie
///   count, then one little-endian record offset (relative to the page start)
///   per cookie;
/// - record: little-endian 32-bit fields, of which the flags (byte 8) and the
///   domain/name/path/value string offsets (bytes 16, 20, 24, 28) are used,
///   followed by a little-endian double at byte 40 holding the expiry in
///   seconds since 2001-01-01 00:00:00 UTC. Strings are zero-terminated.
///
/// Malformed or truncated input is skipped rather than trapping.
class BinaryCookiesReader {
    /// Location of Safari's cookie store, relative to the home directory.
    private static let safariCookiesRelativePath =
        "/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies"

    /// Number of record bytes this parser needs: eight 32-bit fields, an
    /// 8-byte end-of-header field and the 8-byte expiry double.
    private static let minimumRecordLength = 48

    /// Returns every cookie found in Safari's cookie store, or an empty array
    /// when the file cannot be read or parsed.
    static func getCookies() -> [Cookie] {
        let path = NSHomeDirectory().appending(safariCookiesRelativePath)
        return readCookies(path: path)
    }

    /// Returns the Safari cookies that are relevant for `domain`.
    ///
    /// A cookie matches when its domain contains `domain` (the historical
    /// substring behaviour) or when `domain` is a sub-domain of the cookie's
    /// domain, so that a cookie stored for `.example.com` is also returned
    /// for the host `www.example.com`. Comparison is case-insensitive.
    static func getCookies(like domain: String) -> [Cookie] {
        let host = domain.lowercased()
        return getCookies().filter { cookie in
            let cookieDomain = cookie.domain.lowercased()
            if cookieDomain.contains(host) {
                return true
            }
            // Parent-domain cookies: strip the leading dot and require a
            // label boundary so "example.com" never matches "badexample.com".
            let bareDomain = cookieDomain.hasPrefix(".") ? String(cookieDomain.dropFirst()) : cookieDomain
            return !bareDomain.isEmpty && host.hasSuffix("." + bareDomain)
        }
    }

    /// Parses the binary cookie file at `path`; returns an empty array when
    /// the file cannot be read.
    static func readCookies(path: String) -> [Cookie] {
        guard let data = try? Data(contentsOf: URL(fileURLWithPath: path)) else {
            return []
        }
        return readCookies(data: data)
    }

    /// Parses the contents of a binary cookie file.
    ///
    /// - Parameter data: The complete file contents.
    /// - Returns: All cookies from all pages; empty when the signature is
    ///   missing or the header is truncated.
    static func readCookies(data: Data) -> [Cookie] {
        var cursor = 0

        let signature = data.read(location: cursor, length: 4)
        cursor += 4
        guard String(data: signature, encoding: .utf8) == "cook" else {
            debugPrint("Not a Cookies.binarycookie file")
            return []
        }

        let pageCount = bytesToInt(data: data.read(location: cursor, length: 4), isBe: true)
        cursor += 4
        // The page-size table must fit in the file; this also keeps a corrupt
        // count from driving billions of loop iterations.
        guard pageCount > 0, pageCount <= (data.count - cursor) / 4 else {
            return []
        }

        var pageSizes: [Int] = []
        pageSizes.reserveCapacity(pageCount)
        for _ in 0..<pageCount {
            pageSizes.append(bytesToInt(data: data.read(location: cursor, length: 4), isBe: true))
            cursor += 4
        }

        var cookies: [Cookie] = []
        for size in pageSizes {
            // Clamp so a truncated final page cannot be over-read.
            let available = max(0, data.count - cursor)
            let page = data.read(location: cursor, length: min(size, available))
            cookies.append(contentsOf: handleCookieData(data: page))
            cursor += size
        }
        return cookies
    }

    /// Parses one page of the cookie file.
    ///
    /// - Parameter data: The bytes of a single page.
    /// - Returns: The cookies of that page; empty when the page header is not
    ///   `0x00000100`. Records that do not fit in the page are skipped.
    static func handleCookieData(data: Data) -> [Cookie] {
        guard bytesToInt(data: data, isBe: true) == 0x00000100 else {
            debugPrint("page header is error, not 0x00000100!")
            return []
        }

        var cursor = 4
        let cookieCount = bytesToInt(data: data.read(location: cursor, length: 4), isBe: false)
        cursor += 4

        var cookies: [Cookie] = []
        guard cookieCount > 0 else {
            return cookies
        }
        for _ in 0..<cookieCount {
            // Stop as soon as the offset table runs off the end of the page.
            guard cursor + 4 <= data.count else {
                break
            }
            let offset = bytesToInt(data: data.read(location: cursor, length: 4), isBe: false)
            cursor += 4
            guard offset < data.count else {
                continue
            }
            // A record is taken to extend to the end of the page; its strings
            // are located via offsets and delimited by their zero terminator.
            let record = data.read(location: offset, length: data.count - offset)
            if let cookie = parseCookie(record: record) {
                cookies.append(cookie)
            }
        }
        return cookies
    }

    /// Decodes a 32-bit unsigned integer from the first four bytes of `data`.
    ///
    /// - Parameters:
    ///   - data: At least four bytes; extra bytes are ignored.
    ///   - isBe: `true` for big-endian, `false` for little-endian byte order.
    /// - Returns: The decoded value, or `0` when fewer than four bytes are given.
    static func bytesToInt(data: Data, isBe: Bool) -> Int {
        guard data.count >= 4 else {
            return 0
        }
        // Only look at the first word instead of copying the whole buffer.
        let word = data.prefix(4)
        let mostSignificantFirst: [UInt8] = isBe ? Array(word) : Array(word.reversed())
        return mostSignificantFirst.reduce(0) { ($0 << 8) | Int($1) }
    }

    /// Builds a `Cookie` from one record, or returns `nil` when the record is
    /// too short to contain the fixed-size header.
    private static func parseCookie(record: Data) -> Cookie? {
        guard record.count >= minimumRecordLength else {
            return nil
        }
        func field(at position: Int) -> Int {
            bytesToInt(data: record.read(location: position, length: 4), isBe: false)
        }
        // Bytes 0-7 (size, version) and 12-15 (port flag) are not used.
        let flags = field(at: 8)
        let domainOffset = field(at: 16)
        let nameOffset = field(at: 20)
        let pathOffset = field(at: 24)
        let valueOffset = field(at: 28)
        // Bytes 32-39 close the header; the expiry double follows at byte 40.
        return Cookie(
            domain: readString(in: record, at: domainOffset),
            path: readString(in: record, at: pathOffset),
            secure: (flags & CookieFlags.secure) != 0,
            expires: readDate(in: record, at: 40),
            name: readString(in: record, at: nameOffset),
            value: readString(in: record, at: valueOffset)
        )
    }

    /// Reads the zero-terminated UTF-8 string at `offset`; returns `""` when
    /// the offset is out of range or the bytes are not valid UTF-8.
    private static func readString(in record: Data, at offset: Int) -> String {
        guard offset >= 0, offset < record.count else {
            return ""
        }
        let bytes = record.readUntilZero(from: offset)
        return String(data: bytes, encoding: .utf8) ?? ""
    }

    /// Reads a little-endian double at `position` holding seconds since
    /// 2001-01-01 and converts it to a `Date` (truncated to whole seconds).
    ///
    /// - Returns: `nil` when fewer than eight bytes are available or the
    ///   stored value is NaN or infinite.
    private static func readDate(in record: Data, at position: Int) -> Date? {
        let raw = [UInt8](record.read(location: position, length: 8))
        guard raw.count == 8 else {
            return nil
        }
        // Assemble the bit pattern by hand: no unaligned or out-of-bounds
        // memory access, and independent of host byte order.
        let bits = raw.reversed().reduce(UInt64(0)) { ($0 << 8) | UInt64($1) }
        let seconds = Double(bitPattern: bits)
        guard seconds.isFinite else {
            return nil
        }
        return Date(timeIntervalSinceReferenceDate: seconds.rounded(.towardZero))
    }
}
/// A `WKWebView` that is its own navigation delegate and offers helpers to
/// seed cookies, load a page with a completion callback, scroll through it
/// and write it to disk as a PDF.
class SaveWebView: WKWebView, WKNavigationDelegate {
    /// Height of the document. Starts as the frame height and is replaced by
    /// `document.documentElement.scrollHeight` once a navigation finishes.
    var contentHeight: CGFloat

    /// Callback of the load in flight; invoked at most once per load.
    private var loadCompletion: ((Error?) -> Void)?

    override init(frame: CGRect, configuration: WKWebViewConfiguration) {
        // The web view keeps its own copy of the configuration made during
        // initialisation, so the configuration has to be adjusted first.
        configuration.websiteDataStore = .default()
        configuration.processPool = WKProcessPool()
        self.contentHeight = frame.height
        super.init(frame: frame, configuration: configuration)
        self.navigationDelegate = self
    }

    required init?(coder: NSCoder) {
        // Initialize for storyboard/xib usage
        self.contentHeight = 0
        super.init(coder: coder)
        self.navigationDelegate = self
    }

    /// Starts loading `url`.
    ///
    /// - Parameters:
    ///   - url: The page to load.
    ///   - completion: Called with `nil` after the page finished loading and
    ///     `contentHeight` was refreshed, or with the navigation error.
    func load(url: URL, completion: @escaping (Error?) -> Void) {
        self.loadCompletion = completion
        self.load(URLRequest(url: url))
    }

    /// Reads cookies from a Netscape-format (`cookies.txt`) file.
    ///
    /// Each data line holds seven tab-separated fields: domain, sub-domain
    /// flag, path, secure flag, expiry (Unix time), name and value. Lines
    /// starting with `#` are comments, except for the `#HttpOnly_` prefix,
    /// which marks a regular cookie line. An expiry of `0` denotes a session
    /// cookie and is returned as `nil`.
    ///
    /// - Parameter path: Path of the cookie file.
    /// - Returns: The parsed cookies; empty when the file cannot be read.
    func loadCookiesFromFile(path: String) -> [Cookie] {
        guard let content = try? String(contentsOfFile: path, encoding: .utf8) else {
            print("Could not read cookie file")
            return []
        }
        let httpOnlyPrefix = "#HttpOnly_"
        var cookies: [Cookie] = []
        for rawLine in content.components(separatedBy: .newlines) {
            var line = rawLine
            if line.hasPrefix(httpOnlyPrefix) {
                line.removeFirst(httpOnlyPrefix.count)
            } else if line.isEmpty || line.hasPrefix("#") {
                // Skip comments and empty lines
                continue
            }
            let parts = line.components(separatedBy: "\t")
            guard parts.count >= 7 else {
                print("Invalid cookie format: \(line)")
                continue
            }
            // Convert expiry timestamp; 0 (or anything non-positive) means
            // "session cookie", not "expired in 1970".
            let expires = Double(parts[4]).flatMap { $0 > 0 ? Date(timeIntervalSince1970: $0) : nil }
            cookies.append(
                Cookie(
                    domain: parts[0],
                    path: parts[2],
                    secure: parts[3] == "TRUE",
                    expires: expires,
                    name: parts[5],
                    value: parts[6]
                )
            )
        }
        return cookies
    }

    /// Stores `cookies` in the web view's cookie store.
    ///
    /// - Parameters:
    ///   - cookies: Cookies to install; entries that cannot be turned into an
    ///     `HTTPCookie` are skipped.
    ///   - completion: Called on the main queue once every cookie was stored.
    func syncCookiesToWebView(cookies: [Cookie], completion: @escaping () -> Void) {
        let cookieStore = self.configuration.websiteDataStore.httpCookieStore
        let group = DispatchGroup()
        for cookieData in cookies {
            var properties: [HTTPCookiePropertyKey: Any] = [
                .domain: cookieData.domain,
                .path: cookieData.path,
                .name: cookieData.name,
                .value: cookieData.value
            ]
            // Only add the key for secure cookies, and as a string, so that a
            // non-secure cookie can never be interpreted as secure.
            if cookieData.secure {
                properties[.secure] = "TRUE"
            }
            if let expires = cookieData.expires {
                properties[.expires] = expires
            }
            guard let cookie = HTTPCookie(properties: properties) else {
                continue
            }
            group.enter()
            cookieStore.setCookie(cookie) {
                group.leave()
            }
        }
        group.notify(queue: .main) {
            completion()
        }
    }

    /// Installs the cookies from the Netscape-format file at `cookieFilePath`
    /// and then loads `url`.
    func loadUrlWithFileCookies(url: URL, cookieFilePath: String, completion: @escaping (Error?) -> Void) {
        let cookies = loadCookiesFromFile(path: cookieFilePath)
        syncCookiesToWebView(cookies: cookies) {
            self.load(url: url, completion: completion)
        }
    }

    /// Installs the Safari cookies that match the host of `url` and then
    /// loads `url`. A URL without a host is loaded without any cookies.
    func loadUrlWithSyncedCookies(url: URL, completion: @escaping (Error?) -> Void) {
        let cookies = url.host.map { BinaryCookiesReader.getCookies(like: $0) } ?? []
        syncCookiesToWebView(cookies: cookies) {
            self.load(url: url, completion: completion)
        }
    }

    /// Scrolls the page down one viewport at a time.
    ///
    /// The call returns immediately; the scrolling itself runs inside the
    /// page.
    ///
    /// - Parameter timeout: Pause between two scroll steps, in milliseconds.
    func scrollPage(_ timeout: Int) {
        let javascript = """
        (async function () {
        const viewportHeight = window.innerHeight;
        const totalHeight = document.documentElement.scrollHeight;
        while (window.scrollY < totalHeight - viewportHeight) {
        window.scrollBy({
        top: viewportHeight,
        behavior: 'smooth'
        });
        await new Promise(resolve => setTimeout(resolve, \(timeout)));
        }
        })();
        true;
        """
        self.evaluateJavaScript(javascript) { (result, error) in
            if let error = error {
                print("Error preloading images: \(error.localizedDescription)")
            }
        }
    }

    /// Save a copy of the web view's contents as a PDF file.
    ///
    /// Despite its name, this method writes a PDF, not a webarchive. It
    /// blocks (while servicing the main run loop) until the file has been
    /// written; on failure it reports the error and terminates the process
    /// with status 1. An existing file at `savePath` is not overwritten.
    ///
    /// - Parameters:
    ///   - savePath: Destination file URL.
    ///   - contentHeight: Height of the area to capture; the width is fixed
    ///     at 900.
    func saveAsWebArchive(savePath: URL, contentHeight: CGFloat) {
        var isSaving = true
        // Create PDF configuration with full content size
        let config = WKPDFConfiguration()
        // Set the rect to cover the entire content
        config.rect = CGRect(x: 0, y: 0, width: 900, height: contentHeight)
        self.createPDF(configuration: config, completionHandler: { result in
            do {
                let data = try result.get()
                try data.write(
                    to: savePath,
                    options: [Data.WritingOptions.withoutOverwriting]
                )
                isSaving = false
            } catch {
                // Reported on stdout like every other error of this class;
                // the script redirects stderr to /dev/null.
                print("Unable to save webarchive file: \(error.localizedDescription)")
                exit(1)
            }
        })
        while isSaving {
            RunLoop.main.run(until: Date(timeIntervalSinceNow: 0.1))
        }
    }

    /// Hands the result of the current load to its callback, exactly once.
    private func completePendingLoad(with error: Error?) {
        // Clear the slot first so that a load started from inside the
        // callback keeps its own completion.
        let completion = self.loadCompletion
        self.loadCompletion = nil
        completion?(error)
    }

    // WKNavigationDelegate Methods

    /// Refreshes `contentHeight` from the document and then reports success.
    func webView(_ webView: WKWebView, didFinish navigation: WKNavigation!) {
        self.evaluateJavaScript("document.documentElement.scrollHeight") { (result, error) in
            if let error = error {
                print("Error getting scrollHeight: \(error.localizedDescription)")
            } else if let height = (result as? NSNumber)?.doubleValue {
                // Keep the previous height when the script returns no number.
                self.contentHeight = CGFloat(height)
            }
            DispatchQueue.main.async {
                self.completePendingLoad(with: nil)
            }
        }
    }

    /// Reports a navigation that failed after it had started.
    func webView(_ webView: WKWebView, didFail navigation: WKNavigation!, withError error: Error) {
        print("Navigation failed: \(error.localizedDescription)")
        completePendingLoad(with: error)
    }

    /// Reports a navigation that failed before any content was received.
    func webView(_ webView: WKWebView, didFailProvisionalNavigation navigation: WKNavigation!, withError error: Error) {
        print("didFailProvisionalNavigation")
        completePendingLoad(with: error)
    }
}
// Command line: <URL> <OUTPUT_PATH>.
// Problems are reported on stdout because this script redirects stderr to
// /dev/null at start-up, which would make them invisible.
guard CommandLine.arguments.count == 3 else {
    print("Usage: \(CommandLine.arguments[0]) <URL> <OUTPUT_PATH>")
    exit(1)
}
// Reject an unparsable URL with a message instead of trapping.
guard let url = URL(string: CommandLine.arguments[1]) else {
    print("Invalid URL: \(CommandLine.arguments[1])")
    exit(1)
}
let savePath = URL(fileURLWithPath: CommandLine.arguments[2])
// Cleared by the load callback to end the main run loop.
var keepRunning = true
// Height of the off-screen web view; one scroll step covers one such viewport.
let frameHeight: Int = 1200
// Pause between two scroll steps, in seconds.
let scrollTimeout: Double = 0.25
let webView = SaveWebView(frame: CGRect(x: 0, y: 0, width: 900, height: frameHeight))
/// Keeps the current run loop spinning in the default mode for at least
/// `seconds` seconds, so that pending callbacks are serviced while waiting.
func wait(seconds: Double) {
    let deadline = Date(timeIntervalSinceNow: seconds)
    while true {
        // Always run at least once, even when the deadline has already passed.
        RunLoop.current.run(mode: .default, before: deadline)
        if Date() >= deadline {
            break
        }
    }
}
// Process exit status; set to 1 when the page could not be loaded so that
// callers of the script can detect the failure.
var exitStatus: Int32 = 0
webView.loadUrlWithSyncedCookies(url: url) { error in
    if let error = error {
        print("Failed to load the page: \(error.localizedDescription)")
        exitStatus = 1
    } else {
        // Wait before scrolling and capturing the page.
        wait(seconds: 5.0)
        webView.scrollPage(Int(scrollTimeout * 1000))
        // One scroll step per viewport height, plus 10% slack.
        wait(seconds: ceil(Double(webView.contentHeight) / Double(frameHeight)) * scrollTimeout * 1.1)
        webView.saveAsWebArchive(savePath: savePath, contentHeight: webView.contentHeight)
    }
    keepRunning = false
}
// Service the run loop until the load callback has finished.
while keepRunning {
    RunLoop.current.run(mode: .default, before: .distantFuture)
}
exit(exitStatus)
@mdbraber
Copy link
Author

mdbraber commented Aug 8, 2025

No, unfortunately there's no easy solution. You could look at the code of various ad blockers to see how they try to get rid of those; the approach is probably quite different per site.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment