Gist by @prescience-data (fcf38928d7abd4b32b915a4c553b9fde, last active May 26, 2025):
Save and restore session data using Playwright, plus a Chrome UserData backup-to-S3 script.
import { exec } from "node:child_process"
import { createReadStream, createWriteStream, existsSync, readFileSync } from "node:fs"
import { mkdir } from "node:fs/promises"
import { basename, dirname, join } from "node:path"
import { pipeline } from "node:stream/promises"
import { promisify } from "node:util"
import { createGzip } from "node:zlib"
import { PutObjectCommand, S3Client } from "@aws-sdk/client-s3"
/**
 * Configuration.
 *
 * NOTE(review): BUCKET_NAME is a placeholder — replace with a real bucket name.
 */
const BUCKET_NAME = "your-s3-bucket-name"
// Region for the S3 client. The original referenced an undeclared `S3_REGION`,
// which threw a ReferenceError at module load; read it from the environment
// with a sensible default instead.
const S3_REGION = process.env.AWS_REGION ?? "us-east-1"
// macOS default Chrome profile location (this script targets macOS paths).
const USER_DATA_DIR = join(process.env.HOME || "", "Library/Application Support/Google/Chrome")
// Local staging directory for compressed backups before they are uploaded.
const OUTPUT_DIR = join(process.env.HOME || "", "ChromeBackup")
// Filesystem/S3-key-safe timestamp: ISO-8601 with "." and ":" replaced by "-".
const TIMESTAMP = new Date().toISOString().replace(/[.:]/g, "-")
// Chrome profile artifacts to back up, ranked by importance (5 = highest).
// Entries flagged `isDir` are archived with tar; the rest are gzip'd directly.
const HIGH_PRIORITY_FILES = [
  { path: "/Default/Cookies", rank: 5 },
  { path: "/Default/Cookies-journal", rank: 5 },
  { path: "/Default/Local Storage/leveldb/", rank: 5, isDir: true },
  { path: "/Default/Session Storage/", rank: 4, isDir: true },
  { path: "/Local State", rank: 4 },
  { path: "/Default/Web Data", rank: 3 },
  { path: "/Default/Web Data-journal", rank: 3 },
  { path: "/Default/Login Data", rank: 3 },
  { path: "/Default/Login Data-journal", rank: 3 },
  { path: "/Default/Preferences", rank: 3 },
  { path: "/Default/IndexedDB/", rank: 3, isDir: true }
] as const
/**
 * AWS S3 client instance.
 *
 * Credentials are resolved from the default provider chain
 * (environment variables, shared config, instance profile, ...).
 */
const s3Client = new S3Client({
  region: S3_REGION
})
/**
 * Main backup function.
 *
 * Compresses each high-priority Chrome profile artifact into OUTPUT_DIR and
 * uploads the result to S3. Failures on one item are logged and do not abort
 * the remaining items.
 *
 * @public
 */
export async function backupChromeData(): Promise<void> {
  // Sanity check: make sure the staging directory exists.
  await mkdir(OUTPUT_DIR, { recursive: true })
  console.info(`Starting Chrome UserData backup from ${USER_DATA_DIR}`)
  console.info(
    `Backup files will be saved to ${OUTPUT_DIR} and uploaded to S3 bucket ${BUCKET_NAME}`
  )
  for (const item of HIGH_PRIORITY_FILES) {
    const fullPath = join(USER_DATA_DIR, item.path)
    // The original duplicated identical dir/file branches; fold them into one
    // path parameterized by `isDir`, preserving every log message verbatim.
    const isDir = "isDir" in item && item.isDir === true
    try {
      if (!existsSync(fullPath)) {
        console.warn(`${isDir ? "Directory" : "File"} not found: ${fullPath}`)
        continue
      }
      console.info(
        `Processing ${isDir ? "directory" : "file"}: ${fullPath} (Rank: ${item.rank})`
      )
      const backupPath = await gzipItem(fullPath, isDir)
      await uploadToS3(backupPath, fullPath)
    } catch (error) {
      // Best-effort: log and continue with the next item.
      console.error(`Error processing ${fullPath}:`, error)
    }
  }
  console.info("Chrome UserData backup completed!")
}
/**
 * Helper function to gzip a file/directory.
 *
 * Directories are archived with the system `tar`, producing a ".tar.gz";
 * single files are streamed through Node's native gzip, producing a ".gz".
 * (The original labelled plain-gzip'd files ".tar.gz" even though no tar
 * archive was created, which misleads any tooling expecting a tarball.)
 *
 * @param sourcePath - The path to the file or directory to compress
 * @param isDir - Whether the sourcePath is a directory
 * @returns The path of the compressed backup inside OUTPUT_DIR
 *
 * @internal
 */
async function gzipItem(sourcePath: string, isDir: boolean = false): Promise<string> {
  // Use an honest extension: only the directory branch creates a tar archive.
  const backupFileName = basename(sourcePath) + (isDir ? ".tar.gz" : ".gz")
  const backupPath = join(OUTPUT_DIR, backupFileName)
  if (isDir) {
    // Back up the directory using Mac tar.
    const execPromise = promisify(exec)
    try {
      // NOTE(review): paths are interpolated into a shell command; the quoting
      // guards against spaces but not embedded double quotes in path names.
      await execPromise(
        `tar -czf "${backupPath}" -C "${dirname(sourcePath)}" "${basename(sourcePath)}"`
      )
      console.info(`Directory compressed: ${sourcePath} -> ${backupPath}`)
      return backupPath
    } catch (error) {
      console.error(`Failed to compress directory ${sourcePath}:`, error)
      throw error
    }
  } else {
    // Back up the file using native gzip, streaming to keep memory flat.
    const gzip = createGzip()
    const source = createReadStream(sourcePath)
    const destination = createWriteStream(backupPath)
    try {
      await pipeline(source, gzip, destination)
      console.info(`File compressed: ${sourcePath} -> ${backupPath}`)
      return backupPath
    } catch (error) {
      console.error(`Failed to compress file ${sourcePath}:`, error)
      throw error
    }
  }
}
/**
 * Helper function to upload file to S3.
 *
 * The S3 key mirrors the file's path relative to the Chrome profile,
 * nested under a timestamped "chrome-backup/" prefix.
 *
 * @param filePath - The path to the file to upload
 * @param originalPath - The original path of the file (for S3 key)
 *
 * @internal
 */
async function uploadToS3(filePath: string, originalPath: string): Promise<void> {
  const body = readFileSync(filePath)
  const relativePath = originalPath.replace(USER_DATA_DIR, "")
  const key = `chrome-backup/${TIMESTAMP}${relativePath}.gz`
  try {
    await s3Client.send(
      new PutObjectCommand({
        Bucket: BUCKET_NAME,
        Key: key,
        Body: body
      })
    )
    console.info(`Successfully uploaded ${filePath} to S3 bucket as ${key}`)
  } catch (error) {
    console.error(`Error uploading ${filePath} to S3:`, error)
    throw error
  }
}
import { mkdir, readFile, writeFile } from "node:fs/promises"
import { resolve } from "node:path"
import type { Page } from "playwright-core"
import type { Protocol } from "playwright-core/types/protocol"
/**
 * Saves session data for a given page and URL.
 *
 * Navigates to the URL, pulls cookies and localStorage over CDP, and writes
 * them to a per-origin JSON file under ./data.
 *
 * @param page - The Playwright page object.
 * @param url - The URL to save session data for.
 * @throws If the frame id or storage key cannot be resolved.
 *
 * @public
 */
export async function saveSessionData(page: Page, url: string): Promise<void> {
  // Get the storage key for the target origin.
  const origin = new URL(url).origin
  // Navigate to page and establish CDP session.
  await page.goto(url, { waitUntil: "load" })
  const context = page.context()
  const client = await context.newCDPSession(page)
  // Detach in `finally` so a failure below cannot leak the CDP session
  // (the original only detached on the success path).
  try {
    // @ts-expect-error - "_id" is private.
    const frameId = page.mainFrame()._id
    if (!frameId) {
      throw new Error("Frame ID not found")
    }
    // Extract session data; the two CDP calls are independent, so run in parallel.
    const [{ cookies }, { storageKey }] = await Promise.all([
      client.send("Network.getAllCookies", { frameId }),
      client.send("Storage.getStorageKeyForFrame", { frameId })
    ])
    if (!storageKey) {
      throw new Error("Storage key not found")
    }
    const { entries: localStorage } = await client.send("DOMStorage.getDOMStorageItems", {
      storageId: {
        securityOrigin: storageKey,
        isLocalStorage: true
      }
    })
    // Save to session file.
    const filePath = await resolveFilePath("./data", origin)
    const sessionData = parseSessionData({ origin, cookies, localStorage })
    await writeFile(filePath, JSON.stringify(sessionData))
  } finally {
    // Clean up to avoid memory leaks.
    await client.detach()
  }
}
/**
 * Restores session data for a given page and URL.
 *
 * @remarks
 * Local storage must be restored before the page loads, so navigation to the
 * origin is intercepted with a stub page while localStorage is written over CDP.
 *
 * @param page - The Playwright page object.
 * @param url - The URL to navigate to.
 * @throws If the frame id or storage key cannot be resolved.
 *
 * @public
 */
export async function restoreSessionDataAfterNavigation(page: Page, url: string): Promise<void> {
  // Get the storage key for the target origin.
  const origin = new URL(url).origin
  const context = page.context()
  // Load data from files.
  const filePath = await resolveFilePath("./data", origin)
  const { cookies, localStorage } = await readFile(filePath, "utf8").then(parseSessionData)
  // Set cookies.
  await context.addCookies(cookies)
  // NOTE: Not sure if this will work, but to avoid race condition, navigate to the target origin with page load interception.
  await context.route(`${origin}/**`, async (route) => {
    await route.fulfill({
      status: 200,
      contentType: "text/html",
      body: "<html><head><title>Restoring localStorage</title></head><body></body></html>"
    })
  })
  try {
    await page.goto(origin, { waitUntil: "domcontentloaded", timeout: 5000 })
    const client = await context.newCDPSession(page)
    // Detach in `finally` so a failure in the loop below cannot leak the CDP
    // session (the original skipped detach whenever an error was thrown).
    try {
      // @ts-expect-error - "_id" is private.
      const frameId = page.mainFrame()._id
      if (!frameId) {
        throw new Error("Frame ID not found,")
      }
      // Set localStorage.
      const { storageKey } = await client.send("Storage.getStorageKeyForFrame", { frameId })
      if (!storageKey) {
        throw new Error("Storage key not found.")
      }
      for (const [key, value] of localStorage) {
        await client.send("DOMStorage.setDOMStorageItem", {
          storageId: {
            securityOrigin: storageKey,
            isLocalStorage: true
          },
          key,
          value
        })
      }
    } finally {
      // Clean up to avoid memory leaks.
      await client.detach()
    }
  } finally {
    // Remove the route interception.
    await context.unroute(`${origin}/**`)
  }
  // Navigate to the target URL.
  await page.goto(url, { waitUntil: "load" })
}
/**
 * Ensure the session data is in the correct format.
 *
 * @remarks
 * This is crappy parsing, in practice you should use a schema validator.
 *
 * @param sessionData - Object containing session data or a JSON string.
 * @returns - Parsed session data.
 * @throws - If the session data is not in the expected format.
 *
 * @internal
 */
function parseSessionData(sessionData: string | SessionData): SessionData {
  const { origin, cookies, localStorage } =
    typeof sessionData === "string" ? _parse(sessionData) : sessionData
  if (!origin) {
    throw new Error("Origin not found in session data")
  }
  if (!Array.isArray(cookies)) {
    throw new Error("Cookies not found in session data")
  }
  if (!Array.isArray(localStorage)) {
    throw new Error("LocalStorage not found in session data")
  }
  return { origin, cookies, localStorage }

  function _parse(jsonString: string): SessionData {
    try {
      return JSON.parse(jsonString) as SessionData
    } catch (cause) {
      // `cause` is `unknown` under strict mode (`useUnknownInCatchVariables`);
      // the original read `cause.message` unguarded, which does not compile.
      const message = cause instanceof Error ? cause.message : String(cause)
      throw new Error(`Failed to parse session data: ${message}`, { cause })
    }
  }
}
/**
 * Ensures the target directory exists and returns the full path to the session file for a specified origin.
 *
 * @remarks
 * Origins contain characters that are illegal or structural in paths
 * (e.g. "https://example.com" has ":" and "//"); the original embedded the
 * raw origin in the filename, so the resulting path pointed into a
 * nonexistent "https:" subdirectory and writes failed with ENOENT.
 * The origin is flattened into a safe filename instead.
 *
 * @param path - The target directory path.
 * @param origin - The origin for which the session file is being created.
 * @returns - The fully resolved path to the session file.
 *
 * @internal
 */
async function resolveFilePath(path: string, origin: string): Promise<string> {
  const dir = resolve(path)
  await mkdir(dir, { recursive: true })
  // Collapse every run of non [a-zA-Z0-9.-] characters to "_" so the origin
  // cannot introduce path separators or invalid filename characters.
  const fileName = origin.replace(/[^a-zA-Z0-9.-]+/g, "_")
  return resolve(dir, `${fileName}.json`)
}
/**
 * Custom SessionData interface representing the structure of session data.
 *
 * Serialized to JSON by {@link saveSessionData} and read back by
 * {@link restoreSessionDataAfterNavigation}.
 *
 * @internal
 */
interface SessionData {
  /** The origin of the session data. */
  origin: string
  /** The array of cookies associated with the session. */
  cookies: Protocol.Network.Cookie[]
  /** The array of localStorage items associated with the session. */
  localStorage: Protocol.DOMStorage.Item[]
}