Last active
May 26, 2025 13:37
-
-
Save prescience-data/fcf38928d7abd4b32b915a4c553b9fde to your computer and use it in GitHub Desktop.
Save and restore session data using Playwright
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { exec } from "node:child_process" | |
import { createReadStream, createWriteStream, existsSync, readFileSync } from "node:fs" | |
import { mkdir } from "node:fs/promises" | |
import { basename, dirname, join } from "node:path" | |
import { pipeline } from "node:stream/promises" | |
import { promisify } from "node:util" | |
import { createGzip } from "node:zlib" | |
import { PutObjectCommand, S3Client } from "@aws-sdk/client-s3" | |
/**
 * Backup configuration.
 *
 * @remarks
 * Both the Chrome user-data directory and the local staging directory are
 * resolved relative to `$HOME` (an empty string is used when it is unset).
 */
const inHome = (relativePath: string): string => join(process.env.HOME || "", relativePath)

/* Destination bucket for uploaded archives. */
const BUCKET_NAME = "your-s3-bucket-name"

/* Chrome profile root that is backed up. */
const USER_DATA_DIR = inHome("Library/Application Support/Google/Chrome")

/* Local directory where compressed archives are staged. */
const OUTPUT_DIR = inHome("ChromeBackup")

/* ISO timestamp of module load with "." and ":" replaced by "-". */
const TIMESTAMP = new Date().toISOString().replace(/[.:]/g, "-")

/* Items to back up, relative to USER_DATA_DIR; entries flagged `isDir` are directories. */
const HIGH_PRIORITY_FILES = [
  // Rank 5
  { path: "/Default/Cookies", rank: 5 },
  { path: "/Default/Cookies-journal", rank: 5 },
  { path: "/Default/Local Storage/leveldb/", rank: 5, isDir: true },
  // Rank 4
  { path: "/Default/Session Storage/", rank: 4, isDir: true },
  { path: "/Local State", rank: 4 },
  // Rank 3
  { path: "/Default/Web Data", rank: 3 },
  { path: "/Default/Web Data-journal", rank: 3 },
  { path: "/Default/Login Data", rank: 3 },
  { path: "/Default/Login Data-journal", rank: 3 },
  { path: "/Default/Preferences", rank: 3 },
  { path: "/Default/IndexedDB/", rank: 3, isDir: true }
] as const
/**
 * AWS region the S3 client talks to.
 *
 * @remarks
 * Taken from the `AWS_REGION` environment variable when it is set; the
 * fallback is a placeholder and should be replaced with the bucket's region.
 */
const S3_REGION = process.env.AWS_REGION || "us-east-1"
/**
 * AWS S3 client instance.
 */
const s3Client = new S3Client({
  region: S3_REGION
  // ... Credentials
})
/**
 * Main backup function.
 *
 * @remarks
 * Walks every entry of `HIGH_PRIORITY_FILES`, compresses it into `OUTPUT_DIR`
 * and uploads the resulting archive to S3. Missing entries are reported with a
 * warning. A failure while processing one entry is logged and does not stop
 * the remaining entries from being processed.
 *
 * @public
 */
export async function backupChromeData(): Promise<void> {
  // Make sure the local staging directory exists before anything is written to it.
  await mkdir(OUTPUT_DIR, { recursive: true })
  console.info(`Starting Chrome UserData backup from ${USER_DATA_DIR}`)
  console.info(
    `Backup files will be saved to ${OUTPUT_DIR} and uploaded to S3 bucket ${BUCKET_NAME}`
  )
  for (const item of HIGH_PRIORITY_FILES) {
    const fullPath = join(USER_DATA_DIR, item.path)
    // Only some entries declare `isDir`, so test for the property instead of
    // reading it directly from the union of entry types.
    const isDir = "isDir" in item && item.isDir === true
    try {
      if (!existsSync(fullPath)) {
        console.warn(`${isDir ? "Directory" : "File"} not found: ${fullPath}`)
        continue
      }
      console.info(
        `Processing ${isDir ? "directory" : "file"}: ${fullPath} (Rank: ${item.rank})`
      )
      const backupPath = await gzipItem(fullPath, isDir)
      await uploadToS3(backupPath, fullPath)
    } catch (error) {
      // Best effort: keep going with the remaining entries.
      console.error(`Error processing ${fullPath}:`, error)
    }
  }
  console.info("Chrome UserData backup completed!")
}
/**
 * Helper function to compress a file or directory into `OUTPUT_DIR`.
 *
 * @remarks
 * Directories are archived with the system `tar` and named `<name>.tar.gz`.
 * Single files are compressed with Node's gzip stream and named `<name>.gz`,
 * because the result is a plain gzip stream, not a tar archive.
 *
 * @param sourcePath - The path to the file or directory to compress
 * @param isDir - Whether the sourcePath is a directory
 * @returns The path of the compressed file that was written.
 * @throws Re-throws any error raised by `tar` or by the gzip pipeline.
 *
 * @internal
 */
async function gzipItem(sourcePath: string, isDir: boolean = false): Promise<string> {
  const extension = isDir ? ".tar.gz" : ".gz"
  const backupPath = join(OUTPUT_DIR, basename(sourcePath) + extension)
  if (isDir) {
    // Back up the directory using Mac tar.
    // NOTE(review): the command line is built by string interpolation; paths
    // containing `"`, `$` or backticks would be interpreted by the shell.
    const execPromise = promisify(exec)
    try {
      await execPromise(
        `tar -czf "${backupPath}" -C "${dirname(sourcePath)}" "${basename(sourcePath)}"`
      )
      console.info(`Directory compressed: ${sourcePath} -> ${backupPath}`)
      return backupPath
    } catch (error) {
      console.error(`Failed to compress directory ${sourcePath}:`, error)
      throw error
    }
  }
  // Back up the file using native gzip.
  try {
    await pipeline(createReadStream(sourcePath), createGzip(), createWriteStream(backupPath))
    console.info(`File compressed: ${sourcePath} -> ${backupPath}`)
    return backupPath
  } catch (error) {
    console.error(`Failed to compress file ${sourcePath}:`, error)
    throw error
  }
}
/**
 * Helper function to upload file to S3.
 *
 * @remarks
 * The object key is `chrome-backup/<TIMESTAMP><path relative to USER_DATA_DIR>.gz`.
 * Trailing path separators are removed from the relative path first, so a
 * directory such as `.../leveldb/` is stored as `.../leveldb.gz` rather than
 * as an object named `.gz` underneath `.../leveldb/`.
 *
 * @param filePath - The path to the file to upload
 * @param originalPath - The original path of the file (for S3 key)
 * @throws Re-throws any error raised while sending the object to S3.
 *
 * @internal
 */
async function uploadToS3(filePath: string, originalPath: string): Promise<void> {
  const fileContent = readFileSync(filePath)
  const relativePath = originalPath.replace(USER_DATA_DIR, "").replace(/[\\/]+$/, "")
  const key = `chrome-backup/${TIMESTAMP}${relativePath}.gz`
  try {
    const command = new PutObjectCommand({
      Bucket: BUCKET_NAME,
      Key: key,
      Body: fileContent
    })
    await s3Client.send(command)
    console.info(`Successfully uploaded ${filePath} to S3 bucket as ${key}`)
  } catch (error) {
    console.error(`Error uploading ${filePath} to S3:`, error)
    throw error
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { mkdir, readFile, writeFile } from "node:fs/promises" | |
import { resolve } from "node:path" | |
import type { Page } from "playwright-core" | |
import type { Protocol } from "playwright-core/types/protocol" | |
/**
 * Saves session data for a given page and URL.
 *
 * @remarks
 * Navigates the page to `url`, reads cookies and the origin's localStorage
 * entries over a CDP session, and writes them as JSON to the session file for
 * the URL's origin inside `./data`. The CDP session is always detached, even
 * when one of the CDP commands fails.
 *
 * @param page - The Playwright page object.
 * @param url - The URL to save session data for.
 * @throws If the main frame ID or its storage key cannot be determined.
 *
 * @public
 */
export async function saveSessionData(page: Page, url: string): Promise<void> {
  // Get the origin that identifies the session file.
  const origin = new URL(url).origin
  // Navigate to page and establish CDP session.
  await page.goto(url, { waitUntil: "load" })
  const client = await page.context().newCDPSession(page)
  let sessionData: SessionData
  try {
    // Ask the browser for the main frame ID instead of reading Playwright's
    // private `_id` field.
    const { frameTree } = await client.send("Page.getFrameTree")
    const frameId = frameTree.frame.id
    if (!frameId) {
      throw new Error("Frame ID not found")
    }
    // Extract session data.
    const [{ cookies }, { storageKey }] = await Promise.all([
      client.send("Network.getAllCookies"),
      client.send("Storage.getStorageKeyForFrame", { frameId })
    ])
    if (!storageKey) {
      throw new Error("Storage key not found")
    }
    const { entries: localStorage } = await client.send("DOMStorage.getDOMStorageItems", {
      storageId: {
        securityOrigin: storageKey,
        isLocalStorage: true
      }
    })
    sessionData = parseSessionData({ origin, cookies, localStorage })
  } finally {
    // Clean up to avoid memory leaks, including when a command above threw.
    await client.detach()
  }
  // Save to session file.
  const filePath = await resolveFilePath("./data", origin)
  await writeFile(filePath, JSON.stringify(sessionData))
}
/**
 * Restores session data for a given page and URL.
 *
 * @remarks
 * Local storage must be restored before the page loads. To achieve that, the
 * origin is temporarily intercepted and answered with a blank HTML document,
 * localStorage is written over CDP while that blank document is loaded, and
 * only then is the real `url` opened. The temporary route and the CDP session
 * are both released even when a step fails.
 *
 * @param page - The Playwright page object.
 * @param url - The URL to navigate to.
 * @throws If the session file cannot be read or parsed, or if the main frame
 * ID or its storage key cannot be determined.
 *
 * @public
 */
export async function restoreSessionDataAfterNavigation(page: Page, url: string): Promise<void> {
  // Get the origin that identifies the session file.
  const origin = new URL(url).origin
  const context = page.context()
  // Load data from files.
  const filePath = await resolveFilePath("./data", origin)
  const { cookies, localStorage } = parseSessionData(await readFile(filePath, "utf8"))
  // Set cookies.
  await context.addCookies(cookies)
  // NOTE: Not sure if this will work, but to avoid race condition, navigate to the target origin with page load interception.
  const pattern = `${origin}/**`
  const serveBlankPage: Parameters<typeof context.route>[1] = async (route) => {
    await route.fulfill({
      status: 200,
      contentType: "text/html",
      body: "<html><head><title>Restoring localStorage</title></head><body></body></html>"
    })
  }
  await context.route(pattern, serveBlankPage)
  try {
    await page.goto(origin, { waitUntil: "domcontentloaded", timeout: 5000 })
    const client = await context.newCDPSession(page)
    try {
      // Ask the browser for the main frame ID instead of reading Playwright's
      // private `_id` field.
      const { frameTree } = await client.send("Page.getFrameTree")
      const frameId = frameTree.frame.id
      if (!frameId) {
        throw new Error("Frame ID not found")
      }
      // Set localStorage.
      const { storageKey } = await client.send("Storage.getStorageKeyForFrame", { frameId })
      if (!storageKey) {
        throw new Error("Storage key not found")
      }
      for (const [key, value] of localStorage) {
        await client.send("DOMStorage.setDOMStorageItem", {
          storageId: {
            securityOrigin: storageKey,
            isLocalStorage: true
          },
          key,
          value
        })
      }
    } finally {
      // Clean up to avoid memory leaks, including when a command above threw.
      await client.detach()
    }
  } finally {
    // Remove only the interception installed here; passing the handler keeps
    // any other routes registered for the same pattern in place.
    await context.unroute(pattern, serveBlankPage)
  }
  // Navigate to the target URL.
  await page.goto(url, { waitUntil: "load" })
}
/**
 * Ensure the session data is in the correct format.
 *
 * @remarks
 * This is crappy parsing, in practice you should use a schema validator.
 * Only the top-level shape is checked: a non-empty string `origin` and array
 * values for `cookies` and `localStorage`. Array elements are not inspected.
 *
 * @param sessionData - Object containing session data or a JSON string.
 * @returns - Parsed session data.
 * @throws - If the JSON cannot be parsed or the session data is not in the expected format.
 *
 * @internal
 */
function parseSessionData(sessionData: string | SessionData): SessionData {
  /* Parses JSON and wraps syntax errors in a descriptive error that keeps the cause. */
  function parseJson(jsonString: string): unknown {
    try {
      return JSON.parse(jsonString)
    } catch (cause) {
      // Catch variables are `unknown`; narrow before reading `message`.
      const reason = cause instanceof Error ? cause.message : String(cause)
      throw new Error(`Failed to parse session data: ${reason}`, { cause })
    }
  }

  const candidate: unknown = typeof sessionData === "string" ? parseJson(sessionData) : sessionData
  // Valid JSON such as `null` or `42` must not reach the destructuring below.
  if (typeof candidate !== "object" || candidate === null) {
    throw new Error("Session data is not an object")
  }
  const { origin, cookies, localStorage } = candidate as Partial<SessionData>
  if (typeof origin !== "string" || !origin) {
    throw new Error("Origin not found in session data")
  }
  if (!Array.isArray(cookies)) {
    throw new Error("Cookies not found in session data")
  }
  if (!Array.isArray(localStorage)) {
    throw new Error("LocalStorage not found in session data")
  }
  return { origin, cookies, localStorage }
}
/**
 * Ensures the target directory exists and returns the full path to the session file for a specified origin.
 *
 * @remarks
 * An origin such as `https://example.com` contains `:` and `/`, which would
 * otherwise be treated as path separators and point into a sub-directory that
 * is never created. The origin is therefore URL-encoded so that it becomes a
 * single file name directly inside the target directory.
 *
 * @param path - The target directory path.
 * @param origin - The origin for which the session file is being created.
 * @returns - The fully resolved path to the session file.
 *
 * @internal
 */
async function resolveFilePath(path: string, origin: string): Promise<string> {
  const dir = resolve(path)
  await mkdir(dir, { recursive: true })
  return resolve(dir, `${encodeURIComponent(origin)}.json`)
}
/**
 * Shape of the session snapshot that is written to and read from a session file.
 *
 * @internal
 */
interface SessionData {
  /** Origin the snapshot belongs to. */
  origin: string
  /** Cookies captured for the session, as CDP cookie objects. */
  cookies: Array<Protocol.Network.Cookie>
  /** localStorage entries captured for the session, as CDP storage items. */
  localStorage: Array<Protocol.DOMStorage.Item>
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment