Created
March 14, 2025 17:27
-
-
Save mizchi/81d17dd85f5cb9fb2373db872f648260 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Usage | |
* deno run -A v0.ts https://google.com | |
*/ | |
import "core-js/proposals/explicit-resource-management.js"; | |
import puppeteer from "puppeteer"; | |
import { tool } from "ai"; | |
import { collectHighlight } from "./inject.ts"; | |
import { z } from "zod"; | |
// optional: show image on terminal | |
import { printImageFromBase64 } from "jsr:@mizchi/imgcat"; | |
/**
 * Registers a SIGINT (Ctrl-C) listener that runs `fn` as cleanup and then
 * terminates the process.
 *
 * The process exits with code 0 when `fn` resolves and with code 1 when it
 * rejects; in the latter case the rejection reason is logged.
 *
 * @param fn Asynchronous cleanup callback invoked when SIGINT is received.
 * @returns A function that unregisters the SIGINT listener again.
 */
function trapCtrlC(fn: () => Promise<void>) {
  const handler = async () => {
    console.log("Signal received. Exiting...");
    let exitCode = 0;
    try {
      await fn();
    } catch (error: unknown) {
      // Report why cleanup failed instead of discarding the error.
      console.error("Error during cleanup. Exiting...", error);
      exitCode = 1;
    }
    // Single exit point: the code reflects whether cleanup succeeded.
    Deno.exit(exitCode);
  };
  Deno.addSignalListener("SIGINT", handler);
  return () => {
    Deno.removeSignalListener("SIGINT", handler);
  };
}
/**
 * Opens a new page in `browser` and builds the AI SDK tools that operate on it.
 *
 * @param browser Puppeteer browser in which the working page is created.
 * @param options.url When set, the page navigates there (waiting for
 *   `networkidle0`) before the tools are returned.
 * @param options.headless Accepted for callers' convenience; not read here.
 * @param options.imgcat When true, every captured screenshot is also printed
 *   to the terminal via `printImageFromBase64`.
 * @returns The tool set `{ getBrowserState, doClick, askToUser }`.
 */
async function createBrowserTools(
  browser: puppeteer.Browser,
  options: {
    url?: string;
    headless?: boolean;
    imgcat?: boolean;
  },
) {
  // TODO: Add a way to close the browser
  const activePage = await browser.newPage();
  // Mirror the page's console output into the host terminal.
  activePage.on("console", (msg) => {
    console.log(`%c[console.${msg.type()}]: ${msg.text()}`, "color: gray");
  });

  /** Collects the highlighted elements, then captures a base64 screenshot. */
  async function updateState() {
    const elements = await collectHighlight(activePage);
    const screenshot = await activePage.screenshot({ encoding: "base64" });
    if (options.imgcat) {
      printImageFromBase64(screenshot);
    }
    return {
      screenshot,
      elements,
    };
  }

  /** Converts a browser state into the image + text parts sent to the model. */
  function toToolResultContent(
    result: Awaited<ReturnType<typeof updateState>>,
  ) {
    return [
      {
        type: "image" as const,
        data: result.screenshot,
        mimeType: "image/png",
      },
      {
        type: "text" as const,
        text: JSON.stringify(result.elements, null, 2),
      },
    ];
  }

  if (options.url) {
    await activePage.goto(options.url, { waitUntil: "networkidle0" });
  }

  const getBrowserState = tool({
    description: `
現在のブラウザの状態を取得します。
スクリーンショットを撮影し、画面上のインタラクティブな要素を取得します。
スクリーンショットには操作可能なインデックスがオーバーレイで表示されています。
`.trim(),
    parameters: z.object({}),
    async execute() {
      return await updateState();
    },
    experimental_toToolResultContent: toToolResultContent,
  });

  const doClick = tool({
    description: `
指定されたXPathの要素をクリックします。
操作完了後のブラウザの状態を返します。
`.trim(),
    parameters: z.object({
      xpath: z.string(),
    }),
    async execute({ xpath }) {
      // Start waiting BEFORE clicking so a navigation triggered by the click
      // cannot complete unobserved. The outcome is captured as a value so the
      // promise never rejects unhandled if we bail out early below.
      const navigation = activePage
        .waitForNavigation({ timeout: 10000 })
        .then(() => undefined, (error: unknown) => error);

      const clicked = await activePage.evaluate((xpath) => {
        const element = document.evaluate(
          xpath,
          document,
          null,
          XPathResult.FIRST_ORDERED_NODE_TYPE,
          null,
        ).singleNodeValue as HTMLElement | null;
        if (!element) {
          return false;
        }
        element.click();
        return true;
      }, xpath);

      if (!clicked) {
        // Fail immediately with a useful message instead of a 10s timeout.
        throw new Error(`No element matches XPath: ${xpath}`);
      }

      const navigationError = await navigation;
      // A click that does not navigate simply times out; that is expected.
      // Anything else is a real failure and is re-thrown.
      const timedOut = navigationError instanceof Error &&
        navigationError.name === "TimeoutError";
      if (navigationError != null && !timedOut) {
        throw navigationError;
      }
      return await updateState();
    },
    experimental_toToolResultContent: toToolResultContent,
  });

  return {
    getBrowserState,
    doClick,
    askToUser,
  };
}
/**
 * Base instruction (in Japanese) prepended to every run: the assistant acts
 * on the user's behalf in the browser and asks the user a question at each
 * step before operating.
 */
const PROMPT = [
  "あなたはユーザーのブラウザ操作を代行します。",
  "ステップごとにユーザーに質問をし、その回答に基づいて操作を行います。",
].join("\n");
// async function run() { | |
import { parseArgs } from "node:util"; | |
import { askToUser, runTools } from "./ai.ts"; | |
// Entry point: parse CLI arguments, launch the browser, and run the agent.
if (import.meta.main) {
  const parsed = parseArgs({
    allowPositionals: true,
    options: {
      prompt: { type: "string", short: "p" },
      headful: { type: "boolean" },
      imgcat: { type: "boolean" },
      output: { type: "string", short: "o" },
    },
  });
  const url = parsed.positionals[0] || "https://google.com";

  // Everything deferred on this stack is released when the scope ends.
  await using d = new AsyncDisposableStack();
  // Pass a closure, not the bare method: `d[Symbol.asyncDispose]` detached
  // from `d` loses its `this` and throws when invoked on Ctrl-C.
  const untrap = trapCtrlC(() => d.disposeAsync());
  // Deferred first, so it runs last: the SIGINT listener is removed only
  // after the other resources have been released.
  d.defer(untrap);

  // Launch a browser instance
  const browser = await puppeteer.launch({
    headless: !parsed.values.headful,
    defaultViewport: {
      width: 1280,
      height: 800,
    },
  });
  d.defer(() => browser.close());

  const tools = await createBrowserTools(browser, {
    url,
    headless: !parsed.values.headful,
    imgcat: parsed.values.imgcat,
  });

  // Only append the user's prompt when one was given; otherwise the literal
  // text "undefined" would be concatenated onto the base prompt.
  const userPrompt = parsed.values.prompt;
  await runTools({
    prompt: userPrompt ? `${PROMPT}\n${userPrompt}` : PROMPT,
    tools,
    maxSteps: 15,
  });
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { anthropic } from "@ai-sdk/anthropic"; | |
import { streamText, Tool, tool, type ToolResultPart } from "ai"; | |
import { z } from "zod"; | |
const _encoder = new TextEncoder();

/**
 * Writes `text` to stdout without a trailing newline.
 *
 * The write is synchronous and loops until every byte is accepted, so
 * streamed text keeps its order relative to subsequent `console.log` output
 * and is never left partially written.
 *
 * @param text Text to emit, encoded with the module-level `TextEncoder`.
 */
const write = (text: string) => {
  const bytes = _encoder.encode(text);
  let offset = 0;
  // A single writeSync call may accept only part of the buffer.
  while (offset < bytes.length) {
    offset += Deno.stdout.writeSync(bytes.subarray(offset));
  }
};
/**
 * Shortens `text` to at most `length` UTF-16 code units, appending "..." when
 * anything was cut off.
 *
 * @param text Text to shorten.
 * @param length Maximum number of code units kept (default 100).
 * @returns `text` unchanged when it fits, otherwise the prefix plus "...".
 */
function truncate(text: string, length: number = 100) {
  if (text.length > length) {
    return `${text.slice(0, length)}...`;
  }
  return text;
}
/**
 * Runs `streamText` with the given options and prints the stream to the
 * terminal as it arrives: text deltas are written inline, tool calls and tool
 * results are logged in gray, and errors go to stderr.
 *
 * @param options Options forwarded to `streamText`. `model` defaults to
 *   `claude-3-7-sonnet-20250219` and can be overridden by `options`.
 * @returns Resolves once the stream has been fully consumed.
 */
export async function runTools(
  options: Partial<Parameters<typeof streamText>[0]> = {},
): Promise<void> {
  const { fullStream } = streamText({
    model: anthropic("claude-3-7-sonnet-20250219"),
    ...options,
  });
  for await (const part of fullStream) {
    switch (part.type) {
      case "text-delta": {
        write(part.textDelta);
        break;
      }
      case "tool-call": {
        console.log(
          `\n%c[tool-call:${part.toolName}] ${
            JSON.stringify(part.args, null, 2)
          }`,
          "color: gray",
        );
        break;
      }
      // @ts-ignore this is returned by the AI SDK
      case "tool-result": {
        const toolPart = part as ToolResultPart;
        // JSON.stringify yields undefined (not a string) for values such as
        // `undefined`; fall back to String() so truncate() always gets text.
        const serialized = JSON.stringify(toolPart.result, null, 2) ??
          String(toolPart.result);
        console.log(
          `\n%c[tool-result:${toolPart.toolName}] ${truncate(serialized)}`,
          "color: gray",
        );
        break;
      }
      case "error": {
        console.error("Error:", part.error);
        break;
      }
      // Lifecycle markers carry nothing to display.
      case "finish":
      case "step-start":
      case "step-finish": {
        break;
      }
      default: {
        console.error("Unknown part type:", part);
      }
    }
  }
  write("\n");
}
/**
 * Tool that lets the model put a question to the human operator.
 *
 * The question is printed, the answer is read with the global `prompt()`,
 * and the answer is echoed back. When `prompt()` yields no value the answer
 * is "no answer"; an empty answer terminates the process with exit code 1.
 */
export const askToUser: Tool = tool({
  description: "Ask a question to the user. Call this for user input",
  parameters: z.object({
    question: z.string().describe("The question to ask the user"),
  }),
  execute: async ({ question }) => {
    console.log(`\n%c[askTo] ${question}`, "color: green");
    const typed = prompt(">");
    const ret = typed ?? "no answer";
    console.log(`\n%c[response] ${ret}`, "color: blue");
    // An empty line is treated as a request to quit.
    if (ret === "") {
      Deno.exit(1);
    }
    return ret;
  },
});
// Stand-alone demo: send the first CLI argument (or a default question) as
// the user message, with only the askToUser tool available.
if (import.meta.main) {
  const [firstArg] = Deno.args;
  const initial = firstArg ?? "What is the meaning of life?";
  const tools = { askToUser };
  await runTools({
    messages: [
      {
        role: "user",
        content: [{ type: "text", text: initial }],
      },
    ],
    tools,
    maxSteps: 10,
  });
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import type { Page } from "puppeteer"; | |
/** Serializable description of one interactive element found on the page. */
export type AccessibleElement = {
  /** XPath that locates the element in the document. */
  xpath: string;
  /** Lower-cased tag name. */
  tagName: string;
  /** Trimmed text content of the element. */
  text: string;
  /** All attributes of the element, keyed by attribute name. */
  attributes: Record<string, string>;
};

/** Object installed in the page that draws overlays and reports elements. */
export type Highlighter = {
  /** Removes the current overlays and restarts the label numbering. */
  reset(): void;
  /** Draws overlays on the visible interactive elements and returns them. */
  collect(): AccessibleElement[];
};
/**
 * Highlights the interactive elements of `page` and returns their
 * descriptions.
 *
 * The highlighter script is injected first when `globalThis.__hl` is not yet
 * present in the page. Existing overlays are reset before collecting.
 *
 * @param page Puppeteer page to inspect.
 * @returns The collected elements, or an empty array when the highlighter is
 *   still missing after injection.
 */
export async function collectHighlight(
  page: Page,
): Promise<AccessibleElement[]> {
  // The callbacks below run inside the page, so they must be self-contained.
  const alreadyInjected = await page.evaluate(() => {
    // @ts-ignore __hl is an ad-hoc global created by the injected script
    return globalThis.__hl != null;
  });
  if (!alreadyInjected) {
    await injectHighlighter(page);
  }
  return await page.evaluate(() => {
    // @ts-ignore __hl is an ad-hoc global created by the injected script
    const highlighter = globalThis.__hl;
    if (highlighter == null) {
      console.error("Highlighter not found");
      return [];
    }
    highlighter.reset();
    return highlighter.collect();
  });
}
/**
 * Evaluates `injectHighlightScript` inside `page`.
 *
 * @param page Puppeteer page that receives the script.
 * @returns Whatever the in-page evaluation of the script yields.
 */
async function injectHighlighter(page: Page) {
  const outcome = await page.evaluate(injectHighlightScript);
  return outcome;
}
/**
 * Script evaluated inside the page: installs a `Highlighter` as
 * `globalThis.__hl` unless one already exists.
 *
 * The function is handed to `page.evaluate`, so everything it needs is
 * declared inside its own body. The helpers below the early `return` are
 * function declarations and are therefore hoisted.
 */
function injectHighlightScript() {
  // @ts-expect-error this is a browser context
  const window = globalThis as Window;
  console.log("Highlighting interactive elements...");

  // Palette cycled through by overlay index.
  const colors = [
    "#FF0000",
    "#00FF00",
    "#0000FF",
    "#FFA500",
    "#800080",
    "#008080",
    "#FF69B4",
    "#4B0082",
    "#FF4500",
    "#2E8B57",
    "#DC143C",
    "#4682B4",
  ];

  // CSS selectors that identify interactive elements.
  const selectors = [
    "a",
    "button",
    "input",
    "select",
    "textarea",
    '[role="button"]',
    '[role="link"]',
    '[role="checkbox"]',
    '[role="radio"]',
    '[role="tab"]',
    '[role="menuitem"]',
    "[onclick]",
    '[tabindex]:not([tabindex="-1"])',
  ];

  // @ts-ignore xxx
  if (globalThis.__hl != null) {
    console.log("[Inject] Highlighter already exists");
    return;
  }
  console.log("[Inject] Creating highlighter");
  // @ts-ignore inject global
  globalThis.__hl = createHighlighter();
  console.log("[Inject] Setup complete");
  return;

  // ----

  /**
   * Builds an XPath for `element` by walking up its ancestors, adding a
   * 1-based position only where several siblings share the same tag name.
   *
   * @throws Error when called with a node that is not an element.
   */
  function generateXPath(element: HTMLElement): string {
    if (element.nodeType !== Node.ELEMENT_NODE) {
      throw new Error("XPathは要素ノードに対してのみ生成できます");
    }
    // document.body has a fixed, well-known path.
    if (element === document.body) {
      return "/html/body";
    }
    // No parent: nothing to anchor the path to.
    const parent = element.parentNode;
    if (!parent) {
      return "";
    }
    const tagName = element.tagName.toLowerCase();
    // The parent is not an element (e.g. the Document above <html>): the path
    // starts here. Recursing further would hit the element-only check above
    // and abort the whole collection.
    if (parent.nodeType !== Node.ELEMENT_NODE) {
      return `/${tagName}`;
    }
    const parentPath = generateXPath(parent as HTMLElement);
    // Disambiguate with an index only when same-tag siblings exist.
    const siblings = Array.from(parent.children).filter(
      (sibling) => sibling.tagName.toLowerCase() === tagName,
    );
    if (siblings.length > 1) {
      const index = siblings.indexOf(element) + 1;
      return `${parentPath}/${tagName}[${index}]`;
    }
    return `${parentPath}/${tagName}`;
  }

  /**
   * Returns the elements matching `selectors` that are HTMLElements with a
   * non-zero box and are not hidden via visibility, display, or opacity.
   */
  function getVisibleElements() {
    const elements = document.querySelectorAll(selectors.join(","));
    const visibleElements = Array.from(elements).filter(
      (element): element is HTMLElement => {
        if (!(element instanceof HTMLElement)) return false;
        const style = window.getComputedStyle(element);
        return element.offsetWidth > 0 &&
          element.offsetHeight > 0 &&
          style.visibility !== "hidden" &&
          style.display !== "none" &&
          style.opacity !== "0";
      },
    );
    return visibleElements;
  }

  /////////////

  /**
   * Create a highlighter object
   */
  function createHighlighter(): Highlighter {
    let container = initContainer();
    let highlightIndex = 0;
    return {
      reset() {
        container.remove();
        container = initContainer();
        highlightIndex = 0;
      },
      collect() {
        const visibleElements = getVisibleElements();
        // Draw all overlays first, then describe the same elements in order.
        for (const element of visibleElements) {
          _overlay(element);
        }
        const elements: AccessibleElement[] = visibleElements.map(
          (element) => {
            const attributes: Record<string, string> = {};
            for (const attr of Array.from(element.attributes)) {
              attributes[attr.name] = attr.value;
            }
            return {
              xpath: generateXPath(element),
              tagName: element.tagName.toLowerCase(),
              // Fall back to "" so the value always matches the declared type.
              text: element.textContent?.trim() ?? "",
              attributes,
            };
          },
        );
        return elements;
      },
    };

    /**
     * Overlay a highlight on top of an element
     */
    function _overlay(element: HTMLElement) {
      // Get element position
      const rect = element.getBoundingClientRect();
      highlightIndex++;
      const colorIndex = highlightIndex % colors.length;
      const baseColor = colors[colorIndex];
      const backgroundColor = baseColor + "1A"; // 10% opacity version of the color

      // Create highlight overlay
      const overlay = document.createElement("div");
      Object.assign(overlay.style, {
        position: "fixed",
        border: `2px solid ${baseColor}`,
        backgroundColor,
        pointerEvents: "none",
        boxSizing: "border-box",
        top: `${rect.top}px`,
        left: `${rect.left}px`,
        width: `${rect.width}px`,
        height: `${rect.height}px`,
        zIndex: "2147483646",
      });

      // Create and position label
      const label = document.createElement("div");
      Object.assign(label.style, {
        position: "fixed",
        background: baseColor,
        color: "white",
        padding: "1px 4px",
        borderRadius: "4px",
        fontSize: `${Math.min(12, Math.max(8, rect.height / 2))}px`,
        zIndex: "2147483647",
      });
      label.textContent = highlightIndex.toString();

      const labelWidth = 20;
      const labelHeight = 16;
      // Default: inside the element's top-right corner.
      let labelTop = rect.top + 2;
      let labelLeft = rect.left + rect.width - labelWidth - 2;
      // Element too small to hold the label: place the label above it.
      if (rect.width < labelWidth + 4 || rect.height < labelHeight + 4) {
        labelTop = rect.top - labelHeight - 2;
        labelLeft = rect.left + rect.width - labelWidth;
      }
      label.style.top = `${labelTop}px`;
      label.style.left = `${labelLeft}px`;

      // Add to container
      container.appendChild(overlay);
      container.appendChild(label);
    }

    /**
     * Ensure a container for the highlights exists
     */
    function initContainer(): HTMLElement {
      console.log("[init] Creating container");
      const HIGHLIGHT_CONTAINER_ID = "dom-analyzer-highlight-container";
      let container = document.getElementById(HIGHLIGHT_CONTAINER_ID);
      // force reset
      if (container) {
        container.remove();
      }
      container = document.createElement("div");
      container.id = HIGHLIGHT_CONTAINER_ID;
      container.style.position = "fixed";
      container.style.pointerEvents = "none";
      container.style.top = "0";
      container.style.left = "0";
      container.style.width = "100%";
      container.style.height = "100%";
      container.style.zIndex = "2147483647";
      document.body.appendChild(container);
      return container;
    }
  }
}
Author
mizchi
commented
Mar 14, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment