Created
October 22, 2024 18:30
-
-
Save JacobFV/2ed9912170c26d558cec0f13e034824c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
async def __call__(
    self,
    *,
    action: Action,
    text: str | None = None,
    coordinate: tuple[int, int] | None = None,
    **kwargs,
):
    """Validate the arguments for ``action`` and run it.

    Args:
        action: One of ``mouse_move``, ``left_click_drag``, ``key``, ``type``,
            ``left_click``, ``right_click``, ``middle_click``,
            ``double_click``, ``screenshot`` or ``cursor_position``.
        text: Required for ``key`` (whitespace-separated xdotool key specs)
            and ``type`` (literal text to type); rejected for every other
            action.
        coordinate: Required for ``mouse_move`` and ``left_click_drag`` as a
            two-element list or tuple of non-negative ints; rejected for
            every other action.
        **kwargs: Accepted and ignored.

    Returns:
        Whatever ``self.shell`` / ``self.screenshot`` returns for the action.
        For ``type`` a ``ToolResult`` is built from the concatenated
        output/error of every typed chunk plus one final screenshot.

    Raises:
        ToolError: If a required argument is missing, a forbidden argument
            is supplied, an argument is malformed, the cursor position
            cannot be parsed, or ``action`` is unknown.
    """
    if action in ("mouse_move", "left_click_drag"):
        if coordinate is None:
            raise ToolError(f"coordinate is required for {action}")
        if text is not None:
            raise ToolError(f"text is not accepted for {action}")
        # Accept tuples as well as lists: the annotation and the error
        # message both promise a tuple, while decoded JSON yields a list.
        if not isinstance(coordinate, (list, tuple)) or len(coordinate) != 2:
            raise ToolError(f"{coordinate} must be a tuple of length 2")
        if not all(isinstance(i, int) and i >= 0 for i in coordinate):
            raise ToolError(f"{coordinate} must be a tuple of non-negative ints")

        x, y = self.scale_coordinates(
            ScalingSource.API, coordinate[0], coordinate[1]
        )

        if action == "mouse_move":
            return await self.shell(f"{self.xdotool} mousemove --sync {x} {y}")
        return await self.shell(
            f"{self.xdotool} mousedown 1 mousemove --sync {x} {y} mouseup 1"
        )

    if action in ("key", "type"):
        if text is None:
            raise ToolError(f"text is required for {action}")
        if coordinate is not None:
            raise ToolError(f"coordinate is not accepted for {action}")
        if not isinstance(text, str):
            # Positional message, consistent with every other ToolError
            # raised in this method.
            raise ToolError(f"{text} must be a string")

        if action == "key":
            # Quote each key spec separately: several whitespace-separated
            # keys still reach xdotool as separate arguments, but shell
            # metacharacters in the input can no longer be interpreted.
            key_args = " ".join(shlex.quote(key) for key in text.split())
            return await self.shell(f"{self.xdotool} key -- {key_args}")

        results: list[ToolResult] = []
        for chunk in chunks(text, TYPING_GROUP_SIZE):
            cmd = f"{self.xdotool} type --delay {TYPING_DELAY_MS} -- {shlex.quote(chunk)}"
            results.append(await self.shell(cmd, take_screenshot=False))
        # One screenshot after all chunks have been typed.
        screenshot_base64 = (await self.screenshot()).base64_image
        return ToolResult(
            output="".join(result.output or "" for result in results),
            error="".join(result.error or "" for result in results),
            base64_image=screenshot_base64,
        )

    if action in (
        "left_click",
        "right_click",
        "double_click",
        "middle_click",
        "screenshot",
        "cursor_position",
    ):
        if text is not None:
            raise ToolError(f"text is not accepted for {action}")
        if coordinate is not None:
            raise ToolError(f"coordinate is not accepted for {action}")

        if action == "screenshot":
            return await self.screenshot()

        if action == "cursor_position":
            result = await self.shell(
                f"{self.xdotool} getmouselocation --shell",
                take_screenshot=False,
            )
            output = result.output or ""
            # The output is parsed as ``X=<int>`` / ``Y=<int>`` lines. A failed
            # command leaves it empty, so report that as a ToolError rather
            # than leaking an IndexError/ValueError.
            try:
                raw_x = int(output.split("X=")[1].split("\n")[0])
                raw_y = int(output.split("Y=")[1].split("\n")[0])
            except (IndexError, ValueError) as exc:
                raise ToolError(
                    f"Could not parse cursor position from xdotool output: {output!r}"
                ) from exc
            x, y = self.scale_coordinates(ScalingSource.COMPUTER, raw_x, raw_y)
            return result.replace(output=f"X={x},Y={y}")

        # Remaining actions are clicks; map each to its xdotool click arguments.
        click_arg = {
            "left_click": "1",
            "right_click": "3",
            "middle_click": "2",
            "double_click": "--repeat 2 --delay 500 1",
        }[action]
        return await self.shell(f"{self.xdotool} click {click_arg}")

    raise ToolError(f"Invalid action: {action}")
async def screenshot(self):
    """Take a screenshot of the current screen and return the base64 encoded image.

    The image is written to a uniquely named PNG under ``OUTPUT_DIR`` (created
    if missing) using ``gnome-screenshot`` when it is on ``PATH`` and ``scrot``
    otherwise. When ``self._scaling_enabled`` is set, the file is resized in
    place with ``convert`` to the size returned by ``self.scale_coordinates``.

    Returns:
        The capture command's result with ``base64_image`` replaced by the
        base64-encoded PNG contents.

    Raises:
        ToolError: If the capture command did not produce the image file.
    """
    output_dir = Path(OUTPUT_DIR)
    output_dir.mkdir(parents=True, exist_ok=True)
    path = output_dir / f"screenshot_{uuid4().hex}.png"
    # Quote once and reuse: OUTPUT_DIR may contain spaces or other characters
    # the shell would otherwise split or interpret.
    quoted_path = shlex.quote(str(path))

    # Try gnome-screenshot first
    if shutil.which("gnome-screenshot"):
        screenshot_cmd = f"{self._display_prefix}gnome-screenshot -f {quoted_path} -p"
    else:
        # Fall back to scrot if gnome-screenshot isn't available
        screenshot_cmd = f"{self._display_prefix}scrot -p {quoted_path}"

    result = await self.shell(screenshot_cmd, take_screenshot=False)

    # Nothing to resize or encode if the capture produced no file.
    if not path.exists():
        raise ToolError(f"Failed to take screenshot: {result.error}")

    if self._scaling_enabled:
        x, y = self.scale_coordinates(
            ScalingSource.COMPUTER, self.width, self.height
        )
        # The trailing "!" is passed through to convert's -resize geometry as-is.
        await self.shell(
            f"convert {quoted_path} -resize {x}x{y}! {quoted_path}",
            take_screenshot=False,
        )

    return result.replace(
        base64_image=base64.b64encode(path.read_bytes()).decode()
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment