Files
aitest/6-tc-computer-builtin.mjs
wameup 0e92f55616 u
2026-03-17 03:20:48 +00:00

90 lines
2.5 KiB
JavaScript

import OpenAI from 'openai'
import { chromium } from 'playwright'
// Script entry point: asks the model to operate a browser page through the
// built-in `computer` tool, replays the requested actions with Playwright, and
// prints the model output before and after the interaction loop.

// SECURITY: the API key is read from the OPENAI_API_KEY environment variable
// instead of being embedded in the source. Any key that was previously
// committed to this file must be treated as leaked and revoked.
const client = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

const browser = await chromium.launch();
try {
  const page = await browser.newPage();

  const response = await client.responses.create({
    model: 'gpt-5.4',
    tools: [{ type: 'computer' }],
    input:
      'Check whether the Filters panel is open. If it is not open, click Show filters. Then type penguin in the search box. Use the computer tool for UI interaction.',
  });
  console.log(JSON.stringify(response.output, null, 2));

  // computerUseLoop returns the last response it received; `response` itself
  // is never updated, so the final output has to be taken from the return value.
  const finalResponse = await computerUseLoop(page, response);
  console.log(JSON.stringify(finalResponse.output, null, 2));
} finally {
  // Always shut the browser down, even when a request or a UI action throws,
  // so a failed run does not leave a browser process behind.
  await browser.close();
}
/**
 * Replays a batch of model-requested UI actions against a page.
 *
 * Actions are executed strictly in order, and each one is awaited before the
 * next begins. "wait" and "screenshot" actions are accepted but produce no
 * input events here.
 *
 * @param {object} page - Object exposing `mouse` (click, dblclick, move,
 *   wheel) and `keyboard` (press, type); a Playwright page in this script.
 * @param {Iterable<object>} actions - Action descriptors, each carrying a
 *   `type` plus the fields that type needs (`x`/`y`, `button`, `scrollX`/
 *   `scrollY`, `keys`, `text`).
 * @returns {Promise<void>} Resolves once every action has been performed.
 * @throws {Error} If an action has an unhandled `type`. Actions that precede
 *   it in the list have already been performed by then.
 */
async function handleComputerActions(page, actions) {
  // Action kinds that are recognised but need nothing done to the page.
  const passiveTypes = new Set(["wait", "screenshot"]);

  // One performer per active action kind. Built per call so the function
  // stays self-contained and does not rely on module initialisation order.
  const performers = new Map([
    [
      "click",
      (step) =>
        page.mouse.click(step.x, step.y, { button: step.button ?? "left" }),
    ],
    [
      "double_click",
      (step) =>
        page.mouse.dblclick(step.x, step.y, { button: step.button ?? "left" }),
    ],
    [
      "scroll",
      async (step) => {
        // Position the pointer first so the wheel event targets that spot.
        await page.mouse.move(step.x, step.y);
        await page.mouse.wheel(step.scrollX ?? 0, step.scrollY ?? 0);
      },
    ],
    [
      "keypress",
      async (step) => {
        // Keys are pressed one after another; "SPACE" is sent as a literal space.
        for (const keyName of step.keys) {
          await page.keyboard.press(keyName === "SPACE" ? " " : keyName);
        }
      },
    ],
    ["type", (step) => page.keyboard.type(step.text)],
  ]);

  for (const step of actions) {
    const kind = step.type;
    if (passiveTypes.has(kind)) {
      continue;
    }

    const perform = performers.get(kind);
    if (perform === undefined) {
      throw new Error(`Unsupported action: ${kind}`);
    }
    await perform(step);
  }
}
/**
 * Runs the model/browser feedback cycle until the model's output no longer
 * contains a `computer_call` item.
 *
 * Each round performs the actions of the first `computer_call` in the latest
 * response, captures a screenshot of the page, and sends it back to the model
 * as a `computer_call_output` chained to the previous response id.
 *
 * @param {object} page - Page passed to `handleComputerActions` and used for
 *   `screenshot()`.
 * @param {object} response - Starting response; must expose `id` and an
 *   `output` array.
 * @returns {Promise<object>} The first response without a `computer_call`
 *   item (the input itself when it already has none).
 */
async function computerUseLoop(page, response) {
  const isComputerCall = (item) => item.type === "computer_call";

  let latest = response;
  let pendingCall = latest.output.find(isComputerCall);

  while (pendingCall) {
    await handleComputerActions(page, pendingCall.actions);

    // Encode the current page state as a base64 data URL for the model.
    const capture = await page.screenshot();
    const imageUrl = `data:image/png;base64,${capture.toString("base64")}`;

    const callOutput = {
      type: "computer_call_output",
      call_id: pendingCall.call_id,
      output: {
        type: "computer_screenshot",
        image_url: imageUrl,
        detail: "original",
      },
    };

    latest = await client.responses.create({
      model: "gpt-5.4",
      tools: [{ type: "computer" }],
      previous_response_id: latest.id,
      input: [callOutput],
    });
    pendingCall = latest.output.find(isComputerCall);
  }

  return latest;
}