import OpenAI from 'openai';
import { chromium } from 'playwright';

// SECURITY: the API key must never be committed to source control. It is read
// from the environment instead. Any key that was previously hard-coded in this
// file must be treated as compromised: revoke it and issue a new one.
const apiKey = process.env.OPENAI_API_KEY;
if (!apiKey) {
  throw new Error('OPENAI_API_KEY environment variable is not set');
}

// Shared request settings so the initial request and every follow-up turn
// cannot drift apart.
const MODEL = 'gpt-5.4';
const TOOLS = [{ type: 'computer' }];

const client = new OpenAI({ apiKey });

const browser = await chromium.launch();
try {
  const page = await browser.newPage();

  const initialResponse = await client.responses.create({
    model: MODEL,
    tools: TOOLS,
    input:
      'Check whether the Filters panel is open. If it is not open, click Show filters. Then type penguin in the search box. Use the computer tool for UI interaction.',
  });
  console.log(JSON.stringify(initialResponse.output, null, 2));

  // Keep the value returned by the loop: it is the last response of the
  // conversation, not the initial one.
  const finalResponse = await computerUseLoop(page, initialResponse);
  console.log(JSON.stringify(finalResponse.output, null, 2));
} finally {
  // Always release the browser process, even when a request or action throws.
  await browser.close();
}

/**
 * Replays a batch of model-requested UI actions against a Playwright page,
 * in order.
 *
 * @param {import('playwright').Page} page - Page that receives the input events.
 * @param {Array<object>} actions - Actions taken from a `computer_call` item.
 *   Each has a `type` plus type-specific fields (`x`, `y`, `button`, `keys`,
 *   `text`, `path`, scroll deltas).
 * @returns {Promise<void>}
 * @throws {Error} If an action has a `type` this function does not handle.
 */
async function handleComputerActions(page, actions) {
  for (const action of actions) {
    switch (action.type) {
      case 'click':
        await page.mouse.click(action.x, action.y, {
          button: action.button ?? 'left',
        });
        break;

      case 'double_click':
        await page.mouse.dblclick(action.x, action.y, {
          button: action.button ?? 'left',
        });
        break;

      case 'move':
        await page.mouse.move(action.x, action.y);
        break;

      case 'drag': {
        // NOTE(review): assumes `path` is an ordered list of { x, y } points —
        // confirm against the API reference.
        const [start, ...rest] = action.path ?? [];
        if (!start) {
          break;
        }
        await page.mouse.move(start.x, start.y);
        await page.mouse.down();
        for (const point of rest) {
          await page.mouse.move(point.x, point.y);
        }
        await page.mouse.up();
        break;
      }

      case 'scroll': {
        // Accept both the snake_case field names used by the API payload and
        // the camelCase names the original code read.
        const deltaX = action.scroll_x ?? action.scrollX ?? 0;
        const deltaY = action.scroll_y ?? action.scrollY ?? 0;
        // The wheel event is dispatched at the current pointer position, so
        // move there first.
        await page.mouse.move(action.x, action.y);
        await page.mouse.wheel(deltaX, deltaY);
        break;
      }

      case 'keypress':
        for (const key of action.keys) {
          // "SPACE" is translated to a literal space character before being
          // handed to Playwright; every other key name is passed through.
          await page.keyboard.press(key === 'SPACE' ? ' ' : key);
        }
        break;

      case 'type':
        await page.keyboard.type(action.text);
        break;

      case 'wait':
      case 'screenshot':
        // Nothing to do on the page: the caller captures a screenshot after
        // every batch of actions anyway.
        break;

      default:
        throw new Error(`Unsupported action: ${action.type}`);
    }
  }
}

/**
 * Drives the computer-use conversation until the model stops asking for UI
 * actions.
 *
 * Each turn: find the `computer_call` item in the latest response, execute
 * its actions on the page, take a screenshot, and send that screenshot back
 * as a `computer_call_output` chained to the previous response.
 *
 * @param {import('playwright').Page} page - Page the actions are executed on.
 * @param {object} response - Response to start from; its `output` array is
 *   searched for a `computer_call` item.
 * @returns {Promise<object>} The first response that contains no
 *   `computer_call` item.
 */
async function computerUseLoop(page, response) {
  let current = response;

  while (true) {
    const computerCall = current.output.find(
      (item) => item.type === 'computer_call',
    );
    if (!computerCall) {
      return current;
    }

    // Tolerate a payload carrying a single `action` instead of an `actions`
    // array, and a call with no actions at all.
    const actions =
      computerCall.actions ?? (computerCall.action ? [computerCall.action] : []);
    await handleComputerActions(page, actions);

    const screenshot = await page.screenshot();
    const screenshotBase64 = screenshot.toString('base64');

    current = await client.responses.create({
      model: MODEL,
      tools: TOOLS,
      previous_response_id: current.id,
      input: [
        {
          type: 'computer_call_output',
          call_id: computerCall.call_id,
          output: {
            type: 'computer_screenshot',
            image_url: `data:image/png;base64,${screenshotBase64}`,
            detail: 'original',
          },
        },
      ],
    });
  }
}