import OpenAI from 'openai'
import { chromium } from 'playwright'

// The API key is read from the environment (export OPENAI_API_KEY=...).
// Never commit a key to source control; any key that was previously
// hard-coded in this file must be treated as leaked and revoked.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })

const my = {
  model: 'gpt-5.4' // computer tool is only available in gpt-5.4 or later
}

// Upper-case key names mapped to the names Playwright's keyboard API accepts.
// NOTE(review): the exact spellings the model emits are assumed — confirm
// against real responses. Keys not listed here are passed through unchanged.
const KEY_ALIASES = {
  SPACE: ' ',
  ENTER: 'Enter',
  RETURN: 'Enter',
  TAB: 'Tab',
  ESC: 'Escape',
  ESCAPE: 'Escape',
  BACKSPACE: 'Backspace',
  DELETE: 'Delete',
  CTRL: 'Control',
  CONTROL: 'Control',
  ALT: 'Alt',
  SHIFT: 'Shift',
  CMD: 'Meta',
  META: 'Meta',
  ARROWUP: 'ArrowUp',
  ARROWDOWN: 'ArrowDown',
  ARROWLEFT: 'ArrowLeft',
  ARROWRIGHT: 'ArrowRight'
}

const browser = await chromium.launch({
  headless: false,
  chromiumSandbox: true,
  env: {}, // launch the browser with an empty environment
  args: ['--disable-extensions', '--disable-file-system']
})

const page = await browser.newPage({
  viewport: { width: 1280, height: 720 }
})

/**
 * Replays a batch of model-requested UI actions on a Playwright page.
 *
 * Actions are executed strictly in order. `wait` and `screenshot` are no-ops
 * here because the caller captures a screenshot after every batch anyway.
 *
 * @param {import('playwright').Page} page - Page that receives the input events.
 * @param {Array<object>} actions - Action objects, each with a `type` field.
 * @returns {Promise<void>}
 * @throws {Error} If an action has a `type` this function does not handle.
 */
async function handleComputerActions (page, actions) {
  for (const action of actions) {
    switch (action.type) {
      case 'click':
        await page.mouse.click(action.x, action.y, {
          button: action.button ?? 'left'
        })
        break
      case 'double_click':
        await page.mouse.dblclick(action.x, action.y, {
          button: action.button ?? 'left'
        })
        break
      case 'move':
        await page.mouse.move(action.x, action.y)
        break
      case 'drag': {
        // NOTE(review): assumes `path` is an array of { x, y } points — confirm.
        const [start, ...rest] = action.path ?? []
        if (!start) break
        await page.mouse.move(start.x, start.y)
        await page.mouse.down()
        for (const point of rest) {
          await page.mouse.move(point.x, point.y)
        }
        await page.mouse.up()
        break
      }
      case 'scroll':
        // Move first so the wheel event lands on the element under (x, y).
        await page.mouse.move(action.x, action.y)
        // Accept both the snake_case and the camelCase field spellings.
        await page.mouse.wheel(
          action.scroll_x ?? action.scrollX ?? 0,
          action.scroll_y ?? action.scrollY ?? 0
        )
        break
      case 'keypress':
        for (const key of action.keys) {
          await page.keyboard.press(KEY_ALIASES[key] ?? key)
        }
        break
      case 'type':
        await page.keyboard.type(action.text)
        break
      case 'wait':
      case 'screenshot':
        break
      default:
        throw new Error(`Unsupported action: ${action.type}`)
    }
  }
}

/**
 * Runs the computer-use loop until the model stops requesting actions.
 *
 * Each turn: find the `computer_call` item in the response, execute its
 * actions on `target`, screenshot `target`, and send the screenshot back as a
 * `computer_call_output` chained to the previous response.
 *
 * @param {import('playwright').Page} target - Page to act on and screenshot.
 * @param {object} response - Response to start from.
 * @returns {Promise<object>} The first response that has no `computer_call`.
 */
async function computerUseLoop (target, response) {
  while (true) {
    const computerCall = response.output.find(
      item => item.type === 'computer_call'
    )
    if (!computerCall) {
      return response
    }

    await handleComputerActions(target, computerCall.actions)

    // Screenshot the page that was acted on, not the module-level `page`.
    const screenshot = await target.screenshot({ type: 'png' })
    const screenshotBase64 = Buffer.from(screenshot).toString('base64')

    response = await openai.responses.create({
      model: my.model,
      tools: [{ type: 'computer' }],
      previous_response_id: response.id,
      input: [
        {
          type: 'computer_call_output',
          call_id: computerCall.call_id,
          output: {
            type: 'computer_screenshot',
            image_url: `data:image/png;base64,${screenshotBase64}`,
            detail: 'original'
          }
        }
      ]
    })
  }
}

try {
  // newPage() has no `url` option, so the start page must be loaded explicitly.
  await page.goto('https://www.bing.com')

  const response = await openai.responses.create({
    model: my.model,
    tools: [{ type: 'computer' }],
    input: "visit agidin.com and tell me what's the title of the page, then screenshot the page and save it as homepage.png"
  })
  console.log(JSON.stringify(response.output, null, 2))

  // Await the loop so failures surface here and the final answer is not lost.
  const finalResponse = await computerUseLoop(page, response)
  console.log(finalResponse.output_text)
} finally {
  // Always release the browser, even when a request or an action fails.
  await browser.close()
}