/*
 * Files
 * aitest/5-tc-browser.mjs
 * 2026-03-16 14:55:27 +08:00
 *
 * 101 lines
 * 2.8 KiB
 * JavaScript
 */

import OpenAI from 'openai'
import { chromium } from 'playwright'
// OpenAI API client used for every request in this script.
// The key is read from the environment so that no credential lives in source
// control:  export OPENAI_API_KEY=...
// SECURITY: an API key literal used to be hard-coded here. Treat that key as
// leaked and revoke it; do not paste a replacement into this file.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY
})
/**
 * Script-wide settings.
 * `model` is the model id sent with each Responses API request made below.
 */
const my = { model: 'gpt-5.4' } // computer tool is only available in gpt-5.4 or later
// Options for the Chromium instance whose page the model will operate.
const launchOptions = {
  headless: false, // run headed rather than headless
  chromiumSandbox: true,
  env: {}, // an empty environment object is handed to the browser process
  args: ['--disable-extensions', '--disable-file-system']
}
const browser = await chromium.launch(launchOptions)
// Open the tab the model will control, with a fixed 1280x720 viewport.
const page = await browser.newPage({
  viewport: { width: 1280, height: 720 }
})
// `browser.newPage()` has no `url` option, so the start page has to be loaded
// explicitly; without this the tab stays on about:blank.
await page.goto('https://www.bing.com')
// How long a model-requested `wait` action pauses, in milliseconds.
const WAIT_ACTION_MS = 1000

// Key names the model may send (matched case-insensitively) mapped to the names
// Playwright's keyboard API accepts. Keys not listed are passed through as-is.
const PLAYWRIGHT_KEY_NAMES = new Map([
  ['space', ' '],
  ['ctrl', 'Control'],
  ['control', 'Control'],
  ['cmd', 'Meta'],
  ['command', 'Meta'],
  ['super', 'Meta'],
  ['win', 'Meta'],
  ['meta', 'Meta'],
  ['alt', 'Alt'],
  ['option', 'Alt'],
  ['shift', 'Shift'],
  ['enter', 'Enter'],
  ['return', 'Enter'],
  ['esc', 'Escape'],
  ['escape', 'Escape'],
  ['tab', 'Tab'],
  ['backspace', 'Backspace'],
  ['delete', 'Delete'],
  ['del', 'Delete'],
  ['insert', 'Insert'],
  ['home', 'Home'],
  ['end', 'End'],
  ['pageup', 'PageUp'],
  ['pagedown', 'PageDown'],
  ['up', 'ArrowUp'],
  ['arrowup', 'ArrowUp'],
  ['down', 'ArrowDown'],
  ['arrowdown', 'ArrowDown'],
  ['left', 'ArrowLeft'],
  ['arrowleft', 'ArrowLeft'],
  ['right', 'ArrowRight'],
  ['arrowright', 'ArrowRight']
])

// Translates one model-supplied key name into a Playwright key name.
function toPlaywrightKey (key) {
  return PLAYWRIGHT_KEY_NAMES.get(String(key).toLowerCase()) ?? key
}

// Performs a `click` action, translating API button names for Playwright.
async function performClick (page, action) {
  const button = action.button ?? 'left'
  // `back` / `forward` are not Playwright mouse buttons; treat them as
  // browser history navigation.
  if (button === 'back') {
    await page.goBack()
    return
  }
  if (button === 'forward') {
    await page.goForward()
    return
  }
  await page.mouse.click(action.x, action.y, {
    button: button === 'wheel' ? 'middle' : button
  })
}

// Presses `keys` together as one chord: hold each key in order, then release
// in reverse order. Keys already held are released even if a later one fails.
async function performKeypress (page, keys) {
  const held = []
  try {
    for (const key of keys.map(toPlaywrightKey)) {
      await page.keyboard.down(key)
      held.push(key)
    }
  } finally {
    for (const key of held.reverse()) {
      await page.keyboard.up(key)
    }
  }
}

// Performs a `drag` action: press at the first path point, move through the
// remaining points, then release. An empty path is a no-op.
async function performDrag (page, path) {
  const [start, ...rest] = path ?? []
  if (!start) return
  await page.mouse.move(start.x, start.y)
  await page.mouse.down()
  for (const point of rest) {
    await page.mouse.move(point.x, point.y)
  }
  await page.mouse.up()
}

/**
 * Replays a batch of model-requested UI actions on a Playwright page, in order.
 *
 * Supported action types: `click`, `double_click`, `move`, `drag`, `scroll`,
 * `keypress`, `type`, `wait` and `screenshot`. `screenshot` is a no-op here
 * because the caller captures the page after the batch has run.
 *
 * @param {object} page - Playwright page (uses `mouse`, `keyboard`,
 *   `waitForTimeout`, `goBack`, `goForward`).
 * @param {Array<object>} actions - Action objects, each with a `type` field.
 * @returns {Promise<void>}
 * @throws {Error} When an action has a type that is not listed above.
 */
async function handleComputerActions (page, actions) {
  for (const action of actions) {
    switch (action.type) {
      case 'click':
        await performClick(page, action)
        break
      case 'double_click':
        await page.mouse.dblclick(action.x, action.y, {
          button: action.button ?? 'left'
        })
        break
      case 'move':
        await page.mouse.move(action.x, action.y)
        break
      case 'drag':
        await performDrag(page, action.path)
        break
      case 'scroll':
        await page.mouse.move(action.x, action.y)
        // The API sends snake_case `scroll_x` / `scroll_y`; the camelCase
        // spelling is still accepted for callers that already normalise it.
        await page.mouse.wheel(
          action.scroll_x ?? action.scrollX ?? 0,
          action.scroll_y ?? action.scrollY ?? 0
        )
        break
      case 'keypress':
        await performKeypress(page, action.keys)
        break
      case 'type':
        await page.keyboard.type(action.text)
        break
      case 'wait':
        // Give the page time to settle before the caller's next screenshot.
        await page.waitForTimeout(WAIT_ACTION_MS)
        break
      case 'screenshot':
        break
      default:
        throw new Error(`Unsupported action: ${action.type}`)
    }
  }
}
/**
 * Runs the computer-use exchange until the model stops requesting actions.
 *
 * Each round: find the first `computer_call` item in `response.output`, replay
 * its actions on `target`, capture a PNG screenshot of `target`, and send that
 * screenshot back as a `computer_call_output` chained to the previous response
 * through `previous_response_id`. Only the first `computer_call` of a response
 * is handled.
 *
 * @param {object} target - Playwright page the actions run on and the
 *   screenshots are taken from.
 * @param {object} response - Initial Responses API result to start from.
 * @returns {Promise<object>} The first response that has no `computer_call`.
 */
async function computerUseLoop (target, response) {
  while (true) {
    const computerCall = response.output.find(
      item => item.type === 'computer_call'
    )
    if (!computerCall) {
      return response
    }
    await handleComputerActions(target, computerCall.actions)
    // Capture the page the actions were just applied to. Using `target` (and
    // not a module-level page) keeps the screenshot in sync with the actions.
    const screenshot = await target.screenshot({ type: 'png' })
    const screenshotBase64 = Buffer.from(screenshot).toString('base64')
    response = await openai.responses.create({
      model: my.model,
      tools: [{ type: 'computer' }],
      previous_response_id: response.id,
      input: [
        {
          type: 'computer_call_output',
          call_id: computerCall.call_id,
          output: {
            type: 'computer_screenshot',
            image_url: `data:image/png;base64,${screenshotBase64}`,
            detail: 'original'
          }
        }
      ]
    })
  }
}
// Entry point: send the task to the model, let it drive the browser until it
// stops requesting actions, print its final answer, and always close the
// browser afterwards.
try {
  const response = await openai.responses.create({
    model: my.model,
    tools: [{ type: 'computer' }],
    input:
      "visit agidin.com and tell me what's the title of the page, then screenshot the page and save it as homepage.png"
  })
  console.log(JSON.stringify(response.output, null, 2))
  // Awaited so that failures inside the loop surface here instead of becoming
  // an unhandled rejection, and so the final answer can be printed.
  const finalResponse = await computerUseLoop(page, response)
  console.log(
    finalResponse.output_text ?? JSON.stringify(finalResponse.output, null, 2)
  )
} finally {
  // Release the browser whether the run succeeded or threw.
  await browser.close()
}