init
This commit is contained in:
100
5-tc-browser.mjs
Normal file
100
5-tc-browser.mjs
Normal file
@@ -0,0 +1,100 @@
|
||||
import OpenAI from 'openai'
|
||||
import { chromium } from 'playwright'
|
||||
|
||||
// The API key is read from the environment so that no credential lives in the
// source tree. Set it before running:  export OPENAI_API_KEY=...
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY
})

// Request settings shared by every `responses.create` call in this script.
const my = {
  model: 'gpt-5.4' // computer tool is only available in gpt-5.4 or later
}
|
||||
|
||||
// Launch a visible (non-headless) Chromium with its sandbox enabled, an empty
// environment, and extensions / file-system access switched off via flags.
const browser = await chromium.launch({
  headless: false,
  chromiumSandbox: true,
  env: {},
  args: ['--disable-extensions', '--disable-file-system']
})

// Open the single page that the model will control, at a fixed viewport size.
const page = await browser.newPage({
  viewport: { width: 1280, height: 720 }
})

// `newPage` has no `url` option, so the start page has to be loaded with an
// explicit navigation; otherwise the session begins on a blank page.
await page.goto('https://www.bing.com')
|
||||
|
||||
/**
 * Replays a batch of model-requested UI actions against a page.
 *
 * Actions are executed strictly in order, each one awaited before the next
 * starts. `wait` and `screenshot` actions are accepted but perform no work
 * here.
 *
 * @param {object} page - Object exposing Playwright-style `mouse`
 *   (`click`, `dblclick`, `move`, `wheel`) and `keyboard` (`press`, `type`).
 * @param {Iterable<object>} actions - Actions to perform; each carries a
 *   `type` discriminator plus the fields that type needs.
 * @returns {Promise<void>} Resolves once every action has been applied.
 * @throws {Error} If an action has a `type` this function does not handle.
 */
async function handleComputerActions (page, actions) {
  for (const action of actions) {
    switch (action.type) {
      case 'click':
        await page.mouse.click(action.x, action.y, {
          button: action.button ?? 'left'
        })
        break
      case 'double_click':
        await page.mouse.dblclick(action.x, action.y, {
          button: action.button ?? 'left'
        })
        break
      case 'scroll': {
        // The API's scroll action carries its deltas as `scroll_x` /
        // `scroll_y`. The camelCase spelling is kept as a fallback so callers
        // that already pass `scrollX` / `scrollY` keep working.
        const deltaX = action.scroll_x ?? action.scrollX ?? 0
        const deltaY = action.scroll_y ?? action.scrollY ?? 0
        // Move first so the wheel event lands on the element under (x, y).
        await page.mouse.move(action.x, action.y)
        await page.mouse.wheel(deltaX, deltaY)
        break
      }
      case 'keypress':
        for (const key of action.keys) {
          // 'SPACE' is translated to a literal space character for `press`.
          await page.keyboard.press(key === 'SPACE' ? ' ' : key)
        }
        break
      case 'type':
        await page.keyboard.type(action.text)
        break
      case 'wait':
      case 'screenshot':
        // Nothing to do: the caller takes a screenshot after every batch.
        break
      default:
        throw new Error(`Unsupported action: ${action.type}`)
    }
  }
}
|
||||
|
||||
/**
 * Runs the computer-use exchange until the model stops asking for actions.
 *
 * Each round: find the first `computer_call` item in the latest response,
 * apply its actions to `target`, screenshot `target`, and send that
 * screenshot back as the call's output, chained to the previous response.
 *
 * @param {object} target - Page to act on and to screenshot; must provide
 *   `screenshot()` plus whatever `handleComputerActions` needs.
 * @param {object} response - Response that starts the loop; needs `id` and
 *   an `output` array.
 * @returns {Promise<object>} The first response that contains no
 *   `computer_call` item (possibly the one passed in).
 */
async function computerUseLoop (target, response) {
  let current = response

  while (true) {
    const computerCall = current.output.find(
      item => item.type === 'computer_call'
    )
    if (!computerCall) {
      return current
    }

    await handleComputerActions(target, computerCall.actions)

    // Capture the page the actions were just applied to (`target`), not a
    // module-level page, so the model always sees the result of its actions.
    const screenshot = await target.screenshot({ type: 'png' })
    const screenshotBase64 = Buffer.from(screenshot).toString('base64')

    current = await openai.responses.create({
      model: my.model,
      tools: [{ type: 'computer' }],
      previous_response_id: current.id,
      input: [
        {
          type: 'computer_call_output',
          call_id: computerCall.call_id,
          output: {
            type: 'computer_screenshot',
            image_url: `data:image/png;base64,${screenshotBase64}`,
            detail: 'original'
          }
        }
      ]
    })
  }
}
|
||||
|
||||
// Kick off the task, drive the action loop to completion, and always shut the
// browser down afterwards, including when a request or an action fails.
try {
  const response = await openai.responses.create({
    model: my.model,
    tools: [{ type: 'computer' }],
    input:
      "visit agidin.com and tell me what's the title of the page, then screenshot the page and save it as homepage.png"
  })

  console.log(JSON.stringify(response.output, null, 2))

  // Await the loop so a rejection surfaces here instead of as an unhandled
  // promise rejection, and so the model's final answer is available to print.
  const finalResponse = await computerUseLoop(page, response)
  console.log(finalResponse.output_text)
} finally {
  await browser.close()
}
|
||||
Reference in New Issue
Block a user