u
This commit is contained in:
90
6-tc-computer-builtin.mjs
Normal file
90
6-tc-computer-builtin.mjs
Normal file
@@ -0,0 +1,90 @@
|
||||
import OpenAI from 'openai'
|
||||
import { chromium } from 'playwright'
|
||||
|
||||
// Entry script: asks the model to drive a Playwright page through the
// built-in `computer` tool, runs the action/screenshot loop until the model
// stops requesting actions, and prints the first and final model outputs.

// The API key is read from the environment; secrets must never live in source.
// NOTE(review): a key was previously hard-coded on this line and is therefore
// in version-control history -- revoke and rotate it.
const client = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
})

const browser = await chromium.launch()

try {
  // NOTE(review): the page is never navigated before the first request, so the
  // model starts from a blank tab -- confirm whether a page.goto() is missing.
  const page = await browser.newPage()

  const response = await client.responses.create({
    model: 'gpt-5.4',
    tools: [{ type: 'computer' }],
    input:
      'Check whether the Filters panel is open. If it is not open, click Show filters. Then type penguin in the search box. Use the computer tool for UI interaction.',
  })

  console.log(JSON.stringify(response.output, null, 2))

  // Keep the value returned by the loop: it is the last response in the chain,
  // whereas `response` still refers to the very first one.
  const finalResponse = await computerUseLoop(page, response)

  console.log(JSON.stringify(finalResponse.output, null, 2))
} finally {
  // Always release the browser process, even when a request or action throws.
  await browser.close()
}
|
||||
|
||||
/**
 * Replays a batch of model-issued computer actions on a Playwright page, in
 * order, awaiting each one before starting the next.
 *
 * @param {object} page - Object exposing Playwright's `mouse` and `keyboard`
 *   interfaces (only those two members are used here).
 * @param {Iterable<object>} actions - Actions to perform; each must carry a
 *   `type` field selecting one of the branches below.
 * @returns {Promise<void>} Resolves once every action has been performed.
 * @throws {Error} `Unsupported action: <type>` for an unrecognised type.
 */
async function handleComputerActions(page, actions) {
  for (const action of actions) {
    switch (action.type) {
      case "click":
        await page.mouse.click(action.x, action.y, {
          button: action.button ?? "left",
        });
        break;
      case "double_click":
        await page.mouse.dblclick(action.x, action.y, {
          button: action.button ?? "left",
        });
        break;
      case "move":
        await page.mouse.move(action.x, action.y);
        break;
      case "drag": {
        // assumes `path` is an array of {x, y} points, as described in the
        // Responses API reference -- TODO confirm against live output.
        const [start, ...waypoints] = action.path ?? [];
        if (start === undefined) {
          break;
        }
        await page.mouse.move(start.x, start.y);
        await page.mouse.down();
        for (const point of waypoints) {
          await page.mouse.move(point.x, point.y);
        }
        await page.mouse.up();
        break;
      }
      case "scroll": {
        // The API reports deltas as snake_case `scroll_x` / `scroll_y`; reading
        // only the camelCase names always yielded a (0, 0) wheel event. The
        // camelCase fallback is kept for callers that already normalise keys.
        const deltaX = action.scroll_x ?? action.scrollX ?? 0;
        const deltaY = action.scroll_y ?? action.scrollY ?? 0;
        // Position the pointer first so the wheel event targets that element.
        await page.mouse.move(action.x, action.y);
        await page.mouse.wheel(deltaX, deltaY);
        break;
      }
      case "keypress":
        // NOTE(review): keys are pressed one after another, not held as a
        // chord, and only "SPACE" is translated to a Playwright key name.
        for (const key of action.keys) {
          await page.keyboard.press(key === "SPACE" ? " " : key);
        }
        break;
      case "type":
        await page.keyboard.type(action.text);
        break;
      case "wait":
      case "screenshot":
        // Nothing to do on the page for these two action types.
        break;
      default:
        throw new Error(`Unsupported action: ${action.type}`);
    }
  }
}
|
||||
|
||||
/**
 * Drives the action/screenshot exchange with the model until a response
 * arrives that contains no `computer_call` item.
 *
 * Each round performs the first `computer_call`'s actions on the page, takes a
 * screenshot, and sends it back as a `computer_call_output` chained to the
 * previous response through `previous_response_id`.
 *
 * @param {object} page - Page passed to `handleComputerActions` and used for
 *   `screenshot()`.
 * @param {object} response - Starting response; must expose `id` and `output`.
 * @returns {Promise<object>} The first response without a `computer_call`.
 */
async function computerUseLoop(page, response) {
  let latest = response;

  for (;;) {
    const pendingCall = latest.output.find(
      (entry) => entry.type === "computer_call",
    );
    if (pendingCall === undefined) {
      // The model asked for no further UI action: this is the final response.
      return latest;
    }

    await handleComputerActions(page, pendingCall.actions);

    // Capture the page after the actions so the model can see their effect.
    const encodedShot = (await page.screenshot()).toString("base64");

    const screenshotOutput = {
      type: "computer_call_output",
      call_id: pendingCall.call_id,
      output: {
        type: "computer_screenshot",
        image_url: `data:image/png;base64,${encodedShot}`,
        detail: "original",
      },
    };

    latest = await client.responses.create({
      model: "gpt-5.4",
      tools: [{ type: "computer" }],
      previous_response_id: latest.id,
      input: [screenshotOutput],
    });
  }
}
|
||||
@@ -8,6 +8,8 @@
|
||||
// model reaches a final answer. Because the browser/context are not closed,
|
||||
// Bun stays alive until you close the browser or stop the process manually.
|
||||
|
||||
// This is option 3 from the official docs: "Use a code-execution harness". It does not use the built-in computer tool (tools: [{ type: 'computer' }]).
|
||||
|
||||
import OpenAI from 'openai'
|
||||
import readline from 'node:readline/promises'
|
||||
import vm from 'node:vm'
|
||||
|
||||
Reference in New Issue
Block a user