From 0e92f55616635f730f60e49c8b129c5d22b47713 Mon Sep 17 00:00:00 2001
From: wameup
Date: Tue, 17 Mar 2026 03:20:48 +0000
Subject: [PATCH] u

---
 6-tc-computer-builtin.mjs | 132 ++++++++++++++++++++++++++++++++++++++
 6-tc-computer.ts          |   2 +
 2 files changed, 134 insertions(+)
 create mode 100644 6-tc-computer-builtin.mjs

diff --git a/6-tc-computer-builtin.mjs b/6-tc-computer-builtin.mjs
new file mode 100644
index 0000000..07db554
--- /dev/null
+++ b/6-tc-computer-builtin.mjs
@@ -0,0 +1,132 @@
+// Demo: drives a Playwright page with OpenAI's built-in `computer` tool.
+//
+// The model is asked to perform a small UI task; every batch of actions it
+// requests is replayed on a Chromium page and answered with a screenshot
+// until the model stops issuing computer calls.
+import OpenAI from "openai";
+import { chromium } from "playwright";
+
+const MODEL = "gpt-5.4";
+const TOOLS = [{ type: "computer" }];
+
+// Credentials come from the environment; never commit an API key.
+const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
+
+const browser = await chromium.launch();
+try {
+  // NOTE(review): the page is never navigated, so screenshots presumably show
+  // a blank tab; add a page.goto(...) for the UI under test.
+  const page = await browser.newPage();
+
+  const response = await client.responses.create({
+    model: MODEL,
+    tools: TOOLS,
+    input:
+      "Check whether the Filters panel is open. If it is not open, click Show filters. Then type penguin in the search box. Use the computer tool for UI interaction.",
+  });
+  console.log(JSON.stringify(response.output, null, 2));
+
+  // computerUseLoop returns the last response; the initial one is not mutated,
+  // so the final output has to be read from the return value.
+  const finalResponse = await computerUseLoop(page, response);
+  console.log(JSON.stringify(finalResponse.output, null, 2));
+} finally {
+  // Release the browser even when an API call or a page action throws.
+  await browser.close();
+}
+
+/**
+ * Replays a batch of model-requested UI actions on a Playwright page.
+ *
+ * Actions run one after another in array order. `wait` and `screenshot` are
+ * accepted but do nothing here; the caller takes a screenshot after the batch.
+ *
+ * @param {import("playwright").Page} page - Page that receives the input events.
+ * @param {Array<object>} actions - Actions taken from a `computer_call` item.
+ * @returns {Promise<void>}
+ * @throws {Error} If an action has a type this function does not handle.
+ */
+async function handleComputerActions(page, actions) {
+  for (const action of actions) {
+    switch (action.type) {
+      case "click":
+        await page.mouse.click(action.x, action.y, {
+          button: action.button ?? "left",
+        });
+        break;
+      case "double_click":
+        await page.mouse.dblclick(action.x, action.y, {
+          button: action.button ?? "left",
+        });
+        break;
+      case "scroll":
+        await page.mouse.move(action.x, action.y);
+        // The Responses API names the deltas scroll_x/scroll_y; the camelCase
+        // spellings are kept as a fallback for already-normalized actions.
+        await page.mouse.wheel(
+          action.scroll_x ?? action.scrollX ?? 0,
+          action.scroll_y ?? action.scrollY ?? 0,
+        );
+        break;
+      case "keypress":
+        for (const key of action.keys) {
+          // The literal space character is used for the "SPACE" key name.
+          await page.keyboard.press(key === "SPACE" ? " " : key);
+        }
+        break;
+      case "type":
+        await page.keyboard.type(action.text);
+        break;
+      case "wait":
+      case "screenshot":
+        break;
+      default:
+        throw new Error(`Unsupported action: ${action.type}`);
+    }
+  }
+}
+
+/**
+ * Runs the computer-use loop until the model stops requesting actions.
+ *
+ * Each round executes the first `computer_call` item of the latest response,
+ * captures a screenshot, and sends it back as a `computer_call_output` chained
+ * to the previous response.
+ *
+ * @param {import("playwright").Page} page - Page the actions are executed on.
+ * @param {object} response - Response that starts the loop.
+ * @returns {Promise<object>} The first response without a `computer_call` item.
+ */
+async function computerUseLoop(page, response) {
+  let current = response;
+  for (;;) {
+    const computerCall = current.output.find(
+      (item) => item.type === "computer_call",
+    );
+    if (!computerCall) {
+      return current;
+    }
+
+    await handleComputerActions(page, computerCall.actions);
+
+    const screenshot = await page.screenshot();
+    const screenshotBase64 = screenshot.toString("base64");
+
+    current = await client.responses.create({
+      model: MODEL,
+      tools: TOOLS,
+      previous_response_id: current.id,
+      input: [
+        {
+          type: "computer_call_output",
+          call_id: computerCall.call_id,
+          output: {
+            type: "computer_screenshot",
+            image_url: `data:image/png;base64,${screenshotBase64}`,
+            detail: "original",
+          },
+        },
+      ],
+    });
+  }
+}
diff --git a/6-tc-computer.ts b/6-tc-computer.ts
index 72e0dc0..70aec30 100644
--- a/6-tc-computer.ts
+++ b/6-tc-computer.ts
@@ -8,6 +8,8 @@
 // model reaches a final answer. Because the browser/context are not closed,
 // Bun stays alive until you close the browser or stop the process manually.
 
+// This is option 3 from the official docs, "Use a code-execution harness". It does not use the built-in computer tool, i.e. tools: [{type:'computer'}].
+
 import OpenAI from 'openai'
 import readline from 'node:readline/promises'
 import vm from 'node:vm'