import OpenAI from 'openai'
import { chromium } from 'playwright'

// Shared OpenAI SDK client used for every Responses API request in this file.
// The key is taken from the OPENAI_API_KEY environment variable so that no
// secret lives in source control.
// SECURITY: a literal API key used to be committed here. Treat that key as
// leaked and revoke/rotate it in the OpenAI dashboard.
const client = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY
})

// Script entry point: launch a browser, send the task to the model, let the
// computer-use loop drive the page, and print the first and last model outputs.
const browser = await chromium.launch()

// Everything that can throw runs inside try/finally so the Chromium process is
// always shut down, even when an API call or a page action fails.
try {
  const page = await browser.newPage()
  // NOTE(review): the page is never navigated, so the screenshots sent to the
  // model show a blank tab. Confirm whether a `page.goto(...)` belongs here.

  const response = await client.responses.create({
    model: 'gpt-5.4',
    tools: [{ type: 'computer' }],
    input:
      'Check whether the Filters panel is open. If it is not open, click Show filters. Then type penguin in the search box. Use the computer tool for UI interaction.'
  })

  console.log(JSON.stringify(response.output, null, 2))

  // computerUseLoop returns the last response it received; keep it so the
  // second log shows the final model output rather than the initial one again.
  const finalResponse = await computerUseLoop(page, response)

  console.log(JSON.stringify(finalResponse.output, null, 2))
} finally {
  await browser.close()
}

/**
 * Converts a key name as emitted by the model into the name Playwright's
 * keyboard API accepts.
 *
 * Lookup is case-insensitive; names without an alias (for example single
 * characters such as "a") are returned unchanged.
 *
 * Declared as a hoisted function with a local table on purpose: the action
 * handler is invoked from top-level code that runs before this part of the
 * module is evaluated, so a module-level `const` table would still be
 * uninitialised at that point.
 *
 * @param {string} key - Key name from a `keypress` action.
 * @returns {string} Key name to hand to Playwright.
 */
function toPlaywrightKey(key) {
  const aliases = {
    ALT: "Alt",
    ARROWDOWN: "ArrowDown",
    ARROWLEFT: "ArrowLeft",
    ARROWRIGHT: "ArrowRight",
    ARROWUP: "ArrowUp",
    BACKSPACE: "Backspace",
    CMD: "Meta",
    COMMAND: "Meta",
    CONTROL: "Control",
    CTRL: "Control",
    DEL: "Delete",
    DELETE: "Delete",
    DOWN: "ArrowDown",
    END: "End",
    ENTER: "Enter",
    ESC: "Escape",
    ESCAPE: "Escape",
    HOME: "Home",
    LEFT: "ArrowLeft",
    META: "Meta",
    OPTION: "Alt",
    PAGEDOWN: "PageDown",
    PAGEUP: "PageUp",
    RETURN: "Enter",
    RIGHT: "ArrowRight",
    SHIFT: "Shift",
    SPACE: " ",
    SUPER: "Meta",
    TAB: "Tab",
    UP: "ArrowUp",
    WIN: "Meta",
  };
  return aliases[String(key).toUpperCase()] ?? key;
}

/**
 * Replays a batch of model-requested computer actions on a page, in order.
 *
 * Supported action types: `click`, `double_click`, `move`, `drag`, `scroll`,
 * `keypress`, `type`, `wait` and `screenshot`. `screenshot` is a no-op here
 * because the caller captures the page after the whole batch has run.
 *
 * @param {object} page - Object exposing Playwright-style `mouse` and
 *   `keyboard` members.
 * @param {Array<object>} actions - Actions from a `computer_call` item; a
 *   nullish value is treated as an empty batch.
 * @param {object} [options]
 * @param {number} [options.waitMs=1000] - Pause, in milliseconds, applied for
 *   each `wait` action so the page can settle before the next screenshot.
 * @returns {Promise<void>}
 * @throws {Error} When an action has a type that is not listed above.
 */
async function handleComputerActions(page, actions, { waitMs = 1000 } = {}) {
  for (const action of actions ?? []) {
    switch (action.type) {
      case "click":
        await page.mouse.click(action.x, action.y, {
          button: action.button ?? "left",
        });
        break;
      case "double_click":
        await page.mouse.dblclick(action.x, action.y, {
          button: action.button ?? "left",
        });
        break;
      case "move":
        await page.mouse.move(action.x, action.y);
        break;
      case "drag": {
        // `path` is a list of {x, y} points: press at the first, move through
        // the rest, then release.
        const [start, ...rest] = action.path ?? [];
        if (start === undefined) {
          break;
        }
        await page.mouse.move(start.x, start.y);
        await page.mouse.down();
        try {
          for (const point of rest) {
            await page.mouse.move(point.x, point.y);
          }
        } finally {
          // Always release, otherwise a failed move leaves the button held.
          await page.mouse.up();
        }
        break;
      }
      case "scroll": {
        // The API payload names the deltas `scroll_x`/`scroll_y`; the
        // camelCase spelling is still honoured for existing callers.
        const deltaX = action.scroll_x ?? action.scrollX ?? 0;
        const deltaY = action.scroll_y ?? action.scrollY ?? 0;
        await page.mouse.move(action.x, action.y);
        await page.mouse.wheel(deltaX, deltaY);
        break;
      }
      case "keypress": {
        // `keys` describes one combination (e.g. ["CTRL", "A"]), so it is
        // pressed as a single chord rather than key by key.
        const keys = (action.keys ?? []).map(toPlaywrightKey);
        if (keys.length > 0) {
          await page.keyboard.press(keys.join("+"));
        }
        break;
      }
      case "type":
        await page.keyboard.type(action.text);
        break;
      case "wait":
        await new Promise((resolve) => setTimeout(resolve, waitMs));
        break;
      case "screenshot":
        break;
      default:
        throw new Error(`Unsupported action: ${action.type}`);
    }
  }
}

/**
 * Runs the computer-use feedback loop until the model stops asking for UI
 * actions.
 *
 * Each turn takes the first `computer_call` item in the latest response,
 * replays its actions on the page, screenshots the page, and sends that
 * screenshot back as a `computer_call_output` chained to the previous response
 * via `previous_response_id`. Requests go through the module-level `client`.
 *
 * @param {object} page - Playwright page the actions run on and that is
 *   screenshotted after each batch.
 * @param {object} response - Initial Responses API result to start from.
 * @param {object} [options]
 * @param {number} [options.maxTurns=50] - Upper bound on action/screenshot
 *   round trips, so a model that never stops requesting actions cannot loop
 *   (and incur API cost) indefinitely.
 * @returns {Promise<object>} The first response that has no `computer_call`.
 * @throws {Error} When the model still requests actions after `maxTurns` turns.
 */
async function computerUseLoop(page, response, { maxTurns = 50 } = {}) {
  let current = response;

  for (let turn = 0; ; turn += 1) {
    const computerCall = current.output.find((item) => item.type === "computer_call");
    if (!computerCall) {
      return current;
    }
    if (turn >= maxTurns) {
      throw new Error(`Computer-use loop did not finish within ${maxTurns} turns`);
    }

    await handleComputerActions(page, computerCall.actions);

    // Capture the page only after the whole batch has run, so the model sees
    // the result of its actions.
    const screenshot = await page.screenshot();
    const screenshotBase64 = screenshot.toString("base64");

    current = await client.responses.create({
      model: "gpt-5.4",
      tools: [{ type: "computer" }],
      previous_response_id: current.id,
      input: [
        {
          type: "computer_call_output",
          call_id: computerCall.call_id,
          output: {
            type: "computer_screenshot",
            image_url: `data:image/png;base64,${screenshotBase64}`,
            detail: "original",
          },
        },
      ],
    });
  }
}