Kaynağa Gözat

feat: 实现 @ant/computer-use-input — macOS 键鼠模拟

使用 AppleScript + JXA (JavaScript for Automation) 实现完整 API:
- moveMouse: CGEvent 鼠标移动
- key/keys: System Events 键盘输入(支持修饰键组合)
- mouseLocation: CGEvent 查询当前鼠标位置
- mouseButton: CGEvent 鼠标点击/按下/释放
- mouseScroll: CGEvent 滚轮事件
- typeText: System Events 文本输入
- getFrontmostAppInfo: 获取前台应用 bundleId + 名称

兼容 require() 调用方式(所有方法作为命名导出)。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
claude-code-best 3 hafta önce
ebeveyn
işleme
975b4876cc

+ 3 - 3
TODO.md

@@ -11,9 +11,9 @@
 - [x] `image-processor-napi` — 图像处理 NAPI 模块 (sharp + osascript 剪贴板)
 
 <!-- - [ ] `@ant/computer-use-swift` — Computer Use Swift 原生模块
-- [ ] `@ant/computer-use-mcp` — Computer Use MCP 服务
-- [ ] `@ant/computer-use-input` — Computer Use 输入模块
-- [ ] `@ant/claude-for-chrome-mcp` — Chrome MCP 扩展 -->
+- [ ] `@ant/computer-use-mcp` — Computer Use MCP 服务 -->
+- [x] `@ant/computer-use-input` — Computer Use 输入模块 (macOS AppleScript/JXA 实现)
+<!-- - [ ] `@ant/claude-for-chrome-mcp` — Chrome MCP 扩展 -->
 
 ## 工程化能力
 

+ 0 - 1
packages/@ant/computer-use-input/package.json

@@ -2,7 +2,6 @@
     "name": "@ant/computer-use-input",
     "version": "1.0.0",
     "private": true,
-    "type": "module",
     "main": "./src/index.ts",
     "types": "./src/index.ts"
 }

+ 162 - 23
packages/@ant/computer-use-input/src/index.ts

@@ -1,39 +1,174 @@
+/**
+ * @ant/computer-use-input — macOS 键鼠模拟实现
+ *
+ * 使用 macOS 原生工具实现:
+ * - AppleScript (osascript) — 应用信息、键盘输入
+ * - CGEvent via JXA (JavaScript for Automation) ObjC bridge — 鼠标操作、位置查询
+ *
+ * 仅 macOS 支持。其他平台返回 { isSupported: false }
+ */
+
+import { $ } from 'bun'
+
 /** Bundle identifier and name of an application process, as returned by getFrontmostAppInfo. */
 interface FrontmostAppInfo {
   bundleId: string
   appName: string
 }
 
-export class ComputerUseInputAPI {
-  declare moveMouse: (
-    x: number,
-    y: number,
-    animated: boolean,
-  ) => Promise<void>
+/**
+ * AppleScript key codes for named (non-character) keys.
+ * Keys are lower-case names; several aliases share one code.
+ */
+const KEY_MAP: Record<string, number> = {
+  // Editing and whitespace
+  return: 36,
+  enter: 36,
+  tab: 48,
+  space: 49,
+  delete: 51,
+  backspace: 51,
+  escape: 53,
+  esc: 53,
+  // Arrows
+  left: 123,
+  right: 124,
+  down: 125,
+  up: 126,
+  // Function row
+  f1: 122,
+  f2: 120,
+  f3: 99,
+  f4: 118,
+  f5: 96,
+  f6: 97,
+  f7: 98,
+  f8: 100,
+  f9: 101,
+  f10: 109,
+  f11: 103,
+  f12: 111,
+  // Navigation
+  home: 115,
+  end: 119,
+  pageup: 116,
+  pagedown: 121,
+}
 
-  declare key: (
-    key: string,
-    action: 'press' | 'release',
-  ) => Promise<void>
+/**
+ * Maps lower-case modifier names (and their aliases) to the AppleScript
+ * modifier phrase placed inside a `using {...}` clause.
+ */
+const MODIFIER_MAP: Record<string, string> = {
+  // Command key and its aliases
+  command: 'command down',
+  cmd: 'command down',
+  meta: 'command down',
+  super: 'command down',
+  // Shift
+  shift: 'shift down',
+  // Option / Alt
+  option: 'option down',
+  alt: 'option down',
+  // Control
+  control: 'control down',
+  ctrl: 'control down',
+}
 
-  declare keys: (parts: string[]) => Promise<void>
+/**
+ * Runs an AppleScript snippet via `osascript -e` using the Bun shell
+ * (with `.quiet().nothrow()`), and resolves with the command's text
+ * output after trimming surrounding whitespace.
+ *
+ * @param script AppleScript source passed as the `-e` argument.
+ */
+async function osascript(script: string): Promise<string> {
+  const command = $`osascript -e ${script}`.quiet().nothrow()
+  const output = await command.text()
+  return output.trim()
+}
 
-  declare mouseLocation: () => Promise<{ x: number; y: number }>
+/**
+ * Runs a JavaScript for Automation snippet via `osascript -l JavaScript -e`
+ * using the Bun shell (with `.quiet().nothrow()`), and resolves with the
+ * command's text output after trimming surrounding whitespace.
+ *
+ * @param script JXA source passed as the `-e` argument.
+ */
+async function jxa(script: string): Promise<string> {
+  const command = $`osascript -l JavaScript -e ${script}`.quiet().nothrow()
+  const output = await command.text()
+  return output.trim()
+}
 
-  declare mouseButton: (
-    button: 'left' | 'right' | 'middle',
-    action: 'click' | 'press' | 'release',
-    count?: number,
-  ) => Promise<void>
+/**
+ * Synchronous counterpart of the JXA runner: spawns
+ * `osascript -l JavaScript -e <script>` with Bun.spawnSync and returns the
+ * decoded, trimmed stdout. stderr is piped but not read.
+ *
+ * @param script JXA source passed as the `-e` argument.
+ */
+function jxaSync(script: string): string {
+  const { stdout } = Bun.spawnSync({
+    cmd: ['osascript', '-l', 'JavaScript', '-e', script],
+    stdout: 'pipe',
+    stderr: 'pipe',
+  })
+  const decoder = new TextDecoder()
+  return decoder.decode(stdout).trim()
+}
 
-  declare mouseScroll: (
-    amount: number,
-    direction: 'vertical' | 'horizontal',
-  ) => Promise<void>
+/**
+ * Builds the JXA source that creates a CoreGraphics mouse event at (x, y)
+ * and posts it to the HID event tap.
+ *
+ * @param eventType Name of the `$.kCGEvent…` constant to use as event type.
+ * @param x Horizontal coordinate passed to CGPointMake.
+ * @param y Vertical coordinate passed to CGPointMake.
+ * @param btn Mouse button number passed to CGEventCreateMouseEvent.
+ * @param clickState When given, written to the event's
+ *   kCGMouseEventClickState field before posting.
+ * @returns The script as a single-line string.
+ */
+function buildMouseJxa(eventType: string, x: number, y: number, btn: number, clickState?: number): string {
+  const createEvent = `ObjC.import("CoreGraphics"); var p = $.CGPointMake(${x},${y}); var e = $.CGEventCreateMouseEvent(null, $.${eventType}, p, ${btn});`
+  // The click-state statement is only emitted when the caller supplies one.
+  const setClickState = clickState === undefined
+    ? ''
+    : ` $.CGEventSetIntegerValueField(e, $.kCGMouseEventClickState, ${clickState});`
+  return `${createEvent}${setClickState} $.CGEventPost($.kCGHIDEventTap, e);`
+}
 
-  declare typeText: (text: string) => Promise<void>
+// ---- Implementation functions ----
 
-  declare getFrontmostAppInfo: () => FrontmostAppInfo | null
+/**
+ * Posts a mouse-moved event at the given coordinates.
+ *
+ * @param x Target horizontal coordinate.
+ * @param y Target vertical coordinate.
+ * @param _animated Accepted for interface compatibility; not used here.
+ */
+async function moveMouse(x: number, y: number, _animated: boolean): Promise<void> {
+  const moveScript = buildMouseJxa('kCGEventMouseMoved', x, y, 0)
+  await jxa(moveScript)
+}
+
+/**
+ * Sends a single key through System Events.
+ *
+ * Names found in KEY_MAP (matched case-insensitively) are sent with
+ * `key code`. Anything else is sent with `keystroke`: a one-character name
+ * is sent as given, a longer name is lower-cased first.
+ *
+ * @param keyName Key name or literal character.
+ * @param action Only `'press'` sends anything; `'release'` returns immediately.
+ */
+async function key(keyName: string, action: 'press' | 'release'): Promise<void> {
+  if (action === 'release') return
+  const lower = keyName.toLowerCase()
+  const keyCode = KEY_MAP[lower]
+  if (keyCode !== undefined) {
+    await osascript(`tell application "System Events" to key code ${keyCode}`)
+    return
+  }
+  const literal = keyName.length === 1 ? keyName : lower
+  // Escape backslashes and double quotes so keys such as `"` or `\` cannot
+  // terminate or corrupt the AppleScript string literal (same as typeText).
+  const escaped = literal.replace(/\\/g, '\\\\').replace(/"/g, '\\"')
+  await osascript(`tell application "System Events" to keystroke "${escaped}"`)
+}
+
+/**
+ * Sends a key combination through System Events.
+ *
+ * Each part that matches MODIFIER_MAP (case-insensitively) becomes a modifier
+ * in the `using {...}` clause; the last part that is not a modifier is the
+ * key to send. If no non-modifier part is present, nothing is sent.
+ *
+ * @param parts Modifier and key names, e.g. `['cmd', 'shift', 'a']`.
+ */
+async function keys(parts: string[]): Promise<void> {
+  const modifiers: string[] = []
+  let finalKey: string | null = null
+  for (const part of parts) {
+    const mod = MODIFIER_MAP[part.toLowerCase()]
+    if (mod) modifiers.push(mod)
+    else finalKey = part
+  }
+  if (!finalKey) return
+  const lower = finalKey.toLowerCase()
+  const keyCode = KEY_MAP[lower]
+  const modStr = modifiers.length > 0 ? ` using {${modifiers.join(', ')}}` : ''
+  if (keyCode !== undefined) {
+    await osascript(`tell application "System Events" to key code ${keyCode}${modStr}`)
+    return
+  }
+  const literal = finalKey.length === 1 ? finalKey : lower
+  // Escape backslashes and double quotes so keys such as `"` or `\` cannot
+  // terminate or corrupt the AppleScript string literal (same as typeText).
+  const escaped = literal.replace(/\\/g, '\\\\').replace(/"/g, '\\"')
+  await osascript(`tell application "System Events" to keystroke "${escaped}"${modStr}`)
+}
 
+/**
+ * Queries the current pointer position through a CoreGraphics event created
+ * in JXA, which prints the location as "x,y".
+ *
+ * @returns The position with both coordinates rounded to integers.
+ * @throws Error when the script output is not two finite numbers. The JXA
+ *   runner does not throw on failure, so a failed run shows up here as empty
+ *   or malformed output rather than as `{ x: 0, y: NaN }`.
+ */
+async function mouseLocation(): Promise<{ x: number; y: number }> {
+  const result = await jxa('ObjC.import("CoreGraphics"); var e = $.CGEventCreate(null); var p = $.CGEventGetLocation(e); p.x + "," + p.y')
+  const [xStr, yStr] = result.split(',')
+  const x = Number(xStr)
+  const y = Number(yStr)
+  // Number('') is 0, so empty fields must be rejected explicitly.
+  if (!xStr || !yStr || !Number.isFinite(x) || !Number.isFinite(y)) {
+    throw new Error(`mouseLocation: unexpected osascript output ${JSON.stringify(result)}`)
+  }
+  return { x: Math.round(x), y: Math.round(y) }
+}
+
+/**
+ * Presses, releases, or clicks a mouse button at the current pointer position.
+ *
+ * @param button Which button to act on.
+ * @param action `'click'` posts down+up pairs; `'press'` posts only the down
+ *   event; `'release'` posts only the up event.
+ * @param count Number of down+up pairs for `'click'` (defaults to 1). The
+ *   n-th pair carries click state n.
+ */
+async function mouseButton(
+  button: 'left' | 'right' | 'middle',
+  action: 'click' | 'press' | 'release',
+  count?: number,
+): Promise<void> {
+  const { x, y } = await mouseLocation()
+
+  let btn: number
+  let downType: string
+  let upType: string
+  switch (button) {
+    case 'left':
+      btn = 0
+      downType = 'kCGEventLeftMouseDown'
+      upType = 'kCGEventLeftMouseUp'
+      break
+    case 'right':
+      btn = 1
+      downType = 'kCGEventRightMouseDown'
+      upType = 'kCGEventRightMouseUp'
+      break
+    default:
+      // Any other button uses the "other" event types with button number 2.
+      btn = 2
+      downType = 'kCGEventOtherMouseDown'
+      upType = 'kCGEventOtherMouseUp'
+      break
+  }
+
+  if (action === 'press') {
+    await jxa(buildMouseJxa(downType, x, y, btn))
+    return
+  }
+  if (action !== 'click') {
+    await jxa(buildMouseJxa(upType, x, y, btn))
+    return
+  }
+
+  const clicks = count ?? 1
+  for (let done = 0; done < clicks; done++) {
+    const clickState = done + 1
+    await jxa(buildMouseJxa(downType, x, y, btn, clickState))
+    await jxa(buildMouseJxa(upType, x, y, btn, clickState))
+  }
+}
+
+/**
+ * Posts a scroll-wheel event built with CGEventCreateScrollWheelEvent.
+ *
+ * @param amount Scroll delta interpolated into the script.
+ * @param direction `'vertical'` passes the amount as the only wheel value;
+ *   `'horizontal'` passes two wheel values, 0 followed by the amount.
+ */
+async function mouseScroll(amount: number, direction: 'vertical' | 'horizontal'): Promise<void> {
+  let scrollScript: string
+  if (direction === 'vertical') {
+    scrollScript = `ObjC.import("CoreGraphics"); var e = $.CGEventCreateScrollWheelEvent(null, 0, 1, ${amount}); $.CGEventPost($.kCGHIDEventTap, e);`
+  } else {
+    scrollScript = `ObjC.import("CoreGraphics"); var e = $.CGEventCreateScrollWheelEvent(null, 0, 2, 0, ${amount}); $.CGEventPost($.kCGHIDEventTap, e);`
+  }
+  await jxa(scrollScript)
+}
+
+/**
+ * Types a string through the System Events `keystroke` command.
+ *
+ * @param text Text to type. Backslashes are doubled first, then double quotes
+ *   are backslash-escaped, so the text fits inside the AppleScript string
+ *   literal.
+ */
+async function typeText(text: string): Promise<void> {
+  const backslashesDoubled = text.replace(/\\/g, '\\\\')
+  const quoted = backslashesDoubled.replace(/"/g, '\\"')
+  await osascript(`tell application "System Events" to keystroke "${quoted}"`)
+}
+
+/**
+ * Synchronously asks System Events for the frontmost application process.
+ *
+ * The AppleScript returns `bundleId & "|" & appName`; the output is split on
+ * the first "|" only, so an application name that itself contains "|" is
+ * returned whole instead of being truncated.
+ *
+ * @returns The bundle identifier and name, or null when the command produced
+ *   no "|"-separated output or spawning threw.
+ */
+function getFrontmostAppInfo(): FrontmostAppInfo | null {
+  try {
+    const result = Bun.spawnSync({
+      cmd: ['osascript', '-e', `
+        tell application "System Events"
+          set frontApp to first application process whose frontmost is true
+          set appName to name of frontApp
+          set bundleId to bundle identifier of frontApp
+          return bundleId & "|" & appName
+        end tell
+      `],
+      stdout: 'pipe',
+      stderr: 'pipe',
+    })
+    const output = new TextDecoder().decode(result.stdout).trim()
+    // Empty output (e.g. a failed osascript run) has no separator either.
+    const separatorIndex = output.indexOf('|')
+    if (separatorIndex === -1) return null
+    return {
+      bundleId: output.slice(0, separatorIndex),
+      appName: output.slice(separatorIndex + 1),
+    }
+  } catch {
+    return null
+  }
+}
+
+// ---- Exports ----
+
+/**
+ * Type-level description of the input API on a supported platform.
+ * Every member is a `declare` field, so the class only describes a shape.
+ */
+export class ComputerUseInputAPI {
+  declare moveMouse: (
+    x: number,
+    y: number,
+    animated: boolean,
+  ) => Promise<void>
+
+  declare key: (
+    key: string,
+    action: 'press' | 'release',
+  ) => Promise<void>
+
+  declare keys: (parts: string[]) => Promise<void>
+
+  declare mouseLocation: () => Promise<{ x: number; y: number }>
+
+  declare mouseButton: (
+    button: 'left' | 'right' | 'middle',
+    action: 'click' | 'press' | 'release',
+    count?: number,
+  ) => Promise<void>
+
+  declare mouseScroll: (
+    amount: number,
+    direction: 'vertical' | 'horizontal',
+  ) => Promise<void>
+
+  declare typeText: (text: string) => Promise<void>
+
+  declare getFrontmostAppInfo: () => FrontmostAppInfo | null
+
   declare isSupported: true
 }
 
@@ -42,3 +177,7 @@ interface ComputerUseInputUnsupported {
 }
 
 /** Either the supported API shape or the unsupported variant (the latter is declared outside this view; presumably told apart by `isSupported` — confirm). */
 export type ComputerUseInput = ComputerUseInputAPI | ComputerUseInputUnsupported
+
+// Named exports: each method is exported individually (so require() callers can read it off the module), plus isSupported, which is true only when process.platform is 'darwin'
+export const isSupported = process.platform === 'darwin'
+export { moveMouse, key, keys, mouseLocation, mouseButton, mouseScroll, typeText, getFrontmostAppInfo }