index.ts 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. /**
  2. * @ant/computer-use-swift — macOS 实现
  3. *
  4. * 用 AppleScript/JXA/screencapture 替代原始 Swift 原生模块。
  5. * 提供显示器信息、应用管理、截图等功能。
  6. *
  7. * 仅 macOS 支持。
  8. */
  9. import { readFileSync, unlinkSync } from 'fs'
  10. import { tmpdir } from 'os'
  11. import { join } from 'path'
  12. // ---------------------------------------------------------------------------
  13. // Types (exported for callers)
  14. // ---------------------------------------------------------------------------
  /**
   * Geometry of a single display, as produced by the display API in this module.
   */
  export interface DisplayGeometry {
    /** Width from `CGDisplayPixelsWide`, or the `NSScreen` frame width in the fallback path. */
    width: number
    /** Height from `CGDisplayPixelsHigh`, or the `NSScreen` frame height in the fallback path. */
    height: number
    /** Display-mode pixel width divided by `width`, or `NSScreen.backingScaleFactor` in the fallback path. */
    scaleFactor: number
    /** CoreGraphics display ID, or the `NSScreenNumber` value in the fallback path. */
    displayId: number
  }
  /**
   * Outcome of `AppsAPI.prepareDisplay`.
   *
   * The implementation in this file is a stub that always returns an empty
   * `activated` string and an empty `hidden` list.
   */
  export interface PrepareDisplayResult {
    /** Presumably identifies the app that was activated — confirm against callers. */
    activated: string
    /** Presumably the identifiers of the apps that were hidden — confirm against callers. */
    hidden: string[]
  }
  /**
   * Minimal identification of an application, returned by
   * `AppsAPI.previewHideSet` and `AppsAPI.appUnderPoint`.
   */
  export interface AppInfo {
    /** Bundle identifier of the application. */
    bundleId: string
    /** Human-readable application name. */
    displayName: string
  }
  /**
   * An application found on disk, as returned by `AppsAPI.listInstalled`.
   */
  export interface InstalledApp {
    /**
     * Bundle identifier. `listInstalled` in this file does not read Info.plist;
     * it synthesizes a `com.app.<name>` placeholder from the display name.
     */
    bundleId: string
    /** File name of the bundle with the trailing `.app` removed. */
    displayName: string
    /** POSIX path of the application bundle. */
    path: string
    /** Optional icon as a data URL; not populated by `listInstalled` in this file. */
    iconDataUrl?: string
  }
  /**
   * A running application, as returned by `AppsAPI.listRunning`.
   */
  export interface RunningApp {
    /** Bundle identifier reported by the System Events application process. */
    bundleId: string
    /** Process name reported by System Events. */
    displayName: string
  }
  /**
   * A captured image, as returned by the screenshot API.
   */
  export interface ScreenshotResult {
    /** Base64 encoding of the captured image file. */
    base64: string
    /** Image width read from the PNG header. */
    width: number
    /** Image height read from the PNG header. */
    height: number
  }
  /**
   * Result of `ComputerUseAPI.resolvePrepareCapture`; it has the same fields as
   * a plain screenshot result.
   */
  export interface ResolvePrepareCaptureResult {
    /** Base64 encoding of the captured image file. */
    base64: string
    /** Image width read from the PNG header. */
    width: number
    /** Image height read from the PNG header. */
    height: number
  }
  /**
   * Maps an application to the displays its windows are on, as returned by
   * `AppsAPI.findWindowDisplays`.
   */
  export interface WindowDisplayInfo {
    /** Bundle identifier of the application. */
    bundleId: string
    /** Display identifiers; the implementation in this file always reports `[1]`. */
    displayIds: number[]
  }
  53. // ---------------------------------------------------------------------------
  54. // Helpers
  55. // ---------------------------------------------------------------------------
  /**
   * Runs a JXA (JavaScript for Automation) script synchronously via
   * `osascript -l JavaScript -e`.
   *
   * @param script - JXA source text.
   * @returns The child's stdout decoded as text and trimmed. The exit code and
   *   stderr are not inspected, so script failures are not reported here.
   */
  function jxaSync(script: string): string {
    const { stdout } = Bun.spawnSync({
      cmd: ['osascript', '-l', 'JavaScript', '-e', script],
      stdout: 'pipe',
      stderr: 'pipe',
    })
    const decoder = new TextDecoder()
    return decoder.decode(stdout).trim()
  }
  /**
   * Runs an AppleScript synchronously via `osascript -e`.
   *
   * @param script - AppleScript source text.
   * @returns The child's stdout decoded as text and trimmed. The exit code and
   *   stderr are not inspected, so script failures are not reported here.
   */
  function osascriptSync(script: string): string {
    const { stdout } = Bun.spawnSync({
      cmd: ['osascript', '-e', script],
      stdout: 'pipe',
      stderr: 'pipe',
    })
    const decoder = new TextDecoder()
    return decoder.decode(stdout).trim()
  }
  /**
   * Runs an AppleScript asynchronously via `osascript -e`.
   *
   * @param script - AppleScript source text.
   * @returns The child's stdout, trimmed, once the process has exited. The exit
   *   code and stderr are not inspected.
   */
  async function osascript(script: string): Promise<string> {
    const child = Bun.spawn(['osascript', '-e', script], {
      stdout: 'pipe',
      stderr: 'pipe',
    })
    // Drain stdout first, then wait for the process to finish.
    const output = await new Response(child.stdout).text()
    await child.exited
    return output.trim()
  }
  /**
   * Runs a JXA (JavaScript for Automation) script asynchronously via
   * `osascript -l JavaScript -e`.
   *
   * @param script - JXA source text.
   * @returns The child's stdout, trimmed, once the process has exited. The exit
   *   code and stderr are not inspected.
   */
  async function jxa(script: string): Promise<string> {
    const child = Bun.spawn(['osascript', '-l', 'JavaScript', '-e', script], {
      stdout: 'pipe',
      stderr: 'pipe',
    })
    // Drain stdout first, then wait for the process to finish.
    const output = await new Response(child.stdout).text()
    await child.exited
    return output.trim()
  }
  86. // ---------------------------------------------------------------------------
  87. // DisplayAPI
  88. // ---------------------------------------------------------------------------
  /**
   * Display queries exposed through `ComputerUseAPI.display`.
   */
  interface DisplayAPI {
    /**
     * Returns the geometry of one display.
     *
     * @param displayId - Identifier to look up; optional.
     */
    getSize(
      displayId?: number,
    ): DisplayGeometry

    /** Returns the geometry of every display that could be enumerated. */
    listAll(): DisplayGeometry[]
  }
  /**
   * Parses the JSON array printed by the display-enumeration scripts and coerces
   * every field to a number.
   *
   * @param raw - stdout of the enumeration script.
   * @throws SyntaxError when `raw` is not valid JSON and TypeError when it is not
   *   an array; `listAll` catches both and moves on to its next fallback.
   */
  function parseDisplayList(raw: string): DisplayGeometry[] {
    const parsed: unknown = JSON.parse(raw)
    if (!Array.isArray(parsed)) {
      throw new TypeError('display list is not a JSON array')
    }
    return parsed.map((d: DisplayGeometry) => ({
      width: Number(d.width),
      height: Number(d.height),
      scaleFactor: Number(d.scaleFactor),
      displayId: Number(d.displayId),
    }))
  }

  /**
   * Display queries backed by JXA scripts run through `osascript`.
   */
  const displayAPI: DisplayAPI = {
    /**
     * Returns the geometry of the display with the given identifier.
     *
     * Falls back to the first enumerated display when `displayId` is omitted or
     * unknown, and to a 1920x1080 @2x placeholder when nothing was enumerated.
     *
     * @param displayId - Identifier to look up; optional.
     */
    getSize(displayId?: number): DisplayGeometry {
      // Refer to the object by name instead of `this`, so the method still works
      // when it is detached from the object (destructured or passed as a callback).
      const all = displayAPI.listAll()
      if (displayId !== undefined) {
        const found = all.find(d => d.displayId === displayId)
        if (found) return found
      }
      return all[0] ?? { width: 1920, height: 1080, scaleFactor: 2, displayId: 1 }
    },

    /**
     * Enumerates displays. Tries CoreGraphics first, then `NSScreen`, and finally
     * returns a single 1920x1080 @2x placeholder. Never throws.
     */
    listAll(): DisplayGeometry[] {
      try {
        // Primary path: CoreGraphics active display list. The `displays` and
        // `active` variables in the script are never read afterwards.
        const raw = jxaSync(`
          ObjC.import("CoreGraphics");
          var displays = $.CGDisplayCopyAllDisplayModes ? [] : [];
          var active = $.CGGetActiveDisplayList(10, null, Ref());
          var countRef = Ref();
          $.CGGetActiveDisplayList(0, null, countRef);
          var count = countRef[0];
          var idBuf = Ref();
          $.CGGetActiveDisplayList(count, idBuf, countRef);
          var result = [];
          for (var i = 0; i < count; i++) {
            var did = idBuf[i];
            var w = $.CGDisplayPixelsWide(did);
            var h = $.CGDisplayPixelsHigh(did);
            var mode = $.CGDisplayCopyDisplayMode(did);
            var pw = $.CGDisplayModeGetPixelWidth(mode);
            var sf = pw > 0 && w > 0 ? pw / w : 2;
            result.push({width: w, height: h, scaleFactor: sf, displayId: did});
          }
          JSON.stringify(result);
        `)
        return parseDisplayList(raw)
      } catch {
        // Fallback: use NSScreen via JXA
        try {
          const raw = jxaSync(`
            ObjC.import("AppKit");
            var screens = $.NSScreen.screens;
            var result = [];
            for (var i = 0; i < screens.count; i++) {
              var s = screens.objectAtIndex(i);
              var frame = s.frame;
              var desc = s.deviceDescription;
              var screenNumber = desc.objectForKey($("NSScreenNumber")).intValue;
              var backingFactor = s.backingScaleFactor;
              result.push({
                width: Math.round(frame.size.width),
                height: Math.round(frame.size.height),
                scaleFactor: backingFactor,
                displayId: screenNumber
              });
            }
            JSON.stringify(result);
          `)
          return parseDisplayList(raw)
        } catch {
          // Both scripts failed or printed unparsable output.
          return [{ width: 1920, height: 1080, scaleFactor: 2, displayId: 1 }]
        }
      }
    },
  }
  163. // ---------------------------------------------------------------------------
  164. // AppsAPI
  165. // ---------------------------------------------------------------------------
  /**
   * Application management operations exposed through `ComputerUseAPI.apps`.
   */
  interface AppsAPI {
    /** Prepares a display for capture given an allowlist of bundle identifiers. */
    prepareDisplay(
      allowlistBundleIds: string[],
      surrogateHost: string,
      displayId?: number,
    ): Promise<PrepareDisplayResult>

    /** Previews which applications would be hidden for the given bundle identifiers. */
    previewHideSet(
      bundleIds: string[],
      displayId?: number,
    ): Promise<AppInfo[]>

    /** Reports the display identifiers associated with each bundle identifier. */
    findWindowDisplays(
      bundleIds: string[],
    ): Promise<WindowDisplayInfo[]>

    /** Resolves an application for a screen coordinate, or `null`. */
    appUnderPoint(
      x: number,
      y: number,
    ): Promise<AppInfo | null>

    /** Lists applications found on disk. */
    listInstalled(): Promise<InstalledApp[]>

    /** Returns an icon data URL for the application at `path`, or `null`. */
    iconDataUrl(path: string): string | null

    /** Lists running applications (synchronous). */
    listRunning(): RunningApp[]

    /** Activates the application with the given bundle identifier. */
    open(bundleId: string): Promise<void>

    /** Makes the given applications visible again. */
    unhide(bundleIds: string[]): Promise<void>
  }
  /**
   * Escapes a value for embedding inside a double-quoted AppleScript string
   * literal: backslashes and double quotes are backslash-escaped so the value
   * cannot terminate the literal and inject script text.
   */
  function escapeAppleScriptString(value: string): string {
    return String(value).replace(/\\/g, '\\\\').replace(/"/g, '\\"')
  }

  /**
   * Application management backed by AppleScript/JXA. Several operations are
   * stubs in this implementation; each method documents what it really does.
   */
  const appsAPI: AppsAPI = {
    /** Stub: performs no work and reports nothing activated and nothing hidden. */
    async prepareDisplay(
      _allowlistBundleIds: string[],
      _surrogateHost: string,
      _displayId?: number,
    ): Promise<PrepareDisplayResult> {
      return { activated: '', hidden: [] }
    },

    /** Stub: always resolves to an empty list. */
    async previewHideSet(
      _bundleIds: string[],
      _displayId?: number,
    ): Promise<AppInfo[]> {
      return []
    },

    /** Stub: no window lookup is performed; every bundle is reported on display 1. */
    async findWindowDisplays(bundleIds: string[]): Promise<WindowDisplayInfo[]> {
      return bundleIds.map(bundleId => ({ bundleId, displayIds: [1] }))
    },

    /**
     * Resolves the application for a screen coordinate.
     *
     * The script builds a point from the coordinates but never uses it: the
     * result is always the frontmost application.
     *
     * @returns The frontmost app, or `null` when the script fails or prints
     *   output that is not valid JSON.
     */
    async appUnderPoint(_x: number, _y: number): Promise<AppInfo | null> {
      try {
        // Coerce to numbers so only numeric text is ever spliced into the script.
        const px = Number(_x)
        const py = Number(_y)
        const result = await jxa(`
          ObjC.import("CoreGraphics");
          ObjC.import("AppKit");
          var pt = $.CGPointMake(${px}, ${py});
          // Get frontmost app as a fallback
          var app = $.NSWorkspace.sharedWorkspace.frontmostApplication;
          JSON.stringify({bundleId: app.bundleIdentifier.js, displayName: app.localizedName.js});
        `)
        return JSON.parse(result) as AppInfo
      } catch {
        return null
      }
    },

    /**
     * Lists `.app` entries directly inside the startup disk's Applications
     * folder, queried through System Events.
     *
     * Bundle identifiers are placeholders of the form `com.app.<name>` derived
     * from the display name; Info.plist is not read.
     *
     * @returns The discovered apps, or an empty list when the query throws.
     */
    async listInstalled(): Promise<InstalledApp[]> {
      try {
        // The script prints one "path|name" pair per line.
        const result = await osascript(`
          tell application "System Events"
            set appList to ""
            repeat with appFile in (every file of folder "Applications" of startup disk whose name ends with ".app")
              set appPath to POSIX path of (appFile as alias)
              set appName to name of appFile
              set appList to appList & appPath & "|" & appName & "\\n"
            end repeat
            return appList
          end tell
        `)
        return result.split('\n').filter(Boolean).map(line => {
          const [path, name] = line.split('|', 2)
          const displayName = (name ?? '').replace(/\.app$/, '')
          return {
            bundleId: `com.app.${displayName.toLowerCase().replace(/\s+/g, '-')}`,
            displayName,
            path: path ?? '',
          }
        })
      } catch {
        return []
      }
    },

    /** Stub: icons are not extracted; always returns `null`. */
    iconDataUrl(_path: string): string | null {
      return null
    },

    /**
     * Lists System Events application processes that are not background-only.
     *
     * @returns The running apps, or an empty list when the script fails or its
     *   output cannot be parsed.
     */
    listRunning(): RunningApp[] {
      try {
        const raw = jxaSync(`
          var apps = Application("System Events").applicationProcesses.whose({backgroundOnly: false});
          var result = [];
          for (var i = 0; i < apps.length; i++) {
            try {
              var a = apps[i];
              result.push({bundleId: a.bundleIdentifier(), displayName: a.name()});
            } catch(e) {}
          }
          JSON.stringify(result);
        `)
        return JSON.parse(raw) as RunningApp[]
      } catch {
        return []
      }
    },

    /**
     * Activates the application with the given bundle identifier.
     * Script failures are not reported, because the helper ignores the exit code.
     */
    async open(bundleId: string): Promise<void> {
      // Escape the identifier: it is spliced into an AppleScript string literal.
      await osascript(`tell application id "${escapeAppleScriptString(bundleId)}" to activate`)
    },

    /**
     * Sets `visible` to true on the System Events process of each bundle
     * identifier, one at a time and in order.
     */
    async unhide(bundleIds: string[]): Promise<void> {
      for (const bundleId of bundleIds) {
        // NOTE(review): the filter reference below has no `first`/`every`
        // specifier — confirm that this script compiles and matches as intended.
        await osascript(`
          tell application "System Events"
            set visible of application process (name of application process whose bundle identifier is "${escapeAppleScriptString(bundleId)}") to true
          end tell
        `)
      }
    },
  }
  272. // ---------------------------------------------------------------------------
  273. // ScreenshotAPI
  274. // ---------------------------------------------------------------------------
  /**
   * Screen capture operations exposed through `ComputerUseAPI.screenshot`.
   */
  interface ScreenshotAPI {
    /** Captures a display. */
    captureExcluding(
      allowedBundleIds: string[],
      quality: number,
      targetW: number,
      targetH: number,
      displayId?: number,
    ): Promise<ScreenshotResult>

    /** Captures the rectangle `x, y, w, h`. */
    captureRegion(
      allowedBundleIds: string[],
      x: number,
      y: number,
      w: number,
      h: number,
      outW: number,
      outH: number,
      quality: number,
      displayId?: number,
    ): Promise<ScreenshotResult>
  }
  /**
   * Reads the image dimensions from a PNG buffer.
   *
   * Width and height are big-endian 32-bit integers at byte offsets 16 and 20,
   * after the 8-byte signature and the IHDR chunk's length and type fields.
   *
   * @throws Error when the buffer is too short or lacks the PNG signature.
   */
  function readPngDimensions(buf: Buffer): { width: number; height: number } {
    const hasSignature =
      buf.length >= 24 &&
      buf.readUInt32BE(0) === 0x89504e47 &&
      buf.readUInt32BE(4) === 0x0d0a1a0a
    if (!hasSignature) {
      throw new Error('screencapture output is not a valid PNG file')
    }
    return { width: buf.readUInt32BE(16), height: buf.readUInt32BE(20) }
  }

  /**
   * Runs `screencapture` with the given arguments, writing to a temporary PNG,
   * and returns the image as base64 together with its dimensions.
   *
   * @param args - Flags placed before the output path on the command line.
   * @returns Base64 of the PNG file and the width/height from its header.
   * @throws Error when no readable file was produced (the message carries the
   *   exit code and stderr) or when the file is not a PNG.
   */
  async function captureScreenToBase64(args: string[]): Promise<{ base64: string; width: number; height: number }> {
    // pid + timestamp + random suffix: concurrent captures must not share a path.
    const unique = `${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2)}`
    const tmpFile = join(tmpdir(), `cu-screenshot-${unique}.png`)
    const proc = Bun.spawn(['screencapture', ...args, tmpFile], {
      stdout: 'ignore', stderr: 'pipe',
    })
    // Drain stderr while waiting, so the child cannot block on a full pipe and
    // its diagnostics are available if the capture fails.
    const [stderrText, exitCode] = await Promise.all([
      new Response(proc.stderr).text(),
      proc.exited,
    ])
    try {
      let buf: Buffer
      try {
        buf = readFileSync(tmpFile)
      } catch (err) {
        const reason = err instanceof Error ? err.message : String(err)
        const detail = stderrText.trim()
        throw new Error(
          `screencapture produced no readable file (exit code ${exitCode}): ${reason}` +
            (detail ? ` — ${detail}` : ''),
        )
      }
      const { width, height } = readPngDimensions(buf)
      return { base64: buf.toString('base64'), width, height }
    } finally {
      // Best-effort cleanup; the file may not exist if the capture failed.
      try { unlinkSync(tmpFile) } catch {}
    }
  }
  /**
   * Builds the `screencapture -D` arguments for a display identifier.
   *
   * `screencapture -D` expects a 1-based display ordinal (1 = main display), not
   * a CoreGraphics display ID. The identifier is therefore translated to its
   * position in the enumerated display list. An identifier that is not in the
   * list is passed through unchanged, which keeps callers that already pass an
   * ordinal working.
   *
   * NOTE(review): assumes the enumeration order (main display first) matches
   * `screencapture`'s ordinal order for secondary displays — confirm on a
   * multi-display machine.
   *
   * @param displayId - Identifier from `DisplayGeometry.displayId`, or undefined.
   * @param listDisplays - Supplies the enumerated displays; called only when
   *   `displayId` is defined.
   * @returns `[]` when no display was requested, otherwise `['-D', '<ordinal>']`.
   */
  function screencaptureDisplayArgs(
    displayId: number | undefined,
    listDisplays: () => DisplayGeometry[],
  ): string[] {
    if (displayId === undefined) return []
    const index = listDisplays().findIndex(d => d.displayId === displayId)
    const ordinal = index >= 0 ? index + 1 : displayId
    return ['-D', String(ordinal)]
  }

  /**
   * Screen capture backed by the `screencapture` command-line tool.
   *
   * The allowlist, quality, and target/output size parameters are accepted for
   * interface compatibility but are not applied: captures are unfiltered PNGs at
   * whatever size `screencapture` produces.
   */
  const screenshotAPI: ScreenshotAPI = {
    /**
     * Captures a whole display silently (`-x`).
     *
     * @param displayId - Display to capture; when omitted no `-D` flag is passed.
     */
    async captureExcluding(
      _allowedBundleIds: string[],
      _quality: number,
      _targetW: number,
      _targetH: number,
      displayId?: number,
    ): Promise<ScreenshotResult> {
      const args = ['-x'] // silent
      args.push(...screencaptureDisplayArgs(displayId, () => displayAPI.listAll()))
      return captureScreenToBase64(args)
    },

    /**
     * Captures the rectangle `x,y,w,h` silently (`-x -R`).
     *
     * @param displayId - Display to capture; when omitted no `-D` flag is passed.
     */
    async captureRegion(
      _allowedBundleIds: string[],
      x: number, y: number, w: number, h: number,
      _outW: number, _outH: number, _quality: number,
      displayId?: number,
    ): Promise<ScreenshotResult> {
      const args = ['-x', '-R', `${x},${y},${w},${h}`]
      args.push(...screencaptureDisplayArgs(displayId, () => displayAPI.listAll()))
      return captureScreenToBase64(args)
    },
  }
  330. // ---------------------------------------------------------------------------
  331. // ComputerUseAPI — Main export
  332. // ---------------------------------------------------------------------------
  /**
   * Main export: bundles the apps, display, and screenshot APIs of this module
   * and offers a combined capture entry point.
   */
  export class ComputerUseAPI {
    /** Application management operations. */
    apps: AppsAPI = appsAPI

    /** Display geometry queries. */
    display: DisplayAPI = displayAPI

    /** Screen capture operations. */
    screenshot: ScreenshotAPI = screenshotAPI

    /**
     * Captures a screenshot by delegating directly to
     * `this.screenshot.captureExcluding`.
     *
     * `_surrogateHost`, `_autoResolve`, and `_doHide` are accepted but unused; no
     * resolve or prepare step is performed before the capture.
     *
     * @returns The capture produced by `captureExcluding`.
     */
    async resolvePrepareCapture(
      allowedBundleIds: string[],
      _surrogateHost: string,
      quality: number,
      targetW: number,
      targetH: number,
      displayId?: number,
      _autoResolve?: boolean,
      _doHide?: boolean,
    ): Promise<ResolvePrepareCaptureResult> {
      const capture = this.screenshot.captureExcluding(
        allowedBundleIds,
        quality,
        targetW,
        targetH,
        displayId,
      )
      return capture
    }
  }