From 3258ce6349cadb36fc7f6e4ecae3a7a5888b37b5 Mon Sep 17 00:00:00 2001 From: Vince Graics Date: Wed, 20 May 2026 19:01:25 +0200 Subject: [PATCH 1/9] feat: Add @wdio/elements package - Migrates browser element detection, accessibility tree, and mobile element locator generation from the MCP project into a standalone, reusable package --- packages/elements/package.json | 32 + packages/elements/src/accessibility-tree.ts | 427 ++++++++++++ packages/elements/src/browser-elements.ts | 293 ++++++++ packages/elements/src/get-elements.ts | 60 ++ packages/elements/src/index.ts | 17 + packages/elements/src/locators/constants.ts | 169 +++++ .../elements/src/locators/element-filter.ts | 234 +++++++ packages/elements/src/locators/index.ts | 264 +++++++ .../src/locators/locator-generation.ts | 644 ++++++++++++++++++ packages/elements/src/locators/types.ts | 108 +++ packages/elements/src/locators/xml-parsing.ts | 329 +++++++++ packages/elements/src/mobile-elements.ts | 163 +++++ .../elements/tests/accessibility-tree.test.ts | 26 + .../elements/tests/browser-elements.test.ts | 22 + .../tests/locators/locator-generation.test.ts | 23 + .../elements/tests/mobile-elements.test.ts | 13 + packages/elements/tsconfig.json | 16 + 17 files changed, 2840 insertions(+) create mode 100644 packages/elements/package.json create mode 100644 packages/elements/src/accessibility-tree.ts create mode 100644 packages/elements/src/browser-elements.ts create mode 100644 packages/elements/src/get-elements.ts create mode 100644 packages/elements/src/index.ts create mode 100644 packages/elements/src/locators/constants.ts create mode 100644 packages/elements/src/locators/element-filter.ts create mode 100644 packages/elements/src/locators/index.ts create mode 100644 packages/elements/src/locators/locator-generation.ts create mode 100644 packages/elements/src/locators/types.ts create mode 100644 packages/elements/src/locators/xml-parsing.ts create mode 100644 packages/elements/src/mobile-elements.ts create mode 
100644 packages/elements/tests/accessibility-tree.test.ts create mode 100644 packages/elements/tests/browser-elements.test.ts create mode 100644 packages/elements/tests/locators/locator-generation.test.ts create mode 100644 packages/elements/tests/mobile-elements.test.ts create mode 100644 packages/elements/tsconfig.json diff --git a/packages/elements/package.json b/packages/elements/package.json new file mode 100644 index 0000000..2078346 --- /dev/null +++ b/packages/elements/package.json @@ -0,0 +1,32 @@ +{ + "name": "@wdio/elements", + "version": "1.0.0", + "description": "Element detection scripts for WebdriverIO", + "author": "Vince Graics", + "license": "MIT", + "type": "module", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "types": "./dist/index.d.ts", + "scripts": { + "build": "tsc -p ./tsconfig.json", + "lint": "eslint . --fix" + }, + "dependencies": { + "@xmldom/xmldom": "^0.9.8", + "xpath": "^0.0.34" + }, + "devDependencies": { + "@types/node": "25.5.2", + "@wdio/globals": "9.27.0", + "typescript": "6.0.2", + "vitest": "^4.0.16" + }, + "peerDependencies": { + "webdriverio": "^9.0.0" + } +} diff --git a/packages/elements/src/accessibility-tree.ts b/packages/elements/src/accessibility-tree.ts new file mode 100644 index 0000000..7abb203 --- /dev/null +++ b/packages/elements/src/accessibility-tree.ts @@ -0,0 +1,427 @@ +/** + * Browser accessibility tree + * Single browser.execute() call: DOM walk → flat accessibility node list + * + * NOTE: This script runs in browser context via browser.execute() + * It must be self-contained with no external dependencies + */ + +export interface AccessibilityNode { + role: string + name: string + selector: string + level: number | string + disabled: string + checked: string + expanded: string + selected: string + pressed: string + required: string + readonly: string +} + +const accessibilityTreeScript = () => + (function () { + const INPUT_TYPE_ROLES: Record = { + text: 
/**
 * Resolve the ARIA role exposed by an element.
 *
 * An explicit `role` attribute wins: it is read as a whitespace-separated
 * token list and the first token is returned. Without a usable explicit role
 * the implicit role is derived from the tag name (for some tags also from
 * attributes or ancestors). Elements without a tag-based role fall back to
 * `textbox` when content-editable and to `generic` when they carry a
 * non-negative `tabindex`.
 *
 * @param el - Element to inspect.
 * @returns The role name, or `null` when the element has no role of its own.
 */
function getRole(el: HTMLElement): string | null {
  // Trim and split on any whitespace: `role=" button"` or `role="tab\tx"`
  // must not produce an empty or fused token. A blank attribute falls
  // through to the implicit role instead of returning ''.
  const explicit = (el.getAttribute('role') ?? '').trim().split(/\s+/)[0]
  if (explicit) {
    return explicit
  }

  const tag = el.tagName.toLowerCase()

  switch (tag) {
    case 'button':
    case 'summary':
      return 'button'
    case 'a':
      // Anchors are links only when they actually navigate somewhere
      return el.hasAttribute('href') ? 'link' : null
    case 'input': {
      const type = (el.getAttribute('type') || 'text').toLowerCase()
      if (type === 'hidden') {
        return null
      }
      // Unknown input types are treated like plain text fields
      return INPUT_TYPE_ROLES[type] || 'textbox'
    }
    case 'select':
      return 'combobox'
    case 'textarea':
      return 'textbox'
    case 'h1':
    case 'h2':
    case 'h3':
    case 'h4':
    case 'h5':
    case 'h6':
      return 'heading'
    case 'img':
      return 'img'
    case 'nav':
      return 'navigation'
    case 'main':
      return 'main'
    case 'header':
      // Only a header outside sectioning content is the page banner
      return el.closest('article,aside,main,nav,section') ? null : 'banner'
    case 'footer':
      return el.closest('article,aside,main,nav,section')
        ? null
        : 'contentinfo'
    case 'aside':
      return 'complementary'
    case 'dialog':
      return 'dialog'
    case 'form':
      return 'form'
    case 'section':
      // A section is a landmark only when it is labelled
      return el.hasAttribute('aria-label') ||
        el.hasAttribute('aria-labelledby')
        ? 'region'
        : null
    case 'details':
      return 'group'
    case 'progress':
      return 'progressbar'
    case 'meter':
      return 'meter'
    case 'ul':
    case 'ol':
      return 'list'
    case 'li':
      return 'listitem'
    case 'table':
      return 'table'
  }

  if (el.contentEditable === 'true') {
    return 'textbox'
  }
  if (
    el.hasAttribute('tabindex') &&
    parseInt(el.getAttribute('tabindex') || '-1', 10) >= 0
  ) {
    return 'generic'
  }

  return null
}
(el.textContent?.trim().replace(/\s+/g, ' ') || '').slice(0, 100) + } + + function getSelector(element: HTMLElement): string { + const tag = element.tagName.toLowerCase() + + const text = element.textContent?.trim().replace(/\s+/g, ' ') + if (text && text.length > 0 && text.length <= 50) { + const sameTagElements = document.querySelectorAll(tag) + let matchCount = 0 + sameTagElements.forEach((el) => { + if (el.textContent?.includes(text)) { + matchCount++ + } + }) + if (matchCount === 1) { + return `${tag}*=${text}` + } + } + + const ariaLabel = element.getAttribute('aria-label') + if (ariaLabel && ariaLabel.length <= 80) { + return `aria/${ariaLabel}` + } + + const testId = element.getAttribute('data-testid') + if (testId) { + const sel = `[data-testid="${CSS.escape(testId)}"]` + if (document.querySelectorAll(sel).length === 1) { + return sel + } + } + + if (element.id) { + return `#${CSS.escape(element.id)}` + } + + const nameAttr = element.getAttribute('name') + if (nameAttr) { + const sel = `${tag}[name="${CSS.escape(nameAttr)}"]` + if (document.querySelectorAll(sel).length === 1) { + return sel + } + } + + if (element.className && typeof element.className === 'string') { + const classes = element.className.trim().split(/\s+/).filter(Boolean) + for (const cls of classes) { + const sel = `${tag}.${CSS.escape(cls)}` + if (document.querySelectorAll(sel).length === 1) { + return sel + } + } + if (classes.length >= 2) { + const sel = `${tag}${classes + .slice(0, 2) + .map((c) => `.${CSS.escape(c)}`) + .join('')}` + if (document.querySelectorAll(sel).length === 1) { + return sel + } + } + } + + let current: HTMLElement | null = element + const path: string[] = [] + while (current && current !== document.documentElement) { + let seg = current.tagName.toLowerCase() + if (current.id) { + path.unshift(`#${CSS.escape(current.id)}`) + break + } + const parent = current.parentElement + if (parent) { + const siblings = Array.from(parent.children).filter( + (c) => c.tagName 
/**
 * Determine the heading level of an element.
 *
 * `h1`–`h6` yield the digit from their tag name; any other element uses its
 * `aria-level` attribute when that parses as an integer.
 *
 * @param el - Element to inspect.
 * @returns The level, or `undefined` when the element declares none or its
 * `aria-level` is not numeric.
 */
function getLevel(el: HTMLElement): number | undefined {
  const heading = /^h([1-6])$/.exec(el.tagName.toLowerCase())
  if (heading) {
    return Number(heading[1])
  }

  const ariaLevel = el.getAttribute('aria-level')
  if (!ariaLevel) {
    return undefined
  }

  // parseInt yields NaN for values such as "abc"; report "no level" instead
  // of leaking NaN into the serialized node.
  const level = parseInt(ariaLevel, 10)
  return Number.isNaN(level) ? undefined : level
}
'true' + : '' + } + } + + type RawNode = Record + + const result: RawNode[] = [] + + function walk(el: HTMLElement, depth = 0): void { + if (depth > 200) { + return + } + if (!isVisible(el)) { + return + } + + const role = getRole(el) + + if (!role) { + for (const child of Array.from(el.children)) { + walk(child as HTMLElement, depth + 1) + } + return + } + + const name = getAccessibleName(el, role) + const isLandmark = LANDMARK_ROLES.has(role) + const hasIdentity = !!(name || isLandmark) + const selector = hasIdentity ? getSelector(el) : '' + const node: RawNode = { + role, + name, + selector, + level: getLevel(el) ?? '', + ...getState(el) + } + result.push(node) + + for (const child of Array.from(el.children)) { + walk(child as HTMLElement, depth + 1) + } + } + + for (const child of Array.from(document.body.children)) { + walk(child as HTMLElement, 0) + } + + return result + })() + +/** + * Get browser accessibility tree via a single DOM walk. + */ +export async function getBrowserAccessibilityTree( + browser: WebdriverIO.Browser +): Promise { + return (browser as any).execute( + accessibilityTreeScript + ) as unknown as Promise +} diff --git a/packages/elements/src/browser-elements.ts b/packages/elements/src/browser-elements.ts new file mode 100644 index 0000000..f8d599d --- /dev/null +++ b/packages/elements/src/browser-elements.ts @@ -0,0 +1,293 @@ +/** + * Browser element detection + * Single browser.execute() call: querySelectorAll → flat interactable element list + * + * NOTE: This script runs in browser context via browser.execute() + * It must be self-contained with no external dependencies + */ + +export interface BrowserElementInfo { + tagName: string + name: string // computed accessible name (ARIA spec) + type: string + value: string + href: string + selector: string + isInViewport: boolean + boundingBox?: { x: number; y: number; width: number; height: number } +} + +export interface GetBrowserElementsOptions { + includeBounds?: boolean +} + +const 
/**
 * Tell whether an element is currently visible.
 *
 * Delegates to the element's own `checkVisibility()` when that method exists,
 * asking it to take opacity, the `visibility` property and
 * `content-visibility: auto` into account. Otherwise the decision is made
 * from the computed style plus the element's offset box.
 *
 * @param element - Element to test.
 * @returns `true` when the element is considered visible.
 */
function isVisible(element: HTMLElement): boolean {
  const hasNativeCheck = typeof element.checkVisibility === 'function'

  if (!hasNativeCheck) {
    // Fallback: hidden by CSS, or laid out without any box
    const computed = window.getComputedStyle(element)
    if (
      computed.display === 'none' ||
      computed.visibility === 'hidden' ||
      computed.opacity === '0'
    ) {
      return false
    }
    return element.offsetWidth > 0 && element.offsetHeight > 0
  }

  return element.checkVisibility({
    opacityProperty: true,
    visibilityProperty: true,
    contentVisibilityAuto: true
  })
}
label[for=id] for form elements + if (['input', 'select', 'textarea'].includes(tag)) { + const id = el.getAttribute('id') + if (id) { + const label = document.querySelector(`label[for="${CSS.escape(id)}"]`) + if (label) { + return label.textContent?.trim() || '' + } + } + // 5. Wrapping label — clone, strip inputs, read text + const parentLabel = el.closest('label') + if (parentLabel) { + const clone = parentLabel.cloneNode(true) as HTMLElement + clone + .querySelectorAll('input,select,textarea') + .forEach((n) => n.remove()) + const lt = clone.textContent?.trim() + if (lt) { + return lt + } + } + } + + // 6. placeholder + const ph = el.getAttribute('placeholder') + if (ph) { + return ph.trim() + } + + // 7. title + const title = el.getAttribute('title') + if (title) { + return title.trim() + } + + // 8. text content (truncated, whitespace normalized) + return (el.textContent?.trim().replace(/\s+/g, ' ') || '').slice(0, 100) + } + + function getSelector(element: HTMLElement): string { + const tag = element.tagName.toLowerCase() + + // 1. tag*=Text — best per WebdriverIO docs + const text = element.textContent?.trim().replace(/\s+/g, ' ') + if (text && text.length > 0 && text.length <= 50) { + const sameTagElements = document.querySelectorAll(tag) + let matchCount = 0 + sameTagElements.forEach((el) => { + if (el.textContent?.includes(text)) { + matchCount++ + } + }) + if (matchCount === 1) { + return `${tag}*=${text}` + } + } + + // 2. aria/label + const ariaLabel = element.getAttribute('aria-label') + if (ariaLabel && ariaLabel.length <= 80) { + return `aria/${ariaLabel}` + } + + // 3. data-testid + const testId = element.getAttribute('data-testid') + if (testId) { + const sel = `[data-testid="${CSS.escape(testId)}"]` + if (document.querySelectorAll(sel).length === 1) { + return sel + } + } + + // 4. #id + if (element.id) { + return `#${CSS.escape(element.id)}` + } + + // 5. 
[name] — form elements + const nameAttr = element.getAttribute('name') + if (nameAttr) { + const sel = `${tag}[name="${CSS.escape(nameAttr)}"]` + if (document.querySelectorAll(sel).length === 1) { + return sel + } + } + + // 6. tag.class — try each class individually, then first-two combination + if (element.className && typeof element.className === 'string') { + const classes = element.className.trim().split(/\s+/).filter(Boolean) + for (const cls of classes) { + const sel = `${tag}.${CSS.escape(cls)}` + if (document.querySelectorAll(sel).length === 1) { + return sel + } + } + if (classes.length >= 2) { + const sel = `${tag}${classes + .slice(0, 2) + .map((c) => `.${CSS.escape(c)}`) + .join('')}` + if (document.querySelectorAll(sel).length === 1) { + return sel + } + } + } + + // 7. CSS path fallback + let current: HTMLElement | null = element + const path: string[] = [] + while (current && current !== document.documentElement) { + let seg = current.tagName.toLowerCase() + if (current.id) { + path.unshift(`#${CSS.escape(current.id)}`) + break + } + const parent = current.parentElement + if (parent) { + const siblings = Array.from(parent.children).filter( + (c) => c.tagName === current!.tagName + ) + if (siblings.length > 1) { + seg += `:nth-of-type(${siblings.indexOf(current) + 1})` + } + } + path.unshift(seg) + current = current.parentElement + if (path.length >= 4) { + break + } + } + return path.join(' > ') + } + + const elements: Record[] = [] + const seen = new Set() + + document.querySelectorAll(interactableSelectors).forEach((el) => { + if (seen.has(el)) { + return + } + seen.add(el) + + const htmlEl = el as HTMLElement + if (!isVisible(htmlEl)) { + return + } + + const inputEl = htmlEl as HTMLInputElement + const rect = htmlEl.getBoundingClientRect() + const isInViewport = + rect.top >= 0 && + rect.left >= 0 && + rect.bottom <= + (window.innerHeight || document.documentElement.clientHeight) && + rect.right <= + (window.innerWidth || 
/**
 * One page of detected elements plus paging metadata.
 *
 * `total` counts the elements after viewport filtering, `showing` the length
 * of `elements`, and `hasMore` whether elements remain after this page.
 */
export type VisibleElementsResult = {
  total: number
  showing: number
  hasMore: boolean
  elements: unknown[]
}

/**
 * Collect the elements of the current page or screen.
 *
 * Mobile sessions (`browser.isAndroid` / `browser.isIOS`) use
 * `getMobileVisibleElements`; every other session uses
 * `getInteractableBrowserElements`. The list is optionally reduced to
 * elements not flagged `isInViewport: false`, then paged.
 *
 * @param browser - Session to inspect.
 * @param params - `inViewportOnly` (default `true`), `includeContainers` and
 * `includeBounds` (default `false`), plus `limit` / `offset` for paging.
 * Values of `limit` or `offset` that are not positive disable the respective
 * bound; fractional values are truncated.
 * @returns The requested page together with `total`, `showing` and `hasMore`.
 */
export async function getElements(
  browser: WebdriverIO.Browser,
  params: {
    inViewportOnly?: boolean
    includeContainers?: boolean
    includeBounds?: boolean
    limit?: number
    offset?: number
  }
): Promise<VisibleElementsResult> {
  const {
    inViewportOnly = true,
    includeContainers = false,
    includeBounds = false,
    limit = 0,
    offset = 0
  } = params

  let elements: { isInViewport?: boolean }[]

  if (browser.isAndroid || browser.isIOS) {
    const platform = browser.isAndroid ? 'android' : 'ios'
    elements = await getMobileVisibleElements(browser, platform, {
      includeContainers,
      includeBounds
    })
  } else {
    elements = await getInteractableBrowserElements(browser, { includeBounds })
  }

  if (inViewportOnly) {
    // Entries without the flag are kept; only an explicit `false` is dropped
    elements = elements.filter((el) => el.isInViewport !== false)
  }

  const total = elements.length

  // Normalise paging once so slicing and `hasMore` agree: a negative offset
  // used to be ignored for slicing yet still entered the `hasMore` sum,
  // reporting more elements when everything was already returned.
  const start = offset > 0 ? Math.trunc(offset) : 0
  const end = limit > 0 ? start + Math.trunc(limit) : total
  const page = elements.slice(start, end)

  return {
    total,
    showing: page.length,
    hasMore: start + page.length < total,
    elements: page
  }
}
'android.widget.CompoundButton', + 'android.widget.RadioButton', + 'android.widget.CheckBox', + 'android.widget.Switch', + 'android.widget.FloatingActionButton', + 'com.google.android.material.button.MaterialButton', + 'com.google.android.material.floatingactionbutton.FloatingActionButton', + + // Text elements (often tappable) + 'android.widget.TextView', + 'android.widget.CheckedTextView', + + // Image elements (often tappable) + 'android.widget.ImageView', + 'android.widget.QuickContactBadge', + + // Selection elements + 'android.widget.Spinner', + 'android.widget.SeekBar', + 'android.widget.RatingBar', + 'android.widget.ProgressBar', + 'android.widget.DatePicker', + 'android.widget.TimePicker', + 'android.widget.NumberPicker', + + // List/grid items + 'android.widget.AdapterView' +] + +export const ANDROID_LAYOUT_CONTAINERS = [ + // Core ViewGroup classes + 'android.view.ViewGroup', + 'android.view.View', + 'android.widget.FrameLayout', + 'android.widget.LinearLayout', + 'android.widget.RelativeLayout', + 'android.widget.GridLayout', + 'android.widget.TableLayout', + 'android.widget.TableRow', + 'android.widget.AbsoluteLayout', + + // AndroidX layout classes + 'androidx.constraintlayout.widget.ConstraintLayout', + 'androidx.coordinatorlayout.widget.CoordinatorLayout', + 'androidx.appcompat.widget.LinearLayoutCompat', + 'androidx.cardview.widget.CardView', + 'androidx.appcompat.widget.ContentFrameLayout', + 'androidx.appcompat.widget.FitWindowsFrameLayout', + + // Scrolling containers + 'android.widget.ScrollView', + 'android.widget.HorizontalScrollView', + 'android.widget.NestedScrollView', + 'androidx.core.widget.NestedScrollView', + 'androidx.recyclerview.widget.RecyclerView', + 'android.widget.ListView', + 'android.widget.GridView', + 'android.widget.AbsListView', + + // App chrome / system elements + 'android.widget.ActionBarContainer', + 'android.widget.ActionBarOverlayLayout', + 'android.view.ViewStub', + 'androidx.appcompat.widget.ActionBarContainer', + 
'androidx.appcompat.widget.ActionBarContextView', + 'androidx.appcompat.widget.ActionBarOverlayLayout', + + // Decor views + 'com.android.internal.policy.DecorView', + 'android.widget.DecorView' +] + +export const IOS_INTERACTABLE_TAGS = [ + // Input elements + 'XCUIElementTypeTextField', + 'XCUIElementTypeSecureTextField', + 'XCUIElementTypeTextView', + 'XCUIElementTypeSearchField', + + // Button-like elements + 'XCUIElementTypeButton', + 'XCUIElementTypeLink', + + // Text elements (often tappable) + 'XCUIElementTypeStaticText', + + // Image elements + 'XCUIElementTypeImage', + 'XCUIElementTypeIcon', + + // Selection elements + 'XCUIElementTypeSwitch', + 'XCUIElementTypeSlider', + 'XCUIElementTypeStepper', + 'XCUIElementTypeSegmentedControl', + 'XCUIElementTypePicker', + 'XCUIElementTypePickerWheel', + 'XCUIElementTypeDatePicker', + 'XCUIElementTypePageIndicator', + + // Table/list items + 'XCUIElementTypeCell', + 'XCUIElementTypeMenuItem', + 'XCUIElementTypeMenuBarItem', + + // Toggle elements + 'XCUIElementTypeCheckBox', + 'XCUIElementTypeRadioButton', + 'XCUIElementTypeToggle', + + // Other interactive + 'XCUIElementTypeKey', + 'XCUIElementTypeKeyboard', + 'XCUIElementTypeAlert', + 'XCUIElementTypeSheet' +] + +export const IOS_LAYOUT_CONTAINERS = [ + // Generic containers + 'XCUIElementTypeOther', + 'XCUIElementTypeGroup', + 'XCUIElementTypeLayoutItem', + + // Scroll containers + 'XCUIElementTypeScrollView', + 'XCUIElementTypeTable', + 'XCUIElementTypeCollectionView', + 'XCUIElementTypeScrollBar', + + // Navigation chrome + 'XCUIElementTypeNavigationBar', + 'XCUIElementTypeTabBar', + 'XCUIElementTypeToolbar', + 'XCUIElementTypeStatusBar', + 'XCUIElementTypeMenuBar', + + // Windows and views + 'XCUIElementTypeWindow', + 'XCUIElementTypeSheet', + 'XCUIElementTypeDrawer', + 'XCUIElementTypeDialog', + 'XCUIElementTypePopover', + 'XCUIElementTypePopUpButton', + + // Outline elements + 'XCUIElementTypeOutline', + 'XCUIElementTypeOutlineRow', + 
/**
 * Check whether a tag name matches any entry of a tag list.
 *
 * An entry matches when it equals the tag name or occurs anywhere inside it
 * (partial match). Empty entries never match, so a stray `''` in a
 * caller-supplied list cannot select every element.
 *
 * @param tagName - Tag name of the element under test.
 * @param tagList - Tag names (or fragments of tag names) to compare against.
 * @returns `true` when at least one entry matches.
 */
function matchesTagList(tagName: string, tagList: readonly string[]): boolean {
  // A substring test already covers exact and suffix matches
  return tagList.some((tag) => tag !== '' && tagName.includes(tag))
}
/**
 * Decide whether an element counts as interactable.
 *
 * The session is treated as Android when `automationName` contains
 * "uiautomator" (case-insensitive) and as iOS otherwise. An element qualifies
 * when its tag matches the platform's interactable tag list. Failing that,
 * an Android element qualifies when one of its `clickable`, `focusable`,
 * `checkable` or `long-clickable` attributes is `'true'`, and an iOS element
 * when its `accessible` attribute is `'true'`.
 *
 * @param element - Parsed element to classify.
 * @param _isNative - Unused; kept for signature compatibility.
 * @param automationName - Automation backend name of the session.
 * @returns `true` when the element is interactable.
 */
export function isInteractableElement(
  element: JSONElement,
  _isNative: boolean,
  automationName: string
): boolean {
  const onAndroid = automationName.toLowerCase().includes('uiautomator')
  const knownTags = onAndroid
    ? ANDROID_INTERACTABLE_TAGS
    : IOS_INTERACTABLE_TAGS

  if (matchesTagList(element.tagName, knownTags)) {
    return true
  }

  const attrs = element.attributes

  if (!onAndroid) {
    return attrs?.accessible === 'true'
  }

  const androidFlags = [
    'clickable',
    'focusable',
    'checkable',
    'long-clickable'
  ] as const
  return androidFlags.some((flag) => attrs?.[flag] === 'true')
}
/**
 * Check whether an element carries meaningful textual content.
 *
 * A value is meaningful when it is a string that is not blank and is not the
 * literal `'null'`. `text` is consulted on both platforms; Android
 * additionally checks `content-desc`, iOS checks `label` and `name`.
 *
 * @param element - Parsed element to inspect.
 * @param platform - Platform whose attribute names apply.
 * @returns `true` when at least one relevant attribute is meaningful; `false`
 * also when the element has no attributes at all.
 */
export function hasMeaningfulContent(
  element: JSONElement,
  platform: 'android' | 'ios'
): boolean {
  const isMeaningful = (value: unknown): boolean =>
    typeof value === 'string' && value.trim() !== '' && value !== 'null'

  // The other filters in this module read `attributes` defensively
  // (`element.attributes?.…`); do the same here rather than throw.
  const attrs = element.attributes

  const candidates =
    platform === 'android'
      ? [attrs?.text, attrs?.['content-desc']]
      : [attrs?.text, attrs?.label, attrs?.name]

  return candidates.some(isMeaningful)
}
/**
 * Build the default filter options for a platform.
 *
 * `hierarchy` is always excluded. Unless `includeContainers` is set, the
 * platform's layout-container tags are excluded as well and only fetchable
 * elements are kept. Visibility filtering is always on and clickable-only
 * filtering always off.
 *
 * @param platform - Platform whose container list applies.
 * @param includeContainers - Keep layout containers in the result.
 * @returns A fresh `FilterOptions` object.
 */
export function getDefaultFilters(
  platform: 'android' | 'ios',
  includeContainers: boolean = false
): FilterOptions {
  const excluded = ['hierarchy']

  if (!includeContainers) {
    const containers =
      platform === 'android'
        ? ANDROID_LAYOUT_CONTAINERS
        : IOS_LAYOUT_CONTAINERS
    excluded.push(...containers)
  }

  return {
    excludeTagNames: excluded,
    fetchableOnly: !includeContainers,
    visibleOnly: true,
    clickableOnly: false
  }
}
from './types.js' + +import { + xmlToJSON, + xmlToDOM, + parseAndroidBounds, + parseIOSBounds, + findDOMNodeByPath +} from './xml-parsing.js' +import { + shouldIncludeElement, + isLayoutContainer, + hasMeaningfulContent +} from './element-filter.js' +import { getSuggestedLocators, locatorsToObject } from './locator-generation.js' + +interface ProcessingContext { + sourceXML: string + platform: 'android' | 'ios' + automationName: string + isNative: boolean + viewportSize: { width: number; height: number } + filters: FilterOptions + results: ElementWithLocators[] + parsedDOM: XMLDocument | null +} + +/** + * Parse element bounds based on platform + */ +function parseBounds( + element: JSONElement, + platform: 'android' | 'ios' +): { x: number; y: number; width: number; height: number } { + return platform === 'android' + ? parseAndroidBounds(element.attributes.bounds || '') + : parseIOSBounds(element.attributes) +} + +/** + * Check if bounds are within viewport + */ +function isWithinViewport( + bounds: { x: number; y: number; width: number; height: number }, + viewport: { width: number; height: number } +): boolean { + return ( + bounds.x >= 0 && + bounds.y >= 0 && + bounds.width > 0 && + bounds.height > 0 && + bounds.x + bounds.width <= viewport.width && + bounds.y + bounds.height <= viewport.height + ) +} + +/** + * Transform JSONElement to ElementWithLocators + */ +function transformElement( + element: JSONElement, + locators: [LocatorStrategy, string][], + ctx: ProcessingContext +): ElementWithLocators { + const attrs = element.attributes + const bounds = parseBounds(element, ctx.platform) + + return { + tagName: element.tagName, + locators: locatorsToObject(locators), + text: attrs.text || attrs.label || '', + contentDesc: attrs['content-desc'] || '', + resourceId: attrs['resource-id'] || '', + accessibilityId: attrs.name || attrs['content-desc'] || '', + label: attrs.label || '', + value: attrs.value || '', + className: attrs.class || element.tagName, + 
clickable: + attrs.clickable === 'true' || + attrs.accessible === 'true' || + attrs['long-clickable'] === 'true', + enabled: attrs.enabled !== 'false', + displayed: + ctx.platform === 'android' + ? attrs.displayed !== 'false' + : attrs.visible !== 'false', + bounds, + isInViewport: isWithinViewport(bounds, ctx.viewportSize) + } +} + +/** + * Check if element should be processed + */ +function shouldProcess(element: JSONElement, ctx: ProcessingContext): boolean { + if ( + shouldIncludeElement(element, ctx.filters, ctx.isNative, ctx.automationName) + ) { + return true + } + return ( + isLayoutContainer(element, ctx.platform) && + hasMeaningfulContent(element, ctx.platform) + ) +} + +/** + * Process a single element and add to results if valid + */ +function processElement(element: JSONElement, ctx: ProcessingContext): void { + if (!shouldProcess(element, ctx)) { + return + } + + try { + const targetNode = ctx.parsedDOM + ? findDOMNodeByPath(ctx.parsedDOM, element.path) + : undefined + + const locators = getSuggestedLocators( + element, + ctx.sourceXML, + ctx.automationName, + { + sourceXML: ctx.sourceXML, + parsedDOM: ctx.parsedDOM, + isAndroid: ctx.platform === 'android' + }, + targetNode || undefined + ) + if (locators.length === 0) { + return + } + + const transformed = transformElement(element, locators, ctx) + if (Object.keys(transformed.locators).length === 0) { + return + } + + ctx.results.push(transformed) + } catch (error) { + console.error(`[processElement] Error at path ${element.path}:`, error) + } +} + +/** + * Recursively traverse and process element tree + */ +function traverseTree( + element: JSONElement | null, + ctx: ProcessingContext +): void { + if (!element) { + return + } + + processElement(element, ctx) + + for (const child of element.children || []) { + traverseTree(child, ctx) + } +} + +/** + * Generate locators for all elements from page source XML + */ +export function generateAllElementLocators( + sourceXML: string, + options: 
GenerateLocatorsOptions +): ElementWithLocators[] { + const sourceJSON = xmlToJSON(sourceXML) + + if (!sourceJSON) { + console.error( + '[generateAllElementLocators] Failed to parse page source XML' + ) + return [] + } + + const parsedDOM = xmlToDOM(sourceXML) + + const ctx: ProcessingContext = { + sourceXML, + platform: options.platform, + automationName: + options.platform === 'android' ? 'uiautomator2' : 'xcuitest', + isNative: options.isNative ?? true, + viewportSize: options.viewportSize ?? { width: 9999, height: 9999 }, + filters: options.filters ?? {}, + results: [], + parsedDOM + } + + traverseTree(sourceJSON, ctx) + + return ctx.results +} diff --git a/packages/elements/src/locators/locator-generation.ts b/packages/elements/src/locators/locator-generation.ts new file mode 100644 index 0000000..cba05cc --- /dev/null +++ b/packages/elements/src/locators/locator-generation.ts @@ -0,0 +1,644 @@ +/** + * Locator strategy generation for mobile elements + */ + +import type { + JSONElement, + LocatorStrategy, + LocatorContext, + UniquenessResult, + XMLNode, + XMLDocument +} from './types.js' +import type { Element as XMLElement } from '@xmldom/xmldom' +import { + checkXPathUniqueness, + evaluateXPath, + isAttributeUnique +} from './xml-parsing.js' + +/** + * Check if a string value is valid for use in a locator + */ +function isValidValue(value: string | undefined): value is string { + return ( + value !== undefined && + value !== null && + value !== 'null' && + value.trim() !== '' + ) +} + +/** + * Escape special characters in text for use in selectors + */ +function escapeText(text: string): string { + return text.replace(/\\/g, '\\\\').replace(/"/g, '\\"').replace(/\n/g, '\\n') +} + +/** + * Escape value for use in XPath expressions + */ +function escapeXPathValue(value: string): string { + if (!value.includes("'")) { + return `'${value}'` + } + if (!value.includes('"')) { + return `"${value}"` + } + const parts: string[] = [] + let current = '' + for (const 
char of value) { + if (char === "'") { + if (current) { + parts.push(`'${current}'`) + } + parts.push('"\'"') + current = '' + } else { + current += char + } + } + if (current) { + parts.push(`'${current}'`) + } + return `concat(${parts.join(',')})` +} + +/** + * Wrap non-unique XPath with index + */ +function generateIndexedXPath(baseXPath: string, index: number): string { + return `(${baseXPath})[${index}]` +} + +/** + * Add .instance(n) for UiAutomator (0-based) + */ +function generateIndexedUiAutomator( + baseSelector: string, + index: number +): string { + return `${baseSelector}.instance(${index - 1})` +} + +/** + * Check uniqueness, falling back to regex if no DOM available + */ +function checkUniqueness( + ctx: LocatorContext, + xpath: string, + targetNode?: XMLNode +): UniquenessResult { + if (ctx.parsedDOM) { + return checkXPathUniqueness(ctx.parsedDOM, xpath, targetNode) + } + + const match = xpath.match(/\/\/\*\[@([^=]+)="([^"]+)"\]/) + if (match) { + const [, attr, value] = match + return { isUnique: isAttributeUnique(ctx.sourceXML, attr, value) } + } + return { isUnique: false } +} + +/** + * Get sibling index (1-based) among same-tag siblings + */ +function getSiblingIndex(element: XMLElement): number { + const parent = element.parentNode + if (!parent) { + return 1 + } + + const tagName = element.nodeName + let index = 0 + + for (let i = 0; i < parent.childNodes.length; i++) { + const child = parent.childNodes.item(i) + if (child?.nodeType === 1 && child.nodeName === tagName) { + index++ + if (child === element) { + return index + } + } + } + + return 1 +} + +/** + * Count siblings with same tag name + */ +function countSiblings(element: XMLElement): number { + const parent = element.parentNode + if (!parent) { + return 1 + } + + const tagName = element.nodeName + let count = 0 + + for (let i = 0; i < parent.childNodes.length; i++) { + const child = parent.childNodes.item(i) + if (child?.nodeType === 1 && child.nodeName === tagName) { + count++ + } 
+ } + + return count +} + +/** + * Find unique attribute for element in XPath format + */ +function findUniqueAttribute( + element: XMLElement, + ctx: LocatorContext +): string | null { + const attrs = ctx.isAndroid + ? ['resource-id', 'content-desc', 'text'] + : ['name', 'label', 'value'] + + for (const attr of attrs) { + const value = element.getAttribute(attr) + if (value && value.trim()) { + const xpath = `//*[@${attr}=${escapeXPathValue(value)}]` + const result = ctx.parsedDOM + ? checkXPathUniqueness(ctx.parsedDOM, xpath) + : { isUnique: isAttributeUnique(ctx.sourceXML, attr, value) } + + if (result.isUnique) { + return `@${attr}=${escapeXPathValue(value)}` + } + } + } + + return null +} + +/** + * Build hierarchical XPath by traversing up the DOM tree + */ +function buildHierarchicalXPath( + ctx: LocatorContext, + element: XMLElement, + maxDepth: number = 3 +): string | null { + if (!ctx.parsedDOM) { + return null + } + + const pathParts: string[] = [] + let current: XMLElement | null = element + let depth = 0 + + while (current && depth < maxDepth) { + const tagName = current.nodeName + const uniqueAttr = findUniqueAttribute(current, ctx) + + if (uniqueAttr) { + pathParts.unshift(`//${tagName}[${uniqueAttr}]`) + break + } else { + const siblingIndex = getSiblingIndex(current) + const siblingCount = countSiblings(current) + + if (siblingCount > 1) { + pathParts.unshift(`${tagName}[${siblingIndex}]`) + } else { + pathParts.unshift(tagName) + } + } + + const parent = current.parentNode as XMLElement | null + current = parent && parent.nodeType === 1 ? 
parent : null + depth++ + } + + if (pathParts.length === 0) { + return null + } + + let result = pathParts[0] + for (let i = 1; i < pathParts.length; i++) { + result += '/' + pathParts[i] + } + + if (!result.startsWith('//')) { + result = '//' + result + } + + return result +} + +/** + * Add XPath locator with uniqueness checking and fallbacks + */ +function addXPathLocator( + results: [LocatorStrategy, string][], + xpath: string, + ctx: LocatorContext, + targetNode?: XMLNode +): void { + const uniqueness = checkUniqueness(ctx, xpath, targetNode) + if (uniqueness.isUnique) { + results.push(['xpath', xpath]) + } else if (uniqueness.index) { + results.push(['xpath', generateIndexedXPath(xpath, uniqueness.index)]) + } else { + if (targetNode && ctx.parsedDOM) { + // @xmldom/xmldom 0.9+ XMLNode doesn't satisfy global Node; safe at runtime + const hierarchical = buildHierarchicalXPath( + ctx, + targetNode as unknown as XMLElement + ) + if (hierarchical) { + results.push(['xpath', hierarchical]) + } + } + results.push(['xpath', xpath]) + } +} + +/** + * Check if element is within UiAutomator scope + */ +function isInUiAutomatorScope( + element: JSONElement, + doc: XMLDocument | null +): boolean { + if (!doc) { + return true + } + + const hierarchyNodes = evaluateXPath(doc, '/hierarchy/*') + if (hierarchyNodes.length === 0) { + return true + } + + const lastIndex = hierarchyNodes.length + const pathParts = element.path.split('.') + if (pathParts.length === 0 || pathParts[0] === '') { + return true + } + + const firstIndex = parseInt(pathParts[0], 10) + return firstIndex === lastIndex - 1 +} + +/** + * Build Android UiAutomator selector with multiple attributes + */ +function buildUiAutomatorSelector(element: JSONElement): string | null { + const attrs = element.attributes + const parts: string[] = [] + + if (isValidValue(attrs['resource-id'])) { + parts.push(`resourceId("${attrs['resource-id']}")`) + } + if (isValidValue(attrs.text) && attrs.text!.length < 100) { + 
parts.push(`text("${escapeText(attrs.text!)}")`) + } + if (isValidValue(attrs['content-desc'])) { + parts.push(`description("${attrs['content-desc']}")`) + } + if (isValidValue(attrs.class)) { + parts.push(`className("${attrs.class}")`) + } + + if (parts.length === 0) { + return null + } + + return `android=new UiSelector().${parts.join('.')}` +} + +/** + * Build iOS predicate string with multiple conditions + */ +function buildPredicateString(element: JSONElement): string | null { + const attrs = element.attributes + const conditions: string[] = [] + + if (isValidValue(attrs.name)) { + conditions.push(`name == "${escapeText(attrs.name!)}"`) + } + if (isValidValue(attrs.label)) { + conditions.push(`label == "${escapeText(attrs.label!)}"`) + } + if (isValidValue(attrs.value)) { + conditions.push(`value == "${escapeText(attrs.value!)}"`) + } + if (attrs.visible === 'true') { + conditions.push('visible == 1') + } + if (attrs.enabled === 'true') { + conditions.push('enabled == 1') + } + + if (conditions.length === 0) { + return null + } + + return `-ios predicate string:${conditions.join(' AND ')}` +} + +/** + * Build iOS class chain selector + */ +function buildClassChain(element: JSONElement): string | null { + const attrs = element.attributes + const tagName = element.tagName + + if (!tagName.startsWith('XCUI')) { + return null + } + + let selector = `**/${tagName}` + + if (isValidValue(attrs.label)) { + selector += `[\`label == "${escapeText(attrs.label!)}"\`]` + } else if (isValidValue(attrs.name)) { + selector += `[\`name == "${escapeText(attrs.name!)}"\`]` + } + + return `-ios class chain:${selector}` +} + +/** + * Build XPath for element with unique identification + */ +function buildXPath( + element: JSONElement, + _sourceXML: string, + isAndroid: boolean +): string | null { + const attrs = element.attributes + const tagName = element.tagName + const conditions: string[] = [] + + if (isAndroid) { + if (isValidValue(attrs['resource-id'])) { + 
conditions.push(`@resource-id=${escapeXPathValue(attrs['resource-id']!)}`) // XPath 1.0 literals have no escape syntax, so every value is quoted via escapeXPathValue
+    }
+    if (isValidValue(attrs['content-desc'])) {
+      conditions.push(`@content-desc=${escapeXPathValue(attrs['content-desc']!)}`)
+    }
+    if (isValidValue(attrs.text) && attrs.text!.length < 100) {
+      conditions.push(`@text=${escapeXPathValue(attrs.text!)}`)
+    }
+  } else {
+    if (isValidValue(attrs.name)) {
+      conditions.push(`@name=${escapeXPathValue(attrs.name!)}`)
+    }
+    if (isValidValue(attrs.label)) {
+      conditions.push(`@label=${escapeXPathValue(attrs.label!)}`)
+    }
+    if (isValidValue(attrs.value)) {
+      conditions.push(`@value=${escapeXPathValue(attrs.value!)}`)
+    }
+  }
+
+  if (conditions.length === 0) {
+    return `//${tagName}`
+  }
+
+  return `//${tagName}[${conditions.join(' and ')}]`
+}
+
+/**
+ * Get simple locators based on single attributes
+ */
+function getSimpleSuggestedLocators(
+  element: JSONElement,
+  ctx: LocatorContext,
+  automationName: string,
+  targetNode?: XMLNode
+): [LocatorStrategy, string][] {
+  const results: [LocatorStrategy, string][] = []
+  const isAndroid = automationName.toLowerCase().includes('uiautomator')
+  const attrs = element.attributes
+  const inUiAutomatorScope = isAndroid
+    ?
isInUiAutomatorScope(element, ctx.parsedDOM) + : true + + if (isAndroid) { + // Resource ID + const resourceId = attrs['resource-id'] + if (isValidValue(resourceId)) { + const xpath = `//*[@resource-id="${resourceId}"]` + const uniqueness = checkUniqueness(ctx, xpath, targetNode) + + if (uniqueness.isUnique && inUiAutomatorScope) { + results.push([ + 'id', + `android=new UiSelector().resourceId("${resourceId}")` + ]) + } else if (uniqueness.index && inUiAutomatorScope) { + const base = `android=new UiSelector().resourceId("${resourceId}")` + results.push(['id', generateIndexedUiAutomator(base, uniqueness.index)]) + } + } + + // Content Description + const contentDesc = attrs['content-desc'] + if (isValidValue(contentDesc)) { + const xpath = `//*[@content-desc="${contentDesc}"]` + const uniqueness = checkUniqueness(ctx, xpath, targetNode) + + if (uniqueness.isUnique) { + results.push(['accessibility-id', `~${contentDesc}`]) + } + } + + // Text + const text = attrs.text + if (isValidValue(text) && text.length < 100) { + const xpath = `//*[@text="${escapeText(text)}"]` + const uniqueness = checkUniqueness(ctx, xpath, targetNode) + + if (uniqueness.isUnique && inUiAutomatorScope) { + results.push([ + 'text', + `android=new UiSelector().text("${escapeText(text)}")` + ]) + } else if (uniqueness.index && inUiAutomatorScope) { + const base = `android=new UiSelector().text("${escapeText(text)}")` + results.push([ + 'text', + generateIndexedUiAutomator(base, uniqueness.index) + ]) + } + } + } else { + // iOS: Accessibility ID (name) + const name = attrs.name + if (isValidValue(name)) { + const xpath = `//*[@name="${name}"]` + const uniqueness = checkUniqueness(ctx, xpath, targetNode) + + if (uniqueness.isUnique) { + results.push(['accessibility-id', `~${name}`]) + } + } + + // iOS: Label + const label = attrs.label + if (isValidValue(label) && label !== attrs.name) { + const xpath = `//*[@label="${escapeText(label)}"]` + const uniqueness = checkUniqueness(ctx, xpath, 
targetNode) + + if (uniqueness.isUnique) { + results.push([ + 'predicate-string', + `-ios predicate string:label == "${escapeText(label)}"` + ]) + } + } + + // iOS: Value + const value = attrs.value + if (isValidValue(value)) { + const xpath = `//*[@value="${escapeText(value)}"]` + const uniqueness = checkUniqueness(ctx, xpath, targetNode) + + if (uniqueness.isUnique) { + results.push([ + 'predicate-string', + `-ios predicate string:value == "${escapeText(value)}"` + ]) + } + } + } + + return results +} + +/** + * Get complex locators (combinations, XPath, etc.) + */ +function getComplexSuggestedLocators( + element: JSONElement, + ctx: LocatorContext, + automationName: string, + targetNode?: XMLNode +): [LocatorStrategy, string][] { + const results: [LocatorStrategy, string][] = [] + const isAndroid = automationName.toLowerCase().includes('uiautomator') + const inUiAutomatorScope = isAndroid + ? isInUiAutomatorScope(element, ctx.parsedDOM) + : true + + if (isAndroid) { + if (inUiAutomatorScope) { + const uiAutomator = buildUiAutomatorSelector(element) + if (uiAutomator) { + results.push(['uiautomator', uiAutomator]) + } + } + + const xpath = buildXPath(element, ctx.sourceXML, true) + if (xpath) { + addXPathLocator(results, xpath, ctx, targetNode) + } + + if (inUiAutomatorScope && isValidValue(element.attributes.class)) { + results.push([ + 'class-name', + `android=new UiSelector().className("${element.attributes.class}")` + ]) + } + } else { + const predicate = buildPredicateString(element) + if (predicate) { + results.push(['predicate-string', predicate]) + } + + const classChain = buildClassChain(element) + if (classChain) { + results.push(['class-chain', classChain]) + } + + const xpath = buildXPath(element, ctx.sourceXML, false) + if (xpath) { + addXPathLocator(results, xpath, ctx, targetNode) + } + + const type = element.tagName + if (type.startsWith('XCUIElementType')) { + results.push(['class-name', `-ios class chain:**/${type}`]) + } + } + + return results 
+} + +/** + * Get all suggested locators for an element + */ +export function getSuggestedLocators( + element: JSONElement, + sourceXML: string, + automationName: string, + ctx?: LocatorContext, + targetNode?: XMLNode +): [LocatorStrategy, string][] { + const locatorCtx = ctx ?? { + sourceXML, + parsedDOM: null, + isAndroid: automationName.toLowerCase().includes('uiautomator') + } + + const simpleLocators = getSimpleSuggestedLocators( + element, + locatorCtx, + automationName, + targetNode + ) + const complexLocators = getComplexSuggestedLocators( + element, + locatorCtx, + automationName, + targetNode + ) + + const seen = new Set() + const results: [LocatorStrategy, string][] = [] + + for (const locator of [...simpleLocators, ...complexLocators]) { + if (!seen.has(locator[1])) { + seen.add(locator[1]) + results.push(locator) + } + } + + return results +} + +/** + * Get the best (first priority) locator for an element + */ +export function getBestLocator( + element: JSONElement, + sourceXML: string, + automationName: string +): string | null { + const locators = getSuggestedLocators(element, sourceXML, automationName) + return locators.length > 0 ? 
locators[0][1] : null +} + +/** + * Convert locator array to object format + */ +export function locatorsToObject( + locators: [LocatorStrategy, string][] +): Record { + const result: Record = {} + for (const [strategy, value] of locators) { + if (!result[strategy]) { + result[strategy] = value + } + } + return result +} diff --git a/packages/elements/src/locators/types.ts b/packages/elements/src/locators/types.ts new file mode 100644 index 0000000..e381d45 --- /dev/null +++ b/packages/elements/src/locators/types.ts @@ -0,0 +1,108 @@ +/** + * Type definitions for mobile element locator generation + */ + +import type { Document as XMLDocument, Node as XMLNode } from '@xmldom/xmldom' +export type { XMLDocument, XMLNode } + +export interface ElementAttributes { + // Android attributes + 'resource-id'?: string + 'content-desc'?: string + text?: string + class?: string + package?: string + clickable?: string + 'long-clickable'?: string + focusable?: string + checkable?: string + scrollable?: string + enabled?: string + displayed?: string + bounds?: string // Format: "[x1,y1][x2,y2]" + + // iOS attributes + type?: string + name?: string + label?: string + value?: string + accessible?: string + visible?: string + x?: string + y?: string + width?: string + height?: string + + // Generic + [key: string]: string | undefined +} + +export interface JSONElement { + children: JSONElement[] + tagName: string + attributes: ElementAttributes + path: string // Dot-separated index path for tree traversal +} + +export interface Bounds { + x: number + y: number + width: number + height: number +} + +export interface FilterOptions { + includeTagNames?: string[] // Only include these tags (whitelist) + excludeTagNames?: string[] // Exclude these tags (blacklist) + requireAttributes?: string[] // Must have at least one of these attributes + minAttributeCount?: number // Minimum number of non-empty attributes + fetchableOnly?: boolean // Only interactable elements + clickableOnly?: boolean 
// Only elements with clickable="true" + visibleOnly?: boolean // Only visible/displayed elements +} + +export interface UniquenessResult { + isUnique: boolean + index?: number // 1-based index if not unique + totalMatches?: number +} + +export type LocatorStrategy = + | 'accessibility-id' + | 'id' + | 'class-name' + | 'xpath' + | 'predicate-string' + | 'class-chain' + | 'uiautomator' + | 'text' + +export interface LocatorContext { + sourceXML: string + parsedDOM: XMLDocument | null + isAndroid: boolean +} + +export interface ElementWithLocators { + tagName: string + locators: Record + text: string + contentDesc: string + resourceId: string + accessibilityId: string + label: string + value: string + className: string + clickable: boolean + enabled: boolean + displayed: boolean + bounds: Bounds + isInViewport: boolean +} + +export interface GenerateLocatorsOptions { + platform: 'android' | 'ios' + viewportSize?: { width: number; height: number } + filters?: FilterOptions + isNative?: boolean +} diff --git a/packages/elements/src/locators/xml-parsing.ts b/packages/elements/src/locators/xml-parsing.ts new file mode 100644 index 0000000..a100a04 --- /dev/null +++ b/packages/elements/src/locators/xml-parsing.ts @@ -0,0 +1,329 @@ +/** + * XML parsing utilities for mobile element source + */ + +import { DOMParser } from '@xmldom/xmldom' +import type { + Document as XMLDocument, + Element as XMLElement, + Node as XMLNode +} from '@xmldom/xmldom' +import xpath from 'xpath' +import type { + ElementAttributes, + JSONElement, + Bounds, + UniquenessResult +} from './types.js' + +/** + * Get child nodes that are elements (not text nodes, comments, etc.) 
+ */ +function childNodesOf(node: XMLNode): XMLNode[] { + const children: XMLNode[] = [] + if (node.childNodes) { + for (let i = 0; i < node.childNodes.length; i++) { + const child = node.childNodes.item(i) + if (child?.nodeType === 1) { + children.push(child) + } + } + } + return children +} + +/** + * Recursively translate DOM node to JSONElement + */ +function translateRecursively( + domNode: XMLNode, + parentPath: string = '', + index: number | null = null +): JSONElement { + const attributes: ElementAttributes = {} + + const element = domNode as XMLElement + if (element.attributes) { + for (let attrIdx = 0; attrIdx < element.attributes.length; attrIdx++) { + const attr = element.attributes.item(attrIdx) + if (attr) { + attributes[attr.name] = attr.value.replace(/(\n)/gm, '\\n') + } + } + } + + const path = + index === null ? '' : `${parentPath ? parentPath + '.' : ''}${index}` + + return { + children: childNodesOf(domNode).map((childNode, childIndex) => + translateRecursively(childNode as XMLNode, path, childIndex) + ), + tagName: domNode.nodeName, + attributes, + path + } +} + +/** + * Compare two nodes for equality by platform-specific attributes + * (reference equality via === may fail when nodes come from different traversals) + */ +function isSameElement(node1: XMLNode, node2: XMLNode): boolean { + if (node1.nodeType !== 1 || node2.nodeType !== 1) { + return false + } + const el1 = node1 as XMLElement + const el2 = node2 as XMLElement + + if (el1.nodeName !== el2.nodeName) { + return false + } + + // For Android, compare by bounds (unique per element) + const bounds1 = el1.getAttribute('bounds') + const bounds2 = el2.getAttribute('bounds') + if (bounds1 && bounds2) { + return bounds1 === bounds2 + } + + // For iOS, compare by x, y, width, height + const x1 = el1.getAttribute('x') + const y1 = el1.getAttribute('y') + const x2 = el2.getAttribute('x') + const y2 = el2.getAttribute('y') + if (x1 && y1 && x2 && y2) { + return ( + x1 === x2 && + y1 === y2 && + 
el1.getAttribute('width') === el2.getAttribute('width') && + el1.getAttribute('height') === el2.getAttribute('height') + ) + } + + return false +} + +/** + * Convert XML page source to JSON tree structure + */ +export function xmlToJSON(sourceXML: string): JSONElement | null { + try { + const parser = new DOMParser() + const sourceDoc = parser.parseFromString(sourceXML, 'text/xml') + + // xmldom 0.9+ throws ParseError for fatal errors (caught below); this catches non-fatal cases + const parseErrors = sourceDoc.getElementsByTagName('parsererror') + if (parseErrors.length > 0) { + console.error( + '[xmlToJSON] XML parsing error:', + parseErrors[0].textContent + ) + return null + } + + const children = childNodesOf(sourceDoc) + const firstChild = + children[0] || + (sourceDoc.documentElement + ? childNodesOf(sourceDoc.documentElement)[0] + : null) + + return firstChild + ? translateRecursively(firstChild) + : { children: [], tagName: '', attributes: {}, path: '' } + } catch (e) { + console.error('[xmlToJSON] Failed to parse XML:', e) + return null + } +} + +/** + * Parse XML source to DOM Document for XPath evaluation + */ +export function xmlToDOM(sourceXML: string): XMLDocument | null { + try { + const parser = new DOMParser() + const doc = parser.parseFromString(sourceXML, 'text/xml') + + // xmldom 0.9+ throws ParseError for fatal errors (caught below); this catches non-fatal cases + const parseErrors = doc.getElementsByTagName('parsererror') + if (parseErrors.length > 0) { + console.error('[xmlToDOM] XML parsing error:', parseErrors[0].textContent) + return null + } + + return doc + } catch (e) { + console.error('[xmlToDOM] Failed to parse XML:', e) + return null + } +} + +/** + * Execute XPath query on DOM document + */ +export function evaluateXPath(doc: XMLDocument, xpathExpr: string): XMLNode[] { + try { + // @xmldom/xmldom 0.9+ types don't satisfy global Node; xpath still works at runtime + const nodes = xpath.select(xpathExpr, doc as unknown as Node) + if 
(Array.isArray(nodes)) {
+      return nodes as unknown as XMLNode[]
+    }
+    return []
+  } catch (e) {
+    console.error(`[evaluateXPath] Failed to evaluate "${xpathExpr}":`, e)
+    return []
+  }
+}
+
+/**
+ * Check whether an XPath matches exactly one node; when it matches several, report the 1-based position of targetNode (if given and found)
+ */
+export function checkXPathUniqueness(
+  doc: XMLDocument,
+  xpathExpr: string,
+  targetNode?: XMLNode
+): UniquenessResult {
+  try {
+    const nodes = evaluateXPath(doc, xpathExpr)
+    const totalMatches = nodes.length
+
+    if (totalMatches === 0) {
+      return { isUnique: false }
+    }
+
+    if (totalMatches === 1) {
+      return { isUnique: true }
+    }
+
+    // Not unique - find index of target node if provided
+    if (targetNode) {
+      let position = nodes.indexOf(targetNode) // identity first; a same-tag, same-bounds look-alike may precede the target
+      for (let i = 0; position === -1 && i < nodes.length; i++) {
+        if (isSameElement(nodes[i], targetNode)) {
+          position = i
+        }
+      }
+      if (position !== -1) {
+        return { isUnique: false, index: position + 1, totalMatches } // 1-based index for XPath
+      }
+    }
+
+    return { isUnique: false, totalMatches }
+  } catch (e) {
+    console.error(`[checkXPathUniqueness] Error checking "${xpathExpr}":`, e)
+    return { isUnique: false }
+  }
+}
+
+/**
+ * Find DOM node by JSONElement path (e.g., "0.2.1")
+ */
+export function findDOMNodeByPath(
+  doc: XMLDocument,
+  path: string
+): XMLNode | null {
+  if (!path) {
+    return doc.documentElement
+  }
+
+  const indices = path.split('.').map(Number)
+  let current: XMLNode | null = doc.documentElement
+
+  for (const index of indices) {
+    if (!current) {
+      return null
+    }
+
+    const children: XMLNode[] = []
+    if (current.childNodes) {
+      for (let i = 0; i < current.childNodes.length; i++) {
+        const child = current.childNodes.item(i)
+        if (child?.nodeType === 1) {
+          children.push(child)
+        }
+      }
+    }
+
+    current = children[index] || null
+  }
+
+  return current
+}
+
+/**
+ * Parse an Android bounds string "[x1,y1][x2,y2]" into x/y/width/height; coordinates may be negative (view extending off-screen) and an unparsable string yields an all-zero box
+ */
+export function parseAndroidBounds(bounds: string): Bounds {
+  const match = bounds.match(/\[(-?\d+),(-?\d+)\]\[(-?\d+),(-?\d+)\]/)
+  if (!match) {
+    return { x: 0, y: 0, width: 0, height:
0 }
+  }
+
+  const x1 = parseInt(match[1], 10)
+  const y1 = parseInt(match[2], 10)
+  const x2 = parseInt(match[3], 10)
+  const y2 = parseInt(match[4], 10)
+
+  return {
+    x: x1,
+    y: y1,
+    width: x2 - x1,
+    height: y2 - y1
+  }
+}
+
+/**
+ * Parse iOS element bounds from individual x, y, width, height attributes
+ */
+export function parseIOSBounds(attributes: ElementAttributes): Bounds {
+  return {
+    x: parseInt(attributes.x || '0', 10),
+    y: parseInt(attributes.y || '0', 10),
+    width: parseInt(attributes.width || '0', 10),
+    height: parseInt(attributes.height || '0', 10)
+  }
+}
+
+/**
+ * Flatten JSON element tree to array (depth-first)
+ */
+export function flattenElementTree(root: JSONElement): JSONElement[] {
+  const result: JSONElement[] = []
+
+  function traverse(element: JSONElement) {
+    result.push(element)
+    for (const child of element.children) {
+      traverse(child)
+    }
+  }
+
+  traverse(root)
+  return result
+}
+
+/**
+ * Count attribute="value" occurrences in the raw source XML; the name must start at a whitespace boundary so that e.g. "value" does not also count "placeholderValue"
+ */
+export function countAttributeOccurrences(
+  sourceXML: string,
+  attribute: string,
+  value: string
+): number {
+  const escapedValue = value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
+  const pattern = new RegExp(`(?:^|\\s)${attribute}=["']${escapedValue}["']`, 'g')
+  const matches = sourceXML.match(pattern)
+  return matches ?
matches.length : 0 +} + +/** + * Check if an attribute value is unique in the source (fast regex-based check) + */ +export function isAttributeUnique( + sourceXML: string, + attribute: string, + value: string +): boolean { + return countAttributeOccurrences(sourceXML, attribute, value) === 1 +} diff --git a/packages/elements/src/mobile-elements.ts b/packages/elements/src/mobile-elements.ts new file mode 100644 index 0000000..3de90b1 --- /dev/null +++ b/packages/elements/src/mobile-elements.ts @@ -0,0 +1,163 @@ +/** + * Mobile element detection utilities for iOS and Android + * + * Uses page source parsing for optimal performance (2 HTTP calls vs 600+ for 50 elements) + */ + +import type { + ElementWithLocators, + FilterOptions, + LocatorStrategy +} from './locators/index.js' +import { + generateAllElementLocators, + getDefaultFilters +} from './locators/index.js' + +/** + * Element info returned by getMobileVisibleElements + * Uses uniform fields (all elements have same keys) to enable TOON tabular format + */ +export interface MobileElementInfo { + selector: string + tagName: string + isInViewport: boolean + text: string + resourceId: string + accessibilityId: string + isEnabled: boolean + altSelector: string // Single alternative selector (flattened for tabular format) + // Only present when includeBounds=true + bounds?: { x: number; y: number; width: number; height: number } +} + +/** + * Options for getMobileVisibleElements + */ +export interface GetMobileElementsOptions { + includeContainers?: boolean + includeBounds?: boolean + filterOptions?: FilterOptions +} + +/** + * Locator strategy priority order for selecting best selector + * Earlier = higher priority + */ +const LOCATOR_PRIORITY: LocatorStrategy[] = [ + 'accessibility-id', // Most stable, cross-platform + 'id', // Android resource-id + 'text', // Text-based (can be fragile but readable) + 'predicate-string', // iOS predicate + 'class-chain', // iOS class chain + 'uiautomator', // Android UiAutomator 
compound + 'xpath' // XPath (last resort, brittle) + // 'class-name' intentionally excluded - too generic +] + +/** + * Select best locators from available strategies + * Returns [primarySelector, ...alternativeSelectors] + */ +function selectBestLocators(locators: Record): string[] { + const selected: string[] = [] + + // Find primary selector based on priority + for (const strategy of LOCATOR_PRIORITY) { + if (locators[strategy]) { + selected.push(locators[strategy]) + break + } + } + + // Add one alternative if available (different strategy) + for (const strategy of LOCATOR_PRIORITY) { + if (locators[strategy] && !selected.includes(locators[strategy])) { + selected.push(locators[strategy]) + break + } + } + + return selected +} + +/** + * Convert ElementWithLocators to MobileElementInfo + * Uses uniform fields (all elements have same keys) to enable CSV tabular format + */ +function toMobileElementInfo( + element: ElementWithLocators, + includeBounds: boolean +): MobileElementInfo { + const selectedLocators = selectBestLocators(element.locators) + + // Use contentDesc for accessibilityId on Android, or name on iOS + const accessId = element.accessibilityId || element.contentDesc + + // Build object with ALL fields for uniform schema (enables CSV tabular format) + // Empty string '' used for missing values to keep schema consistent + const info: MobileElementInfo = { + selector: selectedLocators[0] || '', + tagName: element.tagName, + isInViewport: element.isInViewport, + text: element.text || '', + resourceId: element.resourceId || '', + accessibilityId: accessId || '', + isEnabled: element.enabled !== false, + altSelector: selectedLocators[1] || '' // Single alternative (flattened for tabular) + } + + // Only include bounds if explicitly requested (adds 4 extra columns) + if (includeBounds) { + info.bounds = element.bounds + } + + return info +} + +/** + * Get viewport size from browser + */ +async function getViewportSize( + browser: WebdriverIO.Browser +): 
Promise<{ width: number; height: number }> { + try { + const size = await browser.getWindowSize() + return { width: size.width, height: size.height } + } catch { + return { width: 9999, height: 9999 } + } +} + +/** + * Get all visible elements from a mobile app + * + * Performance: 2 HTTP calls (getWindowSize + getPageSource) vs 12+ per element with legacy approach + */ +export async function getMobileVisibleElements( + browser: WebdriverIO.Browser, + platform: 'ios' | 'android', + options: GetMobileElementsOptions = {} +): Promise { + const { + includeContainers = false, + includeBounds = false, + filterOptions + } = options + + const viewportSize = await getViewportSize(browser) + const pageSource = await browser.getPageSource() + + const filters: FilterOptions = { + ...getDefaultFilters(platform, includeContainers), + ...filterOptions + } + + const elements = generateAllElementLocators(pageSource, { + platform, + viewportSize, + filters + }) + + return elements.map((el) => toMobileElementInfo(el, includeBounds)) +} diff --git a/packages/elements/tests/accessibility-tree.test.ts b/packages/elements/tests/accessibility-tree.test.ts new file mode 100644 index 0000000..7f187b5 --- /dev/null +++ b/packages/elements/tests/accessibility-tree.test.ts @@ -0,0 +1,26 @@ +import { describe, it, expect, vi } from 'vitest' +import { getBrowserAccessibilityTree } from '../src/accessibility-tree.js' + +describe('getBrowserAccessibilityTree', () => { + it('calls browser.execute and returns result', async () => { + const nodes = [ + { + role: 'button', + name: 'Submit', + selector: 'button*=Submit', + level: '', + disabled: '', + checked: '', + expanded: '', + selected: '', + pressed: '', + required: '', + readonly: '' + } + ] + const mockBrowser = { execute: vi.fn().mockResolvedValue(nodes) } as any + const result = await getBrowserAccessibilityTree(mockBrowser) + expect(mockBrowser.execute).toHaveBeenCalledTimes(1) + expect(result).toEqual(nodes) + }) +}) diff --git 
a/packages/elements/tests/browser-elements.test.ts b/packages/elements/tests/browser-elements.test.ts new file mode 100644 index 0000000..6fba3af --- /dev/null +++ b/packages/elements/tests/browser-elements.test.ts @@ -0,0 +1,22 @@ +import { describe, it, expect, vi } from 'vitest' +import { getInteractableBrowserElements } from '../src/browser-elements.js' + +describe('getInteractableBrowserElements', () => { + it('calls browser.execute with includeBounds=false by default', async () => { + const mockBrowser = { execute: vi.fn().mockResolvedValue([]) } as any + const result = await getInteractableBrowserElements(mockBrowser) + expect(mockBrowser.execute).toHaveBeenCalledTimes(1) + expect(result).toEqual([]) + }) + + it('passes includeBounds option to script', async () => { + const mockBrowser = { + execute: vi.fn().mockResolvedValue([{ tagName: 'button', name: 'OK' }]) + } as any + const result = await getInteractableBrowserElements(mockBrowser, { + includeBounds: true + }) + expect(result).toHaveLength(1) + expect(result[0].tagName).toBe('button') + }) +}) diff --git a/packages/elements/tests/locators/locator-generation.test.ts b/packages/elements/tests/locators/locator-generation.test.ts new file mode 100644 index 0000000..c5d2de4 --- /dev/null +++ b/packages/elements/tests/locators/locator-generation.test.ts @@ -0,0 +1,23 @@ +import { describe, it, expect } from 'vitest' +import { locatorsToObject } from '@wdio/elements/locators' + +describe('locatorsToObject', () => { + it('converts locator array to object', () => { + const locators: [any, string][] = [ + ['accessibility-id', '~Submit'], + ['xpath', '//XCUIElementTypeButton[@name="Submit"]'] + ] + const result = locatorsToObject(locators) + expect(result['accessibility-id']).toBe('~Submit') + expect(result['xpath']).toBe('//XCUIElementTypeButton[@name="Submit"]') + }) + + it('returns first value for duplicate strategies', () => { + const locators: [any, string][] = [ + ['xpath', '//first'], + ['xpath', 
'//second'] + ] + const result = locatorsToObject(locators) + expect(result['xpath']).toBe('//first') + }) +}) diff --git a/packages/elements/tests/mobile-elements.test.ts b/packages/elements/tests/mobile-elements.test.ts new file mode 100644 index 0000000..64f1986 --- /dev/null +++ b/packages/elements/tests/mobile-elements.test.ts @@ -0,0 +1,13 @@ +import { describe, it, expect, vi } from 'vitest' +import { getMobileVisibleElements } from '../src/mobile-elements.js' + +describe('getMobileVisibleElements', () => { + it('returns empty array for unparseable XML', async () => { + const mockBrowser = { + getWindowSize: vi.fn().mockResolvedValue({ width: 375, height: 812 }), + getPageSource: vi.fn().mockResolvedValue(' Date: Wed, 20 May 2026 18:59:56 +0200 Subject: [PATCH 2/9] chore: Wire @wdio/elements into monorepo - Adds package to pnpm-workspace.yaml and registers path aliases in the root tsconfig.json for IDE resolution. - Fix linter --- pnpm-workspace.yaml | 1 + tsconfig.json | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index da8b8f2..ae25e1d 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -6,4 +6,5 @@ packages: - 'packages/app' - 'packages/nightwatch-devtools' - 'packages/selenium-devtools' + - 'packages/elements' - 'example' diff --git a/tsconfig.json b/tsconfig.json index 17fb23b..5273df4 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -32,7 +32,9 @@ "@wdio/devtools-service": ["packages/service/src/index.ts"], "@wdio/devtools-service/*": ["packages/service/src/*"], "@wdio/selenium-devtools": ["packages/selenium-devtools/src/index.ts"], - "@wdio/selenium-devtools/*": ["packages/selenium-devtools/src/*"] + "@wdio/selenium-devtools/*": ["packages/selenium-devtools/src/*"], + "@wdio/elements": ["packages/elements/src/index.ts"], + "@wdio/elements/*": ["packages/elements/src/*"] } } } From 8ecd79e1907d3156e46716662102e63e741972eb Mon Sep 17 00:00:00 2001 From: Vince Graics Date: Sun, 
31 May 2026 18:27:41 +0200 Subject: [PATCH 3/9] fix: Include `./locators` in package.json --- packages/elements/package.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/elements/package.json b/packages/elements/package.json index 2078346..058044f 100644 --- a/packages/elements/package.json +++ b/packages/elements/package.json @@ -9,6 +9,10 @@ ".": { "types": "./dist/index.d.ts", "import": "./dist/index.js" + }, + "./locators": { + "types": "./dist/locators/index.d.ts", + "import": "./dist/locators/index.js" } }, "types": "./dist/index.d.ts", From 117a63080d103b796a0fb7440856d8a849591188 Mon Sep 17 00:00:00 2001 From: Vince Graics Date: Sun, 31 May 2026 18:29:09 +0200 Subject: [PATCH 4/9] feat: Extend AI trace readability with visual snapshot feature - Modify a11y tree query --- packages/elements/src/accessibility-tree.ts | 2 + packages/elements/src/get-elements.ts | 13 +- packages/elements/src/index.ts | 3 + packages/elements/src/mobile-elements.ts | 38 ++- packages/elements/src/snapshot.ts | 243 ++++++++++++++++++ .../elements/tests/accessibility-tree.test.ts | 1 + packages/elements/tests/snapshot.test.ts | 233 +++++++++++++++++ 7 files changed, 524 insertions(+), 9 deletions(-) create mode 100644 packages/elements/src/snapshot.ts create mode 100644 packages/elements/tests/snapshot.test.ts diff --git a/packages/elements/src/accessibility-tree.ts b/packages/elements/src/accessibility-tree.ts index 7abb203..6d8bb59 100644 --- a/packages/elements/src/accessibility-tree.ts +++ b/packages/elements/src/accessibility-tree.ts @@ -10,6 +10,7 @@ export interface AccessibilityNode { role: string name: string selector: string + depth: number level: number | string disabled: string checked: string @@ -398,6 +399,7 @@ const accessibilityTreeScript = () => role, name, selector, + depth, level: getLevel(el) ?? 
'', ...getState(el) } diff --git a/packages/elements/src/get-elements.ts b/packages/elements/src/get-elements.ts index 2e24e9b..ee1d27d 100644 --- a/packages/elements/src/get-elements.ts +++ b/packages/elements/src/get-elements.ts @@ -1,11 +1,14 @@ import { getInteractableBrowserElements } from './browser-elements.js' -import { getMobileVisibleElements } from './mobile-elements.js' +import { getMobileVisibleElementsWithTree } from './mobile-elements.js' +import type { JSONElement } from './locators/types.js' export type VisibleElementsResult = { total: number showing: number hasMore: boolean elements: unknown[] + /** Raw JSON element tree — only present for mobile (android/ios) sessions */ + tree?: JSONElement } export async function getElements( @@ -27,13 +30,16 @@ export async function getElements( } = params let elements: { isInViewport?: boolean }[] + let tree: JSONElement | undefined if (browser.isAndroid || browser.isIOS) { const platform = browser.isAndroid ? 'android' : 'ios' - elements = await getMobileVisibleElements(browser, platform, { + const result = await getMobileVisibleElementsWithTree(browser, platform, { includeContainers, includeBounds }) + elements = result.elements + tree = result.tree ?? undefined } else { elements = await getInteractableBrowserElements(browser, { includeBounds }) } @@ -55,6 +61,7 @@ export async function getElements( total, showing: elements.length, hasMore: offset + elements.length < total, - elements + elements, + ...(tree !== undefined ? 
{ tree } : {}) } } diff --git a/packages/elements/src/index.ts b/packages/elements/src/index.ts index 5cac548..dc6f116 100644 --- a/packages/elements/src/index.ts +++ b/packages/elements/src/index.ts @@ -15,3 +15,6 @@ export type { export { getElements } from './get-elements.js' export type { VisibleElementsResult } from './get-elements.js' + +export { serializeWebSnapshot, serializeMobileSnapshot } from './snapshot.js' +export type { JSONElement } from './locators/types.js' diff --git a/packages/elements/src/mobile-elements.ts b/packages/elements/src/mobile-elements.ts index 3de90b1..0fc45b4 100644 --- a/packages/elements/src/mobile-elements.ts +++ b/packages/elements/src/mobile-elements.ts @@ -7,11 +7,13 @@ import type { ElementWithLocators, FilterOptions, + JSONElement, LocatorStrategy } from './locators/index.js' import { generateAllElementLocators, - getDefaultFilters + getDefaultFilters, + xmlToJSON } from './locators/index.js' /** @@ -130,15 +132,16 @@ async function getViewportSize( } /** - * Get all visible elements from a mobile app + * Get all visible elements from a mobile app, also returning the raw JSON element tree. + * Single parse of page source: tree and flat list share one xmlToJSON call. 
* * Performance: 2 HTTP calls (getWindowSize + getPageSource) vs 12+ per element with legacy approach */ -export async function getMobileVisibleElements( +export async function getMobileVisibleElementsWithTree( browser: WebdriverIO.Browser, platform: 'ios' | 'android', options: GetMobileElementsOptions = {} -): Promise { +): Promise<{ elements: MobileElementInfo[]; tree: JSONElement | null }> { const { includeContainers = false, includeBounds = false, @@ -153,11 +156,34 @@ export async function getMobileVisibleElements( ...filterOptions } - const elements = generateAllElementLocators(pageSource, { + const tree = xmlToJSON(pageSource) + + const elementLocators = generateAllElementLocators(pageSource, { platform, viewportSize, filters }) - return elements.map((el) => toMobileElementInfo(el, includeBounds)) + const elements = elementLocators.map((el) => + toMobileElementInfo(el, includeBounds) + ) + return { elements, tree } +} + +/** + * Get all visible elements from a mobile app + * + * Performance: 2 HTTP calls (getWindowSize + getPageSource) vs 12+ per element with legacy approach + */ +export async function getMobileVisibleElements( + browser: WebdriverIO.Browser, + platform: 'ios' | 'android', + options: GetMobileElementsOptions = {} +): Promise { + const { elements } = await getMobileVisibleElementsWithTree( + browser, + platform, + options + ) + return elements } diff --git a/packages/elements/src/snapshot.ts b/packages/elements/src/snapshot.ts new file mode 100644 index 0000000..94a167e --- /dev/null +++ b/packages/elements/src/snapshot.ts @@ -0,0 +1,243 @@ +/** + * AI-readable snapshot serializers + * + * Converts accessibility trees and mobile element trees into depth-indented + * text files that LLMs can consume without any parsing. + */ + +import type { AccessibilityNode } from './accessibility-tree.js' +import type { JSONElement } from './locators/types.js' + +/** + * Roles that can be interacted with — rendered with `→ selector`. 
+ * Structural roles (heading, img, form, nav, …) are intentionally excluded. + */ +const INTERACTIVE_ROLES = new Set([ + 'button', + 'link', + 'textbox', + 'checkbox', + 'radio', + 'combobox', + 'slider', + 'searchbox', + 'spinbutton', + 'switch', + 'tab', + 'menuitem', + 'option' +]) + +/** + * Walk backwards from `index` to find the nearest ancestor with a non-empty name. + * Returns that name, or undefined if none found. + */ +function inferPurpose( + nodes: AccessibilityNode[], + index: number +): string | undefined { + const myDepth = nodes[index].depth + for (let i = index - 1; i >= 0; i--) { + if (nodes[i].depth < myDepth && nodes[i].name) { + return nodes[i].name + } + } + return undefined +} + +/** + * Serialize a web accessibility tree into a depth-indented text snapshot. + * + * @param nodes Flat ordered node list from getBrowserAccessibilityTree() + * @param context Optional page context for the header line + */ +export function serializeWebSnapshot( + nodes: AccessibilityNode[], + context?: { url?: string; title?: string } +): string { + let header = '[Page' + if (context?.title) { + header += `: ${context.title}` + } + if (context?.url) { + header += ` — ${context.url}` + } + header += ']' + + const lines: string[] = [header] + + for (let i = 0; i < nodes.length; i++) { + const node = nodes[i] + const indent = ' '.repeat(node.depth + 1) // +1 indents everything under the header + const isInteractive = INTERACTIVE_ROLES.has(node.role) + + // Heading gets level suffix: heading[2] + const roleLabel = + node.role === 'heading' && node.level + ? 
`heading[${node.level}]` + : node.role + + if (isInteractive) { + if (node.name) { + lines.push(`${indent}${roleLabel} "${node.name}" → ${node.selector}`) + } else { + const purpose = inferPurpose(nodes, i) + if (purpose) { + lines.push( + `${indent}${roleLabel} ∈ "${purpose}" → ${node.selector}` + ) + } else if (node.selector) { + lines.push(`${indent}${roleLabel} → ${node.selector}`) + } + // No name, no purpose, no selector: skip — not useful to an LLM + } + } else { + // Container / structural: show role + name when present, no selector + lines.push( + node.name + ? `${indent}${roleLabel} "${node.name}"` + : `${indent}${roleLabel}` + ) + } + } + + return lines.join('\n') +} + +// --------------------------------------------------------------------------- +// Mobile snapshot helpers +// --------------------------------------------------------------------------- + +/** Shorten fully-qualified Android/iOS class names to the last segment. */ +function simplifyTag(tagName: string): string { + const dot = tagName.lastIndexOf('.') + if (dot !== -1) { + return tagName.slice(dot + 1) + } + // iOS: strip XCUIElementType prefix + return tagName.replace(/^XCUIElementType/, '') +} + +function getBestAndroidLocator( + attrs: JSONElement['attributes'] +): string | undefined { + if (attrs['content-desc']) { + return `accessibility-id:${attrs['content-desc']}` + } + if (attrs['resource-id']) { + return `id:${attrs['resource-id']}` + } + if (attrs.text) { + return `text:${attrs.text}` + } + return undefined +} + +function getBestIOSLocator( + attrs: JSONElement['attributes'] +): string | undefined { + if (attrs.name) { + return `accessibility-id:${attrs.name}` + } + if (attrs.label) { + return `label:${attrs.label}` + } + if (attrs.value) { + return `value:${attrs.value}` + } + return undefined +} + +function getMobileNodeIdentity( + attrs: JSONElement['attributes'], + platform: 'android' | 'ios' +): string { + if (platform === 'android') { + return attrs['content-desc'] || 
attrs.text || '' + } + return attrs.name || attrs.label || attrs.value || attrs.text || '' +} + +function isMobileInteractive( + element: JSONElement, + platform: 'android' | 'ios' +): boolean { + const attrs = element.attributes + if (platform === 'android') { + return attrs.clickable === 'true' || attrs['long-clickable'] === 'true' + } + // iOS: accessible="true" or a type known to be interactive + return attrs.accessible === 'true' +} + +function walkMobileTree( + element: JSONElement, + platform: 'android' | 'ios', + depth: number, + lines: string[], + parentIdentity?: string +): void { + const attrs = element.attributes + const tag = simplifyTag(element.tagName) + const indent = ' '.repeat(depth) + const identity = getMobileNodeIdentity(attrs, platform) + const interactive = isMobileInteractive(element, platform) + + const locator = + platform === 'android' + ? getBestAndroidLocator(attrs) + : getBestIOSLocator(attrs) + + if (interactive && locator) { + if (identity) { + lines.push(`${indent}${tag} "${identity}" → ${locator}`) + } else if (parentIdentity) { + lines.push(`${indent}${tag} ∈ "${parentIdentity}" → ${locator}`) + } else { + lines.push(`${indent}${tag} → ${locator}`) + } + } else { + // Container or non-locatable: show tag + identity if any + lines.push(identity ? `${indent}${tag} "${identity}"` : `${indent}${tag}`) + } + + for (const child of element.children || []) { + walkMobileTree( + child, + platform, + depth + 1, + lines, + identity || parentIdentity + ) + } +} + +/** + * Serialize a mobile element tree into a depth-indented text snapshot. 
+ * + * @param root Root JSONElement from the page source XML parse + * @param context Platform, optional device name and viewport + */ +export function serializeMobileSnapshot( + root: JSONElement, + context: { + platform: 'android' | 'ios' + deviceName?: string + viewport?: { width: number; height: number } + } +): string { + const { platform, deviceName, viewport } = context + + let header = `[${platform}` + if (deviceName) { + header += ` — ${deviceName}` + } + if (viewport) { + header += ` (${viewport.width}×${viewport.height})` + } + header += ']' + + const lines: string[] = [header] + walkMobileTree(root, platform, 1, lines) + return lines.join('\n') +} diff --git a/packages/elements/tests/accessibility-tree.test.ts b/packages/elements/tests/accessibility-tree.test.ts index 7f187b5..6fd7be1 100644 --- a/packages/elements/tests/accessibility-tree.test.ts +++ b/packages/elements/tests/accessibility-tree.test.ts @@ -8,6 +8,7 @@ describe('getBrowserAccessibilityTree', () => { role: 'button', name: 'Submit', selector: 'button*=Submit', + depth: 0, level: '', disabled: '', checked: '', diff --git a/packages/elements/tests/snapshot.test.ts b/packages/elements/tests/snapshot.test.ts new file mode 100644 index 0000000..c665e8e --- /dev/null +++ b/packages/elements/tests/snapshot.test.ts @@ -0,0 +1,233 @@ +import { describe, it, expect } from 'vitest' +import { + serializeWebSnapshot, + serializeMobileSnapshot +} from '../src/snapshot.js' +import type { AccessibilityNode } from '../src/accessibility-tree.js' +import type { JSONElement } from '../src/locators/types.js' + +// --------------------------------------------------------------------------- +// serializeWebSnapshot +// --------------------------------------------------------------------------- + +function node( + overrides: Partial & { role: string; depth: number } +): AccessibilityNode { + return { + name: '', + selector: '', + level: '', + disabled: '', + checked: '', + expanded: '', + selected: '', + 
pressed: '', + required: '', + readonly: '', + ...overrides + } +} + +describe('serializeWebSnapshot', () => { + it('produces a page header', () => { + const out = serializeWebSnapshot([]) + expect(out).toBe('[Page]') + }) + + it('includes title and url in header', () => { + const out = serializeWebSnapshot([], { + title: 'Login', + url: 'https://example.com/login' + }) + expect(out).toMatch('[Page: Login — https://example.com/login]') + }) + + it('renders interactive role with name and selector', () => { + const nodes = [ + node({ + role: 'button', + depth: 0, + name: 'Submit', + selector: 'button*=Submit' + }) + ] + const out = serializeWebSnapshot(nodes) + expect(out).toContain('button "Submit" → button*=Submit') + }) + + it('renders interactive role with ∈ ancestor name when self has no name', () => { + const nodes = [ + node({ role: 'form', depth: 0, name: 'Login form' }), + node({ role: 'checkbox', depth: 1, name: '', selector: '#remember' }) + ] + const out = serializeWebSnapshot(nodes) + expect(out).toContain('checkbox ∈ "Login form" → #remember') + }) + + it('omits interactive node with no name, no ancestor name, and no selector', () => { + const nodes = [node({ role: 'button', depth: 0, name: '', selector: '' })] + const out = serializeWebSnapshot(nodes) + // Only the header, node is skipped + expect(out.split('\n').length).toBe(1) + }) + + it('renders container role without selector', () => { + const nodes = [node({ role: 'navigation', depth: 0, name: 'Main' })] + const out = serializeWebSnapshot(nodes) + expect(out).toContain('navigation "Main"') + expect(out).not.toContain('→') + }) + + it('renders heading with level suffix', () => { + const nodes = [ + node({ role: 'heading', depth: 0, name: 'Sign in', level: 1 }) + ] + const out = serializeWebSnapshot(nodes) + expect(out).toContain('heading[1] "Sign in"') + }) + + it('indents nodes according to depth', () => { + const nodes = [ + node({ role: 'navigation', depth: 0, name: 'Nav' }), + node({ role: 
'link', depth: 1, name: 'Home', selector: 'a*=Home' }) + ] + const lines = serializeWebSnapshot(nodes).split('\n') + // depth 0 → 1 level of indent (' ' × 1), depth 1 → 2 levels (' ' × 2) + expect(lines[1]).toMatch(/^ navigation/) + expect(lines[2]).toMatch(/^ link/) + }) + + it('renders full login page example correctly', () => { + const nodes: AccessibilityNode[] = [ + node({ role: 'navigation', depth: 0, name: 'Main' }), + node({ role: 'link', depth: 1, name: 'Home', selector: 'a*=Home' }), + node({ role: 'main', depth: 0, name: '' }), + node({ role: 'heading', depth: 1, name: 'Sign in', level: 1 }), + node({ role: 'form', depth: 1, name: 'Login' }), + node({ + role: 'textbox', + depth: 2, + name: 'Email address', + selector: '#email' + }), + node({ + role: 'button', + depth: 2, + name: 'Sign in', + selector: 'button*=Sign in' + }) + ] + const out = serializeWebSnapshot(nodes, { + title: 'Login', + url: 'https://example.com/login' + }) + expect(out).toContain('[Page: Login — https://example.com/login]') + expect(out).toContain('navigation "Main"') + expect(out).toContain('link "Home" → a*=Home') + expect(out).toContain('heading[1] "Sign in"') + expect(out).toContain('textbox "Email address" → #email') + expect(out).toContain('button "Sign in" → button*=Sign in') + }) +}) + +// --------------------------------------------------------------------------- +// serializeMobileSnapshot +// --------------------------------------------------------------------------- + +function mobileEl( + tagName: string, + attrs: JSONElement['attributes'], + children: JSONElement[] = [] +): JSONElement { + return { tagName, attributes: attrs, children, path: '' } +} + +describe('serializeMobileSnapshot', () => { + it('produces a platform header with device and viewport', () => { + const root = mobileEl('hierarchy', {}) + const out = serializeMobileSnapshot(root, { + platform: 'android', + deviceName: 'Pixel 7', + viewport: { width: 412, height: 915 } + }) + 
expect(out).toMatch('[android — Pixel 7 (412×915)]') + }) + + it('renders interactive Android element with accessibility-id locator', () => { + const root = mobileEl('hierarchy', {}, [ + mobileEl('android.widget.Button', { + clickable: 'true', + 'content-desc': 'Skip', + text: '' + }) + ]) + const out = serializeMobileSnapshot(root, { platform: 'android' }) + expect(out).toContain('Button "Skip" → accessibility-id:Skip') + }) + + it('falls back to resource-id when no content-desc', () => { + const root = mobileEl('hierarchy', {}, [ + mobileEl('android.widget.EditText', { + clickable: 'true', + 'content-desc': '', + 'resource-id': 'com.example:id/search', + text: '' + }) + ]) + const out = serializeMobileSnapshot(root, { platform: 'android' }) + expect(out).toContain('EditText → id:com.example:id/search') + }) + + it('renders ∈ ancestor context when element has no identity', () => { + const root = mobileEl('hierarchy', {}, [ + mobileEl( + 'android.widget.LinearLayout', + { 'content-desc': 'Search section' }, + [ + mobileEl('android.widget.EditText', { + clickable: 'true', + 'content-desc': '', + 'resource-id': 'com.example:id/search', + text: '' + }) + ] + ) + ]) + const out = serializeMobileSnapshot(root, { platform: 'android' }) + expect(out).toContain( + 'EditText ∈ "Search section" → id:com.example:id/search' + ) + }) + + it('renders iOS element with accessibility-id', () => { + const root = mobileEl('XCUIElementTypeApplication', {}, [ + mobileEl('XCUIElementTypeButton', { + accessible: 'true', + name: 'Accept All Cookies', + label: 'Accept All Cookies' + }) + ]) + const out = serializeMobileSnapshot(root, { platform: 'ios' }) + expect(out).toContain( + 'Button "Accept All Cookies" → accessibility-id:Accept All Cookies' + ) + }) + + it('simplifies iOS XCUIElementType prefix', () => { + const root = mobileEl('XCUIElementTypeApplication', {}, [ + mobileEl('XCUIElementTypeScrollView', {}) + ]) + const out = serializeMobileSnapshot(root, { platform: 'ios' }) + 
expect(out).toContain('ScrollView') + expect(out).not.toContain('XCUIElementType') + }) + + it('shows container without selector', () => { + const root = mobileEl('hierarchy', {}, [ + mobileEl('android.widget.FrameLayout', { 'content-desc': '' }) + ]) + const out = serializeMobileSnapshot(root, { platform: 'android' }) + expect(out).toContain('FrameLayout') + expect(out).not.toContain('→') + }) +}) From 70245a1528033160646af24ad7d7bbe6d435a036 Mon Sep 17 00:00:00 2001 From: Vince Graics Date: Tue, 26 May 2026 09:17:37 +0200 Subject: [PATCH 5/9] fix: Drop interactive nodes with empty selector from web snapshot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Empty selector → trailing '→ ' in output, unusable by agents, guard added at top of the interactive branch: skip the node entirely --- packages/elements/src/snapshot.ts | 7 +++++-- packages/elements/tests/snapshot.test.ts | 25 +++++++++++++++++++++--- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/packages/elements/src/snapshot.ts b/packages/elements/src/snapshot.ts index 94a167e..71a36ea 100644 --- a/packages/elements/src/snapshot.ts +++ b/packages/elements/src/snapshot.ts @@ -78,6 +78,10 @@ export function serializeWebSnapshot( : node.role if (isInteractive) { + // No selector → agent can't act on this node; skip entirely + if (!node.selector) { + continue + } if (node.name) { lines.push(`${indent}${roleLabel} "${node.name}" → ${node.selector}`) } else { @@ -86,10 +90,9 @@ export function serializeWebSnapshot( lines.push( `${indent}${roleLabel} ∈ "${purpose}" → ${node.selector}` ) - } else if (node.selector) { + } else { lines.push(`${indent}${roleLabel} → ${node.selector}`) } - // No name, no purpose, no selector: skip — not useful to an LLM } } else { // Container / structural: show role + name when present, no selector diff --git a/packages/elements/tests/snapshot.test.ts b/packages/elements/tests/snapshot.test.ts index c665e8e..4cc5762 100644 --- 
a/packages/elements/tests/snapshot.test.ts +++ b/packages/elements/tests/snapshot.test.ts @@ -64,13 +64,32 @@ describe('serializeWebSnapshot', () => { expect(out).toContain('checkbox ∈ "Login form" → #remember') }) - it('omits interactive node with no name, no ancestor name, and no selector', () => { - const nodes = [node({ role: 'button', depth: 0, name: '', selector: '' })] + it('omits interactive node with no selector regardless of name', () => { + const nodes = [ + node({ role: 'button', depth: 0, name: '', selector: '' }), + node({ + role: 'button', + depth: 0, + name: 'Named but unselector', + selector: '' + }) + ] const out = serializeWebSnapshot(nodes) - // Only the header, node is skipped + // Only the header — both nodes skipped due to missing selector expect(out.split('\n').length).toBe(1) }) + it('omits interactive node with ∈ context but no selector', () => { + const nodes = [ + node({ role: 'form', depth: 0, name: 'Login form' }), + node({ role: 'combobox', depth: 1, name: '', selector: '' }) + ] + const out = serializeWebSnapshot(nodes) + // combobox has ancestor context but no selector — must be dropped + expect(out).not.toContain('combobox') + expect(out).not.toContain('→') + }) + it('renders container role without selector', () => { const nodes = [node({ role: 'navigation', depth: 0, name: 'Main' })] const out = serializeWebSnapshot(nodes) From 92c19078679a93dcd5377b061300b09c690f0f18 Mon Sep 17 00:00:00 2001 From: Vince Graics Date: Sun, 31 May 2026 18:33:21 +0200 Subject: [PATCH 6/9] chore(deps): Update pnpm-lock.yaml and package properties --- packages/elements/.npmignore | 1 + packages/elements/package.json | 8 +++- pnpm-lock.yaml | 71 ++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 1 deletion(-) create mode 120000 packages/elements/.npmignore diff --git a/packages/elements/.npmignore b/packages/elements/.npmignore new file mode 120000 index 0000000..b4359f6 --- /dev/null +++ b/packages/elements/.npmignore @@ -0,0 +1 @@ 
+../../.npmignore \ No newline at end of file diff --git a/packages/elements/package.json b/packages/elements/package.json index 058044f..3c208c0 100644 --- a/packages/elements/package.json +++ b/packages/elements/package.json @@ -16,9 +16,15 @@ } }, "types": "./dist/index.d.ts", + "repository": { + "type": "git", + "url": "git+https://github.com/webdriverio/devtools.git", + "directory": "packages/elements" + }, "scripts": { "build": "tsc -p ./tsconfig.json", - "lint": "eslint . --fix" + "lint": "eslint . --fix", + "test": "vitest run" }, "dependencies": { "@xmldom/xmldom": "^0.9.8", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 64fa838..65f9815 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -257,6 +257,31 @@ importers: specifier: ^8.18.3 version: 8.20.0 + packages/elements: + dependencies: + '@xmldom/xmldom': + specifier: ^0.9.8 + version: 0.9.10 + webdriverio: + specifier: ^9.0.0 + version: 9.27.0(puppeteer-core@21.11.0) + xpath: + specifier: ^0.0.34 + version: 0.0.34 + devDependencies: + '@types/node': + specifier: 25.5.2 + version: 25.5.2 + '@wdio/globals': + specifier: 9.27.0 + version: 9.27.0(expect-webdriverio@5.6.5)(webdriverio@9.27.0(puppeteer-core@21.11.0)) + typescript: + specifier: 6.0.2 + version: 6.0.2 + vitest: + specifier: ^4.0.16 + version: 4.1.3(@types/node@25.5.2)(happy-dom@20.8.9)(jsdom@24.1.3)(vite@8.0.7(@types/node@25.5.2)(esbuild@0.27.7)(jiti@2.6.1)(tsx@4.21.0)(yaml@2.8.3)) + packages/nightwatch-devtools: dependencies: '@wdio/devtools-backend': @@ -1447,42 +1472,49 @@ packages: engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [glibc] '@napi-rs/nice-linux-arm64-musl@1.1.1': resolution: {integrity: sha512-+2Rzdb3nTIYZ0YJF43qf2twhqOCkiSrHx2Pg6DJaCPYhhaxbLcdlV8hCRMHghQ+EtZQWGNcS2xF4KxBhSGeutg==} engines: {node: '>= 10'} cpu: [arm64] os: [linux] + libc: [musl] '@napi-rs/nice-linux-ppc64-gnu@1.1.1': resolution: {integrity: sha512-4FS8oc0GeHpwvv4tKciKkw3Y4jKsL7FRhaOeiPei0X9T4Jd619wHNe4xCLmN2EMgZoeGg+Q7GY7BsvwKpL22Tg==} engines: 
{node: '>= 10'} cpu: [ppc64] os: [linux] + libc: [glibc] '@napi-rs/nice-linux-riscv64-gnu@1.1.1': resolution: {integrity: sha512-HU0nw9uD4FO/oGCCk409tCi5IzIZpH2agE6nN4fqpwVlCn5BOq0MS1dXGjXaG17JaAvrlpV5ZeyZwSon10XOXw==} engines: {node: '>= 10'} cpu: [riscv64] os: [linux] + libc: [glibc] '@napi-rs/nice-linux-s390x-gnu@1.1.1': resolution: {integrity: sha512-2YqKJWWl24EwrX0DzCQgPLKQBxYDdBxOHot1KWEq7aY2uYeX+Uvtv4I8xFVVygJDgf6/92h9N3Y43WPx8+PAgQ==} engines: {node: '>= 10'} cpu: [s390x] os: [linux] + libc: [glibc] '@napi-rs/nice-linux-x64-gnu@1.1.1': resolution: {integrity: sha512-/gaNz3R92t+dcrfCw/96pDopcmec7oCcAQ3l/M+Zxr82KT4DljD37CpgrnXV+pJC263JkW572pdbP3hP+KjcIg==} engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [glibc] '@napi-rs/nice-linux-x64-musl@1.1.1': resolution: {integrity: sha512-xScCGnyj/oppsNPMnevsBe3pvNaoK7FGvMjT35riz9YdhB2WtTG47ZlbxtOLpjeO9SqqQ2J2igCmz6IJOD5JYw==} engines: {node: '>= 10'} cpu: [x64] os: [linux] + libc: [musl] '@napi-rs/nice-openharmony-arm64@1.1.1': resolution: {integrity: sha512-6uJPRVwVCLDeoOaNyeiW0gp2kFIM4r7PL2MczdZQHkFi9gVlgm+Vn+V6nTWRcu856mJ2WjYJiumEajfSm7arPQ==} @@ -1605,36 +1637,42 @@ packages: engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [linux] + libc: [glibc] '@rolldown/binding-linux-arm64-musl@1.0.0-rc.13': resolution: {integrity: sha512-bmRg3O6Z0gq9yodKKWCIpnlH051sEfdVwt+6m5UDffAQMUUqU0xjnQqqAUm+Gu7ofAAly9DqiQDtKu2nPDEABA==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [arm64] os: [linux] + libc: [musl] '@rolldown/binding-linux-ppc64-gnu@1.0.0-rc.13': resolution: {integrity: sha512-8Wtnbw4k7pMYN9B/mOEAsQ8HOiq7AZ31Ig4M9BKn2So4xRaFEhtCSa4ZJaOutOWq50zpgR4N5+L/opnlaCx8wQ==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [ppc64] os: [linux] + libc: [glibc] '@rolldown/binding-linux-s390x-gnu@1.0.0-rc.13': resolution: {integrity: sha512-D/0Nlo8mQuxSMohNJUF2lDXWRsFDsHldfRRgD9bRgktj+EndGPj4DOV37LqDKPYS+osdyhZEH7fTakTAEcW7qg==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [s390x] os: [linux] + libc: [glibc] 
'@rolldown/binding-linux-x64-gnu@1.0.0-rc.13': resolution: {integrity: sha512-eRrPvat2YaVQcwwKi/JzOP6MKf1WRnOCr+VaI3cTWz3ZoLcP/654z90lVCJ4dAuMEpPdke0n+qyAqXDZdIC4rA==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [linux] + libc: [glibc] '@rolldown/binding-linux-x64-musl@1.0.0-rc.13': resolution: {integrity: sha512-PsdONiFRp8hR8KgVjTWjZ9s7uA3uueWL0t74/cKHfM4dR5zXYv4AjB8BvA+QDToqxAFg4ZkcVEqeu5F7inoz5w==} engines: {node: ^20.19.0 || >=22.12.0} cpu: [x64] os: [linux] + libc: [musl] '@rolldown/binding-openharmony-arm64@1.0.0-rc.13': resolution: {integrity: sha512-hCNXgC5dI3TVOLrPT++PKFNZ+1EtS0mLQwfXXXSUD/+rGlB65gZDwN/IDuxLpQP4x8RYYHqGomlUXzpO8aVI2w==} @@ -1705,66 +1743,79 @@ packages: resolution: {integrity: sha512-L+34Qqil+v5uC0zEubW7uByo78WOCIrBvci69E7sFASRl0X7b/MB6Cqd1lky/CtcSVTydWa2WZwFuWexjS5o6g==} cpu: [arm] os: [linux] + libc: [glibc] '@rollup/rollup-linux-arm-musleabihf@4.60.1': resolution: {integrity: sha512-n83O8rt4v34hgFzlkb1ycniJh7IR5RCIqt6mz1VRJD6pmhRi0CXdmfnLu9dIUS6buzh60IvACM842Ffb3xd6Gg==} cpu: [arm] os: [linux] + libc: [musl] '@rollup/rollup-linux-arm64-gnu@4.60.1': resolution: {integrity: sha512-Nql7sTeAzhTAja3QXeAI48+/+GjBJ+QmAH13snn0AJSNL50JsDqotyudHyMbO2RbJkskbMbFJfIJKWA6R1LCJQ==} cpu: [arm64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-arm64-musl@4.60.1': resolution: {integrity: sha512-+pUymDhd0ys9GcKZPPWlFiZ67sTWV5UU6zOJat02M1+PiuSGDziyRuI/pPue3hoUwm2uGfxdL+trT6Z9rxnlMA==} cpu: [arm64] os: [linux] + libc: [musl] '@rollup/rollup-linux-loong64-gnu@4.60.1': resolution: {integrity: sha512-VSvgvQeIcsEvY4bKDHEDWcpW4Yw7BtlKG1GUT4FzBUlEKQK0rWHYBqQt6Fm2taXS+1bXvJT6kICu5ZwqKCnvlQ==} cpu: [loong64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-loong64-musl@4.60.1': resolution: {integrity: sha512-4LqhUomJqwe641gsPp6xLfhqWMbQV04KtPp7/dIp0nzPxAkNY1AbwL5W0MQpcalLYk07vaW9Kp1PBhdpZYYcEw==} cpu: [loong64] os: [linux] + libc: [musl] '@rollup/rollup-linux-ppc64-gnu@4.60.1': resolution: {integrity: 
sha512-tLQQ9aPvkBxOc/EUT6j3pyeMD6Hb8QF2BTBnCQWP/uu1lhc9AIrIjKnLYMEroIz/JvtGYgI9dF3AxHZNaEH0rw==} cpu: [ppc64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-ppc64-musl@4.60.1': resolution: {integrity: sha512-RMxFhJwc9fSXP6PqmAz4cbv3kAyvD1etJFjTx4ONqFP9DkTkXsAMU4v3Vyc5BgzC+anz7nS/9tp4obsKfqkDHg==} cpu: [ppc64] os: [linux] + libc: [musl] '@rollup/rollup-linux-riscv64-gnu@4.60.1': resolution: {integrity: sha512-QKgFl+Yc1eEk6MmOBfRHYF6lTxiiiV3/z/BRrbSiW2I7AFTXoBFvdMEyglohPj//2mZS4hDOqeB0H1ACh3sBbg==} cpu: [riscv64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-riscv64-musl@4.60.1': resolution: {integrity: sha512-RAjXjP/8c6ZtzatZcA1RaQr6O1TRhzC+adn8YZDnChliZHviqIjmvFwHcxi4JKPSDAt6Uhf/7vqcBzQJy0PDJg==} cpu: [riscv64] os: [linux] + libc: [musl] '@rollup/rollup-linux-s390x-gnu@4.60.1': resolution: {integrity: sha512-wcuocpaOlaL1COBYiA89O6yfjlp3RwKDeTIA0hM7OpmhR1Bjo9j31G1uQVpDlTvwxGn2nQs65fBFL5UFd76FcQ==} cpu: [s390x] os: [linux] + libc: [glibc] '@rollup/rollup-linux-x64-gnu@4.60.1': resolution: {integrity: sha512-77PpsFQUCOiZR9+LQEFg9GClyfkNXj1MP6wRnzYs0EeWbPcHs02AXu4xuUbM1zhwn3wqaizle3AEYg5aeoohhg==} cpu: [x64] os: [linux] + libc: [glibc] '@rollup/rollup-linux-x64-musl@4.60.1': resolution: {integrity: sha512-5cIATbk5vynAjqqmyBjlciMJl1+R/CwX9oLk/EyiFXDWd95KpHdrOJT//rnUl4cUcskrd0jCCw3wpZnhIHdD9w==} cpu: [x64] os: [linux] + libc: [musl] '@rollup/rollup-openbsd-x64@4.60.1': resolution: {integrity: sha512-cl0w09WsCi17mcmWqqglez9Gk8isgeWvoUZ3WiJFYSR3zjBQc2J5/ihSjpl+VLjPqjQ/1hJRcqBfLjssREQILw==} @@ -1889,24 +1940,28 @@ packages: engines: {node: '>= 20'} cpu: [arm64] os: [linux] + libc: [glibc] '@tailwindcss/oxide-linux-arm64-musl@4.2.2': resolution: {integrity: sha512-oCfG/mS+/+XRlwNjnsNLVwnMWYH7tn/kYPsNPh+JSOMlnt93mYNCKHYzylRhI51X+TbR+ufNhhKKzm6QkqX8ag==} engines: {node: '>= 20'} cpu: [arm64] os: [linux] + libc: [musl] '@tailwindcss/oxide-linux-x64-gnu@4.2.2': resolution: {integrity: 
sha512-rTAGAkDgqbXHNp/xW0iugLVmX62wOp2PoE39BTCGKjv3Iocf6AFbRP/wZT/kuCxC9QBh9Pu8XPkv/zCZB2mcMg==} engines: {node: '>= 20'} cpu: [x64] os: [linux] + libc: [glibc] '@tailwindcss/oxide-linux-x64-musl@4.2.2': resolution: {integrity: sha512-XW3t3qwbIwiSyRCggeO2zxe3KWaEbM0/kW9e8+0XpBgyKU4ATYzcVSMKteZJ1iukJ3HgHBjbg9P5YPRCVUxlnQ==} engines: {node: '>= 20'} cpu: [x64] os: [linux] + libc: [musl] '@tailwindcss/oxide-wasm32-wasi@4.2.2': resolution: {integrity: sha512-eKSztKsmEsn1O5lJ4ZAfyn41NfG7vzCg496YiGtMDV86jz1q/irhms5O0VrY6ZwTUkFy/EKG3RfWgxSI3VbZ8Q==} @@ -2299,6 +2354,10 @@ packages: resolution: {integrity: sha512-sumk8m5wzOPMs8TizfQkWG0MTqe0p1yfu77ouz+xy1hNW+gaSf99uiU3lvz4rSghloM1esKfqRCFQibJI4+d/w==} engines: {node: '>=18'} + '@xmldom/xmldom@0.9.10': + resolution: {integrity: sha512-A9gOqLdi6cV4ibazAjcQufGj0B1y/vDqYrcuP6d/6x8P27gRS8643Dj9o1dEKtB6O7fwxb2FgBmJS2mX7gpvdw==} + engines: {node: '>=14.6'} + '@zip.js/zip.js@2.8.26': resolution: {integrity: sha512-RQ4h9F6DOiHxpdocUDrOl6xBM+yOtz+LkUol47AVWcfebGBDpZ7w7Xvz9PS24JgXvLGiXXzSAfdCdVy1tPlaFA==} engines: {bun: '>=0.7.0', deno: '>=1.0.0', node: '>=18.0.0'} @@ -4730,24 +4789,28 @@ packages: engines: {node: '>= 12.0.0'} cpu: [arm64] os: [linux] + libc: [glibc] lightningcss-linux-arm64-musl@1.32.0: resolution: {integrity: sha512-UpQkoenr4UJEzgVIYpI80lDFvRmPVg6oqboNHfoH4CQIfNA+HOrZ7Mo7KZP02dC6LjghPQJeBsvXhJod/wnIBg==} engines: {node: '>= 12.0.0'} cpu: [arm64] os: [linux] + libc: [musl] lightningcss-linux-x64-gnu@1.32.0: resolution: {integrity: sha512-V7Qr52IhZmdKPVr+Vtw8o+WLsQJYCTd8loIfpDaMRWGUZfBOYEJeyJIkqGIDMZPwPx24pUMfwSxxI8phr/MbOA==} engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] + libc: [glibc] lightningcss-linux-x64-musl@1.32.0: resolution: {integrity: sha512-bYcLp+Vb0awsiXg/80uCRezCYHNg1/l3mt0gzHnWV9XP1W5sKa5/TCdGWaR/zBM2PeF/HbsQv/j2URNOiVuxWg==} engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] + libc: [musl] lightningcss-win32-arm64-msvc@1.32.0: resolution: {integrity: 
sha512-8SbC8BR40pS6baCM8sbtYDSwEVQd4JlFTOlaD3gWGHfThTcABnNDBda6eTZeqbofalIJhFx0qKzgHJmcPTnGdw==} @@ -6907,6 +6970,10 @@ packages: xmlchars@2.2.0: resolution: {integrity: sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==} + xpath@0.0.34: + resolution: {integrity: sha512-FxF6+rkr1rNSQrhUNYrAFJpRXNzlDoMxeXN5qI84939ylEv3qqPFKa85Oxr6tDaJKqwW6KKyo2v26TSv3k6LeA==} + engines: {node: '>=0.6.0'} + y18n@5.0.8: resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==} engines: {node: '>=10'} @@ -9167,6 +9234,8 @@ snapshots: dependencies: '@wdio/logger': 9.18.0 + '@xmldom/xmldom@0.9.10': {} + '@zip.js/zip.js@2.8.26': {} '@zip.js/zip.js@2.8.8': {} @@ -14478,6 +14547,8 @@ snapshots: xmlchars@2.2.0: {} + xpath@0.0.34: {} + y18n@5.0.8: {} yallist@3.1.1: {} From 041f2086dd6ece49098360e8c9e9e94f1beab869 Mon Sep 17 00:00:00 2001 From: Vince Graics Date: Sun, 31 May 2026 19:25:05 +0200 Subject: [PATCH 7/9] feat(viewport): Default elements snapshots to viewport visibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Option added to filter elements that are not visible (or not in viewport) - Introduce `statictext` role for visible text-bearing element (book titles, prices, promo copy) that otherwise have no a11y role (Suppress `statictext` children whose text is already included in the parent) - Extend inferPurpose() to consider same-depth structural siblings (img, heading, statictext) as context, and render `∈ parent` for all named interactive elements --- packages/elements/src/accessibility-tree.ts | 95 ++++++++++--- packages/elements/src/browser-elements.ts | 26 ++-- packages/elements/src/get-elements.ts | 12 +- packages/elements/src/index.ts | 4 + packages/elements/src/locators/index.ts | 11 ++ packages/elements/src/locators/types.ts | 2 + packages/elements/src/mobile-elements.ts | 6 +- packages/elements/src/snapshot.ts | 148 
++++++++++++++++++-- packages/elements/tests/snapshot.test.ts | 6 +- 9 files changed, 260 insertions(+), 50 deletions(-) diff --git a/packages/elements/src/accessibility-tree.ts b/packages/elements/src/accessibility-tree.ts index 6d8bb59..a5e593b 100644 --- a/packages/elements/src/accessibility-tree.ts +++ b/packages/elements/src/accessibility-tree.ts @@ -19,9 +19,11 @@ export interface AccessibilityNode { pressed: string required: string readonly: string + /** Whether the element's bounding rect intersects the viewport. */ + isInViewport?: boolean } -const accessibilityTreeScript = () => +const accessibilityTreeScript = (inViewportOnly: boolean) => (function () { const INPUT_TYPE_ROLES: Record = { text: 'textbox', @@ -41,17 +43,6 @@ const accessibilityTreeScript = () => color: 'button' } - const LANDMARK_ROLES = new Set([ - 'navigation', - 'main', - 'banner', - 'contentinfo', - 'complementary', - 'form', - 'dialog', - 'region' - ]) - // Container roles: named only via aria-label/aria-labelledby, not textContent const CONTAINER_ROLES = new Set([ 'navigation', @@ -154,6 +145,12 @@ const accessibilityTreeScript = () => return 'generic' } + // Capture elements with visible direct text that don't match + // any semantic role — book titles, prices, labels, etc. + if (getDirectText(el)) { + return 'statictext' + } + return null } @@ -170,7 +167,7 @@ const accessibilityTreeScript = () => .map((id) => document.getElementById(id)?.textContent?.trim() || '') .filter(Boolean) if (texts.length > 0) { - return texts.join(' ').slice(0, 100) + return texts.join(' ').slice(0, 200) } } @@ -217,17 +214,26 @@ const accessibilityTreeScript = () => return title.trim() } + // 9. 
Child — common pattern for image links and buttons + const childImg = el.querySelector('img') + if (childImg) { + const alt = childImg.getAttribute('alt') + if (alt) { + return alt.trim() + } + } + if (role && CONTAINER_ROLES.has(role)) { return '' } - return (el.textContent?.trim().replace(/\s+/g, ' ') || '').slice(0, 100) + return (el.textContent?.trim().replace(/\s+/g, ' ') || '').slice(0, 200) } function getSelector(element: HTMLElement): string { const tag = element.tagName.toLowerCase() const text = element.textContent?.trim().replace(/\s+/g, ' ') - if (text && text.length > 0 && text.length <= 50) { + if (text && text.length > 0 && text.length <= 120) { const sameTagElements = document.querySelectorAll(tag) let matchCount = 0 sameTagElements.forEach((el) => { @@ -241,8 +247,11 @@ const accessibilityTreeScript = () => } const ariaLabel = element.getAttribute('aria-label') - if (ariaLabel && ariaLabel.length <= 80) { - return `aria/${ariaLabel}` + if (ariaLabel && ariaLabel.length <= 200) { + const sel = `[aria-label="${CSS.escape(ariaLabel)}"]` + if (document.querySelectorAll(sel).length === 1) { + return sel + } } const testId = element.getAttribute('data-testid') @@ -310,6 +319,17 @@ const accessibilityTreeScript = () => return path.join(' > ') } + /** Extract text from immediate text-node children only (not nested elements). 
*/ + function getDirectText(el: HTMLElement): string { + let text = '' + for (const child of Array.from(el.childNodes)) { + if (child.nodeType === 3 /* TEXT_NODE */) { + text += child.textContent + } + } + return text.trim().replace(/\s+/g, ' ') + } + function isVisible(el: HTMLElement): boolean { if (typeof el.checkVisibility === 'function') { return el.checkVisibility({ @@ -328,6 +348,18 @@ const accessibilityTreeScript = () => ) } + function isInViewport(el: HTMLElement): boolean { + const rect = el.getBoundingClientRect() + return ( + rect.top >= 0 && + rect.left >= 0 && + rect.bottom <= + (window.innerHeight || document.documentElement.clientHeight) && + rect.right <= + (window.innerWidth || document.documentElement.clientWidth) + ) + } + function getLevel(el: HTMLElement): number | undefined { const m = el.tagName.toLowerCase().match(/^h([1-6])$/) if (m) { @@ -383,6 +415,7 @@ const accessibilityTreeScript = () => } const role = getRole(el) + const inViewport = isInViewport(el) if (!role) { for (const child of Array.from(el.children)) { @@ -391,16 +424,27 @@ const accessibilityTreeScript = () => return } + // When viewport filtering is on, skip nodes outside the viewport. + // Still recurse into children — they may have different positioning + // (e.g. position:fixed elements inside an off-screen container). + if (inViewportOnly && !inViewport) { + for (const child of Array.from(el.children)) { + walk(child as HTMLElement, depth + 1) + } + return + } + const name = getAccessibleName(el, role) - const isLandmark = LANDMARK_ROLES.has(role) - const hasIdentity = !!(name || isLandmark) - const selector = hasIdentity ? getSelector(el) : '' + // Always generate a selector — even elements without an accessible + // name need a CSS-path fallback so the snapshot doesn't lose them. + const selector = getSelector(el) const node: RawNode = { role, name, selector, depth, level: getLevel(el) ?? 
'', + isInViewport: inViewport, ...getState(el) } result.push(node) @@ -419,11 +463,18 @@ const accessibilityTreeScript = () => /** * Get browser accessibility tree via a single DOM walk. + * + * @param browser WebdriverIO browser instance + * @param options {@link inViewportOnly} defaults to `true` — only nodes + * whose bounding rect intersects the viewport are included. */ export async function getBrowserAccessibilityTree( - browser: WebdriverIO.Browser + browser: WebdriverIO.Browser, + options: { inViewportOnly?: boolean } = {} ): Promise { + const { inViewportOnly = true } = options return (browser as any).execute( - accessibilityTreeScript + accessibilityTreeScript, + inViewportOnly ) as unknown as Promise } diff --git a/packages/elements/src/browser-elements.ts b/packages/elements/src/browser-elements.ts index f8d599d..0e38e5a 100644 --- a/packages/elements/src/browser-elements.ts +++ b/packages/elements/src/browser-elements.ts @@ -19,9 +19,11 @@ export interface BrowserElementInfo { export interface GetBrowserElementsOptions { includeBounds?: boolean + /** Only return elements whose bounding rect intersects the viewport (default true). */ + inViewportOnly?: boolean } -const elementsScript = (includeBounds: boolean) => +const elementsScript = (includeBounds: boolean, inViewportOnly: boolean) => (function () { const interactableSelectors = [ 'a[href]', @@ -79,7 +81,7 @@ const elementsScript = (includeBounds: boolean) => .map((id) => document.getElementById(id)?.textContent?.trim() || '') .filter(Boolean) if (texts.length > 0) { - return texts.join(' ').slice(0, 100) + return texts.join(' ').slice(0, 200) } } @@ -132,7 +134,7 @@ const elementsScript = (includeBounds: boolean) => } // 8. 
text content (truncated, whitespace normalized) - return (el.textContent?.trim().replace(/\s+/g, ' ') || '').slice(0, 100) + return (el.textContent?.trim().replace(/\s+/g, ' ') || '').slice(0, 200) } function getSelector(element: HTMLElement): string { @@ -140,7 +142,7 @@ const elementsScript = (includeBounds: boolean) => // 1. tag*=Text — best per WebdriverIO docs const text = element.textContent?.trim().replace(/\s+/g, ' ') - if (text && text.length > 0 && text.length <= 50) { + if (text && text.length > 0 && text.length <= 120) { const sameTagElements = document.querySelectorAll(tag) let matchCount = 0 sameTagElements.forEach((el) => { @@ -155,8 +157,11 @@ const elementsScript = (includeBounds: boolean) => // 2. aria/label const ariaLabel = element.getAttribute('aria-label') - if (ariaLabel && ariaLabel.length <= 80) { - return `aria/${ariaLabel}` + if (ariaLabel && ariaLabel.length <= 200) { + const sel = `[aria-label="${CSS.escape(ariaLabel)}"]` + if (document.querySelectorAll(sel).length === 1) { + return sel + } } // 3. 
data-testid @@ -253,6 +258,10 @@ const elementsScript = (includeBounds: boolean) => rect.right <= (window.innerWidth || document.documentElement.clientWidth) + if (inViewportOnly && !isInViewport) { + return + } + const entry: Record = { tagName: htmlEl.tagName.toLowerCase(), name: getAccessibleName(htmlEl), @@ -285,9 +294,10 @@ export async function getInteractableBrowserElements( browser: WebdriverIO.Browser, options: GetBrowserElementsOptions = {} ): Promise { - const { includeBounds = false } = options + const { includeBounds = false, inViewportOnly = true } = options return (browser as any).execute( elementsScript, - includeBounds + includeBounds, + inViewportOnly ) as unknown as Promise } diff --git a/packages/elements/src/get-elements.ts b/packages/elements/src/get-elements.ts index ee1d27d..e763a1f 100644 --- a/packages/elements/src/get-elements.ts +++ b/packages/elements/src/get-elements.ts @@ -36,16 +36,16 @@ export async function getElements( const platform = browser.isAndroid ? 'android' : 'ios' const result = await getMobileVisibleElementsWithTree(browser, platform, { includeContainers, - includeBounds + includeBounds, + inViewportOnly }) elements = result.elements tree = result.tree ?? 
undefined } else { - elements = await getInteractableBrowserElements(browser, { includeBounds }) - } - - if (inViewportOnly) { - elements = elements.filter((el) => el.isInViewport !== false) + elements = await getInteractableBrowserElements(browser, { + includeBounds, + inViewportOnly + }) } const total = elements.length diff --git a/packages/elements/src/index.ts b/packages/elements/src/index.ts index dc6f116..892e5aa 100644 --- a/packages/elements/src/index.ts +++ b/packages/elements/src/index.ts @@ -17,4 +17,8 @@ export { getElements } from './get-elements.js' export type { VisibleElementsResult } from './get-elements.js' export { serializeWebSnapshot, serializeMobileSnapshot } from './snapshot.js' +export type { + WebSnapshotOptions, + MobileSnapshotOptions +} from './snapshot.js' export type { JSONElement } from './locators/types.js' diff --git a/packages/elements/src/locators/index.ts b/packages/elements/src/locators/index.ts index 2ed62e0..1d03d19 100644 --- a/packages/elements/src/locators/index.ts +++ b/packages/elements/src/locators/index.ts @@ -88,6 +88,7 @@ interface ProcessingContext { isNative: boolean viewportSize: { width: number; height: number } filters: FilterOptions + inViewportOnly: boolean results: ElementWithLocators[] parsedDOM: XMLDocument | null } @@ -179,6 +180,15 @@ function processElement(element: JSONElement, ctx: ProcessingContext): void { return } + // Skip off-screen elements early when viewport filtering is on — + // avoids expensive locator generation for elements the caller doesn't want. + if (ctx.inViewportOnly) { + const b = parseBounds(element, ctx.platform) + if (!isWithinViewport(b, ctx.viewportSize)) { + return + } + } + try { const targetNode = ctx.parsedDOM ? findDOMNodeByPath(ctx.parsedDOM, element.path) @@ -254,6 +264,7 @@ export function generateAllElementLocators( isNative: options.isNative ?? true, viewportSize: options.viewportSize ?? { width: 9999, height: 9999 }, filters: options.filters ?? 
{}, + inViewportOnly: options.inViewportOnly ?? true, results: [], parsedDOM } diff --git a/packages/elements/src/locators/types.ts b/packages/elements/src/locators/types.ts index e381d45..28f5a49 100644 --- a/packages/elements/src/locators/types.ts +++ b/packages/elements/src/locators/types.ts @@ -105,4 +105,6 @@ export interface GenerateLocatorsOptions { viewportSize?: { width: number; height: number } filters?: FilterOptions isNative?: boolean + /** Only return elements whose bounds intersect the viewport (default true). */ + inViewportOnly?: boolean } diff --git a/packages/elements/src/mobile-elements.ts b/packages/elements/src/mobile-elements.ts index 0fc45b4..e57f3cf 100644 --- a/packages/elements/src/mobile-elements.ts +++ b/packages/elements/src/mobile-elements.ts @@ -39,6 +39,8 @@ export interface MobileElementInfo { export interface GetMobileElementsOptions { includeContainers?: boolean includeBounds?: boolean + /** Only return elements whose bounds intersect the viewport (default true). */ + inViewportOnly?: boolean filterOptions?: FilterOptions } @@ -145,6 +147,7 @@ export async function getMobileVisibleElementsWithTree( const { includeContainers = false, includeBounds = false, + inViewportOnly = true, filterOptions } = options @@ -161,7 +164,8 @@ export async function getMobileVisibleElementsWithTree( const elementLocators = generateAllElementLocators(pageSource, { platform, viewportSize, - filters + filters, + inViewportOnly }) const elements = elementLocators.map((el) => diff --git a/packages/elements/src/snapshot.ts b/packages/elements/src/snapshot.ts index 71a36ea..536ba5f 100644 --- a/packages/elements/src/snapshot.ts +++ b/packages/elements/src/snapshot.ts @@ -7,6 +7,7 @@ import type { AccessibilityNode } from './accessibility-tree.js' import type { JSONElement } from './locators/types.js' +import { parseAndroidBounds, parseIOSBounds } from './locators/xml-parsing.js' /** * Roles that can be interacted with — rendered with `→ selector`. 
@@ -29,8 +30,10 @@ const INTERACTIVE_ROLES = new Set([ ]) /** - * Walk backwards from `index` to find the nearest ancestor with a non-empty name. - * Returns that name, or undefined if none found. + * Walk backwards from `index` to find the nearest ancestor or preceding + * structural sibling with a non-empty name. Same-depth nodes are only + * used when they are structural (img, heading, statictext, …) — never + * another interactive element. */ function inferPurpose( nodes: AccessibilityNode[], @@ -38,23 +41,36 @@ function inferPurpose( ): string | undefined { const myDepth = nodes[index].depth for (let i = index - 1; i >= 0; i--) { - if (nodes[i].depth < myDepth && nodes[i].name) { + if (nodes[i].depth <= myDepth && nodes[i].name) { + // Same-depth sibling: only structural elements count + if (nodes[i].depth === myDepth && INTERACTIVE_ROLES.has(nodes[i].role)) { + continue + } return nodes[i].name } } return undefined } +export interface WebSnapshotOptions { + /** Only include nodes whose bounding rect intersects the viewport (default true). */ + inViewportOnly?: boolean +} + /** * Serialize a web accessibility tree into a depth-indented text snapshot. * - * @param nodes Flat ordered node list from getBrowserAccessibilityTree() + * @param nodes Flat ordered node list from getBrowserAccessibilityTree() * @param context Optional page context for the header line + * @param options {@link WebSnapshotOptions} */ export function serializeWebSnapshot( nodes: AccessibilityNode[], - context?: { url?: string; title?: string } + context?: { url?: string; title?: string }, + options: WebSnapshotOptions = {} ): string { + const { inViewportOnly = true } = options + let header = '[Page' if (context?.title) { header += `: ${context.title}` @@ -68,9 +84,42 @@ export function serializeWebSnapshot( for (let i = 0; i < nodes.length; i++) { const node = nodes[i] + + // When viewport filtering is on, skip nodes that are known to be off-screen. 
+ // Nodes from a tree captured with inViewportOnly=false will have + // isInViewport populated; nodes from a pre-filtered tree all have + // isInViewport=true (or undefined for pre-existing data). + if (inViewportOnly && node.isInViewport === false) { + continue + } + const indent = ' '.repeat(node.depth + 1) // +1 indents everything under the header const isInteractive = INTERACTIVE_ROLES.has(node.role) + // Skip statictext that merely echoes the parent link/button name. + // Example: link "Highlights" → a*=Highlights doesn't need + // statictext "Highlights" as a child because it adds no information. + if (node.role === 'statictext' && node.name) { + let echoedByParent = false + for (let j = i - 1; j >= 0; j--) { + if (nodes[j].depth < node.depth) { + const parentRole = nodes[j].role + const parentName = nodes[j].name + if ( + INTERACTIVE_ROLES.has(parentRole) && + parentName && + parentName.includes(node.name) + ) { + echoedByParent = true + } + break // only check the immediate structural parent + } + } + if (echoedByParent) { + continue + } + } + // Heading gets level suffix: heading[2] const roleLabel = node.role === 'heading' && node.level @@ -82,10 +131,16 @@ export function serializeWebSnapshot( if (!node.selector) { continue } + const purpose = inferPurpose(nodes, i) if (node.name) { - lines.push(`${indent}${roleLabel} "${node.name}" → ${node.selector}`) + // Show parent context when available — disambiguates + // duplicate selectors like six "Add to Wishlist" buttons. + lines.push( + purpose + ? 
`${indent}${roleLabel} "${node.name}" ∈ "${purpose}" → ${node.selector}` + : `${indent}${roleLabel} "${node.name}" → ${node.selector}` + ) } else { - const purpose = inferPurpose(nodes, i) if (purpose) { lines.push( `${indent}${roleLabel} ∈ "${purpose}" → ${node.selector}` @@ -173,11 +228,43 @@ function isMobileInteractive( return attrs.accessible === 'true' } +interface WalkMobileOptions { + inViewportOnly: boolean + viewport: { width: number; height: number } +} + +function isMobileInViewport( + element: JSONElement, + platform: 'android' | 'ios', + viewport: { width: number; height: number } +): boolean { + const bounds = + platform === 'android' + ? parseAndroidBounds(element.attributes.bounds || '') + : parseIOSBounds(element.attributes) + + // Elements without explicit bounds dimensions default to "in viewport" + // so we don't silently drop content from sources that omit bounds info. + if (bounds.width === 0 && bounds.height === 0) { + return true + } + + return ( + bounds.x >= 0 && + bounds.y >= 0 && + bounds.width > 0 && + bounds.height > 0 && + bounds.x + bounds.width <= viewport.width && + bounds.y + bounds.height <= viewport.height + ) +} + function walkMobileTree( element: JSONElement, platform: 'android' | 'ios', depth: number, lines: string[], + walkOpts: WalkMobileOptions, parentIdentity?: string ): void { const attrs = element.attributes @@ -191,6 +278,32 @@ function walkMobileTree( ? getBestAndroidLocator(attrs) : getBestIOSLocator(attrs) + if (walkOpts.inViewportOnly) { + const inViewport = isMobileInViewport(element, platform, walkOpts.viewport) + + if (interactive && !inViewport) { + // Interactive element off-screen — skip entirely. + // Still recurse into children (e.g. a scrollable list whose items + // extend beyond the viewport but the scroll container itself is in view). 
+ for (const child of element.children || []) { + walkMobileTree(child, platform, depth + 1, lines, walkOpts, + identity || parentIdentity) + } + return + } + + if (!interactive && !inViewport) { + // Container fully off-screen — collapse to a single label. + lines.push( + identity + ? `${indent}⋯ ${tag} "${identity}" (off-screen)` + : `${indent}⋯ ${tag} (off-screen)` + ) + // Do NOT recurse into children of an off-screen container. + return + } + } + if (interactive && locator) { if (identity) { lines.push(`${indent}${tag} "${identity}" → ${locator}`) @@ -210,16 +323,24 @@ function walkMobileTree( platform, depth + 1, lines, + walkOpts, identity || parentIdentity ) } } +export interface MobileSnapshotOptions { + /** Only include elements whose bounds intersect the viewport (default true). */ + inViewportOnly?: boolean +} + /** * Serialize a mobile element tree into a depth-indented text snapshot. * * @param root Root JSONElement from the page source XML parse - * @param context Platform, optional device name and viewport + * @param context Platform, optional device name and viewport. + * Viewport dimensions are required when `inViewportOnly` is true. + * @param options {@link MobileSnapshotOptions} */ export function serializeMobileSnapshot( root: JSONElement, @@ -227,9 +348,13 @@ export function serializeMobileSnapshot( platform: 'android' | 'ios' deviceName?: string viewport?: { width: number; height: number } - } + }, + options: MobileSnapshotOptions = {} ): string { const { platform, deviceName, viewport } = context + const { inViewportOnly = true } = options + + const effectiveViewport = viewport ?? 
{ width: 9999, height: 9999 } let header = `[${platform}` if (deviceName) { @@ -241,6 +366,9 @@ export function serializeMobileSnapshot( header += ']' const lines: string[] = [header] - walkMobileTree(root, platform, 1, lines) + walkMobileTree(root, platform, 1, lines, { + inViewportOnly, + viewport: effectiveViewport + }) return lines.join('\n') } diff --git a/packages/elements/tests/snapshot.test.ts b/packages/elements/tests/snapshot.test.ts index 4cc5762..cff20bb 100644 --- a/packages/elements/tests/snapshot.test.ts +++ b/packages/elements/tests/snapshot.test.ts @@ -142,10 +142,10 @@ describe('serializeWebSnapshot', () => { }) expect(out).toContain('[Page: Login — https://example.com/login]') expect(out).toContain('navigation "Main"') - expect(out).toContain('link "Home" → a*=Home') + expect(out).toContain('link "Home" ∈ "Main" → a*=Home') expect(out).toContain('heading[1] "Sign in"') - expect(out).toContain('textbox "Email address" → #email') - expect(out).toContain('button "Sign in" → button*=Sign in') + expect(out).toContain('textbox "Email address" ∈ "Login" → #email') + expect(out).toContain('button "Sign in" ∈ "Login" → button*=Sign in') }) }) From 3015efef5b141910970c1ab8d4e7c6fcd8584a3d Mon Sep 17 00:00:00 2001 From: Vince Graics Date: Sun, 31 May 2026 21:03:13 +0200 Subject: [PATCH 8/9] feat(mobile): Extend mobile snapshot with selectors from `getElements` - Use local fallback `getBestAndroidLocator(attrs)` and `getBestIOSLocator(attrs)` - Emit noisy mobile node roles from the snapshot --- packages/elements/src/index.ts | 5 +- packages/elements/src/locators/index.ts | 4 + packages/elements/src/mobile-elements.ts | 6 + packages/elements/src/snapshot.ts | 518 ++++++++++++++++++++--- packages/elements/tests/snapshot.test.ts | 10 +- 5 files changed, 463 insertions(+), 80 deletions(-) diff --git a/packages/elements/src/index.ts b/packages/elements/src/index.ts index 892e5aa..7aeabf7 100644 --- a/packages/elements/src/index.ts +++ 
b/packages/elements/src/index.ts @@ -17,8 +17,5 @@ export { getElements } from './get-elements.js' export type { VisibleElementsResult } from './get-elements.js' export { serializeWebSnapshot, serializeMobileSnapshot } from './snapshot.js' -export type { - WebSnapshotOptions, - MobileSnapshotOptions -} from './snapshot.js' +export type { WebSnapshotOptions, MobileSnapshotOptions } from './snapshot.js' export type { JSONElement } from './locators/types.js' diff --git a/packages/elements/src/locators/index.ts b/packages/elements/src/locators/index.ts index 1d03d19..20e2330 100644 --- a/packages/elements/src/locators/index.ts +++ b/packages/elements/src/locators/index.ts @@ -209,6 +209,10 @@ function processElement(element: JSONElement, ctx: ProcessingContext): void { return } + // Stash the best locator on the tree node so serializeMobileSnapshot + // can reuse the full locator pipeline instead of recomputing. + element.attributes._selector = locators[0][1] + const transformed = transformElement(element, locators, ctx) if (Object.keys(transformed.locators).length === 0) { return diff --git a/packages/elements/src/mobile-elements.ts b/packages/elements/src/mobile-elements.ts index e57f3cf..1eabbc5 100644 --- a/packages/elements/src/mobile-elements.ts +++ b/packages/elements/src/mobile-elements.ts @@ -161,6 +161,12 @@ export async function getMobileVisibleElementsWithTree( const tree = xmlToJSON(pageSource) + // Stash the source XML on the root element so serializeMobileSnapshot + // can use the full locator pipeline without requiring it as a separate arg. 
+ if (tree) { + tree.attributes._sourceXML = pageSource + } + const elementLocators = generateAllElementLocators(pageSource, { platform, viewportSize, diff --git a/packages/elements/src/snapshot.ts b/packages/elements/src/snapshot.ts index 536ba5f..fd1d1f5 100644 --- a/packages/elements/src/snapshot.ts +++ b/packages/elements/src/snapshot.ts @@ -8,6 +8,11 @@ import type { AccessibilityNode } from './accessibility-tree.js' import type { JSONElement } from './locators/types.js' import { parseAndroidBounds, parseIOSBounds } from './locators/xml-parsing.js' +import { + ANDROID_INTERACTABLE_TAGS, + IOS_INTERACTABLE_TAGS +} from './locators/constants.js' +import { getSuggestedLocators } from './locators/locator-generation.js' /** * Roles that can be interacted with — rendered with `→ selector`. @@ -140,14 +145,10 @@ export function serializeWebSnapshot( ? `${indent}${roleLabel} "${node.name}" ∈ "${purpose}" → ${node.selector}` : `${indent}${roleLabel} "${node.name}" → ${node.selector}` ) + } else if (purpose) { + lines.push(`${indent}${roleLabel} ∈ "${purpose}" → ${node.selector}`) } else { - if (purpose) { - lines.push( - `${indent}${roleLabel} ∈ "${purpose}" → ${node.selector}` - ) - } else { - lines.push(`${indent}${roleLabel} → ${node.selector}`) - } + lines.push(`${indent}${roleLabel} → ${node.selector}`) } } else { // Container / structural: show role + name when present, no selector @@ -172,21 +173,102 @@ function simplifyTag(tagName: string): string { if (dot !== -1) { return tagName.slice(dot + 1) } - // iOS: strip XCUIElementType prefix return tagName.replace(/^XCUIElementType/, '') } +// --------------------------------------------------------------------------- +// Mobile role classification — maps raw Android/iOS class names to semantic +// roles so the snapshot reads like the web version (button, textbox, img, …). 
+// --------------------------------------------------------------------------- + +const ANDROID_ROLE_MAP: Record = { + 'android.widget.Button': 'button', + 'android.widget.ImageButton': 'button', + 'android.widget.ToggleButton': 'button', + 'android.widget.FloatingActionButton': 'button', + 'com.google.android.material.button.MaterialButton': 'button', + 'com.google.android.material.floatingactionbutton.FloatingActionButton': + 'button', + 'android.widget.EditText': 'textbox', + 'android.widget.AutoCompleteTextView': 'textbox', + 'android.widget.MultiAutoCompleteTextView': 'textbox', + 'android.widget.SearchView': 'searchbox', + 'android.widget.ImageView': 'img', + 'android.widget.QuickContactBadge': 'img', + 'android.widget.CheckBox': 'checkbox', + 'android.widget.RadioButton': 'radio', + 'android.widget.Switch': 'switch', + 'android.widget.Spinner': 'combobox', + 'android.widget.SeekBar': 'slider', + 'android.widget.RatingBar': 'slider', + 'android.widget.ProgressBar': 'progressbar', + 'android.widget.TextView': 'statictext', + 'android.widget.CheckedTextView': 'statictext', + 'android.widget.RecyclerView': 'list', + 'android.widget.ListView': 'list', + 'android.widget.GridView': 'list', + 'android.webkit.WebView': 'webview' +} + +const IOS_ROLE_MAP: Record = { + XCUIElementTypeButton: 'button', + XCUIElementTypeLink: 'link', + XCUIElementTypeTextField: 'textbox', + XCUIElementTypeSecureTextField: 'textbox', + XCUIElementTypeTextView: 'textbox', + XCUIElementTypeSearchField: 'searchbox', + XCUIElementTypeImage: 'img', + XCUIElementTypeIcon: 'img', + XCUIElementTypeSwitch: 'switch', + XCUIElementTypeSlider: 'slider', + XCUIElementTypeStepper: 'slider', + XCUIElementTypeCheckBox: 'checkbox', + XCUIElementTypeRadioButton: 'radio', + XCUIElementTypePicker: 'combobox', + XCUIElementTypePickerWheel: 'combobox', + XCUIElementTypeDatePicker: 'combobox', + XCUIElementTypeSegmentedControl: 'combobox', + XCUIElementTypeStaticText: 'statictext', + XCUIElementTypeCell: 
'listitem', + XCUIElementTypeTable: 'list', + XCUIElementTypeCollectionView: 'list' +} + +function classifyMobileRole( + tagName: string, + platform: 'android' | 'ios' +): string { + if (platform === 'android') { + return ANDROID_ROLE_MAP[tagName] || simplifyTag(tagName) + } + return IOS_ROLE_MAP[tagName] || simplifyTag(tagName) +} + +// --------------------------------------------------------------------------- +// Locator generation +// --------------------------------------------------------------------------- + function getBestAndroidLocator( attrs: JSONElement['attributes'] ): string | undefined { + // Pre-computed by the full locator pipeline (generateAllElementLocators). + // Takes priority over the simplified fallback logic below. + if (attrs._selector) { + return attrs._selector + } + // ~ prefix = accessibility-id shorthand in WebdriverIO ($('~foo')) if (attrs['content-desc']) { - return `accessibility-id:${attrs['content-desc']}` + return `~${attrs['content-desc']}` } if (attrs['resource-id']) { return `id:${attrs['resource-id']}` } if (attrs.text) { - return `text:${attrs.text}` + return `~${attrs.text}` + } + // Fallback: class-based locator (only useful with :nth-of-type or index) + if (attrs.class) { + return `class:${simplifyTag(attrs.class)}` } return undefined } @@ -194,43 +276,113 @@ function getBestAndroidLocator( function getBestIOSLocator( attrs: JSONElement['attributes'] ): string | undefined { + // Pre-computed by the full locator pipeline. 
+ if (attrs._selector) { + return attrs._selector + } + // ~ prefix = accessibility-id shorthand (maps to `name` on iOS) if (attrs.name) { - return `accessibility-id:${attrs.name}` + return `~${attrs.name}` } if (attrs.label) { - return `label:${attrs.label}` + return `~${attrs.label}` } if (attrs.value) { - return `value:${attrs.value}` + return `~${attrs.value}` + } + // Fallback: class-based locator + if (attrs.type) { + return `class:${simplifyTag(attrs.type)}` } return undefined } +// --------------------------------------------------------------------------- +// Identity +// --------------------------------------------------------------------------- + function getMobileNodeIdentity( attrs: JSONElement['attributes'], platform: 'android' | 'ios' ): string { if (platform === 'android') { - return attrs['content-desc'] || attrs.text || '' + const contentDesc = attrs['content-desc'] + if (contentDesc) { + return contentDesc + } + if (attrs.text) { + return attrs.text + } + // Fall back to the last segment of the resource-id (e.g. "search_action_bar") + const rid = attrs['resource-id'] + if (rid) { + const slash = rid.lastIndexOf('/') + return slash !== -1 ? rid.slice(slash + 1) : rid + } + return '' } return attrs.name || attrs.label || attrs.value || attrs.text || '' } +// --------------------------------------------------------------------------- +// Interactivity +// --------------------------------------------------------------------------- + +const ANDROID_INTERACTABLE_SET = new Set(ANDROID_INTERACTABLE_TAGS) +const IOS_INTERACTABLE_SET = new Set(IOS_INTERACTABLE_TAGS) + +/** An element is *explicitly* interactive when it carries a click/focus/check + * attribute — as opposed to being interactive only because its tag is in the + * interactable-tag list. Explicit parents should carry the → selector, not + * their tag-interactive children. 
*/ +function isExplicitlyInteractive( + attrs: JSONElement['attributes'], + platform: 'android' | 'ios' +): boolean { + if (platform === 'android') { + return ( + attrs.clickable === 'true' || + attrs.focusable === 'true' || + attrs.checkable === 'true' || + attrs['long-clickable'] === 'true' + ) + } + return attrs.accessible === 'true' +} + function isMobileInteractive( element: JSONElement, platform: 'android' | 'ios' ): boolean { const attrs = element.attributes if (platform === 'android') { - return attrs.clickable === 'true' || attrs['long-clickable'] === 'true' + if (ANDROID_INTERACTABLE_SET.has(element.tagName)) { + return true + } + return ( + attrs.clickable === 'true' || + attrs['long-clickable'] === 'true' || + attrs.focusable === 'true' || + attrs.checkable === 'true' + ) + } + if (IOS_INTERACTABLE_SET.has(element.tagName)) { + return true } - // iOS: accessible="true" or a type known to be interactive return attrs.accessible === 'true' } +// --------------------------------------------------------------------------- +// Viewport +// --------------------------------------------------------------------------- + interface WalkMobileOptions { inViewportOnly: boolean viewport: { width: number; height: number } + /** Raw page-source XML. When provided, the full locator pipeline is used. */ + sourceXML?: string + /** 'uiautomator2' or 'xcuitest'. Required when sourceXML is set. */ + automationName?: string } function isMobileInViewport( @@ -243,8 +395,6 @@ function isMobileInViewport( ? parseAndroidBounds(element.attributes.bounds || '') : parseIOSBounds(element.attributes) - // Elements without explicit bounds dimensions default to "in viewport" - // so we don't silently drop content from sources that omit bounds info. 
if (bounds.width === 0 && bounds.height === 0) { return true } @@ -259,87 +409,302 @@ function isMobileInViewport( ) } -function walkMobileTree( +// --------------------------------------------------------------------------- +// Flat-node representation (mirrors AccessibilityNode so both pipelines share +// inferPurpose, dedup, and rendering logic). +// --------------------------------------------------------------------------- + +interface MobileFlatNode { + role: string + name: string + selector: string + depth: number + isInteractive: boolean + /** True when the element has clickable/focusable/checkable — the intended tap target. */ + isExplicitInteractive: boolean + isInViewport: boolean +} + +/** + * First pass: walk the JSONElement tree, apply viewport filtering and + * collect every node into a flat array with semantic roles and selectors. + */ +function collectMobileNodes( element: JSONElement, platform: 'android' | 'ios', depth: number, - lines: string[], - walkOpts: WalkMobileOptions, - parentIdentity?: string + nodes: MobileFlatNode[], + walkOpts: WalkMobileOptions ): void { const attrs = element.attributes - const tag = simplifyTag(element.tagName) - const indent = ' '.repeat(depth) - const identity = getMobileNodeIdentity(attrs, platform) + const role = classifyMobileRole(element.tagName, platform) + const name = getMobileNodeIdentity(attrs, platform) + const explicit = isExplicitlyInteractive(attrs, platform) const interactive = isMobileInteractive(element, platform) + const inViewport = isMobileInViewport(element, platform, walkOpts.viewport) - const locator = - platform === 'android' - ? getBestAndroidLocator(attrs) - : getBestIOSLocator(attrs) - + // Viewport filtering if (walkOpts.inViewportOnly) { - const inViewport = isMobileInViewport(element, platform, walkOpts.viewport) - if (interactive && !inViewport) { - // Interactive element off-screen — skip entirely. - // Still recurse into children (e.g. 
a scrollable list whose items - // extend beyond the viewport but the scroll container itself is in view). + // Skip this node but still recurse (scroll children may be in view). for (const child of element.children || []) { - walkMobileTree(child, platform, depth + 1, lines, walkOpts, - identity || parentIdentity) + collectMobileNodes(child, platform, depth + 1, nodes, walkOpts) } return } - if (!interactive && !inViewport) { - // Container fully off-screen — collapse to a single label. - lines.push( - identity - ? `${indent}⋯ ${tag} "${identity}" (off-screen)` - : `${indent}⋯ ${tag} (off-screen)` - ) - // Do NOT recurse into children of an off-screen container. + // Collapse off-screen container to a placeholder. + nodes.push({ + role: 'generic', + name: name ? `${role} "${name}"` : role, + selector: '', + depth, + isInteractive: false, + isExplicitInteractive: false, + isInViewport: false + }) return } } - if (interactive && locator) { - if (identity) { - lines.push(`${indent}${tag} "${identity}" → ${locator}`) - } else if (parentIdentity) { - lines.push(`${indent}${tag} ∈ "${parentIdentity}" → ${locator}`) - } else { - lines.push(`${indent}${tag} → ${locator}`) + // Generate a selector for every interactive element. + // Use the full locator pipeline when source XML is available; + // otherwise fall back to the simplified attribute-based heuristics. + let locator = '' + if (interactive) { + if (walkOpts.sourceXML && walkOpts.automationName) { + // Full pipeline: accessible-id, id, text, uiautomator, xpath, class-name + const suggested = getSuggestedLocators( + element, + walkOpts.sourceXML, + walkOpts.automationName, + { + sourceXML: walkOpts.sourceXML, + parsedDOM: null, + isAndroid: platform === 'android' + } + ) + if (suggested.length > 0) { + locator = suggested[0][1] // first = best priority + } + } + if (!locator) { + // Simplified fallback + locator = + (platform === 'android' + ? getBestAndroidLocator(attrs) + : getBestIOSLocator(attrs)) ?? 
'' } - } else { - // Container or non-locatable: show tag + identity if any - lines.push(identity ? `${indent}${tag} "${identity}"` : `${indent}${tag}`) } + nodes.push({ + role, + name, + selector: locator, + depth, + isInteractive: interactive, + isExplicitInteractive: explicit, + isInViewport: inViewport + }) + for (const child of element.children || []) { - walkMobileTree( - child, - platform, - depth + 1, - lines, - walkOpts, - identity || parentIdentity - ) + collectMobileNodes(child, platform, depth + 1, nodes, walkOpts) } } +// --------------------------------------------------------------------------- +// Context inference — shared with the web pipeline. +// Same-depth structural siblings (img, statictext, heading, …) provide +// context for following interactive nodes. +// --------------------------------------------------------------------------- + +const MOBILE_STRUCTURAL_ROLES = new Set([ + 'img', + 'heading', + 'list', + 'listitem', + 'webview', + 'progressbar', + 'slider', + 'switch', + 'generic' +]) + +function mobileInferPurpose( + nodes: MobileFlatNode[], + index: number +): string | undefined { + const myDepth = nodes[index].depth + for (let i = index - 1; i >= 0; i--) { + if (nodes[i].depth <= myDepth && nodes[i].name) { + if ( + nodes[i].depth === myDepth && + !MOBILE_STRUCTURAL_ROLES.has(nodes[i].role) + ) { + continue + } + return nodes[i].name + } + } + return undefined +} + +// --------------------------------------------------------------------------- +// When a tag-only-interactive child (e.g. a statictext TextView) sits +// directly under an explicitly-interactive parent (e.g. a clickable +// LinearLayout row), the *parent* should carry the → selector — the +// child is just a label. Suppress the child's interactivity so the +// parent renders as the actionable element. 
+// --------------------------------------------------------------------------- + +function suppressTagOnlyChildren(nodes: MobileFlatNode[]): void { + for (let i = 0; i < nodes.length; i++) { + const node = nodes[i] + if (!node.isInteractive || node.isExplicitInteractive) { + continue + } + // Walk up through ALL ancestors looking for an explicitly-interactive + // parent. The immediate depth-1 parent may just be a layout wrapper; + // the real clickable row could be 2-3 levels up. + for (let j = i - 1; j >= 0; j--) { + if (nodes[j].depth < node.depth) { + if (nodes[j].isExplicitInteractive) { + node.isInteractive = false + break // found — suppress and stop + } + // keep looking upward through the ancestor chain + } + } + } +} + +// --------------------------------------------------------------------------- +// Render pass: flat nodes into lines with ∈ context, dedup, noise filter, +// and class-instance indexing. +// --------------------------------------------------------------------------- + +/** Layout roles that carry no semantic meaning by themselves. */ +const NOISY_ROLES = new Set([ + 'FrameLayout', 'LinearLayout', 'ViewGroup', 'RelativeLayout', + 'View', 'CardView', 'ConstraintLayout', 'ScrollView' +]) + +/** + * Pre-count selector occurrences so we can attach .instance(N) suffixes + * to duplicate selectors. + */ +function countSelectors(nodes: MobileFlatNode[]): Map { + const counts = new Map() + for (const node of nodes) { + if (node.selector) { + counts.set(node.selector, (counts.get(node.selector) ?? 0) + 1) + } + } + return counts +} + +function renderMobileNodes(nodes: MobileFlatNode[]): string[] { + const lines: string[] = [] + const selectorCounts = countSelectors(nodes) + const selectorIndex = new Map() + + for (let i = 0; i < nodes.length; i++) { + const node = nodes[i] + const indent = ' '.repeat(node.depth + 1) + + // Collapse anonymous layout containers at depth ≥ 2. + // Keep depth 0-1 structural chrome and any named container. 
+ if ( + NOISY_ROLES.has(node.role) && + !node.name && + node.depth > 1 && + !node.isInteractive + ) { + continue + } + + // Off-screen containers rendered as collapsed placeholders + if (node.isInViewport === false && !node.isInteractive) { + lines.push(`${indent}⋯ ${node.name} (off-screen)`) + continue + } + + // Dedup: skip statictext whose text is echoed by the parent interactive element + if (node.role === 'statictext' && node.name) { + let echoedByParent = false + for (let j = i - 1; j >= 0; j--) { + if (nodes[j].depth < node.depth) { + if ( + nodes[j].isInteractive && + nodes[j].name && + nodes[j].name.includes(node.name) + ) { + echoedByParent = true + } + break + } + } + if (echoedByParent) { + continue + } + } + + if (node.isInteractive && node.selector) { + // Append .instance(N) when the same selector repeats + let selector = node.selector + const total = selectorCounts.get(selector) ?? 1 + if (total > 1) { + const idx = selectorIndex.get(selector) ?? 0 + selectorIndex.set(selector, idx + 1) + selector = `${selector}.instance(${idx})` + } + + const purpose = mobileInferPurpose(nodes, i) + if (node.name) { + lines.push( + purpose + ? `${indent}${node.role} "${node.name}" ∈ "${purpose}" → ${selector}` + : `${indent}${node.role} "${node.name}" → ${selector}` + ) + } else if (purpose) { + lines.push(`${indent}${node.role} ∈ "${purpose}" → ${selector}`) + } else { + lines.push(`${indent}${node.role} → ${selector}`) + } + } else { + // Container / structural / non-locatable + lines.push( + node.name + ? `${indent}${node.role} "${node.name}"` + : `${indent}${node.role}` + ) + } + } + + return lines +} + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + export interface MobileSnapshotOptions { /** Only include elements whose bounds intersect the viewport (default true). */ inViewportOnly?: boolean + /** + * Raw XML page source string. 
When provided the full locator pipeline + * (getSuggestedLocators) runs on every interactive node, producing the same + * selectors that getElements() returns. Omit to use simplified heuristics. + */ + sourceXML?: string } /** * Serialize a mobile element tree into a depth-indented text snapshot. * * @param root Root JSONElement from the page source XML parse - * @param context Platform, optional device name and viewport. - * Viewport dimensions are required when `inViewportOnly` is true. + * @param context Platform, optional device name, viewport, and source XML. + * Include `sourceXML` to use the full locator pipeline. * @param options {@link MobileSnapshotOptions} */ export function serializeMobileSnapshot( @@ -348,13 +713,19 @@ export function serializeMobileSnapshot( platform: 'android' | 'ios' deviceName?: string viewport?: { width: number; height: number } + /** Raw page-source XML. When set, selectors match getElements() output. */ + sourceXML?: string }, options: MobileSnapshotOptions = {} ): string { - const { platform, deviceName, viewport } = context + const { platform, deviceName, viewport, sourceXML } = context const { inViewportOnly = true } = options + // Auto-detect source XML stashed by getMobileVisibleElementsWithTree + const effectiveXML = sourceXML || root.attributes._sourceXML + const effectiveViewport = viewport ?? { width: 9999, height: 9999 } + const automationName = platform === 'android' ? 'uiautomator2' : 'xcuitest' let header = `[${platform}` if (deviceName) { @@ -365,10 +736,17 @@ export function serializeMobileSnapshot( } header += ']' - const lines: string[] = [header] - walkMobileTree(root, platform, 1, lines, { + const nodes: MobileFlatNode[] = [] + collectMobileNodes(root, platform, 0, nodes, { inViewportOnly, - viewport: effectiveViewport + viewport: effectiveViewport, + sourceXML: effectiveXML, + automationName: effectiveXML ? 
automationName : undefined }) - return lines.join('\n') + + // Let explicitly-interactive parents carry the → selector + suppressTagOnlyChildren(nodes) + + const lines = renderMobileNodes(nodes) + return [header, ...lines].join('\n') } diff --git a/packages/elements/tests/snapshot.test.ts b/packages/elements/tests/snapshot.test.ts index cff20bb..3bd008e 100644 --- a/packages/elements/tests/snapshot.test.ts +++ b/packages/elements/tests/snapshot.test.ts @@ -181,7 +181,7 @@ describe('serializeMobileSnapshot', () => { }) ]) const out = serializeMobileSnapshot(root, { platform: 'android' }) - expect(out).toContain('Button "Skip" → accessibility-id:Skip') + expect(out).toContain('button "Skip" → ~Skip') }) it('falls back to resource-id when no content-desc', () => { @@ -194,7 +194,7 @@ describe('serializeMobileSnapshot', () => { }) ]) const out = serializeMobileSnapshot(root, { platform: 'android' }) - expect(out).toContain('EditText → id:com.example:id/search') + expect(out).toContain('textbox "search" → id:com.example:id/search') }) it('renders ∈ ancestor context when element has no identity', () => { @@ -214,7 +214,7 @@ describe('serializeMobileSnapshot', () => { ]) const out = serializeMobileSnapshot(root, { platform: 'android' }) expect(out).toContain( - 'EditText ∈ "Search section" → id:com.example:id/search' + 'textbox "search" ∈ "Search section" → id:com.example:id/search' ) }) @@ -227,9 +227,7 @@ describe('serializeMobileSnapshot', () => { }) ]) const out = serializeMobileSnapshot(root, { platform: 'ios' }) - expect(out).toContain( - 'Button "Accept All Cookies" → accessibility-id:Accept All Cookies' - ) + expect(out).toContain('button "Accept All Cookies" → ~Accept All Cookies') }) it('simplifies iOS XCUIElementType prefix', () => { From 44f94de80c99577ce0c3d48910c0e4e77f37a8f7 Mon Sep 17 00:00:00 2001 From: Vince Graics Date: Sun, 31 May 2026 21:21:08 +0200 Subject: [PATCH 9/9] docs(roadmap): Include gaps and improvement opportunities --- 
packages/elements/ROADMAP.md | 81 ++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 packages/elements/ROADMAP.md diff --git a/packages/elements/ROADMAP.md b/packages/elements/ROADMAP.md new file mode 100644 index 0000000..f387fb4 --- /dev/null +++ b/packages/elements/ROADMAP.md @@ -0,0 +1,81 @@ +# @wdio/elements Roadmap + +## Current state (May 2026) + +The package delivers LLM-readable element snapshots for both web and mobile: + +| Capability | Web | Mobile | +|---|---|---| +| Interactable element list | `getInteractableBrowserElements()` | `getMobileVisibleElements()` | +| Semantic tree | `getBrowserAccessibilityTree()` | *(raw `JSONElement` only)* | +| Snapshot serialization | `serializeWebSnapshot()` | `serializeMobileSnapshot()` | +| Unified API | `getElements()` returns both | `getElements()` returns both | +| Viewport filtering | `inViewportOnly` (default true) | `inViewportOnly` (default true) | +| Role classification | Computed in-browser from tag/ARIA | `ANDROID_ROLE_MAP` / `IOS_ROLE_MAP` in snapshot.ts | +| Locator generation | CSS selectors in browser script | `getSuggestedLocators()` from locator-generation.ts | +| Context disambiguation | `∈` via `inferPurpose()` | `∈` via `mobileInferPurpose()` | +| Duplicate selector indexing | N/A (selectors are unique) | `.instance(N)` suffix | + +## Architectural concerns + +### 1. Two independent mobile pipelines + +`serializeMobileSnapshot` in `snapshot.ts` has its own copies of: + +- **Role classification** — `ANDROID_ROLE_MAP` / `IOS_ROLE_MAP` duplicate logic from `locators/constants.ts` and `locators/element-filter.ts`. +- **Interactivity detection** — `isMobileInteractive()` shadows `isInteractableElement()` from `element-filter.ts`. They use different criteria (tag-based vs attribute-based) and can disagree. +- **Locator generation** — `getBestAndroidLocator()` / `getBestIOSLocator()` are simplified fallbacks. 
The full pipeline (`getSuggestedLocators()`) is now wired in when source XML is available, but the fallback still exists and the two paths can produce different selectors for the same element. + +These should be collapsed: `serializeMobileSnapshot` should consume pre-computed roles, interactivity flags, and selectors from the locator pipeline, not recompute them. + +### 2. No mobile equivalent of `getBrowserAccessibilityTree()` + +The web path returns a flat `AccessibilityNode[]` with roles, names, selectors, depths, and state. The mobile path returns a raw `JSONElement` tree — the snapshot does all enrichment internally via `collectMobileNodes()` → `MobileFlatNode[]` (a private interface). There is no public function to get an enriched flat node list for mobile. + +**Proposal:** Extract `collectMobileNodes()` into a public `getMobileAccessibilityTree()` that returns `MobileFlatNode[]` (or a shared type). `serializeMobileSnapshot()` becomes a pure formatting pass — like `serializeWebSnapshot()` already is. + +### 3. Layout noise in mobile snapshots + +The Android view hierarchy includes every layout container (`FrameLayout`, `LinearLayout`, `ViewGroup`, etc.). The current noise filter (`NOISY_ROLES`) collapses anonymous containers at depth ≥ 2, but named containers and depth 0-1 scaffolding still appear. The web a11y tree doesn't have this problem because the browser's accessibility computation already skips layout-only `
<div>`s. + +**Proposal:** A `collapseContainers` option on the snapshot (default `true`) that skips any container without an interactive descendant. Alternatively, the tree collection pass could flag "informative" vs "structural" containers and let the renderer decide. + +### 4. Selector format for mobile + +Mobile selectors are Appium/WDIO-specific strings (`~Accessibility`, `android=new UiSelector()...`, `id:com.example:id/foo`). The web path outputs CSS selectors (`a*=Highlights`, `#cart-icon-bubble`). An LLM/agent needs different selector parsing logic per platform. There's no common selector abstraction. + +**Proposal:** A `SelectorString` type with platform-aware parsing, or at minimum consistent prefix conventions documented for LLM consumption. + +### 5. The raw tree doesn't carry locators unless processed + +`getMobileVisibleElementsWithTree()` returns `{ elements, tree }` where `tree` is the raw `xmlToJSON()` output. Locators are only on `elements` (from `generateAllElementLocators()`). The snapshot reads locators by running `getSuggestedLocators()` again (or falling back). If a consumer wants to annotate the tree themselves, they must re-run the locator pipeline. + +**Proposal:** Enrich the tree in-place during `generateAllElementLocators()` — attach `_selector`, `_role`, and `_interactive` attributes to each `JSONElement` node that passes the filter. The raw tree becomes self-describing. 
+ +## Improvement backlog + +| Priority | What | Effort | +|---|---|---| +| P0 | Merge `isMobileInteractive` + role classification into `generateAllElementLocators` — one source of truth | Medium | +| P1 | Extract `getMobileAccessibilityTree()` as a public API returning enriched flat nodes | Medium | +| P1 | Enrich `JSONElement` tree nodes with locators during `generateAllElementLocators()` | Small | +| P2 | `collapseContainers` option on `serializeMobileSnapshot` | Small | +| P2 | Unify web + mobile serialization into a single `serializeSnapshot()` function | Large | +| P3 | Document selector format conventions for LLM consumption | Small | +| P3 | Add `checked`/`selected`/`expanded` state rendering to mobile snapshot (parity with web) | Small | + +## Verified capabilities + +- [x] Web: viewport-only snapshot with semantic roles and unique CSS selectors +- [x] Web: `∈` disambiguation for duplicate selectors (6 "Add to Wishlist" buttons → each with book title context) +- [x] Web: `statictext` role capturing visible text (book titles, promo copy, cookie text) +- [x] Web: deduplication of echoed text (child text already in parent name → skipped) +- [x] Mobile: semantic role mapping (TextView→statictext, ImageView→img, Button→button, etc.) +- [x] Mobile: full-pipeline selectors via `getSuggestedLocators()` wired into snapshot +- [x] Mobile: `~` prefix for accessibility-id, `id:` for resource-id, `android=new UiSelector()...` for compound +- [x] Mobile: `.instance(N)` indexing for duplicate selectors +- [x] Mobile: explicit tap-target promotion (clickable parent carries `→`, label children provide `∈` context) +- [x] Mobile: layout noise collapse for anonymous containers +- [x] Mobile: `∈` context from actual parent, not previous list-item sibling +- [x] Unified `getElements()` API returning `{ elements, tree }` for both platforms +- [x] `inViewportOnly` default `true` across all entry points with per-function toggles