diff --git a/web_hacker/cdp/interaction_monitor.py b/web_hacker/cdp/interaction_monitor.py index e9aa8b0..1f365e1 100644 --- a/web_hacker/cdp/interaction_monitor.py +++ b/web_hacker/cdp/interaction_monitor.py @@ -13,6 +13,14 @@ from web_hacker.config import Config from web_hacker.utils.cdp_utils import write_jsonl, write_json_file +# Import UiElement and UiInteractionEvent models +from web_hacker.data_models.ui_elements import ( + UiElement, Identifier, IdentifierType, BoundingBox +) +from web_hacker.data_models.ui_interactions import ( + UiInteractionEvent, InteractionType, Interaction +) + logging.basicConfig(level=Config.LOG_LEVEL, format=Config.LOG_FORMAT, datefmt=Config.LOG_DATE_FORMAT) logger = logging.getLogger(__name__) @@ -92,58 +100,143 @@ def _inject_interaction_listeners(self, cdp_session): check(); }} - // Helper function to get element details + // Helper function to get element details (UiElement format) function getElementDetails(element) {{ if (!element) return null; + // Collect all attributes + const attributes = {{}}; + if (element.attributes) {{ + for (let i = 0; i < element.attributes.length; i++) {{ + const attr = element.attributes[i]; + attributes[attr.name] = attr.value; + }} + }} + + // Parse class names into array + const classNames = element.className && typeof element.className === 'string' + ? element.className.split(/\\s+/).filter(c => c) + : []; + const details = {{ - tagName: element.tagName || '', - id: element.id || '', - className: element.className || '', - name: element.name || '', - type: element.type || '', - value: element.value || '', - text: element.textContent ? 
element.textContent.substring(0, 200) : '', - href: element.href || '', - src: element.src || '', - role: element.getAttribute('role') || '', - ariaLabel: element.getAttribute('aria-label') || '', - title: element.title || '', - placeholder: element.placeholder || '', + tag_name: (element.tagName || '').toLowerCase(), + id: element.id || null, + name: element.name || null, + class_names: classNames.length > 0 ? classNames : null, + type_attr: element.type || null, + role: element.getAttribute('role') || null, + aria_label: element.getAttribute('aria-label') || null, + placeholder: element.placeholder || null, + title: element.title || null, + href: element.href || null, + src: element.src || null, + value: element.value || null, + text: element.textContent ? element.textContent.trim().substring(0, 200) : null, + attributes: Object.keys(attributes).length > 0 ? attributes : null, }}; - // Get XPath-like path + // Improved selector generation function getElementPath(el) {{ if (!el || el.nodeType !== 1) return ''; const path = []; - while (el && el.nodeType === 1) {{ - let selector = el.tagName.toLowerCase(); - if (el.id) {{ - selector += '#' + el.id; - }} else if (el.className) {{ - const classes = el.className.split(' ').filter(c => c).slice(0, 3).join('.'); - if (classes) selector += '.' + classes; + let current = el; + + while (current && current.nodeType === 1) {{ + let selector = current.tagName.toLowerCase(); + + // 1. ID is gold standard + if (current.id) {{ + selector += '#' + current.id; + path.unshift(selector); + break; // ID is usually unique enough + }} + + // 2. Stable attributes + const stableAttrs = ['name', 'data-testid', 'data-test-id', 'data-cy', 'role', 'placeholder', 'aria-label', 'title']; + let foundStable = false; + for (const attr of stableAttrs) {{ + const val = current.getAttribute(attr); + if (val) {{ + selector += `[${{attr}}="${{val.replace(/"/g, '\\"')}}"]`; + foundStable = true; + break; + }} + }} + + // 3. 
Classes (careful filtering) + if (!foundStable && current.className && typeof current.className === 'string') {{ + // Filter out likely generated classes + const classes = current.className.split(/\\s+/) + .filter(c => c) + .filter(c => !c.startsWith('sc-')) // Styled Components + .filter(c => !c.match(/^[a-zA-Z0-9]{{10,}}$/)) // Long random strings + .filter(c => !c.match(/css-/)); // Emotion/CSS-in-JS + + if (classes.length > 0) {{ + selector += '.' + classes.join('.'); + }} }} + + // 4. Nth-child fallback if no unique traits + if (!foundStable && !current.id) {{ + let sibling = current; + let index = 1; + while (sibling = sibling.previousElementSibling) {{ + if (sibling.tagName === current.tagName) index++; + }} + if (index > 1) selector += `:nth-of-type(${{index}})`; + }} + path.unshift(selector); - el = el.parentElement; + current = current.parentElement; if (path.length > 5) break; // Limit depth }} return path.join(' > '); }} - details.path = getElementPath(element); + details.css_path = getElementPath(element); + + // Get XPath (Full structural path like /html/body/div[1]/input[1]) + function getXPath(el) {{ + if (!el || el.nodeType !== 1) return ''; + + const parts = []; + while (el && el.nodeType === 1) {{ + let part = el.tagName.toLowerCase(); + + // Count all previous siblings with the same tag name (1-based indexing) + let index = 1; + let sibling = el.previousElementSibling; + while (sibling) {{ + if (sibling.nodeType === 1 && sibling.tagName === el.tagName) {{ + index++; + }} + sibling = sibling.previousElementSibling; + }} + + // Always include index (XPath is 1-based) + part += `[${{index}}]`; + parts.unshift(part); + + el = el.parentElement; + }} + return '/' + parts.join('/'); + }} + + details.xpath = getXPath(element); + details.url = window.location.href; // Get bounding box try {{ const rect = element.getBoundingClientRect(); - details.boundingBox = {{ - x: Math.round(rect.x), - y: Math.round(rect.y), - width: Math.round(rect.width), - height: 
Math.round(rect.height) + details.bounding_box = {{ + x: rect.x, + y: rect.y, + width: rect.width, + height: rect.height }}; }} catch (e) {{ - details.boundingBox = null; + details.bounding_box = null; }} return details; @@ -156,20 +249,19 @@ def _inject_interaction_listeners(self, cdp_session): type: type, timestamp: Date.now(), event: {{ - type: event.type, - button: event.button !== undefined ? event.button : null, - key: event.key || null, - code: event.code || null, - keyCode: event.keyCode || null, - which: event.which || null, - ctrlKey: event.ctrlKey || false, - shiftKey: event.shiftKey || false, - altKey: event.altKey || false, - metaKey: event.metaKey || false, - clientX: event.clientX || null, - clientY: event.clientY || null, - pageX: event.pageX || null, - pageY: event.pageY || null, + mouse_button: event.button !== undefined ? event.button : null, + key_value: event.key || null, + key_code: event.code || null, + key_code_deprecated: event.keyCode || null, + key_which_deprecated: event.which || null, + ctrl_pressed: event.ctrlKey || false, + shift_pressed: event.shiftKey || false, + alt_pressed: event.altKey || false, + meta_pressed: event.metaKey || false, + mouse_x_viewport: event.clientX || null, + mouse_y_viewport: event.clientY || null, + mouse_x_page: event.pageX || null, + mouse_y_page: event.pageY || null, }}, element: details, url: window.location.href @@ -189,40 +281,40 @@ def _inject_interaction_listeners(self, cdp_session): waitForBinding(function() {{ // Mouse event listeners document.addEventListener('click', function(event) {{ - logInteraction('mouse_click', event, event.target); + logInteraction('click', event, event.target); }}, true); document.addEventListener('mousedown', function(event) {{ - logInteraction('mouse_down', event, event.target); + logInteraction('mousedown', event, event.target); }}, true); document.addEventListener('mouseup', function(event) {{ - logInteraction('mouse_up', event, event.target); + 
logInteraction('mouseup', event, event.target); }}, true); document.addEventListener('dblclick', function(event) {{ - logInteraction('mouse_double_click', event, event.target); + logInteraction('dblclick', event, event.target); }}, true); document.addEventListener('contextmenu', function(event) {{ - logInteraction('mouse_context_menu', event, event.target); + logInteraction('contextmenu', event, event.target); }}, true); document.addEventListener('mouseover', function(event) {{ - logInteraction('mouse_over', event, event.target); + logInteraction('mouseover', event, event.target); }}, true); // Keyboard event listeners document.addEventListener('keydown', function(event) {{ - logInteraction('key_down', event, event.target); + logInteraction('keydown', event, event.target); }}, true); document.addEventListener('keyup', function(event) {{ - logInteraction('key_up', event, event.target); + logInteraction('keyup', event, event.target); }}, true); document.addEventListener('keypress', function(event) {{ - logInteraction('key_press', event, event.target); + logInteraction('keypress', event, event.target); }}, true); // Input events (for form fields) @@ -310,15 +402,105 @@ def _on_binding_called(self, msg, cdp_session): return False # Parse the interaction data from JavaScript - interaction_data = json.loads(payload) + raw_data = json.loads(payload) - # Add server-side timestamp - interaction_data["server_timestamp"] = time.time() + try: + # Convert element details to UiElement format + element_data = raw_data.get("element") + ui_element = None + + if element_data: + # Convert bounding_box if present + bounding_box = None + if element_data.get("bounding_box"): + bb_data = element_data["bounding_box"] + bounding_box = BoundingBox( + x=bb_data.get("x", 0), + y=bb_data.get("y", 0), + width=bb_data.get("width", 0), + height=bb_data.get("height", 0) + ) + + # Create UiElement + ui_element = UiElement( + tag_name=element_data.get("tag_name", ""), + id=element_data.get("id"), + 
name=element_data.get("name"), + class_names=element_data.get("class_names"), + type_attr=element_data.get("type_attr"), + role=element_data.get("role"), + aria_label=element_data.get("aria_label"), + placeholder=element_data.get("placeholder"), + title=element_data.get("title"), + href=element_data.get("href"), + src=element_data.get("src"), + value=element_data.get("value"), + text=element_data.get("text"), + attributes=element_data.get("attributes"), + bounding_box=bounding_box, + css_path=element_data.get("css_path"), + xpath=element_data.get("xpath"), + url=element_data.get("url") or raw_data.get("url"), + ) + + # Build default Identifiers + ui_element.build_default_Identifiers() + + # Convert event data to Interaction format + interaction_details = None + event_raw = raw_data.get("event") + if event_raw: + interaction_details = Interaction( + mouse_button=event_raw.get("mouse_button"), + key_value=event_raw.get("key_value"), + key_code=event_raw.get("key_code"), + key_code_deprecated=event_raw.get("key_code_deprecated"), + key_which_deprecated=event_raw.get("key_which_deprecated"), + ctrl_pressed=event_raw.get("ctrl_pressed", False), + shift_pressed=event_raw.get("shift_pressed", False), + alt_pressed=event_raw.get("alt_pressed", False), + meta_pressed=event_raw.get("meta_pressed", False), + mouse_x_viewport=event_raw.get("mouse_x_viewport"), + mouse_y_viewport=event_raw.get("mouse_y_viewport"), + mouse_x_page=event_raw.get("mouse_x_page"), + mouse_y_page=event_raw.get("mouse_y_page"), + ) + + # Get interaction type (convert string to enum) + interaction_type_str = raw_data.get("type", "unknown") + try: + interaction_type = InteractionType(interaction_type_str) + except ValueError: + # If type doesn't match enum, log warning and skip + logger.warning("Unknown interaction type: %s, skipping", interaction_type_str) + return False + + # Create UiInteractionEvent + if ui_element is None: + logger.warning("Missing element data for interaction, skipping") + return 
"""
web_hacker/data_models/ui_elements.py

UI element data models for robust element identification and replay.
"""

from enum import StrEnum
from typing import Dict, List
from pydantic import BaseModel, Field
import logging

logger = logging.getLogger(__name__)


class IdentifierType(StrEnum):
    """Kinds of locator an `Identifier` can carry."""

    CSS = "css"
    XPATH = "xpath"
    TEXT = "text"    # e.g. "button with label X"
    ROLE = "role"    # e.g. role+name/aria-label
    NAME = "name"    # input[name="..."]
    ID = "id"        # #id


# Default priority mapping for selector types (lower = higher priority)
DEFAULT_IDENTIFIER_PRIORITIES: Dict[IdentifierType, int] = {
    IdentifierType.ID: 10,     # Highest priority - IDs are unique
    IdentifierType.NAME: 20,   # Form controls by name are very stable
    IdentifierType.CSS: 30,    # CSS Identifiers (with stable attributes)
    IdentifierType.ROLE: 40,   # ARIA roles + labels
    IdentifierType.TEXT: 50,   # Text-based matching
    IdentifierType.XPATH: 80,  # XPath (often brittle, last resort)
}


def _escape_css_string(value: str) -> str:
    """Escape `value` for use inside a double-quoted CSS string."""
    # Backslash must be handled first so the escapes added below are not
    # themselves re-escaped.
    return (
        value.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\a ")
    )


class Identifier(BaseModel):
    """
    A single way to locate an element.
    `value` is the raw string (CSS, XPath, etc.)
    `type` tells the executor how to interpret it.
    `priority` controls which selector to try first (lower = higher priority).
    If not specified, uses the default priority for the selector type.
    """
    type: IdentifierType
    value: str
    priority: int | None = Field(
        default=None,
        description="Priority for this selector (lower = higher priority). If None, uses default for selector type.",
    )

    description: str | None = Field(
        default=None,
        description="Human readable note (e.g. 'primary stable selector').",
    )

    def get_priority(self) -> int:
        """Get the effective priority, using default if not set."""
        if self.priority is not None:
            return self.priority
        # Types missing from the default table sort after every known type.
        return DEFAULT_IDENTIFIER_PRIORITIES.get(self.type, 100)


class BoundingBox(BaseModel):
    """Rectangle described by its x/y origin and its width/height."""

    x: float
    y: float
    width: float
    height: float


class UiElement(BaseModel):
    """
    Unified description of a UI element sufficient for robust replay.

    - Raw DOM data (tag, attributes, text)
    - Multiple Identifiers (CSS, XPath, text-based, etc.)
    - Context (URL, frame)
    """
    # Context
    url: str | None = Field(
        default=None,
        description="Page URL where this element was observed.",
    )

    # Core DOM identity
    tag_name: str
    id: str | None = None
    name: str | None = None
    class_names: List[str] | None = Field(default=None, description="List of CSS class names.")

    # Common attributes
    type_attr: str | None = Field(default=None, description="Input type, button type, etc.")
    role: str | None = None
    aria_label: str | None = None
    placeholder: str | None = None
    title: str | None = None
    href: str | None = None
    src: str | None = None
    value: str | None = None

    # Full attribute map for anything else (data-*, etc.)
    attributes: Dict[str, str] | None = Field(
        default=None,
        description="All raw attributes from the DOM element.",
    )

    # Content
    text: str | None = Field(
        default=None,
        description="Trimmed inner text (useful for text-based Identifiers).",
    )

    # Geometry
    bounding_box: BoundingBox | None = None

    # Locators (multiple ways to find it again)
    Identifiers: List[Identifier] | None = Field(
        default=None,
        description="Ordered list of Identifiers to try when locating this element.",
    )

    # Convenience accessors for most common Identifiers
    css_path: str | None = None  # from getElementPath
    xpath: str | None = None     # full xpath

    def build_default_Identifiers(self) -> None:
        """
        Populate `Identifiers` from known fields if empty.
        Call this once after constructing from raw DOM.

        Does nothing when `Identifiers` already holds entries. Otherwise one
        identifier is added for each of id, name, placeholder, role, text,
        css_path and xpath that is set, and the list is then sorted by
        effective priority so that list order matches the order in which the
        identifiers should be tried. If none of those fields yields an
        identifier, a single stable-looking class name is used as a fallback.

        Side effect: `attributes` and `class_names` are replaced by empty
        containers when they are None.
        """
        if self.Identifiers is None:
            self.Identifiers = []
        elif self.Identifiers:
            return

        # Normalise the optional containers to empty ones.
        if self.attributes is None:
            self.attributes = {}
        if self.class_names is None:
            self.class_names = []

        def add(kind: IdentifierType, value: str, description: str) -> None:
            """Append one identifier carrying the default priority of `kind`."""
            self.Identifiers.append(
                Identifier(
                    type=kind,
                    value=value,
                    priority=DEFAULT_IDENTIFIER_PRIORITIES[kind],
                    description=description,
                )
            )

        if self.id:
            add(IdentifierType.ID, self.id, "Locate by DOM id")

        if self.name:
            add(IdentifierType.NAME, self.name, "Locate by name attribute")

        if self.placeholder:
            # The placeholder is free text, so it must be escaped before it
            # is embedded in a quoted attribute selector.
            escaped = _escape_css_string(self.placeholder)
            add(
                IdentifierType.CSS,
                f'{self.tag_name.lower()}[placeholder="{escaped}"]',
                "Locate by placeholder",
            )

        if self.role:
            add(IdentifierType.ROLE, self.role, f"Locate by role={self.role}")

        if self.text:
            snippet = self.text.strip()
            if snippet:
                add(IdentifierType.TEXT, snippet, "Locate by text content")

        # Direct CSS and XPath if we have them
        if self.css_path:
            add(IdentifierType.CSS, self.css_path, "Recorded CSS path")
        if self.xpath:
            add(IdentifierType.XPATH, self.xpath, "Full XPath (last resort)")

        # list.sort is stable, so identifiers sharing a priority (the two CSS
        # entries) keep their insertion order: placeholder before css_path.
        self.Identifiers.sort(key=lambda identifier: identifier.get_priority())

        # Fallback: first stable-looking class
        if not self.Identifiers and self.class_names:
            # Filter out classes that are likely to be unstable: styled
            # components ("sc-"), CSS-in-JS ("css-") and long purely
            # alphanumeric tokens that look generated.
            stable_classes = [
                c for c in self.class_names
                if not c.startswith("sc-")
                and not c.startswith("css-")
                and (not c.isalnum() or len(c) < 10)
            ]

            if stable_classes:
                add(
                    IdentifierType.CSS,
                    f".{stable_classes[0]}",
                    "Fallback by single stable-looking class",
                )

        if not self.Identifiers:
            logger.warning("No Identifiers found for element %s", self.model_dump_json())
"""
web_hacker/data_models/ui_interactions.py

UI interaction data models for tracking user interactions with web elements.
"""

from enum import StrEnum
from pydantic import BaseModel, Field

from web_hacker.data_models.ui_elements import UiElement


class InteractionType(StrEnum):
    """Types of UI interactions that match real DOM event names."""

    # Mouse events
    CLICK = "click"
    MOUSEDOWN = "mousedown"
    MOUSEUP = "mouseup"
    DBLCLICK = "dblclick"
    CONTEXTMENU = "contextmenu"
    MOUSEOVER = "mouseover"

    # Keyboard events
    KEYDOWN = "keydown"
    KEYUP = "keyup"
    KEYPRESS = "keypress"  # Deprecated but still emitted by browsers

    # Form events
    INPUT = "input"
    CHANGE = "change"

    # Focus events
    FOCUS = "focus"
    BLUR = "blur"


class Interaction(BaseModel):
    """
    Details about how an interaction occurred.

    Contains browser event properties like mouse coordinates, keyboard keys,
    and modifier keys. These details provide the "how" of an interaction,
    while InteractionType provides the "what".
    """
    # Mouse properties
    mouse_button: int | None = Field(
        default=None,
        description="Mouse button pressed (0=left, 1=middle, 2=right). None for non-mouse interactions."
    )
    # Coordinates are floats: browsers expose clientX/clientY/pageX/pageY as
    # doubles, and a fractional value would fail validation on an int field.
    mouse_x_viewport: float | None = Field(
        default=None,
        description="X coordinate relative to viewport. None for non-mouse interactions."
    )
    mouse_y_viewport: float | None = Field(
        default=None,
        description="Y coordinate relative to viewport. None for non-mouse interactions."
    )
    mouse_x_page: float | None = Field(
        default=None,
        description="X coordinate relative to page (includes scroll). None for non-mouse interactions."
    )
    mouse_y_page: float | None = Field(
        default=None,
        description="Y coordinate relative to page (includes scroll). None for non-mouse interactions."
    )

    # Keyboard properties
    key_value: str | None = Field(
        default=None,
        description="The key value pressed (e.g., 'a', 'Enter', 'Shift'). None for non-keyboard interactions."
    )
    key_code: str | None = Field(
        default=None,
        description="The physical key code (e.g., 'KeyA', 'Enter', 'ShiftLeft'). None for non-keyboard interactions."
    )
    key_code_deprecated: int | None = Field(
        default=None,
        description="Deprecated numeric key code. None for non-keyboard interactions."
    )
    key_which_deprecated: int | None = Field(
        default=None,
        description="Deprecated numeric key code. None for non-keyboard interactions."
    )

    # Modifier keys (apply to both mouse and keyboard interactions)
    ctrl_pressed: bool = Field(
        default=False,
        description="Whether the Ctrl key was pressed during the interaction."
    )
    shift_pressed: bool = Field(
        default=False,
        description="Whether the Shift key was pressed during the interaction."
    )
    alt_pressed: bool = Field(
        default=False,
        description="Whether the Alt key was pressed during the interaction."
    )
    meta_pressed: bool = Field(
        default=False,
        description="Whether the Meta/Cmd key was pressed during the interaction."
    )


class UiInteractionEvent(BaseModel):
    """
    Complete UI interaction event record.

    Represents a single user interaction with a web element, including:
    - What type of interaction occurred
    - When it occurred (timestamp)
    - What element was interacted with (UiElement)
    - How it occurred (Interaction) - mouse position, keys pressed, modifiers, etc.
    - Page context (URL)
    """
    # Interaction type
    type: InteractionType

    # Timestamp
    timestamp: int = Field(
        description="Client-side timestamp (milliseconds since epoch) when the interaction occurred."
    )

    # How the interaction occurred (mouse coordinates, keyboard keys, modifiers, etc.)
    interaction: Interaction | None = Field(
        default=None,
        description="Details about how the interaction occurred (mouse position, keys pressed, modifiers, etc.)."
    )

    # Element that was interacted with
    element: UiElement

    # Page context
    url: str = Field(
        description="URL of the page where the interaction occurred."
    )