diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 3980b5f..948f83a 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -27,7 +27,7 @@ jobs: - python steps: - - uses: step-security/harden-runner@f4a75cfd619ee5ce8d5b864b0d183aff3c69b55a + - uses: step-security/harden-runner@95d9a5deda9de15063e7595e9719c11c38c90ae2 with: disable-sudo: true egress-policy: block @@ -36,15 +36,16 @@ jobs: github.com:443 objects.githubusercontent.com:443 uploads.github.com:443 + release-assets.githubusercontent.com - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 - - uses: github/codeql-action/init@0499de31b99561a6d14a36a5f662c2a54f91beee + - uses: github/codeql-action/init@e12f0178983d466f2f6028f5cc7a6d786fd97f4b with: languages: ${{ matrix.language }} - - uses: github/codeql-action/autobuild@0499de31b99561a6d14a36a5f662c2a54f91beee + - uses: github/codeql-action/autobuild@e12f0178983d466f2f6028f5cc7a6d786fd97f4b - - uses: github/codeql-action/analyze@0499de31b99561a6d14a36a5f662c2a54f91beee + - uses: github/codeql-action/analyze@e12f0178983d466f2f6028f5cc7a6d786fd97f4b with: category: /language:${{matrix.language}} diff --git a/requestium/requestium_mixin.py b/requestium/requestium_mixin.py index c627496..880eb70 100644 --- a/requestium/requestium_mixin.py +++ b/requestium/requestium_mixin.py @@ -8,7 +8,7 @@ import tldextract from parsel.selector import Selector, SelectorList from selenium.common.exceptions import WebDriverException -from selenium.webdriver.common.by import By +from selenium.webdriver.common.by import By, ByType from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver from selenium.webdriver.support import expected_conditions from selenium.webdriver.support.ui import WebDriverWait @@ -130,7 +130,7 @@ def ensure_element_by_class_name(self, selector: str, state: str | None = "prese def ensure_element_by_css_selector(self, selector: str, state: str | None = "present", timeout: float | None = None) -> WebElement | None: return self.ensure_element(By.CSS_SELECTOR, selector, state, timeout) - def ensure_element(self, locator: str, selector: str, state: str | None = "present", timeout: float | None = None) -> WebElement | None: + def ensure_element(self, locator: ByType | str, selector: str, state: str | None = "present", timeout: float | None = None) -> WebElement | None: """ Wait until an element appears or disappears in the browser. diff --git a/tests/conftest.py b/tests/conftest.py index a71a00f..b8c455a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,49 +1,100 @@ -# import os -# import shutil +import contextlib +from collections.abc import Generator from typing import TYPE_CHECKING, cast import pytest +from _pytest.fixtures import FixtureRequest from selenium import webdriver +from selenium.common import WebDriverException +from selenium.webdriver.support.wait import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC import requestium if TYPE_CHECKING: from requestium.requestium_mixin import DriverMixin +# ruff: noqa FBT003 + + +@pytest.fixture(scope="module") +def example_html() -> str: + return """ + + The Internet + +

Test Header 1

+

Test Header 2

+

Test Header 3

+

Test Paragraph 1

+ +

Test Link 1

+

Test Link 2

+ + + """ + + +def _create_chrome_driver(headless: bool) -> webdriver.Chrome: + options = webdriver.ChromeOptions() + options.add_argument("--no-sandbox") + options.add_argument("--disable-dev-shm-usage") + if headless: + options.add_argument("--headless=new") + driver = webdriver.Chrome(options=options) + WebDriverWait(driver, 5).until(EC.number_of_windows_to_be(1)) + return driver + + +def _create_firefox_driver(headless: bool) -> webdriver.Firefox: + options = webdriver.FirefoxOptions() + options.set_preference("browser.cache.disk.enable", False) + options.set_preference("browser.cache.memory.enable", False) + options.set_preference("browser.cache.offline.enable", False) + options.set_preference("network.http.use-cache", False) + if headless: + options.add_argument("--headless") + driver = webdriver.Firefox(options=options) + WebDriverWait(driver, 5).until(EC.number_of_windows_to_be(1)) + return driver + @pytest.fixture( - params=[ - "chrome-headless", - "chrome", - # "chrome-custom-path", - "firefox-headless", - "firefox", - ] + params=["chrome-headless", "chrome", "firefox-headless", "firefox"], + scope="module", ) -def session(request): # noqa: ANN001, ANN201 +def session(request: FixtureRequest) -> Generator[requestium.Session, None, None]: driver_type = request.param + browser, _, mode = driver_type.partition("-") + headless = mode == "headless" - if driver_type == "chrome-headless": - options = webdriver.ChromeOptions() - options.add_argument("--headless=new") - driver = webdriver.Chrome(options=options) - elif driver_type == "chrome": - driver = webdriver.Chrome() - # elif driver_type == "chrome-custom-path": - # chromedriver_name = "chromedriver" - # custom_path = shutil.which(chromedriver_name) - # assert custom_path, f"'{chromedriver_name}' not found in PATH." - # assert os.path.exists(custom_path), f"Custom chromedriver not found at {custom_path}." - # driver = webdriver.Chrome(service=webdriver.ChromeService(executable_path=custom_path)) - elif driver_type == "firefox-headless": - options = webdriver.FirefoxOptions() - options.add_argument("--headless") - driver = webdriver.Firefox(options=options) - elif driver_type == "firefox": - driver = webdriver.Firefox() + driver: webdriver.Chrome | webdriver.Firefox + if browser == "chrome": + driver = _create_chrome_driver(headless) + elif browser == "firefox": + driver = _create_firefox_driver(headless) else: - msg = f"Unknown driver type: {driver_type}" + msg = f"Unknown driver type: {browser}" raise ValueError(msg) - with requestium.Session(driver=cast("DriverMixin", driver)) as session: - yield session + session = requestium.Session(driver=cast("DriverMixin", driver)) + yield session + + with contextlib.suppress(WebDriverException, OSError): + driver.quit() + + +@pytest.fixture(autouse=True) +def ensure_valid_session(session: requestium.Session) -> None: + """Skip test if browser context is discarded.""" + try: + _ = session.driver.current_url + _ = session.driver.window_handles + except WebDriverException as e: + if "Browsing context has been discarded" not in str(e): + raise + + try: + session.driver.switch_to.new_window("tab") + except WebDriverException: + pytest.skip("Browser context discarded and cannot be recovered") diff --git a/tests/resources/test_extension.crx b/tests/resources/test_extension.crx new file mode 100644 index 0000000..feadf74 Binary files /dev/null and b/tests/resources/test_extension.crx differ diff --git a/tests/test__start_chrome_driver.py b/tests/test__start_chrome_driver.py deleted file mode 100644 index 21cc7a0..0000000 --- a/tests/test__start_chrome_driver.py +++ /dev/null @@ -1,25 +0,0 @@ -import time - -import pytest - -import requestium.requestium - - -def test__start_chrome_driver(session: requestium.Session) -> None: - session._start_chrome_browser() - session.driver.get("http://the-internet.herokuapp.com") - time.sleep(1) - title = session.driver.title - assert title == "The Internet" - - -def test__start_chrome_driver_options_typeerror() -> None: - invalid_webdriver_options = {"arguments": "invalid_string"} - with ( - requestium.Session(webdriver_options=invalid_webdriver_options) as session, - pytest.raises( - TypeError, - match=f"'arguments' option must be a list, but got {type(invalid_webdriver_options['arguments']).__name__}", - ), - ): - session._start_chrome_browser() diff --git a/tests/test_cookies.py b/tests/test_cookies.py index 3e6271a..fc0c735 100644 --- a/tests/test_cookies.py +++ b/tests/test_cookies.py @@ -1,22 +1,95 @@ import pytest +from _pytest.fixtures import FixtureRequest from selenium.common import InvalidCookieDomainException import requestium.requestium -def test_transfer_session_cookies_to_driver(session: requestium.Session) -> None: - assert session.cookies.keys() == [] - response = session.get("http://google.com/") - assert response.cookies.keys().sort() == ["AEC", "NID"].sort() - session.transfer_session_cookies_to_driver() - assert session.cookies.keys() == ["AEC", "NID"] +@pytest.fixture( + params=[ + {"name": "session_id", "value": "abc123", "domain": "example.com", "path": "/"}, + {"name": "user_token", "value": "xyz789", "domain": "example.com", "path": "/"}, + ], + ids=["session_id", "user_token"], + scope="module", +) +def cookie_data(request: FixtureRequest) -> dict[str, str]: + return request.param + + +@pytest.fixture +def clean_session(session: requestium.Session) -> requestium.Session: + """Ensure cookies are cleared before each test.""" + session.cookies.clear() + session.driver.delete_all_cookies() + return session + + +def assert_first_cookie_matches(driver_cookies: list[dict], expected: dict[str, str]) -> None: + """Verify the first cookie in a list matches expected values.""" + assert len(driver_cookies) == 1 + + cookie = driver_cookies[0] + assert cookie["name"] == expected["name"] + assert cookie["value"] == expected["value"] + assert cookie["domain"] in {expected["domain"], f".{expected['domain']}"} + assert cookie["path"] == expected["path"] + + +def test_ensure_add_cookie(clean_session: requestium.Session, cookie_data: dict[str, str]) -> None: + clean_session.driver.get("https://google.com") + clean_session.driver.delete_all_cookies() + clean_session.driver.ensure_add_cookie(cookie_data) + + assert_first_cookie_matches(clean_session.driver.get_cookies(), cookie_data) + + +def test_ensure_add_cookie_domain_override(clean_session: requestium.Session, cookie_data: dict[str, str]) -> None: + override_domain = "example.net" + + clean_session.driver.get("https://google.com") + clean_session.driver.delete_all_cookies() + clean_session.driver.ensure_add_cookie(cookie_data, override_domain=override_domain) + + expected = {**cookie_data, "domain": override_domain} + assert_first_cookie_matches(clean_session.driver.get_cookies(), expected) + + +def test_transfer_driver_cookies_to_session(clean_session: requestium.Session, cookie_data: dict[str, str]) -> None: + clean_session.driver.get(f"https://{cookie_data['domain']}") + clean_session.driver.add_cookie(cookie_data) + + assert not clean_session.cookies.keys() + clean_session.transfer_driver_cookies_to_session() + assert clean_session.cookies.keys() == [cookie_data["name"]] + + +def test_transfer_session_cookies_to_driver(clean_session: requestium.Session, cookie_data: dict[str, str]) -> None: + clean_session.get(f"http://{cookie_data['domain']}") + clean_session.cookies.set(name=cookie_data["name"], value=cookie_data["value"], domain=cookie_data["domain"], path=cookie_data["path"]) + + assert not clean_session.driver.get_cookies() + clean_session.transfer_session_cookies_to_driver() + assert_first_cookie_matches(clean_session.driver.get_cookies(), cookie_data) + + +def test_transfer_session_cookies_to_driver_domain_filter(clean_session: requestium.Session, cookie_data: dict[str, str]) -> None: + clean_session.get(f"http://{cookie_data['domain']}") + clean_session.cookies.set(name="junk_cookie", value="sfkjn782", domain="google.com", path=cookie_data["path"]) + clean_session.cookies.set(name=cookie_data["name"], value=cookie_data["value"], domain=cookie_data["domain"], path=cookie_data["path"]) + + assert not clean_session.driver.get_cookies() + clean_session.transfer_session_cookies_to_driver(domain=cookie_data["domain"]) + assert_first_cookie_matches(clean_session.driver.get_cookies(), cookie_data) def test_transfer_session_cookies_to_driver_no_domain_error(session: requestium.Session) -> None: - with ( - pytest.raises( - InvalidCookieDomainException, - match="Trying to transfer cookies to selenium without specifying a domain and without having visited any page in the current session", - ), + session.cookies.clear() + session.driver.delete_all_cookies() + session._last_requests_url = None + + with pytest.raises( + InvalidCookieDomainException, + match="Trying to transfer cookies to selenium without specifying a domain and without having visited any page in the current session", ): session.transfer_session_cookies_to_driver() diff --git a/tests/test_ensure_elements_deprecation.py b/tests/test_ensure_elements_deprecation.py deleted file mode 100644 index 431277f..0000000 --- a/tests/test_ensure_elements_deprecation.py +++ /dev/null @@ -1,9 +0,0 @@ -import pytest - -import requestium.requestium - - -def test_deprecation_warning_for_ensure_element_locators_with_underscores(session: requestium.Session) -> None: - session.driver.get("http://the-internet.herokuapp.com") - with pytest.warns(DeprecationWarning): - session.driver.ensure_element("class_name", "no-js") diff --git a/tests/test_mixin.py b/tests/test_mixin.py new file mode 100644 index 0000000..b23f262 --- /dev/null +++ b/tests/test_mixin.py @@ -0,0 +1,118 @@ +from __future__ import annotations + +import pytest +from selenium.webdriver.common.by import By, ByType +from selenium.webdriver.remote.webelement import WebElement + +import requestium.requestium + + +def assert_webelement_text_exact_match(element: WebElement | None, expected: str) -> None: + """Verify the provided element is a WebElement with matching text.""" + assert isinstance(element, WebElement) + assert element.text == expected + + +def test_ensure_element_by_id(session: requestium.Session, example_html: str) -> None: + session.driver.get(f"data:text/html,{example_html}") + + element = session.driver.ensure_element_by_id("test-header") + assert_webelement_text_exact_match(element, "Test Header 2") + + +def test_ensure_element_by_name(session: requestium.Session, example_html: str) -> None: + session.driver.get(f"data:text/html,{example_html}") + + element = session.driver.ensure_element_by_name("link-paragraph") + assert_webelement_text_exact_match(element, "Test Link 1") + + +def test_ensure_element_by_xpath(session: requestium.Session, example_html: str) -> None: + session.driver.get(f"data:text/html,{example_html}") + + element = session.driver.ensure_element_by_xpath("//a[text()='Test Link 2']") + assert_webelement_text_exact_match(element, "Test Link 2") + + +def test_ensure_element_by_link_text(session: requestium.Session, example_html: str) -> None: + session.driver.get(f"data:text/html,{example_html}") + + element = session.driver.ensure_element_by_link_text("Test Link 1") + assert_webelement_text_exact_match(element, "Test Link 1") + + +def test_ensure_element_by_partial_link_text(session: requestium.Session, example_html: str) -> None: + session.driver.get(f"data:text/html,{example_html}") + + element = session.driver.ensure_element_by_partial_link_text("Link 2") + assert_webelement_text_exact_match(element, "Test Link 2") + + +def test_ensure_element_by_tag_name(session: requestium.Session, example_html: str) -> None: + session.driver.get(f"data:text/html,{example_html}") + + element = session.driver.ensure_element_by_tag_name("h1") + assert_webelement_text_exact_match(element, "Test Header 1") + + +def test_ensure_element_by_class_name(session: requestium.Session, example_html: str) -> None: + session.driver.get(f"data:text/html,{example_html}") + + element = session.driver.ensure_element_by_class_name("body-text") + assert_webelement_text_exact_match(element, "Test Paragraph 1") + + +def test_ensure_element_by_css_selector(session: requestium.Session, example_html: str) -> None: + session.driver.get(f"data:text/html,{example_html}") + + element = session.driver.ensure_element_by_css_selector(".body-text") + assert_webelement_text_exact_match(element, "Test Paragraph 1") + + +@pytest.mark.parametrize( + ("locator", "selector", "result"), + [ + (By.ID, "test-header", "Test Header 2"), + (By.NAME, "link-paragraph", "Test Link 1"), + (By.XPATH, "//a[text()='Test Link 2']", "Test Link 2"), + (By.LINK_TEXT, "Test Link 1", "Test Link 1"), + (By.PARTIAL_LINK_TEXT, "Link 2", "Test Link 2"), + (By.TAG_NAME, "h1", "Test Header 1"), + (By.CLASS_NAME, "body-text", "Test Paragraph 1"), + (By.CSS_SELECTOR, ".body-text", "Test Paragraph 1"), + (By.CSS_SELECTOR, "#test-header", "Test Header 2"), + ], + ids=["id", "name", "xpath", "link_text", "partial_link_text", "tag_name", "class_name", "css_selector_class", "css_selector_id"], +) +def test_ensure_element(session: requestium.Session, example_html: str, locator: ByType, selector: str, result: str) -> None: + session.driver.get(f"data:text/html,{example_html}") + + element = session.driver.ensure_element(locator=locator, selector=selector) + assert_webelement_text_exact_match(element, result) + + element = session.driver.ensure_element(locator, selector) + assert_webelement_text_exact_match(element, result) + + +def test_deprecation_warning_for_ensure_element_locators_with_underscores(session: requestium.Session, example_html: str) -> None: + session.driver.get(f"data:text/html,{example_html}") + with pytest.warns(DeprecationWarning, match="Support for locator strategy names with underscores is deprecated"): + session.driver.ensure_element(locator="tag_name", selector="h1") + with pytest.warns(DeprecationWarning, match="Support for locator strategy names with underscores is deprecated"): + session.driver.ensure_element("tag_name", "h1") + + +def test_simple_page_load(session: requestium.Session, example_html: str) -> None: + session.driver.get(f"data:text/html,{example_html}") + + session.driver.ensure_element_by_tag_name("h1") # wait for page load + title = session.driver.title + assert title == "The Internet" + + +def test_ensure_click(session: requestium.Session, example_html: str) -> None: + session.driver.get(f"data:text/html,{example_html}") + + element = session.driver.ensure_element_by_tag_name("button") + assert isinstance(element, WebElement) + requestium.requestium._ensure_click(element) diff --git a/tests/test_session.py b/tests/test_session.py index 2226ee3..d8985cf 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -1,14 +1,85 @@ +import contextlib +from pathlib import Path + +import pytest +from selenium.common import WebDriverException from selenium.webdriver.common.by import By import requestium.requestium -def test_simple_page_load(session: requestium.Session) -> None: - session.driver.get("http://the-internet.herokuapp.com") - session.driver.ensure_element(By.ID, "content") +@pytest.mark.parametrize( + "headless", + [ + None, + False, + True, + ], + ids=["no_headless_arg", "headless=false", "headless=true"], +) +def test_initialize_session_without_explicit_driver(example_html: str, headless: bool) -> None: # noqa: FBT001 + session = requestium.Session(headless=headless) + session.driver.get(f"data:text/html,{example_html}") + session.driver.ensure_element(By.TAG_NAME, "h1") + + assert session.driver.title == "The Internet" + + with contextlib.suppress(WebDriverException, OSError): + session.driver.quit() + + +def test_initialize_session_with_webdriver_options(example_html: str) -> None: + session = requestium.Session(webdriver_options={"arguments": ["headless=new"]}) + session.driver.get(f"data:text/html,{example_html}") + session.driver.ensure_element(By.TAG_NAME, "h1") + + assert session.driver.title == "The Internet" + + with contextlib.suppress(WebDriverException, OSError): + session.driver.quit() + + +def test_initialize_session_with_experimental_options(example_html: str) -> None: + session = requestium.Session(webdriver_options={"experimental_options": {"useAutomationExtension": False}}) + session.driver.get(f"data:text/html,{example_html}") + session.driver.ensure_element(By.TAG_NAME, "h1") + + assert session.driver.title == "The Internet" + + with contextlib.suppress(WebDriverException, OSError): + session.driver.quit() + + +def test_initialize_session_with_webdriver_prefs(example_html: str) -> None: + session = requestium.Session(webdriver_options={"prefs": {"plugins.always_open_pdf_externally": True}}) + session.driver.get(f"data:text/html,{example_html}") + session.driver.ensure_element(By.TAG_NAME, "h1") + + assert session.driver.title == "The Internet" + + with contextlib.suppress(WebDriverException, OSError): + session.driver.quit() + + +def test_initialize_session_with_extension(example_html: str) -> None: + test_extension_path = Path(__file__).parent / "resources/test_extension.crx" + session = requestium.Session(webdriver_options={"extensions": [str(test_extension_path)]}) + session.driver.get(f"data:text/html,{example_html}") + session.driver.ensure_element(By.TAG_NAME, "h1") + + assert session.driver.title == "The Internet" + + with contextlib.suppress(WebDriverException, OSError): + session.driver.quit() - title = session.driver.title - heading = session.driver.find_element(By.XPATH, '//*[@id="content"]/h1') - assert title == "The Internet" - assert heading.text == "Welcome to the-internet" +def test__start_chrome_driver_webdriver_options_typeerror() -> None: + invalid_webdriver_options = {"arguments": "invalid_string"} + with ( + requestium.Session(webdriver_options=invalid_webdriver_options) as session, + pytest.raises( + TypeError, + match="'arguments' option must be a list, but got str", + ), + ): + session._start_chrome_browser() diff --git a/tests/test_user_agent.py b/tests/test_user_agent.py index 7c59fdf..f1c6761 100644 --- a/tests/test_user_agent.py +++ b/tests/test_user_agent.py @@ -1,13 +1,33 @@ +from collections.abc import Generator + +import pytest + import requestium.requestium -def test_copy_user_agent_from_driver(session: requestium.Session) -> None: +@pytest.fixture +def reset_session_headers(session: requestium.Session) -> Generator[requestium.Session, None, None]: + """Reset session headers before each test.""" + # Store original headers - convert to dict to copy + original_headers = dict(session.headers) + session.headers.clear() + session.headers.update(original_headers) # Restore to clean state at start + + yield session + + # Restore original headers after test + session.headers.clear() + session.headers.update(original_headers) + + +def test_copy_user_agent_from_driver(reset_session_headers: requestium.Session, example_html: str) -> None: """Ensure that requests user-agent header has been changed after calling session.copy_user_agent_from_driver().""" + session = reset_session_headers pre_copy_requests_useragent = session.headers["user-agent"] assert pre_copy_requests_useragent assert pre_copy_requests_useragent != "" - session.driver.get("http://the-internet.herokuapp.com") + session.driver.get(f"data:text/html,{example_html}") session.copy_user_agent_from_driver() post_copy_requests_useragent = session.headers["user-agent"]