Skip to content

Commit a0fc5c6

Browse files
authored
Fix header processing (#226)
* Scrapy headers were being updated for every Playwright request; they should only be updated for the Playwright request that corresponds to the original Scrapy one.
* The Referer header from the Playwright request was not added to the output of `scrapy_playwright.headers.use_scrapy_headers`.
1 parent 05648c6 commit a0fc5c6

File tree

2 files changed

+13
-7
lines changed

2 files changed

+13
-7
lines changed

scrapy_playwright/handler.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -494,13 +494,8 @@ async def _request_handler(route: Route, playwright_request: PlaywrightRequest)
494494
self.browser_type_name, playwright_request, headers
495495
)
496496
)
497-
# the request that reaches the callback should contain the final headers
498-
headers.clear()
499-
headers.update(final_headers)
500-
del final_headers
501497

502-
# if the request is triggered by scrapy, not playwright
503-
original_playwright_method: str = playwright_request.method
498+
# if the current request corresponds to the original scrapy one
504499
if (
505500
playwright_request.url.rstrip("/") == url.rstrip("/")
506501
and playwright_request.is_navigation_request()
@@ -509,7 +504,13 @@ async def _request_handler(route: Route, playwright_request: PlaywrightRequest)
509504
overrides["method"] = method
510505
if body:
511506
overrides["post_data"] = body.decode(encoding)
507+
# the request that reaches the callback should contain the final headers
508+
headers.clear()
509+
headers.update(final_headers)
512510

511+
del final_headers
512+
513+
original_playwright_method: str = playwright_request.method
513514
try:
514515
await route.continue_(**overrides)
515516
if overrides.get("method"):

scrapy_playwright/headers.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,9 +23,14 @@ async def use_scrapy_headers(
2323
scrapy_headers_str.setdefault("user-agent", playwright_headers.get("user-agent"))
2424

2525
if playwright_request.is_navigation_request():
26+
# if referer header is set via playwright_page_goto_kwargs
27+
if referer := playwright_headers.get("referer"):
28+
scrapy_headers_str.setdefault("referer", referer)
29+
30+
# otherwise it fails with playwright.helper.Error: NS_ERROR_NET_RESET
2631
if browser_type == "firefox":
27-
# otherwise this fails with playwright.helper.Error: NS_ERROR_NET_RESET
2832
scrapy_headers_str["host"] = urlparse(playwright_request.url).netloc
33+
2934
return scrapy_headers_str
3035

3136
# override user agent, for consistency with other requests

0 commit comments

Comments
 (0)