From c4c2b95b6cdf60824272827dfa283116d7e74687 Mon Sep 17 00:00:00 2001 From: Gilad Date: Thu, 14 Nov 2024 12:47:47 +0000 Subject: [PATCH] fix: ensure both URL format and status code are validated Modified the '_validate_url' method to require both a valid URL prefix and a 200 status code for URL validation. Previously, the method could pass validation if only one condition was met, leading to false positives. --- rightmove_webscraper/scraper.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rightmove_webscraper/scraper.py b/rightmove_webscraper/scraper.py index 12acc0b..6198d23 100644 --- a/rightmove_webscraper/scraper.py +++ b/rightmove_webscraper/scraper.py @@ -59,9 +59,12 @@ def _validate_url(self): protocols = ["http", "https"] types = ["property-to-rent", "property-for-sale", "new-homes-for-sale"] urls = [real_url.format(p, t) for p in protocols for t in types] + conditions = [self.url.startswith(u) for u in urls] - conditions.append(self._status_code == 200) - if not any(conditions): + + valid_url = any(conditions) + valid_status = self._status_code==200 + if not (valid_url and valid_status): raise ValueError(f"Invalid rightmove search URL:\n\n\t{self.url}") @property