
Commit 3158448

chore: fix flaky tests (#1478)
### Issues

- Closes: #1469
- Closes: #1460
1 parent: a4a1c0f

4 files changed (+10, -16 lines)

src/crawlee/_utils/sitemap.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -335,7 +335,7 @@ async def _fetch_and_process_sitemap(
         # Check if the first chunk is a valid gzip header
         if first_chunk and raw_chunk.startswith(b'\x1f\x8b'):
             decompressor = zlib.decompressobj(zlib.MAX_WBITS | 16)
-            first_chunk = False
+        first_chunk = False
 
         chunk = decompressor.decompress(raw_chunk) if decompressor else raw_chunk
         text_chunk = decoder.decode(chunk)
```
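This one-line dedent is the whole fix. Before, `first_chunk` was cleared only inside the gzip branch, so a `.xml.gz` response that actually carried plain XML left the flag set to `True`, and every subsequent chunk was re-sniffed for a gzip header; since chunk boundaries depend on platform and timing, a later chunk could be misidentified as the start of a gzip stream, which is consistent with the Windows-only flakiness in #1460. A minimal sketch of the corrected loop logic, with the fetch and decode plumbing stubbed out (the `chunks` iterator and the UTF-8 decode below are assumptions, not the module's real code):

```python
import zlib
from collections.abc import AsyncIterator


async def process_chunks(chunks: AsyncIterator[bytes]) -> str:
    """Sketch of the gzip-sniffing loop after the fix; the plumbing is hypothetical."""
    decompressor = None
    first_chunk = True
    parts: list[str] = []
    async for raw_chunk in chunks:
        # Only the very first chunk may announce gzip via its magic bytes.
        if first_chunk and raw_chunk.startswith(b'\x1f\x8b'):
            decompressor = zlib.decompressobj(zlib.MAX_WBITS | 16)
        # The fix: clear the flag unconditionally, so a non-gzipped stream
        # is never re-sniffed on later chunks.
        first_chunk = False

        chunk = decompressor.decompress(raw_chunk) if decompressor else raw_chunk
        parts.append(chunk.decode('utf-8', errors='replace'))
    return ''.join(parts)
```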

tests/unit/_utils/test_sitemap.py

Lines changed: 0 additions & 6 deletions
```diff
@@ -1,9 +1,7 @@
 import base64
 import gzip
-import os
 from datetime import datetime
 
-import pytest
 from yarl import URL
 
 from crawlee._utils.sitemap import Sitemap, SitemapUrl, parse_sitemap
@@ -106,10 +104,6 @@ async def test_gzipped_sitemap_with_invalid_data(server_url: URL, http_client: H
     assert sitemap.urls == []
 
 
-@pytest.mark.skipif(
-    os.name == 'nt',
-    reason='This test is flaky on Windows, see https://github.com/apify/crawlee-python/issues/1460.',
-)
 async def test_gz_sitemap_with_non_gzipped(server_url: URL, http_client: HttpClient) -> None:
     """Test loading a sitemap with gzip type and .xml.gz url, but without gzipped data."""
     sitemap_url = (server_url / 'sitemap.xml.gz').with_query(
```
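With the decompression fix in place, the Windows-specific skip is no longer needed and the test runs on all platforms. For context, the scenario it exercises looks roughly like this (the `Sitemap.load` call and the example URL are assumptions based on the surrounding test module, not a verbatim excerpt):

```python
from crawlee._utils.sitemap import Sitemap
from crawlee.http_clients import HttpxHttpClient


async def load_non_gzipped_gz_sitemap() -> None:
    # A URL ending in .xml.gz whose body is plain XML: the loader must fall
    # back to parsing the bytes as-is instead of trying to decompress them.
    http_client = HttpxHttpClient()
    sitemap = await Sitemap.load('https://example.com/sitemap.xml.gz', http_client)
    print(sitemap.urls)
```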

tests/unit/conftest.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -195,9 +195,9 @@ def redirect_server_url(redirect_http_server: TestServer) -> URL:
 
 @pytest.fixture(
     params=[
-        pytest.param('curl', id='curl'),
         pytest.param('httpx', id='httpx'),
         pytest.param('impit', id='impit'),
+        pytest.param('curl', id='curl'),
     ]
 )
 async def http_client(request: pytest.FixtureRequest) -> HttpClient:
```
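The only change here is the position of the `curl` param. pytest materializes parametrized fixtures in list order, so every test that depends on `http_client` now runs its `curl` variant after the `httpx` and `impit` ones. A standalone illustration of that mechanism (the fixture and test below are hypothetical):

```python
import pytest


@pytest.fixture(params=[pytest.param('httpx', id='httpx'), pytest.param('curl', id='curl')])
def backend(request: pytest.FixtureRequest) -> str:
    # Each dependent test runs once per param, in list order:
    # the 'httpx' variant first, then the 'curl' variant.
    return request.param


def test_backend_is_known(backend: str) -> None:
    assert backend in {'httpx', 'curl'}
```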

tests/unit/otel/test_crawler_instrumentor.py

Lines changed: 8 additions & 8 deletions
```diff
@@ -1,25 +1,20 @@
 import io
 import json
-import os
 import re
 from unittest import mock
 
-import pytest
 from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor
 from opentelemetry.trace import set_tracer_provider
 from yarl import URL
 
+from crawlee import ConcurrencySettings
 from crawlee.crawlers import ParselCrawler
 from crawlee.otel.crawler_instrumentor import CrawlerInstrumentor
 from crawlee.storages import Dataset
 
 
-@pytest.mark.skipif(
-    os.name == 'nt',
-    reason='This test is flaky on Windows, see https://github.com/apify/crawlee-python/issues/1469.',
-)
 async def test_crawler_instrumentor_capability(server_url: URL) -> None:
     """Test OpenTelemetry instrumentation capability of the crawler.
 
@@ -40,15 +35,20 @@ async def test_crawler_instrumentor_capability(server_url: URL) -> None:
     provider.add_span_processor(SimpleSpanProcessor(exporter))
     set_tracer_provider(provider)
     # Instrument the crawler with OpenTelemetry
-    CrawlerInstrumentor(instrument_classes=[Dataset]).instrument()
+    instrumentor = CrawlerInstrumentor(instrument_classes=[Dataset])
+    instrumentor.instrument()
 
     # Generate first telemetry data from `Dataset` public methods.
    # `Dataset` is in `instrument_classes` argument, and thus it's public methods are instrumented.
     dataset = await Dataset.open(name='test-dataset')
     await dataset.drop()
 
     # Other traces will be from crawler run.
-    crawler = ParselCrawler(max_requests_per_crawl=1, request_handler=mock.AsyncMock())
+    crawler = ParselCrawler(
+        max_requests_per_crawl=1,
+        request_handler=mock.AsyncMock(),
+        concurrency_settings=ConcurrencySettings(desired_concurrency=1, max_concurrency=1),
+    )
 
     # Run crawler and generate more telemetry data.
     await crawler.run([str(server_url)])
```
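Alongside dropping the Windows skip, the test now pins the crawler to a single concurrent request, which keeps the order of emitted spans deterministic enough to assert on. The hunk context shows the exporter wiring; a sketch of that capture setup, assuming an in-memory buffer in place of stdout (the resource attributes here are made up):

```python
import io

from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor
from opentelemetry.trace import set_tracer_provider

# Route exported spans into a StringIO buffer so the test can inspect them
# instead of printing to stdout.
spans = io.StringIO()
exporter = ConsoleSpanExporter(out=spans)
provider = TracerProvider(resource=Resource.create({'service.name': 'crawler-test'}))
provider.add_span_processor(SimpleSpanProcessor(exporter))
set_tracer_provider(provider)
```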
