|
3 | 3 | import pytest |
4 | 4 | import numpy as np |
5 | 5 | import asyncio |
| 6 | +from time import perf_counter |
6 | 7 | from tokenizers import AddedToken, Encoding, Tokenizer |
7 | 8 | from tokenizers.implementations import BertWordPieceTokenizer |
8 | 9 | from tokenizers.models import BPE, Model, Unigram |
9 | 10 | from tokenizers.pre_tokenizers import ByteLevel, Metaspace |
10 | 11 | from tokenizers.processors import RobertaProcessing, TemplateProcessing |
11 | 12 | from tokenizers.normalizers import Strip, Lowercase, Sequence |
12 | 13 | from tokenizers.decoders import ByteFallback, DecodeStream, Metaspace as DecoderMetaspace |
13 | | -import time |
14 | 14 |
|
15 | 15 | from ..utils import bert_files, data_dir, multiprocessing_with_parallelism, roberta_files |
16 | 16 |
|
@@ -341,7 +341,7 @@ def test_padding(self): |
341 | 341 |
|
342 | 342 | # Can pad to the longest in a batch |
343 | 343 | output = tokenizer.encode_batch(["my name", "my name is john"]) |
344 | | - assert all([len(encoding) == 4 for encoding in output]) |
| 344 | + assert all(len(encoding) == 4 for encoding in output) |
345 | 345 |
|
346 | 346 | # Can pad to the specified length otherwise |
347 | 347 | tokenizer.enable_padding(length=4) |
@@ -950,21 +950,21 @@ async def encode_async(_): |
950 | 950 | # Measure sync performance with pre-initialized executor |
951 | 951 | # Warm up |
952 | 952 | await asyncio.gather(*[encode_sync_with_executor(i) for i in range(10)]) |
953 | | - time.sleep(0.03) |
| 953 | + await asyncio.sleep(0.03) |
954 | 954 | # Actual measurement |
955 | | - start = time.perf_counter() |
| 955 | + start = perf_counter() |
956 | 956 | await asyncio.gather(*[encode_sync_with_executor(i) for i in range(n_tasks)]) |
957 | | - sync_time = time.perf_counter() - start |
| 957 | + sync_time = perf_counter() - start |
958 | 958 |
|
959 | 959 | # Measure async performance |
960 | 960 | # Warm up |
961 | 961 | await asyncio.gather(*[encode_async(i) for i in range(10)]) |
962 | 962 |
|
963 | 963 | # Actual measurement |
964 | | - time.sleep(0.03) |
965 | | - start = time.perf_counter() |
| 964 | + await asyncio.sleep(0.03) |
| 965 | + start = perf_counter() |
966 | 966 | await asyncio.gather(*[encode_async(i) for i in range(n_tasks)]) |
967 | | - async_time = time.perf_counter() - start |
| 967 | + async_time = perf_counter() - start |
968 | 968 |
|
969 | 969 | # Log times |
970 | 970 | print(f"sync vs async processing times: {sync_time:.4f}s vs {async_time:.4f}s for {n_tasks} tasks") |
|
0 commit comments