Skip to content

Commit 354b33a

Browse files
authored
[html5lib] Annotate more (#14994)
* Import `webencodings` to use `Encoding` class instead of `Protocol`
* Make `_inputstream.BufferedStream`, `filters.base.Filter`, `_utils.MethodDispatcher` generic
* Fix incorrect types in `_ihatexml.pyi`
* Add types for some attributes/methods
1 parent d58177d commit 354b33a

17 files changed

+244
-189
lines changed

stubs/bleach/bleach/html5lib_shim.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class InputStreamWithMemory:
5050

5151
class BleachHTMLTokenizer(HTMLTokenizer):
5252
consume_entities: bool
53-
stream: InputStreamWithMemory
53+
stream: InputStreamWithMemory # type: ignore[assignment]
5454
emitted_last_token: dict[str, Any] | None
5555
def __init__(self, consume_entities: bool = False, **kwargs: Any) -> None: ...
5656

stubs/bleach/bleach/linkifier.pyi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class Linker:
3636
# or `html5lib` token might be reused
3737
_Token: TypeAlias = dict[str, Any]
3838

39-
class LinkifyFilter(Filter):
39+
class LinkifyFilter(Filter[_Token]):
4040
callbacks: Iterable[_Callback]
4141
skip_tags: Container[str]
4242
parse_email: bool

stubs/bleach/bleach/sanitizer.pyi

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class Cleaner:
4040
protocols: Iterable[str]
4141
strip: bool
4242
strip_comments: bool
43-
filters: Iterable[Filter]
43+
filters: Iterable[_FilterConstructor]
4444
css_sanitizer: CSSSanitizer | None
4545
parser: BleachHTMLParser
4646
walker: TreeWalker
@@ -85,7 +85,7 @@ class BleachSanitizerFilter(SanitizerFilter):
8585
def sanitize_stream(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ...
8686
def merge_characters(self, token_iterator: Iterable[_Token]) -> Iterator[_Token]: ...
8787
def __iter__(self) -> Iterator[_Token]: ...
88-
def sanitize_token(self, token: _Token) -> _Token | list[_Token] | None: ...
88+
def sanitize_token(self, token: _Token) -> _Token | list[_Token] | None: ... # type: ignore[override]
8989
def sanitize_characters(self, token: _Token) -> _Token | list[_Token]: ...
9090
def sanitize_uri_value(self, value: str, allowed_protocols: Container[str]) -> str | None: ...
9191
def allow_token(self, token: _Token) -> _Token: ...

stubs/html5lib/METADATA.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
version = "1.1.*"
22
upstream_repository = "https://github.com/html5lib/html5lib-python"
3+
requires = ["types-webencodings"]
34

45
[tool.stubtest]
56
extras = ["all"]
stubs/html5lib/html5lib/_ihatexml.pyi

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
import re
2-
from _typeshed import Incomplete
3-
from collections.abc import Iterable
42

53
baseChar: str
64
ideographic: str
@@ -13,13 +11,13 @@ nameFirst: str
1311
reChar: re.Pattern[str]
1412
reCharRange: re.Pattern[str]
1513

16-
def charStringToList(chars: str) -> list[str]: ...
17-
def normaliseCharList(charList: Iterable[str]) -> list[str]: ...
14+
def charStringToList(chars: str) -> list[list[int]]: ...
15+
def normaliseCharList(charList: list[list[int]]) -> list[list[int]]: ...
1816

1917
max_unicode: int
2018

21-
def missingRanges(charList: Iterable[str]) -> list[str]: ...
22-
def listToRegexpStr(charList): ...
19+
def missingRanges(charList: list[list[int]]) -> list[list[int]]: ...
20+
def listToRegexpStr(charList: list[list[int]]) -> str: ...
2321
def hexToInt(hex_str: str | bytes | bytearray) -> int: ...
2422
def escapeRegexp(string: str) -> str: ...
2523

@@ -29,13 +27,13 @@ nonPubidCharRegexp: re.Pattern[str]
2927

3028
class InfosetFilter:
3129
replacementRegexp: re.Pattern[str]
32-
dropXmlnsLocalName: Incomplete
33-
dropXmlnsAttrNs: Incomplete
34-
preventDoubleDashComments: Incomplete
35-
preventDashAtCommentEnd: Incomplete
36-
replaceFormFeedCharacters: Incomplete
37-
preventSingleQuotePubid: Incomplete
38-
replaceCache: Incomplete
30+
dropXmlnsLocalName: bool
31+
dropXmlnsAttrNs: bool
32+
preventDoubleDashComments: bool
33+
preventDashAtCommentEnd: bool
34+
replaceFormFeedCharacters: bool
35+
preventSingleQuotePubid: bool
36+
replaceCache: dict[str, str]
3937
def __init__(
4038
self,
4139
dropXmlnsLocalName: bool = False,
@@ -45,13 +43,13 @@ class InfosetFilter:
4543
replaceFormFeedCharacters: bool = True,
4644
preventSingleQuotePubid: bool = False,
4745
) -> None: ...
48-
def coerceAttribute(self, name, namespace=None): ...
49-
def coerceElement(self, name): ...
50-
def coerceComment(self, data): ...
51-
def coerceCharacters(self, data): ...
52-
def coercePubid(self, data): ...
53-
def toXmlName(self, name): ...
54-
def getReplacementCharacter(self, char): ...
55-
def fromXmlName(self, name): ...
56-
def escapeChar(self, char): ...
57-
def unescapeChar(self, charcode): ...
46+
def coerceAttribute(self, name: str, namespace: str | None = None) -> str | None: ...
47+
def coerceElement(self, name: str) -> str: ...
48+
def coerceComment(self, data: str) -> str: ...
49+
def coerceCharacters(self, data: str) -> str: ...
50+
def coercePubid(self, data: str) -> str: ...
51+
def toXmlName(self, name: str) -> str: ...
52+
def getReplacementCharacter(self, char: str) -> str: ...
53+
def fromXmlName(self, name: str) -> str: ...
54+
def escapeChar(self, char: str) -> str: ...
55+
def unescapeChar(self, charcode: str | bytes | bytearray) -> str: ...
stubs/html5lib/html5lib/_inputstream.pyi

Lines changed: 79 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,36 @@
1-
from _typeshed import Incomplete, SupportsRead
2-
from codecs import CodecInfo
3-
from typing import Protocol, overload, type_check_only
4-
from typing_extensions import TypeAlias
1+
import re
2+
from _io import BytesIO, StringIO
3+
from _typeshed import Incomplete, ReadableBuffer, SupportsRead
4+
from collections.abc import Callable, Iterable
5+
from typing import Any, AnyStr, Generic, Literal, TypeVar, overload
6+
from typing_extensions import Self, TypeAlias
57

6-
# Is actually webencodings.Encoding
7-
@type_check_only
8-
class _Encoding(Protocol):
9-
name: str
10-
codec_info: CodecInfo
11-
def __init__(self, name: str, codec_info: CodecInfo) -> None: ...
8+
from webencodings import Encoding
129

1310
_UnicodeInputStream: TypeAlias = str | SupportsRead[str]
1411
_BinaryInputStream: TypeAlias = bytes | SupportsRead[bytes]
1512
_InputStream: TypeAlias = _UnicodeInputStream | _BinaryInputStream # noqa: Y047 # used in other files
13+
_SupportsReadT = TypeVar("_SupportsReadT", bound=SupportsRead[Any])
14+
_SupportsReadBytesT = TypeVar("_SupportsReadBytesT", bound=SupportsRead[bytes])
1615

17-
spaceCharactersBytes: Incomplete
18-
asciiLettersBytes: Incomplete
19-
asciiUppercaseBytes: Incomplete
20-
spacesAngleBrackets: Incomplete
16+
spaceCharactersBytes: frozenset[bytes]
17+
asciiLettersBytes: frozenset[bytes]
18+
asciiUppercaseBytes: frozenset[bytes]
19+
spacesAngleBrackets: frozenset[bytes]
2120
invalid_unicode_no_surrogate: str
22-
invalid_unicode_re: Incomplete
23-
non_bmp_invalid_codepoints: Incomplete
24-
ascii_punctuation_re: Incomplete
25-
charsUntilRegEx: Incomplete
21+
invalid_unicode_re: re.Pattern[str]
22+
non_bmp_invalid_codepoints: set[int]
23+
ascii_punctuation_re: re.Pattern[str]
24+
charsUntilRegEx: dict[tuple[Iterable[str | bytes | bytearray], bool], re.Pattern[str]]
2625

27-
class BufferedStream:
28-
stream: Incomplete
29-
buffer: Incomplete
30-
position: Incomplete
31-
def __init__(self, stream) -> None: ...
32-
def tell(self): ...
33-
def seek(self, pos) -> None: ...
34-
def read(self, bytes): ...
26+
class BufferedStream(Generic[AnyStr]):
27+
stream: SupportsRead[AnyStr]
28+
buffer: list[AnyStr]
29+
position: list[int]
30+
def __init__(self, stream: SupportsRead[AnyStr]) -> None: ...
31+
def tell(self) -> int: ...
32+
def seek(self, pos: int) -> None: ...
33+
def read(self, bytes: int) -> AnyStr: ...
3534

3635
@overload
3736
def HTMLInputStream(source: _UnicodeInputStream) -> HTMLUnicodeInputStream: ...
@@ -48,9 +47,9 @@ def HTMLInputStream(
4847
) -> HTMLBinaryInputStream: ...
4948

5049
class HTMLUnicodeInputStream:
51-
reportCharacterErrors: Incomplete
52-
newLines: Incomplete
53-
charEncoding: tuple[_Encoding, str]
50+
reportCharacterErrors: Callable[[str], None]
51+
newLines: list[int]
52+
charEncoding: tuple[Encoding, str]
5453
dataStream: Incomplete
5554
def __init__(self, source: _UnicodeInputStream) -> None: ...
5655
chunk: str
@@ -60,14 +59,17 @@ class HTMLUnicodeInputStream:
6059
prevNumLines: int
6160
prevNumCols: int
6261
def reset(self) -> None: ...
63-
def openStream(self, source): ...
62+
@overload
63+
def openStream(self, source: _SupportsReadT) -> _SupportsReadT: ...
64+
@overload
65+
def openStream(self, source: str | None) -> StringIO: ...
6466
def position(self) -> tuple[int, int]: ...
65-
def char(self): ...
66-
def readChunk(self, chunkSize=None): ...
67-
def characterErrorsUCS4(self, data) -> None: ...
68-
def characterErrorsUCS2(self, data) -> None: ...
69-
def charsUntil(self, characters, opposite: bool = False): ...
70-
def unget(self, char) -> None: ...
67+
def char(self) -> str | None: ...
68+
def readChunk(self, chunkSize: int | None = None) -> bool: ...
69+
def characterErrorsUCS4(self, data: str) -> None: ...
70+
def characterErrorsUCS2(self, data: str) -> None: ...
71+
def charsUntil(self, characters: Iterable[str | bytes | bytearray], opposite: bool = False) -> str: ...
72+
def unget(self, char: str | None) -> None: ...
7173

7274
class HTMLBinaryInputStream(HTMLUnicodeInputStream):
7375
rawStream: Incomplete
@@ -77,8 +79,8 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
7779
transport_encoding: Incomplete
7880
same_origin_parent_encoding: Incomplete
7981
likely_encoding: Incomplete
80-
default_encoding: Incomplete
81-
charEncoding: tuple[_Encoding, str]
82+
default_encoding: str
83+
charEncoding: tuple[Encoding, str]
8284
def __init__(
8385
self,
8486
source: _BinaryInputStream,
@@ -91,46 +93,52 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
9193
) -> None: ...
9294
dataStream: Incomplete
9395
def reset(self) -> None: ...
94-
def openStream(self, source): ...
96+
@overload # type: ignore[override]
97+
def openStream(self, source: _SupportsReadBytesT) -> _SupportsReadBytesT: ...
98+
@overload # type: ignore[override]
99+
def openStream(self, source: ReadableBuffer) -> BytesIO: ...
95100
def determineEncoding(self, chardet: bool = True): ...
96101
def changeEncoding(self, newEncoding: str | bytes | None) -> None: ...
97-
def detectBOM(self): ...
98-
def detectEncodingMeta(self): ...
102+
def detectBOM(self) -> Encoding | None: ...
103+
def detectEncodingMeta(self) -> Encoding | None: ...
99104

100105
class EncodingBytes(bytes):
101-
def __new__(self, value): ...
102-
def __init__(self, value) -> None: ...
103-
def __iter__(self): ...
104-
def __next__(self): ...
105-
def next(self): ...
106-
def previous(self): ...
107-
def setPosition(self, position) -> None: ...
108-
def getPosition(self): ...
109-
position: Incomplete
110-
def getCurrentByte(self): ...
106+
def __new__(self, value: bytes) -> Self: ...
107+
def __init__(self, value: bytes) -> None: ...
108+
def __iter__(self) -> Self: ... # type: ignore[override]
109+
def __next__(self) -> bytes: ...
110+
def next(self) -> bytes: ...
111+
def previous(self) -> bytes: ...
112+
def setPosition(self, position: int) -> None: ...
113+
def getPosition(self) -> int | None: ...
111114
@property
112-
def currentByte(self): ...
113-
def skip(self, chars=...): ...
114-
def skipUntil(self, chars): ...
115-
def matchBytes(self, bytes): ...
116-
def jumpTo(self, bytes): ...
115+
def position(self) -> int | None: ...
116+
@position.setter
117+
def position(self, position: int) -> None: ...
118+
def getCurrentByte(self) -> bytes: ...
119+
@property
120+
def currentByte(self) -> bytes: ...
121+
def skip(self, chars: bytes | bytearray | Iterable[bytes] = ...) -> bytes | None: ...
122+
def skipUntil(self, chars: bytes | bytearray | Iterable[bytes]) -> bytes | None: ...
123+
def matchBytes(self, bytes: bytes | bytearray) -> bool: ...
124+
def jumpTo(self, bytes: bytes | bytearray) -> Literal[True]: ...
117125

118126
class EncodingParser:
119-
data: Incomplete
120-
encoding: Incomplete
121-
def __init__(self, data) -> None: ...
122-
def getEncoding(self): ...
123-
def handleComment(self): ...
124-
def handleMeta(self): ...
125-
def handlePossibleStartTag(self): ...
126-
def handlePossibleEndTag(self): ...
127-
def handlePossibleTag(self, endTag): ...
128-
def handleOther(self): ...
129-
def getAttribute(self): ...
127+
data: EncodingBytes
128+
encoding: Encoding | None
129+
def __init__(self, data: bytes) -> None: ...
130+
def getEncoding(self) -> Encoding | None: ...
131+
def handleComment(self) -> bool: ...
132+
def handleMeta(self) -> bool: ...
133+
def handlePossibleStartTag(self) -> bool: ...
134+
def handlePossibleEndTag(self) -> bool: ...
135+
def handlePossibleTag(self, endTag: bool | None) -> bool: ...
136+
def handleOther(self) -> bool: ...
137+
def getAttribute(self) -> tuple[bytes, bytes] | None: ...
130138

131139
class ContentAttrParser:
132-
data: Incomplete
133-
def __init__(self, data) -> None: ...
134-
def parse(self): ...
140+
data: EncodingBytes
141+
def __init__(self, data: EncodingBytes) -> None: ...
142+
def parse(self) -> bytes | None: ...
135143

136-
def lookupEncoding(encoding: str | bytes | None) -> str | None: ...
144+
def lookupEncoding(encoding: str | bytes | None) -> Encoding | None: ...

0 commit comments

Comments
 (0)