Skip to content

Commit a3d406f

Browse files
authored
Merge pull request #697 from onekey-sec/padding-auto-id
Unknown chunks auto-identification (padding)
2 parents 00b25fa + 89cd491 commit a3d406f

File tree

11 files changed

+69
-10
lines changed

11 files changed

+69
-10
lines changed

tests/test_report.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ def hello_kitty_task_results(
7373
extract_root: Path,
7474
hello_id: str,
7575
kitty_id: str,
76+
padding_id: str,
7677
container_id="",
7778
start_depth=0,
7879
):
@@ -133,12 +134,14 @@ def hello_kitty_task_results(
133134
size=7,
134135
entropy=None,
135136
),
136-
UnknownChunkReport(
137-
id=ANY,
137+
ChunkReport(
138+
id=padding_id,
138139
start_offset=263,
139140
end_offset=264,
140141
size=1,
141-
entropy=None,
142+
handler_name="padding",
143+
is_encrypted=False,
144+
extraction_reports=[],
142145
),
143146
ChunkReport(
144147
id=hello_id,
@@ -286,13 +289,14 @@ def test_flat_report_structure(hello_kitty: Path, extract_root):
286289
task_results = get_normalized_task_results(process_result)
287290

288291
# extract the ids from the chunks
289-
hello_id, kitty_id = get_chunk_ids(task_results[0])
292+
padding_id, hello_id, kitty_id = get_chunk_ids(task_results[0])
290293

291294
assert task_results == hello_kitty_task_results(
292295
hello_kitty=hello_kitty,
293296
extract_root=extract_root,
294297
hello_id=hello_id,
295298
kitty_id=kitty_id,
299+
padding_id=padding_id,
296300
)
297301

298302

@@ -416,7 +420,7 @@ def test_chunk_in_chunk_report_structure(hello_kitty_container: Path, extract_ro
416420
# and they should be the only differences
417421
[main_id] = get_chunk_ids(task_results[0])
418422

419-
hello_id, kitty_id = get_chunk_ids(task_results[2])
423+
padding_id, hello_id, kitty_id = get_chunk_ids(task_results[2])
420424

421425
# We test, that the container is referenced from the internal file
422426
# through the chunk id `main_id`
@@ -428,6 +432,7 @@ def test_chunk_in_chunk_report_structure(hello_kitty_container: Path, extract_ro
428432
extract_root=extract_root / "container_extract",
429433
hello_id=hello_id,
430434
kitty_id=kitty_id,
435+
padding_id=padding_id,
431436
container_id=main_id,
432437
start_depth=1,
433438
)

unblob/extractor.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22
import errno
33
import os
44
from pathlib import Path
5+
from typing import Union
56

67
from structlog import get_logger
78

89
from .file_utils import carve, is_safe_path
9-
from .models import Chunk, File, TaskResult, UnknownChunk, ValidChunk
10+
from .models import Chunk, File, PaddingChunk, TaskResult, UnknownChunk, ValidChunk
1011
from .report import MaliciousSymlinkRemoved
1112

1213
logger = get_logger()
@@ -113,8 +114,14 @@ def _fix_extracted_directory(directory: Path):
113114
_fix_extracted_directory(outdir)
114115

115116

116-
def carve_unknown_chunk(extract_dir: Path, file: File, chunk: UnknownChunk) -> Path:
117-
filename = f"{chunk.start_offset}-{chunk.end_offset}.unknown"
117+
def carve_unknown_chunk(
118+
extract_dir: Path, file: File, chunk: Union[UnknownChunk, PaddingChunk]
119+
) -> Path:
120+
extension = "unknown"
121+
if isinstance(chunk, PaddingChunk):
122+
extension = "padding"
123+
124+
filename = f"{chunk.start_offset}-{chunk.end_offset}.{extension}"
118125
carve_path = extract_dir / filename
119126
logger.info("Extracting unknown chunk", path=carve_path, chunk=chunk)
120127
carve_chunk_to_file(carve_path, file, chunk)

unblob/models.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,28 @@ def as_report(self, entropy: Optional[EntropyReport]) -> UnknownChunkReport:
147147
)
148148

149149

150+
@attr.define(repr=False)
151+
class PaddingChunk(Chunk):
152+
r"""Gaps between valid chunks or otherwise unknown chunks, identified as padding.
153+
154+
Important for manual analysis, and analytical certainty: for example
155+
entropy, other chunks inside it, metadata, etc.
156+
"""
157+
158+
def as_report(
159+
self, entropy: Optional[EntropyReport] # noqa: ARG002
160+
) -> ChunkReport:
161+
return ChunkReport(
162+
id=self.id,
163+
start_offset=self.start_offset,
164+
end_offset=self.end_offset,
165+
size=self.size,
166+
is_encrypted=False,
167+
handler_name="padding",
168+
extraction_reports=[],
169+
)
170+
171+
150172
@attrs.define
151173
class MultiFile(Blob):
152174
name: str = attr.field(kw_only=True)

0 commit comments

Comments
 (0)