Skip to content

Commit 3c3cafb

Browse files
qkaiser and e3krisztian
committed
fix(cli): add ability to extend default skip magic rather than overwrite it.
unblob has a decent default skip magic list that gets overwritten if a user provides their own, which means unblob users need to redefine all of unblob's default skip magic through the CLI whenever they provide their own. Changed the logic so that user provided skip magic values are simply appended to unblob's default list unless the user explicitly provides the "--clear-skip-magics" flag. Co-authored-by: Krisztián Fekete <1246751+e3krisztian@users.noreply.github.com>
1 parent a3d406f commit 3c3cafb

File tree

3 files changed

+91
-24
lines changed

3 files changed

+91
-24
lines changed

tests/test_cli.py

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from pathlib import Path
2-
from typing import List, Optional, Type
2+
from typing import Iterable, List, Optional, Type
33
from unittest import mock
44

55
import pytest
@@ -10,7 +10,12 @@
1010
from unblob.extractors.command import MultiFileCommand
1111
from unblob.handlers import BUILTIN_HANDLERS
1212
from unblob.models import DirectoryHandler, Glob, Handler, HexString, MultiFile
13-
from unblob.processing import DEFAULT_DEPTH, DEFAULT_PROCESS_NUM, ExtractionConfig
13+
from unblob.processing import (
14+
DEFAULT_DEPTH,
15+
DEFAULT_PROCESS_NUM,
16+
DEFAULT_SKIP_MAGIC,
17+
ExtractionConfig,
18+
)
1419
from unblob.ui import (
1520
NullProgressReporter,
1621
ProgressReporter,
@@ -367,3 +372,51 @@ def test_skip_extraction(
367372
assert (
368373
process_file_mock.call_args.args[0].skip_extraction == skip_extraction
369374
), fail_message
375+
376+
377+
@pytest.mark.parametrize(
378+
"args, skip_magic, fail_message",
379+
[
380+
([], DEFAULT_SKIP_MAGIC, "Should have kept default skip magics"),
381+
(
382+
["--skip-magic", "SUPERMAGIC"],
383+
(*DEFAULT_SKIP_MAGIC, "SUPERMAGIC"),
384+
"Should have kept default skip magics",
385+
),
386+
(["--clear-skip-magics"], [], "Should have cleared default skip magics"),
387+
(
388+
["--clear-skip-magics", "--skip-magic", "SUPERMAGIC"],
389+
["SUPERMAGIC"],
390+
"Should have cleared default skip magics",
391+
),
392+
(
393+
["--clear-skip-magics", "--skip-magic", DEFAULT_SKIP_MAGIC[1]],
394+
[DEFAULT_SKIP_MAGIC[1]],
395+
"Should allow user specified and remove the rest",
396+
),
397+
],
398+
)
399+
def test_clear_skip_magics(
400+
args: List[str], skip_magic: Iterable[str], fail_message: str, tmp_path: Path
401+
):
402+
runner = CliRunner()
403+
in_path = (
404+
Path(__file__).parent
405+
/ "integration"
406+
/ "archive"
407+
/ "zip"
408+
/ "regular"
409+
/ "__input__"
410+
/ "apple.zip"
411+
)
412+
params = [*args, "--extract-dir", str(tmp_path), str(in_path)]
413+
414+
process_file_mock = mock.MagicMock()
415+
with mock.patch.object(unblob.cli, "process_file", process_file_mock):
416+
result = runner.invoke(unblob.cli.cli, params)
417+
418+
assert result.exit_code == 0
419+
process_file_mock.assert_called_once()
420+
assert sorted(process_file_mock.call_args.args[0].skip_magic) == sorted(
421+
skip_magic
422+
), fail_message

unblob/cli.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -168,9 +168,11 @@ def __init__(
168168
"--skip-magic",
169169
"skip_magic",
170170
type=click.STRING,
171-
default=DEFAULT_SKIP_MAGIC,
172-
help="Skip processing files with given magic prefix",
173-
show_default=True,
171+
help=f"""Skip processing files with given magic prefix.
172+
The provided values are appended to unblob's own skip magic list unless
173+
--clear-skip-magics is provided.
174+
[default: {', '.join(DEFAULT_SKIP_MAGIC)}]
175+
""",
174176
multiple=True,
175177
)
176178
@click.option(
@@ -182,6 +184,14 @@ def __init__(
182184
show_default=True,
183185
multiple=True,
184186
)
187+
@click.option(
188+
"--clear-skip-magics",
189+
"clear_skip_magics",
190+
is_flag=True,
191+
show_default=True,
192+
default=False,
193+
help="Clear unblob's own skip magic list.",
194+
)
185195
@click.option(
186196
"-p",
187197
"--process-num",
@@ -246,6 +256,7 @@ def cli(
246256
entropy_depth: int,
247257
skip_magic: Iterable[str],
248258
skip_extension: Iterable[str],
259+
clear_skip_magics: bool, # noqa: FBT001
249260
skip_extraction: bool, # noqa: FBT001
250261
keep_extracted_chunks: bool, # noqa: FBT001
251262
handlers: Handlers,
@@ -263,6 +274,9 @@ def cli(
263274
extra_dir_handlers = plugin_manager.load_dir_handlers_from_plugins()
264275
dir_handlers += tuple(extra_dir_handlers)
265276

277+
extra_magics_to_skip = () if clear_skip_magics else DEFAULT_SKIP_MAGIC
278+
skip_magic = tuple(sorted(set(skip_magic).union(extra_magics_to_skip)))
279+
266280
config = ExtractionConfig(
267281
extract_root=extract_root,
268282
force_extract=force,

unblob/processing.py

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -52,31 +52,31 @@
5252
DEFAULT_PROCESS_NUM = multiprocessing.cpu_count()
5353
DEFAULT_SKIP_MAGIC = (
5454
"BFLT",
55-
"JPEG",
55+
"Composite Document File V2 Document",
56+
"Erlang BEAM file",
5657
"GIF",
57-
"PNG",
58-
"SQLite",
59-
"compiled Java class",
60-
"TrueType Font data",
61-
"PDF document",
62-
"magic binary file",
63-
"MS Windows icon resource",
64-
"Web Open Font Format",
6558
"GNU message catalog",
66-
"Xilinx BIT data",
59+
"HP Printer Job Language",
60+
"Intel serial flash for PCH ROM",
61+
"JPEG",
62+
"MPEG",
63+
"MS Windows icon resource",
64+
"Macromedia Flash data",
6765
"Microsoft Excel",
68-
"Microsoft Word",
69-
"Microsoft PowerPoint",
7066
"Microsoft OOXML",
67+
"Microsoft PowerPoint",
68+
"Microsoft Word",
7169
"OpenDocument",
72-
"Macromedia Flash data",
73-
"MPEG",
74-
"HP Printer Job Language",
75-
"Erlang BEAM file",
76-
"python", # (e.g. python 2.7 byte-compiled)
77-
"Composite Document File V2 Document",
70+
"PDF document",
71+
"PNG",
72+
"SQLite",
73+
"TrueType Font data",
74+
"Web Open Font Format",
7875
"Windows Embedded CE binary image",
79-
"Intel serial flash for PCH ROM",
76+
"Xilinx BIT data",
77+
"compiled Java class",
78+
"magic binary file",
79+
"python", # # (e.g. python 2.7 byte-compiled)
8080
)
8181
DEFAULT_SKIP_EXTENSION = (".rlib",)
8282

0 commit comments

Comments
 (0)