# archive/bzip3.ksy
#
# Kaitai Struct specification for files written by the bzip3 command-line
# tool: a fixed header followed by a sequence of length-prefixed chunks.
meta:
  id: bzip3
  title: Bzip3 header
  file-extension: bz3
  license: CC0-1.0
  endian: le
doc: |
  bzip3 is a compression tool and library. Depending on whether the CLI or
  the library is used, the file format is slightly different. This
  specification describes the structure of the format as generated by the CLI
  (header and chunks, not blocks).

  bzip3 assumes that the entire file is a bzip3 archive and will fail on
  trailing data, as there is no reliable way to detect whether data following
  a chunk is the next chunk or other data (as happens with concatenated
  files).

  A simple example to illustrate:

      $ cp /bin/ls .
      $ bzip3 ls
      $ cat ls.bz3 ls > test.bz3
      $ bz3cat test.bz3 > /dev/null
      Failed to decode a block: Inconsistent headers.

  If a potential chunk has an invalid length (len_compressed), then the end of
  the file has been reached. This check will not always work as it is possible
  to have data where the first four bytes will be a valid length. With
  additional block parsing and CRC checks it will be possible to detect invalid
  blocks. This is future work.
doc-ref:
  - https://github.com/kspalaiologos/bzip3
  - https://github.com/kspalaiologos/bzip3/blob/972e6694b815/doc/bzip3_format.md
seq:
  - id: header
    type: header
  # Chunks are read until the stream is exhausted or a chunk reports itself as
  # the last one (see `is_last` below), whichever comes first.
  - id: blocks
    type: compressed_data_block
    repeat: until
    repeat-until: _io.eof or _.is_last
types:
  # File header: 5-byte magic followed by the maximum block size.
  header:
    seq:
      - id: signature
        contents: 'BZ3v1'
      # Little-endian (per meta.endian); values outside the range below are
      # rejected at parse time.
      - id: max_block_size
        type: u4
        valid:
          min: 66_560 # 65 KiB
          max: 535_822_336 # 511 MiB
  # One chunk: two u4 lengths followed by `len_compressed` bytes of payload.
  # The payload is kept opaque; its inner block structure is not parsed here.
  compressed_data_block:
    seq:
      # NOTE(review): the top-level doc names len_compressed as the length
      # that signals end of data, but only len_uncompressed is range-checked
      # in this spec - confirm which one was intended.
      - id: len_compressed
        type: u4
      # A chunk can never expand to more than the block size announced in the
      # header; anything larger is a validation error.
      - id: len_uncompressed
        type: u4
        valid:
          max: _root.header.max_block_size
      - id: data
        size: len_compressed
    instances:
      # A chunk that expands to less than a full block ends the stream.
      # The comparison must be strict: `len_uncompressed` is already validated
      # to be <= max_block_size, so `<=` would be true for every chunk and the
      # repeat-until loop would always stop after the first one. A final chunk
      # that happens to be exactly one full block is caught by `_io.eof`.
      # NOTE(review): assumes the CLI only writes a short chunk at the end of
      # the input - confirm against the bzip3 sources.
      is_last:
        value: len_uncompressed < _root.header.max_block_size