Skip to content

Commit e5734a7

Browse files
committed
fix!: store raw commit/tag actor headers and parse lazily
1 parent 2f14246 commit e5734a7

File tree

11 files changed

+137
-44
lines changed

11 files changed

+137
-44
lines changed

examples/log.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,15 +142,16 @@ fn run(args: Args) -> anyhow::Result<()> {
142142
let info = info?;
143143
let commit = info.object()?;
144144
let commit_ref = commit.decode()?;
145+
let author = commit_ref.author();
145146
Ok(LogEntryInfo {
146147
commit_id: commit.id().to_hex().to_string(),
147148
parents: info.parent_ids().map(|id| id.shorten_or_id().to_string()).collect(),
148149
author: {
149150
let mut buf = Vec::new();
150-
commit_ref.author.actor().write_to(&mut buf)?;
151+
author.actor().write_to(&mut buf)?;
151152
buf.into()
152153
},
153-
time: commit_ref.author.time()?.format_or_unix(format::DEFAULT),
154+
time: author.time()?.format_or_unix(format::DEFAULT),
154155
message: commit_ref.message.to_owned(),
155156
})
156157
}),

gix-object/src/commit/decode.rs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,20 @@ pub fn commit<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>(
4040
.context(StrContext::Expected(
4141
"zero or more 'parent <40 lowercase hex char>'".into(),
4242
)),
43-
(|i: &mut _| parse::header_field(i, b"author", parse::signature))
44-
.context(StrContext::Expected("author <signature>".into())),
45-
(|i: &mut _| parse::header_field(i, b"committer", parse::signature))
46-
.context(StrContext::Expected("committer <signature>".into())),
43+
(|i: &mut _| {
44+
parse::header_field(i, b"author", parse::signature_with_raw).map(|(signature, raw)| {
45+
let _ = signature;
46+
raw
47+
})
48+
})
49+
.context(StrContext::Expected("author <signature>".into())),
50+
(|i: &mut _| {
51+
parse::header_field(i, b"committer", parse::signature_with_raw).map(|(signature, raw)| {
52+
let _ = signature;
53+
raw
54+
})
55+
})
56+
.context(StrContext::Expected("committer <signature>".into())),
4757
opt(|i: &mut _| parse::header_field(i, b"encoding", take_till(0.., NL)))
4858
.context(StrContext::Expected("encoding <encoding>".into())),
4959
repeat(

gix-object/src/commit/mod.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,10 @@ impl<'a> CommitRef<'a> {
7474

7575
/// Access
7676
impl<'a> CommitRef<'a> {
77+
fn parse_signature(raw: &'a BStr) -> gix_actor::SignatureRef<'a> {
78+
gix_actor::SignatureRef::from_bytes::<()>(raw.as_ref()).expect("signatures were validated during parsing")
79+
}
80+
7781
/// Return the `tree` fields hash digest.
7882
pub fn tree(&self) -> gix_hash::ObjectId {
7983
gix_hash::ObjectId::from_hex(self.tree).expect("prior validation of tree hash during parsing")
@@ -95,14 +99,14 @@ impl<'a> CommitRef<'a> {
9599
///
96100
/// This is different from the `author` field which may contain whitespace.
97101
pub fn author(&self) -> gix_actor::SignatureRef<'a> {
98-
self.author.trim()
102+
Self::parse_signature(self.author).trim()
99103
}
100104

101105
/// Return the committer, with whitespace trimmed.
102106
///
103107
/// This is different from the `committer` field which may contain whitespace.
104108
pub fn committer(&self) -> gix_actor::SignatureRef<'a> {
105-
self.committer.trim()
109+
Self::parse_signature(self.committer).trim()
106110
}
107111

108112
/// Returns a partially parsed message from which more information can be derived.
@@ -112,7 +116,7 @@ impl<'a> CommitRef<'a> {
112116

113117
/// Returns the time at which this commit was created, or a default time if it could not be parsed.
114118
pub fn time(&self) -> gix_date::Time {
115-
self.committer.time.parse().unwrap_or_default()
119+
Self::parse_signature(self.committer).time().unwrap_or_default()
116120
}
117121
}
118122

gix-object/src/commit/write.rs

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,32 @@ use bstr::ByteSlice;
44

55
use crate::{encode, encode::NL, Commit, CommitRef, Kind};
66

7+
fn parse_signature(raw: &bstr::BStr) -> gix_actor::SignatureRef<'_> {
8+
gix_actor::SignatureRef::from_bytes::<()>(raw.as_ref()).expect("signatures were validated during parsing")
9+
}
10+
11+
fn signature_requires_raw(raw: &bstr::BStr) -> bool {
12+
let signature = parse_signature(raw);
13+
signature.name.find_byteset(b"<>\n").is_some() || signature.email.find_byteset(b"<>\n").is_some()
14+
}
15+
16+
fn signature_len(raw: &bstr::BStr) -> usize {
17+
if signature_requires_raw(raw) {
18+
raw.len()
19+
} else {
20+
parse_signature(raw).size()
21+
}
22+
}
23+
24+
fn write_signature(mut out: &mut dyn io::Write, field: &[u8], raw: &bstr::BStr) -> io::Result<()> {
25+
if signature_requires_raw(raw) {
26+
encode::trusted_header_field(field, raw.as_ref(), &mut out)
27+
} else {
28+
let signature = parse_signature(raw);
29+
encode::trusted_header_signature(field, &signature, &mut out)
30+
}
31+
}
32+
733
impl crate::WriteTo for Commit {
834
/// Serializes this instance to `out` in the git serialization format.
935
fn write_to(&self, mut out: &mut dyn io::Write) -> io::Result<()> {
@@ -58,8 +84,8 @@ impl crate::WriteTo for CommitRef<'_> {
5884
for parent in self.parents() {
5985
encode::trusted_header_id(b"parent", &parent, &mut out)?;
6086
}
61-
encode::trusted_header_signature(b"author", &self.author, &mut out)?;
62-
encode::trusted_header_signature(b"committer", &self.committer, &mut out)?;
87+
write_signature(&mut out, b"author", self.author)?;
88+
write_signature(&mut out, b"committer", self.committer)?;
6389
if let Some(encoding) = self.encoding.as_ref() {
6490
encode::header_field(b"encoding", encoding, &mut out)?;
6591
}
@@ -78,8 +104,8 @@ impl crate::WriteTo for CommitRef<'_> {
78104
let hash_in_hex = self.tree().kind().len_in_hex();
79105
(b"tree".len() + 1 /* space */ + hash_in_hex + 1 /* nl */
80106
+ self.parents.iter().count() * (b"parent".len() + 1 /* space */ + hash_in_hex + 1 /* nl */)
81-
+ b"author".len() + 1 /* space */ + self.author.size() + 1 /* nl */
82-
+ b"committer".len() + 1 /* space */ + self.committer.size() + 1 /* nl */
107+
+ b"author".len() + 1 /* space */ + signature_len(self.author) + 1 /* nl */
108+
+ b"committer".len() + 1 /* space */ + signature_len(self.committer) + 1 /* nl */
83109
+ self
84110
.encoding
85111
.as_ref()

gix-object/src/lib.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -88,17 +88,16 @@ pub struct CommitRef<'a> {
8888
pub tree: &'a BStr,
8989
/// HEX hash of each parent commit. Empty for first commit in repository.
9090
pub parents: SmallVec<[&'a BStr; 1]>,
91-
/// Who wrote this commit. Name and email might contain whitespace and are not trimmed to ensure round-tripping.
91+
/// The raw author header value as encountered during parsing.
9292
///
93-
/// Use the [`author()`](CommitRef::author()) method to received a trimmed version of it.
94-
pub author: gix_actor::SignatureRef<'a>,
95-
/// Who committed this commit. Name and email might contain whitespace and are not trimmed to ensure round-tripping.
96-
///
97-
/// Use the [`committer()`](CommitRef::committer()) method to received a trimmed version of it.
93+
/// Use the [`author()`](CommitRef::author()) method to obtain a parsed version of it.
94+
#[cfg_attr(feature = "serde", serde(borrow))]
95+
pub author: &'a BStr,
96+
/// The raw committer header value as encountered during parsing.
9897
///
99-
/// This may be different from the `author` in case the author couldn't write to the repository themselves and
100-
/// is commonly encountered with contributed commits.
101-
pub committer: gix_actor::SignatureRef<'a>,
98+
/// Use the [`committer()`](CommitRef::committer()) method to obtain a parsed version of it.
99+
#[cfg_attr(feature = "serde", serde(borrow))]
100+
pub committer: &'a BStr,
102101
/// The name of the message encoding, otherwise [UTF-8 should be assumed](https://github.com/git/git/blob/e67fbf927dfdf13d0b21dc6ea15dc3c7ef448ea0/commit.c#L1493:L1493).
103102
pub encoding: Option<&'a BStr>,
104103
/// The commit message documenting the change.
@@ -150,8 +149,11 @@ pub struct TagRef<'a> {
150149
pub target_kind: Kind,
151150
/// The name of the tag, e.g. "v1.0".
152151
pub name: &'a BStr,
153-
/// The author of the tag.
154-
pub tagger: Option<gix_actor::SignatureRef<'a>>,
152+
/// The raw tagger header value as encountered during parsing.
153+
///
154+
/// Use the [`tagger()`](TagRef::tagger()) method to obtain a parsed version of it.
155+
#[cfg_attr(feature = "serde", serde(borrow))]
156+
pub tagger: Option<&'a BStr>,
155157
/// The message describing this release.
156158
pub message: &'a BStr,
157159
/// A cryptographic signature over the entire content of the serialized tag object thus far.

gix-object/src/object/convert.rs

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,20 @@ impl From<TagRef<'_>> for Tag {
77
name,
88
target_kind,
99
message,
10-
tagger: signature,
10+
tagger,
1111
pgp_signature,
1212
} = other;
13+
let tagger = tagger.map(|raw| {
14+
gix_actor::SignatureRef::from_bytes::<()>(raw.as_ref())
15+
.expect("signatures were validated during parsing")
16+
.into()
17+
});
1318
Tag {
1419
target: gix_hash::ObjectId::from_hex(target).expect("prior parser validation"),
1520
name: name.to_owned(),
1621
target_kind,
1722
message: message.to_owned(),
18-
tagger: signature.map(Into::into),
23+
tagger,
1924
pgp_signature: pgp_signature.map(ToOwned::to_owned),
2025
}
2126
}
@@ -32,6 +37,10 @@ impl From<CommitRef<'_>> for Commit {
3237
message,
3338
extra_headers,
3439
} = other;
40+
let author = gix_actor::SignatureRef::from_bytes::<()>(author.as_ref())
41+
.expect("signatures were validated during parsing");
42+
let committer = gix_actor::SignatureRef::from_bytes::<()>(committer.as_ref())
43+
.expect("signatures were validated during parsing");
3544
Commit {
3645
tree: gix_hash::ObjectId::from_hex(tree).expect("prior parser validation"),
3746
parents: parents

gix-object/src/parse.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,13 @@ pub(crate) fn signature<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrC
7171
) -> ModalResult<gix_actor::SignatureRef<'a>, E> {
7272
gix_actor::signature::decode(i)
7373
}
74+
75+
pub(crate) fn signature_with_raw<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>(
76+
i: &mut &'a [u8],
77+
) -> ModalResult<(gix_actor::SignatureRef<'a>, &'a BStr), E> {
78+
let original = *i;
79+
gix_actor::signature::decode(i).map(|signature| {
80+
let consumed = original.len() - i.len();
81+
(signature, original[..consumed].as_bstr())
82+
})
83+
}

gix-object/src/tag/decode.rs

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,23 @@ pub fn git_tag<'a, E: ParserError<&'a [u8]> + AddContext<&'a [u8], StrContext>>(
1919
.context(StrContext::Expected("type <object kind>".into())),
2020
(|i: &mut _| parse::header_field(i, b"tag", take_while(1.., |b| b != NL[0])))
2121
.context(StrContext::Expected("tag <version>".into())),
22-
opt(|i: &mut _| parse::header_field(i, b"tagger", parse::signature))
23-
.context(StrContext::Expected("tagger <signature>".into())),
22+
opt(|i: &mut _| {
23+
parse::header_field(i, b"tagger", parse::signature_with_raw).map(|(signature, raw)| {
24+
let _ = signature;
25+
raw
26+
})
27+
})
28+
.context(StrContext::Expected("tagger <signature>".into())),
2429
terminated(message, eof),
2530
)
26-
.map(
27-
|(target, kind, tag_version, signature, (message, pgp_signature))| TagRef {
28-
target,
29-
name: tag_version.as_bstr(),
30-
target_kind: kind,
31-
message,
32-
tagger: signature,
33-
pgp_signature,
34-
},
35-
)
31+
.map(|(target, kind, tag_version, tagger, (message, pgp_signature))| TagRef {
32+
target,
33+
name: tag_version.as_bstr(),
34+
target_kind: kind,
35+
message,
36+
tagger,
37+
pgp_signature,
38+
})
3639
.parse_next(i)
3740
}
3841

gix-object/src/tag/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,13 @@ impl<'a> TagRef<'a> {
2424
gix_hash::ObjectId::from_hex(self.target).expect("prior validation")
2525
}
2626

27+
/// Return the tagger, if present.
28+
pub fn tagger(&self) -> Option<gix_actor::SignatureRef<'a>> {
29+
self.tagger.map(|raw| {
30+
gix_actor::SignatureRef::from_bytes::<()>(raw.as_ref()).expect("tagger was validated during parsing")
31+
})
32+
}
33+
2734
/// Copy all data into a fully-owned instance.
2835
pub fn into_owned(self) -> crate::Tag {
2936
self.into()

gix-object/src/tag/write.rs

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,27 @@
11
use std::io;
22

3-
use bstr::BStr;
3+
use bstr::{BStr, ByteSlice};
44
use gix_date::parse::TimeBuf;
55

66
use crate::{encode, encode::NL, Kind, Tag, TagRef};
77

8+
fn parse_signature(raw: &BStr) -> gix_actor::SignatureRef<'_> {
9+
gix_actor::SignatureRef::from_bytes::<()>(raw.as_ref()).expect("signatures were validated during parsing")
10+
}
11+
12+
fn signature_requires_raw(raw: &BStr) -> bool {
13+
let signature = parse_signature(raw);
14+
signature.name.find_byteset(b"<>\n").is_some() || signature.email.find_byteset(b"<>\n").is_some()
15+
}
16+
17+
fn signature_len(raw: &BStr) -> usize {
18+
if signature_requires_raw(raw) {
19+
raw.len()
20+
} else {
21+
parse_signature(raw).size()
22+
}
23+
}
24+
825
/// An Error used in [`Tag::write_to()`][crate::WriteTo::write_to()].
926
#[derive(Debug, thiserror::Error)]
1027
#[allow(missing_docs)]
@@ -64,8 +81,13 @@ impl crate::WriteTo for TagRef<'_> {
6481
encode::trusted_header_field(b"object", self.target, &mut out)?;
6582
encode::trusted_header_field(b"type", self.target_kind.as_bytes(), &mut out)?;
6683
encode::header_field(b"tag", validated_name(self.name)?, &mut out)?;
67-
if let Some(tagger) = &self.tagger {
68-
encode::trusted_header_signature(b"tagger", tagger, &mut out)?;
84+
if let Some(tagger) = self.tagger {
85+
if signature_requires_raw(tagger) {
86+
encode::trusted_header_field(b"tagger", tagger.as_ref(), &mut out)?;
87+
} else {
88+
let signature = parse_signature(tagger);
89+
encode::trusted_header_signature(b"tagger", &signature, &mut out)?;
90+
}
6991
}
7092

7193
if !self.message.iter().all(|b| *b == b'\n') {
@@ -89,8 +111,7 @@ impl crate::WriteTo for TagRef<'_> {
89111
+ b"tag".len() + 1 /* space */ + self.name.len() + 1 /* nl */
90112
+ self
91113
.tagger
92-
.as_ref()
93-
.map_or(0, |t| b"tagger".len() + 1 /* space */ + t.size() + 1 /* nl */)
114+
.map_or(0, |raw| b"tagger".len() + 1 /* space */ + signature_len(raw) + 1 /* nl */)
94115
+ if self.message.iter().all(|b| *b == b'\n') { 0 } else { 1 /* nl */ } + self.message.len()
95116
+ self.pgp_signature.as_ref().map_or(0, |m| 1 /* nl */ + m.len())) as u64
96117
}

0 commit comments

Comments
 (0)