Skip to content

Commit b9bed6f

Browse files
committed
Add benchmarks for escaping / unescaping text
closes #404
1 parent 7770b76 commit b9bed6f

File tree

4 files changed

+154
-15
lines changed

4 files changed

+154
-15
lines changed

Changelog.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@
103103
- [#393]: Added more tests for namespace resolver
104104
- [#393]: Added tests for reserved names (starting with "xml"i) -- see <https://www.w3.org/TR/xml-names11/#xmlReserved>
105105
- [#363]: Add tests for `Reader::read_event_buffered` to ensure that proper events are generated for corresponding inputs
106+
- [#407]: Improved benchmark suite to cover whole-document parsing, escaping and unescaping text
106107

107108
[#8]: https://github.com/Mingun/fast-xml/pull/8
108109
[#9]: https://github.com/Mingun/fast-xml/pull/9
@@ -115,6 +116,7 @@
115116
[#393]: https://github.com/tafia/quick-xml/pull/393
116117
[#395]: https://github.com/tafia/quick-xml/pull/395
117118
[#403]: https://github.com/tafia/quick-xml/pull/403
119+
[#407]: https://github.com/tafia/quick-xml/pull/407
118120

119121
## 0.23.0 -- 2022-05-08
120122

benches/macrobenches.rs

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,13 @@ fn parse_document(doc: &[u8]) -> XmlResult<()> {
2626
for attr in e.attributes() {
2727
criterion::black_box(attr?.unescaped_value()?);
2828
}
29-
},
29+
}
3030
Event::Text(e) => {
3131
criterion::black_box(e.unescaped()?);
32-
},
32+
}
3333
Event::CData(e) => {
3434
criterion::black_box(e.into_inner());
35-
},
35+
}
3636
Event::End(_) => (),
3737
Event::Eof => break,
3838
_ => (),
@@ -131,7 +131,4 @@ pub fn bench_fully_parse_document(c: &mut Criterion) {
131131
group.finish();
132132
}
133133

134-
criterion_group!(
135-
benches,
136-
bench_fully_parse_document,
137-
);
134+
criterion_group!(benches, bench_fully_parse_document,);

benches/microbenches.rs

Lines changed: 146 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,28 @@
11
use criterion::{self, criterion_group, Criterion};
22
use pretty_assertions::assert_eq;
3+
use quick_xml::escape::{escape, unescape};
34
use quick_xml::events::Event;
45
use quick_xml::name::QName;
56
use quick_xml::Reader;
67

78
static SAMPLE: &[u8] = include_bytes!("../tests/documents/sample_rss.xml");
89
static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml");
910

11+
static LOREM_IPSUM_TEXT: &[u8] =
12+
b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt
13+
ut labore et dolore magna aliqua. Hac habitasse platea dictumst vestibulum rhoncus est pellentesque.
14+
Risus ultricies tristique nulla aliquet enim tortor at. Fermentum odio eu feugiat pretium nibh ipsum.
15+
Volutpat sed cras ornare arcu dui. Scelerisque fermentum dui faucibus in ornare quam. Arcu cursus
16+
euismod quis viverra nibh cras pulvinar mattis. Sed viverra tellus in hac habitasse platea. Quis
17+
commodo odio aenean sed. Cursus in hac habitasse platea dictumst quisque sagittis purus.
18+
19+
Neque convallis a cras semper auctor. Sit amet mauris commodo quis imperdiet massa. Ac ut consequat
20+
semper viverra nam libero justo laoreet sit. Adipiscing commodo elit at imperdiet dui accumsan.
21+
Enim lobortis scelerisque fermentum dui faucibus in ornare. Natoque penatibus et magnis dis parturient
22+
montes nascetur ridiculus mus. At lectus urna duis convallis convallis tellus id interdum. Libero
23+
volutpat sed cras ornare arcu dui vivamus arcu. Cursus in hac habitasse platea dictumst quisque sagittis
24+
purus. Consequat id porta nibh venenatis cras sed felis.";
25+
1026
/// Benchmarks the `Reader::read_event` function with all XML well-formedness
1127
/// checks disabled (with and without trimming content of #text nodes)
1228
fn read_event(c: &mut Criterion) {
@@ -25,7 +41,10 @@ fn read_event(c: &mut Criterion) {
2541
}
2642
buf.clear();
2743
}
28-
assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml");
44+
assert_eq!(
45+
count, 1550,
46+
"Overall tag count in ./tests/documents/sample_rss.xml"
47+
);
2948
})
3049
});
3150

@@ -45,7 +64,10 @@ fn read_event(c: &mut Criterion) {
4564
}
4665
buf.clear();
4766
}
48-
assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml");
67+
assert_eq!(
68+
count, 1550,
69+
"Overall tag count in ./tests/documents/sample_rss.xml"
70+
);
4971
});
5072
});
5173
group.finish();
@@ -70,7 +92,10 @@ fn read_namespaced_event(c: &mut Criterion) {
7092
}
7193
buf.clear();
7294
}
73-
assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml");
95+
assert_eq!(
96+
count, 1550,
97+
"Overall tag count in ./tests/documents/sample_rss.xml"
98+
);
7499
});
75100
});
76101

@@ -91,7 +116,10 @@ fn read_namespaced_event(c: &mut Criterion) {
91116
}
92117
buf.clear();
93118
}
94-
assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml");
119+
assert_eq!(
120+
count, 1550,
121+
"Overall tag count in ./tests/documents/sample_rss.xml"
122+
);
95123
});
96124
});
97125
group.finish();
@@ -117,7 +145,10 @@ fn bytes_text_unescaped(c: &mut Criterion) {
117145
}
118146
buf.clear();
119147
}
120-
assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml");
148+
assert_eq!(
149+
count, 1550,
150+
"Overall tag count in ./tests/documents/sample_rss.xml"
151+
);
121152

122153
// Windows has \r\n instead of \n
123154
#[cfg(windows)]
@@ -152,7 +183,10 @@ fn bytes_text_unescaped(c: &mut Criterion) {
152183
}
153184
buf.clear();
154185
}
155-
assert_eq!(count, 1550, "Overall tag count in ./tests/documents/sample_rss.xml");
186+
assert_eq!(
187+
count, 1550,
188+
"Overall tag count in ./tests/documents/sample_rss.xml"
189+
);
156190

157191
// Windows has \r\n instead of \n
158192
#[cfg(windows)]
@@ -333,11 +367,116 @@ fn attributes(c: &mut Criterion) {
333367
group.finish();
334368
}
335369

370+
/// Benchmarks escaping text using XML rules
371+
fn escaping(c: &mut Criterion) {
372+
let mut group = c.benchmark_group("escape_text");
373+
374+
group.bench_function("no_chars_to_escape_long", |b| {
375+
b.iter(|| {
376+
criterion::black_box(escape(LOREM_IPSUM_TEXT));
377+
})
378+
});
379+
380+
group.bench_function("no_chars_to_escape_short", |b| {
381+
b.iter(|| {
382+
criterion::black_box(escape(b"just bit of text"));
383+
})
384+
});
385+
386+
group.bench_function("escaped_chars_short", |b| {
387+
b.iter(|| {
388+
criterion::black_box(escape(b"age > 72 && age < 21"));
389+
criterion::black_box(escape(b"\"what's that?\""));
390+
})
391+
});
392+
393+
group.bench_function("escaped_chars_long", |b| {
394+
let lorem_ipsum_with_escape_chars =
395+
b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt
396+
ut labore et dolore magna aliqua. & Hac habitasse platea dictumst vestibulum rhoncus est pellentesque.
397+
Risus ultricies tristique nulla aliquet enim tortor at. Fermentum odio eu feugiat pretium nibh ipsum.
398+
Volutpat sed cras ornare arcu dui. Scelerisque fermentum dui faucibus in ornare quam. Arcu cursus
399+
euismod quis< viverra nibh cras pulvinar mattis. Sed viverra tellus in hac habitasse platea. Quis
400+
commodo odio aenean sed. Cursus in hac habitasse platea dictumst quisque sagittis purus.
401+
402+
Neque convallis >a cras semper auctor. Sit amet mauris commodo quis imperdiet massa. Ac ut consequat
403+
semper viverra nam libero justo laoreet sit. 'Adipiscing' commodo elit at imperdiet dui accumsan.
404+
Enim lobortis scelerisque fermentum dui faucibus in ornare. Natoque penatibus et magnis dis parturient
405+
montes nascetur ridiculus mus. At lectus urna duis convallis convallis tellus id interdum. Libero
406+
volutpat sed cras ornare arcu dui vivamus arcu. Cursus in hac habitasse platea dictumst quisque sagittis
407+
purus. Consequat id porta nibh venenatis cras sed felis.";
408+
409+
b.iter(|| {
410+
criterion::black_box(escape(lorem_ipsum_with_escape_chars));
411+
})
412+
});
413+
group.finish();
414+
}
415+
416+
/// Benchmarks unescaping text encoded using XML rules
417+
fn unescaping(c: &mut Criterion) {
418+
let mut group = c.benchmark_group("unescape_text");
419+
420+
group.bench_function("no_chars_to_unescape_long", |b| {
421+
b.iter(|| {
422+
criterion::black_box(unescape(LOREM_IPSUM_TEXT)).unwrap();
423+
})
424+
});
425+
426+
group.bench_function("no_chars_to_unescape_short", |b| {
427+
b.iter(|| {
428+
criterion::black_box(unescape(b"just a bit of text")).unwrap();
429+
})
430+
});
431+
432+
group.bench_function("char_reference", |b| {
433+
b.iter(|| {
434+
let text = b"prefix &#34;some stuff&#34;,&#x22;more stuff&#x22;";
435+
criterion::black_box(unescape(text)).unwrap();
436+
let text = b"&#38;&#60;";
437+
criterion::black_box(unescape(text)).unwrap();
438+
})
439+
});
440+
441+
group.bench_function("entity_reference", |b| {
442+
b.iter(|| {
443+
let text = b"age &gt; 72 &amp;&amp; age &lt; 21";
444+
criterion::black_box(unescape(text)).unwrap();
445+
let text = b"&quot;what&apos;s that?&quot;";
446+
criterion::black_box(unescape(text)).unwrap();
447+
})
448+
});
449+
450+
group.bench_function("mixed", |b| {
451+
let text =
452+
b"Lorem ipsum dolor sit amet, &amp;consectetur adipiscing elit, sed do eiusmod tempor incididunt
453+
ut labore et dolore magna aliqua. Hac habitasse platea dictumst vestibulum rhoncus est pellentesque.
454+
Risus ultricies &quot;tristique nulla aliquet enim tortor&quot; at. Fermentum odio eu feugiat pretium
455+
nibh ipsum. Volutpat sed cras ornare arcu dui. Scelerisque fermentum dui faucibus in ornare quam. Arcu
456+
cursus euismod quis &#60;viverra nibh cras pulvinar mattis. Sed viverra tellus in hac habitasse platea.
457+
Quis commodo odio aenean sed. Cursus in hac habitasse platea dictumst quisque sagittis purus.
458+
459+
Neque convallis a cras semper auctor. Sit amet mauris commodo quis imperdiet massa. Ac ut consequat
460+
semper viverra nam libero justo &#35; laoreet sit. Adipiscing commodo elit at imperdiet dui accumsan.
461+
Enim lobortis scelerisque fermentum dui faucibus in ornare. Natoque penatibus et magnis dis parturient
462+
montes nascetur ridiculus mus. At lectus urna &#33;duis convallis convallis tellus id interdum. Libero
463+
volutpat sed cras ornare arcu dui vivamus arcu. Cursus in hac habitasse platea dictumst quisque sagittis
464+
purus. Consequat id porta nibh venenatis cras sed felis.";
465+
466+
b.iter(|| {
467+
criterion::black_box(unescape(text)).unwrap();
468+
})
469+
});
470+
group.finish();
471+
}
472+
336473
criterion_group!(
337474
benches,
338475
read_event,
339476
bytes_text_unescaped,
340477
read_namespaced_event,
341478
one_event,
342-
attributes
479+
attributes,
480+
escaping,
481+
unescaping,
343482
);

tests/test.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,8 @@ fn line_score() {
264264
inning: String,
265265
}
266266

267-
let res: LineScoreData = quick_xml::de::from_str(include_str!("documents/linescore.xml")).unwrap();
267+
let res: LineScoreData =
268+
quick_xml::de::from_str(include_str!("documents/linescore.xml")).unwrap();
268269

269270
let expected = LineScoreData {
270271
game_pk: 239575,

0 commit comments

Comments
 (0)