
Commit 32bbe1b

Upgrading dependencies. (#1801)
1 parent 89dc9cb commit 32bbe1b

7 files changed: +17 additions, -28 deletions

bindings/python/Cargo.toml

Lines changed: 4 additions & 4 deletions

@@ -14,17 +14,17 @@ serde = { version = "1.0", features = ["rc", "derive"] }
 serde_json = "1.0"
 libc = "0.2"
 env_logger = "0.11"
-pyo3 = { version = "0.24.2", features = ["abi3", "abi3-py39", "py-clone"] }
-numpy = "0.24"
+pyo3 = { version = "0.25", features = ["abi3", "abi3-py39", "py-clone"] }
+numpy = "0.25"
 ndarray = "0.16"
-itertools = "0.12"
+itertools = "0.14"

 [dependencies.tokenizers]
 path = "../../tokenizers"

 [dev-dependencies]
 tempfile = "3.10"
-pyo3 = { version = "0.24.2", features = ["auto-initialize"] }
+pyo3 = { version = "0.25", features = ["auto-initialize"] }

 [features]
 default = ["pyo3/extension-module"]

tokenizers/Cargo.toml

Lines changed: 4 additions & 4 deletions

@@ -41,7 +41,7 @@ name = "llama3_benchmark"
 harness = false

 [dependencies]
-rand = "0.8"
+rand = "0.9"
 onig = { version = "6.5.1", default-features = false, optional = true }
 regex = "1.10"
 regex-syntax = "0.8"
@@ -63,7 +63,7 @@ paste = "1.0.14"
 macro_rules_attribute = "0.2.0"
 thiserror = "2"
 fancy-regex = { version = "0.14", optional = true}
-getrandom = { version = "0.2.10" }
+getrandom = { version = "0.3" }
 esaxx-rs = { version = "0.1.10", default-features = false, features=[]}
 monostate = "0.1.12"

@@ -72,11 +72,11 @@ default = ["progressbar", "onig", "esaxx_fast"]
 esaxx_fast = ["esaxx-rs/cpp"]
 progressbar = ["indicatif"]
 http = ["hf-hub"]
-unstable_wasm = ["fancy-regex", "getrandom/js"]
+unstable_wasm = ["fancy-regex", "getrandom/wasm_js"]
 rustls-tls = ["hf-hub?/rustls-tls"]

 [dev-dependencies]
-criterion = "0.5"
+criterion = "0.6"
 tempfile = "3.10"
 assert_approx_eq = "1.1"
 tracing = "0.1"

tokenizers/benches/common/mod.rs

Lines changed: 1 addition & 1 deletion

@@ -1,6 +1,6 @@
 use std::time::{Duration, Instant};

-use criterion::black_box;
+use std::hint::black_box;

 use tokenizers::{
     Decoder, EncodeInput, Model, Normalizer, PostProcessor, PreTokenizer, TokenizerImpl, Trainer,

tokenizers/benches/layout_benchmark.rs

Lines changed: 1 addition & 1 deletion

@@ -6,8 +6,8 @@ use std::io::{BufRead, BufReader};
 use std::path::Path;
 use std::time::{Duration, Instant};

-use criterion::black_box;
 use criterion::Criterion;
+use std::hint::black_box;
 use tokenizers::processors::template::TemplateProcessing;
 use tokenizers::{EncodeInput, Encoding, PostProcessor, Tokenizer};


tokenizers/benches/llama3_benchmark.rs

Lines changed: 2 additions & 10 deletions

@@ -5,6 +5,7 @@ mod common;

 use common::{iter_bench_encode, iter_bench_encode_batch, iter_bench_train};
 use criterion::{Criterion, Throughput};
+use std::hint::black_box;
 use tokenizers::{
     models::{bpe::BpeTrainerBuilder, TrainerWrapper},
     EncodeInput, Tokenizer,
@@ -32,7 +33,7 @@ pub fn llama3(c: &mut Criterion) {
         let add_special_tokens = false;
         b.iter(|| {
             tokenizer
-                .encode_batch_char_offsets(criterion::black_box(data.clone()), add_special_tokens)
+                .encode_batch_char_offsets(black_box(data.clone()), add_special_tokens)
                 .unwrap()
         })
     });
@@ -42,15 +43,6 @@ pub fn llama3(c: &mut Criterion) {
     group.bench_function("llama3-batch", |b| {
         b.iter_custom(|iters| iter_bench_encode_batch(iters, &tokenizer, &batches))
     });
-    // group.bench_function("llama3-nooffsets", |b| {
-    //     let data: Vec<_> = data.lines().collect();
-    //     let add_special_tokens = false;
-    //     b.iter(|| {
-    //         tokenizer
-    //             .encode_batch(criterion::black_box(data.clone()), add_special_tokens)
-    //             .unwrap()
-    //     })
-    // });
     let mut trainer: TrainerWrapper = BpeTrainerBuilder::default()
        .show_progress(false)
        .build()
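
Note: all three bench files swap criterion's re-exported black_box for std::hint::black_box, which has been stable in the standard library since Rust 1.66 and makes the benches independent of criterion's re-export across the 0.6 upgrade. A minimal sketch of the resulting pattern, with a stand-in closure instead of the real tokenizer and run through the usual harness = false bench target, might look like this:

use criterion::{criterion_group, criterion_main, Criterion};
// The standard-library black_box replaces the old `use criterion::black_box;`.
use std::hint::black_box;

fn bench_encode(c: &mut Criterion) {
    // Stand-in for the real tokenizer; a trivial whitespace split keeps the sketch self-contained.
    let encode = |s: &str| s.split_whitespace().count();
    let data = "hello world this is a benchmark line".to_string();

    c.bench_function("encode-sketch", |b| {
        b.iter(|| {
            // black_box keeps the optimizer from constant-folding the input or discarding the result.
            black_box(encode(black_box(data.as_str())))
        })
    });
}

criterion_group!(benches, bench_encode);
criterion_main!(benches);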

tokenizers/src/models/bpe/word.rs

Lines changed: 2 additions & 5 deletions

@@ -1,5 +1,5 @@
 use super::Pair;
-use rand::{thread_rng, Rng};
+use rand::{rng, Rng};
 use std::cmp::Ordering;
 use std::collections::{BinaryHeap, HashMap};

@@ -177,10 +177,7 @@ impl Word {
        );

        while let Some(top) = queue.pop() {
-            if dropout
-                .map(|d| thread_rng().gen::<f32>() < d)
-                .unwrap_or(false)
-            {
+            if dropout.map(|d| rng().random::<f32>() < d).unwrap_or(false) {
                skip.push(top);
            } else {
                // Re-insert the skipped elements
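
Note: rand 0.9 renames the entry points used here: thread_rng() becomes rng() and Rng::gen() becomes Rng::random() (avoiding the gen keyword reserved in the 2024 edition). A standalone sketch of the dropout check under the new API, with a hypothetical dropout value rather than the actual BPE merge loop:

use rand::{rng, Rng};

/// Returns true when a merge should be skipped, mirroring the dropout test
/// in `Word::merge_all` as written against the rand 0.9 API.
fn drop_merge(dropout: Option<f32>) -> bool {
    // `rng()` replaces `thread_rng()`, and `random()` replaces `gen()` in rand 0.9.
    dropout.map(|d| rng().random::<f32>() < d).unwrap_or(false)
}

fn main() {
    // Hypothetical 10% BPE dropout, just to exercise the call.
    let skipped = (0..1000).filter(|_| drop_merge(Some(0.1))).count();
    println!("skipped {skipped} of 1000 merges (~10% expected)");
}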

tokenizers/src/models/unigram/lattice.rs

Lines changed: 3 additions & 3 deletions

@@ -1,5 +1,5 @@
-use rand::distributions::WeightedIndex;
-use rand::prelude::*;
+use rand::distr::weighted::WeightedIndex;
+use rand::{prelude::*, rng};
 use std::cell::RefCell;
 use std::cmp::{min, Ordering};
 use std::collections::BinaryHeap;
@@ -397,7 +397,7 @@ impl<'a> Lattice<'a> {
            }
        }

-        let mut rng = thread_rng();
+        let mut rng = rng();
        let mut results: Vec<NodeRef> = vec![];
        let mut probs: Vec<f64> = vec![];
        let mut z = alpha[self.eos_node().borrow().node_id];
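
Note: in rand 0.9 the distributions module is renamed to distr, so WeightedIndex now lives at rand::distr::weighted::WeightedIndex, and sampling goes through the new rng() handle. A self-contained sketch of weighted sampling under the new layout, with made-up weights standing in for the lattice path probabilities:

use rand::distr::weighted::WeightedIndex;
use rand::{prelude::*, rng};

fn main() {
    // Stand-in weights; in the lattice these would be the per-path probabilities.
    let probs: Vec<f64> = vec![0.1, 0.6, 0.3];

    // WeightedIndex moved from rand::distributions to rand::distr::weighted in 0.9.
    let dist = WeightedIndex::new(&probs).expect("weights must be non-negative and not all zero");

    let mut rng = rng();
    // Draw a few indices in proportion to their weights.
    let draws: Vec<usize> = (0..5).map(|_| dist.sample(&mut rng)).collect();
    println!("sampled indices: {draws:?}");
}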
