2 changes: 1 addition & 1 deletion .gitignore
@@ -6,7 +6,7 @@ clevertest
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock

*.idea
# These are backup files generated by rustfmt
**/*.rs.bk

11 changes: 11 additions & 0 deletions .idea/cleverlib.iml

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -18,3 +18,4 @@ regex = "1.11.0"
rayon = "1.10.0"
rayon-progress = "1.0.0"
indicatif = { version = "0.18.0", features = ["rayon"] }
thiserror = "2.0"
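The thiserror dependency added here backs an errors module (ClefParserError, ClefParserResult) that src/clefparser.rs imports further down but that is not included in this diff. A minimal sketch of what src/errors.rs might look like, covering only the two variants the parser constructs (the exact messages and layout are assumptions):

// src/errors.rs -- hypothetical sketch, not taken from this PR.
use thiserror::Error;

/// Result alias used by ClefParser's constructors and chunk methods.
pub type ClefParserResult<T> = Result<T, ClefParserError>;

#[derive(Debug, Error)]
pub enum ClefParserError {
    /// Raised when the CLEF file cannot be opened; wraps the underlying io::Error.
    #[error("failed to open CLEF file '{path}': {source}")]
    FileOpenError {
        path: String,
        source: std::io::Error,
    },

    /// Returned by ClefParser::new when chunk_size is zero.
    #[error("invalid chunk size: {size} (must be greater than zero)")]
    InvalidChunkSize { size: usize },
}

The `?` applied to EventCollection::create in both chunk methods also implies a conversion (for example a #[from] variant) for that call's error type; it is omitted here because it is not visible in the diff.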
8 changes: 8 additions & 0 deletions cleverlibtest/Cargo.toml
@@ -0,0 +1,8 @@
[package]
name = "cleverlibtest"
version = "0.1.0"
edition = "2021"

[dependencies]
cleverlib = { path = "../" }
clap = { version = "4.0", features = ["derive"] }
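clap is declared with the derive feature, but the main.rs below never uses it; the test file name and chunk size are hardcoded. If the dependency is meant to stay, a minimal derive-based sketch (the Args struct and flag names are hypothetical, not part of this PR) could look like:

// Hypothetical CLI wiring for cleverlibtest; not present in this PR's main.rs.
use clap::Parser;

#[derive(Parser, Debug)]
#[command(about = "Exercises cleverlib's ClefParser chunking")]
struct Args {
    /// Path of the CLEF log file to parse.
    #[arg(long, default_value = "test_clef_entries.log")]
    input: String,

    /// Number of events to read per chunk.
    #[arg(long, default_value_t = 25)]
    chunk_size: usize,
}

fn main() {
    let args = Args::parse();
    // A real harness would pass these to ClefParser::new instead of hardcoding them.
    println!("input: {}, chunk_size: {}", args.input, args.chunk_size);
}

Alternatively, the clap dependency could simply be dropped until it is needed.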
166 changes: 166 additions & 0 deletions cleverlibtest/src/main.rs
@@ -0,0 +1,166 @@
use cleverlib::clefparser::{ClefParser, ClefParserSettings};
use std::fs;

fn generate_test_entries(count: usize) -> Vec<String> {
let levels = ["Error", "Warning", "Info", "Debug"];
let messages = [
"File upload failed for user {UserId}",
"Database connection timeout for {Service}",
"User {Username} logged in successfully",
"Cache miss for key {CacheKey}",
"API request failed with status {StatusCode}",
];
let exceptions = [
"IOException: Insufficient disk space\\n at FileService.Upload()",
"SqlTimeoutException: Connection timeout\\n at Database.Connect()",
"NullPointerException: Object reference not set\\n at Service.Process()",
"ArgumentException: Invalid parameter\\n at Validator.Check()",
];

(0..count).map(|i| {
let level = levels[i % levels.len()];
let message = messages[i % messages.len()];
let exception = if i % 3 == 0 { format!(r#","@x":"{}""#, exceptions[i % exceptions.len()]) } else { String::new() };
let user_id = format!("user_{}", 100 + i);
let timestamp = format!("2024-12-28T10:15:{:02}.789Z", 16 + (i % 44));

format!(
r#"{{"@t":"{}","@l":"{}","@mt":"{}","UserId":"{}","@props":{{"FileName":"document_{}.pdf","FileSize":{}}}{}}}"#,
timestamp, level, message, user_id, i, 1024000 + i * 1000, exception
)
}).collect()
}

fn main() {
println!("Testing cleverlib ClefParser chunking functionality...");

// Create a test file with CLEF entries
let test_file = "test_clef_entries.log";
let test_entries = generate_test_entries(100);

// Write test entries to file
let content = test_entries.join("\n");
if let Err(e) = fs::write(test_file, content) {
eprintln!("Failed to create test file: {}", e);
return;
}

println!("Created test file: {}", test_file);
println!("Total entries: {}", test_entries.len());

// Test ClefParser with custom settings, looping get_next_chunk until the file is exhausted
// (100 entries / chunk_size 25 = 4 full chunks, then a final call returning None).
println!("\nTesting ClefParser with custom settings - calling get_next_chunk until no chunks remain...");
let settings = ClefParserSettings {
chunk_size: 25,
ignore_errors: true,
};

match ClefParser::new(test_file, 25, settings.clone()) {
Ok(mut parser) => {
println!("✓ ClefParser created (chunk_size: 2)");
let mut total_events = 0;

let mut call_num = 0;
loop {
call_num += 1;
println!("\n--- Call {} to get_next_chunk ---", call_num);

// get_next_chunk returns ClefParserResult<Option<Vec<Event>>>, so match both layers;
// an Err falls through to the "no more chunks" branch in this test harness.
// Note: std::mem::size_of_val(&chunk) below measures only the Vec header, not the events.
if let Ok(Some(chunk)) = parser.get_next_chunk() {
total_events += chunk.len();
println!(
"✓ Retrieved chunk with {} events (chunk size: {} bytes)",
chunk.len(),
std::mem::size_of_val(&chunk)
);
println!(" Total events processed so far: {}", total_events);
println!(
" Cached chunks in VecDeque: {}",
parser.cached_chunks_count()
);
} else {
println!("✗ No more chunks available");
println!(" Total events processed: {}", total_events);
println!(
" Cached chunks in VecDeque: {}",
parser.cached_chunks_count()
);
println!(" VecDeque contents:");
for (i, cached_chunk) in parser.cached_chunks.iter().enumerate() {
println!(
" Cached Chunk {}: {} events (size: {} bytes)",
i,
cached_chunk.len(),
std::mem::size_of_val(cached_chunk)
);
}
break;
}
}

println!("\n=== Testing get_previous_chunk ===");
println!(
"Current tail position after forward navigation: {}",
parser.tail
);

for call_num in 1..=4 {
println!("\n--- Call {} to get_previous_chunk ---", call_num);

// get_previous_chunk also returns ClefParserResult<Option<Vec<Event>>>.
if let Ok(Some(chunk)) = parser.get_previous_chunk() {
println!(
"✓ Retrieved previous chunk with {} events (chunk size: {} bytes)",
chunk.len(),
std::mem::size_of_val(&chunk)
);
println!(" Current tail position: {}", parser.tail);
println!(
" Cached chunks in VecDeque: {}",
parser.cached_chunks_count()
);

println!(" VecDeque contents:");
for (i, cached_chunk) in parser.cached_chunks.iter().enumerate() {
println!(
" Cached Chunk {}: {} events (size: {} bytes)",
i,
cached_chunk.len(),
std::mem::size_of_val(cached_chunk)
);
}

println!(
" Current previous chunk: {} events (size: {} bytes)",
chunk.len(),
std::mem::size_of_val(&chunk)
);
} else {
println!("✗ No previous chunks available");
println!(" Current tail position: {}", parser.tail);
println!(
" Cached chunks in VecDeque: {}",
parser.cached_chunks_count()
);
}
}

println!("\n=== Final Summary ===");
println!("Total lines in file: {}", parser.line_count);
println!("Total events processed: {}", total_events);
println!("Final tail position: {}", parser.tail);
println!(
"Final cached chunks in VecDeque: {}",
parser.cached_chunks_count()
);
}
Err(e) => {
println!("✗ Failed to create ClefParser: {}", e);
}
}

// Clean up test file
if let Err(e) = fs::remove_file(test_file) {
eprintln!("Warning: Failed to remove test file: {}", e);
}

println!("\nClefParser chunking test completed!");
}
185 changes: 185 additions & 0 deletions src/clefparser.rs
@@ -0,0 +1,185 @@
use std::{
collections::VecDeque,
fs::File,
io::{BufRead, BufReader},
};

use crate::{
clever_parser_options::CleverParserOptions,
errors::{ClefParserError, ClefParserResult},
event::Event,
event_collection::EventCollection,
};

pub struct ClefParser<'a> {
// NOTE: this handle is unused after construction; both chunk methods reopen the file by `path`.
file: File,
pub line_count: usize,
path: &'a str,
settings: ClefParserSettings,
pub cached_chunks: VecDeque<Vec<Event>>,
pub tail: usize,
chunk_size: usize,
}

#[derive(Clone)]
pub struct ClefParserSettings {
pub chunk_size: usize,
pub ignore_errors: bool,
}
impl<'a> ClefParser<'a> {
pub fn new_with_defaults(path: &str) -> ClefParserResult<ClefParser> {
let file = File::open(path).map_err(|e| ClefParserError::FileOpenError {
path: path.to_string(),
source: e,
})?;
let buffer = BufReader::new(&file);
let line_count = buffer.lines().count();
Ok(ClefParser {
path,
file,
cached_chunks: VecDeque::new(),
tail: 1,
line_count,
// Keep the standalone chunk_size in sync with the default settings below;
// get_next_chunk reads self.chunk_size lines per call.
chunk_size: 500,
settings: ClefParserSettings {
chunk_size: 500,
ignore_errors: true,
},
})
}

pub fn new(
path: &str,
chunk_size: usize,
parser_settings: ClefParserSettings,
) -> ClefParserResult<ClefParser> {
if chunk_size == 0 {
return Err(ClefParserError::InvalidChunkSize { size: chunk_size });
}
let file = File::open(path).map_err(|e| ClefParserError::FileOpenError {
path: path.to_string(),
source: e,
})?;
let buffer = BufReader::new(&file);
let line_count = buffer.lines().count();
println!("Maxlines: {}", &line_count);
Ok(ClefParser {
path,
file,
line_count,
cached_chunks: VecDeque::new(),
chunk_size,
tail: 1,
settings: parser_settings,
})
}

pub fn get_next_chunk(&mut self) -> ClefParserResult<Option<Vec<Event>>> {
if self.tail > self.line_count {
return Ok(None);
}

println!(
"Reading from line {} (chunk_size: {})",
self.tail, self.chunk_size
);

let file = File::open(self.path).map_err(|e| ClefParserError::FileOpenError {
path: self.path.to_string(),
source: e,
})?;
let reader = BufReader::new(file);
let lines: Vec<String> = reader
.lines()
.skip(self.tail - 1) // Convert 1-based to 0-based
.take(self.chunk_size)
.filter_map(|line| line.ok())
.collect();

println!("Read {} lines", lines.len());

if lines.is_empty() {
return Ok(None);
}

let events = EventCollection::create(
&lines,
Some(&CleverParserOptions {
debug: Some(false),
// Honor the parser's configured setting instead of hardcoding `true`.
ignore_errors: Some(self.settings.ignore_errors),
}),
)?;

// Add to cached_chunks and maintain max 3 chunks
self.cached_chunks.push_back(events.events.clone());
if self.cached_chunks.len() > 3 {
self.cached_chunks.pop_front();
}

self.tail += self.chunk_size;
Ok(Some(events.events))
}

pub fn cached_chunks_count(&self) -> usize {
self.cached_chunks.len()
}
pub fn get_previous_chunk(&mut self) -> ClefParserResult<Option<Vec<Event>>> {
if self.tail <= 1 {
return Ok(None);
}

// Move backward by chunk_size
let new_tail = if self.tail > self.chunk_size {
self.tail - self.chunk_size
} else {
1
};

println!(
"Reading previous chunk from line {} (chunk_size: {})",
new_tail, self.chunk_size
);

let file = File::open(self.path).map_err(|e| ClefParserError::FileOpenError {
path: self.path.to_string(),
source: e,
})?;
let reader = BufReader::new(file);
let lines: Vec<String> = reader
.lines()
.skip(new_tail - 1) // Convert 1-based to 0-based
.take(self.chunk_size)
.filter_map(|line| line.ok())
.collect();

println!("Read {} lines", lines.len());

if lines.is_empty() {
return Ok(None);
}

let events = EventCollection::create(
&lines,
Some(&CleverParserOptions {
debug: Some(false),
// Honor the parser's configured setting instead of hardcoding `true`.
ignore_errors: Some(self.settings.ignore_errors),
}),
)?;

// Add to cached_chunks and maintain max 3 chunks
self.cached_chunks.push_back(events.events.clone());
if self.cached_chunks.len() > 3 {
self.cached_chunks.pop_front();
}

self.tail = new_tail;
Ok(Some(events.events))
}
}
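To make the tail bookkeeping concrete: with the 100-entry file generated by cleverlibtest and chunk_size 25, tail advances from 1 to 26, 51, 76 and finally 101 across the four successful get_next_chunk calls, and the first get_previous_chunk call then computes new_tail = 101 - 25 = 76, re-reading the same span as the last forward chunk. A pure-arithmetic sketch of that navigation (no file I/O; the numbers come from the test harness above):

// Standalone simulation of ClefParser's 1-based tail arithmetic.
fn main() {
    let line_count = 100; // entries written by cleverlibtest
    let chunk_size = 25;
    let mut tail = 1; // 1-based index of the next line to read

    // Forward navigation mirrors get_next_chunk's guard and increment.
    while tail <= line_count {
        let last = (tail + chunk_size - 1).min(line_count);
        println!("next chunk covers lines {}..={}", tail, last);
        tail += chunk_size;
    }
    assert_eq!(tail, 101);

    // Backward navigation mirrors get_previous_chunk's tail update.
    let new_tail = if tail > chunk_size { tail - chunk_size } else { 1 };
    println!("previous chunk covers lines {}..={}", new_tail, new_tail + chunk_size - 1);
    assert_eq!(new_tail, 76); // same 76..=100 span that the last forward call returned
}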