Module pd_cipher::pipeline::components

Standard components for token processing pipelines.

This module provides common, reusable components for token processing including normalization, validation, filtering, and transformation.

Structs

NormalizationComponent

Component for normalizing token content.

Performs common normalization operations including whitespace trimming, case normalization, and Unicode normalization.

Examples

use pd_cipher::pipeline::components::NormalizationComponent;
use pd_cipher::pipeline::Component;

let component = NormalizationComponent::new();
let tokens = vec![
" Hello World ".as_bytes().to_vec(),
"UPPERCASE".as_bytes().to_vec(),
];
let normalized = component.process(&tokens).unwrap();
pub struct NormalizationComponent

ValidationComponent

Component for validating tokens against size and content constraints.

Ensures tokens meet specified criteria before further processing.

Examples

use pd_cipher::pipeline::components::ValidationComponent;
use pd_cipher::pipeline::Component;

let component = ValidationComponent::new();
let tokens = vec![
"valid_token".as_bytes().to_vec(),
"another_valid_token".as_bytes().to_vec(),
];
let result = component.process(&tokens).unwrap();
pub struct ValidationComponent

FilterComponent

Component for filtering tokens based on various criteria.

Can filter tokens by size, content patterns, or custom predicates.

Examples

use pd_cipher::pipeline::components::FilterComponent;
use pd_cipher::pipeline::Component;

let component = FilterComponent::new()
.with_min_size(3)
.with_max_size(50);

let tokens = vec![
"a".as_bytes().to_vec(), // Too short
"valid".as_bytes().to_vec(), // Valid
"x".repeat(100).as_bytes().to_vec(), // Too long
];
let filtered = component.process(&tokens).unwrap();
assert_eq!(filtered.len(), 1); // Only "valid" remains
pub struct FilterComponent

TransformComponent

Component for transforming tokens using custom functions.

Provides a flexible way to apply custom transformations to tokens.

Examples

use pd_cipher::pipeline::components::TransformComponent;
use pd_cipher::pipeline::Component;

// Create a component that adds a prefix to each token
let component = TransformComponent::new("prefix", |token| {
let mut prefixed = b"PREFIX:".to_vec();
prefixed.extend_from_slice(token);
Ok(prefixed)
});

let tokens = vec!["test".as_bytes().to_vec()];
let transformed = component.process(&tokens).unwrap();
assert_eq!(transformed[0], b"PREFIX:test");
pub struct TransformComponent

Enums

CaseNormalization

Case transformation options for text normalization.

pub enum CaseNormalization {
None,
Lower,
Upper,
}