Custom Validator Examples
Examples of creating and using custom validators for domain-specific validation logic.
Business Rules Validation
Validate business-specific constraints:
import datetime
from typing import Protocol, Annotated

import pandas as pd
from pavise.pandas import DataFrame
from pavise.validators import Custom, Range
def is_business_day(date_value) -> bool:
    """Return whether ``date_value`` falls on Monday through Friday.

    ``date_value`` must expose a ``weekday()`` method that numbers Monday as
    0 and Sunday as 6, as ``datetime.date`` does. Public holidays are not
    taken into account: only the day of the week is inspected.
    """
    weekday_index = date_value.weekday()
    # Indices 0-4 are Monday-Friday; 5 and 6 are the weekend.
    return weekday_index < 5
def is_positive(value) -> bool:
    """Return whether ``value`` is strictly greater than zero.

    Zero itself is not positive, so it yields ``False``.
    """
    exceeds_zero = value > 0
    return exceeds_zero
class FinancialDataSchema(Protocol):
    """Column schema for the financial example data.

    The ``date`` annotation refers to ``datetime.date``, so the ``datetime``
    module must be imported before this class is defined.
    """

    # Calendar date column; the Custom validator wraps is_business_day.
    # The second Custom argument is presumably the failure message - confirm
    # against the pavise documentation.
    date: Annotated[datetime.date, Custom(is_business_day, "must be a business day")]
    # Float column; the Custom validator wraps is_positive.
    amount: Annotated[float, Custom(is_positive, "must be positive")]
    # Float column constrained with Range(0.0, 1.0).
    profit_margin: Annotated[float, Range(0.0, 1.0)]
# Build sample financial data and run it through the schema.
df = pd.DataFrame(
    {
        # `.date` converts the parsed timestamps to plain date objects.
        "date": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-03"]).date,
        "amount": [1000.0, 2000.0, -500.0],  # -500.0 will fail
        "profit_margin": [0.15, 0.20, 0.18],
    }
)

# Only the validation call sits inside the try block; a ValueError raised
# there is reported on stdout instead of propagating.
try:
    validated_df = DataFrame[FinancialDataSchema](df)
except ValueError as e:
    print(f"Validation failed: {e}")
Domain-Specific Validators
Create validators for domain-specific rules:
from typing import Protocol, Annotated
from pavise.validators import Custom
import re
def is_valid_isbn(isbn: str) -> bool:
    """Check whether ``isbn`` has the shape of an ISBN-10 or ISBN-13.

    Hyphens and spaces are ignored. After removing them the value must be
    either nine digits followed by a digit or an uppercase ``X`` (ISBN-10),
    or exactly thirteen digits (ISBN-13). Only the ASCII digits 0-9 are
    accepted.

    This is a format check only: the check digit is not verified.

    Args:
        isbn: Candidate ISBN, optionally containing hyphens or spaces.

    Returns:
        True if the cleaned value matches one of the two formats.
    """
    compact = isbn.replace("-", "").replace(" ", "")
    # [0-9] rather than \d: in a str pattern \d also matches non-ASCII
    # decimal digits (for example Arabic-Indic ones), which are not valid
    # ISBN characters. fullmatch anchors both ends, so the two alternatives
    # also enforce the 10- and 13-character lengths.
    return re.fullmatch(r"[0-9]{9}[0-9X]|[0-9]{13}", compact) is not None
def is_valid_price(price: float) -> bool:
    """Check that ``price`` is a positive, finite amount with at most 2 decimals.

    Args:
        price: Candidate price.

    Returns:
        True when ``price`` is greater than zero, is not infinite, and is
        unchanged by rounding to two decimal places. NaN yields False
        because every comparison involving NaN is false.
    """
    if price <= 0:
        return False
    # round(inf, 2) returns inf, so without this guard positive infinity
    # would pass the two-decimal check below.
    if price == float("inf"):
        return False
    # bool() keeps the annotated return type even when the comparison
    # produces a non-builtin boolean (e.g. for NumPy scalar inputs).
    return bool(round(price, 2) == price)
class BookSchema(Protocol):
    """Column schema for book records: ISBN, title, and price."""

    # String column; the Custom validator wraps is_valid_isbn.
    isbn: Annotated[
        str,
        Custom(is_valid_isbn, "must be valid ISBN-10 or ISBN-13"),
    ]
    # String column with no additional validator attached.
    title: str
    # Float column; the Custom validator wraps is_valid_price.
    price: Annotated[
        float,
        Custom(is_valid_price, "must be positive with max 2 decimals"),
    ]
Cross-Field Validation
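Validate relationships between fields. Each `Custom` validator in a schema is attached to a single column, so a rule that compares several columns of the same row is checked in a separate function after schema validation: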
from typing import Protocol, Annotated
from pavise.pandas import DataFrame
from pavise.validators import Custom
import pandas as pd
def is_valid_discount(discount: float) -> bool:
    """Return whether ``discount`` is a fraction in the closed range 0.0-1.0.

    Both end points are accepted: 0.0 means no discount and 1.0 means 100%.
    """
    at_least_zero = 0.0 <= discount
    return at_least_zero and discount <= 1.0
class ProductSchema(Protocol):
    """Column schema for product pricing rows."""

    # Float column with no additional validator attached.
    original_price: float
    # Float column; the Custom validator wraps is_valid_discount.
    discount: Annotated[
        float,
        Custom(is_valid_discount, "must be between 0.0 and 1.0"),
    ]
    # Float column with no additional validator attached.
    final_price: float
# After validation, verify cross-field constraint manually
def validate_pricing(
    df: DataFrame[ProductSchema], *, tolerance: float = 1e-9
) -> DataFrame[ProductSchema]:
    """Check that every row satisfies final_price = original_price * (1 - discount).

    The comparison allows a small floating-point error instead of requiring
    exact equality, because results such as ``1 - 0.9`` are not exactly
    representable and would otherwise reject correct rows.

    Args:
        df: Frame with ``original_price``, ``discount`` and ``final_price``
            columns.
        tolerance: Permitted deviation, applied relative to the expected
            price and never smaller than ``tolerance`` itself.

    Returns:
        The same ``df`` object, unchanged, when every row is consistent.

    Raises:
        ValueError: If any row's ``final_price`` deviates from the expected
            value by more than the tolerance, or if a value involved in the
            comparison is NaN.
    """
    expected_price = df["original_price"] * (1 - df["discount"])
    # Scale the allowance with the magnitude of the expected price, but keep
    # a floor of `tolerance` so prices near zero are not held to a stricter
    # bound than the default.
    allowed_error = tolerance * expected_price.abs().clip(lower=1.0)
    within_tolerance = (df["final_price"] - expected_price).abs() <= allowed_error
    # `<=` is False for NaN, so rows with missing values count as mismatches
    # exactly as they did under the original equality test.
    if not within_tolerance.all():
        raise ValueError("final_price must equal original_price * (1 - discount)")
    return df
# Sample rows whose final_price matches original_price * (1 - discount).
df = pd.DataFrame(
    {
        "original_price": [100.0, 200.0],
        "discount": [0.1, 0.2],
        "final_price": [90.0, 160.0],
    }
)

# Step 1: schema validation of the individual columns.
validated_df = DataFrame[ProductSchema](df)
# Step 2: the cross-field pricing rule on the validated frame.
validated_df = validate_pricing(validated_df)
Combining Multiple Custom Validators
Use multiple custom validators on a single field:
from typing import Protocol, Annotated
from pavise.validators import Custom, MinLen
def no_special_chars(value: str) -> bool:
    """Return whether ``value`` is made up solely of alphanumerics and spaces.

    Space characters are discarded first and the remainder is tested with
    ``str.isalnum()``. An empty string, or one consisting only of spaces,
    therefore yields ``False``.
    """
    without_spaces = "".join(value.split(" "))
    return without_spaces.isalnum()
def no_leading_trailing_spaces(value: str) -> bool:
    """Return whether ``value`` starts and ends without whitespace.

    ``str.strip()`` removes any kind of whitespace, so leading or trailing
    tabs and newlines are rejected as well as plain spaces.
    """
    # strip() can only shorten the string, so an unchanged length means
    # nothing was removed from either end.
    return len(value.strip()) == len(value)
class UserInputSchema(Protocol):
    """Column schema for user-supplied names with several validators stacked."""

    # String column carrying three validators in this order: MinLen(3), a
    # Custom wrapping no_special_chars, and a Custom wrapping
    # no_leading_trailing_spaces.
    username: Annotated[
        str,
        MinLen(3),
        Custom(no_special_chars, "must contain only letters, numbers, and spaces"),
        Custom(no_leading_trailing_spaces, "must not have leading or trailing spaces"),
    ]
Complex Validation Logic
Implement complex business logic in custom validators:
from typing import Protocol, Annotated
from pavise.validators import Custom
import re
def is_strong_password(password: str) -> bool:
    """Return whether ``password`` satisfies every strength rule.

    Rules:
    - length of at least 8 characters
    - at least one uppercase letter A-Z
    - at least one lowercase letter a-z
    - at least one digit
    - at least one character from the special set listed in the last pattern
    """
    # Length is checked first so short inputs never reach the regex searches.
    if len(password) < 8:
        return False

    # One pattern per character class that has to appear somewhere.
    required_patterns = (
        r'[A-Z]',
        r'[a-z]',
        r'\d',
        r'[!@#$%^&*(),.?":{}|<>]',
    )
    return all(re.search(pattern, password) for pattern in required_patterns)
def is_valid_email_domain(email: str) -> bool:
    """Return whether ``email`` belongs to one of the allowed domains.

    The domain is the text after the last ``@``. It is compared
    case-insensitively, because domain names are not case-sensitive. A value
    that contains no ``@`` has no domain part and is rejected.

    Args:
        email: Address to check.

    Returns:
        True if the address contains ``@`` and its domain is allowed.
    """
    allowed_domains = ("example.com", "test.com")
    _local_part, separator, domain = email.rpartition("@")
    # rpartition returns an empty separator when "@" is absent; without this
    # guard a bare "example.com" would be treated as its own domain and pass.
    if not separator:
        return False
    return domain.lower() in allowed_domains
class SecureUserSchema(Protocol):
    """Column schema for user credentials: email address and password."""

    # String column; the Custom validator wraps is_valid_email_domain.
    email: Annotated[str, Custom(is_valid_email_domain, "must be from allowed domains")]
    # String column; the Custom validator wraps is_strong_password.
    password: Annotated[
        str, Custom(is_strong_password, "must meet password strength requirements")
    ]