Coverage for src/ss_utils_safe_python/snake_case.py: 100%
11 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-01 18:15 +0800
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-01 18:15 +0800
1import re
2import unicodedata
def to_snake_case(text: str) -> str:
    """
    Convert any string to snake_case format.

    This function handles various input formats including:
    - CamelCase / PascalCase
    - kebab-case
    - Title Case
    - UPPER CASE
    - Mixed formats
    - Special characters and numbers
    - Unicode characters (accented letters are reduced to their base letter)

    Args:
        text (str): The input string to convert

    Returns:
        str: The snake_case version of the input string. An empty string is
        returned when the input is empty or is not a string.

    Examples:
        >>> to_snake_case("Executive Summary")
        'executive_summary'
        >>> to_snake_case("FinancialPerformance")
        'financial_performance'
        >>> to_snake_case("risk-assessment")
        'risk_assessment'
        >>> to_snake_case("MARKET ANALYSIS")
        'market_analysis'
        >>> to_snake_case("Section 1.2: Company Overview")
        'section_1_2_company_overview'
        >>> to_snake_case("ESG & Sustainability Report")
        'esg_sustainability_report'
        >>> to_snake_case("Cr\u00e8me Br\u00fbl\u00e9e")
        'creme_brulee'
    """
    # Check the type first so arbitrary non-string objects are never asked
    # for their truth value.
    if not isinstance(text, str) or not text:
        return ""

    # Normalize unicode characters: NFKD splits an accented letter into its
    # base letter followed by one or more combining marks.
    text = unicodedata.normalize("NFKD", text)

    # Drop the combining marks produced by the decomposition. They are not
    # matched by \w, so leaving them in would make the next substitution
    # turn them into spaces and split words in the middle
    # (e.g. "re" + "sume" instead of "resume").
    text = "".join(char for char in text if not unicodedata.combining(char))

    # Remove or replace common punctuation and special characters
    # Keep alphanumeric characters and some separators temporarily
    text = re.sub(r"[^\w\s\-_.]", " ", text)

    # Handle CamelCase by inserting spaces before uppercase letters
    # This regex looks for lowercase (or a digit) followed by uppercase
    text = re.sub(r"([a-z0-9])([A-Z])", r"\1 \2", text)

    # Replace multiple separators (spaces, hyphens, underscores, dots) with single space
    text = re.sub(r"[\s\-_.]+", " ", text)

    # str.split() with no argument already discards empty strings, so the
    # words only need lowercasing before being joined with underscores.
    return "_".join(word.lower() for word in text.split())