Coverage for src/ss_utils_safe_python/snake_case.py: 100%

11 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-01 18:15 +0800

1import re 

2import unicodedata 

3 

4 

def to_snake_case(text: str) -> str:
    """
    Convert any string to snake_case format.

    This function handles various input formats including:
    - CamelCase / PascalCase
    - kebab-case
    - Title Case
    - UPPER CASE
    - Mixed formats
    - Special characters and numbers
    - Unicode characters (accents are folded onto their base letter)

    Args:
        text (str): The input string to convert

    Returns:
        str: The snake_case version of the input string. An empty string
        is returned for empty input or for any non-string value.

    Examples:
        >>> to_snake_case("Executive Summary")
        'executive_summary'
        >>> to_snake_case("FinancialPerformance")
        'financial_performance'
        >>> to_snake_case("risk-assessment")
        'risk_assessment'
        >>> to_snake_case("MARKET ANALYSIS")
        'market_analysis'
        >>> to_snake_case("Section 1.2: Company Overview")
        'section_1_2_company_overview'
        >>> to_snake_case("ESG & Sustainability Report")
        'esg_sustainability_report'
    """
    # Empty strings and non-string values both map to "" instead of raising
    if not text or not isinstance(text, str):
        return ""

    # Normalize unicode characters: NFKD splits an accented letter into its
    # base letter followed by one or more combining marks
    text = unicodedata.normalize("NFKD", text)

    # Drop the combining marks produced above. They are not matched by \w, so
    # leaving them in would make the next substitution turn each one into a
    # space and split the word in the middle (e.g. "re sume" for an accented
    # "resume")
    text = "".join(ch for ch in text if not unicodedata.combining(ch))

    # Remove or replace common punctuation and special characters
    # Keep alphanumeric characters and some separators temporarily
    text = re.sub(r"[^\w\s\-_.]", " ", text)

    # Handle CamelCase by inserting spaces before uppercase letters
    # This regex looks for lowercase (or a digit) followed by uppercase
    text = re.sub(r"([a-z0-9])([A-Z])", r"\1 \2", text)

    # Replace multiple separators (spaces, hyphens, underscores, dots) with single space
    text = re.sub(r"[\s\-_.]+", " ", text)

    # Split into words and convert to lowercase; str.split() with no
    # arguments never yields empty strings, so no extra filtering is needed
    words = [word.lower() for word in text.split()]

    # Join with underscores
    return "_".join(words)