[46c9de]: / src / preprocessing / __init__.py

Download this file

77 lines (67 with data), 1.8 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""
Preprocessing module for medical text processing
Contains classes and functions for text preprocessing and standardization
"""
from .preprocessing import (
# Main Classes
TextPreprocessor,
PreprocessingPipeline,
ProcessingResult,
PreprocessingStats,
PreprocessingContext,
# Preprocessor Classes
LengthNormalizer,
MedicalTermPreprocessor,
SpecialCharacterHandler,
DiseaseCategoryPreprocessor,
MedicalScorePreprocessor,
MedicalAbbreviationNormalizer,
MedicalTermStandardizer,
DomainSpecificStopwordHandler,
SpecialCharacterCleaner,
WhitespaceNormalizer,
MedicalTokenizer,
# Main Pipeline Creation Function
create_ordered_medical_pipeline
)
# Package version string.
__version__ = "1.0.0"

# Public API of the package: the names bound by a star-import.
# Each entry matches a name imported from `.preprocessing` above.
__all__ = [
    # Main classes
    "TextPreprocessor",
    "PreprocessingPipeline",
    "ProcessingResult",
    "PreprocessingStats",
    "PreprocessingContext",
    # Individual preprocessor classes
    "LengthNormalizer",
    "MedicalTermPreprocessor",
    "SpecialCharacterHandler",
    "DiseaseCategoryPreprocessor",
    "MedicalScorePreprocessor",
    "MedicalAbbreviationNormalizer",
    "MedicalTermStandardizer",
    "DomainSpecificStopwordHandler",
    "SpecialCharacterCleaner",
    "WhitespaceNormalizer",
    "MedicalTokenizer",
    # Pipeline creation function
    "create_ordered_medical_pipeline",
]
# Example usage documentation (illustrative only; this bare string literal is
# never executed and is not part of the module docstring)
'''
Example Usage:

    from preprocessing import create_ordered_medical_pipeline

    # Create pipeline
    pipeline = create_ordered_medical_pipeline(
        disease_category='ALS',
        config={
            'max_length': 5000,
            'preserve_case': True,
            'include_scores': True
        }
    )

    # Process text
    processed_text = pipeline.process("Sample medical text...")
'''