forked from inventree/InvenTree
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathformat.py
More file actions
156 lines (105 loc) · 4.46 KB
/
format.py
File metadata and controls
156 lines (105 loc) · 4.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
"""Custom string formatting functions and helpers"""
import re
import string
from django.utils.translation import gettext_lazy as _
def parse_format_string(fmt_string: str) -> dict:
"""Extract formatting information from the provided format string.
Returns a dict object which contains structured information about the format groups
"""
groups = string.Formatter().parse(fmt_string)
info = {}
for group in groups:
# Skip any group which does not have a named value
if not group[1]:
continue
info[group[1]] = {
'format': group[1],
'prefix': group[0],
}
return info
def construct_format_regex(fmt_string: str) -> str:
r"""Construct a regular expression based on a provided format string
This function turns a python format string into a regular expression,
which can be used for two purposes:
- Ensure that a particular string matches the specified format
- Extract named variables from a matching string
This function also provides support for wildcard characters:
- '?' provides single character matching; is converted to a '.' (period) for regex
- '#' provides single digit matching; is converted to '\d'
Args:
fmt_string: A typical format string e.g. "PO-???-{ref:04d}"
Returns:
str: A regular expression pattern e.g. ^PO\-...\-(?P<ref>.*)$
Raises:
ValueError: Format string is invalid
"""
pattern = "^"
for group in string.Formatter().parse(fmt_string):
prefix = group[0] # Prefix (literal text appearing before this group)
name = group[1] # Name of this format variable
format = group[2] # Format specifier e.g :04d
rep = [
'+', '-', '.',
'{', '}', '(', ')',
'^', '$', '~', '!', '@', ':', ';', '|', '\'', '"',
]
# Escape any special regex characters
for ch in rep:
prefix = prefix.replace(ch, '\\' + ch)
# Replace ? with single-character match
prefix = prefix.replace('?', '.')
# Replace # with single-digit match
prefix = prefix.replace('#', r'\d')
pattern += prefix
# Add a named capture group for the format entry
if name:
# Check if integer values are requried
if format.endswith('d'):
chr = '\d'
else:
chr = '.'
# Specify width
# TODO: Introspect required width
w = '+'
pattern += f"(?P<{name}>{chr}{w})"
pattern += "$"
return pattern
def validate_string(value: str, fmt_string: str) -> str:
"""Validate that the provided string matches the specified format.
Args:
value: The string to be tested e.g. 'SO-1234-ABC',
fmt_string: The required format e.g. 'SO-{ref}-???',
Returns:
bool: True if the value matches the required format, else False
Raises:
ValueError: The provided format string is invalid
"""
pattern = construct_format_regex(fmt_string)
result = re.match(pattern, value)
return result is not None
def extract_named_group(name: str, value: str, fmt_string: str) -> str:
"""Extract a named value from the provided string, given the provided format string
Args:
name: Name of group to extract e.g. 'ref'
value: Raw string e.g. 'PO-ABC-1234'
fmt_string: Format pattern e.g. 'PO-???-{ref}
Returns:
str: String value of the named group
Raises:
ValueError: format string is incorrectly specified, or provided value does not match format string
NameError: named value does not exist in the format string
IndexError: named value could not be found in the provided entry
"""
info = parse_format_string(fmt_string)
if name not in info.keys():
raise NameError(_(f"Value '{name}' does not appear in pattern format"))
# Construct a regular expression for matching against the provided format string
# Note: This will raise a ValueError if 'fmt_string' is incorrectly specified
pattern = construct_format_regex(fmt_string)
# Run the regex matcher against the raw string
result = re.match(pattern, value)
if not result:
raise ValueError(_("Provided value does not match required pattern: ") + fmt_string)
# And return the value we are interested in
# Note: This will raise an IndexError if the named group was not matched
return result.group(name)