forked from reactive-python/reactpy
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathutils.py
More file actions
307 lines (248 loc) · 10.5 KB
/
Copy pathutils.py
File metadata and controls
307 lines (248 loc) · 10.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
from __future__ import annotations
import re
from collections.abc import Iterable
from itertools import chain
from typing import Any, Callable, Generic, TypeVar, cast
from lxml import etree
from lxml.html import fromstring, tostring
from reactpy.core.types import VdomDict
from reactpy.core.vdom import vdom
_RefValue = TypeVar("_RefValue")
_ModelTransform = Callable[[VdomDict], Any]
_UNDEFINED: Any = object()
class Ref(Generic[_RefValue]):
"""Hold a reference to a value
This is used in imperative code to mutate the state of this object in order to
incur side effects. Generally refs should be avoided if possible, but sometimes
they are required.
Notes:
You can compare the contents for two ``Ref`` objects using the ``==`` operator.
"""
__slots__ = "current"
def __init__(self, initial_value: _RefValue = _UNDEFINED) -> None:
if initial_value is not _UNDEFINED:
self.current = initial_value
"""The present value"""
def set_current(self, new: _RefValue) -> _RefValue:
"""Set the current value and return what is now the old value
This is nice to use in ``lambda`` functions.
"""
old = self.current
self.current = new
return old
def __eq__(self, other: Any) -> bool:
try:
return isinstance(other, Ref) and (other.current == self.current)
except AttributeError:
# attribute error occurs for uninitialized refs
return False
def __repr__(self) -> str:
try:
current = repr(self.current)
except AttributeError:
# attribute error occurs for uninitialized refs
current = "<undefined>"
return f"{type(self).__name__}({current})"
def vdom_to_html(vdom: VdomDict) -> str:
"""Convert a VDOM dictionary into an HTML string
Only the following keys are translated to HTML:
- ``tagName``
- ``attributes``
- ``children`` (must be strings or more VDOM dicts)
Parameters:
vdom: The VdomDict element to convert to HTML
"""
temp_root = etree.Element("__temp__")
_add_vdom_to_etree(temp_root, vdom)
html = cast(bytes, tostring(temp_root)).decode()
# strip out temp root <__temp__> element
return html[10:-11]
def html_to_vdom(
html: str, *transforms: _ModelTransform, strict: bool = True
) -> VdomDict:
"""Transform HTML into a DOM model. Unique keys can be provided to HTML elements
using a ``key=...`` attribute within your HTML tag.
Parameters:
html:
The raw HTML as a string
transforms:
Functions of the form ``transform(old) -> new`` where ``old`` is a VDOM
dictionary which will be replaced by ``new``. For example, you could use a
transform function to add highlighting to a ``<code/>`` block.
strict:
If ``True``, raise an exception if the HTML does not perfectly follow HTML5
syntax.
"""
if not isinstance(html, str): # nocov
msg = f"Expected html to be a string, not {type(html).__name__}"
raise TypeError(msg)
# If the user provided a string, convert it to a list of lxml.etree nodes
try:
root_node: etree._Element = fromstring(
html.strip(),
parser=etree.HTMLParser(
remove_comments=True,
remove_pis=True,
remove_blank_text=True,
recover=not strict,
),
)
except etree.XMLSyntaxError as e:
if not strict:
raise e # nocov
msg = "An error has occurred while parsing the HTML.\n\nThis HTML may be malformatted, or may not perfectly adhere to HTML5.\nIf you believe the exception above was due to something intentional, you can disable the strict parameter on html_to_vdom().\nOtherwise, repair your broken HTML and try again."
raise HTMLParseError(msg) from e
return _etree_to_vdom(root_node, transforms)
class HTMLParseError(etree.LxmlSyntaxError): # type: ignore[misc]
"""Raised when an HTML document cannot be parsed using strict parsing."""
def _etree_to_vdom(
node: etree._Element, transforms: Iterable[_ModelTransform]
) -> VdomDict:
"""Transform an lxml etree node into a DOM model
Parameters:
node:
The ``lxml.etree._Element`` node
transforms:
Functions of the form ``transform(old) -> new`` where ``old`` is a VDOM
dictionary which will be replaced by ``new``. For example, you could use a
transform function to add highlighting to a ``<code/>`` block.
"""
if not isinstance(node, etree._Element): # nocov
msg = f"Expected node to be a etree._Element, not {type(node).__name__}"
raise TypeError(msg)
# Recursively call _etree_to_vdom() on all children
children = _generate_vdom_children(node, transforms)
# Convert the lxml node to a VDOM dict
el = vdom(node.tag, dict(node.items()), *children)
# Perform any necessary mutations on the VDOM attributes to meet VDOM spec
_mutate_vdom(el)
# Apply any provided transforms.
for transform in transforms:
el = transform(el)
return el
def _add_vdom_to_etree(parent: etree._Element, vdom: VdomDict | dict[str, Any]) -> None:
try:
tag = vdom["tagName"]
except KeyError as e:
msg = f"Expected a VDOM dict, not {vdom}"
raise TypeError(msg) from e
else:
vdom = cast(VdomDict, vdom)
if tag:
element = etree.SubElement(parent, tag)
element.attrib.update(
_vdom_attr_to_html_str(k, v) for k, v in vdom.get("attributes", {}).items()
)
else:
element = parent
for c in vdom.get("children", []):
if isinstance(c, dict):
_add_vdom_to_etree(element, c)
else:
"""
LXML handles string children by storing them under `text` and `tail`
attributes of Element objects. The `text` attribute, if present, effectively
becomes that element's first child. Then the `tail` attribute, if present,
becomes a sibling that follows that element. For example, consider the
following HTML:
<p><a>hello</a>world</p>
In this code sample, "hello" is the `text` attribute of the `<a>` element
and "world" is the `tail` attribute of that same `<a>` element. It's for
this reason that, depending on whether the element being constructed has
non-string a child element, we need to assign a `text` vs `tail` attribute
to that element or the last non-string child respectively.
"""
if len(element):
last_child = element[-1]
last_child.tail = f"{last_child.tail or ''}{c}"
else:
element.text = f"{element.text or ''}{c}"
def _mutate_vdom(vdom: VdomDict) -> None:
"""Performs any necessary mutations on the VDOM attributes to meet VDOM spec.
Currently, this function only transforms the ``style`` attribute into a dictionary whose keys are
camelCase so as to be renderable by React.
This function may be extended in the future.
"""
# Determine if the style attribute needs to be converted to a dict
if (
"attributes" in vdom
and "style" in vdom["attributes"]
and isinstance(vdom["attributes"]["style"], str)
):
# Convince type checker that it's safe to mutate attributes
assert isinstance(vdom["attributes"], dict) # noqa: S101
# Convert style attribute from str -> dict with camelCase keys
vdom["attributes"]["style"] = {
key.strip().replace("-", "_"): value.strip()
for key, value in (
part.split(":", 1)
for part in vdom["attributes"]["style"].split(";")
if ":" in part
)
}
def _generate_vdom_children(
node: etree._Element, transforms: Iterable[_ModelTransform]
) -> list[VdomDict | str]:
"""Generates a list of VDOM children from an lxml node.
Inserts inner text and/or tail text in between VDOM children, if necessary.
"""
return ( # Get the inner text of the current node
[node.text] if node.text else []
) + list(
chain(
*(
# Recursively convert each child node to VDOM
[_etree_to_vdom(child, transforms)]
# Insert the tail text between each child node
+ ([child.tail] if child.tail else [])
for child in node.iterchildren(None)
)
)
)
def del_html_head_body_transform(vdom: VdomDict) -> VdomDict:
"""Transform intended for use with `html_to_vdom`.
Removes `<html>`, `<head>`, and `<body>` while preserving their children.
Parameters:
vdom:
The VDOM dictionary to transform.
"""
if vdom["tagName"] in {"html", "body", "head"}:
return {"tagName": "", "children": vdom["children"]}
return vdom
def _vdom_attr_to_html_str(key: str, value: Any) -> tuple[str, str]:
if key == "style":
if isinstance(value, dict):
value = ";".join(
# We lower only to normalize - CSS is case-insensitive:
# https://www.w3.org/TR/css-fonts-3/#font-family-casing
f"{_CAMEL_CASE_SUB_PATTERN.sub('-', k).lower()}:{v}"
for k, v in value.items()
)
elif (
# camel to data-* attributes
key.startswith("data_")
# camel to aria-* attributes
or key.startswith("aria_")
# handle special cases
or key in DASHED_HTML_ATTRS
):
key = key.replace("_", "-")
elif (
# camel to data-* attributes
key.startswith("data")
# camel to aria-* attributes
or key.startswith("aria")
# handle special cases
or key in DASHED_HTML_ATTRS
):
key = _CAMEL_CASE_SUB_PATTERN.sub("-", key)
if callable(value): # nocov
raise TypeError(f"Cannot convert callable attribute {key}={value} to HTML")
# Again, we lower the attribute name only to normalize - HTML is case-insensitive:
# http://w3c.github.io/html-reference/documents.html#case-insensitivity
return key.lower(), str(value)
# see list of HTML attributes with dashes in them:
# https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes#attribute_list
DASHED_HTML_ATTRS = {"accept_charset", "acceptCharset", "http_equiv", "httpEquiv"}
# Pattern for delimitting camelCase names (e.g. camelCase to camel-case)
_CAMEL_CASE_SUB_PATTERN = re.compile(r"(?<!^)(?=[A-Z])")