-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathserialize.py
More file actions
290 lines (232 loc) · 9.31 KB
/
serialize.py
File metadata and controls
290 lines (232 loc) · 9.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
import json
from copy import deepcopy
from typing import Any, Dict, List, Type, Union
from pydantic import BaseModel, validate_call
from jsondoc.models.block import Type as BlockType
from jsondoc.models.block.base import BlockBase
from jsondoc.models.block.types.bulleted_list_item import BulletedListItemBlock
from jsondoc.models.block.types.code import CodeBlock
from jsondoc.models.block.types.column import ColumnBlock
from jsondoc.models.block.types.column_list import ColumnListBlock
from jsondoc.models.block.types.divider import DividerBlock
from jsondoc.models.block.types.equation import EquationBlock
from jsondoc.models.block.types.heading_1 import Heading1Block
from jsondoc.models.block.types.heading_2 import Heading2Block
from jsondoc.models.block.types.heading_3 import Heading3Block
from jsondoc.models.block.types.image import ImageBlock
from jsondoc.models.block.types.image.external_image import ExternalImage
from jsondoc.models.block.types.image.file_image import FileImage
from jsondoc.models.block.types.numbered_list_item import NumberedListItemBlock
from jsondoc.models.block.types.paragraph import ParagraphBlock
from jsondoc.models.block.types.quote import QuoteBlock
from jsondoc.models.block.types.rich_text import Type as RichTextType
from jsondoc.models.block.types.rich_text.base import RichTextBase
from jsondoc.models.block.types.rich_text.equation import RichTextEquation
from jsondoc.models.block.types.rich_text.text import RichTextText
from jsondoc.models.block.types.table import TableBlock
from jsondoc.models.block.types.table_row import TableRowBlock
from jsondoc.models.block.types.to_do import ToDoBlock
from jsondoc.models.block.types.toggle import ToggleBlock
from jsondoc.models.file import Type as FileType
from jsondoc.models.file.base import FileBase
from jsondoc.models.page import Page
from jsondoc.utils import get_nested_value, set_nested_value
# Resolve block types
# Maps each BlockType member to the block class that load_block instantiates
# for it. load_block raises ValueError for a type that has no entry here.
BLOCK_TYPES = {
    BlockType.paragraph: ParagraphBlock,
    BlockType.to_do: ToDoBlock,
    BlockType.bulleted_list_item: BulletedListItemBlock,
    BlockType.numbered_list_item: NumberedListItemBlock,
    BlockType.code: CodeBlock,
    BlockType.column: ColumnBlock,
    BlockType.column_list: ColumnListBlock,
    BlockType.divider: DividerBlock,
    BlockType.equation: EquationBlock,
    BlockType.heading_1: Heading1Block,
    BlockType.heading_2: Heading2Block,
    BlockType.heading_3: Heading3Block,
    BlockType.image: ImageBlock,
    BlockType.quote: QuoteBlock,
    # NOTE(review): equation_1 resolves to the same class as equation —
    # confirm this enum member is intended and not a generation artifact.
    BlockType.equation_1: EquationBlock,
    BlockType.table: TableBlock,
    BlockType.table_row: TableRowBlock,
    BlockType.toggle: ToggleBlock,
}
# Maps each RichTextType member to the rich text class that load_rich_text
# instantiates for it.
RICH_TEXT_TYPES = {
    RichTextType.text: RichTextText,
    RichTextType.equation: RichTextEquation,
}
# Maps each FileType member to the image file class that load_image
# instantiates for it.
IMAGE_FILE_TYPES = {
    FileType.file: FileImage,
    FileType.external: ExternalImage,
}
# Per block type, the paths of additional fields holding a list of rich texts
# (besides "<type>.rich_text"). load_block converts each of them with
# load_rich_text and skips paths whose value is None.
OTHER_RICH_TEXT_FIELDS = {
    BlockType.code: [".code.caption"],
    BlockType.image: [".image.caption"],
}
# Cell fields are nested lists of rich texts
# load_block converts every inner list with load_rich_text and raises
# ValueError when the value at the path is not a list.
NESTED_RICH_TEXT_FIELDS = {
    BlockType.table_row: [".table_row.cells"],
}
def load_rich_text(obj: Union[str, Dict[str, Any]]) -> RichTextBase:
    """
    Build a rich text model from a JSON string or a dictionary.

    The value of the "type" key selects the concrete class through
    RICH_TEXT_TYPES; every key of the input (including "type") is passed to
    that class as a keyword argument.

    :param obj: Rich text object, given as a JSON string or as a dictionary.
        A dictionary is deep-copied, so the caller's object is never modified.
    :return: Instance of the rich text class registered for the object's type
    :raises KeyError: If the object has no "type" key
    :raises ValueError: If the type is not a valid RichTextType, or if no class
        is registered for it in RICH_TEXT_TYPES
    """
    # json.loads already produces a fresh object; only caller-owned input
    # needs to be copied.
    obj = json.loads(obj) if isinstance(obj, str) else deepcopy(obj)
    mutable_obj = dict(obj)

    # Extract and validate rich text type
    raw_type = mutable_obj["type"]
    try:
        current_type = RichTextType(raw_type)
    except ValueError as err:
        # Keep the enum's own error as the cause for easier debugging
        raise ValueError(f"Unsupported rich text type: {raw_type}") from err

    # Find the corresponding rich text class
    rich_text_instantiator = RICH_TEXT_TYPES.get(current_type)
    if rich_text_instantiator is None:
        raise ValueError(f"Unsupported rich text type: {current_type}")

    # Create the rich text with all properties
    return rich_text_instantiator(**mutable_obj)
def load_image(obj: Union[str, Dict[str, Any]]) -> FileBase:
    """
    Build an image file model from a JSON string or a dictionary.

    The file's type field is at the parent level of the file object:

    {
        "type": "file",
        "file": {
            "url": "...",
            "expiry_time": "2024-08-09T13:15:57.161Z",
        },
    }

    For that reason the whole parent object is expected here: its "type" key
    selects the class through IMAGE_FILE_TYPES, and every key (including
    "type") is passed to that class as a keyword argument.

    :param obj: Parent object of the file, given as a JSON string or as a
        dictionary. A dictionary is deep-copied, so the caller's object is
        never modified.
    :return: Instance of the image file class registered for the file type
    :raises ValueError: If the type is missing, is not a valid FileType, or
        has no class registered in IMAGE_FILE_TYPES
    """
    # json.loads already produces a fresh object; only caller-owned input
    # needs to be copied.
    obj = json.loads(obj) if isinstance(obj, str) else deepcopy(obj)
    mutable_obj = dict(obj)

    # A missing "type" yields None, which the enum rejects with ValueError
    raw_type = mutable_obj.get("type")
    try:
        current_type = FileType(raw_type)
    except ValueError as err:
        raise ValueError(f"Unsupported file type: {raw_type}") from err

    # Find the corresponding file class
    file_instantiator = IMAGE_FILE_TYPES.get(current_type)
    if file_instantiator is None:
        # A valid FileType without a registered image class would otherwise
        # fail with "'NoneType' object is not callable".
        raise ValueError(f"Unsupported file type: {current_type}")

    # Create the file with all properties
    return file_instantiator(**mutable_obj)
@validate_call
def load_block(obj: Union[str, Dict[str, Any]]) -> BlockBase:
    """
    Build a block model, including its descendants, from a JSON string or a
    dictionary.

    The value of the "type" key selects the concrete class through
    BLOCK_TYPES. Before the class is instantiated, the following parts of the
    input are converted into model instances:

    - "children": every child is loaded recursively with load_block
    - "<type>.rich_text": every item is loaded with load_rich_text
    - the fields listed in OTHER_RICH_TEXT_FIELDS (lists of rich texts)
    - the fields listed in NESTED_RICH_TEXT_FIELDS (lists of lists of rich texts)
    - for image blocks, the "image" object is loaded with load_image

    :param obj: Block object, given as a JSON string or as a dictionary.
        A dictionary is deep-copied, so the caller's object is never modified.
    :return: Instance of the block class registered for the object's type
    :raises KeyError: If the object has no "type" key
    :raises ValueError: If the type is not a valid BlockType or has no class
        registered in BLOCK_TYPES, or if a rich text field is not a list
    """
    # json.loads already produces a fresh object; only caller-owned input
    # needs to be copied.
    obj = json.loads(obj) if isinstance(obj, str) else deepcopy(obj)
    mutable_obj = dict(obj)

    # Extract and validate block type
    raw_type = mutable_obj["type"]
    try:
        current_type = BlockType(raw_type)
    except ValueError as err:
        # Keep the enum's own error as the cause for easier debugging
        raise ValueError(f"Unsupported block type: {raw_type}") from err

    # Find the corresponding block class
    block_instantiator = BLOCK_TYPES.get(current_type)
    if block_instantiator is None:
        raise ValueError(f"Unsupported block type: {current_type}")

    # Process children recursively; an absent or null "children" is left as is
    children = mutable_obj.get("children")
    if children is not None:
        mutable_obj["children"] = [load_block(child) for child in children]

    # Process rich text
    # The type-specific payload lives under a key named after the block type.
    obj_field = mutable_obj.get(current_type.value, {})
    # A non-dict payload (e.g. an explicit null) is left for the block class
    # to reject instead of failing here with an opaque TypeError.
    if isinstance(obj_field, dict) and "rich_text" in obj_field:
        raw_rich_text = obj_field["rich_text"]
        # Must be a list
        if not isinstance(raw_rich_text, list):
            raise ValueError(f"Rich text field must be a list: {raw_rich_text}")
        # obj_field is the same dict object that mutable_obj references, so
        # this assignment updates the constructor arguments in place.
        obj_field["rich_text"] = [load_rich_text(item) for item in raw_rich_text]

    # Process caption field
    # presumably get_nested_value/set_nested_value resolve dotted paths such
    # as ".image.caption" inside mutable_obj — confirm in jsondoc.utils
    for rt_field in OTHER_RICH_TEXT_FIELDS.get(current_type, []):
        val_ = get_nested_value(mutable_obj, rt_field)
        if val_ is None:
            continue
        if not isinstance(val_, list):
            raise ValueError(f"Field {rt_field} must be a list: {val_}")
        new_val = [load_rich_text(item) for item in val_]
        set_nested_value(mutable_obj, rt_field, new_val)

    # Process cell field
    # Unlike the caption fields above, a missing value is an error here.
    for rt_field in NESTED_RICH_TEXT_FIELDS.get(current_type, []):
        val_ = get_nested_value(mutable_obj, rt_field)
        if not isinstance(val_, list):
            raise ValueError(f"Field {rt_field} must be a list: {val_}")
        new_val = [[load_rich_text(item) for item in row] for row in val_]
        set_nested_value(mutable_obj, rt_field, new_val)

    # Process image field
    # This runs after the caption handling above, so load_image receives the
    # image object with its caption already converted.
    if current_type == BlockType.image:
        file_field = ".image"
        val_ = get_nested_value(mutable_obj, file_field)
        set_nested_value(mutable_obj, file_field, load_image(val_))

    # Create the block with all properties, including processed children
    return block_instantiator(**mutable_obj)
@validate_call
def load_page(obj: Union[str, Dict[str, Any]]) -> Page:
    """
    Build a Page from a JSON string or a dictionary.

    Every entry of the "children" key (an empty list when the key is absent)
    is loaded with load_block; all remaining keys are handed to Page unchanged.

    :param obj: Page object, given as a JSON string or as a dictionary
    :return: Page whose children are loaded block instances
    """
    # Work on a private copy so the caller's object stays untouched
    payload = deepcopy(obj)
    if isinstance(payload, str):
        payload = json.loads(payload)

    page_kwargs = dict(payload)
    raw_children = page_kwargs.pop("children", [])
    page_kwargs["children"] = list(map(load_block, raw_children))
    return Page(**page_kwargs)
@validate_call
def load_jsondoc(
    obj: Union[str, Dict[str, Any], List[Dict[str, Any]]],
) -> Page | BlockBase | List[BlockBase]:
    """
    Load a JSON-DOC object, dispatching on its "object" field.

    A string is parsed as JSON first. A list is loaded element by element;
    a dictionary is passed to load_page or load_block depending on whether its
    "object" field is "page" or "block".

    :param obj: JSON string, a single page/block dictionary, or a list of them
    :return: The loaded page, the loaded block, or a list of loaded objects
    :raises ValueError: If the "object" field is neither "page" nor "block"
    """
    parsed = json.loads(obj) if isinstance(obj, str) else obj

    if isinstance(parsed, list):
        return [load_jsondoc(entry) for entry in parsed]

    kind = parsed.get("object")
    if kind == "page":
        return load_page(parsed)
    if kind == "block":
        return load_block(parsed)
    raise ValueError("Invalid object: must be either 'page' or 'block'")
def base_model_dump_json(obj: BaseModel, indent: int | None = None) -> str:
    """
    Serialize a pydantic model to a JSON string.

    The model is dumped with serialize_as_any=True and exclude_none=True.

    :param obj: Model to serialize
    :param indent: Indentation level for the JSON string
    :return: JSON string
    """
    dump_options = {
        "serialize_as_any": True,
        "exclude_none": True,
        "indent": indent,
    }
    return obj.model_dump_json(**dump_options)
@validate_call
def jsondoc_dump_json(
    obj: BlockBase | List[BlockBase] | Page,
    indent: int | None = None,
) -> str:
    """
    Render a JSON-DOC object as a JSON string.

    :param obj: JSON-DOC object to render (a single block, a list of blocks, or a page)
    :param indent: Indentation level for the JSON string
    :return: JSON string
    """
    # Single block or page: the model dump is already the final output
    if not isinstance(obj, list):
        return base_model_dump_json(obj, indent=indent)

    # Each block is dumped on its own and parsed back into plain data so the
    # whole list can be emitted as one JSON array by json.dumps.
    decoded_blocks = [
        json.loads(base_model_dump_json(item, indent=indent)) for item in obj
    ]
    return json.dumps(decoded_blocks, indent=indent)