Skip to content

Commit 2aefbfd

Browse files
Midblyte authored and delivrance committed
Add better support for nested entities (both for HTML and Markdown) (pyrogram#297)
* Added better support for nested entities, both for HTML and Markdown
* Tiny style fix
* Make use of pre-defined constants
1 parent 82e0087 commit 2aefbfd

File tree

2 files changed

+56
-61
lines changed

2 files changed

+56
-61
lines changed

pyrogram/client/parser/html.py

Lines changed: 25 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -147,43 +147,38 @@ def parse(self, text: str):
147147
@staticmethod
148148
def unparse(text: str, entities: list):
149149
text = utils.add_surrogates(text)
150-
copy = text
150+
151+
entities_offsets = []
151152

152153
for entity in entities:
154+
entity_type = entity.type
153155
start = entity.offset
154156
end = start + entity.length
155157

156-
type = entity.type
157-
158-
url = entity.url
159-
user = entity.user
160-
161-
sub = copy[start:end]
162-
163-
if type == "bold":
164-
style = "b"
165-
elif type == "italic":
166-
style = "i"
167-
elif type == "underline":
168-
style = "u"
169-
elif type == "strike":
170-
style = "s"
171-
elif type == "code":
172-
style = "code"
173-
elif type == "pre":
174-
style = "pre"
175-
elif type == "blockquote":
176-
style = "blockquote"
177-
elif type == "text_link":
178-
text = text[:start] + text[start:].replace(sub, '<a href="{}">{}</a>'.format(url, sub), 1)
179-
continue
180-
elif type == "text_mention":
181-
text = text[:start] + text[start:].replace(
182-
sub, '<a href="tg://user?id={}">{}</a>'.format(user.id, sub), 1)
183-
continue
158+
if entity_type in ("bold", "italic", "underline", "strike"):
159+
start_tag = "<{}>".format(entity_type[0])
160+
end_tag = "</{}>".format(entity_type[0])
161+
elif entity_type in ("code", "pre", "blockquote"):
162+
start_tag = "<{}>".format(entity_type)
163+
end_tag = "</{}>".format(entity_type)
164+
elif entity_type == "text_link":
165+
url = entity.url
166+
start_tag = '<a href="{}">'.format(url)
167+
end_tag = "</a>"
168+
elif entity_type == "text_mention":
169+
user = entity.user
170+
start_tag = '<a href="tg://user?id={}">'.format(user.id)
171+
end_tag = "</a>"
184172
else:
185173
continue
186174

187-
text = text[:start] + text[start:].replace(sub, "<{0}>{1}</{0}>".format(style, sub), 1)
175+
entities_offsets.append((start_tag, start,))
176+
entities_offsets.append((end_tag, end,))
177+
178+
# sorting by offset (desc)
179+
entities_offsets.sort(key=lambda x: -x[1])
180+
181+
for entity, offset in entities_offsets:
182+
text = text[:offset] + entity + text[offset:]
188183

189184
return utils.remove_surrogates(text)

pyrogram/client/parser/markdown.py

Lines changed: 31 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -107,44 +107,44 @@ def parse(self, text: str, strict: bool = False):
107107
@staticmethod
108108
def unparse(text: str, entities: list):
109109
text = utils.add_surrogates(text)
110-
copy = text
110+
111+
entities_offsets = []
111112

112113
for entity in entities:
114+
entity_type = entity.type
113115
start = entity.offset
114116
end = start + entity.length
115117

116-
type = entity.type
117-
118-
url = entity.url
119-
user = entity.user
120-
121-
sub = copy[start:end]
122-
123-
if type == "bold":
124-
style = BOLD_DELIM
125-
elif type == "italic":
126-
style = ITALIC_DELIM
127-
elif type == "underline":
128-
style = UNDERLINE_DELIM
129-
elif type == "strike":
130-
style = STRIKE_DELIM
131-
elif type == "code":
132-
style = CODE_DELIM
133-
elif type == "pre":
134-
style = PRE_DELIM
135-
# TODO: Blockquote for MD
136-
# elif type == "blockquote":
137-
# style = ...
138-
elif type == "text_link":
139-
text = text[:start] + text[start:].replace(sub, '[{1}]({0})'.format(url, sub), 1)
140-
continue
141-
elif type == "text_mention":
142-
text = text[:start] + text[start:].replace(
143-
sub, '[{1}](tg://user?id={0})'.format(user.id, sub), 1)
144-
continue
118+
if entity_type == "bold":
119+
start_tag = end_tag = BOLD_DELIM
120+
elif entity_type == "italic":
121+
start_tag = end_tag = ITALIC_DELIM
122+
elif entity_type == "underline":
123+
start_tag = end_tag = UNDERLINE_DELIM
124+
elif entity_type == "strike":
125+
start_tag = end_tag = STRIKE_DELIM
126+
elif entity_type == "code":
127+
start_tag = end_tag = CODE_DELIM
128+
elif entity_type in ("pre", "blockquote"):
129+
start_tag = end_tag = PRE_DELIM
130+
elif entity_type == "text_link":
131+
url = entity.url
132+
start_tag = "["
133+
end_tag = "]({})".format(url)
134+
elif entity_type == "text_mention":
135+
user = entity.user
136+
start_tag = "["
137+
end_tag = "](tg://user?id={})".format(user.id)
145138
else:
146139
continue
147140

148-
text = text[:start] + text[start:].replace(sub, "{0}{1}{0}".format(style, sub), 1)
141+
entities_offsets.append((start_tag, start,))
142+
entities_offsets.append((end_tag, end,))
143+
144+
# sorting by offset (desc)
145+
entities_offsets.sort(key=lambda x: -x[1])
146+
147+
for entity, offset in entities_offsets:
148+
text = text[:offset] + entity + text[offset:]
149149

150150
return utils.remove_surrogates(text)

0 commit comments

Comments (0)