Skip to content

Commit 2e2e2bb

Browse files
committed
nico: native danmaku downloader
1 parent 25f7986 commit 2e2e2bb

16 files changed

Lines changed: 1881 additions & 28 deletions

nico.py

Lines changed: 68 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,36 @@
66
from urllib.parse import urljoin, urlparse
77
from urllib.request import getproxies
88
from pathlib import Path
9-
109
import websocket
1110
from rich.console import Console
1211
from rich.table import Table
12+
1313
from util import download, dump_json, get, MyTime, requests_retry_session, safeify, to_jp_time, load_cookie
14+
from proto.dwango.nicolive.chat.service.edge import payload_pb2 as chat
15+
import google.protobuf.json_format
1416

1517
# Shared rich Console instance for this module's terminal output.
console = Console()
1618
# Rebind the module-level name `print` to the rich console's print method, so
# calls in this file can pass rich-only keywords such as `style=`.
print = console.print
1719

20+
# based on https://github.com/rinsuki-lab/ndgr-reader/blob/main/src/protobuf-stream-reader.ts
21+
def read_protobuf_message(data):
    """Return the first length-prefixed message found at the start of *data*.

    The buffer must begin with a base-128 varint holding the payload size,
    immediately followed by that many payload bytes. Anything after the
    first payload is ignored.

    Returns the payload slice, or ``None`` when the buffer ends before the
    varint terminates or before the announced payload is complete.
    """
    total = len(data)
    length = 0
    for pos in range(total):
        byte = data[pos]
        # Each varint byte contributes its low 7 bits, least significant group first.
        length |= (byte & 0x7F) << (7 * pos)
        if byte & 0x80:
            # Continuation bit set: the varint carries on into the next byte.
            continue
        start = pos + 1
        end = start + length
        return data[start:end] if end <= total else None
    # Ran out of bytes (or had none) before the varint ended.
    return None
37+
38+
1839
class NicoDownloader():
1940
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36'}
2041

@@ -86,31 +107,43 @@ def fetch_page(self, url):
86107
live_data = json.loads(soup.select_one('#embedded-data')['data-props'])
87108
return live_data
88109

89-
def download_comments(self, video_id, output):
90-
import asyncio
91-
try:
92-
from ndgr_client import NDGRClient
93-
except ImportError:
94-
print('ERROR: Please install ndgr_client first.', style='bold red')
95-
print('You can install it by running: "pip install git+https://github.com/tsukumijima/NDGRClient"')
96-
return
97-
98-
async def download(video_id, output, cookies, verbose=False):
99-
ndgr_client = NDGRClient(video_id, verbose=verbose, console_output=True)
100-
await ndgr_client.login(cookies=cookies)
101-
102-
comments = await ndgr_client.downloadBackwardComments()
103-
comments_count = len(comments)
110+
def download_comments_native(self, message_server_info, output):
    """Fetch a program's comments from the message server and dump them as JSON.

    The view API is polled (starting at ``at=now``) until it returns an entry
    carrying a backward segment URI. The chain of packed backward segments is
    then followed until a segment has no ``next`` link. Segments fetched later
    are placed in front of segments fetched earlier (presumably the chain runs
    newest to oldest -- confirm against the API), and every message is
    converted with ``MessageToDict`` and written through ``dump_json``.

    NOTE(review): the call in ``download_timeshift`` submits three positional
    arguments (``message_server_info, end_time_dt, danmaku_output``), which
    does not match this two-argument signature -- reconcile the two.

    Args:
        message_server_info: The ``messageServer`` websocket payload;
            ``data.viewUri`` and ``data.vposBaseTime`` are read from it.
        output: Destination handed unchanged to ``dump_json``.

    Raises:
        RuntimeError: If a view response holds no complete length-prefixed
            message, or its first entry is neither ``next`` nor ``backward``
            (continuing would re-request the same URL forever).
        requests.HTTPError: If either endpoint answers with an error status.
    """
    view_uri = message_server_info['data']['viewUri']
    # e.g. "vposBaseTime": "2024-09-25T21:50:00+09:00"
    vpos_base_time_dt = datetime.strptime(message_server_info['data']['vposBaseTime'], '%Y-%m-%dT%H:%M:%S%z')
    vpos_base_time_epoch = int(vpos_base_time_dt.timestamp())
    print(f'vpos Base time: {vpos_base_time_dt} ({vpos_base_time_epoch})')

    # Phase 1: walk the view API until it reveals the backward segment URI.
    at = 'now'
    backward_api_uri = None
    while True:
        url = f'{view_uri}?&at={at}'
        print(f'Fetch {url}')
        r = self.session.get(url, timeout=30)
        # Do not try to decode an HTTP error page as protobuf.
        r.raise_for_status()
        message = read_protobuf_message(r.content)
        if message is None:
            raise RuntimeError(f'No complete protobuf message in response from {url}')
        chunked_entry = chat.ChunkedEntry()
        chunked_entry.ParseFromString(message)
        if chunked_entry.HasField('next'):
            at = chunked_entry.next.at
        elif chunked_entry.HasField('backward'):
            backward_api_uri = chunked_entry.backward.segment.uri
            break
        else:
            # `at` would stay unchanged, so looping again could never make progress.
            raise RuntimeError(f'Response from {url} has neither a "next" nor a "backward" entry')

    # Phase 2: follow the backward chain, keeping each segment's messages together.
    segments = []
    while True:
        print(f'Fetch {backward_api_uri}')
        r2 = self.session.get(backward_api_uri, timeout=30)
        r2.raise_for_status()
        packed_segment = chat.PackedSegment()
        packed_segment.ParseFromString(r2.content)
        segments.append(list(packed_segment.messages))
        if packed_segment.HasField('next'):
            backward_api_uri = packed_segment.next.uri
        else:
            break

    # Later segments go first; flattening once avoids re-copying the list per segment.
    messages = [message for segment in reversed(segments) for message in segment]
    print(f'Find {len(messages)} messages.')
    dump_json([google.protobuf.json_format.MessageToDict(message) for message in messages], output)
    # TODO: convert the json to a format that is compatible with nicoxml2ass
111146

112-
cookies = self.session.cookies.get_dict()
113-
asyncio.run(download(video_id, output, cookies))
114147

115148
def download_timeshift(self, url_or_video_id, info_only=False, comments='no', verbose=False, dump=False, auto_reserve=False):
116149
video_id, url, video_type = self._parse_url_or_video_id(url_or_video_id)
@@ -128,8 +161,8 @@ def download_timeshift(self, url_or_video_id, info_only=False, comments='no', ve
128161

129162
live_data = self.fetch_page(url)
130163
title = live_data['program']['title']
131-
end_time_epoch = live_data["program"]["endTime"]
132164
begin_time_epoch = live_data["program"]["beginTime"]
165+
end_time_epoch = live_data["program"]["endTime"]
133166
begin_time_dt = to_jp_time(datetime.fromtimestamp(begin_time_epoch))
134167
end_time_dt = to_jp_time(datetime.fromtimestamp(end_time_epoch))
135168

@@ -228,6 +261,7 @@ def download_timeshift(self, url_or_video_id, info_only=False, comments='no', ve
228261
verbose and print('Payload:', start_watching_payload)
229262
ws.send(json.dumps(start_watching_payload))
230263
stream_info = None
264+
message_server_info = None
231265

232266
while True:
233267
verbose and print("Receiving...")
@@ -236,20 +270,26 @@ def download_timeshift(self, url_or_video_id, info_only=False, comments='no', ve
236270
data = json.loads(result)
237271
if data['type'] == 'stream':
238272
stream_info = data
273+
elif data['type'] == 'messageServer':
274+
message_server_info = data
275+
if stream_info and message_server_info:
276+
print('Got all the info we needed. Close WS connection.')
239277
break
240278
ws.close()
241279

242280
if dump:
243281
dump_json(stream_info, self.save_dir / f'{filename}.streaminfo.json')
282+
dump_json(message_server_info, self.save_dir / f'{filename}.msgserverinfo.json')
244283
return_value.update({
245-
'stream_info': stream_info
284+
'stream_info': stream_info,
285+
'message_server_info': message_server_info
246286
})
247287

248288
ex = concurrent.futures.ThreadPoolExecutor(max_workers=1)
249289
if comments in ['yes', 'only']:
250290
print('Downloading comments...')
251-
danmaku_output = self.save_dir / f'{filename}.xml'
252-
ex.submit(self.download_comments, video_id, danmaku_output)
291+
danmaku_output = self.save_dir / f'{filename}.json'
292+
ex.submit(self.download_comments_native, message_server_info, end_time_dt, danmaku_output)
253293

254294
if comments == 'only':
255295
ex.shutdown(wait=True)

proto/__init__.py

Whitespace-only changes.

proto/dwango/nicolive/chat/data/atoms/moderator_pb2.py

Lines changed: 51 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)