Skip to content

Commit 2c903b0

Browse files
authored
[AINode] Support register as system service (#17138)
1 parent 4bf7c3e commit 2c903b0

9 files changed

Lines changed: 104 additions & 6 deletions

File tree

iotdb-core/ainode/ainode.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,5 +69,14 @@
6969
</includes>
7070
<fileMode>0755</fileMode>
7171
</fileSet>
72+
<fileSet>
73+
<directory>${project.basedir}/../../scripts/tools/ops</directory>
74+
<outputDirectory>tools/ops</outputDirectory>
75+
<includes>
76+
<include>*ainode.*</include>
77+
<include>**/*ainode.*</include>
78+
</includes>
79+
<fileMode>0755</fileMode>
80+
</fileSet>
7281
</fileSets>
7382
</assembly>

iotdb-core/ainode/iotdb/ainode/core/inference/pool_controller.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import random
2121
import threading
2222
from concurrent.futures import wait
23+
from queue import Empty
2324
from typing import Dict, Optional
2425

2526
import torch
@@ -176,9 +177,16 @@ def show_loaded_models(
176177

177178
def _worker_loop(self):
178179
while not self._stop_event.is_set():
179-
task = self._task_queue.get()
180+
try:
181+
task = self._task_queue.get(timeout=1)
182+
except Empty:
183+
# Ignore Empty exception and continue the loop
184+
continue
180185
if task is None:
181186
self._task_queue.task_done()
187+
logger.info(
188+
"PoolController received task None, the worker loop is existed."
189+
)
182190
break
183191
task_fn, args, kwargs = task
184192
try:
@@ -519,9 +527,12 @@ def stop(self):
519527
self._task_queue.put(None)
520528
self._pool_control_worker_thread.join()
521529
self._executor.close()
530+
logger.info(f"PoolController stopped its task executor.")
522531

523532
# shutdown pool instances
524533
# TODO: pool instances can be shutdown in parallel
525534
for inner in self._request_pool_map.values():
526535
for group in inner.values():
527536
group.shutdown()
537+
538+
logger.info("The PoolController has been stopped.")

iotdb-core/ainode/iotdb/ainode/core/manager/inference_manager.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,3 +292,4 @@ def stop(self):
292292
while not self._result_queue.empty():
293293
self._result_queue.get_nowait()
294294
self._result_queue.close()
295+
logger.info("The Inference Manager has been stopped.")

iotdb-core/ainode/iotdb/ainode/core/rpc/handler.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,9 @@ def __init__(self, ainode):
5858
# ==================== Cluster Management ====================
5959

6060
def stop(self):
61-
logger.info("Stopping the RPC service handler of IoTDB-AINode...")
61+
logger.info("Stopping the RPC handler of IoTDB-AINode...")
6262
self._inference_manager.stop()
63+
logger.info("The RPC handler of IoTDB-AINode exited.")
6364

6465
def stopAINode(self) -> TSStatus:
6566
self._ainode.stop()

scripts/sbin/start-ainode.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,5 @@ if [ "$daemon_mode" = true ]; then
5151
echo AINode started in background
5252
else
5353
echo Starting AINode...
54-
$ain_ainode_executable start
54+
exec "$ain_ainode_executable" start
5555
fi

scripts/sbin/stop-ainode.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ if [ -z "$PID" ]; then
7373
fi
7474
exit 1
7575
elif [[ "${PID_VERIFY}" =~ ${PID} ]]; then
76-
kill -s TERM "$PID"
76+
kill -s TERM -- "-$PID"
7777
echo "Stop AINode, PID:" "$PID"
7878
else
7979
echo "No AINode to stop"

scripts/tools/ops/daemon-ainode.sh

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

# Registers IoTDB AINode as a systemd service.
#
# The script writes /etc/systemd/system/iotdb-ainode.service, reloads systemd,
# and then interactively offers to start the service and to enable it at boot.
# Writing the unit file and calling systemctl normally require root privileges.
#
# Exit status: 0 on success, 1 if the unit could not be registered or if a
# requested start/enable step failed.

# Resolve the sbin directory relative to this script (tools/ops -> sbin).
# '&&' keeps a failed 'cd' from silently yielding the caller's working directory.
IOTDB_AINODE_SBIN_HOME="$(cd "$(dirname "$0")/../../sbin" && pwd)"
SYSTEMD_DIR="/etc/systemd/system"

if [ -z "$IOTDB_AINODE_SBIN_HOME" ]; then
  echo "Cannot locate the sbin directory of IoTDB AINode"
  exit 1
fi

if [ ! -d "$SYSTEMD_DIR" ]; then
  echo "Current system can't support systemd"
  exit 1 # Exit with an error status
fi

FILE_NAME="$SYSTEMD_DIR/iotdb-ainode.service"

# Generate the unit file. \${MAINPID} is escaped so that it is expanded by
# systemd rather than by this shell. The braces are required because systemd
# only substitutes a brace-less \$VAR when it is a separate word, and here the
# variable is embedded in the word "-<pid>" (signal the whole process group).
if ! cat > "$FILE_NAME" <<EOF
[Unit]
Description=iotdb-ainode
Documentation=https://iotdb.apache.org/
After=network.target

[Service]
StandardOutput=null
StandardError=null
LimitNOFILE=65536
Type=simple
User=root
Group=root
ExecStart=$IOTDB_AINODE_SBIN_HOME/start-ainode.sh
ExecStop=/bin/kill -TERM -\${MAINPID}
Restart=on-failure
SuccessExitStatus=143
RestartSec=5
StartLimitInterval=600s
StartLimitBurst=3
RestartPreventExitStatus=SIGKILL
TimeoutStopSec=60s

[Install]
WantedBy=multi-user.target
EOF
then
  echo "Failed to write $FILE_NAME, please check your permissions."
  exit 1
fi

echo "Daemon service of IoTDB AINode has been successfully registered."

if ! systemctl daemon-reload; then
  echo "Failed to execute 'systemctl daemon-reload'."
  exit 1
fi

# Remember whether any optional step failed so the exit status reflects it.
EXIT_STATUS=0

echo
echo "Do you want to execute 'systemctl start iotdb-ainode'? y/n (default y)"
read -r START_SERVICE
if [[ -z "$START_SERVICE" || "$START_SERVICE" =~ ^[Yy]$ ]]; then
  # Stop any manually started AINode first. This runs in the foreground on
  # purpose: a backgrounded stop script could race with 'systemctl start' and
  # terminate the instance that systemd has just launched. A failure here
  # (e.g. no AINode running) is expected and ignored.
  "${IOTDB_AINODE_SBIN_HOME}"/stop-ainode.sh >/dev/null 2>&1 || true
  if systemctl start iotdb-ainode; then
    echo "Executed successfully."
  else
    echo "Failed to execute 'systemctl start iotdb-ainode'."
    EXIT_STATUS=1
  fi
fi
echo
echo "Do you want to execute 'systemctl enable iotdb-ainode' to start at boot? y/n (default y)"
read -r ADD_STARTUP
if [[ -z "$ADD_STARTUP" || "$ADD_STARTUP" =~ ^[Yy]$ ]]; then
  if systemctl enable iotdb-ainode >/dev/null 2>&1; then
    echo "Executed successfully."
  else
    echo "Failed to execute 'systemctl enable iotdb-ainode'."
    EXIT_STATUS=1
  fi
fi

exit "$EXIT_STATUS"

scripts/tools/ops/daemon-confignode.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ User=root
4747
Group=root
4848
Environment=JAVA_HOME=$JAVA_HOME
4949
ExecStart=$IOTDB_SBIN_HOME/start-confignode.sh
50+
ExecStop=$IOTDB_SBIN_HOME/stop-confignode.sh
5051
Restart=on-failure
5152
SuccessExitStatus=143
5253
RestartSec=5
@@ -65,7 +66,7 @@ echo
6566
echo "Do you want to execute 'systemctl start iotdb-confignode'? y/n (default y)"
6667
read -r START_SERVICE
6768
if [[ -z "$START_SERVICE" || "$START_SERVICE" =~ ^[Yy]$ ]]; then
68-
"${IOTDB_SBIN_HOME}"/sbin/stop-confignode.sh >/dev/null 2>&1 &
69+
"${IOTDB_SBIN_HOME}"/stop-confignode.sh >/dev/null 2>&1 &
6970
systemctl start iotdb-confignode
7071
echo "Executed successfully."
7172
fi

scripts/tools/ops/daemon-datanode.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ User=root
4747
Group=root
4848
Environment=JAVA_HOME=$JAVA_HOME
4949
ExecStart=$IOTDB_SBIN_HOME/start-datanode.sh
50+
ExecStop=$IOTDB_SBIN_HOME/stop-datanode.sh
5051
Restart=on-failure
5152
SuccessExitStatus=143
5253
RestartSec=5
@@ -65,7 +66,7 @@ echo
6566
echo "Do you want to execute 'systemctl start iotdb-datanode'? y/n (default y)"
6667
read -r START_SERVICE
6768
if [[ -z "$START_SERVICE" || "$START_SERVICE" =~ ^[Yy]$ ]]; then
68-
"${IOTDB_SBIN_HOME}"/sbin/stop-datanode.sh >/dev/null 2>&1 &
69+
"${IOTDB_SBIN_HOME}"/stop-datanode.sh >/dev/null 2>&1 &
6970
systemctl start iotdb-datanode
7071
echo "Executed successfully."
7172
fi

0 commit comments

Comments
 (0)