Skip to content

Commit 54bf893

Browse files
author
bajins
committed
fixed 去除多余字符避免SQL执行错误 (fix: strip extraneous characters to avoid SQL execution errors)
1 parent 4743f15 commit 54bf893

File tree

3 files changed

+16
-4
lines changed

3 files changed

+16
-4
lines changed

Pexels.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
# @Software: PyCharm
1010
import gc
1111
import os
12+
import re
1213
import threading
1314
import time
1415

@@ -57,11 +58,11 @@ def download_latest_images(page, directory):
5758

5859
info_html = BeautifulSoup(HttpUtil.get("https://www.pexels.com/zh-cn/photo/" + image_id).text,
5960
features="lxml")
60-
tags = info_html.find("meta", {"name": "keywords"}).attrs["content"].replace(" ", "").replace("'", "")
61+
tags = info_html.find("meta", {"name": "keywords"}).attrs["content"]
6162
if len(tags) > 0 and tags != "":
6263
# 简繁转换
6364
tags = zhconv.convert(tags[:len(tags) - 7], 'zh-cn')
64-
65+
tags = re.sub(r"[^a-z,\u4e00-\u9fa5]+|^,|,$", "", tags).replace(",,", ",")
6566
s3.execute_commit(f"""
6667
INSERT OR IGNORE INTO images(image_id,suffix,url,type,page,tags)
6768
VALUES('{image_id}','{download_url[download_url.rfind(".") + 1:]}','{download_url}','latest','{page}','{tags}')
@@ -89,6 +90,8 @@ def download_latest_images(page, directory):
8990
page = 1
9091
run_count = 0
9192

93+
except Exception as e:
94+
print(e)
9295
finally:
9396
print("当前活跃线程数:", threading.activeCount())
9497
time.sleep(400)

Wallhaven.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
# @Software: PyCharm
1010
import gc
1111
import os
12-
import platform
12+
import re
1313
import threading
1414
import time
1515

@@ -42,7 +42,7 @@ def download_images(url, page, directory):
4242

4343
wait()
4444

45-
html = BeautifulSoup(HttpUtil.get(url + str(2472)).text, features="lxml")
45+
html = BeautifulSoup(HttpUtil.get(url + str(page)).text, features="lxml")
4646
figure = html.find_all("figure")
4747
# 获取所有包含指定属性的标签
4848
page_all = html.find_all(lambda tag: tag.has_attr('original-title'))
@@ -63,6 +63,7 @@ def download_images(url, page, directory):
6363
tags = ",".join([tag_html.text for tag_html in tags_html]).replace("'", "")
6464
if len(tags) > 0 and tags != "":
6565
tags = TranslationUtil.translate_google(tags).replace(",", ",")
66+
tags = re.sub(r"[^a-z,\u4e00-\u9fa5]+|^,|,$", "", tags).replace(",,", ",")
6667

6768
download_url = info_html.find("img", {"id": "wallpaper"}).attrs["src"]
6869
if len(download_url) <= 0 or download_url == "":
@@ -96,6 +97,8 @@ def download_images(url, page, directory):
9697
page = 1
9798
run_count = 0
9899

100+
except Exception as e:
101+
print(e)
99102
finally:
100103
print("当前活跃线程数:", threading.activeCount())
101104
time.sleep(400)

utils/DatabaseUtil.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ def select(connect, sql):
3131
# data = cursor.fetchone()
3232
# 获取所有数据
3333
return cursor.fetchall()
34+
except sqlite3.OperationalError as e:
35+
print(sql)
36+
raise e
3437
finally:
3538
# 关闭游标
3639
cursor.close()
@@ -57,6 +60,9 @@ def execute_commit(connect, sql):
5760
# 操作后获取成功行数
5861
# return cursor.arraysize
5962
return cursor.rowcount
63+
except sqlite3.OperationalError as e:
64+
print(sql)
65+
raise e
6066
finally:
6167
# 关闭游标
6268
cursor.close()

0 commit comments

Comments
 (0)