This commit is contained in:
王老板 2024-09-27 17:06:15 +08:00
commit e5a38feb29
25 changed files with 1656 additions and 0 deletions

394
Editer.py Normal file
View File

@ -0,0 +1,394 @@
#!/usr/bin/python
# -*- coding:utf-8 -*-
import requests # 用来抓取网页的html源码
from bs4 import BeautifulSoup # 用于代替正则式 取源码中相应标签中的内容
import time # 时间相关操作
import os
from rich.progress import track as tqdm
from utils import *
import zipfile
import shutil
import re
import pickle
from PIL import Image
import time
import threading
from concurrent.futures import ThreadPoolExecutor, wait
import pickle
from selenium import webdriver
from selenium.webdriver.edge.options import Options
lock = threading.RLock()
class Editer(object):
def __init__(self, root_path, head='https://www.linovelib.com', book_no='0000', volume_no=1):
    """Start an Edge browser session and read the book's metadata page.

    Args:
        root_path: Directory that receives the EPUB and the temporary
            working folder.
        head: Base URL of the site; also sent as the ``referer`` header.
        book_no: Identifier of the book in the site's URLs.
        volume_no: 1-based number of the volume to download.
    """
    self.header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36 Edg/87.0.664.47', 'referer': head, 'cookie':'night=1'}
    self.url_head = head
    options = Options()
    options.add_argument('--start-minimized')
    self.driver = webdriver.Edge(options = options)
    self.main_page = f'{self.url_head}/novel/{book_no}.html'
    self.cata_page = f'{self.url_head}/novel/{book_no}/catalog'
    self.read_tool_page = f'{self.url_head}/themes/zhmb/js/readtool.js'
    self.color_chap_name = '插图'
    self.color_page_name = '彩页'
    self.html_buffer = dict()
    main_html = self.get_html(self.main_page)
    bf = BeautifulSoup(main_html, 'html.parser')
    self.title = bf.find('meta', {"property": "og:novel:book_name"})['content']
    self.author = bf.find('meta', {"property": "og:novel:author"})['content']
    # Test the match explicitly instead of hiding every error behind a bare
    # ``except``. 'cid' is the placeholder that check_url() reports as invalid.
    cover_match = re.search(r'src=\"(.*?)\"', str(bf.find('div', {"class": "book-img fl"})))
    self.cover_url = cover_match.group(1) if cover_match else 'cid'
    self.img_url_map = dict()
    self.volume_no = volume_no
    self.epub_path = root_path
    self.temp_path = os.path.join(self.epub_path, 'temp_'+ check_chars(self.title) + '_' + str(self.volume_no))
    self.missing_last_chap_list = []
    self.is_color_page = True
    self.page_url_map = dict()
    self.ignore_urls = []
    self.url_buffer = []
    self.max_thread_num = 8
    self.pool = ThreadPoolExecutor(self.max_thread_num)
# 获取html文档内容
def get_html(self, url, is_gbk=False):
while True:
time.sleep(0.5)
self.driver.get(url)
req = self.driver.page_source
while '<title>Access denied | www.linovelib.com used Cloudflare to restrict access</title>' in req:
time.sleep(5)
self.driver.get(url)
req = self.driver.page_source
if is_gbk:
req.encoding = 'GBK' #这里是网页的编码转换,根据网页的实际需要进行修改,经测试这个编码没有问题
break
return req
def get_html_img(self, url):
    """Download *url* with ``requests`` and return the response body bytes.

    A response whose first 100 bytes contain ``Forbidden`` is retried up to
    ten times; if it is still blocked a message is printed and the body is
    returned anyway.
    """
    def blocked(resp):
        return 'Forbidden' in str(resp.content[0:100])

    time.sleep(0.5)
    response = requests.get(url, headers=self.header)
    for _ in range(10):
        if not blocked(response):
            break
        response = requests.get(url, headers=self.header)
        time.sleep(0.5)
    if blocked(response):
        print('插图下载失败,链接:' + url)
    return response.content
def make_folder(self):
os.makedirs(self.temp_path, exist_ok=True)
self.text_path = os.path.join(self.temp_path, 'OEBPS/Text')
os.makedirs(self.text_path, exist_ok=True)
self.img_path = os.path.join(self.temp_path, 'OEBPS/Images')
os.makedirs(self.img_path, exist_ok=True)
def get_index_url(self):
self.volume = {}
self.volume['chap_urls'] = []
self.volume['chap_names'] = []
chap_html_list = self.get_chap_list(is_print=False)
if len(chap_html_list)<self.volume_no:
print('输入卷号超过实际卷数!')
return False
volume_array = self.volume_no - 1
chap_html = chap_html_list[volume_array]
self.volume['book_name'] = chap_html.find('h2', {'class': 'v-line'}).text
chap_list = chap_html.find_all('li', {'class', 'col-4'})
for chap_html in chap_list:
self.volume['chap_names'].append(chap_html.text)
self.volume['chap_urls'].append(self.url_head + chap_html.find('a').get('href'))
return True
def get_chap_list(self, is_print=True):
    """Fetch the catalogue page and print or return its volume blocks.

    Args:
        is_print: When true, print ``[n] <volume title>`` for each volume
            and return ``None``; otherwise return the parsed volume
            elements.

    Returns:
        The list of ``div.volume.clearfix`` elements, or ``None`` when
        *is_print* is true.
    """
    cata_html = self.get_html(self.cata_page, is_gbk=False)
    soup = BeautifulSoup(cata_html, 'html.parser')
    # The attribute filter must be a dict; the original passed a set literal.
    volume_html_list = soup.find_all('div', {'class': 'volume clearfix'})
    if not is_print:
        return volume_html_list
    for index, volume_html in enumerate(volume_html_list, start=1):
        print(f'[{index}]', volume_html.find('h2', {'class': 'v-line'}).text)
    return None
def get_page_text(self, content_html):
    """Extract the plain text of one chapter page.

    Every ``<img>`` tag is registered in ``self.img_url_map`` (URL ->
    zero-padded running number) and replaced by an ``[img:NN]`` marker on its
    own line. Image ``00`` is the cover and is removed from the text.

    Args:
        content_html: HTML source of the page.

    Returns:
        str: Text of the content container with image markers.
    """
    soup = BeautifulSoup(content_html, 'html.parser')
    text_html = str(soup.find('div', {'id': 'TextContent', 'class': 'read-content'}))
    for img_tag in re.findall(r"<img .*?>", text_html):
        url_match = re.search(r'.[a-zA-Z]{3}/(.*?).(jpg|png|jpeg)', img_tag)
        if url_match is None:
            # No recognisable image URL: leave the tag alone instead of
            # crashing; get_text() below strips it.
            continue
        img_url_name = url_match.group(1)
        img_url_tail = url_match.group(0).split('.')[-1]
        img_url = f'https://img3.readpai.com/{img_url_name}.{img_url_tail}'
        text_html = text_html.replace('<br/>\n' + img_tag + '\n<br/>', img_tag)
        if img_url not in self.img_url_map:
            self.img_url_map[img_url] = str(len(self.img_url_map)).zfill(2)
        img_no = self.img_url_map[img_url]
        img_symbol = f'<p>[img:{img_no}]</p>'
        # Compare the number itself: the former substring test
        # ("'00' in img_symbol") also matched image 100, 200, ... and
        # silently dropped them as if they were the cover.
        if img_no == '00':
            text_html = text_html.replace(img_tag, '')
        else:
            text_html = text_html.replace(img_tag, img_symbol)
            symbol_index = text_html.index(img_symbol)
            if text_html[symbol_index-1] != '\n':
                text_html = text_html[:symbol_index] + '\n' + text_html[symbol_index:]
    return BeautifulSoup(text_html, 'html.parser').get_text()
def get_chap_text(self, url, chap_name, return_next_chapter=False):
text_chap = ''
page_no = 1
url_ori = url
next_chap_url = None
while True:
if page_no == 1:
str_out = chap_name
else:
str_out = f' 正在下载第{page_no}页......'
print(str_out)
content_html = self.get_html(url, is_gbk=False)
text = self.get_page_text(content_html)
text_chap += text
url_new = url_ori.replace('.html', '_{}.html'.format(page_no+1))[len(self.url_head):]
if url_new in content_html:
page_no += 1
url = self.url_head + url_new
else:
if return_next_chapter:
next_chap_url = self.url_head + re.search(r'书签</a><a href="(.*?)">下一章</a>', content_html).group(1)
break
return text_chap, next_chap_url
def get_text(self):
    """Download all chapters of the volume and write them as XHTML files.

    Ordinary chapters go to ``NN.xhtml`` in ``self.text_path``. The chapter
    named ``self.color_chap_name`` is kept apart and written to
    ``color.xhtml`` after removing lines whose image already appears in an
    ordinary chapter.
    """
    self.make_folder()
    img_strs = []  # image tags already used by ordinary chapters
    # Start empty so the colour-page step below cannot hit an unbound name
    # when no chapter title equals color_chap_name.
    text_html_color = []
    text_no = 0  # counts ordinary chapters only; chap_no counts all chapters
    for chap_no, (chap_name, chap_url) in enumerate(zip(self.volume['chap_names'], self.volume['chap_urls'])):
        is_fix_next_chap_url = chap_name in self.missing_last_chap_list
        text, next_chap_url = self.get_chap_text(chap_url, chap_name, return_next_chapter=is_fix_next_chap_url)
        if is_fix_next_chap_url:
            # Forward repair: zip() reads the list lazily, so the next
            # iteration already sees the repaired URL.
            self.volume['chap_urls'][chap_no+1] = next_chap_url
        if chap_name == self.color_chap_name:
            text_html_color = text2htmls(self.color_page_name, text)
            continue
        text_html = text2htmls(chap_name, text)
        textfile = self.text_path + f'/{str(text_no).zfill(2)}.xhtml'
        with open(textfile, 'w+', encoding='utf-8') as f:
            f.writelines(text_html)
        for text_line in text_html:
            img_str = re.search(r"<img(.*?)\/>", text_line)
            if img_str is not None:
                img_strs.append(img_str.group(0))
        text_no += 1
    if self.is_color_page:
        # Drop colour-page lines whose image is repeated later in the book.
        text_html_color_new = [
            line for line in text_html_color
            if not any(img_str in line for img_str in img_strs)
        ]
        with open(self.text_path + '/color.xhtml', 'w+', encoding='utf-8') as f:
            f.writelines(text_html_color_new)
def get_image(self, is_gui=False, signal=None):
if is_gui:
len_iter = len(self.img_url_map.items())
signal.emit('start')
for i, (img_url, img_name) in enumerate(self.img_url_map.items()):
content = self.get_html_img(img_url)
with open(self.img_path+f'/{img_name}.jpg', 'wb') as f:
f.write(content) #写入二进制内容
signal.emit(int(100*(i+1)/len_iter))
signal.emit('end')
else:
for img_url, img_name in tqdm(self.img_url_map.items()):
content = self.get_html_img(img_url)
with open(self.img_path+f'/{img_name}.jpg', 'wb') as f:
f.write(content) #写入二进制内容
def get_cover(self, is_gui=False, signal=None):
    """Write ``cover.xhtml`` sized after image ``00.jpg``.

    If the image cannot be opened, the error is printed and a 300x300 cover
    page is written instead.

    Args:
        is_gui: When true, emit ``(image_path, height, width)`` on *signal*.
        signal: Object with an ``emit`` method; used only when *is_gui*.
    """
    cover_page = os.path.join(self.text_path, 'cover.xhtml')
    img_w = img_h = 300
    try:
        imgfile = os.path.join(self.img_path, '00.jpg')
        img_w, img_h = Image.open(imgfile).size
        if is_gui:
            signal.emit((imgfile, img_h, img_w))
    except Exception as e:
        print(e)
        print('没有封面图片请自行用第三方EPUB编辑器手动添加封面')
    with open(cover_page, 'w+', encoding='utf-8') as f:
        f.writelines(get_cover_html(img_w, img_h))
def check_volume(self, is_gui=False, signal=None, editline=None):
chap_names = self.volume['chap_names']
chap_num = len(self.volume['chap_names'])
for chap_no, url in enumerate(self.volume['chap_urls']):
if self.check_url(url):
if not self.prev_fix_url(chap_no, chap_num): #先尝试反向递归修复
if chap_no == 0: #第一个章节都反向修复失败 说明后面章节全部缺失,只能手动输入第一个章节,保证第一个章节一定有效
self.volume['chap_urls'][0] = self.hand_in_url(chap_names[chap_no], is_gui, signal, editline)
else: #其余章节反向修复失败 默认使用正向修复
self.missing_last_chap_list.append(chap_names[chap_no-1])
#没有检测到插图页,手动输入插图页标题
if self.color_chap_name not in self.volume['chap_names']:
self.color_chap_name = self.hand_in_color_page_name(is_gui, signal, editline)
self.volume['color_chap_name'] = self.color_chap_name
#没有彩页 但主页封面存在,将主页封面设为书籍封面
if self.color_chap_name=='' and (not self.check_url(self.cover_url)):
self.is_color_page = False
self.img_url_map[self.cover_url] = str(len(self.img_url_map)).zfill(2)
print('**************')
print('提示:没有彩页,但主页封面存在,将使用主页的封面图片作为本卷图书封面')
print('**************')
def check_url(self, url):
    """Return True when *url* contains ``javascript`` or ``cid`` (unusable link)."""
    return any(marker in url for marker in ('javascript', 'cid'))
def get_prev_url(self, chap_no): #获取前一个章节的链接
content_html = self.get_html(self.volume['chap_urls'][chap_no], is_gbk=False)
next_url = self.url_head + re.search(r'<div class="mlfy_page"><a href="(.*?)">上一章</a>', content_html).group(1)
return next_url
def prev_fix_url(self, chap_no, chap_num): #反向递归修复缺失链接后修复前若成功修复返回True否则返回False
if chap_no==chap_num-1: #最后一个章节直接选择不修复 返回False
return False
elif self.check_url(self.volume['chap_urls'][chap_no+1]):
if self.prev_fix_url(chap_no+1, chap_num):
self.volume['chap_urls'][chap_no] = self.get_prev_url(chap_no+1)
return True
else:
return False
else:
self.volume['chap_urls'][chap_no] = self.get_prev_url(chap_no+1)
return True
def hand_in_msg(self, error_msg='', is_gui=False, signal=None, editline=None):
    """Ask the user for a value and return it.

    In console mode the prompt is passed to ``input``. In GUI mode the prompt
    is printed, ``'hang'`` is emitted on *signal*, and the method polls once
    a second until *editline* is hidden, then returns and clears its text.
    """
    if not is_gui:
        return input(error_msg)
    print(error_msg)
    signal.emit('hang')
    time.sleep(1)
    while not editline.isHidden():
        time.sleep(1)
    content = editline.text()
    editline.clear()
    return content
def hand_in_url(self, chap_name, is_gui=False, signal=None, editline=None):
error_msg = f'章节\"{chap_name}\"连接失效,请手动输入该章节链接(手机版“{self.url_head}”开头的链接):'
return self.hand_in_msg(error_msg, is_gui, signal, editline)
def hand_in_color_page_name(self, is_gui=False, signal=None, editline=None):
if is_gui:
error_msg = f'插图页面不存在,需要下拉选择插图页标题,若不需要插图页则保持本栏为空直接点确定:'
editline.addItems(self.volume['chap_names'])
editline.setCurrentIndex(-1)
else:
error_msg = f'插图页面不存在,需要手动输入插图页标题,若不需要插图页则不输入直接回车:'
return self.hand_in_msg(error_msg, is_gui, signal, editline)
def get_toc(self):
    """Write ``OEBPS/toc.ncx`` for the volume.

    When a colour page is in use, its title is removed from
    ``self.volume["chap_names"]`` (in place) so it gets no TOC entry.
    """
    chap_names = self.volume["chap_names"]
    # Only remove the title if it is really listed; list.index() used to
    # raise ValueError when the stored title was not among the chapters.
    if self.is_color_page and self.color_chap_name in chap_names:
        chap_names.remove(self.color_chap_name)
    toc_htmls = get_toc_html(self.title, chap_names)
    textfile = self.temp_path + '/OEBPS/toc.ncx'
    with open(textfile, 'w+', encoding='utf-8') as f:
        f.writelines(toc_htmls)
def get_content(self):
    """Write ``OEBPS/content.opf`` from the title, author, chapter count and image count."""
    chapter_count = len(self.volume["chap_names"])
    image_count = len(os.listdir(self.img_path))
    book_title = self.title + '-' + self.volume['book_name']
    content_htmls = get_content_html(book_title, self.author, chapter_count, image_count, self.is_color_page)
    with open(self.temp_path + '/OEBPS/content.opf', 'w+', encoding='utf-8') as f:
        f.writelines(content_htmls)
def get_epub_head(self):
    """Write the ``mimetype`` file and ``META-INF/container.xml``."""
    with open(self.temp_path + '/mimetype', 'w+', encoding='utf-8') as f:
        f.write('application/epub+zip')
    metainf_folder = os.path.join(self.temp_path, 'META-INF')
    os.makedirs(metainf_folder, exist_ok=True)
    container_htmls = get_container_html()
    with open(metainf_folder + '/container.xml', 'w+', encoding='utf-8') as f:
        f.writelines(container_htmls)
def get_epub(self):
    """Zip the working folder into the final ``.epub`` and delete the folder.

    ``buffer.pkl`` is removed first so it does not end up in the archive.

    Returns:
        str: Path of the created EPUB file.
    """
    os.remove(os.path.join(self.temp_path, 'buffer.pkl'))
    epub_file = self.epub_path + '/' + check_chars(self.title + '-' + self.volume['book_name']) + '.epub'
    with zipfile.ZipFile(epub_file, "w", zipfile.ZIP_DEFLATED) as archive:
        for dirpath, _, filenames in os.walk(self.temp_path):
            # Strip the working-folder prefix so archive names are relative.
            rel_dir = dirpath.replace(self.temp_path, '')
            prefix = rel_dir + os.sep if rel_dir else ''
            for filename in filenames:
                archive.write(os.path.join(dirpath, filename), prefix + filename)
    shutil.rmtree(self.temp_path)
    return epub_file
# # 恢复函数根据secret_map进行恢复
# def restore_chars(self, text):
# restored_text = ""
# i = 0
# while i < len(text):
# char = text[i]
# if char in self.secret_map:
# restored_text += self.secret_map[char]
# else:
# restored_text += char
# i += 1
# return restored_text
def buffer(self):
filename = 'buffer.pkl'
filepath = os.path.join(self.temp_path, filename)
if os.path.isfile(filepath):
with open(filepath, 'rb') as f:
self.volume, self.img_url_map = pickle.load(f)
self.text_path = os.path.join(self.temp_path, 'OEBPS/Text')
os.makedirs(self.text_path, exist_ok=True)
self.img_path = os.path.join(self.temp_path, 'OEBPS/Images')
os.makedirs(self.img_path, exist_ok=True)
self.color_chap_name = self.volume['color_chap_name']
else:
with open(filepath, 'wb') as f:
pickle.dump((self.volume ,self.img_url_map), f)
def is_buffer(self):
filename = 'buffer.pkl'
filepath = os.path.join(self.temp_path, filename)
return os.path.isfile(filepath)

449
Editer2.py Normal file
View File

@ -0,0 +1,449 @@
#!/usr/bin/python
# -*- coding:utf-8 -*-
import requests # 用来抓取网页的html源码
from bs4 import BeautifulSoup # 用于代替正则式 取源码中相应标签中的内容
import time # 时间相关操作
import os
from rich.progress import track as tqdm
from utils import *
import zipfile
import shutil
import re
import pickle
from PIL import Image
import time
import threading
from concurrent.futures import ThreadPoolExecutor, wait
import pickle
# from selenium import webdriver
# from selenium.webdriver.edge.options import Option
lock = threading.RLock()
class Editer(object):
def __init__(self, root_path, book_no='0000', volume_no=1):
    """Load the character map and the book's metadata page.

    Args:
        root_path: Directory that receives the EPUB and the temporary
            working folder.
        book_no: Identifier of the book in the site's URLs.
        volume_no: 1-based number of the volume to download.
    """
    self.url_head = 'https://www.linovelib.com'
    self.url_head_mobile = 'https://www.bilinovel.com'
    self.header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36 Edg/87.0.664.47', 'referer': self.url_head, 'Accept-Language': 'zh-CN,zh;q=0.9',}
    self.header_mobile = {'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1', 'referer': self.url_head_mobile, 'Accept-Language': 'zh-CN,zh;q=0.9', 'cookie':'night=1'}
    self.main_page = f'{self.url_head}/novel/{book_no}.html'
    self.cata_page = f'{self.url_head}/novel/{book_no}/catalog'
    self.read_tool_page = f'{self.url_head}/themes/zhmb/js/readtool.js'
    self.color_chap_name = '插图'
    self.color_page_name = '彩页'
    self.html_buffer = dict()
    self.get_secret_map()
    main_html = self.get_html(self.main_page)
    bf = BeautifulSoup(main_html, 'html.parser')
    self.title = bf.find('meta', {"property": "og:novel:book_name"})['content']
    self.author = bf.find('meta', {"property": "og:novel:author"})['content']
    # Test the match explicitly instead of hiding every error behind a bare
    # ``except``. 'cid' is the placeholder that check_url() reports as invalid.
    cover_match = re.search(r'src=\"(.*?)\"', str(bf.find('div', {"class": "book-img fl"})))
    self.cover_url = cover_match.group(1) if cover_match else 'cid'
    self.img_url_map = dict()
    self.volume_no = volume_no
    self.epub_path = root_path
    self.temp_path = (os.path.join(self.epub_path, 'temp_'+ check_chars(self.title) + '_' + str(self.volume_no)))
    self.missing_last_chap_list = []
    self.is_color_page = True
    self.page_url_map = dict()
    self.ignore_urls = []
    self.url_buffer = []
    self.max_thread_num = 8
    self.pool = ThreadPoolExecutor(self.max_thread_num)
# 获取html文档内容
def get_html(self, url, is_gbk=False, use_mobile=False):
    """Fetch *url* with ``requests`` and return the response text.

    While the Cloudflare "restrict access" page is returned, the request is
    repeated every five seconds.

    Args:
        url: Address to fetch.
        is_gbk: When true, decode the response as GBK.
        use_mobile: When true, send the mobile headers instead of the
            desktop ones.
    """
    header = self.header_mobile if use_mobile else self.header
    time.sleep(0.5)
    response = requests.get(url, headers=header)
    while 'used Cloudflare to restrict access</title>' in response.text:
        time.sleep(5)
        response = requests.get(url, headers=header)
    if is_gbk:
        response.encoding = 'GBK'
    return response.text
def get_html_content(self, url, is_buffer=False, use_mobile=False):
if use_mobile:
header = self.header_mobile
else:
header = self.header
if is_buffer:
while not url in self.html_buffer.keys():
time.sleep(0.1)
if url in self.html_buffer.keys():
return self.html_buffer[url]
while True:
try:
req=requests.get(url, headers=header)
break
except Exception as e:
pass
lock.acquire()
self.html_buffer[url] = req.content
lock.release()
return req.content
def get_secret_map(self):
with open('secret_map.cfg', 'rb') as f:
self.secret_map = pickle.load(f)
def get_secret_map(self):
url = 'https://gitee.com/bosswangs/bili-novel-map/releases/tag/secret_map'
html = self.get_html(url, is_gbk=True)
# print(html)
url_download = 'https://gitee.com' + re.search(r'{\"download_url\":\"(.*?).cfg', html).group(1)+'.cfg'
version = re.search(r'_v(.*?).cfg', url_download).group(1)
path = 'secret_map.cfg'
if os.path.exists(path):
with open(path, 'rb') as f:
map = pickle.load(f)
version_exist = map[1]
if version == version_exist:
self.secret_map = map[0]
return
content = self.get_html_content(url_download)
if content is not None:
with open(path, 'wb') as f:
f.write(content) #写入二进制内容
with open(path, 'rb') as f:
map = pickle.load(f)
self.secret_map = map[0]
return
def make_folder(self):
os.makedirs(self.temp_path, exist_ok=True)
self.text_path = os.path.join(self.temp_path, 'OEBPS/Text')
os.makedirs(self.text_path, exist_ok=True)
self.img_path = os.path.join(self.temp_path, 'OEBPS/Images')
os.makedirs(self.img_path, exist_ok=True)
def get_index_url(self):
self.volume = {}
self.volume['chap_urls'] = []
self.volume['chap_names'] = []
chap_html_list = self.get_chap_list(is_print=False)
if len(chap_html_list)<self.volume_no:
print('输入卷号超过实际卷数!')
return False
volume_array = self.volume_no - 1
chap_html = chap_html_list[volume_array]
self.volume['book_name'] = chap_html.find('h2', {'class': 'v-line'}).text
chap_list = chap_html.find_all('li', {'class', 'col-4'})
for chap_html in chap_list:
self.volume['chap_names'].append(chap_html.text)
self.volume['chap_urls'].append(self.url_head_mobile + chap_html.find('a').get('href'))
return True
def get_chap_list(self, is_print=True):
    """Fetch the catalogue page and print or return its volume blocks.

    Args:
        is_print: When true, print ``[n] <volume title>`` for each volume
            and return ``None``; otherwise return the parsed volume
            elements.

    Returns:
        The list of ``div.volume.clearfix`` elements, or ``None`` when
        *is_print* is true.
    """
    cata_html = self.get_html(self.cata_page, is_gbk=False)
    soup = BeautifulSoup(cata_html, 'html.parser')
    # The attribute filter must be a dict; the original passed a set literal.
    volume_html_list = soup.find_all('div', {'class': 'volume clearfix'})
    if not is_print:
        return volume_html_list
    for index, volume_html in enumerate(volume_html_list, start=1):
        print(f'[{index}]', volume_html.find('h2', {'class': 'v-line'}).text)
    return None
def get_page_text(self, content_html):
    """Extract the plain text of one chapter page and undo the character scrambling.

    Every ``<img>`` tag is registered in ``self.img_url_map`` (URL ->
    zero-padded running number) and replaced by an ``[img:NN]`` marker on its
    own line. Image ``00`` is the cover and is removed from the text. The
    result is passed through ``restore_chars``.

    Args:
        content_html: HTML source of the page.

    Returns:
        str: Restored text of the content container with image markers.
    """
    soup = BeautifulSoup(content_html, 'html.parser')
    text_html = str(soup.find('div', {'id': 'acontentz', 'class': 'bcontent'}))
    for img_tag in re.findall(r"<img .*?>", text_html):
        url_match = re.search(r'.[a-zA-Z]{3}/(.*?).(jpg|png|jpeg)', img_tag)
        if url_match is None:
            # No recognisable image URL: leave the tag alone instead of
            # crashing; get_text() below strips it.
            continue
        img_url_name = url_match.group(1)
        img_url_tail = url_match.group(0).split('.')[-1]
        img_url = f'https://img3.readpai.com/{img_url_name}.{img_url_tail}'
        text_html = text_html.replace('<br/>\n' + img_tag + '\n<br/>', img_tag)
        if img_url not in self.img_url_map:
            self.img_url_map[img_url] = str(len(self.img_url_map)).zfill(2)
        img_no = self.img_url_map[img_url]
        img_symbol = f'<p>[img:{img_no}]</p>'
        # Compare the number itself: the former substring test
        # ("'00' in img_symbol") also matched image 100, 200, ... and
        # silently dropped them as if they were the cover.
        if img_no == '00':
            text_html = text_html.replace(img_tag, '')
        else:
            text_html = text_html.replace(img_tag, img_symbol)
            symbol_index = text_html.index(img_symbol)
            if text_html[symbol_index-1] != '\n':
                text_html = text_html[:symbol_index] + '\n' + text_html[symbol_index:]
    text = BeautifulSoup(text_html, 'html.parser').get_text()
    return self.restore_chars(text)
def get_chap_text(self, url, chap_name, return_next_chapter=False):
text_chap = ''
page_no = 1
url_ori = url
next_chap_url = None
while True:
if page_no == 1:
str_out = chap_name
else:
str_out = f' 正在下载第{page_no}页......'
print(str_out)
content_html = self.get_html(url, is_gbk=False, use_mobile=True)
text = self.get_page_text(content_html)
text_chap += text
url_new = url_ori.replace('.html', '_{}.html'.format(page_no+1))[len(self.url_head):]
if url_new in content_html:
page_no += 1
url = self.url_head_mobile + url_new
else:
if return_next_chapter:
# next_chap_url = self.url_head_mobile + re.search(r'书签</a><a href="(.*?)">下一章</a>', content_html).group(1)
bf = BeautifulSoup(content_html, 'html.parser')
next_chap_url = bf.find('link', {'rel': 'prerender'}).get('href')
break
return text_chap, next_chap_url
def get_text(self):
    """Download all chapters of the volume and write them as XHTML files.

    Ordinary chapters go to ``NN.xhtml`` in ``self.text_path``. The chapter
    named ``self.color_chap_name`` is kept apart and written to
    ``color.xhtml`` after removing lines whose image already appears in an
    ordinary chapter.
    """
    self.make_folder()
    img_strs = []  # image tags already used by ordinary chapters
    # Start empty so the colour-page step below cannot hit an unbound name
    # when no chapter title equals color_chap_name.
    text_html_color = []
    text_no = 0  # counts ordinary chapters only; chap_no counts all chapters
    for chap_no, (chap_name, chap_url) in enumerate(zip(self.volume['chap_names'], self.volume['chap_urls'])):
        is_fix_next_chap_url = chap_name in self.missing_last_chap_list
        text, next_chap_url = self.get_chap_text(chap_url, chap_name, return_next_chapter=is_fix_next_chap_url)
        if is_fix_next_chap_url:
            # Forward repair: zip() reads the list lazily, so the next
            # iteration already sees the repaired URL.
            self.volume['chap_urls'][chap_no+1] = next_chap_url
        if chap_name == self.color_chap_name:
            text_html_color = text2htmls(self.color_page_name, text)
            continue
        text_html = text2htmls(chap_name, text)
        textfile = self.text_path + f'/{str(text_no).zfill(2)}.xhtml'
        with open(textfile, 'w+', encoding='utf-8') as f:
            f.writelines(text_html)
        for text_line in text_html:
            img_str = re.search(r"<img(.*?)\/>", text_line)
            if img_str is not None:
                img_strs.append(img_str.group(0))
        text_no += 1
    if self.is_color_page:
        # Drop colour-page lines whose image is repeated later in the book.
        text_html_color_new = [
            line for line in text_html_color
            if not any(img_str in line for img_str in img_strs)
        ]
        with open(self.text_path + '/color.xhtml', 'w+', encoding='utf-8') as f:
            f.writelines(text_html_color_new)
def get_image(self, is_gui=False, signal=None):
for url in self.img_url_map.keys():
self.pool.submit(self.get_html_content, url)
img_path = self.img_path
if is_gui:
len_iter = len(self.img_url_map.items())
signal.emit('start')
for i, (img_url, img_name) in enumerate(self.img_url_map.items()):
content = self.get_html_content(img_url, is_buffer=True)
with open(img_path+f'/{img_name}.jpg', 'wb') as f:
f.write(content) #写入二进制内容
signal.emit(int(100*(i+1)/len_iter))
signal.emit('end')
else:
for img_url, img_name in tqdm(self.img_url_map.items()):
content = self.get_html_content(img_url)
with open(img_path+f'/{img_name}.jpg', 'wb') as f:
f.write(content) #写入二进制内容
def get_cover(self, is_gui=False, signal=None):
    """Write ``cover.xhtml`` sized after image ``00.jpg``.

    If the image cannot be opened, the error is printed and a 300x300 cover
    page is written instead.

    Args:
        is_gui: When true, emit ``(image_path, height, width)`` on *signal*.
        signal: Object with an ``emit`` method; used only when *is_gui*.
    """
    cover_page = os.path.join(self.text_path, 'cover.xhtml')
    img_w = img_h = 300
    try:
        imgfile = os.path.join(self.img_path, '00.jpg')
        img_w, img_h = Image.open(imgfile).size
        if is_gui:
            signal.emit((imgfile, img_h, img_w))
    except Exception as e:
        print(e)
        print('没有封面图片请自行用第三方EPUB编辑器手动添加封面')
    with open(cover_page, 'w+', encoding='utf-8') as f:
        f.writelines(get_cover_html(img_w, img_h))
def check_volume(self, is_gui=False, signal=None, editline=None):
chap_names = self.volume['chap_names']
chap_num = len(self.volume['chap_names'])
for chap_no, url in enumerate(self.volume['chap_urls']):
if self.check_url(url):
if not self.prev_fix_url(chap_no, chap_num): #先尝试反向递归修复
if chap_no == 0: #第一个章节都反向修复失败 说明后面章节全部缺失,只能手动输入第一个章节,保证第一个章节一定有效
self.volume['chap_urls'][0] = self.hand_in_url(chap_names[chap_no], is_gui, signal, editline)
else: #其余章节反向修复失败 默认使用正向修复
self.missing_last_chap_list.append(chap_names[chap_no-1])
#没有检测到插图页,手动输入插图页标题
if self.color_chap_name not in self.volume['chap_names']:
self.color_chap_name = self.hand_in_color_page_name(is_gui, signal, editline)
#没有彩页 但主页封面存在,将主页封面设为书籍封面
if self.color_chap_name=='' and (not self.check_url(self.cover_url)):
self.is_color_page = False
self.img_url_map[self.cover_url] = str(len(self.img_url_map)).zfill(2)
print('**************')
print('提示:没有彩页,但主页封面存在,将使用主页的封面图片作为本卷图书封面')
print('**************')
def check_url(self, url):
    """Return True when *url* contains ``javascript`` or ``cid`` (unusable link)."""
    return any(marker in url for marker in ('javascript', 'cid'))
def get_prev_url(self, chap_no): #获取前一个章节的链接
content_html = self.get_html(self.volume['chap_urls'][chap_no], is_gbk=False)
# next_url = self.url_head + re.search(r'<div class="mlfy_page"><a href="(.*?)">上一章</a>', content_html).group(1)
next_url = self.url_head_mobile + re.search('var prevpage=\"(.*?)\";var', content_html).group(1)
return next_url
def prev_fix_url(self, chap_no, chap_num): #反向递归修复缺失链接后修复前若成功修复返回True否则返回False
if chap_no==chap_num-1: #最后一个章节直接选择不修复 返回False
return False
elif self.check_url(self.volume['chap_urls'][chap_no+1]):
if self.prev_fix_url(chap_no+1, chap_num):
self.volume['chap_urls'][chap_no] = self.get_prev_url(chap_no+1)
return True
else:
return False
else:
self.volume['chap_urls'][chap_no] = self.get_prev_url(chap_no+1)
return True
def hand_in_msg(self, error_msg='', is_gui=False, signal=None, editline=None):
    """Ask the user for a value and return it.

    In console mode the prompt is passed to ``input``. In GUI mode the prompt
    is printed, ``'hang'`` is emitted on *signal*, and the method polls once
    a second until *editline* is hidden, then returns and clears its text.
    """
    if not is_gui:
        return input(error_msg)
    print(error_msg)
    signal.emit('hang')
    time.sleep(1)
    while not editline.isHidden():
        time.sleep(1)
    content = editline.text()
    editline.clear()
    return content
def hand_in_url(self, chap_name, is_gui=False, signal=None, editline=None):
error_msg = f'章节\"{chap_name}\"连接失效,请手动输入该章节链接(手机版“{self.url_head}”开头的链接):'
return self.hand_in_msg(error_msg, is_gui, signal, editline)
def hand_in_color_page_name(self, is_gui=False, signal=None, editline=None):
if is_gui:
error_msg = f'插图页面不存在,需要下拉选择插图页标题,若不需要插图页则保持本栏为空直接点确定:'
editline.addItems(self.volume['chap_names'])
editline.setCurrentIndex(-1)
else:
error_msg = f'插图页面不存在,需要手动输入插图页标题,若不需要插图页则不输入直接回车:'
return self.hand_in_msg(error_msg, is_gui, signal, editline)
def get_toc(self):
    """Write ``OEBPS/toc.ncx`` for the volume.

    When a colour page is in use, its title is removed from
    ``self.volume["chap_names"]`` (in place) so it gets no TOC entry.
    """
    chap_names = self.volume["chap_names"]
    # Only remove the title if it is really listed; list.index() used to
    # raise ValueError when the stored title was not among the chapters.
    if self.is_color_page and self.color_chap_name in chap_names:
        chap_names.remove(self.color_chap_name)
    toc_htmls = get_toc_html(self.title, chap_names)
    textfile = self.temp_path + '/OEBPS/toc.ncx'
    with open(textfile, 'w+', encoding='utf-8') as f:
        f.writelines(toc_htmls)
def get_content(self):
    """Write ``OEBPS/content.opf`` from the title, author, chapter count and image count."""
    chapter_count = len(self.volume["chap_names"])
    image_count = len(os.listdir(self.img_path))
    book_title = self.title + '-' + self.volume['book_name']
    content_htmls = get_content_html(book_title, self.author, chapter_count, image_count, self.is_color_page)
    with open(self.temp_path + '/OEBPS/content.opf', 'w+', encoding='utf-8') as f:
        f.writelines(content_htmls)
def get_epub_head(self):
    """Write the ``mimetype`` file and ``META-INF/container.xml``."""
    with open(self.temp_path + '/mimetype', 'w+', encoding='utf-8') as f:
        f.write('application/epub+zip')
    metainf_folder = os.path.join(self.temp_path, 'META-INF')
    os.makedirs(metainf_folder, exist_ok=True)
    container_htmls = get_container_html()
    with open(metainf_folder + '/container.xml', 'w+', encoding='utf-8') as f:
        f.writelines(container_htmls)
def get_epub(self):
    """Zip the working folder into the final ``.epub`` and delete the folder.

    ``buffer.pkl`` is removed first so it does not end up in the archive.

    Returns:
        str: Path of the created EPUB file.
    """
    os.remove(os.path.join(self.temp_path, 'buffer.pkl'))
    epub_file = (self.epub_path + '/' + check_chars(self.title) + '-' + check_chars(self.volume['book_name']) + '.epub')
    with zipfile.ZipFile(epub_file, "w", zipfile.ZIP_DEFLATED) as archive:
        for dirpath, _, filenames in os.walk(self.temp_path):
            # Strip the working-folder prefix so archive names are relative.
            rel_dir = dirpath.replace(self.temp_path, '')
            prefix = rel_dir + os.sep if rel_dir else ''
            for filename in filenames:
                archive.write(os.path.join(dirpath, filename), prefix + filename)
    shutil.rmtree(self.temp_path)
    return epub_file
# 恢复函数根据secret_map进行恢复
def restore_chars(self, text):
restored_text = ""
i = 0
while i < len(text):
char = text[i]
if char in self.secret_map:
restored_text += self.secret_map[char]
else:
restored_text += char
i += 1
return restored_text
def buffer(self):
filename = 'buffer.pkl'
filepath = os.path.join(self.temp_path, filename)
if os.path.isfile(filepath):
with open(filepath, 'rb') as f:
self.volume, self.img_url_map = pickle.load(f)
self.text_path = os.path.join(self.temp_path, 'OEBPS/Text')
os.makedirs(self.text_path, exist_ok=True)
self.img_path = os.path.join(self.temp_path, 'OEBPS/Images')
os.makedirs(self.img_path, exist_ok=True)
else:
with open(filepath, 'wb') as f:
pickle.dump((self.volume ,self.img_url_map), f)
def is_buffer(self):
filename = 'buffer.pkl'
filepath = os.path.join(self.temp_path, filename)
return os.path.isfile(filepath)

77
README.md Normal file
View File

@ -0,0 +1,77 @@
<div align="center">
<img src="resource/logo_big.png" width="300" style="margin-right: 3000px;"/>
</div>
<h1 align="center">
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;EPUB下载器
</h1>
[哔哩轻小说](https://www.linovelib.com)(linovelib)网站小说下载EPUB打包。
特性:
* Fluent Design风格界面,下载进度与书籍封面显示,主题切换,下载目录自定义。
* 前后端分离,同时支持命令行版本。
* EPUB格式打包支持多种阅读器。
* 正文黑白插图和彩页插图智能排版。
* 书籍批量下载。
* 图片多线程下载。
<!-- * <del>多线程预缓存策略,下载速度快。</del>(网站限制访问频率,默认单线程下载) -->
* 缺失链接自动修复。
* 自定义彩页。
* ...................
有建议或bug可以提issue。由于软件更新频繁,可以加QQ群获得更多信息:563072544
图形界面使用[PyQt-Fluent-Widgets](https://pyqt-fluent-widgets.readthedocs.io/en/latest/index.html)界面编写。
[release](https://github.com/ShqWW/bilinovel-download/releases/tag/exe)页面发布了已经打包好的exe可执行程序,包括图形化版本和命令行版本(系统最低要求Windows 10)。
界面样例:
<div align="center">
<img src="resource/example1.png" width="400"/>
<img src="resource/example2.png" width="400"/>
</div>
PS:暂不支持漫画的排版(本来也没几个漫画)。
## 使用前安装需要的包
```
pip install -r requirements.txt -i https://pypi.org/simple/
```
## 使用命令行模式运行(无需安装图形界面库,支持Linux):
```
python bilinovel.py
```
## 使用图形界面运行:
```
python bilinovel_gui.py
```
## 使用pyinstaller打包:
```
pip install pyinstaller
```
```
pyinstaller -F -w -i .\resource\logo.png .\bilinovel_gui.py
```
## 相关项目:
* [轻小说文库EPUB下载器](https://github.com/ShqWW/lightnovel-download)
* [哔哩轻小说EPUB下载器](https://github.com/ShqWW/bilinovel-download)
* [拷贝漫画EPUB下载器](https://github.com/ShqWW/copymanga-download)
## EPUB书籍编辑和管理工具推荐
1. [Sigil](https://sigil-ebook.com/)
2. [Calibre](https://www.calibre-ebook.com/)

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

145
bilinovel.py Normal file
View File

@ -0,0 +1,145 @@
import argparse
from Editer2 import Editer
import os
import shutil
from utils import *
def parse_args():
    """Parse the command-line arguments.

    Options:
        --book_no: Book identifier (string, default ``'0000'``).
        --volume_no: Volume selector as a string (default ``'1'``). It stays
            a string because ``downloader_router`` parses it itself and also
            accepts ranges (``2-4``) and lists (``1,3``).
        --no_input: Run once with the given arguments instead of prompting.
            Takes an optional true/false value; the bare flag means true.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    def str2bool(value):
        # ``type=bool`` treats every non-empty string, including "False",
        # as True, so parse the text explicitly.
        lowered = value.strip().lower()
        if lowered in ('1', 'true', 't', 'yes', 'y', 'on'):
            return True
        if lowered in ('', '0', 'false', 'f', 'no', 'n', 'off'):
            return False
        raise argparse.ArgumentTypeError(f'expected a boolean value, got {value!r}')

    parser = argparse.ArgumentParser(description='config')
    parser.add_argument('--book_no', default='0000', type=str)
    parser.add_argument('--volume_no', default='1', type=str)
    parser.add_argument('--no_input', default=False, type=str2bool, nargs='?', const=True)
    return parser.parse_args()
def query_chaps(book_no):
    """Print the title, author and volume list of book *book_no*."""
    divider = '*******************************'
    print('未输入卷号,将返回书籍目录信息......')
    editer = Editer(root_path='./out', book_no=book_no)
    print(divider)
    print(editer.title, editer.author)
    print(divider)
    editer.get_chap_list()
    print(divider)
    print('请输入所需要的卷号进行下载(多卷可以用英文逗号分隔或直接使用连字符,详情见说明)')
temp_path = ''


def delete_tmp():
    """Print the module-level ``temp_path`` and delete that directory tree if it exists."""
    print(temp_path)
    if not os.path.exists(temp_path):
        return
    shutil.rmtree(temp_path)
def download_single_volume(root_path,
                           book_no,
                           volume_no,
                           is_gui=False,
                           hang_signal=None,
                           progressring_signal=None,
                           cover_signal=None,
                           edit_line_hang=None):
    """Download one volume and package it as an EPUB.

    If the volume's working folder already holds a ``buffer.pkl``, the text
    download is skipped and the saved state is reloaded.

    Args:
        root_path: Output directory.
        book_no: Book identifier.
        volume_no: 1-based volume number.
        is_gui: Whether the GUI signals below are used.
        hang_signal: Passed to ``check_volume`` for user prompts.
        progressring_signal: Passed to ``get_image`` for progress reports.
        cover_signal: Passed to ``get_cover``.
        edit_line_hang: Input widget passed to ``check_volume``.
    """
    editer = Editer(root_path=root_path, book_no=book_no, volume_no=volume_no)
    print('正在积极地获取书籍信息....')
    if not editer.get_index_url():
        print('书籍信息获取失败')
        return
    print(editer.title + '-' + editer.volume['book_name'], editer.author)
    print('****************************')
    # NOTE(review): this binds a function-local name; the module-level
    # ``temp_path`` read by delete_tmp() is never updated. Confirm whether
    # ``global temp_path`` was intended before changing it.
    temp_path = editer.temp_path
    if not editer.is_buffer():
        editer.check_volume(is_gui=is_gui, signal=hang_signal, editline=edit_line_hang)
        print('正在下载文本....')
        print('*********************')
        editer.get_text()
        print('*********************')
        editer.buffer()  # save the state so an interrupted run can resume
    else:
        print('检测到文本文件,直接下载插图')
        editer.buffer()  # reload the saved state
    print('正在下载插图.....................................')
    editer.get_image(is_gui=is_gui, signal=progressring_signal)
    print('正在编辑元数据....')
    editer.get_cover(is_gui=is_gui, signal=cover_signal)
    editer.get_toc()
    editer.get_content()
    editer.get_epub_head()
    print('正在生成电子书....')
    epub_file = editer.get_epub()
    # The closing bracket was missing from the message.
    print('生成成功!', f'电子书路径【{epub_file}】')
def _parse_volume_spec(spec):
    """Expand a volume spec ('3', '1-3' or '1,2,3') into positive ints; None if invalid."""
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as superscript digits that int() cannot convert.
    if spec.isdecimal():
        numbers = [int(spec)]
    elif '-' in spec:
        bounds = [part.strip() for part in spec.split('-')]
        # Exactly two numeric bounds are required ('1-2-3' or '1-' are invalid).
        if len(bounds) != 2 or not all(part.isdecimal() for part in bounds):
            return None
        first, last = int(bounds[0]), int(bounds[1])
        if first >= last:
            return None
        numbers = list(range(first, last + 1))
    elif ',' in spec:
        parts = [part.strip() for part in spec.split(',')]
        if not all(part.isdecimal() for part in parts):
            return None
        numbers = [int(part) for part in parts]
    else:
        return None
    # Volume numbers start at 1 in every accepted form.
    if any(number <= 0 for number in numbers):
        return None
    return numbers


def downloader_router(root_path,
                      book_no,
                      volume_no,
                      is_gui=False,
                      hang_signal=None,
                      progressring_signal=None,
                      cover_signal=None,
                      edit_line_hang=None):
    """Validate the user's input and dispatch to the matching action.

    An empty ``volume_no`` lists the book's chapters via ``query_chaps``; a
    single number downloads that volume; ``a-b`` (with a < b) or a
    comma-separated list downloads each volume in turn.

    Args:
        root_path: Output directory forwarded to ``download_single_volume``.
        book_no: Book number; must not be empty.
        volume_no: Volume spec as text. An ``int`` is accepted as well and is
            converted to text first.
        is_gui, hang_signal, progressring_signal, cover_signal, edit_line_hang:
            Forwarded unchanged to ``download_single_volume``.

    Returns:
        None. Invalid input is reported on stdout.
    """
    invalid_msg = '请检查输入是否完整正确!'
    if not book_no:
        print(invalid_msg)
        return
    spec = '' if volume_no is None else str(volume_no).strip()
    if spec == '':
        query_chaps(book_no)
        return
    volume_no_list = _parse_volume_spec(spec)
    if volume_no_list is None:
        print(invalid_msg)
        return
    if spec.isdecimal():
        # Single volume: no "all tasks finished" summary line.
        download_single_volume(root_path, book_no, volume_no_list[0], is_gui, hang_signal, progressring_signal, cover_signal, edit_line_hang)
        return
    for single_volume_no in volume_no_list:
        download_single_volume(root_path, book_no, single_volume_no, is_gui, hang_signal, progressring_signal, cover_signal, edit_line_hang)
    print('所有下载任务都已经完成!')
if __name__=='__main__':
    # Command-line entry point: one-shot mode with --no_input, otherwise an
    # interactive prompt loop.
    args = parse_args()
    if args.no_input:
        downloader_router(root_path='out', book_no=args.book_no, volume_no=args.volume_no)
    else:
        while True:
            try:
                args.book_no = input('请输入书籍号:')
                args.volume_no = input('请输入卷号(查看目录信息不输入直接按回车,下载多卷请使用逗号分隔或者连字符-)')
            except (EOFError, KeyboardInterrupt):
                # stdin closed or Ctrl-C at the prompt: leave the loop
                # instead of ending with a traceback.
                print()
                break
            downloader_router(root_path='out', book_no=args.book_no, volume_no=args.volume_no)
    # args.book_no = '2931'
    # args.volume_no = '7'
    # downloader_router(root_path='out', book_no=args.book_no, volume_no=args.volume_no)
    # exit(0)

337
bilinovel_gui.py Normal file
View File

@ -0,0 +1,337 @@
# coding:utf-8
from PyQt5.QtCore import Qt, pyqtSignal, QObject, QThread, QRegExp
from PyQt5.QtGui import QIcon, QFont, QTextCursor, QPixmap, QColor,QRegExpValidator
from PyQt5.QtWidgets import QApplication, QFrame, QGridLayout, QFileDialog
from qfluentwidgets import (setTheme, Theme, PushSettingCard, SettingCardGroup, ExpandLayout, TextEdit, ImageLabel, LineEdit, PushButton, Theme, ProgressRing, setTheme, Theme, OptionsSettingCard, OptionsConfigItem, OptionsValidator, FluentWindow, SubtitleLabel, NavigationItemPosition, setThemeColor, qconfig, EditableComboBox, BoolValidator)
from qfluentwidgets import FluentIcon as FIF
import sys
import base64
import shutil
from resource.logo import logo_base64
from resource.book import book_base64
from bilinovel import *
# Shared fonts: the 18 pt one is applied to the book/volume labels, their line
# edits and the main window; the 11 pt one to the log view and the hang combo box.
font_label = QFont('微软雅黑', 18)
font_msg = QFont('微软雅黑', 11)
class MainThread(QThread):
    """Worker thread that runs one ``downloader_router`` request for a HomeWidget."""

    def __init__(self, parent):
        super().__init__()
        # The HomeWidget whose inputs are read and whose signals are emitted.
        self.parent = parent

    def run(self):
        """Clear the log, run the download, and always emit ``end_signal``."""
        home = self.parent
        home.clear_signal.emit('')
        try:
            book_text = home.editline_book.text()
            volume_text = home.editline_volumn.text()
            downloader_router(
                home.parent.out_path,
                book_text,
                volume_text,
                True,
                home.hang_signal,
                home.progressring_signal,
                home.cover_signal,
                home.editline_hang,
            )
            home.end_signal.emit('')
        except Exception as e:
            # Re-enable the UI first, then report the failure in the log view.
            home.end_signal.emit('')
            print('错误,请检查网络情况或确认输入是否正确')
            print('错误信息:')
            print(e)

    def terminate(self) -> None:
        """Delegate to ``QThread.terminate`` and pass its result through."""
        return super().terminate()
class EmittingStr(QObject):
    """Minimal stream stand-in that forwards every written chunk through a Qt signal."""

    # Signal carrying the written text as a str.
    textWritten = pyqtSignal(str)

    def write(self, text):
        """Emit ``text`` (converted with ``str``) on ``textWritten``."""
        chunk = str(text)
        self.textWritten.emit(chunk)

    def flush(self):
        """Nothing is buffered, so there is nothing to flush."""
        return None

    def isatty(self):
        """Present for stream compatibility; returns None (falsy)."""
        return None
class SettingWidget(QFrame):
    """Settings page with the download-folder card and the theme selector."""

    def __init__(self, text: str, parent=None):
        """Build the settings cards.

        Args:
            text: Used (with spaces replaced by '-') as the Qt object name.
            parent: Owner window; its ``out_path`` and ``set_theme`` are used.
        """
        super().__init__(parent=parent)
        self.parent = parent
        self.expandLayout = ExpandLayout(self)
        self.setObjectName(text.replace(' ', '-'))
        self.setting_group = SettingCardGroup(self.tr("下载设置"), self)
        self.download_path_card = PushSettingCard(
            self.tr('选择文件夹'),
            FIF.DOWNLOAD,
            self.tr("下载目录"),
            self.parent.out_path,
            self.setting_group
        )
        self.themeMode = OptionsConfigItem(
            None, "ThemeMode", Theme.DARK, OptionsValidator(Theme), None)
        self.threadMode = OptionsConfigItem(
            None, "ThreadMode", True, BoolValidator())
        # NOTE(review): the first two captions are empty strings here, and
        # theme_changed() passes the selected caption on to parent.set_theme()
        # - confirm the intended light/dark labels.
        self.theme_card = OptionsSettingCard(
            self.themeMode,
            FIF.BRUSH,
            self.tr('应用主题'),
            self.tr("更改外观"),
            texts=[
                self.tr(''), self.tr(''),
                self.tr('跟随系统设置')
            ],
            parent=self.parent
        )
        self.setting_group.addSettingCard(self.download_path_card)
        self.setting_group.addSettingCard(self.theme_card)
        self.expandLayout.setSpacing(28)
        self.expandLayout.setContentsMargins(20, 10, 20, 0)
        self.expandLayout.addWidget(self.setting_group)
        self.download_path_card.clicked.connect(self.download_path_changed)
        self.theme_card.optionChanged.connect(self.theme_changed)

    def download_path_changed(self):
        """ download folder card clicked slot """
        folder = QFileDialog.getExistingDirectory(
            self, self.tr("Choose folder"), self.parent.out_path)
        if not folder:
            # The dialog returns an empty string when it is cancelled; keep
            # the current download folder instead of overwriting it with ''.
            return
        self.parent.out_path = folder
        self.download_path_card.contentLabel.setText(folder)

    def theme_changed(self):
        """Apply the theme whose caption is currently shown on the theme card."""
        theme_name = self.theme_card.choiceLabel.text()
        self.parent.set_theme(theme_name)
        # Remove the ./config directory if one exists after the change
        # (presumably written by the theme framework - TODO confirm).
        if os.path.exists('./config'):
            shutil.rmtree('./config')
class HomeWidget(QFrame):
    """Main page: book/volume inputs, log view, cover preview and progress ring."""

    # Signals emitted by the download code and handled by the slots below.
    progressring_signal = pyqtSignal(object)
    end_signal = pyqtSignal(object)
    hang_signal = pyqtSignal(object)
    clear_signal = pyqtSignal(object)
    cover_signal = pyqtSignal(object)

    def __init__(self, text: str, parent=None):
        """Build the page.

        Args:
            text: Qt object name of the page.
            parent: Owner window; its ``welcome_text`` is shown initially.
        """
        super().__init__(parent=parent)
        self.setObjectName(text)
        self.parent = parent
        self._create_widgets()
        self._arrange_widgets()
        self._connect_signals()
        # Initial state: nothing running, nothing waiting for input.
        self.progressRing.hide()
        self.btn_hang.hide()
        self.editline_hang.hide()
        self.btn_stop.setEnabled(False)
        # Route everything written to stdout/stderr into the log view.
        sys.stdout = EmittingStr(textWritten=self.outputWritten)
        sys.stderr = EmittingStr(textWritten=self.outputWritten)
        self.text_screen.setText(self.parent.welcome_text)

    def _create_widgets(self):
        """Instantiate the child widgets (creation order is kept as is)."""
        self.label_book = SubtitleLabel('书号:', self)
        self.label_volumn = SubtitleLabel('卷号:', self)
        self.editline_book = LineEdit(self)
        self.editline_volumn = LineEdit(self)
        # Book number: digits only, at most four characters. The volume field
        # gets no validator.
        digits_only = QRegExpValidator(QRegExp("\\d+"))
        self.editline_book.setValidator(digits_only)
        self.editline_book.setMaxLength(4)
        self.book_icon = QPixmap()
        self.book_icon.loadFromData(base64.b64decode(book_base64))
        self.cover_w, self.cover_h = 152, 230
        self.label_cover = ImageLabel(self.book_icon, self)
        self.label_cover.setBorderRadius(8, 8, 8, 8)
        self.label_cover.setFixedSize(self.cover_w, self.cover_h)
        self.text_screen = TextEdit()
        self.text_screen.setReadOnly(True)
        self.text_screen.setFixedHeight(self.cover_h)
        self.progressRing = ProgressRing(self)
        self.progressRing.setValue(0)
        self.progressRing.setTextVisible(True)
        self.progressRing.setFixedSize(50, 50)
        self.btn_run = PushButton('确定', self)
        self.btn_run.setShortcut(Qt.Key_Return)
        self.btn_stop = PushButton('取消', self)
        self.btn_hang = PushButton('确定', self)
        self.editline_hang = EditableComboBox(self)

    def _arrange_widgets(self):
        """Create the grid layouts, apply fonts and place the widgets."""
        self.gridLayout = QGridLayout(self)
        self.screen_layout = QGridLayout()
        self.btn_layout = QGridLayout()
        self.hang_layout = QGridLayout()
        self.label_book.setFont(font_label)
        self.label_volumn.setFont(font_label)
        self.editline_book.setFont(font_label)
        self.editline_volumn.setFont(font_label)
        self.text_screen.setFont(font_msg)
        self.editline_hang.setFont(font_msg)
        self.gridLayout.addWidget(self.editline_book, 0, 1)
        self.gridLayout.addWidget(self.editline_volumn, 1, 1)
        self.gridLayout.addWidget(self.label_book, 0, 0)
        self.gridLayout.addWidget(self.label_volumn, 1, 0)
        self.gridLayout.addLayout(self.btn_layout, 2, 1, 1, 1)
        self.btn_layout.addWidget(self.btn_run, 2, 1)
        self.btn_layout.addWidget(self.btn_stop, 2, 2)
        self.gridLayout.addLayout(self.screen_layout, 3, 0, 2, 2)
        # The progress ring shares cell (0, 1) with the cover label.
        self.screen_layout.addWidget(self.progressRing, 0, 1, Qt.AlignLeft|Qt.AlignBottom)
        self.screen_layout.addWidget(self.text_screen, 0, 0)
        self.screen_layout.addWidget(self.label_cover, 0, 1)
        self.gridLayout.addLayout(self.hang_layout, 5, 0, 1, 2)
        self.hang_layout.addWidget(self.editline_hang, 0, 0)
        self.hang_layout.addWidget(self.btn_hang, 0, 1)
        self.screen_layout.setContentsMargins(0,0,0,0)
        self.btn_layout.setContentsMargins(0,0,0,0)
        self.gridLayout.setContentsMargins(20, 10, 20, 10)

    def _connect_signals(self):
        """Connect button clicks and the class signals to their slots."""
        self.btn_run.clicked.connect(self.process_start)
        self.btn_stop.clicked.connect(self.process_stop)
        self.btn_hang.clicked.connect(self.process_continue)
        self.progressring_signal.connect(self.progressring_msg)
        self.end_signal.connect(self.process_end)
        self.hang_signal.connect(self.process_hang)
        self.clear_signal.connect(self.clear_screen)
        self.cover_signal.connect(self.display_cover)

    def _show_placeholder_cover(self):
        """Show the built-in book icon at the default cover size."""
        self.label_cover.setImage(self.book_icon)
        self.label_cover.setFixedSize(self.cover_w, self.cover_h)

    def process_start(self):
        """Lock the run button and start a MainThread for the current inputs."""
        self._show_placeholder_cover()
        self.btn_run.setEnabled(False)
        self.btn_run.setText('正在下载')
        self.btn_stop.setEnabled(True)
        self.main_thread = MainThread(self)
        self.main_thread.start()

    def process_end(self, input=None):
        """Restore the idle UI; ``'refresh'`` also resets the cover and the log."""
        self.btn_run.setEnabled(True)
        self.btn_run.setText('开始下载')
        self.btn_run.setShortcut(Qt.Key_Return)
        self.btn_stop.setEnabled(False)
        self.progressRing.hide()
        self.btn_hang.hide()
        self.editline_hang.clear()
        self.editline_hang.hide()
        if input != 'refresh':
            return
        self._show_placeholder_cover()
        self.clear_signal.emit('')
        self.text_screen.setText(self.parent.welcome_text)

    def outputWritten(self, text):
        """Append ``text`` to the log; follow the end only if the view was near the bottom."""
        log_cursor = self.text_screen.textCursor()
        bar = self.text_screen.verticalScrollBar()
        # Measured before inserting, with a 15-unit tolerance.
        was_at_bottom = bar.value() >= bar.maximum() - 15
        log_cursor.movePosition(QTextCursor.End)
        log_cursor.insertText(text)
        if was_at_bottom:
            self.text_screen.setTextCursor(log_cursor)

    def clear_screen(self):
        """Empty the log view."""
        self.text_screen.clear()

    def display_cover(self, signal_msg):
        """Show a cover image, scaled to the fixed cover height.

        Args:
            signal_msg: Triple ``(filepath, img_h, img_w)``.
        """
        filepath, img_h, img_w = signal_msg
        self.label_cover.setImage(filepath)
        scaled_w = int(img_w*self.cover_h/img_h)
        self.label_cover.setFixedSize(scaled_w, self.cover_h)

    def progressring_msg(self, input):
        """Handle ``'start'``, ``'end'`` or a progress value for the ring."""
        if input == 'start':
            self._show_placeholder_cover()
            self.progressRing.show()
            return
        if input == 'end':
            self.progressRing.hide()
            self.progressRing.setValue(0)
            return
        self.progressRing.setValue(input)

    def process_hang(self, input=None):
        """Show the hang combo box and its confirm button."""
        self.btn_hang.setEnabled(True)
        self.btn_hang.setShortcut(Qt.Key_Return)
        self.btn_hang.show()
        self.editline_hang.show()

    def process_continue(self, input=None):
        """Hide the hang controls again."""
        self.btn_hang.hide()
        self.btn_hang.setEnabled(False)
        self.editline_hang.hide()

    def process_stop(self):
        """Terminate the worker thread and request a full UI refresh."""
        self.main_thread.terminate()
        self.end_signal.emit('refresh')
class Window(FluentWindow):
    """Application window hosting the home page and the settings page."""

    def __init__(self):
        super().__init__()
        self.out_path = os.path.join(os.path.expanduser('~'), 'Downloads')
        self.head = 'https://www.linovelib.com'
        split_str = '**************************************\n '
        # Usage instructions shown in the log view of the home page.
        self.welcome_text = (
            f'使用说明共4条记得下拉\n'
            f'{split_str}1.哔哩轻小说{self.head}根据书籍网址输入书号以及下载的卷号书号最多输入4位阿拉伯数字。\n'
            f'{split_str}2.例如小说网址是{self.head}/novel/2704.html则书号输入2704。\n'
            f'{split_str}3.要查询书籍卷号卷名等信息,则可以只输入书号不输入卷号,点击确定会返回书籍卷名称和对应的卷号。\n'
            f'{split_str}4.根据上一步返回的信息确定自己想下载的卷号,要下载编号[2]对应卷则卷号输入2。想下载多卷比如[1]至[3]对应卷则卷号输入1-3或1,2,3英文逗号分隔编号也可以不连续并点击确定。'
        )
        self.homeInterface = HomeWidget('Home Interface', self)
        self.settingInterface = SettingWidget('Setting Interface', self)
        self.initNavigation()
        self.initWindow()

    def initNavigation(self):
        """Register both pages in the navigation bar."""
        self.addSubInterface(self.homeInterface, FIF.HOME, '主界面')
        self.addSubInterface(self.settingInterface, FIF.SETTING, '设置', NavigationItemPosition.BOTTOM)

    def initWindow(self):
        """Set size, icon, title and font, then centre the window."""
        self.resize(700, 460)
        icon_pixmap = QPixmap()
        icon_pixmap.loadFromData(base64.b64decode(logo_base64))
        self.setWindowIcon(QIcon(icon_pixmap))
        self.setWindowTitle('哔哩轻小说EPUB下载器')
        self.setFont(font_label)
        available = QApplication.desktop().availableGeometry()
        screen_w, screen_h = available.width(), available.height()
        self.move(screen_w//2 - self.width()//2, screen_h//2 - self.height()//2)

    def set_theme(self, mode=None):
        """Switch the theme named by ``mode`` and recolour the two input labels."""
        # NOTE(review): the first two branches compare against the same empty
        # string, so the DARK branch can never be taken as written - confirm
        # the intended light/dark captions.
        if mode=='':
            setTheme(Theme.LIGHT)
        elif mode=='':
            setTheme(Theme.DARK)
        elif mode=='跟随系统设置':
            setTheme(Theme.AUTO)
        active_theme = qconfig.theme
        if active_theme == Theme.DARK:
            rgb = (255,255,255)
        elif active_theme == Theme.LIGHT:
            rgb = (0,0,0)
        else:
            return
        for label in (self.homeInterface.label_book, self.homeInterface.label_volumn):
            label.setTextColor(QColor(*rgb))
if __name__ == '__main__':
    # High-DPI policy and attributes are set before the QApplication is created.
    QApplication.setHighDpiScaleFactorRoundingPolicy(Qt.HighDpiScaleFactorRoundingPolicy.PassThrough)
    QApplication.setAttribute(Qt.AA_EnableHighDpiScaling)
    QApplication.setAttribute(Qt.AA_UseHighDpiPixmaps)
    setTheme(Theme.DARK)
    setThemeColor('#FF7233')
    app = QApplication(sys.argv)
    w = Window()
    w.show()
    # Hand the event loop's return code to the process exit status.
    sys.exit(app.exec_())

73
debug.py Normal file
View File

@ -0,0 +1,73 @@
import requests # 用来抓取网页的html源码
# Scratch script: fetch one page with browser-like headers and dump the HTML.
# NOTE(review): the referer host is 'linovel.com' while every URL below
# targets 'linovelib.com' - confirm which one is intended.
head='https://www.linovel.com'
# url = 'https://www.linovelib.com/novel/3207/163708.html'
url1 = 'https://www.linovelib.com/novel/2662/catalog'
url = 'https://www.linovelib.com/novel/2662/183423.html'
# url = 'https://img3.readpai.com/2/2662/183423/210186.jpg'
url0 = 'https://www.linovelib.com/'
cookie = "night=1"
# Two alternative header sets; only the one bound to ``header`` is sent.
mobile_header = {'User-Agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1", 'referer': head, 'Accept-Language':'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6', 'cookie':cookie}
desktop_header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36 Edg/87.0.664.47', 'referer': head, 'Accept-Language':'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6', 'cookie':cookie}
header = desktop_header
# session = requests.Session()
# req=session.get(url1, headers=header)
# A timeout keeps the script from hanging forever on an unresponsive server.
req=requests.get(url, headers=header, timeout=10)
print(req.text)
# from selenium import webdriver
# from selenium.webdriver.edge.options import Options
# import time
# options = Options()
# options.add_argument('--start-minimized')
# driver = webdriver.Edge(options = options)
# driver2 = webdriver.Edge(options = options)
# # exit(0)
# driver.get(url0)
# cookies = driver.get_cookies()
# print(driver.page_source)
# # driver.delete_all_cookies()
# driver2.get('https://www.linovelib.com/')
# # driver2.get(url)
# time.sleep(5)
# for cookie in cookies:
# driver2.add_cookie({'name': cookie['name'], 'value': cookie['value'], 'domain': cookie['domain']})
# driver2.get(url)
# print(driver2.page_source)
# cookies_dict = dict()
# for cookie in cookies:
# cookies_dict[cookie["name"]] = cookie["value"]
# # 将 cookie 设置到 requests 的 CookieJar 中
# requests_cookiejar = requests.utils.cookiejar_from_dict(cookies_dict)
# req=requests.get(url, headers=header, cookies=requests_cookiejar)
# print(req.text)
# driver.get(url)
# cookies = driver.get_cookies()
# 打印 Cookie
# for cookie in cookies:
# print(cookie["name"])
# print('*******************')
# print(cookie["value"])
# print('*******************')
# print(driver2.page_source)
# # driver.delete_all_cookies()
# driver.get(url)
# print(driver.page_source)
# import pdb
# pdb.set_trace()
# print(driver.element)

BIN
dist/bilinovel-v0.0.3.exe vendored Normal file

Binary file not shown.

BIN
dist/secret_map.cfg vendored Normal file

Binary file not shown.

7
requirements.txt Normal file
View File

@ -0,0 +1,7 @@
# pip install -r requirements.txt -i https://pypi.org/simple/
requests
bs4
rich
pyqt5
PyQt-Fluent-Widgets[full]
selenium

Binary file not shown.

Binary file not shown.

BIN
resource/book.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 KiB

1
resource/book.py Normal file
View File

@ -0,0 +1 @@
book_base64 = 'iVBORw0KGgoAAAANSUhEUgAAAQ8AAAGKCAMAAAAljDRnAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAACHUExURQAAAL+/v7Ompq+vr66oqK+qqq+rq66qqq6rq6+pqa2rq6+tra6srK6pqa+rq62pqbCsrK+srK+srK6pqa+srK+srK+rq66qqq+rq6+rq7CsrK+rq6+rq7Crq66rq66qqrCrq6+rq6+rq6+rq6+rq6+rq66qqq6rq6+qqq+rq6+srLCrq7CsrNpNNrIAAAAmdFJOUwAEFCMsMEBITFNkZnJ0fICHnJ+go6+/wcLDx8/S19rf5+/w9fn7jVpzpgAAAAlwSFlzAAAywAAAMsABKGRa2wAABoxJREFUeF7t3Wl3ozYARuF0Y7q7q7t3mi5pM+3//32VxOuEFwOSHMAeuM+H2rFlQHdkO9A5Z+4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADwck1zDB5b4d7h0OiZPWpOJVyoogH7ctD8B+2viSY+7rirJJOr42RHq2T4s+PcXpJouiV2UURzLbODIpppqc2/bTTPCtsuoklW2XIRTbHSdotogtW2WkTTq7fRUz7NrpX+0JvmcCj6Le3lS6SJdP9GaG6t7gybgijVRVLr50sLXekyw5UvNITD09G0+vPLNikPEjZVemoQrzSsnaV7AejZ0PQySQqKlKy0Aetdfxm5ADQ6ucPUSfD0QV/Y4snSTeKVQe1qwPjOp6Y19qqwL414kbmuv2jm7edU/Bxv0uanTO2591HTNfSymWK00g7id1H6NI6P1Ec6O/wH3U7I7GS0SP91s8YYU1fkbDEU5MjvYqzIUc8na8SIaoJMfQKOK9jD2IZPX5BrxYjKv5TzHxWDioqPFEkHt2aN3qqcdHbMJe+W4hU4XCR8YK9ao6bHhQdW+o4cDvKTbtejw8nT+Frln1BjHyPr0sHkaXyt8h4vem/Ec4V47hZ/K0q3QfnZTYeOJU/ja1X0uGSJhArT5/cpT0UYvSxP4yfFP6TGv4iqelQFibvSywqUXoHR8DyNH3c8HZ1+btX1KP1WP152zafgVFAj8zR+RPfqgh5qVfYo+B67sEUruwA1Lk/jR3SnrYda1T2mgzwtwgu9hT3Gg7xoZbQW6hFnqbutOXuMBHnp0ki8x9hDJTS+tWyPoSAXbefc29mjv6pnWRrJFnrMV2MbPfTgLOjh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OHo4ejh6OGu3uOif/xpwz2i+E+xVmXZeI9WRZRd9EhCFI2asp8eSbbJznoEx8n3zv56RONJ9tkjGEmy2x7xjaNXdW2rR/ZffO45L7KtHoX/JnhH/1t4Wz38mVLdj5KN9ah9w8jzItlYj94uyp2KbK1H1LQOh8OxYr2029liDxe6FFaJW9p+j1ZRlLCpvfSIDr7/Ib0heuEsbq9H0OSWiT+tV83iJntEpR8ngV4xi5vtEVeJXpuj8bO4To/TN21HfETPPiv4LAk0OGdgp0Fvt6v2iMdznP5tI15ajgcZh0cli0RDB6UEmX0GT1e01+pxrPqtK4ll4jFmi6Td9+XTDwhHqXuttKVFelwuZPlad0e0+3/WNNUhRqTN3ViP5LVuB7X7T+ZLkaRt3mKPx8cH3Q5o9x9izNoiSpu9zR4TReLew8LQT3OKW77ZHqNF4srQ3Zmlid1uj5EgC8UI0sRuuMfkx8gC0sRuuse6QdLEbrvHqtLE5uqx3Nt6L
cc0sbl61P9vllJrvWfiLObrMbJAwllFezb7fF6ZfkqnnoW/UVUHCTtNp0ORdto67VfjTLs85uvRDxIOaeCc/lw6xHnebWmXJfsMew071atayjFjj06Q6b/PMSyeqevlffkFcjwWdnDPTU455uxxF8+545+RfrzARBQ5a3NhipN0yaXz1x7n6BEP6kx8o/akN7TT5ky/ycTyGGuhrRsdRIeOtEvbPdHm8jR+Cem4XnfO+bs9Ovfv739KQ2c/z+3QbPM0fkkPti4mFsmCNNs8jV+YJ7kCzTZP45d33SKabZ7GL+yf+J/eIok/pcfXoNnmafw6Hq72vtFs8zR+LQ+9982/ul2aZpu34Hfck7UmPe7pt9as5U5nb8npF+8CKyyQ/3R7NeXL49oL5I1ul1V1XrR8kIlJr/KFW3maGM+6dAJhtLUlvflbd+agw3bh4cocVXSK2aVzTKODOfn5Dx1yz5+/aECHttClHRkdz1vqw79UwH2sp/fno++VoOPXT/TkHr37xb0ynHz7vp7aqfc+/U4lgt+/fKWHd+ydV59/88Nvj/c/fvXZB3oIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABUuLv7Hzo1zyb0ghw2AAAAAElFTkSuQmCC'

BIN
resource/example1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 63 KiB

BIN
resource/example2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 136 KiB

BIN
resource/logo.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.7 KiB

1
resource/logo.py Normal file

File diff suppressed because one or more lines are too long

BIN
resource/logo_big.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.8 KiB

23
resource/trans_base64.py Normal file
View File

@ -0,0 +1,23 @@
from PIL import Image
import base64
from resource.logo import logo_base64
import io
# # 从Base64编码数据中获取图像数据
# image_bytes = base64.b64decode(logo_base64)
# # 将图像数据解码为Image对象
# image = Image.open(io.BytesIO(image_bytes))
# # 显示图像
# image.show()
def image_to_base64(image_path):
    """Read the file at ``image_path`` and return its content as Base64 text.

    Args:
        image_path: Path of the file to encode.

    Returns:
        The Base64 encoding of the file's bytes, as a ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    return base64.b64encode(raw_bytes).decode("utf-8")
# Encode the book icon and print the Base64 text.
# The path must not carry trailing whitespace: open() treats it as part of
# the file name.
image_path = "resource/book.png"
base64_string = image_to_base64(image_path)
print(base64_string)

BIN
secret_map.cfg Normal file

Binary file not shown.

149
utils.py Normal file
View File

@ -0,0 +1,149 @@
def get_cover_html(img_w, img_h):
    """Return the lines of the cover page, an SVG wrapper around ``../Images/00.jpg``.

    Args:
        img_w: Image width, used for the ``viewBox`` and the ``<image>`` width.
        img_h: Image height, used for the ``viewBox`` and the ``<image>`` height.

    Returns:
        List of strings forming the XHTML document.
    """
    width, height = str(img_w), str(img_h)
    return [
        '<?xml version="1.0" encoding="UTF-8" standalone="no" ?>\n',
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"\n',
        '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n',
        '<html xmlns="http://www.w3.org/1999/xhtml">\n',
        '<head>\n',
        ' <title>Cover</title>\n',
        '</head>\n',
        '<body>\n',
        ' <div style="text-align: center; padding: 0pt; margin: 0pt;">\n',
        f' <svg xmlns="http://www.w3.org/2000/svg" height="100%" preserveAspectRatio="xMidYMid meet" version="1.1" viewBox="0 0 {width} {height}" width="100%" xmlns:xlink="http://www.w3.org/1999/xlink">\n',
        f' <image width="{width}" height="{height}" xlink:href="../Images/00.jpg"/>\n',
        ' </svg>\n',
        ' </div>\n',
        '</body>\n',
        '</html>',
    ]
def text2htmls(chap_name, text):
    """Render one chapter as the lines of an XHTML page.

    Every line of ``text`` becomes a ``<p>`` element, except lines starting
    with ``[img:`` which become an ``<img>`` pointing at
    ``../Images/<number>.jpg``.

    Args:
        chap_name: Chapter title, used for ``<title>`` and ``<h1>``.
        text: Chapter text with ``\\n`` line separators.

    Returns:
        List of strings forming the XHTML document.
    """
    # NOTE(review): chap_name and text are inserted without XML escaping -
    # confirm that callers never pass '<' or '&' in plain text.
    text_body = ['<body>\n', '<h1>' + chap_name + '</h1>\n']
    for text_line in text.split('\n'):
        if text_line.startswith('[img:'):
            # Take all leading digits after the marker so that numbers longer
            # than two digits are not truncated; without digits, fall back to
            # the fixed two-character slice.
            rest = text_line[5:]
            digit_count = len(rest) - len(rest.lstrip('0123456789'))
            img_no = rest[:digit_count] or text_line[5:7]
            text_line_html = f' <img alt=\"{img_no}\" src=\"../Images/{img_no}.jpg\"/>\n'
        else:
            text_line_html = '<p>' + text_line + '</p>\n'
        text_body.append(text_line_html)
    text_body.append('</body>\n')
    text_head = [
        '<?xml version="1.0" encoding="UTF-8" standalone="no" ?>\n',
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"\n',
        '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n',
        '<html xmlns="http://www.w3.org/1999/xhtml">\n',
        '<head>\n',
        '<title>' + chap_name + '</title>\n',
        '<style>p{text-indent:2em;}</style>\n',
        '</head>\n',
    ]
    text_htmls = text_head + text_body + ['</html>']
    return text_htmls
def get_toc_html(title, chap_names):
    """Return the lines of the NCX table of contents (``toc.ncx``).

    Args:
        title: Book title for ``<docTitle>``.
        chap_names: Chapter titles in reading order; entry ``i`` points to
            ``Text/<i zero-padded to 2>.xhtml`` and gets play order ``i + 1``.

    Returns:
        List of strings forming the NCX document.
    """
    toc_htmls = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN"\n',
        ' "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">\n\n',
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1">\n',
        ' <head>\n',
        # dtb:uid has to equal the package's unique identifier; this is the
        # same urn:uuid that get_content_html() writes into dc:identifier.
        ' <meta name="dtb:uid" content="urn:uuid:942b8224-476b-463b-9078-cdfab0ee2686" />\n',
        ' <meta name="dtb:depth" content="0" />\n',
        ' <meta name="dtb:totalPageCount" content="0" />\n',
        ' <meta name="dtb:maxPageNumber" content="0" />\n',
        ' </head>\n',
        '<docTitle>\n',
        f' <text>{title}</text>\n',
        '</docTitle>\n',
        '<navMap>\n',
    ]
    for chap_no, chap_name in enumerate(chap_names):
        order = str(chap_no + 1)
        toc_htmls.extend([
            f' <navPoint id="navPoint-{order}" playOrder="{order}">\n',
            ' <navLabel>\n',
            f' <text>{chap_name}</text>\n',
            ' </navLabel>\n',
            f' <content src="Text/{str(chap_no).zfill(2)}.xhtml"/>\n',
            ' </navPoint>\n',
        ])
    toc_htmls.append('</navMap>\n')
    toc_htmls.append('</ncx>')
    return toc_htmls
def get_content_html(title, author, num_chap, num_img, img_exist=False):
    """Return the lines of the OPF package document (``content.opf``).

    Args:
        title: Book title for ``dc:title``.
        author: Author name for ``dc:creator``.
        num_chap: Number of chapter files ``Text/00.xhtml`` ... to list.
        num_img: Number of image files ``Images/00.jpg`` ... to list.
        img_exist: Whether the colour-illustration page ``Text/color.xhtml``
            is part of the book.

    Returns:
        List of strings forming the OPF document.
    """
    chapter_ids = [str(chap_no).zfill(2) for chap_no in range(num_chap)]
    image_ids = [str(img_no).zfill(2) for img_no in range(num_img)]

    content_htmls = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        '<package version="2.0" unique-identifier="BookId" xmlns="http://www.idpf.org/2007/opf">\n',
        ' <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">\n',
        ' <dc:identifier id="BookId" opf:scheme="UUID">urn:uuid:942b8224-476b-463b-9078-cdfab0ee2686</dc:identifier>\n',
        ' <dc:language>zh</dc:language>\n',
        f' <dc:title>{title}</dc:title>\n',
        f' <dc:creator opf:role="aut" opf:file-as="未知">{author}</dc:creator>\n',
        ' <meta name="cover" content="x00.jpg"/>\n',
        ' </metadata>\n',
        ' <manifest>\n',
        ' <item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>\n',
        ' <item id="cover.xhtml" href="Text/cover.xhtml" media-type="application/xhtml+xml"/>\n',
    ]
    if img_exist:
        content_htmls.append(' <item id="xcolor" href="Text/color.xhtml" media-type="application/xhtml+xml"/>\n')
    content_htmls.extend(
        f' <item id="x{chap_id}.xhtml" href="Text/{chap_id}.xhtml" media-type="application/xhtml+xml"/>\n'
        for chap_id in chapter_ids
    )
    content_htmls.extend(
        f' <item id="x{img_id}.jpg" href="Images/{img_id}.jpg" media-type="image/jpeg"/>\n'
        for img_id in image_ids
    )
    content_htmls.append(' </manifest>\n')
    content_htmls.append(' <spine toc="ncx">\n')
    content_htmls.append(' <itemref idref="cover.xhtml"/>\n')
    if img_exist:
        # Only reference the colour page when the manifest declares it;
        # otherwise the spine would point at an id that does not exist.
        content_htmls.append(' <itemref idref="xcolor"/>\n')
    content_htmls.extend(
        f' <itemref idref="x{chap_id}.xhtml"/>\n' for chap_id in chapter_ids
    )
    content_htmls.extend([
        ' </spine>\n',
        ' <guide>\n',
        ' <reference type="cover" title="封面" href="Text/cover.xhtml"/>\n',
        ' </guide>\n',
        '</package>\n',
    ])
    return content_htmls
def get_container_html():
    """Return the lines of ``container.xml``, which names ``OEBPS/content.opf`` as the root file."""
    return [
        '<?xml version="1.0" encoding="UTF-8"?>\n',
        '<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">\n',
        ' <rootfiles>\n',
        ' <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>\n',
        ' </rootfiles>\n',
        '</container>\n',
    ]
def get_color_html(colorimg_num):
    """Return the lines of the colour-illustration page.

    Args:
        colorimg_num: Exclusive upper bound of the image numbers; the page
            shows ``../Images/01.jpg`` up to ``colorimg_num - 1``.

    Returns:
        List of strings forming the XHTML document.
    """
    color_htmls = [
        '<?xml version="1.0" encoding="utf-8"?>\n',
        # Same XHTML namespace declaration as the cover and chapter pages, so
        # the elements are recognised as XHTML when the page is parsed as XML.
        '<html xmlns="http://www.w3.org/1999/xhtml">\n',
        '<head>\n',
        ' <title>彩插</title>\n',
        '</head>\n',
        '<body>\n',
    ]
    for img_no in range(1, colorimg_num):
        img_id = str(img_no).zfill(2)
        color_htmls.append(f' <img alt="{img_id}" src="../Images/{img_id}.jpg"/>\n')
    color_htmls.append('</body>\n')
    color_htmls.append('</html>')
    return color_htmls
def check_chars(win_chars):
    """Replace characters that Windows forbids in file names with a black square.

    Args:
        win_chars: Text intended for use as a file or directory name.

    Returns:
        ``win_chars`` with each of ``? * " < > | : / \\`` replaced by U+25A0.
    """
    # The backslash is included as well: it is a path separator on Windows
    # and would otherwise split the name into nested directories.
    win_illegal_chars = '?*"<>|:/\\'
    replacements = str.maketrans({char: '\u25A0' for char in win_illegal_chars})
    return win_chars.translate(replacements)