"""
Big5小說: http://big5.quanben5.com/
免費小說閱讀網: https://big5.mfxsydw.com/
天天看小說: https://www.ttkan.co/
"""
import os
import shutil
import math
import random
import re
import requests
import lxml
from bs4 import BeautifulSoup
import multiprocessing
import tqdm
from google.colab import drive
# Mount Google Drive so downloaded novels persist outside the Colab VM
# (all paths below live under /content/drive/MyDrive/小說/).
drive.mount('/content/drive', force_remount=False)
#=======================================================================================================================================================================================================================================================
class 建立章節資料夾:
    """Organise downloaded chapter files into nested range folders.

    Given a directory of files named like ``第001章.txt``, builds
    sub-folders such as ``第001至010章`` (one level per power of ten
    below the chapter count) and moves chapters/folders into them so a
    long novel stays browsable.
    """

    def __init__(self, path):
        """Scan *path* and record the chapter count and filename pattern.

        path: directory containing the downloaded chapter files
              (must be non-empty).
        """
        file_list = os.listdir(path)
        self.path = path
        self.number = len(file_list)
        # Split one sample filename around its digit run to recover the
        # fixed prefix/suffix, e.g. "第001章.txt" -> ("第", "章.txt").
        # Raw strings: "\d" in a plain string is an invalid escape
        # (SyntaxWarning on Python 3.12+).
        self.file_startWords = re.split(r"\d+", file_list[0])[0]
        self.file_endWords = re.split(r"\d+", file_list[0])[1]
        print(self.number)

    def _zfill(self, integer):
        """Zero-pad *integer* to the width of the total chapter count."""
        return str(int(integer)).zfill(len(str(self.number)))

    def create_folder(self, start, middle, end):
        """Return the ordered, de-duplicated list of range-folder paths.

        One folder level is produced per power of ten below the chapter
        count (tens, hundreds, ...), e.g. "第01至10章".
        """
        folder_list = []
        for i in range(1, len(str(self.number))):
            divide_part = 10 ** i  # exact int instead of math.pow float
            folder_number = math.ceil(self.number / divide_part) - 1
            for j in range(folder_number):
                folder_path = os.path.join(
                    self.path,
                    f"{start}{self._zfill(j * divide_part + 1)}{middle}{self._zfill((j + 1) * divide_part)}{end}",
                )
                folder_list.append(folder_path)
            # Last folder of this level ends at the real chapter count.
            folder_path = os.path.join(
                self.path,
                f"{start}{self._zfill(folder_number * divide_part + 1)}{middle}{self._zfill(self.number)}{end}",
            )
            folder_list.append(folder_path)
        # dict.fromkeys keeps first-seen order while dropping duplicates.
        return list(dict.fromkeys(folder_list))

    def moving_chapter_files(self, start, middle, end):
        """Return [old, new] path pairs moving each chapter into its tens folder."""
        destination = []
        if self.number <= 10:
            # Nothing to group; return an empty list instead of the old
            # implicit None so callers can always iterate the result.
            return destination
        folder_number = math.ceil(self.number / 10)
        for i in range(folder_number):
            for j in range(10):
                if i * 10 + j + 1 > self.number:
                    break
                original_file = f"{self.file_startWords}{self._zfill(i * 10 + j + 1)}{self.file_endWords}"
                old = os.path.join(self.path, original_file)
                if (i + 1) * 10 > self.number:
                    # Final, possibly short, bucket ends at the real count.
                    new = os.path.join(self.path, f"{start}{self._zfill(i * 10 + 1)}{middle}{self._zfill(self.number)}{end}", original_file)
                else:
                    new = os.path.join(self.path, f"{start}{self._zfill(i * 10 + 1)}{middle}{self._zfill((i + 1) * 10)}{end}", original_file)
                destination.append([old, new])
        return destination

    def moving_folder_files(self, start, middle, end):
        """Return [old, new] pairs nesting each folder level into the next one up."""
        destination = []
        for i in range(1, len(str(self.number))):
            divided_part = 10 ** i  # exact int instead of math.pow float
            folder_number = math.ceil(self.number / divided_part)
            if folder_number <= 10:
                # This level fits in one parent; nothing to nest.
                continue
            for j in range(folder_number):
                x1 = j * divided_part + 1
                x2 = (j + 1) * divided_part
                # x3/x4 describe the enclosing folder one level up.
                x3 = 10 * divided_part * math.floor((j * divided_part + 1) / (10 * divided_part)) + 1
                x4 = x3 + 10 * divided_part - 1
                if x2 > self.number:
                    x2 = self.number
                if x4 > self.number:
                    x4 = self.number
                if x1 == x3 and x2 == x4:
                    # Folder would be moved into itself; skip.
                    continue
                old = os.path.join(self.path, f"{start}{self._zfill(x1)}{middle}{self._zfill(x2)}{end}")
                new = os.path.join(self.path, f"{start}{self._zfill(x3)}{middle}{self._zfill(x4)}{end}")
                destination.append([old, new])
        return destination

    def execute_command(self, makedir_name):
        """Create the range folders and move chapters/folders into them.

        makedir_name: template like "第{}至{}章"; the text around the two
        "{}" placeholders becomes the folder-name prefix/middle/suffix.
        """
        if self.number <= 10:
            return
        _name = re.split(r"\{\s*\}", makedir_name)
        start = _name[0]
        middle = _name[1]
        end = _name[2]
        for folder in self.create_folder(start, middle, end):
            try:
                os.makedirs(folder)
            except Exception as e:
                print(e)
        print("資料夾創建完成。")
        for chapter in self.moving_chapter_files(start, middle, end):
            try:
                shutil.move(chapter[0], chapter[1])
            except Exception as e:
                print(e)
        print("小說章節已經移動到資料夾中。")
        for folder_path in self.moving_folder_files(start, middle, end):
            try:
                shutil.move(folder_path[0], folder_path[1])
            except Exception as e:
                print(e)
        print("小說整理已經完成。")
#=======================================================================================================================================================================================================================================================
class 免費小說閱讀網:
    """Scraper for https://big5.mfxsydw.com/ — downloads a novel chapter by chapter."""

    def __init__(self, url):
        """url: the novel's table-of-contents page on big5.mfxsydw.com."""
        self.url = url
        self.headers = {"User-Agent": 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}

    def create_path(self, novel_title):
        """Ensure the per-novel Drive folder exists and return its path."""
        path = "/content/drive/MyDrive/小說/%s/" % (novel_title)
        # exist_ok replaces the old bare try/except around os.listdir,
        # which swallowed every error, not just "folder missing".
        os.makedirs(path, exist_ok=True)
        return path

    def base64(self, _Str):
        """Obfuscate *_Str* the way the site's JavaScript does.

        Each character found in ``staticchars`` is shifted three places
        forward (mod 62); every output character is padded with one
        random character on each side, so the result is exactly three
        times the input length. (Not real base64 despite the name, which
        is kept for interface compatibility.)
        """
        staticchars = "PXhw7UT1B0a9kQDKZsjIASmOezxYG4CHo5Jyfg2b8FLpEvRr3WtVnlqMidu6cN"
        encodechars = ""
        for ch in _Str:
            # str.find returns -1 for "not found" — replaces the old
            # try/except around str.index used as control flow.
            num0 = staticchars.find(ch)
            if num0 == -1:
                code = ch
            else:
                code = staticchars[(num0 + 3) % 62]
            num1 = int(math.floor(random.random() * 62))
            num2 = int(math.floor(random.random() * 62))
            encodechars += (staticchars[num1] + code + staticchars[num2])
        return encodechars

    def getAll_download_information(self):
        """Collect the novel title and the full, sorted chapter URL list.

        The site serves part of the chapter list in the HTML and the
        rest via a JSONP endpoint that requires the obfuscated callback
        token produced by :meth:`base64`.
        """
        download_information = {}
        response = requests.get(self.url, headers=self.headers)
        soup = BeautifulSoup(response.text, "lxml")
        download_information['novel_title'] = "_".join(soup.find("h1").getText().split(" "))
        html_list = list(map(lambda x: "https://big5.mfxsydw.com" + x.find("a").get("href"), soup.find_all("li")))
        # Renamed from `id`, which shadowed the builtin.
        book_id = self.url.split("/")[-1].split(".html")[0]
        callback = re.findall(r"""callback=(["'])(.*?)\1""", response.text, re.MULTILINE).pop()[1]
        jsonUrl = "https://big5.mfxsydw.com/index.php?c=book&a=show.jsonp&callback={}&book_id={}&b={}".format(callback, book_id, self.base64(callback))
        json_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",
                        "Referer": self.url}
        r = requests.get(jsonUrl, headers=json_headers)
        # The JSONP body escapes quotes; stripping backslashes first lets
        # the href regex match the embedded anchors.
        json_list = list(map(lambda x: "https://big5.mfxsydw.com" + x[1], re.findall(r"""<a\s+href=(["'])(.*?)\1""", "".join(r.text.split("\\")))))
        # Sort every chapter link by its numeric id, then de-duplicate
        # while preserving order.
        download_list = sorted((html_list + json_list), key=lambda x: int(re.search(r"/(\d+)\.html", x).group(1)))
        download_information['download_list'] = list(dict.fromkeys(download_list))
        return download_information

    def execute_download(self, queue, href, number, path, total):
        """Download one chapter into ``第NNN章.txt`` and report via *queue*."""
        r = requests.get(href, headers=self.headers)
        soup = BeautifulSoup(r.text, "lxml")
        novel_header = soup.find("h1").getText()
        content = list(map(lambda x: x.getText(), soup.find_all("p")))
        # The site renders navigation links as <p> tags too; filter them out.
        novel_content = "\n\n".join(list(filter(lambda x: x != "上一頁" and x != "目錄" and x != "下一頁" and x != "免費小說閱讀網", content)))
        zfill_number = str(number).zfill(len(str(total)))
        if len(novel_content) < 40:
            # A suspiciously short chapter usually means a failed page load.
            print(f"\n第{zfill_number}章: {novel_header} \n少過40字。")
        # Explicit encoding so output doesn't depend on the locale default.
        with open(os.path.join(path, f"第{zfill_number}章.txt"), "w", encoding="utf-8") as file:
            file.write(novel_header + "\n\n" + novel_content)
        queue.put("done")

    def execute_command(self):
        """Download every chapter with a process pool; return the novel folder."""
        download_information = self.getAll_download_information()  # fixed "dowload" typo
        path = self.create_path(download_information['novel_title'])
        pool = multiprocessing.Pool(8)
        queue = multiprocessing.Manager().Queue()
        total = len(download_information['download_list'])
        for number, url in enumerate(download_information['download_list'], start=1):
            pool.apply_async(self.execute_download, args=(queue, url, number, path, total))
        pool.close()
        self.progressBar(queue, total)
        pool.join()  # reap workers once every chapter has reported "done"
        return path

    def progressBar(self, queue, total):
        """Block until *total* "done" messages arrive, showing a tqdm bar."""
        for _ in tqdm.tqdm(range(total)):
            queue.get()
#====================================================================================================================================================================================================================================
class Big5:
    """Scraper for http://big5.quanben5.com/ — downloads a novel chapter by chapter."""

    def __init__(self, url):
        """url: the novel's table-of-contents page on big5.quanben5.com."""
        self.url = url
        self.headers = {'user-agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}

    def get_Novel_information(self):
        """Return the novel title and every chapter URL from the index page."""
        information = {}
        response = requests.get(self.url, headers=self.headers)
        soup = BeautifulSoup(response.text, "lxml")
        information['novel_title'] = "_".join(soup.find("h1").getText().split(" "))
        information['download_list'] = list(map(lambda x: "http://big5.quanben5.com/" + x.find("a").get("href"), soup.find_all("li")))
        return information

    def execute_download(self, queue, url, zfill_number, path):
        """Download one chapter into ``第NNN章.txt`` and report via *queue*."""
        response = requests.get(url, headers=self.headers)
        soup = BeautifulSoup(response.text, "lxml")
        novel_header = soup.find("h1").getText()
        content = list(map(lambda x: x.getText(), soup.find_all("p")))
        # The site renders navigation links as <p> tags too; filter them out.
        novel_content = "\n\n".join(list(filter(lambda x: x != "上一頁" and x != "目錄" and x != "下一頁" and x != "全本小說網", content)))
        if len(novel_content) < 40:
            # A suspiciously short chapter usually means a failed page load.
            print("第%s章,標題: %s \n少過40字。" % (zfill_number, novel_header))
        # Explicit encoding so output doesn't depend on the locale default.
        with open(os.path.join(path, "第%s章.txt" % (zfill_number)), "w", encoding="utf-8") as f:
            f.write(novel_header + "\n\n" + novel_content)
        queue.put("done")

    def execute_command(self):
        """Download every chapter with a process pool; return the novel folder."""
        information = self.get_Novel_information()
        path = "/content/drive/MyDrive/小說/%s/" % (information['novel_title'])
        # exist_ok replaces the old bare try/except around os.listdir,
        # which swallowed every error, not just "folder missing".
        os.makedirs(path, exist_ok=True)
        pool = multiprocessing.Pool(8)
        queue = multiprocessing.Manager().Queue()
        total = len(information['download_list'])
        for number, url in enumerate(information['download_list'], start=1):
            zfill_number = str(number).zfill(len(str(total)))
            pool.apply_async(self.execute_download, args=(queue, url, zfill_number, path))
        pool.close()
        self.progressBar(queue, total)
        pool.join()  # reap workers once every chapter has reported "done"
        return path

    def progressBar(self, queue, total):
        """Block until *total* "done" messages arrive, showing a tqdm bar."""
        for _ in tqdm.tqdm(range(total)):
            queue.get()
#====================================================================================================================================================================================================================================
class 天天看小說:
    """Scraper for https://www.ttkan.co/ — downloads a novel chapter by chapter."""

    def __init__(self, url):
        """url: the novel's table-of-contents page on www.ttkan.co."""
        self.url = url
        self.headers = {"User-Agent": 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'}

    def getAll_download_information(self):
        """Return the novel title and every absolute chapter URL on the page."""
        information = {}
        r = requests.get(self.url, headers=self.headers)
        soup = BeautifulSoup(r.text, "lxml")
        information['novel_title'] = "_".join(soup.find("h1").getText().split(" "))
        # A single grouped findall replaces the old findall + per-match
        # re.search pass — same result, one regex scan.
        data = re.findall(r'<a href="(.*?)"', r.text)
        # Keep only absolute links that look like chapter pages.
        information['download_list'] = list(filter(lambda x: re.search(r"^https.*?\.html$", x), data))
        return information

    def execute_download(self, queue, url, zfill_number, path):
        """Download one chapter into ``第NNN章.txt`` and report via *queue*."""
        response = requests.get(url, headers=self.headers)
        soup = BeautifulSoup(response.text, "lxml")
        novel_header = soup.title.getText()
        novel_content = "\n\n".join(list(map(lambda x: x.getText().strip(), soup.find_all("p"))))
        if len(novel_content) < 40:
            # A suspiciously short chapter usually means a failed page load.
            print("第%s章,標題: %s \n少過40字。" % (zfill_number, novel_header))
        # Explicit encoding so output doesn't depend on the locale default.
        with open(os.path.join(path, "第%s章.txt" % (zfill_number)), "w", encoding="utf-8") as f:
            f.write(novel_header + "\n\n" + novel_content)
        queue.put("done")

    def execute_command(self):
        """Download every chapter with a process pool; return the novel folder."""
        information = self.getAll_download_information()
        path = "/content/drive/MyDrive/小說/%s/" % (information['novel_title'])
        # exist_ok replaces the old bare try/except around os.listdir,
        # which swallowed every error, not just "folder missing".
        os.makedirs(path, exist_ok=True)
        pool = multiprocessing.Pool(8)
        queue = multiprocessing.Manager().Queue()
        total = len(information['download_list'])
        for number, url in enumerate(information['download_list'], start=1):
            zfill_number = str(number).zfill(len(str(total)))
            pool.apply_async(self.execute_download, args=(queue, url, zfill_number, path))
        pool.close()
        self.progressBar(queue, total)
        pool.join()  # reap workers once every chapter has reported "done"
        return path

    def progressBar(self, queue, total):
        """Block until *total* "done" messages arrive, showing a tqdm bar."""
        for _ in tqdm.tqdm(range(total)):
            queue.get()
#====================================================================================================================================================================================================================================
def download(url):
    """Dispatch *url* to the scraper matching its site.

    Returns the folder the chapters were saved into, or None when no
    scraper recognises the URL.
    """
    # BUG FIX: the original condition `if "big5" and "quanben5" in url:`
    # only tested `"quanben5" in url` ("big5" is a truthy constant); both
    # substrings are now required, matching the big5.quanben5.com
    # catalogue URLs listed in the module docstring.
    if "big5" in url and "quanben5" in url:
        return Big5(url).execute_command()
    if "big5" in url and re.search(r"/book/\d+\.html", url):
        return 免費小說閱讀網(url).execute_command()
    if url.startswith("https://www.ttkan"):
        return 天天看小說(url).execute_command()
    # No scraper matched; explicit None instead of falling off the end.
    return None
if __name__ == "__main__":
    # Make sure the shared novel folder exists on the mounted Drive.
    if not os.path.exists("/content/drive/MyDrive/小說/"):
        os.makedirs("/content/drive/MyDrive/小說/")
    # Ask for a table-of-contents URL, download every chapter, then group
    # the chapter files into range sub-folders named like "第01至10章".
    url = input("Please enter url of 目錄\n>>>")
    novel_path = download(url)
    建立章節資料夾(novel_path).execute_command("第{}至{}章")