Files
tools/jyeoo_dl/main.py
flt6 f6658f3d52 First commit
Former-commit-id: 424079609133edcc501ae185509836ee1181a02c
2022-06-12 19:47:20 +08:00

168 lines
4.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from requests import get
from bs4 import BeautifulSoup
from pickle import dump,load
from traceback import print_exc
# Runtime switches.
# SAV_COOKIE: when true, the request headers are pickled to cookies.txt so the
#             next run can reuse them.
# DEBUG:      when true, failures print a full traceback instead of the short
#             user-facing messages.
SAV_COOKIE = True
DEBUG = True

# Obtain the HTTP request headers (`head`, a dict holding the "Cookie" header).
# The cached copy in cookies.txt is preferred; if it cannot be read or
# unpickled for any reason, the user is asked to paste a cookie instead.
try:
    with open("cookies.txt", "rb") as f:
        # NOTE(review): pickle.load can execute arbitrary code from a crafted
        # file. This is only acceptable while cookies.txt is produced solely by
        # this script; consider a plain-text or JSON cache instead.
        head = load(f)
    print("Used cookies.txt.")
    print("If you want to change account, please delete cookies.txt")
except Exception as err:
    if DEBUG:
        print_exc()
    else:
        print("Can't read cookies.txt!")
        # An I/O error is what a first run (no cache file yet) produces, so
        # only that case gets the reassuring hint.
        if isinstance(err, IOError):
            print("If you use this program for the first time, please ignore this.")
            print("Set DEBUG at the start of file to see debug information.")
    cook = input("Please input cookie: ")
    head = {"Cookie": cook}
# Ask which page to download.
url = input("Please input URL: ")

# Cache the request headers for the next run. This is best effort: a failure
# to write the cache is reported but does not stop the download.
if SAV_COOKIE:
    try:
        with open("cookies.txt", "wb") as f:
            dump(head, f)
    except Exception:
        if DEBUG:
            print_exc()
        else:
            print("Can't save cookies.txt!")
# HTML skeleton of the generated document. It links two stylesheets hosted on
# img.jyeoo.net and contains an empty <ul class="ques-list list-box">; the
# script later locates that <ul> through its enclosing
# <div class="wrapper clearfix"> and appends the scraped content to it.
base='''<!DOCTYPE html>
<html lang="zh-cn">
<head>
<meta charset="utf-8"/>
<meta name="renderer" content="webkit"/>
<style type="text/css">
.list-box {
width: 21cm;
}
div,label{
font-size: 20px !important;
line-height: 50px !important;
}
</style>
<link href="https://img.jyeoo.net/jye-root-3.0.css?v=20211228" rel="stylesheet" type="text/css"/>
<link href="https://img.jyeoo.net/images/formula/style_math.css?v=20211028" rel="stylesheet" type="text/css"/>
</head>
<body>
<div class="content">
<div class="wrapper clearfix">
<ul class="ques-list list-box">
</ul>
</div>
</div>
</body>
</html>
'''
# Download the page at `url` using the cookie headers; `t` receives the
# response body as text. Any failure here is fatal for the script.
try:
    # Without a timeout requests waits indefinitely on an unresponsive server.
    t = get(url, headers=head, timeout=30).text
except Exception:
    print("GET %s failed." % (url,))
    if DEBUG:
        print_exc()
    else:
        print("Please try again or check the URL and the Internet.")
    # SystemExit is raised directly because the exit() helper is injected by
    # the site module and is not guaranteed to exist (e.g. `python -S`).
    raise SystemExit(-1)
print("GET %s succeed." % (url,))
# Parse the downloaded page and move every question into the output document.
#   root - parsed source page
#   file - output document built from the `base` template
#   new  - the <ul> inside <div class="wrapper clearfix"> that receives content
#   a    - the <li class="QUES_LI"> question items found in the source page
try:
    root = BeautifulSoup(t, features="lxml")
    file = BeautifulSoup(base, features="lxml")
    new = file.find_all("div", class_="wrapper clearfix")[0].ul
    a = root.find_all("li", class_="QUES_LI")
    if DEBUG:
        print("Prepare parse succeed")
    for ques in a:
        # Labels carrying class "s" are what the debug output reports as an
        # answer; their class attribute is blanked (presumably this removes
        # the highlight -- confirm against the site's CSS).
        for marked in ques.find_all("label", class_="s"):
            if DEBUG:
                print("[*] Found answer in html:\n" + marked.prettify())
            marked["class"] = ""
        # Fill-in blanks: drop whatever the blank contains. Tag.clear() is
        # used rather than assigning to .contents, because a bare assignment
        # leaves the removed children linked into the tree.
        for blank in ques.find_all("div", class_="quizPutTag"):
            if DEBUG:
                print("[*] Found answer in html:\n" + blank.prettify())
            blank.clear()
        # append() moves the <fieldset> out of `root` and into the output.
        new.append(ques.fieldset)
    if not a:
        print("Warning: 获取数据为空,请用浏览器访问网址检查是否是试卷界面。")
    else:
        print("Parse succeed.")
except Exception:
    if DEBUG:
        print_exc()
    else:
        print("Parse failed.")
    # SystemExit instead of exit(): the latter is a site-module helper that is
    # not always available.
    raise SystemExit(-1)
# Add the bold "答案" heading to the output and prepare for fetching the
# per-question answer pages.
try:
    st = file.new_string("答案")
    tag_p = file.new_tag("p")
    h1 = file.new_tag("b")
    # Tag.append() links the string into the tree; pushing onto .contents
    # directly would skip that bookkeeping.
    h1.append(st)
    tag_p.append(h1)
    new.append(tag_p)
    # For "scheme://host/<segment>/..." element 3 of the split is the first
    # path segment (presumably the subject name -- confirm against real URLs).
    sub = url.split("/")[3]
    # `url` becomes a %-template whose single %s is filled with a question id
    # later on; a literal % in the segment is doubled so it survives that step.
    url = "http://www.jyeoo.com/{}/ques/detail/%s".format(sub.replace("%", "%%"))
    # The page text is parsed again to obtain a fresh list of question items,
    # independent of any tree that was modified earlier.
    a = BeautifulSoup(t, features="lxml").find_all("li", class_="QUES_LI")
    if DEBUG:
        print("Get answer page succeed.")
except Exception:
    if DEBUG:
        print_exc()
    else:
        print("Prepare for getting answer failed.")
    # SystemExit instead of exit(): the latter is a site-module helper that is
    # not always available.
    raise SystemExit(-1)
# For every question item in `a`, download its detail page (the `url`
# template filled with the <fieldset> id) and append "T<n>" in bold followed
# by the answer block to the output list `new`. Any failure is fatal.
for i, ques in enumerate(a):
    print("fetching the answer of T%d..." % (i + 1))
    # Built once so the request and the failure message use the same string.
    ans_url = url % (ques.fieldset["id"],)
    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        tem = get(ans_url, headers=head, timeout=30)
    except Exception:
        print("GET %s failed." % (ans_url,))
        if DEBUG:
            print_exc()
        else:
            print("Please try again or check the URL and the Internet.")
        raise SystemExit(-1)
    try:
        tem = BeautifulSoup(tem.text, features="lxml")
        detail = tem.find("fieldset")
        t2 = detail.find("div", {"class": "pt11"})
        # When the pt11 block only says "见试题解答内容" (roughly "see the
        # worked solution"), the pt6 block is used instead.
        if "见试题解答内容" in t2.text:
            t2 = detail.find("div", {"class": "pt6"})
        st = file.new_string("T%d" % (i + 1,))
        tag = file.new_tag("b")
        # Tag.append() links the string into the tree; pushing onto .contents
        # directly would skip that bookkeeping.
        tag.append(st)
        new.append(tag)
        new.append(t2)
    except Exception:
        if DEBUG:
            print_exc()
        else:
            print("Get answer Error.")
        # SystemExit instead of exit(): the latter is a site-module helper
        # that is not always available.
        raise SystemExit(-1)
# Serialise the assembled document to file.html (UTF-8) in the current
# working directory.
try:
    with open("file.html", "w", encoding="utf-8") as f:
        print(file.prettify(), file=f)
except Exception:
    if DEBUG:
        print_exc()
    else:
        print("Write file error.")
        print("Please check if there is a file named 'file.html'.")
        print("If so, please rename it.")
    # Losing the output is a failure of the whole run, so report it through
    # the exit status just like the other fatal steps do.
    raise SystemExit(-1)