Former-commit-id: 3bca851bcb1555e7d994460a84389dfd5be02fec
Former-commit-id: 9bea7e3ecdab4f1e103a618828d2c6e5ddfd1af4
This commit is contained in:
2022-08-16 12:28:57 +08:00
parent 86e3096ff0
commit 849f29e160
4 changed files with 262 additions and 66 deletions

View File

@ -0,0 +1,88 @@
from sys import argv,exit
from pydub import AudioSegment,silence
from os import path,mkdir,system
from shutil import rmtree
from traceback import print_exc
from json import dumps,loads
def init():
    """Return the audio file path passed as the only command-line argument.

    Prints a usage hint and exits with status 1 unless exactly one
    argument follows the script name.
    """
    if len(argv) == 2:
        return argv[1]
    print("Usage: a.py file.mp3")
    exit(1)
def make_dir(pth: str):
    """Make sure directory *pth* exists, offering to wipe an existing one.

    A missing directory is simply created.  When the directory already
    exists the user is asked whether it should be removed and recreated:
    an empty answer or "y"/"yes" in any capitalisation accepts, anything
    else keeps the directory untouched and prints a warning.

    Exits with status -1 when creating or removing the directory fails
    with a permission error.
    """
    if not path.isdir(pth):
        try:
            mkdir(pth)
        except PermissionError:
            print(f"Could not create directory '{pth}' because of permission error.")
            exit(-1)
        return

    # The prompt shows "YES" as the default, so the answer is compared
    # case-insensitively and surrounding whitespace is ignored.
    answer = input(f"'{pth}' exists. Remove it [YES/no]: ").strip().lower()
    if answer not in ("", "y", "yes"):
        print(f"Warning: Directory '{pth}' exists, which may influence the program!")
        return

    try:
        rmtree(pth)
    except PermissionError:
        print(f"Could not remove '{pth}' because of permission error.")
        exit(-1)
    mkdir(pth)
def cut(file: "AudioSegment", length: "list[int]", keep=300) -> "AudioSegment":
    """Return the slice of *file* described by *length*, padded on both sides.

    Args:
        file: Sliceable object supporting ``len()``; main() passes a pydub
            AudioSegment.
        length: Two-item ``[begin, end]`` range, in the same unit that
            ``len(file)`` and slicing use.
        keep: Amount of extra material to keep before *begin* and after
            *end*.

    Returns:
        ``file[begin - keep:end + keep]`` with both bounds clamped to the
        valid range, so a segment near either edge still receives as much
        padding as is available instead of none at all.
    """
    bgn, end = length
    bgn = max(0, bgn - keep)
    end = min(len(file), end + keep)
    return file[bgn:end]
def main():
    """Split the input recording into separate MP3 files under ``OUTPUT/``.

    The non-silent ranges found by pydub are mapped onto three folders:

    * ``BASIC``  - range 0 as ``info`` and range 1 as ``example``;
    * ``1-5``    - ranges 2-6 as ``1`` .. ``5``;
    * ``Others`` - the remaining ranges taken in pairs as ``Tips_of_N`` /
      ``Text_N``, with N starting at 6.

    Exits with status 1 when fewer than seven ranges are detected, because
    the fixed layout above cannot be filled in that case.
    """
    source_path = init()
    NAMES = ["BASIC", "1-5", "Others"]
    make_dir("OUTPUT")
    for folder in NAMES:
        make_dir(f"OUTPUT/{folder}")

    print("Reading file ...")
    audio = AudioSegment.from_file(source_path)

    print("Detecting audio segments ...")
    arr = silence.detect_nonsilent(
        audio,
        silence_thresh=-100,
        seek_step=500,
        min_silence_len=4000,
    )
    # Ranges 0-6 are indexed unconditionally below; fail with a clear
    # message instead of an IndexError when the detection found fewer.
    if len(arr) < 7:
        print(f"Expected at least 7 audio segments but detected {len(arr)}.")
        exit(1)

    print("Cutting audio segments ...")
    groups = [
        [(cut(audio, arr[0]), 'info'), (cut(audio, arr[1]), 'example')],
        [(cut(audio, arr[i]), str(i - 1)) for i in range(2, 2 + 5)],
    ]
    others = []
    # A trailing unpaired range is ignored by the stop value len(arr) - 1.
    for i in range(7, len(arr) - 1, 2):
        number = (i - 7) // 2 + 6
        others.append((cut(audio, arr[i]), f"Tips_of_{number}"))
        others.append((cut(audio, arr[i + 1]), f"Text_{number}"))
    groups.append(others)

    for folder, group in zip(NAMES, groups):
        print(f"Saving audio segments '{folder}' ...")
        for segment, name in group:
            segment.export(f"OUTPUT/{folder}/{name}.mp3")
    print("Done!")
if __name__ == '__main__':
    # Script entry point: run main() and report failures on the console.
    try:
        main()
    except SystemExit:
        # Run the shell's "pause" command first (presumably to keep a Windows
        # console window open), then re-raise so the exit status requested
        # by exit(...) is not replaced by 0.
        system("pause")
        raise
    except Exception:
        # KeyboardInterrupt is not an Exception subclass, so Ctrl-C still
        # propagates without needing a dedicated clause.
        print("An unexpected exception occurred.")
        print_exc()

103
recode/recode.py Normal file
View File

@ -0,0 +1,103 @@
from sys import argv
from os.path import isfile, isdir
from os import mkdir
from json import dump, load
# Usage text printed when the script is started without any argument.
HELP = '''
Usage: recode.py <options>/<filename(s)>
Options:
If options is specified, program will only save configs.
-i: input file code. (eg. utf-8) (Default: test all known codes)
-o: output file code. (Default: utf-8)
-c: codes that try to convert from. (Default: test all known codes)
split by ','. (eg. utf-8,gbk,gb2312)
-r: save the output file in the specified directory. (Default: out)
-s: show all known codes. (Don't use this with others)
'''
# Flags that switch the script into "save configuration" mode when one of
# them is the first command-line argument.
OPTIONS = ["-i", "-o", "-c", "-s", "-r"]
# Encoding names tried one after another when no input encoding is
# configured; also the list printed by "-s".
# NOTE(review): these look like Python codec names - confirm they are all
# available on the target interpreter.
CODES = ['ascii', 'big5', 'big5hkscs', 'cp037', 'cp273', 'cp424', 'cp437', 'cp500', 'cp720', 'cp737', 'cp775', 'cp850', 'cp852', 'cp855', 'cp856', 'cp857', 'cp858', 'cp860', 'cp861', 'cp862', 'cp863', 'cp864', 'cp865', 'cp866', 'cp869', 'cp874', 'cp875', 'cp932', 'cp949', 'cp950', 'cp1006', 'cp1026', 'cp1125', 'cp1140', 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', 'cp1256', 'cp1257', 'cp1258', 'euc_jp', 'euc_jis_2004', 'euc_jisx0213', 'euc_kr', 'gb2312', 'gbk', 'gb18030', 'hz', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2', 'iso2022_jp_2004',
'iso2022_jp_3', 'iso2022_jp_ext', 'iso2022_kr', 'latin_1', 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', 'iso8859_7', 'iso8859_8', 'iso8859_9', 'iso8859_10', 'iso8859_11', 'iso8859_13', 'iso8859_14', 'iso8859_15', 'iso8859_16', 'johab', 'koi8_r', 'koi8_t', 'koi8_u', 'kz1048', 'mac_cyrillic', 'mac_greek', 'mac_iceland', 'mac_latin2', 'mac_roman', 'mac_turkish', 'ptcp154', 'shift_jis', 'shift_jis_2004', 'shift_jisx0213', 'utf_32', 'utf_32_be', 'utf_32_le', 'utf_16', 'utf_16_be', 'utf_16_le', 'utf_7', 'utf_8', 'utf_8_sig']
# Configuration used when config.json is missing or unreadable, and the base
# that saved options are layered on:
#   ipt   - input encoding (None: try every candidate)
#   opt   - output encoding
#   test  - candidate input encodings (None: use CODES)
#   write - output directory
DEFALT = {'ipt': None, 'opt': 'utf-8', 'test': None, 'write': 'out'}
def setOption(arg):
    """Handle option arguments: list the known encodings or save a config.

    Args:
        arg: Command-line arguments after the script name, laid out as
            alternating ``flag value`` pairs (or just ``-s``).

    With ``-s`` as the first argument the entries of CODES are printed and
    nothing is saved.  Otherwise the given values are layered over a copy
    of DEFALT and written to ``config.json``; the value of ``-c`` is split
    on commas into a list.

    An unrecognised flag aborts without touching ``config.json`` instead
    of raising KeyError.  A trailing flag without a value is reported and
    skipped.
    """
    if arg[0] == '-s':
        for i in CODES:
            print("%15s" % i, end=' ')
        return

    trans = {'-i': 'ipt', '-o': 'opt', '-c': 'test', "-r": "write"}
    flags = arg[::2]
    values = arg[1::2]

    unknown = [flag for flag in flags if flag not in trans]
    if unknown:
        print("Unknown option(s): %s" % ", ".join(unknown))
        return
    if len(flags) > len(values):
        # zip() below drops the unpaired flag; tell the user about it.
        print("Option '%s' has no value and is ignored." % flags[-1])

    option = DEFALT.copy()
    for flag, value in zip(flags, values):
        option[trans[flag]] = value.split(',') if flag == '-c' else value
    with open("config.json", "w") as f:
        dump(option, f)
def main(files):
    """Convert every file in *files* to the configured output encoding.

    Settings come from ``config.json`` when it exists and can be parsed,
    otherwise from DEFALT.  Each converted file is written under the
    configured output directory (created if missing); a file whose output
    already exists is skipped.

    When no input encoding is configured, the candidate encodings are
    tried in order until recode() succeeds.  The outcome of every file is
    reported on stdout in both modes.
    """
    config = DEFALT
    if isfile("config.json"):
        try:
            with open("config.json", "r") as f:
                config = load(f)
        except Exception:
            print("Can't read config file.")
    else:
        print("No config file provided.")

    codes = CODES if config["test"] is None else config["test"]
    outcode = config["opt"]
    outdir = config["write"]
    if not isdir(outdir):
        mkdir(outdir)

    for file in files:
        target = outdir + '/' + file
        if isfile(target):
            print(f"File '{target}' exists. Ignore.")
            continue
        if config["ipt"] is None:
            # recode() takes exactly (file, opt, src, to); passing the output
            # directory a second time would raise TypeError.
            for code in codes:
                if recode(file, outdir, code, outcode):
                    print(f"Success to convert {file}")
                    break
            else:
                print("Could not convert '%s' to '%s'" % (file, outcode))
        elif recode(file, outdir, config["ipt"], outcode):
            print(f"Success to convert {file}")
        else:
            print("Could not convert '%s' from '%s' to '%s'" %
                  (file, config["ipt"], outcode))
def recode(file, opt, src, to) -> bool:
    """Re-encode text file *file* from *src* to *to* inside directory *opt*.

    Args:
        file: Path of the file to read; also used as the name below *opt*.
        opt: Existing directory that receives the converted copy.
        src: Encoding used to decode *file*.
        to: Encoding used to write the copy.

    Returns:
        True only when the file was decoded and the copy fully written.
        False when reading/decoding or writing/encoding failed; in the
        latter case a partially written copy is removed again so that a
        later run does not mistake it for a finished result.

    A keyboard interrupt during either step terminates the program with
    exit status 2.
    """
    from os import remove  # local import: only needed for cleanup

    try:
        with open(file, 'r', encoding=src) as f:
            tem = f.read()
    except KeyboardInterrupt:
        exit(2)
    except Exception:
        print("Can't open file.")
        return False

    target = opt + '/' + file
    opened = False
    try:
        with open(target, 'w', encoding=to) as f:
            opened = True
            f.write(tem)
    except KeyboardInterrupt:
        exit(2)
    except Exception:
        print("Can't write file.")
        if opened:
            # Best-effort cleanup of the truncated output; a failure here
            # must not mask the write error that is reported by returning False.
            try:
                remove(target)
            except OSError:
                pass
        return False
    return True
if __name__ == '__main__':
    # Without arguments there is nothing to do: show the usage text and stop.
    if len(argv) == 1:
        print(HELP)
        exit(1)
    cli_args = argv[1:]
    # A leading option flag means "save configuration"; anything else is
    # treated as a list of files to convert.
    handler = setOption if cli_args[0] in OPTIONS else main
    handler(cli_args)

3
zxxk_dl/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
*.html
*.pdf
*.docx

View File

@ -1,66 +1,68 @@
import requests
from time import strftime
from re import findall
# Minimal HTML page skeleton; {title} and {body} are filled in with
# str.format().  The title element must be spelled <title> to be
# recognised by browsers.
HTML_FORMAT ='''
<html>
<head>
<title>{title}</title>
</head>
<body>
{body}
</body>
</html>
'''
def writefile(filename, text):
    """Write *text* to ``<filename>.html`` in the current directory as UTF-8.

    Characters that are invalid or troublesome in file names (path
    separators, wildcards, quotes, spaces, dots, ``:``, ``!``) are removed
    from *filename* first, since the name may come from a server response.
    A name that ends up empty falls back to ``untitled``.
    """
    from re import sub  # local import: the file only imports re.findall

    filename = sub(r'''[\*\/\\\|\<\>\? \:\.\'\"\!]''', "", filename)
    if not filename:
        filename = "untitled"
    # An explicit encoding keeps non-ASCII page content writable regardless
    # of the platform's default locale encoding.
    with open(filename + '.html', 'w', encoding='utf-8') as f:
        f.write(text)
def main():
    """Download the preview of one zxxk.com resource and save it as HTML.

    Prompts for the resource ID and requests the preview JSON.  For a
    single document the returned markup is saved under a timestamp name
    after switching the lazy-load attribute ``data-original`` to ``src``.
    For an archive, every preview URL of each contained file is fetched
    and the page texts are joined into one HTML file named after the file.

    Returns:
        -1 when the first request does not answer with HTTP 200,
        otherwise None.

    Raises:
        RuntimeError: a preview page request did not answer with HTTP 200.
    """
    softID = input("ID: ")
    url = "https://www.zxxk.com/soft/Preview/FirstLoadPreviewJson?softID={}&type=3&product=1&v=2&FullPreview=true"
    response = requests.get(url.format(softID))
    if response.status_code != 200:
        print("ERROR")
        print(response.status_code)
        return -1
    ret = response.json()["data"]
    if not ret["IsSuccess"]:
        print("ERROR: IsSuccess option is not true")
        print(ret)
        # NOTE(review): processing continues after this error as before;
        # confirm whether it should stop here instead.

    if not ret['IsRar']:
        print("Not rar")
        print("TotalPage=%d" % ret['TotalPage'])
        print("SoftExt=%s" % ret['SoftExt'])
        # The markup is a field of the decoded payload; the Response object
        # itself cannot be subscripted.
        html = ret["Html"]
        # replace "data-original" to "src" for showing in browser
        html = html.replace("data-original", "src")
        # No ':' in the timestamp: it is not allowed in Windows file names.
        writefile(strftime("%Y%m%d-%H%M"), html)
        return None

    print("is RAR")
    for file in ret['rarPreviewInfo']:
        html = file["Html"]
        title = file["SoftName"]
        # [^"]+ stops at the attribute's closing quote; a greedy ".+" would
        # run on to the last quote of the line and swallow other attributes.
        urls = findall(r'(?<=data-original=")https://preview\.xkw\.com/[^"]+', html)
        pages = []
        for page_url in urls:
            page = requests.get(page_url, cookies=response.cookies)
            if page.status_code != 200:
                print(page)
                print(page.status_code)
                print(page.text)
                # Explicit raise: an assert would vanish under "python -O".
                raise RuntimeError(
                    "Preview page request failed with status %d: %s"
                    % (page.status_code, page_url))
            pages.append(page.text)
        format_html = HTML_FORMAT.format(title=title, body="\n".join(pages))
        writefile(title, format_html)
if __name__ == "__main__":
    # Use main()'s return value as the exit status: None means success (0),
    # while the -1 it returns for a failed request becomes a non-zero status.
    raise SystemExit(main())
import requests
from time import strftime
import re
# Minimal HTML page skeleton; {title} and {body} are filled in with
# str.format().  The title element must be spelled <title> to be
# recognised by browsers.
HTML_FORMAT ='''
<html>
<head>
<title>{title}</title>
</head>
<body>
{body}
</body>
</html>
'''
def writefile(filename, text):
    """Write *text* to ``<filename>.html`` in the current directory as UTF-8.

    Characters that are invalid or troublesome in file names (path
    separators, wildcards, quotes, spaces, dots, ``:``, ``!``) are removed
    from *filename* first.  A name that ends up empty falls back to
    ``untitled`` so the result is never just ``.html``.
    """
    filename = re.sub(r'''[\*\/\\\|\<\>\? \:\.\'\"\!]''', "", filename)
    if not filename:
        filename = "untitled"
    # An explicit encoding keeps non-ASCII page content writable regardless
    # of the platform's default locale encoding.
    with open(filename + '.html', 'w', encoding='utf-8') as f:
        f.write(text)
def main():
    """Download the preview of one zxxk.com resource and save it as HTML.

    Prompts for the resource ID and requests the preview JSON.  The markup
    of a single document is stored under a timestamp name; for an archive
    one file per contained document is stored under that document's name.
    In both cases the lazy-load attribute ``data-original`` is renamed to
    ``src`` so a browser displays the images.

    Returns -1 when the request does not answer with HTTP 200, otherwise
    None.
    """
    softID = input("ID: ")
    endpoint = "https://www.zxxk.com/soft/Preview/FirstLoadPreviewJson?softID={}&fileaddress=&type=3&product=1&v=2&FullPreview=true"
    response = requests.get(endpoint.format(softID))
    status = response.status_code
    if status != 200:
        print("ERROR")
        print(status)
        return -1

    ret = response.json()["data"]
    if not ret["IsSuccess"]:
        print("ERROR: IsSuccess option is not true")
        print(ret)

    if ret['IsRar']:
        print("is RAR")
        # An earlier approach fetched every preview URL and joined the page
        # texts through HTML_FORMAT; now the markup is saved directly.
        for entry in ret['rarPreviewInfo']:
            markup = entry["Html"]
            title = entry["SoftName"]
            writefile(title, markup.replace("data-original", "src"))
        return None

    print("Not rar")
    print("TotalPage=%d" % ret['TotalPage'])
    print("SoftExt=%s" % ret['SoftExt'])
    markup = ret["Html"].replace("data-original", "src")
    writefile(strftime("%Y%m%d-%H:%M"), markup)
if __name__ == "__main__":
    # Use main()'s return value as the exit status: None means success (0),
    # while the -1 it returns for a failed request becomes a non-zero status.
    raise SystemExit(main())