✨ feat: update

Former-commit-id: 675e932431ea8000277a664896ba8f4c9fb80a94 Former-commit-id: 99dacc5ebe0dfe1c2ab4c0d5e9c7fc47aa586aab
2023-03-12 13:57:38 +08:00
parent 450c6d30d0
commit 11b1f8ea6f
5 changed files with 330 additions and 8 deletions
--- a/English_listen_gen/main.py
+++ b/English_listen_gen/main.py
@ -0,0 +1,60 @@
+from re import match
+from mytts import SpeechConfig, AudioOutputConfig, SpeechSynthesizer
+from html import escape
+from rich import print
+
+# Longest pause emitted as a single <break>; longer delays are split into
+# several consecutive breaks.
+# NOTE(review): presumably mirrors a per-break cap of the speech service -- confirm.
+_MAX_BREAK_SECONDS = 5
+
+# Number of <voice> elements packed into one <speak> document.
+# NOTE(review): presumably chosen to stay under a per-request limit -- confirm.
+_VOICES_PER_DOCUMENT = 49
+
+
+def _break_tags(seconds):
+    """Return SSML ``<break>`` markup that pauses for *seconds* in total."""
+    if seconds <= _MAX_BREAK_SECONDS:
+        return f'<break time="{seconds}s" />'
+    tags = f'<break time="{_MAX_BREAK_SECONDS}s" />' * int(seconds // _MAX_BREAK_SECONDS)
+    # Round to milliseconds so float noise does not leak into the markup, and
+    # skip the remainder entirely when the delay is an exact multiple.
+    remainder = round(seconds % _MAX_BREAK_SECONDS, 3)
+    if remainder:
+        tags += f'<break time="{remainder}s" />'
+    return tags
+
+
+def read(filename, *, escape_text=True):
+    """Convert a dialogue script into a list of SSML ``<speak>`` documents.
+
+    Every non-blank line of the script is either
+
+    * ``{delay N}`` -- a pause of ``N`` seconds, appended to the end of the
+      preceding speaker's ``<voice>`` element, or
+    * ``X: text`` -- a line spoken by speaker ``X``, where ``X`` is ``B``,
+      ``W`` or ``M`` (each mapped to a fixed voice and prosody).
+
+    Args:
+        filename: Path of the UTF-8 encoded script file.
+        escape_text: When true (the default) ``&``, ``<`` and ``>`` in the
+            spoken text are XML-escaped so they cannot break the SSML. Pass
+            ``False`` if the script deliberately embeds SSML markup.
+
+    Returns:
+        A list of SSML strings, each containing at most
+        ``_VOICES_PER_DOCUMENT`` ``<voice>`` elements, one element per line.
+        An empty script yields an empty list.
+
+    Raises:
+        ValueError: If a line has no ``": "`` separator or a delay value is
+            not a valid number.
+        AssertionError: If the speaker tag is not ``B``, ``W`` or ``M``.
+
+    A delay on the very first line has no preceding speaker; it is emitted
+    as a stand-alone silent ``<voice>`` element and a ``RuntimeWarning`` is
+    issued.
+    """
+    import warnings
+
+    voice_templates = {
+        "B": '<voice name="zh-CN-YunfengNeural"><prosody rate="-30%">{}</prosody></voice>',
+        "W": '<voice name="en-US-JennyMultilingualNeural"><prosody rate="-5%" pitch="0%">{}</prosody></voice>',
+        "M": '<voice name="en-US-EricNeural"><mstts:silence  type="Sentenceboundary" value="100ms"/><prosody rate="10%" pitch="0%">{}</prosody></voice>',
+    }
+    voice_end = "</voice>"
+
+    rst: list[str] = []
+    with open(filename, 'r', encoding="utf-8") as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                # Blank lines (e.g. a trailing newline) carry no content.
+                continue
+            delay = match(r"\{delay\s([\d\.]+)\}", line)
+            if delay is not None:
+                pause = _break_tags(float(delay.group(1)))
+                if rst:
+                    # Every entry ends with </voice>; slot the pause in just
+                    # before it so it is spoken by the preceding voice.
+                    rst[-1] = rst[-1].removesuffix(voice_end) + pause + voice_end
+                else:
+                    warnings.warn("Why there is a delay at the very beginning?", RuntimeWarning)
+                    rst.append(f'<voice name="zh-CN-YunfengNeural">{pause}{voice_end}')
+                continue
+            # Split on the first separator only, so the text may contain ": ".
+            who, sep, con = line.partition(': ')
+            if not sep:
+                raise ValueError("Expected '<speaker>: <text>' but got: {!r}".format(line))
+            template = voice_templates.get(who)
+            if template is None:
+                raise AssertionError("Not a valid text format: {}->{}".format(who,con))
+            if escape_text:
+                con = escape(con, quote=False)
+            rst.append(template.format(con))
+
+    SSML_MODEL='''<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US">{}</speak>'''
+    # Each entry holds exactly one <voice> element, so slicing the list is
+    # enough to cap the number of voices per document.
+    return [
+        SSML_MODEL.format("".join(entry + "\n" for entry in rst[start:start + _VOICES_PER_DOCUMENT]))
+        for start in range(0, len(rst), _VOICES_PER_DOCUMENT)
+    ]
+
+def syn(content:list[str]):
+    """Synthesize each SSML document in *content* to its own MP3 file.
+
+    Document number ``i`` (counting from zero) is written to ``"<i>.mp3"``
+    with the index zero-padded to two digits (``00.mp3``, ``01.mp3``, ...).
+    The value returned by each ``speak_ssml`` call is printed.
+
+    Args:
+        content: SSML strings, one per output file.
+    """
+    # One speech configuration is shared by every synthesis request.
+    speech_config = SpeechConfig()
+    for index, ssml in enumerate(content):
+        audio_target = AudioOutputConfig(filename=f"{index:02d}.mp3")
+        synthesizer = SpeechSynthesizer(speech_config, audio_target, debug=True)
+        print(synthesizer.speak_ssml(ssml))
+
+
+if __name__ == '__main__':
+    # Parse the script once and reuse the result for both the preview and
+    # the synthesis run.
+    documents = read("input.txt")
+    if documents:
+        # Preview the first SSML document; skipped for an empty script,
+        # where indexing [0] would raise IndexError.
+        print(documents[0])
+    syn(documents)