1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
| from multiprocessing import Process from threading import Thread import os import json import whisper import zhconv from pyaudio import PyAudio,paInt16 import wave from pydub import AudioSegment from pydub.playback import play import sys import time import numpy as np import subprocess import openai
# Directory holding the launched script (sys.argv[0] with symlinks resolved).
# synthesize_answer_and_play uses it to build the synthesized-answer path.
_launched_script = os.path.realpath(sys.argv[0])
script_dir = os.path.dirname(_launched_script)
def transcribe(file):
    """Transcribe an audio file with Whisper and save the result as JSON.

    Loads the ``small`` Whisper model, transcribes *file* with the language
    set to Chinese, prints the result, and writes it to a ``.json`` file whose
    name is *file* with everything after its last ``.`` replaced.

    Args:
        file: Path of the audio file to transcribe.
    """
    print(f'transcribing {file}')
    speech_model = whisper.load_model('small')
    print('whisper model loaded')

    transcription = speech_model.transcribe(file, language='Chinese')
    print(transcription)

    # Strip only the final extension so dots earlier in the path survive.
    stem = file.rsplit(".", 1)[0]
    json_path = f'{stem}.json'
    with open(json_path, 'w', encoding='utf8') as sink:
        json.dump(transcription, sink, ensure_ascii=False, indent=4)
class recorder:
    """Capture mono 16-bit audio from the default input and save it as WAV.

    ``start`` loops until the ``does`` flag is cleared, so callers run it in
    a separate thread; ``finish`` clears the flag and writes whatever has
    been captured so far to a timestamped WAV file.
    """

    NUM_SAMPLES = 2000      # frames requested from the stream per read
    SAMPLING_RATE = 16000   # capture / playback rate in Hz
    SAMPLE_WIDTH = 2        # bytes per sample, matching paInt16
    does = False            # True while the capture loop should keep running

    def __init__(self):
        # Per-instance buffer; a class-level list would be shared by every
        # recorder that has not called start() yet.
        self.voice_string = []
        self.does = False

    def start(self):
        """Record audio chunks into ``voice_string`` until ``does`` is False.

        Blocks the calling thread for the whole recording. The audio stream
        and the PyAudio handle are released when the loop ends, including
        when a read raises.
        """
        print('recording audio...')
        self.does = True
        self.voice_string = []
        pa = PyAudio()
        try:
            stream = pa.open(
                format=paInt16,
                channels=1,
                rate=self.SAMPLING_RATE,
                input=True,
                frames_per_buffer=self.NUM_SAMPLES,
            )
            try:
                while self.does:
                    self.voice_string.append(stream.read(self.NUM_SAMPLES))
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            pa.terminate()

    def finish(self):
        """Stop recording and write the captured audio to a WAV file.

        The file is created relative to the current working directory as
        ``yy/mm/dd/HHMMSS.wav``; missing directories are created.

        Returns:
            tuple: ``(wav_id, wav_duration)`` where ``wav_id`` is the
            timestamp string ``yy_mm_dd_HHMMSS`` and ``wav_duration`` is the
            length of the written audio in seconds.
        """
        print('recording audio end')
        self.does = False

        wav_id = time.strftime('%y_%m_%d_%H%M%S')
        wav_path = f'{wav_id.replace("_", "/")}.wav'
        os.makedirs(os.path.dirname(wav_path), exist_ok=True)

        # Concatenate the raw chunks directly; going through a NumPy array
        # of byte strings would NUL-pad chunks of unequal length.
        pcm = b''.join(self.voice_string)
        with wave.open(wav_path, 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(self.SAMPLE_WIDTH)
            wf.setframerate(self.SAMPLING_RATE)
            wf.writeframes(pcm)

        # Duration follows the rate the file was written with.
        wav_duration = (len(pcm) // self.SAMPLE_WIDTH) / self.SAMPLING_RATE
        print('wav_id', wav_id)
        return wav_id, wav_duration
# Module-level recorder instance; the __main__ loop runs rec.start in a
# thread and calls rec.finish to stop and save each recording.
rec=recorder()
def start_minicpm_service(service_dir='C:/Users/tellw/apps/python',
                          command='python run_minicpm_service.py'):
    """Run the MiniCPM service launcher and block until it exits.

    The command is executed through the shell with *service_dir* as its
    working directory. The caller's own working directory is left untouched.

    Args:
        service_dir: Directory the command is run from.
        command: Shell command line that starts the service.
    """
    # cwd= scopes the directory change to the child instead of os.chdir,
    # which would move the whole calling process.
    subprocess.run(command, shell=True, cwd=service_dir)
def answer2(result, client):
    """Send *result* as a user message and return the model's reply text.

    Args:
        result: Text placed in the user turn of the conversation.
        client: Object exposing ``chat.completions.create`` (the __main__
            loop passes an ``openai.OpenAI`` instance).

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    system_turn = {'role': 'system', 'content': 'You are a helpful assistant.'}
    user_turn = {'role': 'user', 'content': result}
    request = {
        'model': 'Model-7.6B-Q4_0_openbmb_MiniCPM-o-2_6-gguf',
        'messages': [system_turn, user_turn],
        'frequency_penalty': 0.2,
    }
    reply = client.chat.completions.create(**request)
    first_choice = reply.choices[0]
    return first_choice.message.content
def synthesize_answer_and_play(answer, wav_id,
                               synth_dir='D:/asr-service/VITS-Paimon'):
    """Synthesize *answer* to speech with the external script and play it.

    Runs ``custom_synthesize_shell.py`` from *synth_dir*, passing the answer
    text (spaces removed) and an output path stem under ``script_dir``, then
    plays ``<stem>.wav``.

    Args:
        answer: Text to speak.
        wav_id: Recording id of the form ``yy_mm_dd_HHMMSS``; underscores
            become path separators in the output location.
        synth_dir: Directory containing ``custom_synthesize_shell.py``.
    """
    output_stem = f'{script_dir}/{wav_id.replace("_", "/")}_answer'

    print('synthesizing audios...')
    # Pass an argument list so quotes, tabs or other special characters in
    # the model's answer cannot split or corrupt the command line. cwd=
    # keeps the directory change local to the child process.
    subprocess.run(
        ['python', 'custom_synthesize_shell.py', answer.replace(' ', ''), output_stem],
        cwd=synth_dir,
    )

    print('start to play')
    # The synthesizer is expected to have written <output_stem>.wav.
    song = AudioSegment.from_wav(f'{output_stem}.wav')
    play(song)
    print('play end')
def exec_shell(cmd, ignore_err=False):
    """Run *cmd* through the shell and capture its output.

    Args:
        cmd: Command line handed to the shell.
        ignore_err: When true, return the captured output even if the
            command exits with a non-zero status.

    Returns:
        ``(stdout, stderr)`` as bytes when the command exits with 0 or
        *ignore_err* is set; otherwise ``(-1000, message)`` where *message*
        names the failed command.
    """
    finished = subprocess.run(
        cmd,
        shell=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    failed = finished.returncode != 0
    if failed and not ignore_err:
        return -1000, f'execute "{cmd}" failed'
    return finished.stdout, finished.stderr
def _probe_service():
    """Return the raw netstat lines for LISTEN entries matching 8080."""
    # ignore_err=True: grep exits non-zero when nothing matches, which just
    # means "not running" here and must yield empty bytes, not an error.
    output, _ = exec_shell('netstat -ano|grep 8080|grep -i listen', True)
    return output


def _stop_service():
    """Kill the process listening on port 8080, if one is found."""
    listing = _probe_service().decode().strip()
    if not listing:
        print('no listener found on port 8080; nothing to stop')
        return
    # The PID is the last column of the first matching netstat line.
    pid = int(listing.splitlines()[0].split()[-1])
    print('pid', pid)
    os.kill(pid, 9)
    print('结束chatgpt服务')


def main():
    """Interactive loop: record, transcribe, query the LLM, speak the answer.

    Each round waits for Enter to start and stop recording, transcribes the
    WAV in a child process, starts the local LLM service if nothing is
    listening on port 8080, asks it for an answer, stops the service again
    if this round started it, plays the synthesized answer, and appends the
    exchange to ``audio-robot-logs.txt``.
    """
    while True:
        input('录音?')
        record_thread = Thread(target=rec.start, args=())
        record_thread.daemon = True
        record_thread.start()
        input('停止?')
        st = time.time()

        # Stop the capture loop and let the thread finish its last read
        # before the WAV is written, so no chunk is appended afterwards.
        rec.does = False
        record_thread.join(timeout=2)
        wav_id, _ = rec.finish()
        audio_stem = wav_id.replace("_", "/")

        p1 = Process(target=transcribe, args=(f'{audio_stem}.wav',))
        p1.start()
        p1.join()

        gpt_running = bool(_probe_service())
        if gpt_running:
            print('chatgpt服务正在运行')
        else:
            p = Process(target=start_minicpm_service, args=())
            p.daemon = True
            p.start()
            # Fixed wait for the service to come up.
            time.sleep(60)

        try:
            with open(f'{audio_stem}.json', 'r', encoding='utf8') as f:
                transcript = json.load(f)
            client = openai.OpenAI(base_url='http://127.0.0.1:8080/v1', api_key='1')
            result = zhconv.convert(transcript['text'], 'zh-hans')
            print(f'responcing to {result}')
            answer = answer2(result, client)
            print('answer', answer)
        finally:
            # Only stop a service this round started, and do so even when
            # the query failed so it is not left running.
            if not gpt_running:
                _stop_service()

        p2 = Process(target=synthesize_answer_and_play, args=(answer, wav_id))
        p2.start()
        p2.join()

        with open('audio-robot-logs.txt', 'a', encoding='utf8') as f:
            f.write(f'{wav_id}\n{result}<SPLIT>\n{answer}<SPLIT>\n')
        et = time.time()
        print(f'this query costs {et-st}s')


if __name__ == '__main__':
    main()
|