语音问答助手

from multiprocessing import Process
from threading import Thread
import os
import json
import whisper
import zhconv
from pyaudio import PyAudio,paInt16
import wave
from pydub import AudioSegment
from pydub.playback import play
import sys
import time
import numpy as np
import subprocess
import openai

# Absolute directory of the launched script (sys.argv[0] with symlinks resolved).
# synthesize_answer_and_play uses it to build the answer audio path.
script_dir=os.path.dirname(os.path.realpath(sys.argv[0]))

def transcribe(file):
	"""Run Whisper speech recognition on an audio file and dump the result to JSON.

	The ``small`` Whisper model is loaded on every call and the audio is
	decoded as Chinese. The result is written to the input path with its
	extension replaced by ``.json``.

	Args:
		file: Path of the audio file to transcribe.
	"""
	print(f'transcribing {file}')
	asr_model=whisper.load_model('small')
	print('whisper model loaded')
	recognition=asr_model.transcribe(file,language='Chinese')
	print(recognition)
	# Everything before the last dot is the output stem.
	stem=file.rsplit(".",1)[0]
	json_path=f'{stem}.json'
	with open(json_path,'w',encoding='utf8') as out:
		json.dump(recognition,out,ensure_ascii=False,indent=4)

class recorder:
	"""Microphone recorder that captures 16-bit mono PCM and saves it as a WAV file.

	``start`` blocks while it reads chunks, so it is meant to run in a worker
	thread; ``finish`` is called from another thread to clear the ``does``
	flag and write whatever has been captured so far.
	"""
	# Number of frames read from the input stream per chunk.
	NUM_SAMPLES=2000
	# Sampling rate in Hz, used for capture, the WAV header and the duration.
	SAMPLING_RATE=16000
	# Raw PCM chunks captured so far (rebound to a fresh list by start()).
	voice_string=[]
	# True while the capture loop should keep running.
	does=False

	def start(self):
		"""Capture audio chunks into ``voice_string`` until ``does`` becomes False.

		The input stream and the PyAudio instance are always released when
		the loop ends, including when reading raises.
		"""
		print('recording audio...')
		self.does=True
		self.voice_string=[]
		pa=PyAudio()
		try:
			stream=pa.open(format=paInt16,channels=1,rate=self.SAMPLING_RATE,input=True,frames_per_buffer=self.NUM_SAMPLES)
			try:
				while self.does:
					string_audio_data=stream.read(self.NUM_SAMPLES)
					self.voice_string.append(string_audio_data)
			finally:
				# Release the audio device so the next recording can reopen it.
				stream.stop_stream()
				stream.close()
		finally:
			pa.terminate()

	def finish(self):
		"""Stop recording and write the captured audio to ``yy/mm/dd/HHMMSS.wav``.

		The path is relative to the current working directory; missing
		date directories are created.

		Returns:
			tuple: ``(wav_id, wav_duration)``. ``wav_id`` is the timestamp
			string ``yy_mm_dd_HHMMSS`` (replace ``_`` with ``/`` to get the
			file path without extension); ``wav_duration`` is the clip
			length in seconds.
		"""
		print('recording audio end')
		self.does=False
		wav_id=time.strftime('%y_%m_%d_%H%M%S')
		wav_path=f'{wav_id.replace("_","/")}.wav'
		os.makedirs(os.path.dirname(wav_path),exist_ok=True)
		# Concatenate the raw chunks directly: this keeps every byte as-is
		# even when chunks differ in length.
		frames=b''.join(self.voice_string)
		with wave.open(wav_path,'wb') as wf:
			wf.setnchannels(1)
			wf.setsampwidth(2)
			wf.setframerate(self.SAMPLING_RATE)
			wf.writeframes(frames)
		# One frame is 2 bytes (16-bit mono); use the configured rate rather
		# than a hard-coded 16000 so the duration stays correct if it changes.
		wav_duration=(len(frames)//2)/self.SAMPLING_RATE
		print('wav_id',wav_id)
		return wav_id,wav_duration

# Module-level recorder instance; the main loop runs rec.start in a thread and calls rec.finish.
rec=recorder()

def start_minicpm_service():
	"""Launch ``run_minicpm_service.py`` from its install directory.

	Changes the working directory of the calling process, then blocks until
	the launched command exits.
	"""
	service_dir='C:/Users/tellw/apps/python'
	launch_cmd='python run_minicpm_service.py'
	os.chdir(service_dir)
	subprocess.run(launch_cmd,shell=True)

def answer2(result,client):
	"""Send ``result`` to the chat model and return the text of its reply.

	Args:
		result: User message text, sent as the ``user`` turn.
		client: Object exposing ``chat.completions.create`` (the main loop
			passes an ``openai.OpenAI`` client).

	Returns:
		The ``content`` of the first choice's message.
	"""
	conversation=[
		{'role':'system','content':'You are a helpful assistant.'},
		{'role':'user','content':result},
	]
	response=client.chat.completions.create(
		model='Model-7.6B-Q4_0_openbmb_MiniCPM-o-2_6-gguf',
		messages=conversation,
		frequency_penalty=0.2,
	)
	first_choice=response.choices[0]
	return first_choice.message.content

def synthesize_answer_and_play(answer,wav_id):
	"""Synthesize ``answer`` to speech with the VITS script and play the result.

	Runs ``custom_synthesize_shell.py`` inside the VITS-Paimon directory,
	then switches to ``script_dir`` and plays ``yy/mm/dd/HHMMSS_answer.wav``.
	The synthesizer is presumably expected to write that file from the
	output stem it is given (no ``.wav`` suffix is passed) -- TODO confirm.

	Args:
		answer: Text to speak; spaces are removed before it is handed over.
		wav_id: Recording id in ``yy_mm_dd_HHMMSS`` form.
	"""
	answer_stem=f'{wav_id.replace("_","/")}_answer'
	os.chdir('D:/asr-service/VITS-Paimon')
	print('synthesizing audios...')
	# The answer is free-form model output. Passing it as one list element
	# keeps tabs, quotes or an empty string from being re-split or dropped,
	# which happened when it was spliced into a single command-line string.
	subprocess.run([
		'python',
		'custom_synthesize_shell.py',
		answer.replace(" ",""),
		f'{script_dir}/{answer_stem}',
	])
	os.chdir(script_dir)
	print('start to play')
	song=AudioSegment.from_wav(f'{answer_stem}.wav')
	play(song)
	print('play end')

def exec_shell(cmd, ignore_err=False):
    """Run ``cmd`` through the shell and capture what it prints.

    Args:
        cmd: Command line handed to the shell.
        ignore_err: When true, return the captured output even if the
            command exits with a non-zero status.

    Returns:
        ``(stdout, stderr)`` as bytes when the command succeeds or
        ``ignore_err`` is set; otherwise ``(-1000, message)`` where
        ``message`` names the failed command.
    """
    finished = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if finished.returncode != 0 and not ignore_err:
        return -1000, f'execute "{cmd}" failed'
    return finished.stdout, finished.stderr
if __name__=='__main__':
	# Interactive loop: record -> transcribe -> ask the chat model -> speak the answer.
	while True:
		input('录音？')
		record_thread=Thread(target=rec.start,args=())
		record_thread.daemon=True
		record_thread.start()
		input('停止？')
		st=time.time()
		wav_id,wav_duration=rec.finish()
		# yy/mm/dd/HHMMSS: path stem shared by the .wav and .json files.
		wav_stem=wav_id.replace("_","/")
		# Transcription runs in a child process and hands its result back
		# through the .json file written next to the recording.
		p1=Process(target=transcribe,args=(f'{wav_stem}.wav',))
		p1.start()
		p1.join()
		# Any listener on port 8080 is taken to be the chat service.
		res,_=exec_shell('netstat -ano|grep 8080|grep -i listen',True)
		gpt_running=len(res.decode())!=0
		if gpt_running:
			print('chatgpt服务正在运行')
		else:
			p=Process(target=start_minicpm_service,args=())
			p.daemon=True
			p.start()
			# Fixed wait for the service to come up.
			time.sleep(60)
		with open(f'{wav_stem}.json','r',encoding='utf8') as f:
			result=json.load(f)
		client=openai.OpenAI(base_url='http://127.0.0.1:8080/v1',api_key='1')
		result=zhconv.convert(result['text'],'zh-hans')
		print(f'responcing to {result}')
		answer=answer2(result,client)
		print('answer',answer)
		if not gpt_running:
			# Only stop the service when this iteration started it.
			output,_=exec_shell('netstat -ano|grep 8080|grep -i listen')
			# exec_shell returns -1000 instead of bytes when the pipeline
			# fails (e.g. grep matched nothing); treat that as "no listener"
			# rather than crashing on int.decode().
			listeners=output.decode().strip() if isinstance(output,bytes) else ''
			if listeners:
				# The PID is the last column of the first matching netstat row.
				pid=int(listeners.split('\n')[0].strip().split(' ')[-1])
				print('pid',pid)
				os.kill(pid,9)
				print('结束chatgpt服务')
			else:
				print('no listener found on port 8080; service was not stopped')
		p2=Process(target=synthesize_answer_and_play,args=(answer,wav_id))
		p2.start()
		p2.join()
		with open('audio-robot-logs.txt','a',encoding='utf8') as f:
			f.write(f'{wav_id}\n{result}<SPLIT>\n{answer}<SPLIT>\n')
		et=time.time()
		print(f'this query costs {et-st}s')

生成熟肉视频

from multiprocessing import Process
import os
import subprocess
import sys
import psutil
import time
import whisper
import json
import openai

# Absolute directory of the launched script (sys.argv[0] with symlinks resolved).
script_dir=os.path.dirname(os.path.realpath(sys.argv[0]))

def transcribe(file):
    """Extract a video's audio with ffmpeg and transcribe it as Japanese with Whisper.

    ffmpeg converts ``file`` to ``test.wav`` in the current working directory
    (overwriting any existing one). The Whisper result is written to the
    input path with its extension replaced by ``.json``.

    Args:
        file: Path of the video file.
    """
    print(f'transcribing {file}')
    # Argument list instead of a shell string, so the file name reaches
    # ffmpeg unchanged whatever characters it contains.
    subprocess.run(['ffmpeg','-i',file,'test.wav','-y'])
    model=whisper.load_model('small')
    print('whisper model loaded')
    result=model.transcribe('test.wav',language='Japanese')
    print(result)
    with open(f'{file.rsplit(".",1)[0]}.json','w',encoding='utf8') as f:
        json.dump(result,f,ensure_ascii=False,indent=4)

def start_jp2cn_service():
    """Launch ``run_jp2zh_service.py`` from its install directory.

    Changes the working directory of the calling process, then blocks until
    the launched command exits.
    """
    service_dir='C:/Users/tellw/apps/python'
    launch_cmd='python run_jp2zh_service.py'
    os.chdir(service_dir)
    subprocess.run(launch_cmd,shell=True)

def translate_jp2zh(jp,client):
    """Translate one Japanese line through the chat model.

    Args:
        jp: Source text, sent as the ``user`` turn.
        client: Object exposing ``chat.completions.create`` (the main script
            passes an ``openai.OpenAI`` client).

    Returns:
        The model's reply with a single trailing ``。`` removed. If the
        request raises, or the reply carries no text, ``jp`` is returned
        unchanged.
    """
    try:
        completion=client.chat.completions.create(model='sakura-1.5b-qwen2.5-v1.0-fp16',messages=[{'role':'system','content':'You are a helpful assistant.'},{'role':'user','content':jp}],frequency_penalty=0.2,timeout=20)
    except Exception as e:
        # Best effort: one failed request must not abort the whole subtitle
        # run, so fall back to the untranslated text.
        print(f'{e}')
        return jp
    content=completion.choices[0].message.content
    if content is None:
        return jp
    # Test the LAST character; comparing content[:-1] to '。' never matched
    # a normal sentence, so the full stop was never stripped.
    if content.endswith('。'):
        return content[:-1]
    return content

def srt_time(t):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        t: Offset in seconds (int or float). Negative values are clamped
            to zero.

    Returns:
        The timestamp string, rounded to the nearest millisecond.
    """
    # Work in whole milliseconds so the fractional part of the offset is
    # kept instead of always emitting ",000".
    total_ms=max(0,int(round(t*1000)))
    h,rest=divmod(total_ms,3600000)
    m,rest=divmod(rest,60000)
    s,ms=divmod(rest,1000)
    return f'{h:02d}:{m:02d}:{s:02d},{ms:03d}'

def exec_shell(cmd, ignore_err=False):
    """Run ``cmd`` through the shell and capture what it prints.

    Args:
        cmd: Command line handed to the shell.
        ignore_err: When true, return the captured output even if the
            command exits with a non-zero status.

    Returns:
        ``(stdout, stderr)`` as bytes when the command succeeds or
        ``ignore_err`` is set; otherwise ``(-1000, message)`` where
        ``message`` names the failed command.
    """
    finished = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if finished.returncode != 0 and not ignore_err:
        return -1000, f'execute "{cmd}" failed'
    return finished.stdout, finished.stderr

if __name__=='__main__':
    # Usage: <script> <video_dir> [shutdown_y]
    # For every .mp4/.mkv in <video_dir>: transcribe (Japanese), translate
    # each segment into an .srt, then burn the subtitles into *_subtitle.mp4.
    # Existing .json/.srt files are reused.
    if len(sys.argv)<2:
        sys.exit('usage: <script> <video_dir> [shutdown_y]')
    wd=sys.argv[1]
    os.chdir(wd)
    for file in os.listdir():
        if '.' not in file or file.rsplit('.',1)[1] not in ['mp4','mkv']:
            continue
        stem=file.rsplit(".",1)[0]
        # Outputs of an earlier run live in the same directory; without this
        # check a rerun would transcribe and subtitle them again.
        if stem.endswith('_subtitle'):
            continue
        if not os.path.exists(f'{stem}.json'):
            p1=Process(target=transcribe,args=(file,))
            p1.start()
            p1.join()
        if not os.path.exists(f'{stem}.srt'):
            p=Process(target=start_jp2cn_service,args=())
            p.daemon=True
            p.start()
            # Fixed wait for the translation service to come up.
            time.sleep(30)
            with open(f'{stem}.json','r',encoding='utf8') as f:
                result=json.load(f)
            client=openai.OpenAI(base_url='http://127.0.0.1:8080/v1',api_key='1')
            segments=result['segments']
            rsl=len(segments)
            cues=[]
            for i,segment in enumerate(segments):
                print(f'translating {i/rsl*100}% {i}/{rsl} {segment["text"]} {time.strftime("%y%m%d%H%M%S")}')
                # One SRT cue: 1-based index, time range, translated text.
                nc=f'{segment["id"]+1}\n{srt_time(segment["start"])} --> {srt_time(segment["end"])}\n{translate_jp2zh(segment["text"],client)}\n\n'
                print(nc)
                cues.append(nc)
            with open(f'{stem}.srt','w',encoding='utf8') as f:
                f.write(''.join(cues))
            output,_=exec_shell('netstat -ano|grep 8080|grep -i listen')
            # exec_shell returns -1000 instead of bytes when the pipeline
            # fails (e.g. grep matched nothing); treat that as "no listener"
            # rather than crashing on int.decode().
            listeners=output.decode().strip() if isinstance(output,bytes) else ''
            if listeners:
                # The PID is the last column of the first matching netstat row.
                pid=int(listeners.split('\n')[0].strip().split(' ')[-1])
                print(pid)
                os.kill(pid,9)
                print('结束日语翻译服务')
            else:
                print('no listener found on port 8080; service was not stopped')
        print(f'generating subtitle video--{stem}_subtitle.mp4')
        subprocess.run(f'ffmpeg -i "{file}" -vf subtitles="{stem}.srt" "{stem}_subtitle.mp4" -y')
    # Optional second argument powers the machine off once all videos are done.
    if len(sys.argv)>=3 and sys.argv[2]=='shutdown_y':
        subprocess.run('shutdown -s -t 0',shell=True)

创建于2502051411，修改于2502051411

语音问答助手和生成熟肉视频

语音问答助手

生成熟肉视频