使用sounddevice录制扬声器的声音

寻找扬声器声音的可录制接口

import sounddevice as sd
import wave
import os

# List every audio device known to sounddevice so that the indices printed
# below can be matched to device names.
devices = sd.query_devices()
print(devices)

# Probe every device index: try to record 10 seconds of 16 kHz, mono, 16-bit
# audio from it into test<i>.wav.  Devices that cannot be opened as an input
# are reported as unavailable and their (empty) file is deleted.
for i in range(len(devices)):
    path = f'test{i}.wav'
    wf = wave.open(path, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(2)
    wf.setframerate(16000)

    def callback(indata, frames, time, status):
        # Append the raw block delivered by the stream to the WAV file.
        wf.writeframes(bytes(indata))

    try:
        try:
            with sd.RawInputStream(samplerate=16000, blocksize=4096,
                                   dtype='int16', channels=1, device=i,
                                   callback=callback):
                sd.sleep(10000)
        finally:
            # Always release the file handle, including on Ctrl+C.
            wf.close()
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt still aborts the whole scan instead of being
        # reported as an unavailable device.
        print(f'{i} interface unavailable')
        os.remove(path)
    else:
        print(f'{i} interface available')

针对sounddevice.query_devices()所查询到的每个设备各录制10秒音频，出现报错则说明该设备无法录制。聆听每个可用设备所生成的test{i}.wav（i为设备编号），可以找到目标设备。

录制屏幕

import sounddevice as sd
import wave
import os
import time
import numpy as np
from PIL import ImageGrab,Image
import cv2
from multiprocessing import Process
import subprocess
import shutil

def record_screen_motion(st, bbox=(0, 0, 1920, 1080)):
    """Grab the screen in an endless loop and save each grab as a JPEG.

    The function never returns; it is meant to run in a separate process
    that the caller terminates.  Files are written to the current working
    directory and named ``NNNNN.jpg``, where ``NNNNN`` is the number of
    tenths of a second elapsed between ``st`` and the moment the grab
    finished (i.e. the frame index on a 10 fps timeline).

    Args:
        st: Start time as a ``time.time()`` timestamp.  Capturing begins
            once this moment has been reached.
        bbox: ``(left, top, right, bottom)`` region to capture, in screen
            coordinates.  Defaults to a 1920x1080 screen (see Windows
            Settings > Display > Resolution).
    """
    # Sleep until the start time instead of spinning on time.time(), which
    # would keep one CPU core fully busy.  Loop in case sleep wakes early.
    remaining = st - time.time()
    while remaining > 0:
        time.sleep(remaining)
        remaining = st - time.time()

    while True:
        img = ImageGrab.grab(bbox=bbox)
        ct = time.time()
        # Convert through the image buffer directly: going through
        # getdata() creates one Python object per pixel and is far slower.
        # convert('RGB') guarantees three channels even if the grab is RGBA.
        img = np.asarray(img.convert('RGB'), dtype=np.uint8)
        # OpenCV expects BGR channel order.
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        cv2.imwrite(f'{int((ct-st)*10):05d}.jpg', img)

def _fill_missing_frames():
    """Back-fill gaps in the numbered JPEG frames of the current directory.

    Frames are named ``NNNNN.jpg``.  Every index below the highest captured
    index that has no file gets a copy of the next captured frame, so the
    sequence becomes contiguous starting at 0.

    Returns:
        True if at least one frame exists, False if there is nothing to do.
    """
    captured = sorted(
        int(name.split('.')[0])
        for name in os.listdir()
        if name.endswith('.jpg')
    )
    if not captured:
        return False
    present = set(captured)
    next_pos = 0  # position in `captured` of the next frame at or after i
    for i in range(captured[-1]):
        if i in present:
            next_pos += 1
        else:
            shutil.copy(f'{captured[next_pos]:05d}.jpg', f'{i:05d}.jpg')
    return True


def _encode_video():
    """Build test.mp4 from the JPEG frames and mux test.wav into o.mp4."""
    # Argument lists (not a single command string) work on every platform
    # without a shell; check=True stops on an ffmpeg failure instead of
    # carrying on to the next step.
    subprocess.run(
        ['ffmpeg', '-f', 'image2', '-framerate', '10', '-i', '%05d.jpg',
         '-b:v', '25313k', 'test.mp4'],
        check=True,
    )
    # Encode the audio stream as AAC when building the final video: the MP4
    # container does not support PCM (pcm_alaw, pcm_s16le).
    # https://cloud.tencent.com/developer/ask/sof/105539438
    subprocess.run(
        ['ffmpeg', '-i', 'test.mp4', '-i', 'test.wav', '-vcodec', 'copy',
         '-acodec', 'aac', 'o.mp4'],
        check=True,
    )


if __name__=='__main__':
    # All output goes into a fresh directory named after the current time.
    di = time.strftime('%Y%m%d%H%M%S')
    os.mkdir(di)
    os.chdir(di)

    # 16 kHz, mono, 16-bit WAV file that receives the recorded audio.
    wf = wave.open('test.wav', 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(2)
    wf.setframerate(16000)

    def callback(indata, frames, time, status):
        # Append the raw block delivered by the stream to the WAV file.
        wf.writeframes(bytes(indata))

    # Screen capture and audio recording both start one second from now so
    # that the two streams share the same time origin.
    st = time.time()
    record_screen_process = Process(target=record_screen_motion, args=(st+1,))
    record_screen_process.daemon = True
    record_screen_process.start()
    try:
        # Sleep (rather than busy-wait) until the shared start time.
        remaining = st + 1 - time.time()
        while remaining > 0:
            time.sleep(remaining)
            remaining = st + 1 - time.time()
        # NOTE(review): device=21 is presumably the machine-specific index
        # of the loopback device found by probing -- confirm before running.
        with sd.RawInputStream(samplerate=16000, blocksize=1024,
                               dtype='int16', channels=1, device=21,
                               callback=callback):
            # Record until the user presses Ctrl+C.
            while True:
                sd.sleep(10000)
    except KeyboardInterrupt:
        pass
    finally:
        # Runs on Ctrl+C and on errors alike: finalize the WAV file and make
        # sure the capture process is no longer writing frames.
        wf.close()
        record_screen_process.terminate()
        record_screen_process.join()

    # Only reached after Ctrl+C; any other exception has propagated above.
    if _fill_missing_frames():
        _encode_video()
        print('完成录制屏幕任务')
    else:
        print('no frames were captured; skipping video encoding')

截屏和录音在脚本启动1秒后同时开始。由于笔记本性能有限，每隔40毫秒启动一个截屏线程（即按电影帧率取帧）的效果，与不使用线程、连续截屏的效果相同（约1fps）。因此脚本根据每次截屏时刻与第0帧之间的时间差，按10fps为帧编号，并复制已有帧来补齐中间缺失的帧，最后用图片序列合成视频。如果电脑硬件条件较好、截屏速率大于10fps，可以考虑调整相关参数，使画面更加流畅。【录屏】

参考链接：

massorant Vosk_real-time_stt

tellw/speech_input

ffmpeg操作汇总

python 三种方式实现截屏（详解+完整代码）

创建于2412222215，修改于2412222215