import wave
import pyaudio
import configparser
import librosa
import soundfile as sf
from pydub import AudioSegment
# Audio capture parameters, chosen to match the input format used for Whisper.
FORMAT = pyaudio.paInt16 # 16-bit sample depth
CHANNELS = 1 # Mono
RATE = 16000 # Sample rate in Hz (16 kHz)
CHUNK = 1024 # Frames read per buffer; smaller chunks might reduce latency
RECORD_SECONDS = 30 # Recording length in seconds, matching Whisper's expected segment length
def print_input_devices():
    """Print the ID and name of every input-capable device on host API 0.

    A device is listed only when it reports more than zero input channels.
    The ID shown is the device's position within host API 0, i.e. the value
    passed to ``get_device_info_by_host_api_device_index(0, ...)``.
    """
    p = pyaudio.PyAudio()
    try:
        info = p.get_host_api_info_by_index(0)
        numdevices = info.get('deviceCount')
        print("Input devices:")
        for i in range(numdevices):
            # Query the device once and reuse the result for both the
            # channel check and the printed name.
            device_info = p.get_device_info_by_host_api_device_index(0, i)
            if device_info.get('maxInputChannels') > 0:
                print("Input Device ID ", i, " - ", device_info.get('name'))
    finally:
        # Always release the PyAudio instance, even if a device query fails.
        p.terminate()
def print_output_devices():
    """Print the index, name and channel count of every output-capable device.

    A header line is printed first, followed by one line per device that
    reports more than zero output channels.
    """
    pa = pyaudio.PyAudio()
    total = pa.get_device_count()

    print("Output devices:")
    # Lazily pair each device index with its info dict so that each device is
    # queried right before it is (possibly) printed.
    indexed_devices = ((i, pa.get_device_info_by_index(i)) for i in range(total))
    for i, device_info in indexed_devices:
        if device_info['maxOutputChannels'] <= 0:
            # Skip devices that cannot play audio.
            continue
        print(f"Index: {i}, Name: {device_info['name']}, Channels: {device_info['maxOutputChannels']}")

    # Release the PyAudio instance now that the listing is complete.
    pa.terminate()
def find_output_device_index():
    """Return the index of the first output device matching the configured name.

    The text to search for is read from the ``device_output_name`` option of
    the ``Audio Settings`` section in ``settings.ini``. A device matches when
    that text is a substring of the device name and the device reports at
    least one output channel.

    Returns:
        The PyAudio device index of the first matching device.

    Raises:
        AssertionError: If no output device matches the configured name.
        configparser.Error: If the section or option is missing from
            ``settings.ini`` (``config.read`` itself ignores a missing file).
    """
    config = configparser.ConfigParser()
    config.read('settings.ini')
    device_output_name = config.get('Audio Settings', 'device_output_name')

    p = pyaudio.PyAudio()
    try:
        for i in range(p.get_device_count()):
            device_info = p.get_device_info_by_index(i)
            # Ignore input-only devices: their names can contain the same
            # text but they cannot be used for playback.
            if device_info.get('maxOutputChannels', 0) <= 0:
                continue
            if device_output_name in device_info.get('name'):
                return i
    finally:
        # Release the PyAudio instance on every path, including early return.
        p.terminate()

    # Raised explicitly (instead of ``assert False``) so the check still
    # fires when Python runs with -O; the exception type is unchanged.
    raise AssertionError("Could not find output device. Check settings.ini file")
def find_input_device_index():
    """Return the index of the first input device matching the configured name.

    The text to search for is read from the ``device_input_name`` option of
    the ``Audio Settings`` section in ``settings.ini``. Only devices of host
    API 0 are searched. A device matches when that text is a substring of the
    device name and the device reports at least one input channel.

    Returns:
        The device index of the first matching device.

    Raises:
        AssertionError: If no input device matches the configured name.
        configparser.Error: If the section or option is missing from
            ``settings.ini`` (``config.read`` itself ignores a missing file).
    """
    config = configparser.ConfigParser()
    config.read('settings.ini')
    device_input_name = config.get('Audio Settings', 'device_input_name')

    p = pyaudio.PyAudio()
    try:
        info = p.get_host_api_info_by_index(0)
        numdevices = info.get('deviceCount')
        for i in range(numdevices):
            device_info = p.get_device_info_by_host_api_device_index(0, i)
            # Ignore output-only devices: their names can contain the same
            # text but they cannot be opened for recording.
            if device_info.get('maxInputChannels', 0) <= 0:
                continue
            if device_input_name in device_info.get('name'):
                # The result is used as ``input_device_index`` when opening a
                # stream, so prefer the index recorded in the device info;
                # fall back to the host-API-relative position if it is absent.
                # NOTE(review): assumes the 'index' entry is the global
                # device index - confirm against the PyAudio documentation.
                return device_info.get('index', i)
    finally:
        # Release the PyAudio instance on every path, including early return.
        p.terminate()

    # Raised explicitly (instead of ``assert False``) so the check still
    # fires when Python runs with -O; the exception type is unchanged.
    raise AssertionError("Could not find input device. Check settings.ini file")
def record_short_audio():
    """Record audio from the configured input device and save it as a WAV file.

    The input device is looked up with ``find_input_device_index()``. Audio
    is captured with the module-level FORMAT, CHANNELS, RATE and CHUNK
    settings for roughly RECORD_SECONDS seconds (only whole chunks are read,
    so the duration is rounded down to a multiple of CHUNK frames), then
    handed to ``save_audio_frames``.

    Raises:
        AssertionError: If the configured input device cannot be found.
    """
    input_device_index = find_input_device_index()

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE, input=True,
                            frames_per_buffer=CHUNK,
                            input_device_index=input_device_index)
        try:
            print("Recording...")
            num_chunks = int(RATE / CHUNK * RECORD_SECONDS)
            frames = [stream.read(CHUNK) for _ in range(num_chunks)]
            print("Finished recording")
        finally:
            # Stop and close the stream even if a read fails part-way.
            stream.stop_stream()
            stream.close()

        # Save before terminating: save_audio_frames asks this PyAudio
        # instance for the sample width.
        save_audio_frames(audio, frames)
    finally:
        audio.terminate()
def save_audio_frames(audio, frames, filename="audio.wav"):
    """Write recorded frames to a WAV file.

    The file is written with the module-level CHANNELS and RATE settings and
    the sample width that ``audio`` reports for FORMAT.

    Args:
        audio: PyAudio instance used to look up the sample width for FORMAT.
        frames: Iterable of ``bytes`` chunks of raw audio, in recording order.
        filename: Path of the WAV file to create or overwrite. Defaults to
            ``"audio.wav"``, the name this function has always used.
    """
    # Resolve the sample width first so a failure here does not leave an
    # empty or truncated file behind.
    sample_width = audio.get_sample_size(FORMAT)

    # The context manager closes the file even if writing fails.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

    print(f"Audio saved as {filename}")
def change_playback_speed(audio_file, speed=1.25):
    """Time-stretch an audio file in place and return its path.

    The file is loaded with librosa using ``sr=None``, stretched with
    ``librosa.effects.time_stretch`` (which changes duration without changing
    pitch), and written back over the same path with soundfile.

    Args:
        audio_file: Path of the audio file to modify; it is overwritten.
        speed: Stretch rate passed to ``time_stretch``. Defaults to 1.25.

    Returns:
        The same ``audio_file`` path that was passed in.
    """
    samples, sample_rate = librosa.load(audio_file, sr=None)
    # Stretch and write in one step, reusing the sample rate that was loaded.
    sf.write(
        audio_file,
        librosa.effects.time_stretch(samples, rate=speed),
        sample_rate,
    )
    return audio_file
def play_audio(filename):
    """Play a WAV file through a PyAudio output stream.

    The stream's sample format, channel count and rate are taken from the
    WAV file's header. No output device index is passed when opening the
    stream.

    Args:
        filename: Path of the WAV file to play.
    """
    # The context manager closes the WAV file, which was previously left open.
    with wave.open(filename, 'rb') as wf:
        p = pyaudio.PyAudio()
        try:
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True)
            try:
                # readframes returns empty bytes at end of file, which ends
                # the iteration.
                for data in iter(lambda: wf.readframes(1024), b''):
                    stream.write(data)
            finally:
                # Stop and close the stream even if playback fails.
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()
if __name__ == '__main__':
    # When run as a script, list the available input and output devices.
    for list_devices in (print_input_devices, print_output_devices):
        list_devices()
    # Uncomment the call below to also capture a recording:
    # record_short_audio()