"""Audio helpers built on PyAudio.

Lists audio devices, resolves the input/output devices named in
``settings.ini``, records a fixed-length mono clip in the format configured
below for Whisper, changes the playback speed of an audio file and plays WAV
files.
"""
import configparser
import contextlib
import wave
from typing import Iterator

import librosa
import pyaudio
import soundfile as sf
# Unused in this module; retained in case other code imports it from here.
from pydub import AudioSegment

# Audio format parameters for Whisper
FORMAT = pyaudio.paInt16  # 16-bit depth
CHANNELS = 1  # Mono
RATE = 16000  # 16kHz
CHUNK = 1024  # Smaller chunks might reduce latency
RECORD_SECONDS = 30  # 30 seconds to match Whisper's expected segment length

# Location of the device names used by the find_*_device_index() functions.
_SETTINGS_FILE = 'settings.ini'
_SETTINGS_SECTION = 'Audio Settings'


@contextlib.contextmanager
def _pyaudio_session() -> Iterator[pyaudio.PyAudio]:
    """Yield a PyAudio instance and terminate it on exit, even on error."""
    p = pyaudio.PyAudio()
    try:
        yield p
    finally:
        p.terminate()


def _read_audio_setting(option: str) -> str:
    """Return *option* from the ``[Audio Settings]`` section of settings.ini.

    Raises:
        configparser.NoSectionError: the file or the section is missing.
        configparser.NoOptionError: the section has no such option.
    """
    config = configparser.ConfigParser()
    # read() silently skips files it cannot open; the failure then surfaces
    # from get() below as NoSectionError.
    config.read(_SETTINGS_FILE)
    return config.get(_SETTINGS_SECTION, option)


def print_input_devices() -> None:
    """Print the ID and name of every input-capable device of host API 0."""
    with _pyaudio_session() as p:
        num_devices = p.get_host_api_info_by_index(0).get('deviceCount')
        print("Input devices:")
        for i in range(num_devices):
            # Look the device up once instead of once per field.
            device_info = p.get_device_info_by_host_api_device_index(0, i)
            if device_info.get('maxInputChannels') > 0:
                print("Input Device ID ", i, " - ", device_info.get('name'))


def print_output_devices() -> None:
    """Print index, name and channel count of every output-capable device."""
    with _pyaudio_session() as p:
        print("Output devices:")
        for i in range(p.get_device_count()):
            device_info = p.get_device_info_by_index(i)
            if device_info['maxOutputChannels'] > 0:  # Output device
                print(f"Index: {i}, Name: {device_info['name']}, Channels: {device_info['maxOutputChannels']}")


def find_output_device_index() -> int:
    """Return the index of the output device named in settings.ini.

    The ``device_output_name`` setting is matched as a substring of the
    device name. Only devices that have output channels are considered, so a
    capture-only device with a similar name is never returned.

    Raises:
        AssertionError: no output device matches the configured name.
        configparser.Error: the setting cannot be read from settings.ini.
    """
    device_output_name = _read_audio_setting('device_output_name')

    with _pyaudio_session() as p:
        for i in range(p.get_device_count()):
            device_info = p.get_device_info_by_index(i)
            if (device_info['maxOutputChannels'] > 0
                    and device_output_name in device_info['name']):
                return i

    # Raised explicitly: an ``assert`` statement is stripped under ``-O`` and
    # the function would silently return None.
    raise AssertionError("Could not find output device. Check settings.ini file")


def find_input_device_index() -> int:
    """Return the index of the input device named in settings.ini.

    The ``device_input_name`` setting is matched as a substring of the device
    name. Only devices of host API 0 that have input channels are considered.

    Raises:
        AssertionError: no input device matches the configured name.
        configparser.Error: the setting cannot be read from settings.ini.
    """
    device_input_name = _read_audio_setting('device_input_name')

    with _pyaudio_session() as p:
        num_devices = p.get_host_api_info_by_index(0).get('deviceCount')
        for i in range(num_devices):
            device_info = p.get_device_info_by_host_api_device_index(0, i)
            if (device_info['maxInputChannels'] > 0
                    and device_input_name in device_info['name']):
                # ``i`` is local to host API 0; open() expects the global
                # device index, which the info dictionary carries.
                return device_info['index']

    # Raised explicitly: an ``assert`` statement is stripped under ``-O`` and
    # the function would silently return None.
    raise AssertionError("Could not find input device. Check settings.ini file")


def record_short_audio() -> None:
    """Record RECORD_SECONDS of audio from the configured input device.

    The clip is captured with the module-level FORMAT/CHANNELS/RATE settings
    and written to disk by save_audio_frames().
    """
    # The recording device is resolved by name from settings.ini.
    input_device_index = find_input_device_index()

    frames = []
    with _pyaudio_session() as audio:
        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE, input=True,
                            frames_per_buffer=CHUNK,
                            input_device_index=input_device_index)
        try:
            print("Recording...")
            for _ in range(int(RATE / CHUNK * RECORD_SECONDS)):
                frames.append(stream.read(CHUNK))
            print("Finished recording")
        finally:
            # Release the device even if a read fails part-way through.
            stream.stop_stream()
            stream.close()

        # Save while the PyAudio instance is still alive; it is used to look
        # up the sample width.
        save_audio_frames(audio, frames)


def save_audio_frames(audio, frames) -> None:
    """Write recorded *frames* to ``audio.wav`` in the module's audio format.

    Args:
        audio: PyAudio instance, used to look up the sample width of FORMAT.
        frames: iterable of raw byte chunks as returned by ``stream.read``.
    """
    WAVE_OUTPUT_FILENAME = "audio.wav"
    # The context manager closes the file even if writing fails.
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(audio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

    print(f"Audio saved as {WAVE_OUTPUT_FILENAME}")


def change_playback_speed(audio_file, speed=1.25):
    """Time-stretch *audio_file* in place and return its path.

    Args:
        audio_file: path of the audio file to rewrite.
        speed: stretch factor passed to librosa as ``rate``.

    Returns:
        The same *audio_file* path that was passed in.
    """
    # sr=None keeps the file's native sample rate instead of resampling.
    y, sr = librosa.load(audio_file, sr=None)

    # Use librosa's effects.time_stretch for time-stretching without pitch change
    y_fast = librosa.effects.time_stretch(y, rate=speed)

    # Write the altered audio back to the same file
    sf.write(audio_file, y_fast, sr)

    return audio_file


def play_audio(filename) -> None:
    """Play the WAV file *filename* on the default output device.

    Blocks until the whole file has been written to the output stream.
    """
    with wave.open(filename, 'rb') as wf, _pyaudio_session() as p:
        stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                        channels=wf.getnchannels(),
                        rate=wf.getframerate(),
                        output=True)
        try:
            # readframes() returns b'' once the file is exhausted.
            for data in iter(lambda: wf.readframes(CHUNK), b''):
                stream.write(data)
        finally:
            stream.stop_stream()
            stream.close()


if __name__ == '__main__':
    print_input_devices()
    print_output_devices()

    # record_short_audio()