Diff of /audio.py [000000] .. [f86987]

Switch to side-by-side view

--- a
+++ b/audio.py
@@ -0,0 +1,182 @@
+import wave
+import pyaudio
+import configparser
+import librosa
+import soundfile as sf
+from pydub import AudioSegment
+
+# Audio format parameters for Whisper; used both when opening the input
+# stream and when writing the WAV header.
+FORMAT = pyaudio.paInt16  # 16-bit depth
+CHANNELS = 1  # Mono
+RATE = 16000  # 16kHz
+CHUNK = 1024  # Frames per buffer read; smaller chunks might reduce latency
+RECORD_SECONDS = 30  # 30 seconds to match Whisper's expected segment length
+
+def print_input_devices():
+    p = pyaudio.PyAudio()
+    info = p.get_host_api_info_by_index(0)
+    numdevices = info.get('deviceCount')
+    print("Input devices:")
+    for i in range(0, numdevices):
+        if (p.get_device_info_by_host_api_device_index(0, i).get('maxInputChannels')) > 0:
+            print("Input Device ID ", i, " - ", p.get_device_info_by_host_api_device_index(0, i).get('name'))
+
+def print_output_devices():
+    # Initialize PyAudio
+    p = pyaudio.PyAudio()
+    # Get total number of devices
+    num_devices = p.get_device_count()
+    # Loop through and print device info
+    print("Output devices:")
+    for i in range(num_devices):
+        device_info = p.get_device_info_by_index(i)
+        if device_info['maxOutputChannels'] > 0:  # Output device
+            print(f"Index: {i}, Name: {device_info['name']}, Channels: {device_info['maxOutputChannels']}")
+
+    # Terminate PyAudio
+    p.terminate()
+
+
+def find_output_device_index():
+    
+    # Initialize ConfigParser
+    config = configparser.ConfigParser()
+
+    # Read settings.ini file
+    config.read('settings.ini')
+    # Get values
+    device_output_name = config.get('Audio Settings', 'device_output_name')
+
+    # Initialize PyAudio
+    p = pyaudio.PyAudio()
+    # Get total number of devices
+    num_devices = p.get_device_count()
+    # Loop through and print device info
+    for i in range(num_devices):
+        device_info = p.get_device_info_by_index(i)
+        device_name = device_info.get('name')
+
+        # Search for VB-Audio in the device name
+        if device_output_name in device_name:
+            return i
+    
+    # If no device is found, assert False
+    assert False, "Could not find output device. Check settings.ini file"
+   
+
+def find_input_device_index():
+    
+    # Initialize ConfigParser
+    config = configparser.ConfigParser()
+
+    # Read settings.ini file
+    config.read('settings.ini')
+    # Get values
+    device_input_name = config.get('Audio Settings', 'device_input_name')
+    p = pyaudio.PyAudio()
+    info = p.get_host_api_info_by_index(0)
+    numdevices = info.get('deviceCount')
+    
+    for i in range(0, numdevices):
+        device_info = p.get_device_info_by_host_api_device_index(0, i)
+        device_name = device_info.get('name')
+        
+        # Search for VB-Audio in the device name
+        if device_input_name in device_name:
+            return i
+    
+    # If no device is found, assert False
+    assert False, "Could not find input device. Check settings.ini file"
+
+
+def record_short_audio():
+
+    # Change this to the index of the device you want to use
+    # Default recoding devie uses vb audio 
+    input_device_index = find_input_device_index()
+
+    # Initialize PyAudio and start recording
+    audio = pyaudio.PyAudio()
+    stream = audio.open(format=FORMAT, channels=CHANNELS,
+                        rate=RATE, input=True,
+                        frames_per_buffer=CHUNK, input_device_index=input_device_index)
+
+    print("Recording...")
+    frames = []
+
+    for _ in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
+        data = stream.read(CHUNK)
+        frames.append(data)
+
+    print("Finished recording")
+
+    # Stop the stream and terminate PyAudio
+    stream.stop_stream()
+    stream.close()
+    audio.terminate()
+
+    # Save the audio
+    save_audio_frames(audio, frames)
+
+
+def save_audio_frames(audio, frames):
+    WAVE_OUTPUT_FILENAME = "audio.wav"
+    # Save the recording as a WAV file
+    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
+    wf.setnchannels(CHANNELS)
+    wf.setsampwidth(audio.get_sample_size(FORMAT))
+    wf.setframerate(RATE)
+    wf.writeframes(b''.join(frames))
+    wf.close()
+
+    print(f"Audio saved as {WAVE_OUTPUT_FILENAME}")
+
+
+def change_playback_speed(audio_file, speed=1.25):
+       # Load the audio file with librosa
+    y, sr = librosa.load(audio_file, sr=None)
+    
+    # Use librosa's effects.time_stretch for time-stretching without pitch change
+    y_fast = librosa.effects.time_stretch(y, rate=speed)
+    
+    # Write the altered audio back to a file
+    sf.write(audio_file, y_fast, sr)
+    
+    return audio_file
+
+def play_audio(filename):
+
+     # Open the audio file
+    wf = wave.open(filename, 'rb')
+    
+    # Create a PyAudio instance
+    p = pyaudio.PyAudio()
+    
+    # Open an output stream
+    stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
+                    channels=wf.getnchannels(),
+                    rate=wf.getframerate(),
+                    output=True)
+    
+    # Read data in chunks
+    data = wf.readframes(1024)
+    
+    # Play the audio file
+    while len(data) > 0:
+        stream.write(data)
+        data = wf.readframes(1024)
+    
+    # Close the stream
+    stream.stop_stream()
+    stream.close()
+    
+    # Terminate the PyAudio instance
+    p.terminate()
+
+
+
+if __name__=='__main__':
+    # When run as a script, list the available input and output devices.
+    print_input_devices()
+    print_output_devices()
+
+    # Recording is disabled by default; uncomment to capture a clip.
+    # record_short_audio()