Diff of /overlay.py [000000] .. [f86987]

--- /dev/null
+++ b/overlay.py
@@ -0,0 +1,301 @@
+import sys
+import base64
+import threading
+from PyQt5.QtCore import Qt, QTimer, QPoint, QByteArray, QBuffer, QIODevice
+from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QVBoxLayout, QHBoxLayout, QWidget, QGridLayout, QScrollArea, QSizeGrip, QPushButton
+from PyQt5.QtGui import QPixmap
+
+from openai_wrapper import text_fallacy_classification, openAI_TTS
+from real_time_classifier import continuous_audio_transcription, WHISPER_TEXTS
+from audio import play_audio, change_playback_speed
+
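+# Module-level buffer of GPT analysis strings: worker threads append results here
+# and the GUI timer reads the latest entry via get_chatgpt_output().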
+GPT_TEXTS = []
+class TransparentOverlay(QMainWindow):
+    
+    def __init__(self, whs_model, auto):
+        super().__init__()
+
+        self.whs_model = whs_model
+        self.auto = auto
+        self.dragPos = QPoint()
+        self.opacity = 0.6
+        self.is_tts_enabled = False
+
+        self.initUI()
+        
+
+    def initUI(self):
+        self.setWindowTitle('Transparent Overlay')
+        self.setGeometry(0, 0, 1000, 600)
+        self.setWindowOpacity(self.opacity)
+        self.setAttribute(Qt.WA_TranslucentBackground)
+        self.setWindowFlags(Qt.Window | Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
+
+        # Create Scroll Areas
+        self.scroll_area1 = QScrollArea(self)
+        self.scroll_area2 = QScrollArea(self)
+
+        # Increase the dimensions of the scroll areas
+        self.scroll_area1.setMinimumSize(380, 120)
+        self.scroll_area2.setMinimumSize(380, 120)
+        
+        # Create Labels
+        self.whisper_label = QLabel('Whisper Output Will Appear Here')
+        self.chatgpt_label = QLabel('ChatGPT Output Will Appear Here')
+
+        # Enable word-wrap on labels
+        self.whisper_label.setWordWrap(True)
+        self.chatgpt_label.setWordWrap(True)
+
+        # Add labels to Scroll Areas
+        self.scroll_area1.setWidget(self.whisper_label)
+        self.scroll_area2.setWidget(self.chatgpt_label)
+
+        # Enable scroll bars on the scroll areas
+        self.scroll_area1.setWidgetResizable(True)
+        self.scroll_area2.setWidgetResizable(True)
+
+    
+        # Style labels with bold text and increased font size, using professional grey shades
+        self.whisper_label.setStyleSheet('background-color: #dcdcdc; font-weight: bold; font-size: 12px; color: black;')
+        self.chatgpt_label.setStyleSheet('background-color: #696969; font-weight: bold; font-size: 15px; color: white;')
+
+       
+        # Layout setup
+        # QVBoxLayout for the scroll areas
+        vbox_layout = QVBoxLayout()
+        vbox_layout.addWidget(self.scroll_area2)
+        vbox_layout.addWidget(self.scroll_area1)
+        
+        # QGridLayout to include QVBoxLayout and QSizeGrip
+        grid_layout = QGridLayout()
+        grid_layout.addLayout(vbox_layout, 0, 0)
+        
+        # Add QSizeGrip to the QGridLayout
+        size_grip = QSizeGrip(self)
+        grid_layout.addWidget(size_grip, 1, 1, Qt.AlignBottom | Qt.AlignRight)
+
+        container = QWidget()
+        container.setLayout(grid_layout)
+        self.setCentralWidget(container)
+
+        # Run continuous transcription in a background thread; stop_event is a
+        # cooperative shutdown flag set by the Escape key handler.
+        self.stop_event = threading.Event()
+        self.transcription_thread = threading.Thread(target=continuous_audio_transcription, args=(self.whs_model, self.stop_event))
+        self.transcription_thread.start()
+
+        # Timer to update Whisper and ChatGPT outputs
+        self.timer = QTimer(self)
+        self.timer.timeout.connect(self.update_labels)
+        self.timer.start(500)
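+        # Design note: the worker threads only append to module-level lists
+        # (WHISPER_TEXTS / GPT_TEXTS); all widget updates happen here on the GUI
+        # thread via this 500 ms poll, since Qt widgets must only be touched from
+        # the thread that created them.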
+
+        # Screenshot preview label (currently disabled; process_screenshot skips the
+        # preview when this label has not been created)
+        # self.screenshot_label = QLabel(self)
+        # self.screenshot_label.setWordWrap(True)
+        # vbox_layout.addWidget(self.screenshot_label)
+
+        # Add a button for screen capture
+        self.capture_button = QPushButton('Analyze Transcript', self)
+        self.capture_button.clicked.connect(self.start_text_thread)
+        
+        # Toggle buttons
+        self.toggle_whisper_button = QPushButton('Toggle Transcript', self)
+        self.toggle_whisper_button.clicked.connect(self.toggle_whisper_box)
+
+        self.toggle_chatgpt_button = QPushButton('Toggle Analysis', self)
+        self.toggle_chatgpt_button.clicked.connect(self.toggle_chatgpt_box)
+
+        self.toggle_tts_button = QPushButton('Toggle TTS', self)
+        self.toggle_tts_button.clicked.connect(self.toggle_tts)
+
+        # Style buttons
+        self.capture_button.setStyleSheet("QPushButton { background-color: grey; font-weight: bold;  }")
+        self.toggle_whisper_button.setStyleSheet("QPushButton { background-color: green; font-weight: bold;  }")
+        self.toggle_chatgpt_button.setStyleSheet("QPushButton { background-color: green; font-weight: bold; }")
+        self.toggle_tts_button.setStyleSheet("QPushButton { background-color: red; font-weight: bold; }")
+
+        # Create a horizontal layout for the buttons
+        button_layout = QHBoxLayout()
+
+        # Add buttons to the horizontal layout
+        button_layout.addWidget(self.capture_button)
+        button_layout.addWidget(self.toggle_whisper_button)
+        button_layout.addWidget(self.toggle_chatgpt_button)
+        button_layout.addWidget(self.toggle_tts_button)
+
+        # Now add the horizontal layout of buttons to the main vertical layout
+        vbox_layout.addLayout(button_layout)
+    
+
+    def update_labels(self):
+        # get_whisper_transcription returns a list of text segments, newest last.
+        whisper_segments = get_whisper_transcription()
+
+        # Join the segments and show them in the transcript label.
+        self.whisper_label.setText("Transcript: " + ' - '.join(whisper_segments))
+
+        # Show the most recent ChatGPT analysis (empty until the first result arrives).
+        chatgpt_text = "".join(get_chatgpt_output())
+        self.chatgpt_label.setText(chatgpt_text)
+
+        # Mouse tracking is idempotent, so re-applying it on every tick is harmless.
+        self.whisper_label.setMouseTracking(True)
+        self.chatgpt_label.setMouseTracking(True)
+        self.scroll_area1.setMouseTracking(True)
+        self.scroll_area2.setMouseTracking(True)
+
+    def toggle_whisper_box(self):
+        is_visible = self.scroll_area1.isVisible()
+        self.scroll_area1.setVisible(not is_visible)
+        self.toggle_whisper_button.setStyleSheet(
+            "QPushButton { background-color: %s; }" % ('green' if not is_visible else 'red')
+        )
+
+    def toggle_chatgpt_box(self):
+        is_visible = self.scroll_area2.isVisible()
+        self.scroll_area2.setVisible(not is_visible)
+        self.toggle_chatgpt_button.setStyleSheet(
+            "QPushButton { background-color: %s; }" % ('green' if not is_visible else 'red')
+        )
+
+    def toggle_tts(self):
+        self.is_tts_enabled = not self.is_tts_enabled  # flag is initialized in __init__
+        # Update the button color based on the state
+        self.toggle_tts_button.setStyleSheet(
+            "QPushButton { background-color: %s; }" % ('green' if self.is_tts_enabled else 'red')
+        )
+        print(f'TTS is set to {self.is_tts_enabled}')
+        
+
+    def mousePressEvent(self, event):
+        # Remember the global cursor position so mouseMoveEvent can compute the drag delta
+        self.dragPos = event.globalPos()
+
+    def mouseMoveEvent(self, event):
+        if event.buttons() == Qt.LeftButton:
+            # Move the window by the cursor delta since the last event, then reset the anchor
+            self.move(self.pos() + event.globalPos() - self.dragPos)
+            self.dragPos = event.globalPos()
+
+
+    def keyPressEvent(self, event):
+        if event.key() == Qt.Key_Escape:
+            # Signal the transcription thread to stop, wait for it, then close the overlay
+            self.stop_event.set()
+            self.transcription_thread.join()
+            self.close()
+
+    def start_img_text_thread(self):
+        # Screenshot-based analysis (not currently wired to a button). Note that
+        # capture_and_process touches Qt widgets, which should only happen on the
+        # GUI thread, so this path would need signal/slot plumbing before use.
+        capture_thread = threading.Thread(target=self.capture_and_process)
+        capture_thread.start()
+
+    def start_text_thread(self):
+        process_thread = threading.Thread(target=self.process_text)
+        process_thread.start()
+
+    def capture_and_process(self):
+        # Make the overlay fully transparent so it does not appear in the screenshot
+        self.setWindowOpacity(0.0)
+
+        # Flush pending paint events so the transparency takes effect before capturing
+        QApplication.processEvents()
+
+        self.capture_screen()
+        
+    def capture_screen(self):
+        # Capture the screen area under the overlay, with a 50 px margin on every side
+        capture_area = self.geometry()
+        screen = QApplication.primaryScreen()
+        screenshot = screen.grabWindow(0,
+                                       capture_area.x() - 50, capture_area.y() - 50,
+                                       capture_area.width() + 100, capture_area.height() + 100)
+
+        # Restore the configured opacity and flush pending events
+        self.setWindowOpacity(self.opacity)
+        QApplication.processEvents()
+
+        # Hand the screenshot off for preview, encoding, and analysis
+        self.process_screenshot(screenshot)
+
+    def process_screenshot(self, screenshot):
+        # Show the screenshot in the preview label when it exists (the label is
+        # currently commented out in initUI, so skip the preview otherwise)
+        if hasattr(self, 'screenshot_label'):
+            self.screenshot_label.setPixmap(screenshot.scaled(self.screenshot_label.size(), Qt.KeepAspectRatio))
+
+        # Convert the QPixmap to a QImage for scaling and encoding
+        image = screenshot.toImage()
+
+        # Downscale the image (0.3 = 30% of the original size) to shrink the payload
+        scale_factor = 0.3
+        new_width = int(image.width() * scale_factor)
+        new_height = int(image.height() * scale_factor)
+        scaled_image = image.scaled(new_width, new_height, Qt.KeepAspectRatio, Qt.SmoothTransformation)
+        print(scaled_image.width(), scaled_image.height())
+
+        # Prepare a byte array and a buffer to hold the image data
+        byte_array = QByteArray()
+        buffer = QBuffer(byte_array)
+        buffer.open(QIODevice.WriteOnly)
+
+        # Save the image to the buffer in PNG format
+        scaled_image.save(buffer, "PNG")
+
+        # Also save the scaled image to disk (the img/ directory must already exist,
+        # otherwise QImage.save returns False and nothing is written)
+        file_path = "img/screenshot.png"
+        scaled_image.save(file_path, "PNG")
+
+        # Convert byte array to base64
+        base64_data = base64.b64encode(byte_array.data()).decode()
+
+        # Format the base64 string for API use
+        formatted_base64_image = "data:image/png;base64," + base64_data
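+        # The result is a standard data URL, roughly:
+        #   "data:image/png;base64,iVBORw0KGgo..." (payload truncated here)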
+
+        # Send the screenshot (as a data URL) plus the recent transcript segments
+        # to the fallacy classifier
+        text = text_fallacy_classification(formatted_base64_image, get_whisper_transcription())
+
+        GPT_TEXTS.append(text)
+
+        if self.is_tts_enabled:
+            # Read the analysis aloud with OpenAI TTS, speed-adjusted before playback
+            audio_file = openAI_TTS(text)
+            audio_file = change_playback_speed(audio_file)
+            play_audio(audio_file)
+
+    def process_text(self):
+        # Run the fallacy classification on the recent transcript only (no screenshot)
+        text = text_fallacy_classification(None, get_whisper_transcription())
+
+        GPT_TEXTS.append(text)
+
+        if self.is_tts_enabled:
+            # Read the analysis aloud with OpenAI TTS, speed-adjusted before playback
+            audio_file = openAI_TTS(text)
+            audio_file = change_playback_speed(audio_file)
+            play_audio(audio_file)
+            
+
+def get_whisper_transcription():
+    # Return the most recent transcript segments (newest last) from the shared buffer
+    return WHISPER_TEXTS[-9:]
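+# Example: with WHISPER_TEXTS == ["hello there", "how are you"], the function returns
+# both segments and update_labels renders "Transcript: hello there - how are you".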
+
+def get_chatgpt_output():
+    # Return the latest GPT analysis string, or [""] before the first result arrives;
+    # either way, "".join() in update_labels yields the text unchanged.
+    if GPT_TEXTS:
+        return GPT_TEXTS[-1]
+    return [""]
+
+def launch_overlay(whs_model, use_gpt):
+    app = QApplication(sys.argv)
+    overlay = TransparentOverlay(whs_model, use_gpt)
+    overlay.show()
+    sys.exit(app.exec_())
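+# Typical usage from the project's entry point (illustrative sketch; assumes the
+# Whisper model comes from the openai-whisper package):
+#   import whisper
+#   launch_overlay(whisper.load_model("base"), use_gpt=True)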
+
+if __name__ == '__main__':
+    # Running this file directly requires a loaded Whisper model; in normal use
+    # launch_overlay() is called from the project's main entry point, which supplies one.
+    launch_overlay(None, False)