# overlay.py
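"""Transparent always-on-top PyQt5 overlay.

Shows a live Whisper transcript and LLM fallacy-analysis output side by side,
with optional text-to-speech playback of the analysis."""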
import sys
import base64
import threading

from PyQt5.QtCore import Qt, QTimer, QPoint, QByteArray, QBuffer, QIODevice
from PyQt5.QtWidgets import (QApplication, QLabel, QMainWindow, QVBoxLayout,
                             QHBoxLayout, QWidget, QGridLayout, QScrollArea,
                             QSizeGrip, QPushButton)
from PyQt5.QtGui import QPixmap

from openai_wrapper import text_fallacy_classification, openAI_TTS
from real_time_classifier import continuous_audio_transcription, WHISPER_TEXTS
from audio import play_audio, change_playback_speed

# Model responses, newest last.
GPT_TEXTS = []

class TransparentOverlay(QMainWindow):

    def __init__(self, whs_model, auto):
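        """Store the Whisper model handle and runtime flags, then build the UI."""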
        super().__init__()

        self.whs_model = whs_model
        self.auto = auto
        self.dragPos = QPoint()
        self.opacity = 0.6
        self.is_tts_enabled = False

        self.initUI()

    def initUI(self):
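        """Build the overlay: output panes, buttons, transcription thread, and refresh timer."""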
        self.setWindowTitle('Transparent Overlay')
        self.setGeometry(0, 0, 1000, 600)
        self.setWindowOpacity(self.opacity)
        self.setAttribute(Qt.WA_TranslucentBackground)
        self.setWindowFlags(Qt.Window | Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)

        # Create scroll areas for the two output panes
        self.scroll_area1 = QScrollArea(self)
        self.scroll_area2 = QScrollArea(self)

        # Give the scroll areas a reasonable minimum size
        self.scroll_area1.setMinimumSize(380, 120)
        self.scroll_area2.setMinimumSize(380, 120)

        # Create labels
        self.whisper_label = QLabel('Whisper Output Will Appear Here')
        self.chatgpt_label = QLabel('ChatGPT Output Will Appear Here')

        # Enable word wrap on the labels
        self.whisper_label.setWordWrap(True)
        self.chatgpt_label.setWordWrap(True)

        # Add the labels to the scroll areas
        self.scroll_area1.setWidget(self.whisper_label)
        self.scroll_area2.setWidget(self.chatgpt_label)

        # Let the scroll areas resize their contents
        self.scroll_area1.setWidgetResizable(True)
        self.scroll_area2.setWidgetResizable(True)

        # Style labels with bold text and increased font size, using professional grey shades
        self.whisper_label.setStyleSheet('background-color: #dcdcdc; font-weight: bold; font-size: 12px; color: black;')
        self.chatgpt_label.setStyleSheet('background-color: #696969; font-weight: bold; font-size: 15px; color: white;')

        # Layout setup: a QVBoxLayout for the scroll areas
        vbox_layout = QVBoxLayout()
        vbox_layout.addWidget(self.scroll_area2)
        vbox_layout.addWidget(self.scroll_area1)

        # QGridLayout to hold the QVBoxLayout and a QSizeGrip
        grid_layout = QGridLayout()
        grid_layout.addLayout(vbox_layout, 0, 0)

        # Add a QSizeGrip so the frameless window can be resized
        size_grip = QSizeGrip(self)
        grid_layout.addWidget(size_grip, 1, 1, Qt.AlignBottom | Qt.AlignRight)

        container = QWidget()
        container.setLayout(grid_layout)
        self.setCentralWidget(container)

        # Run continuous transcription and classification in a separate thread
        self.stop_event = threading.Event()
        self.transcription_thread = threading.Thread(target=continuous_audio_transcription, args=(self.whs_model, self.stop_event))
        self.transcription_thread.start()

        # Timer to refresh the Whisper and ChatGPT outputs
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.update_labels)
        self.timer.start(500)

        # Optional label to display the screenshot (uncomment to preview captures)
        # self.screenshot_label = QLabel(self)
        # self.screenshot_label.setWordWrap(True)
        # vbox_layout.addWidget(self.screenshot_label)

        # Button that triggers transcript analysis
        self.capture_button = QPushButton('Analyze Transcript', self)
        self.capture_button.clicked.connect(self.start_text_thread)

        # Toggle buttons
        self.toggle_whisper_button = QPushButton('Toggle Transcript', self)
        self.toggle_whisper_button.clicked.connect(self.toggle_whisper_box)

        self.toggle_chatgpt_button = QPushButton('Toggle Analysis', self)
        self.toggle_chatgpt_button.clicked.connect(self.toggle_chatgpt_box)

        self.toggle_tts_button = QPushButton('Toggle TTS', self)
        self.toggle_tts_button.clicked.connect(self.toggle_tts)

        # Style the buttons
        self.capture_button.setStyleSheet("QPushButton { background-color: grey; font-weight: bold; }")
        self.toggle_whisper_button.setStyleSheet("QPushButton { background-color: green; font-weight: bold; }")
        self.toggle_chatgpt_button.setStyleSheet("QPushButton { background-color: green; font-weight: bold; }")
        self.toggle_tts_button.setStyleSheet("QPushButton { background-color: red; font-weight: bold; }")

        # Horizontal layout for the buttons
        button_layout = QHBoxLayout()
        button_layout.addWidget(self.capture_button)
        button_layout.addWidget(self.toggle_whisper_button)
        button_layout.addWidget(self.toggle_chatgpt_button)
        button_layout.addWidget(self.toggle_tts_button)

        # Add the row of buttons to the main vertical layout
        vbox_layout.addLayout(button_layout)

    def update_labels(self):
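        """Refresh both panes from the shared transcript and response buffers."""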
        # get_whisper_transcription returns a list of text segments, newest last.
        whisper_segments = get_whisper_transcription()

        # Concatenate the segments and set the label text.
        self.whisper_label.setText("Transcript: " + ' - '.join(whisper_segments))

        # Show the most recent model response.
        chatgpt_output_list = get_chatgpt_output()
        chatgpt_text = "".join(chatgpt_output_list)
        self.chatgpt_label.setText(chatgpt_text)

        # Keep mouse tracking enabled so the frameless window stays draggable.
        self.whisper_label.setMouseTracking(True)
        self.chatgpt_label.setMouseTracking(True)
        self.scroll_area1.setMouseTracking(True)
        self.scroll_area2.setMouseTracking(True)

    def toggle_whisper_box(self):
        is_visible = self.scroll_area1.isVisible()
        self.scroll_area1.setVisible(not is_visible)
        self.toggle_whisper_button.setStyleSheet(
            "QPushButton { background-color: %s; }" % ('green' if not is_visible else 'red')
        )

    def toggle_chatgpt_box(self):
        is_visible = self.scroll_area2.isVisible()
        self.scroll_area2.setVisible(not is_visible)
        self.toggle_chatgpt_button.setStyleSheet(
            "QPushButton { background-color: %s; }" % ('green' if not is_visible else 'red')
        )

    def toggle_tts(self):
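        """Flip TTS playback on or off and recolor the button to match."""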
        self.is_tts_enabled = not self.is_tts_enabled
        # Update the button color based on the new state
        self.toggle_tts_button.setStyleSheet(
            "QPushButton { background-color: %s; }" % ('green' if self.is_tts_enabled else 'red')
        )
        print(f'TTS is set to {self.is_tts_enabled}')

    def mousePressEvent(self, event):
        # Remember where the drag started so the window can follow the cursor
        self.dragPos = event.globalPos()

    def mouseMoveEvent(self, event):
        # Drag the frameless window with the left mouse button
        if event.buttons() == Qt.LeftButton:
            self.move(self.pos() + event.globalPos() - self.dragPos)
            self.dragPos = event.globalPos()

    def keyPressEvent(self, event):
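        """On Escape, stop the transcription thread and close the overlay."""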
        if event.key() == Qt.Key_Escape:
            # Signal the transcription thread to stop
            self.stop_event.set()
            self.transcription_thread.join()  # Optional: wait for the thread to finish
            self.close()

    def start_img_text_thread(self):
        # Capture the screen and analyze it on a background thread
        capture_thread = threading.Thread(target=self.capture_and_process)
        capture_thread.start()

    def start_text_thread(self):
        # Analyze the transcript on a background thread
        process_thread = threading.Thread(target=self.process_text)
        process_thread.start()

    def capture_and_process(self):
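        """Hide the overlay, then grab the screen behind it."""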
        # Make the overlay fully transparent so it is not captured
        self.setWindowOpacity(0.0)

        # Process all pending application events so the change takes effect
        QApplication.processEvents()

        # Take the screenshot now that the overlay is invisible
        self.capture_screen()

    def capture_screen(self):
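        """Grab the screen around the overlay's geometry, then restore opacity."""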
        # Use the overlay's geometry, plus a margin, as the capture area
        capture_area = self.geometry()

        # Capture the screen
        screen = QApplication.primaryScreen()
        screenshot = screen.grabWindow(0, capture_area.x() - 50, capture_area.y() - 50, capture_area.width() + 100, capture_area.height() + 100)

        # Restore the default opacity
        self.setWindowOpacity(self.opacity)
        # Process all pending application events
        QApplication.processEvents()

        # Hand the screenshot off for display and analysis
        self.process_screenshot(screenshot)

    def process_screenshot(self, screenshot):
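        """Scale the screenshot, encode it as a base64 PNG, and send it for analysis."""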
        # Display the screenshot if the preview label was enabled in initUI
        if hasattr(self, 'screenshot_label'):
            pixmap = QPixmap(screenshot)
            self.screenshot_label.setPixmap(pixmap.scaled(self.screenshot_label.size(), Qt.KeepAspectRatio))

        # Convert the QPixmap to a QImage
        image = screenshot.toImage()

        # Scale the image down to reduce the payload size
        scale_factor = 0.3
        new_width = int(image.width() * scale_factor)
        new_height = int(image.height() * scale_factor)
        scaled_image = image.scaled(new_width, new_height, Qt.KeepAspectRatio, Qt.SmoothTransformation)
        print(scaled_image.width(), scaled_image.height())

        # Prepare a byte array and a buffer to hold the image data
        byte_array = QByteArray()
        buffer = QBuffer(byte_array)
        buffer.open(QIODevice.WriteOnly)

        # Save the image to the buffer in PNG format
        scaled_image.save(buffer, "PNG")

        # Also save the image to a file
        file_path = "img/screenshot.png"
        scaled_image.save(file_path, "PNG")

        # Convert the byte array to base64 and format it as a data URL for the API
        base64_data = base64.b64encode(byte_array.data()).decode()
        formatted_base64_image = "data:image/png;base64," + base64_data

        # Send the image and the recent transcript for fallacy classification
        text = text_fallacy_classification(formatted_base64_image, get_whisper_transcription())

        GPT_TEXTS.append(text)

        if self.is_tts_enabled:
            # Speak the response
            audio_file = openAI_TTS(text)
            audio_file = change_playback_speed(audio_file)
            play_audio(audio_file)

    def process_text(self):
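        """Send the recent transcript (without an image) for fallacy classification."""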
        # Send only the recent transcript for fallacy classification
        text = text_fallacy_classification(None, get_whisper_transcription())

        GPT_TEXTS.append(text)

        if self.is_tts_enabled:
            # Speak the response
            audio_file = openAI_TTS(text)
            audio_file = change_playback_speed(audio_file)
            play_audio(audio_file)

def get_whisper_transcription():
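    """Return the most recent transcript segments, newest last."""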
    # WHISPER_TEXTS is shared with the transcription thread, which appends
    # segments to it in place.
    last_n_segments = WHISPER_TEXTS[-10:]  # keep the ten most recent segments
    return last_n_segments

def get_chatgpt_output():
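    """Return the latest model response as a one-element list, or [""] if none."""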
    if GPT_TEXTS:
        return [GPT_TEXTS[-1]]
    else:
        return [""]

def launch_overlay(whs_model=None, use_gpt=False):
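    """Create the Qt application and run the overlay until it exits."""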
    app = QApplication(sys.argv)
    overlay = TransparentOverlay(whs_model, use_gpt)
    overlay.show()
    sys.exit(app.exec_())


if __name__ == '__main__':
    # Note: running this module directly starts the overlay without a loaded
    # Whisper model, so live transcription will not work; callers normally
    # pass a model and the GPT flag.
    launch_overlay()