overlay.py
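# PyQt5 overlay window: shows the live Whisper transcript and the GPT fallacy
# analysis in two translucent, always-on-top scroll areas, with buttons to
# trigger analysis, toggle each panel, and toggle text-to-speech playback.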
import sys
import base64
import threading
from PyQt5.QtCore import Qt, QTimer, QPoint, QByteArray, QBuffer, QIODevice
from PyQt5.QtWidgets import (QApplication, QLabel, QMainWindow, QVBoxLayout, QHBoxLayout,
                             QWidget, QGridLayout, QScrollArea, QSizeGrip, QPushButton)
from PyQt5.QtGui import QPixmap

from openai_wrapper import text_fallacy_classification, openAI_TTS
from real_time_classifier import continuous_audio_transcription, WHISPER_TEXTS
from audio import play_audio, change_playback_speed

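# GPT_TEXTS collects the classifier's responses (newest last); WHISPER_TEXTS,
# imported above, is expected to be appended to by the background transcription thread.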
GPT_TEXTS = []

class TransparentOverlay(QMainWindow):

    def __init__(self, whs_model, auto):
        super().__init__()

        self.whs_model = whs_model
        self.auto = auto
        self.dragPos = QPoint()
        self.opacity = 0.6
        self.is_tts_enabled = False

        self.initUI()

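    # Build the frameless, translucent, always-on-top window: two scroll areas
    # (analysis on top, transcript below), a row of control buttons, a resize
    # grip, the background transcription thread, and a refresh timer.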
    def initUI(self):
        self.setWindowTitle('Transparent Overlay')
        self.setGeometry(0, 0, 1000, 600)
        self.setWindowOpacity(self.opacity)
        self.setAttribute(Qt.WA_TranslucentBackground)
        self.setWindowFlags(Qt.Window | Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)

        # Create scroll areas
        self.scroll_area1 = QScrollArea(self)
        self.scroll_area2 = QScrollArea(self)

        # Set minimum dimensions for the scroll areas
        self.scroll_area1.setMinimumSize(380, 120)
        self.scroll_area2.setMinimumSize(380, 120)

        # Create labels
        self.whisper_label = QLabel('Whisper Output Will Appear Here')
        self.chatgpt_label = QLabel('ChatGPT Output Will Appear Here')

        # Enable word-wrap on labels
        self.whisper_label.setWordWrap(True)
        self.chatgpt_label.setWordWrap(True)

        # Add labels to the scroll areas
        self.scroll_area1.setWidget(self.whisper_label)
        self.scroll_area2.setWidget(self.chatgpt_label)

        # Let the scroll areas resize their contents
        self.scroll_area1.setWidgetResizable(True)
        self.scroll_area2.setWidgetResizable(True)

        # Style labels with bold text and increased font size, using professional grey shades
        self.whisper_label.setStyleSheet('background-color: #dcdcdc; font-weight: bold; font-size: 12px; color: black;')
        self.chatgpt_label.setStyleSheet('background-color: #696969; font-weight: bold; font-size: 15px; color: white;')

        # Layout setup
        # QVBoxLayout for the scroll areas (analysis above transcript)
        vbox_layout = QVBoxLayout()
        vbox_layout.addWidget(self.scroll_area2)
        vbox_layout.addWidget(self.scroll_area1)

        # QGridLayout to hold the QVBoxLayout and a QSizeGrip
        grid_layout = QGridLayout()
        grid_layout.addLayout(vbox_layout, 0, 0)

        # Add a QSizeGrip in the bottom-right corner
        size_grip = QSizeGrip(self)
        grid_layout.addWidget(size_grip, 1, 1, Qt.AlignBottom | Qt.AlignRight)

        container = QWidget()
        container.setLayout(grid_layout)
        self.setCentralWidget(container)

        # Run the continuous transcription and classification in a separate thread
        self.stop_event = threading.Event()
        self.transcription_thread = threading.Thread(target=continuous_audio_transcription, args=(self.whs_model, self.stop_event))
        self.transcription_thread.start()

        # Timer to update the Whisper and ChatGPT outputs every 500 ms
        self.timer = QTimer(self)
        self.timer.timeout.connect(self.update_labels)
        self.timer.start(500)

        # Label to display the screenshot (currently disabled)
        # self.screenshot_label = QLabel(self)
        # self.screenshot_label.setWordWrap(True)
        # vbox_layout.addWidget(self.screenshot_label)

        # Button that sends the current transcript for analysis
        self.capture_button = QPushButton('Analyze Transcript', self)
        self.capture_button.clicked.connect(self.start_text_thread)

        # Toggle buttons
        self.toggle_whisper_button = QPushButton('Toggle Transcript', self)
        self.toggle_whisper_button.clicked.connect(self.toggle_whisper_box)

        self.toggle_chatgpt_button = QPushButton('Toggle Analysis', self)
        self.toggle_chatgpt_button.clicked.connect(self.toggle_chatgpt_box)

        self.toggle_tts_button = QPushButton('Toggle TTS', self)
        self.toggle_tts_button.clicked.connect(self.toggle_tts)

        # Style buttons
        self.capture_button.setStyleSheet("QPushButton { background-color: grey; font-weight: bold; }")
        self.toggle_whisper_button.setStyleSheet("QPushButton { background-color: green; font-weight: bold; }")
        self.toggle_chatgpt_button.setStyleSheet("QPushButton { background-color: green; font-weight: bold; }")
        self.toggle_tts_button.setStyleSheet("QPushButton { background-color: red; font-weight: bold; }")

        # Horizontal layout for the buttons
        button_layout = QHBoxLayout()

        # Add buttons to the horizontal layout
        button_layout.addWidget(self.capture_button)
        button_layout.addWidget(self.toggle_whisper_button)
        button_layout.addWidget(self.toggle_chatgpt_button)
        button_layout.addWidget(self.toggle_tts_button)

        # Add the button row to the main vertical layout
        vbox_layout.addLayout(button_layout)

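    # Poll the shared Whisper/GPT buffers (driven by the QTimer above) and push
    # the latest text into the two labels.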
    def update_labels(self):
        # get_whisper_transcription returns a list of text segments, newest last.
        whisper_segments = get_whisper_transcription()

        # Concatenate the segments and set the label text.
        self.whisper_label.setText("Transcript: " + '- '.join(whisper_segments))

        # Show the most recent GPT response (empty until the first analysis finishes).
        chatgpt_text = get_chatgpt_output()
        self.chatgpt_label.setText(chatgpt_text)

        self.whisper_label.setMouseTracking(True)
        self.chatgpt_label.setMouseTracking(True)
        self.scroll_area1.setMouseTracking(True)
        self.scroll_area2.setMouseTracking(True)

    def toggle_whisper_box(self):
        is_visible = self.scroll_area1.isVisible()
        self.scroll_area1.setVisible(not is_visible)
        self.toggle_whisper_button.setStyleSheet(
            "QPushButton { background-color: %s; font-weight: bold; }" % ('green' if not is_visible else 'red')
        )

    def toggle_chatgpt_box(self):
        is_visible = self.scroll_area2.isVisible()
        self.scroll_area2.setVisible(not is_visible)
        self.toggle_chatgpt_button.setStyleSheet(
            "QPushButton { background-color: %s; font-weight: bold; }" % ('green' if not is_visible else 'red')
        )

    def toggle_tts(self):
        self.is_tts_enabled = not self.is_tts_enabled
        # Update the button color based on the state
        self.toggle_tts_button.setStyleSheet(
            "QPushButton { background-color: %s; font-weight: bold; }" % ('green' if self.is_tts_enabled else 'red')
        )
        print(f'TTS is set to {self.is_tts_enabled}')

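    # The window is frameless (Qt.FramelessWindowHint), so click-and-drag
    # repositioning is implemented manually via the mouse events below.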
    def mousePressEvent(self, event):
        self.dragPos = event.globalPos()

    def mouseMoveEvent(self, event):
        if event.buttons() == Qt.LeftButton:
            self.move(self.pos() + event.globalPos() - self.dragPos)
            self.dragPos = event.globalPos()

    def keyPressEvent(self, event):
        if event.key() == Qt.Key_Escape:
            # Stop the transcription thread before closing the window
            self.stop_event.set()
            self.transcription_thread.join()  # Optional: wait for the thread to finish
            self.close()

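    # Run the analysis paths (with and without a screenshot) on worker threads
    # so the potentially slow OpenAI calls do not block the GUI.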
    def start_img_text_thread(self):
        capture_thread = threading.Thread(target=self.capture_and_process)
        capture_thread.start()

    def start_text_thread(self):
        process_thread = threading.Thread(target=self.process_text)
        process_thread.start()

    def capture_and_process(self):
        # Hide the overlay (fully transparent) so it does not appear in the screenshot
        self.setWindowOpacity(0.0)

        # Process all pending application events so the opacity change is applied
        QApplication.processEvents()

        # Take the screenshot now that the overlay is invisible
        self.capture_screen()

    def capture_screen(self):
        # Use the overlay's geometry (plus a 50 px margin) as the capture area
        capture_area = self.geometry()
        # Capture the screen
        screen = QApplication.primaryScreen()
        screenshot = screen.grabWindow(0, capture_area.x() - 50, capture_area.y() - 50,
                                       capture_area.width() + 100, capture_area.height() + 100)

        # Restore the stored default opacity
        self.setWindowOpacity(self.opacity)
        # Process all pending application events
        QApplication.processEvents()

        # Hand the screenshot over for encoding and classification
        self.process_screenshot(screenshot)

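    # Downscale the capture, encode it as a PNG data URL, and send it together
    # with the latest transcript segments to text_fallacy_classification.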
    def process_screenshot(self, screenshot):
        # The preview label is commented out in initUI, so only update it if it exists
        if hasattr(self, 'screenshot_label'):
            pixmap = QPixmap(screenshot)
            self.screenshot_label.setPixmap(pixmap.scaled(self.screenshot_label.size(), Qt.KeepAspectRatio))

        # Convert QPixmap to QImage
        image = screenshot.toImage()

        # Scale the image down (0.3 = 30% of the original size) to keep the payload small
        scale_factor = 0.3
        new_width = int(image.width() * scale_factor)
        new_height = int(image.height() * scale_factor)
        scaled_image = image.scaled(new_width, new_height, Qt.KeepAspectRatio, Qt.SmoothTransformation)
        print(scaled_image.width(), scaled_image.height())

        # Prepare a byte array and a buffer to hold the image data
        byte_array = QByteArray()
        buffer = QBuffer(byte_array)
        buffer.open(QIODevice.WriteOnly)

        # Save the image to the buffer in PNG format
        scaled_image.save(buffer, "PNG")

        # Also save the image to a file for inspection (assumes the img/ directory exists)
        file_path = "img/screenshot.png"
        scaled_image.save(file_path, "PNG")

        # Convert the byte array to base64 and format it as a data URL for the API
        base64_data = base64.b64encode(byte_array.data()).decode()
        formatted_base64_image = "data:image/png;base64," + base64_data

        # Send the screenshot and the latest transcript segments to the fallacy classifier
        text = text_fallacy_classification(formatted_base64_image, get_whisper_transcription())

        GPT_TEXTS.append(text)

        if self.is_tts_enabled:
            # Speak the response via OpenAI TTS
            audio_file = openAI_TTS(text)
            audio_file = change_playback_speed(audio_file)
            play_audio(audio_file)

    def process_text(self):
        # Text-only analysis: classify the latest transcript segments without a screenshot
        text = text_fallacy_classification(None, get_whisper_transcription())

        GPT_TEXTS.append(text)

        if self.is_tts_enabled:
            # Speak the response via OpenAI TTS
            audio_file = openAI_TTS(text)
            audio_file = change_playback_speed(audio_file)
            play_audio(audio_file)

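# Module-level helpers for reading the shared transcript and GPT response buffers.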
def get_whisper_transcription():
    global WHISPER_TEXTS
    # Return the most recent transcript segments (newest last)
    last_n_segments = WHISPER_TEXTS[-9:]
    # text = ' - '.join(last_n_segments)
    return last_n_segments

def get_chatgpt_output():
    global GPT_TEXTS
    # Return the latest GPT response, or an empty string if none has arrived yet
    if GPT_TEXTS:
        return GPT_TEXTS[-1]
    else:
        return ""

def launch_overlay(whs_model, use_gpt):
    app = QApplication(sys.argv)
    overlay = TransparentOverlay(whs_model, use_gpt)
    overlay.show()
    sys.exit(app.exec_())

if __name__ == '__main__':
    # launch_overlay expects a loaded Whisper model and a GPT flag, which are
    # normally supplied by the calling script; None/False here is a placeholder
    # that only brings up the UI.
    launch_overlay(None, False)