Switch to side-by-side view

--- a
+++ b/data_description/extract_text_entities.py
@@ -0,0 +1,26 @@
+import json
+
def extract_text_entities(input_file_path, text_file_path, entities_file_path):
    """Split a labelled JSONL file into a text file and an entity file.

    Every non-blank line of the input is parsed as a JSON object that must
    provide a ``'text'`` string and an ``'entities'`` list. Each entity must
    provide ``'start_offset'`` and ``'end_offset'``, which are used as slice
    bounds into that record's text.

    Args:
        input_file_path: Path of the JSONL file to read (decoded as UTF-8).
        text_file_path: Path of the file that receives each record's text.
            Opened in append mode, so existing content is kept.
        entities_file_path: Path of the file that receives one extracted
            entity string per line. Opened in append mode as well.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record or an entity lacks one of the required keys.
        OSError: If any of the three files cannot be opened.
    """
    # Open every file exactly once; the outputs stay in append mode so that
    # repeated calls keep accumulating into the same files. Both output files
    # are created even when the input contains no records.
    with open(input_file_path, 'r', encoding='utf-8') as source, \
            open(text_file_path, 'a', encoding='utf-8') as text_file, \
            open(entities_file_path, 'a', encoding='utf-8') as entities_file:
        # Stream the input instead of loading all lines into memory.
        for line in source:
            # Blank lines (e.g. a trailing empty line) are not records.
            if not line.strip():
                continue

            jsonl_entry = json.loads(line)
            text_content = jsonl_entry['text']
            entities_content = jsonl_entry['entities']

            # Build the slices before writing anything for this record, so a
            # malformed entity fails before the record's text is emitted.
            extracted_entities = [
                text_content[entity['start_offset']:entity['end_offset']] + '\n'
                for entity in entities_content
            ]

            # NOTE: texts are written back-to-back with no separator between
            # records; this is kept as-is for output compatibility.
            text_file.write(text_content)
            entities_file.writelines(extracted_entities)
+
# Locations of the labelled JSONL input and of the two generated files.
# All three are relative paths, so they resolve against the current
# working directory of the running process.
input_jsonl_file_path = '../datasets/labelled_data/all.jsonl'
output_text_file_path = './text.txt'
output_entities_file_path = './entities.txt'

# Run the extraction immediately when this module is executed.
extract_text_entities(
    input_file_path=input_jsonl_file_path,
    text_file_path=output_text_file_path,
    entities_file_path=output_entities_file_path,
)