--- a
+++ b/deidentify/dataset/nursing2brat.py
@@ -0,0 +1,96 @@
+import argparse
+import os
+import re
+from typing import Dict, List
+
+from deidentify.base import Annotation, Document
+from deidentify.dataset import brat
+
+
+def readlines(filename):
+    """Return all lines of the text file `filename`, line endings included."""
+    with open(filename) as handle:
+        return list(handle)
+
+
+def documents_iter(notes):
+    """Yield one `Document` (with no annotations) per record in the `notes` file.
+
+    A line starting with 'START_OF_RECORD' opens a record and must contain exactly
+    two numbers (patient id, record id); '||||END_OF_RECORD' closes and emits it.
+    """
+    buffer = []
+    for raw in readlines(notes):
+        if raw.startswith('START_OF_RECORD'):
+            buffer = []
+            patient_id, record_id = re.findall(r'\d+', raw)
+        elif raw.startswith('||||END_OF_RECORD'):
+            name = 'note-{}-{}'.format(patient_id, record_id)
+            yield Document(name=name, text=''.join(buffer).rstrip(), annotations=[])
+        else:
+            buffer.append(raw)
+
+
+def annotations_iter(annotations):
+    """Yield the annotations of the PHI file `annotations`, one list per note.
+
+    Every non-blank line holds six whitespace-separated fields: patient id,
+    record id, start, end, tag and text; the text may itself contain whitespace.
+    Consecutive lines with the same patient and record id form one list, and
+    annotation ids restart at 'T1' in each list. Blank lines are skipped, so an
+    empty file yields nothing instead of failing.
+
+    Raises:
+        ValueError: a non-blank line has fewer than six fields, or its start or
+            end field is not an integer.
+    """
+    group, group_key = [], None
+    for line in readlines(annotations):
+        if not line.strip():
+            continue  # e.g. a trailing empty line; it describes no annotation
+        pid, rid, start, end, tag, text = line.strip().split(maxsplit=5)
+        if group and (pid, rid) != group_key:
+            yield group  # ids changed: the previous note is complete
+            group = []
+        group_key = (pid, rid)
+        group.append(Annotation(
+            text=text, start=int(start), end=int(end), tag=tag,
+            ann_id='T{}'.format(len(group) + 1), doc_id='note-{}-{}'.format(pid, rid)))
+    if group:
+        yield group
+
+
+def _map_annotations(annotations):
+    """Index per-document annotation lists by the `doc_id` of their first element.
+
+    `annotations` is an iterable of non-empty lists; an empty list raises
+    IndexError. When two lists share a `doc_id`, the later one replaces the
+    earlier one.
+    """
+    return {group[0].doc_id: group for group in annotations}
+
+
+def main(args):
+    """Pass every note of `args.notes_file` to `brat.write_brat_document`.
+
+    Annotations come from `args.phi_file` and are matched to notes by document name;
+    a note without a match gets an empty list. `args.output_dir` is passed first.
+    """
+    anns_by_doc = _map_annotations(annotations_iter(args.phi_file))
+    for note in documents_iter(args.notes_file):
+        brat.write_brat_document(args.output_dir, doc_name=note.name, text=note.text,
+                                 annotations=anns_by_doc.get(note.name, []))
+
+
+def arg_parser():  # despite the name, returns the parsed arguments, not the parser
+    parser = argparse.ArgumentParser()
+    parser.add_argument("notes_file", help="Full path to raw notes file (notes-raw.txt)")
+    parser.add_argument("phi_file", help="Full path to annotations file (id-phi.phrase)")
+    parser.add_argument("output_dir", help="Path to output directory.")
+    return parser.parse_args()  # no argument list given: argparse reads sys.argv[1:]
+
+
+if __name__ == '__main__':  # runs only when the module is executed as a script
+    ARGS = arg_parser()
+    os.makedirs(ARGS.output_dir, exist_ok=True)  # an already existing directory is fine
+    main(ARGS)