b/tests/surrogates/test_rewrite_dataset.py
+import argparse
+import filecmp
+import glob
+from os.path import basename, dirname, join
+import pytest
+from deidentify.base import Annotation
+from deidentify.surrogates import rewrite_dataset
+def test_apply_surrogates():
+    text = 'ccc cc ccc c c ccc cccccc cccc'
+    annotations = [
+        Annotation('ccc', start=0, end=3, tag='A'),
+        Annotation('cc', start=4, end=6, tag='A'),
+        Annotation('ccc', start=15, end=18, tag='B')
+    ]
+    surrogates = ['a', 'dd', 'bbbbb']
+    surrogate_doc = rewrite_dataset.apply_surrogates(text, annotations, surrogates)
+    assert surrogate_doc.text == 'a dd ccc c c bbbbb cccccc cccc'
+    assert surrogate_doc.annotations == [
+        Annotation('a', start=0, end=1, tag='A'),
+        Annotation('dd', start=2, end=4, tag='A'),
+        Annotation('bbbbb', start=13, end=18, tag='B')
+    ]
+    assert surrogate_doc.annotations_without_surrogates == []
+def test_apply_surrogates_no_annotations():
+    surrogate_doc = rewrite_dataset.apply_surrogates('ccc cc ccc', annotations=[], surrogates=[])
+    assert surrogate_doc.text == 'ccc cc ccc'
+    assert surrogate_doc.annotations == []
+    assert surrogate_doc.annotations_without_surrogates == []
+def test_apply_surrogates_errors_raise():
+    text = 'ccc cc ccc'
+    annotations = [
+        Annotation('ccc', start=0, end=3, tag='A'),
+        Annotation('cc', start=4, end=6, tag='A'),
+        Annotation('ccc', start=7, end=10, tag='B')
+    ]
+    surrogates = ['a', None, 'b']
+    with pytest.raises(ValueError):
+        rewrite_dataset.apply_surrogates(text, annotations, surrogates)
+    with pytest.raises(ValueError):
+        rewrite_dataset.apply_surrogates(text, annotations, surrogates, errors='raise')
+def test_apply_surrogates_errors_ignore():
+    text = 'ccc cc ccc'
+    annotations = [
+        Annotation('ccc', start=0, end=3, tag='A'),
+        Annotation('cc', start=4, end=6, tag='A'),
+        Annotation('ccc', start=7, end=10, tag='B')
+    ]
+    surrogates = ['a', None, 'b']
+    surrogate_doc = rewrite_dataset.apply_surrogates(text, annotations, surrogates, errors='ignore')
+    assert surrogate_doc.text == 'a cc b'
+    assert surrogate_doc.annotations == [
+        Annotation('a', start=0, end=1, tag='A'),
+        Annotation('cc', start=2, end=4, tag='A'),
+        Annotation('b', start=5, end=6, tag='B')
+    ]
+    assert surrogate_doc.annotations_without_surrogates == [
+        Annotation('cc', start=4, end=6, tag='A'),
+    ]
+def test_apply_surrogates_errors_coerce():
+    text = 'ccc cc ccc'
+    annotations = [
+        Annotation('ccc', start=0, end=3, tag='A'),
+        Annotation('cc', start=4, end=6, tag='A'),
+        Annotation('ccc', start=7, end=10, tag='B')
+    ]
+    surrogates = ['a', None, 'b']
+    surrogate_doc = rewrite_dataset.apply_surrogates(text, annotations, surrogates, errors='coerce')
+    assert surrogate_doc.text == 'a [A] b'
+    assert surrogate_doc.annotations == [
+        Annotation('a', start=0, end=1, tag='A'),
+        Annotation('[A]', start=2, end=5, tag='A'),
+        Annotation('b', start=6, end=7, tag='B')
+    ]
+    assert surrogate_doc.annotations_without_surrogates == [
+        Annotation('cc', start=4, end=6, tag='A'),
+    ]
+def test_main(tmpdir):
+    args = argparse.Namespace(
+        surrogate_table=join(dirname(__file__), 'data/annotations-rewrite-table.csv'),
+        data_path=join(dirname(__file__), 'data/original'),
+        output_path=tmpdir
+    )
+    ann_files = glob.glob(join(dirname(__file__), 'data/rewritten/*.ann'))
+    txt_files = glob.glob(join(dirname(__file__), 'data/rewritten/*.txt'))
+    to_compare = ann_files + txt_files
+    to_compare = [basename(f) for f in to_compare]
+    rewrite_dataset.main(args)
+    for file in to_compare:
+        expected = join(dirname(__file__), 'data/rewritten/', file)
+        actual = join(tmpdir, file)
+        assert filecmp.cmp(expected, actual)