--- a
+++ b/data_description/describe_entitiy_location.py
@@ -0,0 +1,64 @@
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import argparse
+
+parser = argparse.ArgumentParser(
+        description='This script is used to describe the relative positions of entities in the text.')
+
+parser.add_argument('-i', '--input', type=str, required=True,
+                    help='Choose the input CSV file.')
+
+args = parser.parse_args()
+
+if not args.input.endswith('.csv'):
+    raise ValueError('Input file needs to be defined as a CSV-file')
+
+df = pd.read_csv(args.input, sep='|', header=None, names=['Text', 'Annotations'])
+df['Annotations'] = df['Annotations'].str.strip().str.split(' ')
+
+def extract_entities(annotations):
+    return [(i, tag) for i, tag in enumerate(annotations) if tag not in ['O']]
+
+df['EntityPositions'] = df['Annotations'].apply(extract_entities)
+
+def calculate_relative_positions(dataframe):
+    relative_positions = []
+    for _, row in dataframe.iterrows():
+        annotations = row['Annotations']
+        positions = [i for i, tag in enumerate(annotations) if tag not in ['O']]
+        total_length = len(annotations)
+        relative_positions.extend([pos / total_length for pos in positions])
+    return relative_positions
+
+relative_positions = calculate_relative_positions(df)
+
+plt.figure(figsize=(12, 6))
+sns.kdeplot(relative_positions, bw_adjust=0.5, fill=True)
+plt.xlabel("Relative Word Position")
+plt.ylabel("Density")
+plt.title("Density Plot of Entities Across Relative Word Positions")
+plt.grid(True)
+plt.show()
+
+plt.figure(figsize=(12, 6))
+plt.hist(relative_positions, bins=20, color='skyblue', edgecolor='black', alpha=0.7)
+plt.xlabel("Relative Word Position")
+plt.ylabel("Frequency")
+plt.title("Distribution of Entities Across Relative Word Positions")
+plt.grid(axis='y')
+plt.show()
+
+bin_edges = np.linspace(0, 1, 21)  # 20 bins from 0% to 100%
+bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2  # Calculate bin centers
+hist, _ = np.histogram(relative_positions, bins=bin_edges)
+
+plt.figure(figsize=(12, 6))
+plt.plot(bin_centers, hist, marker='o', linestyle='-', color='purple')
+plt.xlabel("Relative Word Position")
+plt.ylabel("Frequency")
+plt.title("Distribution Graph of Entities Across Relative Word Positions")
+plt.grid(True)
+plt.show()
+