"""Plot per-marker expression of two CODEX samples side by side.

Loads ``codex_mrl_expression.csv``, converts every cell's tile-local position
into a slide-wide coordinate, drops cells with extreme marker values, and
shows one two-panel scatter figure per marker.
"""
import pandas as pd
from os.path import join as pjoin
import numpy as np
import matplotlib.pyplot as plt

DATA_DIR = "../../../data/codex"

# Sample-name prefixes, i.e. the text before the first "_" in the
# ``sample_Xtile_Ytile`` column.
SAMPLE1_NAME = "BALBc-3"
SAMPLE2_NAME = "BALBc-2"

# Offset added per tile index when stitching tile-local coordinates together.
# NOTE(review): presumably the tile width/height in pixels -- confirm.
xtilespan = 1344
ytilespan = 1008

# A cell is discarded when any marker reaches this absolute value.
OUTLIER_THRESHOLD = 10_000


def tile_spatial_coordinates(data_df):
    """Add slide-wide ``xcoord``/``ycoord`` columns to ``data_df`` in place.

    The ``sample_Xtile_Ytile`` column is split on ``"_"``; the second and
    third fields, minus their first character, are parsed as the tile
    indices.  Each index (1-based) is converted to an offset using
    ``xtilespan``/``ytilespan`` and added to the ``X.X``/``Y.Y`` columns.

    Args:
        data_df: DataFrame with ``sample_Xtile_Ytile``, ``X.X`` and ``Y.Y``
            columns.  It is modified in place.

    Returns:
        None.

    Raises:
        ValueError: If ``data_df`` already has an ``xcoord`` or ``ycoord``
            column, which would mean the offsets were already applied.
    """
    if "xcoord" in data_df.columns or "ycoord" in data_df.columns:
        raise ValueError("DataFrame already contains scaled coordinates.")

    tile_fields = data_df["sample_Xtile_Ytile"].str.split("_")
    # Fields 1 and 2 carry the tile indices behind a one-character prefix.
    x_tile_nums = tile_fields.str[1].str[1:].to_numpy().astype(float)
    y_tile_nums = tile_fields.str[2].str[1:].to_numpy().astype(float)

    # Tile indices are 1-based, so tile 1 contributes no offset.
    data_df["xcoord"] = (x_tile_nums - 1) * xtilespan + data_df["X.X"].to_numpy()
    data_df["ycoord"] = (y_tile_nums - 1) * ytilespan + data_df["Y.Y"].to_numpy()


def _drop_outlier_cells(data_df, marker_names, threshold=OUTLIER_THRESHOLD):
    """Return the rows of ``data_df`` whose markers all stay below ``threshold``."""
    marker_values = data_df[marker_names].to_numpy()
    # ``>=`` followed by ``any`` keeps rows containing NaN, as comparisons
    # against NaN are False.
    has_outlier = (np.abs(marker_values) >= threshold).any(axis=1)
    return data_df[~has_outlier]


def _plot_marker(marker, samples):
    """Show one figure with a scatter panel of ``marker`` per sample."""
    plt.figure(figsize=(10, 5))
    for panel, (title, sample_df) in enumerate(samples, start=1):
        plt.subplot(1, len(samples), panel)
        plt.title(title)
        # Raw marker values are used for colour; matplotlib's default colour
        # normalisation rescales them to the colormap range.
        plt.scatter(
            sample_df["xcoord"],
            sample_df["ycoord"],
            c=sample_df[marker],
            s=1,
            marker="s",
        )
    plt.show()


def main():
    """Load the expression table and plot every marker for both samples."""
    data = pd.read_csv(pjoin(DATA_DIR, "codex_mrl_expression.csv"))

    # Every column except the first and the last eight.
    # NOTE(review): presumably these are the marker columns -- verify
    # against the CSV schema.
    marker_names = data.columns.values[1:-8]
    sample_names = data["sample_Xtile_Ytile"].str.split("_").str[0]

    samples = []
    for title, sample_name in (("Slice 1", SAMPLE1_NAME), ("Slice 2", SAMPLE2_NAME)):
        # Copy so that adding coordinate columns does not write into a
        # frame derived from ``data``.
        sample_df = data[sample_names == sample_name].copy()
        tile_spatial_coordinates(sample_df)
        samples.append((title, _drop_outlier_cells(sample_df, marker_names)))

    for marker in marker_names:
        _plot_marker(marker, samples)

    # NOTE(review): debugger breakpoint kept from the original script so the
    # loaded data can be inspected interactively; remove if not needed.
    import ipdb

    ipdb.set_trace()


if __name__ == "__main__":
    main()