[5c09f6]: / experiments / expression / codex / codex_alignment.py

Download this file

84 lines (65 with data), 2.5 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import pandas as pd
from os.path import join as pjoin
import numpy as np
import matplotlib.pyplot as plt
# Directory containing the CODEX expression CSV, relative to the working directory.
DATA_DIR = "../../../data/codex"

# Pass nrows=200 to read_csv for a quick partial load while experimenting.
data = pd.read_csv(pjoin(DATA_DIR, "codex_mrl_expression.csv"))

# Marker columns: everything after the first column and before the last eight.
marker_names = data.columns.values[1:-8]

# The sample name is the text before the first "_" in sample_Xtile_Ytile.
sample_names = data.sample_Xtile_Ytile.str.split("_").str[0].values
sample_names_unique = np.unique(sample_names)

# Row positions of the two samples that are compared below.
sample1_idx = np.where(sample_names == "BALBc-3")[0]
sample2_idx = np.where(sample_names == "BALBc-2")[0]

# Take explicit copies: tile_spatial_coordinates() adds columns to these frames
# in place, and writing into a selection of `data` would otherwise trigger
# pandas' SettingWithCopyWarning.
data_sample1 = data.iloc[sample1_idx, :].copy()
data_sample2 = data.iloc[sample2_idx, :].copy()

# Extent of one tile along x and y, in the same units as the "X.X" / "Y.Y"
# columns (they are added together in tile_spatial_coordinates()).
xtilespan = 1344
ytilespan = 1008
def tile_spatial_coordinates(data_df, x_tile_span=None, y_tile_span=None):
    """Add global ``xcoord`` / ``ycoord`` columns to ``data_df`` in place.

    Each row's ``sample_Xtile_Ytile`` value is split on ``"_"``; the second and
    third fields, with their first character dropped, are read as 1-based tile
    numbers along x and y. The within-tile positions in the ``"X.X"`` and
    ``"Y.Y"`` columns are then offset by ``(tile_number - 1) * tile_span``.

    Args:
        data_df: DataFrame with ``sample_Xtile_Ytile``, ``"X.X"`` and ``"Y.Y"``
            columns. It is modified in place.
        x_tile_span: Extent of one tile along x. Defaults to the module-level
            ``xtilespan``.
        y_tile_span: Extent of one tile along y. Defaults to the module-level
            ``ytilespan``.

    Returns:
        None. The result is written to ``data_df["xcoord"]`` and
        ``data_df["ycoord"]``.

    Raises:
        ValueError: If ``data_df`` already has an ``xcoord`` or ``ycoord``
            column, to avoid silently overwriting earlier results.
    """
    if "xcoord" in data_df.columns or "ycoord" in data_df.columns:
        raise ValueError("DataFrame already contains scaled coordinates.")

    # Fall back to the script-wide tile geometry when no override is given.
    if x_tile_span is None:
        x_tile_span = xtilespan
    if y_tile_span is None:
        y_tile_span = ytilespan

    tile_fields = data_df["sample_Xtile_Ytile"].str.split("_")
    # Drop the one-character prefix of each tile field before parsing the number.
    x_tile_nums = tile_fields.str[1].str[1:].values.astype(float)
    y_tile_nums = tile_fields.str[2].str[1:].values.astype(float)

    # Tile numbers are treated as 1-based, so tile 1 contributes no offset.
    data_df["xcoord"] = (x_tile_nums - 1) * x_tile_span + data_df["X.X"].values
    data_df["ycoord"] = (y_tile_nums - 1) * y_tile_span + data_df["Y.Y"].values
# Attach stitched (whole-slide) coordinates to both samples.
tile_spatial_coordinates(data_sample1)
tile_spatial_coordinates(data_sample2)

# Keep only rows in which every marker magnitude stays below 10,000.
# Despite their names, normalized_data1 / normalized_data2 hold the raw marker
# values; no normalization is applied here.
normalized_data1 = data_sample1[marker_names].to_numpy(copy=True)
keep_idx = np.flatnonzero(~(np.abs(normalized_data1) >= 10_000).any(axis=1))
data_sample1 = data_sample1.iloc[keep_idx]

normalized_data2 = data_sample2[marker_names].to_numpy(copy=True)
keep_idx = np.flatnonzero(~(np.abs(normalized_data2) >= 10_000).any(axis=1))
data_sample2 = data_sample2.iloc[keep_idx]
# For every marker, show its expression on the two slices side by side
# (one figure per marker; each plt.show() blocks until the window is closed
# when an interactive backend is in use).
#
# The colours use the raw marker values. An earlier per-marker z-score was
# computed here but never passed to scatter(); it has been dropped because it
# was dead code and could emit divide-by-zero warnings for constant markers.
# With matplotlib's default linear colour scaling, an affine rescale of the
# values would not change the picture anyway.
for marker in marker_names:
    plt.figure(figsize=(10, 5))
    panels = (("Slice 1", data_sample1), ("Slice 2", data_sample2))
    for panel_num, (panel_title, sample_df) in enumerate(panels, start=1):
        plt.subplot(1, 2, panel_num)
        plt.title(panel_title)
        plt.scatter(
            sample_df["xcoord"],
            sample_df["ycoord"],
            c=sample_df[marker],
            s=1,
            marker="s",
        )
    plt.show()

# NOTE(review): debugging hook left in on purpose so the session can be
# inspected after plotting; remove before running non-interactively.
import ipdb

ipdb.set_trace()