Quantitative-Histology / Git / [171cba] /4x/Data

Models:
DanielG/
Quantitative-Histology
Downloads: 1
[171cba]: / 4x / Data_analysis.py
History
Download this file
777 lines (480 with data), 21.8 kB

# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>

# <codecell>

import numpy as np
from matplotlib import rcParams
import matplotlib.pyplot as plt
import seaborn
import pandas as pd
import itertools
import os
from sklearn import linear_model, ensemble, decomposition, cross_validation, preprocessing
from statsmodels.regression.mixed_linear_model import MixedLM
import statsmodels
import statsmodels.api as sm
from statsmodels.regression.linear_model import OLSResults
from statsmodels.tools.tools import add_constant
from sklearn.neighbors import KernelDensity

from mpl_toolkits.mplot3d import Axes3D, proj3d

print statsmodels.__version__

%matplotlib inline
rcParams["figure.figsize"] = (14, 8)

rcParams["text.usetex"] = False
rcParams["xtick.labelsize"] = 12
rcParams["ytick.labelsize"] = 12
rcParams["font.size"] = 14
rcParams["axes.titlesize"] = 16
#rcParams["text.usetex"] = False
rcParams["font.family"] = "Serif"
rcParams["figure.dpi"] = 600

# <codecell>

# RAW DATA

raw_physical = pd.read_csv("../data/physical.csv")
raw_histo = pd.read_csv("../data/tawfik.csv")
ent = pd.read_csv("results/entropy.csv").drop(["Unnamed: 0"], 1)
foci = pd.read_csv("results/foci.csv").drop(["Unnamed: 0"], 1)
lac = pd.read_csv("results/normalised_lacunarity.csv").drop(["Unnamed: 0"], 1)
gabor = pd.read_csv("results/gabor_filters.csv").drop(["Unnamed: 0"], 1)
ts = pd.read_csv("results/tissue_sinusoid_ratio.csv").drop(["Unnamed: 0"], 1)
blur = pd.read_csv("results/blur.csv").drop(["Unnamed: 0"], 1)
distances = pd.read_csv("results/interfoci_dist.csv").drop(["Unnamed: 0"], 1)

raw_image = pd.merge(lac, ent,
	on=["Sheep", "Image"]).merge(foci, 
	on=["Sheep", "Image"]).merge(gabor,
	on=["Sheep", "Image"]).merge(ts, 
	on=["Sheep", "Image"]).merge(blur, 
	on=["Sheep", "Image"]).merge(distances, 
    on=["Sheep", "Image"])
raw_image.rename(columns = {	"meanSize" : "FociSize", 
								"TSRatio" : "TissueToSinusoid",
								"Count" : "FociCount" }, inplace=True)

# <codecell>

def normalise(df, skip = []) :
	for i in df.columns :
		if i not in skip :
			df[i] -= df[i].mean()
			df[i] /= df[i].std()
	return df






def rescale(df, skip = []) :
    for i in df.columns :
        if i not in skip :
            df[i] -= df[i].min()
            df[i] /= df[i].max()
    return df



# Remove a layer from a list
def delayer(m) :
	out = []
	for i in m :
		if isinstance(i, list) :
			for j in i :
				out.append(j)
		else :
			out.append(i)
	return out







# Remove all layers from a list
def flatten(m) :
	out = m[:]

	while out != delayer(out) :
		out = delayer(out)

	return out








# Generate all combinations of objects in a list
def combinatorial(l, short = np.inf) :
	out = []

	for numel in range(len(l)) :
		for i in itertools.combinations(l, numel+1) :
			if len(list(i)) < short :
				out.append(list(i))

	return out










def pcaplot(df) :

	# PCA
	pca = decomposition.PCA(whiten = True)
	pca.fit(df)
	p1 = pca.components_[0] / np.abs(pca.components_[0]).max() * np.sqrt(2)/2
	p2 = pca.components_[1] / np.abs(pca.components_[1]).max() * np.sqrt(2)/2

	# Normalise
	norms = np.max([np.sqrt((np.array(zip(p1, p2)[i])**2).sum()) for i in range(len(p1))])
	c = plt.Circle( (0, 0), radius = 1, alpha = 0.2)
	plt.axes(aspect = 1)
	plt.gca().add_artist(c)

	plt.scatter(p1 / norms, p2 / norms)
	plt.xlim([-1, 1])
	plt.ylim([-1, 1])

	for i, text in enumerate(df.columns) :
		plt.annotate(text, xy = [p1[i], p2[i]])

	plt.tight_layout()









def big_ass_matrix(df, y, x, group = None, short = True) :

    independent = combinatorial(x, short)
    
    models = {}
    p = {}
    aic = {}
    r2 = {}
    best = {}
    dfs = {}
    bestdf = {}
    
    for dependent in y :
        
        print "Regressing for %s" % dependent
        
        for covariate in independent :
            
            if group is None :
                
                subset = delayer([covariate, dependent])
                df2 = df[subset].dropna()
                df2["Intercept"] = np.ones(len(df2))
                dfs.setdefault(dependent, []).append(df2)
                
                ols = sm.GLS(endog=df2[dependent], exog=df2[delayer([covariate, "Intercept"])]).fit()
                
                models.setdefault(dependent, []).append(ols)
                p.setdefault(dependent, []).append(ols.pvalues[:-1].values)
                aic.setdefault(dependent, []).append(ols.aic)
                r2.setdefault(dependent, []).append(ols.rsquared)
            
            else :
                
                subset = delayer([covariate, dependent, group])
                df2 = df[subset].dropna()
                dfs.setdefault(dependent, []).append(df2)
                
                ols = MixedLM.from_formula(rstr(y=dependent, x=covariate), data=df2, groups=df2[group]).fit()
                
                models.setdefault(dependent, []).append(ols)
                aic.setdefault(dependent, []).append(2 * (ols.k_fe + 1) - 2 * ols.llf)
                p.setdefault(dependent, []).append(ols.pvalues[1:-1].values)
                r2.setdefault(dependent, []).append(mmR2(df2, ols))

    
       
        bestAIC = np.min(aic[dependent])
        
        for i, val in enumerate(models[dependent]) :
            
            if aic[dependent][i] < 2 + bestAIC :
                
                if np.sum(p[dependent][i] > 0.05) == 0 :
                    
                    if group is None :
                        
                        best.setdefault(dependent, []).append(val)
                        bestdf.setdefault(dependent, []).append(dfs[dependent][i])
                        
                    else :
                        
                        if val.random_effects.abs().mean()[0] > 0.01 :
                            
                            best.setdefault(dependent, []).append(val)
                            bestdf.setdefault(dependent, []).append(dfs[dependent][i])
       
            
        if best.has_key(dependent) :
            for i, model in enumerate(best[dependent]) :
                
                if not os.path.exists("regressions/%s" % dependent) :                  
                    os.mkdir("regressions/%s" % dependent)
                    
                if not os.path.exists("../talk/figures/regressions/%s" % dependent) :                  
                    os.mkdir("../talk/figures/regressions/%s" % dependent)                

                if group is None :

                    dfx = bestdf[dependent][i]
                    plt.scatter(model.fittedvalues.values, dfx[model.model.endog_names].values, c=seaborn.color_palette("deep", 8)[0])
                    plt.plot(dfx[model.model.endog_names].values, dfx[model.model.endog_names].values, c=seaborn.color_palette("deep", 8)[2])
                    plt.ylabel(model.model.endog_names)
                    yl = model.model.exog_names[:]
                    yl.remove("Intercept")
                    plt.xlabel("Estimate using " + ", ".join(yl))
                    plt.title(rstr(dependent, model.model.exog_names).replace(" + Intercept", ""))
                    #plt.title(r"$R^2$ = %.02f" % model.rsquared)
                    st = ("$R^2$ = %.03f\n\n"% model.rsquared)
                    for coefnum, coef in enumerate(yl) :
                        st += ("%s" % coef)
                        st += (" : %.03f\n" % model.params[coef])
                        st += ("$p$ = %.01e\n\n" % model.pvalues[coefnum])
                    #plt.suptitle(st)
                    plt.text(0.01, .99, st, va="top", ha="left")
                    plt.xlim([-0.05, 1.05])
                    plt.ylim([-0.05, 1.05])
                    plt.savefig("regressions/%s/lm-%d.pdf" % (dependent, i))
                    plt.savefig("../talk/figures/regressions/%s/lm-%d.png" % (dependent, i), dpi=300, jpeg_quality=90)
                    plt.close()

                else :
                    dfx = bestdf[dependent][i]
                    y, yhat = mmPredict(model.model.data.frame, model)
                    plt.scatter(yhat, y, c=seaborn.color_palette("deep", 8)[0])
                    plt.plot(y, y, c=seaborn.color_palette("deep", 8)[2])
                    plt.ylabel(model.model.endog_names)
                    yl = model.model.exog_names[:]
                    yl.remove("Intercept")
                    plt.xlabel("Estimate using " + ", ".join(yl))
                    plt.title(rstr(dependent, model.model.exog_names).replace("Intercept + ", ""))
                    
                    #plt.title(r"$R^2$ = %.02f" % mmR2(dfx, model))
                    st = ("$R^2$ = %.03f\n\n" % mmR2(dfx, model))
                    for coefnum, coef in enumerate(yl) :
                        st += coef
                        st += " : %.03f\n" % model.fe_params[1+coefnum]
                        st += "$p$ = %.01e\n\n" % model.pvalues[coef]
                    st += ("Avg. abs. RE coef. : %.03f" % model.random_effects.abs().mean())
                    plt.text(0.01, .99, st, va="top", ha="left")
                    
                    plt.xlim([-0.05, 1.05])
                    plt.ylim([-0.05, 1.05])
                    plt.savefig("regressions/%s/mm_%d.pdf" % (dependent, i))
                    plt.savefig("../talk/figures/regressions/%s/mm_%d.png" % (dependent, i), dpi=300, jpeg_quality=90)
                    plt.close()
        
    return best, (models, p, r2, aic)
    
    
    
    














def summary(models) :

	# Generate list of everything
	r2 = np.array([m.r2 for dependent in models.keys() for m in models[dependent]])
	p = np.array([m.f_stat["p-value"] for dependent in models.keys() for m in models[dependent]])
	mod = np.array([m for dependent in models.keys() for m in models[dependent]])
	dependent = np.array([dependent for dependent in models.keys() for m in models[dependent]])

	# Sort by R2
	idx = np.argsort(r2)[::-1]

	# Output string
	s = "%d significant regressions.\n\n" % len(r2)
	s += "Ten most correlated :\n\n"

	# Print a summary of the top ten correlations
	for i in idx[:10] :
		s += ("%s ~ %s\n" % (dependent[i], " + ".join(mod[i].x.columns[:-1])))
		s += ("R^2 = %f\tp = %f\n\n" % (r2[i], p[i]))

	print s
    
    
    
    
    
    
    
def rstr(y, x) :
    formatstr = "%s ~ " % y
    for i in x[:-1] :
        formatstr += str(i)
        formatstr += " + "
    formatstr += str(x[-1])
    return formatstr









def mmR2(df, ols) :

    y = df[ols.model.endog_names]
    sigma_a = ols.random_effects.values.var()

    yhat = np.zeros_like(y)
    for i, coef in enumerate(ols.fe_params) :
        yhat += ols.model.data.exog[:, i] * coef

    sigma_f = yhat.var()

    idxx = []
    
    grouplabel = list(df.columns)
    grouplabel.remove(ols.model.endog_names)
    remove_these = ols.model.exog_names[:]
    remove_these.remove("Intercept")
    for i in remove_these :
        grouplabel.remove(i)

    for j, u in enumerate(ols.model.group_labels) :
        idx = np.where(df[grouplabel] == u)[0]
        yhat[idx] += ols.random_effects.values[j]

    sigma_e = np.var(yhat - y)

    return (sigma_f + sigma_a) / (sigma_f + sigma_a + sigma_e)





def mmPredict(df, ols) :

    y = df[ols.model.endog_names]
    sigma_a = ols.random_effects.values.var()

    yhat = np.zeros_like(y)
    for i, coef in enumerate(ols.fe_params) :
        yhat += ols.model.data.exog[:, i] * coef

    sigma_f = yhat.var()

    idxx = []
    
    grouplabel = list(df.columns)
    grouplabel.remove(ols.model.endog_names)
    remove_these = ols.model.exog_names[:]
    remove_these.remove("Intercept")
    for i in remove_these :
        grouplabel.remove(i)

    for j, u in enumerate(ols.model.group_labels) :
        idx = np.where(df[grouplabel] == u)[0]
        yhat[idx] += ols.random_effects.values[j]
        
    return y, yhat




#best, stuff = big_ass_matrix(df=sheep, y=["Weight"], x=imagecols, group="AgeAtDeath", short=True)

# <codecell>

raw_image.columns

# <codecell>

# CLEAN DATA

physcols = ["Weight", "Sex", "AgeAtDeath", "Foreleg", "Hindleg", "E", "CES", "RES"]
imagecols = ["Entropy", "Lacunarity", "Inflammation", "Scale", "Directionality", "FociCount", "FociSize", "TissueToSinusoid", "Blur", "MinDist", "IFDist"]
histcols = ["LobularCollapse", "InterfaceHepatitis", "ConfluentNecrosis", "LnApRi", "PortalInflammation", "BDHyperplasia", "Fibrosis", "TawfikTotal", "MeanHepSize", "MinHepSize", "MaxHepSize"]





# IMAGE

# Set FociSize to zero if FociCount is zero
# Drop stdSize
image = raw_image
image = image.drop("stdSize", 1)
image.FociSize[raw_image.FociCount == 0] = 0



# HISTO

histo = raw_histo
histo = histo.drop(["Vessels", "Vacuol", "Pigment", "Std_hep_size"], 1)



# PHYSICAL
physical = raw_physical
physical = physical.drop(["CurrTag", "DeathDate", "Category"], 1)
exposure = pd.read_csv("../data/exposure.csv")
exposure.rename(columns={"BirthYear" : "BirthYear", "AvgOfLambWS" : "E"}, inplace=True)
exposure["AgeAtDeath"] = 2011 - exposure.BirthYear
exposure.E = np.round(1. - exposure.E, 6)
exposure["CES"] = np.flipud(np.flipud(exposure.E).cumsum())
exposure["RES"] = np.flipud(np.flipud(exposure.E - exposure.CES / (exposure.AgeAtDeath + 1)).cumsum())
physical = pd.merge(physical, exposure, on="AgeAtDeath", how="inner")




# COMPLETE DATASET

raw_data = pd.merge(pd.merge(image, histo, on="Sheep", how="outer"), physical, on="Sheep", how="outer")
raw_data.to_csv("../data/tentative_complete.csv")




# AVERAGED BY SHEEP
data = raw_data
data["Inflammation"] = data.FociCount * data.FociSize

sheep = np.round(rescale(data.groupby("Sheep").mean()), 9)



sheep.rename(columns = {"Lobular_collapse" : "LobularCollapse", 
                        "Interface_hepatitis" : "InterfaceHepatitis",
                        "Confluent_necrosis" : "ConfluentNecrosis",
                        "Ln_ap_ri" : "LnApRi",
                        "Portal_inflammation" : "PortalInflammation",
                        "BD_hyperplasia" : "BDHyperplasia",
                        "Mean_hep_size" : "MeanHepSize",
                        "Min_hep_size" : "MinHepSize",
                        "Max_hep_size" : "MaxHepSize"}, inplace=True)



data.rename(columns = {"Lobular_collapse" : "LobularCollapse", 
                        "Interface_hepatitis" : "InterfaceHepatitis",
                        "Confluent_necrosis" : "ConfluentNecrosis",
                        "Ln_ap_ri" : "LnApRi",
                        "Portal_inflammation" : "PortalInflammation",
                        "BD_hyperplasia" : "BDHyperplasia",
                        "Mean_hep_size" : "MeanHepSize",
                        "Min_hep_size" : "MinHepSize",
                        "Max_hep_size" : "MaxHepSize"}, inplace=True)

#age = rescale(data.groupby("Sheep").mean().dropna(subset=delayer([imagecols, histcols, "AgeAtDeath"])).groupby("AgeAtDeath").mean())

# <codecell>

pcols = physcols[:]
pcols.remove("AgeAtDeath")

print " -------- 1"
best_lm_phys, stuff_lm_phys = big_ass_matrix(df=sheep, y=physcols, x=imagecols, group=None, short=5)

print " -------- 2"
best_lm_hist, stuff_lm_hist = big_ass_matrix(df=sheep, y=histcols, x=imagecols, group=None, short=5)

print " -------- 3"
best_mm_phys, stuff_mm_phys = big_ass_matrix(df=sheep, y=pcols, x=imagecols, group="AgeAtDeath", short=5)

print " -------- 4"
best_mm_hist, stuff_mm_hist = big_ass_matrix(df=sheep, y=histcols, x=imagecols, group="AgeAtDeath", short=5)

# <codecell>

y = "BDHyperplasia"
x = ["Inflammation", "Scale", "Directionality"]

dfx = sheep[delayer([x, y, "AgeAtDeath"])].dropna()
model = MixedLM.from_formula(rstr(y, x), data=dfx, groups="AgeAtDeath").fit()
#model = sm.GLS(endog=dfx.Portal_inflammation, exog=dfx[["FociSize", "AgeAtDeath"]]).fit()

dfx = sheep[["BDHyperplasia", "Inflammation", "AgeAtDeath"]].dropna()
model2 = MixedLM.from_formula(rstr(y, ["Inflammation"]), data=dfx, groups="AgeAtDeath").fit()

dfx = sheep[["BDHyperplasia", "FociSize", "AgeAtDeath"]].dropna()
model3 = MixedLM.from_formula(rstr(y, ["FociSize"]), data=dfx, groups="AgeAtDeath").fit()

# <codecell>

ss = "E"
s = np.array([sheep[sheep.AgeAtDeath == model.random_effects.index.values[i]][ss].iloc[0] for i in range(len(model.random_effects.index.values))])
s -= s.min()
s /= s.max()

idx = np.round(s * 2000).astype(int)
C = seaborn.color_palette("coolwarm", 2001)
CC = []
for i in idx :
    CC.append(C[i])


fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
plt.scatter(model.random_effects, model2.random_effects, zs=model3.random_effects, s=450, c=CC, alpha=1)#, c=np.array(C)[idx.astype(int)], alpha=1)
plt.title("Biliary Hyperplasia : Coefficients of Birth Year Random Effect\nFirst component of PCA explains 98.3%% of variance.\nWarmer colours - higher %s" % ss, y=0.9)

for i, sheep_age in enumerate(model.random_effects.index) :
    sheep_e = sheep[sheep.AgeAtDeath == sheep_age].E.iloc[0]
    x2, y2, _ = proj3d.proj_transform(model.random_effects.values[i], model2.random_effects.values[i], model3.random_effects.values[i], ax.get_proj())
    plt.annotate(int(np.round(2011 - sheep_age * 11)), xy=(x2, y2), xytext=(x2-0.0105, y2-0.002))

ax.set_axis_bgcolor("white")
plt.savefig("../talk/figures/regressions/BDHyperplasia/mm_coefs_color_%s.png" % ss, dpi=600, jpeg_quality=100)

# <codecell>

plt.scatter(exposure.BirthYear, exposure.CES)

# <codecell>

plt.scatter(model.random_effects.index.values, idx)

# <codecell>


#plt.plot([sheep[np.round(sheep.AgeAtDeath * 11) == i].E.iloc[0] for i in range(11)])
#plt.plot(exposure.AgeAtDeath, exposure.E)
C2 = []
for i in (np.unique(sheep[["AgeAtDeath", "BD_hyperplasia"]].dropna().AgeAtDeath) * 11).astype(int) :
    C2.append(sheep[np.round(sheep.AgeAtDeath * 11) == i].E.iloc[0])
#sheep[["AgeAtDeath", "E"]].dropna()

# <codecell>

plt.plot(exposure.BirthYear, exposure.E)

# <codecell>

exposure

# <codecell>

plt.text()

# <codecell>

df = sheep[["Interface_hepatitis", "AgeAtDeath", "Entropy"]].dropna()
ols = MixedLM.from_formula(rstr("Interface_hepatitis", ["Entropy"]), data=df, groups=df.AgeAtDeath).fit()
ols.summary()

# <codecell>

print mmR2(df, ols)
print ols.random_effects.abs().mean()

# <codecell>

print ols.summary()

print ols.bse
print ols.bse_re
print ols.bse_fe

# <codecell>

histo.columns

# <codecell>

y1, y2 = mmPredict(df, ols)
plt.scatter(y1, y2)
plt.plot(y1,y1)

# <codecell>


# <codecell>

ols = pd.stats.api.ols(y = avesc.Portal_inflammation, x = avesc.Inflammation)
print ols.summary

# <codecell>

plt.scatter(avesc.FociSize, avesc.Portal_inflammation)
plt.plot(avesc.FociSize, avesc.FociSize * ols.beta[0] + ols.beta[1])
plt.title("p = %.02e" % ols.p_value[0])

# <codecell>

ols2 = pd.stats.api.ols(y = avesc.BD_hyperplasia, x = avesc[["FociSize", "Directionality", "Scale"]])
print ols2.summary

# <codecell>

X = ["FociSize", "Directionality", "Scale"]
x = (ols2.beta[-1] + 
    ols2.beta[0] * avesc[X[0]] +
    ols2.beta[1] * avesc[X[1]] + 
    ols2.beta[2] * avesc[X[2]])
y = avesc.BD_hyperplasia

plt.scatter(x, y)
#plt.plot([0, 1], [- 0.0504, 1- 0.0504] )
#plt.plot(y, x)

#plt.scatter(avesc.FociSize, avesc.Portal_inflammation)
#plt.plot(avesc.FociSize, avesc.FociSize * ols.beta[0] + ols.beta[1])
#plt.title("p = %.02e" % ols.p_value[0])

# <codecell>

ols3 = pd.stats.api.ols(y = avesc.Portal_inflammation, x = avesc.Inflammation)
plt.scatter(avesc.Inflammation, avesc.Portal_inflammation)
plt.plot(avesc.Inflammation, avesc.Inflammation * ols3.beta[0] + ols3.beta[1])
print ols3.summary

# <codecell>

ols4 = pd.stats.api.ols(y = avesc.QTotal, x = avesc.TawfikTotal)
plt.scatter(avesc.TawfikTotal, avesc.QTotal)
plt.plot(avesc.TawfikTotal, avesc.TawfikTotal * ols4.beta[0] + ols4.beta[1])
print ols4.summary

# <codecell>

df2 = sheep[["TawfikTotal", "Entropy", "AgeAtDeath"]]
df2.dropna(inplace=True)
ols = MixedLM.from_formula(rstr("TawfikTotal", ["Entropy"]), data=df2, groups="AgeAtDeath").fit()

ols.pvalues[1:-1].values

# <codecell>

df = sheep[["Inflammation", "Mean_hep_size", "FociSize"]].dropna()
df["Intercept"] = np.ones(len(df))
ols = sm.GLS(endog=df.Mean_hep_size, exog=df[["Inflammation", "FociSize"]]).fit()
ols.rsquared

# <codecell>

mmR2(df, ols)






print "YO"

# <codecell>

print "YO"

# <codecell>

y = pca.fit_transform(s[histcols])

# <codecell>

mm, mmfits, mmpvals, mmqsum = test_all_linear(sheep, ["TawfikTotal"], imagecols, group="AgeAtDeath")

# <codecell>

df = sheep[delayer(["TawfikTotal", "Inflammation", "ResidualES"])].dropna()
df["Intercept"] = np.ones(len(df))
tt = MixedLM(endog = df.TawfikTotal, exog = df[["Inflammation"]], groups=df.ResidualES).fit()
tt.summary()
#del fibrosis.tables[2]


# <codecell>

models, fits, pvals, blah = test_all_linear(sheep, ["Ln_ap_ri"], imagecols, group="AgeAtDeath")

# <codecell>

np.where(np.array(fits["Ln_ap_ri"]) < (2 + np.min(fits["Ln_ap_ri"])))

# <codecell>

idx = 0
print models["Ln_ap_ri"][idx].summary()
print fits["Ln_ap_ri"][idx]

# <codecell>

a.model.endog_names

plt.scatter(y, yhat, c=seaborn.color_palette("deep", 8)[0])
plt.plot(y, y, c=seaborn.color_palette("deep", 8)[2])
plt.xlabel(a.model.endog_names)
yl = a.model.exog_names
#yl.remove("Intercept")
plt.ylabel(", ".join(yl))
plt.title("R2 = %.02f")
st = "%s : %.03f, p = %.03e.\n" * len(yl)
stl = []
for i in range(len(yl)) :
    stl.append(yl[i])
    stl.append(p[dependent][i])
plt.suptitle(st % tuple(delayer([yl[i], p[dependent][i] for i in range(len(yl))

# <codecell>

a.model.exog_names

# <codecell>


# <codecell>

raw_data[raw_data.TissueToSinusoid == raw_data.TissueToSinusoid.max()]

# <codecell>

df = sheep[["MeanHepSize", "Directionality"]].dropna()
df["Intercept"] = np.ones(len(df))
ols = sm.GLS(endog=df.MeanHepSize, exog=df[["Directionality", "Intercept"]]).fit()
ols.summary()

# <codecell>