# scripts/beat_by_beat_analysis.R
1
library(ggplot2)
2
library(stringr)
3
library(plyr)
4
library(dplyr)
5
library(lubridate)
6
library(reshape2)
7
library(scales)
8
library(ggthemes)
9
library(Metrics)
10
11
# Load the model's test-set predictions. The CSV has no header row, so the
# columns get R's default names V1, V2, V3. From how they are used later in
# this script: V1 is the video filename (joined against `Filename`), V2 is
# joined against a frame index, and V3 is the prediction compared with `EF`.
data <- read.csv(
  "r2plus1d_18_32_2_pretrained_test_predictions.csv",
  header = FALSE
)
str(data)

# One prediction per video: only the rows whose V2 equals 0.
# NOTE(review): presumably V2 == 0 is the clip starting at the first frame,
# i.e. the prediction without test-time augmentation -- confirm.
dataNoAugmentation <- data[data$V2 == 0, ]
str(dataNoAugmentation)

# Per-video mean and standard deviation of V3 over every row of that video.
# `summarize` is namespace-qualified because plyr is attached as well and
# exports a function of the same name.
dataGlobalAugmentation <- data %>%
  group_by(V1) %>%
  dplyr::summarize(meanPrediction = mean(V3), sdPred = sd(V3))
str(dataGlobalAugmentation)
21
22
23
# Load the per-frame size table and keep only the rows flagged with
# ComputerSmall == 1.
sizeData <- read.csv("size.csv")
sizeData <- sizeData[sizeData$ComputerSmall == 1, ]
str(sizeData)

# Keep the first two columns (referred to later as `Filename` and `Frame`)
# and move every frame index back by 32, never going below frame 0.
# NOTE(review): the offset of 32 presumably matches the clip length used by
# the model so that the flagged frame falls inside the clip -- confirm.
sizeRelevantFrames <- sizeData[c(1, 2)]
# pmax() clamps in one vectorised step. The earlier form
# `df[df$Frame < 0, ]$Frame <- 0` raises an error when no row is negative
# (assigning a length-1 value into a zero-row selection).
sizeRelevantFrames$Frame <- pmax(sizeRelevantFrames$Frame - 32, 0)
30
31
32
# Pair every selected (Filename, Frame) with the prediction row whose
# (V1, V2) matches. merge() defaults to an inner join, so frames without a
# matching prediction are dropped.
beatByBeat <- merge(
  sizeRelevantFrames, data,
  by.x = c("Filename", "Frame"),
  by.y = c("V1", "V2")
)

# Collapse to one row per video: mean and standard deviation of V3 across the
# matched frames. `summarize` is namespace-qualified because plyr is attached
# as well and exports a function of the same name.
beatByBeat <- beatByBeat %>%
  group_by(Filename) %>%
  dplyr::summarize(meanPrediction = mean(V3), sdPred = sd(V3))
str(beatByBeat)
35
36
### Before running, set `fileLocation` to the directory holding FileList.csv.
### The path must end with a slash because it is pasted directly in front of
### the file name.
fileLocation <- "/Users/davidouyang/Local Medical Data/"

# paste0() takes no `sep` argument; passing one just appends an extra string,
# so it is omitted here.
ActualNumbers <- read.csv(paste0(fileLocation, "FileList.csv"))

# Keep only the first two columns; the rest of the script refers to them as
# `Filename` and `EF`.
ActualNumbers <- ActualNumbers[c(1, 2)]
str(ActualNumbers)
41
42
43
44
# Attach the reference EF to each single-prediction row. all.x = TRUE keeps
# predictions that have no entry in ActualNumbers (their EF becomes NA).
dataNoAugmentation <- merge(
  dataNoAugmentation, ActualNumbers,
  by.x = "V1", by.y = "Filename", all.x = TRUE
)
dataNoAugmentation$AbsErr <- abs(dataNoAugmentation$V3 - dataNoAugmentation$EF)
str(dataNoAugmentation)

# Distribution of the absolute error (its mean is the MAE).
summary(dataNoAugmentation$AbsErr)
# Mean of 4.216  (value recorded from an earlier run)

rmse(dataNoAugmentation$V3, dataNoAugmentation$EF)
## 5.56  (value recorded from an earlier run)

# R^2 of the linear fit of the reference EF on the prediction. The formula
# uses column names with `data =` so that the coefficients are labelled by
# column and the model can be reused with predict().
modelNoAugmentation <- lm(EF ~ V3, data = dataNoAugmentation)
summary(modelNoAugmentation)$r.squared
# 0.79475  (value recorded from an earlier run)
57
58
59
# Attach the reference EF to the per-video mean of the frame-matched
# predictions. all.x = TRUE keeps videos with no entry in ActualNumbers
# (their EF becomes NA).
beatByBeat <- merge(beatByBeat, ActualNumbers, by = "Filename", all.x = TRUE)

# Distribution of the absolute error (its mean is the MAE).
summary(abs(beatByBeat$meanPrediction - beatByBeat$EF))
# Mean of 4.051697  (value recorded from an earlier run)

rmse(beatByBeat$meanPrediction, beatByBeat$EF)
# 5.325237  (value recorded from an earlier run)

# R^2 of the linear fit of the reference EF on the averaged prediction. The
# formula uses column names with `data =` so that the coefficients are
# labelled by column and the model can be reused with predict().
modelBeatByBeat <- lm(EF ~ meanPrediction, data = beatByBeat)
summary(modelBeatByBeat)$r.squared
# 0.8093174  (value recorded from an earlier run)
69
70
71
# Frame-level table for the resampling experiment: one row per matched
# (Filename, Frame) with its prediction in V3. `beatByBeat` has already been
# collapsed to one row per video, so the join is rebuilt here.
beatByBeatAnalysis <- merge(
  sizeRelevantFrames, data,
  by.x = c("Filename", "Frame"),
  by.y = c("V1", "V2")
)
str(beatByBeatAnalysis)

# Result table: one row per random draw. `sample` holds how many rows were
# drawn per video and `error` the mean absolute error of that draw. Both
# columns start as NA and are filled in by the loop.
MAEdata <- data.frame(counter = 1:500)
MAEdata$sample <- NA_real_
MAEdata$error <- NA_real_

str(MAEdata)

# The grouping is the same for every draw, so it is built once up front.
beatsByVideo <- beatByBeatAnalysis %>% group_by(Filename)

# NOTE(review): no RNG seed is set, so the sampled errors differ between
# runs; call set.seed() before this loop if reproducible numbers are needed.
for (i in seq_len(nrow(MAEdata))) {
  # Draws 1-100 sample 1 row per video, 101-200 sample 2, ..., 401-500 sample 5.
  nBeats <- 1 + floor((i - 1) / 100)

  # Sample with replacement within each video and average the predictions.
  samplingBeat <- sample_n(beatsByVideo, nBeats, replace = TRUE) %>%
    group_by(Filename) %>%
    dplyr::summarize(meanPred = mean(V3))
  samplingBeat <- merge(
    samplingBeat, ActualNumbers,
    by = "Filename", all.x = TRUE
  )
  samplingBeat$error <- abs(samplingBeat$meanPred - samplingBeat$EF)

  MAEdata$sample[i] <- nBeats
  MAEdata$error[i] <- mean(samplingBeat$error)
}

str(MAEdata)
95
96
# Box plot of the per-draw mean absolute error, one box per number of sampled
# rows per video. Outlier points are hidden (outlier.shape = NA). No fill
# aesthetic is mapped, so no fill scale is added.
beatBoxPlot <- ggplot(data = MAEdata) +
  geom_boxplot(
    aes(x = sample, y = error, group = sample),
    outlier.shape = NA
  ) +
  theme_classic() +
  theme(legend.position = "none", axis.text.y = element_text(size = 7)) +
  xlab("Number of Sampled Beats") +
  ylab("Mean Absolute Error")

# Top-level evaluation prints (renders) the plot.
beatBoxPlot
100