list.of.packages <- c("tidyverse","bestNormalize")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages))
install.packages(new.packages)
lapply(list.of.packages , require, character.only = T)## [[1]]
## [1] TRUE
##
## [[2]]
## [1] TRUE
## [1] "FID" "IID" "demographic_id" "encoded_sex"
## [5] "GENDER" "SEX" "visit_date" "vAGE"
## [9] "AGE" "STATUS" "SBP" "DBP"
## [13] "HEIGHTcm" "HEIGHT" "WEIGHT" "BMI"
## [17] "WBC" "NEUTROPHIL" "LYMPHOCYTE" "Hb"
## [21] "RBC" "MCV" "MCH" "MCHC"
## [25] "RDW" "PLATELET" "RETICULOCYTE" "CREATININE"
## [29] "LDH" "EOSINOPHIL" "MONOCYTE" "BASOPHIL"
## [33] "MPV" "HCT" "HbF" "HbA"
## [37] "HbA2" "HbS"
write.table(pheno, sub(".xlsx", ".tsv", params$data), col.names = TRUE, row.names = FALSE, sep="\t", quote = FALSE) pheno <- read.table( sub(".xlsx", ".tsv", params$data), sep="\t", header = TRUE)
keep_colmns <- c(
"FID","IID","AGE","SEX","GENDER","HEIGHT","WEIGHT","BMI","SBP","DBP",
"STATUS","HbA","HbA2","HbF","HbS",
"RBC","Hb","MCV","HCT","MCHC","MCH","WBC","RDW",
"LYMPHOCYTE","MONOCYTE","NEUTROPHIL",
"EOSINOPHIL","BASOPHIL","PLATELET","RETICULOCYTE"
)
bt_pheno <- pheno[, which(names(pheno) %in% keep_colmns)]
bt_pheno$AGE <- as.numeric(bt_pheno$AGE)
bt_pheno$SBP <- as.numeric(bt_pheno$SBP)
bt_pheno$DBP <- as.numeric(bt_pheno$DBP)
bt_pheno$EOSINOPHIL <- as.numeric(bt_pheno$EOSINOPHIL)
bt_pheno$MONOCYTE <- as.numeric(bt_pheno$MONOCYTE)
bt_pheno$BMI <- (bt_pheno$WEIGHT)/(bt_pheno$HEIGHT)^2## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.021 8.039 11.234 13.395 16.235 44.201
hist(bt_pheno$AGE, main="AGE destribution", xlab="AGE")
#plot(density(na.omit(bt_pheno$AGE)))
#polygon(density(na.omit(bt_pheno$AGE)), col = "coral")
#abline(v=c(min(bt_pheno$AGE), max(bt_pheno$AGE)), lty=2, lwd=1)bt_pheno %>%
select(SEX) %>%
table() %>%
as.data.frame() %>%
na.omit() %>%
mutate(Prop = (Freq/sum(Freq))*100) -> sex # Sex
sexcolnames(sex) <- c("cat", "freq", "prop")
par(mar=c(4,4,3,4))
barplot(
sex$prop,
ylim=c(0, 60),
col=c(2,4)
)
text(
x=c(0.7,1.9),
y=30,
labels=c("Female", "Male")
)
mtext(
text = "Proportion (%)",
side = 2,
line = 3
)
mtext(
text = "Sex",
side = 1,
line = 1
)Definition of outliers removed:
# figures-side,
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$RBC, main="RBC (million/uL)")
plot(density(na.omit(bt_pheno$RBC)), main="RBC (million/uL)")
polygon(density(na.omit(bt_pheno$RBC)), col = "coral")
# removing outliers
abline(v=40, lwd=1, lty=2)
bt_pheno$RBC <- as.numeric(ifelse(bt_pheno$RBC > 40, "NA", bt_pheno$RBC))## Warning: NAs introduced by coercion
plot(density(na.omit(bt_pheno$RBC)), main="RBC (>40) outlier pruned")
polygon(density(na.omit(bt_pheno$RBC)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$RBC)## Warning in bestNormalize::orderNorm(bt_pheno$RBC): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 778 nonmissing obs and ties
## - 257 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.90 2.38 2.80 3.19 13.90
bt_pheno$nRBC <- BNphen$x.t
plot(density(na.omit(bt_pheno$nRBC)), main="Normalized RBC")
polygon(density(na.omit(bt_pheno$nRBC)), col = "lightblue")
# fit smooth spline for Age against RBC
rbc_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","RBC"))])
rbc_plot <- qplot(AGE, RBC, data = rbc_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.900 2.380 2.800 2.856 3.190 13.900 79
# figures-side,
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$RDW, main="RDW")
plot(density(na.omit(bt_pheno$RDW)), main="RDW")
polygon(density(na.omit(bt_pheno$RDW)), col = "coral")
# removing outliers
#abline(v=10, lwd=1, lty=2)
#bt_pheno$RDW <- as.numeric(ifelse(bt_pheno$RDW > 10, "NA", bt_pheno$RDW))
#plot(density(na.omit(bt_pheno$RDW)), main="RDW (>10) outlier pruned")
#polygon(density(na.omit(bt_pheno$RDW)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$RDW)## Warning in bestNormalize::orderNorm(bt_pheno$RDW): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 779 nonmissing obs and ties
## - 181 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 13.4 19.7 22.5 25.5 47.9
bt_pheno$nRDW <- BNphen$x.t
plot(density(na.omit(bt_pheno$nRDW)), main="Normalized RDW")
polygon(density(na.omit(bt_pheno$nRDW)), col = "lightblue")
# fit smooth spline for Age against RBC
rdw_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","RDW"))])
rdw_plot <- qplot(AGE, RDW, data = rdw_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
rdw_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 13.40 19.70 22.50 22.98 25.50 47.90 78
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$Hb, main="Hb (g/dL)", xlab="Hb")
plot(density(na.omit(bt_pheno$Hb)), main="Hb (g/dL)")
polygon(density(na.omit(bt_pheno$Hb)), col = "coral")
# removing outliers
#abline(v=13, lwd=1, lty=2)
#bt_pheno$Hb <- as.numeric(ifelse(bt_pheno$Hb > 13, "NA", bt_pheno$Hb))
#plot(density(na.omit(bt_pheno$Hb)), main="Hb (>13) outlier pruned")
#polygon(density(na.omit(bt_pheno$Hb)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$Hb)## Warning in bestNormalize::orderNorm(bt_pheno$Hb): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 783 nonmissing obs and ties
## - 317 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 2.20 6.72 7.50 8.36 11.80
bt_pheno$nHb <- BNphen$x.t
plot(density(na.omit(bt_pheno$nHb)), main="Normalized Hb")
polygon(density(na.omit(bt_pheno$nHb)), col = "lightblue")
# fit smooth spline for Age against Hb
hb_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","Hb"))])
hb_plot <- qplot(AGE, Hb, data = hb_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
hb_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2.200 6.720 7.500 7.473 8.360 11.800 74
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$HbF, main="HbF (%)", xlab="HbF")
plot(density(na.omit(bt_pheno$HbF)), main="HbF")
polygon(density(na.omit(bt_pheno$HbF)), col = "coral")
# removing outliers
abline(v=0, lwd=1, lty=2)
bt_pheno$HbF <- as.numeric(ifelse(bt_pheno$HbF == 0, "NA", bt_pheno$HbF))
plot(density(na.omit(bt_pheno$HbF)), main="HbF (0) outlier pruned")
polygon(density(na.omit(bt_pheno$HbF)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$HbF)
BNphen ## orderNorm Transformation with 857 nonmissing obs and ties
## - 162 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.2 2.6 4.7 7.7 28.0
bt_pheno$onHbF <- BNphen$x.t
plot(density(na.omit(bt_pheno$onHbF)), main="Normalized HbF")
polygon(density(na.omit(bt_pheno$onHbF)), col = "lightblue")
# CUBIC ROOT TRANSFORMATION
bt_pheno$nHbF <- (bt_pheno$HbF)^(1/3)
plot(density(na.omit(bt_pheno$nHbF)), main="Cubic root transformed HbF")
polygon(density(na.omit(bt_pheno$nHbF)), col = "lightblue")
# fit smooth spline for Age against Hb
HbF_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","HbF"))])
HbF_plot <- qplot(AGE, HbF, data = HbF_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
HbF_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.200 2.600 4.700 5.657 7.700 28.000
par(mar = c(4, 4, 2, 2))
hist(pheno$MCV, main="MCV (fL)")
plot(density(na.omit(bt_pheno$MCV)), main="MCV (fL)")
polygon(density(na.omit(bt_pheno$MCV)), col = "coral")
# removing outliers
abline(v=10, lwd=1, lty=2)
bt_pheno$MCV <- as.numeric(ifelse(bt_pheno$MCV < 10, "NA", bt_pheno$MCV))## Warning: NAs introduced by coercion
plot(density(na.omit(bt_pheno$MCV)), main="MCV (<10) outlier pruned")
polygon(density(na.omit(bt_pheno$MCV)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$MCV)## Warning in bestNormalize::orderNorm(bt_pheno$MCV): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 781 nonmissing obs and ties
## - 274 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 20.3 75.3 81.3 87.0 110.0
bt_pheno$nMCV <- BNphen$x.t
plot(density(na.omit(bt_pheno$nMCV)), main="Normalized (orderNorm) MCV")
polygon(density(na.omit(bt_pheno$nMCV)), col = "lightblue")
# fit smooth spline for Age against Hb
mcv_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","MCV"))])
mcv_plot <- qplot(AGE, MCV, data = mcv_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
mcv_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 20.30 75.30 81.30 81.03 87.00 110.00 76
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$HCT, main="HCT")
plot(density(na.omit(bt_pheno$HCT)), main="HCT")
polygon(density(na.omit(bt_pheno$HCT)), col = "coral")
# removing ourliers
#abline(v=43, lwd=1, lty=2)
#bt_pheno$HCT <- as.numeric(ifelse(bt_pheno$HCT > 43, "NA", bt_pheno$HCT))
#plot(density(na.omit(bt_pheno$HCT)), main="HCT (>43) outlier pruned")
#polygon(density(na.omit(bt_pheno$HCT)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$HCT)## Warning in bestNormalize::orderNorm(bt_pheno$HCT): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 773 nonmissing obs and ties
## - 187 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 7.6 20.0 22.5 25.0 39.5
bt_pheno$nHCT <- BNphen$x.t
plot(density(na.omit(bt_pheno$nHCT)), main="Normalized HCT")
polygon(density(na.omit(bt_pheno$nHCT)), col = "lightblue")
# fit smooth spline for Age against Hb
htc_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","HCT"))])
htc_plot <- qplot(AGE, HCT, data = htc_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
htc_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 7.60 20.00 22.50 22.54 25.00 39.50 84
#Mean Corpuscular Hemoglobin Concentration
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$MCHC, main="MCHC (g/dL)")
plot(density(na.omit(bt_pheno$MCHC)), main="MCHC (g/dL)")
polygon(density(na.omit(bt_pheno$MCHC)), col = "coral")
# removing
abline(v=c(10, 60), lwd=1, lty=2)
bt_pheno$MCHC <- as.numeric(
ifelse(bt_pheno$MCHC > 60, "NA",
#bt_pheno$MCHC
ifelse(bt_pheno$MCHC < 10, "NA", bt_pheno$MCHC)
)
)## Warning: NAs introduced by coercion
plot(density(na.omit(bt_pheno$MCHC)), main="MCHC (>60 and <10) outlier pruned")
polygon(density(na.omit(bt_pheno$MCHC)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$MCHC)## Warning in bestNormalize::orderNorm(bt_pheno$MCHC): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 778 nonmissing obs and ties
## - 99 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 24.4 31.9 33.3 34.5 51.1
bt_pheno$nMCHC <- BNphen$x.t
plot(density(na.omit(bt_pheno$nMCHC)), main="Normalized MCHC")
polygon(density(na.omit(bt_pheno$nMCHC)), col = "lightblue")
# fit smooth spline for Age against Hb
mchc_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","MCHC"))])
mchc_plot <- qplot(AGE, MCHC, data = mchc_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
mchc_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 24.40 31.90 33.30 33.21 34.50 51.10 79
#The Mean Corpuscular Hemoglobin
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$MCH, main="MCH (pg/cell)")
plot(density(na.omit(bt_pheno$MCH)), main="MCH (pg/cell)")
polygon(density(na.omit(bt_pheno$MCH)), col = "coral")
# removing outliers
# abline(v=c(10, 50), lwd=1, lty=2)
# bt_pheno$MCH <- as.numeric(
# ifelse(bt_pheno$MCH < 10, "NA",
# #bt_pheno$MCH
# ifelse(bt_pheno$MCH > 50, "NA", bt_pheno$MCH)
# )
# )
#
# plot(density(na.omit(bt_pheno$MCH)), main="MCH (<10 and >50) outlier pruned")
# polygon(density(na.omit(bt_pheno$MCH)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$MCH)## Warning in bestNormalize::orderNorm(bt_pheno$MCH): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 765 nonmissing obs and ties
## - 165 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 4.01 24.70 27.10 29.40 54.70
bt_pheno$nMCH <- BNphen$x.t
plot(density(na.omit(bt_pheno$nMCH)), main="Normalized MCH")
polygon(density(na.omit(bt_pheno$nMCH)), col = "lightblue")
# fit smooth spline for Age against Hb
MCH_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","MCH"))])
MCH_plot <- qplot(AGE, MCH, data = MCH_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
MCH_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 4.01 24.70 27.10 27.05 29.40 54.70 92
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$WBC, main="WBC (x10^9 cells/L)")
plot(density(na.omit(bt_pheno$WBC)), main="WBC (x10^9 cells/L)")
polygon(density(na.omit(bt_pheno$WBC)), col = "coral")
# removing
#abline(v=35, lwd=1, lty=2)
#bt_pheno$WBC <- as.numeric(
# ifelse(bt_pheno$WBC > 35, "NA",
# bt_pheno$WBC
# #ifelse(bt_pheno$WBC < 10, "NA", bt_pheno$WBC)
# )
#)
plot(density(na.omit(bt_pheno$WBC)), main="WBC (>35) outlier pruned")
polygon(density(na.omit(bt_pheno$WBC)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$WBC)## Warning in bestNormalize::orderNorm(bt_pheno$WBC): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 793 nonmissing obs and ties
## - 290 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 3.43 11.50 14.40 18.20 59.70
bt_pheno$nWBC <- BNphen$x.t
plot(density(na.omit(bt_pheno$nWBC)), main="Normalized WBC")
polygon(density(na.omit(bt_pheno$nWBC)), col = "lightblue")
# fit smooth spline for Age against Hb
wbc_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","WBC"))])
wbc_plot <- qplot(AGE, WBC, data = wbc_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
wbc_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 3.43 11.50 14.40 15.70 18.20 59.70 64
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$LYMPHOCYTE, main="LYMPHOCYTE (x10^9 cells/L)")
plot(density(na.omit(bt_pheno$LYMPHOCYTE)), main="LYMPHOCYTE (x10^9 cells/L)")
polygon(density(na.omit(bt_pheno$LYMPHOCYTE)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$LYMPHOCYTE)## Warning in bestNormalize::orderNorm(bt_pheno$LYMPHOCYTE): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 795 nonmissing obs and ties
## - 523 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 2.22 34.90 42.90 51.35 89.40
bt_pheno$nLYMPHOCYTE <- BNphen$x.t
plot(density(na.omit(bt_pheno$nLYMPHOCYTE)), main="Normalized LYMPHOCYTE")
polygon(density(na.omit(bt_pheno$nLYMPHOCYTE)), col = "lightblue")
# fit smooth spline for Age against Hb
lym_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","LYMPHOCYTE"))])
lym_plot <- qplot(AGE, LYMPHOCYTE, data = lym_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
lym_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2.22 34.90 42.90 42.06 51.35 89.40 62
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$MONOCYTE, main="MONOCYTE (x10^9 cells/L)")
plot(density(na.omit(bt_pheno$MONOCYTE)), main="MONOCYTE (x10^9 cells/L)")
polygon(density(na.omit(bt_pheno$MONOCYTE)), col = "coral")
# removing
#abline(v=5, lwd=1, lty=2)
#bt_pheno$MONOCYTE <- as.numeric(
# ifelse(bt_pheno$MONOCYTE > 5, "NA",
# bt_pheno$MONOCYTE
# #ifelse(bt_pheno$MONOCYTE == 0, "NA", bt_pheno$MONOCYTE)
# )
#)
plot(density(na.omit(bt_pheno$MONOCYTE)), main="MONOCYTE (>5) outlier pruned")
polygon(density(na.omit(bt_pheno$MONOCYTE)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$MONOCYTE)## Warning in bestNormalize::orderNorm(bt_pheno$MONOCYTE): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 776 nonmissing obs and ties
## - 25 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 1 4 6 8 46
bt_pheno$nMONOCYTE <- BNphen$x.t
plot(density(na.omit(bt_pheno$nMONOCYTE)), main="Normalized MONOCYTE")
polygon(density(na.omit(bt_pheno$nMONOCYTE)), col = "lightblue")
# fit smooth spline for Age against Hb
mon_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","MONOCYTE"))])
mon_plot <- qplot(AGE, MONOCYTE, data = mon_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
mon_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 4.000 6.000 6.416 8.000 46.000 81
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$NEUTROPHIL, main="NEUTROPHIL (g/L)")
plot(density(na.omit(bt_pheno$NEUTROPHIL)), main="NEUTROPHIL (g/L)")
polygon(density(na.omit(bt_pheno$NEUTROPHIL)), col = "coral")
# removing
#abline(v=20, lwd=1, lty=2)
#bt_pheno$NEUTROPHIL <- as.numeric(
# ifelse(bt_pheno$NEUTROPHIL > 20, "NA",
# bt_pheno$NEUTROPHIL
# #ifelse(bt_pheno$NEUTROPHIL < 10, "NA", bt_pheno$NEUTROPHIL)
# )
#)
plot(density(na.omit(bt_pheno$NEUTROPHIL)), main="NEUTROPHIL (>20) outlier pruned")
polygon(density(na.omit(bt_pheno$NEUTROPHIL)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$NEUTROPHIL)## Warning in bestNormalize::orderNorm(bt_pheno$NEUTROPHIL): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 716 nonmissing obs and ties
## - 478 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 2.300 34.400 41.950 49.725 90.100
bt_pheno$nNEUTROPHIL <- BNphen$x.t
plot(density(na.omit(bt_pheno$nNEUTROPHIL)), main="Normalized NEUTROPHIL")
polygon(density(na.omit(bt_pheno$nNEUTROPHIL)), col = "lightblue")
# fit smooth spline for Age against NEUTROPHIL
neu_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","NEUTROPHIL"))])
neu_plot <- qplot(AGE, NEUTROPHIL, data = neu_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
neu_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2.30 34.40 41.95 41.40 49.73 90.10 141
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$EOSINOPHIL, main="EOSINOPHIL (x10^9 cells/L)")
plot(density(na.omit(bt_pheno$EOSINOPHIL)), main="EOSINOPHIL (x10^9 cells/L)")
polygon(density(na.omit(bt_pheno$EOSINOPHIL)), col = "coral")
# removing
#abline(v=2, lwd=1, lty=2)
#bt_pheno$EOSINOPHIL <- as.numeric(
# ifelse(bt_pheno$EOSINOPHIL > 2, "NA",
# bt_pheno$EOSINOPHIL
# #ifelse(bt_pheno$EOSINOPHIL < 10, "NA", bt_pheno$EOSINOPHIL)
# )
#)
plot(density(na.omit(bt_pheno$EOSINOPHIL)), main="EOSINOPHIL (>2) outlier pruned")
polygon(density(na.omit(bt_pheno$EOSINOPHIL)), col = "coral")
BNphen <- bestNormalize(bt_pheno$EOSINOPHIL)
BNphen ## Best Normalizing transformation with 815 Observations
## Estimated Normality Statistics (Pearson P / df, lower => more normal):
## - arcsinh(x): 7.2672
## - Center+scale: 7.2766
## - Double Reversed Log_b(x+a): 7.8185
## - Exp(x): 92.6634
## - Log_b(x+a): 14.441
## - orderNorm (ORQ): 7.1458
## - sqrt(x + a): 6.9607
## - Yeo-Johnson: 7.2511
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##
## Based off these, bestNormalize chose:
## Standardized sqrt(x + a) Transformation with 815 nonmissing obs.:
## Relevant statistics:
## - a = 0
## - mean (before standardization) = 1.544998
## - sd (before standardization) = 1.017404
bt_pheno$nEOSINOPHIL <- BNphen$x.t
plot(density(na.omit(bt_pheno$nEOSINOPHIL)), main="Normalized EOSINOPHIL")
polygon(density(na.omit(bt_pheno$nEOSINOPHIL)), col = "lightblue")
# fit smooth spline for Age against EOSINOPHIL
eu_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","EOSINOPHIL"))])
eu_plot <- qplot(AGE, EOSINOPHIL, data = eu_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
eu_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 1.000 2.000 3.421 5.000 40.000 42
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$BASOPHIL, main="BASOPHIL")
plot(density(na.omit(bt_pheno$BASOPHIL)), main="BASOPHIL")
polygon(density(na.omit(bt_pheno$BASOPHIL)), col = "coral")
# removing
#abline(v=1, lwd=1, lty=2)
#bt_pheno$BASOPHIL <- as.numeric(
# ifelse(bt_pheno$BASOPHIL > 1, "NA",
# bt_pheno$BASOPHIL
# #ifelse(bt_pheno$BASOPHIL < 10, "NA", bt_pheno$BASOPHIL)
# )
#)
plot(density(na.omit(bt_pheno$BASOPHIL)), main="BASOPHIL (>1) outlier pruned")
polygon(density(na.omit(bt_pheno$BASOPHIL)), col = "coral")
# binarize basophil after removing outliers (1-control, 2-case)
# manual (10.1016/j.ajhg.2021.08.007)
bt_pheno$bBASOPHIL <- as.numeric(
ifelse(bt_pheno$BASOPHIL < 0.05, 1,
#bt_pheno$BASOPHIL
ifelse(bt_pheno$BASOPHIL >= 0.05, 2, "NA")
)
)
bt_pheno$cBASOPHIL <- as.numeric(
ifelse(bt_pheno$BASOPHIL < 0.05, 0,
#bt_pheno$BASOPHIL
ifelse(bt_pheno$BASOPHIL >= 0.05, 1, "NA")
)
)
hist(bt_pheno$bBASOPHIL, main="binarized")
# fit smooth spline for Age against BASOPHIL
bas_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","BASOPHIL"))])
bas_plot <- qplot(AGE, BASOPHIL, data = bas_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
bas_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.6667 1.0395 1.5438 1.5117 82.0000 45
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$PLATELET, main="PLATELET (x10^9/L)")
plot(density(na.omit(bt_pheno$PLATELET)), main="PLATELET (x10^9/L)")
polygon(density(na.omit(bt_pheno$PLATELET)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$PLATELET)## Warning in bestNormalize::orderNorm(bt_pheno$PLATELET): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 783 nonmissing obs and ties
## - 468 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.733 337.500 441.000 559.500 1355.000
bt_pheno$nPLATELET <- BNphen$x.t
plot(density(na.omit(bt_pheno$nPLATELET)), main="Normalized PLATELET")
polygon(density(na.omit(bt_pheno$nPLATELET)), col = "lightblue")
# fit smooth spline for Age against PLATELET
pla_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","PLATELET"))])
pla_plot <- qplot(AGE, PLATELET, data = pla_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
pla_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.733 337.500 441.000 450.752 559.500 1355.000 74
# RETICULOCYTE PER /uL
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$RETICULOCYTE, main="RETICULOCYTE (units/uL)")
plot(density(na.omit(bt_pheno$RETICULOCYTE)), main="RETICULOCYTE")
polygon(density(na.omit(bt_pheno$RETICULOCYTE)), col = "coral")
# removing
abline(v=60, lwd=1, lty=2)
bt_pheno$RETICULOCYTE <- as.numeric(
ifelse(bt_pheno$RETICULOCYTE > 60, "NA",
bt_pheno$RETICULOCYTE
#ifelse(bt_pheno$RETICULOCYTE < 10, "NA", bt_pheno$RETICULOCYTE)
)
)## Warning: NAs introduced by coercion
plot(density(na.omit(bt_pheno$RETICULOCYTE)), main="RETICULOCYTE (>60) outlier pruned")
polygon(density(na.omit(bt_pheno$RETICULOCYTE)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$RETICULOCYTE)## Warning in bestNormalize::orderNorm(bt_pheno$RETICULOCYTE): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 441 nonmissing obs and ties
## - 176 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.00 5.60 10.10 14.65 38.10
bt_pheno$nRETICULOCYTE <- BNphen$x.t
plot(density(na.omit(bt_pheno$nRETICULOCYTE)), main="Normalized RETICULOCYTE")
polygon(density(na.omit(bt_pheno$nRETICULOCYTE)), col = "lightblue")
# fit smooth spline for Age against PLATELET
ret_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","RETICULOCYTE"))])
ret_plot <- qplot(AGE, RETICULOCYTE, data = ret_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
ret_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 5.60 10.10 10.60 14.65 38.10 416
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$SBP, main="SBP")
plot(density(na.omit(bt_pheno$SBP)), main="SBP")
polygon(density(na.omit(bt_pheno$SBP)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$SBP)
BNphen ## orderNorm Transformation with 648 nonmissing obs and ties
## - 87 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 20.0 97.0 105.5 117.0 168.0
bt_pheno$nSBP <- BNphen$x.t
plot(density(na.omit(bt_pheno$nSBP)), main="Normalized SBP")
polygon(density(na.omit(bt_pheno$nSBP)), col = "lightblue")
# fit smooth spline for Age against RETICS
sbp_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","SBP"))])
sbp_plot <- qplot(AGE, SBP, data = sbp_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
sbp_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 20.0 97.0 105.5 106.6 117.0 168.0 209
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$DBP, main="DBP")
plot(density(na.omit(bt_pheno$DBP)), main="DBP")
polygon(density(na.omit(bt_pheno$DBP)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$DBP)
BNphen ## orderNorm Transformation with 647 nonmissing obs and ties
## - 72 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 20 58 65 73 109
bt_pheno$nDBP <- BNphen$x.t
plot(density(na.omit(bt_pheno$nDBP)), main="Normalized DBP")
polygon(density(na.omit(bt_pheno$nDBP)), col = "lightblue")
# fit smooth spline for Age against DBP
dbp_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","DBP"))])
dbp_plot <- qplot(AGE, DBP, data = dbp_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
dbp_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 20.0 58.0 65.0 65.3 73.0 109.0 210
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$BMI, main="BMI")
plot(density(na.omit(bt_pheno$BMI)), main="BMI")
polygon(density(na.omit(bt_pheno$BMI)), col = "coral")
# removing outliers
abline(v=40, lwd=1, lty=2)
bt_pheno$BMI <- as.numeric(
ifelse(bt_pheno$BMI > 40, "NA",
bt_pheno$BMI
#ifelse(bt_pheno$BMI < 10, "NA", bt_pheno$BMI)
)
)
plot(density(na.omit(bt_pheno$BMI)), main="BMI (>40) outlier pruned")
polygon(density(na.omit(bt_pheno$BMI)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$BMI)
BNphen ## orderNorm Transformation with 425 nonmissing obs and ties
## - 419 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 11.818 14.134 15.311 17.526 34.666
bt_pheno$nBMI <- BNphen$x.t
plot(density(na.omit(bt_pheno$nBMI)), main="Normalized BMI")
polygon(density(na.omit(bt_pheno$nBMI)), col = "lightblue")
# fit smooth spline for Age against BMI
bmi_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","BMI"))])
bmi_plot <- qplot(AGE, BMI, data = bmi_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
bmi_plot## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 20.0 58.0 65.0 65.3 73.0 109.0 210