Load phenotype data and
extract blood traits with relevant additional columns
https://www.hematology.org/education/trainees/fellows/hematopoiesis/2021/hemoglobin-electrophoresis-in-sickle-cell-disease
pheno <- read_excel(params$data, sheet = "clean_metadata")
names(pheno)
## [1] "FID" "IID" "GID"
## [4] "SID" "GENDER" "GENDER2"
## [7] "SEX" "AGE1" "AGE_UNITS"
## [10] "AGE" "DISTRICT" "WEIGHT"
## [13] "HEIGHT" "BMI" "HU_STATUS"
## [16] "SBP" "DBP" "UREA"
## [19] "CREATININE" "uACR" "BASOPHIL"
## [22] "BASOPHIL_PERCENT" "EOSINOPHIL" "EOSINOPHIL_PERCENT"
## [25] "HCT" "Hb" "LDH"
## [28] "LYMPHOCYTE" "LYMPHOCYTE_PERCENT" "MCH"
## [31] "MCHC" "MCV" "MPV"
## [34] "MONOCYTE" "MONOCYTE_PERCENT" "NEUTROPHIL"
## [37] "NEUTROPHIL_PERCENT" "PLATELET" "RBC"
## [40] "RDW" "RDWSD" "WBC"
## [43] "HbA" "HbA2" "HbF"
## [46] "HbS" "SICKLECELLTEST" "CURATED_STATUS2"
## [49] "CURATED_STATUS"
keep_colmns <- c(
"FID","IID","GID","AGE","GENDER","SEX","HEIGHT","WEIGHT","BMI",
"CURATED_STATUS","HU_STATUS","SBP","DBP",
"BASOPHIL","BASOPHIL_PERCENT","EOSINOPHIL","EOSINOPHIL_PERCENT","HCT",
"Hb","LYMPHOCYTE","LYMPHOCYTE_PERCENT","MCH","MCHC","MCV","MPV","MONOCYTE",
"MONOCYTE_PERCENT","NEUTROPHIL","NEUTROPHIL_PERCENT","PLATELET","RBC","RDW","RDWSD",
"WBC","HbA","HbA2","HbF","HbS"
)
bt_pheno <- pheno[, which(names(pheno) %in% keep_colmns)]
colmns_to_update <- c(
"AGE","SEX",
"HEIGHT","WEIGHT","BMI","SBP","DBP",
"BASOPHIL","BASOPHIL_PERCENT","EOSINOPHIL","EOSINOPHIL_PERCENT","LYMPHOCYTE","LYMPHOCYTE_PERCENT",
"MONOCYTE","MONOCYTE_PERCENT","NEUTROPHIL","NEUTROPHIL_PERCENT","WBC",
"MCH","MCHC","MCV","RBC","RDW","RDWSD",
"PLATELET","MPV",
"HCT","Hb","HbA","HbA2","HbF","HbS"
)
set_var_type <- function(x) {
as.numeric(x)
}
bt_pheno[, c(colmns_to_update)] <- lapply(bt_pheno[, c(colmns_to_update)] , set_var_type)
#bt_pheno$BMI <- as.numeric((bt_pheno$WEIGHT)/(bt_pheno$HEIGHT)^2)
str(bt_pheno)
## tibble [952 × 38] (S3: tbl_df/tbl/data.frame)
## $ FID : chr [1:952] "207477820002_R03C01" "207477820010_R03C01" "207477820029_R04C01" "207477820029_R02C01" ...
## $ IID : chr [1:952] "207477820002_R03C01" "207477820010_R03C01" "207477820029_R04C01" "207477820029_R02C01" ...
## $ GID : chr [1:952] "SMLW000077" "SMLW000093" "SMLW000126" "SMLW000124" ...
## $ GENDER : chr [1:952] "Female" "Male" "Male" "Female" ...
## $ SEX : num [1:952] 2 1 1 2 1 2 1 1 1 1 ...
## $ AGE : num [1:952] 16 3 10 9 13 5 4 16 2.75 5 ...
## $ WEIGHT : num [1:952] 41.1 12.9 23.9 25.4 36.9 ...
## $ HEIGHT : num [1:952] 149 94.4 128 127 149 108 112 158 90 115 ...
## $ BMI : num [1:952] 18.5 14.5 14.6 15.7 16.6 ...
## $ HU_STATUS : chr [1:952] "Yes" "Yes" "Yes" "Yes" ...
## $ SBP : num [1:952] 123 88 90 100 95 90 90 113 101 NA ...
## $ DBP : num [1:952] 96 42 57 53 62 48 54 73 58 NA ...
## $ BASOPHIL : num [1:952] 0.02 0.01 0.04 0.02 0.06 0.04 0.05 0.11 0.06 NA ...
## $ BASOPHIL_PERCENT : num [1:952] 0.3 0.2 0.4 0.3 0.5 0.3 0.4 0.8 0.4 NA ...
## $ EOSINOPHIL : num [1:952] 0.15 0.14 0.15 0.12 0.1 0.26 0.65 0.42 0.4 NA ...
## $ EOSINOPHIL_PERCENT: num [1:952] 1.9 2.7 1.2 2 0.8 2 4.9 3 2.5 NA ...
## $ HCT : num [1:952] 33.7 37.1 19.9 32.9 20.5 24.3 19.5 22.4 25.8 NA ...
## $ Hb : num [1:952] 11.3 11.9 6.5 11 6.8 7.9 6.7 7.4 8.4 NA ...
## $ LYMPHOCYTE : num [1:952] 2.49 3.25 4.75 2.61 2.91 6.04 5.49 6.34 9.82 NA ...
## $ LYMPHOCYTE_PERCENT: num [1:952] 32.1 62.9 38.5 43.7 24.2 47.1 41 45.4 60.1 NA ...
## $ MCH : num [1:952] 38.3 22.3 35.1 29.8 30 28.6 33.1 32.9 29.6 NA ...
## $ MCHC : num [1:952] 33.6 32.1 32.8 33.3 32.9 32.5 34.3 33.2 32.7 NA ...
## $ MCV : num [1:952] 114 70 107 90 91 88 96 99 91 NA ...
## $ MPV : num [1:952] 8.8 7.5 10.9 10.1 9.9 9.7 8.8 10.3 8.9 NA ...
## $ MONOCYTE : num [1:952] 1.11 0.27 1.39 0.38 1.25 1.41 0.97 1.16 1.04 NA ...
## $ MONOCYTE_PERCENT : num [1:952] 14.3 5.3 11.3 6.3 10.4 11 7.3 8.3 6.4 NA ...
## $ NEUTROPHIL : num [1:952] 3.99 1.49 6 2.86 7.71 5.09 6.22 5.92 5.02 NA ...
## $ NEUTROPHIL_PERCENT: num [1:952] 51.4 29 48.7 47.8 64.2 39.6 46.5 42.4 30.7 NA ...
## $ PLATELET : num [1:952] 373 274 244 383 441 411 388 293 439 NA ...
## $ RBC : num [1:952] 2.96 5.33 1.86 3.68 2.25 2.76 2.02 2.26 2.85 NA ...
## $ RDW : num [1:952] 14.5 17.2 17.7 19 22.7 23.2 27.7 19.5 18.7 NA ...
## $ RDWSD : num [1:952] 63.8 40.6 69.3 60.1 69.3 71.5 89.2 69.3 59.6 NA ...
## $ WBC : num [1:952] 7.8 5.2 12.3 6 12 12.8 13.4 14 16.3 NA ...
## $ HbA : num [1:952] 0 65.9 0 11.7 0 2.8 2.6 7.3 27.9 0 ...
## $ HbA2 : num [1:952] 5.2 6.1 0 6.6 4.6 5.9 5.7 5.4 4.8 4.6 ...
## $ HbF : num [1:952] 15.9 NA 14.2 11.8 9 6.4 5.7 3.8 9 8.6 ...
## $ HbS : num [1:952] 78.9 28 85.8 70 86.6 84.8 86 83.4 58.1 86.8 ...
## $ CURATED_STATUS : chr [1:952] "HbSS" "HbAS" "HbSS" "NA" ...
# Get only HbSS
bt_pheno = bt_pheno[bt_pheno$CURATED_STATUS == "HbSS",]
Describe phenotypes and
normalize if needed
RBC
# figures-side,
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$RBC, main="RBC (million/uL)")
plot(density(na.omit(bt_pheno$RBC)), main="RBC (million/uL)")
polygon(density(na.omit(bt_pheno$RBC)), col = "coral")
# removing outliers
#abline(v=8, lwd=1, lty=2)
#bt_pheno$RBC <- as.numeric(ifelse(bt_pheno$RBC > 8, "NA", bt_pheno$RBC))
#plot(density(na.omit(bt_pheno$RBC)), main="RBC (>8) outlier pruned")
#polygon(density(na.omit(bt_pheno$RBC)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$RBC)
BNphen
## orderNorm Transformation with 281 nonmissing obs and ties
## - 150 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 1.03 2.30 2.58 2.88 4.94
bt_pheno$nRBC <- BNphen$x.t
plot(density(na.omit(bt_pheno$nRBC)), main="Normalized RBC")
polygon(density(na.omit(bt_pheno$nRBC)), col = "lightblue")
# fit smooth spline for Age against RBC
rbc_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","RBC"))])
rbc_plot <- qplot(AGE, RBC, data = rbc_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
rbc_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.030 2.300 2.580 2.606 2.880 4.940 12




RDW
# figures-side,
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$RDW, main="RDW (%)")
plot(density(na.omit(bt_pheno$RDW)), main="RDW (%)")
polygon(density(na.omit(bt_pheno$RDW)), col = "coral")
# removing outliers
#abline(v=10, lwd=1, lty=2)
#bt_pheno$RDW <- as.numeric(ifelse(bt_pheno$RDW > 10, "NA", bt_pheno$RDW))
#plot(density(na.omit(bt_pheno$RDW)), main="RDW (>10) outlier pruned")
#polygon(density(na.omit(bt_pheno$RDW)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$RDW)
BNphen
## orderNorm Transformation with 281 nonmissing obs and ties
## - 152 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 13.0 18.6 21.8 25.6 47.0
bt_pheno$nRDW <- BNphen$x.t
plot(density(na.omit(bt_pheno$nRDW)), main="Normalized RDW")
polygon(density(na.omit(bt_pheno$nRDW)), col = "lightblue")
# fit smooth spline for Age against RBC
rdw_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","RDW"))])
rdw_plot <- qplot(AGE, RDW, data = rdw_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
rdw_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 13.00 18.60 21.80 22.58 25.60 47.00 12




RDWSD
# figures-side,
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$RDWSD, main="RDWSD (%)")
plot(density(na.omit(bt_pheno$RDWSD)), main="RDWSD (%)")
polygon(density(na.omit(bt_pheno$RDWSD)), col = "coral")
# removing outliers
abline(v=200, lwd=1, lty=2)
bt_pheno$RDWSD <- as.numeric(ifelse(bt_pheno$RDWSD > 200, "NA", bt_pheno$RDWSD))
plot(density(na.omit(bt_pheno$RDWSD)), main="RDWSD (>200) outlier pruned")
polygon(density(na.omit(bt_pheno$RDWSD)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$RDWSD)
BNphen
## orderNorm Transformation with 281 nonmissing obs and ties
## - 204 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 41.1 60.3 67.8 76.4 118.0
bt_pheno$nRDWSD <- BNphen$x.t
plot(density(na.omit(bt_pheno$nRDWSD)), main="Normalized RDWSD")
polygon(density(na.omit(bt_pheno$nRDWSD)), col = "lightblue")
# fit smooth spline for Age against RBC
RDWSD_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","RDWSD"))])
RDWSD_plot <- qplot(AGE, RDWSD, data = RDWSD_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
RDWSD_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 41.10 60.30 67.80 69.04 76.40 118.00 12





Hb
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$Hb, main="Hb (g/dL)", xlab="Hb")
plot(density(na.omit(bt_pheno$Hb)), main="Hb (g/dL)")
polygon(density(na.omit(bt_pheno$Hb)), col = "coral")
# removing outliers
#abline(v=30, lwd=1, lty=2)
#bt_pheno$Hb <- as.numeric(ifelse(bt_pheno$Hb > 30, "NA", bt_pheno$Hb))
#plot(density(na.omit(bt_pheno$Hb)), main="Hb (>30) outlier pruned")
#polygon(density(na.omit(bt_pheno$Hb)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$Hb)
BNphen
## orderNorm Transformation with 281 nonmissing obs and ties
## - 63 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 2.9 7.3 8.2 9.0 13.9
bt_pheno$nHb <- BNphen$x.t
plot(density(na.omit(bt_pheno$nHb)), main="Normalized Hb")
polygon(density(na.omit(bt_pheno$nHb)), col = "lightblue")
# fit smooth spline for Age against Hb
hb_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","Hb"))])
hb_plot <- qplot(AGE, Hb, data = hb_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
hb_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2.900 7.300 8.200 8.174 9.000 13.900 12




HbF
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$HbF, main="HbF", xlab="HbF")
plot(density(na.omit(bt_pheno$HbF)), main="HbF")
polygon(density(na.omit(bt_pheno$HbF)), col = "coral")
# removing outliers
abline(v=c(0,70), lwd=1, lty=2)
bt_pheno$HbF <- as.numeric(
ifelse(bt_pheno$HbF > 70, "NA",
ifelse(bt_pheno$HbF == 0, "NA", bt_pheno$HbF)
)
)
plot(density(na.omit(bt_pheno$HbF)), main="HbF (>70) outlier pruned")
polygon(density(na.omit(bt_pheno$HbF)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$HbF)
BNphen
## orderNorm Transformation with 279 nonmissing obs and ties
## - 167 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 1.8 7.1 10.7 16.1 39.4
bt_pheno$nHbF <- BNphen$x.t
plot(density(na.omit(bt_pheno$nHbF)), main="Normalized HbF")
polygon(density(na.omit(bt_pheno$nHbF)), col = "lightblue")
# CUBIC ROOT TRANSFORMATION
#bt_pheno$nHbF <- (bt_pheno$HbF)^(1/3)
#plot(density(na.omit(bt_pheno$nHbF)), main="Cubic root transformed HbF")
#polygon(density(na.omit(bt_pheno$nHbF)), col = "lightblue")
# fit smooth spline for Age against Hb
HbF_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","HbF"))])
HbF_plot <- qplot(AGE, HbF, data = HbF_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
HbF_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.8 7.1 10.7 12.3 16.1 39.4 14





MCV
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$MCV, main="MCV (fL)")
plot(density(na.omit(bt_pheno$MCV)), main="MCV (fL)")
polygon(density(na.omit(bt_pheno$MCV)), col = "coral")
# removing outliers
# abline(v=55, lwd=1, lty=2)
# bt_pheno$MCV <- as.numeric(ifelse(bt_pheno$MCV < 55, "NA", bt_pheno$MCV))
# plot(density(na.omit(bt_pheno$MCV)), main="MCV (>55) outlier pruned")
# polygon(density(na.omit(bt_pheno$MCV)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$MCV)
BNphen
## orderNorm Transformation with 281 nonmissing obs and ties
## - 52 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 70 89 97 104 133
bt_pheno$nMCV <- BNphen$x.t
plot(density(na.omit(bt_pheno$nMCV)), main="Normalized MCV")
polygon(density(na.omit(bt_pheno$nMCV)), col = "lightblue")
# fit smooth spline for Age against Hb
mcv_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","MCV"))])
mcv_plot <- qplot(AGE, MCV, data = mcv_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
mcv_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 70.00 89.00 97.00 96.85 104.00 133.00 12




HCT
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$HCT, main="HCT")
plot(density(na.omit(bt_pheno$HCT)), main="HCT")
polygon(density(na.omit(bt_pheno$HCT)), col = "coral")
# removing ourliers
#abline(v=43, lwd=1, lty=2)
#bt_pheno$HTC <- as.numeric(ifelse(bt_pheno$HTC > 43, "NA", bt_pheno$HTC))
#plot(density(na.omit(bt_pheno$HTC)), main="HTC (>43) outlier pruned")
#polygon(density(na.omit(bt_pheno$HTC)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$HCT)
BNphen
## orderNorm Transformation with 281 nonmissing obs and ties
## - 133 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 9.1 22.2 24.9 27.2 42.6
bt_pheno$nHCT <- BNphen$x.t
plot(density(na.omit(bt_pheno$nHCT)), main="Normalized HCT")
polygon(density(na.omit(bt_pheno$nHCT)), col = "lightblue")
# fit smooth spline for Age against Hb
htc_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","HCT"))])
htc_plot <- qplot(AGE, HCT, data = htc_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
htc_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 9.10 22.20 24.90 24.89 27.20 42.60 12




MCHC
#Mean Corpuscular Hemoglobin Concentration
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$MCHC, main="MCHC (g/dL)")
plot(density(na.omit(bt_pheno$MCHC)), main="MCHC (g/dL)")
polygon(density(na.omit(bt_pheno$MCHC)), col = "coral")
# removing
#abline(v=20, lwd=1, lty=2)
#bt_pheno$MCHC <- as.numeric(
# ifelse(bt_pheno$MCHC < 20, "NA",
# bt_pheno$MCHC
# #ifelse(bt_pheno$MCHC < 10, "NA", bt_pheno$MCHC)
# )
#)
#plot(density(na.omit(bt_pheno$MCHC)), main="MCHC (<20) outlier pruned")
#polygon(density(na.omit(bt_pheno$MCHC)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$MCHC)
BNphen
## orderNorm Transformation with 281 nonmissing obs and ties
## - 59 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 27.8 32.0 32.9 33.7 35.3
bt_pheno$nMCHC <- BNphen$x.t
plot(density(na.omit(bt_pheno$nMCHC)), main="Normalized MCHC")
polygon(density(na.omit(bt_pheno$nMCHC)), col = "lightblue")
# fit smooth spline for Age against Hb
mchc_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","MCHC"))])
mchc_plot <- qplot(AGE, MCHC, data = mchc_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
mchc_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 27.80 32.00 32.90 32.79 33.70 35.30 12




MCH
#The Mean Corpuscular Hemoglobin
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$MCH, main="MCH (pg/cell)")
plot(density(na.omit(bt_pheno$MCH)), main="MCH (pg/cell)")
polygon(density(na.omit(bt_pheno$MCH)), col = "coral")
# removing outliers
#abline(v=10, lwd=1, lty=2)
#bt_pheno$MCH <- as.numeric(
# ifelse(bt_pheno$MCH < 10, "NA",
# bt_pheno$MCH
# #ifelse(bt_pheno$MCH < 10, "NA", bt_pheno$MCH)
# )
#)
#plot(density(na.omit(bt_pheno$MCH)), main="MCH (<10) outlier pruned")
#polygon(density(na.omit(bt_pheno$MCH)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$MCH)
BNphen
## orderNorm Transformation with 281 nonmissing obs and ties
## - 122 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 21.5 28.9 31.8 34.5 42.3
bt_pheno$nMCH <- BNphen$x.t
plot(density(na.omit(bt_pheno$nMCH)), main="Normalized MCH")
polygon(density(na.omit(bt_pheno$nMCH)), col = "lightblue")
# fit smooth spline for Age against Hb
tcmh_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","MCH"))])
tcmh_plot <- qplot(AGE, MCH, data = tcmh_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
tcmh_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 21.50 28.90 31.80 31.76 34.50 42.30 12




WBC
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$WBC, main="WBC (counts/uL)")
plot(density(na.omit(bt_pheno$WBC)), main="WBC (counts/uL)")
polygon(density(na.omit(bt_pheno$WBC)), col = "coral")
# removing
#abline(v=60000, lwd=1, lty=2)
#bt_pheno$WBC <- as.numeric(
# ifelse(bt_pheno$WBC > 60000, "NA",
# bt_pheno$WBC
# #ifelse(bt_pheno$WBC < 10, "NA", bt_pheno$WBC)
# )
#)
#plot(density(na.omit(bt_pheno$WBC)), main="WBC (>60000) outlier pruned")
#polygon(density(na.omit(bt_pheno$WBC)), col = "coral")
BNphen <- bestNormalize(bt_pheno$WBC)
BNphen
## Best Normalizing transformation with 281 Observations
## Estimated Normality Statistics (Pearson P / df, lower => more normal):
## - arcsinh(x): 1.3158
## - Box-Cox: 1.2405
## - Center+scale: 1.4121
## - Double Reversed Log_b(x+a): 2.6596
## - Exp(x): 31.6407
## - Log_b(x+a): 1.3112
## - orderNorm (ORQ): 1.3273
## - sqrt(x + a): 1.2894
## - Yeo-Johnson: 1.2382
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##
## Based off these, bestNormalize chose:
## Standardized Yeo-Johnson Transformation with 281 nonmissing obs.:
## Estimated statistics:
## - lambda = -0.4513376
## - mean (before standardization) = 1.518601
## - sd (before standardization) = 0.09695185
bt_pheno$nWBC <- BNphen$x.t
plot(density(na.omit(bt_pheno$nWBC)), main="Normalized WBC")
polygon(density(na.omit(bt_pheno$nWBC)), col = "lightblue")
# fit smooth spline for Age against Hb
wbc_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","WBC"))])
wbc_plot <- qplot(AGE, WBC, data = wbc_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
wbc_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 4.10 10.30 12.30 12.84 14.60 28.10 12




NEUTROPHIL
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$NEUTROPHIL, main="NEUTROPHIL (10^3/uL)")
plot(density(na.omit(bt_pheno$NEUTROPHIL)), main="NEUTROPHIL (10^3/uL)")
polygon(density(na.omit(bt_pheno$NEUTROPHIL)), col = "coral")
# removing
#abline(v=20, lwd=1, lty=2)
#bt_pheno$NEUTROPHIL <- as.numeric(
# ifelse(bt_pheno$NEUTROPHIL > 20, "NA",
# bt_pheno$NEUTROPHIL
# #ifelse(bt_pheno$NEUTROPHIL < 10, "NA", bt_pheno$NEUTROPHIL)
# )
#)
#plot(density(na.omit(bt_pheno$NEUTROPHILE)), main="NEUTROPHIL (>20) outlier pruned")
#polygon(density(na.omit(bt_pheno$NEUTROPHILE)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$NEUTROPHIL)
BNphen
## orderNorm Transformation with 287 nonmissing obs and ties
## - 229 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.93 3.94 5.34 6.90 16.42
bt_pheno$nNEUTROPHIL <- BNphen$x.t
plot(density(na.omit(bt_pheno$nNEUTROPHIL)), main="Normalized NEUTROPHIL")
polygon(density(na.omit(bt_pheno$nNEUTROPHIL)), col = "lightblue")
# fit smooth spline for Age against NEUTROPHILE
neu_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","NEUTROPHIL"))])
neu_plot <- qplot(AGE, NEUTROPHIL, data = neu_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
neu_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
summary(bt_pheno$NEUTROPHIL)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.930 3.940 5.340 5.702 6.900 16.420 6




NEUTROPHIL_PERCENT
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$NEUTROPHIL_PERCENT, main="NEUTROPHIL_PERCENT (10^3/uL)")
plot(density(na.omit(bt_pheno$NEUTROPHIL_PERCENT)), main="NEUTROPHIL_PERCENT (10^3/uL)")
polygon(density(na.omit(bt_pheno$NEUTROPHIL_PERCENT)), col = "coral")
# removing
#abline(v=20, lwd=1, lty=2)
#bt_pheno$NEUTROPHIL_PERCENT <- as.numeric(
# ifelse(bt_pheno$NEUTROPHIL_PERCENT > 20, "NA",
# bt_pheno$NEUTROPHIL_PERCENT
# #ifelse(bt_pheno$NEUTROPHIL_PERCENT < 10, "NA", bt_pheno$NEUTROPHIL_PERCENT)
# )
#)
#plot(density(na.omit(bt_pheno$NEUTROPHIL_PERCENTE)), main="NEUTROPHIL_PERCENT (>20) outlier pruned")
#polygon(density(na.omit(bt_pheno$NEUTROPHIL_PERCENTE)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$NEUTROPHIL_PERCENT)
BNphen
## orderNorm Transformation with 287 nonmissing obs and ties
## - 205 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 16.20 36.70 43.70 51.95 89.00
bt_pheno$nNEUTROPHIL_PERCENT <- BNphen$x.t
plot(density(na.omit(bt_pheno$nNEUTROPHIL_PERCENT)), main="Normalized NEUTROPHIL_PERCENT")
polygon(density(na.omit(bt_pheno$nNEUTROPHIL_PERCENT)), col = "lightblue")
# fit smooth spline for Age against NEUTROPHIL_PERCENTE
neup_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","NEUTROPHIL_PERCENT"))])
neup_plot <- qplot(AGE, NEUTROPHIL_PERCENT, data = neup_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
neup_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
summary(bt_pheno$NEUTROPHIL_PERCENT)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 16.20 36.70 43.70 44.14 51.95 89.00 6




LYMPHOCYTE
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$LYMPHOCYTE, main="LYMPHOCYTE (10^3/uL)")
plot(density(na.omit(bt_pheno$LYMPHOCYTE)), main="LYMPHOCYTE (10^3/uL)")
polygon(density(na.omit(bt_pheno$LYMPHOCYTE)), col = "coral")
# removing
#abline(v=20, lwd=1, lty=2)
#bt_pheno$LYMPHOCYTE <- as.numeric(
# ifelse(bt_pheno$LYMPHOCYTE > 20, "NA",
# bt_pheno$LYMPHOCYTE
# #ifelse(bt_pheno$LYMPHOCYTE < 10, "NA", bt_pheno$LYMPHOCYTE)
# )
#)
#plot(density(na.omit(bt_pheno$LYMPHOCYTEE)), main="LYMPHOCYTE (>20) outlier pruned")
#polygon(density(na.omit(bt_pheno$LYMPHOCYTEE)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$LYMPHOCYTE)
BNphen
## orderNorm Transformation with 287 nonmissing obs and ties
## - 238 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 1.040 3.985 5.140 6.365 14.490
bt_pheno$nLYMPHOCYTE <- BNphen$x.t
plot(density(na.omit(bt_pheno$nLYMPHOCYTE)), main="Normalized LYMPHOCYTE")
polygon(density(na.omit(bt_pheno$nLYMPHOCYTE)), col = "lightblue")
# fit smooth spline for Age against LYMPHOCYTEE
lym_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","LYMPHOCYTE"))])
lym_plot <- qplot(AGE, LYMPHOCYTE, data = lym_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
lym_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
summary(bt_pheno$LYMPHOCYTE)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.040 3.985 5.140 5.415 6.365 14.490 6




LYMPHOCYTE_PERCENT
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$LYMPHOCYTE_PERCENT, main="LYMPHOCYTE_PERCENT (10^3/uL)")
plot(density(na.omit(bt_pheno$LYMPHOCYTE_PERCENT)), main="LYMPHOCYTE_PERCENT (10^3/uL)")
polygon(density(na.omit(bt_pheno$LYMPHOCYTE_PERCENT)), col = "coral")
# removing
#abline(v=20, lwd=1, lty=2)
#bt_pheno$LYMPHOCYTE_PERCENT <- as.numeric(
# ifelse(bt_pheno$LYMPHOCYTE_PERCENT > 20, "NA",
# bt_pheno$LYMPHOCYTE_PERCENT
# #ifelse(bt_pheno$LYMPHOCYTE_PERCENT < 10, "NA", bt_pheno$LYMPHOCYTE_PERCENT)
# )
#)
#plot(density(na.omit(bt_pheno$LYMPHOCYTE_PERCENTE)), main="LYMPHOCYTE_PERCENT (>20) outlier pruned")
#polygon(density(na.omit(bt_pheno$LYMPHOCYTE_PERCENTE)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$LYMPHOCYTE_PERCENT)
BNphen
## orderNorm Transformation with 287 nonmissing obs and ties
## - 214 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 6.60 34.95 42.50 49.10 75.60
bt_pheno$nLYMPHOCYTE_PERCENT <- BNphen$x.t
plot(density(na.omit(bt_pheno$nLYMPHOCYTE_PERCENT)), main="Normalized LYMPHOCYTE_PERCENT")
polygon(density(na.omit(bt_pheno$nLYMPHOCYTE_PERCENT)), col = "lightblue")
# fit smooth spline for Age against LYMPHOCYTE_PERCENTE
lymp_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","LYMPHOCYTE_PERCENT"))])
lymp_plot <- qplot(AGE, LYMPHOCYTE_PERCENT, data = lymp_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
lymp_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
summary(bt_pheno$LYMPHOCYTE_PERCENT)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 6.60 34.95 42.50 42.54 49.10 75.60 6




MONOCYTE
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$MONOCYTE, main="MONOCYTE (10^3/uL)")
plot(density(na.omit(bt_pheno$MONOCYTE)), main="MONOCYTE (10^3/uL)")
polygon(density(na.omit(bt_pheno$MONOCYTE)), col = "coral")
# removing
#abline(v=20, lwd=1, lty=2)
#bt_pheno$MONOCYTE <- as.numeric(
# ifelse(bt_pheno$MONOCYTE > 20, "NA",
# bt_pheno$MONOCYTE
# #ifelse(bt_pheno$MONOCYTE < 10, "NA", bt_pheno$MONOCYTE)
# )
#)
#plot(density(na.omit(bt_pheno$MONOCYTEE)), main="MONOCYTE (>20) outlier pruned")
#polygon(density(na.omit(bt_pheno$MONOCYTEE)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$MONOCYTE)
BNphen
## orderNorm Transformation with 287 nonmissing obs and ties
## - 152 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.180 0.905 1.170 1.485 4.260
bt_pheno$nMONOCYTE <- BNphen$x.t
plot(density(na.omit(bt_pheno$nMONOCYTE)), main="Normalized MONOCYTE")
polygon(density(na.omit(bt_pheno$nMONOCYTE)), col = "lightblue")
# fit smooth spline for Age against MONOCYTEE
mon_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","MONOCYTE"))])
mon_plot <- qplot(AGE, MONOCYTE, data = mon_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
mon_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
summary(bt_pheno$MONOCYTE)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.180 0.905 1.170 1.244 1.485 4.260 6




MONOCYTE_PERCENT
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$MONOCYTE_PERCENT, main="MONOCYTE_PERCENT (10^3/uL)")
plot(density(na.omit(bt_pheno$MONOCYTE_PERCENT)), main="MONOCYTE_PERCENT (10^3/uL)")
polygon(density(na.omit(bt_pheno$MONOCYTE_PERCENT)), col = "coral")
# removing
#abline(v=20, lwd=1, lty=2)
#bt_pheno$MONOCYTE_PERCENT <- as.numeric(
# ifelse(bt_pheno$MONOCYTE_PERCENT > 20, "NA",
# bt_pheno$MONOCYTE_PERCENT
# #ifelse(bt_pheno$MONOCYTE_PERCENT < 10, "NA", bt_pheno$MONOCYTE_PERCENT)
# )
#)
#plot(density(na.omit(bt_pheno$MONOCYTE_PERCENTE)), main="MONOCYTE_PERCENT (>20) outlier pruned")
#polygon(density(na.omit(bt_pheno$MONOCYTE_PERCENTE)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$MONOCYTE_PERCENT)
BNphen
## orderNorm Transformation with 287 nonmissing obs and ties
## - 104 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 2.5 7.5 9.2 11.5 18.6
bt_pheno$nMONOCYTE_PERCENT <- BNphen$x.t
plot(density(na.omit(bt_pheno$nMONOCYTE_PERCENT)), main="Normalized MONOCYTE_PERCENT")
polygon(density(na.omit(bt_pheno$nMONOCYTE_PERCENT)), col = "lightblue")
# fit smooth spline for Age against MONOCYTE_PERCENTE
monp_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","MONOCYTE_PERCENT"))])
monp_plot <- qplot(AGE, MONOCYTE_PERCENT, data = monp_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
monp_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
summary(bt_pheno$MONOCYTE_PERCENT)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 2.500 7.500 9.200 9.587 11.500 18.600 6




BASOPHIL
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$BASOPHIL, main="BASOPHIL (10^3/uL)")
plot(density(na.omit(bt_pheno$BASOPHIL)), main="BASOPHIL (10^3/uL)")
polygon(density(na.omit(bt_pheno$BASOPHIL)), col = "coral")
# removing
#abline(v=20, lwd=1, lty=2)
#bt_pheno$BASOPHIL <- as.numeric(
# ifelse(bt_pheno$BASOPHIL > 20, "NA",
# bt_pheno$BASOPHIL
# #ifelse(bt_pheno$BASOPHIL < 10, "NA", bt_pheno$BASOPHIL)
# )
#)
#plot(density(na.omit(bt_pheno$BASOPHILE)), main="BASOPHIL (>20) outlier pruned")
#polygon(density(na.omit(bt_pheno$BASOPHILE)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$BASOPHIL)
BNphen
## orderNorm Transformation with 287 nonmissing obs and ties
## - 18 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.00 0.03 0.05 0.07 0.19
bt_pheno$nBASOPHIL <- BNphen$x.t
plot(density(na.omit(bt_pheno$nBASOPHIL)), main="Normalized BASOPHIL")
polygon(density(na.omit(bt_pheno$nBASOPHIL)), col = "lightblue")
# fit smooth spline for Age against BASOPHILE
baso_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","BASOPHIL"))])
baso_plot <- qplot(AGE, BASOPHIL, data = baso_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
baso_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
summary(bt_pheno$BASOPHIL)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00000 0.03000 0.05000 0.05265 0.07000 0.19000 6




BASOPHIL_PERCENT
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$BASOPHIL_PERCENT, main="BASOPHIL_PERCENT (10^3/uL)")
plot(density(na.omit(bt_pheno$BASOPHIL_PERCENT)), main="BASOPHIL_PERCENT (10^3/uL)")
polygon(density(na.omit(bt_pheno$BASOPHIL_PERCENT)), col = "coral")
# removing
#abline(v=20, lwd=1, lty=2)
#bt_pheno$BASOPHIL_PERCENT <- as.numeric(
# ifelse(bt_pheno$BASOPHIL_PERCENT > 20, "NA",
# bt_pheno$BASOPHIL_PERCENT
# #ifelse(bt_pheno$BASOPHIL_PERCENT < 10, "NA", bt_pheno$BASOPHIL_PERCENT)
# )
#)
#plot(density(na.omit(bt_pheno$BASOPHIL_PERCENTE)), main="BASOPHIL_PERCENT (>20) outlier pruned")
#polygon(density(na.omit(bt_pheno$BASOPHIL_PERCENTE)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$BASOPHIL_PERCENT)
BNphen
## orderNorm Transformation with 287 nonmissing obs and ties
## - 12 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.0 0.3 0.4 0.5 1.2
bt_pheno$nBASOPHIL_PERCENT <- BNphen$x.t
plot(density(na.omit(bt_pheno$nBASOPHIL_PERCENT)), main="Normalized BASOPHIL_PERCENT")
polygon(density(na.omit(bt_pheno$nBASOPHIL_PERCENT)), col = "lightblue")
# fit smooth spline for Age against BASOPHIL_PERCENTE
basop_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","BASOPHIL_PERCENT"))])
basop_plot <- qplot(AGE, BASOPHIL_PERCENT, data = basop_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
basop_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
summary(bt_pheno$BASOPHIL_PERCENT)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0000 0.3000 0.4000 0.4098 0.5000 1.2000 6




EOSINOPHIL
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$EOSINOPHIL, main="EOSINOPHIL (10^3/uL)")
plot(density(na.omit(bt_pheno$EOSINOPHIL)), main="EOSINOPHIL (10^3/uL)")
polygon(density(na.omit(bt_pheno$EOSINOPHIL)), col = "coral")
# removing
abline(v=3, lwd=1, lty=2)
bt_pheno$EOSINOPHIL <- as.numeric(
ifelse(bt_pheno$EOSINOPHIL > 3, "NA",
bt_pheno$EOSINOPHIL
#ifelse(bt_pheno$EOSINOPHIL < 10, "NA", bt_pheno$EOSINOPHIL)
)
)
plot(density(na.omit(bt_pheno$EOSINOPHIL)), main="EOSINOPHIL (>3) outlier pruned")
polygon(density(na.omit(bt_pheno$EOSINOPHIL)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$EOSINOPHIL)
BNphen
## orderNorm Transformation with 282 nonmissing obs and ties
## - 100 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.02 0.13 0.26 0.52 1.95
bt_pheno$nEOSINOPHIL <- BNphen$x.t
plot(density(na.omit(bt_pheno$nEOSINOPHIL)), main="Normalized EOSINOPHIL")
polygon(density(na.omit(bt_pheno$nEOSINOPHIL)), col = "lightblue")
# fit smooth spline for Age against EOSINOPHILE
eos_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","EOSINOPHIL"))])
eos_plot <- qplot(AGE, EOSINOPHIL, data = eos_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
eos_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
summary(bt_pheno$EOSINOPHIL)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0200 0.1300 0.2600 0.3852 0.5200 1.9500 11





EOSINOPHIL_PERCENT
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$EOSINOPHIL_PERCENT, main="EOSINOPHIL_PERCENT (10^3/uL)")
plot(density(na.omit(bt_pheno$EOSINOPHIL_PERCENT)), main="EOSINOPHIL_PERCENT (10^3/uL)")
polygon(density(na.omit(bt_pheno$EOSINOPHIL_PERCENT)), col = "coral")
# removing
abline(v=25, lwd=1, lty=2)
bt_pheno$EOSINOPHIL_PERCENT <- as.numeric(
ifelse(bt_pheno$EOSINOPHIL_PERCENT > 25, "NA",
bt_pheno$EOSINOPHIL_PERCENT
#ifelse(bt_pheno$EOSINOPHIL_PERCENT < 10, "NA", bt_pheno$EOSINOPHIL_PERCENT)
)
)
plot(density(na.omit(bt_pheno$EOSINOPHIL_PERCENT)), main="EOSINOPHIL_PERCENT (>25) outlier pruned")
polygon(density(na.omit(bt_pheno$EOSINOPHIL_PERCENT)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$EOSINOPHIL_PERCENT)
BNphen
## orderNorm Transformation with 285 nonmissing obs and ties
## - 80 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.2 1.1 2.2 3.8 18.9
bt_pheno$nEOSINOPHIL_PERCENT <- BNphen$x.t
plot(density(na.omit(bt_pheno$nEOSINOPHIL_PERCENT)), main="Normalized EOSINOPHIL_PERCENT")
polygon(density(na.omit(bt_pheno$nEOSINOPHIL_PERCENT)), col = "lightblue")
# fit smooth spline for Age against EOSINOPHIL_PERCENTE
eosp_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","EOSINOPHIL_PERCENT"))])
eosp_plot <- qplot(AGE, EOSINOPHIL_PERCENT, data = eosp_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
eosp_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
summary(bt_pheno$EOSINOPHIL_PERCENT)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.200 1.100 2.200 3.119 3.800 18.900 8





PLATELET
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$PLATELET, main="PLATELET (K/cu mm)")
plot(density(na.omit(bt_pheno$PLATELET)), main="PLATELET (K/cu mm)")
polygon(density(na.omit(bt_pheno$PLATELET)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$PLATELET)
BNphen
## orderNorm Transformation with 281 nonmissing obs and ties
## - 220 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 44 333 440 548 1186
bt_pheno$nPLATELET <- BNphen$x.t
plot(density(na.omit(bt_pheno$nPLATELET)), main="Normalized PLATELET")
polygon(density(na.omit(bt_pheno$nPLATELET)), col = "lightblue")
# fit smooth spline for Age against PLATELET
pla_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","PLATELET"))])
pla_plot <- qplot(AGE, PLATELET, data = pla_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
pla_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
summary(bt_pheno$PLATELET)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 44.0 333.0 440.0 456.9 548.0 1186.0 12




MPV
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$MPV, main="MPV")
plot(density(na.omit(bt_pheno$MPV)), main="MPV")
polygon(density(na.omit(bt_pheno$MPV)), col = "coral")
# removing
#abline(v=20, lwd=1, lty=2)
#bt_pheno$MPV <- as.numeric(
# ifelse(bt_pheno$MPV > 20, "NA",
# bt_pheno$MPV
# #ifelse(bt_pheno$MPV < 10, "NA", bt_pheno$MPV)
# )
#)
#plot(density(na.omit(bt_pheno$MPV)), main="MPV (>20) outlier pruned")
#polygon(density(na.omit(bt_pheno$MPV)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$MPV)
BNphen
## orderNorm Transformation with 281 nonmissing obs and ties
## - 53 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 6.9 8.2 8.8 9.6 14.0
bt_pheno$nMPV <- BNphen$x.t
plot(density(na.omit(bt_pheno$nMPV)), main="Normalized MPV")
polygon(density(na.omit(bt_pheno$nMPV)), col = "lightblue")
# fit smooth spline for Age against PLATELET
pla_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","MPV"))])
pla_plot <- qplot(AGE, MPV, data = pla_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
pla_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 6.900 8.200 8.800 9.012 9.600 14.000 12




SBP
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$SBP, main="SBP")
plot(density(na.omit(bt_pheno$SBP)), main="SBP")
polygon(density(na.omit(bt_pheno$SBP)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$SBP)
BNphen
## orderNorm Transformation with 239 nonmissing obs and ties
## - 49 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 70 90 98 110 140
bt_pheno$nSBP <- BNphen$x.t
plot(density(na.omit(bt_pheno$nSBP)), main="Normalized SBP")
polygon(density(na.omit(bt_pheno$nSBP)), col = "lightblue")
# fit smooth spline for Age against RETICS
sbp_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","SBP"))])
sbp_plot <- qplot(AGE, SBP, data = sbp_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
sbp_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 70 90 98 100 110 140 54




DBP
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$DBP, main="DBP")
plot(density(na.omit(bt_pheno$DBP)), main="DBP")
polygon(density(na.omit(bt_pheno$DBP)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$DBP)
BNphen
## orderNorm Transformation with 239 nonmissing obs and ties
## - 47 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 30 52 60 68 96
bt_pheno$nDBP <- BNphen$x.t
plot(density(na.omit(bt_pheno$nDBP)), main="Normalized DBP")
polygon(density(na.omit(bt_pheno$nDBP)), col = "lightblue")
# fit smooth spline for Age against DBP
dbp_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","DBP"))])
dbp_plot <- qplot(AGE, DBP, data = dbp_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
dbp_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 30.00 52.00 60.00 60.38 68.00 96.00 54




BMI
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$BMI, main="BMI")
plot(density(na.omit(bt_pheno$BMI)), main="BMI")
polygon(density(na.omit(bt_pheno$BMI)), col = "coral")
# removing outliers
abline(v=40, lwd=1, lty=2)
bt_pheno$BMI <- as.numeric(
ifelse(bt_pheno$BMI > 40, "NA",
bt_pheno$BMI
#ifelse(bt_pheno$BMI < 10, "NA", bt_pheno$BMI)
)
)
plot(density(na.omit(bt_pheno$BMI)), main="BMI (>40) outlier pruned")
polygon(density(na.omit(bt_pheno$BMI)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$BMI)
BNphen
## orderNorm Transformation with 281 nonmissing obs and ties
## - 276 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 8.889 14.284 15.503 16.667 31.267
bt_pheno$nBMI <- BNphen$x.t
plot(density(na.omit(bt_pheno$nBMI)), main="Normalized BMI")
polygon(density(na.omit(bt_pheno$nBMI)), col = "lightblue")
# fit smooth spline for Age against BMI
bmi_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","BMI"))])
bmi_plot <- qplot(AGE, BMI, data = bmi_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
bmi_plot
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 30.00 52.00 60.00 60.38 68.00 96.00 54




