## Loading required package: tidyverse
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v dplyr 1.1.4 v readr 2.1.5
## v forcats 1.0.0 v stringr 1.5.1
## v ggplot2 4.0.1 v tibble 3.3.0
## v lubridate 1.9.3 v tidyr 1.3.1
## v purrr 1.0.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: bestNormalize
##
## Registered S3 method overwritten by 'butcher':
## method from
## as.character.dev_topic generics
## [[1]]
## [1] TRUE
##
## [[2]]
## [1] TRUE
sheets <- c("gha-metadata", "hbss", "hbsc", "hbss-sc")
pheno <- readxl::read_xlsx(params$data, sheet = "hbss-sc")
#names(pheno)
write.table(
pheno,
sub(".xlsx", ".tsv", params$data),
col.names = TRUE,
row.names = FALSE,
sep="\t",
quote = FALSE
)
str(pheno)## tibble [2,108 x 25] (S3: tbl_df/tbl/data.frame)
## $ FID : chr [1:2108] "SGA-ACC-01-0004" "SGA-ACC-01-0006" "SGA-ACC-01-0009" "SGA-ACC-01-0011" ...
## $ IID : chr [1:2108] "SGA-ACC-01-0004" "SGA-ACC-01-0006" "SGA-ACC-01-0009" "SGA-ACC-01-0011" ...
## $ AGE : chr [1:2108] "12" "9" "9" "6" ...
## $ GENDER : chr [1:2108] "Female" "Female" "Female" "Male" ...
## $ SEX : chr [1:2108] "2" "2" "2" "1" ...
## $ HEIGHT : chr [1:2108] "1.36" "1.54" "1.27" "1.32" ...
## $ WEIGHT : chr [1:2108] "28.5" "32" "24" "38" ...
## $ BMI : chr [1:2108] "15.4087370242215" "13.4930005059875" "14.8800297600595" "21.8089990817264" ...
## $ STATUS : chr [1:2108] "SS" "SC" "SS" "SS" ...
## $ RBC : chr [1:2108] "2.71" "4.33" "3" "2.85" ...
## $ Hb : num [1:2108] 8.9 11 9.2 7.4 7.4 10.8 10.7 6 11.1 10.6 ...
## $ HCT : num [1:2108] 25.8 33 27.4 22.2 22 31.5 31.9 17.6 31 30 ...
## $ MCV : chr [1:2108] "95.3" "76.1" "91.4" "78" ...
## $ MCH : num [1:2108] 32.8 25.4 30.7 25.9 36.5 29.1 24.7 28.3 25.9 34 ...
## $ MCHC : chr [1:2108] "34.5" "33.4" "33.6" "33.2" ...
## $ RDW : chr [1:2108] "18.1" "14.1" "14.6" "20.1" ...
## $ WBC : chr [1:2108] "8.65" "6.38" "9.15" "16.08" ...
## $ LYMPHOCYTE_PERCENT : chr [1:2108] "51.3" "59.9" "45" "NA" ...
## $ MONOCYTE_PERCENT : chr [1:2108] "6.2" "4.5" "9.7" "2.8" ...
## $ GRANULOCYTE_PERCENT: chr [1:2108] "NA" "NA" "NA" "NA" ...
## $ LYMPHOCYTE : chr [1:2108] "4.44" "3.82" "4.11" "NA" ...
## $ MONOCYTE : chr [1:2108] "0.54" "0.29" "0.89" "0.45" ...
## $ GRANULOCYTE : chr [1:2108] "NA" "NA" "NA" "NA" ...
## $ PLATELET : chr [1:2108] "413" "411" "323" "673" ...
## $ MPV : chr [1:2108] "8.6" "7.9" "7.7" "7.6" ...
pheno <- read.table( sub(".xlsx", ".tsv", params$data), sep="\t", header = TRUE)
keep_colmns <- c(
"FID","IID","AGE","GENDER","SEX","HEIGHT","WEIGHT","BMI",
"STATUS","RBC","Hb","HCT","MCV","MCH","MCHC","RDW",
"WBC","LYMPHOCYTE_PERCENT","MONOCYTE_PERCENT",
"GRANULOCYTE_PERCENT","LYMPHOCYTE","MONOCYTE",
"GRANULOCYTE","PLATELET","MPV"
)
bt_pheno <- pheno[, which(names(pheno) %in% keep_colmns)]
colmns_to_update <- c(
"AGE","SEX","HEIGHT","WEIGHT","BMI",
"RBC","Hb","HCT","MCV","MCH","MCHC","RDW",
"WBC","LYMPHOCYTE_PERCENT","MONOCYTE_PERCENT",
"GRANULOCYTE_PERCENT","LYMPHOCYTE","MONOCYTE",
"GRANULOCYTE","PLATELET","MPV"
)
set_var_type <- function(x) {
as.numeric(x)
}
bt_pheno[, c(colmns_to_update)] <- lapply(bt_pheno[, c(colmns_to_update)] , set_var_type)
#bt_pheno$BMI <- as.numeric((bt_pheno$WEIGHT)/(bt_pheno$HEIGHT)^2)
str(bt_pheno)## 'data.frame': 2108 obs. of 25 variables:
## $ FID : chr "SGA-ACC-01-0004" "SGA-ACC-01-0006" "SGA-ACC-01-0009" "SGA-ACC-01-0011" ...
## $ IID : chr "SGA-ACC-01-0004" "SGA-ACC-01-0006" "SGA-ACC-01-0009" "SGA-ACC-01-0011" ...
## $ AGE : num 12 9 9 6 13 7 12 7 12 10 ...
## $ GENDER : chr "Female" "Female" "Female" "Male" ...
## $ SEX : num 2 2 2 1 2 1 1 2 1 2 ...
## $ HEIGHT : num 1.36 1.54 1.27 1.32 1.4 ...
## $ WEIGHT : num 28.5 32 24 38 31 21.5 37 23 31.5 28 ...
## $ BMI : num 15.4 13.5 14.9 21.8 15.8 ...
## $ STATUS : chr "SS" "SC" "SS" "SS" ...
## $ RBC : num 2.71 4.33 3 2.85 2.43 3.71 4.02 2.11 4.27 3.13 ...
## $ Hb : num 8.9 11 9.2 7.4 7.4 10.8 10.7 6 11.1 10.6 ...
## $ HCT : num 25.8 33 27.4 22.2 22 31.5 31.9 17.6 31 30 ...
## $ MCV : num 95.3 76.1 91.4 78 90.5 51.9 79.3 83.1 72.4 96 ...
## $ MCH : num 32.8 25.4 30.7 25.9 36.5 29.1 24.7 28.3 25.9 34 ...
## $ MCHC : num 34.5 33.4 33.6 33.2 33.7 NA 33.7 34 35.8 35.4 ...
## $ RDW : num 18.1 14.1 14.6 20.1 19.9 NA 14.2 29.9 17.2 14.6 ...
## $ WBC : num 8.65 6.38 9.15 16.08 8.57 ...
## $ LYMPHOCYTE_PERCENT : num 51.3 59.9 45 NA 44.3 NA 15.7 56.4 43.7 51.2 ...
## $ MONOCYTE_PERCENT : num 6.2 4.5 9.7 2.8 1.2 NA NA 7 6 7.3 ...
## $ GRANULOCYTE_PERCENT: num NA NA NA NA NA NA NA NA NA NA ...
## $ LYMPHOCYTE : num 4.44 3.82 4.11 NA 3.78 NA 3.31 9.46 3.06 3.86 ...
## $ MONOCYTE : num 0.54 0.29 0.89 0.45 0.82 NA 0.4 1.17 0.42 0.54 ...
## $ GRANULOCYTE : num NA NA NA NA NA NA NA NA NA NA ...
## $ PLATELET : num 413 411 323 673 347 NA 387 355 274 374 ...
## $ MPV : num 8.6 7.9 7.7 7.6 9.7 NA 10.2 8.7 7.9 7.2 ...
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 6.00 12.00 16.59 24.00 73.00 6
hist(bt_pheno$AGE, main="AGE destribution", xlab="AGE")
#plot(density(na.omit(bt_pheno$AGE)))
#polygon(density(na.omit(bt_pheno$AGE)), col = "coral")
#abline(v=c(min(bt_pheno$AGE), max(bt_pheno$AGE)), lty=2, lwd=1)bt_pheno %>%
select(SEX) %>%
table() %>%
as.data.frame() %>%
na.omit() %>%
mutate(Prop = (Freq/sum(Freq))*100) -> sex # Sex
sexcolnames(sex) <- c("cat", "freq", "prop")
par(mar=c(4,4,3,4))
barplot(
sex$prop,
ylim=c(0, 60),
col=c(2,4)
)
text(
x=c(0.7,1.9),
y=30,
labels=c("Female", "Male")
)
mtext(
text = "Proportion (%)",
side = 2,
line = 3
)
mtext(
text = "Sex",
side = 1,
line = 1
)Definition of outliers removed:
# figures-side,
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$RBC, main="RBC (million/µL)")
plot(density(na.omit(bt_pheno$RBC)), main="RBC (million/µL)")
polygon(density(na.omit(bt_pheno$RBC)), col = "coral")
# removing outliers
abline(v=40, lwd=1, lty=2)
bt_pheno$RBC <- as.numeric(ifelse(bt_pheno$RBC > 40, "NA", bt_pheno$RBC))
plot(density(na.omit(bt_pheno$RBC)), main="RBC (>40) outlier pruned")
polygon(density(na.omit(bt_pheno$RBC)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$RBC)## Warning in bestNormalize::orderNorm(bt_pheno$RBC): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 2106 nonmissing obs and ties
## - 383 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.31 2.56 3.10 3.94 5.95
bt_pheno$nRBC <- BNphen$x.t
plot(density(na.omit(bt_pheno$nRBC)), main="Normalized RBC")
polygon(density(na.omit(bt_pheno$nRBC)), col = "lightblue")
# fit smooth spline for Age against RBC
rbc_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","RBC"))])
rbc_plot <- qplot(AGE, RBC, data = rbc_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.310 2.560 3.100 3.266 3.940 5.950 2
# figures-side,
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$RDW, main="RDW (%)")
plot(density(na.omit(bt_pheno$RDW)), main="RDW (%)")
polygon(density(na.omit(bt_pheno$RDW)), col = "coral")
# removing outliers
#abline(v=10, lwd=1, lty=2)
#bt_pheno$RDW <- as.numeric(ifelse(bt_pheno$RDW > 10, "NA", bt_pheno$RDW))
#plot(density(na.omit(bt_pheno$RDW)), main="RDW (>10) outlier pruned")
#polygon(density(na.omit(bt_pheno$RDW)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$RDW)## Warning in bestNormalize::orderNorm(bt_pheno$RDW): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 2016 nonmissing obs and ties
## - 191 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 1.7 15.9 18.1 21.0 312.0
bt_pheno$nRDW <- BNphen$x.t
plot(density(na.omit(bt_pheno$nRDW)), main="Normalized RDW")
polygon(density(na.omit(bt_pheno$nRDW)), col = "lightblue")
# fit smooth spline for Age against RBC
rdw_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","RDW"))])
rdw_plot <- qplot(AGE, RDW, data = rdw_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
rdw_plot## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.70 15.90 18.10 18.91 21.00 312.00 92
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$Hb, main="Hb (g/dL)", xlab="Hb")
plot(density(na.omit(bt_pheno$Hb)), main="Hb (g/dL)")
polygon(density(na.omit(bt_pheno$Hb)), col = "coral")
# removing outliers
#abline(v=13, lwd=1, lty=2)
#bt_pheno$Hb <- as.numeric(ifelse(bt_pheno$Hb > 13, "NA", bt_pheno$Hb))
#plot(density(na.omit(bt_pheno$Hb)), main="Hb (>13) outlier pruned")
#polygon(density(na.omit(bt_pheno$Hb)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$Hb)## Warning in bestNormalize::orderNorm(bt_pheno$Hb): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 2108 nonmissing obs and ties
## - 101 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.1 7.4 8.5 10.1 15.6
bt_pheno$nHb <- BNphen$x.t
plot(density(na.omit(bt_pheno$nHb)), main="Normalized Hb")
polygon(density(na.omit(bt_pheno$nHb)), col = "lightblue")
# fit smooth spline for Age against Hb
hb_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","Hb"))])
hb_plot <- qplot(AGE, Hb, data = hb_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
hb_plot## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.10 7.40 8.50 8.75 10.10 15.60
par(mar = c(4, 4, 2, 2))
hist(pheno$MCV, main="MCV (fL)")
plot(density(na.omit(bt_pheno$MCV)), main="MCV (fL)")
polygon(density(na.omit(bt_pheno$MCV)), col = "coral")
# removing outliers
#abline(v=10, lwd=1, lty=2)
#bt_pheno$MCV <- as.numeric(ifelse(bt_pheno$MCV < 10, "NA", bt_pheno$MCV))
#plot(density(na.omit(bt_pheno$MCV)), main="MCV (<10) outlier pruned")
#polygon(density(na.omit(bt_pheno$MCV)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$MCV)## Warning in bestNormalize::orderNorm(bt_pheno$MCV): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 2107 nonmissing obs and ties
## - 462 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 43.0 71.0 80.0 88.0 124.7
bt_pheno$nMCV <- BNphen$x.t
plot(density(na.omit(bt_pheno$nMCV)), main="Normalized (orderNorm) MCV")
polygon(density(na.omit(bt_pheno$nMCV)), col = "lightblue")
# fit smooth spline for Age against Hb
mcv_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","MCV"))])
mcv_plot <- qplot(AGE, MCV, data = mcv_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
mcv_plot## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 43.00 71.00 80.00 79.62 88.00 124.70 1
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$HCT, main="HCT (%)")
plot(density(na.omit(bt_pheno$HCT)), main="HCT (%)")
polygon(density(na.omit(bt_pheno$HCT)), col = "coral")
# removing ourliers
#abline(v=43, lwd=1, lty=2)
#bt_pheno$HCT <- as.numeric(ifelse(bt_pheno$HCT > 43, "NA", bt_pheno$HCT))
#plot(density(na.omit(bt_pheno$HCT)), main="HCT (>43) outlier pruned")
#polygon(density(na.omit(bt_pheno$HCT)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$HCT)## Warning in bestNormalize::orderNorm(bt_pheno$HCT): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 2108 nonmissing obs and ties
## - 284 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 2.9 21.1 24.7 28.9 70.0
bt_pheno$nHCT <- BNphen$x.t
plot(density(na.omit(bt_pheno$nHCT)), main="Normalized HCT")
polygon(density(na.omit(bt_pheno$nHCT)), col = "lightblue")
# fit smooth spline for Age against Hb
htc_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","HCT"))])
htc_plot <- qplot(AGE, HCT, data = htc_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
htc_plot## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.90 21.10 24.70 25.46 28.90 70.00
#Mean Corpuscular Hemoglobin Concentration
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$MCHC, main="MCHC (g/dL)")
plot(density(na.omit(bt_pheno$MCHC)), main="MCHC (g/dL)")
polygon(density(na.omit(bt_pheno$MCHC)), col = "coral")
# removing
abline(v=c(10, 60), lwd=1, lty=2)
bt_pheno$MCHC <- as.numeric(
ifelse(bt_pheno$MCHC > 60, "NA",
#bt_pheno$MCHC
ifelse(bt_pheno$MCHC < 10, "NA", bt_pheno$MCHC)
)
)## Warning: NAs introduced by coercion
plot(density(na.omit(bt_pheno$MCHC)), main="MCHC (>60 and <10) outlier pruned")
polygon(density(na.omit(bt_pheno$MCHC)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$MCHC)## Warning in bestNormalize::orderNorm(bt_pheno$MCHC): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 2100 nonmissing obs and ties
## - 182 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 18.3 32.2 34.5 36.6 51.4
bt_pheno$nMCHC <- BNphen$x.t
plot(density(na.omit(bt_pheno$nMCHC)), main="Normalized MCHC")
polygon(density(na.omit(bt_pheno$nMCHC)), col = "lightblue")
# fit smooth spline for Age against Hb
mchc_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","MCHC"))])
mchc_plot <- qplot(AGE, MCHC, data = mchc_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
mchc_plot## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 18.30 32.20 34.50 34.67 36.60 51.40 8
#The Mean Corpuscular Hemoglobin
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$MCH, main="MCH (pg/cell)")
plot(density(na.omit(bt_pheno$MCH)), main="MCH (pg/cell)")
polygon(density(na.omit(bt_pheno$MCH)), col = "coral")
# removing outliers
abline(v=c(60), lwd=1, lty=2)
bt_pheno$MCH <- as.numeric(
ifelse(bt_pheno$MCH > 60, "NA",
bt_pheno$MCH
#ifelse(bt_pheno$MCH > 50, "NA", bt_pheno$MCH)
)
)## Warning: NAs introduced by coercion
plot(density(na.omit(bt_pheno$MCH)), main="MCH (>60) outlier pruned")
polygon(density(na.omit(bt_pheno$MCH)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$MCH)## Warning in bestNormalize::orderNorm(bt_pheno$MCH): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 2107 nonmissing obs and ties
## - 205 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 16.00 24.70 27.20 30.05 58.50
bt_pheno$nMCH <- BNphen$x.t
plot(density(na.omit(bt_pheno$nMCH)), main="Normalized MCH")
polygon(density(na.omit(bt_pheno$nMCH)), col = "lightblue")
# fit smooth spline for Age against Hb
MCH_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","MCH"))])
MCH_plot <- qplot(AGE, MCH, data = MCH_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
MCH_plot## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 16.00 24.70 27.20 27.49 30.05 58.50 1
Check standard units here
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$WBC, main="WBC (10^9/L)")
plot(density(na.omit(bt_pheno$WBC)), main="WBC (10^9/L)")
polygon(density(na.omit(bt_pheno$WBC)), col = "coral")
# removing
#abline(v=35, lwd=1, lty=2)
#bt_pheno$WBC <- as.numeric(
# ifelse(bt_pheno$WBC > 35, "NA",
# bt_pheno$WBC
# #ifelse(bt_pheno$WBC < 10, "NA", bt_pheno$WBC)
# )
#)
#plot(density(na.omit(bt_pheno$WBC)), main="WBC (>35) outlier pruned")
#polygon(density(na.omit(bt_pheno$WBC)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$WBC)## Warning in bestNormalize::orderNorm(bt_pheno$WBC): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 2107 nonmissing obs and ties
## - 924 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.0 7.7 10.3 13.5 68.2
bt_pheno$nWBC <- BNphen$x.t
plot(density(na.omit(bt_pheno$nWBC)), main="Normalized WBC")
polygon(density(na.omit(bt_pheno$nWBC)), col = "lightblue")
# fit smooth spline for Age against Hb
wbc_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","WBC"))])
wbc_plot <- qplot(AGE, WBC, data = wbc_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
wbc_plot## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.00 7.70 10.30 11.11 13.50 68.20 1
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$LYMPHOCYTE, main="LYMPHOCYTE (10^9/L)")
plot(density(na.omit(bt_pheno$LYMPHOCYTE)), main="LYMPHOCYTE (10^9/L)")
polygon(density(na.omit(bt_pheno$LYMPHOCYTE)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$LYMPHOCYTE)## Warning in bestNormalize::orderNorm(bt_pheno$LYMPHOCYTE): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 1899 nonmissing obs and ties
## - 584 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.63 2.88 3.80 5.15 19.90
bt_pheno$nLYMPHOCYTE <- BNphen$x.t
plot(density(na.omit(bt_pheno$nLYMPHOCYTE)), main="Normalized LYMPHOCYTE")
polygon(density(na.omit(bt_pheno$nLYMPHOCYTE)), col = "lightblue")
# fit smooth spline for Age against Hb
lym_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","LYMPHOCYTE"))])
lym_plot <- qplot(AGE, LYMPHOCYTE, data = lym_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
lym_plot## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.63 2.88 3.80 4.30 5.15 19.90 209
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$MONOCYTE, main="MONOCYTE (10^9/L)")
plot(density(na.omit(bt_pheno$MONOCYTE)), main="MONOCYTE (10^9/L)")
polygon(density(na.omit(bt_pheno$MONOCYTE)), col = "coral")
# removing
#abline(v=5, lwd=1, lty=2)
#bt_pheno$MONOCYTE <- as.numeric(
# ifelse(bt_pheno$MONOCYTE > 5, "NA",
# bt_pheno$MONOCYTE
# #ifelse(bt_pheno$MONOCYTE == 0, "NA", bt_pheno$MONOCYTE)
# )
#)
#plot(density(na.omit(bt_pheno$MONOCYTE)), main="MONOCYTE (>5) outlier pruned")
#polygon(density(na.omit(bt_pheno$MONOCYTE)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$MONOCYTE)## Warning in bestNormalize::orderNorm(bt_pheno$MONOCYTE): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 1903 nonmissing obs and ties
## - 249 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.00 0.50 0.70 1.09 60.00
bt_pheno$nMONOCYTE <- BNphen$x.t
plot(density(na.omit(bt_pheno$nMONOCYTE)), main="Normalized MONOCYTE")
polygon(density(na.omit(bt_pheno$nMONOCYTE)), col = "lightblue")
# fit smooth spline for Age against Hb
mon_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","MONOCYTE"))])
mon_plot <- qplot(AGE, MONOCYTE, data = mon_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
mon_plot## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 0.500 0.700 0.893 1.090 60.000 205
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$PLATELET, main="PLATELET (10^9/L)")
plot(density(na.omit(bt_pheno$PLATELET)), main="PLATELET (10^9/L)")
polygon(density(na.omit(bt_pheno$PLATELET)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$PLATELET)## Warning in bestNormalize::orderNorm(bt_pheno$PLATELET): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 2102 nonmissing obs and ties
## - 587 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 24.00 258.25 349.00 449.00 1601.00
bt_pheno$nPLATELET <- BNphen$x.t
plot(density(na.omit(bt_pheno$nPLATELET)), main="Normalized PLATELET")
polygon(density(na.omit(bt_pheno$nPLATELET)), col = "lightblue")
# fit smooth spline for Age against PLATELET
pla_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","PLATELET"))])
pla_plot <- qplot(AGE, PLATELET, data = pla_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
pla_plot## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 24.0 258.2 349.0 365.4 449.0 1601.0 6
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$MPV, main="MPV")
plot(density(na.omit(bt_pheno$MPV)), main="MPV")
polygon(density(na.omit(bt_pheno$MPV)), col = "coral")
# removing
abline(v=20, lwd=1, lty=2)
bt_pheno$MPV <- as.numeric(
ifelse(bt_pheno$MPV > 20, "NA",
bt_pheno$MPV
#ifelse(bt_pheno$MPV < 10, "NA", bt_pheno$MPV)
)
)## Warning: NAs introduced by coercion
plot(density(na.omit(bt_pheno$MPV)), main="MPV (>20) outlier pruned")
polygon(density(na.omit(bt_pheno$MPV)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$MPV)## Warning in bestNormalize::orderNorm(bt_pheno$MPV): Ties in data, Normal distribution not guaranteed
## orderNorm Transformation with 1927 nonmissing obs and ties
## - 75 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 0.2 7.6 8.4 9.3 14.0
bt_pheno$nMPV <- BNphen$x.t
plot(density(na.omit(bt_pheno$nMPV)), main="Normalized MPV")
polygon(density(na.omit(bt_pheno$nMPV)), col = "lightblue")
# fit smooth spline for Age against PLATELET
pla_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","MPV"))])
pla_plot <- qplot(AGE, MPV, data = pla_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
pla_plot## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.200 7.600 8.400 8.503 9.300 14.000 181
par(mar = c(4, 4, 2, 2))
hist(bt_pheno$BMI, main="BMI")
plot(density(na.omit(bt_pheno$BMI)), main="BMI")
polygon(density(na.omit(bt_pheno$BMI)), col = "coral")
# removing outliers
abline(v=50, lwd=1, lty=2)
bt_pheno$BMI <- as.numeric(
ifelse(bt_pheno$BMI > 50, "NA",
bt_pheno$BMI
#ifelse(bt_pheno$BMI < 10, "NA", bt_pheno$BMI)
)
)
plot(density(na.omit(bt_pheno$BMI)), main="BMI (>50) outlier pruned")
polygon(density(na.omit(bt_pheno$BMI)), col = "coral")
BNphen <- bestNormalize::orderNorm(bt_pheno$BMI)
BNphen ## orderNorm Transformation with 2069 nonmissing obs and ties
## - 1484 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 8.333 15.151 17.492 21.028 42.999
bt_pheno$nBMI <- BNphen$x.t
plot(density(na.omit(bt_pheno$nBMI)), main="Normalized BMI")
polygon(density(na.omit(bt_pheno$nBMI)), col = "lightblue")
# fit smooth spline for Age against BMI
bmi_pheno <- na.omit(bt_pheno[, which(names(bt_pheno) %in% c("FID","GENDER","AGE","BMI"))])
bmi_plot <- qplot(AGE, BMI, data = bmi_pheno, geom='smooth', span =0.5, color = GENDER) + theme_bw()
bmi_plot## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 8.333 15.151 17.492 18.551 21.028 42.999 39