--- title: "Initialization algorithms" author: "Cristian Castiglione" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{initialization} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ## Workspace setup ```{r setup, include = FALSE} options(rmarkdown.html_vignette.check_title = FALSE) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` Load the \code{sgdGMF} package in the workspace. ```{r sgdgmf} library(sgdGMF) ``` Load other useful packages in the workspace. ```{r libraries} library(ggplot2) library(ggpubr) library(reshape2) ``` ## Ant traits data Load the ant traits data in the workspace and define the response matrix `Y` and covariate matrices `X` and `Z`. ```{r data} # install.packages("mvabund") # data(antTraits, package = "mvabund") load(url("https://raw.githubusercontent.com/cran/mvabund/master/data/antTraits.RData")) Y = as.matrix(antTraits$abund) X = as.matrix(antTraits$env[,-3]) Z = matrix(1, nrow = ncol(Y), ncol = 1) attr(Y, "dimnames") = NULL attr(X, "dimnames") = NULL attr(Z, "dimnames") = NULL n = nrow(Y) m = ncol(Y) ``` ## Model specification Set the model family to Poisson since the response matrix contain count data. ```{r family} family = poisson() ``` ```{r init} suppressWarnings({ init_glm_dev = sgdGMF::sgdgmf.init(Y, X, Z, ncomp = 2, family = family, method = "glm", type = "deviance") init_glm_prs = sgdGMF::sgdgmf.init(Y, X, Z, ncomp = 2, family = family, method = "glm", type = "pearson") init_glm_lnk = sgdGMF::sgdgmf.init(Y, X, Z, ncomp = 2, family = family, method = "glm", type = "link") init_ols_dev = sgdGMF::sgdgmf.init(Y, X, Z, ncomp = 2, family = family, method = "ols", type = "deviance") init_ols_prs = sgdGMF::sgdgmf.init(Y, X, Z, ncomp = 2, family = family, method = "ols", type = "pearson") init_ols_lnk = sgdGMF::sgdgmf.init(Y, X, Z, ncomp = 2, family = family, method = "ols", type = "link") }) ``` ```{r, fig.width = 7, fig.height = 10} data.frame( "Method" = rep(c("GLM", "OLS"), each = 3), "Resid" = rep(c("Deviance", "Pearson", "Link"), times = 2), "Deviance" = list(init_glm_dev, init_glm_prs, init_glm_lnk, init_ols_dev, init_ols_prs, init_ols_lnk) |> lapply(function (obj) round(100 * deviance(obj, normalize = TRUE), 2)) |> unlist() |> drop() ) ``` ```{r resid, echo = TRUE, include = FALSE, fig.width = 7, fig.height = 10} ggpubr::ggarrange( # plot(init_glm_dev, type = "res-fit") + labs(subtitle = "GLM - Deviance"), plot(init_glm_dev, type = "std-fit") + labs(subtitle = "GLM - Deviance"), plot(init_glm_dev, type = "qq") + labs(subtitle = "GLM - Deviance"), # plot(init_glm_prs, type = "res-fit") + labs(subtitle = "GLM - Pearson"), plot(init_glm_prs, type = "std-fit") + labs(subtitle = "GLM - Pearson"), plot(init_glm_prs, type = "qq") + labs(subtitle = "GLM - Pearson"), # plot(init_glm_lnk, type = "res-fit") + labs(subtitle = "GLM - Link"), plot(init_glm_lnk, type = "std-fit") + labs(subtitle = "GLM - Link"), plot(init_glm_lnk, type = "qq") + labs(subtitle = "GLM - Link"), # plot(init_ols_dev, type = "res-fit") + labs(subtitle = "OLS - Deviance"), plot(init_ols_dev, type = "std-fit") + labs(subtitle = "OLS - Deviance"), plot(init_ols_dev, type = "qq") + labs(subtitle = "OLS - Deviance"), # plot(init_ols_prs, type = "res-fit") + labs(subtitle = "OLS - Pearson"), plot(init_ols_prs, type = "std-fit") + labs(subtitle = "OLS - Pearson"), plot(init_ols_prs, type = "qq") + labs(subtitle = "OLS - Pearson"), # plot(init_ols_lnk, type = "res-fit") + labs(subtitle = "OLS - Link"), plot(init_ols_lnk, type = "std-fit") + labs(subtitle = "OLS - Link"), plot(init_ols_lnk, type = "qq") + labs(subtitle = "OLS - Link"), nrow = 6, ncol = 3, align = "hv" ) ``` ```{r eigen, echo = TRUE, include = FALSE, fig.width = 7, fig.height = 5} ggpubr::ggarrange( screeplot(init_glm_dev) + labs(subtitle = "GLM - Deviance"), screeplot(init_glm_prs) + labs(subtitle = "GLM - Pearson"), screeplot(init_glm_lnk) + labs(subtitle = "GLM - Link"), screeplot(init_ols_dev) + labs(subtitle = "OLS - Deviance"), screeplot(init_ols_prs) + labs(subtitle = "OLS - Pearson"), screeplot(init_ols_lnk) + labs(subtitle = "OLS - Link"), nrow = 2, ncol = 3, common.legend = TRUE, legend = "bottom", align = "hv" ) ``` ```{r scores, echo = TRUE, include = FALSE, fig.width = 7, fig.height = 7} ggpubr::ggarrange( ggpubr::ggarrange( biplot(init_glm_dev, arrange = FALSE)$scores + theme(axis.title = element_blank()) + labs(subtitle = "GLM - Deviance"), biplot(init_glm_prs, arrange = FALSE)$scores + theme(axis.title = element_blank()) + labs(subtitle = "GLM - Pearson"), biplot(init_glm_lnk, arrange = FALSE)$scores + theme(axis.title = element_blank()) + labs(subtitle = "GLM - Link"), biplot(init_ols_dev, arrange = FALSE)$scores + theme(axis.title = element_blank()) + labs(subtitle = "OLS - Deviance"), biplot(init_ols_prs, arrange = FALSE)$scores + theme(axis.title = element_blank()) + labs(subtitle = "OLS - Pearson"), biplot(init_ols_lnk, arrange = FALSE)$scores + theme(axis.title = element_blank()) + labs(subtitle = "OLS - Link"), nrow = 3, ncol = 2, common.legend = TRUE, legend = "bottom", align = "hv"), ggpubr::ggarrange( biplot(init_glm_dev, arrange = FALSE)$loadings + theme(axis.title = element_blank()) + labs(subtitle = "GLM - Deviance"), biplot(init_glm_prs, arrange = FALSE)$loadings + theme(axis.title = element_blank()) + labs(subtitle = "GLM - Pearson"), biplot(init_glm_lnk, arrange = FALSE)$loadings + theme(axis.title = element_blank()) + labs(subtitle = "GLM - Link"), biplot(init_ols_dev, arrange = FALSE)$loadings + theme(axis.title = element_blank()) + labs(subtitle = "OLS - Deviance"), biplot(init_ols_prs, arrange = FALSE)$loadings + theme(axis.title = element_blank()) + labs(subtitle = "OLS - Pearson"), biplot(init_ols_lnk, arrange = FALSE)$loadings + theme(axis.title = element_blank()) + labs(subtitle = "OLS - Link"), nrow = 3, ncol = 2, common.legend = TRUE, legend = "bottom", align = "hv"), nrow = 1, ncol = 2 ) ``` ```{r pred, echo = TRUE, include = FALSE, fig.width = 7, fig.height = 10} plt_glm_dev = image(init_glm_dev, limits = range(c(Y)), type = "response") plt_glm_prs = image(init_glm_prs, limits = range(c(Y)), type = "response") plt_glm_lnk = image(init_glm_lnk, limits = range(c(Y)), type = "response") plt_ols_dev = image(init_ols_dev, limits = range(c(Y)), type = "response") plt_ols_prs = image(init_ols_prs, limits = range(c(Y)), type = "response") plt_ols_lnk = image(init_ols_lnk, limits = range(c(Y)), type = "response") ggpubr::ggarrange( plt_glm_dev + labs(x = "Species", y = "Environments", title = "Predicted abundance", subtitle = "GLM - Deviance"), plt_glm_prs + labs(x = "Species", y = "Environments", title = "Predicted abundance", subtitle = "GLM - Pearson"), plt_glm_lnk + labs(x = "Species", y = "Environments", title = "Predicted abundance", subtitle = "GLM - Link"), plt_ols_dev + labs(x = "Species", y = "Environments", title = "Predicted abundance", subtitle = "OLS - Deviance"), plt_ols_prs + labs(x = "Species", y = "Environments", title = "Predicted abundance", subtitle = "OLS - Pearson"), plt_ols_lnk + labs(x = "Species", y = "Environments", title = "Predicted abundance", subtitle = "OLS - Link"), nrow = 3, ncol = 2, common.legend = TRUE, legend = "bottom", align = "hv") ``` ```{r resid2, echo = TRUE, include = FALSE, fig.width = 7, fig.height = 10} limits = c(-20, +20) plt_glm_dev = image(init_glm_dev, type = "response", limits = limits, resid = TRUE, symmetric = TRUE) plt_glm_prs = image(init_glm_prs, type = "response", limits = limits, resid = TRUE, symmetric = TRUE) plt_glm_lnk = image(init_glm_lnk, type = "response", limits = limits, resid = TRUE, symmetric = TRUE) plt_ols_dev = image(init_ols_dev, type = "response", limits = limits, resid = TRUE, symmetric = TRUE) plt_ols_prs = image(init_ols_prs, type = "response", limits = limits, resid = TRUE, symmetric = TRUE) plt_ols_lnk = image(init_ols_lnk, type = "response", limits = limits, resid = TRUE, symmetric = TRUE) ggpubr::ggarrange( plt_glm_dev + labs(x = "Species", y = "Environments", title = "Residual heatmap", subtitle = "GLM - Deviance"), plt_glm_prs + labs(x = "Species", y = "Environments", title = "Residual heatmap", subtitle = "GLM - Pearson"), plt_glm_lnk + labs(x = "Species", y = "Environments", title = "Residual heatmap", subtitle = "GLM - Link"), plt_ols_dev + labs(x = "Species", y = "Environments", title = "Residual heatmap", subtitle = "OLS - Deviance"), plt_ols_prs + labs(x = "Species", y = "Environments", title = "Residual heatmap", subtitle = "OLS - Pearson"), plt_ols_lnk + labs(x = "Species", y = "Environments", title = "Residual heatmap", subtitle = "OLS - Link"), nrow = 3, ncol = 2, common.legend = TRUE, legend = "bottom", align = "hv") ```