---
title: "Initialization algorithms"
author: "Cristian Castiglione"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{initialization}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

## Workspace setup

```{r setup, include = FALSE}
options(rmarkdown.html_vignette.check_title = FALSE)

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

Load the \code{sgdGMF} package in the workspace.

```{r sgdgmf}
library(sgdGMF)
```

Load other useful packages in the workspace.

```{r libraries}
library(ggplot2)
library(ggpubr)
library(reshape2)
```

## Ant traits data

Load the ant traits data in the workspace and define the response matrix `Y` and covariate matrices `X` and `Z`.

```{r data}
# install.packages("mvabund")
# data(antTraits, package = "mvabund")

load(url("https://raw.githubusercontent.com/cran/mvabund/master/data/antTraits.RData"))

Y = as.matrix(antTraits$abund)
X = as.matrix(antTraits$env[,-3])
Z = matrix(1, nrow = ncol(Y), ncol = 1)

attr(Y, "dimnames") = NULL
attr(X, "dimnames") = NULL
attr(Z, "dimnames") = NULL

n = nrow(Y)
m = ncol(Y)
```

## Model specification

Set the model family to Poisson since the response matrix contain count data.

```{r family}
family = poisson()
```


```{r init}
suppressWarnings({
  init_glm_dev = sgdGMF::sgdgmf.init(Y, X, Z, ncomp = 2, family = family, method = "glm", type = "deviance")
  init_glm_prs = sgdGMF::sgdgmf.init(Y, X, Z, ncomp = 2, family = family, method = "glm", type = "pearson")
  init_glm_lnk = sgdGMF::sgdgmf.init(Y, X, Z, ncomp = 2, family = family, method = "glm", type = "link")
  init_ols_dev = sgdGMF::sgdgmf.init(Y, X, Z, ncomp = 2, family = family, method = "ols", type = "deviance")
  init_ols_prs = sgdGMF::sgdgmf.init(Y, X, Z, ncomp = 2, family = family, method = "ols", type = "pearson")
  init_ols_lnk = sgdGMF::sgdgmf.init(Y, X, Z, ncomp = 2, family = family, method = "ols", type = "link")
})
```


```{r, fig.width = 7, fig.height = 10}
data.frame(
  "Method" = rep(c("GLM", "OLS"), each = 3),
  "Resid" = rep(c("Deviance", "Pearson", "Link"), times = 2),
  "Deviance" = 
    list(init_glm_dev, init_glm_prs, init_glm_lnk, init_ols_dev, init_ols_prs, init_ols_lnk) |> 
    lapply(function (obj) round(100 * deviance(obj, normalize = TRUE), 2)) |> unlist() |> drop()
)
```


```{r resid, echo = TRUE, include = FALSE, fig.width = 7, fig.height = 10}
ggpubr::ggarrange(
  # 
  plot(init_glm_dev, type = "res-fit") + labs(subtitle = "GLM - Deviance"),
  plot(init_glm_dev, type = "std-fit") + labs(subtitle = "GLM - Deviance"),
  plot(init_glm_dev, type = "qq") + labs(subtitle = "GLM - Deviance"),
  # 
  plot(init_glm_prs, type = "res-fit") + labs(subtitle = "GLM - Pearson"),
  plot(init_glm_prs, type = "std-fit") + labs(subtitle = "GLM - Pearson"),
  plot(init_glm_prs, type = "qq") + labs(subtitle = "GLM - Pearson"),
  # 
  plot(init_glm_lnk, type = "res-fit") + labs(subtitle = "GLM - Link"),
  plot(init_glm_lnk, type = "std-fit") + labs(subtitle = "GLM - Link"),
  plot(init_glm_lnk, type = "qq") + labs(subtitle = "GLM - Link"),
  # 
  plot(init_ols_dev, type = "res-fit") + labs(subtitle = "OLS - Deviance"),
  plot(init_ols_dev, type = "std-fit") + labs(subtitle = "OLS - Deviance"),
  plot(init_ols_dev, type = "qq") + labs(subtitle = "OLS - Deviance"),
  # 
  plot(init_ols_prs, type = "res-fit") + labs(subtitle = "OLS - Pearson"),
  plot(init_ols_prs, type = "std-fit") + labs(subtitle = "OLS - Pearson"),
  plot(init_ols_prs, type = "qq") + labs(subtitle = "OLS - Pearson"),
  # 
  plot(init_ols_lnk, type = "res-fit") + labs(subtitle = "OLS - Link"),
  plot(init_ols_lnk, type = "std-fit") + labs(subtitle = "OLS - Link"),
  plot(init_ols_lnk, type = "qq") + labs(subtitle = "OLS - Link"),
  nrow = 6, ncol = 3, align = "hv"
)

```

```{r eigen, echo = TRUE, include = FALSE, fig.width = 7, fig.height = 5}
ggpubr::ggarrange(
  screeplot(init_glm_dev) + labs(subtitle = "GLM - Deviance"),
  screeplot(init_glm_prs) + labs(subtitle = "GLM - Pearson"),
  screeplot(init_glm_lnk) + labs(subtitle = "GLM - Link"),
  screeplot(init_ols_dev) + labs(subtitle = "OLS - Deviance"),
  screeplot(init_ols_prs) + labs(subtitle = "OLS - Pearson"),
  screeplot(init_ols_lnk) + labs(subtitle = "OLS - Link"),
  nrow = 2, ncol = 3, common.legend = TRUE, legend = "bottom", align = "hv"
)
```


```{r scores, echo = TRUE, include = FALSE, fig.width = 7, fig.height = 7}
ggpubr::ggarrange(
  ggpubr::ggarrange(
    biplot(init_glm_dev, arrange = FALSE)$scores + theme(axis.title = element_blank()) + labs(subtitle = "GLM - Deviance"),
    biplot(init_glm_prs, arrange = FALSE)$scores + theme(axis.title = element_blank()) + labs(subtitle = "GLM - Pearson"),
    biplot(init_glm_lnk, arrange = FALSE)$scores + theme(axis.title = element_blank()) + labs(subtitle = "GLM - Link"),
    biplot(init_ols_dev, arrange = FALSE)$scores + theme(axis.title = element_blank()) + labs(subtitle = "OLS - Deviance"),
    biplot(init_ols_prs, arrange = FALSE)$scores + theme(axis.title = element_blank()) + labs(subtitle = "OLS - Pearson"),
    biplot(init_ols_lnk, arrange = FALSE)$scores + theme(axis.title = element_blank()) + labs(subtitle = "OLS - Link"),
    nrow = 3, ncol = 2, common.legend = TRUE, legend = "bottom", align = "hv"),
  ggpubr::ggarrange(
    biplot(init_glm_dev, arrange = FALSE)$loadings + theme(axis.title = element_blank()) + labs(subtitle = "GLM - Deviance"),
    biplot(init_glm_prs, arrange = FALSE)$loadings + theme(axis.title = element_blank()) + labs(subtitle = "GLM - Pearson"),
    biplot(init_glm_lnk, arrange = FALSE)$loadings + theme(axis.title = element_blank()) + labs(subtitle = "GLM - Link"),
    biplot(init_ols_dev, arrange = FALSE)$loadings + theme(axis.title = element_blank()) + labs(subtitle = "OLS - Deviance"),
    biplot(init_ols_prs, arrange = FALSE)$loadings + theme(axis.title = element_blank()) + labs(subtitle = "OLS - Pearson"),
    biplot(init_ols_lnk, arrange = FALSE)$loadings + theme(axis.title = element_blank()) + labs(subtitle = "OLS - Link"),
    nrow = 3, ncol = 2, common.legend = TRUE, legend = "bottom", align = "hv"),
  nrow = 1, ncol = 2
)
```


```{r pred, echo = TRUE, include = FALSE, fig.width = 7, fig.height = 10}
plt_glm_dev = image(init_glm_dev, limits = range(c(Y)), type = "response")
plt_glm_prs = image(init_glm_prs, limits = range(c(Y)), type = "response")
plt_glm_lnk = image(init_glm_lnk, limits = range(c(Y)), type = "response")
plt_ols_dev = image(init_ols_dev, limits = range(c(Y)), type = "response")
plt_ols_prs = image(init_ols_prs, limits = range(c(Y)), type = "response")
plt_ols_lnk = image(init_ols_lnk, limits = range(c(Y)), type = "response")

ggpubr::ggarrange(
  plt_glm_dev + labs(x = "Species", y = "Environments", title = "Predicted abundance", subtitle = "GLM - Deviance"), 
  plt_glm_prs + labs(x = "Species", y = "Environments", title = "Predicted abundance", subtitle = "GLM - Pearson"), 
  plt_glm_lnk + labs(x = "Species", y = "Environments", title = "Predicted abundance", subtitle = "GLM - Link"), 
  plt_ols_dev + labs(x = "Species", y = "Environments", title = "Predicted abundance", subtitle = "OLS - Deviance"), 
  plt_ols_prs + labs(x = "Species", y = "Environments", title = "Predicted abundance", subtitle = "OLS - Pearson"), 
  plt_ols_lnk + labs(x = "Species", y = "Environments", title = "Predicted abundance", subtitle = "OLS - Link"), 
  nrow = 3, ncol = 2, common.legend = TRUE, legend = "bottom", align = "hv")
```


```{r resid2, echo = TRUE, include = FALSE, fig.width = 7, fig.height = 10}
limits = c(-20, +20)

plt_glm_dev = image(init_glm_dev, type = "response", limits = limits, resid = TRUE, symmetric = TRUE)
plt_glm_prs = image(init_glm_prs, type = "response", limits = limits, resid = TRUE, symmetric = TRUE)
plt_glm_lnk = image(init_glm_lnk, type = "response", limits = limits, resid = TRUE, symmetric = TRUE)
plt_ols_dev = image(init_ols_dev, type = "response", limits = limits, resid = TRUE, symmetric = TRUE)
plt_ols_prs = image(init_ols_prs, type = "response", limits = limits, resid = TRUE, symmetric = TRUE)
plt_ols_lnk = image(init_ols_lnk, type = "response", limits = limits, resid = TRUE, symmetric = TRUE)

ggpubr::ggarrange(
  plt_glm_dev + labs(x = "Species", y = "Environments", title = "Residual heatmap", subtitle = "GLM - Deviance"), 
  plt_glm_prs + labs(x = "Species", y = "Environments", title = "Residual heatmap", subtitle = "GLM - Pearson"),  
  plt_glm_lnk + labs(x = "Species", y = "Environments", title = "Residual heatmap", subtitle = "GLM - Link"), 
  plt_ols_dev + labs(x = "Species", y = "Environments", title = "Residual heatmap", subtitle = "OLS - Deviance"), 
  plt_ols_prs + labs(x = "Species", y = "Environments", title = "Residual heatmap", subtitle = "OLS - Pearson"), 
  plt_ols_lnk + labs(x = "Species", y = "Environments", title = "Residual heatmap", subtitle = "OLS - Link"), 
  nrow = 3, ncol = 2, common.legend = TRUE, legend = "bottom", align = "hv")
```