Skip to contents

Z-score normalization scales each feature (gene) across all cells to have a mean of 0 and a standard deviation of 1. This function also supports optional weighted unit-variance normalization, in which cells are grouped by a categorical variable.

Usage

Znorm(mat, groups = NULL)

Arguments

mat

A matrix of gene expression data, where rows represent genes and columns represent cells.

groups

A character vector specifying the group label of each cell. If provided, weighted unit-variance normalization will be performed.

Value

A matrix of Z-score normalized gene expression data, with rows representing genes and columns representing cells.

Examples

# Attach the packages used throughout this example.
library(Seurat)
library(data.table)
library(SpatialEcoTyper)
library(googledrive)

# Fetch the demo count table from Google Drive.
drive_deauth() # no Google sign-in is required
drive_download(
  as_id("1CoQmU3u8MoVC8RbLUvTDQmOuJJ703HHB"),
  "HumanMelanomaPatient1_subset_counts.tsv.gz",
  overwrite = TRUE
)
#> File downloaded:
#>HumanMelanomaPatient1_subset_counts.tsv.gz
#>   <id: 1CoQmU3u8MoVC8RbLUvTDQmOuJJ703HHB>
#> Saved locally as:
#>HumanMelanomaPatient1_subset_counts.tsv.gz

# Read the table as a plain data.frame, use its first column as row names,
# and keep the remaining columns as a matrix.
scdata <- fread(
  "HumanMelanomaPatient1_subset_counts.tsv.gz",
  sep = "\t", header = TRUE, data.table = FALSE
)
rownames(scdata) <- scdata[, 1]
scdata <- as.matrix(scdata[, -1])

# Build a Seurat object and run SCTransform on it in a single expression.
tmpobj <- SCTransform(
  CreateSeuratObject(scdata),
  clip.range = c(-10, 10),
  verbose = FALSE
)
#> Warning: Data is of class matrix. Coercing to dgCMatrix.
# Choose the accessor that matches the installed SeuratObject major version:
# releases before 5 go through GetAssayData(), later ones read `data` from the
# SCT assay via `$`.
# `$major` takes the major component directly from the package_version object
# instead of regex-parsing its string form.
seurat_version <- packageVersion("SeuratObject")$major
if (seurat_version < 5L) {
  normdata <- GetAssayData(tmpobj, "data")
} else {
  normdata <- tmpobj[["SCT"]]$data
}
# Plain Z-score normalization (no grouping variable supplied)
znorm_data <- Znorm(mat = normdata)
#> Centering and scaling data matrix
head(znorm_data[, seq_len(5)])
#>          HumanMelanomaPatient1__cell_3655 HumanMelanomaPatient1__cell_3657
#> PDK4                           -0.3183628                        2.5919742
#> TNFRSF17                       -0.1060717                       -0.1060717
#> ICAM3                          -0.4436469                       -0.4436469
#> FAP                             3.2256642                       -0.2634737
#> GZMB                           -0.2174644                       -0.2174644
#> TSC2                           -0.1316410                       -0.1316410
#>          HumanMelanomaPatient1__cell_3658 HumanMelanomaPatient1__cell_3660
#> PDK4                            2.5919742                       -0.3183628
#> TNFRSF17                       -0.1060717                       -0.1060717
#> ICAM3                          -0.4436469                       -0.4436469
#> FAP                            -0.2634737                       -0.2634737
#> GZMB                           -0.2174644                       -0.2174644
#> TSC2                           -0.1316410                       -0.1316410
#>          HumanMelanomaPatient1__cell_3661
#> PDK4                           -0.3183628
#> TNFRSF17                       -0.1060717
#> ICAM3                          -0.4436469
#> FAP                            -0.2634737
#> GZMB                           -0.2174644
#> TSC2                           -0.1316410

# Weighted Z-score normalization
# Download the cell-level metadata. with_drive_quiet() silences googledrive's
# messages; the `verbose` argument of drive_download() is deprecated as of
# googledrive 2.0.0 and would raise a deprecation warning here.
with_drive_quiet(
  drive_download(
    as_id("12xcZNhpT-xbhcG8kX1QAdTeM9TKeFAUW"),
    "HumanMelanomaPatient1_subset_scmeta.tsv",
    overwrite = TRUE
  )
)
scmeta <- fread(
  "HumanMelanomaPatient1_subset_scmeta.tsv",
  sep = "\t", header = TRUE, data.table = FALSE
)
# NOTE(review): assumes the rows of scmeta are in the same order as the
# columns of normdata -- confirm before reusing this with other data.
wtdznorm_data <- Znorm(normdata, groups = scmeta$Region)
head(wtdznorm_data[, 1:5])
#> 6 x 5 Matrix of class "dgeMatrix"
#>          HumanMelanomaPatient1__cell_3655 HumanMelanomaPatient1__cell_3657
#> PDK4                          -0.27144312                       2.29194978
#> TNFRSF17                      -0.09363659                      -0.09363659
#> ICAM3                         -0.38535691                      -0.38535691
#> FAP                            2.81145245                      -0.22714128
#> GZMB                          -0.19294377                      -0.19294377
#> TSC2                          -0.11586941                      -0.11586941
#>          HumanMelanomaPatient1__cell_3658 HumanMelanomaPatient1__cell_3660
#> PDK4                           2.29194978                      -0.27144312
#> TNFRSF17                      -0.09363659                      -0.09363659
#> ICAM3                         -0.38535691                      -0.38535691
#> FAP                           -0.22714128                      -0.22714128
#> GZMB                          -0.19294377                      -0.19294377
#> TSC2                          -0.11586941                      -0.11586941
#>          HumanMelanomaPatient1__cell_3661
#> PDK4                          -0.27144312
#> TNFRSF17                      -0.09363659
#> ICAM3                         -0.38535691
#> FAP                           -0.22714128
#> GZMB                          -0.19294377
#> TSC2                          -0.11586941