[code language="r"]
##### inspect data ####
# Summary statistics for every column (max, min, median, mean)
summary(Data_in2)
summary(Data_in)
# Maximum and minimum values over the whole taxa dataset
range(B_taxa)
# range() only works if the data frame contains only numeric values, so this
# call is expected to produce an error message for Data_in. try() still shows
# that message but lets the rest of the script continue when it is sourced.
try(range(Data_in))
# The amount of 0s in the dataset can be an issue, so check for that.
# Count the cells that are exactly 0 (sum(B_taxa) alone would instead return
# the total of all values, not the number of zeros).
sum(B_taxa == 0) # number of 0-counts
sum(B_taxa == 0) / (nrow(B_taxa) * ncol(B_taxa)) # proportion of 0s
#### Standardize and transform data ####
### Standardization of environmental variables
# Numerical variables can be standardized in several ways; two options are
# shown below. scale() applies (x - mean(x)) / sd(x) to each column.
names(B_envir) # list the column names; only numerical columns may be selected
# Option 1: base R, applying scale() to each selected column
B_envir.st <- data.frame(
  sapply(X = B_envir[, c(6:9, 12)], FUN = scale, USE.NAMES = TRUE)
)
# Option 2: decostand() from "vegan" with the "standardize" method.
# Note that this assignment replaces the result of option 1.
library(vegan)
B_envir.st <- decostand(B_envir[, c(6:9, 12)], method = "standardize")
#### Transformations of response variables
library(stats)
# Example: square root transformation
# (appropriate for many species composition datasets)
names(B_taxa)
# Keep the result in a new object for later use in the analysis
B_taxa.sqrt <- sqrt(B_taxa)
# Example: 4th root transformation, i.e. the square root applied twice
# (strong transformation, for datasets with many 0s and some very high
# abundances)
names(B_taxa)
# Keep the result in a new object for later use in the analysis
B_taxa.4thrt <- sqrt(sqrt(B_taxa))
[/code]