#### stack all the LULC maps from each year as one
# List the per-year 7-class land-cover GeoTIFFs; file names indicate one
# map per year from 2000 to 2022 (23 layers).
raster_names = Sys.glob('data/7classes_all_years/*.tif')
raster_names
## [1] "data/7classes_all_years/test_7c_2000.tif"
## [2] "data/7classes_all_years/test_7c_2001.tif"
## [3] "data/7classes_all_years/test_7c_2002.tif"
## [4] "data/7classes_all_years/test_7c_2003.tif"
## [5] "data/7classes_all_years/test_7c_2004.tif"
## [6] "data/7classes_all_years/test_7c_2005.tif"
## [7] "data/7classes_all_years/test_7c_2006.tif"
## [8] "data/7classes_all_years/test_7c_2007.tif"
## [9] "data/7classes_all_years/test_7c_2008.tif"
## [10] "data/7classes_all_years/test_7c_2009.tif"
## [11] "data/7classes_all_years/test_7c_2010.tif"
## [12] "data/7classes_all_years/test_7c_2011.tif"
## [13] "data/7classes_all_years/test_7c_2012.tif"
## [14] "data/7classes_all_years/test_7c_2013.tif"
## [15] "data/7classes_all_years/test_7c_2014.tif"
## [16] "data/7classes_all_years/test_7c_2015.tif"
## [17] "data/7classes_all_years/test_7c_2016.tif"
## [18] "data/7classes_all_years/test_7c_2017.tif"
## [19] "data/7classes_all_years/test_7c_2018.tif"
## [20] "data/7classes_all_years/test_7c_2019.tif"
## [21] "data/7classes_all_years/test_7c_2020.tif"
## [22] "data/7classes_all_years/test_7c_2021.tif"
## [23] "data/7classes_all_years/test_7c_2022.tif"
# stack: combine the 23 annual rasters into one multi-layer RasterStack
# (raster package); the summary below shows they share grid and CRS.
lulc_rast = stack(raster_names)
lulc_rast
## class : RasterStack
## dimensions : 205, 294, 60270, 23 (nrow, ncol, ncell, nlayers)
## resolution : 30, 30 (x, y)
## extent : 4565746, 4574566, 3396580, 3402730 (xmin, xmax, ymin, ymax)
## crs : +proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs
## names : test_7c_2000, test_7c_2001, test_7c_2002, test_7c_2003, test_7c_2004, test_7c_2005, test_7c_2006, test_7c_2007, test_7c_2008, test_7c_2009, test_7c_2010, test_7c_2011, test_7c_2012, test_7c_2013, test_7c_2014, ...
## min values : 1, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ...
## max values : 7, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ...
plot(lulc_rast)
# Convert the stack to a table: one row per pixel, columns are x, y
# followed by one land-cover-code column per year.
tab = as.data.frame(rasterToPoints(lulc_rast))
tab
# change to new LC code
if (new_LC_order == TRUE){
  # Remap the land-cover class codes in the LC columns ONLY (columns 1-2
  # are x/y coordinates and must not be recoded).
  # Net mapping, same as the original two-pass "+10" trick:
  #   1->7, 2->3, 3->2, 4->6, 5->1, 6->5, 7->4
  # A lookup vector translates every value in a single pass, so no
  # intermediate codes are needed and collisions cannot occur.
  # NA cells (if any) stay NA, which the original logical-index
  # assignment would have errored on.
  recode = c(7, 3, 2, 6, 1, 5, 4)
  lc_cols = 3:ncol(tab)
  tab[lc_cols] = lapply(tab[lc_cols], function(v) recode[v])
}
# create a column of concatenated sequence codes, one character per year.
# do.call(paste0, <data.frame>) pastes the LC columns row-wise in a fully
# vectorized way; the original apply() coerced the data.frame to a matrix
# and looped over rows, producing the same strings much more slowly.
# (ncol(tab) is evaluated before 'seq' is added, so only LC columns are used.)
tab$seq = do.call(paste0, tab[, 3:ncol(tab)])
head(tab$seq)
## [1] "55555555555555555555555" "55455555555455555555555"
## [3] "55555555555555555555555" "55555555555555555555555"
## [5] "55555555555555555555555" "55555555555555555555555"
#### 1. by remove all the stable sequences
# A stable sequence repeats a single class code for all years; the
# backreference pattern matches "same digit throughout" (codes are 1-7,
# so single-digit codes are guaranteed).
stable_seq.index = grepl("^([0-9])\\1*$", tab$seq)
tab[stable_seq.index,]
sum(stable_seq.index)
## [1] 47452
# remove all stable sequences
tab = tab[!stable_seq.index,]
tab
# get the number of unique sequences
unique_traj = sort(unique(tab$seq))
length(unique_traj)
## [1] 5082
# get the frequency of these trajectories (Freq = number of pixels
# sharing each trajectory)
tab_freq = as.data.frame(table(tab$seq))
# sort the sequences from more to less frequent
tab_freq = tab_freq[order(tab_freq$Freq, decreasing = TRUE),]
tab_freq
### 2. remove sequences that only appear in less than [n_raster] rasters
# Find all unique trajectories whose pixel count is at most n_raster.
# NOTE(review): Freq counts pixels (cells), not rasters, and the test is
# <= rather than the strict < suggested by the section title -- kept as-is
# since the printed results depend on it; confirm intent.
remove_seq = tab_freq[tab_freq$Freq <= n_raster ,'Var1']
# What percentage of the unique trajectories will be removed?
# (fixed: the denominator must count unique trajectories, nrow(tab_freq);
# the original divided by nrow(tab), mixing unique-sequence counts with
# pixel counts)
length(remove_seq)/nrow(tab_freq)*100
# remove sequences (drop every pixel carrying a rare trajectory)
tab = tab[!(tab$seq %in% remove_seq),]
# get the number of unique sequences
unique_traj = sort(unique(tab$seq))
length(unique_traj)
## [1] 1118
# get the frequency of these trajectories
tab_freq = as.data.frame(table(tab$seq))
# sort the sequences from more to less frequent
tab_freq = tab_freq[order(tab_freq$Freq,decreasing = TRUE),]
tab_freq
# define landcover dictionary -- position in each vector corresponds to
# the land-cover class code used in the recoded rasters
if (new_LC_order == TRUE){
  lc_dict = list('Wetland & Peatland vegetation','Woody (Shrubland and Forest)', 'Water', 'Grassland', 'Cropland', 'Bare peat (exploited)', 'Built-up & bareland')
  shortName_dict = c('Wetland','Woody', 'Water', 'Grassland', 'Cropland', 'Bare peat', 'Built-up')
  color_dict = c('#a25fff','#00a700','#00ccf2','#c1ecd0','#e4ce3f','#7a4700','#cc0000')
} else {
  lc_dict = list('Built-up & bareland','Water','Woody (Shrubland and Forest)','Bare peat (exploited)','Wetland & Peatland vegetation','Cropland','Grassland')
  # define landcover short name dictionary
  shortName_dict = c('Built-up','Water','Woody','Bare peat','Wetland','Cropland','Grassland')
  # define color code dictionary
  color_dict = c("#cc0000","#00ccf2","#00a700","#7a4700",'#a25fff','#e4ce3f','#c1ecd0')
}
# get unique landcover class codes actually present in the sequences
# (exclude x, y and the trailing 'seq' column)
alphabet = sort(unique(unlist(tab[,3:(ncol(tab)-1)])))
# Vectorized lookup of label, short name and color for the available
# classes -- replaces the original grow-in-a-loop construction with
# direct indexing by class code; results are identical.
label = unlist(lc_dict[alphabet])
short_label = shortName_dict[alphabet]
color = color_dict[alphabet]
short_label
## [1] "Wetland" "Woody" "Water" "Grassland" "Cropland" "Built-up"
color
## [1] "#a25fff" "#00a700" "#00ccf2" "#c1ecd0" "#e4ce3f" "#cc0000"
# create state sequence object (TraMineR); columns 3:(ncol(tab)-1) hold
# the yearly land-cover codes (x, y and the trailing 'seq' column excluded)
tab.seq = seqdef(tab, 3:(ncol(tab)-1), alphabet = alphabet, states = short_label, cpal = color, labels = label)
## [>] state coding:
## [alphabet] [label] [long label]
## 1 1Wetland Wetland & Peatland vegetation
## 2 2Woody Woody (Shrubland and Forest)
## 3 3Water Water
## 4 4Grassland Grassland
## 5 5Cropland Cropland
## 6 7Built-up Built-up & bareland
## [>] 8854 sequences in the data set
## [>] min/max sequence length: 23/23
# plot 10 random sequences
some_seq = tab.seq[sample(nrow(tab.seq), 10), ]
seqiplot(some_seq, with.legend = TRUE, border = TRUE, main = "10 random sequences")
# Plot all the sequences in the data set, sorted by states from start.
# (title fixed: the stack covers 2000-2022, not 2000-2020)
seqIplot(tab.seq, sortv = "from.start", with.legend = TRUE, main = "All sequences 2000-2022")
# Plot the 10 most frequent sequences.
seqfplot(tab.seq, with.legend = TRUE, main = "10 most common sequences")
## Warning in (function (seqdata, idxs = 1:10, weighted = TRUE, format = "SPS", :
## '-' character in state codes may cause invalid results
# plot the state distributions by time step.
seqdplot(tab.seq, with.legend = TRUE, border = NA, main = "Land cover (states) distribution", ylab = "Proportion of study area")
# Plot dominant land cover of the transversal state distributions.
seqmsplot(tab.seq, with.legend = TRUE, main = "Most frequent land cover")
# Plot the mean time spent in each land cover category.
# (ylab fixed: the layers are annual, so the unit is years)
seqmtplot(tab.seq, with.legend = TRUE, main = "Permanence", ylab = "Number of years")
# compute substitution costs based on transition probabilities and indel set as half the maximum substitution cost
# seqcost(method = "TRATE") derives substitution costs from the observed
# transition rates: rarely observed transitions get costs near 2 (see $sm
# below), frequent ones lower costs.
costs.tr <- seqcost(tab.seq, method = "TRATE",with.missing = FALSE)
## [>] creating substitution-cost matrix using transition rates ...
## [>] computing transition probabilities for states Wetland/Woody/Water/Grassland/Cropland/Built-up ...
print(costs.tr)
## $indel
## [1] 0.9999347
##
## $sm
## Wetland Woody Water Grassland Cropland Built-up
## Wetland 0.000000 1.925148 1.931359 1.954220 1.999005 1.998710
## Woody 1.925148 0.000000 1.979127 1.923395 1.990207 1.947442
## Water 1.931359 1.979127 0.000000 1.974708 1.999869 1.998710
## Grassland 1.954220 1.923395 1.974708 0.000000 1.856916 1.949528
## Cropland 1.999005 1.990207 1.999869 1.856916 0.000000 1.998130
## Built-up 1.998710 1.947442 1.998710 1.949528 1.998130 0.000000
# Computes pairwise dissimilarities between sequences by OM (optimal
# matching) using the transition-rate costs above; the result is a
# symmetric 8854 x 8854 distance matrix (see dim() below).
dist.om1 <- seqdist(tab.seq, method = "OM",indel = costs.tr$indel, sm = costs.tr$sm,with.missing = FALSE)
## [>] 8854 sequences with 6 distinct states
## [>] checking 'sm' (size and triangle inequality)
## [>] 1118 distinct sequences
## [>] min/max sequence lengths: 23/23
## [>] computing distances using the OM metric
## [>] elapsed time: 3.5 secs
dim(dist.om1)
## [1] 8854 8854
# create a table of land cover type
# NOTE(review): the only "feature" supplied is the raw numeric class code,
# so the Gower-based costs below are proportional to code differences --
# i.e. the arbitrary ordering of class codes drives the cost matrix
# (visible in $sm below). Confirm this ordinal treatment is intended.
tab_state_features = data.frame(state=alphabet)
# compute "FEATURES" (Gower distance between state features)
costs.gower = seqcost(tab.seq, method = "FEATURES",with.missing = FALSE,state.features = tab_state_features)
print(costs.gower)
## $indel
## [1] 0.5
##
## $sm
## Wetland Woody Water Grassland Cropland Built-up
## Wetland 0.0000000 0.1666667 0.3333333 0.5000000 0.6666667 1.0000000
## Woody 0.1666667 0.0000000 0.1666667 0.3333333 0.5000000 0.8333333
## Water 0.3333333 0.1666667 0.0000000 0.1666667 0.3333333 0.6666667
## Grassland 0.5000000 0.3333333 0.1666667 0.0000000 0.1666667 0.5000000
## Cropland 0.6666667 0.5000000 0.3333333 0.1666667 0.0000000 0.3333333
## Built-up 1.0000000 0.8333333 0.6666667 0.5000000 0.3333333 0.0000000
# Computes pairwise dissimilarities between sequences by OM using the
# feature-based (Gower) costs above.
dist.om2 <- seqdist(tab.seq, method = "OM",indel = costs.gower$indel, sm = costs.gower$sm,with.missing = F)
## [>] 8854 sequences with 6 distinct states
## [>] checking 'sm' (size and triangle inequality)
## [>] 1118 distinct sequences
## [>] min/max sequence lengths: 23/23
## [>] computing distances using the OM metric
## [>] elapsed time: 2.63 secs
dim(dist.om2)
## [1] 8854 8854
# Computes pairwise dissimilarities between sequences with DHD (dynamic
# Hamming distance): substitution costs vary by time step, derived here
# from time-varying transition rates (see log below); no indels.
dist.dhd = seqdist(tab.seq, method = "DHD")
## [>] 8854 sequences with 6 distinct states
## [>] creating a 'sm' with the costs derived from the transition rates
## [>] creating time varying substitution-cost matrix using transition rates ...
## [>] computing time varying transition probabilities for states Wetland/Woody/Water/Grassland/Cropland/Built-up ...
## [>] 1118 distinct sequences
## [>] min/max sequence lengths: 23/23
## [>] computing distances using the DHD metric
## [>] elapsed time: 2.07 secs
dim(dist.dhd)
## [1] 8854 8854
# Computes pairwise dissimilarities between sequences with SVRspell,
# which operates on spells (runs of identical states, 2-11 per sequence
# per the log below) using a neutral identity proximity matrix.
dist.svr = seqdist(tab.seq, method = "SVRspell")
## [>] 8854 sequences with 6 distinct states
## [>] creating a neutral 'prox' (identity matrix)
## [>] 1118 distinct spell sequences
## [>] min/max spell sequence lengths: 2/11
## [>] computing distances using the SVRspell metric
## [>] elapsed time: 3.61 secs
dim(dist.svr)
## [1] 8854 8854
# Build a Ward hierarchical clustering of the sequences (a typology of the
# trajectories) from the different dissimilarity metrics in section 2, and
# assign each sequence a cluster group.
# The hierarchical clustering is cut at several cluster levels (n_clusters)
# in order to compare the results derived from the different dissimilarity
# metrics.
# Ward hierarchical clustering on the OM transition-rate distance matrix
clusterward_om1 = hclust(as.dist(dist.om1),method="ward.D")
# plot cluster dendrogram
plot(clusterward_om1)
# Cut the dendrogram at each requested cluster level and attach the group
# membership to tab in one pass (the original used two loops). The
# cl_om1_<k> vectors are still exported to the global environment in case
# later code relies on them.
for (n_cluster in n_clusters) {
  groups = cutree(clusterward_om1, k = n_cluster)
  assign(paste0('cl_om1_', n_cluster), groups)
  tab[[paste0('clusterom1_', n_cluster)]] = groups
}
head(tab)
# Ward hierarchical clustering on the OM feature-based distance matrix
clusterward_om2 = hclust(as.dist(dist.om2),method="ward.D")
# plot cluster dendrogram
plot(clusterward_om2)
# Cut the dendrogram at each requested cluster level and attach the group
# membership to tab in one pass; cl_om2_<k> globals kept for downstream use.
for (n_cluster in n_clusters) {
  groups = cutree(clusterward_om2, k = n_cluster)
  assign(paste0('cl_om2_', n_cluster), groups)
  tab[[paste0('clusterom2_', n_cluster)]] = groups
}
head(tab)
# Ward hierarchical clustering on the DHD distance matrix
clusterward_dhd = hclust(as.dist(dist.dhd),method="ward.D")
# plot cluster dendrogram
plot(clusterward_dhd)
# Cut the dendrogram at each requested cluster level and attach the group
# membership to tab in one pass; cl_dhd_<k> globals kept for downstream use.
for (n_cluster in n_clusters) {
  groups = cutree(clusterward_dhd, k = n_cluster)
  assign(paste0('cl_dhd_', n_cluster), groups)
  tab[[paste0('clusterdhd_', n_cluster)]] = groups
}
head(tab)
# Ward hierarchical clustering on the SVRspell distance matrix
clusterward_svr = hclust(as.dist(dist.svr),method="ward.D")
# plot cluster dendrogram
plot(clusterward_svr)
# Cut the dendrogram at each requested cluster level and attach the group
# membership to tab in one pass; cl_svr_<k> globals kept for downstream use.
for (n_cluster in n_clusters) {
  groups = cutree(clusterward_svr, k = n_cluster)
  assign(paste0('cl_svr_', n_cluster), groups)
  tab[[paste0('clustersvr_', n_cluster)]] = groups
}
head(tab)
# For each cluster level, plot the sequence index plots grouped by the
# cluster membership from each of the four dissimilarity metrics.
for (n_cluster in n_clusters) {
print(paste('plot cluster',n_cluster,'level'))
# larger margin for plotting
par(mar=c(2,2,3,0))
# OM1 (OM with transition-rate costs)
col_name = paste0('clusterom1_',n_cluster)
main = paste0('cluster ',n_cluster,' level of sequences based on OM transition rates')
seqIplot(tab.seq, group = tab[[col_name]], sortv = "from.start", main = main)
# OM2 (OM with feature-based costs)
col_name = paste0('clusterom2_',n_cluster)
main = paste0('cluster ',n_cluster,' level of sequences based on OM features')
seqIplot(tab.seq, group = tab[[col_name]], sortv = "from.start", main = main)
# DHD (comment corrected: was mislabelled "LCS")
col_name = paste0('clusterdhd_',n_cluster)
main = paste0('cluster ',n_cluster,' level of sequences based on DHD')
seqIplot(tab.seq, group = tab[[col_name]], sortv = "from.start", main = main)
# SVRspell (comment corrected: was mislabelled "LCP")
col_name = paste0('clustersvr_',n_cluster)
main = paste0('cluster ',n_cluster,' level of sequences based on SVRspell')
seqIplot(tab.seq, group = tab[[col_name]], sortv = "from.start", main = main)
}
## [1] "plot cluster 10 level"
## [1] "plot cluster 15 level"
## [1] "plot cluster 20 level"
## [1] "plot cluster 25 level"
# Function to create a raster from a cluster column
# load 1 input raster to use as template raster (terra SpatRaster)
template_rast = rast(raster_names[1])
# Rasterize a vector of cluster memberships back onto the template grid.
#
# template_rast: SpatRaster supplying extent, CRS and resolution.
# tab:           data.frame with x and y coordinate columns (one row per pixel).
# cluster.col:   vector of cluster group numbers, aligned with rows of tab.
# Returns a SpatRaster of cluster group numbers (NA where no point falls).
raster_cl.func = function(template_rast, tab, cluster.col){
  # create an empty raster on the template's grid
  new_rast = rast(ext(template_rast),
                  crs = crs(template_rast),
                  resolution = res(template_rast))
  # x, y and cluster group for each pixel; explicit column names replace
  # the original cbind() whose auto-generated V1/V2 names the vect() call
  # fragilely depended on
  xyz = data.frame(x = tab$x, y = tab$y, cluster = cluster.col)
  # Convert the dataframe to a SpatVector object
  points = vect(xyz, geom = c("x", "y"),
                crs = crs(template_rast))
  # Rasterize the points into the new raster; the result variable no
  # longer shadows terra::rast()
  cl_rast = rasterize(points, new_rast, field = "cluster")
  return(cl_rast)
}
# Plot all cluster rasters from the 4 methods together
# For each cluster level: build, plot and write the cluster raster for
# each of the four dissimilarity methods (2x2 panel per level). The
# raster_<method>_<k> objects are still assigned into the global
# environment, and the same output/<name>.tif files are written, exactly
# as in the original copy-pasted version.
for (n_cluster in n_clusters) {
  print(paste('plot cluster', n_cluster, 'level'))
  # create a 2 row x 2 column plotting matrix
  par(mfrow = c(2, 2))
  # larger margin for plotting
  par(mar = c(2, 2, 7, 2))
  # method suffix -> panel title (same order as the original panels)
  method_titles = c(om1 = 'OM transition rates',
                    om2 = 'OM features',
                    dhd = 'DHD',
                    svr = 'SVRspell')
  # one palette per level, hoisted out of the per-method work
  pal = colorRampPalette(brewer.pal(12, 'Set3'))(n_cluster)
  for (m in names(method_titles)) {
    col_name = paste0('cluster', m, '_', n_cluster)
    fig_name = paste0('raster_', m, '_', n_cluster)
    # create cluster raster (kept as a global via assign, as before)
    assign(fig_name, raster_cl.func(template_rast = template_rast, tab = tab,
                                    cluster.col = tab[[col_name]]))
    # plot
    plot(get(fig_name), main = method_titles[[m]], col = pal)
    # write raster
    writeRaster(x = get(fig_name),
                filename = paste0('output/', fig_name, '.tif'),
                overwrite = TRUE)
  }
  #### add title
  mtext(paste0(n_cluster, '-level Cluster distribution'), side = 3,
        line = -2, outer = TRUE, cex = 2)
}
## [1] "plot cluster 10 level"
## [1] "plot cluster 15 level"
## [1] "plot cluster 20 level"
## [1] "plot cluster 25 level"