6.1 Spatial objects attributes manipulations

Now that we know how to import and transform different classes of spatial objects in R, we can start manipulating their attributes. In this post, we will make a brief overview of some useful basic manipulations that we can perform on spatial object attributes. By no means are those examples exhaustive, but they constitute common manipulations that are made with spatial objects.


6.1.1 Vector objects

For this part, we discuss how to manipulate attributes of objects from the sf package. sf objects have the advantage of being structured like data frames, making their manipulations more intuitive than for objects of class sp. However, if the structure of sp objects is well understood, then the same principles will mostly apply.

Let’s begin by creating an sf points object, as seen in the post on Spatial objects in R:

# Simulated attribute table: 20 records with random coordinates
# (longitude in [-82, -80], latitude in [42, 44]), one normally distributed
# variable (var1) and one uniform variable in [0, 10] (var2).
mydata <- data.frame(
  id = seq_len(20),
  long = runif(20, min = -82, max = -80),
  lat = runif(20, min = 42, max = 44),
  var1 = rnorm(20),
  var2 = runif(20, min = 0, max = 10)
)

library(sf)
# Convert the data frame to an sf points object. The "long" and "lat" columns
# become the point geometry; the remaining columns are kept as attributes.
# The CRS is given as EPSG code 4326 (WGS84 longitude/latitude) rather than a
# legacy PROJ.4 string, which mixed redundant ellipsoid/datum/towgs84 terms.
spatData <- st_as_sf(
  mydata,
  coords = c("long", "lat"),
  crs = 4326
)

# Preview the first rows of the attribute table
knitr::kable(head(spatData))
id var1 var2 geometry
1 -0.2656208 4.759596 POINT (-81.49872 42.14625)
2 -0.4473916 1.959090 POINT (-81.90416 43.27473)
3 0.5489937 9.644724 POINT (-80.34889 43.69822)
4 0.0284773 4.680517 POINT (-80.50904 43.53668)
5 -0.0961527 4.989629 POINT (-81.23522 43.63335)
6 -1.1786894 8.523099 POINT (-81.90933 42.20106)
# Draw the sf object using its default plot method
plot(spatData)

6.1.2 Adding and removing attributes

The object we currently have has two variables, var1 and var2. Additional attributes can quickly be added to the attributes table of our object.

# var3: one uniform random value per row of the attribute table
spatData$var3 <- runif(nrow(spatData))
# var4: element-wise product of the two existing numeric attributes
spatData$var4 <- spatData$var2 * spatData$var1
# Preview the table with the two new columns
knitr::kable(head(spatData))
id var1 var2 geometry var3 var4
1 -0.2656208 4.759596 POINT (-81.49872 42.14625) 0.8109781 -1.2642476
2 -0.4473916 1.959090 POINT (-81.90416 43.27473) 0.7204655 -0.8764803
3 0.5489937 9.644724 POINT (-80.34889 43.69822) 0.5373404 5.2948924
4 0.0284773 4.680517 POINT (-80.50904 43.53668) 0.8825313 0.1332883
5 -0.0961527 4.989629 POINT (-81.23522 43.63335) 0.5776756 -0.4797662
6 -1.1786894 8.523099 POINT (-81.90933 42.20106) 0.4965744 -10.0460866


Similarly, unwanted columns can be removed.

# Assigning NULL to a column drops it from the attribute table
spatData$var4 <- NULL
spatData$var3 <- NULL
# Preview: only id, var1, var2 and the geometry remain
knitr::kable(head(spatData))
id var1 var2 geometry
1 -0.2656208 4.759596 POINT (-81.49872 42.14625)
2 -0.4473916 1.959090 POINT (-81.90416 43.27473)
3 0.5489937 9.644724 POINT (-80.34889 43.69822)
4 0.0284773 4.680517 POINT (-80.50904 43.53668)
5 -0.0961527 4.989629 POINT (-81.23522 43.63335)
6 -1.1786894 8.523099 POINT (-81.90933 42.20106)


However, if you have a very big dataset, you may want to remove columns without having to write all column names manually! You could do this based on the names of the columns you wish to remove or, alternatively, on the names of the attributes you wish to keep.

# Remove the last 5 attributes
# First append 5 supplementary columns of 20 uniform random values each, so
# that there is something to remove; repeated "varSup" names are made unique
# by cbind (varSup, varSup.1, ..., varSup.4).
for (i in seq_len(5)) {
  spatData <- cbind(spatData, varSup = runif(20))
}
knitr::kable(head(spatData))
id var1 var2 varSup varSup.1 varSup.2 varSup.3 varSup.4 geometry
1 -0.2656208 4.759596 0.9534384 0.6138915 0.1015347 0.5420090 0.6238369 POINT (-81.49872 42.14625)
2 -0.4473916 1.959090 0.8681574 0.4294649 0.4743601 0.2269843 0.8081788 POINT (-81.90416 43.27473)
3 0.5489937 9.644724 0.1361151 0.7992288 0.8391657 0.5569911 0.6493939 POINT (-80.34889 43.69822)
4 0.0284773 4.680517 0.4525283 0.4184388 0.7522447 0.8993869 0.2867850 POINT (-80.50904 43.53668)
5 -0.0961527 4.989629 0.3083761 0.5784799 0.4390189 0.4630437 0.9403453 POINT (-81.23522 43.63335)
6 -1.1786894 8.523099 0.4567769 0.5681770 0.2781423 0.2046933 0.9574098 POINT (-81.90933 42.20106)
# Names of the five supplementary columns (positions 4 to 8 of the table)
rem <- names(spatData)[4:8]
# Keep every column whose name is not listed in `rem`
spatData <- spatData[, !(names(spatData) %in% rem)]
# Preview the table after removal
knitr::kable(head(spatData))
id var1 var2 geometry
1 -0.2656208 4.759596 POINT (-81.49872 42.14625)
2 -0.4473916 1.959090 POINT (-81.90416 43.27473)
3 0.5489937 9.644724 POINT (-80.34889 43.69822)
4 0.0284773 4.680517 POINT (-80.50904 43.53668)
5 -0.0961527 4.989629 POINT (-81.23522 43.63335)
6 -1.1786894 8.523099 POINT (-81.90933 42.20106)
# Keep id, var1 and var2
# Start by appending 5 supplementary columns of 20 uniform random values each
for (i in seq_len(5)) {
  spatData <- cbind(spatData, varSup = runif(20))
}
knitr::kable(head(spatData))
id var1 var2 varSup varSup.1 varSup.2 varSup.3 varSup.4 geometry
1 -0.2656208 4.759596 0.7449004 0.0452048 0.8988959 0.0168985 0.2944407 POINT (-81.49872 42.14625)
2 -0.4473916 1.959090 0.0900476 0.8871131 0.1598538 0.2328176 0.4268325 POINT (-81.90416 43.27473)
3 0.5489937 9.644724 0.5445201 0.2484915 0.7909352 0.3587498 0.4109497 POINT (-80.34889 43.69822)
4 0.0284773 4.680517 0.6810730 0.7645665 0.9036288 0.4662961 0.3269100 POINT (-80.50904 43.53668)
5 -0.0961527 4.989629 0.4125643 0.9801848 0.2356249 0.9420042 0.5674170 POINT (-81.23522 43.63335)
6 -1.1786894 8.523099 0.8083331 0.2663551 0.8119234 0.2049022 0.4381835 POINT (-81.90933 42.20106)
# Names of the attributes to retain
keep <- c("id", "var1", "var2")
# Select only those columns; the geometry column is still present in the
# result even though it is not listed in `keep` (see the table below)
spatData <- spatData[, keep]
# Preview the reduced table
knitr::kable(head(spatData))
id var1 var2 geometry
1 -0.2656208 4.759596 POINT (-81.49872 42.14625)
2 -0.4473916 1.959090 POINT (-81.90416 43.27473)
3 0.5489937 9.644724 POINT (-80.34889 43.69822)
4 0.0284773 4.680517 POINT (-80.50904 43.53668)
5 -0.0961527 4.989629 POINT (-81.23522 43.63335)
6 -1.1786894 8.523099 POINT (-81.90933 42.20106)


6.1.3 Subsets

You may also wish to subset your object based on certain attribute values. We will begin by adding some factorial attributes to our spatial object to discuss this in more detail.

# Create a factor with 2 levels (a1, a2), repeated explicitly to fill every
# row instead of relying on silent recycling of a length-2 vector
spatData$fact1 <- factor(rep_len(paste0("a", 1:2), nrow(spatData)))
# Create a factor with 5 levels (b1 to b5), repeated to fill every row
spatData$fact2 <- factor(rep_len(paste0("b", 1:5), nrow(spatData)))
# Preview the table with the two categorical attributes
knitr::kable(head(spatData))
id var1 var2 geometry fact1 fact2
1 -0.2656208 4.759596 POINT (-81.49872 42.14625) a1 b1
2 -0.4473916 1.959090 POINT (-81.90416 43.27473) a2 b2
3 0.5489937 9.644724 POINT (-80.34889 43.69822) a1 b3
4 0.0284773 4.680517 POINT (-80.50904 43.53668) a2 b4
5 -0.0961527 4.989629 POINT (-81.23522 43.63335) a1 b5
6 -1.1786894 8.523099 POINT (-81.90933 42.20106) a2 b1


The simplest way to subset an attributes table is to manually select the rows that we wish to view. In this instance, let’s say we only wish to use the first 10 rows of our data.

# Row indices of the subset: the first 10 records
selectID <- seq_len(10)
# Background layer: every point as a semi-transparent filled dot
plot(
  spatData$geometry,
  col = "#00000055", pch = 20, cex = 1.25, main = ""
)
# Overlay: circle the selected points on top of the existing plot
plot(
  spatData$geometry[selectID],
  col = "#000000", pch = 1, cex = 2, lwd = 2, add = TRUE
)


However, creating subsets based on certain criteria, e.g. all values greater than or equal to 0, can be much more efficient. These criteria are expressed as conditional statements, and there is a vast body of material available discussing them, so we will only present a few examples and invite you to consult other resources like StackOverflow for more specific questions.

# Select all rows where var1 is greater than or equal to 0
# (logical mask with one TRUE/FALSE per row)
selectID <- spatData$var1 >= 0
# Background layer: every point as a semi-transparent filled dot
plot(spatData$geometry, col = "#00000055", pch = 20, cex = 1.25, main = "")
# Overlay: circle the selected points (TRUE spelled out; T can be reassigned)
plot(spatData$geometry[selectID], col = "#000000", add = TRUE, pch = 1, cex = 2, lwd = 2)

# var1 smaller than 0 and var2 higher than 5
# (the comparison on var2 now matches this stated criterion; it previously
# tested var2 < 5)
selectID <- spatData$var1 < 0 & spatData$var2 > 5
# Background layer: every point as a semi-transparent filled dot
plot(spatData$geometry, col = "#00000055", pch = 20, cex = 1.25, main = "")
# Overlay: circle the selected points
plot(spatData$geometry[selectID], col = "#000000", add = TRUE, pch = 1, cex = 2, lwd = 2)

# fact1 equal to a1
selectID <- spatData$fact1 == "a1"
# Background layer: every point as a semi-transparent filled dot
plot(spatData$geometry, col = "#00000055", pch = 20, cex = 1.25, main = "")
# Overlay: circle the selected points (TRUE spelled out; T can be reassigned)
plot(spatData$geometry[selectID], col = "#000000", add = TRUE, pch = 1, cex = 2, lwd = 2)

# fact1 equal to a1 or var1 greater than 0
# (element-wise OR: a row is selected when either condition holds)
selectID <- spatData$fact1 == "a1" | spatData$var1 > 0
# Background layer: every point as a semi-transparent filled dot
plot(spatData$geometry, col = "#00000055", pch = 20, cex = 1.25, main = "")
# Overlay: circle the selected points (TRUE spelled out; T can be reassigned)
plot(spatData$geometry[selectID], col = "#000000", add = TRUE, pch = 1, cex = 2, lwd = 2)

# fact2 equal to b3 or b4
# (%in% tests membership in a set of values)
selectID <- spatData$fact2 %in% c("b3", "b4")
# Background layer: every point as a semi-transparent filled dot
plot(spatData$geometry, col = "#00000055", pch = 20, cex = 1.25, main = "")
# Overlay: circle the selected points (TRUE spelled out; T can be reassigned)
plot(spatData$geometry[selectID], col = "#000000", add = TRUE, pch = 1, cex = 2, lwd = 2)


6.1.4 Join

Joining tables based on their shared id is another common manipulation. This can be quickly accomplished using the left_join function from the dplyr package.

# Build a lookup table keyed by id: it holds only the odd ids between 1 and 20,
# each paired with a normally distributed value (var3)
joinData <- data.frame(
  id = seq(from = 1, to = 20, by = 2),
  var3 = rnorm(n = 10)
)

# Join with attributes table of spatial object
library(dplyr)
#R>  
#R>  Attaching package: 'dplyr'
#R>  The following objects are masked from 'package:raster':
#R>  
#R>      intersect, select, union
#R>  The following objects are masked from 'package:stats':
#R>  
#R>      filter, lag
#R>  The following objects are masked from 'package:base':
#R>  
#R>      intersect, setdiff, setequal, union
# Join on the shared `id` key, stated explicitly so that the join does not
# silently depend on whichever column names the two tables have in common.
# Rows of spatData without a match in joinData get NA for var3.
spatData <- left_join(spatData, joinData, by = "id")
# Preview the joined attribute table
knitr::kable(head(spatData))
id var1 var2 fact1 fact2 var3 geometry
1 -0.2656208 4.759596 a1 b1 -0.8490988 POINT (-81.49872 42.14625)
2 -0.4473916 1.959090 a2 b2 NA POINT (-81.90416 43.27473)
3 0.5489937 9.644724 a1 b3 -1.0932612 POINT (-80.34889 43.69822)
4 0.0284773 4.680517 a2 b4 NA POINT (-80.50904 43.53668)
5 -0.0961527 4.989629 a1 b5 -1.0502298 POINT (-81.23522 43.63335)
6 -1.1786894 8.523099 a2 b1 NA POINT (-81.90933 42.20106)


6.1.5 Aggregate

Information contained in the attributes table can also be used to aggregate a spatial object. Here, we sum var1 within each level of fact1:

# Sum var1 within each group defined by fact1; the result has one row per
# group, with the group's points combined into a single MULTIPOINT geometry
aggData <- aggregate(
  spatData["var1"],
  by = list(spatData$fact1),
  FUN = sum
)
# Preview the aggregated table
knitr::kable(head(aggData))
Group.1 var1 geometry
a1 2.164414 MULTIPOINT ((-81.78574 43.2…
a2 -2.856789 MULTIPOINT ((-81.90933 42.2…
# Plot the aggregated groups, scaling symbol size by the absolute value of
# the summed var1 (abs() keeps the size positive for negative sums)
plot(aggData, cex = abs(aggData$var1))