1 sf : the simple & tidy way of working with spatial data in R

The sf package is the (relatively) new kid on the block when it comes to working with spatial data in R. It provides a much more intuitive way and is easier to learn that the sp package. It provides a syntax and data-structures which are coherent with the tidyverse. A quick introduction can be found here: https://mhweber.github.io/gis_in_action_r_spatial/2017/04/18/05-Spatial-Data-In-R-sf

It builds upon the ‘simple features’ standard (not R specific!) https://en.wikipedia.org/wiki/Simple_Features

Documentation on CRAN: https://cran.r-project.org/web/packages/sf/sf.pdf

2 Plotting a map : my workflow before using the sf-package

After reading in the shapefile you need to fortify it to be able to join the values which should be used to colour the map to it and plot it. The mapdata dataset in the example below is created with the sole purpouse of generating a map. It contains a row for every edge-point which composes the polygons. Each point-entry contains the value for the respective municipality. That’s a lot of redundancy for a single-use object…

library(pacman)
pacman::p_load(rgdal, rgeos)
# devtools::install_github("tidyverse/ggplot2")
library(ggplot2)

#read shapefile with rgdal (in past i've regularly used maptools::readShapePoly). This results in a Spatial*Dataframe, a nested datastructure (lists within lists) which is hard to decompose.

shape <- readOGR("geodata/Shape_detailliert_SEEN_2016/UP_GEMEINDEN_SEEN_2016_F.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "geodata/Shape_detailliert_SEEN_2016/UP_GEMEINDEN_SEEN_2016_F.shp", layer: "UP_GEMEINDEN_SEEN_2016_F"
## with 179 features
## It has 9 fields
#fortify -> generates a dataframe which ggplot2 can handle from a spatial object
shape2 <- fortify(shape, region='BFS')

# Dataset (Motorization and "generalabonnement"-owners)
mfz_ga <-readRDS("mfz_ga.rds")

#merge data with shapefile

mapdata <- merge(shape2, mfz_ga, by.x="id", by.y="bfs",all.x = TRUE, all.y = TRUE)

mapdata <- mapdata[order(mapdata$order),]

#how does the df look like?
mapdata[11300:11310,]
# ggplot mapping  # data layer

ggplot(data=mapdata,aes(x=long, y=lat, group=group)) + 
  geom_path(color='gray') + 
  coord_equal()+ 
  geom_polygon(aes(fill=Anteil_GA),color = "white")+
  theme_void()+  
  theme(legend.key.size = unit(1,"line"),                                       
  #Grösse Legende
  legend.key.height= unit(0.5,"line"))+
  scale_fill_continuous(name="GA pro 100 EW")+
  labs(title="GA-Dichte im Kanton Zürich",
       subtitle="Anzahl Generalabonnemente pro 100 Einwohner",x="",y="")

3 Plotting maps the ‘tidy’ way with the sf-package

library(pacman)

pacman::p_load(sf,dplyr,ggplot2)

#Shapefile (municipalities) - read_sf generates a tidy dataset, each row represents one municipality, the polygons are stored in a single special geo-variable, the 'geometry' column
gemeinden<- st_read('geodata/Shape_detailliert_SEEN_2016')
## Reading layer `UP_GEMEINDEN_SEEN_2016_F' from data source `L:\STAT\08_DS\06_Diffusion\R\geodaten_austausch\geodata_workshop\sf_intro\geodata\Shape_detailliert_SEEN_2016' using driver `ESRI Shapefile'
## Simple feature collection with 179 features and 9 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 2669245 ymin: 1223895 xmax: 2716901 ymax: 1283343
## epsg (SRID):    NA
## proj4string:    +proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +units=m +no_defs
# Dataset (Motorization and GA-owners)
mfz_ga <-readRDS("mfz_ga.rds")

#join data - the dataset-structure stays the same, just 
gemdata <-gemeinden %>% left_join(mfz_ga, by=c("BFS"="bfs"))

#how does the df with class "sf" look like?
head(gemdata)
# dataframe with class sf can easily be ploted to get an overview! not so with sp...
plot(gemdata)
## Warning: plotting the first 10 out of 18 attributes; use max.plot = 18 to
## plot all
## Warning in classInt::classIntervals(values, nbreaks, breaks): n greater
## than number of different finite values\nn reset to number of different
## finite values
## Warning in classInt::classIntervals(values, nbreaks, breaks): n same
## as number of different finite values\neach different finite value is a
## separate class

#map with ggplot2
sfmap <-ggplot()+
  geom_sf(data=gemdata,aes(fill=Anteil_GA),color = "white")+
  coord_sf(datum = NA)+  
  theme_void()+#Koordinatennetz verbergen
  theme(legend.key.size = unit(1,"line"),                                       
  legend.key.height= unit(0.5,"line"))+
  scale_fill_continuous(name="GA pro 100 EW")+
  labs(title="GA-Dichte im Kanton Zürich",
       subtitle="Anzahl Generalabonnemente pro 100 Einwohner",x="",y="")

sfmap

4 sf objects are just ordinary dataframes!

Dataframes with class ‘sf’ are ordinary dataframes with a special column containing the geo-variable. They can be handled like any ordinary structured dataframe.

library(gghighlight)
#library(devtools)
# devtools::install_github("statistikZH/statR")
# library(statR)


gghighlight_point(gemdata, aes(mfzpro100ew,Anteil_GA),Anteil_GA > 7 & mfzpro100ew<600 |                     mfzpro100ew>1000|
                    mfzpro100ew<700 & Anteil_GA<3|
                    mfzpro100ew>750 & Anteil_GA>9,label_key = GEMEINDENA)+
  geom_point(aes(size=Anzahl_Einw/1000, color=Anteil_HTA))+
  # theme_stat()+
  scale_size(name="Einwohner")+
  labs(title="Motorisierungsgrad & Generalabonnements pro Gemeinde\n",
       y="Anzahl GA pro 100 Einwohner",x="Motorfahrzeuge pro 1000 Einwohner")
## Warning: Removed 9 rows containing missing values (geom_point).

5 This was just a foretaste…

Imagine we have several shapefiles which we want to plot layer by layer. Without sf we would need to fortify each shapefile to be able to make a ggplot2-map with it.

# Shapefiles of the "Handlungsräume & Bauzonen"
handlungsraum<- st_read("geodata/handlungsraeume", stringsAsFactors = FALSE,quiet = TRUE) 

bauzonen <- st_read("geodata/bauzonen", stringsAsFactors = FALSE,quiet = TRUE) 

sfmap+
  geom_sf(data=handlungsraum,fill=NA)+
  geom_sf(data=bauzonen,alpha=0.5,color="white")+
  coord_sf(datum = NA)  
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.

6 geospatial operations & transformations made easy with sf

6.1 Example: Transforming projection & coordinate system

gemdata %>% 
  select(GEMEINDENA) %>%
st_transform(crs = "+proj=moll +datum=WGS84") %>% 
  # plot(key.pos = NULL, graticule = TRUE) %>% 
  ggplot()+
  geom_sf()

6.2 Example: UNION Polygons and add a 100m buffer

#Example: UNION Polygons and add a 100m buffer
gemdata %>% 
  filter(ART_TEXT=="Gemeinde") %>% 
  st_union() %>% 
  st_buffer(dist=100) %>% 
  plot()

6.3 Example: Spatial Join, add attributes of polygons to points

#set projection
st_crs(gemdata)<- "+init=epsg:2056"
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform
## for that
#load station passenger data
sbb <- read_sf('geodata/passagierfreq', stringsAsFactors = FALSE)

#set projection
sbb <- st_transform(sbb,"+init=epsg:4326 +proj=somerc +lat_0=46.95240555555556 +lon_0=7.439583333333333 +k_0=1 +x_0=2600000 +y_0=1200000 +ellps=bessel +units=m +no_defs")

#apply same projection to municipality data
gemdata_new <-st_transform(gemdata, crs = st_crs(sbb,asText=TRUE))

#join municipality-attributes to points
sbb_gem_data <- st_join(sbb, gemdata_new) %>% 
  filter(!is.na(ART_TEXT))

head(sbb_gem_data)

6.4 Example: Rotate and shrink polyongs

We can for instance rotate the municipalities of the canton of Zurich 90 degrees clockwise around their centroid, and shrink them to 75% of their original size.

#function for rotation
rot = function(a) matrix(c(cos(a), sin(a), -sin(a), cos(a)), 2, 2)

#extract geometries
ncg = st_geometry(gemdata)
plot(ncg, border = 'grey')
#get centroids
cntrd = st_centroid(ncg)
#rotate + shrink
ncg2 = (ncg - cntrd) * rot(pi/2) * .75 + cntrd

plot(ncg2, add = TRUE)
plot(cntrd, col = 'red', add = TRUE, cex = .5)