# Chapter 10: Spatial Data Wrangling

# DON'T load the libraries

# set working directory
# Every relative file name below is resolved against this folder.
# Keep exactly ONE setwd() active: each call replaces the previous one, so an
# earlier call has no lasting effect and only adds a way for the script to
# stop (when that folder does not exist on this machine).
#setwd("~/GIS/using-r-gis")
#setwd("~/work/2020-24-intro-to-r-gis-book/data")
setwd("~/work/2020-24-using-r-scripts-TMP/data")

## 10.2 Reading your non-spatial data in

# read the population table; the file name is relative to the working directory
data <- read.csv("pop2015.csv")

# common problems
# NOTE: the failing calls in this section appear to be deliberate demonstrations
# of the quoted error messages - run the lines one at a time, in this order.
  
  # could not find function "st_read"
  # (st_read is called here before library(sf) has been run)
  countries <- st_read("world_countries.shp")
  # load sf, then the same call is repeated
  library(sf)
  countries <- st_read("world_countries.shp")

  # Error: Cannot open "world_countries_update.shp"; The file doesn't seem to exist.
  # (presumably the file name is misspelled on purpose - TODO confirm)
  countries <- st_read("world_countries_update.shp")
  # check the current working directory
  # (relative file names are looked up in the folder printed here)
  getwd()

## 10.3.3 Shapefile not complete  
    
  # Shapefile not complete
  # presumably "world_countries_updated - Copy.shp" is a copy of the .shp file
  # without its companion files, so this read is expected to fail - TODO confirm
  countries <- st_read("world_countries_updated - Copy.shp")

## 10.4 Viewing head/tail

  # size of the non-spatial table: number of rows, then number of columns
  nrow(data)
  ncol(data)
  
  # (disabled) open the first rows of a large table in the data viewer
  #View(head(large_data))
  
  # quick map of the countries layer using tmap
  library(tmap)
  qtm(countries)
  
## 10.5 Incorrect geometries

  # read in data
  # presumably this older file contains invalid geometries, so the first
  # qtm() call below is expected to fail - TODO confirm
  countries <- st_read("world_countries_old.shp") 
  library(tmap)
  qtm(countries)
  # update code, error is different
  
  # try the automatic fixing
  # NOTE(review): check.and.fix looks like the tmap v3 spelling of this option -
  # confirm the option name against the installed tmap version
  tmap_options(check.and.fix = TRUE)
  qtm(countries)
  
  # more ways of checking if geometry is valid
  # open the help page for st_is_valid
  ?sf::st_is_valid
  # validity check with default arguments
  st_is_valid(countries)
  # summarise the results 
  # with reason = TRUE the result is requested as a reason per feature;
  # table() then counts how often each distinct value occurs
  st_is_valid(countries, reason = TRUE)
  table(st_is_valid(countries, reason = TRUE))
  
  # ways of automatically fixing geometry
  # store the repaired layer under a new name, re-run the same summary on it,
  # then map it
  countries2 <- st_make_valid(countries)
  table(st_is_valid(countries2, reason = TRUE))
  qtm(countries2)
  # check data output
  # check data output
  
## 10.6 Converting data types
  
  # read in correct countries
  countries <- st_read("world_countries.shp") 
  
  # add different data types
  # presumably deliberate: if NAME is a character column, `+` stops with an
  # error here, which illustrates why data types matter - TODO confirm
  countries$NAME[1] + countries$NAME[2]
  
  # check data types
  # str() lists every column together with its type
  str(countries)
  str(data)

  # reading in data as character
  # colClasses = "character" makes read.csv() read every column as text
  data <- read.csv("pop2015-new.csv", colClasses = "character")
  
  # check data types
  str(data)
  
  # join data
  # match countries$NAME against data$Name; with merge()'s defaults only rows
  # that have a match in both tables are kept
  countries <- merge(countries, data, by.x="NAME", by.y="Name")

  # try mapping the data
  # POP2015 came from the table that was read as character
  library(tmap)
  qtm(countries, fill = "POP2015")
  # check code, slightly different error message
  # change data to numeric
  # the converted values go into a new column; POP2015 itself is left as it is
  countries$POP2015num <- as.numeric(countries$POP2015)
  
  # try mapping again
  qtm(countries, fill = "POP2015num")

## 10.7 Subsetting

  # the United Kingdom, matched on its full country name
  uk_name_rows <- which(countries$NAME == "United Kingdom")
  UK <- countries[uk_name_rows, ]
  head(UK)
  qtm(UK)

  # the same selection, this time matched on the FIPS code
  uk_fips_rows <- which(countries$FIPS == "UK")
  UK_FIPS <- countries[uk_fips_rows, ]
  head(UK_FIPS)
  qtm(UK_FIPS)

  # several countries at once using OR |
  # a row is kept when at least one of the three comparisons is TRUE
  wanted_rows <- which(
    countries$FIPS == "UK" |
      countries$FIPS == "FR" |
      countries$FIPS == "SP"
  )
  multiple_countries <- countries[wanted_rows, ]
  qtm(multiple_countries)

## Multi-Part Polygons

  # example of a multi-part polygon: the feature with FIPS code "SP"
  spain_rows <- which(countries$FIPS == "SP")
  Spain <- countries[spain_rows, ]
  qtm(Spain)

  # convert the geometry type from MULTIPOLYGON to POLYGON
  Spain_Islands <- st_cast(Spain, to = "POLYGON")
  head(Spain_Islands)

## Using Ranges

  # one condition: keep the rows where LIFEEXP is above 75
  high_lifeexp_rows <- which(countries$LIFEEXP > 75)
  life_exp <- countries[high_lifeexp_rows, ]
  head(life_exp)
  qtm(life_exp)

  # Multiple ranges
  # two conditions joined with AND (&): a row is kept only when both hold
  # (the result replaces the earlier contents of multiple_countries)
  both_conditions_rows <- which(
    countries$LIFEEXP > 75 &
      countries$INFMRT > 10
  )
  multiple_countries <- countries[both_conditions_rows, ]
  head(multiple_countries)
  qtm(multiple_countries)

## 10.8 Raster data

  # raster handling in this section comes from terra
  library(terra)

  # Popchange data downloaded from URLs below and extracted from age.zip
  # https://www.qub.ac.uk/research-centres/GIS/Research/PopChange/
  # https://reshare.ukdataservice.ac.uk/852498/

  # read the ASCII grid (file name relative to the working directory)
  age0_14_file <- "5a_ascii_grid2011_Age_Age0_14.asc"
  age0_14 <- rast(age0_14_file)

  # show the object on the console, then draw it
  age0_14
  plot(age0_14)

## Crop out a local authority

library(sf)
library(tmap)

# boundary of the local authority, read with sf and mapped as a quick check
sthelens <- st_read("sthelens.shp")
qtm(sthelens)

# terra's crop() is given a SpatVector here, so convert the sf object first
sthelens_spatVect <- vect(sthelens)

# crop the raster to the boundary; mask = TRUE is passed so that the result
# follows the boundary outline instead of being a rectangle
sthelens_0_14 <- crop(
  x = age0_14,
  y = sthelens_spatVect,
  mask = TRUE
)

# draw the cropped raster, then add the boundary on top of the same plot
plot(sthelens_0_14)
plot(sthelens_spatVect, add = TRUE)

# print the CRS of the raster and of the boundary to check they are the same
st_crs(age0_14)
st_crs(sthelens)

# the same steps again, for the number of people aged 65 and over
age_65plus_file <- "5a_ascii_grid2011_Age_Age65p.asc"
age_65plus <- rast(age_65plus_file)
age_65plus
plot(age_65plus)

# crop to the St Helens boundary, again with mask = TRUE
sthelens_65plus <- crop(
  x = age_65plus,
  y = sthelens_spatVect,
  mask = TRUE
)

# draw the cropped raster
plot(sthelens_65plus)

# add the boundary: the raster now matches the sthelens area
plot(sthelens_spatVect, add = TRUE)

# draw both age groups one after the other - note the very different
# spatial pattern
plot(sthelens_65plus)
plot(sthelens_0_14)
# you can also use back and forward button to switch between the two

## Raster Calculator
# add the two rasters together: people aged 0-14 plus people aged 65 and over
sthelens_14_65 <- sthelens_0_14 + sthelens_65plus
plot(sthelens_14_65)                
# we can also divide one by the other
# (this overwrites the sum stored in sthelens_14_65 above)
sthelens_14_65 <- sthelens_65plus / sthelens_0_14
plot(sthelens_14_65)   
# this is the number of over 65s per person aged 0-14:
# values above 1 mean more over 65s than 0-14s, values below 1 mean fewer.
# NOTE(review): cells where sthelens_0_14 is 0 are divided by zero -
# check how those cells appear in the plot

## 10.11 Reclassify Data

#view info & histogram
# show the raster on the console, then draw a histogram of its values
# to help choose the class breaks
sthelens_65plus
hist(sthelens_65plus)

# values range from 0 to 992
# let's reclassify:
# 0 - 100 low
# 100 - 500 medium
# 500+ high

## from-to-becomes
# classify the values into three groups
# each row of the matrix below is (from, to, becomes):
# all values >= 0 and <= 100 become 1, values > 100 and <= 500 become 2, etc.
# see https://rdrr.io/cran/terra/man/classify.html for details

# reclassification table written as one (from, to, becomes) triple per line
m <- c(
    0,  100, 1,
  100,  500, 2,
  500, 1000, 3
)
# fold the flat vector into a matrix, filling it row by row
rclmat <- matrix(data = m, nrow = 3, byrow = TRUE)
# apply the table to the 65+ raster; include.lowest = TRUE is passed so the
# lowest break value itself is included in the first class
reclassified <- classify(
  x = sthelens_65plus,
  rcl = rclmat,
  include.lowest = TRUE
)

# draw the reclassified raster
plot(reclassified)

## 10.12 Masking

# crop without the mask argument: this gives a square crop
sthelens_0_14 <- crop(x = age0_14, y = sthelens_spatVect)
plot(sthelens_0_14)

# crop with mask = TRUE: this follows the boundary outline
sthelens_0_14 <- crop(
  x = age0_14,
  y = sthelens_spatVect,
  mask = TRUE
)
plot(sthelens_0_14)
