london_osm_canyons.Rmd

---
title: "Determining whether London streets are air quality canyons"
author: "James Smith"
output:
  html_document:
    toc: true
    toc_depth: 4
date: "`r format(Sys.time(), '%Y-%m%-%d %H%:%M')`"
knit: (function(inputFile, encoding) {
  rmarkdown::render(inputFile, encoding = encoding, output_dir = "docs", output_file ="index.html") })
---

### Loading libraries
```{r load libraries, message=F, warning=F, include=F}
rm(list=ls(all=TRUE)) 

library(devtools)
#suppressMessages(devtools::install_github("r-spatial/sf"))
library(sf)
#suppressMessages(devtools::install_github("ropensci/osmdata"))
suppressMessages(library(osmdata))
suppressMessages(library(raster))
#suppressMessages(devtools::install_github("tidyverse/ggplot2"))
library(ggplot2)
library(rgeos)
library(maptools)
library(rgdal)
library(rgeos)
library(data.table)
library(knitr)
suppressMessages(library(mapview))
library(RCurl)
#library(stplanr)
library(reshape2)
library(dplyr)
library(snow)
library(snowfall)
library(rlist)
library(rasterVis)
library(dplyr)
```


###  Import building height data

Building height data is downloaded from [Copernicus Urban Atlas](https://land.copernicus.eu/local/urban-atlas/building-height-2012?tab=mapview), unzipped and extracted.

The methodology for their creation, by [gaf.d])(https://www.gaf.de/), was as following: "Our original input data were Cartosat-1 acquisitions. The following processing system uses a customised and specialised semi-global matching algorithm to generate am 3m in close cooperation with DLR (German Aerospace Center). Subsequent to the highly automated DSM generation, a DTM was calculated and a nDSM (normalised Digital Surface Model) derived by using internal software and processing tools. The nDSM were finally resampled from 3m up to 10m resolution by using the maximum height value". 

This raster is then imported.

```{r get building rasters}

london_raster <- raster('buildings_raster/UK001L2_LONDON_UA2012_DHM/UK001L2_LONDON_UA2012_DHM.tif')
```

```{r}

focus_area                <- st_as_sf(as(raster::extent(529735, 533671, 180049, 181052), 'SpatialPolygons')) %>% 
                              st_set_crs(27700) %>%
                              st_transform(crs(london_raster)@projargs) %>%
                              as('Spatial')

temp <- crop(london_raster, focus_area)

temp <- rasterToPolygons(temp)

temp <- st_as_sf(temp)

labels <- list()

for (i in 1:5) {
  labels[[i]]                   <- paste(round(quantile(temp$UK001L2_LONDON_UA2012_DHM,c(0.5,0.8,0.85,0.9,0.95,1))[i],0),
                                         '-',
                                         round(quantile(temp$UK001L2_LONDON_UA2012_DHM,c(0.5,0.8,0.85,0.9,0.95,1))[(i+1)],0))
}
labels                          <- unlist(labels)
temp$height <- cut(temp$UK001L2_LONDON_UA2012_DHM, 
                                       breaks=c(0,quantile(temp$UK001L2_LONDON_UA2012_DHM,c(0.5,0.8,0.85,0.9,0.95,1))[2:7]),
                                       labels = labels)
colours <- c('#ffffd4','#fed98e','#fe9929','#d95f0e','#993404')

rm(focus_area)

plot <- ggplot() + 
  geom_sf(data = filter(temp, !is.na(height)), colour = NA, aes(fill = height)) +
  coord_sf() +
  scale_fill_manual(values = colours, name = "Height (m)") +
  #scale_fill_discrete(name = expression(paste("NOx emissions (", mu, m^2, "/", m^3, ")", sep=""))) +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        panel.background = element_blank())

ggsave('building_height_map.png', plot = plot, path = 'maps/', height = 5, width = 15, units='cm')

```

### Import OSM roads data

First we get the extent of the raster
```{r get extent of the raster}

extent              <- extent(london_raster)
extent              <- as(extent, 'SpatialPolygons')
proj4string(extent) <- crs(london_raster)
latlong             <- CRS("+init=epsg:4326")
extent              <- spTransform(extent, latlong)

min_x   <- extent(extent)[1]
max_x   <- extent(extent)[2]
min_y   <- extent(extent)[3]
max_y   <- extent(extent)[4]
rm(extent)
```

Now using the [osmdata package](https://cran.r-project.org/web/packages/osmdata/osmdata.pdf) we can download osmdata for the extent we just defined. Roads have the value 'highway' in [OpenStreetMap](https://en.wikipedia.org/wiki/OpenStreetMap). Within that heading, we can also filter using the following tags:

```{r download data from osm, warning=F}
available_tags('highway')
```

Now we import OSM roads data for the following road types.

```{r import road data}
primary           <- opq(bbox = c(min_x, min_y, max_x, max_y)) %>% add_osm_feature(key = 'highway', value='primary') %>% osmdata_sf()
primary           <- primary$osm_lines[,c('osm_id', 'oneway', 'lanes', 'geometry')]
primary$type      <- 'primary'

primary_link      <- opq(bbox = c(min_x, min_y, max_x, max_y)) %>% add_osm_feature(key = 'highway', value='primary_link') %>% osmdata_sf()
primary_link      <- primary_link$osm_lines[,c('osm_id', 'oneway', 'lanes', 'geometry')]
primary_link$type <- 'primary_link'

secondary         <- opq(bbox = c(min_x, min_y, max_x, max_y)) %>% add_osm_feature(key = 'highway', value='secondary') %>% osmdata_sf()
secondary         <- secondary$osm_lines[,c('osm_id', 'oneway', 'lanes', 'geometry')]
secondary$type    <- 'secondary'

motorway          <- opq(bbox = c(min_x, min_y, max_x, max_y)) %>% add_osm_feature(key = 'highway', value='motorway') %>% osmdata_sf()
motorway          <- motorway$osm_lines[,c('osm_id', 'oneway', 'lanes', 'geometry')]
motorway$type     <- 'motorway'

motorway_link     <- opq(bbox = c(min_x, min_y, max_x, max_y)) %>% add_osm_feature(key = 'highway', value='motorway_link') %>% osmdata_sf()
motorway_link     <- motorway_link$osm_lines[,c('osm_id', 'oneway', 'lanes', 'geometry')]
motorway_link$type <- 'motorway_link'

trunk             <- opq(bbox = c(min_x, min_y, max_x, max_y)) %>% add_osm_feature(key = 'highway', value='trunk') %>% osmdata_sf()
trunk             <- trunk$osm_lines[,c('osm_id', 'oneway', 'lanes', 'geometry')]
trunk$type        <- 'trunk'

trunk_link        <- opq(bbox = c(min_x, min_y, max_x, max_y)) %>% add_osm_feature(key = 'highway', value='trunk_link') %>% osmdata_sf()
trunk_link        <- trunk_link$osm_lines[,c('osm_id', 'oneway', 'lanes', 'geometry')]
trunk_link$type   <- 'trunk_link'

tertiary          <- opq(bbox = c(min_x, min_y, max_x, max_y)) %>% add_osm_feature(key = 'highway', value='tertiary') %>% osmdata_sf()
tertiary          <- tertiary$osm_lines[,c('osm_id', 'oneway', 'lanes', 'geometry')]
tertiary$type     <- 'tertiary'
```

Then bind them all into one object

```{r bind_roads}
roads             <- rbind(primary, primary_link, secondary, motorway, motorway_link, trunk, trunk_link, tertiary) 

roads$osm_id      <- as.numeric(as.character(roads$osm_id))

rm(primary, primary_link, secondary, motorway, motorway_link, trunk, trunk_link, tertiary)

```

Due to the user-generated nature of OSM, need to remove duplicates in space. Also remove duplicated osm_ids.

```{r remove_duplicates}

roads <- roads[!duplicated(roads$geometry),]

roads <- roads[!duplicated(roads$osm_id),]

```

Now that we have a SpatialLinesDataFrame of all the London roads, we delete the ones where the 'lane' attribute is not formed properly or contains an error.

```{r clean lanes data}
roads$lanes       <- as.character(roads$lanes)
roads             <- roads[!grepl(';', roads$lanes),]
roads$lanes       <- as.numeric(roads$lanes)
```

The number of lanes for each road type is below.

```{r check lanes data}
table(roads$type, roads$lanes)

```

Where this is missing, we fill in the blanks, using the mean values of that road type, rounded.

```{r fill_empty_roads}

roads[roads$type == 'primary'       & is.na(roads$lanes),'lanes'] <- round(mean(roads[roads$type == 'primary'       & !is.na(roads$lanes),]$lanes),0)
roads[roads$type == 'primary_link'  & is.na(roads$lanes),'lanes'] <- round(mean(roads[roads$type == 'primary_link'  & !is.na(roads$lanes),]$lanes),0)
roads[roads$type == 'secondary'     & is.na(roads$lanes),'lanes'] <- round(mean(roads[roads$type == 'secondary'     & !is.na(roads$lanes),]$lanes),0)
roads[roads$type == 'motorway'      & is.na(roads$lanes),'lanes'] <- round(mean(roads[roads$type == 'motorway'      & !is.na(roads$lanes),]$lanes),0)
roads[roads$type == 'motorway_link' & is.na(roads$lanes),'lanes'] <- round(mean(roads[roads$type == 'motorway_link' & !is.na(roads$lanes),]$lanes),0)
roads[roads$type == 'trunk'         & is.na(roads$lanes),'lanes'] <- round(mean(roads[roads$type == 'trunk'         & !is.na(roads$lanes),]$lanes),0)
roads[roads$type == 'trunk_link'    & is.na(roads$lanes),'lanes'] <- round(mean(roads[roads$type == 'trunk_link'    & !is.na(roads$lanes),]$lanes),0)
roads[roads$type == 'tertiary'      & is.na(roads$lanes),'lanes'] <- round(mean(roads[roads$type == 'tertiary'      & !is.na(roads$lanes),]$lanes),0)
```

Now, presuming that a lane is 3.15 metres wide, each road width is now calculated and added to the dataset. 

```{r road_widths}
roads$width             <- as.numeric(roads$lanes) * 3.15
```

OSM splits some roads into two, like Marylebone Road, and motorways. This is a problem for calculating height to width ratios. So we are going to try and identify these.

```{r marylebone_issue}
roads$half_of_one <- NA

roads[roads$oneway == 'yes' & !is.na(roads$oneway) & roads$type == 'trunk'           & !is.na(roads$type),'half_of_one'] <- as.character('Yes')
roads[roads$oneway == 'yes' & !is.na(roads$oneway) & roads$type == 'trunk_link'      & !is.na(roads$type),'half_of_one'] <- as.character('Yes')

roads[roads$oneway == 'yes' & !is.na(roads$oneway) & roads$type == 'motorway'        & !is.na(roads$type),'half_of_one'] <- as.character('Yes')
roads[roads$oneway == 'yes' & !is.na(roads$oneway) & roads$type == 'motorway_link'   & !is.na(roads$type),'half_of_one'] <- as.character('Yes')
```

The road widths by road type are now as below

```{r road_widths_summary, fig.width=10}

ggplot(roads, aes(x = width, fill=type)) + geom_histogram(alpha=0.3) + facet_wrap(.~type, scales="free") + ggtitle('Road widths by road type')
```

The widest road in the dataset is here
```{r checking roads}
m <- mapview(st_buffer(st_transform(roads[roads$width>25,][1,],27700),100), map.types = 'Esri.WorldImagery', alpha.regions=0.1)
mapshot(m, file = paste0(getwd(), "/map.png"), remove_controls = c("zoomControl", "layersControl", "homeButton","scaleBar"))
```

![](map.png)

Pavement width is also added to the roads, depending on what type of road it is, as follows. Presumes 3 metres wide pavement. For motorways we still add 'pavement' but presume this is hard-shoulder.

```{r add pavement widths}

roads$pavement_width   <- NA

roads[roads$type == 'primary'        & is.na(roads$half_of_one),  'pavement_width'] <- 6
roads[roads$type == 'primary_link'   & is.na(roads$half_of_one),  'pavement_width'] <- 6

roads[roads$type == 'secondary'      & is.na(roads$half_of_one),  'pavement_width'] <- 6

roads[roads$type == 'motorway'       & is.na(roads$half_of_one),  'pavement_width'] <- 6
roads[roads$type == 'motorway_link'  & is.na(roads$half_of_one),  'pavement_width'] <- 6

roads[roads$type == 'trunk'          & is.na(roads$half_of_one),  'pavement_width'] <- 6
roads[roads$type == 'trunk_link'     & is.na(roads$half_of_one),  'pavement_width'] <- 6

roads[roads$type == 'tertiary'       & is.na(roads$half_of_one),  'pavement_width'] <- 6

roads[roads$type == 'primary'        & !is.na(roads$half_of_one) & roads$half_of_one == 'Yes', 'pavement_width'] <- 3
roads[roads$type == 'primary_link'   & !is.na(roads$half_of_one) & roads$half_of_one == 'Yes', 'pavement_width'] <- 3

roads[roads$type == 'secondary'      & !is.na(roads$half_of_one) & roads$half_of_one == 'Yes', 'pavement_width'] <- 3

roads[roads$type == 'motorway'       & !is.na(roads$half_of_one) & roads$half_of_one == 'Yes', 'pavement_width'] <- 3
roads[roads$type == 'motorway_link'  & !is.na(roads$half_of_one) & roads$half_of_one == 'Yes', 'pavement_width'] <- 3

roads[roads$type == 'trunk'          & !is.na(roads$half_of_one) & roads$half_of_one == 'Yes', 'pavement_width'] <- 3
roads[roads$type == 'trunk_link'     & !is.na(roads$half_of_one) & roads$half_of_one == 'Yes', 'pavement_width'] <- 3

roads[roads$type == 'tertiary'       & !is.na(roads$half_of_one) & roads$half_of_one == 'Yes', 'pavement_width'] <- 3

roads$total_width <- roads$width + roads$pavement_width
```

Road widths are now

```{r new_road_widths_summary, fig.width=10}

ggplot(roads, aes(x = total_width, fill=type)) + geom_histogram(alpha=0.3) + facet_wrap(.~type, scales="free") + ggtitle('Road widths by road type')
```

Add half central reservation
```{r}
roads[roads$half_of_one == 'Yes' & !is.na(roads$half_of_one),]$total_width <- roads[roads$half_of_one == 'Yes' & !is.na(roads$half_of_one),]$total_width + 2

```

With pavements added the roads width is..

```{r road_widths_summary_with_pavements, fig.width=10}

ggplot(roads, aes(x = total_width, fill=type)) + geom_histogram(alpha=0.3) + facet_wrap(.~type, scales="free") + ggtitle('Road widths by road type')
```

As a table the road widths 

```{r road_widths_table_summary_2}

table(roads$type, roads$total_width)

```

As a table the road lanes 

```{r road_lanes_table_summary}

table(roads$type, roads$lanes)

```

The map below shows roads currently in the dataset, coloured by type

```{r show downloaded roads}
ggplot(roads, aes(colour = type, fill=type)) + 
  geom_sf() +
  ggtitle('Downloaded OSM roads, by road type') +
  theme(axis.text = element_blank(),
        panel.background = element_blank(),
        axis.ticks = element_blank(),
        legend.position = 'bottom')
```

Remove a few circular roads that are a problem for buffers

```{r remove problem roads}

roads_to_ignore         <- c(9393,11079,14595,15578) 
roads                   <- roads[-roads_to_ignore,]
rm(roads_to_ignore)

```

Make the buffers around the roads

```{r make road buffers}
roads                   <- st_transform(roads, 27700)

roads$area              <- st_geometry(st_buffer(roads, dist = 20)) / st_geometry(st_buffer(roads, dist = 15))
```

For the roads like Marylebone Road, going to double the buffer area and add 5 metres to the buffers.

```{r special_case}
roads[!is.na(roads$half_of_one),]$area              <- st_geometry(st_buffer(roads[!is.na(roads$half_of_one),], dist = 20*2+5)) /
                                                       st_geometry(st_buffer(roads[!is.na(roads$half_of_one),], dist = 15*2+5))
```

Transform the buffers to the same CRS as the raster we're going to extract from.
```{r buffers_transform}
roads$geometry          <- st_transform(roads$geometry,4326)
roads$area              <- st_transform(roads$area, 4326)

roads$geometry          <- st_transform(roads$geometry, crs(london_raster)@projargs)
roads$area              <- st_transform(roads$area, crs(london_raster)@projargs)

```

Check everything looks ok

```{r sanity_check}
plot(london_raster, xlim=c(3621000, 3622000), ylim = c(3204000,3205000))
plot(st_geometry(roads), xlim=c(3621000, 3622000), add=T)
plot(st_geometry(roads$area), xlim=c(3621000, 3622000), add=T)

```

Make an example of one road with buffer for report

```{r checking}

plot <- ggplot(data=filter(roads, osm_id == 379356929)) + 
  geom_sf(aes(geometry = area), fill = NA, colour = 'red') + 
  geom_sf(aes(geometry=geometry), colour = 'blue') +
  coord_sf() +
  theme_bw() +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        panel.border = element_blank())

ggsave(plot = plot, filename='road_buffer_example.png', path='maps')

```

Now another example, showing a road and raster intersection

```{r}

focus_area                <- st_as_sf(as(raster::extent(filter(st_set_geometry(roads, 'area'), osm_id == 379356929))+50, 'SpatialPolygons')) %>% 
                              as('Spatial')

temp <- crop(london_raster, focus_area)

temp <- rasterToPolygons(temp)

#temp <- st_as_sf(temp) %>% st_set_crs(27700)

labels <- list()

for (i in 1:5) {
  labels[[i]]                   <- paste(round(quantile(temp$UK001L2_LONDON_UA2012_DHM,c(0,0.6,0.7,0.8,0.9,1))[i],0),
                                         '-',
                                         round(quantile(temp$UK001L2_LONDON_UA2012_DHM,c(0,0.6,0.7,0.8,0.9,1))[(i+1)],0))
}
labels                          <- unlist(labels)
temp$height <- cut(temp$UK001L2_LONDON_UA2012_DHM, 
                                       breaks=c(0,quantile(temp$UK001L2_LONDON_UA2012_DHM,c(0,0.6,0.7,0.8,0.9,1))[2:6]),
                                       labels = labels)
colours <- c('#ffffd4','#fed98e','#fe9929','#d95f0e','#993404')

rm(focus_area)

temp <- st_as_sf(temp)

plot <- ggplot(data=(filter(roads, osm_id == 379356929))) + 
  geom_sf(data = filter(temp,!is.na(height)), colour = NA, aes(fill = height)) +
  scale_fill_manual(values = colours, name = "Height (m)") +
  geom_sf(aes(geometry = area), fill = NA, colour = 'red') + 
  geom_sf(aes(geometry=geometry), colour = 'blue') +
  coord_sf() +
  theme_bw() +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        panel.border = element_blank())

ggsave('roads_with_raster_eample.png', plot = plot, path = 'maps/', height = 5, width = 15, units='cm')

```

```{r checking}

ggplot(data=filter(roads, osm_id == 379356929)) + 
  geom_sf(aes(geometry = area), fill = NA, colour = 'red') + 
  geom_sf(aes(geometry=geometry), colour = 'blue') +
  coord_sf() +
  theme_bw() +
  theme(axis.text = element_blank(),
        axis.ticks = element_blank(),
        panel.border = element_blank())


ggsave(plot = plot, filename='road_buffer_example.png', path='maps')

```

Interactive map view of one feature

```{r map view of a buffer}
osm_id_to_examine <- 379356929

temp_raster <- extent(st_bbox(roads[roads$osm_id == osm_id_to_examine,])$xmin-40, st_bbox(roads[roads$osm_id == osm_id_to_examine,])$xmax+40, 
                      st_bbox(roads[roads$osm_id == osm_id_to_examine,])$ymin-40, st_bbox(roads[roads$osm_id == osm_id_to_examine,])$ymax+40)
temp_raster <- crop(london_raster, temp_raster)

mapview(temp_raster, alpha.regions = 0.4, map.types = 'OpenStreetMap.BlackAndWhite') %>% 
  addFeatures(st_transform(roads[roads$osm_id == osm_id_to_examine,],4326))

rm(temp_raster, osm_id_to_examine)
```

Now extract the data we need from the raster (this takes circa 12 hours)

```{r extracting_heights, results="hide"}
start_time          <- Sys.time()

road_polygons       <- as(roads$area,'Spatial')
roads$area          <- NULL 

print('about extracting data')

sfInit(parallel=TRUE, cpus=parallel:::detectCores()-1)
sfLibrary(raster)
sfLibrary(sp)
start_time          <- Sys.time()
extracted <- extract(london_raster, road_polygons, na.rm=T)

end_time <- Sys.time()
sfStop()
print(end_time - start_time)

print('extracted data, using sapply to put into the roads file')

# Check this next bit doesn't insert a list to the data framne. might need to unlist.
roads$canyon_height                                  <- suppressWarnings(lapply(extracted, max))
roads[roads$canyon_height == '-Inf' | is.na(roads$canyon_height),'canyon_height'] <- 5
print('done')

roads$geometry      <- st_transform(roads$geometry, 4326)

rm(london_raster)

end_time <- Sys.time()

print(end_time - start_time)

rm(start_time, end_time)

st_write(roads, 'roads_after_height_extraction.geojson', delete_dsn = T)
```

Where the road is 'half of one', double the width of the road

```{r double_width_for_half_roads}
roads[roads$half_of_one == 'Yes' & !is.na(roads$half_of_one),]$total_width <- roads[roads$half_of_one == 'Yes' & !is.na(roads$half_of_one),]$total_width * 2
```

Add another column for height to width ratio

```{r height and width ratio}
#roads$height_width_ratio <- roads$weighted_mean / roads$total_width
```

Add another height width ratio column for when using the non-zero height from the rasterz

```{r add_another_height_width_ratio}
#roads$height_width_ratio <- roads$canyon_height / roads$total_width
```

Now plot the non-zero height/width ratios.

```{r plot_the_non_zero_height_width_ratios}

ggplot(roads, aes(y = height_width_ratio, group=type, x=type, fill=type)) + geom_boxplot() + ggtitle('Height to width ratio by street type')

```

Need to classify the roads as canyons and directions for modelling. Here are some categories.
```{r road_classification}
road_types               <- read.csv('model_street_types.csv')

road_types$orientation   <- as.character(road_types$orientation)

road_types$width_cat     <- paste0(road_types$min_width, '-', road_types$max_width)
road_types$height_cat    <- paste0(road_types$min_height, '-', road_types$max_height)

road_types$jds_id        <- 1:nrow(road_types)

```

Now need to get the bearing of each road from start point to end point

```{r calculate bearing}

roads$bearing             <- line_bearing(roads)

```

Haven't been working on them, but think it's time time to add the residential roads back in and harmonise them with the main roads data.

```{r add_residential_roads}
residential           <- opq(bbox = c(min_x, min_y, max_x, max_y)) %>% add_osm_feature(key = 'highway', value='residential') %>% osmdata_sf()
residential           <- residential$osm_lines[,c('osm_id', 'oneway', 'lanes', 'geometry')]
residential$type      <- 'residential'

service               <- opq(bbox = c(min_x, min_y, max_x, max_y)) %>% add_osm_feature(key = 'highway', value='service') %>% osmdata_sf()
service               <- service$osm_lines[,c('osm_id', 'oneway', 'lanes', 'geometry')]
service$type          <- 'service'

unclassified           <- opq(bbox = c(min_x, min_y, max_x, max_y)) %>% add_osm_feature(key = 'highway', value='unclassified') %>% osmdata_sf()
unclassified           <- unclassified$osm_lines[,c('osm_id', 'oneway', 'lanes', 'geometry')]
unclassified$type      <- 'unclassified'

extra_roads            <- rbind(residential, service, unclassified)
rm(residential, service, unclassified)

extra_roads$osm_id      <- as.numeric(as.character(extra_roads$osm_id))

extra_roads$lanes       <- as.character(extra_roads$lanes)
extra_roads             <- extra_roads[!grepl(';', extra_roads$lanes),]
extra_roads$lanes       <- as.numeric(extra_roads$lanes)

extra_roads[is.na(extra_roads$lanes),'lanes'] <- 2

extra_roads$width                           <- extra_roads$lanes * 3.15
extra_roads$half_of_one                     <- as.factor(NA)
extra_roads$pavement_width                  <- as.numeric(6)
extra_roads$total_width                     <- as.numeric(extra_roads$width + extra_roads$pavement_width)
extra_roads$type                            <- as.factor(extra_roads$type)

#extra_roads$weighted_mean                  <- as.numeric(NA)
#extra_roads$cell_count                     <- as.integer(NA)
#extra_roads$zero_cells                     <- as.integer(NA)
extra_roads$bearing                         <- line_bearing(extra_roads)
#extra_roads$height_width_ratio             <- as.numeric(NA)
#extra_roads$max_height                     <- as.numeric(NA)
extra_roads$canyon_height                   <- as.numeric(5)
#extra_roads$height_width_ratio              <- as.numeric(NA)

col_order <- names(roads)

extra_roads <- extra_roads[,col_order]

roads <- rbind(roads, extra_roads)

rm(extra_roads)

```

Add orientations

```{r categorise_orientations}
roads$orientation_classification <- NA

roads[roads$bearing >= -22.5   & roads$bearing <= 22.5 & !is.na(roads$bearing),]$orientation_classification   <- 'NS'
roads[roads$bearing >= 157.5   & roads$bearing <= 180 & !is.na(roads$bearing), ]$orientation_classification   <- 'NS'
roads[roads$bearing <= -157.5  & roads$bearing >= -180 & !is.na(roads$bearing), ]$orientation_classification  <- 'NS'

roads[roads$bearing >= 67.5    & roads$bearing <= 112.5 & !is.na(roads$bearing),]$orientation_classification  <- 'EW'
roads[roads$bearing >= -112.5  & roads$bearing <= -67.5 & !is.na(roads$bearing),]$orientation_classification  <- 'EW'

roads[roads$bearing >= 112.5   & roads$bearing <= 157.5 & !is.na(roads$bearing),]$orientation_classification  <- 'SENW'
roads[roads$bearing <= -22.5   & roads$bearing >= -67.5 & !is.na(roads$bearing),]$orientation_classification  <- 'SENW'

roads[roads$bearing >= 22.5    & roads$bearing <= 67.5 & !is.na(roads$bearing),]$orientation_classification   <- 'SWNE'
roads[roads$bearing <= -112.5  & roads$bearing >= -157.5 & !is.na(roads$bearing),]$orientation_classification <- 'SWNE'

```

Categorise our heights and widths
```{r}
road_height_banding <- unique(c(road_types$min_height, road_types$max_height))
road_width_banding  <- unique(c(road_types$min_width, road_types$max_width))

roads$height_cat   <- cut(roads$canyon_height, road_height_banding)
roads$height_cat   <- as.character(roads$height_cat)

roads$width_cat   <- cut(roads$total_width, road_width_banding)
roads$width_cat   <- as.character(roads$width_cat)

roads$height_cat <- gsub("(", "",  roads$height_cat, fixed=T)
roads$height_cat <- gsub("]", "",  roads$height_cat, fixed=T)
roads$height_cat <- gsub(",", "-", roads$height_cat, fixed=T)

roads$width_cat <- gsub("(", "",  roads$width_cat, fixed=T)
roads$width_cat <- gsub("]", "",  roads$width_cat, fixed=T)
roads$width_cat <- gsub(",", "-", roads$width_cat, fixed=T)

roads[is.na(roads$height_cat) & roads$canyon_height == 0,'height_cat'] <- '0-0'
```

Add a road type type depending on orientation, height category, and width category

```{r join_street_canyon_types}

roads <- left_join(roads, road_types, by = c("orientation_classification" = "orientation", "height_cat" = "height_cat", "width_cat" = "width_cat"), suffix = c('x','y'))

rm(road_types, road_height_banding, road_width_banding)

```

Remove duplicates.

```{r remove_duplicates}

roads <- roads[!duplicated(roads$geometry),]

roads <- roads[!duplicated(roads$osm_id),]

```

```{r write_out_as_geojson}
st_write(roads, dsn='final_roads_london.gpkg', delete_dsn = T)
```

Kernels data for Sean
```{r}
london_kernels_in_use <- aggregate(data = roads, cbind(width, total_width, canyon_height) ~ jds_id, FUN=mean )

count                <- aggregate(data = roads, osm_id ~ jds_id, FUN=length )

road_types <- merge(road_types, london_kernels_in_use, by = 'jds_id', all.x=T)

road_types <- merge(road_types, count, by = 'jds_id', all.x=T)

names(road_types)[names(road_types) == 'osm_id'] <- 'count_of_roads'

road_types$canyon_height <- round(road_types$canyon_height,2)
road_types$width <- round(road_types$width,2)
road_types$total_width <- round(road_types$total_width,2)

write.csv(road_types, 'london_kernels.csv')
```

Now just for London look to join our new classifications to the existing roads using a link file from Andrew Beddows.

```{r join}
osm_to_dotref <- read.csv('osmToDotRef.csv')

osm_to_dotref <- osm_to_dotref[osm_to_dotref$osm_id != osm_to_dotref$DotRef & osm_to_dotref$DotRef_Lts == 'DotRef',]

osm_to_dotref <- osm_to_dotref[,c('osm_id', 'DotRef')]

roads         <- merge(roads, osm_to_dotref, by.x = 'osm_id', by.y = 'osm_id', all.x=T)

dotref_roads  <- roads[!is.na(roads$DotRef),c('DotRef', 'jds_id')] 

dotref_roads <- st_transform(dotref_roads, 27700)

dotref_roads$length <- as.numeric(st_length(dotref_roads$geometry))

dotref_roads$geometry <- NULL

rm(osm_to_dotref)

sum_of_weights_per_dotref <- aggregate(data=dotref_roads, length ~ DotRef+jds_id, FUN=sum)

sum_of_weights_per_dotref <- sum_of_weights_per_dotref %>% arrange(DotRef,desc(length))

sum_of_weights_per_dotref <- sum_of_weights_per_dotref %>% group_by(DotRef) %>% top_n(n=1)

# Now import Gregor file and update with new types

dotref_points <- read.csv('Oscar_DOT_Toid_10m.csv')

dotref_points <- merge(dotref_points, sum_of_weights_per_dotref, by.x = 'Dot_ref', by.y = 'DotRef', all.x=T)

write.csv(dotref_points, 'Oscar_DOT_Toid_10m_V2.csv', row.names = F)
rm(count, dotref_points, dotref_roads, extracted, london_raster, london_kernels_in_use, road_polygons, road_types, sum_of_weights_per_dotref, col_order, end_time, max_x, min_x, max_y, min_y, latlong, road_height_banding, road_width_banding, start_time)

#roads$max_height <- NULL
#roads$min_height <- NULL
#roads$max_width <- NULL
#roads$min_width <- NULL

roads <- merge(roads, sum_of_weights_per_dotref, by.x = 'DotRef', by.y = 'DotRef', all.x=T)
st_write(roads, 'roads_with_dotref_new_type.gpkg', delete_dsn = T)
```