# Scooter vendors whose feeds are processed below; each name is also the
# directory name used under ../data/monday/.
vendors <- list("link", "lime", "spin")
Data Gathering and Wrangling
Setup
The purpose of this script is to define functions which convert data collected by the Python script running on mapping.capital, which are returned as nested JSON files, to geoJSON which can be used for analysis. This file is set up for a dataset of all scooter locations, collected every 15 minutes, from 0600 Eastern to 1000 Eastern on May 1, 2022. The second function defined in this document creates a long dataframe which is a time series of scooter locations.
This code block defines a function that converts the nested JSON returned by scooter vendors to a non-nested geoJSON which can be read by `sf`.
#' Convert one vendor's nested JSON snapshots to flat geoJSON files
#'
#' Reads every `.json` file in `../data/monday/<vendor>/morning/json/`,
#' takes the `data$bikes` table from each, adds a timestamp column built from
#' the snapshot's `last_updated` field, converts the rows to an sf point layer
#' (lon/lat, EPSG:4326) and writes it to
#' `../data/monday/<vendor>/morning/geoJSON/<last_updated>_<vendor>.geoJSON`.
#' Snapshots whose output file already exists are skipped.
#'
#' @param vendor Vendor directory name, e.g. "link", "lime" or "spin".
#' @return Invisibly, a list with one element per input file: the value of
#'   `st_write()` when a file was written, `NULL` when it was skipped.
json2geoJSON <- function(vendor) {
  json_dir <- paste0("../data/monday/", vendor, "/morning/json/")
  geojson_dir <- paste0("../data/monday/", vendor, "/morning/geoJSON/")

  # `pattern` is a regular expression, not a glob, so translate the glob first.
  files <- list.files(
    path = json_dir,
    pattern = glob2rx("*.json"),
    full.names = TRUE,
    recursive = FALSE
  )

  written <- lapply(files, function(x) {
    current_data <- fromJSON(txt = x) # load file
    current_tibble <- as_tibble(current_data$data$bikes) # convert to tibble

    # NOTE(review): the column is named `timestamp_utc` but is created with the
    # system time zone -- confirm which zone downstream code expects.
    current_tibble$timestamp_utc <- as_datetime(
      current_data$last_updated,
      tz = Sys.timezone()
    )

    current_sf <- st_as_sf(
      current_tibble,
      coords = c("lon", "lat"),
      crs = 4326
    )

    # Build the output path once so the existence check and the write use the
    # same string (a space-separated path in the check would never match).
    out_path <- paste0(
      geojson_dir, current_data$last_updated, "_", vendor, ".geoJSON"
    )
    if (!file.exists(out_path)) {
      st_write(current_sf, dsn = out_path, append = FALSE) # write as geoJSON
    }
  })

  invisible(written)
}
# Loop through each of link, lime, spin and convert its JSON snapshots.
for (v in vendors) {
  json2geoJSON(v)
}
This section of code defines a function which creates a timeseries for each scooter and adds a vendor column which can be grouped by in following scripts.
Note 2024-10-11: I cannot emphasize enough how much you SHOULD NOT USE GLOBAL ASSIGNMENT (`<<-`) in a function. This was some of the first R code I’d ever written, and I’m leaving it for posterity. However, there are MUCH better ways to do this.
#' Build a long time series of scooter locations for one vendor
#'
#' Reads every `.geoJSON` file in `../data/monday/<vendor>/morning/geoJSON/`,
#' row-binds them into a single long data frame, adds a `vendor` column and
#' drops exact duplicate rows.
#'
#' The result is also stored in the global variable `test_sf`, because
#' existing callers read it from there. New code should use the return value.
#'
#' @param vendor Vendor directory name, e.g. "link", "lime" or "spin".
#' @return Invisibly, the combined data frame (the same object assigned to
#'   the global `test_sf`).
load_timeseries <- function(vendor) {
  # `pattern` is a regular expression, not a glob, so translate the glob first.
  files <- list.files(
    path = paste0("../data/monday/", vendor, "/morning/geoJSON/"),
    pattern = glob2rx("*.geoJSON"),
    full.names = TRUE,
    recursive = FALSE
  )

  # Read each snapshot into a local list, then stack them into one long frame.
  snapshots <- lapply(files, function(fn) st_read(fn))
  combined <- bind_rows(snapshots)
  combined$vendor <- vendor # vendor column for grouping in later scripts

  # The pipeline produces duplicate rows (cause not yet debugged); drop them.
  combined <- distinct(combined)

  # Legacy side effect kept for backward compatibility with existing callers.
  test_sf <<- combined
  invisible(combined)
}
# Build the Monday-morning time series for each vendor and save it as a
# GeoPackage. An existing output file is left untouched.
link_data <- load_timeseries("link")
if (!file.exists("../results/link_mon_am.gpkg")) {
  st_write(link_data, dsn = "../results/link_mon_am.gpkg", append = FALSE)
}

lime_data <- load_timeseries("lime")
if (!file.exists("../results/lime_mon_am.gpkg")) {
  st_write(lime_data, dsn = "../results/lime_mon_am.gpkg", append = FALSE)
}

spin_data <- load_timeseries("spin")
if (!file.exists("../results/spin_mon_am.gpkg")) {
  st_write(spin_data, dsn = "../results/spin_mon_am.gpkg", append = FALSE)
}