Login details for the PostgreSQL server are stored in a file that isn’t sync’d to Github
credentials <- source('../postgres_credentials.R')
drv = dbDriver("PostgreSQL")
con = dbConnect(drv, dbname="LTDS_2015", user=username, password=password, host="10.0.4.240")
Get LTDS data from my database for Waltham Forest, showing person, year, mode, duration and expansion factor
ltds_data <- activity_data <- dbGetQuery(con, "
WITH people AS (
SELECT ppid as person,
CASE
WHEN page < 10 THEN '0-10'
WHEN page BETWEEN 10 AND 19 THEN '10-20'
WHEN page BETWEEN 20 AND 29 THEN '20-30'
WHEN page BETWEEN 30 AND 39 THEN '30-40'
WHEN page BETWEEN 40 AND 49 THEN '40-50'
WHEN page BETWEEN 50 AND 59 THEN '50-60'
WHEN page BETWEEN 60 AND 69 THEN '60-70'
WHEN page BETWEEN 70 AND 79 THEN '70-80'
ELSE '>80'
END AS age_category,
pyearid as year,
ppiwt as expansion
FROM person
WHERE phaboro = 33),
cycling AS (
SELECT substring(spid, 1, length(spid)-3)::text as spid,
SUM(sdurn::numeric) as cycling_duration
FROM stage
WHERE smodec = '2'
GROUP BY spid
),
walking AS (
SELECT substring(spid, 1, length(spid)-3)::text as spid,
SUM(sdurn::numeric) as walking_duration
FROM stage
WHERE smodec = '1'
GROUP BY spid
)
SELECT people.person,
people.age_category,
people.year,
people.expansion,
CASE WHEN walking.walking_duration IS NULL THEN 0 ELSE walking.walking_duration END AS walking_duration,
CASE WHEN cycling.cycling_duration IS NULL THEN 0 ELSE cycling.cycling_duration END AS cycling_duration
FROM people
LEFT JOIN cycling ON people.person::text = cycling.spid
LEFT JOIN walking ON people.person::text = walking.spid")
head(ltds_data)
## person age_category year expansion walking_duration cycling_duration
## 1 1048003101 30-40 10 823.0119 0 0
## 2 1048003102 20-30 10 990.3504 0 0
## 3 1048003103 60-70 10 721.7584 0 0
## 4 1048003104 0-10 10 566.3039 0 0
## 5 1048008101 60-70 10 492.9294 29 0
## 6 1048011101 60-70 10 548.8974 50 0
Now expand the people to be representative of population of Waltham Forest
## Expand the results using expansion factors.
ltds_data <- data.frame(
age_category = rep(ltds_data$age_category, ltds_data$expansion),
year = rep(ltds_data$year, ltds_data$expansion),
walking_duration = rep(ltds_data$walking_duration, ltds_data$expansion),
cycling_duration = rep(ltds_data$cycling_duration, ltds_data$expansion),
stringsAsFactors=FALSE
)
Aggregate the data by year
ltds_data <- aggregate(cbind(walking_duration,cycling_duration) ~ age_category+year, data = ltds_data, FUN=mean)
ggplot(ltds_data, aes(x = year, y = walking_duration, colour = age_category, group = age_category)) +
geom_line()
ggplot(ltds_data, aes(x = year, y = walking_duration, colour = age_category, group = age_category)) +
geom_smooth(method = 'lm')
ggplot(ltds_data, aes(x = year, y = cycling_duration, colour = age_category, group = age_category)) +
geom_line()
ggplot(ltds_data, aes(x = year, y = cycling_duration, colour = age_category, group = age_category)) +
geom_smooth(method = 'lm')