Method

Login details for the PostgreSQL server are stored in a file that isn’t sync’d to Github

credentials <- source('../postgres_credentials.R')
drv = dbDriver("PostgreSQL")
con = dbConnect(drv, dbname="LTDS_2015", user=username, password=password, host="10.0.4.240")

Get LTDS data from my database for Waltham Forest, showing person, year, mode, duration and expansion factor

ltds_data     <- activity_data <- dbGetQuery(con, "
WITH people AS (
        SELECT      ppid as person,
                    CASE    
                    WHEN page < 10 THEN '0-10'
                    WHEN page BETWEEN 10 AND 19 THEN '10-20'
                    WHEN page BETWEEN 20 AND 29 THEN '20-30'
                    WHEN page BETWEEN 30 AND 39 THEN '30-40'
                    WHEN page BETWEEN 40 AND 49 THEN '40-50'
                    WHEN page BETWEEN 50 AND 59 THEN '50-60'
                    WHEN page BETWEEN 60 AND 69 THEN '60-70'
                    WHEN page BETWEEN 70 AND 79 THEN '70-80'
                    ELSE '>80'
                    END AS  age_category,
                    pyearid as year,
                    ppiwt as expansion
        FROM        person
        WHERE       phaboro = 33),
   cycling AS (
        SELECT        substring(spid, 1, length(spid)-3)::text as spid,
                    SUM(sdurn::numeric) as cycling_duration
        FROM          stage
        WHERE         smodec = '2'        
        GROUP BY      spid
        ),
   walking AS (
        SELECT        substring(spid, 1, length(spid)-3)::text as spid,
        SUM(sdurn::numeric) as walking_duration
        FROM          stage
        WHERE         smodec = '1'
        GROUP BY      spid
)
SELECT     people.person,
           people.age_category,
           people.year,
           people.expansion,
           CASE WHEN walking.walking_duration IS NULL THEN 0 ELSE walking.walking_duration END AS walking_duration,
           CASE WHEN cycling.cycling_duration IS NULL THEN 0 ELSE cycling.cycling_duration END AS cycling_duration
FROM       people
LEFT JOIN  cycling ON people.person::text = cycling.spid
LEFT JOIN  walking ON people.person::text = walking.spid")

head(ltds_data)
##       person age_category year expansion walking_duration cycling_duration
## 1 1048003101        30-40   10  823.0119                0                0
## 2 1048003102        20-30   10  990.3504                0                0
## 3 1048003103        60-70   10  721.7584                0                0
## 4 1048003104         0-10   10  566.3039                0                0
## 5 1048008101        60-70   10  492.9294               29                0
## 6 1048011101        60-70   10  548.8974               50                0

Now expand the people to be representative of population of Waltham Forest

## Expand the results using expansion factors.
ltds_data <- data.frame(
  age_category        =   rep(ltds_data$age_category,     ltds_data$expansion),
  year                =   rep(ltds_data$year,             ltds_data$expansion),
  walking_duration    =   rep(ltds_data$walking_duration, ltds_data$expansion),
  cycling_duration    =   rep(ltds_data$cycling_duration, ltds_data$expansion),
  stringsAsFactors=FALSE
)    

Aggregate the data by year

ltds_data <- aggregate(cbind(walking_duration,cycling_duration) ~ age_category+year, data = ltds_data, FUN=mean)