## Sunteți pe pagina 1 din 8

##
## Programme Name: MBIE Principal Research Analyst.r
##
## Objective: This programme is designed to read in Statistics New Zealand's
##            GDP Production statistics and address questions requested for
##            applicants for MBIE's Principal Research Analyst role within the
##            Sector Performance team.
##
##            Part of this programme is derived from MBIE, the other component
##            is me.
##
## Author: James Hogan, 12 April 2013
##

##
## Load up some functionality
##
library(MASS)
library(lattice)
library(ggplot2)
library(plyr)
library(stringr)
library(tseries)
library(reshape2)
library(cluster)
library(lubridate)
library(splines)
library(calibrate)

##
## Read in the GDP data
##
## Cells holding ".." are read as NA.
gdp <- read.csv("C:\\Temp\\MBIE\\GDP P chain volume flat.csv",
                na.strings = "..", check.names = TRUE, header = TRUE)

## Build a Date from the Quarter label: the first four characters are used as
## the year and the last character (the quarter number) times 3 as the month,
## always on day 1.
gdp$Date <- as.Date(
  str_c("1",
        as.numeric(str_sub(gdp$Quarter, -1)) * 3,
        str_sub(gdp$Quarter, 1, 4),
        sep = "/"),
  "%d/%m/%Y"
)
str(gdp)

##
## Create shades of colors between blue and red for labels
##
shades <- colorRampPalette(c("blue", "red"))(nrow(gdp))

##
## Exploration of wholesale and retail trade - extract trend series
##
## Both series are treated as quarterly, starting in 1987 Q2.
wholesale.d <- decompose(ts(gdp$Wholesale.Trade, frequency = 4,
                            start = c(1987, 2)),
                         type = "multiplicative")
wholesale <- wholesale.d$trend

retail.d <- decompose(ts(gdp$Retail.Trade, frequency = 4,
                         start = c(1987, 2)),
                      type = "multiplicative")
retail <- retail.d$trend

##
## Exploratory plot 1
##

plot(wholesale, retail, bty = "l")

##
## Exploratory plot 2
##
plot(cbind(wholesale, retail), bty = "l")

##
## Main plot
##
png(filename = "C:\\Temp\\MBIE\\xx.png", width = 680, height = 480,
    units = "px")
eqscplot(diff(wholesale), diff(retail), bty = "l", cex = .5,
         col = c("grey60", shades),
         xlab = "Growth since last quarter in wholesale contribution to GDP, $m",
         ylab = "Growth since last quarter in retail contribution to GDP, $m")
grid(col = "grey90", lty = 1)

##
## Draw diagonal line showing when the two variables are equal
##
abline(0, 1)

##
## Draw line showing regression
##
abline(lm(diff(retail) ~ diff(wholesale), na.action = "na.omit"), lty = 2)
dev.off()

##
## END OF MBIE SUPPLIED PROGRAMME
##

##
## Write programme for the remaining selection tasks.
##

##
## Lets get the dotted line regression stats for the write up.
##
summary(lm(diff(retail) ~ diff(wholesale), na.action = "na.omit"))

##
## Estimate wholesale trade and retail trade post 2008 recovery
##
## Keep only the value-added columns, decompose them all as one multivariate
## quarterly series, and keep the trend component.
GDP <- gdp[, !names(gdp) %in% c('Finance.Service.Charge', 'Quarter', 'Date')]
SeasAdj_GDP <- decompose(ts(GDP, frequency = 4, start = c(1987, 2)),
                         type = "multiplicative")
Trend_GDP <- as.data.frame(SeasAdj_GDP$trend)

## Row 76 is the base period. With a quarterly series starting in 1987 Q2
## that is 2006 Q1 (assumes the CSV has no missing quarters - TODO confirm).
BasePeriodTrend_GDP <- Trend_GDP[76, ]
## Repeat the single base row once per observation so the division below is
## element-wise, column by column.
BasePeriodTrend_GDP <- BasePeriodTrend_GDP[rep(nrow(BasePeriodTrend_GDP),
                                               nrow(Trend_GDP)), ]
TrendGDP_Rebased <- (Trend_GDP / BasePeriodTrend_GDP) * 1000
names(TrendGDP_Rebased) <- names(GDP)

## Put the period identifiers back on as columns 34 and 35.
TrendGDP_Rebased <- cbind(TrendGDP_Rebased, gdp$Quarter, gdp$Date)
names(TrendGDP_Rebased)[34:35] <- c("Quarter", "Date")
str(TrendGDP_Rebased)

##
## Thats the series index, rebased using row 76 of the trend as the base
## period. NOTE(review): the original note said "2008 Qrt 1", but row 76 of a
## quarterly series starting 1987 Q2 is 2006 Q1, which is also what the plot
## axis labels state - confirm the intended base period.
##
## Lookup table from column position (time) to column name.
test <- data.frame(names(TrendGDP_Rebased), time = 1:35)
test

## Stack the 33 industry columns into one long Trend_Value_Added column; the
## generated "time" column holds the position of the source column.
## (The original passed 0:33; the zero index selects nothing, so 1:33 names
## the same columns.)
TrendGDP_Rebased <- reshape(TrendGDP_Rebased,
                            direction = "long",
                            varying = list(1:33),
                            idvar = c("Quarter", "Date"),
                            v.names = c("Trend_Value_Added"))
rownames(TrendGDP_Rebased) <- NULL

## Attach the industry name that belongs to each "time" index.
TrendGDP_Rebased <- merge(TrendGDP_Rebased, test, by = c("time"))
head(TrendGDP_Rebased)
names(TrendGDP_Rebased)[5] <- "Industry"

For_GGPlot <- as.data.frame(subset(TrendGDP_Rebased, year(Date) > 2004))
head(For_GGPlot, 100)

##
## Plot Retail and Wholesale Trade Growth Rates Only
##

## Keep the two trade industries, from 2006 onwards.
For_GGPlot <- subset(TrendGDP_Rebased,
                     Industry %in% c("Retail.Trade", "Wholesale.Trade"))
For_GGPlot <- subset(For_GGPlot, year(Date) > 2005)

## One line per industry; the legend is hidden and each line is labelled with
## its industry name at the 2012-06-01 observation instead.
ggplot(For_GGPlot, aes(x = Date, y = Trend_Value_Added, group = Industry,
                       colour = Industry)) +
  geom_line(aes(group = Industry)) +
  geom_point() +
  theme(legend.background = element_rect(colour = "black"),
        legend.position = "none") +
  geom_text(data = For_GGPlot[For_GGPlot$Date == "2012-06-01", ],
            aes(label = Industry), hjust = 0, vjust = 1, size = 4) +
  ggtitle("Fig 2: Retail and Wholesale Trade Value Added\n(Trend GDP Expressed as an Index)\n") +
  xlab("Date") +
  ylab("Index of Value Added\n(Base: 2006 Qrt 1)")

##
## Size   Height x Width (mm)   Height x Width (in)
## A3     420 x 297 mm          16.5 x 11.7 in
## A4     297 x 210 mm          11.7 x 8.3 in
##
ggsave("C:\\Temp\\MBIE\\Wholesale_Retail_Only.png", width = 11.7,
       height = 8.3, dpi = 600)

##
## Do a Larger Expanded Analysis of GDP Change
##

## Drop the listed series and keep everything else, from 2006 onwards.
For_GGPlot <- subset(
  TrendGDP_Rebased,
  !Industry %in% c("Agriculture",
                   "Total.All.Industries",
                   "Unallocated",
                   "Owner.Occupied.Property.Operation..National.Accounts.Only.")
)
For_GGPlot <- subset(For_GGPlot, year(Date) > 2005)

## Same layout as the retail/wholesale chart, with smaller end-of-line labels.
ggplot(For_GGPlot, aes(x = Date, y = Trend_Value_Added, group = Industry,
                       colour = Industry)) +
  geom_line(aes(group = Industry)) +
  geom_point() +
  theme(legend.background = element_rect(colour = "black"),
        legend.position = "none") +
  geom_text(data = For_GGPlot[For_GGPlot$Date == "2012-06-01", ],
            aes(label = Industry), hjust = 0, vjust = 1, size = 2) +
  ggtitle("Fig 5: GDP by Detailed Industry\n(Trend GDP Expressed as an Index)\n") +
  xlab("Date") +
  ylab("Index of Value Added\n(Base: 2006 Qrt 1)")

ggsave("C:\\Temp\\MBIE\\All_Industry_Change-Linegraph.png", width = 11.7,
       height = 8.3, dpi = 600)

##
## Check out the Contribution to GDP
##

## One-digit industry group for each of the 33 value-added columns of GDP, in
## column order. The last column has no group (NA).
OneDigit <- c(
  "Agriculture, Forestry and Fishing",
  "Agriculture, Forestry and Fishing",
  "Agriculture, Forestry and Fishing",
  "Mining",
  "Manufacturing", "Manufacturing", "Manufacturing",
  "Manufacturing", "Manufacturing", "Manufacturing",
  "Manufacturing", "Manufacturing", "Manufacturing",
  "Electricity, Gas, Water and Waste Services",
  "Construction",
  "Wholesale Trade",
  "Retail Trade",
  "Accommodation and Food Services",
  "Transport, Postal and Warehousing",
  "Information Media and Telecommunications",
  "Financial and Insurance Services",
  "Rental, Hiring and Real Estate Services",
  "Owner Occupied Dwellings",
  "Professional, Scientific and Technical Services",
  "Administrative and Support Services",
  "Public Administration and Safety",
  "Public Administration and Safety",
  "Education and Training",
  "Health Care and Social Assistance",
  "Arts and Recreation Services",
  "Other Services",
  "Unallocated",
  NA
)

GDP <- gdp[, !names(gdp) %in% c('Finance.Service.Charge')]

##
## (1) Aggregate the Industries up into one digit Industry Levels
## (2) Annualise the GDP measures to year ending December
## (3) Derive Differences, to identify GDP contribution
## (4) Graph contribution by industry and December year
##

## (1) Aggregate the Industries up into one digit Industry Levels
## Columns 2:33 are stacked into Value_Added; column 34 is left unstacked.
OneDigitGDP <- reshape(GDP,
                       direction = "long",
                       varying = list(2:33),
                       idvar = c("Quarter", "Date"),
                       v.names = c("Value_Added"))
rownames(OneDigitGDP) <- NULL

## (2) Annualise the GDP measures to year ending December
## Lookup from stacked position (time) to column name and one-digit group.
## This merge has to run before the aggregation, which reads the OneDigit
## column it adds.
test <- data.frame(names(GDP[, 2:34]), OneDigit, time = 1:33)
OneDigitGDP <- as.data.frame(merge(OneDigitGDP, test, by = c("time")))

## Quarters with a month after March are assigned to the following year.
## NOTE(review): that groups by years ending in March, whereas the headings
## and chart labels say "December" years - confirm which is intended.
OneDigitGDP$Financial_Year <- ifelse(month(OneDigitGDP$Date) > 3,
                                     year(OneDigitGDP$Date) + 1,
                                     year(OneDigitGDP$Date))
OneDigitGDP$Year <- year(OneDigitGDP$Date)

Aggregated_GDP_by_Industry <- aggregate(
  OneDigitGDP$Value_Added,
  list(Year = OneDigitGDP$Financial_Year,
       Industry = OneDigitGDP$OneDigit),
  sum
)
Aggregated_GDP <- aggregate(OneDigitGDP$Value_Added,
                            list(Year = OneDigitGDP$Financial_Year),
                            sum)

## (3) Derive Differences, to identify GDP contribution
## Spread to one column per industry, then difference consecutive years.
Contribution <- ts(reshape(Aggregated_GDP_by_Industry,
                           timevar = "Industry",
                           idvar = "Year",
                           direction = "wide"),
                   start = c(1987))
Contribution <- data.frame(diff(Contribution))
name <- names(Contribution)
## Columns 2:22 are the industry columns; column 1 is Year.
Contribution <- reshape(Contribution,
                        varying = list(2:22),
                        direction = "long",
                        v.names = c("GDP_Contribution"))

## (3.1) Put all of the industry and the year labels back on
## time 0 pairs with the first name (Year); times 1..21 with the industries.
nam <- data.frame(name, time = 0:21)
Contribution <- merge(Contribution, nam, by = c("time"))
## id is the row number of the differenced table; map it to a year label.
nam <- data.frame(id = 1:25, year = 1989:2013)
Contribution <- merge(Contribution, nam, by = c("id"))
Contribution <- subset(Contribution, year < 2013)
Contribution <- subset(Contribution, year > 1999)

## (4) Graph contribution by industry and December year
ggplot(Contribution, aes(x = year, y = GDP_Contribution, group = name,
                         colour = name)) +
  geom_line(aes(group = name)) +
  geom_point() +
  geom_text(data = Contribution[Contribution$year == 2012, ],
            aes(label = name), hjust = 0, vjust = 1, size = 2) +
  theme(legend.background = element_rect(colour = "black"),
        legend.position = "none") +
  ggtitle("Contribution to GDP\n") +
  xlab("Date") +
  ylab("$(Mill)")

##
## Set up the labeller functionally for the graphing
##
## mf_labeller(var, value)
##   var   - name of the facetting variable being labelled (a single string).
##   value - the facet values to label; coerced to character.
##   Returns a character vector the same length as `value`. When `var` is
##   "name", values found in the lookup below are replaced by their short
##   multi-line label; all other values are returned unchanged.
##
## NOTE(review): the (var, value) signature appears to be the older ggplot2
## labeller convention - confirm it matches the installed ggplot2 version.
##
mf_labeller <- function(var, value) {
  value <- as.character(value)
  if (var == "name") {
    short_labels <- c(
      "x.Agriculture..Forestry.and.Fishing" = "Agri-\nculture\nForestry\nFishing",
      "x.Agriculture" = "Agriculture",
      "x.Forestry.and.Logging" = "Forestry",
      "x.Fishing..Aquaculture.and.Agriculture..Forestry.and.Fishing.Support.Services" = "Fishing",
      "x.Mining" = "Mining",
      "x.Food..Beverage.and.Tobacco.Product.Manufacturing" = "Food\nManufacturing",
      "x.Textile..Leather..Clothing.and.Footwear.Manufacturing" = "Textile\nManufacturing",
      "x.Wood.and.Paper.Products.Manufacturing" = "Wood\nManufacturing",
      "x.Printing" = "Printing",
      "x.Petroleum..Chemical..Polymer.and.Rubber.Product.Manufacturing" = "Petrol\nManufacturing",
      "x.Non.Metallic.Mineral.Product.Manufacturing" = "Mineral\nManufacturing",
      "x.Metal.Product.Manufacturing" = "Metal\nManufacturing",
      "x.Transport.Equipment..Machinery.and.Equipment.Manufacturing" = "Trans\nMach & Equip\nManufacturing",
      "x.Furniture.and.Other.Manufacturing" = "Furniture\nManufacturing",
      "x.Electricity..Gas..Water.and.Waste.Services" = "Utilities",
      "x.Construction" = "Cons-\ntruction",
      "x.Manufacturing" = "Manu\nfacturing",
      "x.Wholesale.Trade" = "Whole\nsale\nTrade",
      "x.Retail.Trade" = "Retail\nTrade",
      "x.Accommodation.and.Food.Services" = "Accom\n&\nFood",
      "x.Transport..Postal.and.Warehousing" = "Trans\nPost\nWare",
      "x.Information.Media.and.Telecommunications" = "ICT",
      "x.Financial.and.Insurance.Services" = "Financial\n&\nInsur-\nance",
      "x.Rental..Hiring.and.Real.Estate.Services" = "Rental\n&\nReal\nEstate",
      "x.Owner.Occupied.Dwellings" = "Owner\nOccupied\nDwellings",
      "x.Professional..Scientific.and.Technical.Services" = "Prof\nServices",
      "x.Administrative.and.Support.Services" = "Admin\n&\nSupport",
      # The original key contained a space ("and Safety"), which can never
      # equal a syntactic column name, so this label was never applied.
      "x.Public.Administration.and.Safety" = "Public\nAdmin\nSafety",
      "x.Local.Government.Administration" = "Local\nGovt",
      "x.Central.Government.Administration..Defence.and.Public.Safety" = "Central\nGovt",
      "x.Education.and.Training" = "Edu-\ncation",
      "x.Health.Care.and.Social.Assistance" = "Health\n&\nSocial",
      "x.Arts.and.Recreation.Services" = "Arts\n&\nRec",
      "x.Other.Services" = "Other\nServices",
      "x.Unallocated" = "Un\nallo\ncated",
      "x.Total.All.Industries" = "Total"
    )
    # %in% is FALSE for NA, so missing values pass through untouched.
    known <- value %in% names(short_labels)
    value[known] <- unname(short_labels[value[known]])
  }
  return(value)
}

##
## Contribution to GDP, by Industry
##

## One facet per industry, with bars drawn over the line, relabelled through
## mf_labeller.
plot <- qplot(year, GDP_Contribution,
              data = Contribution,
              color = name,
              geom = c("line"),
              main = "Fig 3: Contribution to Annual GDP by Industry\n(December Annual Years)\n",
              ylab = "$(Mill)",
              xlab = "December Annual Years",
              facets = . ~ name)
plot <- plot + facet_grid(. ~ name, labeller = mf_labeller, scales = "free",
                          space = "free")
plot <- plot + geom_bar(stat = "identity")
plot <- plot + theme(legend.background = element_rect(colour = "black"),
                     legend.position = "none")
plot <- plot + theme(plot.title = element_text(size = 15),
                     axis.text.x = element_text(angle = 90, vjust = 0.5,
                                                size = 7))
plot
ggsave("C:\\Temp\\MBIE\\All_Industry_Change-Bargraph2.png", width = 11.7,
       height = 8.3, dpi = 600)

##
## Contribution to GDP: Take a deeper look at Manufacturing and
## Agriculture - they were the source of change
##

##
GDP <- gdp[, !names(gdp) %in% c('Finance.Service.Charge')]

##
## (1) Aggregate the Industries up into one digit Industry Levels
## (2) Annualise the GDP measures to year ending December
## (3) Derive Differences, to identify GDP contribution
## (4) Graph contribution by industry and December year
##

## (1) Aggregate the Industries up into one digit Industry Levels
OneDigitGDP <- reshape(GDP,
                       direction = "long",
                       varying = list(2:33),
                       idvar = c("Quarter", "Date"),
                       v.names = c("Value_Added"))
rownames(OneDigitGDP) <- NULL

## (2) Annualise the GDP measures to year ending December.
##     Subset so we've only got Manufacturing and Agriculture
test <- data.frame(names(GDP[, 2:34]), OneDigit, time = 1:33)
OneDigitGDP <- as.data.frame(merge(OneDigitGDP, test, by = c("time")))
## Column 6 is the detailed industry name brought in from `test`.
names(OneDigitGDP)[6] <- "Industry"

## Quarters with a month after March are assigned to the following year.
## NOTE(review): that groups by years ending in March, whereas the headings
## and chart labels say "December" years - confirm which is intended.
OneDigitGDP$Financial_Year <- ifelse(month(OneDigitGDP$Date) > 3,
                                     year(OneDigitGDP$Date) + 1,
                                     year(OneDigitGDP$Date))
OneDigitGDP$Year <- year(OneDigitGDP$Date)
OneDigitGDP <- subset(OneDigitGDP,
                      OneDigit %in% c("Agriculture, Forestry and Fishing",
                                      "Manufacturing"))
Aggregated_GDP_by_Industry <- aggregate(
  OneDigitGDP$Value_Added,
  list(Year = OneDigitGDP$Financial_Year,
       Industry = OneDigitGDP$Industry,
       OneDigit = OneDigitGDP$OneDigit),
  sum
)

## (3) Derive Differences, to identify GDP contribution
## One row per Year x OneDigit group, one column per detailed industry.
Contribution <- reshape(Aggregated_GDP_by_Industry,
                        timevar = "Industry",
                        idvar = c("Year", "OneDigit"),
                        direction = "wide")

##
## Agriculture
##
Contribution_Agg <- subset(Contribution, OneDigit != 'Manufacturing')
Contribution_Agg <- ts(Contribution_Agg, start = c(1987))
Contribution_Agg <- data.frame(diff(Contribution_Agg))
name <- names(Contribution_Agg)
## Columns 3:14 are the detailed-industry columns (1 = Year, 2 = OneDigit).
Contribution_Agg <- reshape(Contribution_Agg,
                            varying = list(3:14),
                            direction = "long",
                            v.names = c("GDP_Contribution"))
## times -1 and 0 pair with Year and OneDigit; 1..12 with the industries.
nam <- data.frame(name, time = -1:12)
Contribution_Agg <- merge(Contribution_Agg, nam, by = c("time"))
nam <- data.frame(id = 1:25, year = 1989:2013)
Contribution_Agg <- merge(Contribution_Agg, nam, by = c("id"))

##
## Manufacturing
##
Contribution_Man <- subset(Contribution, OneDigit == 'Manufacturing')
Contribution_Man <- ts(Contribution_Man, start = c(1987))
Contribution_Man <- data.frame(diff(Contribution_Man))
name <- names(Contribution_Man)
Contribution_Man <- reshape(Contribution_Man,
                            varying = list(3:14),
                            direction = "long",
                            v.names = c("GDP_Contribution"))
nam <- data.frame(name, time = -1:12)
Contribution_Man <- merge(Contribution_Man, nam, by = c("time"))
nam <- data.frame(id = 1:25, year = 1989:2013)
Contribution_Man <- merge(Contribution_Man, nam, by = c("id"))

##
## Put them back together
##
## Each subset carries all-NA columns for the other sector's industries; drop
## those rows. (The original compared against the string 'NA', which only
## dropped them because the comparison itself evaluated to NA.)
testa <- subset(Contribution_Agg, !is.na(GDP_Contribution))
testb <- subset(Contribution_Man, !is.na(GDP_Contribution))
testa$Source <- "Agriculture"
testb$Source <- "Manufacturing"
Contribution <- rbind(testa, testb)
Contribution <- subset(Contribution, year < 2013)
Contribution <- subset(Contribution, year > 1999)

## (4) Graph contribution by industry and December year
plot <- qplot(year, GDP_Contribution,
              data = Contribution,
              color = name,
              facets = . ~ name | Source,
              geom = c("line"),
              main = "Fig 4: Contribution to GDP: Agriculture and Manufacturing Sub-industries\n(December Annual Years)\n",
              ylab = "$(Mill)",
              xlab = "December Annual Years")
plot <- plot + facet_grid(. ~ name, labeller = mf_labeller, scales = "free",
                          space = "free")
plot <- plot + geom_bar(stat = "identity")
plot <- plot + theme(legend.background = element_rect(colour = "black"),
                     legend.position = "none")
plot <- plot + theme(plot.title = element_text(size = 15),
                     axis.text.x = element_text(angle = 90, vjust = 0.5,
                                                size = 10))
plot
ggsave("C:\\Temp\\MBIE\\Agri_Manu_Detail.png", width = 11.7, height = 8.3,
       dpi = 600)

##
## END OF PROGRAMME
##

## S-ar putea să vă placă și