## ftse100.R - Display employee productivity for FTSE-100 constituents
## Copyright © 2010 Allan Engelhardt
## All Rights Reserved.

## Get the index constituents.
## stringsAsFactors is set explicitly so the symbols are plain strings on
## every R version (the default changed in R 4.0.0).
ftse.100 <- read.csv(
  file = "http://uk.old.finance.yahoo.com/d/quotes.csv?s=@%5EFTSE&f=s&e=.csv",
  header = FALSE, stringsAsFactors = FALSE)
names(ftse.100) <- c("symbol")

## Download one page and return it as a single string.
## The connection is always closed, even when reading fails.
fetch_page <- function(page.url) {
  con <- url(page.url, open = "r")
  on.exit(close(con), add = TRUE)
  ## readLines() is the text-mode reader; readChar() expects a binary
  ## connection and warns that results may be incorrect otherwise.
  paste(readLines(con, warn = FALSE), collapse = "\n")
}

## Return the part of `page` selected by `replacement` when `pattern`
## matches, or NA when it does not (sub() alone would silently hand back
## the whole page on a failed match).
extract_field <- function(pattern, page, replacement = "\\1",
                          ignore.case = FALSE) {
  if (!grepl(pattern, page, ignore.case = ignore.case)) {
    return(NA_character_)
  }
  sub(pattern, replacement, page, ignore.case = ignore.case)
}

## Convert a string such as "12,345" to an integer; NA if it is not a
## representable whole number.
to_integer <- function(x) {
  tryCatch(as.integer(gsub(",", "", x)), warning = function(w) NA_integer_)
}

## Pull employees, profit and sector out of a company profile page.
## Returns a list with elements `employees`, `profit` and `sector`.
parse_profile <- function(page) {
  empl <- to_integer(extract_field(
    ".*Number of employees:[[:space:]]*([[:digit:],]+).*", page,
    ignore.case = TRUE))
  ## The page figure is multiplied by 1e6, as in the original script.
  profit <- to_integer(extract_field(
    ".*Net Profit.*?[[:space:]]*([+-]?[[:digit:],]+).*", page)) * 1e6
  ## The previous pattern '.*Sector:(.*?).*' paired a lazy capture with a
  ## greedy tail and therefore never isolated the sector name.  Here any
  ## whitespace/markup after the label is skipped and the text up to the
  ## next tag is captured.
  ## NOTE(review): assumes the sector name is the first text following the
  ## "Sector:" label, possibly wrapped in tags - confirm against the page.
  sector <- trimws(extract_field(
    ".*Sector:([[:space:]]|<[^>]*>)*([^<]+).*", page, replacement = "\\2"))
  if (is.na(sector) || !nzchar(sector)) {
    sector <- "Unknown"  # keep the company in the data set regardless
  }
  list(employees = empl, profit = profit, sector = sector)
}

## For each stock symbol, get employees, profit, and sector
symbols <- as.character(ftse.100$symbol)
rows <- vector("list", length(symbols))  # one slot per symbol; NULL = skipped
for (i in seq_along(symbols)) {
  symbol <- symbols[i]
  profile.url <- paste0("http://uk.finance.yahoo.com/q/pr?s=", symbol)
  ## A failed download is reported and skipped instead of aborting the run.
  page <- tryCatch(fetch_page(profile.url), error = function(e) {
    cat("Error fetching symbol", symbol, "see", profile.url, "\n")
    NA_character_
  })
  if (!is.na(page)) {
    info <- parse_profile(page)
    ## Only strictly positive figures are usable on the log-log plot.
    if (is.na(info$employees) || is.na(info$profit) ||
        info$employees <= 0 || info$profit <= 0) {
      cat("Error parsing symbol", symbol, "see", profile.url, "\n")
    } else {
      rows[[i]] <- data.frame(symbol = symbol, employees = info$employees,
                              profit = info$profit, sector = info$sector,
                              stringsAsFactors = FALSE)
    }
  }
  Sys.sleep(1)  # pause between requests
}

## Combine the per-symbol rows in one step rather than growing a data
## frame inside the loop.
rows <- Filter(Negate(is.null), rows)
if (length(rows) > 0) {
  data <- do.call(rbind, rows)
} else {
  data <- data.frame(symbol = character(0), employees = integer(0),
                     profit = numeric(0), sector = character(0),
                     stringsAsFactors = FALSE)
}
## The plotting code groups by levels(data$sector), so make it a factor
## explicitly instead of relying on the R version's default.
data$sector <- factor(data$sector)

## Save the data so we don't have to hit Yahoo all the time.
## The file is written to the current working directory.
save(data, file = "data.RData")

## Save plot to file:
#png(filename="ftse100.png", width=800, height=800, pointsize=14, bg="white", res=100)

## Without any rows the log-scale axis limits below are undefined.
if (nrow(data) == 0) {
  stop("no company data available to plot", call. = FALSE)
}

opar <- par(cex.sub = sqrt(sqrt(2)), font.sub = 3, font.lab = 2)

## x and y coordinates of plot and plot limits (rounded outwards to whole
## powers of ten)
x <- with(data, employees)
y <- with(data, profit/employees)
xlim <- c(10^floor(log10(min(x))), 10^ceiling(log10(max(x))))
ylim <- c(10^floor(log10(min(y))), 10^ceiling(log10(max(y))))

## Set up to display different color and symbols.
## factor() is applied here because data$sector is a plain character
## column when built by data.frame() on R >= 4.0, in which case levels()
## would return NULL and no sector would be drawn.
sectors <- factor(data$sector)
sector_names <- levels(sectors)
markers <- 21:25
pchs <- rep(markers, length.out = length(sector_names))
## Colours are passed explicitly instead of replacing the session-wide
## palette(), which was never restored afterwards.
sector_cols <- rainbow(length(sector_names), start = 3/6, end = 6/6)

## Make empty plot: the limits themselves are "plotted" invisibly to set
## up the log-log coordinate system.  plot() starts its own frame, so no
## separate plot.new() is needed (it only produced an extra blank page).
plot(xlim, ylim, type = "n", log = "xy",
     xaxp = c(xlim, 1), yaxp = c(ylim, 1),
     xlim = xlim, ylim = ylim,
     main = "Profit per employee (FTSE 100)",
     xlab = "Employees",
     ylab = "Profit per employees (GBP)")

## Plot every company, with symbol and colour chosen by its sector.
## Rows without a sector get NA symbols and are not drawn.
sector_idx <- as.integer(sectors)
points(x, y, pch = pchs[sector_idx],
       col = sector_cols[sector_idx], bg = sector_cols[sector_idx])

if (length(sector_names) > 0) {
  legend(x = "bottomleft", legend = sector_names, title = "Industry Sectors",
         col = sector_cols, pt.bg = sector_cols, pch = pchs,
         cex = 2/3, pt.cex = 1, ncol = 2)
}

## Fit a linear model to the log-log data:
m <- lm(log10(y) ~ log10(x))
## Draw the fitted line slightly inside the plot limits.
xl <- c(xlim[1]*5, xlim[2]/5)
yl <- 10^predict(m, data.frame(x = xl))
lines(xl, yl, col = "darkred", lty = "dashed", lwd = 2)
## Label the line with the fitted slope (named so it does not mask base::t).
power_label <- sprintf("Power = %0.3g", coef(m)[2])
text(xl[2], yl[2], power_label, adj = c(0.25, -1.5), col = "darkred", font = 2)

## All done.
par(opar)
## Closes the active graphics device; this is what flushes the PNG to disk
## when the png() call above is enabled.
dev.off()