POLS/CSSS 503, University of Washington, Spring 2015
You will first need to download tile and simcf from Chris Adolph’s website.
#install.packages("http://faculty.washington.edu/cadolph/software/simcf_0.2.13.tar.gz")
#install.packages("http://faculty.washington.edu/cadolph/software/tile_0.4.10.tar.gz")
library("devtools")
install_github("chrisadolph/tile-simcf", subdir = "tile")
install_github("chrisadolph/tile-simcf", subdir = "simcf")
Also install RColorBrewer and WhatIf
library(MASS)
library(RColorBrewer)
library(WhatIf)
library(dplyr)
library(ggplot2)
library(tile)
library(simcf)
I am also going set my defaults for displaying scientific notation to be more tolerant of many decimal places
options(scipen=10)
simcf
for linear modelssimcf
provides a framework for defining scenarios for prediction (estimated outcomes), it’s short for “Simulate Counter-Factuals”
Here is a brief introduction to a number of useful commands in simcf
help(package="simcf")
?cfMake #make counterfactual scenario... our equivalent of the "newdata" option in predict()
?linearsimev #simulate expected values for a linear model
?linearsimfd #simulate first differences for a linear model
?extractdata #extract data used in a formula from a larger dataframe
?influencePlot #influence plot for outliers
?lagpanel #lag panel data by one or multiple years
?makeFEdummies #make dummy variables out of a factor for fixed effects models
?logBound
?logitBound
We’re going to go through an example of interpreting a regression with the help of simcf
, using the familiar Rossoil data.
rossdata_url <- "http://staff.washington.edu/csjohns/503/rossoildata.csv"
rossdata_raw <- read.csv(rossdata_url, stringsAsFactors = FALSE)
rossdata <- rossdata_raw %>%
select(cty_name, year, regime1, oil, GDPcap, oecd, islam) %>%
na.omit()
Fist we’ll define the formula object and run the regression:
model_form1 <- regime1 ~ GDPcap + oil + oecd
model1<-lm(model_form1, data = rossdata)
summary(model1)
##
## Call:
## lm(formula = model_form1, data = rossdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.610 -2.757 -0.090 2.071 7.645
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.53967360 0.08079026 56.19 <2e-16 ***
## GDPcap 0.00018684 0.00001426 13.11 <2e-16 ***
## oil -0.09263612 0.00503448 -18.40 <2e-16 ***
## oecd 3.21374857 0.17605195 18.25 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.994 on 2567 degrees of freedom
## Multiple R-squared: 0.4066, Adjusted R-squared: 0.4059
## F-statistic: 586.3 on 3 and 2567 DF, p-value: < 2.2e-16
Next we’ll extract the coefficients and vcov matrix, draw 1000 of each parameter:
pe <- model1$coefficients
vc <- vcov(model1) #note if you wanted to use heteroskedastic standard errors use: vc <- hccm(model1)
sims <- 10000
simbetas <- mvrnorm(sims,pe,vc)
Here are the steps we’ll follow to get expected values of democracy:
simcf
default is mean values for all variablesvalueseq_gdp <- seq(min(rossdata$GDPcap), max(rossdata$GDPcap), length.out=40)
We choose a value range based on the observed range of the variable to avoid extrapolating beyond our data
cfMake()
initializes the set of counterfactual scenarios to be predicted with simcf.In this case, we set all variables in model to their mean, and will later change the ones we want to assign specific values to Note that we initialize as many scenarios as we have hypothetical observations of GDP:
xhyp1_no_oecd <- cfMake(model_form1, rossdata, nscen=length(valueseq_gdp), f=mean)
xhyp1_no_oecd
for(i in 1:length(valueseq_gdp)){
xhyp1_no_oecd <- cfChange(xhyp1_no_oecd, "GDPcap", valueseq_gdp[i], scen=i)
xhyp1_no_oecd <- cfChange(xhyp1_no_oecd, "oecd", 0, scen=i)
}
Look at what changed:
xhyp1_no_oecd
Note two parts of this object,$x
and $xpre
. To calculate expected values, we only need to worry about values in the x
matrix; $pre
is ignored unless we’re doing first differences (expected change in y resulting from change in x), in which case $xpre
will be the initial value of x that will be the starting point for the difference.
This is what took a lengthy loop when we did it “by hand”, looping through each of the 1000 draws from the multivariate normal distribution of the betas.
yhyp_no_oecd <- linearsimev(xhyp1_no_oecd, simbetas)
yhyp_no_oecd
cf1_no_oecd <- cbind(xhyp1_no_oecd$x, yhyp_no_oecd)
ggplot(cf1_no_oecd, aes(x = GDPcap)) +
geom_line(aes(y = pe)) +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 1/3) +
labs(x = "GDP per capita",
y = "Expected Democracy Level",
title = "Expected Regime Type Regressed on GDP per Capita")
xhyp1_oecd <- cfMake(model_form1, rossdata, nscen = length(valueseq_gdp), f = mean)
xhyp1_oecd
for(i in 1:length(valueseq_gdp)) {
xhyp1_oecd <- cfChange(xhyp1_oecd, "GDPcap", valueseq_gdp[i], scen=i)
xhyp1_oecd <- cfChange(xhyp1_oecd, "oecd", 1, scen=i)
}
Look at what changed:
xhyp1_oecd
yhyp_oecd <- linearsimev(xhyp1_oecd, simbetas)
yhyp_oecd
At this point, we could plot the way we have before:
cf1_oecd <- cbind(xhyp1_oecd$x, yhyp_oecd)
cf1_all <- merge(cf1_oecd, cf1_no_oecd, all = TRUE)
ggplot() +
geom_line(data = cf1_no_oecd, aes(x = GDPcap, y = pe), color = "blue") +
geom_ribbon(data = cf1_no_oecd, aes(x = GDPcap, ymin = lower, ymax = upper), alpha = 1/3, color = "blue", fill = "blue") +
geom_line(data = cf1_oecd, aes(x = GDPcap,y = pe), color = "red") +
geom_ribbon(data = cf1_oecd, aes(x = GDPcap, ymin = lower, ymax = upper), alpha = 1/3, color = "red", fill = "red") +
labs(x = "GDP per capita",
y = "Expected Democracy Level",
title = "Expected Regime Type Regressed on GDP per Capita")
#Another way, using merged scenarios:
ggplot(cf1_all, aes(x = GDPcap, colour = factor(oecd), fill = factor(oecd))) +
geom_line(aes(y = pe)) +
geom_ribbon(aes(x = GDPcap, ymin = lower, ymax = upper), alpha = 1/3) +
scale_colour_discrete("oecd") +
scale_fill_discrete("oecd") +
labs(x = "GDP per capita",
y = "Expected Democracy Level",
title = "Expected Regime Type Regressed on GDP per Capita")
Here’s another similar example, illustrating the use of logBound()
:
model_form2 <- regime1 ~ logBound(GDPcap) + oil + oecd
model2 <- lm(model_form2, data = rossdata)
summary(model2)
pe <- model2$coefficients
vc <- vcov(model2) #note if you wanted to use heteroskedastic standard errors use: vc <- hccm(model1)
sims <- 10000
simbetas <- mvrnorm(sims, pe, vc)
Set up counterfactual scenarios, creating hypothetical values for OECD and non-OECD countries:
xhyp2_no_oecd <- xhyp2_oecd <- cfMake(model_form2, rossdata, nscen = length(valueseq_gdp), f=mean) #using the same valuesequence defined above
#now change hypothetical value for GDP per capita in each scen, for a non-oecd country:
for(i in 1:length(valueseq_gdp)) {
xhyp2_no_oecd <- cfChange(xhyp2_no_oecd, "GDPcap", valueseq_gdp[i], scen=i)
xhyp2_no_oecd <- cfChange(xhyp2_no_oecd, "oecd", 0, scen=i)
}
xhyp2_no_oecd
Now make expected y’s
yhyp2_no_oecd <- linearsimev(xhyp2_no_oecd, simbetas)
yhyp2_no_oecd
Now change hypothetical value for GDP per capita in each scen, but for an oecd country:
for(i in 1:length(valueseq_gdp)){
xhyp2_oecd <- cfChange(xhyp2_oecd, "GDPcap", valueseq_gdp[i], scen=i)
xhyp2_oecd <- cfChange(xhyp2_oecd, "oecd", 1, scen=i)
}
#look at what changed:
xhyp2_oecd
Now make expected y’s:
yhyp2_oecd <- linearsimev(xhyp2_oecd, simbetas)
yhyp2_oecd
Plot the two together:
cf2_all <- merge(cbind(xhyp2_no_oecd$x, yhyp2_no_oecd), cbind(xhyp2_oecd$x, yhyp2_oecd), all = TRUE)
ggplot(cf2_all, aes(x = GDPcap, colour = factor(oecd), fill = factor(oecd))) +
geom_line(aes(y = pe)) +
geom_ribbon(aes(x = GDPcap, ymin = lower, ymax = upper), alpha = 1/3) +
scale_colour_discrete("oecd") +
scale_fill_discrete("oecd") +
labs(x = "GDP per capita",
y = "Expected Democracy Level",
title = "Expected Regime Type Regressed on GDP per Capita")
Challenge: What can you say about the model (which had a better fit than model 1) now? What substantive claims are made by selecting this model over Model 1?
Set up the basic model (without logit bounding islam):
model_form3 <- regime1 ~ log(GDPcap) + oil + oecd + islam
model3 <- lm(model_form3, data = rossdata)
summary(model3)
For purposes of illustration, convert islam into proper ratio 0-1:
rossdata$islam <- rossdata$islam/100
See what logitBound is doing:
logitBound(rossdata$islam)
Setup the logit bounded model:
model_form3b <- regime1 ~ log(GDPcap) + oil + oecd + logitBound(islam)
model3b<-lm(model_form3b, data = rossdata)
summary(model3b)
Extract the coefficients and vcov matrix, draw 1000 of each parameter:
pe <- model3b$coefficients
vc <- vcov(model3b) #note if you wanted to use heteroskedastic standard errors use: vc <- hccm(model1)
sims <- 10000
simbetas <- mvrnorm(sims,pe,vc)
Set up Islam value sequence
valueseq_islam <- seq(min(rossdata$islam), max(rossdata$islam), length.out=40)
Predicted values for rich country # initialize scenarios:
xhyp3_rich <- xhyp3_poor <- cfMake(model_form3b, rossdata, nscen=length(valueseq_islam), f=mean)
for(i in 1:length(valueseq_islam)) {
xhyp3_rich <- cfChange(xhyp3_rich, "islam", valueseq_islam[i], scen=i)
xhyp3_rich <- cfChange(xhyp3_rich, "GDPcap", x=quantile(rossdata$GDPcap, p=.9), scen=i)
}
for(i in 1:length(valueseq_islam)) {
xhyp3_poor <- cfChange(xhyp3_poor, "islam", valueseq_islam[i], scen=i)
xhyp3_poor <- cfChange(xhyp3_poor, "GDPcap", x=quantile(rossdata$GDPcap, p=.1), scen=i)
}
#Check them out:
xhyp3_rich$x
xhyp3_poor$x
Now make expected y’s:
# rich countries:
yhyp3_rich <- linearsimev(xhyp3_rich, simbetas)
# poor countries:
yhyp3_poor <- linearsimev(xhyp3_poor, simbetas)
Let’s plot these now, similarly to before:
cf3_all <- merge(cbind(xhyp3_rich$x, yhyp3_rich), cbind(xhyp3_poor$x, yhyp3_poor), all = TRUE)
ggplot(cf3_all, aes(x = islam, y = pe, colour = factor(GDPcap), fill = factor(GDPcap))) +
geom_line(aes(y = pe)) +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 1/6) +
scale_colour_discrete("GDPcap") +
scale_fill_discrete("GDPcap") +
labs(x = "Percent Islam",
y = "Expected Democracy Level",
title = "Expected Regime Type Regressed on Islam, Rich and Poor Countries")
This is not actually very interesting, but is a teachable example.
Using data from homework 4, build a model and use simcf to generate illustrative expected values and confidence intervals. Try using log or logit transformations
To load homework data
dataHW <- read.csv("http://faculty.washington.edu/cadolph/503/ross95.csv", header=TRUE, stringsAsFactors=FALSE)
tile
!Tile works by first specifying a series of ‘traces’ that can be lines, ropeladders, legend, text, etc. Once all traces are specified, they can then all be plotted simultaneously in the tile()
command (it’s logically kind of the reverse order to ggplot
). Tile is designed to easily and attractively plot ‘tiled’ figures, aiding visual inference and interpretation from multiple scenarios.
In this example, you can also see that I have not needed to combine x and y values in scenarios into a single tidy dataframe (Wickham would be scandalized). You could also do this using the dataframes of combined x and y scenario information (hey, look, a challenge!)
trace1 <- lineplot(x = valueseq_gdp,
y = yhyp_no_oecd$pe,
lower = yhyp_no_oecd$lower,
upper = yhyp_no_oecd$upper,
ci = list(mark="shaded"),
col = "red",
plot = 1
)
trace1b <- lineplot(x = valueseq_gdp,
y = yhyp_oecd$pe,
lower = yhyp_oecd$lower,
upper = yhyp_oecd$upper,
ci = list(mark="shaded"),
col = "blue",
plot = 1
)
## This legend trace is designed to automatically map 'in plot' labels to the actual plotted locations of the data
legendTrace <- textTile(x = c(min(valueseq_gdp)+2500, max(valueseq_gdp)-1500),
y = c(yhyp_oecd$upper[1]+.75, yhyp_no_oecd$lower[length(valueseq_gdp)]-.75),
labels = c("non-OECD","OECD"),
cex = .8,
col = c("blue","red"),
pos = c(4,4),
plot = 1
)
tc <- tile(trace1, trace1b, legendTrace,
RxC = c(1,1), #increase these if you're using multiple plots at once
limits = c(min(valueseq_gdp), max(valueseq_gdp),0,15), #xlim first, then ylim
xaxistitle = list(labels="GDP per Capita"),
yaxistitle = list(labels="Expected Value of Democracy"),
plottitle = list(labels="Effect of Democracy from Model 1"),
gridlines = list(type="xy"),
frame = TRUE,
output = list(width=6, outfile="model1_results1", type="pdf")
)
The logic of tile
means that you can tweak just one trace and then re-generate the plot, or use the same traces but in a different configuration to create a second plot:
trace1_t <- lineplot(x = valueseq_gdp,
y = yhyp_oecd$pe,
lower = yhyp_oecd$lower,
upper = yhyp_oecd$upper,
ci = list(mark="shaded"),
col = "blue",
plot = 2
)
tc <- tile(trace1, trace1_t,
RxC = c(1,2), #increase these if you're using multiple plots at once
limits = c(min(valueseq_gdp), max(valueseq_gdp),0,15), #xlim first, then ylim
xaxistitle = list(labels="GDP per Capita"),
yaxistitle = list(labels="Expected Value of Democracy"),
maintitle = list(labels="Effect of Democracy from Model 1"),
gridlines = list(type="xy"),
frame = TRUE,
output = list(width=6, outfile="model1_results1_tiled", type="pdf")
)
pe <- model1$coefficients
vc <- vcov(model1) #note if you wanted to use heteroskedastic standard errors use: vc <- hccm(model1)
sims <- 10000
simbetas <- mvrnorm(sims,pe,vc)
valueseq_oil <- seq(min(rossdata$oil), max(rossdata$oil), length.out=40)
xhyp1_oil <- cfMake(model_form1, rossdata, nscen=length(valueseq_oil), f=mean)
xhyp1_oil
for(i in 1:length(valueseq_oil)){
xhyp1_oil <- cfChange(xhyp1_oil, "oil", valueseq_oil[i], scen=i)
}
yhyp1_oil <- linearsimev(xhyp1_oil, simbetas)
trace1c <- lineplot(x = valueseq_oil,
y = yhyp1_oil$pe,
lower = yhyp1_oil$lower,
upper = yhyp1_oil$upper,
ci = list(mark="shaded"),
col = "red",
plot = 2
)
tc <- tile(trace1, trace1b, trace1c, legendTrace,
RxC = c(1,2), #increase these if you're using multiple plots at once
limits=matrix(c(0,25000,0,15,0,75,0,15),nrow=2,ncol=4,byrow=T), #xlim first, then ylim
xaxistitle=list(labels=c("GDP per Capita","Oil Production")),
yaxistitle=list(labels="Expected Value of Democracy"),
maintitle = list(labels="Results from Model 1"),
gridlines = list(type="xy"),
frame=TRUE,
output=list(width=10, outfile="model1_results2", type="pdf")
)
See documentation for more detail: ?tile
This is where simcf really starts to make things easier (that, and transformations, and interactions and more)
Let’s use model 3 this time
Generate simulated betas from the transformed model 3
pe <- model3b$coefficients
vc <- vcov(model3b) #note if you wanted to use heteroskedastic standard errors use: vc <- hccm(model1)
sims <- 10000
simbetas <- mvrnorm(sims,pe,vc)
Intialize and modify the scenarios for comparison, setting up the before and after values of x for the first differencing. We’re going to set up four scenarios, cycling through each variable of interest and holding all other constant at their means.
This illustrates the three main cf functions, cfMake
, cfChange
, and cfName
:
xhyp3fd <- cfMake(model_form3b, rossdata, f = mean, nscen = 4)
xhyp3fd <- cfName(xhyp3fd, "GDP, mean to 95%", scen = 1)
xhyp3fd <- cfChange(xhyp3fd, "GDPcap", xpre = mean(rossdata$GDPcap), x = quantile(rossdata$GDPcap, p = .95), scen = 1)
# Note, because defaults for scenarios are set to mean, don't actually need to specify xpre
xhyp3fd <- cfName(xhyp3fd, "Oil, mean to 95%", scen = 2)
xhyp3fd <- cfChange(xhyp3fd, "oil", x = quantile(rossdata$oil, p = .95), scen = 2)
xhyp3fd <- cfName(xhyp3fd, "Islam, mean to 95%", scen=3)
xhyp3fd <- cfChange(xhyp3fd, "islam", x = quantile(rossdata$islam, p = .95), scen = 3)
xhyp3fd <- cfName(xhyp3fd, "OECD", scen = 4)
# here we set xpre to 0, to show first difference in for a binary covariate:
xhyp3fd <- cfChange(xhyp3fd, "oecd", xpre = 0, x = 1, scen = 4)
xhyp3fd
We calculate the first difference (and its confidence) just as easily as we did the point estimates, just using the linearsimfd()
function instead of linearsimev()
:
yfd<-linearsimfd(xhyp3fd, simbetas)
yfd
One of the easiet and most effective way to present first differences is with a ropeladder plot. This is easily generated in tile()
, but you can also make similar plots in ggplot
(see geom_pointrange()
; use coord_flip()
to get horizontal lines as in ropeladders here). I will illustrate tile
here as another example.
rlTrace <- ropeladder(x = yfd$pe,
lower = yfd$lower,
upper = yfd$upper,
labels = row.names(xhyp3fd$x),
plot=1
)
vertmark <- linesTile(x = c(0,0),
y = c(0,1),
lty = "solid",
plot = 1
)
dev.off()
# Create plot
tc <- tile(rlTrace, vertmark,
output = list(file = "model3rl", width=6), #adjusting output width is an effective way to get nicer proportions within a plot
xaxistitle = list(labels=c("Expected change in democracy score")),
maintitle = list(labels="First differences in regime for all Model 3 covariates"),
gridlines=list(type="xy",lty=3, col="grey"),
draw=TRUE
)
Challenge: Repeat the above simcf() calculations to get first differences for a change from 5th percentile to the mean for the same model. How do the effect sizes change? Why do you think this would happen?
xhyp3fd_2 <- cfMake(model_form3b, rossdata, f = mean, nscen = 4)
xhyp3fd_2 <- cfName(xhyp3fd_2, "GDP, 5% to mean", scen = 1)
xhyp3fd_2 <- cfChange(xhyp3fd_2, "GDPcap", xpre = quantile(rossdata$GDPcap, p = .05), x = mean(rossdata$GDPcap), scen = 1)
# Note, because defaults for scenarios are set to mean, don't actually need to specify x, as we're leaving x at it's mean
xhyp3fd_2 <- cfName(xhyp3fd_2, "Oil, 5% to mean", scen = 2)
xhyp3fd_2 <- cfChange(xhyp3fd_2, "oil", xpre = quantile(rossdata$oil, p = .05), scen = 2)
xhyp3fd_2 <- cfName(xhyp3fd_2, "Islam, 5% to mean", scen=3)
xhyp3fd_2 <- cfChange(xhyp3fd_2, "islam", xpre = quantile(rossdata$islam, p = .05), scen = 3)
xhyp3fd_2 <- cfName(xhyp3fd_2, "OECD", scen = 4)
# here we set xpre to 0, to show first difference in for a binary covariate:
xhyp3fd_2 <- cfChange(xhyp3fd_2, "oecd", xpre = 0, x = 1, scen = 4)
xhyp3fd_2
yfd_2<-linearsimfd(xhyp3fd_2, simbetas)
yfd_2
rlTrace_2 <- ropeladder(x = yfd_2$pe,
lower = yfd_2$lower,
upper = yfd_2$upper,
labels = row.names(xhyp3fd_2$x),
col = "red",
plot=2
)
vertmark$plot = c(1)
rlTrace_2$plot = 1
rlTrace$labels = c("GDP", "Oil", "Islam", "OECD")
rlTrace_2$labels = c("GDP", "Oil", "Islam", "OECD")
rlTrace$sublabels = rep("mean to 95%", 4)
rlTrace_2$sublabels = rep("5% to mean", 4)
rlTrace$sublabelsX <- 0.05
rlTrace_2$sublabelsX <- 0.05
rlTrace$sublabelsyoffset <- 0.04
dev.off()
# Create plot
tc <- tile(rlTrace, rlTrace_2, vertmark,
#RxC = c(2,1),
output = list(file = "model3rl", width=6), #adjusting output width is an effective way to get nicer proportions within a plot
xaxistitle = list(labels=c("Expected change in democracy score")),
maintitle = list(labels="First differences in regime for all Model 3 covariates"),
gridlines=list(type="xy",lty=3, col="grey"),
draw=TRUE
)
simcf/tile
for logistic regressionLogit in simcf/tile works much the same as linear models - one nice thing about Chris’s packages is the consistent structure as you move into more complex models.
Example originally developed by Chris Adolph
nesdata <- read.csv("http://staff.washington.edu/csjohns/503/nes00a.csv", header=TRUE)
This is using the very handy extractdata()
function in the simcf
package. This helps you to be working with a consistent and manageable dataset across multiple models, especially useful in cases of missing data that will be dropped rather than imputed.
model <- vote00 ~ age + I(age^2) + hsdeg + coldeg
mdata <- extractdata(model, nesdata, na.rm=TRUE)
logit.result <- glm(model, family=binomial(link=logit), data=mdata)
pe <- logit.result$coefficients # point estimates
vc <- vcov(logit.result) # var-cov matrix
sims <- 10000
simbetas <- mvrnorm(sims, pe, vc)
xhyp <- seq(18,97,1)
nscen <- length(xhyp)
nohsScen <- hsScen <- collScen <- cfMake(model, mdata, nscen)
for (i in 1:nscen) {
# No High school scenarios (loop over each age)
nohsScen <- cfChange(nohsScen, "age", x = xhyp[i], scen = i)
nohsScen <- cfChange(nohsScen, "hsdeg", x = 0, scen = i)
nohsScen <- cfChange(nohsScen, "coldeg", x = 0, scen = i)
# HS grad scenarios (loop over each age)
hsScen <- cfChange(hsScen, "age", x = xhyp[i], scen = i)
hsScen <- cfChange(hsScen, "hsdeg", x = 1, scen = i)
hsScen <- cfChange(hsScen, "coldeg", x = 0, scen = i)
# College grad scenarios (loop over each age)
collScen <- cfChange(collScen, "age", x = xhyp[i], scen = i)
collScen <- cfChange(collScen, "hsdeg", x = 1, scen = i)
collScen <- cfChange(collScen, "coldeg", x = 1, scen = i)
}
nohsSims <- logitsimev(nohsScen, simbetas, ci=0.95)
hsSims <- logitsimev(hsScen, simbetas, ci=0.95)
collSims <- logitsimev(collScen, simbetas, ci=0.95)
colorbrewer
packagecol <- brewer.pal(3,"Dark2")
This is also showing the use of the extrapolate option to only plot predictions within the convex hull of the data. Uses the WhatIf
package.
nohsTrace <- lineplot(x=xhyp,
y=nohsSims$pe,
lower=nohsSims$lower,
upper=nohsSims$upper,
col=col[1],
extrapolate=list(data=mdata,
cfact=nohsScen$x,
omit.extrapolated=TRUE),
plot=1)
hsTrace <- lineplot(x=xhyp,
y=hsSims$pe,
lower=hsSims$lower,
upper=hsSims$upper,
col=col[2],
extrapolate=list(data=mdata,
cfact=hsScen$x,
omit.extrapolated=TRUE),
plot=1)
collTrace <- lineplot(x=xhyp,
y=collSims$pe,
lower=collSims$lower,
upper=collSims$upper,
col=col[3],
extrapolate=list(data=mdata,
cfact=collScen$x,
omit.extrapolated=TRUE),
plot=1)
# Set up traces with labels and legend
labelTrace <- textTile(labels=c("Less than HS", "High School", "College"),
x=c( 55, 49, 30),
y=c( 0.26, 0.56, 0.87),
col=col,
plot=1)
legendTrace <- textTile(labels=c("Logit estimates:", "95% confidence", "interval is shaded"),
x=c(82, 82, 82),
y=c(0.2, 0.16, 0.12),
plot=1)
# Plot traces using tile
tile(nohsTrace,
hsTrace,
collTrace,
labelTrace,
legendTrace,
limits=c(18,94,0,1),
xaxis=list(at=c(20,30,40,50,60,70,80,90)),
xaxistitle=list(labels="Age of Respondent"),
yaxistitle=list(labels="Probability of Voting"),
frame=TRUE,
output = list(file="logit", width=6)
)
Using either your own data or one of the datasets used in this lab, run an interesting model and use simcf to present the results (you can use either tile or the base graphics to plot).