Chapter 4. Multivariate Distributions#
Figure 4.1. Random Vectors#
suppressPackageStartupMessages(library(spatstat))
suppressPackageStartupMessages(library(diagram))
x <- c(-2,4,4,-2)
y <- c(4,4,-3,-3)
plot.new()
plot.window(c(-8.5,5),c(-5,5))
polygon(x,y,col=gray(.8),border=NA)
W1 <- ellipse(a=3,b=2.5,centre=c(-6,0),phi=0,npoly=1024)
plot(W1,lwd=1.4,add=TRUE)
text(-7,1,"HH",cex=.75)
text(-5,1,"TH",cex=.75)
text(-7,-1,"HT",cex=.75)
text(-5,-1,"TT",cex=.75)
arrows(1,0,4,0,angle=20,length=.1)
arrows(1,0,1,4,angle=20,length=.1)
arrows(1,0,-2,0,angle=20,length=.1)
arrows(1,0,1,-3,angle=20,length=.1)
points(1,0,pch=19,col="black")
points(3,0,pch=19,col="black")
points(1,2,pch=19,col="black")
points(3,2,pch=19,col="black")
arrows(-4.5,-.95,.9,-.1,angle=20,length=.1)
arrows(-4.5,1,.9,1.9,angle=20,length=.1)
text(1.8,-0.5,"(0,0)",cex=.75)
text(1.8,1.7,"(0,1)",cex=.75)
text(4.1,-0.5,"(1,0)",cex=.75)
text(3.8,1.7,"(1,1)",cex=.75)
curvedarrow((c(-7,1.3)),(c(2.9,2.2)),lty=1,lwd=1,arr.pos=1,curve=-.15,arr.type="simple",angle=50)
curvedarrow((c(-7,-1.3)),(c(3,-.2)),lty=1,lwd=1,arr.pos=1,curve=.15,arr.type="simple",angle=40)
text(-8.5,2,expression(italic(S)),cex=.75)
Figure 4.2. Random Vectors#
x <- c(-2,4,4,-2)
y <- c(4,4,-3,-3)
plot.new()
plot.window(c(-8.5,5),c(-5,5))
polygon(x,y,col=gray(.8),border=NA)
W1 <- ellipse(a=2.5,b=2,centre=c(-6,0),phi=0,npoly=1024)
plot(W1,add=TRUE)
text(-5.5,.5,"a",cex=.75)
text(-7.5,1,"b",cex=.75)
text(-6,-0.5,"c",cex=.75)
text(-4.5,-.5,"d",cex=.75)
text(-6.5,-1.5,"e",cex=.75)
arrows(-2,-3,4,-3,angle=20,length=.1)
arrows(-2,-3,-2,4,angle=20,length=.1)
arrows(-5.3,.5,3,1,angle=20,length=.1)
arrows(-7.3,1,1,3,angle=20,length=.1)
arrows(-5.8,-.6,1,-1,angle=20,length=.1)
arrows(-4.3,-.5,0,0,angle=20,length=.1)
arrows(-6.3,-1.5,-1,-2.5,angle=20,length=.1)
points(3.2,1,pch=19,col="black")
text(3.1,.5,"(35,25)",cex=.75)
points(1.2,3,pch=19,col="black")
text(1.2,2.4,"(25,40)",cex=.75)
points(1.2,-1,pch=19,col="black")
text(1.1,-1.5,"(25,15)",cex=.75)
points(0.2,0,pch=19,col="black")
text(0.2,-0.5,"(15,20)",cex=.75)
points(-.8,-2.5,pch=19,col="black")
text(.1,-2.5,"(6,8)",cex=.75)
text(-6,-4,"Sample Space",cex=.75)
text(1.2,-4,"Bivariate Outcomes",cex=.75)
text(4.6,-3.3,"wage",cex=.75)
text(-2,4.3,"experience",cex=.75)
text(-8.0,2,expression(italic(S)),cex=.75)
Figure 4.3. Bivariate Distribution#
dat <- read.table("cps09mar.txt")
wage <- as.matrix(dat[,5]/(dat[,6]*dat[,7]))
ex <- dat[,1] - dat[,4] - 6
F <- function (x,y) mean((wage <= x)*(ex <= y))
y <- seq(0,50,2)
x <- c(seq(0,10,2),seq(12.5,60,2.5))
xn <- length(x)
yn <- length(y)
z <- matrix(0,xn,yn)
for (j in 1:xn){
for (i in 1:yn){
z[j,i] <- F(x[j],y[i])
}}
pmat <- persp(x,y,z,theta=-30,phi=10,d=1.5,ticktype="detailed",box=FALSE,xlim=c(-5,60),ylim=c(-5,50),col=gray(.8))
x.axis <- seq(0,60,by=10)
y.axis <- seq(0,50,by=10)
lines(trans3d(x.axis,0,0,pmat))
lines(trans3d(0,y.axis,0,pmat))
tick.start <- trans3d(x.axis, 0, 0, pmat)
tick.end <- trans3d(x.axis,-2, 0, pmat)
segments(tick.start$x, tick.start$y, tick.end$x, tick.end$y)
labels <- as.character(x.axis)
label.pos <- trans3d(x.axis-2, -4, 0, pmat)
text(label.pos$x, label.pos$y, labels=labels, adj=c(0, NA),cex=.75)
y.axis <- seq(0,50,by=10)
tick.start <- trans3d(0, y.axis, 0, pmat)
tick.end <- trans3d(-2, y.axis, 0, pmat)
segments(tick.start$x, tick.start$y, tick.end$x, tick.end$y)
labels <- as.character(y.axis)
label.pos <- trans3d(-6, y.axis, 0, pmat)
text(label.pos$x, label.pos$y, labels=labels, adj=c(0, NA),cex=.75)
text(trans3d(24,-12,0,pmat),"Wage",cex=.75)
text(trans3d(-18,10,0,pmat),"Experience",cex=.75)
Figure 4.4. \(F(x,y)\)#
x <- c(-5,2,2,-5)
y <- c(2,2,-5,-5)
plot.new()
plot.window(c(-5,5),c(-5,5))
polygon(x,y,col=gray(.8),border=NA)
abline(h=0)
abline(v=0)
points(2,2,pch=19,col="black")
arrows(2,2,2,-5,angle=20,length=.15)
arrows(2,2,-5,2,angle=20,length=.15)
text(3,2.2,"(x,y)",cex=.75)
lab=expression(paste(P,"[",X<=x,", ",Y<=y,"]"))
text(-2.2,-1.5,lab,cex=.75)
Figure 4.5. Prob and \(F(x,y)\)#
x <- c(-2,3,3,-2)
y <- c(3,3,-2,-2)
plot.new()
plot.window(c(-5,5),c(-5,5))
polygon(x,y,col=gray(.8),border=NA)
abline(h=0)
abline(v=0)
points(3,3,pch=19,col="black",cex=.75)
points(3,-2,pch=19,col="black",cex=.75)
points(-2,3,pch=19,col="black",cex=.75)
points(-2,-2,pch=19,col="black",cex=.75)
lines(c(-2,3),c(-2,-2))
lines(c(-2,3),c(3,3))
lines(c(-2,-2),c(-2,3))
lines(c(3,3),c(-2,3))
text(4,3.2,"F(b,d)",cex=.75)
text(4,-2.2,"F(b,c)",cex=.75)
text(-3,3.2,"F(a,d)",cex=.75)
text(-3,-2.2,"F(a,c)",cex=.75)
text(5,-.3,"X",cex=.75)
text(.25,5,"Y",cex=.75)
points(3,0,pch=19,col="black",cex=.75)
points(-2,0,pch=19,col="black",cex=.75)
points(0,3,pch=19,col="black",cex=.75)
points(0,-2,pch=19,col="black",cex=.75)
text(-2.25,.25,"a",cex=.75)
text(3.25,.3,"b",cex=.75)
text(.25,-2.25,"c",cex=.75)
text(.25,3.3,"d",cex=.75)
lab <- expression(paste(P,"[a<",X<=b,", c<",Y<=d,"]"))
text(0.6,1.1,lab,cex=.75)
Figure 4.6. Joint Density#
f <- function (x,y) (1/4)*(x+y)*(x*y)*exp(-x-y)
x <- seq(0,8,0.2)
y <- seq(0,8,0.2)
xn <- length(x)
yn <- length(y)
z <- matrix(0,xn,yn)
for (j in 1:xn){
for (i in 1:yn){
z[j,i] <- f(x[j],y[i])
}}
pmat <- persp(x,y,z,theta=-60,phi=10,d=1.5,r=4,ticktype="detailed",box=FALSE,xlim=c(-1,8),ylim=c(-1,8),col=gray(.8))
x.axis <- seq(0,8,by=2)
tick.start <- trans3d(x.axis, 0, 0, pmat)
tick.end <- trans3d(x.axis,-0.4, 0, pmat)
segments(tick.start$x, tick.start$y, tick.end$x, tick.end$y)
labels <- as.character(x.axis)
label.pos <- trans3d(x.axis, -0.5, 0, pmat)
text(label.pos$x, label.pos$y, labels=labels, adj=c(0, NA),cex=.75)
y.axis <- seq(0,8,by=2)
tick.start <- trans3d(0, y.axis, 0, pmat)
tick.end <- trans3d(-0.4, y.axis, 0, pmat)
segments(tick.start$x, tick.start$y, tick.end$x, tick.end$y)
labels <- as.character(y.axis)
label.pos <- trans3d(-1, y.axis, 0, pmat)
text(label.pos$x, label.pos$y, labels=labels, adj=c(0, NA),cex=.75)
text(trans3d(3,-2,0,pmat),"X",cex=.75)
text(trans3d(-2,3,0,pmat),"Y",cex=.75)
Figure 4.7. Bivariate Density for Wage/Experience#
dat <- read.table("cps09mar.txt")
wage <- as.matrix(dat[,5]/(dat[,6]*dat[,7]))
ex <- dat[,1] - dat[,4] - 6
n6 <- length(wage)^(-1/6)
hw <- 1.1*sd(wage)*n6
he <- 1.5*sd(ex)*n6
F <- function (x,y) mean(dnorm((ex-y)/he)*dnorm((wage-x)/hw))/hw/he
y <- c(seq(0,8,1),seq(10,50,2))
x <- seq(0,60,1)
xn <- length(x)
yn <- length(y)
z <- matrix(0,xn,yn)
for (j in 1:xn){
for (i in 1:yn){
z[j,i] <- F(x[j],y[i])
}}
pmat <- persp(x,y,z,theta=-30,phi=10,d=1.5,ticktype="detailed",box=FALSE,xlim=c(-5,60),ylim=c(-5,50),col=gray(.8))
x.axis <- seq(0,60,by=10)
tick.start <- trans3d(x.axis, 0, 0, pmat)
tick.end <- trans3d(x.axis,-2, 0, pmat)
segments(tick.start$x, tick.start$y, tick.end$x, tick.end$y)
labels <- as.character(x.axis)
label.pos <- trans3d(x.axis-2, -4, 0, pmat)
text(label.pos$x, label.pos$y, labels=labels, adj=c(0, NA),cex=.75)
y.axis <- seq(0,50,by=10)
tick.start <- trans3d(0, y.axis, 0, pmat)
tick.end <- trans3d(-2, y.axis, 0, pmat)
segments(tick.start$x, tick.start$y, tick.end$x, tick.end$y)
labels <- as.character(y.axis)
label.pos <- trans3d(-6, y.axis, 0, pmat)
text(label.pos$x, label.pos$y, labels=labels, adj=c(0, NA),cex=.75)
text(trans3d(24,-12,0,pmat),"Wage",cex=.75)
text(trans3d(-18,10,0,pmat),"Experience",cex=.75)
lines(trans3d(x.axis,0,0,pmat))
lines(trans3d(0,y.axis,0,pmat))
Figures 4.8. Conditional Distribution of Hourly Wages Given Education#
dat <- read.table("cps09mar.txt")
wage <- as.matrix(dat[,5]/(dat[,6]*dat[,7]))
ed <- dat[,4]
wage1 <- wage[which(ed==12)]
wage2 <- wage[which(ed==16)]
wage3 <- wage[which(ed==18)]
wage4 <- wage[which(ed==20)]
x <- c(seq(0,9,1),seq(10,40,2.5),seq(45,80,5))
n <- length(x)
F1 <- matrix(1,n,1)
F2 <- matrix(1,n,1)
F3 <- matrix(1,n,1)
F4 <- matrix(1,n,1)
for (i in 1:n){
F1[i] <- mean(wage1 <= x[i])
F2[i] <- mean(wage2 <= x[i])
F3[i] <- mean(wage3 <= x[i])
F4[i] <- mean(wage4 <= x[i])
}
leg1 <-
plot(x,F1, type="l",lty=1,xlab="",ylab="",xaxt="n", yaxt="n",ylim=c(0,1),title=NULL,xaxs="i",yaxs="i",bty="n")
lines(x,F2)
lines(x,F3)
lines(x,F4)
axis(side=1,seq(0,100,10),cex.axis=.75)
axis(side=2,seq(0,1,0.2),cex.axis=.75)
text(20,.9,"F(y|x=12)",cex=.75)
text(32,.8,"F(y|x=16)",cex=.75)
text(47,.73,"F(y|x=18)",cex=.75)
text(55,.6,"F(y|x=20)",cex=.75)
Figure 4.9: Conditional Density of Hourly Wages Given Education#
f1 <- density(wage1,from=0,to=80,adjust=2.5)
f2 <- density(wage2,from=0,to=80,adjust=2)
f3 <- density(wage3,from=0,to=80,adjust=2)
f4 <- density(wage4,from=0,to=80,adjust=1)
plot(f1$x,f1$y,type="l",lty=1,xlim=c(-.1,80),ylim=c(0,.056),xlab="",ylab="",xaxt="n",yaxt="n",title=NULL,xaxs="i",yaxs="i",bty="n")
lines(f2$x,f2$y)
lines(f3$x,f3$y)
lines(f4$x,f4$y)
axis(side=1,seq(-10,100,10),cex.axis=.75)
lines(c(0,0),c(0,f1$y[1]))
text(25,.05,"f(y|x=12)",cex=.75)
text(30,.033,"f(y|x=16)",cex=.75)
text(40,.025,"f(y|x=18)",cex=.75)
text(70,.01,"f(y|x=20)",cex=.75)
Figure 4.10: Conditional Density of Wages Given Experience#
dat <- read.table("cps09mar.txt")
wage <- as.matrix(dat[,5]/(dat[,6]*dat[,7]))
ex <- dat[,1] - dat[,4] - 6
n6 <- length(wage)^(-1/6)
hw <- .9*sd(wage)*n6
he <- 1.5*sd(ex)*n6
x <- c(0,8,20)
y <- seq(0,60,.25)
N <- length(y)
f <- matrix(0,N,3)
for (i in 1:3){
d <- dnorm((ex-x[i])/he)
fx <- mean(d)
for (j in 1:N) {
fy <- mean(d*dnorm((wage-y[j])/hw))/hw
f[j,i] <- fy/fx
}
}
plot(y,f[,1],type="l",lty=1,xlim=c(-0.1,60),ylim=c(0,.055),xaxs="i",yaxs="i",ylab="",xlab="",bty="n",yaxt="n",xaxt="n")
lines(y,f[,2])
lines(y,f[,3])
lines(c(0,0),c(0,f[1,1]))
axis(side=1,seq(-10,70,10),cex.axis=.75)
text(18,.052,"f(y|x=0)",cex=.75)
text(22,.04,"f(y|x=8)",cex=.75)
text(30,.027,"f(y|x=20)",cex=.75)
Figure 4.11: Conditional Expectation Functions#
# Wages Given Experience
dat <- read.table("cps09mar.txt")
wage <- as.matrix(dat[,5]/(dat[,6]*dat[,7]))
ed <- dat[,4]
ex <- dat[,1] - ed - 6
n6 <- length(wage)^(-1/6)
he <- 2*sd(ex)*n6
x <- seq(0,50,1)
N <- length(x)
m <- matrix(0,N,1)
for (i in 1:N){
d <- dnorm((ex-x[i])/he)
m[i] <- mean(d*wage)/mean(d)
}
wd <- 1.4
plot(x,m,type="l",lty=1,xaxs="i",yaxs="i",ylab="",xlab="",bty="n",xaxt="n",ylim=c(0,55),lwd=wd)
axis(side=1,seq(0,60,10),lwd=wd)
axis(side=2,seq(0,60,10),lwd=wd)
text(40,28,"E[Y|X=x]")
# Wages Given Education
ed <- ed*(ed>8) + matrix(8,length(ed),1)*(ed<=8)
y <- c(8,9,10,11,12,13,14,16,18,20)
N <- length(y)
mwage <- matrix(0,N,1)
for (i in 1:N){
wagei <- wage[which(ed==y[i])]
mwage[i] <- mean(wagei)
}
x <- seq(7,21,.001)
w <- .2
yy <- matrix(0,length(x),N)
for (i in 1:N){
yy[,i] <- (abs(x-y[i]) < w)*mwage[i]
}
plot(x,yy[,1],type="l",lty=1,xaxs="i",yaxs="i",ylab="",xlab="",xaxt="n",bty="n",ylim=c(0,55),lwd=wd)
for (i in 1:N) lines(x,yy[,i],lwd=wd)
axis(side=1,seq(6,22,2),lwd=wd)
axis(side=2,seq(0,60,10),lwd=wd)
text(13,28,"E[Y|X=x]")
Figure 4.12: Conditional Variance Functions#
# Wages Given Experience
dat <- read.table("cps09mar.txt")
wage <- as.matrix(dat[,5]/(dat[,6]*dat[,7]))
ed <- dat[,4]
ex <- dat[,1] - ed - 6
n6 <- length(wage)^(-1/6)
he <- 4*sd(ex)*n6
x <- seq(0,50,1)
N <- length(x)
v <- matrix(0,N,1)
wage2 <- wage^2
for (i in 1:N){
d <- dnorm((ex-x[i])/he)
m1 <- mean(d*wage)/mean(d)
m2 <- mean(d*wage2)/mean(d)
v[i] <- m2 -(m1^2)
}
wd <- 1.4
plot(x,v,type="l",lty=1,xaxs="i",yaxs="i",ylab="",xlab="",bty="n",yaxt="n",xaxt="n",ylim=c(0,1800),lwd=wd)
axis(side=1,seq(0,50,10),lwd=wd)
axis(side=2,seq(0,2000,400),lwd=wd)
text(30,700,"var[Y|X=x]")
# Wages Given Education
ed <- ed*(ed>8) + matrix(8,length(ed),1)*(ed<=8)
y <- c(8,9,10,11,12,13,14,16,18,20)
N <- length(y)
mwage <- matrix(0,N,1)
vwage <- matrix(0,N,1)
for (i in 1:N){
wagei <- wage[which(ed==y[i])]
mwage[i] <- mean(wagei)
vwage[i] <- var(wagei)
}
x <- seq(7,21,.001)
w <- .2
yy <- matrix(0,length(x),N)
for (i in 1:N){
yy[,i] <- (abs(x-y[i]) < w)*vwage[i]
}
plot(x,yy[,1],type="l",lty=1,xaxs="i",yaxs="i",ylab="",xlab="",yaxt="n",xaxt="n",bty="n",ylim=c(0,1800),lwd=wd)
for (i in 1:N) lines(x,yy[,i],lwd=wd)
axis(side=1,seq(6,22,2),lwd=wd)
axis(side=2,seq(0,2000,400),lwd=wd)
text(14,800,"var[Y|X=x]")