Chapter 2. Random Variables#

Figure 2.1#

suppressPackageStartupMessages(library(spatstat))
suppressPackageStartupMessages(library(diagram))
plot.new()
plot.window(c(-5,4),c(-3,3))
W1 <- ellipse(a=2,b=1.5,centre=c(-3,0),phi=0,npoly=1024)
plot(W1,add=TRUE)
arrows(3,0,4,0,angle=20,length=.1)
arrows(3,0,0,0,angle=20,length=.1)
points(1,0,pch=19,col="black",cex=.75)
points(3,0,pch=19,col="black",cex=.75)
text(3,-.5,"1",cex=.75)
text(1,-.5,"0",cex=.75)
text(-4,0,"H",cex=.75)
text(-2,0,"T",cex=.75)
curvedarrow((c(-2,0.4)),(c(1,.3)),lty=1,arr.pos=1,curve=-.3,arr.type="simple",angle=50,lwd=1)
curvedarrow((c(-4,.4)),(c(3,.3)),lty=1,arr.pos=1,curve=-.3,arr.type="simple",angle=40,lwd=1)
text(-4.7,1.2,expression(italic(S)),cex=.8)
text(3.8,.3,expression(italic(R)),cex=.8)

Figure 2.2. ProbabilityMass Functions#

## Poisson Distribution

p <- 1
n <- 5
k <- seq(0,n)
pn <- matrix(1,n+1,1)*p
y <- (pn^k)*exp(-pn)/factorial(k)

x <- seq(-.9,5.5,.001)
w <- .2
y0 <- (abs(x) < w)*y[1]
y1 <- (abs(x-1) < w)*y[2]
y2 <- (abs(x-2) < w)*y[3]
y3 <- (abs(x-3) < w)*y[4]
y4 <- (abs(x-4) < w)*y[5]
y5 <- (abs(x-5) < w)*y[6]

wd <- 1.4
plot(x,y0,type="l",lty=1,xaxs="i",yaxs="i",ylab="",xlab="Poisson Distribution",ylim=c(0,.4),xaxt="n",bty="n",lwd=wd)
lines(x,y1,lwd=wd)
lines(x,y2,lwd=wd)
lines(x,y3,lwd=wd)
lines(x,y4,lwd=wd)
lines(x,y5,lwd=wd)
axis(side=1,seq(-1,6,1),lwd=wd)
axis(side=2,lwd=wd)
## Education

dat <- read.table("cps09mar.txt")
education <- dat[,4]
y <- c(8,9,10,11,12,13,14,16,18,20)
n <- length(y)
p <- matrix(0,n,1)
p[1] <- mean(education <= y[1])
for (i in 2:n){
p[i] <- mean(education == y[i])
}

x <- seq(7,21,.001)
w <- .2
yy <- matrix(0,length(x),n)
for (i in 1:n){
yy[,i] <- (abs(x-y[i]) < w)*p[i]
}
plot(x,yy[,1],type="l",lty=1,xaxs="i",yaxs="i",ylab="",yaxt="n",xlab="Education",xaxt="n",bty="n",ylim=c(0,.3),lwd=wd)
for (i in 1:n){
lines(x,yy[,i],lwd=wd)
}
axis(side=1,seq(6,22,2),lwd=wd)
axis(side=2,seq(0,.3,.1),lwd=wd)

Figure 2.3. St. Petersburg Paradox#

s <- c(2,4,8,16,32,64)
p <- 1/s

x <- seq(-2,max(s)+1,.001)
w <- .5
y1 <- (abs(x-s[1]) < w)*p[1]
y2 <- (abs(x-s[2]) < w)*p[2]
y3 <- (abs(x-s[3]) < w)*p[3]
y4 <- (abs(x-s[4]) < w)*p[4]
y5 <- (abs(x-s[5]) < w)*p[5]
y6 <- (abs(x-s[6]) < w)*p[6]

plot(x,y1,type="l",lty=1,xaxs="i",yaxs="i",ylab="",xlab="",ylim=c(-.001,.52),yaxt="n",xaxt="n",bty="n")
lines(x,y2,lty=1)
lines(x,y3,lty=1)
lines(x,y4,lty=1)
lines(x,y5,lty=1)
lines(x,y6,lty=1)
axis(side=2,seq(0,.5,.1),cex.axis=.75)
axis(side=1,s,cex.axis=.75)
abline(h=0) 

Figure 2.4. Discrete Distribution Functions#

## Poisson Distribution

n <- 5
k <- seq(0,n)
y <- exp(-1)/factorial(k)
p <- cumsum(y)

wd <- 1.4

plot.new()
plot.window(c(-.75,5.5),c(0.0,1))
axis(side=1,seq(-1,6,1),lwd=wd)
axis(side=2,seq(-2,1,.2),lwd=wd)
points(0,0,pch=1,col="black")
points(0,p[1],pch=19,col="black")
for (i in 1:5) {
points(i,p[i],pch=1,col="black")
points(i,p[i+1],pch=19,col="black")
lines(c(i-1,i-.05),c(p[i],p[i]),lwd=wd)
}
lines(c(-2,-.05),c(0,0),lwd=wd)
lines(c(5,7),c(p[6],p[6]),lwd=wd)
## Education Distribution

dat <- read.table("cps09mar.txt")
education <- dat[,4]
y <- c(8,9,10,11,12,13,14,16,18,20)
n <- length(y)
p <- matrix(0,n,1)
p[1] <- mean(education <= y[1])
for (i in 2:n){
p[i] <- mean(education == y[i])
}
p <- cumsum(p)


plot.new()
plot.window(c(7,21),c(0.0,1))
axis(side=1,seq(4,22,2),lwd=wd)
axis(side=2,seq(-1,1,.2),lwd=wd)
points(y[1],0,pch=1,col="black")
points(y[1],p[1],pch=19,col="black")
lines(c(0,y[1]),c(0,0),lwd=wd)
lines(c(20,25),c(1,1),lwd=wd)
for (i in 2:n){
points(y[i],p[i-1],pch=1,col="black")
points(y[i],p[i],pch=19,col="black")
lines(c(y[i-1],y[i]),c(p[i-1],p[i-1]),lwd=wd)
}

Figure 2.5. Continuous Distribution Function – U.S.Wages#

dat <- read.table("cps09mar.txt")
hrwage <- as.matrix(dat[,5]/(dat[,6]*dat[,7]))

x <- seq(0,60,2)
n <- length(x)
F <- matrix(1,n,1)
for (i in 1:n)   F[i] <- mean(hrwage <= x[i])

y <- c(10,20,30,40,50)
Fy <- matrix(1,length(y),1)
for (i in 1:length(y)) Fy[i] <- mean(hrwage <= y[i])

plot(x,F, type="l",lty=1,xlab="",ylab="",xaxt="n", yaxt="n", xlim=c(0,60),ylim=c(0,1),title=NULL,xaxs="i",yaxs="i",bty="n")
axis(side=1,seq(0,60,10),cex.axis=.75)
axis(side=2,seq(0,1,0.2),cex.axis=.75)
for (i in 1:5) {
  arrows(y[i],Fy[i],0,Fy[i],angle=20,length=.1,lty=1,lwd=0.75)
  lines(c(y[i],y[i]),c(0,Fy[i]),lwd=0.75)
}
text(57,.9,"F(x)",cex=.75)

Figure 2.6. Quantile Function – U.S.Wages#

dat <- read.table("cps09mar.txt")
hrwage <- as.matrix(dat[,5]/(dat[,6]*dat[,7]))

x <- seq(0,60,2)
n <- length(x)
F <- matrix(1,n,1)
for (i in 1:n){
  F[i] <- mean(hrwage <= x[i])
}

p <- c(.25,.5,.75)
q <- quantile(hrwage,p)
q[2] = q[2]-.35
q[3] = q[3]-.5

plot(x,F, type="l",lty=1,xlab="",ylab="",xaxt="n", yaxt="n", xlim=c(0,60),ylim=c(0,1),title=NULL,xaxs="i",yaxs="i",bty="n")
axis(side=1,seq(0,60,10),cex.axis=.75)
axis(side=2,seq(0,1,0.25),cex.axis=.75)
arrows(q[1],p[1],q[1],0,angle=20,length=.1,lty=1,lwd=0.75)
arrows(q[2],p[2],q[2],0,angle=20,length=.1,lty=1,lwd=0.75)
arrows(q[3],p[3],q[3],0,angle=20,length=.1,lty=1,lwd=0.75)
lines(c(0,q[1]),c(p[1],p[1]),lwd=0.75)
lines(c(0,q[2]),c(p[2],p[2]),lwd=0.75)
lines(c(0,q[3]),c(p[3],p[3]),lwd=0.75)
text(57,.9,"F(x)",cex=.75)

Figure 2.7. Density Function for Wage Distribution#

dat <- read.table("cps09mar.txt")
hrwage <- as.matrix(dat[,5]/(dat[,6]*dat[,7]))

den <- density(hrwage,from=0,to=100,adjust=2)		
i20 <- 103
i30 <- 154
d20 <- den$y[i20]
d30 <- den$y[i30]
dd <- den$y[i20:i30]
dx <- den$x[i20:i30]

plot(den,type="l",lty=1,xaxs="i",yaxs="i",ylim=c(0,.041),xlim=c(-0.1,70),ylab="",xlab="",bty="n",main="",yaxt="n",cex.axis=.75)
polygon(c(20,20,dx,30,30),c(0.0001,d20,dd,d30,0.0001),col=gray(.8),border=NA)
lines(den$x,den$y)
abline(h=0)
lines(c(20,20),c(0,d20))
lines(c(30,30),c(0,d30))
text(44,(d20+d30)/2,"P[20<X<30]=0.24",cex=.75)
arrows(32.5,(d20+d30)/2,23.5,(d20+d30)/2-.005,angle=20,length=.1)
lines(c(0,0),c(0,den$y[1]))
text(7,.035,"f(x)",cex=.75)

Figure 2.8. Density Function for Log Wage Distribution#

dat <- read.table("cps09mar.txt")
hrwage <- as.matrix(dat[,5]/(dat[,6]*dat[,7]))
lnwage <- log(hrwage)
den <- density(lnwage,from=0,to=6,adjust=2)		

plot(den,type="l",lty=1,xaxs="i",yaxs="i",ylim=c(0,.68),xlim=c(0,6),ylab="",xlab="",bty="n",main="Density Function for Log Wage Distribution",yaxt="n",cex.axis=.75)
abline(h=0)
text(4,.4,"f(x)",cex=.75)

Figure 2.9: Concavity#

x <- seq(1,20,by=0.01)
f <- log(x)
a <- 2
b <- 15
c <- (a+b)/2
fa <- log(a)
fb <- log(b)
fc <- log(c)
ff <- (fa+fb)/2

plot(x,f,type="l",lty=1,xaxs="i",yaxs="i",ylab="",xlab="",xaxt="n",bty="n",yaxt="n",ylim=c(0,3.1),cex.axis=.75)
lines(c(a,b),c(fa,fb),lty=1)
points(a,fa,pch=19,col="black",cex=.75)
points(b,fb,pch=19,col="black",cex=.75)
points(c,fc,pch=19,col="black",cex=.75)
points(c,ff,pch=19,col="black",cex=.75)
text(2.5,.5,"a",cex=.75)
text(15.2,2.5,"b",cex=.75)
text(8.55,1.5,"c",cex=.75)
text(8.5,2.4,"d",cex=.75)
text(18,3,expression(f(x)),cex=.75)

Figure 2.10: Truncation#

x <- seq(0,10,.01)
g1 <- dchisq(x,4)
g2 <- g1*(x >= 2)/(1-pchisq(2,4))

plot(x,g2,type="l",lty=1,xaxs="i",yaxs="i",ylim=c(0,.26),xlim=c(0,10),ylab="",xlab="",xaxt="n",yaxt="n",bty="n")
lines(x,g1)
text(4.7,.18,"f*(x)",cex=.75)
text(3.8,.12,"f(x)",cex=.75)
axis(side=1,seq(0,10,1),cex.axis=.75)