See Lecture by (???).

Kernel density estimation

Given data set sampled from the Gaussian distribution \(\mathcal{N}(0,1)\)

gauss.kernel <- function(x) {
    return(1/sqrt(2*pi)* exp(-(x^2)/2))
}

kde <- function(x,observations, h,kernel) {
    s = sapply(x, function(i) {
        (sum(kernel((i-observations)/h)))
    })
    return(s/length(observations)/h)    
}

experiment <- function(n,bandwidths=c(0.5,1,5)) {
    obs = rnorm(n,0,1)
    x = -300:300/ 100
    hist(obs,freq=F,breaks=12,main=paste('KDE on',n,'samples'),ylim=c(0,1.))
    bandwidths = c(bandwidths, 1.06*sd(obs) *(n^(-0.2)))
    lines(x,dnorm(x,0,1),col=1,lw=2)
    for (j in 1:length(bandwidths)) {
        lines(x,kde(x,obs,h=bandwidths[j],gauss.kernel),col=j+1,lw=2)
    }
    legend('topright',c('Original distribution', paste('Gaussian KDE, h=',bandwidths)),col=c(1:(length(bandwidths)+1)),lwd=2)
}

References