Hi Jerome and Mike (and everyone),
I simulated samples, and calculated Hewitt’s approximation of Land’s Exact 95% UCL (as used in IHstats).
By definition, you’d expect the UCL to be greater than the population mean 95% of the time.
I’m finding it’s more like 99.5% consistently.
No matter what I do to play with the parameters, I get +99%. And when sample size gets around 30, it consistently gives 100%
Have I made a mistake, or does the 95% UCL not work as intended?
R Code for reference:
library(tidyverse)
#Land's Exact 95%UCL function
UCL <- function(data){
sy <- sd(log(data))
n <- as.numeric(length(data))
yhat <- mean(log(data))
mu <- exp(yhat + 1/2 * sy^2)
#Constants
a <- 0.76766658
b <- 3.8716869
c <- 0.80598919
d <- 6.0321019
e <- 0.89998154
f <- 2.012669
g <- 0.21978875
h <- 0.41575588
i <- 0.29258276
#Intermediates
F1 <- sy * (i + 1/(n-2)^c)
F2 <- b + d/(n-2)^c
F3 <- F1 * (1-e*exp(-f*F1))
F4 <- 1 + g * exp(-h*F1)
F5 <- F2 * F3/F4
C <- 1.645 + a/(n-2)+F5
return( as.numeric(exp(log(mu) + C * sy / sqrt(n-1))))
}
#Number of samples (repetitions)
sample.n <- 1e4
#Random Parameters
sim <- data.frame("mu" = runif(sample.n, min = 1, max = 4.5),
"sigma" = runif(sample.n, min = 0.5, max = 2),
"size" = sample(6:10, sample.n, replace = TRUE))
#Consistent Parameters
#mu <- 1
#sigma <- 1
#sample.size <- 6
#sim <- data.frame("mu" = rep(mu, sample.n),
# "sigma" = rep(sigma, sample.n),
# "size" = rep(sample.size, sample.n))
sim <- sim %>%
rowwise() %>%
mutate(samples = list(round(rlnorm(size, mu, sigma),2)),
UCL = UCL(samples),
is.contained = UCL > exp(mu))
# Coverage Percentage
length(sim$is.contained[sim$is.contained == TRUE]) / sample.n *100