swNoData

Gibbs sampling was originally designed by Geman and Geman (1984) for drawing updates from the Gibbs distribution, hence the name. However, single-site Gibbs sampling exhibits poor mixing due to the posterior correlation between the pixel labels. Thus it is very slow to converge when the correlation (controlled by the inverse temperature β) is high.

The algorithm of Swendsen and Wang (1987) addresses this problem by forming clusters of neighbouring pixels, then updating all of the labels within a cluster to the same value. When simulating from the prior, such as a Potts model without an external field, this algorithm is very efficient.

The SW function in the PottsUtils package is implemented in a combination of R and C. The swNoData function in bayesImageS is implemented using RcppArmadillo, which gives it a speed advantage. It is worth noting that the intention of bayesImageS is not to replace PottsUtils. Rather, an efficient Swendsen-Wang algorithm is used as a building block for implementations of ABC (Grelaud et al. 2009), path sampling (Gelman and Meng 1998), and the exchange algorithm (Murray, Ghahramani, and MacKay 2006). These other algorithms will be covered in future posts.

There are two things that we want to keep track of in this simulation study: the speed of the algorithm and the distribution of the summary statistic. We will be using system.time(..) to measure both CPU and elapsed (wall clock) time taken for the same number of iterations, for a range of inverse temperatures:

beta <- seq(0,2,by=0.1)
tmMx.PU <- tmMx.bIS <- matrix(nrow=length(beta),ncol=2)
rownames(tmMx.PU) <- rownames(tmMx.bIS) <- beta
colnames(tmMx.PU) <- colnames(tmMx.bIS) <- c("user","elapsed")

We will discard the first 100 iterations as burn-in and keep the remaining 500.

iter <- 600
burn <- 100
samp.PU <- samp.bIS <- matrix(nrow=length(beta),ncol=iter-burn)

The distribution of pixel labels can be summarised by the sufficient statistic of the Potts model:

S(z) = ∑_{i ∼ ℓ ∈ 𝒩}δ(z_i, z_ℓ)

where i ∼ ℓ ∈ 𝒩 are all of the pairs of neighbours in the lattice (ie. the cliques) and δ(u, v) is 1 if u = v and 0 otherwise (the Kronecker delta function). swNoData returns this automatically, but with SW we will need to use the function sufficientStat to calculate the sufficient statistic for the labels.

library(bayesImageS)

mask <- matrix(1,50,50)
neigh <- getNeighbors(mask, c(2,2,0,0))
block <- getBlocks(mask, 2)
edges <- getEdges(mask, c(2,2,0,0))

n <- sum(mask)
k <- 2
bcrit <- log(1 + sqrt(k))
maxSS <- nrow(edges)

for (i in 1:length(beta)) {
  if (requireNamespace("PottsUtils", quietly = TRUE)) {
    tm <- system.time(result <- PottsUtils::SW(iter,n,k,edges,beta=beta[i]))
    tmMx.PU[i,"user"] <- tm["user.self"]
    tmMx.PU[i,"elapsed"] <- tm["elapsed"]
    res <- sufficientStat(result, neigh, block, k)
    samp.PU[i,] <- res$sum[(burn+1):iter]
    print(paste("PottsUtils::SW",beta[i],tm["elapsed"],median(samp.PU[i,])))
   } else {
      print("PottsUtils::SW unavailable on this platform.")
   }

  
  # bayesImageS
  tm <- system.time(result <- swNoData(beta[i],k,neigh,block,iter))
  tmMx.bIS[i,"user"] <- tm["user.self"]
  tmMx.bIS[i,"elapsed"] <- tm["elapsed"]
  samp.bIS[i,] <- result$sum[(burn+1):iter]
  print(paste("bayesImageS::swNoData",beta[i],tm["elapsed"],median(samp.bIS[i,])))
}

## [1] "PottsUtils::SW 0 0.0330000000000013 2451"
## [1] "bayesImageS::swNoData 0 0.103999999999999 2452"
## [1] "PottsUtils::SW 0.1 0.344999999999999 2575"
## [1] "bayesImageS::swNoData 0.1 0.147000000000002 2574"
## [1] "PottsUtils::SW 0.2 0.293999999999997 2699"
## [1] "bayesImageS::swNoData 0.2 0.157 2698"
## [1] "PottsUtils::SW 0.3 0.277999999999999 2831"
## [1] "bayesImageS::swNoData 0.3 0.161999999999999 2831"
## [1] "PottsUtils::SW 0.4 0.261999999999997 2968"
## [1] "bayesImageS::swNoData 0.4 0.167000000000002 2974"
## [1] "PottsUtils::SW 0.5 0.263999999999999 3129"
## [1] "bayesImageS::swNoData 0.5 0.170000000000002 3133"
## [1] "PottsUtils::SW 0.6 0.218000000000004 3308.5"
## [1] "bayesImageS::swNoData 0.6 0.171000000000003 3309"
## [1] "PottsUtils::SW 0.7 0.195999999999998 3513"
## [1] "bayesImageS::swNoData 0.7 0.166999999999998 3524"
## [1] "PottsUtils::SW 0.8 0.167000000000002 3779"
## [1] "bayesImageS::swNoData 0.8 0.16 3772.5"
## [1] "PottsUtils::SW 0.9 0.141999999999999 4184"
## [1] "bayesImageS::swNoData 0.9 0.147000000000002 4163"
## [1] "PottsUtils::SW 1 0.122 4520.5"
## [1] "bayesImageS::swNoData 1 0.134 4526"
## [1] "PottsUtils::SW 1.1 0.113 4680"
## [1] "bayesImageS::swNoData 1.1 0.128 4677.5"
## [1] "PottsUtils::SW 1.2 0.103000000000002 4763"
## [1] "bayesImageS::swNoData 1.2 0.122999999999998 4760"
## [1] "PottsUtils::SW 1.3 0.0960000000000001 4810"
## [1] "bayesImageS::swNoData 1.3 0.120999999999999 4815"
## [1] "PottsUtils::SW 1.4 0.0919999999999987 4843"
## [1] "bayesImageS::swNoData 1.4 0.117999999999999 4845"
## [1] "PottsUtils::SW 1.5 0.0879999999999974 4862"
## [1] "bayesImageS::swNoData 1.5 0.114999999999998 4863.5"
## [1] "PottsUtils::SW 1.6 0.0860000000000021 4877"
## [1] "bayesImageS::swNoData 1.6 0.113 4877"
## [1] "PottsUtils::SW 1.7 0.083000000000002 4884"
## [1] "bayesImageS::swNoData 1.7 0.112000000000002 4883.5"
## [1] "PottsUtils::SW 1.8 0.0820000000000007 4890"
## [1] "bayesImageS::swNoData 1.8 0.117000000000001 4889"
## [1] "PottsUtils::SW 1.9 0.0829999999999984 4893"
## [1] "bayesImageS::swNoData 1.9 0.108000000000001 4893"
## [1] "PottsUtils::SW 2 0.0800000000000018 4896"
## [1] "bayesImageS::swNoData 2 0.107000000000003 4896"

Here is the comparison of elapsed times between the two algorithms (in seconds):

summary(tmMx.PU)

##       user           elapsed      
##  Min.   :0.0330   Min.   :0.0330  
##  1st Qu.:0.0850   1st Qu.:0.0860  
##  Median :0.1140   Median :0.1130  
##  Mean   :0.1522   Mean   :0.1537  
##  3rd Qu.:0.2180   3rd Qu.:0.2180  
##  Max.   :0.3160   Max.   :0.3450

summary(tmMx.bIS)

##       user           elapsed      
##  Min.   :0.1040   Min.   :0.1040  
##  1st Qu.:0.1150   1st Qu.:0.1150  
##  Median :0.1280   Median :0.1280  
##  Mean   :0.1357   Mean   :0.1356  
##  3rd Qu.:0.1600   3rd Qu.:0.1600  
##  Max.   :0.1710   Max.   :0.1710

boxplot(tmMx.PU[,"elapsed"],tmMx.bIS[,"elapsed"],ylab="seconds elapsed",names=c("SW","swNoData"))

On average, swNoData using RcppArmadillo (Eddelbuettel and Sanderson 2014) is seven times faster than SW.

s_z <- c(samp.PU,samp.bIS)
s_x <- rep(beta,times=iter-burn)
s_a <- rep(1:2,each=length(beta)*(iter-burn))
s.frame <- data.frame(s_z,c(s_x,s_x),s_a)
names(s.frame) <- c("stat","beta","alg")
s.frame$alg <- factor(s_a,labels=c("SW","swNoData"))
  if (requireNamespace("lattice", quietly = TRUE)) {
    lattice::xyplot(stat ~ beta | alg, data=s.frame)
  }

plot(c(s_x,s_x),s_z,pch=s_a,xlab=expression(beta),ylab=expression(S(z)))
abline(v=bcrit,col="red")

The overlap between the two distributions is almost complete, although it is a bit tricky to verify that statistically. The relationship between beta and S(z) is nonlinear and heteroskedastic.

rowMeans(samp.bIS) - rowMeans(samp.PU)

##  [1]  -0.040  -0.340  -1.404  -0.392   1.554   1.546   2.064  10.660  -3.618
## [10] -18.178   7.728  -2.438  -1.772   4.414   1.834   2.222   0.544  -0.576
## [19]  -1.232   0.202  -0.376

apply(samp.PU, 1, sd)

##  [1] 36.698374 35.921500 35.748666 36.849151 37.962016 38.662288 42.819998
##  [8] 48.798967 54.238319 69.861555 44.101127 31.817644 24.592360 19.228992
## [15] 15.688572 12.071654 10.052622  8.320042  5.945174  5.315147  4.304819

apply(samp.bIS, 1, sd)

##  [1] 35.416699 33.803422 37.107419 36.668399 36.605667 42.091585 45.678606
##  [8] 48.604568 52.184749 68.799173 45.259994 32.174729 24.749038 19.682361
## [15] 15.629154 12.256048  9.503787  8.781097  6.338919  5.241626  4.554782

s.frame$beta <- factor(c(s_x,s_x))
  if (requireNamespace("PottsUtils", quietly = TRUE)) {
    s.fit <- aov(stat ~ alg + beta, data=s.frame)
    summary(s.fit)
    TukeyHSD(s.fit,which="alg")
  }

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = stat ~ alg + beta, data = s.frame)
## 
## $alg
##                 diff        lwr      upr    p adj
## swNoData-SW 0.114381 -0.8183198 1.047082 0.810044

References

Eddelbuettel, Dirk, and Conrad Sanderson. 2014. “RcppArmadillo: Accelerating R with High-Performance C++ Linear Algebra.” Comput. Stat. Data Anal. 71: 1054–63. https://doi.org/10.1016/j.csda.2013.02.005.

Gelman, Andrew, and Xiao-Li Meng. 1998. “Simulating Normalizing Constants: From Importance Sampling to Bridge Sampling to Path Sampling.” Statist. Sci. 13 (2): 163–85. https://doi.org/10.1214/ss/1028905934.

Geman, Stuart, and Donald Geman. 1984. “Stochastic Relaxation, Gibbs Distributions and the Bayesian Restoration of Images.” IEEE Trans. PAMI 6: 721–41.

Grelaud, Aude, Christian P. Robert, Jean-Michel Marin, François Rodolphe, and Jean-François Taly. 2009. “ABC Likelihood-Free Methods for Model Choice in Gibbs Random Fields.” Bayesian Analysis 4 (2): 317–36. https://doi.org/10.1214/09-BA412.

Murray, Iain, Zoubin Ghahramani, and David J. C. MacKay. 2006. “MCMC for Doubly-Intractable Distributions.” In Proc. 22^nd Conf. UAI, 359–66. Arlington, VA: AUAI Press.

Swendsen, Robert H., and Jian-Sheng Wang. 1987. “Nonuniversal Critical Dynamics in Monte Carlo Simulations.” Phys. Rev. Lett. 58: 86–88. https://doi.org/10.1103/PhysRevLett.58.86.