The following code implements some of the concepts of the paper:

Reinhard Heckel, Max Simchowitz, Kannan Ramchandran, Martin Wainwright, Approximate Ranking from Pairwise Comparisons, Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics, PMLR 84:1057-1066, 2018. http://proceedings.mlr.press/v84/heckel18a

Create random comparison data to analyze the algorithm

Number of items

n <- 200 

Create Bradley-Terry-Luce (BTL) scores

One way to parameterize the model is \(M_{ij} = P(i > j) = \frac{e^{w_i}}{e^{w_i} + e^{w_j}} = \frac{1}{1 + e^{-(w_i - w_j)}}\), where \(w_i\) is the BTL score of item \(i\).
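
As a quick numeric check of this equivalence, here is a throwaway sketch with two example scores (not used below):

wi <- 1.3; wj <- 0.4                  # two hypothetical BTL scores
exp(wi) / (exp(wi) + exp(wj))         # first form
1 / (1 + exp(-(wi - wj)))             # second form; both are about 0.711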

Create scores

w <- rlogis(n) # compare to Fig. 1(a)
#w <- sample(seq(0, 1, length.out = n)^2) # compare to Fig. 1(b)
w <- w - min(w)
#w <- w/max(w)

hist(w, breaks = 20)

plot(sort(w, decreasing = TRUE), ylab = "BTL score")

Calculate the probabilities \(M_{ij}\) that \(i\) wins over \(j\) using BTL’s logistic function.

phi <- function(t) 1/(1+exp(-t))
  
M <- outer(1:n, 1:n, FUN = function(i,j) phi(w[i]-w[j]))
diag(M) <- NA
M[1:5,1:5]
##              [,1]      [,2]         [,3]        [,4]         [,5]
## [1,]           NA 0.9990322 0.3378874539 0.662624905 0.0677971854
## [2,] 0.0009677833        NA 0.0004941107 0.001899013 0.0000704481
## [3,] 0.6621125461 0.9995059           NA 0.793759312 0.1247380761
## [4,] 0.3373750951 0.9981010 0.2062406875          NA 0.0357071721
## [5,] 0.9322028146 0.9999296 0.8752619239 0.964292828           NA

Check that \(M_{ij} + M_{ji} = 1\)

table(M + t(M))
## 
##     1 
## 39800
hist(M)

Calculate Borda scores \(\tau_i = \frac{1}{n-1} \sum_{j \ne i} M_{ij}\), the probability that item \(i\) beats another item chosen uniformly at random.

tau_BTL <- rowMeans(M, na.rm = TRUE)
hist(tau_BTL, breaks = 20)

Create a rater that decides whether \(i\) is preferred over \(j\) by sampling according to \(M_{ij}\)

rate <- function(i, j, M) sample(c(1, 0), 1, prob = c(M[i, j], 1-M[i,j]))
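
Equivalently, the comparison could be drawn as a single Bernoulli sample (a small alternative sketch with a hypothetical name; not used below):

rate_bernoulli <- function(i, j, M) rbinom(1, size = 1, prob = M[i, j])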

Compare two items (here items 1 and 2) 100 times

table(replicate(100, rate(1,2, M)))
## 
##   1 
## 100

Compare the worst and the best item 100 times

table(replicate(100, rate(which.min(w),which.max(w), M)))
## 
##   0 
## 100
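
As a sanity check (a rough sketch; the numbers vary with the random seed), the empirical win rate of item 1 against uniformly random opponents should be close to its Borda score:

opponents <- sample(setdiff(1:n, 1), 1000, replace = TRUE)
mean(sapply(opponents, function(j) rate(1, j, M)))  # empirical win rate of item 1
tau_BTL[1]                                          # its Borda score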

The Hamming-LUCB algorithm
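
For each item \(i\), the implementation below keeps an empirical Borda score \(\hat\tau_i\) and a confidence-interval width after \(u_i\) comparisons. As coded (the constants follow the implementation; see the paper for the exact bound), the width is

\[
\alpha_i = \sqrt{\frac{\log(n/\delta) + 0.75\,\log\log(n/\delta) + 1.5\,\log\left(1 + \log\frac{u_i}{2}\right)}{2\,u_i}}
\]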

H_LUCB <- function(k, h, delta, M, plot = FALSE) {
  n <- nrow(M)
  tau <- rep(0, length.out = n)

  # Initialization: compare each item with a random item
  for(i in 1:n) {
    j <- sample(setdiff(1:n, i), 1)
    tau[i] <- rate(i, j, M)
    # interestingly, the information about whether j wins is ignored!
  }

  # number of comparisons per item (T here corresponds to u in the paper)
  T <- rep(1, length.out = n)

  while(TRUE) {
    # calculate alpha and find the items to compare next
    alpha <- sqrt((log(1/(delta/n)) + 
      0.75*log(log(1/(delta/n))) + 
      1.5*log(1+log(T/2)))/(2*T))

    o <- order(tau, decreasing = TRUE)
    top <- o[1:(k-h)]
    bottom <- o[(k+1+h):n]

    d1 <- top[which.min((tau-alpha)[top])]
    d2 <- bottom[which.max((tau+alpha)[bottom])]
    
    top_h <- union(o[(k-h+1):k], d1)
    bottom_h <- union(o[(k+1):(k+h)], d2)
    b1 <- top_h[which.max(alpha[top_h])]
    b2 <- bottom_h[which.max(alpha[bottom_h])]
   
    if(plot) {
      ul <- tau+alpha
      ll <- tau-alpha
      plot(tau[o], ylim = c(min(ll), max(ul)), cex = .2, ylab = "estimated tau (+ conf. interval)")
      lines(ul[o], lty = 3)
      lines(ll[o], lty= 3)
      abline(v = k-h, col = "red")
      abline(v = k+h, col = "red")
      abline(v = k, col = "red", lwd = 2)

      abline(h=ll[o][k-h], col = "grey", lty = 2)
    
      abline(v = which(o == b1), col = "green", lwd = 2)
      abline(v = which(o == b2), col = "green", lwd = 2)
      
      abline(v = which(o == d1), col = "blue")
      abline(v = which(o == d2), col = "blue")
    
      legend("topright", lty = 1, bty = "n",
        col = c("red", "green", "blue"), legend = c("k+/-h", "b1 & b2", "d1 & d2"))

      r <- readline("Press enter or q for quit.")
      if(r == "q") break
    }

    for(i in c(b1, b2)) {
      j <- sample(setdiff(1:n, i), 1)
      r <- rate(i, j, M)
      T[i] <- T[i] + 1
      tau[i] <- (T[i] - 1)/T[i] * tau[i] + (1/T[i])*r
      
      # the paper ignores j!
      #T[j] <- T[j] + 1
      #tau[j] <- (T[j] - 1)/T[j] * tau[j] + (1/T[j])*-(r-1)
    }

    # termination condition
    alpha <- sqrt((log(1/(delta/n)) + 0.75*log(log(1/(delta/n))) + 1.5*log(1+log(T/2)))/(2*T))
    if((tau[d1] - alpha[d1]) >= (tau[d2] + alpha[d2])) break
  }
  
  list(T = T, tau = tau, alpha = alpha, k = k, h = h, n = n)
}

Run Hamming-LUCB; the result is \(h\)-Hamming accurate with probability \(1-\delta\)

r <- H_LUCB(k = 100, h = 20, delta = .5, M)

Number of comparisons

sum(r$T)
## [1] 60784
summary(r$T)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    17.0    44.5   129.0   303.9   513.0   937.0
hist(r$T, breaks = 20)

o <- order(r$tau, decreasing = TRUE)
ul <- r$tau+r$alpha
ll <- r$tau-r$alpha
plot(r$tau[o], ylim = c(min(ll), max(ul)), cex = .2, ylab = "tau")
lines(ul[o], lty=3)
lines(ll[o], lty=3)
abline(v = r$k-r$h, col = "red")
abline(v = r$k+r$h, col = "red")
abline(v = r$k, col = "red", lwd  = 2)

abline(h=ll[o][r$k-r$h], col = "grey", lty = 2)

plot(r$T[o], type = "l", ylab = "Number of comparisons")

plot(r$alpha[o], type = "l", ylab = "alpha")

How well was the original score recovered?

plot(w, r$tau)
abline(v = quantile(w, probs = (r$n-r$k+r$h)/r$n), col = "red")
abline(h = quantile(r$tau, probs = (r$n-r$k+r$h)/r$n), col = "blue")

cor(w, r$tau, method = "spearman") # non-linear -> rank correlation
## [1] 0.9857666

How well was the original Borda score recovered?

plot(tau_BTL, r$tau)
abline(v = quantile(tau_BTL, probs = (r$n-r$k+r$h)/r$n), col = "red")
abline(h = quantile(r$tau, probs = (r$n-r$k+r$h)/r$n), col = "blue")

cor(tau_BTL, r$tau) # should be linear -> Pearson correlation
## [1] 0.9822839

\(D_h\), the number of mistakes in the top \(k\), should be less than \(h\)

w_k <- order(w, decreasing = TRUE)[1:r$k]
w_k
##   [1]  34 169 146  22  30  11 164  90   5  20 160 185  76 199 100  19  49
##  [18]  42  65 175 108 170  31 115 140  37 102 114  60  99 139 105  44  66
##  [35]  23  97  54 177  53 144  84 167 141 195  32  50 104 181  39 187 161
##  [52]  67  15 155 182 152 134 101 135 126 137 117 156  17  58 192 198  64
##  [69]  95  68  70 110  62  29  83   3  33 124  92  26  91  74  93 148 200
##  [86] 165  35  98 166 112  73 107 154  47  77 176  88 133   8  71
tau_k <- order(r$tau, decreasing = TRUE)[1:r$k]
tau_k
##   [1]   5  22  34 146  90  20 164 169 170  11  65  23  32  49  30  76 100
##  [18] 115 105  31 161 199  44 108  37  60  19  99 160  54  58 181 177 144
##  [35] 175 185 139 102 114  42  84 140  97  67  39  66 182 187 152  17 126
##  [52]  50  92  15 195 141  53 167 155 135 104 192  70 134  68  62 137 198
##  [69] 117 101 124  83  64  95  35 156  29   3  98  26 110  91  33 165 200
##  [86] 148 166  47  93  74 133  88 176 154  77  89 107  73  71 112
D_h <- r$k - length(intersect(w_k, tau_k))
D_h
## [1] 1
r$h
## [1] 20
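
The same check can be wrapped in a small helper (a hypothetical convenience function, not from the paper):

hamming_top_k <- function(score_true, score_est, k) {
  # number of items in the true top k that are missing from the estimated top k
  top_true <- order(score_true, decreasing = TRUE)[1:k]
  top_est  <- order(score_est, decreasing = TRUE)[1:k]
  k - length(intersect(top_true, top_est))
}
hamming_top_k(w, r$tau, r$k)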

Compare with randomly selecting items

rank_random <- function(M, rep) {
  # estimate Borda scores by comparing rep uniformly random pairs
  n <- nrow(M)
  wins <- rep(0, length.out = n)
  T <- rep(0, length.out = n)

  for(k in 1:rep) {
    ij <- sample(1:n, 2)
    i <- ij[1]; j <- ij[2]
    if(rate(i, j, M)) wins[i] <- wins[i] + 1 else wins[j] <- wins[j] + 1
    T[i] <- T[i] + 1
    T[j] <- T[j] + 1
  }

  # estimated Borda scores (NaN for items that were never compared)
  wins/T
}

tau <- rank_random(M, 100)
plot(tau_BTL, tau)

cor(tau_BTL, tau)
## [1] NA
tau <- rank_random(M, 1000)
plot(tau_BTL, tau)

cor(tau_BTL, tau)
## [1] 0.8325485
tau <- rank_random(M, 10000)
plot(tau_BTL, tau)

cor(tau_BTL, tau)
## [1] 0.9804478
tau <- rank_random(M, 50000)
plot(tau_BTL, tau)

cor(tau_BTL, tau)
## [1] 0.9956556
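
The budget sweep above can also be scripted (a small sketch; use = "complete.obs" drops items that were never compared, which is one way to handle the resulting NaNs):

budgets <- c(100, 1000, 10000, 50000)
sapply(budgets, function(b) cor(tau_BTL, rank_random(M, b), use = "complete.obs"))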

Look at what happens during Hamming-LUCB

Small number of items

n <- 20

Create Bradley-Terry-Luce (BTL) scores

w <- rlogis(n) # compare to Fig. 1(a)
w <- w - min(w)

phi <- function(t) 1/(1+exp(-t))
M <- outer(1:n, 1:n, FUN = function(i,j) phi(w[i]-w[j]))
diag(M) <- NA

tau_BTL <- rowMeans(M, na.rm = TRUE)

# try this
#r <- H_LUCB(k = 7, h = 2, delta = .5, M, plot = TRUE)
# d_1 ... item in the current top set with the lowest lower confidence bound
# d_2 ... item in the current bottom set with the highest upper confidence bound
# b_1, b_2 ... items compared next with a randomly chosen item; they have the
#              largest confidence-interval width alpha among the items around rank k
#              together with d_1 and d_2