Mean Shift Tracking

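This is an implementation of the Mean Shift algorithm for real-time tracking. A reference colour histogram of the object is captured first; the algorithm then tracks the object by repeatedly shifting a search window so as to maximise the similarity between the kernel-weighted reference histogram and the kernel-weighted histogram of the current scene.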

See also

#!/usr/bin/env ruby
require 'hornetseye'
include Hornetseye
# Adds a class-level helper that builds a centred coordinate grid.
class MultiArray
  # Builds an integer array created with dimensions (2, w, h) whose two
  # slices (selected through +roll+) hold per-cell offsets from the centre
  # of a w-by-h window: slice 0 is derived from the column position
  # (index % w), slice 1 from the row position (index / w).
  #
  # @param w [Integer] window width in cells
  # @param h [Integer] window height in cells
  # @return the freshly allocated coordinate array
  #
  # NOTE(review): +index+ is a floating-point array, so +index / w+ is
  # presumably not truncated before it is stored into the integer result --
  # confirm that the conversion yields the intended row offset.
  def self.ramp(w, h)
    # Presumably 0, 1, 2, ... in storage order -- TODO confirm indgen! semantics.
    index = sfloat(w, h).indgen!
    coords = MultiArray.int(2, w, h)
    # The +0.5 term centres the offsets on the middle of the window.
    coords.roll[0] = index % w - 0.5 * w + 0.5
    coords.roll[1] = index / w - 0.5 * h + 0.5
    coords
  end
end
# Adds a squared-norm helper for two-component arrays.
class Sequence_
  # Squared Euclidean norm over the two components exposed by +roll+:
  # roll[0] squared plus roll[1] squared.
  #
  # @return the element-wise sum of the two squared components
  def norm2
    first = roll[0]
    second = roll[1]
    first ** 2 + second ** 2
  end
end
# Gaussian kernel profile: (2*pi) ** (-0.5 * 2) * exp(-0.5 * x).
#
# Callers in this script pass the squared, bandwidth-normalised distance from
# the window centre. The exponent -0.5 * 2 is presumably the normalisation
# for a two-dimensional Gaussian.
#
# @param x squared normalised distance (a number, or anything Math.exp accepts)
# @return the kernel weight for +x+
def k(x)
  normalisation = (2 * Math::PI) ** (-0.5 * 2)
  normalisation * Math.exp(-0.5 * x)
end
# Derivative of the Gaussian profile +k+ with respect to its argument:
# -0.5 * (2*pi) ** (-0.5 * 2) * exp(-0.5 * x). The result is negative for
# every finite numeric +x+.
#
# @param x squared normalised distance (a number, or anything Math.exp accepts)
# @return the profile derivative at +x+
def g(x)
  slope = -0.5 * ((2 * Math::PI) ** (-0.5 * 2))
  slope * Math.exp(-0.5 * x)
end
# --- Tracking parameters ---------------------------------------------------
factor = 2.0         # window side length is scale * factor pixels
scale = 64 / factor  # kernel bandwidth; gives an initial 64-pixel window
div = 0x10           # each colour channel is divided by this before binning
size = 0x100 / div   # number of histogram bins per colour channel
max_iter = 5         # upper bound on mean-shift iterations per scale

# --- Video source and initial window ---------------------------------------
# Opened with arguments 320, 240 (presumably the requested frame size).
input = V4L2Input.new('/dev/video0', 320, 240)
w = h = (scale * factor).to_i
# Start with the window centred in the camera frame.
pos = [input.width / 2, input.height / 2]

# --- Display ---------------------------------------------------------------
display = X11Display.new
output = XVideoOutput.new
window = X11Window.new(display, output, 320, 240)
window.title = 'Capture Colour Distribution'
window.show

# Reference colour histogram; filled in by the capture loop.
model = nil
# Phase 1: capture the reference colour distribution.
# For every frame, the region around +pos+ is turned into a kernel-weighted,
# normalised colour histogram (+model+); the last frame processed before
# display.status? turns false provides the model used for tracking.
while display.status?
  frame = input.read_ubytergb
  centre_x = pos[0].to_i
  centre_y = pos[1].to_i
  region = [(centre_x - w / 2)...(centre_x + w / 2),
            (centre_y - h / 2)...(centre_y + h / 2)]
  patch = frame[*region]
  # Squared distance from the window centre, normalised by the bandwidth.
  distance = MultiArray.ramp(*patch.shape).norm2 / scale ** 2
  histogram = (patch / div).hist_weighted(size, size, size, k(distance))
  # Normalise so that the histogram entries sum to one.
  model = histogram / histogram.sum
  # Brighten the sampled region so the user can see what is being captured.
  frame[*region] = 0x80 + frame[*region] / 2
  output.write(frame)
  display.processEvents
end
# Phase 2: track the captured colour distribution with mean shift.
window.title = 'Mean Shift'
# The capture loop only ends once display.status? is false, so the flag is
# set again here to let the tracking loop run.
display.status = true
while display.status?
  image = input.read_ubytergb
  # Evaluate the current bandwidth and +/-10% variations; the candidate with
  # the highest similarity becomes the new scale and position.
  scales = [ 0.9 * scale, 1.0 * scale, 1.1 * scale ]
  opt = scales.collect do |s|
    x = pos.dup
    w, h = ( s * factor ).to_i, ( s * factor ).to_i
    ramp = MultiArray.ramp w, h
    # Squared distance from the window centre, normalised by the bandwidth.
    r = ramp.norm2 / s ** 2
    g_r = g r
    k_r = k r
    similarity = 0.0
    n = 0
    begin
      # Window of exactly w x h pixels (also for odd sizes) around x.
      box = [ x[0].to_i - w / 2 ... x[0].to_i - w / 2 + w,
              x[1].to_i - h / 2 ... x[1].to_i - h / 2 + h ]
      template = image[ *box ]
      # Kernel-weighted colour histogram of the candidate window, normalised
      # to sum to one.
      hist = ( template / div ).hist_weighted size, size, size, k_r
      target = hist / hist.sum
      # Per-bin ratio sqrt( model / target ), evaluated only where the target
      # histogram is non-zero to avoid dividing by zero.
      mask = target > 0
      quot = Math.sqrt( model.mask( mask ) / target.mask( mask ) ).
        unmask mask
      # Look up the ratio for every pixel's colour bin and weight it with the
      # kernel derivative.
      weights = ( template / div ).map( quot ) * g_r
      # Weighted sum of the pixel offsets gives the shift of the window centre.
      # NOTE(review): the sum is normalised by g_r.sum rather than
      # weights.sum as in the textbook mean-shift update; g_r.sum can never be
      # zero, whereas weights.sum could be -- confirm this is intentional.
      dx = MultiArray.tensor( 1 ) do |k,i,j|
        ramp[ i, j ][ k ] * weights[ i, j ]
      end / g_r.sum
      # Shift the centre and clamp it so the window stays inside the frame.
      x[0] = [ [ x[0] + dx[0], w / 2 ].max, input.width  - w + w / 2 ].min
      x[1] = [ [ x[1] + dx[1], h / 2 ].max, input.height - h + h / 2 ].min
      old_similarity = similarity
      # Bhattacharyya coefficient of the histogram sampled before this shift.
      similarity = Math.sqrt( target * model ).sum
      n += 1
    end while old_similarity < similarity && n < max_iter
    [ s, x, similarity ]
  end.sort_by { |v| v.last }.last
  scale = opt[ 0 ]
  pos = opt[ 1 ]
  w, h = ( scale * factor ).to_i, ( scale * factor ).to_i
  # Use the same w x h window as the tracker above; the previous
  # "centre + w / 2" upper bound lost one pixel whenever w or h was odd.
  box = [ pos[0].to_i - w / 2 ... pos[0].to_i - w / 2 + w,
          pos[1].to_i - h / 2 ... pos[1].to_i - h / 2 + h ]
  # Darken the tracked region to visualise the result.
  image[ *box ] = image[ *box ] / 2
  output.write image
  display.processEvents
end
Close