This is an implementation of the Mean Shift algorithm for real-time tracking. The algorithm tracks the object by maximising the similarity of a weighted reference histogram and a weighted scene histogram.

#!/usr/bin/env ruby
require 'hornetseye'
include Hornetseye
class MultiArray
  class << self
    # Coordinate grid for a w x h window, measured from the window centre.
    #
    # The result is created with `MultiArray.int 2, w, h`. Slice 0 of its
    # rolled view receives `index % w - 0.5 * w + 0.5` (horizontal offset) and
    # slice 1 receives `index / w - 0.5 * h + 0.5` (vertical offset), where
    # index comes from `sfloat( w, h ).indgen!` (presumably the linear element
    # index 0, 1, 2, ...).
    #
    # NOTE(review): the index array is floating point, so `index / w` is
    # presumably a fractional quotient and the half-pixel offsets are not
    # whole numbers, yet everything is stored in an integer array. Confirm
    # how the library rounds on assignment.
    def ramp( w, h )
      linear_index = sfloat( w, h ).indgen!
      coordinates = MultiArray.int( 2, w, h )
      coordinates.roll[ 0 ] = linear_index % w - 0.5 * w + 0.5
      coordinates.roll[ 1 ] = linear_index / w - 0.5 * h + 0.5
      coordinates
    end
  end
end
class Sequence_
  # Squared Euclidean norm built from the first two slices of the rolled
  # view: roll[ 0 ] ** 2 + roll[ 1 ] ** 2.
  def norm2
    first = roll[ 0 ]
    second = roll[ 1 ]
    first ** 2 + second ** 2
  end
end
# Gaussian kernel profile: ( 2 * pi ) ** ( -dimensions / 2 ) * exp( -x / 2 )
# with dimensions fixed to 2.
#
# The callers in this file pass the squared distance from the window centre
# divided by the squared scale. x may be a plain number; array arguments rely
# on the library's Math.exp support.
def k( x )
  dimensions = 2
  normalisation = ( 2 * Math::PI ) ** ( -0.5 * dimensions )
  normalisation * Math.exp( -0.5 * x )
end
# Derivative of the Gaussian profile with respect to x:
# -0.5 * ( 2 * pi ) ** ( -dimensions / 2 ) * exp( -x / 2 ), dimensions = 2.
#
# The value is negative for every finite x. The tracking loop divides a
# g-weighted sum by the sum of g, so the sign cancels there.
def g( x )
  dimensions = 2
  slope = -0.5 * ( ( 2 * Math::PI ) ** ( -0.5 * dimensions ) )
  slope * Math.exp( -0.5 * x )
end
# Camera used as the image source.
input = V4L2Input.new( '/dev/video0', 320, 240 )

# Window edge length is scale * factor pixels; scale also normalises the
# kernel distance.
factor = 2.0
scale = 64 / factor
w = h = ( scale * factor ).to_i

# The tracked position starts at the centre of the camera image.
pos = [ input.width, input.height ].collect { |extent| extent / 2 }

# Colour values are divided by div before histogramming, leaving size bins
# per histogram axis.
div = 0x10
size = 0x100 / div

# Upper bound on position updates per scale and frame.
max_iter = 5

# Output window.
display = X11Display.new
output = XVideoOutput.new
window = X11Window.new( display, output, 320, 240 )
window.title = 'Capture Colour Distribution'
window.show

# Reference histogram; filled in by the capture loop.
model = nil
# Capture phase: until display.status? turns false, rebuild the reference
# colour histogram from the window around pos on every frame, so the model
# kept afterwards is the one from the last frame shown.
while display.status?
  image = input.read_ubytergb
  centre_x = pos[ 0 ].to_i
  centre_y = pos[ 1 ].to_i
  region = [ ( centre_x - w / 2 ) ... ( centre_x + w / 2 ),
             ( centre_y - h / 2 ) ... ( centre_y + h / 2 ) ]
  patch = image[ *region ]
  # Squared distance from the patch centre in units of scale.
  distance = MultiArray.ramp( *patch.shape ).norm2 / scale ** 2
  # Kernel-weighted histogram of the quantised colours, normalised to sum 1.
  weighted = ( patch / div ).hist_weighted( size, size, size, k( distance ) )
  model = weighted / weighted.sum
  # Lighten the sampled region so the user can see what is being captured.
  image[ *region ] = 0x80 + image[ *region ] / 2
  output.write( image )
  display.processEvents
end
window.title = 'Mean Shift'
# The capture loop ended because display.status? became false; set it again
# so the tracking loop below runs.
display.status = true

# Half-open pixel ranges of the width x height window around centre. Used for
# both tracking and highlighting so the two always cover the same pixels,
# including odd sizes where 2 * ( width / 2 ) is one pixel short.
window_at = lambda do |centre, width, height|
  left = centre[ 0 ].to_i - width / 2
  top = centre[ 1 ].to_i - height / 2
  [ left ... left + width, top ... top + height ]
end

# Tracking phase: for every frame try three window scales, run a bounded
# number of position updates for each, and keep the scale and position with
# the highest similarity to the reference histogram.
while display.status?
  image = input.read_ubytergb
  scales = [ 0.9 * scale, 1.0 * scale, 1.1 * scale ]
  opt = scales.collect do |s|
    x = pos.dup
    w, h = ( s * factor ).to_i, ( s * factor ).to_i
    ramp = MultiArray.ramp w, h
    # Squared distance from the window centre in units of the trial scale.
    r = ramp.norm2 / s ** 2
    g_r = g r
    k_r = k r
    similarity = 0.0
    n = 0
    begin
      box = window_at.call( x, w, h )
      template = image[ *box ]
      # Kernel-weighted colour histogram of the candidate window, sum 1.
      hist = ( template / div ).hist_weighted size, size, size, k_r
      target = hist / hist.sum
      # Only bins that are populated in the candidate are used, which avoids
      # dividing by zero; presumably unmask fills the other bins with zero.
      mask = target > 0
      quot = Math.sqrt( model.mask( mask ) / target.mask( mask ) ).
        unmask mask
      # Per-pixel weight: presumably map looks up each pixel's quantised
      # colour in quot; the result is multiplied by the kernel derivative.
      weights = ( template / div ).map( quot ) * g_r
      # Weighted sum of the centre-relative coordinates. g_r is negative, and
      # the sign cancels against g_r.sum.
      # NOTE(review): the textbook mean-shift update divides by the sum of the
      # weights, not by g_r.sum, which gives a shorter step - confirm this
      # damping is intended.
      dx = MultiArray.tensor( 1 ) do |c,i,j|
        ramp[ i, j ][ c ] * weights[ i, j ]
      end / g_r.sum
      # Keep the whole window inside the camera image.
      x[0] = [ [ x[0] + dx[0], w / 2 ].max, input.width - w + w / 2 ].min
      x[1] = [ [ x[1] + dx[1], h / 2 ].max, input.height - h + h / 2 ].min
      old_similarity = similarity
      # Similarity of the window the step started from (target was computed
      # before x moved).
      similarity = Math.sqrt( target * model ).sum
      n += 1
    end while old_similarity < similarity && n < max_iter
    [ s, x, similarity ]
  end.sort_by { |candidate| candidate.last }.last
  scale = opt[ 0 ]
  pos = opt[ 1 ]
  w, h = ( scale * factor ).to_i, ( scale * factor ).to_i
  # Darken exactly the window that was tracked.
  box = window_at.call( pos, w, h )
  image[ *box ] = image[ *box ] / 2
  output.write image
  display.processEvents
end