This is an implementation of the Camshift algorithm for real-time object tracking. The algorithm tracks the object by maximising the similarity between a reference hue histogram and the hue histogram of the scene.

#!/usr/bin/env ruby
require 'hornetseye'
include Hornetseye
# Convert an RGB colour to hue, saturation and value.
#
# All three channels must share one scale (for example 0.0..1.0 or 0..255).
# Divisions are carried out in floating point, so Integer channels no longer
# have the fractional part of hue and saturation truncated away.
#
# @param r [Numeric] red channel
# @param g [Numeric] green channel
# @param b [Numeric] blue channel
# @return [Array] +[ h, s, v ]+: hue in degrees (0 up to but excluding 360),
#   saturation between 0 and 1, and value, which is the largest channel
#   returned unchanged
def hsv( r, g, b )
  min = [ r, g, b ].min
  max = [ r, g, b ].max
  # Float spread between strongest and weakest channel; dividing by a Float
  # keeps `/` from truncating when the channels are Integers.
  chroma = ( max - min ).to_f
  h = if max == min
        0 # grey has no defined hue; report 0
      elsif max == r
        # Red dominates: shift negative angles (g < b) up by a full turn.
        60 * ( g - b ) / chroma + ( g >= b ? 0 : 360 )
      elsif max == g
        60 * ( b - r ) / chroma + 120
      else
        60 * ( r - g ) / chroma + 240
      end
  # Black would divide by zero; its saturation is defined as 0.
  s = if max == 0
        0
      else
        chroma / max
      end
  [ h, s, max ]
end
# Colour quantisation: n bins per channel, each bin spanning d of the 256
# intensity levels.
n = 32
d = 256 / n
# n x n x n colour table. Each channel is filled with a ramp that starts at
# 0.5 / n and advances by 1.0 / n, i.e. the bin centres on a 0..1 scale.
# NOTE(review): presumably `unroll` selects the axis along which the ramp
# varies (red on axis 2, green on 1, blue on 0) - confirm against HornetsEye.
bin_offset = 0.5 / n
bin_step = 1.0 / n
arr = MultiArray.sfloatrgb( n, n, n )
arr.r.unroll( 2 )[] = Sequence.sfloat( n ).indgen!( bin_offset, bin_step )
arr.g.unroll( 1 )[] = Sequence.sfloat( n ).indgen!( bin_offset, bin_step )
arr.b.unroll( 0 )[] = Sequence.sfloat( n ).indgen!( bin_offset, bin_step )
# Hue of every table entry, stored with element type USINT.
hue = arr.collect( USINT ) { |rgb| hsv( rgb.r, rgb.g, rgb.b ).first }
# Open the V4L2 capture device and create an X11 window to show its frames.
# NOTE(review): 320 and 240 are presumably the requested frame/window width
# and height - confirm against the HornetsEye constructors.
input = V4L2Input.new( '/dev/video0', 320, 240 )
display = X11Display.new
output = XImageOutput.new
window = X11Window.new( display, output, 320, 240 )
# The title names the first phase of the script; it is changed later on.
window.title = 'Capture Flesh Histogram'
window.show
# Search window geometry: w pixels wide and f times as tall.
f = 1.2
w = 64
h = ( w * f ).to_i
# Channel bounds (on the 0..255 scale) applied when the flesh map is built.
min = 0x30
max = 0xD0
# Upper limit on search-window updates per frame.
max_iter = 5
# Index ranges of a w x h rectangle centred in the frame: horizontal range
# first, vertical range second, both excluding their end.
box = [ [ input.width, w ], [ input.height, h ] ].map do |size, extent|
  first = ( size - extent ) / 2
  Range.new( first, ( size + extent ) / 2, true )
end
# Set by the capture loop that follows.
hist = nil
# Capture phase: runs until display.status? turns false. Each frame replaces
# hist with the hue histogram of the pixels inside box, so the histogram of
# the last frame shown is the one that is kept.
while display.status?
  image = input.read_ubytergb
  patch = image[ *box ]
  # Quantise the patch to colour-bin indices, look up each bin's hue and
  # histogram the result.
  # NOTE(review): 360 is presumably the number of histogram bins - confirm.
  hist = ( patch / d ).map( hue ).hist( 360 )
  # Blend the boxed area towards grey so the user can see where to aim.
  image[ *box ] = 0x80 + patch / 2
  output.write( image )
  display.processEvents
end
# Weight table over the quantised colours: the captured histogram value of
# each colour's hue, multiplied by a 0/1 mask derived from the min..max bounds.
# NOTE(review): presumably `between?` requires every channel of a colour to
# lie inside the bounds - confirm against HornetsEye.
hue_weight = hue.map( hist )
in_range = arr.between?( min / 256.0, max / 256.0 ).to_ubyte
flesh_map = hue_weight * in_range
window.title = 'Camshift'
# Switch the display status back on so the tracking loop can run.
display.status = true
# Start the search window at the centre of the frame.
cx, cy = input.width / 2, input.height / 2
# Tracking phase: for every frame, move and resize the w x h search window
# centred on (cx, cy) towards the centroid of the flesh-map weights, then
# darken the window in the displayed image.

# The largest flesh-map weight does not change while tracking, so it is
# computed once instead of on every iteration.
flesh_max = flesh_map.max.to_f
while display.status?
  image = input.read_ubytergb
  sum = 0
  # Dedicated counter; the bin count `n` is no longer overwritten here.
  iterations = 0
  loop do
    region = image[ cx - w / 2 ... cx - w / 2 + w,
                    cy - h / 2 ... cy - h / 2 + h ]
    # Per-pixel weight: quantise the colour and look it up in the flesh map.
    weight = ( region / d ).map flesh_map
    old_sum = sum
    sum = weight.sum
    if sum > 0
      x = Sequence.sfloat( w ).indgen!
      y = Sequence.sfloat( h ).indgen!
      # Weighted mean position inside the region, measured from its corner.
      dx = MultiArray.tensor { |i,j| weight[ i, j ] * x[ i ] } / sum
      dy = MultiArray.tensor { |i,j| weight[ i, j ] * y[ j ] } / sum
      cx = cx + dx - w / 2
      cy = cy + dy - h / 2
      # New window size derived from the total weight, keeping ratio f.
      s = 2 * Math.sqrt( sum / flesh_max / f )
      w, h = s.to_i, ( s * f ).to_i
      # Keep the window at least 3 pixels wide/high and inside the frame.
      w = [ [ w, 3 ].max, input.width ].min
      h = [ [ h, 3 ].max, input.height ].min
      cx = [ [ cx, w / 2 ].max, input.width - w + w / 2 ].min.to_i
      cy = [ [ cy, h / 2 ].max, input.height - h + h / 2 ].min.to_i
    end
    iterations += 1
    # Stop once the total weight stops growing or the iteration cap is hit.
    break unless old_sum < sum && iterations < max_iter
  end
  # Darken the tracked window so it stands out in the output image.
  image[ cx - w / 2 ... cx - w / 2 + w, cy - h / 2 ... cy - h / 2 + h ] /= 2
  output.write image
  display.processEvents
end