Camshift Tracking

This is an implementation of the Camshift algorithm for real-time tracking. The algorithm tracks an object by maximising the similarity between a reference hue histogram and the hue histogram of the current scene.

See also

#!/usr/bin/env ruby
require 'hornetseye'
include Hornetseye
# Convert an RGB triple to hue, saturation and value.
#
# The three channels must share one scale (for example floats in 0..1 or
# integers in 0..255).
#
# @param r [Numeric] red channel
# @param g [Numeric] green channel
# @param b [Numeric] blue channel
# @return [Array] +[ h, s, v ]+ where
#   * +h+ is the hue in degrees (0 for greys); for Integer inputs it is
#     computed with integer division and is therefore a whole number,
#   * +s+ is the saturation +(max - min) / max+ as a fraction (0 for black),
#   * +v+ is the largest of the three channels.
def hsv( r, g, b )
  min = [ r, g, b ].min
  max = [ r, g, b ].max
  if max == min
    # Grey: hue is undefined, report 0.
    h = 0
  elsif max == r
    h = 60 * ( g - b ) / ( max - min )
    # g < b yields a negative angle; wrap it into the upper part of the circle.
    h += 360 if g < b
  elsif max == g
    h = 60 * ( b - r ) / ( max - min ) + 120
  else
    h = 60 * ( r - g ) / ( max - min ) + 240
  end
  if max == 0
    s = 0
  else
    # Force float division: with Integer channels a plain "/" would truncate
    # the ratio to 0 or 1.
    s = ( max - min ).to_f / max
  end
  v = max
  [ h, s, v ]
end
# Build a lookup table mapping a quantised RGB colour to its hue.
# n is the number of cells per colour channel; d is how many 8-bit
# intensity values fall into one cell (256 / n).
n = 32
d = 256 / n
arr = MultiArray.sfloatrgb( n, n, n )
# Fresh sequence 0.5/n, 1.5/n, ... (start 0.5/n, step 1/n) for each channel.
ramp = lambda { Sequence.sfloat( n ).indgen!( 0.5 / n, 1.0 / n ) }
# Each channel is filled through a differently unrolled view of the array
# (presumably so that r, g and b each vary along their own axis - see the
# HornetsEye documentation of "unroll").
arr.r.unroll( 2 )[] = ramp.call
arr.g.unroll( 1 )[] = ramp.call
arr.b.unroll( 0 )[] = ramp.call
# Hue of every cell colour, collected with element type USINT.
hue = arr.collect( USINT ) { |x| hsv( x.r, x.g, x.b ).first }
# Stage 1: display the live camera image and keep sampling a hue histogram
# from a box in the middle of the frame until the display status turns false.
input   = V4L2Input.new( '/dev/video0', 320, 240 )
display = X11Display.new
output  = XImageOutput.new
window  = X11Window.new( display, output, 320, 240 )
window.title = 'Capture Flesh Histogram'
window.show

# Parameters shared with the tracking stage further down.
f = 1.2                # height / width ratio of the box
w = 64                 # box width in pixels
h = ( w * f ).to_i     # box height in pixels
min = 0x30             # lower 8-bit bound used by the tracking stage
max = 0xD0             # upper 8-bit bound used by the tracking stage
max_iter = 5           # iteration limit used by the tracking stage

# Sampling box of w x h pixels centred in the frame.
columns = ( ( input.width - w ) / 2 )...( ( input.width + w ) / 2 )
rows    = ( ( input.height - h ) / 2 )...( ( input.height + h ) / 2 )
box = [ columns, rows ]

hist = nil
while display.status?
  image = input.read_ubytergb
  patch = image[ *box ]
  # Quantise the patch colours, translate them to hue and histogram them.
  hist = ( patch / d ).map( hue ).hist( 360 )
  # Brighten the sampled box so the user can see where it is.
  image[ *box ] = 0x80 + patch / 2
  output.write( image )
  display.processEvents
end
# Stage 2: track the object with an iterated, resizing search window.
#
# flesh_map is a per-colour weight table: every quantised RGB cell gets the
# reference-histogram count of its hue (hue.map( hist )), multiplied by a mask
# converted to ubyte from arr.between?( min / 256.0, max / 256.0 )
# (presumably 1 where the cell colour lies within the bounds and 0 elsewhere -
# confirm against the HornetsEye documentation).
flesh_map = hue.map( hist ) * arr.between?( min / 256.0, max / 256.0 ).to_ubyte
window.title = 'Camshift'
# The capture loop above only ends once display.status? is false, so set it
# back to true before entering the tracking loop.
display.status = true
# Start with the search window centred in the frame.
cx = input.width / 2
cy = input.height / 2
while display.status?
  image = input.read_ubytergb
  sum = 0
  # Iteration counter for this frame. NOTE: this reuses the name "n" that held
  # the number of colour cells per channel at the top of the script.
  n = 0
  # do-while: the body runs at least once per frame and repeats as long as the
  # total weight inside the window grew and fewer than max_iter passes ran.
  begin
    # Current w x h search window centred on ( cx, cy ).
    region = image[ cx - w / 2 ... cx - w / 2 + w,
                    cy - h / 2 ... cy - h / 2 + h ]
    # Per-pixel weight: quantise the colours and look them up in flesh_map.
    weight = ( region / d ).map flesh_map
    old_sum = sum
    sum = weight.sum
    if sum > 0
      # Pixel coordinates inside the window.
      x = Sequence.sfloat( w ).indgen!
      y = Sequence.sfloat( h ).indgen!
      # Weighted mean position inside the window (assumes MultiArray.tensor
      # sums the block over i and j - TODO confirm).
      dx = MultiArray.tensor { |i,j| weight[ i, j ] * x[ i ] } / sum
      dy = MultiArray.tensor { |i,j| weight[ i, j ] * y[ j ] } / sum
      # Move the window centre onto that mean position (dx, dy are relative to
      # the window's top-left corner at cx - w / 2, cy - h / 2).
      cx = cx + dx - w / 2
      cy = cy + dy - h / 2
      # New window size: width s and height s * f, i.e. an area of
      # 4 * sum / flesh_map.max with aspect ratio f.
      s = 2 * Math.sqrt( sum / flesh_map.max.to_f / f )
      w, h = s.to_i, ( s * f ).to_i
      # Keep the window at least 3 pixels wide/high and no larger than the frame.
      w = [ [ w, 3 ].max, input.width ].min
      h = [ [ h, 3 ].max, input.height ].min
      # Clamp the centre so that the window stays inside the frame, and turn
      # it back into an integer pixel position.
      cx = [ [ cx, w / 2 ].max, input.width - w + w / 2 ].min.to_i
      cy = [ [ cy, h / 2 ].max, input.height - h + h / 2 ].min.to_i
    end
    n += 1
  end while old_sum < sum and n < max_iter
  # Halve the pixel values inside the final window to mark it in the output.
  image[ cx - w / 2 ... cx - w / 2 + w, cy - h / 2 ... cy - h / 2 + h ] /= 2
  output.write image
  display.processEvents
end
Close