Flo

Valid XHTML 1.1
Valid CSS
optimized for firefox
never anoyed me
i use jabber
100%
just say NO!
website by Florian Schmidt

this was a project for a HCI-seminar of my masters study

ocv stands for open computer vision and is an open source library for computer vision. it includes a python interface. the idea was to implement a basic eye tracker in python with opencv.

because i get more and more emails asking about opencv i decided to add a working C++ example. it continuously grabs a frame from an ocv compatible camera device, displays the input frame, the red, green and blue channels and a modified output frame in your favored windowing environment (assuming it's OCV compatible ;) )

c++ ocv example
#include <stdio.h>
#include <opencv/cv.h>
#include <opencv/highgui.h>

/*
  Debian GNU/Linux:
  
  # get needed packages:
  apt-get install libcv-dev libcv0.9.7-0  libcvaux0.9.7-0 libhighgui-dev libhighgui0.9.7-0 opencv-doc

  # compiler command line:
  g++ -o opencv_example opencv_example.cpp -I/usr/include/opencv  -lcxcore0.9.7 -lcv0.9.7 -lhighgui0.9.7 -lcvaux0.9.7
  # or better:
  g++ -o opencv_example opencv_example.cpp `pkg-config --cflags --libs opencv`

*/


int main(int argc, char* argv[]) {
        /* 
           this requires a working
           video 4 linux (v4l) capture device
           or a linux FireWire (IEEE1394) device
           or a video for windows (wfv) device
           or a Matrox Imaging Library (MIL) device
        */

        CvCapture* cap = cvCaptureFromCAM(0);
        if(!cap) {
                printf("could not get camera capture device!\n");
                return -1;
        }

        // set desired frame size
        cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, 320);
        cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, 240);

        /*
          open new windows
          required windowing environment as gtk or win32
        */

        cvNamedWindow("input", CV_WINDOW_AUTOSIZE);
        cvNamedWindow("red", CV_WINDOW_AUTOSIZE);
        cvNamedWindow("green", CV_WINDOW_AUTOSIZE);
        cvNamedWindow("blue", CV_WINDOW_AUTOSIZE);
        cvNamedWindow("output", CV_WINDOW_AUTOSIZE);
        
        // get a first frame to get image dimensions
        IplImage* image = cvQueryFrame(cap); // do not release or modify this image...
        if(!image) {
                printf("could not get image from camera capture device!\n");
                return -1;
        }

        CvSize size = cvGetSize(image);
        
        // create three 8bit channel images
        IplImage* red = cvCreateImage(size, IPL_DEPTH_8U, 1);
        IplImage* green = cvCreateImage(size, IPL_DEPTH_8U, 1);
        IplImage* blue = cvCreateImage(size, IPL_DEPTH_8U, 1);
        // get a 3 channel output image
        IplImage* output = cvCreateImage(size, IPL_DEPTH_8U, 3);

        while(1) {
                image = cvQueryFrame(cap); // get next image
                if(!image) {
                        printf("could not get image from camera capture device!\n");
                        return -1;
                }

                // display input image
                cvShowImage("input", image);
        
                // split the input image to its channels
                // from the docs: "...In OpenCV color images have BGR  channel order..."
                cvSplit(image, blue, green, red, NULL);

                // iterate over the image data to look for very red pixels
                int height = red->height;
                int width = red->width;
                int step = red->widthStep;

                unsigned char* red_data = (unsigned char*)red->imageData;
                unsigned char* green_data = (unsigned char*)green->imageData;
                unsigned char* blue_data = (unsigned char*)blue->imageData;
                unsigned char* output_data = (unsigned char*)output->imageData;

                // copy input frame to output frame
                cvCopy(image, output);
                
                /***************************************************************
                print a few pixel in the middle of the frame
                (mark this reagion with a black rectangle into the red channel)
                */

                int w = 5;
                int h = 10;
                for(int j = height/2; j < height/2 + h; j++) {
                        for(int i = width/2; i < width/2 + w ; i++) {
                                printf("(%3d %3d %3d) "
                                       red_data[j * step + i],
                                       green_data[j * step + i],
                                       blue_data[j * step + i]
                                       );
                        }
                        printf("\n");
                }
                // draw rectangle into the red channel
                cvRectangle(
                        red, 
                        cvPoint(width/2, height/2), 
                        cvPoint(width/2 + w, height/2 + h), 
                        cvScalar(0),
                        14);


                /***************************************************************/
                

                /***************************************************************
                display only red-ish pixel in output image
                */

                int red_min = 150;
                int green_max = 100;
                int blue_max = 100;
                int found = 0;
                for(int j = 0; j < height; j++) {
                        for(int i = 0; i < width; i++) {
                                if(red_data[j * step + i] > red_min
                                   && green_data[j * step + i] < green_max
                                   && blue_data[j * step + i]  < blue_max) {
                                        /*
                                        printf("found very red pixel at %4d %4d: %3d\n", 
                                               j, i, red_data[j * step + i]);
                                        */

                                        found ++;
                                } else {
                                        // black out this pixel

                                        // write to single color channels
                                        /// red_data[j * step + i] = 0;
                                        /// green_data[j * step + i] = 0;
                                        /// blue_data[j * step + i] = 0;

                                        // write directly to output image
                                        output_data[j * step * 3 + i * 3 + 0] = 0// clear blue part
                                        output_data[j * step * 3 + i * 3 + 1] = 0// clear green part
                                        output_data[j * step * 3 + i * 3 + 2] = 0// clear red part
                                }
                        }
                }
                printf("found %d red-ish pixels. (%.1f%%)\n"
                       found,
                       (double)found / (double)(width * height) * 100.);
                /// now merge the channels back to one BGR color image
                /// cvMerge(blue, green, red, NULL, output);
                /***************************************************************/

                // display output frame
                cvShowImage("output", output);

                // display red, green, blue channels
                cvShowImage("red", red);
                cvShowImage("green", green);
                cvShowImage("blue", blue);

                printf("\n");

                // update GUI and wait for user key press
                int key = cvWaitKey(50);
                if(key == 27) {
                        // escape key pressed
                        break;
                }
        }
        // release memory for user allocated images
        cvReleaseImage(&red);
        cvReleaseImage(&green);
        cvReleaseImage(&blue);
        cvReleaseImage(&output);
}
written in 9.6ms
another example implementing the houghlines example from the opencv documentation on an image loaded from disk (no webcam needed):
c++ ocv hough example
#include <stdio.h>
#include <opencv/cv.h>
#include <opencv/highgui.h>

/*
  Debian GNU/Linux:
  
  # get needed packages:
  apt-get install libcv-dev libcv0.9.7-0  libcvaux0.9.7-0 libhighgui-dev libhighgui0.9.7-0 opencv-doc

  # compiler command line:
  g++ -o opencv_example opencv_example.cpp -I/usr/include/opencv  -lcxcore0.9.7 -lcv0.9.7 -lhighgui0.9.7 -lcvaux0.9.7
  # or better:
  g++ -o opencv_single_image_example opencv_single_image_example.cpp `pkg-config --cflags --libs opencv`

*/


int main(int argc, char* argv[]) {
        // load image from file
        IplImage* image = cvLoadImage("red_line.png");

        // iterate over the image data to look for very red pixels
        int height = image->height;
        int width = image->width;
        int step = image->widthStep;

        if(image->depth != IPL_DEPTH_8U) {
                printf("unknown image data format %d\n", image->depth);
                return -1;
        }
        unsigned char* data = (unsigned char*)image->imageData;
                
        /***************************************************************
        get red-ish pixel coordinates
        */

        int red_min = 150;
        int green_max = 100;
        int blue_max = 100;

        int found = 0;
        for(int j = 0; j < height; j++) {
                for(int i = 0; i < width; i++) {
                        if((data[j * step + i * 3 + 2] > red_min
                           && data[j * step + i * 3 + 1] < green_max
                           && data[j * step + i * 3 + 0]  < blue_max) || 0) {
                                printf("found very red pixel at %4d %4d: (%3d %3d %3d)\n"
                                       j, i, 
                                       data[j * step + i * 3 + 2], // red
                                       data[j * step + i * 3 + 1], // green
                                       data[j * step + i * 3 + 0// blue
                                        );
                                found ++;
                        }
                }
        }
        printf("found %d red-ish pixels. (%.1f%%)\n"
               found,
               (double)found / (double)(width * height) * 100.);

        IplImage* color_dst = cvCreateImage(cvGetSize(image), 83 );
        cvCopy(image, color_dst);

        /// do spec. hough transformation on input image (from ocv documentation)
        // get grayscale image (should be binary from ocv docs)
        IplImage* gray_image = cvCreateImage(cvGetSize(image), IPL_DEPTH_8U, 1);
        cvCvtColor(image, gray_image, CV_RGB2GRAY);
        // get storage for line params
        CvMemStorage* storage = cvCreateMemStorage(0);
        // 2 pixel distance resolution
        // 2 degree angular resolution
        CvSeq* lines = cvHoughLines2(gray_image, storage, CV_HOUGH_STANDARD, 1, CV_PI/180. * 2100);

        for(int i = 0; i < MIN(lines->total,10); i++) {
                float* line = (float*)cvGetSeqElem(lines, i);
                float rho = line[0];
                float theta = line[1];
                printf("line %d: rho: %.1f, theta: %.1fdeg\n", i + 1, rho, theta / CV_PI * 180.);
                CvPoint pt1, pt2;
                double a = cos(theta), b = sin(theta);
                double x0 = a * rho, y0 = b * rho;
                pt1.x = cvRound(x0 + 1000*(-b));
                pt1.y = cvRound(y0 + 1000*(a));
                pt2.x = cvRound(x0 - 1000*(-b));
                pt2.y = cvRound(y0 - 1000*(a));
                cvLine(color_dst, pt1, pt2, CV_RGB(0,255,0), 14);
        }

        cvNamedWindow("input"1 );
        cvShowImage("input", image);

        cvNamedWindow("output"1 );
        cvShowImage("output", color_dst);

        while(cvWaitKey(100) != 27);
}
written in 5.6ms

and here the resulting screenshots:

c++ ocv test screenshots
cpp_cv_hough_example.png cpp_cv_hough_example.png
71.7 kB
2006-12-18 22:11:05
cpp_cv_example.png cpp_cv_example.png
524.8 kB
2006-12-18 22:11:05

a small .tar.gz bundle can be downloaded including both example sources and images as well as a small makefile

c++ ocv examples download
cpp_cv_examples.tar.gz 1.0 MB348 2006-12-18 22:10:48

here is a small code snippet that demonstrates opencv usage with python:

ocv example
#!/usr/bin/python2.3
import sys
import os
import time
import math

from opencv import cv
from opencv import highgui

if __name__ == "__main__":
        # Demo loop: show the mirrored camera frame in "original" and a
        # brightness-scaled copy with a green rectangle in "filterwin".
        # Every 5th frame is written to disk. Quit with ESC or "q".

        # create windows
        highgui.cvNamedWindow("original", highgui.CV_WINDOW_AUTOSIZE)
        highgui.cvMoveWindow("original", 100, 100)
        highgui.cvNamedWindow("filterwin", highgui.CV_WINDOW_AUTOSIZE)
        highgui.cvMoveWindow("filterwin", 300, 300)

        # slider 0..300, read below as a scale factor 0.0 .. 3.0
        highgui.cvCreateTrackbar("regler1", "filterwin", 0, 300, None)

        # open capturing device; check it before it is used
        capture = highgui.cvCaptureFromCAM(0)
        if not capture:
                print("Error opening capture device")
                sys.exit(1)
        highgui.cvSetCaptureProperty(capture, highgui.CV_CAP_PROP_FRAME_WIDTH, 320)
        highgui.cvSetCaptureProperty(capture, highgui.CV_CAP_PROP_FRAME_HEIGHT, 240)

        # request a first frame to learn the frame dimensions
        frame = highgui.cvQueryFrame(capture)
        if frame is None:
                print("Error reading first frame from capture device")
                sys.exit(1)
        # frame_size.width, frame_size.height
        frame_size = cv.cvGetSize(frame)

        # slider for the x position of the rectangle's first corner
        highgui.cvCreateTrackbar("x pos", "filterwin", 0, frame_size.width, None)

        # create a new image with the same dimensions
        filtered_frame = cv.cvCloneImage(frame)

        # define the drawing color
        green = cv.cvScalar(0, 255, 0, 0)

        count = 0        # frames since the last statistics output
        frame_count = 0  # frames since program start
        sum_diff = 0     # accumulated computation time of the last `count` frames
        last_fps = time.time()

        while 1:
                # acquire new frame
                frame = highgui.cvQueryFrame(capture)
                if frame is None:
                        break
                start = time.time()
                frame_count += 1

                # flip frame horizontally (mirror view)
                cv.cvFlip(frame, None, 1)

                # save every 5th frame; the running frame number is zero
                # padded so earlier files are not overwritten and the
                # names contain no spaces
                if not frame_count % 5:
                        highgui.cvSaveImage("frame_%05d.png" % frame_count, frame)

                # get trackbar value and map 0..300 to 0.0..3.0
                scale = float(highgui.cvGetTrackbarPos("regler1", "filterwin")) / 100.0

                # scale pixel values:
                # filtered_frame = frame * scale
                cv.cvConvertScale(frame, filtered_frame, scale, 0)

                # further operations to experiment with:
                #   make frame copy:      cv.cvCopy(frame, filtered_frame, None)
                #   set a trackbar:       highgui.cvSetTrackbarPos("canny1", "controls", t1)
                #   grayscale copy:       cv.cvCvtColor(frame, gray, cv.CV_RGB2GRAY)
                #   canny edges:          cv.cvCanny(gray, canny, t1, t2, 5)
                #   apply a threshold:    cv.cvThreshold(last, interesset, t3, 0, cv.CV_THRESH_TOZERO)
                #   add a scalar:         cv.cvAddS(frame, s, info_frame, interesset)
                #   resize a frame:       cv.cvResize(eyes, eyes_zoom, cv.CV_INTER_LINEAR)
                #   load frame from disk: eimg = highgui.cvLoadImage("ex%d.png" % ex, 1)

                # draw an antialiased rectangle from (x pos, 10) to (100, 100)
                x_pos = highgui.cvGetTrackbarPos("x pos", "filterwin")
                cv.cvRectangle(filtered_frame,
                        cv.cvPoint(x_pos, 10),
                        cv.cvPoint(100, 100),
                        green,
                        1, cv.CV_AA, 0
                )

                # calculate pure computation time
                diff = time.time() - start

                # show frames
                highgui.cvShowImage('original', frame)
                highgui.cvShowImage('filterwin', filtered_frame)

                # statistical output every 20 frames
                count += 1
                sum_diff += diff
                if count == 20:
                        print("computing time: %3.1fms / frame => %.1f fps" % (
                                sum_diff / count * 1000.,
                                20.0 / (time.time() - last_fps)
                        ))
                        last_fps = time.time()
                        sum_diff = 0
                        count = 0

                # update the GUI and wait 3ms for user input
                k = highgui.cvWaitKey(3)

                # quit on ESC or "q"
                if k == 27 or k == ord("q"):
                        break
written in 5.1ms

and here the resulting screenshot:

ocv test screenshots
ocv_test.png ocv_test.png
252.6 kB
2006-10-22 15:10:59

another example of opencv usage in python - this time with gtk ui:

gtk+ocv example
#!/usr/bin/python2.3
# coding: latin1
#
# basic example how to work with opencv and gtk
#
# by Florian Schmidt
#

import sys
import os
import time
import math

from opencv import cv
from opencv import highgui

import gtk, gobject, gc

class gcv:
        """Minimal gtk front end for an opencv camera capture.

        Shows the mirrored camera frame and a canny edge image of it in two
        drawing areas, with two sliders for the canny thresholds, a button to
        save both frames and a frame rate label. A mouse click into one of
        the drawing areas marks that position with a circle.
        """

        def __init__(self):
                """Open camera 0, build the gtk window and start grabbing frames.

                Raises RuntimeError if the capture device cannot be opened or
                delivers no first frame.
                """
                # open capturing device; check it before it is used
                self.capture = highgui.cvCaptureFromCAM(0)
                if not self.capture:
                        raise RuntimeError("Error opening capture device")
                highgui.cvSetCaptureProperty(self.capture, highgui.CV_CAP_PROP_FRAME_WIDTH, 320)
                highgui.cvSetCaptureProperty(self.capture, highgui.CV_CAP_PROP_FRAME_HEIGHT, 240)

                # get the first frame
                frame = highgui.cvQueryFrame(self.capture)
                if frame is None:
                        raise RuntimeError("Error getting first frame from capture device")
                self.frame_size = cv.cvGetSize(frame)
                # no input frame has been processed yet (see save_next)
                self.last_input = None
                # create working frames
                self.output_frame = cv.cvCloneImage(frame) # create new image with same props
                self.gray_frame = cv.cvCreateImage(self.frame_size, 8, 1) # create new image with 8 bits per pixel and 1 color plane -> gray

                # init gtk
                self.window = gtk.Window()
                self.window.set_title("gcv")
                # self.window.stick()
                # self.window.set_keep_above(True)
                vb = gtk.VBox()
                self.window.add(vb)

                # a status label
                self.label = gtk.Label("label")
                vb.pack_start(self.label, False, False)

                # the drawing area for the input frame
                self.draw = gtk.DrawingArea()
                self.draw.set_size_request(self.frame_size.width, self.frame_size.height)
                self.draw.connect("button-press-event", self.draw_button_press, "input")
                self.draw.add_events(gtk.gdk.BUTTON_PRESS_MASK)
                vb.pack_start(self.draw, False, True)

                # a slider (first canny threshold, see get_frame)
                self.scale1 = gtk.HScale()
                self.scale1.set_range(0, 255)
                self.scale1.set_value(125)
                self.scale1.set_increments(1, 1)
                vb.pack_start(self.scale1, False, True)
                # a 2nd one (second canny threshold)
                self.scale2 = gtk.HScale()
                self.scale2.set_range(0, 255)
                self.scale2.set_value(65)
                self.scale2.set_increments(1, 1)
                vb.pack_start(self.scale2, False, True)

                # a button box
                hb = gtk.HBox()
                vb.pack_start(hb, False, True)

                # a few buttons
                button = gtk.Button("save imgs")
                button.connect("clicked", self.save_next)
                hb.pack_start(button, True, True)

                button = gtk.Button("btn1")
                button.connect("clicked", self.button1)
                hb.pack_start(button, True, True)

                button = gtk.Button("btn2")
                button.connect("clicked", self.button2)
                hb.pack_start(button, True, True)

                # the drawing area for the output frame
                self.draw2 = gtk.DrawingArea()
                self.draw2.set_size_request(self.frame_size.width, self.frame_size.height)
                self.draw2.connect("button-press-event", self.draw_button_press, "output")
                self.draw2.add_events(gtk.gdk.BUTTON_PRESS_MASK)
                vb.pack_start(self.draw2, False, True)

                # display all created widgets
                self.window.show_all()
                self.gc = gtk.gdk.GC(self.window.window) # get a graphics context

                self.window.connect("delete-event", self.quit) # if window is closed

                # state for the frame rate display
                self.last = time.time()
                self.count = 0

                # (where, x, y) of the last mouse click or None
                self.circle = None
                gobject.idle_add(self.get_frame) # get a new frame if gtk is idle

        def draw_button_press(self, drawing_area, event, where):
                """Remember the click position so get_frame draws a circle there.

                `where` is "input" or "output", as passed to connect() above.
                """
                print("button %s press in %s at %d/%d" % (
                        event.button,
                        where,
                        event.x,
                        event.y
                        ))
                self.circle = (where, int(event.x), int(event.y))

        def button1(self, button):
                """Placeholder handler for "btn1"."""
                print("button 1 pressed")

        def button2(self, button):
                """Placeholder handler for "btn2"."""
                print("button 2 pressed")

        def save_next(self, button):
                """Save the last input and output frames to time stamped png files."""
                if self.last_input is None:
                        # nothing captured yet (or the last capture failed)
                        return
                timestamp = time.strftime("%Y%m%d_%H%M%S")
                highgui.cvSaveImage("input_%s.png" % timestamp, self.last_input)
                highgui.cvSaveImage("output_%s.png" % timestamp, self.output_frame)

        def quit(self, *args):
                """Terminate the program when the window is closed."""
                sys.exit(0)

        def get_frame(self, *args):
                """Idle callback: grab, process and display one frame.

                Returns True to be called again; returns False after a
                capture error, which stops the idle callback.
                """
                # get a new frame
                self.last_input = frame = highgui.cvQueryFrame(self.capture)
                if frame is None:
                        print("error getting frame")
                        return False

                # frame rate display, refreshed every 2 seconds
                self.count += 1
                now = time.time()
                if now - self.last >= 2:
                        d = now - self.last
                        fps = float(self.count) / d
                        self.label.set_text("frame rate: %.1ffps" % fps)
                        self.last = now
                        self.count = 0

                # convert bgr to rgb (the pixbufs below are created as RGB)
                cv.cvCvtColor(frame, frame, cv.CV_BGR2RGB)
                # flip frame
                cv.cvFlip(frame, None, 1)

                ##
                ## image processing
                ##
                output = self.output_frame
                gray = self.gray_frame
                # make a grayscale copy
                cv.cvCvtColor(frame, gray, cv.CV_RGB2GRAY)
                # canny edges, thresholds are the slider values times 10
                cv.cvCanny(gray, gray, self.scale1.get_value() * 10, self.scale2.get_value() * 10, 5)
                # copy to colored output frame
                cv.cvCvtColor(gray, output, cv.CV_GRAY2RGB)

                ##
                ## manipulations
                ##
                if self.circle:
                        # draw a circle
                        if self.circle[0] == "output":
                                color = cv.cvScalar(0, 255, 0, 0) # green
                                f = output # draw to output frame
                        else:
                                color = cv.cvScalar(255, 0.0, 0, 0) # red
                                f = frame # draw to input frame
                        cv.cvCircle(f, cv.cvPoint(*self.circle[1:]), 10, color, 1, cv.CV_AA, 0)

                # display frames
                # show input image
                pixbuf = gtk.gdk.pixbuf_new_from_data(
                        frame.imageData_get(),
                        gtk.gdk.COLORSPACE_RGB, False, 8, self.frame_size.width, self.frame_size.height, frame.widthStep
                        )
                self.draw.window.draw_pixbuf(self.gc, pixbuf, 0, 0, 0, 0, self.frame_size.width, self.frame_size.height)
                # show output image, using the row stride of the output image itself
                pixbuf = gtk.gdk.pixbuf_new_from_data(
                        output.imageData_get(),
                        gtk.gdk.COLORSPACE_RGB, False, 8, self.frame_size.width, self.frame_size.height, output.widthStep
                        )
                self.draw2.window.draw_pixbuf(self.gc, pixbuf, 0, 0, 0, 0, self.frame_size.width, self.frame_size.height)
                return True


if __name__ == "__main__":
        i = gcv()
        gtk.main()

written in 7.6ms

and here the resulting screenshots:

gtk+ocv test screenshots
output_20061029_164227.png output_20061029_164227.png
4.5 kB
2006-10-29 15:51:09
output_20061029_164206.png output_20061029_164206.png
2.9 kB
2006-10-29 15:51:09
input_20061029_164227.png input_20061029_164227.png
152.5 kB
2006-10-29 15:51:09
input_20061029_164206.png input_20061029_164206.png
139.2 kB
2006-10-29 15:51:09
gcv.png gcv.png
212.2 kB
2006-10-29 15:51:09

so what about the eye tracker?

that is one of those projects where i didn't reach an end. i simply had no time at the end to complete it. what i've got was a good overview of opencv and much fun with python. i had to hold a speech about that topic and a small presentation of the implemented code. here are a few working images from the eye_tracker:

ocv test screenshots
mask2.png mask2.png
291.0 B
2006-10-22 15:10:59
mask1.png mask1.png
360.0 B
2006-10-22 15:10:59
kern.png kern.png
2.7 kB
2006-10-22 15:10:59
info.png info.png
154.3 kB
2006-10-22 15:11:00
hough.png hough.png
24.6 kB
2006-10-22 15:11:00
frame.png frame.png
154.2 kB
2006-10-22 15:11:00
eyes.png eyes.png
11.2 kB
2006-10-22 15:10:59
ex3.png ex3.png
471.0 B
2006-10-22 15:10:59
ex2.png ex2.png
417.0 B
2006-10-22 15:10:59
canny_mask.png canny_mask.png
6.1 kB
2006-10-22 15:10:59
canny.png canny.png
5.5 kB
2006-10-22 15:10:59
the basic idea is to track an invariant property of my head. in my case this obviously were my glasses. i tracked my glasses using the hough transformation and a previously prepared mask. i did not take any measures to recognize different sizes of my glasses due to different distances to the sensor as i normally keep a fixed distance to my monitor.
i had to implement my own hough transformation as the one provided by opencv only recognizes basic shapes (only lines IIRC). it would be a bad idea to implement that in python, so i wrote a c++ python module to do this job. here is a snippet showing the hough transformation:
hough transformation
int real_houghmask(IplImage* src, IplImage* dst, IplImage* mask, 
                   int rx1, int ry1, int rx2, int ry2) {
        int non_zero = 0;
        int height = src->height;
        int width = src->width;
        int step = src->widthStep;
        unsigned char* data = (unsigned char*)src->imageData;
        
        
        int xoffset = mask->width / 2;
        int yoffset = mask->height / 2;
        CvRect rect;
        CvRect mask_rect;
        rect.width = mask->width;
        rect.height = mask->height;
        cvResetImageROI(dst);
        cvZero(dst);
        int x1, y1, ox1, oy1, x2, y2;
        rx1 = MAX(0, rx1);
        rx2 = MIN(width, rx2);
        ry1 = MAX(0, ry1);
        ry2 = MIN(height, ry2);
        int add_count = 0;
        for(int j = ry1; j < ry2; j++) {
                for(int i = rx1; i < rx2; i++) {
                        if(data[j * step + i] != 0) {
                                non_zero++;
                                ox1 = x1 = i - xoffset;
                                oy1 = y1 = j - yoffset;
                                x2 = i + xoffset;
                                y2 = j + yoffset;
                                
                                if(x1 < 0
                                        x1 = 0;
                                else if(x2 >= rx2) 
                                        x2 = rx2 - 1;
                                if(y1 < 0
                                        y1 = 0;
                                else if(y2 >= ry2) 
                                        y2 = ry2 - 1;
                                
                                rect.x = x1; rect.y = y1;
                                rect.width = x2 - x1;
                                rect.height = y2 - y1;
                        
                                if(rect.width < 1 || rect.height < 1)
                                        continue;
                                
                                mask_rect.x = x1 - ox1; mask_rect.y = y1 - oy1;
                                mask_rect.width = rect.width;
                                mask_rect.height = rect.height;
                                
                                cvSetImageROI(dst, rect);
                                cvSetImageROI(mask, mask_rect);
                                cvAdd(dst, mask, dst);
                        }
                }
        }
        cvResetImageROI(dst);
        cvResetImageROI(mask);
        return non_zero;
}

PyObject* houghmask(PyObject* self, PyObject* args) {
        PyObject* pyimage1;
        PyObject* pyimage2;
        PyObject* pyimage3;
        int x1, x2, y1, y2;
        if (!PyArg_ParseTuple(args, "OOOiiii", &pyimage1, &pyimage2, &pyimage3, &x1, &y1, &x2, &y2))
                return NULL;

        IplImage* src = (IplImage*)get_ptr(pyimage1);
        IplImage* dst = (IplImage*)get_ptr(pyimage2);
        IplImage* mask = (IplImage*)get_ptr(pyimage3);
        
        int non_zero = real_houghmask(src, dst, mask, x1, y1, x2, y2);
        
        return Py_BuildValue("i", non_zero);
}

written in 5.6ms
this ht is first applied to the complete image. in subsequent frames a region of interest is specified around the last position to minimize cpu usage. if the glasses are not found reasonably well, a search on the complete frame is started again.
using this approach i was able to track the position of my moving head in the frame fairly reliably.
the result of this is demonstrated by the eyes.png image above.
the ht alone gives a good estimate of the location of the glasses but is too shaky to try to look at the pupils.
to get around this i search for an offset from the ht-location with a minimal quadratic color distance to the last found glasses-location. this search is done by comparing a 28 pixel thick strip of the current eye-frame to the last eye-frame, first in x- then in y-direction. the distance is calculated using the opencv function cvNorm(). this is astonishingly fast - i was worried about the runtime...

here is the complete code of my unfinished eye_tracker experiment:

python eye tracker
#!/usr/bin/python2.3
import sys
import os
import time
import math

from opencv import cv
from opencv import highgui

import flocv
from screen_point import *

def get_keyboard_input():
        """Return the evdev event number of the first keyboard listed by ``lsinput``.

        The output of ``lsinput 2>&1`` is scanned line by line.  A line starting
        with "/" is remembered as the current device file; a following line
        whose stripped text starts with "name" and which contains "keyboard"
        selects that device.  The decimal number following "/event" in the
        device file is returned (all digits, so "/dev/input/event12" gives 12).

        Returns -1 when no keyboard is found, which includes the case that
        ``lsinput`` prints no device lines at all.  The pipe is always closed.
        """
        marker = "/event"
        device_file = None  # no device line seen yet
        fp = os.popen("lsinput 2>&1")
        try:
                for line in fp.readlines():
                        if line.startswith("/"):
                                device_file = line.strip()
                                continue
                        if not device_file:
                                # e.g. an error message printed before any device line
                                continue
                        if not (line.strip(" \t\r\n").startswith("name") and line.find("keyboard") != -1):
                                continue

                        pos = device_file.find(marker)
                        if pos == -1:
                                # device path without an event number, keep looking
                                continue
                        digits = ""
                        for ch in device_file[pos + len(marker):]:
                                if not ch.isdigit():
                                        break
                                digits += ch
                        if not digits:
                                continue

                        # same text as the print statement would produce
                        sys.stdout.write("got keyboard at %s\n" % device_file)
                        return int(digits)
        finally:
                fp.close()
        return -1
        
if __name__ == '__main__':
        windows = "Camera", "eyes", "kernel"
        
        # open input device for keyboard handling
        input = get_keyboard_input()
        evdev = flocv.open_input_event(input)
        
        # open x display for pointer movement
        dpy = flocv.open_display(tuple())

        laptop_mouse_keys = (125, 127)
        tower_mouse_keys = (125, 126)
        mouse_keys = tower_mouse_keys
        mouse_keys = laptop_mouse_keys
        
        shutter_mode = "manual"
        shutter_mode = "auto"
        
        # create windows
        starty = 5
        y = starty
        startx = 950
        width = 320 + 5
        step = 240 + 20 - 2
        for w in windows:
                highgui.cvNamedWindow (w, highgui.CV_WINDOW_AUTOSIZE)
                highgui.cvMoveWindow (w, startx, y)
                y += step
                
        highgui.cvNamedWindow ("controls", highgui.CV_WINDOW_AUTOSIZE)
        highgui.cvMoveWindow ("controls", startx - 305, starty)

        highgui.cvCreateTrackbar("canny1", "controls", 950, 5000, None)
        highgui.cvCreateTrackbar("canny2", "controls", 950, 5000, None)
        highgui.cvCreateTrackbar("edge", "controls", 220, 255, None)
        highgui.cvCreateTrackbar("hough", "controls", 42, 80, None) # 30
        highgui.cvCreateTrackbar("percent", "controls", 75, 100, None)
        highgui.cvResizeWindow("controls", 300, 300)

        def set_shutter_mode(mode):
                if mode == "auto":
                        os.system("qc-usb-messenger-1.1/qcset shutteradapt=y adaptive=y");
                else:
                        os.system("qc-usb-messenger-1.1/qcset shutteradapt=n adaptive=n");
        os.system("qc-usb-messenger-1.1/qcset keepsettings=y");
        set_shutter_mode(shutter_mode)
        if shutter_mode == "manual":
                # os.system("qc-usb-messenger-1.1/qcset -b 65535 -s 4000");
                os.system("qc-usb-messenger-1.1/qcset -b 65535 -s 3000");
        else:
                os.system("qc-usb-messenger-1.1/qcset -b 10000");

        # open captureing device
        capture = highgui.cvCaptureFromCAM(0)
        highgui.cvSetCaptureProperty (capture, highgui.CV_CAP_PROP_FRAME_WIDTH, 320)
        highgui.cvSetCaptureProperty (capture, highgui.CV_CAP_PROP_FRAME_HEIGHT, 240)

        if not capture:
                print "Error opening capture device"
                sys.exit (1)

        frame = highgui.cvQueryFrame (capture)
        frame_size = cv.cvGetSize (frame)
        
        info_frame = cv.cvCloneImage(frame)
        gray = cv.cvCreateImage(frame_size, 8, 1)
        canny = cv.cvCreateImage(frame_size, 8, 1)
        hough = cv.cvCreateImage(frame_size, cv.IPL_DEPTH_32F, 1)
        
        last = cv.cvCreateImage(frame_size, 8, 1)
        interesset = cv.cvCreateImage(frame_size, 8, 1)

        # load hough mask
        temp = highgui.cvLoadImage("mask2.png", 1)
        mask_size = cv.cvGetSize(temp)
        mask = cv.cvCreateImage(mask_size, 8, 1)
        cv.cvCvtColor(temp, mask, cv.CV_RGB2GRAY)
        
        # create float hough mask
        fmask = cv.cvCreateImage(mask_size, cv.IPL_DEPTH_32F, 1)
        cv.cvConvertScale(mask, fmask, 1.0 / 255.0, 0)
                
        # create float hough kernel
        kernel_size = cv.cvSize(mask_size.width * 2, mask_size.height * 2)
        kernel = cv.cvCreateImage(kernel_size, cv.IPL_DEPTH_32F, 1)
        cv.cvSetZero(kernel)
        
        # create eye* images
        eyes_size = cv.cvSize(mask_size.width / 3 * 3, mask_size.height / 3 * 4)
        eyes = cv.cvCreateImage(eyes_size, 8, 3)
        zoom = 8
        eyes_zoom = cv.cvCreateImage(cv.cvSize(eyes_size.width * zoom, eyes_size.height * zoom), 8, 3)
        cv.cvSetZero(eyes)
        cv.cvSetZero(eyes_zoom)
        
        last_index = 0
        mx, my = mask_size.width, mask_size.height
        while True:
                # for each nonzero point in mask,
                x, y, last_index = flocv.get_next_non_zero(mask, last_index)
                if x == -1:
                        break
                # add our weighting mask to the final hough-kernel
                cv.cvSetImageROI(kernel, cv.cvRect(mx - x, my - y, mask_size.width, mask_size.height))
                cv.cvAdd(kernel, fmask, kernel, None)
        cv.cvResetImageROI(kernel)
        
        
        # scale kernel to max(k) = 1
        min_kernel, max_kernel = cv.cvMinMaxLoc(kernel, None, None, None)
        cv.cvConvertScale(kernel, kernel, 1.0 / max_kernel, 0)
        
        kernel_save = cv.cvCreateImage(cv.cvSize(kernel.width, kernel.height), 8, 1)
        cv.cvConvertScale(kernel, kernel_save, 255.0, 0)
        highgui.cvSaveImage("kern.png", kernel_save)

        # create small red point
        sp = screen_point()

        red = cv.cvScalar(0, 0, 255, 0)
        dark_red = cv.cvScalar(0, 0, 15, 0)
        blue = cv.cvScalar(255, 0, 0, 0)
        green = cv.cvScalar(0, 255, 0, 0)
        black = cv.cvScalar(0, 0, 0, 0)
        
        frame_count = 0
        last_good = False
        last_pos = None
        pointer_motion = "relative"
        have_last_eyes = False
        motion_enabled = False
        reagional = True
        dmode = "camera"
        
        count = 0
        sum_diff = 0
        last_fps = time.time()
        
        s = cv.cvScalar(150, 150, 150, 150)
        
        eimg = None
        ex = 0
        while 1:
                # acquire new frame
                frame = highgui.cvQueryFrame (capture)
                if frame is None:
                        break
                start = time.time()
                frame_count += 1
                
                # flip frame
                cv.cvFlip(frame, None, 1)
                
                # make info copy
                cv.cvCopy(frame, info_frame, None)
                
                # make grayscale copy
                cv.cvCvtColor(frame, gray, cv.CV_RGB2GRAY)
                
                # get trackbar values
                t1 = highgui.cvGetTrackbarPos("canny1", "controls")
                t2 = highgui.cvGetTrackbarPos("canny2", "controls")
                t3 = highgui.cvGetTrackbarPos("edge", "controls")
                min_p = highgui.cvGetTrackbarPos("percent", "controls")
                hough_threshold = highgui.cvGetTrackbarPos("hough", "controls")
                if t1 > t2:
                        t2 = t1
                        highgui.cvSetTrackbarPos("canny2", "controls", t2)
                if t2 < t1:
                        t1 = t2
                        highgui.cvSetTrackbarPos("canny1", "controls", t1)
                        

                # call canny() or use stored eimg
                if eimg is None:
                        cv.cvCanny(gray, canny, t1, t2, 5)
                else:
                        cv.cvCvtColor(eimg, canny, cv.CV_RGB2GRAY)
                        
                if last_good and reagional:
                        # do hough transformation only on ROI
                        x, y = last_pos
                        rx, ry = int(x - mask_size.width / 3 * 3), int(y - mask_size.height / 3 * 5)
                        w, h = x + mask_size.width / 3 * 3, y + mask_size.height / 3 * 5
                        flocv.houghmask(
                                canny, hough, kernel,
                                rx, ry,
                                w, h
                        )
                        # draw searched ROI in info frame
                        cv.cvRectangle(info_frame,
                                cv.cvPoint(rx, ry),
                                cv.cvPoint(w, h),
                                green,
                                1, cv.CV_AA, 0
                        )
                else:
                        # do hough transformation on complete frame
                        flocv.houghmask(
                                canny, hough, kernel,
                                0, 0,
                                frame_size.width, frame_size.height
                        )
                
                # get extrema of hough-room
                #min_hough, max_hough = cv.cvMinMaxLoc(hough, None, None, None)
                # get extrama location
                x, y, width, height, max_hough = flocv.get_best_center(hough)
                
                last_good = False
                print hough_threshold, max_hough, height, width
                if (width <= mask_size.width and height <= mask_size.height and max_hough >= hough_threshold
                        and x < frame.width - mask_size.width / 2 and y < frame.height - mask_size.height / 2
                        and x > mask_size.width / 2 and y > mask_size.height / 2):
                                
                        # hough transformation found reasonable good point
                        rx, ry = x - mask_size.width / 2, y - mask_size.height / 2 # got to top left of mask-sized rectangle

                        # draw mask-sized rectangle into info_frame
                        cv.cvRectangle(info_frame, cv.cvPoint(rx, ry), cv.cvPoint(rx + mask_size.width, ry + mask_size.height), red, 1, cv.CV_AA, 0);
                        last_pos = (x, y)
                        last_good = True
                elif last_pos:
                        x, y = last_pos # center of last match
                        rx, ry = x - mask_size.width / 2, y - mask_size.height / 2 # got to top left of mask-sized rectangle
                        # draw mask sized rectangle of last match in dark red
                        cv.cvRectangle(info_frame, cv.cvPoint(rx, ry), cv.cvPoint(rx + mask_size.width, ry + mask_size.height), dark_red, 1, cv.CV_AA, 0);

                if last_pos:
                        # have a guessed eye position (maybe an old one...)
                        x, y = last_pos
                        ex, ey = x - eyes.width / 2, y - eyes.height / 2 # go to top left of eyes-sized rectangle
                        ex, ey = map(lambda v: max(v, 0), (ex, ey)) # clip to top left of frame
                        
                        if have_last_eyes:
                                # we have a last_eyes image
                                # adjust the new position, to get a minimal quadratic distance between the current and last position
                                compare_width = 28 # pixels
                                max_distance = 10 # pixels
                                
                                xoff = 0
                                yoff = eyes.height / 2 - compare_width / 2
                                cv.cvSetImageROI(eyes, cv.cvRect(xoff, yoff, eyes.width, compare_width))
                                best_shift = -1
                                min_norm = -1
                                min_shift = min(ex, max_distance)
                                max_shift = min(frame.width - ex - eyes.width, max_distance)
                                #print "----"
                                for shift in range(-min_shift, max_shift + 1):
                                        cv.cvSetImageROI(frame, cv.cvRect(ex + xoff + shift, ey + yoff, eyes.width, compare_width))
                                        norm = cv.cvNorm(frame, eyes, cv.CV_L2, None)
                                        if norm < min_norm or min_norm == -1:
                                                min_norm = norm
                                                best_shift = shift
                                #print "X-shift: min_norm: %5.0f at shift: %d" % (min_norm, best_shift)
                                ex = max(0, ex + best_shift)
                                        
                                xoff = eyes.width / 2 - compare_width / 2
                                yoff = 0
                                cv.cvSetImageROI(eyes, cv.cvRect(xoff, yoff, compare_width, eyes.height))
                                best_shift = 0
                                min_norm = -1
                                min_shift = min(ey, max_distance)
                                max_shift = min(frame.height - ey - eyes.height, max_distance)
                                for shift in range(-min_shift, max_shift + 1):
                                        cv.cvSetImageROI(frame, cv.cvRect(ex + xoff, ey + yoff + shift, compare_width, eyes.height))
                                        norm = cv.cvNorm(frame, eyes, cv.CV_L2, None)
                                        if norm < min_norm or min_norm == -1:
                                                min_norm = norm
                                                best_shift = shift
                                #print "Y-shift: min_norm: %5.0f at shift: %d" % (min_norm, best_shift)
                                ey = max(0, ey + best_shift)
                                
                                last_pos = (ex + eyes.width / 2, ey + eyes.height / 2) # store this position, in case we get no more hough-matches
                                
                                rx, ry = last_pos[0] - mask_size.width / 2, last_pos[1] - mask_size.height / 2 # got to top left of mask-sized rectangle
                                # draw mask-sized rectangle into info_frame
                                cv.cvRectangle(info_frame, cv.cvPoint(rx, ry), cv.cvPoint(rx + mask_size.width, ry + mask_size.height), dark_red, 1, cv.CV_AA, 0);
                                
                                #print "store2 lastpos", last_pos
                                cv.cvResetImageROI(eyes)
                        
                        # keep eyes-reagion in frame
                        ex -= max(0, (ex + eyes.width) - frame.width)
                        ey -= max(0, (ey + eyes.height) - frame.height)

                        # copy eyes-region to eyes-image
                        cv.cvSetImageROI(frame, cv.cvRect(ex, ey, eyes.width, eyes.height))
                        cv.cvCopy(frame, eyes, None)
                        cv.cvResetImageROI(frame)
                        have_last_eyes = True
                        
                        # absolute
                        if pointer_motion == "absolute":
                                px = 0.9 * ex / (frame.width - eyes.width)
                                py = 0.9 * ey / (frame.height - eyes.height)
                                if motion_enabled:
                                        sp.hide()
                                        flocv.absolute_motion(dpy, px, py)
                                else:
                                        sp.set_pos(px, py)
                        elif pointer_motion == "relative" and motion_enabled:
                                speed = 0.15
                                sx, sy = relative_start
                                px = speed * (ex - sx) / (frame.width - eyes.width)
                                py = speed * (ey - sy) / (frame.height - eyes.height)
                                flocv.relative_motion(dpy, px, py)
                
                # calculate pure computation time
                diff = time.time() - start
                
                # update window displays
                if dmode == "camera":
                        # scale hough room
                        cv.cvConvertScale(hough, last, 255.0 / max_hough, 0)
                        # visualize thresholded hough room in info framge
                        cv.cvThreshold(last, interesset, t3, 0, cv.CV_THRESH_TOZERO)
                        cv.cvAddS(frame, s, info_frame, interesset);
                        # show info frame
                        highgui.cvShowImage ('Camera', info_frame)
                elif dmode == "canny":
                        highgui.cvShowImage ('Camera', canny)
                elif dmode == "hough":
                        # scale hough room (only used for visualisation)
                        cv.cvConvertScale(hough, last, 255.0 / max_hough, 0)
                        highgui.cvShowImage ('Camera', last)
                        
                #highgui.cvShowImage ('eyes', eyes)
                cv.cvResize(eyes, eyes_zoom, cv.CV_INTER_NN)
                #cv.cvResize(eyes, eyes_zoom, cv.CV_INTER_LINEAR)
                highgui.cvShowImage ('eyes', eyes_zoom)
                highgui.cvShowImage ('kernel', kernel)


                # user input and statistical output
                count += 1
                sum_diff += diff
                if count == 20:
                        print " computing time: %3.1fms / frame => %.1f fps" % (sum_diff / count * 1000., 20.0 / (time.time() - last_fps))
                        last_fps = time.time()
                        sum_diff = 0
                        count = 0
                
                k = highgui.cvWaitKey(3)
                # quit
                if k == 27:
                        break
                        
                # info_windows display mode
                if k == ord("d"):
                        if dmode == "camera":
                                dmode = "canny"
                        elif dmode == "canny":
                                dmode = "hough"
                        elif dmode == "hough":
                                dmode = "camera"
                if k == ord("x"):
                        if dmode == "camera":
                                dmode = "hough"
                        elif dmode == "canny":
                                dmode = "camera"
                        elif dmode == "hough":
                                dmode = "canny"
                
                # camera shutter mode
                if k == ord("m"):
                        if shutter_mode == "auto":
                                shutter_mode = "manual"
                        else:
                                shutter_mode = "auto"
                        print "new shutter mode:", shutter_mode
                        set_shutter_mode(shutter_mode)
                
                # pointer motion mode
                if k == ord("p"):
                        if pointer_motion == "absolute":
                                pointer_motion = "relative"
                        else:
                                pointer_motion = "absolute"
                        print "new pointer mode:", pointer_motion
                        
                # testing functions
                if k == ord('s'):
                        highgui.cvSaveImage("frame.png", frame)
                        highgui.cvSaveImage("info.png", info_frame)
                        highgui.cvSaveImage("canny.png", canny)
                        # scale hough room (only used for visualisation)
                        cv.cvConvertScale(hough, last, 255.0 / max_hough, 0)
                        highgui.cvSaveImage("hough.png", last)
                        highgui.cvSaveImage("eyes.png", eyes_zoom)
                if k == ord("e"):
                        ex = ex % 4 + 1
                        print "e:", ex
                        if ex == 4:
                                eimg = None
                        else:
                                eimg = highgui.cvLoadImage("ex%d.png" % ex, 1)
                if k == ord("r"):
                        reagional = not reagional
                        print "reagional is",
                        if reagional:
                                print "enabled"
                        else:
                                print "disabled"
                
                if k == ord("n"):
                        have_last_eyes = False
                        print "resettet last eyes image"
                        
                # check for global keyboard events
                code, action = flocv.get_input_event(evdev)
                if code != -1:
                        print "key:", code, action
                        if code == mouse_keys[0]: # move mouse!
                                motion_enabled = (action == 1)
                                relative_start = (ex, ey)
                        if code == mouse_keys[1]: # do a mouse click
                                print "mouse click"
                                if action == 1:
                                        flocv.button(dpy, 1, "press")
                                else:
                                        flocv.button(dpy, 1, "release")

                sp.do_loop() # process point-window events
written in 19.8ms

the next step would be to extract the exact pupil centers and put them into relation to the complete eye-frame. that would be the center of an ellipse (due to the perspective). i've seen a very promising algorithm using RANSAC to fit an ellipse to a pupil. i think RANSAC is a very good and easy idea. maybe i should write a little bit more about it...
but as mentioned above: the term was almost over at this time :( maybe i come back to this at some time.

a cheap webcam like the one i used is also not very capable as an eye tracker. as a head tracker it is okay :) the python code above implemented a moving red (sometimes transparent) big dot (or also the mouse pointer) which could be controlled by head movements. i didn't plan to implement a mouse click through an eye-gesture as this is too unnatural for humans. a usually unused windows-key on my keyboard was used to enable mouse-movements and the other windows key to activate a mouse button. it's not very comfortable to do everyday work using this "head-tracker" instead of the mouse :)