this was a project for an HCI seminar during my master's studies.
ocv stands for open computer vision; opencv is an open source library
for computer vision that includes a python interface. the idea was to
implement a basic eye tracker in python with opencv.
because i get more and more emails asking about opencv i decided to add a
working C++ example. it continuously grabs a frame from an ocv compatible camera
device, displays the input frame, the red, green and blue channels and a modified
output frame in your favored windowing environment (assuming it's OCV compatible ;) )
c++ ocv example
<stdio.h>
<opencv/cv.h>
<opencv/highgui.h>
int main(int argc, char* argv[]) {
CvCapture* cap = cvCaptureFromCAM(0);
if(!cap) {
printf("could not get camera capture device!\n");
return -1;
}
cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_WIDTH, 320);
cvSetCaptureProperty(cap, CV_CAP_PROP_FRAME_HEIGHT, 240);
cvNamedWindow("input", CV_WINDOW_AUTOSIZE);
cvNamedWindow("red", CV_WINDOW_AUTOSIZE);
cvNamedWindow("green", CV_WINDOW_AUTOSIZE);
cvNamedWindow("blue", CV_WINDOW_AUTOSIZE);
cvNamedWindow("output", CV_WINDOW_AUTOSIZE);
IplImage* image = cvQueryFrame(cap);
if(!image) {
printf("could not get image from camera capture device!\n");
return -1;
}
CvSize size = cvGetSize(image);
IplImage* red = cvCreateImage(size, IPL_DEPTH_8U, 1);
IplImage* green = cvCreateImage(size, IPL_DEPTH_8U, 1);
IplImage* blue = cvCreateImage(size, IPL_DEPTH_8U, 1);
IplImage* output = cvCreateImage(size, IPL_DEPTH_8U, 3);
while(1) {
image = cvQueryFrame(cap);
if(!image) {
printf("could not get image from camera capture device!\n");
return -1;
}
cvShowImage("input", image);
cvSplit(image, blue, green, red, NULL);
int height = red->height;
int width = red->width;
int step = red->widthStep;
unsigned char* red_data = (unsigned char*)red->imageData;
unsigned char* green_data = (unsigned char*)green->imageData;
unsigned char* blue_data = (unsigned char*)blue->imageData;
unsigned char* output_data = (unsigned char*)output->imageData;
cvCopy(image, output);
int w = 5;
int h = 10;
for(int j = height/2; j < height/2 + h; j++) {
for(int i = width/2; i < width/2 + w ; i++) {
printf("(%3d %3d %3d) ",
red_data[j * step + i],
green_data[j * step + i],
blue_data[j * step + i]
);
}
printf("\n");
}
cvRectangle(
red,
cvPoint(width/2, height/2),
cvPoint(width/2 + w, height/2 + h),
cvScalar(0),
1, 4);
int red_min = 150;
int green_max = 100;
int blue_max = 100;
int found = 0;
for(int j = 0; j < height; j++) {
for(int i = 0; i < width; i++) {
if(red_data[j * step + i] > red_min
&& green_data[j * step + i] < green_max
&& blue_data[j * step + i] < blue_max) {
found ++;
} else {
output_data[j * step * 3 + i * 3 + 0] = 0;
output_data[j * step * 3 + i * 3 + 1] = 0;
output_data[j * step * 3 + i * 3 + 2] = 0;
}
}
}
printf("found %d red-ish pixels. (%.1f%%)\n",
found,
(double)found / (double)(width * height) * 100.);
cvShowImage("output", output);
cvShowImage("red", red);
cvShowImage("green", green);
cvShowImage("blue", blue);
printf("\n");
int key = cvWaitKey(50);
if(key == 27) {
break;
}
}
cvReleaseImage(&red);
cvReleaseImage(&green);
cvReleaseImage(&blue);
cvReleaseImage(&output);
}
written in 9.6ms
another example, applying the houghlines example from the opencv documentation to
an image loaded from disk (no webcam needed):
c++ ocv hough example
<stdio.h>
<opencv/cv.h>
<opencv/highgui.h>
int main(int argc, char* argv[]) {
IplImage* image = cvLoadImage("red_line.png");
int height = image->height;
int width = image->width;
int step = image->widthStep;
if(image->depth != IPL_DEPTH_8U) {
printf("unknown image data format %d\n", image->depth);
return -1;
}
unsigned char* data = (unsigned char*)image->imageData;
int red_min = 150;
int green_max = 100;
int blue_max = 100;
int found = 0;
for(int j = 0; j < height; j++) {
for(int i = 0; i < width; i++) {
if((data[j * step + i * 3 + 2] > red_min
&& data[j * step + i * 3 + 1] < green_max
&& data[j * step + i * 3 + 0] < blue_max) || 0) {
printf("found very red pixel at %4d %4d: (%3d %3d %3d)\n",
j, i,
data[j * step + i * 3 + 2],
data[j * step + i * 3 + 1],
data[j * step + i * 3 + 0]
);
found ++;
}
}
}
printf("found %d red-ish pixels. (%.1f%%)\n",
found,
(double)found / (double)(width * height) * 100.);
IplImage* color_dst = cvCreateImage(cvGetSize(image), 8, 3 );
cvCopy(image, color_dst);
IplImage* gray_image = cvCreateImage(cvGetSize(image), IPL_DEPTH_8U, 1);
cvCvtColor(image, gray_image, CV_RGB2GRAY);
CvMemStorage* storage = cvCreateMemStorage(0);
CvSeq* lines = cvHoughLines2(gray_image, storage, CV_HOUGH_STANDARD, 1, CV_PI/180. * 2, 100);
for(int i = 0; i < MIN(lines->total,10); i++) {
float* line = (float*)cvGetSeqElem(lines, i);
float rho = line[0];
float theta = line[1];
printf("line %d: rho: %.1f, theta: %.1fdeg\n", i + 1, rho, theta / CV_PI * 180.);
CvPoint pt1, pt2;
double a = cos(theta), b = sin(theta);
double x0 = a * rho, y0 = b * rho;
pt1.x = cvRound(x0 + 1000*(-b));
pt1.y = cvRound(y0 + 1000*(a));
pt2.x = cvRound(x0 - 1000*(-b));
pt2.y = cvRound(y0 - 1000*(a));
cvLine(color_dst, pt1, pt2, CV_RGB(0,255,0), 1, 4);
}
cvNamedWindow("input", 1 );
cvShowImage("input", image);
cvNamedWindow("output", 1 );
cvShowImage("output", color_dst);
while(cvWaitKey(100) != 27);
}
written in 5.6ms
and here the resulting screenshots:
a small .tar.gz bundle can be downloaded including both example sources and images as well as a small makefile
here is a small code snippet that demonstrates opencv usage with
python:
ocv example
import sys
import os
import time
import math
from opencv import cv
from opencv import highgui
if __name__ == "__main__":
highgui.cvNamedWindow ("original", highgui.CV_WINDOW_AUTOSIZE)
highgui.cvMoveWindow ("original", 100, 100)
highgui.cvNamedWindow ("filterwin", highgui.CV_WINDOW_AUTOSIZE)
highgui.cvMoveWindow ("filterwin", 300, 300)
highgui.cvCreateTrackbar("regler1", "filterwin", 0, 300, None)
capture = highgui.cvCaptureFromCAM(0)
highgui.cvSetCaptureProperty(capture, highgui.CV_CAP_PROP_FRAME_WIDTH, 320)
highgui.cvSetCaptureProperty(capture, highgui.CV_CAP_PROP_FRAME_HEIGHT, 240)
if not capture:
print "Error opening capture device"
sys.exit (1)
frame = highgui.cvQueryFrame(capture)
frame_size = cv.cvGetSize(frame)
highgui.cvCreateTrackbar("x pos", "filterwin", 0, frame_size.width, None)
filtered_frame = cv.cvCloneImage(frame)
green = cv.cvScalar(0, 255, 0, 0)
count = 0
frame_count = 0
sum_diff = 0
last_fps = time.time()
while 1:
frame = highgui.cvQueryFrame(capture)
if frame is None:
break
start = time.time()
frame_count += 1
cv.cvFlip(frame, None, 1)
if not frame_count % 5:
highgui.cvSaveImage("frame_%5d.png" % count, frame)
v1 = highgui.cvGetTrackbarPos("regler1", "filterwin")
v1 = float(v1) / 100.0
cv.cvConvertScale(frame, filtered_frame, v1, 0)
v2 = highgui.cvGetTrackbarPos("x pos", "filterwin")
cv.cvRectangle(filtered_frame,
cv.cvPoint(v2, 10),
cv.cvPoint(100, 100),
green,
1, cv.CV_AA, 0
)
diff = time.time() - start
highgui.cvShowImage('original', frame)
highgui.cvShowImage('filterwin', filtered_frame)
count += 1
sum_diff += diff
if count == 20:
print "computing time: %3.1fms / frame => %.1f fps" % (
sum_diff / count * 1000.,
20.0 / (time.time() - last_fps)
)
last_fps = time.time()
sum_diff = 0
count = 0
k = highgui.cvWaitKey(3)
if k == 27:
break
if k == ord("q"):
break
written in 5.1ms
and here the resulting screenshot:
ocv test screenshots
|
ocv_test.png
252.6 kB
2006-10-22 15:10:59
|
another example of opencv usage in python - this time with gtk ui:
gtk+ocv example
import sys
import os
import time
import math
from opencv import cv
from opencv import highgui
import gtk, gobject, gc
class gcv:
def __init__(self):
self.capture = highgui.cvCaptureFromCAM(0)
highgui.cvSetCaptureProperty(self.capture, highgui.CV_CAP_PROP_FRAME_WIDTH, 320)
highgui.cvSetCaptureProperty(self.capture, highgui.CV_CAP_PROP_FRAME_HEIGHT, 240)
if not self.capture:
raise "Error opening capture device"
frame = highgui.cvQueryFrame(self.capture)
self.frame_size = cv.cvGetSize(frame)
self.output_frame = cv.cvCloneImage(frame)
self.gray_frame = cv.cvCreateImage(self.frame_size, 8, 1)
self.window = gtk.Window()
self.window.set_title("gcv")
vb = gtk.VBox()
self.window.add(vb)
self.label = gtk.Label("label")
vb.pack_start(self.label, False, False)
self.draw = gtk.DrawingArea()
self.draw.set_size_request(self.frame_size.width, self.frame_size.height)
self.draw.connect("button-press-event", self.draw_button_press, "input")
self.draw.add_events(gtk.gdk.BUTTON_PRESS_MASK)
vb.pack_start(self.draw, False, True)
self.scale1 = gtk.HScale()
self.scale1.set_range(0, 255)
self.scale1.set_value(125)
self.scale1.set_increments(1, 1)
vb.pack_start(self.scale1, False, True)
self.scale2 = gtk.HScale()
self.scale2.set_range(0, 255)
self.scale2.set_value(65)
self.scale2.set_increments(1, 1)
vb.pack_start(self.scale2, False, True)
hb = gtk.HBox()
vb.pack_start(hb, False, True)
button = gtk.Button("save imgs")
button.connect("clicked", self.save_next)
hb.pack_start(button, True, True)
button = gtk.Button("btn1")
button.connect("clicked", self.button1)
hb.pack_start(button, True, True)
button = gtk.Button("btn2")
button.connect("clicked", self.button2)
hb.pack_start(button, True, True)
self.draw2 = gtk.DrawingArea()
self.draw2.set_size_request(self.frame_size.width, self.frame_size.height)
self.draw2.connect("button-press-event", self.draw_button_press, "output")
self.draw2.add_events(gtk.gdk.BUTTON_PRESS_MASK)
vb.pack_start(self.draw2, False, True)
self.window.show_all()
self.gc = gtk.gdk.GC(self.window.window)
self.window.connect("delete-event", self.quit)
self.last = time.time()
self.count = 0
self.circle = None
gobject.idle_add(self.get_frame)
def draw_button_press(self, drawing_area, event, where):
print "button %s press in %s at %d/%d" % (
event.button,
where,
event.x,
event.y
)
self.circle = (where, int(event.x), int(event.y))
def button1(self, button):
print "button 1 pressed"
def button2(self, button):
print "button 2 pressed"
def save_next(self, button):
timestamp = time.strftime("%Y%m%d_%H%M%S")
highgui.cvSaveImage("input_%s.png" % timestamp, self.last_input)
highgui.cvSaveImage("output_%s.png" % timestamp, self.output_frame)
def quit(self, *args):
sys.exit(0)
def get_frame(self, *args):
self.last_input = frame = highgui.cvQueryFrame(self.capture)
if frame is None:
print "error getting frame"
return
self.count += 1
now = time.time()
if now - self.last >= 2:
d = now - self.last
fps = float(self.count) / d
self.label.set_text("frame rate: %.1ffps" % fps)
self.last = now
self.count = 0
cv.cvCvtColor(frame, frame, cv.CV_BGR2RGB)
cv.cvFlip(frame, None, 1)
output = self.output_frame
gray = self.gray_frame
cv.cvCvtColor(frame, gray, cv.CV_RGB2GRAY)
cv.cvCanny(gray, gray, self.scale1.get_value() * 10, self.scale2.get_value() * 10, 5)
cv.cvCvtColor(gray, output, cv.CV_GRAY2RGB)
if self.circle:
if self.circle[0] == "output":
color = cv.cvScalar(0, 255, 0, 0)
f = output
else:
color = cv.cvScalar(255, 0.0, 0, 0)
f = frame
cv.cvCircle(f, cv.cvPoint(*self.circle[1:]), 10, color, 1, cv.CV_AA, 0)
pixbuf = gtk.gdk.pixbuf_new_from_data(
frame.imageData_get(),
gtk.gdk.COLORSPACE_RGB, False, 8, self.frame_size.width, self.frame_size.height, frame.widthStep
)
self.draw.window.draw_pixbuf(self.gc, pixbuf, 0, 0, 0, 0, self.frame_size.width, self.frame_size.height)
pixbuf = gtk.gdk.pixbuf_new_from_data(
output.imageData_get(),
gtk.gdk.COLORSPACE_RGB, False, 8, self.frame_size.width, self.frame_size.height, frame.widthStep
)
self.draw2.window.draw_pixbuf(self.gc, pixbuf, 0, 0, 0, 0, self.frame_size.width, self.frame_size.height)
return True
if __name__ == "__main__":
    # creating the window also schedules the frame grabber; then hand
    # control to the GTK main loop
    i = gcv()
    gtk.main()
written in 7.6ms
and here the resulting screenshots:
so whats about the eye tracker?
that is one of those projects where i didn't reach an end. i simply had
no time at the end to complete it. what i've got was a good overview of opencv
and much fun with python. i had to hold a speech about that topic and a small
presentation of the implemented code. here are a few working images from the
eye_tracker:
the basic idea is to track an invariant property of my head. in my case this obviously was my glasses.
i tracked my glasses using the hough transformation and a previously prepared mask. i did not
take any measures to recognize different sizes of my glasses due to different distances to the sensor as
i normally keep a fixed distance to my monitor.
i had to implement the hough transformation myself as the one provided by opencv only recognizes basic shapes
(only lines, IIRC). it would be a bad idea to implement that in python, so i wrote a c++ python module
to do this job. here is a snippet showing the hough transformation:
hough transformation
/*
 * Mask-based Hough voting.
 *
 * For every non-zero pixel of `src` inside the region [rx1, rx2) x
 * [ry1, ry2) the image `mask` is added into `dst`, centred on that pixel.
 * Where the mask would stick out over the top/left image border or over
 * the bottom/right end of the region, only the overlapping part is added.
 *
 * src   edge image; it is read as one byte per pixel
 * dst   accumulator; its ROI is reset and it is zeroed before voting
 * mask  image added per vote; must be addable to dst with cvAdd
 * rx1, ry1, rx2, ry2  search region, clamped to the size of src
 *
 * The ROIs of dst and mask are reset on return.
 * Returns the number of non-zero pixels found in the region.
 */
int real_houghmask(IplImage* src, IplImage* dst, IplImage* mask,
                   int rx1, int ry1, int rx2, int ry2) {
    int non_zero = 0;
    const int step = src->widthStep;
    const unsigned char* data = (const unsigned char*)src->imageData;
    const int xoffset = mask->width / 2;
    const int yoffset = mask->height / 2;

    cvResetImageROI(dst);
    cvZero(dst);

    rx1 = MAX(0, rx1);
    rx2 = MIN(src->width, rx2);
    ry1 = MAX(0, ry1);
    ry2 = MIN(src->height, ry2);

    for(int j = ry1; j < ry2; j++) {
        for(int i = rx1; i < rx2; i++) {
            if(data[j * step + i] == 0)
                continue;
            non_zero++;

            /* unclipped top-left corner of the mask when centred on (i, j) */
            const int ox1 = i - xoffset;
            const int oy1 = j - yoffset;
            int x1 = ox1;
            int y1 = oy1;
            int x2 = i + xoffset;
            int y2 = j + yoffset;

            /* Clamp each edge on its own: with a region narrower than the
             * mask both sides can stick out at the same time. */
            if(x1 < 0)
                x1 = 0;
            if(x2 >= rx2)
                x2 = rx2 - 1;
            if(y1 < 0)
                y1 = 0;
            if(y2 >= ry2)
                y2 = ry2 - 1;

            CvRect rect = cvRect(x1, y1, x2 - x1, y2 - y1);
            if(rect.width < 1 || rect.height < 1)
                continue;

            /* shift into the mask by whatever was cut off at the top/left */
            CvRect mask_rect = cvRect(x1 - ox1, y1 - oy1, rect.width, rect.height);

            cvSetImageROI(dst, rect);
            cvSetImageROI(mask, mask_rect);
            cvAdd(dst, mask, dst);
        }
    }
    cvResetImageROI(dst);
    cvResetImageROI(mask);
    return non_zero;
}
/*
 * Python entry point for real_houghmask().
 *
 * Expects the argument tuple (src, dst, mask, x1, y1, x2, y2): three
 * wrapped image objects followed by four ints.  The image pointers are
 * obtained with get_ptr().  Returns the result of real_houghmask() as a
 * Python int, or NULL if the arguments cannot be parsed.
 */
PyObject* houghmask(PyObject* self, PyObject* args) {
    PyObject* py_src;
    PyObject* py_dst;
    PyObject* py_mask;
    int left, top, right, bottom;

    if (!PyArg_ParseTuple(args, "OOOiiii",
                          &py_src, &py_dst, &py_mask,
                          &left, &top, &right, &bottom)) {
        return NULL;
    }

    IplImage* src_img = (IplImage*)get_ptr(py_src);
    IplImage* dst_img = (IplImage*)get_ptr(py_dst);
    IplImage* mask_img = (IplImage*)get_ptr(py_mask);

    int hits = real_houghmask(src_img, dst_img, mask_img, left, top, right, bottom);
    return Py_BuildValue("i", hits);
}
written in 5.6ms
this ht is first applied to the complete image. in subsequent frames a region of interest is
specified around the last position to minimize cpu usage. if the glasses are not found
reasonably well again, a search on the complete frame is started.
using this approach i was able to track the position of my moving head in the frame fairly reliably.
the result of this is demonstrated by the
eyes.png image above.
the ht alone gives a good estimate of the location of the glasses but is too shaky to try to look
at the pupils.
to get around this i search for an offset from the ht-location with a minimal quadratic color distance
to the last found glasses-location. this search is done by comparing a 28 pixel thick strip of the
current eye-frame to the last one, first in x- then in y-direction. the distance is calculated using the
opencv function cvNorm(). this is astonishingly fast - i was worried about the runtime...
here is the complete code of my unfinished eye_tracker experiment:
python eye tracker
import sys
import os
import time
import math
from opencv import cv
from opencv import highgui
import flocv
from screen_point import *
def get_keyboard_input():
    """Return the event number of the first keyboard listed by `lsinput`.

    The output of `lsinput` is scanned line by line.  A line starting with
    "/" is remembered as the current device file; the first following
    "name" line that contains "keyboard" selects that device.  The number
    after "/event" in the device file name is returned (all digits, so
    /dev/input/event12 gives 12).

    Returns -1 if no keyboard device is found.
    """
    marker = "/event"
    # no device seen yet; "name" lines before the first device are ignored
    device_file = None
    fp = os.popen("lsinput 2>&1")
    try:
        for line in fp.readlines():
            if line.startswith("/"):
                device_file = line.strip()
                continue
            if not device_file:
                continue
            if line.strip(" \t\r\n").startswith("name") and line.find("keyboard") != -1:
                pos = device_file.find(marker)
                number = device_file[pos + len(marker):]
                if pos == -1 or not number.isdigit():
                    # not an event device; keep looking
                    continue
                sys.stdout.write("got keyboard at %s\n" % device_file)
                return int(number)
    finally:
        # close the pipe on every path, including exceptions
        fp.close()
    return -1
# Eye/head tracker main script: grabs camera frames, locates the glasses with
# a mask-based hough transformation (flocv.houghmask), refines the position
# by comparing strips of the current frame with the last "eyes" image and
# optionally moves the mouse pointer accordingly.
# NOTE(review): this listing has lost its indentation; the nesting has to be
# restored before it can run. The comments below describe the statements as
# they appear, not a verified block structure.
if __name__ == '__main__':
windows = "Camera", "eyes", "kernel"
# get_keyboard_input() returns -1 when no keyboard was found; that value is
# passed on to flocv unchecked.
input = get_keyboard_input()
evdev = flocv.open_input_event(input)
dpy = flocv.open_display(tuple())
# key codes compared against flocv.get_input_event() results further down:
# [0] enables pointer motion, [1] acts as mouse button
laptop_mouse_keys = (125, 127)
tower_mouse_keys = (125, 126)
# the second assignment wins: the laptop key pair is the active one
mouse_keys = tower_mouse_keys
mouse_keys = laptop_mouse_keys
# the second assignment wins: the script starts in "auto" shutter mode
shutter_mode = "manual"
shutter_mode = "auto"
# window placement: the three windows are stacked vertically at x = startx
starty = 5
y = starty
startx = 950
width = 320 + 5
step = 240 + 20 - 2
for w in windows:
highgui.cvNamedWindow (w, highgui.CV_WINDOW_AUTOSIZE)
highgui.cvMoveWindow (w, startx, y)
y += step
# control window with the tuning trackbars (initial value, maximum)
highgui.cvNamedWindow ("controls", highgui.CV_WINDOW_AUTOSIZE)
highgui.cvMoveWindow ("controls", startx - 305, starty)
highgui.cvCreateTrackbar("canny1", "controls", 950, 5000, None)
highgui.cvCreateTrackbar("canny2", "controls", 950, 5000, None)
highgui.cvCreateTrackbar("edge", "controls", 220, 255, None)
highgui.cvCreateTrackbar("hough", "controls", 42, 80, None)
highgui.cvCreateTrackbar("percent", "controls", 75, 100, None)
highgui.cvResizeWindow("controls", 300, 300)
# switches the camera's adaptive shutter on or off through the external
# qcset tool (path is relative to the working directory)
def set_shutter_mode(mode):
if mode == "auto":
os.system("qc-usb-messenger-1.1/qcset shutteradapt=y adaptive=y");
else:
os.system("qc-usb-messenger-1.1/qcset shutteradapt=n adaptive=n");
os.system("qc-usb-messenger-1.1/qcset keepsettings=y");
set_shutter_mode(shutter_mode)
if shutter_mode == "manual":
os.system("qc-usb-messenger-1.1/qcset -b 65535 -s 3000");
else:
os.system("qc-usb-messenger-1.1/qcset -b 10000");
capture = highgui.cvCaptureFromCAM(0)
highgui.cvSetCaptureProperty (capture, highgui.CV_CAP_PROP_FRAME_WIDTH, 320)
highgui.cvSetCaptureProperty (capture, highgui.CV_CAP_PROP_FRAME_HEIGHT, 240)
# NOTE(review): the capture is checked only after it has already been used
if not capture:
print "Error opening capture device"
sys.exit (1)
# first frame: used for the frame size and as template for info_frame
frame = highgui.cvQueryFrame (capture)
frame_size = cv.cvGetSize (frame)
# working images, all of frame size
info_frame = cv.cvCloneImage(frame)
gray = cv.cvCreateImage(frame_size, 8, 1)
canny = cv.cvCreateImage(frame_size, 8, 1)
hough = cv.cvCreateImage(frame_size, cv.IPL_DEPTH_32F, 1)
last = cv.cvCreateImage(frame_size, 8, 1)
interesset = cv.cvCreateImage(frame_size, 8, 1)
# the prepared glasses mask: loaded, converted to gray and to a float
# image scaled by 1/255
temp = highgui.cvLoadImage("mask2.png", 1)
mask_size = cv.cvGetSize(temp)
mask = cv.cvCreateImage(mask_size, 8, 1)
cv.cvCvtColor(temp, mask, cv.CV_RGB2GRAY)
fmask = cv.cvCreateImage(mask_size, cv.IPL_DEPTH_32F, 1)
cv.cvConvertScale(mask, fmask, 1.0 / 255.0, 0)
# the voting kernel is twice the mask size in both directions
kernel_size = cv.cvSize(mask_size.width * 2, mask_size.height * 2)
kernel = cv.cvCreateImage(kernel_size, cv.IPL_DEPTH_32F, 1)
cv.cvSetZero(kernel)
# "eyes" image (cut out of the frame around the tracked position) and an
# 8x enlarged copy for display
eyes_size = cv.cvSize(mask_size.width / 3 * 3, mask_size.height / 3 * 4)
eyes = cv.cvCreateImage(eyes_size, 8, 3)
zoom = 8
eyes_zoom = cv.cvCreateImage(cv.cvSize(eyes_size.width * zoom, eyes_size.height * zoom), 8, 3)
cv.cvSetZero(eyes)
cv.cvSetZero(eyes_zoom)
# build the kernel: for every position returned by flocv.get_next_non_zero
# the float mask is added into the kernel at an offset derived from that
# position; x == -1 ends the loop
last_index = 0
mx, my = mask_size.width, mask_size.height
while True:
x, y, last_index = flocv.get_next_non_zero(mask, last_index)
if x == -1:
break
cv.cvSetImageROI(kernel, cv.cvRect(mx - x, my - y, mask_size.width, mask_size.height))
cv.cvAdd(kernel, fmask, kernel, None)
cv.cvResetImageROI(kernel)
# scale the kernel by its maximum and save an 8 bit copy as kern.png
min_kernel, max_kernel = cv.cvMinMaxLoc(kernel, None, None, None)
cv.cvConvertScale(kernel, kernel, 1.0 / max_kernel, 0)
kernel_save = cv.cvCreateImage(cv.cvSize(kernel.width, kernel.height), 8, 1)
cv.cvConvertScale(kernel, kernel_save, 255.0, 0)
highgui.cvSaveImage("kern.png", kernel_save)
# screen_point comes from the screen_point module (star import above)
sp = screen_point()
# colours as passed to the drawing calls
red = cv.cvScalar(0, 0, 255, 0)
dark_red = cv.cvScalar(0, 0, 15, 0)
blue = cv.cvScalar(255, 0, 0, 0)
green = cv.cvScalar(0, 255, 0, 0)
black = cv.cvScalar(0, 0, 0, 0)
# tracker state
frame_count = 0
# last_good: the previous frame produced an accepted hough position
last_good = False
# last_pos: last known (x, y) position, None until the first detection
last_pos = None
pointer_motion = "relative"
# have_last_eyes: `eyes` holds a cut-out from an earlier frame
have_last_eyes = False
motion_enabled = False
# reagional: restrict the hough search to a region around last_pos
reagional = True
# dmode: what the 'Camera' window shows ("camera", "canny" or "hough")
dmode = "camera"
count = 0
sum_diff = 0
last_fps = time.time()
s = cv.cvScalar(150, 150, 150, 150)
# eimg: optional example image used instead of the canny result (key "e")
eimg = None
# NOTE(review): `ex` is used both as the example image index (key "e") and
# as the x position of the eyes cut-out inside the loop
ex = 0
while 1:
frame = highgui.cvQueryFrame (capture)
if frame is None:
break
start = time.time()
frame_count += 1
# mirror the frame in place, keep a copy for the overlay drawings
cv.cvFlip(frame, None, 1)
cv.cvCopy(frame, info_frame, None)
cv.cvCvtColor(frame, gray, cv.CV_RGB2GRAY)
# read the tuning values from the control window
t1 = highgui.cvGetTrackbarPos("canny1", "controls")
t2 = highgui.cvGetTrackbarPos("canny2", "controls")
t3 = highgui.cvGetTrackbarPos("edge", "controls")
min_p = highgui.cvGetTrackbarPos("percent", "controls")
hough_threshold = highgui.cvGetTrackbarPos("hough", "controls")
# keep canny2 >= canny1
if t1 > t2:
t2 = t1
highgui.cvSetTrackbarPos("canny2", "controls", t2)
# NOTE(review): after the adjustment above t2 >= t1 always holds, so this
# second test is the same condition and can never be true here
if t2 < t1:
t1 = t2
highgui.cvSetTrackbarPos("canny1", "controls", t1)
# edge image: canny of the camera frame, or the loaded example image
if eimg is None:
cv.cvCanny(gray, canny, t1, t2, 5)
else:
cv.cvCvtColor(eimg, canny, cv.CV_RGB2GRAY)
# hough voting: around the last position if that was good and regional
# search is enabled, otherwise on the complete frame
if last_good and reagional:
x, y = last_pos
rx, ry = int(x - mask_size.width / 3 * 3), int(y - mask_size.height / 3 * 5)
w, h = x + mask_size.width / 3 * 3, y + mask_size.height / 3 * 5
flocv.houghmask(
canny, hough, kernel,
rx, ry,
w, h
)
# show the search region in the info frame
cv.cvRectangle(info_frame,
cv.cvPoint(rx, ry),
cv.cvPoint(w, h),
green,
1, cv.CV_AA, 0
)
else:
flocv.houghmask(
canny, hough, kernel,
0, 0,
frame_size.width, frame_size.height
)
# NOTE(review): this overwrites the window layout variable `width` above
x, y, width, height, max_hough = flocv.get_best_center(hough)
last_good = False
print hough_threshold, max_hough, height, width
# accept the candidate only if it is no larger than the mask, reaches the
# hough threshold and lies at least half a mask away from every border
if (width <= mask_size.width and height <= mask_size.height and max_hough >= hough_threshold
and x < frame.width - mask_size.width / 2 and y < frame.height - mask_size.height / 2
and x > mask_size.width / 2 and y > mask_size.height / 2):
rx, ry = x - mask_size.width / 2, y - mask_size.height / 2
cv.cvRectangle(info_frame, cv.cvPoint(rx, ry), cv.cvPoint(rx + mask_size.width, ry + mask_size.height), red, 1, cv.CV_AA, 0);
last_pos = (x, y)
last_good = True
elif last_pos:
# no accepted candidate: draw the previous position in dark red
x, y = last_pos
rx, ry = x - mask_size.width / 2, y - mask_size.height / 2
cv.cvRectangle(info_frame, cv.cvPoint(rx, ry), cv.cvPoint(rx + mask_size.width, ry + mask_size.height), dark_red, 1, cv.CV_AA, 0);
if last_pos:
# top-left corner of the eyes cut-out, centred on last_pos, not negative
x, y = last_pos
ex, ey = x - eyes.width / 2, y - eyes.height / 2
ex, ey = map(lambda v: max(v, 0), (ex, ey))
if have_last_eyes:
# refine the position: compare a strip of the stored eyes image with the
# frame at shifts of up to max_distance pixels and keep the shift with
# the smallest L2 norm
compare_width = 28
max_distance = 10
# horizontal search with a strip across the middle of the eyes image
xoff = 0
yoff = eyes.height / 2 - compare_width / 2
cv.cvSetImageROI(eyes, cv.cvRect(xoff, yoff, eyes.width, compare_width))
best_shift = -1
min_norm = -1
# limit the shifts so the compared region stays inside the frame
min_shift = min(ex, max_distance)
max_shift = min(frame.width - ex - eyes.width, max_distance)
for shift in range(-min_shift, max_shift + 1):
cv.cvSetImageROI(frame, cv.cvRect(ex + xoff + shift, ey + yoff, eyes.width, compare_width))
norm = cv.cvNorm(frame, eyes, cv.CV_L2, None)
if norm < min_norm or min_norm == -1:
min_norm = norm
best_shift = shift
ex = max(0, ex + best_shift)
# vertical search with a strip down the middle of the eyes image
xoff = eyes.width / 2 - compare_width / 2
yoff = 0
cv.cvSetImageROI(eyes, cv.cvRect(xoff, yoff, compare_width, eyes.height))
# NOTE(review): the horizontal search starts best_shift at -1, this one at 0
best_shift = 0
min_norm = -1
min_shift = min(ey, max_distance)
max_shift = min(frame.height - ey - eyes.height, max_distance)
for shift in range(-min_shift, max_shift + 1):
cv.cvSetImageROI(frame, cv.cvRect(ex + xoff, ey + yoff + shift, compare_width, eyes.height))
norm = cv.cvNorm(frame, eyes, cv.CV_L2, None)
if norm < min_norm or min_norm == -1:
min_norm = norm
best_shift = shift
ey = max(0, ey + best_shift)
# the refined position becomes the new last_pos and is drawn in dark red
last_pos = (ex + eyes.width / 2, ey + eyes.height / 2)
rx, ry = last_pos[0] - mask_size.width / 2, last_pos[1] - mask_size.height / 2
cv.cvRectangle(info_frame, cv.cvPoint(rx, ry), cv.cvPoint(rx + mask_size.width, ry + mask_size.height), dark_red, 1, cv.CV_AA, 0);
cv.cvResetImageROI(eyes)
# pull the cut-out back inside the frame, then copy it into `eyes`
ex -= max(0, (ex + eyes.width) - frame.width)
ey -= max(0, (ey + eyes.height) - frame.height)
cv.cvSetImageROI(frame, cv.cvRect(ex, ey, eyes.width, eyes.height))
cv.cvCopy(frame, eyes, None)
cv.cvResetImageROI(frame)
have_last_eyes = True
# pointer control
if pointer_motion == "absolute":
# position of the cut-out relative to its possible range, scaled by 0.9
px = 0.9 * ex / (frame.width - eyes.width)
py = 0.9 * ey / (frame.height - eyes.height)
if motion_enabled:
sp.hide()
flocv.absolute_motion(dpy, px, py)
else:
sp.set_pos(px, py)
elif pointer_motion == "relative" and motion_enabled:
# movement relative to the position stored when the motion key was pressed
# (relative_start is assigned in the input event handling below)
speed = 0.15
sx, sy = relative_start
px = speed * (ex - sx) / (frame.width - eyes.width)
py = speed * (ey - sy) / (frame.height - eyes.height)
flocv.relative_motion(dpy, px, py)
diff = time.time() - start
# display according to dmode
# NOTE(review): the scale factor 255.0 / max_hough is not guarded against
# max_hough == 0 - confirm what flocv.get_best_center can return
if dmode == "camera":
cv.cvConvertScale(hough, last, 255.0 / max_hough, 0)
cv.cvThreshold(last, interesset, t3, 0, cv.CV_THRESH_TOZERO)
cv.cvAddS(frame, s, info_frame, interesset);
highgui.cvShowImage ('Camera', info_frame)
elif dmode == "canny":
highgui.cvShowImage ('Camera', canny)
elif dmode == "hough":
cv.cvConvertScale(hough, last, 255.0 / max_hough, 0)
highgui.cvShowImage ('Camera', last)
cv.cvResize(eyes, eyes_zoom, cv.CV_INTER_NN)
highgui.cvShowImage ('eyes', eyes_zoom)
highgui.cvShowImage ('kernel', kernel)
# report average processing time and frame rate every 20 frames
count += 1
sum_diff += diff
if count == 20:
print " computing time: %3.1fms / frame => %.1f fps" % (sum_diff / count * 1000., 20.0 / (time.time() - last_fps))
last_fps = time.time()
sum_diff = 0
count = 0
# keyboard handling of the highgui windows; ESC quits
k = highgui.cvWaitKey(3)
if k == 27:
break
# "d": next display mode (camera -> canny -> hough -> camera)
if k == ord("d"):
if dmode == "camera":
dmode = "canny"
elif dmode == "canny":
dmode = "hough"
elif dmode == "hough":
dmode = "camera"
# "x": previous display mode
if k == ord("x"):
if dmode == "camera":
dmode = "hough"
elif dmode == "canny":
dmode = "camera"
elif dmode == "hough":
dmode = "canny"
# "m": toggle the shutter mode
if k == ord("m"):
if shutter_mode == "auto":
shutter_mode = "manual"
else:
shutter_mode = "auto"
print "new shutter mode:", shutter_mode
set_shutter_mode(shutter_mode)
# "p": toggle between absolute and relative pointer motion
if k == ord("p"):
if pointer_motion == "absolute":
pointer_motion = "relative"
else:
pointer_motion = "absolute"
print "new pointer mode:", pointer_motion
# "s": save the current working images
if k == ord('s'):
highgui.cvSaveImage("frame.png", frame)
highgui.cvSaveImage("info.png", info_frame)
highgui.cvSaveImage("canny.png", canny)
cv.cvConvertScale(hough, last, 255.0 / max_hough, 0)
highgui.cvSaveImage("hough.png", last)
highgui.cvSaveImage("eyes.png", eyes_zoom)
# "e": cycle through the example images ex1.png .. ex3.png; the value 4
# switches back to the live canny image
# NOTE(review): this reuses `ex`, which the tracking code above also
# assigns as a pixel position
if k == ord("e"):
ex = ex % 4 + 1
print "e:", ex
if ex == 4:
eimg = None
else:
eimg = highgui.cvLoadImage("ex%d.png" % ex, 1)
# "r": toggle the regional hough search
if k == ord("r"):
reagional = not reagional
print "reagional is",
if reagional:
print "enabled"
else:
print "disabled"
# "n": forget the stored eyes image
if k == ord("n"):
have_last_eyes = False
print "resettet last eyes image"
# key events from the input device; code -1 means there was no event
code, action = flocv.get_input_event(evdev)
if code != -1:
print "key:", code, action
if code == mouse_keys[0]:
# motion is enabled while action is 1; the current cut-out position is
# the reference point for relative motion
# NOTE(review): `ey` is only assigned once last_pos has been set
motion_enabled = (action == 1)
relative_start = (ex, ey)
if code == mouse_keys[1]:
print "mouse click"
if action == 1:
flocv.button(dpy, 1, "press")
else:
flocv.button(dpy, 1, "release")
sp.do_loop()
written in 19.8ms
the next step would be to extract the exact pupil centers and relate them to the complete eye-frame. that would be the center of
an ellipse (due to the perspective). i've seen a very promising algorithm using RANSAC to fit an ellipse to a pupil.
i think RANSAC is a very good and easy idea. maybe i should write a little bit more
about it...
but as mentioned above: the term was almost over at this time :( maybe i come back
to this at some time.
a cheap webcam like the one i used is also not very capable as an eye tracker. as a head tracker it
is okay :) the python code above implemented a moving red (sometimes transparent) big dot
(or also the mouse pointer) which could be controlled by head movements. i didn't plan to implement a mouse click through an
eye-gesture as this is too unnatural for humans. a usually unused windows-key on my keyboard
was used to enable mouse-movements and the other windows key to activate a mouse button. it's
not very comfortable to do everyday work using this "head-tracker" instead of the mouse :)