I'm developing an Android app whose main purpose is to detect a requested object in a scene. To do this I'm using OpenCV's SURF algorithm. I'm not having much luck with the detection because I don't know when an object should count as "found".
I obtain a frame with my device camera and follow these steps to get the object's keypoints and descriptors:
Java Code
public void onSnapClick(View v) {
Imgproc.GaussianBlur(frameGray, frameGray, new Size(3, 3), 2);
Imgproc.Canny(frameGray, frameGray, 40, 120);
Imgproc.resize(frameGray, frameGray, new Size(320, 240));
FindFeatures(frameGray.getNativeObjAddr()); //JNI call
//Some code to store data in DB...
}
JNI call
double hessianThreshold=600;
int nOctaves=4;
int nOctaveLayers=2;
bool extended=true;
bool upright=false;
JNIEXPORT void JNICALL Java_es_ugr_reconocimiento_Juego_FindFeatures(JNIEnv* env, jobject, jlong addrGray) {
Mat& frameGray= *(Mat*) addrGray;
vector<KeyPoint> keyPoints;
Mat descriptores;
SurfFeatureDetector detector_Surf(hessianThreshold, nOctaves, nOctaveLayers, extended, upright);
SurfDescriptorExtractor extractor_Surf;
detector_Surf.detect(frameGray, keyPoints);
if (keyPoints.size() > 0)
extractor_Surf.compute(frameGray, keyPoints, descriptores);
}
Now I choose which object I want to find and follow these steps to search for it:
Java Code
public void onSearchClick(View v) {
Imgproc.GaussianBlur(frameGray, frameGray, new Size(3, 3), 2);
Imgproc.Canny(frameGray, frameGray, 40, 120);
Imgproc.resize(frameGray, frameGray, new Size(320, 240));
nObject = FindObjects(frameGray.getNativeObjAddr()); //JNI call
if (nObject == searchObject) {
    //draw frame with a rectangle around the found object in the scene...
}
}
JNI call
double hessianThreshold=600;
int nOctaves=4;
int nOctaveLayers=2;
bool extended=true;
bool upright=false;
JNIEXPORT jint JNICALL Java_es_ugr_reconocimiento_Juego_FindObjects(JNIEnv* env, jobject, jlong addrGray) {
Mat& frameGray = *(Mat*) addrGray;
vector<KeyPoint> keyPoints_esc;
Mat descriptores_esc;
SurfFeatureDetector detector_Surf(hessianThreshold, nOctaves, nOctaveLayers, extended, upright);
SurfDescriptorExtractor extractor_Surf;
detector_Surf.detect(frameGray , keyPoints_esc);
if (keyPoints_esc.size() == 0) return -1;
extractor_Surf.compute(frameGray , keyPoints_esc, descriptores_esc);
if (descriptores_esc.rows == 0) return -1;
for(int i=0;i<lstObjects.size();i++){
Mat descriptores_obj = lstDescriptors.at(i);
vector<KeyPoint> keyPoints_obj = lstKeyPoints.at(i);
FlannBasedMatcher matcher;
vector<vector<DMatch> > matches;
matcher.knnMatch(descriptores_obj, descriptores_esc, matches, 2);
// ----------------------------------------------------------------------
// Keep only "good" matches: the ratio test (best distance clearly
// smaller than the second-best distance).
// PS.- radiusMatch can also be used here.
// ----------------------------------------------------------------------
vector<DMatch> good_matches;
//THIS LOOP IS SENSITIVE TO SEGFAULTS
for (int m = 0; m < min(descriptores_obj.rows - 1, (int) matches.size()); m++) {
    if ( (matches[m][0].distance < 0.6 * (matches[m][1].distance)) &&
        ((int) matches[m].size() <= 2 && (int) matches[m].size() > 0) ) {
        good_matches.push_back(matches[m][0]);
    }
}
if (good_matches.size() >= nThreshold) {
vector < Point2f > obj;
vector < Point2f > scene;
for (int i = 0; i < good_matches.size(); i++) {
//-- Get the keypoints from the good matches
obj.push_back(keyPoints_obj[good_matches[i].queryIdx].pt);
scene.push_back(keyPoints_esc[good_matches[i].trainIdx].pt);
}
Mat H = findHomography(obj, scene, CV_RANSAC);
vector<Point2f> obj_corners(4);
obj_corners[0] = cvPoint(0, 0);
obj_corners[1] = cvPoint(240, 0);
obj_corners[2] = cvPoint(240, 320);
obj_corners[3] = cvPoint(0, 320);
vector<Point2f> scene_corners(4);
perspectiveTransform(obj_corners, scene_corners, H);
line(frameGray, scene_corners[0], scene_corners[1], Scalar(255, 0, 0), 4);
line(frameGray, scene_corners[1], scene_corners[2], Scalar(255, 0, 0), 4);
line(frameGray, scene_corners[2], scene_corners[3], Scalar(255, 0, 0), 4);
line(frameGray, scene_corners[3], scene_corners[0], Scalar(255, 0, 0), 4);
for (unsigned int i = 0; i < scene.size(); i++) {
const Point2f& kp = scene[i];
circle(frameGray, Point(kp.x, kp.y), 10, Scalar(255, 255, 255, 255));
}
return i; //position of the matched object
}
}
return -1; // no object matched
}
I don't know what the best threshold for this comparison would be:
if (good_matches.size() >= nThreshold) // do findHomography...
I've been searching, and almost every code sample I found uses 4 as nThreshold, but that isn't working well for me: my code "finds" an object almost every time.
Is there a better way to do this? For example, using a different matcher, a different threshold, or checking whether the homography actually produces something close to a rectangle (I say this because sometimes it "finds" something, but the four drawn lines don't form a rectangle).
Please make the following changes in your code:
int nThreshold= 100;
if (good_matches.size() >= nThreshold)
{
continue; // Skip the remaining matching steps if there are too many good matches (many ambiguous points result in a false match)
}
vector < Point2f > obj;
vector < Point2f > scene;
for (int i = 0; i < good_matches.size(); i++) {
//-- Get the keypoints from the good matches
obj.push_back(keyPoints_obj[good_matches[i].queryIdx].pt);
scene.push_back(keyPoints_esc[good_matches[i].trainIdx].pt);
}
// Skip the homography if the object or scene contains fewer than four points (you cannot compute a homography / draw a rectangle from fewer than 4 points, so the program would crash here if this case is not handled)
if(obj.size() < 4 || scene.size() < 4)
{
continue;
}
Mat H = findHomography(obj, scene, CV_RANSAC);
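To also address the question about the four projected lines not forming a rectangle: one option is to request the RANSAC inlier mask from findHomography, require a minimum share of inliers, and reject results whose projected corners are not convex. Below is a rough sketch using the OpenCV Java bindings (3.x/4.x names such as Calib3d.RANSAC); the helper name validHomography and the thresholds are illustrative assumptions, not part of the code above.
import java.util.List;
import org.opencv.calib3d.Calib3d;
import org.opencv.core.Core;
import org.opencv.core.CvType;
import org.opencv.core.Mat;
import org.opencv.core.MatOfPoint;
import org.opencv.core.MatOfPoint2f;
import org.opencv.core.Point;
import org.opencv.imgproc.Imgproc;

public class HomographyCheck {
    // Illustrative helper: true if the matched points produce a geometrically plausible homography.
    static boolean validHomography(List<Point> objPts, List<Point> scenePts) {
        if (objPts.size() < 4 || scenePts.size() < 4) return false; // findHomography needs at least 4 pairs

        MatOfPoint2f obj = new MatOfPoint2f();
        obj.fromList(objPts);
        MatOfPoint2f scene = new MatOfPoint2f();
        scene.fromList(scenePts);

        // RANSAC reports which matches are inliers in the mask.
        Mat inlierMask = new Mat();
        Mat H = Calib3d.findHomography(obj, scene, Calib3d.RANSAC, 3.0, inlierMask);
        if (H.empty()) return false;

        // Require a reasonable share of inliers, e.g. at least half of the good matches (tune as needed).
        int inliers = Core.countNonZero(inlierMask);
        if (inliers < Math.max(8, objPts.size() / 2)) return false;

        // Project the object corners and reject degenerate (non-convex) quadrilaterals.
        MatOfPoint2f corners = new MatOfPoint2f(
                new Point(0, 0), new Point(240, 0), new Point(240, 320), new Point(0, 320));
        MatOfPoint2f projected = new MatOfPoint2f();
        Core.perspectiveTransform(corners, projected, H);
        MatOfPoint projectedInt = new MatOfPoint();
        projected.convertTo(projectedInt, CvType.CV_32S);
        return Imgproc.isContourConvex(projectedInt);
    }
}
The same idea carries over to the C++ JNI code above: cv::findHomography also accepts an output mask, and cv::isContourConvex can be applied to the projected scene_corners before the rectangle is drawn.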
I made a 3D renderer that parses .obj files (ASCII) and projects them onto a 2D plane.
At first glance the projection model seems to be fine, except for one thing.
I noticed that the projection model looks a bit odd:
https://i.stack.imgur.com/iaLOu.png
All polygons are being drawn including the ones in the back of the model, which I
should definitely not be able to see.
I did a quick search on Wikipedia to see what this is about and I think I found something called the "Sichtbarkeitsproblem" (hidden-surface determination).
(DE): https://de.wikipedia.org/wiki/Sichtbarkeitsproblem
(EN):
https://en.wikipedia.org/wiki/Hidden-surface_determination
The article mentions that this is a common problem in computer graphics and that there are many different ways to perform the "Verdeckungsberechnung" (occlusion calculation).
It mentions things like using a z-buffer and ray tracing.
Now I don't really know a lot about ray tracing, but it seems quite applicable, as I later want to add a light source.
I am not sure how ray tracing works, but if I just send out rays at angles matching the direction from the camera through every pixel on screen and check which polygon is hit first, I would end up with some polygons missing completely just because one vertex happens to be covered.
How do other ray tracers work? Do they remove the entire polygon when not getting a hit? Remove only one or more vertices (which I believe would cause massive distortion in shape)? Or do they just render all the polygons sorted by their minimum distance to the camera? (I guess that would be very bad for performance.)
Please help me implement this in my code or give me a hint; it would mean a lot to me.
My code is below, and the link to the projection model (see image no. 1) is here:
https://drive.google.com/file/d/10dpjcL2d2QB15qqTSu5p6kQ534hNOzCz/view?usp=sharing
(Note that the 3d-model and code must be in same folder in order to work)
// 12.11.2022
// See "Rotation matrix" on Wikipedia
// View space: the world-space vertex positions relative to the view of the camera
/* The occlusion calculation is necessary to render a 3D scene correctly, because surfaces
   that are not visible to the viewer should not be drawn
*/
// -> https://de.wikipedia.org/wiki/Sichtbarkeitsproblem
// TODO: ray tracing / occlusion calculation
// TODO: texture mapping
import java.util.Arrays;
import java.awt.Robot;
import java.nio.ByteBuffer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.ArrayList;
byte b[];
int amount = 0;
String lines[];
PVector[][] vertices;
int[] faces;
float a = 0;
PVector cam, cam_angle, cam_move, cam_speed;
float angle = 0.0;
void setup() {
size(800,600);
frameRate(60);
noCursor();
cam = new PVector(0, 100, -500);
cam_angle = new PVector(0, 0, 0);
cam_move = new PVector(0, 0, 0);
cam_speed = new PVector(50, 50, 50);
lines = loadStrings("UM2_SkullPile28mm.obj");
println("File loaded. Now scanning contents...");
println();
Pattern numbers = Pattern.compile("(-?\\d+)");
ArrayList<PVector> vertices_ = new ArrayList<PVector>();
ArrayList<ArrayList> faces_ = new ArrayList<ArrayList>();
int parsed_lines = 0;
for(String i:lines) {
switch(i.charAt(0)) {
// Find faces
case 'f':
ArrayList<Integer> values = new ArrayList<Integer>();
for(Matcher m = numbers.matcher(i); m.find(); values.add(Integer.parseInt(m.group())));
faces_.add(values);
break;
// Find Vectors
case 'v':
String s[] = i.trim().split("\\s+");
vertices_.add(new PVector(Float.parseFloat(s[1])*20, Float.parseFloat(s[2])*20, Float.parseFloat(s[3])*20));
break;
};
if(++parsed_lines % (lines.length/6) == 0 || parsed_lines == lines.length) println((int)(map(parsed_lines, 0, lines.length, 0, 100)), "%");
}
println();
println("Done. Found", vertices_.size(), "Vertices and", faces_.size(), "faces");
int i=0;
vertices = new PVector[faces_.size()][];
for(ArrayList<Integer> f_:faces_) {
vertices[i] = new PVector[f_.size()];
int j = 0;
for(int f: f_) {
PVector v = vertices_.get(f-1);
vertices[i][j] = Rotate3d_x(v, -90);
j++;
}
i++;
}
}
PVector Rotate2d(PVector p, float a) {
// a = angle
float[][] m2 = {
{cos(a), -sin(a)},
{sin(a), cos(a)}
};
float[][] rotated = matmul(m2, new float[][] {
{ p.x },
{ p.y }
});
return new PVector(rotated[0][0], rotated[1][0]);
}
PVector Rotate3d(PVector p, float[][] m2) {
float[][] rotated = matmul(m2, new float[][] {
{ p.x },
{ p.y },
{ p.z }
});
return new PVector(rotated[0][0], rotated[1][0], rotated[2][0]);
}
PVector Rotate3d_x(PVector p, float a) {
return Rotate3d(p,
new float[][] {
{1, 0, 0},
{0, cos(a), -sin(a)},
{0, sin(a), cos(a)}
});
};
PVector Rotate3d_y(PVector p, float a) {
return Rotate3d(p,
new float[][] {
{cos(a), 0, sin(a)},
{0, 1, 0},
{-sin(a), 0, cos(a)}
});
}
PVector Rotate3d_z(PVector p, float a) {
return Rotate3d(p,
new float[][] {
{cos(a), -sin(a), 0},
{sin(a), cos(a), 0},
{0, 0, 1}
});
}
PVector Rotate3d(PVector p, PVector a) {
return Rotate3d_z( Rotate3d_y(Rotate3d_x(p, a.x), a.y), a.z );
}
// Matrix multiplication
float[][] matmul(float[][] m1, float[][] m2) {
int cols_m1 = m1.length,
rows_m1 = m1[0].length;
int cols_m2 = m2.length,
rows_m2 = m2[0].length;
try {
if (rows_m1 != cols_m2) throw new Exception("Rows of m1 must match Columns of m2!");
}
catch(Exception e) {
println(e);
}
float[][] res = new float[cols_m1][rows_m2];
for (int c=0; c < cols_m1; c++) {
for (int r2=0; r2 < rows_m2; r2++) {
float sum = 0;
float[] buf = new float[rows_m1];
// Multiply rows of m1 with columns of m2 and store in buf
for (int r=0; r < rows_m1; r++) {
buf[r] = m1[c][r]* m2[r][r2];
}
// Add up all entries into sum
for (float entry : buf) {
sum += entry;
}
res[c][r2] = sum;
}
}
return res;
}
PVector applyPerspective(PVector p) {
PVector d = applyViewTransform(p);
return applyPerspectiveTransform(d);
}
PVector applyViewTransform(PVector p) {
// c = camera position
// co = camera orientation / camera rotation
PVector c = cam;
PVector co = cam_angle;
// dx, dy, dz https://en.wikipedia.org/wiki/3D_projection : Mathematical Formula
float[][] dxyz = matmul(
matmul(new float[][]{
{1, 0, 0},
{0, cos(co.x), sin(co.x)},
{0, -sin(co.x), cos(co.x)}
}, new float[][]{
{cos(co.y), 0, -sin(co.y)},
{0, 1, 0},
{sin(co.y), 0, cos(co.y)}
}),
matmul(new float[][]{
{cos(co.z), sin(co.z), 0},
{-sin(co.z), cos(co.z), 0},
{0, 0, 1}
}, new float[][]{
{p.x - c.x},
{p.y - c.y},
{p.z - c.z},
}));
PVector d = new PVector(dxyz[0][0], dxyz[1][0], dxyz[2][0]);
return d;
}
PVector applyPerspectiveTransform(PVector d) {
// e = the display surface's position relative to the camera pinhole c
PVector e = new PVector(0, 0, 300);
return new PVector((e.z / d.z) * d.x + e.x, (e.z / d.z) * d.y + e.y);
}
void draw() {
background(255);
translate(width/2, height/2);
scale(1,-1);
noStroke();
fill(0, 100, 0, 50);
PVector[][] points_view = new PVector[vertices.length][];
for(int i=0; i < vertices.length; i++) {
points_view[i] = new PVector[vertices[i].length];
for(int j=0; j < vertices[i].length; j++)
points_view[i][j] = applyViewTransform(Rotate3d_y(vertices[i][j], angle));
}
// The following snippet I got from: https://stackoverflow.com/questions/74443149/3d-projection-axis-inversion-problem-java-processing?noredirect=1#comment131433616_74443149
float nearPlane = 1.0;
for (int c = 0; c < points_view.length; c++) {
beginShape();
for (int r = 0; r < points_view[c].length-1; r++) {
// Connect all points
//if (i == a) continue;
PVector p0 = points_view[c][r];
PVector p1 = points_view[c][r+1];
if(p0.z < nearPlane && p1.z < nearPlane){ continue; };
if(p0.z >= nearPlane && p1.z < nearPlane)
p1 = PVector.lerp(p0, p1, (p0.z - nearPlane) / (p0.z - p1.z));
if(p0.z < nearPlane && p1.z >= nearPlane)
p0 = PVector.lerp(p1, p0, (p1.z - nearPlane) / (p1.z - p0.z));
// project
p0 = applyPerspectiveTransform(p0);
p1 = applyPerspectiveTransform(p1);
vertex(p0.x, p0.y);
vertex(p1.x, p1.y);
}
endShape();
}
}
Ray tracing doesn't determine whether or not a polygon is visible. It determines what point (if any) on what polygon is visible in a given direction.
As a simplification: rasterisation works by taking a set of geometry and, for each piece, determining which pixels it affects. Ray tracing works by taking a set of pixels and, for each one, determining what geometry is visible along that direction.
With rasterisation, there are many ways of making sure that polygons don't draw in the wrong order. One approach is to sort them by distance to the camera, but that doesn't work with polygons that overlap. The usual approach is to use a z-buffer: when a polygon is rasterised, calculate the distance to the camera in each pixel, and only update the buffer if the new value is nearer to the camera than the old value.
With ray tracing, each ray returns the nearest hit location along a direction, along with what it hit. Since each pixel will only be visited once, you don't need to worry about triangles drawing on top of each other.
If you just want to project a piece of 3D geometry onto a plane, rasterisation will likely be much, much faster. At a very high level, do this:
create an RGBA buffer of size X*Y
create a z buffer of size X*Y and fill it with 'inf'
for each triangle:
project the triangle onto the projection plane
for each pixel the triangle might affect:
calculate distance from camera to the corresponding position on the triangle
if the distance is lower than the current value in the z buffer:
replace the value in the RGBA and z buffers with the new values
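As a concrete illustration of that pseudocode, here is a minimal, self-contained Java sketch of the per-pixel depth test (the class name, buffer sizes and the simple barycentric loop are illustrative, not code from the Processing sketch above):
import java.util.Arrays;

public class ZBufferSketch {
    static final int W = 800, H = 600;
    static final int[] rgba = new int[W * H];     // colour buffer
    static final float[] zbuf = new float[W * H]; // depth buffer

    static void clear() {
        Arrays.fill(rgba, 0xFFFFFFFF);              // white background
        Arrays.fill(zbuf, Float.POSITIVE_INFINITY); // "infinitely far away"
    }

    // x/y are projected screen coordinates, z is the view-space depth of each of the 3 vertices.
    static void rasterTriangle(float[] x, float[] y, float[] z, int colour) {
        int minX = (int) Math.max(0, Math.floor(Math.min(x[0], Math.min(x[1], x[2]))));
        int maxX = (int) Math.min(W - 1, Math.ceil(Math.max(x[0], Math.max(x[1], x[2]))));
        int minY = (int) Math.max(0, Math.floor(Math.min(y[0], Math.min(y[1], y[2]))));
        int maxY = (int) Math.min(H - 1, Math.ceil(Math.max(y[0], Math.max(y[1], y[2]))));
        float area = edge(x[0], y[0], x[1], y[1], x[2], y[2]);
        if (area == 0) return; // degenerate triangle
        for (int py = minY; py <= maxY; py++) {
            for (int px = minX; px <= maxX; px++) {
                // Barycentric weights of the pixel centre with respect to the triangle.
                float w0 = edge(x[1], y[1], x[2], y[2], px + 0.5f, py + 0.5f) / area;
                float w1 = edge(x[2], y[2], x[0], y[0], px + 0.5f, py + 0.5f) / area;
                float w2 = 1f - w0 - w1;
                if (w0 < 0 || w1 < 0 || w2 < 0) continue; // pixel is outside the triangle
                float depth = w0 * z[0] + w1 * z[1] + w2 * z[2];
                int idx = py * W + px;
                if (depth < zbuf[idx]) { // nearer than what is already stored
                    zbuf[idx] = depth;
                    rgba[idx] = colour;
                }
            }
        }
    }

    // Twice the signed area of triangle (a, b, c); the sign encodes on which side c lies.
    static float edge(float ax, float ay, float bx, float by, float cx, float cy) {
        return (bx - ax) * (cy - ay) - (by - ay) * (cx - ax);
    }
}
Faces with more than three vertices (like many faces in an .obj file) can be fanned into triangles first; the depth test itself stays the same.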
I have a Tetris project that needs to work with a camera. That is, there are two windows. One shows the webcam feed (OpenCV) and detects a red triangle or square. The other window is the Tetris game (OpenGL), where the tiles come down one by one from top to bottom.
All functions are already written and everything works. Now I need to connect the two.
Here is part of the code of the classes.
In the class BoxLightTexRendererPP you can see the positioning of the game piece.
In the VideoProcessing class you can see the contour detection by the camera.
And in the InteractionHandler class I have to write a method (e.g. with a switch-case) so that if the camera sees a square the token moves to the left, and if a triangle is detected it moves to the right.
How can I do this?
BoxLightTexRendererPP:
// Pointers (names) for data transfer and handling on GPU
private int[] vaoName; // Name of vertex array object
private int[] vboName; // Name of vertex buffer object
private int[] iboName; // Name of index buffer object
float[] barrey = verticies;
int block = 28;
//Start position right/left
static float x = -1.5f;
//Start position up/down
float h = 1.5f;
//Start position front/back
float y = 0;
//Fall speed
float fall = 0.01f;
boolean start = true;
public static boolean go = false;
boolean stay = false;
float[] barrey1 = verticies;
int block1 = 28;
VideoProcessing:
for (int idx = 0; idx < contours.size(); idx++) {
MatOfPoint2f approx = new MatOfPoint2f(); //approx parameter count contours of objects; important for interaction handling
//allows the approximation of polygons and determine scope of object
Imgproc.approxPolyDP(newContours.get(idx), approx, Imgproc.arcLength(newContours.get(idx), true) * 0.02, true);
long count = approx.total();
//filtering small blobs
if(Math.abs(Imgproc.contourArea(contours.get(idx))) > 1000) {
//draw contours on objects
if (count == 5) {
Imgproc.drawContours(frame, contours, idx, new Scalar(75, 0, 0));
}
if (count == 6) {
Imgproc.drawContours(frame, contours, idx, new Scalar(255, 255, 255));
}
if (count == 4) {
Imgproc.drawContours(frame, contours, idx, new Scalar(200, 0, 0));
viereck = (int) count;
}
if (count == 3) {
Imgproc.drawContours(frame, contours, idx, new Scalar(360, 100, 50));
dreieck = (int) count;
}
}
}
InteractionHandler:
public void connection() {
float xAchse = BoxLightTexRendererPP.x;
int viereck = VideoProcessing.viereck;
int dreieck = VideoProcessing.dreieck;
Switch(xAchse)
??????????????
}
So I found the answer: InteractionHandler is not important.
I just modified VideoProcessing:
(There is still a collision detection problem. I am still working on that.)
for (int idx = 0; idx < contours.size(); idx++) {
MatOfPoint2f approx = new MatOfPoint2f(); //approx parameter count contours of objects; important for interaction handling
//allows the approximation of polygons and determine scope of object
Imgproc.approxPolyDP(newContours.get(idx), approx, Imgproc.arcLength(newContours.get(idx), true) * 0.02, true);
long count = approx.total();
//filtering small blobs
if(Math.abs(Imgproc.contourArea(contours.get(idx))) > 1000) {
//draw contours on objects
if (count == 5) {
Imgproc.drawContours(frame, contours, idx, new Scalar(75, 0, 0));
BoxLightTexMainWindowPP.errorLog.setText("Fünfeck erkannt");
BoxLightTexRendererPP.x -= 0.1;
}
if (count == 6) {
Imgproc.drawContours(frame, contours, idx, new Scalar(255, 255, 255));
BoxLightTexMainWindowPP.errorLog.setText("Sechseck erkannt");
BoxLightTexRendererPP.y -= 0.1;
}
if (count == 4) {
Imgproc.drawContours(frame, contours, idx, new Scalar(200, 0, 0));
BoxLightTexMainWindowPP.errorLog.setText("Viereck erkannt");
BoxLightTexRendererPP.x += 0.1;
}
if (count == 3) {
Imgproc.drawContours(frame, contours, idx, new Scalar(360, 100, 50));
BoxLightTexMainWindowPP.errorLog.setText("Dreieck erkannt");
BoxLightTexRendererPP.y += 0.1;
}
}
}
I am trying to create a program that will recognize the lottery numbers automatically.
I have recognized the draw moment and separated the ball, and now my problem is that I cannot recognize the number on the ball.
This is the original picture:
This is my picture after I find the contours:
Now for each contour I try to determine whether it is a number and which number it is. This is where my app fails.
It is important to say that the ball can be at many angles and the lighting can differ, both of which affect the quality of the picture.
This is an example of a contour image my program found:
This is my code for recognizing the number:
private void identifyNumber(Mat inFile) {
System.out.println("\nRunning identifyNumber");
System.out.println("-------------------------");
int match_method = Imgproc.TM_SQDIFF;
Mat img = inFile;
Mat bestImage = new Mat(), rotImg;
int bestDegree = 0, bestNumber = 0;
double lowerstFornumber, lowest = 1E30;
String templateNumber;
for (int k=0 ; k<=9; k++) {
lowerstFornumber = 1E30;
for(int i=-90; i<=90; i=i+5){
templateNumber = "C:\\pics\\drawProcessing\\numbers\\" + k + ".png";
Mat templ = Highgui.imread(templateNumber);
rotImg = rotateImage(img, i);
int result_cols = rotImg.cols() - templ.cols() + 1;
int result_rows = rotImg.rows() - templ.rows() + 1;
Mat result = new Mat(result_rows, result_cols, CvType.CV_32FC1);
Imgproc.matchTemplate(rotImg, templ, result, match_method);
MinMaxLocResult mmr = Core.minMaxLoc(result);
Point matchLoc;
if (match_method == Imgproc.TM_SQDIFF || match_method == Imgproc.TM_SQDIFF_NORMED) {
matchLoc = mmr.minLoc;
} else {
matchLoc = mmr.maxLoc;
}
double minValue = mmr.minVal;
// System.out.println(i+",maxVal:" +maxValue);
if(lowerstFornumber > minValue){
lowerstFornumber = minValue;
}
if(lowest > minValue){
lowest = minValue;
bestImage = rotImg;
bestDegree = i;
bestNumber = arr[k];
}
}
System.out.println("lowerstFornumber " + arr[k] + " :" + lowerstFornumber);
}
System.out.println("bestDegree:" + bestDegree);
System.out.println("bestNumber:" + bestNumber);
System.out.println("_lowest:" + lowest);
Highgui.imwrite("C:\\pics\\drawProcessing\\out-best.jpg", bestImage);
}
Sometimes it finds the number, sometimes not.
Is it even possible? (I need 100% accuracy.)
Am I doing it wrong?
What if you try an affine-invariant descriptor for your boxes? You could even start with an easier descriptor, say SIFT or SURF, computed for every region and matched against a database. It should be fast because it looks like the scale will not be changing. SIFT and SURF might give you some results, but for something more stable you can use ASIFT.
It's not in Java, but it describes the idea:
#include <iostream>
#include <vector>
#include <string>
#include <fstream>
#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;
//----------------------------------------------------------------------
//
//----------------------------------------------------------------------
void DetectContour(Mat& img, Mat& res)
{
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
Mat edges=img.clone();
//Canny(img, edges, 50, 190, 3);
img.copyTo(edges);
findContours(edges,contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_NONE, Point());
if(contours.size()>0)
{
for( int i = 0; i < contours.size(); i++ )
{
vector<Point> approx;
approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true);
double area = contourArea(Mat(approx));
if(area>200)
drawContours( res, contours, i, Scalar(255,0,0), CV_FILLED, 8);
}
}
}
//----------------------------------------------------------------------
//
//----------------------------------------------------------------------
int main(int argc, char **argv)
{
cv::namedWindow("result");
Mat img=imread("ball.png");
// Prepare mask
Mat mask=Mat::zeros(img.size(),CV_8UC1);
Mat img_gray;
cv::cvtColor(img,img_gray,cv::COLOR_BGR2GRAY);
Mat res=Mat(img.size(),CV_8UC1);
res=255;
vector<Vec3f> circles;
/// Apply the Hough Transform to find the circles
HoughCircles( img_gray, circles, cv::HOUGH_GRADIENT, 1, img_gray.rows/8, 140, 70, 0,0 );
/// Draw the circles detected
for( size_t i = 0; i < circles.size(); i++ )
{
Point center(cvRound(circles[i][0]), cvRound(circles[i][1]));
int radius = cvRound(circles[i][2]);
// circle outline
circle( mask, center, radius, Scalar(255,255,255), -1, 8, 0 );
}
img.copyTo(res,mask);
cv::cvtColor(res,res,cv::COLOR_BGR2GRAY);
threshold(res,res,80,255,cv::THRESH_BINARY_INV);
mask=0;
DetectContour(res,mask);
mask.copyTo(res);
int element_size=10;
Mat element = getStructuringElement( cv::MORPH_ELLIPSE,Size( 2*element_size + 1, 2*element_size+1 ),Point( element_size, element_size ) );
int element_size2=5;
Mat element2 = getStructuringElement( cv::MORPH_ELLIPSE,Size( 2*element_size2 + 1, 2*element_size2+1 ),Point( element_size2, element_size2 ) );
cv::dilate(res,res,element2);
cv::erode(res,res,element);
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
findContours(res,contours, hierarchy, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_NONE, Point());
for (int i=0;i<contours.size();++i)
{
RotatedRect box = minAreaRect(contours[i]);
Point2f center, vtx[4];
box.points(vtx);
float w=100;
float h=100;
// Create a column vector with the coordinates of each point (on the field plane)
cv::Mat xField;
xField.create(4, 1, CV_32FC2);
xField.at<Point2f>(0) = ( vtx[0] );
xField.at<Point2f>(1) = ( vtx[1] );
xField.at<Point2f>(2) = ( vtx[2] );
xField.at<Point2f>(3) = ( vtx[3] );
// same thing for xImage but with the pixel coordinates instead of the field coordinates, same order as in xField
cv::Mat xImage;
xImage.create(4, 1, CV_32FC2);
xImage.at<Point2f>(0) = ( cv::Point2f(0, 0) );
xImage.at<Point2f>(1) = ( cv::Point2f(w, 0) );
xImage.at<Point2f>(2) = ( cv::Point2f(w, h) );
xImage.at<Point2f>(3) = ( cv::Point2f(0, h) );
// Compute the homography matrix
cv::Mat H = cv::findHomography(xField,xImage );
xField.release();
xImage.release();
Mat warped;
warpPerspective(img,warped,H,Size(w,h));
H.release();
char win_name[255];
sprintf(win_name,"number_image %d",i);
namedWindow(win_name);
imshow(win_name,warped);
// cv::waitKey(0);
for(int j = 0; j < 4; j++ )
{
line(img, vtx[j], vtx[(j+1)%4], Scalar(0, 255, 0), 1, LINE_AA);
}
}
imshow("result",img);
cv::waitKey(0);
cv::destroyAllWindows();
}
I'm trying to automate a process where someone manually converts a code to a digital one.
Then I started reading about OCR, so I installed Tesseract OCR and tried it on some images. It doesn't even detect anything close to the code.
After reading some questions on Stack Overflow, I figured that the images need some preprocessing, such as deskewing the image to a horizontal one, which can be done with OpenCV for example.
Now my questions are:
What kind of preprocessing or other methods should be used in a case like the above image?
Secondly, can I rely on the output? Will it always work in cases like the above image?
I hope someone can help me!
I have decided to capture the whole card instead of the code only. By capturing the whole card it is possible to transform it to a plain perspective, and then I can easily get the "code" region.
I also learned a lot of things, especially regarding speed. This function is slow on high-resolution images: it can take up to 10 seconds on an image of 3264 x 1836.
What I did to speed things up is resize the input matrix by a factor of 1/4. That makes it 4^2 = 16 times faster, with a minimal loss of precision. The next step is scaling the quadrangle we found back to the original size, so that we can transform the quadrangle to a plain perspective using the original source (a small sketch of this wrapping appears after the detection code below).
The code I created for detecting the largest area is heavily based on code I found on Stack Overflow. Unfortunately it didn't work as expected for me, so I combined several code snippets and modified a lot.
This is what I got:
private static double angle(Point p1, Point p2, Point p0 ) {
double dx1 = p1.x - p0.x;
double dy1 = p1.y - p0.y;
double dx2 = p2.x - p0.x;
double dy2 = p2.y - p0.y;
return (dx1 * dx2 + dy1 * dy2) / Math.sqrt((dx1 * dx1 + dy1 * dy1) * (dx2 * dx2 + dy2 * dy2) + 1e-10);
}
private static MatOfPoint find(Mat src) throws Exception {
Mat blurred = src.clone();
Imgproc.medianBlur(src, blurred, 9);
Mat gray0 = new Mat(blurred.size(), CvType.CV_8U), gray = new Mat();
List<MatOfPoint> contours = new ArrayList<>();
List<Mat> blurredChannel = new ArrayList<>();
blurredChannel.add(blurred);
List<Mat> gray0Channel = new ArrayList<>();
gray0Channel.add(gray0);
MatOfPoint2f approxCurve;
double maxArea = 0;
int maxId = -1;
for (int c = 0; c < 3; c++) {
int ch[] = {c, 0};
Core.mixChannels(blurredChannel, gray0Channel, new MatOfInt(ch));
int thresholdLevel = 1;
for (int t = 0; t < thresholdLevel; t++) {
if (t == 0) {
Imgproc.Canny(gray0, gray, 10, 20, 3, true); // true ?
Imgproc.dilate(gray, gray, new Mat(), new Point(-1, -1), 1); // 1 ?
} else {
Imgproc.adaptiveThreshold(gray0, gray, thresholdLevel, Imgproc.ADAPTIVE_THRESH_GAUSSIAN_C, Imgproc.THRESH_BINARY, (src.width() + src.height()) / 200, t);
}
Imgproc.findContours(gray, contours, new Mat(), Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE);
for (MatOfPoint contour : contours) {
MatOfPoint2f temp = new MatOfPoint2f(contour.toArray());
double area = Imgproc.contourArea(contour);
approxCurve = new MatOfPoint2f();
Imgproc.approxPolyDP(temp, approxCurve, Imgproc.arcLength(temp, true) * 0.02, true);
if (approxCurve.total() == 4 && area >= maxArea) {
double maxCosine = 0;
List<Point> curves = approxCurve.toList();
for (int j = 2; j < 5; j++)
{
double cosine = Math.abs(angle(curves.get(j % 4), curves.get(j - 2), curves.get(j - 1)));
maxCosine = Math.max(maxCosine, cosine);
}
if (maxCosine < 0.3) {
maxArea = area;
maxId = contours.indexOf(contour);
//contours.set(maxId, getHull(contour));
}
}
}
}
}
if (maxId >= 0) {
return contours.get(maxId);
//Imgproc.drawContours(src, contours, maxId, new Scalar(255, 0, 0, .8), 8);
}
return null;
}
You can call it like so:
MatOfPoint contour = find(src);
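A rough sketch of the resize-then-scale-back idea described earlier, assuming the find(Mat) method above is available in the same class (the wrapper name findScaled and the 1/4 factor are illustrative):
import org.opencv.core.Mat;
import org.opencv.core.MatOfPoint;
import org.opencv.core.Point;
import org.opencv.core.Size;
import org.opencv.imgproc.Imgproc;

public class CardFinder {
    static MatOfPoint findScaled(Mat src) throws Exception {
        double factor = 0.25; // 1/4 of the size: roughly 16x less work
        Mat small = new Mat();
        Imgproc.resize(src, small, new Size(), factor, factor, Imgproc.INTER_AREA);

        MatOfPoint contour = find(small); // find() is the method shown above
        if (contour == null) return null;

        // Scale the contour points back to the coordinates of the original image,
        // so the perspective transform can be done on the full-resolution source.
        Point[] pts = contour.toArray();
        for (Point p : pts) {
            p.x /= factor;
            p.y /= factor;
        }
        return new MatOfPoint(pts);
    }

    // ... find(Mat) as defined in the snippet above ...
}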
See this answer for quadrangle detection from a contour and transforming it to a plain perspective:
Java OpenCV deskewing a contour
I went through many questions on Stack Overflow and was able to develop a small program that detects squares and rectangles correctly. This is my sample code:
public static CvSeq findSquares(final IplImage src, CvMemStorage storage) {
CvSeq squares = new CvContour();
squares = cvCreateSeq(0, sizeof(CvContour.class), sizeof(CvSeq.class), storage);
IplImage pyr = null, timg = null, gray = null, tgray;
timg = cvCloneImage(src);
CvSize sz = cvSize(src.width(), src.height());
tgray = cvCreateImage(sz, src.depth(), 1);
gray = cvCreateImage(sz, src.depth(), 1);
// cvCvtColor(gray, src, 1);
pyr = cvCreateImage(cvSize(sz.width() / 2, sz.height() / 2), src.depth(), src.nChannels());
// down-scale and upscale the image to filter out the noise
// cvPyrDown(timg, pyr, CV_GAUSSIAN_5x5);
// cvPyrUp(pyr, timg, CV_GAUSSIAN_5x5);
// cvSaveImage("ha.jpg",timg);
CvSeq contours = new CvContour();
// request closing of the application when the image window is closed
// show image on window
// find squares in every color plane of the image
for (int c = 0; c < 3; c++) {
IplImage channels[] = { cvCreateImage(sz, 8, 1), cvCreateImage(sz, 8, 1), cvCreateImage(sz, 8, 1) };
channels[c] = cvCreateImage(sz, 8, 1);
if (src.nChannels() > 1) {
cvSplit(timg, channels[0], channels[1], channels[2], null);
} else {
tgray = cvCloneImage(timg);
}
tgray = channels[c];
// // try several threshold levels
for (int l = 0; l < N; l++) {
// hack: use Canny instead of zero threshold level.
// Canny helps to catch squares with gradient shading
if (l == 0) {
// apply Canny. Take the upper threshold from slider
// and set the lower to 0 (which forces edges merging)
cvCanny(tgray, gray, 0, thresh, 5);
// dilate canny output to remove potential
// // holes between edge segments
cvDilate(gray, gray, null, 1);
} else {
// apply threshold if l!=0:
cvThreshold(tgray, gray, (l + 1) * 255 / N, 255,
CV_THRESH_BINARY);
}
// find contours and store them all as a list
cvFindContours(gray, storage, contours, sizeof(CvContour.class), CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);
CvSeq approx;
// test each contour
while (contours != null && !contours.isNull()) {
if (contours.elem_size() > 0) {
approx = cvApproxPoly(contours, Loader.sizeof(CvContour.class), storage, CV_POLY_APPROX_DP, cvContourPerimeter(contours) * 0.02, 0);
if (approx.total() == 4 && Math.abs(cvContourArea(approx, CV_WHOLE_SEQ, 0)) > 1000 && cvCheckContourConvexity(approx) != 0) {
double maxCosine = 0;
for (int j = 2; j < 5; j++) {
// find the maximum cosine of the angle between
// joint edges
double cosine = Math.abs(angle(
new CvPoint(cvGetSeqElem(
approx, j % 4)),
new CvPoint(cvGetSeqElem(
approx, j - 2)),
new CvPoint(cvGetSeqElem(
approx, j - 1))));
maxCosine = Math.max(maxCosine, cosine);
}
if (maxCosine < 0.2) {
CvRect x = cvBoundingRect(approx, l);
if ((x.width() * x.height()) < 50000) {
System.out.println("Width : " + x.width()
+ " Height : " + x.height());
cvSeqPush(squares, approx);
}
}
}
}
contours = contours.h_next();
}
contours = new CvContour();
}
}
return squares;
}
I use this image to detect rectangles and squares
I need to identify the following output
and
But when I run the above code, it detects only the following rectangles, and I don't know the reason for that. Can someone please explain why?
This is the output that I got.
Please be kind enough to explain the problem in the above code and give some suggestions for detecting these squares and rectangles.
Given a mask image (a binary image, like your second figure), cvFindContours() gives you the contours (several lists of points).
Look at this link: http://dasl.mem.drexel.edu/~noahKuntz/openCVTut7.html
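For reference, here is a minimal sketch of that idea using the official OpenCV Java bindings rather than the JavaCV wrappers from the question (the class and method names are illustrative):
import java.util.ArrayList;
import java.util.List;
import org.opencv.core.Mat;
import org.opencv.core.MatOfPoint;
import org.opencv.core.Rect;
import org.opencv.imgproc.Imgproc;

public class ContourBoxes {
    // Given a binary mask (white blobs on black), return a bounding box per blob.
    static List<Rect> boxesFromMask(Mat binaryMask) {
        List<MatOfPoint> contours = new ArrayList<>();
        Mat hierarchy = new Mat();
        // RETR_EXTERNAL: only the outer contour of each white region.
        Imgproc.findContours(binaryMask, contours, hierarchy,
                Imgproc.RETR_EXTERNAL, Imgproc.CHAIN_APPROX_SIMPLE);

        List<Rect> boxes = new ArrayList<>();
        for (MatOfPoint contour : contours) {
            if (Imgproc.contourArea(contour) > 1000) { // same small-blob filter as in the code above
                boxes.add(Imgproc.boundingRect(contour));
            }
        }
        return boxes;
    }
}
Each box can then be checked for squareness (approxPolyDP with 4 corners and near-right angles), the same way the cvApproxPoly / angle check does in the code above.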