I am making an OCR for my project and stuck on a point, Right now i am performing segmentation on the basis of contours its working fine with few images but there few more where it fails even when the image quality is good, I would appreciate if someone suggest me more accurate way, and if someone provide a code example, here is my current code.
public static void imageBinarization(IplImage src, IplImage dst){
IplImage r, g, b, s;
r = cvCreateImage(cvGetSize(src), IPL_DEPTH_8U, 1);
g = cvCreateImage(cvGetSize(src), IPL_DEPTH_8U, 1);
b = cvCreateImage(cvGetSize(src), IPL_DEPTH_8U, 1);
cvSplit(src, r, g, b, null);
s = cvCreateImage(cvGetSize(src), IPL_DEPTH_8U, 1);
cvAddWeighted(r, 1./3., g, 1./3., 0.0, s);
cvAddWeighted(s, 2./3., b, 1./3., 0.0, s);
cvThreshold(s, dst, 100, 100, CV_THRESH_BINARY_INV);
cvReleaseImage(r);
cvReleaseImage(g);
cvReleaseImage(b);
cvReleaseImage(s);
}
public static void imageSegmentation(String sourcePath, String targetPath){
cvConvert(t0, mat0);
cvConvert(t8, mat8);
cvConvert(t9, mat9);
IplImage image = cvLoadImage(sourcePath);
IplImage grayImage = cvCreateImage(cvGetSize(image), IPL_DEPTH_8U, 1);
//cvSmooth(image, image, CV_BLUR_NO_SCALE, 2);
//cvSmooth(image, image, CV_BLUR, 9, 9, 2, 2);
//cvSmooth(image, image, CV_GAUSSIAN, 3);
imageBinarization(image, grayImage);
CvMemStorage mem;
CvSeq contours = new CvSeq();
CvSeq ptr = new CvSeq();
mem = cvCreateMemStorage(0);
CvRect rect = null;
int px1,px2, py1, py2;
CvScalar blue = CV_RGB(0, 0, 250);
int n = 0; int i = 0;
cvFindContours(grayImage, mem, contours, sizeof(CvContour.class) , CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, cvPoint(0,0));
Random rand = new Random();
for (ptr = contours; ptr != null; ptr = ptr.h_next()) {
Color randomColor = new Color(rand.nextFloat(), rand.nextFloat(), rand.nextFloat());
CvScalar color = CV_RGB( randomColor.getRed(), randomColor.getGreen(), randomColor.getBlue());
rect = cvBoundingRect(ptr, n);//new CvRect(cvGetSeqElem(c, c.total()));
px1 = rect.x(); py1 = rect.y(); px2 = (rect.x() + rect.width()); py2 = (rect.y() + rect.height());
cvRectangle(image, cvPoint(px1, py1), cvPoint(px2, py2), blue, 1, CV_AA, 0);
//----
xbox = rect.x(); ybox = rect.y(); wbox = rect.width(); hbox = rect.height();
img = cvCreateImage(cvSize(wbox, hbox), image.depth(), image.nChannels());
cvSetImageROI(image, cvRect(xbox, ybox, wbox, hbox));
cvCopy(image, img);
cvResetImageROI(image);
//cvSaveImage(targetPath+i+".jpg", img);
i++;
//---
//cvDrawContours(image, ptr, color, CV_RGB(0,0,0), -1, CV_FILLED, 8, cvPoint(0,0));
}
cvSaveImage(targetPath+"mat.jpg", image);
}
Try to use some Global Thresholding algorithm such as Otsu. But JavaCV haven't implemented that. So try to find the Otsu threshold level using histogram processing and apply that value to
cvThreshold(s, dst, 100, 100, CV_THRESH_BINARY_INV);
Related
picture of processed image + original
I'm working on a project where I'm taking Smash Bros. game output, taking a screenshot, processing it in order to detect the percents which the characters are sitting at.
The program I wrote is detecting the 57 as a 55 and the 11 (which I let settle to it's normal position) as a 51. And while the gameplay is running, the numbers will jump around.
The program I wrote uses Tess4J, and I've configured everything right. I have trained Tesseract with my own custom font that I made using the games percentage numbers. I've tried multiple different fonts as well. What will make it more accurate!?
I've thought about instead of calculating percents, instead just detecting when they're damaged, but I'm also figuring that out.
This is the code I use to process images:
public static Mat blur(Mat input, int numberOfTimes){
Mat sourceImage = new Mat();
Mat destImage = input.clone();
for(int i=0;i<numberOfTimes;i++){
sourceImage = destImage.clone();
Imgproc.blur(sourceImage, destImage, new Size(3.0, 3.0));
}
return destImage;
}
public static BufferedImage purify(BufferedImage image) {
BufferedImage image2 = ImageHelper.convertImageToGrayscale(image);
Mat mat = BufferedImage2Mat(image2, -1);
Mat resizedMat = new Mat();
double width = mat.cols();
double height = mat.rows();
double aspect = width / height;
Size sz = new Size(width * aspect * 1.4, height * aspect * 1.4);
Imgproc.resize(mat, resizedMat, sz);
double thresh = Imgproc.threshold(resizedMat, resizedMat, 23, 255, Imgproc.THRESH_BINARY_INV);
Mat kernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(3, 3));
Imgproc.dilate(resizedMat, resizedMat, kernel, new Point(0, 0), 9);
return toBufferedImage(HighGui.toBufferedImage(blur(resizedMat, 0)));
}
public static BufferedImage toBufferedImage(Image img)
{
if (img instanceof BufferedImage)
{
return (BufferedImage) img;
}
BufferedImage bimage = new BufferedImage(img.getWidth(null), img.getHeight(null), BufferedImage.TYPE_INT_ARGB);
Graphics2D bGr = bimage.createGraphics();
bGr.drawImage(img, 0, 0, null);
bGr.dispose();
return bimage;
}
public static Image denoise(BufferedImage img) {
Mat image = BufferedImage2Mat(img, 0);
Mat out = new Mat();
Mat tmp = new Mat();
Mat kernel = new Mat(new Size(3, 3), CvType.CV_8UC1, new Scalar(255));
Imgproc.morphologyEx(image, tmp, Imgproc.MORPH_OPEN, kernel);
Imgproc.morphologyEx(tmp, out, Imgproc.MORPH_CLOSE, kernel);
return HighGui.toBufferedImage(out);
}
public static Mat BufferedImage2Mat(BufferedImage image, int filter) {
try {
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
ImageIO.write(image, "jpg", byteArrayOutputStream);
byteArrayOutputStream.flush();
return Imgcodecs.imdecode(new MatOfByte(byteArrayOutputStream.toByteArray()), filter);
} catch (IOException e) {
return null;
}
}
public static Image clean(BufferedImage image) {
Mat og = BufferedImage2Mat(image, Imgcodecs.IMREAD_UNCHANGED);
Mat im = BufferedImage2Mat(image, 0);
Mat bw = new Mat(im.size(), CvType.CV_8U);
Imgproc.threshold(im, bw, 0, 255, Imgproc.THRESH_BINARY_INV | Imgproc.THRESH_OTSU);
Mat dist = new Mat(im.size(), CvType.CV_32F);
Imgproc.distanceTransform(bw, dist, Imgproc.CV_DIST_L2, Imgproc.CV_DIST_MASK_PRECISE);
Mat dibw32f = new Mat(im.size(), CvType.CV_32F);
final double SWTHRESH = 8.0; // stroke width threshold
Imgproc.threshold(dist, dibw32f, SWTHRESH/2.0, 255, Imgproc.THRESH_BINARY);
Mat dibw8u = new Mat(im.size(), CvType.CV_8U);
dibw32f.convertTo(dibw8u, CvType.CV_8U);
Mat kernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(3, 3));
Mat cont = new Mat(im.size(), CvType.CV_8U);
Imgproc.morphologyEx(dibw8u, cont, Imgproc.MORPH_OPEN, kernel);
final double HTHRESH = im.rows() * 0.5;
List<MatOfPoint> contours = new ArrayList<MatOfPoint>();
List<Point> digits = new ArrayList<Point>();
Mat hierchy = new Mat();
Imgproc.findContours(cont, contours, hierchy, Imgproc.RETR_CCOMP, Imgproc.CHAIN_APPROX_SIMPLE, new Point(0, 0));
List<Mat>cleanedMatList = new ArrayList<Mat>();
int c = 0;
for (int i = 0; i >= hierchy.cols(); i++) {
Rect rect = Imgproc.boundingRect(contours.get(i));
if (rect.height > HTHRESH) {
Mat binary = new Mat();
Imgproc.rectangle(binary, new Point(rect.x, rect.y), new Point(rect.x + rect.width - 1, rect.y + rect.height - 1), new Scalar(0, 0, 255), 3);
cleanedMatList.add(c, binary);
c++;
}
}
List<MatOfInt> digitsHull = new ArrayList<MatOfInt>();
for(int i=0; i < contours.size(); i++){
digitsHull.add(new MatOfInt());
}
for(int i=0; i < contours.size(); i++){
Imgproc.convexHull(contours.get(i), digitsHull.get(i));
}
List<MatOfPoint> digitRegions = new ArrayList<MatOfPoint>();
for (int i = 0; i< digitRegions.size(); i++) {
MatOfPoint dr = digitRegions.get(i);
dr.push_back(digitsHull.get(i));
}
Mat digitsMask = new Mat(og.rows(),og.cols(), CvType.CV_8U);
Imgproc.drawContours(digitsMask, digitRegions, 0, new Scalar(255, 255, 255), -1);
Imgproc.morphologyEx(digitsMask, digitsMask, Imgproc.MORPH_DILATE, kernel);
Mat cleaned = new Mat(og.rows(), og.cols(), CvType.CV_8U);
dibw8u.copyTo(cleaned, digitsMask);
return HighGui.toBufferedImage(dibw8u);
}
I have 4 points. For example...
A(1;1)
B(2;5)
C(4;4)
D(3;2)
How I can change RGB parameters in this rectangle (for all pixels)?
Like this:
double[] data = mat.get(x, y);
data[0] = data[0]+30;
data[1] = data[1]+20;
data[2] = data[2]+10;
mat.put(x, y, data);
Try something like that for implementing approach described in Dan MaĊĦek comment:
...
Bitmap sourceBitmap = BitmapFactory.decodeResource(getResources(), R.drawable.<your_image>);
Mat sourceMat = new Mat(sourceBitmap.getWidth(), sourceBitmap.getHeight(), CvType.CV_8UC3);
Utils.bitmapToMat(sourceBitmap, sourceMat);
Mat maskMat = new Mat(sourceBitmap.getWidth(), sourceBitmap.getHeight(), CvType.CV_8UC4);
Mat resultMat = new Mat(sourceBitmap.getWidth(), sourceBitmap.getHeight(), CvType.CV_8UC4);
// create color, which added to sourceMat region (+100 - for red channel)
Scalar color = new Scalar(100, 0, 0, 255);
// or you can try Scalar color = new Scalar(10, 20, 30); as in your question
Point[] region = new Point[4];
// your coords multiplied by 50 for visualization convenience
region[0] = new Point(50, 50);
region[1] = new Point(100, 250);
region[2] = new Point(200, 200);
region[3] = new Point(150, 100);
List<MatOfPoint> contours = new ArrayList();
MatOfPoint regionMat = new MatOfPoint(region);
contours.add(regionMat);
// create mask
Imgproc.drawContours(maskMat, contours, 0, color, -1);
// apply mask to source
Core.add(maskMat, sourceMat, resultMat);
// just for visualisation
Bitmap bitmap = Bitmap.createBitmap(sourceMat.cols(), sourceMat.rows(), Bitmap.Config.ARGB_8888);
Utils.matToBitmap(resultMat, bitmap);
<your_ImageView>.setImageBitmap(bitmap);
...
NB! This is just example of masking, not optimized.
Edited answer due to comment below:
This is OpenCV C++ code but you can easily port it to JAVA. Also my code assumes the points always represent a rectangle.
// read image
Mat image=imread("image.jpg",-1);
// region of interest, shape=rectangle
Point p1(50,50), p2(100,80);
Rect roi(p1.x,p1.y,p2.x,p2.y);
// vector hold channels
std::vector<Mat> channels(3);
// split original image to bgr channels
cv::split(image, channels);
// Mat to hold ROI
Mat extractedRoi;
//For channel B
extractedRoi = channels.at(0)(roi);
extractedRoi += 30;
//For channel G
extractedRoi = channels.at(1)(roi);
extractedRoi += 20;
//For channel R
extractedRoi = channels.at(2)(roi);
extractedRoi += 10;
// merge channels back together
cv::merge(channels, image);
Edit2: A faster approach.
Mat image=imread("/home/haseebullah/Pictures/S1.jpg",-1);
Point p1(50,50), p2(100,80);
Rect roi(p1.x,p1.y,p2.x,p2.y);
Mat extractedRoi;
extractedRoi = image(roi);
Scalar constants(30,20,10);
extractedRoi += constants
I read a tutorial to give the correction of skew angle on this site. But I don't understand how to convert those code to Java.
std::vector<cv::Point> points;
cv::Mat_<uchar>::iterator it = img.begin<uchar>();
cv::Mat_<uchar>::iterator end = img.end<uchar>();
for (; it != end; ++it)
if (*it) //what is the meaning of this code (1)
points.push_back(it.pos()); //what is the meaning of this code (2)
Please help me understand this code.
For OpenCV 3.2.0 Here is full translation of deskew in Java from C++ at https://felix.abecassis.me/2011/10/opencv-bounding-box-skew-angle/ (with little modifications):
public Mat deskew(Mat src, double angle) {
Point center = new Point(src.width()/2, src.height()/2);
Mat rotImage = Imgproc.getRotationMatrix2D(center, angle, 1.0);
//1.0 means 100 % scale
Size size = new Size(src.width(), src.height());
Imgproc.warpAffine(src, src, rotImage, size, Imgproc.INTER_LINEAR + Imgproc.CV_WARP_FILL_OUTLIERS);
return src;
}
public void computeSkew( String inFile ) {
//Load this image in grayscale
Mat img = Imgcodecs.imread( inFile, Imgcodecs.IMREAD_GRAYSCALE );
//Binarize it
//Use adaptive threshold if necessary
//Imgproc.adaptiveThreshold(img, img, 255, ADAPTIVE_THRESH_MEAN_C, THRESH_BINARY, 15, 40);
Imgproc.threshold( img, img, 200, 255, THRESH_BINARY );
//Invert the colors (because objects are represented as white pixels, and the background is represented by black pixels)
Core.bitwise_not( img, img );
Mat element = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(3, 3));
//We can now perform our erosion, we must declare our rectangle-shaped structuring element and call the erode function
Imgproc.erode(img, img, element);
//Find all white pixels
Mat wLocMat = Mat.zeros(img.size(),img.type());
Core.findNonZero(img, wLocMat);
//Create an empty Mat and pass it to the function
MatOfPoint matOfPoint = new MatOfPoint( wLocMat );
//Translate MatOfPoint to MatOfPoint2f in order to user at a next step
MatOfPoint2f mat2f = new MatOfPoint2f();
matOfPoint.convertTo(mat2f, CvType.CV_32FC2);
//Get rotated rect of white pixels
RotatedRect rotatedRect = Imgproc.minAreaRect( mat2f );
Point[] vertices = new Point[4];
rotatedRect.points(vertices);
List<MatOfPoint> boxContours = new ArrayList<>();
boxContours.add(new MatOfPoint(vertices));
Imgproc.drawContours( img, boxContours, 0, new Scalar(128, 128, 128), -1);
double resultAngle = rotatedRect.angle;
if (rotatedRect.size.width > rotatedRect.size.height)
{
rotatedRect.angle += 90.f;
}
//Or
//rotatedRect.angle = rotatedRect.angle < -45 ? rotatedRect.angle + 90.f : rotatedRect.angle;
Mat result = deskew( Imgcodecs.imread( inFile ), rotatedRect.angle );
Imgcodecs.imwrite( outputFile, result );
}
import org.opencv.core.*;
import org.opencv.imgcodecs.Imgcodecs;
import org.opencv.imgproc.Imgproc;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
public class ValidateDocumentAlignment {
public ValidateDocumentAlignment() {
System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
}
public boolean isDocumentTiltAngleWithinThresholdLimit(File scannedDoc, int thresholdAngle) {
int kernelSize = 3;
int cannyLowerThreshold = 25;
int cannyUpperThreshold = 50;
Mat image = new Mat();
Mat blur = new Mat();
Mat edged = new Mat();
Mat dilate = new Mat();
Mat erode = new Mat();
int maxValIdx = 0;
double area = 0;
List<MatOfPoint> contours = new ArrayList<>();
Mat sourceImage = Imgcodecs.imread(scannedDoc.getPath(), Imgcodecs.IMREAD_GRAYSCALE);
Imgproc.adaptiveThreshold(sourceImage, sourceImage, 255, Imgproc.ADAPTIVE_THRESH_MEAN_C, Imgproc.THRESH_BINARY, 15, 40);
Core.bitwise_not(sourceImage, sourceImage);
Mat kernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(kernelSize, kernelSize));
Imgproc.morphologyEx(sourceImage, image, Imgproc.MORPH_CLOSE, kernel);
Imgproc.GaussianBlur(image, blur, new Size(7, 7), 0);
Imgproc.Canny(blur, edged, cannyLowerThreshold, cannyUpperThreshold);
Imgproc.dilate(edged, dilate, kernel, new Point(-1, -1), 6);
Imgproc.erode(dilate, erode, kernel, new Point(-1, -1), 3);
Imgproc.findContours(erode, contours, new Mat(), Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE);
for (int contourIdx = 0; contourIdx < contours.size(); contourIdx++) {
Rect rect = Imgproc.boundingRect(contours.get(contourIdx));
if ((rect.height * rect.width) > area) {
area = rect.height * rect.width;
maxValIdx = contourIdx;
}
}
RotatedRect rotatedRect = Imgproc.minAreaRect(new MatOfPoint2f(contours.get(maxValIdx).toArray()));
double skewAngle = rotatedRect.angle;
int acuteAngle = (int) (skewAngle % 90);
boolean isProperlyAligned = true;
if (Math.abs(acuteAngle) > thresholdAngle && Math.abs(acuteAngle) < (90 - thresholdAngle)) {
isProperlyAligned = false;
}
return isProperlyAligned;
}
}
private fun main(){
val bmp:Bitmap? = null //Any bitmap (if you are working with bitmap)
var mRgba = Mat() // else you can direct use MAT on onCameraFrame
val mGray = Mat()
val bmp32: Bitmap = bmp.copy(Bitmap.Config.ARGB_8888, true)
Utils.bitmapToMat(bmp32, mRgba)
Imgproc.cvtColor(mRgba, mGray, Imgproc.COLOR_BGR2GRAY)
mRgba = makeOrientationCorrection(mRgba,mGray)// here actual magic starts
Imgproc.cvtColor(mRgba, mGray, Imgproc.COLOR_BGR2GRAY)
val bmpOutX = Bitmap.createBitmap(
mRgba.cols(),
mRgba.rows(),
Bitmap.Config.ARGB_8888
)
Utils.matToBitmap(mRgba, bmpOutX)
binding.imagePreview.setImageBitmap(bmpOutX!!)
}
private fun makeOrientationCorrection(mRGBA:Mat, mGRAY:Mat):Mat{
val dst = Mat()
val cdst = Mat()
val cdstP: Mat
Imgproc.Canny(mGRAY, dst, 50.0, 200.0, 3, false)
Imgproc.cvtColor(dst, cdst, Imgproc.COLOR_GRAY2BGR)
cdstP = cdst.clone()
val linesP = Mat()
Imgproc.HoughLinesP(dst, linesP, 1.0, Math.PI/180, 50, 50.0, 10.0)
var biggestLineX1 = 0.0
var biggestLineY1 = 0.0
var biggestLineX2 = 0.0
var biggestLineY2 = 0.0
var biggestLine = 0.0
for (x in 0 until linesP.rows()) {
val l = linesP[x, 0]
Imgproc.line(
cdstP, org.opencv.core.Point(l[0], l[1]),
org.opencv.core.Point(l[2], l[3]),
Scalar(0.0, 0.0, 255.0), 3, Imgproc.LINE_AA, 0)
}
for (x in 0 until linesP.rows()) {
val l = linesP[x, 0]
val x1 = l[0]
val y1 = l[1]
val x2 = l[2]
val y2 = l[3]
val lineHeight = sqrt(((x2 - x1).pow(2.0)) + ((y2 - y1).pow(2.0)))
if(biggestLine<lineHeight){
val angleOfRotationX1 = angleOf(PointF(x1.toFloat(),y1.toFloat()),PointF(x2.toFloat(),y2.toFloat()))
Log.e("angleOfRotationX1","$angleOfRotationX1")
if(angleOfRotationX1<45.0 || angleOfRotationX1>270.0){
biggestLine = lineHeight
if(angleOfRotationX1<45.0){
biggestLineX1 = x1
biggestLineY1 = y1
biggestLineX2 = x2
biggestLineY2 = y2
}
if(angleOfRotationX1>270.0){
biggestLineX1 = x2
biggestLineY1 = y2
biggestLineX2 = x1
biggestLineY2 = y1
}
}
}
if(x==linesP.rows()-1){
Imgproc.line(
cdstP, org.opencv.core.Point(biggestLineX1, biggestLineY1),
org.opencv.core.Point(biggestLineX2, biggestLineY2),
Scalar(255.0, 0.0, 0.0), 3, Imgproc.LINE_AA, 0)
}
}
var angle = angleOf(PointF(biggestLineX1.toFloat(),biggestLineY1.toFloat()),PointF(biggestLineX2.toFloat(),biggestLineY2.toFloat()))
Log.e("angleOfRotationX2","$angle")
angle -= (angle * 2)
return deskew(mRGBA,angle)
}
fun angleOf(p1: PointF, p2: PointF): Double {
val deltaY = (p1.y - p2.y).toDouble()
val deltaX = (p2.x - p1.x).toDouble()
val result = Math.toDegrees(Math.atan2(deltaY, deltaX))
return if (result < 0) 360.0 + result else result
}
private fun deskew(src:Mat, angle:Double):Mat{
val center = org.opencv.core.Point((src.width() / 2).toDouble(), (src.height() / 2).toDouble())
val scaleBy = if(angle<0){
1.0+((0.5*angle)/45)//max scale down by 0.50(50%) based on angle
}else{
1.0-((0.3*angle)/45)//max scale down by 0.50(50%) based on angle
}
Log.e("scaleBy",""+scaleBy)
val rotImage = Imgproc.getRotationMatrix2D(center, angle, scaleBy)
val size = Size(src.width().toDouble(), src.height().toDouble())
Imgproc.warpAffine(src, src, rotImage, size, Imgproc.INTER_LINEAR + Imgproc.CV_WARP_FILL_OUTLIERS)
return src
}
I am trying to find the location of image inside a specific area in another. I am using javacv to do this issue. But my code is giving an error when executing cvMatchTemplate function. I think I am miss using cvSetImageROI.
This is how I am using it:
public static void main(String c[]) {
IplImage src = cvLoadImage("test.jpg", 0);
IplImage tmp = cvLoadImage("tmp.png", 0);
IplImage result = cvCreateImage(cvSize(src.width() - tmp.width() + 1, src.height() - tmp.height() + 1),
IPL_DEPTH_32F,1);
cvZero(result);
cvSetImageROI(src, new CvRect(22, 50, 30, 30));
cvSetImageROI(result, new CvRect(22, 50, 30, 30));
//Match Template Function from OpenCV
cvMatchTemplate(src, tmp, result, CV_TM_CCORR_NORMED);
double[] min_val = new double[2];
double[] max_val = new double[2];
CvPoint minLoc = new CvPoint();
CvPoint maxLoc = new CvPoint();
cvMinMaxLoc(result, min_val, max_val, minLoc, maxLoc,
null);
CvPoint point = new CvPoint();
point.x(maxLoc.x() + tmp.width());
point.y(maxLoc.y() + tmp.height());
cvRectangle(src, maxLoc, point, CvScalar.RED, 2, 8, 0);
cvShowImage("Lena Image", src);
cvWaitKey(0);
cvReleaseImage(src);
cvReleaseImage(tmp);
cvReleaseImage(result);
}
This is the error:
OpenCV Error: Assertion failed (result.size() == cv::Size(std::abs(img.cols - templ.cols)
+ 1,std::abs(img.rows - templ.rows) + 1) && result.type() == CV_32F) in unknown function,
file ..\..\..\src\opencv\modules\imgproc\src\templmatch.cpp, line 384
Any Help?
Hey see this example that I did with Javacv
public class Test {
static Image image;
public static void main(String[] args) throws Exception {
int width = Integer.parseInt(args[3]);
int height = Integer.parseInt(args[4]);
IplImage src = cvLoadImage(
args[0], 0);
IplImage tmp = cvLoadImage(
args[1], 0);
IplImage result = cvCreateImage(
cvSize(src.width() - tmp.width() + 1,
src.height() - tmp.height() + 1), IPL_DEPTH_32F, src.nChannels());
cvZero(result);
// Match Template Function from OpenCV
cvMatchTemplate(src, tmp, result, CV_TM_CCORR_NORMED);
// double[] min_val = new double[2];
// double[] max_val = new double[2];
DoublePointer min_val = new DoublePointer();
DoublePointer max_val = new DoublePointer();
CvPoint minLoc = new CvPoint();
CvPoint maxLoc = new CvPoint();
cvMinMaxLoc(result, min_val, max_val, minLoc, maxLoc, null);
// Get the Max or Min Correlation Value
// System.out.println(Arrays.toString(min_val));
// System.out.println(Arrays.toString(max_val));
CvPoint point = new CvPoint();
point.x(maxLoc.x() + tmp.width());
point.y(maxLoc.y() + tmp.height());
// cvMinMaxLoc(src, min_val, max_val,0,0,result);
cvRectangle(src, maxLoc, point, CvScalar.RED, 2, 8, 0);// Draw a
// Rectangle for
// Matched
// Region
CvRect rect = new CvRect();
rect.x(maxLoc.x());
rect.y(maxLoc.y());
rect.width(tmp.width() + width);
rect.height(tmp.width() + height);
cvSetImageROI(src, rect);
IplImage imageNew = cvCreateImage(cvGetSize(src), src.depth(),
src.nChannels());
cvCopy(src, imageNew);
cvSaveImage(args[2], imageNew);
cvShowImage("Lena Image", src);
cvWaitKey(0);
cvReleaseImage(src);
cvReleaseImage(tmp);
cvReleaseImage(result);
}
full reference here
we need 4 default parameters like this
"C:\Users\Waldema\Desktop\bg.jpg" "C:\Users\Waldema\Desktop\logosiemens.jpg" "C:\Users\Waldema\Desktop\imageToFind.jpg" 100 200
configurable in the Run configurations of common IDEs.
I think that will help
I found the problem it was in setting the roi of the result image, this is the wrong line :
cvSetImageROI(result, new CvRect(22, 50, 30, 30));
It should be like this :
cvSetImageROI(result, new CvRect(22, 50, 30 - tmp.width() + 1, 30 - tmp.height() + 1));
I am not sure why but I think it is because cvMatchTemplate function takes the result dimension equal to the source dimensions minus the template dimensions plus one pixel.
I have a project where I am trying to track a person as they move throughout a room. I am using an arduino, some servo motors and an xbox kinect for my camera.
I have a vision of allowing the project some training time where it can scan the room and make a database of images for the empty room. Then when a person enters the room the program can do a simple difference image to create a white blob for the person. Using this white blob I would be able to calculate the centre of mass for the person and compare it to the centre of the image frame in order to pass a command to the arduino telling it how far and in which direction to move the servo motors. I am using eclipse, writing in java and using opencv 2.4.6.
I am stuck on getting a clear white blob. I have already written my methods to calculate the distance from the centre of mass of the blob and the centre of the frame but without a clearly defined blob this is useless. I have been trying to get my program to work by taking a snap shot of the background of my room, changing the image to binary then subtracting it from a binary image of my room with me in it. This has not worked. Is my vision of training the system then comparing with these trained images valid or should I be going about a different way to detect an object?
I have tried implementing opticalflow() but it seems erratic and not extremely accurate.
Any information on the topic would be extremely helpful. I thank you in advance for reading my question.
-Trent
Edit: I have attached my code. The area in question is the training() and matdiff() methods.
package testingV1;
//OpenCv + OpenNI + Java Libraries
import java.awt.FlowLayout;
import java.util.ArrayList;
import java.util.List;
import java.awt.image.BufferedImage;
import java.awt.image.DataBuffer;
import java.awt.image.DataBufferByte;
import java.io.*;
import java.nio.ByteBuffer;
import javax.imageio.ImageIO;
import javax.swing.*;
import org.opencv.core.*;
import org.opencv.imgproc.*;
import org.opencv.objdetect.CascadeClassifier;
import org.opencv.video.BackgroundSubtractorMOG;
import org.opencv.video.Video;
import org.opencv.highgui.*;
import org.opencv.*;
import org.OpenNI.*;
public class TestV1 {
static int imWidth = 640, imHeight = 480;
static ImageGenerator imageGen;
static Context context;
static int flag = CvType.CV_8UC3;
static int flag2 = CvType.CV_8UC1;
static Mat background;
public static void main(String[] args) throws GeneralException{
System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
//We create a new "context" of the Kinect
context = new Context();
JFrame canvas = new JFrame("Optical Flow");
//need to create and add license to our "context"
License license = new License("PrimeSense", "0KOIk2JeIBYClPWVnMoRKn5cdY4=");
context.addLicense(license);
//defining the data we are taking from the kinect
MapOutputMode mapMode = null; //initialize it to null
mapMode = new MapOutputMode(imWidth, imHeight, 30); //create a 640x480 30fps feed definition
imageGen = ImageGenerator.create(context); //Rgb camera
imageGen.setMapOutputMode(mapMode); //change our feed to 640x480 30 fps
imageGen.setPixelFormat(PixelFormat.RGB24);///Pixel format, RGB 8-bit 3 channel
context.setGlobalMirror(true); //Mirrors our feed to make it more intuitive
BufferedImage rgbImage = new BufferedImage(imWidth, imHeight, BufferedImage.TYPE_INT_RGB);
BufferedImage prevImg = new BufferedImage(imWidth, imHeight, BufferedImage.TYPE_BYTE_GRAY);
BufferedImage currImg = new BufferedImage(imWidth, imHeight, BufferedImage.TYPE_BYTE_GRAY);
BufferedImage diffImg = new BufferedImage(imWidth, imHeight, BufferedImage.TYPE_BYTE_GRAY);
BufferedImage paintedImg = new BufferedImage(imWidth, imHeight, BufferedImage.TYPE_INT_RGB);
BufferedImage facesImg = new BufferedImage(imWidth, imHeight, BufferedImage.TYPE_INT_RGB);
Mat paintedMat = new Mat(imHeight, imWidth, flag);
Mat facesMat = new Mat(imHeight, imWidth, flag);
Mat currMat = new Mat(imHeight, imWidth, flag2);
Mat prevMat = new Mat(imHeight, imWidth, flag2);
Mat diffMat = new Mat(imHeight, imWidth, flag2);
Mat paintedMatg = new Mat(imHeight, imWidth, flag2);
ByteBuffer imageBB;
//First Frame
canvas.getContentPane().setLayout(new FlowLayout());
Icon video = new ImageIcon(rgbImage);
JLabel panel = new JLabel(video);
//Icon video2 = new ImageIcon(paintedImg);
//JLabel panel2 = new JLabel(video2);
//Icon video3 = new ImageIcon(facesImg);
//JLabel panel3 = new JLabel(video3);
Icon video4 = new ImageIcon(diffImg);
JLabel panel4 = new JLabel(video4);
canvas.getContentPane().add(panel);
//canvas.getContentPane().add(panel2);
//canvas.getContentPane().add(panel3);
canvas.getContentPane().add(panel4);
canvas.pack();
canvas.setVisible(true);
canvas.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
CascadeClassifier faceDetectorAlg = new CascadeClassifier("C:/Users/Trent/Desktop/Capstone"
+ "/ComputerVisionCode/November16/testingV1/src/testingV1/haarcascade_frontalface_alt.xml");
boolean firstTime = true;
imageGen.startGenerating();
while(true){
context.waitOneUpdateAll(imageGen);
imageBB = imageGen.getImageMap().createByteBuffer(); //get KinectData
rgbImage = bufToImage(imageBB); //take data from kinect and put in BufferedImage
prevMat = currMat;
currMat = img2Mat(rgb2Gray(rgbImage));
if(firstTime){
training(rgbImage);
firstTime = false;
}
else{
diffMat = findDiff(currMat);
diffImg = mat2Img(diffMat);
}
//optical flow - inaccurate
//paintedMatg = opticalFlow(img2Mat(prevImg), img2Mat(currImg), 300, 0.01, 10);
//Imgproc.cvtColor(paintedMatg, paintedMat, Imgproc.COLOR_GRAY2RGB); //change from gray to color
//paintedImg = mat2Img(paintedMat);
//face detection - extremely resource intensive
//facesMat = faceDetector(img2Mat(rgbImage), faceDetectorAlg);
//facesImg = mat2Img(facesMat);
panel.setIcon(new ImageIcon(rgbImage));
//panel2.setIcon(new ImageIcon(paintedImg));
//panel3.setIcon(new ImageIcon(facesImg));
panel4.setIcon(new ImageIcon(diffImg));
canvas.repaint();
canvas.revalidate();
}
}
//establishes a background for better diff images
private static void training(BufferedImage in){
background = new Mat(imHeight, imWidth, flag2);
background = img2Mat(rgb2Gray(in));
System.out.println("Training Complete");
}
private static Mat findDiff(Mat in){
Mat output = new Mat(imHeight, imWidth, flag2);
Core.absdiff(background, in, output);
Imgproc.threshold(output, output, 20, 255, Imgproc.THRESH_BINARY);
return output;
}
//Face Detection
private static Mat faceDetector(Mat in, CascadeClassifier Alg){
Mat output = in;
MatOfRect faceDetections = new MatOfRect();
if(Alg.empty()){
System.out.println("didnt load");
return output;
}
Alg.detectMultiScale(in, faceDetections);
for(Rect rect : faceDetections.toArray()){
Core.rectangle(output, new Point(rect.x, rect.y),
new Point(rect.x + rect.width, rect.y + rect.height), new Scalar(0, 255, 0), 2);
}
return output;
}
//Returns an image with vectors painted to show movement.
private static Mat opticalFlow(Mat curr, Mat prev, int maxDetectionCount, double qualityLevel, double minDistance){
List<MatOfPoint2f> trackedPoints = new ArrayList<MatOfPoint2f>();
MatOfPoint initial = new MatOfPoint();
MatOfFloat err = new MatOfFloat();
MatOfByte status = new MatOfByte();
MatOfPoint2f initial2f = new MatOfPoint2f();
MatOfPoint2f next2f = new MatOfPoint2f();
double[] temp;
Point p1 = new Point();
Point p2 = new Point();
Mat output = new Mat(imHeight, imWidth, flag);
Scalar red = new Scalar(255, 0, 0);
//Finds Tracking points
if(trackedPoints.size() < 1){
Imgproc.goodFeaturesToTrack(curr, initial, maxDetectionCount, qualityLevel, minDistance);
initial.convertTo(initial2f, CvType.CV_32FC2);
trackedPoints.add(initial2f);
}
//catches first time frame
if(prev.empty())
curr.copyTo(prev);
//find points in current image
if(trackedPoints.get(0).total() > 0){
Video.calcOpticalFlowPyrLK(prev, curr, trackedPoints.get(0), next2f, status, err);
trackedPoints.add(next2f);
}
output = curr;
//draw red lines
for(int i = 0; i < trackedPoints.get(0).cols(); i++){
for(int j = 0; j < trackedPoints.get(0).rows(); j++){
temp = trackedPoints.get(0).get(j, i);
p1.set(temp);
temp = trackedPoints.get(1).get(j, i);
p2.set(temp);
Core.line(output, p1, p2, red);
}
}
return output;
}
//Returns a vector to indicate how the magnitude of movement.
private static double[] opticalFlowAnalysis(Mat curr, Mat prev, int maxDetectionCount, double qualityLevel, double minDistance){
List<MatOfPoint2f> trackedPoints = new ArrayList<MatOfPoint2f>();
MatOfPoint initial = new MatOfPoint();
MatOfFloat err = new MatOfFloat();
MatOfByte status = new MatOfByte();
MatOfPoint2f initial2f = new MatOfPoint2f();
MatOfPoint2f next2f = new MatOfPoint2f();
double[] total = new double[2];
total[0] = 0;
total[1] = 0;
double[] point1;
double[] point2;
double[] output = new double[2];
//Finds Tracking points
if(trackedPoints.size() < 1){
Imgproc.goodFeaturesToTrack(curr, initial, maxDetectionCount, qualityLevel, minDistance);
initial.convertTo(initial2f, CvType.CV_32FC2);
trackedPoints.add(initial2f);
}
//catches first time frame
if(prev.empty())
curr.copyTo(prev);
//find points in current image
if(trackedPoints.get(0).total() > 0){
Video.calcOpticalFlowPyrLK(prev, curr, trackedPoints.get(0), next2f, status, err);
trackedPoints.add(next2f);
}
//average the distance moved
// (-) signifies distance moved right and down
// (+) signifies distance moved left and up
for(int i = 0; i < trackedPoints.get(0).cols(); i++){
for(int j = 0; j < trackedPoints.get(0).rows(); j++){
point1 = trackedPoints.get(0).get(j, i);
point2 = trackedPoints.get(1).get(j, i);
total[0] += point1[0] - point2[0];
total[1] += point1[1] - point2[0];
}
}
output[0] = total[0] / trackedPoints.get(0).cols();
output[1] = total[1] / trackedPoints.get(0).rows();
return output;
}
private static Mat img2Mat(BufferedImage in){
Mat out;
byte[] data;
int r, g, b;
if(in.getType() == BufferedImage.TYPE_INT_RGB){
out = new Mat(imHeight, imWidth, flag);
data = new byte[imWidth * imHeight * (int)out.elemSize()];
int[] dataBuff = in.getRGB(0, 0, imWidth, imHeight, null, 0, imWidth);
for(int i = 0; i < dataBuff.length; i++){
data[i*3] = (byte) ((dataBuff[i] >> 16) & 0xFF);
data[i*3 + 1] = (byte) ((dataBuff[i] >> 8) & 0xFF);
data[i*3 + 2] = (byte) ((dataBuff[i] >> 0) & 0xFF);
}
}
else{
out = new Mat(imHeight, imWidth, flag2);
data = new byte[imWidth * imHeight * (int)out.elemSize()];
int[] dataBuff = in.getRGB(0, 0, imWidth, imHeight, null, 0, imWidth);
for(int i = 0; i < dataBuff.length; i++){
r = (byte) ((dataBuff[i] >> 16) & 0xFF);
g = (byte) ((dataBuff[i] >> 8) & 0xFF);
b = (byte) ((dataBuff[i] >> 0) & 0xFF);
data[i] = (byte)((0.21 * r) + (0.71 * g) + (0.07 * b)); //luminosity
}
}
out.put(0, 0, data);
return out;
}
private static BufferedImage mat2Img(Mat in){
BufferedImage out;
byte[] data = new byte[imWidth * imHeight * (int)in.elemSize()];
int type;
in.get(0, 0, data);
if(in.channels() == 1)
type = BufferedImage.TYPE_BYTE_GRAY;
else
type = BufferedImage.TYPE_3BYTE_BGR;
out = new BufferedImage(imWidth, imHeight, type);
out.getRaster().setDataElements(0, 0, imWidth, imHeight, data);
return out;
}
private static BufferedImage rgb2Gray(BufferedImage in){
BufferedImage out = new BufferedImage(imWidth, imHeight, BufferedImage.TYPE_BYTE_GRAY);
Mat color = new Mat(imHeight, imWidth, flag);
Mat gray = new Mat(imHeight, imWidth, flag);
color = img2Mat(in); //converting bufferedImage to Mat
Imgproc.cvtColor(color, gray, Imgproc.COLOR_RGB2GRAY); //change from color to grayscale
out = mat2Img(gray); //converting Mat to bufferedImage
return out;
}
//Converts bytebuffer to buffered image
private static BufferedImage bufToImage(ByteBuffer pixelsRGB){
int[] pixelInts = new int[imWidth * imHeight];
int rowStart = 0;
int bbIdx; //index to ByteBuffer
int i = 0; //index to pixels
int rowLen = imWidth * 3;
for (int row = 0; row < imHeight; row++){
bbIdx = rowStart;
for(int col = 0; col < imWidth; col++){
int pixR = pixelsRGB.get(bbIdx++);
int pixG = pixelsRGB.get(bbIdx++);
int pixB = pixelsRGB.get(bbIdx++);
pixelInts[i++] = 0xFF000000 | ((pixR & 0xFF) << 16) | ((pixG & 0xFF) << 8) | (pixB & 0xFF);
}
rowStart += rowLen; //Move to next row
}
BufferedImage im = new BufferedImage(imWidth, imHeight, BufferedImage.TYPE_INT_RGB);
im.setRGB(0, 0, imWidth, imHeight, pixelInts, 0, imWidth);
return im;
}
}
Answer to the question is bit late but may help for future references.
I think to learn about object detection you can look here and his code here (I learn from it to do my project). And then I made my project like this based on his object detection. Or you can look for a simple background subtraction here