I am developing an image segmentation application that will use the watershed algorithm. For that, I found some Python code that I need to use. However, I'm having a hard time converting it to Kotlin, as OpenCV's Mat class doesn't have a zeros_like function, just the zeros function. I am using OpenCV 3.31. How do I do this check in Kotlin:
marked[marked == 1] = 0
marked[marked > 1] = 255
Code python:
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Load the image as 8-bit, 3-channel BGR, which is what cv2.watershed requires.
# (The former flag value 3 also set IMREAD_ANYDEPTH, so a 16-bit file would
# have been loaded as 16-bit data.)
img = cv2.imread("/path/to/image.png", cv2.IMREAD_COLOR)
if img is None:
    # cv2.imread reports an unreadable/missing file by returning None
    raise FileNotFoundError("cv2.imread could not read /path/to/image.png")

# Create a blank single-channel marker image with the same height/width as img.
# cv2.watershed needs the markers as 32-bit signed integers.
marker = np.zeros(img.shape[:2], dtype=np.int32)

# This step is manual. The goal is to find the points
# which create the result we want. I suggest using a
# tool to get the pixel coordinates.
# Seeds are (row, col) pairs, grouped by the label they receive.

# Dictate the background and set the markers to 1
background_seeds = [
    (204, 95),
    (240, 137),
    (245, 444),
    (260, 427),
    (257, 378),
    (217, 466),
]
for row, col in background_seeds:
    marker[row, col] = 1

# Dictate the area of interest
# Different values are used for each part of the car (for visibility)
foreground_seeds = {
    255: [(235, 370)],                            # car body
    64: [(135, 294), (190, 454), (167, 458)],     # rooftop, rear light, rear wing
    128: [
        (205, 103),                               # front bumper
        (225, 456), (224, 461), (216, 461),       # rear bumper
    ],
    192: [
        (225, 189), (240, 147),                   # front wheel
        (258, 409), (257, 391), (254, 421),       # rear wheel
    ],
}
for label, seeds in foreground_seeds.items():
    for row, col in seeds:
        marker[row, col] = label

# Now we have set the markers, we use the watershed
# algorithm to generate a marked image
marked = cv2.watershed(img, marker)

# Plot this one. If it does what we want, proceed;
# otherwise edit your markers and repeat
plt.figure()  # separate figure so later plots do not overwrite this one
plt.imshow(marked, cmap='gray')

# Make the background black, and what we want to keep white
marked[marked == 1] = 0
marked[marked > 1] = 255

# Use a kernel to dilate the image, to not lose any detail on the outline
# A kernel of 3x3 pixels is used
kernel = np.ones((3, 3), np.uint8)
dilation = cv2.dilate(marked.astype(np.float32), kernel, iterations=1)

# Plot again to check whether the dilation is according to our needs
# If not, repeat by using a smaller/bigger kernel, or more/less iterations
plt.figure()
plt.imshow(dilation, cmap='gray')

# Now apply the mask we created on the initial image
final_img = cv2.bitwise_and(img, img, mask=dilation.astype(np.uint8))

# cv2.imread reads the image as BGR, but matplotlib uses RGB
# BGR to RGB so we can plot the image with accurate colors
b, g, r = cv2.split(final_img)
final_img = cv2.merge([r, g, b])

# Plot the final result
plt.figure()
plt.imshow(final_img)
plt.show()
code kotlin:
// Load the image
val srcOriginal = Imgcodecs.imread(currentPhotoPath)
// Create a blank image of zeros (same dimension as img)
// It should be grayscale (1 color channel)
val markers = Mat.zeros(srcOriginal.rows(), srcOriginal.cols(), CvType.CV_32S)
// This step is manual. The goal is to find the points
// which create the result we want. I suggest using a
// tool to get the pixel coordinates.
// Dictate the area of interest
for(x in my_canvas.pointsToDrawX.indices) {
for(y in my_canvas.pointsToDrawY.indices) {
//Now we have set the markers, we use the watershed
//algorithm to generate a marked image
Imgproc.watershed(srcOriginal, markers)
val marker_tempo = Mat()
markers.convertTo(marker_tempo, CvType.CV_8U)
// Plot this one. If it does what we want, proceed;
// otherwise edit your markers and repeat
//Create Bitmap
val bmpOut = Bitmap.createBitmap(srcOriginal.cols(), srcOriginal.rows(), Bitmap.Config.RGB_565)
Utils.matToBitmap(marker_tempo, bmpOut)
val mPath = Environment.getExternalStorageDirectory().toString() + "/gray.png"
//Make the background black, and what we want to keep white
//Use a kernel to dilate the image, to not lose any detail on the outline
//I used a kernel of 3x3 pixels
val kernel = Mat(3, 3, CvType.CV_8U)
val dilatation = Imgproc.dilate(marker_tempo, marker_tempo, kernel)
val mPath1 = Environment.getExternalStorageDirectory().toString() + "/dilation.png"
//Now apply the mask we created on the initial image
val final_image = Core.bitwise_and(srcOriginal, srcOriginal, dilatation)
//cv2.imread reads the image as BGR, but matplotlib uses RGB
//BGR to RGB so we can plot the image with accurate colors
In pointsToDrawX and pointsToDrawY I'm saving all the x, y coordinates of the user's touch events on the screen. These are the coordinates that I will pass to the watershed algorithm to perform the segmentation and remove the background from the image. Can someone help me convert this code?
//Load the image
srcOriginal = Imgcodecs.imread(currentPhotoPath)
//Create a blank image of zeros (same dimension as img)
//It should be grayscale (1 color channel)
markers = Mat.zeros(srcOriginal.rows(), srcOriginal.cols(), CvType.CV_32S)
//This step is manual. The goal is to find the points
//which create the result we want. I suggest using a
//tool to get the pixel coordinates.
//Dictate the background and set the markers to 1
for (value in 0..my_canvas.pointsToDrawY.size - 1) {
//Dictate the area of interest
//I used different values for each part of the car (for visibility)
for (value in 0..my_canvas.pointsToDrawYStepTwo.size - 1) {
//Now we have set the markers, we use the watershed
//algorithm to generate a marked image
watershed(srcOriginal, markers)
//Plot this one. If it does what we want, proceed;
//otherwise edit your markers and repeat
val mPath1 = Environment.getExternalStorageDirectory().toString() + "/watershed.png"
//Make the background black, and what we want to keep white
for (x in 0 until srcOriginal.rows()-1) {
for (y in 0 until srcOriginal.cols()-1) {
if((markers[x, y].get(0) == 255.0)){
//Use a kernel to dilate the image, to not lose any detail on the outline
//I used a kernel of 3x3 pixels
val marker_tempo = Mat()
val dilatation = Mat()
markers.convertTo(marker_tempo, CvType.CV_8U)
val kernel = Mat(3, 3, CvType.CV_8U)
Imgproc.dilate(marker_tempo, dilatation, kernel)
//Plot again to check whether the dilation is according to our needs
//If not, repeat by using a smaller/bigger kernel, or more/less iterations
val mPath2 = Environment.getExternalStorageDirectory().toString() + "/dilatation.png"
//Now apply the mask we created on the initial image
val final = Mat()
Core.bitwise_and(srcOriginal, srcOriginal, final, dilatation)
//Plot the final result
val mPath = Environment.getExternalStorageDirectory().toString() + "/final.png"
numpy.zeros_like basically creates an array the same shape as the input with all zeros in it.
In this case, you have a simple 2d array so you could roll your own just by indexing through the whole array (all dimensions) and setting everything to zero.
var marker = Array(srcOriginal.rows()) {Array(srcOriginal.cols()) {0} }
You can find all the images I have for testing on my GitHub here:
GitHub repository with sources
There are also 2 videos, where the detection should work on as well
I tried to use OpenCV 4.x.x to find the edges of a blackboard (image following), but somehow I cannot succeed. My code at the moment looks like this: (Android with OpenCV and live camera feed), where imgMat is a Mat from the camera feed:
// Convert the camera frame to a single-channel grayscale image.
// The previous conversion code (COLOR_RGB2BGR) only swapped the colour
// channels, so "gray" was still a colour image.
// NOTE(review): assumes imgMat is RGB/RGBA ordered -- confirm against the camera source.
Mat gray = new Mat();
Imgproc.cvtColor(imgMat, gray, Imgproc.COLOR_RGB2GRAY);
// 3x3 box blur before the first edge detection.
Mat blurred = new Mat();
Imgproc.blur(gray, blurred, new org.opencv.core.Size(3, 3));
// First Canny pass on the blurred image.
Mat canny = new Mat();
Imgproc.Canny(blurred, canny, 80, 230);
// 2x2 rectangular structuring element shared by both morphology steps.
Mat kernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new org.opencv.core.Size(2, 2));
// Dilate the edges (10 iterations), then close remaining gaps (5 iterations).
// NOTE(review): the explicit anchor (0, 0) differs from the default (-1, -1) kernel centre -- confirm this is intended.
Mat dilated = new Mat();
Imgproc.morphologyEx(canny, dilated, Imgproc.MORPH_DILATE, kernel, new Point(0, 0), 10);
Mat rectImage = new Mat();
Imgproc.morphologyEx(dilated, rectImage, Imgproc.MORPH_CLOSE, kernel, new Point(0, 0), 5);
// Second Canny pass on the thickened/closed edge image.
Mat endproduct = new Mat();
Imgproc.Canny(rectImage, endproduct, 120, 230);
// Collect every contour of the final edge image (flat list, no hierarchy nesting).
List<MatOfPoint> contours = new ArrayList<>();
Mat hierarchy = new Mat();
Imgproc.findContours(endproduct, contours, hierarchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE);
double maxArea = 0;
boolean hasContour = false;
MatOfPoint2f biggestContour = new MatOfPoint2f();
Iterator<MatOfPoint> each = contours.iterator();
while (each.hasNext()) {
MatOfPoint wrapper = each.next();
double area = Imgproc.contourArea(wrapper);
if (area > maxArea) {
maxArea = area;
biggestContour = new MatOfPoint2f(wrapper.toArray());
hasContour = true;
if (hasContour) {
Mat output = imgMat.clone();
MatOfPoint2f approx = new MatOfPoint2f();
MatOfPoint poly = new MatOfPoint();
Imgproc.approxPolyDP(biggestContour, approx, Imgproc.arcLength(biggestContour, true) * .02, true);
approx.convertTo(poly, CvType.CV_32S);
Rect rect = Imgproc.boundingRect(poly);
Somehow I am not able to get it working, although the same code (written in Python) worked on my computer with a video. I take the output rectangle and display it on my mobile screen, where it flickers around a lot and does not work properly.
These are my images I tried the python program on, and they worked:
What am I doing wrong? I am not able to constantly detect the edges of the blackboard.
Additional information about the blackboard:
always rectangular
may have different lighting
the text should be ignored, only the main board should be detected
the outer blackboard should be ignored as well
only the contour for the main board should be shown/returned
Thanks for any advice or code!
I used HSV because that's the easiest way to detect specific colors. I used an abundancy test to automatically select the color threshold (so this will work for green or blue boards). However, this test will fail on white or black boards since white and black count as all colors according to hue. Instead, in HSV, white and black are easiest to detect as very low saturation (white) or as very low value (black).
I did a 3-way check for each and selected the mask that had the most pixels in it (I assume that the boards are the majority of the image). I'm not sure how this will work on other images since we only have one here, so this may or may not work for other boards.
I used approxPolyDP to cut down on the number of points in the contour until I had 4 points and used that to draw the shape.
import cv2
import numpy as np
def getAbundantColor(channel, margin):
    """Return the ``cv2.inRange`` mask that covers the most pixels of *channel*.

    Every distinct value ``c`` found in *channel* is tried as a centre: the
    pixels inside ``[c - margin, c + margin]`` are masked and counted, and the
    mask with the highest count wins (the first one found on ties).

    Args:
        channel: image plane to analyse.
        margin: half-width of the accepted value range around each candidate.

    Returns:
        ``(mask, count)`` -- the winning mask and its number of non-zero
        pixels, or ``(None, -1)`` when *channel* contains no values.
    """
    most = None
    biggest_count = -1
    # only distinct values need to be tested (speeds up the search)
    for col in np.unique(channel):
        # Convert to a plain Python number before doing arithmetic: under
        # NumPy 2 promotion rules a uint8 scalar minus a Python int stays
        # uint8 and wraps around instead of going negative.
        center = col.item()
        mask = cv2.inRange(channel, int(center - margin), int(center + margin))
        # count number of white pixels
        count = np.count_nonzero(mask)
        # if bigger, set new "most"
        if count > biggest_count:
            biggest_count = count
            most = mask
    return most, biggest_count
# load image
img = cv2.imread("blackboard.jpg")
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read
    raise FileNotFoundError("cv2.imread could not read blackboard.jpg")

# it's huge, scale down so that we can see the whole thing
h, w = img.shape[:2]
scale = 0.25
h = int(scale * h)
w = int(scale * w)
img = cv2.resize(img, (w, h))

# hsv (note: `h` is reused below for the hue plane)
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)

# median blur to get rid of most of the text
h = cv2.medianBlur(h, 5)
s = cv2.medianBlur(s, 5)
v = cv2.medianBlur(v, 5)

# get most abundant color
color_margin = 30
hmask, hcount = getAbundantColor(h, color_margin)

# detect white and black separately
light_margin = 30

# white: very low saturation
wmask = cv2.inRange(s, 0, light_margin)
wcount = np.count_nonzero(wmask)

# black: very low value
bmask = cv2.inRange(v, 0, light_margin)
bcount = np.count_nonzero(bmask)

# check which is biggest: pick the mask with the highest pixel count.
# (Taking sorter[-1] without ordering the list always selected the black mask.)
sorter = [[hcount, hmask], [wcount, wmask], [bcount, bmask]]
mask = max(sorter, key=lambda pair: pair[0])[1]

# dilate and erode to close holes
kernel = np.ones((3, 3), np.uint8)
mask = cv2.dilate(mask, kernel, iterations=2)
mask = cv2.erode(mask, kernel, iterations=4)
mask = cv2.dilate(mask, kernel, iterations=2)

# get contours
# findContours returns (image, contours, hierarchy) in OpenCV 3.x and
# (contours, hierarchy) in 2.x/4.x; index -2 is the contour list in both.
contours = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

# for each contour, approximate a simpler shape until we have 4 points
simplified = []
for con in contours:
    perimeter = cv2.arcLength(con, True)

    # coarsen the approximation step by step until fewer than 4 points remain
    num_points = 999999
    step_size = 0.01
    percent = step_size
    while num_points >= 4:
        # get number of points
        epsilon = percent * perimeter
        approx = cv2.approxPolyDP(con, epsilon, True)
        num_points = len(approx)

        # increment
        percent += step_size

    # step back (one for the trailing increment, one for the failing step)
    # and get the points
    # there could be more than 4 points if our step size misses it
    percent -= step_size * 2
    epsilon = percent * perimeter
    approx = cv2.approxPolyDP(con, epsilon, True)
    # keep the result so it can be drawn and reported below
    simplified.append(approx)
cv2.drawContours(img, simplified, -1, (0, 0, 200), 2)

# print out the number of points
for points in simplified:
    print("Num Points: " + str(len(points)))

# show image
cv2.imshow("Image", img)
cv2.imshow("Hue", h)
cv2.imshow("Mask", mask)
# block until a key is pressed; HighGUI windows are only drawn while waitKey runs
cv2.waitKey(0)
Edit: In order to accommodate the uncertainty in the board's color and appearance I run the assumption that the board itself will be the majority of the picture. The lines involving the sorter are looking for the most abundant color in the image. If the white wall behind the board takes up more space in the image then that'll be the color that gets selected for the mask.
There are other ways to try and select just the board, but it's really difficult to come up with a catch-all solution. The rest of the code should do its job the same if you can come up with some way of masking the board. If you're willing to budge on the unknown color assumption and provide the original pictures of the failing cases then I can probably come up with an appropriate mask.
I have an inputMat (RGBA format). I want to keep only the transparent pixels and set them to white color. All the other pixels (that are consequently non transparent) should be changed to transparent.
Beginning of my Java code :
Mat inputMat = new Mat();
Utils.bitmapToMat(bitmap, inputMat);
How can I do what I want to do ? (answers in all languages - not only Java - accepted!)
Thanks !
That is the idea.
Mat inputMat = new Mat();
Utils.bitmapToMat(bitmap, inputMat);
Split image to channels:
List<Mat> channels = new ArrayList<>(4);
Core.split(inputMat, channels);
Get alpha channel:
Mat alpha = channels.get(3);
Invert alpha channel:
Make new list of channels:
List<Mat> channelsOut = new ArrayList<>();
Merge them to image:
Mat outputMat = new Mat();
OpenCV's Mat class has a setTo method that takes a mask argument.
OpenCV has the split procedure that can separate color planes (channels).
Mats support comparison.
OpenCV Mats support "normal" math expressions.
# Recolor a four-channel array in place, using plane 3 as alpha:
# pixels with alpha == 0 become opaque white, all others become fully
# transparent with their color cleared.
assert input.shape[2] == 4, "it's not a four-channel picture"
alpha = input[..., 3]  # alpha plane
is_binary = not ((alpha != 0) & (alpha != 255)).any()
assert is_binary, "assuming alpha values to be binary"
mask = alpha == 0  # True where the pixel is transparent
WHITE_OPAQUE = (255, 255, 255, 255)
CLEAR = (0, 0, 0, 0)
# `mask` was computed before any write, so the first assignment (which also
# rewrites alpha) cannot influence which pixels the second one selects.
# Work on a copy of input beforehand if the original must be preserved.
input[mask] = WHITE_OPAQUE
input[~mask] = CLEAR
I am developing an application to detect the lesion area. For this I am using GrabCut to detect the ROI and remove the background from the image. However, on some images it does not work well: it ends up not identifying the borders of the region of interest properly. Watershed can better identify the edges for this type of work, but I am having difficulties making the transition from GrabCut to watershed. Before GrabCut is run, the user uses touch events to mark a rectangle around the area of interest (the wound area) to make the algorithm's work easier, as in the image below.
However, using other wound images, segmentation is not good, showing flaws in ROI detection.
Image using grabcut in app
Image using watershed in desktop
this is the code:
private fun extractForegroundFromBackground(coordinates: Coordinates, currentPhotoPath: String): String {
// TODO: Provide complex object that has both path and extension
val width = bitmap?.getWidth()!!
val height = bitmap?.getHeight()!!
val rgba = Mat()
val gray_mat = Mat()
val threeChannel = Mat()
Utils.bitmapToMat(bitmap, gray_mat)
cvtColor(gray_mat, rgba, COLOR_RGBA2RGB)
cvtColor(rgba, threeChannel, COLOR_RGB2GRAY)
threshold(threeChannel, threeChannel, 100.0, 255.0, THRESH_OTSU)
val rect = Rect(coordinates.first, coordinates.second)
val fg = Mat(rect.size(), CvType.CV_8U)
erode(threeChannel, fg, Mat(), Point(-1.0, -1.0), 10)
val bg = Mat(rect.size(), CvType.CV_8U)
dilate(threeChannel, bg, Mat(), Point(-1.0, -1.0), 5)
threshold(bg, bg, 1.0, 128.0, THRESH_BINARY_INV)
val markers = Mat(rgba.size(), CvType.CV_8U, Scalar(0.0))
Core.add(fg, bg, markers)
val marker_tempo = Mat()
markers.convertTo(marker_tempo, CvType.CV_32S)
watershed(rgba, marker_tempo)
marker_tempo.convertTo(markers, CvType.CV_8U)
val imgBmpExit = Bitmap.createBitmap(width, height, Bitmap.Config.RGB_565)
Utils.matToBitmap(markers, imgBmpExit)
// Run the grab cut algorithm with a rectangle (for subsequent iterations with touch-up strokes,
// flag should be Imgproc.GC_INIT_WITH_MASK)
//Imgproc.grabCut(srcImage, firstMask, rect, bg, fg, iterations, Imgproc.GC_INIT_WITH_RECT)
// Create a matrix of 0s and 1s, indicating whether individual pixels are equal
// or different between "firstMask" and "source" objects
// Result is stored back to "firstMask"
//Core.compare(mark, source, mark, Core.CMP_EQ)
// Create a matrix to represent the foreground, filled with white color
val foreground = Mat(srcImage.size(), CvType.CV_8UC3, Scalar(255.0, 255.0, 255.0))
// Copy the foreground matrix to the first mask
srcImage.copyTo(foreground, mark)
// Create a red color
val color = Scalar(255.0, 0.0, 0.0, 255.0)
// Draw a rectangle using the coordinates of the bounding box that surrounds the foreground
rectangle(srcImage, coordinates.first, coordinates.second, color)
// Create a new matrix to represent the background, filled with black color
val background = Mat(srcImage.size(), CvType.CV_8UC3, Scalar(0.0, 0.0, 0.0))
val mask = Mat(foreground.size(), CvType.CV_8UC1, Scalar(255.0, 255.0, 255.0))
// Convert the foreground's color space from BGR to gray scale
cvtColor(foreground, mask, Imgproc.COLOR_BGR2GRAY)
// Separate out regions of the mask by comparing the pixel intensity with respect to a threshold value
threshold(mask, mask, 254.0, 255.0, Imgproc.THRESH_BINARY_INV)
// Create a matrix to hold the final image
val dst = Mat()
// copy the background matrix onto the matrix that represents the final result
val vals = Mat(1, 1, CvType.CV_8UC3, Scalar(0.0))
// Replace all 0 values in the background matrix given the foreground mask
background.setTo(vals, mask)
// Add the sum of the background and foreground matrices by applying the mask
Core.add(background, foreground, dst, mask)
// Save the final image to storage
Imgcodecs.imwrite(currentPhotoPath + "_tmp.png", dst)
// Clean up used resources
return currentPhotoPath
How do I update the code to use watershed instead of grabcut?
A description of how to apply the watershed algorithm in OpenCV is here, although it is in Python. The documentation also contains some potentially useful examples. Since you already have a binary image, all that's left is to apply the Euclidean Distance Transform (EDT) and the watershed function. So instead of Imgproc.grabCut(srcImage, firstMask, rect, bg, fg, iterations, Imgproc.GC_INIT_WITH_RECT), you would have:
// Euclidean Distance Transform of the binary source image.
Mat dist = new Mat();
Imgproc.distanceTransform(srcImage, dist, Imgproc.DIST_L2, Imgproc.DIST_MASK_3); // use L2 for Euclidean Distance
// 32-bit signed marker image, same size as the distance map.
Mat markers = Mat.zeros(dist.size(), CvType.CV_32S);
// NOTE(review): the OpenCV docs describe watershed's first argument as an 8-bit 3-channel image
// and expect seeded (non-zero) markers; dist and the all-zero markers may need converting/seeding -- confirm.
Imgproc.watershed(dist, markers); // apply watershed to resultant image from EDT
// Bring the labels back to 8 bit so they can be thresholded.
Mat mark = Mat.zeros(markers.size(), CvType.CV_8U);
markers.convertTo(mark, CvType.CV_8UC1);
Imgproc.threshold(mark, firstMask, 0, 255, Imgproc.THRESH_BINARY + Imgproc.THRESH_OTSU); // threshold results to get binary image
The thresholding step is described here. Also, optionally, before you apply Imgproc.watershed, you may want to apply some morphological operations to the result of EDT i.e; dilation, erosion:
Imgproc.dilate(dist, dist, Mat.ones(3, 3, CvType.CV_8U));
If you're not familiar with morphological operations when it comes to processing binary images, the OpenCV documentation contains some good, quick examples.
Hope this helps!
I am actually trying to recognize Braille characters in a document. I intend to convert a braille document into plain text.
I am using OpenCV with Java in order to do the image processing.
First, I imported an image of a Braille document :
Then, I made some image processing in order to binarize the original image. I have read that the important steps are :
Convert the image into gray levels
Reduce the noise
Enhance the edge contrast
Binarize the image
Here is the code that I used :
public static void main(String args[]) {
Mat imgGrayscale = new Mat();
Mat image = Imgcodecs.imread("C:/Users/original_braille.jpg", 1);
Imgproc.cvtColor(image, imgGrayscale, Imgproc.COLOR_BGR2GRAY);
Imgproc.GaussianBlur(imgGrayscale, imgGrayscale, new Size(3, 3), 0);
Imgproc.adaptiveThreshold(imgGrayscale, imgGrayscale, 255, Imgproc.ADAPTIVE_THRESH_MEAN_C, Imgproc.THRESH_BINARY_INV, 5, 4);
Imgproc.medianBlur(imgGrayscale, imgGrayscale, 3);
Imgproc.threshold(imgGrayscale, imgGrayscale, 0, 255, Imgproc.THRESH_OTSU);
Imgproc.GaussianBlur(imgGrayscale, imgGrayscale, new Size(3, 3), 0);
Imgproc.threshold(imgGrayscale, imgGrayscale, 0, 255, Imgproc.THRESH_OTSU);
Imgcodecs.imwrite( "C:/Users/Jean-Baptiste/Desktop/Reconnaissance_de_formes/result.jpg", imgGrayscale );
I obtained the following result for this step :
I think the quality of this image could be improved for better results, but I'm not experienced with the different image processing techniques. Can I improve the quality of my filters?
After that, I would like to perform a segmentation of the image in order to detect the different characters of this document. I would like to separate the different characters of the document in order to convert them into text.
For instance I have drawn the separation lines of the document manually :
But I didn't find solutions for this step. Is there a possibility to do the same with OpenCV ?
Here is a small script that finds the lines in your image. It's in python, I don't have a java version of openCV installed, but I think you can get the idea of the algorithm anyway.
Finding vertical lines is not as easy because the space between the dots depends on the letters following each other. You could probably try template matching algorithms with some common letters. Given the fact that at this point you know the height of the letters it shouldn't be too hard.
Of course, this whole approach assumes that the document is not rotated.
import numpy as np
import cv2
# This is just the transposition of your code in python
img = cv2.imread('L1ZzA.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray,(3,3),0)
thres = cv2.adaptiveThreshold(blur,255,cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY,5,4)
blur2 = cv2.medianBlur(thres,3)
ret2,th2 = cv2.threshold(blur2,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
blur3 = cv2.GaussianBlur(th2,(3,3),0)
ret3,th3 = cv2.threshold(blur3,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)
# Find connected components and extract the mean height and width
output = cv2.connectedComponentsWithStats(255-th3, 6, cv2.CV_8U)
mean_h = np.mean(output[2][:,cv2.CC_STAT_HEIGHT])
mean_w = np.mean(output[2][:,cv2.CC_STAT_WIDTH])
# Find empty rows, defined as having less than mean_h/2 pixels
empty_rows = []
for i in range(th3.shape[0]):
if np.sum(255-th3[i,:]) < mean_h/2.0:
# Group rows by labels
d = np.ediff1d(empty_rows, to_begin=1)
good_rows = []
good_labels = []
label = 0
# 1: assign labels to each row
# based on whether they are following each other or not (i.e. diff >1)
for i in range(1,len(empty_rows)-1):
if d[i+1] == 1:
elif d[i] > 1 and d[i+1] > 1:
label = good_labels[len(good_labels)-1] + 1
# 2: find the mean row value associated with each label, and color that line in green in the original image
for i in range(label):
frow = np.mean(np.asarray(good_rows)[np.where(np.asarray(good_labels) == i)])
img[int(frow),:,1] = 255
# Display the image with the green rows
I am using OpenCV in an Android application. I want the mobile application to automatically take a photo when a rectangle (something in the shape of a receipt for example) is in view. I am using Canny edge detection but when I am looking for contours, the array size is greater than 1500. Obviously it is not optimal to loop through all the contours and find the largest one so I was wondering is it possible to filter out the largest contour automatically through an api?
My code so far:
ArrayList contours;
public Mat onCameraFrame(final CameraBridgeViewBase.CvCameraViewFrame inputFrame) {
// Clear contours array on each frame
// Get Grayscale image
final Mat gray = inputFrame.gray();
// Canny edge detection
Imgproc.Canny(gray, gray, 300, 1000, 5, true);
// New empty black matrix to store the edges captured
Mat dest = new Mat();
Core.add(dest, Scalar.all(0), dest);
// Copy the edge data over to the empty black matrix
// Is there a way to filter the size of contours so that not everything is returned? Right now this function is returning a lot of contours (1500 +)
Imgproc.findContours(gray, contours, hirearchy, Imgproc.RETR_LIST, Imgproc.CHAIN_APPROX_SIMPLE);
return dest;
The user will be holding the phone and I want the application to automatically take a photo when the receipt is in view.
Example receipt
I have covered the basic techniques you may use, in the following Python code, it won't be hard to translate the code in the language of your choice, java in this case. So the technique involves:
Estimate the color of object you want to segment, which is white in your case, so safe limits for upper and lower bound can be approximated as:
RECEIPT_LOWER_BOUND = np.array([200, 200, 200])
RECEIPT_UPPER_BOUND = np.array([255, 255, 255])
Apply some Blur to input image to make the color distribution smooth, which would reduce the smaller contours in future.
img_blurred = cv2.blur(img, (5, 5))
Apply dilation to the binary image to remove the neighbouring smaller contours which surround your target largest contour
kernel = np.ones((10, 10), dtype=np.uint8)
mask = cv2.dilate(mask, kernel)
Now find contours in the mask after applying above operations and filter out the contour on the basis of contourArea.
im, contours, hierarchy = cv2.findContours(receipt_mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
largest_contour = max(contours, key=lambda x: cv2.contourArea(x))
Finally you may apply some threshold over the area to verify if the input was really a ticket or not.
import cv2
import numpy as np
# You may change the following ranges to define your own lower and upper BGR bounds.
RECEIPT_LOWER_BOUND = np.array([200, 200, 200])
RECEIPT_UPPER_BOUND = np.array([255, 255, 255])
def segment_receipt(img):
    """Build a binary mask of the pixels that fall inside the receipt color range.

    The image is box-blurred (5x5), thresholded with ``cv2.inRange`` against
    ``RECEIPT_LOWER_BOUND`` / ``RECEIPT_UPPER_BOUND``, and the resulting mask
    is dilated once with a 10x10 kernel of ones.
    """
    # Smoothing first evens out the color distribution, which cuts down the
    # number of small contours found later.
    smoothed = cv2.blur(img, (5, 5))
    in_range = cv2.inRange(smoothed, RECEIPT_LOWER_BOUND, RECEIPT_UPPER_BOUND)
    # Grow the mask so small neighbouring blobs merge into the main region.
    structuring_element = np.ones((10, 10), dtype=np.uint8)
    return cv2.dilate(in_range, structuring_element)
def get_largest_contour_rect(image):
    """Return the bounding rectangle of the largest receipt-colored region.

    The image is segmented with ``segment_receipt``, the external contours of
    the resulting mask are extracted, and the contour with the largest area
    is selected.

    Args:
        image: image passed on to ``segment_receipt``.

    Returns:
        The ``cv2.boundingRect`` result for the largest contour.

    Raises:
        ValueError: if the mask contains no contours at all.
    """
    receipt_mask = segment_receipt(image)
    # findContours returns (image, contours, hierarchy) in OpenCV 3.x and
    # (contours, hierarchy) in 2.x/4.x; index -2 is the contour list in both.
    # The mask is copied because older OpenCV versions modify the input.
    contours = cv2.findContours(
        receipt_mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
    )[-2]
    # String concatenation prints identically on Python 2 and Python 3.
    print("Number of contours found : " + str(len(contours)))
    if not contours:
        # max() would raise a ValueError here anyway; give it a useful message
        raise ValueError("no contours found in the receipt mask")
    # Pick the contour with the largest area directly (no sorting needed)
    largest_contour = max(contours, key=cv2.contourArea)
    return cv2.boundingRect(largest_contour)
image = cv2.imread("path/to/your/image.jpg")
rect = get_largest_contour_rect(image)
#J.Doe I am currently working on such a project and I have successfully been able to isolate the largest contour in the image after a whole lot of processing. The only part remaining is recognizing a rectangular contour and taking a picture.
mRgba = inputFrame.rgba();
Imgproc.cvtColor(mRgba, mGray, Imgproc.COLOR_RGB2GRAY);
Imgproc.GaussianBlur(mGray, mGray1, new Size(3, 3), 1);
Mat kernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT,new Size(9,9));
Imgproc.dilate(mGray1, mGray2, kernel);
Imgproc.Canny(mGray2, mCanny, 50, 200);
double maxVal = 0;
int maxValIdx = 0;
for(int contourIdx = 0; contourIdx < contours.size(); contourIdx++){
double contourArea = Imgproc.contourArea(contours.get(contourIdx));
if(maxVal < contourArea)
maxVal = contourArea;
maxValIdx = contourIdx;
Imgproc.drawContours(mRgba,contours,maxValIdx,new Scalar(0,255,255),-1);
return mRgba;
Be wary of the image names: I changed them between the different processing steps.