I made a 3D renderer that parses .obj files (ASCII) and projects them onto a 2D plane.
At first glance the projection seems to be fine, except for one thing: the rendered model looks a bit odd:
[1]: https://i.stack.imgur.com/iaLOu.png
All polygons are being drawn, including the ones at the back of the model, which I should definitely not be able to see.
I did some quick research on Wikipedia to see what this is about, and I think I found it: the "Sichtbarkeitsproblem" (hidden-surface determination).
(DE): https://de.wikipedia.org/wiki/Sichtbarkeitsproblem
(EN): https://en.wikipedia.org/wiki/Hidden-surface_determination
The article mentions that this is a common problem in computer graphics and that there are many different ways to perform the "Verdeckungsberechnung" (occlusion calculation).
It mentions approaches such as using a z-buffer or ray tracing.
Now I don't really know a lot about ray tracing, but it seems quite applicable since I later want to add a light source.
I am not sure how ray tracing works, but if I just send out rays at angles matching the line from the camera to every pixel on screen and check which polygon each ray hits first, I would end up with some polygons missing entirely just because one of their vertices happens to be covered.
How do other ray tracers work? Do they remove the entire polygon when it gets no hit? Remove only one or more vertices (which I believe would cause massive distortion of the shape)? Or do they just render all the polygons sorted by their minimum distance to the camera (which I guess would make performance very bad)?
Please help me implement this in my code or give me a hint; it would mean a lot to me.
My code is as follows, and the link to the model used for the projection (see image no. 1) is here:
https://drive.google.com/file/d/10dpjcL2d2QB15qqTSu5p6kQ534hNOzCz/view?usp=sharing
(Note that the 3D model and the code must be in the same folder for it to work.)
// 12.11.2022
// See "Rotation matrix" on Wikipedia
// View Space: The world space vertex positions relative to the view of the camera
/* Hidden-surface determination is necessary to render a 3D scene correctly, because surfaces
that are not visible to the viewer should not be drawn
*/
// -> https://de.wikipedia.org/wiki/Sichtbarkeitsproblem
// TODO: Raytracing / hidden-surface determination
// TODO: Texture Mapping
import java.util.Arrays;
import java.awt.Robot;
import java.nio.ByteBuffer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.ArrayList;
byte b[];
int amount = 0;
String lines[];
PVector[][] vertices;
int[] faces;
float a = 0;
PVector cam, cam_angle, cam_move, cam_speed;
float angle = 0.0;
void setup() {
size(800,600);
frameRate(60);
noCursor();
cam = new PVector(0, 100, -500);
cam_angle = new PVector(0, 0, 0);
cam_move = new PVector(0, 0, 0);
cam_speed = new PVector(50, 50, 50);
lines = loadStrings("UM2_SkullPile28mm.obj");
println("File loaded. Now scanning contents...");
println();
Pattern numbers = Pattern.compile("(-?\\d+)");
ArrayList<PVector> vertices_ = new ArrayList<PVector>();
ArrayList<ArrayList> faces_ = new ArrayList<ArrayList>();
int parsed_lines = 0;
for(String i:lines) {
switch(i.charAt(0)) {
// Find faces
case 'f':
ArrayList<Integer> values = new ArrayList<Integer>();
for(Matcher m = numbers.matcher(i); m.find(); values.add(Integer.parseInt(m.group())));
faces_.add(values);
break;
// Find Vectors
case 'v':
String s[] = i.trim().split("\\s+");
vertices_.add(new PVector(Float.parseFloat(s[1])*20, Float.parseFloat(s[2])*20, Float.parseFloat(s[3])*20));
break;
};
if(++parsed_lines % (lines.length/6) == 0 || parsed_lines == lines.length) println((int)(map(parsed_lines, 0, lines.length, 0, 100)), "%");
}
println();
println("Done. Found", vertices_.size(), "Vertices and", faces_.size(), "faces");
int i=0;
vertices = new PVector[faces_.size()][];
for(ArrayList<Integer> f_:faces_) {
vertices[i] = new PVector[f_.size()];
int j = 0;
for(int f: f_) {
PVector v = vertices_.get(f-1);
vertices[i][j] = Rotate3d_x(v, -90);
j++;
}
i++;
}
}
PVector Rotate2d(PVector p, float a) {
// a = angle
float[][] m2 = {
{cos(a), -sin(a)},
{sin(a), cos(a)}
};
float[][] rotated = matmul(m2, new float[][] {
{ p.x },
{ p.y }
});
return new PVector(rotated[0][0], rotated[1][0]);
}
PVector Rotate3d(PVector p, float[][] m2) {
float[][] rotated = matmul(m2, new float[][] {
{ p.x },
{ p.y },
{ p.z }
});
return new PVector(rotated[0][0], rotated[1][0], rotated[2][0]);
}
PVector Rotate3d_x(PVector p, float a) {
return Rotate3d(p,
new float[][] {
{1, 0, 0},
{0, cos(a), -sin(a)},
{0, sin(a), cos(a)}
});
};
PVector Rotate3d_y(PVector p, float a) {
return Rotate3d(p,
new float[][] {
{cos(a), 0, sin(a)},
{0, 1, 0},
{-sin(a), 0, cos(a)}
});
}
PVector Rotate3d_z(PVector p, float a) {
return Rotate3d(p,
new float[][] {
{cos(a), -sin(a), 0},
{sin(a), cos(a), 0},
{0, 0, 1}
});
}
PVector Rotate3d(PVector p, PVector a) {
return Rotate3d_z( Rotate3d_y(Rotate3d_x(p, a.x), a.y), a.z );
}
// Matrix multiplication
float[][] matmul(float[][] m1, float[][] m2) {
int cols_m1 = m1.length,
rows_m1 = m1[0].length;
int cols_m2 = m2.length,
rows_m2 = m2[0].length;
try {
if (rows_m1 != cols_m2) throw new Exception("Rows of m1 must match Columns of m2!");
}
catch(Exception e) {
println(e);
}
float[][] res = new float[cols_m2][rows_m2];
for (int c=0; c < cols_m1; c++) {
for (int r2=0; r2 < rows_m2; r2++) {
float sum = 0;
float[] buf = new float[rows_m1];
// Multiply rows of m1 with columns of m2 and store in buf
for (int r=0; r < rows_m1; r++) {
buf[r] = m1[c][r]* m2[r][r2];
}
// Add up all entries into sum
for (float entry : buf) {
sum += entry;
}
res[c][r2] = sum;
}
}
return res;
}
PVector applyPerspective(PVector p) {
PVector d = applyViewTransform(p);
return applyPerspectiveTransform(d);
}
PVector applyViewTransform(PVector p) {
// c = camera position
// co = camera orientation / camera rotation
PVector c = cam;
PVector co = cam_angle;
// dx, dy, dz https://en.wikipedia.org/wiki/3D_projection : Mathematical Formula
float[][] dxyz = matmul(
matmul(new float[][]{
{1, 0, 0},
{0, cos(co.x), sin(co.x)},
{0, -sin(co.x), cos(co.x)}
}, new float[][]{
{cos(co.y), 0, -sin(co.y)},
{0, 1, 0},
{sin(co.y), 0, cos(co.y)}
}),
matmul(new float[][]{
{cos(co.z), sin(co.z), 0},
{-sin(co.z), cos(co.z), 0},
{0, 0, 1}
}, new float[][]{
{p.x - c.x},
{p.y - c.y},
{p.z - c.z},
}));
PVector d = new PVector(dxyz[0][0], dxyz[1][0], dxyz[2][0]);
return d;
}
PVector applyPerspectiveTransform(PVector d) {
// e = display surface position relative to camera pinhole c
PVector e = new PVector(0, 0, 300);
return new PVector((e.z / d.z) * d.x + e.x, (e.z / d.z) * d.y + e.y);
}
void draw() {
background(255);
translate(width/2, height/2);
scale(1,-1);
noStroke();
fill(0, 100, 0, 50);
PVector[][] points_view = new PVector[vertices.length][];
for(int i=0; i < vertices.length; i++) {
points_view[i] = new PVector[vertices[i].length];
for(int j=0; j < vertices[i].length; j++)
points_view[i][j] = applyViewTransform(Rotate3d_y(vertices[i][j], angle));
}
// The following snippet I got from: https://stackoverflow.com/questions/74443149/3d-projection-axis-inversion-problem-java-processing?noredirect=1#comment131433616_74443149
float nearPlane = 1.0;
for (int c = 0; c < points_view.length; c++) {
beginShape();
for (int r = 0; r < points_view[c].length-1; r++) {
// Connect all points
//if (i == a) continue;
PVector p0 = points_view[c][r];
PVector p1 = points_view[c][r+1];
if(p0.z < nearPlane && p1.z < nearPlane){ continue; };
if(p0.z >= nearPlane && p1.z < nearPlane)
p1 = PVector.lerp(p0, p1, (p0.z - nearPlane) / (p0.z - p1.z));
if(p0.z < nearPlane && p1.z >= nearPlane)
p0 = PVector.lerp(p1, p0, (p1.z - nearPlane) / (p1.z - p0.z));
// project
p0 = applyPerspectiveTransform(p0);
p1 = applyPerspectiveTransform(p1);
vertex(p0.x, p0.y);
vertex(p1.x, p1.y);
}
endShape();
}
}
Ray tracing doesn't determine whether or not a polygon is visible. It determines what point (if any) on what polygon is visible in a given direction.
As a simplification: rasterisation works by taking a set of geometry and, for each piece, determining which pixels it affects. Ray tracing works by taking a set of pixels and, for each one, determining what geometry is visible along that direction.
With rasterisation, there are many ways of making sure that polygons don't draw in the wrong order. One approach is to sort them by distance to the camera, but that doesn't work with polygons that overlap. The usual approach is to use a z-buffer: when a polygon is rasterised, calculate the distance to the camera in each pixel, and only update the buffer if the new value is nearer to the camera than the old value.
With ray tracing, each ray returns the nearest hit location along a direction, along with what it hit. Since each pixel will only be visited once, you don't need to worry about triangles drawing on top of each other.
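To make that concrete, here is a minimal, untested sketch in Processing-style Java of the "keep only the nearest hit" idea. It assumes the faces have already been split into triangles (your vertices array holds arbitrary polygons, so they would need fan-triangulation first); the names intersectTriangle, tracePixel, triangles and triColors are illustrative, not taken from your code.
// Moller-Trumbore ray/triangle intersection: returns the distance along the ray
// to the hit point, or Float.POSITIVE_INFINITY if the ray misses the triangle.
float intersectTriangle(PVector orig, PVector dir, PVector v0, PVector v1, PVector v2) {
  PVector e1 = PVector.sub(v1, v0);
  PVector e2 = PVector.sub(v2, v0);
  PVector p  = dir.cross(e2);
  float det = e1.dot(p);
  if (abs(det) < 1e-8) return Float.POSITIVE_INFINITY; // ray is parallel to the triangle
  float invDet = 1.0f / det;
  PVector s = PVector.sub(orig, v0);
  float u = s.dot(p) * invDet;
  if (u < 0 || u > 1) return Float.POSITIVE_INFINITY;
  PVector q = s.cross(e1);
  float v = dir.dot(q) * invDet;
  if (v < 0 || u + v > 1) return Float.POSITIVE_INFINITY;
  float t = e2.dot(q) * invDet;
  return t > 0 ? t : Float.POSITIVE_INFINITY;
}
// For one pixel: test every triangle and keep only the closest hit.
// triangles[i] holds the three corners of triangle i, triColors[i] its colour.
color tracePixel(PVector camPos, PVector rayDir, PVector[][] triangles, color[] triColors) {
  float nearest = Float.POSITIVE_INFINITY;
  color result = color(255); // background colour if nothing is hit
  for (int i = 0; i < triangles.length; i++) {
    float d = intersectTriangle(camPos, rayDir,
                                triangles[i][0], triangles[i][1], triangles[i][2]);
    if (d < nearest) { // closer than anything hit so far for this pixel
      nearest = d;
      result = triColors[i];
    }
  }
  return result;
}
Because each pixel keeps only the minimum distance, overlapping polygons never draw over each other, which is why a ray tracer never shows back faces through front ones.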
If you just want to project a piece of 3D geometry onto a plane, rasterisation will likely be much, much faster. At a very high level, do this:
create an RGBA buffer of size X*Y
create a z buffer of size X*Y and fill it with 'inf'
for each triangle:
    project the triangle onto the projection plane
    for each pixel the triangle might affect:
        calculate distance from camera to the corresponding position on the triangle
        if the distance is lower than the current value in the z buffer:
            replace the value in the RGBA and z buffers with the new values
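Mapped onto your Processing sketch, that per-pixel depth test could look roughly like this. It is an untested sketch under a few assumptions: the faces are already triangulated, each projected PVector keeps its view-space depth in z, and the projected x/y have already been converted to pixel coordinates; zBuffer and fillTriangleZ are names I made up, not parts of your code.
float[] zBuffer; // one depth value per pixel; refill with Float.MAX_VALUE at the start of every frame
// Fill one projected triangle (screen-space x/y, view-space depth in z) with a per-pixel depth test.
// Call loadPixels() before the triangle loop and updatePixels() after it.
void fillTriangleZ(PVector a, PVector b, PVector c, color col) {
  int minX = max(0, floor(min(a.x, min(b.x, c.x))));
  int maxX = min(width - 1, ceil(max(a.x, max(b.x, c.x))));
  int minY = max(0, floor(min(a.y, min(b.y, c.y))));
  int maxY = min(height - 1, ceil(max(a.y, max(b.y, c.y))));
  float area = (b.x - a.x) * (c.y - a.y) - (b.y - a.y) * (c.x - a.x);
  if (abs(area) < 1e-6) return; // degenerate (zero-area) triangle
  for (int y = minY; y <= maxY; y++) {
    for (int x = minX; x <= maxX; x++) {
      // barycentric weights of this pixel with respect to the triangle
      float w0 = ((b.x - x) * (c.y - y) - (b.y - y) * (c.x - x)) / area;
      float w1 = ((c.x - x) * (a.y - y) - (c.y - y) * (a.x - x)) / area;
      float w2 = 1 - w0 - w1;
      if (w0 < 0 || w1 < 0 || w2 < 0) continue; // pixel lies outside the triangle
      float z = w0 * a.z + w1 * b.z + w2 * c.z;  // interpolated depth (not perspective-correct, but fine for visibility)
      int idx = y * width + x;
      if (z < zBuffer[idx]) { // only draw if nearer than what is already stored there
        zBuffer[idx] = z;
        pixels[idx] = col;
      }
    }
  }
}
Note that your draw() currently relies on translate()/scale(1, -1); a pixels[]-level approach like this bypasses those, so the centring and the y-flip would have to be applied to the projected coordinates instead.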
My goal is to show, programmatically, the fitting of many given rectangular boxes (cuboids) of individually different sizes inside a larger volume, such as a storage unit.
The mathematics part is understood: as in linear programming / linear algebra, we can sum the volumes of all the smaller boxes to find the best fit within the volume of the larger one.
The actual requirement is to show this fitting graphically on a web page, preferably in 3D, and if possible to allow the user to interact with the fitting, i.e. shuffle the placement of the boxes, etc.
Also, since I am a Java developer by profession, Java or related languages / frameworks would be my choice. However, I can use any other technology / framework / language if the end results are met.
NB: Weight is also a concern (parameter). There is a maximum weight that can be stacked in any given storage unit.
Also, since storage units can be accessed without permission (by thieves), the total value of the boxes stacked in one unit is also limited. The user may want to fit higher-value boxes in a unit that has better security, and vice versa.
Example: allow fitting many rectangular boxes containing household electronics into a given room. The boxes may be for TVs, refrigerators, washing machines, dishwashers, PlayStations, Xbox 360s, etc. The different dimensions of these boxes should give you an idea of what to expect while fitting them into the limited volume.
If there is any FOSS (or even non-FOSS) library or project for this, a pointer to it would be welcome.
Disclaimer: Okay, I know this does not 100% answer your question, and the code is very old (as can be concluded from the old-fashioned CVS comments); today I would not write it that way anymore. It still runs on Java 8, though; I tested it. But in addition to solving the little informatics challenge problem of water flowing through a 3D matrix of cuboids from top to bottom, depending on how "leaky" the matrix (symbolising some kind of Swiss cheese) is, it also does some very simple 3D visualisation via Java 3D. Thus, you need to install Java 3D and put the corresponding libraries on your classpath.
The 3D output looks something like this:
package vhs.bwinfo.cheese;
// $Id: Cuboid.java,v 1.1.2.1 2006/01/10 19:48:41 Robin Exp $
import javax.media.j3d.Appearance;
import javax.media.j3d.QuadArray;
import javax.media.j3d.Shape3D;
import javax.vecmath.Point3f;
import javax.vecmath.TexCoord2f;
import javax.vecmath.Vector3f;
public class Cuboid extends Shape3D {
private static final float POS = +0.5f;
private static final float NEG = -0.5f;
private static final Point3f[] POINTS = new Point3f[] {
new Point3f(NEG, NEG, NEG),
new Point3f(POS, NEG, NEG),
new Point3f(POS, NEG, POS),
new Point3f(NEG, NEG, POS),
new Point3f(NEG, POS, NEG),
new Point3f(POS, POS, NEG),
new Point3f(POS, POS, POS),
new Point3f(NEG, POS, POS)
};
private static final TexCoord2f[] TEX_COORDS = new TexCoord2f[] {
new TexCoord2f(0, 1),
new TexCoord2f(1, 1),
new TexCoord2f(1, 0),
new TexCoord2f(0, 0)
};
private static final int VERTEX_FORMAT =
QuadArray.COORDINATES |
QuadArray.NORMALS |
QuadArray.TEXTURE_COORDINATE_2;
public Cuboid(float scaleX, float scaleY, float scaleZ) {
Point3f[] points = new Point3f[8];
for (int i = 0; i < 8; i++)
points[i] = new Point3f(
POINTS[i].x * scaleX,
POINTS[i].y * scaleY,
POINTS[i].z * scaleZ
);
Point3f[] vertices = {
points[3], points[2], points[1], points[0], // bottom
points[4], points[5], points[6], points[7], // top
points[7], points[3], points[0], points[4], // left
points[6], points[5], points[1], points[2], // right
points[7], points[6], points[2], points[3], // front
points[5], points[4], points[0], points[1] // back
};
QuadArray geometry = new QuadArray(24, VERTEX_FORMAT);
geometry.setCoordinates(0, vertices);
for (int i = 0; i < 24; i++)
geometry.setTextureCoordinate(0, i, TEX_COORDS[i % 4]);
Vector3f normal = new Vector3f();
Vector3f v1 = new Vector3f();
Vector3f v2 = new Vector3f();
Point3f[] pts = new Point3f[4];
for (int i = 0; i < 4; i++)
pts[i] = new Point3f();
for (int face = 0; face < 6; face++) {
geometry.getCoordinates(face * 4, pts);
v1.sub(pts[0], pts[2]);
v2.sub(pts[1], pts[3]);
normal.cross(v1, v2);
normal.normalize();
for (int i = 0; i < 4; i++)
geometry.setNormal((face * 4 + i), normal);
}
setGeometry(geometry);
setAppearance(new Appearance());
}
public Cuboid(float scaleFactor) {
this(scaleFactor, scaleFactor, scaleFactor);
}
}
package vhs.bwinfo.cheese;
// $Id: LeakyCheese.java,v 1.2.2.2 2006/01/10 15:37:14 Robin Exp $
import com.sun.j3d.utils.applet.JMainFrame;
import javax.swing.*;
import java.util.Random;
import static java.lang.System.out;
public class LeakyCheese {
private int width = 20, height = 20, depth = 20;
private int numClasses = 100, samplesPerClass = 100;
private double pMin = 0, pMax = 1;
private double pDiff = pMax - pMin;
private double classSize = pDiff / numClasses;
private int[] stats;
enum CubeState {CHEESE, AIR, WATER}
final private CubeState[][][] cheese;
private static final Random RND = new Random();
public LeakyCheese(
int width, int height, int depth,
int numClasses, int samplesPerClass,
double pMin, double pMax
) {
this.width = width;
this.height = height;
this.depth = depth;
this.numClasses = numClasses;
this.samplesPerClass = samplesPerClass;
this.pMin = pMin;
this.pMax = pMax;
pDiff = pMax - pMin;
classSize = pDiff / numClasses;
cheese = new CubeState[width][height][depth];
}
public LeakyCheese(
int width, int height, int depth,
int numClasses, int samplesPerClass
) {
this(width, height, depth, numClasses, samplesPerClass, 0, 1);
}
public LeakyCheese() {
cheese = new CubeState[width][height][depth];
}
private boolean pourWater(int x, int y, int z) {
if (x < 0 || x >= width || y < 0 || y >= height || z < 0 || z >= depth)
return false;
if (cheese[x][y][z] != CubeState.AIR)
return false;
cheese[x][y][z] = CubeState.WATER;
boolean retVal = (y == 0);
retVal = pourWater(x + 1, y, z) || retVal;
retVal = pourWater(x - 1, y, z) || retVal;
retVal = pourWater(x, y + 1, z) || retVal;
retVal = pourWater(x, y - 1, z) || retVal;
retVal = pourWater(x, y, z + 1) || retVal;
retVal = pourWater(x, y, z - 1) || retVal;
return retVal;
}
private boolean isLeaky(double p) {
for (int x = 0; x < width; x++)
for (int y = 0; y < height; y++)
for (int z = 0; z < depth; z++)
cheese[x][y][z] = (RND.nextDouble() < p)
? CubeState.CHEESE
: CubeState.AIR;
boolean retVal = false;
for (int x = 0; x < width; x++)
for (int z = 0; z < depth; z++)
retVal = pourWater(x, height - 1, z) || retVal;
return retVal;
}
private void generateStats() {
if (stats != null)
return;
stats = new int[numClasses];
for (int i = 0; i < numClasses; i++) {
for (int j = 0; j < samplesPerClass; j++) {
double p = pMin + classSize * (RND.nextDouble() + i);
if (isLeaky(p))
stats[i]++;
}
}
}
public void printStats() {
generateStats();
out.println(
"p (cheese) | p (leaky)\n" +
"------------------+-----------"
);
for (int i = 0; i < numClasses; i++) {
out.println(
String.format(
"%1.5f..%1.5f | %1.5f",
pMin + classSize * i,
pMin + classSize * (i + 1),
(double) stats[i] / samplesPerClass
)
);
}
}
public static void main(String[] args) {
//new LeakyCheese().printStats();
//new LeakyCheese(40, 40, 40, 50, 100, 0.66, .71).printStats();
LeakyCheese cheeseBlock = new LeakyCheese(5, 20, 5, 20, 100);
//LeakyCheese cheeseBlock = new LeakyCheese(20, 20, 20, 20, 100);
while (!cheeseBlock.isLeaky(0.65))
;
out.println("*** required solution found - now rendering... ***");
JMainFrame f = new JMainFrame(new LeakyCheeseGUI(cheeseBlock.cheese), 512, 512);
f.setLocationRelativeTo(null);
f.setExtendedState(JFrame.MAXIMIZED_BOTH);
}
}
package vhs.bwinfo.cheese;
// $Id: LeakyCheeseGUI.java,v 1.1.2.1 2006/01/10 15:25:18 Robin Exp $
import com.sun.j3d.utils.applet.MainFrame;
import com.sun.j3d.utils.universe.SimpleUniverse;
import vhs.bwinfo.cheese.LeakyCheese.CubeState;
import javax.media.j3d.*;
import javax.vecmath.Point3d;
import javax.vecmath.Vector3f;
import java.applet.Applet;
import java.awt.*;
import java.util.Random;
public class LeakyCheeseGUI extends Applet {
static final long serialVersionUID = -8194627556699837928L;
public BranchGroup createSceneGraph(CubeState[][][] cheese) {
// Create the root of the branch graph
BranchGroup bgRoot = new BranchGroup();
// Composite of two rotations around different axes. The resulting
// TransformGroup is the parent of all our cheese cubes, because their
// orientation is identical. They only differ in their translation
// values and colours.
Transform3D tRotate = new Transform3D();
Transform3D tRotateTemp = new Transform3D();
tRotate.rotX(Math.PI / 8.0d);
tRotateTemp.rotY(Math.PI / -4.0d);
tRotate.mul(tRotateTemp);
TransformGroup tgRotate = new TransformGroup(tRotate);
bgRoot.addChild(tgRotate);
// Bounding sphere for rendering
BoundingSphere bounds =
new BoundingSphere(new Point3d(0.0, 0.0, 0.0), 100.0);
// Set background colour
// Note: Using Canvas3D.setBackground does not work, because it is an
// AWT method. Java 3D, though, gets its background colour from its
// background node (black, if not present).
Background background = new Background(0.5f, 0.5f, 0.5f);
background.setApplicationBounds(bounds);
bgRoot.addChild(background);
TransparencyAttributes transpAttr;
// Little cheese cubes
Appearance cheeseAppearance = new Appearance();
transpAttr =
new TransparencyAttributes(TransparencyAttributes.NICEST, 0.98f);
cheeseAppearance.setTransparencyAttributes(transpAttr);
cheeseAppearance.setColoringAttributes(
new ColoringAttributes(1, 1, 0, ColoringAttributes.NICEST));
PolygonAttributes pa = new PolygonAttributes();
//pa.setPolygonMode(PolygonAttributes.POLYGON_LINE);
pa.setCullFace(PolygonAttributes.CULL_NONE);
cheeseAppearance.setPolygonAttributes(pa);
// Little water cubes
Appearance waterAppearance = new Appearance();
transpAttr =
new TransparencyAttributes(TransparencyAttributes.NICEST, 0.85f);
waterAppearance.setTransparencyAttributes(transpAttr);
waterAppearance.setColoringAttributes(
new ColoringAttributes(0, 0, 1, ColoringAttributes.NICEST));
pa = new PolygonAttributes();
pa.setCullFace(PolygonAttributes.CULL_NONE);
waterAppearance.setPolygonAttributes(pa);
// Little air cubes
Appearance airAppearance = new Appearance();
transpAttr =
new TransparencyAttributes(TransparencyAttributes.NICEST, 0.95f);
airAppearance.setTransparencyAttributes(transpAttr);
airAppearance.setColoringAttributes(
new ColoringAttributes(1, 1, 1, ColoringAttributes.NICEST));
pa = new PolygonAttributes();
//pa.setPolygonMode(PolygonAttributes.POLYGON_LINE);
pa.setCullFace(PolygonAttributes.CULL_NONE);
airAppearance.setPolygonAttributes(pa);
// Water-coloured (i.e. blue) wire frame around cheese block, if leaky
Appearance waterWireFrameAppearance = new Appearance();
waterWireFrameAppearance.setColoringAttributes(
new ColoringAttributes(0, 0, 1, ColoringAttributes.NICEST));
pa = new PolygonAttributes();
pa.setPolygonMode(PolygonAttributes.POLYGON_LINE);
pa.setCullFace(PolygonAttributes.CULL_NONE);
waterWireFrameAppearance.setPolygonAttributes(pa);
// Cheese-coloured (i.e. yellow) wire frame around cheese block, if not leaky
Appearance cheeseWireFrameAppearance = new Appearance();
cheeseWireFrameAppearance.setColoringAttributes(
new ColoringAttributes(1, 1, 0, ColoringAttributes.NICEST));
pa = new PolygonAttributes();
pa.setPolygonMode(PolygonAttributes.POLYGON_LINE);
pa.setCullFace(PolygonAttributes.CULL_NONE);
cheeseWireFrameAppearance.setPolygonAttributes(pa);
// Absolute offsets for the cheese block to fit into the viewing canvas
final float xOffs = -0.8f;
final float yOffs = -0.55f;
final float zOffs = 0;
// Create all those little cubes ;-)
final int xSize = cheese.length;
final int ySize = cheese[0].length;
final int zSize = cheese[0][0].length;
final int maxSize = Math.max(xSize, Math.max(ySize, zSize));
final float xCenterOffs = 0.5f * (maxSize - xSize) / maxSize;
final float yCenterOffs = 0.5f * (maxSize - ySize) / maxSize;
final float zCenterOffs = -0.5f * (maxSize - zSize) / maxSize;
boolean isLeaky = false;
for (int x = 0; x < xSize; x++)
for (int y = 0; y < ySize; y++)
for (int z = 0; z < zSize; z++) {
Transform3D tTranslate = new Transform3D();
tTranslate.setTranslation(
new Vector3f(
xOffs + xCenterOffs + 1.0f * x / maxSize,
yOffs + yCenterOffs + 1.0f * y / maxSize,
zOffs + zCenterOffs - 1.0f * z / maxSize
)
);
TransformGroup tgTranslate = new TransformGroup(tTranslate);
tgRotate.addChild(tgTranslate);
Cuboid cube = new Cuboid(1.0f / maxSize);
switch (cheese[x][y][z]) {
case CHEESE:
cube.setAppearance(cheeseAppearance);
break;
case WATER:
cube.setAppearance(waterAppearance);
if (y == 0)
isLeaky = true;
break;
case AIR:
cube.setAppearance(airAppearance);
}
tgTranslate.addChild(cube);
}
// If cheese block is leaky, visualise it by drawing a water-coloured
// (i.e. blue) wire frame around it. Otherwise use a cheese-coloured
// (i.e. yellow) one.
Transform3D tTranslate = new Transform3D();
tTranslate.setTranslation(
new Vector3f(
xOffs + xCenterOffs + 0.5f * (xSize - 1) / maxSize,
yOffs + yCenterOffs + 0.5f * (ySize - 1) / maxSize,
zOffs + zCenterOffs - 0.5f * (zSize - 1) / maxSize
)
);
TransformGroup tgTranslate = new TransformGroup(tTranslate);
tgRotate.addChild(tgTranslate);
Cuboid cuboid = new Cuboid(
1.0f * xSize / maxSize,
1.0f * ySize / maxSize,
1.0f * zSize / maxSize
);
cuboid.setAppearance(isLeaky ? waterWireFrameAppearance : cheeseWireFrameAppearance);
tgTranslate.addChild(cuboid);
// Let Java 3D perform optimizations on this scene graph.
bgRoot.compile();
return bgRoot;
}
public LeakyCheeseGUI(CubeState[][][] cheese) {
// Create a simple scene and attach it to the virtual universe
GraphicsConfiguration graphCfg = SimpleUniverse.getPreferredConfiguration();
Canvas3D canvas = new Canvas3D(graphCfg);
setLayout(new BorderLayout());
add(canvas, "Center");
SimpleUniverse universe = new SimpleUniverse(canvas);
// This will move the ViewPlatform back a bit so the objects
// in the scene can be viewed.
universe.getViewingPlatform().setNominalViewingTransform();
universe.addBranchGraph(createSceneGraph(cheese));
}
public static void main(String[] args) {
final Random RND = new Random(System.currentTimeMillis());
CubeState[][][] testCheese = new CubeState[5][8][11];
for (int x = 0; x < 5; x++)
for (int y = 0; y < 8; y++)
for (int z = 0; z < 11; z++)
testCheese[x][y][z] = (RND.nextFloat() < 0.7f)
? CubeState.CHEESE
: (RND.nextBoolean() ? CubeState.WATER : CubeState.AIR);
// Applet can also run as a stand-alone application
new MainFrame(new LeakyCheeseGUI(testCheese), 512, 512);
}
}
You will probably want to use JavaScript, and specifically WebGL. JavaScript is the de facto language for interactive web pages, and WebGL is a JavaScript API for rendering 2D and 3D scenes on an HTML5 canvas element. A solution using WebGL should be compatible with all major browsers. Programming even simple scenes in WebGL can be pretty involved, though, so I'd recommend using a framework such as three.js to simplify things.
Here is an example of interactive, draggable cubes using three.js. Some of the key lines of code from that example are:
// create the cube
var object = new THREE.Mesh( geometry, new THREE.MeshLambertMaterial( { color: Math.random() * 0xffffff } ) );
// set coordinates, rotation, and scale of the cubes
object.position.x = ...
object.position.y = ...
object.position.z = ...
object.rotation.x = ...
object.rotation.y = ...
object.rotation.z = ...
object.scale.x = ...
object.scale.y = ...
object.scale.z = ...
// lighting stuff
object.castShadow = true;
object.receiveShadow = true;
// add to scene and list of objects
scene.add( object );
objects.push( object );
Again, the full, working example is found at this link (click view source on that page to view the code on github).
I am trying to extract the user silhouette and put it on top of my images. I was able to make a mask and cut the user out of the RGB image, but the contour is messy.
The question is how I can make the mask more precise (so it fits the real user). I've tried ERODE/DILATE filters, but they don't do much. Maybe I need some feather filter like in Photoshop; I don't know.
Here is my code.
import SimpleOpenNI.*;
SimpleOpenNI context;
PImage mask;
void setup()
{
size(640*2, 480);
context = new SimpleOpenNI(this);
if (context.isInit() == false)
{
exit();
return;
}
context.enableDepth();
context.enableRGB();
context.enableUser();
context.alternativeViewPointDepthToImage();
}
void draw()
{
frame.setTitle(int(frameRate) + " fps");
context.update();
int[] userMap = context.userMap();
background(0, 0, 0);
mask = loadImage("black640.jpg"); //just a black image
int xSize = context.depthWidth();
int ySize = context.depthHeight();
mask.loadPixels();
for (int y = 0; y < ySize; y++) {
for (int x = 0; x < xSize; x++) {
int index = x + y*xSize;
if (userMap[index]>0) {
mask.pixels[index]=color(255, 255, 255);
}
}
}
mask.updatePixels();
image(mask, 0, 0);
mask.filter(DILATE);
mask.filter(DILATE);
PImage rgb = context.rgbImage();
rgb.mask(mask);
image(rgb, context.depthWidth() + 10, 0);
}
It's good you're aligning the RGB and depth streams.
There are a few things that could be improved in terms of efficiency:
No need to reload a black image every single frame (in the draw() loop) since you're modifying all the pixels anyway:
mask = loadImage("black640.jpg"); //just a black image
Also, since you don't need the x,y coordinates as you loop through the user data, you can use a single for loop which should be a bit faster:
for(int i = 0 ; i < numPixels ; i++){
mask.pixels[i] = userMap[i] > 0 ? color(255) : color(0);
}
instead of:
for (int y = 0; y < ySize; y++) {
for (int x = 0; x < xSize; x++) {
int index = x + y*xSize;
if (userMap[index]>0) {
mask.pixels[index]=color(255, 255, 255);
}
}
}
Another hacky thing you could do is retrieve the userImage() from SimpleOpenNI instead of the userMap(), and apply a THRESHOLD filter to it, which in theory should give you the same result as above.
For example:
int[] userMap = context.userMap();
background(0, 0, 0);
mask = loadImage("black640.jpg"); //just a black image
int xSize = context.depthWidth();
int ySize = context.depthHeight();
mask.loadPixels();
for (int y = 0; y < ySize; y++) {
for (int x = 0; x < xSize; x++) {
int index = x + y*xSize;
if (userMap[index]>0) {
mask.pixels[index]=color(255, 255, 255);
}
}
}
could be:
mask = context.userImage();
mask.filter(THRESHOLD);
In terms of filtering, if you want to shrink the silhouette you should ERODE, and blurring should give you a bit of that Photoshop-like feathering.
Note that some filter() calls take arguments (like BLUR), but others, like the ERODE/DILATE morphological filters, don't; you can still roll your own loops to deal with that.
I also recommend having some sort of easy-to-tweak interface (it can be a fancy slider or a simple keyboard shortcut) when playing with filters.
Here's a rough attempt at the refactored sketch with the above comments:
import SimpleOpenNI.*;
SimpleOpenNI context;
PImage mask;
int numPixels = 640*480;
int dilateAmt = 1;
int erodeAmt = 1;
int blurAmt = 0;
void setup()
{
size(640*2, 480);
context = new SimpleOpenNI(this);
if (context.isInit() == false)
{
exit();
return;
}
context.enableDepth();
context.enableRGB();
context.enableUser();
context.alternativeViewPointDepthToImage();
mask = createImage(640,480,RGB);
}
void draw()
{
frame.setTitle(int(frameRate) + " fps");
context.update();
int[] userMap = context.userMap();
background(0, 0, 0);
//you don't need to keep reloading the image every single frame since you're updating all the pixels below anyway
// mask = loadImage("black640.jpg"); //just a black image
// mask.loadPixels();
// int xSize = context.depthWidth();
// int ySize = context.depthHeight();
// for (int y = 0; y < ySize; y++) {
// for (int x = 0; x < xSize; x++) {
// int index = x + y*xSize;
// if (userMap[index]>0) {
// mask.pixels[index]=color(255, 255, 255);
// }
// }
// }
//a single loop is usually faster than a nested loop and you don't need the x,y coordinates anyway
for(int i = 0 ; i < numPixels ; i++){
mask.pixels[i] = userMap[i] > 0 ? color(255) : color(0);
}
//erode
for(int i = 0 ; i < erodeAmt ; i++) mask.filter(ERODE);
//dilate
for(int i = 0 ; i < dilateAmt; i++) mask.filter(DILATE);
//blur
mask.filter(BLUR,blurAmt);
mask.updatePixels();
//preview the mask after you process it
image(mask, 0, 0);
PImage rgb = context.rgbImage();
rgb.mask(mask);
image(rgb, context.depthWidth() + 10, 0);
//print filter values for debugging purposes
fill(255);
text("erodeAmt: " + erodeAmt + "\tdilateAmt: " + dilateAmt + "\tblurAmt: " + blurAmt,15,15);
}
void keyPressed(){
if(key == 'e') erodeAmt--;
if(key == 'E') erodeAmt++;
if(key == 'd') dilateAmt--;
if(key == 'D') dilateAmt++;
if(key == 'b') blurAmt--;
if(key == 'B') blurAmt++;
//constrain values
if(erodeAmt < 0) erodeAmt = 0;
if(dilateAmt < 0) dilateAmt = 0;
if(blurAmt < 0) blurAmt = 0;
}
Unfortunately I can't test with an actual sensor right now, so please use the concepts explained, but bear in mind the full sketch code isn't tested.
The above sketch (if it runs) should allow you to use keys to control the filter parameters (e/E to decrease/increase erosion, d/D for dilation, b/B for blur). Hopefully you'll get satisfactory results.
When working with SimpleOpenNI in general I advise recording an .oni file (check out the RecorderPlay example for that) of a person for the most common use case. This will save you some time in the long run when testing and will allow you to work remotely with the sensor detached. One thing to bear in mind: the depth resolution is reduced to half on recordings (but using a usingRecording boolean flag should keep things safe).
The last and probably most important point is about the quality of the end result. Your resulting image can't be much better if the source image isn't easy to work with to begin with. The depth data from the original Kinect sensor isn't great. The Asus sensors feel a wee bit more stable, but the difference is negligible in most cases. If you are going to stick with one of these sensors, make sure you've got a clear background and decent lighting (without too much direct warm light (sunlight, incandescent light bulbs, etc.), since it may interfere with the sensor).
If you want a more accurate user cut and the above filtering doesn't get the results you're after, consider switching to a better sensor like the KinectV2. The depth quality is much better and the sensor is less susceptible to direct warm light. This may mean you need to use Windows (I see there's a KinectPV2 wrapper available) or openFrameworks (a C++ collection of libraries similar to Processing) with ofxKinectV2.
I've tried the built-in erode/dilate/blur in Processing, but they are very inefficient: every time I increment blurAmount in img.filter(BLUR, blurAmount), my FPS drops by 5 frames.
So I decided to try OpenCV. It is much better in comparison, and the result is satisfactory.
import SimpleOpenNI.*;
import processing.video.*;
import gab.opencv.*;
SimpleOpenNI context;
OpenCV opencv;
PImage mask;
int numPixels = 640*480;
int dilateAmt = 1;
int erodeAmt = 1;
int blurAmt = 1;
Movie mov;
void setup(){
opencv = new OpenCV(this, 640, 480);
size(640*2, 480);
context = new SimpleOpenNI(this);
if (context.isInit() == false) {
exit();
return;
}
context.enableDepth();
context.enableRGB();
context.enableUser();
context.alternativeViewPointDepthToImage();
mask = createImage(640, 480, RGB);
mov = new Movie(this, "wild.mp4");
mov.play();
mov.speed(5);
mov.volume(0);
}
void movieEvent(Movie m) {
m.read();
}
void draw() {
frame.setTitle(int(frameRate) + " fps");
context.update();
int[] userMap = context.userMap();
background(0, 0, 0);
mask.loadPixels();
for (int i = 0; i < numPixels; i++) {
mask.pixels[i] = userMap[i] > 0 ? color(255) : color(0);
}
mask.updatePixels();
opencv.loadImage(mask);
opencv.gray();
for (int i = 0; i < erodeAmt; i++) {
opencv.erode();
}
for (int i = 0; i < dilateAmt; i++) {
opencv.dilate();
}
if (blurAmt>0) {//blur with 0 amount causes error
opencv.blur(blurAmt);
}
mask = opencv.getSnapshot();
image(mask, 0, 0);
PImage rgb = context.rgbImage();
rgb.mask(mask);
image(mov, context.depthWidth() + 10, 0);
image(rgb, context.depthWidth() + 10, 0);
fill(255);
text("erodeAmt: " + erodeAmt + "\tdilateAmt: " + dilateAmt + "\tblurAmt: " + blurAmt, 15, 15);
}
void keyPressed() {
if (key == 'e') erodeAmt--;
if (key == 'E') erodeAmt++;
if (key == 'd') dilateAmt--;
if (key == 'D') dilateAmt++;
if (key == 'b') blurAmt--;
if (key == 'B') blurAmt++;
//constrain values
if (erodeAmt < 0) erodeAmt = 0;
if (dilateAmt < 0) dilateAmt = 0;
if (blurAmt < 0) blurAmt = 0;
}
I'm developing an Android app whose main purpose is to detect a requested object in a scene. To do this I'm using the SURF algorithm from OpenCV. I'm not having much luck with the detection because I don't know when an object should count as "found".
I obtain a frame from my device's camera and follow these steps to get the object's keypoints and descriptors:
Java Code
public void onSnapClick(View v) {
Imgproc.GaussianBlur(frameGray, frameGray, new Size(3, 3), 2);
Imgproc.Canny(frameGray, frameGray, 40, 120);
Imgproc.resize(frameGray, frameGray, new Size(320, 240));
FindFeatures(frameGray.getNativeObjAddr()); //JNI call
//Some code to store data in DB...
}
JNI call
double hessianThreshold=600;
int nOctaves=4;
int nOctaveLayers=2;
bool extended=true;
bool upright=false;
JNIEXPORT void JNICALL Java_es_ugr_reconocimiento_Juego_FindFeatures(JNIEnv* env, jobject, jlong addrGray) {
Mat& frameGray= *(Mat*) addrGray;
vector<KeyPoint> keyPoints;
Mat descriptores;
SurfFeatureDetector detector_Surf(hessianThreshold, nOctaves, nOctaveLayers, extended, upright);
SurfDescriptorExtractor extractor_Surf;
detector_Surf.detect(frameGray, keyPoints);
if (keyPoints.size() > 0)
extractor_Surf.compute(frameGray, keyPoints, descriptores);
}
Now I choose which object I want to find, and I follow these steps to do that:
Java Code
public void onSearchClick(View v) {
Imgproc.GaussianBlur(frameGray, frameGray, new Size(3, 3), 2);
Imgproc.Canny(frameGray, frameGray, 40, 120);
Imgproc.resize(frameGray, frameGray, new Size(320, 240));
nObject = FindObjects(frameGray.getNativeObjAddr()); //JNI call
if (nObject == searchObject)
//draw frame with a rectangle around the found object in the scenario....
}
JNI call
double hessianThreshold=600;
int nOctaves=4;
int nOctaveLayers=2;
bool extended=true;
bool upright=false;
JNIEXPORT jint JNICALL Java_es_ugr_reconocimiento_Juego_FindObjects(JNIEnv* env, jobject, jlong addrGray) {
Mat& frameGray = *(Mat*) addrGray;
vector<KeyPoint> keyPoints_esc;
Mat descriptores_esc;
SurfFeatureDetector detector_Surf(hessianThreshold, nOctaves, nOctaveLayers, extended, upright);
SurfDescriptorExtractor extractor_Surf;
detector_Surf.detect(frameGray , keyPoints_esc);
if (keyPoints_esc.size() == 0) return -1;
extractor_Surf.compute(frameGray , keyPoints_esc, descriptores_esc);
if (descriptores_esc.rows == 0) return -1;
for(int i=0;i<lstObjects.size();i++){
Mat descriptores_obj = lstDescriptors.at(i);
vector<KeyPoint> keyPoints_obj = lstKeyPoints.at(i);
FlannBasedMatcher matcher;
vector<vector<DMatch> > matches;
matcher.knnMatch(descriptores_obj, descriptores_esc, matches, 2);
// ----------------------------------------------------------------------
// Draw only "good" matches (i.e. whose distance is less than 2*min_dist,
// or a small arbitrary value ( 0.02 ) in the event that min_dist is very
// small)
// PS.- radiusMatch can also be used here.
// ----------------------------------------------------------------------
vector<DMatch> good_matches;
//THIS LOOP IS SENSITIVE TO SEGFAULTS
for (int i = 0; i < min(descriptores_obj.rows - 1, (int) matches.size());i++){
if ( (matches[i][0].distance < 0.6 * (matches[i][1].distance)) &&
((int) matches[i].size() <= 2 && (int) matches[i].size() > 0) ) {
good_matches.push_back(matches[i][0]);
}
}
if (good_matches.size() >= nThreshold) {
vector < Point2f > obj;
vector < Point2f > scene;
for (int i = 0; i < good_matches.size(); i++) {
//-- Get the keypoints from the good matches
obj.push_back(keyPoints_obj[good_matches[i].queryIdx].pt);
scene.push_back(keyPoints_esc[good_matches[i].trainIdx].pt);
}
Mat H = findHomography(obj, scene, CV_RANSAC);
vector<Point2f> obj_corners(4);
obj_corners[0] = cvPoint(0, 0);
obj_corners[1] = cvPoint(240, 0);
obj_corners[2] = cvPoint(240, 320);
obj_corners[3] = cvPoint(0, 320);
vector<Point2f> scene_corners(4);
perspectiveTransform(obj_corners, scene_corners, H);
line(frameGray, scene_corners[0], scene_corners[1], Scalar(255, 0, 0), 4);
line(frameGray, scene_corners[1], scene_corners[2], Scalar(255, 0, 0), 4);
line(frameGray, scene_corners[2], scene_corners[3], Scalar(255, 0, 0), 4);
line(frameGray, scene_corners[3], scene_corners[0], Scalar(255, 0, 0), 4);
for (unsigned int i = 0; i < scene.size(); i++) {
const Point2f& kp = scene[i];
circle(frameGray, Point(kp.x, kp.y), 10, Scalar(255, 255, 255, 255));
}
return i; //position of the matched object
}
}
}
I don't know what threshold would be best for this comparison:
if (good_matches.size() >= nThreshold) // do findHomography...
I've been searching, and almost every piece of code I found used 4 as nThreshold, but for me it isn't working well: my code "finds" an object almost every time.
Is there any better way to do this? For example using a different matcher, another threshold, or checking whether the homography produces something resembling a rectangle (I say this because sometimes it "finds" something but draws four lines that don't form a rectangle).
Please make the following changes in your code:
int nThreshold= 100;
if (good_matches.size() >= nThreshold)
{
continue; // This line prevents further matching steps if there are too many good matches (a lot of ambiguous points results in a false match)
}
vector < Point2f > obj;
vector < Point2f > scene;
for (int i = 0; i < good_matches.size(); i++) {
//-- Get the keypoints from the good matches
obj.push_back(keyPoints_obj[good_matches[i].queryIdx].pt);
scene.push_back(keyPoints_esc[good_matches[i].trainIdx].pt);
}
// Skip the homography if the object or scene contains fewer than four points (it cannot be computed from fewer than 4 point pairs, so your program will crash here if you do not handle that case)
if(obj.size() < 4 || scene.size() < 4)
{
continue;
}
Mat H = findHomography(obj, scene, CV_RANSAC);
I went through many questions on Stack Overflow and was able to develop a small program to detect squares and rectangles correctly. This is my sample code:
public static CvSeq findSquares(final IplImage src, CvMemStorage storage) {
CvSeq squares = new CvContour();
squares = cvCreateSeq(0, sizeof(CvContour.class), sizeof(CvSeq.class), storage);
IplImage pyr = null, timg = null, gray = null, tgray;
timg = cvCloneImage(src);
CvSize sz = cvSize(src.width(), src.height());
tgray = cvCreateImage(sz, src.depth(), 1);
gray = cvCreateImage(sz, src.depth(), 1);
// cvCvtColor(gray, src, 1);
pyr = cvCreateImage(cvSize(sz.width() / 2, sz.height() / 2), src.depth(), src.nChannels());
// down-scale and upscale the image to filter out the noise
// cvPyrDown(timg, pyr, CV_GAUSSIAN_5x5);
// cvPyrUp(pyr, timg, CV_GAUSSIAN_5x5);
// cvSaveImage("ha.jpg",timg);
CvSeq contours = new CvContour();
// request closing of the application when the image window is closed
// show image on window
// find squares in every color plane of the image
for (int c = 0; c < 3; c++) {
IplImage channels[] = { cvCreateImage(sz, 8, 1), cvCreateImage(sz, 8, 1), cvCreateImage(sz, 8, 1) };
channels[c] = cvCreateImage(sz, 8, 1);
if (src.nChannels() > 1) {
cvSplit(timg, channels[0], channels[1], channels[2], null);
} else {
tgray = cvCloneImage(timg);
}
tgray = channels[c];
// // try several threshold levels
for (int l = 0; l < N; l++) {
// hack: use Canny instead of zero threshold level.
// Canny helps to catch squares with gradient shading
if (l == 0) {
// apply Canny. Take the upper threshold from slider
// and set the lower to 0 (which forces edges merging)
cvCanny(tgray, gray, 0, thresh, 5);
// dilate canny output to remove potential
// // holes between edge segments
cvDilate(gray, gray, null, 1);
} else {
// apply threshold if l!=0:
cvThreshold(tgray, gray, (l + 1) * 255 / N, 255,
CV_THRESH_BINARY);
}
// find contours and store them all as a list
cvFindContours(gray, storage, contours, sizeof(CvContour.class), CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);
CvSeq approx;
// test each contour
while (contours != null && !contours.isNull()) {
if (contours.elem_size() > 0) {
approx = cvApproxPoly(contours, Loader.sizeof(CvContour.class), storage, CV_POLY_APPROX_DP, cvContourPerimeter(contours) * 0.02, 0);
if (approx.total() == 4 && Math.abs(cvContourArea(approx, CV_WHOLE_SEQ, 0)) > 1000 && cvCheckContourConvexity(approx) != 0) {
double maxCosine = 0;
for (int j = 2; j < 5; j++) {
// find the maximum cosine of the angle between
// joint edges
double cosine = Math.abs(angle(
new CvPoint(cvGetSeqElem(
approx, j % 4)),
new CvPoint(cvGetSeqElem(
approx, j - 2)),
new CvPoint(cvGetSeqElem(
approx, j - 1))));
maxCosine = Math.max(maxCosine, cosine);
}
if (maxCosine < 0.2) {
CvRect x = cvBoundingRect(approx, l);
if ((x.width() * x.height()) < 50000) {
System.out.println("Width : " + x.width()
+ " Height : " + x.height());
cvSeqPush(squares, approx);
}
}
}
}
contours = contours.h_next();
}
contours = new CvContour();
}
}
return squares;
}
I use this image to detect rectangles and squares
I need to identify the following outputs:
But when I run the above code, it detects only the following rectangles, and I don't know the reason for that. Can someone please explain why?
This is the output that I got.
Please be kind enough to explain the problem in the above code and give some suggestions for detecting these squares and rectangles.
Given a mask image (a binary image, like your second figure), cvFindContours() gives you the contours (several lists of points).
Look at this link: http://dasl.mem.drexel.edu/~noahKuntz/openCVTut7.html
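For completeness, here is a rough, untested sketch of walking the contour list that cvFindContours() fills in, reusing the same JavaCV calls that already appear in your findSquares() method; mask is assumed to be a single-channel binary IplImage and storage a CvMemStorage.
CvSeq contours = new CvContour();
cvFindContours(mask, storage, contours, sizeof(CvContour.class),
        CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE);
// cvFindContours fills in a linked list of contours; walk it with h_next()
for (CvSeq c = contours; c != null && !c.isNull(); c = c.h_next()) {
    if (c.elem_size() > 0) {
        // simplify each contour and take its bounding box, as your code already does
        CvSeq approx = cvApproxPoly(c, sizeof(CvContour.class), storage,
                CV_POLY_APPROX_DP, cvContourPerimeter(c) * 0.02, 0);
        CvRect box = cvBoundingRect(approx, 0);
        System.out.println("contour bounds: " + box.width() + " x " + box.height());
    }
}
Each element of that list is one contour (a sequence of points), which you can then approximate, filter by area and convexity, and push into your squares sequence, exactly as your existing loop does.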