use weka with java for prediction on test set - java

I am trying to get the predictions on test set using evaluateModel function, however evaluation.evaluateModel(classifier, newTest,output) throws an exception.
Exception in thread "main" weka.core.WekaException: No dataset
structure provided!
import weka.classifiers.Evaluation;
import weka.core.Attribute;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;
import weka.classifiers.Evaluation;
import weka.core.converters.ConverterUtils.DataSource;
import weka.attributeSelection.CfsSubsetEval;
import weka.attributeSelection.ASSearch;
import weka.attributeSelection.BestFirst;
import weka.classifiers.functions.LinearRegression;
import weka.classifiers.meta.AttributeSelectedClassifier;
import weka.filters.supervised.attribute.AttributeSelection;
import weka.classifiers.evaluation.output.prediction.CSV;
/**
 * Trains a linear regression model on the attribute-selected training data and
 * writes its predictions for the test data through a CSV prediction output.
 *
 * <p>The attribute-selection filter is initialised with the training data only
 * and then applied unchanged to the test data, so both sets end up with the
 * same attributes.
 *
 * @throws Exception if loading, filtering, training or evaluation fails, or if
 *         the class attribute is missing from either data set
 */
public void evaluateTest() throws Exception
{
    final String classAttributeName = "regressionLabel";

    Instances trainInstances = new DataSource(trainingData.toString()).getDataSet();
    setClassByName(trainInstances, classAttributeName);

    Instances testInstances = new DataSource(testData.toString()).getDataSet();
    setClassByName(testInstances, classAttributeName);

    AttributeSelection filter = new AttributeSelection();
    filter.setSearch(this.search);
    filter.setEvaluator(this.eval);
    // Initialise the filter once, with the training set only.
    filter.setInputFormat(trainInstances);
    // The first pass configures the filter from the training instances; the
    // second pass reuses that configuration for the test instances.
    Instances newTrain = AttributeSelection.useFilter(trainInstances, filter);
    Instances newTest = AttributeSelection.useFilter(testInstances, filter);

    LinearRegression classifier = new LinearRegression();
    classifier.buildClassifier(newTrain);

    StringBuffer buffer = new StringBuffer();
    CSV output = new CSV();
    output.setBuffer(buffer);
    output.setOutputFile(predictFile);
    // evaluateModel() does not give the prediction output a dataset header
    // (crossValidateModel() does), so it must be set here; without it the
    // output fails with "No dataset structure provided!".
    output.setHeader(newTest);

    Evaluation evaluation = new Evaluation(newTrain);
    output.printHeader();
    evaluation.evaluateModel(classifier, newTest, output);
    output.printFooter();
}

/**
 * Marks the attribute called {@code attributeName} as the class attribute of {@code data}.
 *
 * @throws IllegalArgumentException if {@code data} has no attribute with that name
 */
private static void setClassByName(Instances data, String attributeName)
{
    Attribute classAttribute = data.attribute(attributeName);
    if (classAttribute == null) {
        throw new IllegalArgumentException("No attribute named '" + attributeName + "' in the data set");
    }
    data.setClassIndex(classAttribute.index());
}
The same thing works with evaluation.crossValidateModel.

Related

Converting Shape File to RDF document, in Java

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import javax.management.AttributeChangeNotification;
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.RDFNode;
import org.apache.jena.rdf.model.RDFReaderI;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.rdf.model.StmtIterator;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.riot.system.StreamRDFWriter;
import org.apache.jena.vocabulary.VCARD;
import org.geotools.data.DataStore;
import org.geotools.data.DataStoreFinder;
import org.geotools.data.DataUtilities;
import org.geotools.data.FeatureSource;
import org.geotools.data.FileDataStore;
import org.geotools.data.FileDataStoreFinder;
import org.geotools.data.Query;
import org.geotools.data.ServiceInfo;
import org.geotools.data.shapefile.ShapefileDataStore;
import org.geotools.data.simple.SimpleFeatureCollection;
import org.geotools.data.simple.SimpleFeatureIterator;
import org.geotools.data.simple.SimpleFeatureSource;
import org.geotools.feature.FeatureCollection;
import org.geotools.feature.FeatureIterator;
import org.geotools.swing.data.JFileDataStoreChooser;
import org.opengis.feature.ComplexAttribute;
import org.opengis.feature.simple.SimpleFeature;
import org.opengis.feature.simple.SimpleFeatureType;
import org.opengis.feature.type.FeatureType;
import org.opengis.filter.Filter;
/**
 * Reads a user-selected shapefile and prints an RDF/XML rendering of its
 * "Id" attributes to standard output.
 */
public class ShpToRdf {

    /**
     * Asks the user for a {@code .shp} file, reads at most 100 features from it,
     * and writes an RDF/XML model to {@code System.out}. Returns silently when
     * the user cancels the file chooser.
     *
     * @param args unused
     * @throws IOException if the shapefile cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        File file = JFileDataStoreChooser.showOpenFile("shp", null);
        if (file == null) {
            return;
        }

        // Parallel lists: names.get(i) is the attribute name of values.get(i).
        ArrayList<String> names = new ArrayList<String>();
        ArrayList<String> values = new ArrayList<String>();

        Model model = ModelFactory.createDefaultModel();
        String shpURI = "http://www.shp.fake/";
        // The prefix does not depend on any feature, so register it once.
        model.setNsPrefix("shp", shpURI);
        Resource shapeFile = model.createResource(shpURI);

        FileDataStore myData = FileDataStoreFinder.getDataStore(file);
        if (myData == null) {
            throw new IOException("No data store available for " + file);
        }
        try {
            SimpleFeatureSource source = myData.getFeatureSource();
            SimpleFeatureType schema = source.getSchema();
            Query query = new Query(schema.getTypeName());
            query.setMaxFeatures(100);

            FeatureCollection<SimpleFeatureType, SimpleFeature> collection = source.getFeatures(query);
            try (FeatureIterator<SimpleFeature> featureIterator = collection.features()) {
                while (featureIterator.hasNext()) {
                    SimpleFeature feature = featureIterator.next();
                    for (org.opengis.feature.Property attribute : feature.getProperties()) {
                        names.add(attribute.getName().toString());
                        // String.valueOf keeps the two lists aligned even when
                        // an attribute has no value (null).
                        values.add(String.valueOf(attribute.getValue()));
                    }
                }
            }
        } finally {
            // Release the file handles held by the data store.
            myData.dispose();
        }

        // Positions in names/values that hold an "Id" attribute.
        ArrayList<Integer> ids = new ArrayList<Integer>();
        for (int i = 0; i < names.size(); i++) {
            if (names.get(i).equals("Id")) {
                ids.add(i);
            }
        }

        Property featuresProperty = model.createProperty(shpURI, "features");
        for (int idIndex : ids) {
            Property id = model.createProperty(shpURI, names.get(idIndex));
            shapeFile.addProperty(featuresProperty, model.createResource()
                    .addProperty(id, model.createResource()
                            .addProperty(id, values.get(idIndex))
                            .addProperty(featuresProperty, "feature1")
                            .addProperty(featuresProperty, "feature2")
                            .addProperty(featuresProperty, "feature3")));
        }

        RDFDataMgr.write(System.out, model, Lang.RDFXML);
    }
}
I am trying to create an application that converts Shape File(shp) to RDF.
The problem is that I can get two ArrayLists from the shp. The one has the names of the values (id,name,geometry etc.), and the other has the values.
To create the RDF, I have to match each Id with the matching values(ex. Id =1 has name = road 1, geometry = line etc.)
Could you help me with this?
Thank you!
I think you should be able to do this by tweaking the following bit of logic
// Appends each property's name and value to two separate lists (names, values),
// so a name and its value are only linked by sharing the same list position.
for (org.opengis.feature.Property attribute : feature.getProperties()) {
names.add(attribute.getName().toString());
values.add(attribute.getValue().toString());
}
Instead of putting them in two lists, you can put them in a list of pairs. This way when you iterate over the list, you know the mapping between the subject and object.
It should look something similar to
// Both the attribute name and its value are stored as strings, so the pair type
// must be Pair<String, String> on both sides of every assignment.
List<Pair<String, String>> contentList = new ArrayList<Pair<String, String>>();
for (org.opengis.feature.Property attribute : feature.getProperties()) {
    Pair<String, String> subjectObjectPair =
            new Pair<String, String>(attribute.getName().toString(), attribute.getValue().toString());
    contentList.add(subjectObjectPair);
}
I'm not sure what the ids ArrayList is for, but you could move that logic into the for loop above to make sure you're only getting identifiers.

Adding values in HBase using Put.add() method

I'm writing a simple Java client to add values to an HBase table. I'm using put.add(byte[] columnFamily, byte[] columnQualifier, byte[] value), but this method is deprecated in the new HBase API. Can anyone please tell me how to do this with the new Put API?
Using maven I have downloaded jar for HBase version 1.2.0.
I'm using the following code :
package com.NoSQL;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
public class PopulatingData {
public static void main(String[] args) throws IOException{
String table = "Employee";
Logger.getRootLogger().setLevel(Level.WARN);
Configuration conf = HBaseConfiguration.create();
Connection con = ConnectionFactory.createConnection(conf);
Admin admin = con.getAdmin();
if(admin.tableExists(TableName.valueOf(table))) {
Table htable = con.getTable(TableName.valueOf(table));
/*********** adding a new row ***********/
// adding a row key
Put p = new Put(Bytes.toBytes("row1"));
p.add(Bytes.toBytes("ContactDetails"), Bytes.toBytes("Mobile"), Bytes.toBytes("9876543210"));
p.add(Bytes.toBytes("ContactDetails"), Bytes.toBytes("Email"), Bytes.toBytes("abhc#gmail.com"));
p.add(Bytes.toBytes("Personal"), Bytes.toBytes("Name"), Bytes.toBytes("Abhinav Rawat"));
p.add(Bytes.toBytes("Personal"), Bytes.toBytes("Age"), Bytes.toBytes("21"));
p.add(Bytes.toBytes("Personal"), Bytes.toBytes("Gender"), Bytes.toBytes("M"));
p.add(Bytes.toBytes("Employement"), Bytes.toBytes("Company"), Bytes.toBytes("UpGrad"));
p.add(Bytes.toBytes("Employement"), Bytes.toBytes("DOJ"), Bytes.toBytes("11:06:2018"));
p.add(Bytes.toBytes("Employement"), Bytes.toBytes("Designation"), Bytes.toBytes("ContentStrategist"));
htable.put(p);
/**********************/
System.out.print("Table is Populated");`enter code here`
}else {
System.out.println("The HBase Table named "+table+" doesn't exists.");
}
System.out.println("Returnning Main");
}
}
Use addColumn() method :
// rowKey, NAME_FAMILY, NAME_COL_QUALIFIER and name are placeholders that are not defined in this snippet.
// NOTE(review): addColumn presumably takes byte[] family, qualifier and value like the deprecated add() - confirm in the Put javadoc.
Put put = new Put(Bytes.toBytes(rowKey));
put.addColumn(NAME_FAMILY, NAME_COL_QUALIFIER, name);
Please refer more details in below javadoc :
https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/client/Put.html

How to create an attribute in Weka

I am working on a data mining project using WEKA in Java, and the instructions say that I have to create an Attribute object for each attribute in the dataset and add them to a FastVector. I tried looking at the API, but I don't think I'm doing it right. Can someone show me the right way to do it? I'm using the iris.arff file.
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import weka.core.Attribute;
import weka.core.FastVector;
import weka.core.Instances;
import weka.core.converters.ArffSaver;
/**
 * Loads the iris data set from an ARFF file, prints a summary of it, and
 * collects hand-built numeric attributes in a FastVector.
 */
public class StartWeka {

    /**
     * @param args unused
     * @throws Exception if the ARFF file cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        Instances dataset;
        // Close the file as soon as the data set has been read from it.
        try (BufferedReader reader = new BufferedReader(
                new FileReader("C:/Users/Student/workspace/Data Mining/src/iris.arff.txt"))) {
            dataset = new Instances(reader);
        }

        // Work on a copy whose class attribute is the last attribute.
        Instances train = new Instances(dataset);
        train.setClassIndex(train.numAttributes() - 1);
        System.out.println(dataset.toSummaryString());

        // The second constructor argument is the attribute's index.
        Attribute a1 = new Attribute("sepallength", 0);
        Attribute a2 = new Attribute("sepalwidth", 1);
        // NOTE(review): in the standard iris.arff, index 2 is presumably
        // "petallength" and "petalwidth" is index 3 - confirm against the file.
        Attribute a3 = new Attribute("petalwidth", 2);

        // Every attribute that was created is added to the vector, not just the first.
        FastVector attrs = new FastVector();
        attrs.addElement(a1);
        attrs.addElement(a2);
        attrs.addElement(a3);
    }
}
FastVector is deprecated. You can use an ArrayList instead.
If you use an arff file, however, you don't have to do any of that. You can just do the following:
ArffLoader loader = new ArffLoader();
loader.setFile(new File("iris.arff"));
// Reads the header of the ARFF file and treats the last attribute as the class.
Instances structure = loader.getStructure();
structure.setClassIndex(structure.numAttributes() - 1);
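From here, you can create a classifier based on your instances (structure). Note that getStructure() returns only the header (the attribute definitions); call loader.getDataSet() if you also need the data rows.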

How to run particular Test step of soapUi in java

I want to run a particular test step of my SoapUI test case from Java code. My problem is that running at the test step level requires a TestCaseRunner argument, which is an anonymous inner type, and a TestCaseRunContext, which is an interface. Do I have to implement both to run the step? If yes, could you please provide a sample showing how to do that?
here's my code
package com.testauto.soaprunner.soap.impl;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.eviware.soapui.SoapUI;
import com.eviware.soapui.StandaloneSoapUICore;
import com.eviware.soapui.impl.wsdl.WsdlProject;
import com.eviware.soapui.impl.wsdl.WsdlTestSuite;
import com.eviware.soapui.impl.wsdl.testcase.WsdlTestCase;
import com.eviware.soapui.impl.wsdl.testcase.WsdlTestCaseRunner;
import com.eviware.soapui.impl.wsdl.teststeps.WsdlTestStep;
import com.eviware.soapui.model.TestPropertyHolder;
import com.eviware.soapui.model.iface.MessageExchange;
import com.eviware.soapui.model.propertyexpansion.PropertyExpansionUtils;
import com.eviware.soapui.model.testsuite.TestCase;
import com.eviware.soapui.model.testsuite.TestCaseRunContext;
import com.eviware.soapui.model.testsuite.TestProperty;
import com.eviware.soapui.model.testsuite.TestStepResult;
import com.eviware.soapui.model.testsuite.TestSuite;
import com.eviware.soapui.support.types.StringToObjectMap;
import com.eviware.soapui.support.types.StringToStringsMap;
import com.testauto.soaprunner.data.InputData;
import com.testauto.soaprunner.data.ReportData;
public class RunTestImpl{
static Logger logger = LoggerFactory.getLogger(RunTestImpl.class);
List<ReportData> reportDatList=new ArrayList<ReportData>();
public List<ReportData> process(Map<String, String> readDataMap, InputData input, Map<List<String>, String> configurationMap, List<String> configuration, WsdlTestSuite testSuite)
{
List<ReportData> report = new ArrayList<ReportData>();
logger.info("Into the Class for running test cases");
try{
report= getTestSuite(readDataMap,input,configurationMap,configuration,testSuite);
}
catch(Exception e)
{
logger.info(e.getMessage());
}
return report;
}
private List<ReportData> getTestSuite(Map<String, String> readDataMap, InputData input, Map<List<String>, String> configurationMap, List<String> configuration, WsdlTestSuite testSuite) throws Exception {
ReportData report=new ReportData();
logger.info("Into the Class for running test cases");
String suiteName = "";
String reportStr = "";
List<String> testCaseNameList= setPropertyValues(readDataMap,input);
WsdlTestCaseRunner runner = null;
List<TestSuite> suiteList = new ArrayList<TestSuite>();
List<TestCase> caseList = new ArrayList<TestCase>();
SoapUI.setSoapUICore(new StandaloneSoapUICore(true));
System.out.println("testcase name "+ configurationMap.get(configuration));
// WsdlTestCase testCase= testSuite.getTestCaseByName(input.getApiName()+"_"+testCaseName+"_TestCase");
WsdlTestCase testCase= testSuite.getTestCaseByName("my_TESTCASE");
WsdlTestStep tesStep=testCase.getTestStepByName(configurationMap.get(testCaseNameList));
System.out.println("test case name:"+testCase.getName());
report.setTestCase(testCase.getName());
suiteList.add(testSuite);
runner= tesStep.run(?,?);
return reportDatList;
}
private List<String> setPropertyValues(Map<String, String> readDataMap, InputData input) {
String testCaseName="";
TestPropertyHolder holder = PropertyExpansionUtils.getGlobalProperties();
List<String> dataConfigurationList=new ArrayList<String>();
Iterator entries = readDataMap.entrySet().iterator();
while (entries.hasNext()) {
Entry thisEntry = (Entry) entries.next();
String key = (String) thisEntry.getKey();
String value = (String) thisEntry.getValue();
testCaseName+=key;
holder.setPropertyValue(key, holder.getPropertyValue(key));
dataConfigurationList.add(key);
}
System.out.println("testCaseName"+testCaseName);
return dataConfigurationList;
}
}
}
After trying different things I got something like this.
// Create the runner once so that the context and the step execution share the same instance.
MockTestRunner runner = new MockTestRunner(testStep.getTestCase());
TestCaseRunContext context = new MockTestRunContext(runner, testStep);
TestStepResult testStepResult = testStep.run(runner, context);
I don't know why this works, but this trick worked for me. If someone knows the reason behind it, please share.

How to replace variables in the header and in tables with docx4j?

I'm trying to replace variables in the header of a document and in tables but I don't know how to proceed. I managed to replace variables in the body of the document but this method (using ${}) does not work for the headers and tables.
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.HashMap;
import java.util.List;
import org.docx4j.XmlUtils;
import org.docx4j.customxml.ObjectFactory;
import org.docx4j.dml.wordprocessingDrawing.Inline;
import org.docx4j.jaxb.Context;
import org.docx4j.model.datastorage.migration.VariablePrepare;
import org.docx4j.model.structure.HeaderFooterPolicy;
import org.docx4j.model.structure.SectionWrapper;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.CustomXmlDataStoragePart;
import org.docx4j.openpackaging.parts.Part;
import org.docx4j.openpackaging.parts.PartName;
import org.docx4j.openpackaging.parts.Parts;
import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPartAbstractImage;
import org.docx4j.openpackaging.parts.WordprocessingML.HeaderPart;
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
import org.docx4j.utils.BufferUtil;
import org.docx4j.wml.Hdr;
import org.docx4j.wml.HdrFtrRef;
import org.docx4j.wml.HeaderReference;
import java.util.Locale;
import javax.xml.bind.JAXBElement;
import java.text.DateFormat;
import org.docx4j.openpackaging.parts.WordprocessingML.HeaderPart;
import org.docx4j.wml.HdrFtrRef;
/**
 * Fills the ${...} placeholders of an invoice template, in the document body
 * and in the section headers, and saves the result as a new .docx file.
 */
public class EditInvoice {
    private static WordprocessingMLPackage template;
    private static ObjectFactory factory;

    /**
     * Loads "invoice_template_sample.docx" from the working directory, replaces
     * its variables and writes "InvoiceEdited.docx" to the working directory.
     *
     * @param args unused
     * @throws Exception if the template cannot be loaded or the result cannot be saved
     */
    public static void main(String[] args) throws Exception {
        boolean save = true;
        String outputfilepath = System.getProperty("user.dir")+ "/InvoiceEdited.docx";

        java.util.Date today = new java.util.Date();
        DateFormat dateFormat = DateFormat.getDateInstance(DateFormat.FULL, Locale.getDefault());

        // Close the template file once the package has been loaded from it.
        try (FileInputStream templateStream = new FileInputStream(new File("invoice_template_sample.docx"))) {
            template = WordprocessingMLPackage.load(templateStream);
        }
        VariablePrepare.prepare(template);
        List<SectionWrapper> sectionWrappers = template.getDocumentModel().getSections();
        MainDocumentPart documentPart = template.getMainDocumentPart();

        HashMap<String, String> variables = new HashMap<String, String>();
        // populate doc variables
        variables.put("Name", "John Doe");
        variables.put("Phone", "(123) 456 78 90");
        variables.put("CompanyName", "BSI Business Systems Integration AG");
        variables.put("Email", "john.doe#bsiag.com");
        variables.put("CompanyAddress", "Täfernstrasse 16a, 5405 Baden");
        variables.put("InvoiceNo", "No. 2013-007");
        variables.put("InvoiceDate", dateFormat.format(today));
        variables.put("BillingName", "Jane Smith");
        variables.put("PayableToName", "John Doe, BSI");
        variables.put("SubTotal", "$1,530.00");
        variables.put("SalesTax", "$229.50");
        variables.put("Shipping", "$250.00");
        variables.put("Total", "$2,009.50");

        // Content for the embedded table.
        // NOTE(review): not wired in yet - variable replacement only substitutes
        // text and does not insert table rows.
        Object[][] orderItems = new Object[][]{
            new Object[]{"1", "Table", "$800.00", "$800.00"},
            new Object[]{"4", "Chair", "$150.00", "$600.00"},
            new Object[]{"1", "Assembling", "$130.00", "$130.00"},
        };

        try {
            documentPart.variableReplace(variables);
            // Headers live in their own parts, so replacing variables in the
            // main document part does not touch them; handle each header part.
            for (SectionWrapper section : sectionWrappers) {
                HeaderFooterPolicy headerFooterPolicy = section.getHeaderFooterPolicy();
                if (headerFooterPolicy == null) {
                    continue;
                }
                replaceInHeader(headerFooterPolicy.getFirstHeader(), variables);
                replaceInHeader(headerFooterPolicy.getDefaultHeader(), variables);
                replaceInHeader(headerFooterPolicy.getEvenHeader(), variables);
            }
        } catch (Exception e) {
            // Best effort: report the problem and still write whatever was replaced.
            System.out.println(e);
        }

        if (save) {
            template.save(new java.io.File(outputfilepath));
        } else {
            System.out.println(XmlUtils.marshaltoString(documentPart.getContents(), true, true));
        }
    }

    /** Replaces the variables in {@code header}; does nothing when the section has no such header. */
    private static void replaceInHeader(HeaderPart header, HashMap<String, String> variables) throws Exception {
        if (header != null) {
            header.variableReplace(variables);
        }
    }
}
To replace variables in headers, you need to do variable replacement to the relevant header parts. Here, you're only doing it in the main document part.
Regarding tables, the variable replacement stuff isn't designed to duplicate rows (eg one row per invoice line item). In other words, it won't insert rows. So without more code on your part, your Object[][] orderItems won't do anything.
(In contrast, docx4j's XML data binding does handle that, using an OpenDoPE od:repeat)

Categories

Resources