I am getting the below error while running the program. I am trying to find the average of the DStream in the format (name, avg).
'The method combineByKey(Function, Function2, Function2, Partitioner) in the type JavaPairDStream is not applicable for the arguments (Function, Function2, Function2)'
Please help.
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaReceiverInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.log4j.*;
import org.json.JSONArray;
import org.json.JSONObject;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import java.util.Map;
import java.util.Map.Entry;
import scala.Tuple2;
public class FirstSparkApplication {
@SuppressWarnings("serial")
public static class AvgCount implements java.io.Serializable {
public AvgCount(double total, int num) {
total_ = total;
num_ = num;
}
public double total_;
public int num_;
public double avg() {
return total_ / (double) num_;
}
}
public static void main(String[] args) throws InterruptedException {
SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("FirstSparkApplication");
JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(20));
Logger.getRootLogger().setLevel(Level.ERROR);
Function2<Double, Double, Double> reduceFunc = new Function2<Double, Double, Double>() {
public Double call(Double result, Double value)
throws Exception {
System.out.println("Reduce running");
System.out.println(result + "+" + value);
return result + value;
}
};
JavaDStream<String> lines = jssc.textFileStream("/home/dominic/Downloads/DATADIR").cache();
final String[] path = new String[]{ "/home/dominic/Downloads/OUTPUTDIR"};
JavaPairDStream<String, Double> pair = lines.flatMapToPair(
new PairFlatMapFunction<String, String, Double>() {
private static final long serialVersionUID = 67676744;
public Iterator<Tuple2<String, Double>> call(String t) throws Exception {
List<Tuple2<String, Double>> list = new ArrayList<Tuple2<String, Double>>();
JSONArray js1 = new JSONArray(t);
for (int i = 0; i < js1.length(); i++) {
String symbol = js1.getJSONObject(i).get("symbol")
.toString();
JSONObject jo = new JSONObject(js1.getJSONObject(i)
.get("priceData").toString());
list.add(new Tuple2<String, Double>(symbol,jo.getDouble("close")));
}
return list.iterator();
}
});
JavaPairDStream<String, Double> result=pair.reduceByKeyAndWindow(reduceFunc, Durations.seconds(100), Durations.seconds(60));
pair.print();
result.print();
//Average
Function<Double, AvgCount> createAcc = new Function<Double, AvgCount>() {
public AvgCount call(Double x) {
return new AvgCount(x, 1);
}
};
Function2<AvgCount, Double, AvgCount> addAndCount = new Function2<AvgCount, Double, AvgCount>() {
public AvgCount call(AvgCount a, Double x) {
a.total_ += x;
a.num_ += 1;
return a;
}
};
Function2<AvgCount, AvgCount, AvgCount> combine = new Function2<AvgCount, AvgCount, AvgCount>() {
public AvgCount call(AvgCount a, AvgCount b) {
a.total_ += b.total_;
a.num_ += b.num_;
return a;
}
};
AvgCount initial = new AvgCount(0,0);
JavaPairDStream<String, AvgCount> avgCounts = result.combineByKey(createAcc, addAndCount, combine);
// Map<String, AvgCount> countMap = avgCounts.collectAsMap();
// for (Entry<String, AvgCount> entry : countMap.entrySet()) {
// System.out.println(entry.getKey() + ":" + entry.getValue().avg());
// }
jssc.start();
jssc.awaitTermination();
jssc.close();
}
}
You're missing a Partitioner implementation in the call to combineByKey. That's what the compiler is telling you.
result.combineByKey(createAcc, addAndCount, combine /*, Partitioner here */);
It's a good habit to check the appropriate documentation once you get these kinds of errors. See Partitioner.
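For example, here is a minimal sketch of the fixed call, reusing your existing createAcc, addAndCount and combine functions; the partition count passed to Spark's HashPartitioner (2 here) is an arbitrary illustration, so pick one that matches your parallelism:
import org.apache.spark.HashPartitioner;
// same combiner functions as above, now with an explicit Partitioner
JavaPairDStream<String, AvgCount> avgCounts =
    result.combineByKey(createAcc, addAndCount, combine, new HashPartitioner(2));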
I would like to group specific files based on their file names from multiple paths. I have followed this Stack Overflow link, but I have not been able to loop through each file after I start streaming the paths to find that specific file name.
Here are the paths to the files:
/var/tmp/data_sample1/data2_first_example.set.csv
/var/tmp/data_sample1/data3_first_example.set.csv
/var/tmp/data_sample1/data1_first_example.set.csv
/var/tmp/data_sample2/data2_second_example.set.csv
/var/tmp/data_sample2/data1_second_example.set.csv
/var/tmp/data_sample2/data3_second_example.set.csv
/tmp/csv_files/data_sample3/data2_third_example.set.csv
/tmp/csv_files/data_sample3/data1_third_example.set.csv
/tmp/csv_files/data_sample3/data3_third_example.set.csv
Enum Class:
enum PersonType {
A,
B
}
FileName.java
import java.util.Arrays;
import java.util.List;
public class FileName {
private final String first = "_first_sample";
private final String second = "_second_sample";
private final String third = "_third_sample";
private final List<String> filenames = Arrays.asList(first, second, third);
public List<String> getFilenames() {
return filenames;
}
}
CSVFiles.java
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;
public class CSVFiles {
private PersonType personType;
private List<String> fileNames = new ArrayList<>();
private List<File> firstSample = new ArrayList<>();
private List<File> secondSample = new ArrayList<>();
private List<File> thirdSample = new ArrayList<>();
public CSVFiles(PersonType personType, List<String> paths) {
if (personType == PersonType.A) {
this.personType = personType;
FileName fileName = new FileName();
this.fileNames = fileName.getFilenames();
setCSVFiles(paths);
}
}
public List<File> setCSVFiles(List<String> paths) {
List<Path> collect = paths.stream()
.flatMap(path -> {
try {
return Files.find(Paths.get(path), Integer.MAX_VALUE,
(p, attrs) -> attrs.isRegularFile()
&& p.toString().contains(".set")
&& p.toString().endsWith(".csv")
);
} catch (IOException ex) {
throw new UncheckedIOException(ex);
}
}).collect(Collectors.toList());
return collect.stream()
.map(Path::toFile)
.filter(file -> {
if (file.getName().contains("_first_sample")) {
firstSample.add(file);
return true;
} else if (file.getName().contains("_second_sample")) {
secondSample.add(file);
return true;
} else if (file.getName().contains("_third_sample")) {
thirdSample.add(file);
return true;
}
return false;
})
.collect(Collectors.toList());
}
}
CSVFilesTest.java
import org.junit.Test;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
public class CSVFilesTest {
@Test
public void test() {
String data_sample1 = "/var/tmp/data_sample1";
String data_sample2 = "/var/tmp/data_sample2";
String data_sample3 = "/tmp/csv_files/data_sample3";
List<String> paths = Arrays.asList(data_sample1, data_sample2, data_sample3);
System.out.println(paths);
CSVFiles csvFiles = new CSVFiles(PersonType.A, paths);
}
}
Desired Output:
firstSample: [data1_first_example.set.csv, data2_first_example.set.csv, data3_first_example.set.csv]
secondSample: [data1_second_example.set.csv, data2_second_example.set.csv, data3_second_example.set.csv]
thirdSample: [data1_third_example.set.csv, data2_third_example.set.csv, data3_third_example.set.csv]
Any feedback is appreciated!
Solution, thanks to "sync it"'s comments:
public Map<String, List<String>> setCSVFiles(List<String> paths) {
List<Path> collect = paths.stream()
.flatMap(path -> {
try {
return Files.find(Paths.get(path), Integer.MAX_VALUE,
(p, attrs) -> attrs.isRegularFile()
&& p.toString().contains(".set")
&& p.toString().endsWith(".csv")
);
} catch (IOException ex) {
throw new UncheckedIOException(ex);
}
}).collect(Collectors.toList());
return collect.stream()
.map(Path::toString)
.collect(Collectors.groupingBy(path ->
path.substring(path.lastIndexOf("/")+1)
));
}
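For illustration, a hypothetical caller (the variable names are mine, not from the question) that prints each group; the keys are the bare file names produced by the groupingBy collector above:
// group the files and print each bare file name with its matching full paths
Map<String, List<String>> grouped = csvFiles.setCSVFiles(paths);
grouped.forEach((fileName, fullPaths) ->
        System.out.println(fileName + " -> " + fullPaths));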
I have a random number of coordinates for a polygon taken from a shapefile.
-119.00072399999999 35.36158, -118.99903 35.361576, -118.999026 35.362579, -118.999023 35.363482, -118.999019 35.36432, -118.999408 35.364847999999995, -118.999406 35.365564, -118.999402 35.366516, -118.999398 35.367467999999995, -118.999394 35.368438, -118.999256 35.368438, -118.998232 35.368441
I now have to check if a point (33.63705, -112.17563) is inside this polygon.
My concern is that my coordinates don't fit into an int datatype.
Here is what I have tried:
import java.awt.Polygon;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.geotools.data.DataStore;
import org.geotools.data.DataStoreFinder;
import org.geotools.data.simple.SimpleFeatureCollection;
import org.geotools.data.simple.SimpleFeatureIterator;
import org.geotools.data.simple.SimpleFeatureSource;
import org.geotools.feature.DefaultFeatureCollection;
import org.geotools.feature.simple.SimpleFeatureBuilder;
import org.geotools.feature.simple.SimpleFeatureTypeBuilder;
import org.geotools.geometry.jts.JTSFactoryFinder;
import org.geotools.referencing.crs.DefaultGeographicCRS;
import org.locationtech.jts.geom.Coordinate;
import org.locationtech.jts.geom.GeometryFactory;
import org.locationtech.jts.geom.Point;
import org.opengis.feature.simple.SimpleFeature;
import org.opengis.feature.simple.SimpleFeatureType;
public class ReadShapeFile {
public static void main(String[] args) {
File file = new File("D:\\shapefile201806\\tl_2018_06_bg.shp");
try {
Map<String, String> connect = new HashMap<String, String>();
connect.put("url", file.toURI().toString());
DataStore dataStore = DataStoreFinder.getDataStore(connect);
String[] typeNames = dataStore.getTypeNames();
String typeName = typeNames[0];
System.out.println("Reading content : " + typeName);
SimpleFeatureSource featureSource = dataStore.getFeatureSource(typeName);
SimpleFeatureCollection collection = featureSource.getFeatures();
SimpleFeatureIterator iterator = collection.features();
try {
while (iterator.hasNext()) {
SimpleFeature feature = iterator.next();
String featureString = feature.toString();
List<String> polygonList = new ArrayList<String>();
String polygonCoordinates = StringUtils.substringBetween(featureString, "(((", ")))");
System.out.println(polygonCoordinates);
polygonList = Arrays.asList(polygonCoordinates.split(","));
SimpleFeatureTypeBuilder b = new SimpleFeatureTypeBuilder();
b.setName("MyFeatureType");
b.setCRS(DefaultGeographicCRS.WGS84);
b.add("location", Point.class);
final SimpleFeatureType TYPE = b.buildFeatureType();
SimpleFeatureBuilder featureBuilder = new SimpleFeatureBuilder(TYPE);
GeometryFactory geometryFactory = JTSFactoryFinder.getGeometryFactory();
SimpleFeature pointFeature = featureBuilder.buildFeature(null);
DefaultFeatureCollection featureCollection = new DefaultFeatureCollection("internal", TYPE);
featureCollection.add(pointFeature);
try {
Polygon polygon = new Polygon();
for (int i = 0; i < polygonList.size(); i++) {
String[] splitAxis = (polygonList.get(i).split("\\s+"));
polygon.addPoint(Integer.valueOf(splitAxis[0]), Integer.valueOf(splitAxis[1]));
}
boolean isInside = polygon.contains(33.63705, -112.17563);
System.out.println(isInside);
} catch (Exception e) {
e.printStackTrace();
}
}
} finally {
iterator.close();
}
} catch (Throwable e) {
}
}
}
I knew that converting a double to a string and back to an integer was not going to work anyway.
How can I check whether a point is inside the polygon when the coordinates are negative decimal values? Please help.
Using your SimpleFeature, you can call getDefaultGeometry and get a Geometry object. Once you cast it to a Geometry, there is a contains method which takes a Point.
Also, you don't want to use the java.awt.Polygon class here; use the org.locationtech.jts geometry classes instead.
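A minimal sketch of that approach inside your iterator loop, assuming the shapefile stores WGS84 coordinates in (longitude, latitude) order; note that your point (33.63705, -112.17563) reads as (lat, lon), so it is swapped below. Coordinate, GeometryFactory, Point and JTSFactoryFinder are already in your import list; only Geometry is new:
import org.locationtech.jts.geom.Geometry;
// the feature's geometry is the polygon read from the shapefile
Geometry geometry = (Geometry) feature.getDefaultGeometry();
GeometryFactory geometryFactory = JTSFactoryFinder.getGeometryFactory();
// JTS coordinates are (x, y) = (longitude, latitude)
Point point = geometryFactory.createPoint(new Coordinate(-112.17563, 33.63705));
boolean isInside = geometry.contains(point);
System.out.println(isInside);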
This is my class to fetch the list of OHLC (open, high, low, close) values, plus the volume and date. I have separated the ArrayLists per stock symbol and used my local API to fetch the data. To perform all these calculations I have used the ta4j library for Java.
package com.infodev.util;
import com.google.gson.Gson;
import com.infodev.Model.Data;
import com.infodev.Model.Find;
import com.infodev.Pojo.RequestForTechnicalCalculation;
import org.apache.log4j.Logger;
import org.json.simple.JSONObject;
import org.springframework.http.*;
import org.springframework.http.converter.json.MappingJackson2HttpMessageConverter;
import org.springframework.web.client.RestTemplate;
import java.math.BigDecimal;
public class ApiTestData {
static Logger logger = Logger.getLogger(ApiTestData.class);
private static Data[] a;
public ApiTestData(RequestForTechnicalCalculation requestForTechnicalCalculation) throws Exception {
//setting request body
JSONObject jsonObject = new JSONObject();
jsonObject.put("sectorId", requestForTechnicalCalculation.getSectorId());
//setting request headers
HttpHeaders httpHeaders = new HttpHeaders();
httpHeaders.setContentType(MediaType.APPLICATION_JSON);
//setting httpEntity as the request for server post request
HttpEntity<?> httpEntity = new HttpEntity<>(jsonObject.toString(), httpHeaders);
//installing restTemplate
RestTemplate restTemplate = new RestTemplate();
restTemplate.getMessageConverters().add(new MappingJackson2HttpMessageConverter());
ResponseEntity<Find> returnedObject = restTemplate.exchange("http://localhost:8081/pull365", HttpMethod.POST, httpEntity, Find.class);
a = returnedObject.getBody().getData();
logger.info("ApiData " + new Gson().toJson(a));
}
public int getDataSize() {
return a.length;
}
public BigDecimal[] getOpen(int index) {
return a[index].getOpen();
}
public BigDecimal[] getHigh(int index) {
return a[index].getHigh();
}
public BigDecimal[] getLow(int index) {
return a[index].getLow();
}
public BigDecimal[] getClose(int index) {
return a[index].getClose();
}
public BigDecimal[] getVolume(int index) {
return a[index].getVolume();
}
public String[] getDates(int index) {
return a[index].getDates();
}
public String getSymbols(int index) {
logger.info("stock name " +new Gson().toJson(a[index].getStockName()));
return a[index].getStockName();
}
}
This is my calculation part to get the values of RSI. I have also calculated other indicators, which are exactly correct according to my manual calculations, but the problem seems to be in the calculation of RSI.
package com.infodev.Services.Indicators;
import com.infodev.Pojo.RequestForTechnicalCalculation;
import com.infodev.util.ApiTestData;
import eu.verdelhan.ta4j.Decimal;
import eu.verdelhan.ta4j.Tick;
import eu.verdelhan.ta4j.TimeSeries;
import eu.verdelhan.ta4j.indicators.candles.LowerShadowIndicator;
import eu.verdelhan.ta4j.indicators.helpers.*;
import eu.verdelhan.ta4j.indicators.oscillators.CMOIndicator;
import eu.verdelhan.ta4j.indicators.oscillators.PPOIndicator;
import eu.verdelhan.ta4j.indicators.oscillators.StochasticOscillatorDIndicator;
import eu.verdelhan.ta4j.indicators.oscillators.StochasticOscillatorKIndicator;
import eu.verdelhan.ta4j.indicators.simple.*;
import eu.verdelhan.ta4j.indicators.statistics.*;
import eu.verdelhan.ta4j.indicators.trackers.*;
import eu.verdelhan.ta4j.indicators.trackers.bollinger.BollingerBandWidthIndicator;
import eu.verdelhan.ta4j.indicators.trackers.bollinger.BollingerBandsLowerIndicator;
import eu.verdelhan.ta4j.indicators.trackers.bollinger.BollingerBandsMiddleIndicator;
import eu.verdelhan.ta4j.indicators.trackers.bollinger.BollingerBandsUpperIndicator;
import eu.verdelhan.ta4j.indicators.volatility.MassIndexIndicator;
import eu.verdelhan.ta4j.indicators.volume.ChaikinMoneyFlowIndicator;
import eu.verdelhan.ta4j.indicators.volume.OnBalanceVolumeIndicator;
import org.apache.log4j.Logger;
import org.joda.time.DateTime;
import org.joda.time.Period;
import org.springframework.stereotype.Service;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.math.BigDecimal;
import java.text.DecimalFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@Service
public class IndicatorServiceImpl implements IndicatorService {
static Logger logger = Logger.getLogger(IndicatorServiceImpl.class);
private static DecimalFormat df = new DecimalFormat("###,###.##");
private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd");
List<Tick> ticks;
List<Tick> tickList;
TimeSeries series;
ClosePriceIndicator closePrice;
SMAIndicator shortSma;
SMAIndicator longSma;
EMAIndicator shortEma;
EMAIndicator longEma;
RSIIndicator rsi;
StandardDeviationIndicator sd;
MACDIndicator macd;
BollingerBandsMiddleIndicator bbm;
BollingerBandsLowerIndicator bbl;
BollingerBandsUpperIndicator bbh;
BollingerBandWidthIndicator bbw;
ApiTestData apiData;
String symbol;
String[] date;
BigDecimal[] volume;
BigDecimal[] close;
BigDecimal[] low;
BigDecimal[] high;
BigDecimal[] open;
@Override
public List<Map<Object, Object>> getIndicators(RequestForTechnicalCalculation requestForTechnicalCalculation) {
System.out.println("service state");
List<Map<Object, Object>> finalList = new ArrayList<>();
try {
apiData = new ApiTestData(requestForTechnicalCalculation);
logger.info("----" + apiData.getDataSize());
for (int i = 0; i < apiData.getDataSize(); i++) {
logger.info("----" + i);
// getting the symbol from the api
symbol = apiData.getSymbols(i);
date = apiData.getDates(i);
volume = apiData.getVolume(i);
close = apiData.getClose(i);
low = apiData.getLow(i);
high = apiData.getHigh(i);
open = apiData.getOpen(i);
if (date.length == 0 || volume.length == 0 || close.length == 0 ||
low.length == 0 || high.length == 0 || open.length == 0) {
finalList.add(makeEmptyObject());
} else {
makeCalculation(i);
finalList.add(makeIndicatorObject());
}
}
//return finalList;
} catch (Exception e) {
e.printStackTrace();
finalList.add(makeEmptyObject());
}
return finalList;
}
private void makeCalculation(int ii) throws ParseException {
//instantiating ticks to convert the OHLC arrays into a list of Tick objects
ticks = new ArrayList<>();
logger.info("----" + ticks.size());
for (int i = 0; i < close.length; i++) {
this.ticks.add(new Tick(new DateTime(DATE_FORMAT.parse(date[i])), open[i].doubleValue(), high[i].doubleValue()
, low[i].doubleValue(), close[i].doubleValue(), volume[i].doubleValue()));
}
//converting the array to the list of tick
//generating the time Series of the sample data
series = new TimeSeries(apiData.getSymbols(ii), ticks);
if (series == null) {
throw new IllegalArgumentException("Series cannot be null");
} else {
//close price indicator
closePrice = new ClosePriceIndicator(this.series);
logger.info("ClosePrice: " + closePrice.getValue(series.getEnd()));
// Simple moving averages
shortSma = new SMAIndicator(closePrice, 5);
logger.info("shortSMA: " + shortSma.getValue(series.getEnd()));
longSma = new SMAIndicator(closePrice, 20);
logger.info("longSMA: " + longSma.getValue(series.getEnd()));
// Exponential moving averages
shortEma = new EMAIndicator(closePrice, 5);
logger.info("shortEMA: " + shortEma.getValue(series.getEnd()));
longEma = new EMAIndicator(closePrice, 20);
logger.info("longEMA: " + longEma.getValue(series.getEnd()));
rsi = new RSIIndicator(closePrice, 14);
series.getLastTick().addTrade(100, rsi.getValue(series.getEnd()).toDouble());
//newTick.addTrade(100, rsi.getValue(series.getEnd()).toDouble());
logger.info("RsiIndicator: " + rsi.getValue(series.getEnd()));
// Standard deviation
sd = new StandardDeviationIndicator(closePrice, 20);
logger.info("StandardDeviationIndicator: " + sd.getValue(series.getEnd()));
//macd indicator
macd = new MACDIndicator(closePrice, 12, 26);
logger.info("MACD indicator: " + macd.getValue(series.getEnd()));
//bollingerbandsmiddle indicator
bbm = new BollingerBandsMiddleIndicator(longSma);
logger.info("Bollinger Bands Middle Indicator :" + bbm.getValue(series.getEnd()));
bbl = new BollingerBandsLowerIndicator(bbm, sd);
logger.info("Bollinger bands lower indicator :" + bbl.getValue(series.getEnd()));
bbh = new BollingerBandsUpperIndicator(bbm, sd);
logger.info("Bollinger bands upper indicator :" + bbh.getValue(series.getEnd()));
bbw = new BollingerBandWidthIndicator(bbh, bbm, bbl);
logger.info("Bollinger band width :" + bbw.getValue(series.getEnd()));
StringBuilder sb = new StringBuilder("timestamp,close,typical,variation,sma8,sma20,ema8,ema20,ppo,roc,rsi,williamsr,atr,sd\n");
/**
* Adding indicators values
*/
final int nbTick = series.getTickCount();
for (int i = 0; i < nbTick; i++) {
sb.append(series.getTick(i).getEndTime()).append(',')
.append(closePrice.getValue(i)).append(',')
.append(typicalPrice.getValue(i)).append(',')
.append(priceVariation.getValue(i)).append(',')
.append(shortSma.getValue(i)).append(',')
.append(longSma.getValue(i)).append(',')
.append(shortEma.getValue(i)).append(',')
.append(longEma.getValue(i)).append(',')
.append(ppo.getValue(i)).append(',')
.append(roc.getValue(i)).append(',')
.append(rsi.getValue(i)).append(',')
.append(williamsR.getValue(i)).append(',')
.append(atr.getValue(i)).append(',')
.append(sd.getValue(i)).append('\n');
}
/**
* Writing CSV file
*/
BufferedWriter writer = null;
try {
writer = new BufferedWriter(new FileWriter("C:\\Users\\Administrator\\Desktop\\fafa\\indicators.csv"));
writer.write(sb.toString());
} catch (IOException ioe) {
System.out.println(ioe);
} finally {
try {
if (writer != null) {
writer.close();
}
} catch (IOException ioe) {
}
}
}
}
private Map<Object, Object> makeIndicatorObject() {
// Map for indicator values.
try {
logger.info("map state of make indicator");
Map<Object, Object> indicators = new LinkedHashMap<>();
indicators.put("symbol", symbol);
indicators.put("ClosePrice", formatBigDecimal(closePrice.getValue(series.getEnd()).toDouble()));
indicators.put("ShortSMA", formatBigDecimal(shortSma.getValue(series.getEnd()).toDouble()));
indicators.put("LongSMA", formatBigDecimal(longSma.getValue(series.getEnd()).toDouble()));
indicators.put("ShortEMA", formatBigDecimal(shortEma.getValue(series.getEnd()).toDouble()));
indicators.put("LongEMA", formatBigDecimal(longEma.getValue(series.getEnd()).toDouble()));
indicators.put("RSI", formatBigDecimal(rsi.getValue(series.getEnd()).toDouble()));
indicators.put("SD", formatBigDecimal(sd.getValue(series.getEnd()).toDouble()));
indicators.put("MACD", formatBigDecimal(macd.getValue(series.getEnd()).toDouble()));
indicators.put("BBM", formatBigDecimal(bbm.getValue(series.getEnd()).toDouble()));
indicators.put("BBL", formatBigDecimal(bbl.getValue(series.getEnd()).toDouble()));
indicators.put("BBH", formatBigDecimal(bbh.getValue(series.getEnd()).toDouble()));
indicators.put("BBW", formatBigDecimal(bbw.getValue(series.getEnd()).toDouble()));
return indicators;
} catch (Exception e) {
e.printStackTrace();
return null;
}
}
private BigDecimal formatBigDecimal(double value) {
try {
return new BigDecimal(df.format(value));
} catch (Exception e) {
return new BigDecimal(0);
}
}
private Map<Object, Object> makeEmptyObject() {
logger.info("map state of empty object");
Map<Object, Object> indicators = new LinkedHashMap<>();
indicators.put("symbol", symbol);
indicators.put("ClosePrice", new BigDecimal(0));
indicators.put("ShortSMA", new BigDecimal(0));
indicators.put("LongSMA", new BigDecimal(0));
indicators.put("ShortEMA", new BigDecimal(0));
indicators.put("LongEMA", new BigDecimal(0));
indicators.put("RSI", new BigDecimal(0));
indicators.put("SD", new BigDecimal(0));
indicators.put("MACD", new BigDecimal(0));
indicators.put("BBM", new BigDecimal(0));
indicators.put("BBL", new BigDecimal(0));
indicators.put("BBH", new BigDecimal(0));
indicators.put("BBW", new BigDecimal(0));
return indicators;
}
}
This is the JSON output from the local API that is used in the first class (ApiTestData).
Currently I am trying to get the cosine similarity between two documents with Lucene (4.10.4).
I have already read this answer about cosine similarity with Lucene, and I used that example to understand how it works with Lucene.
But when I tested with two documents containing the same words (e.g. "hello world" in each), I got a cosine similarity of 0.9999999999999998 instead of 1.0.
My code looks like this:
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.FileUtils;
import org.apache.commons.math3.linear.ArrayRealVector;
import org.apache.commons.math3.linear.RealVector;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
public class CosineSimeTest {
static String indexName = "/tmp/CosineExample";
public static final String CONTENT = "field";
public static final int N = 2;
private final Set<String> terms = new HashSet<>();
private final RealVector v1;
private final RealVector v2;
public static void main(String[] args) {
try {
CosineSimeTest cosSim = new CosineSimeTest("hello world", "hello world");
System.out.println(cosSim.getCosineSimilarity());
} catch (IOException e) {
e.printStackTrace();
}
}
public CosineSimeTest(String s1, String s2) throws IOException {
Directory directory = createIndex(s1, s2);
IndexReader reader = DirectoryReader.open(directory);
Map<String, Double> f1 = getWeights(reader, 0);
Map<String, Double> f2 = getWeights(reader, 1);
reader.close();
v1 = toRealVector(f1);
System.out.println("V1: " + v1);
v2 = toRealVector(f2);
System.out.println("V2: " + v2);
}
public Directory createIndex(String s1, String s2) throws IOException {
File f = new File(indexName);
if (f.exists()) {
FileUtils.deleteDirectory(f);
}
Directory directory = FSDirectory.open(new File(indexName));
StandardAnalyzer analyzer = new StandardAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig(null, analyzer);
IndexWriter writer = new IndexWriter(directory, iwc);
addDocument(writer, s1);
addDocument(writer, s2);
writer.close();
return directory;
}
public void addDocument(IndexWriter writer, String data) throws IOException {
Document doc = new Document();
FieldType type = new FieldType();
type.setIndexed(true);
type.setStoreTermVectors(true);
type.setStoreTermVectorPositions(true);
type.freeze();
Field field = new Field(CONTENT, data, type);
doc.add(field);
writer.addDocument(doc);
}
public double getCosineSimilarity() {
double dotProduct = v1.dotProduct(v2);
System.out.println("Dot: " + dotProduct);
System.out.println("V1_norm: " + v1.getNorm() + ", V2_norm: " + v2.getNorm());
double normalization = (v1.getNorm() * v2.getNorm());
System.out.println("Norm: " + normalization);
return dotProduct / normalization;
}
public Map<String, Double> getWeights(IndexReader reader, int docId) throws IOException {
Terms vector = reader.getTermVector(docId, CONTENT);
Map<String, Integer> docFrequencies = new HashMap<>();
Map<String, Integer> termFrequencies = new HashMap<>();
Map<String, Double> tf_Idf_Weights = new HashMap<>();
TermsEnum termsEnum = null;
DocsEnum docsEnum = null;
termsEnum = vector.iterator(termsEnum);
BytesRef text = null;
while ((text = termsEnum.next()) != null) {
String term = text.utf8ToString();
docFrequencies.put(term, reader.docFreq(new Term(CONTENT, term)));
docsEnum = termsEnum.docs(null, null);
while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
termFrequencies.put(term, docsEnum.freq());
}
terms.add(term);
}
for (String term : docFrequencies.keySet()) {
int tf = termFrequencies.get(term);
int df = docFrequencies.get(term);
double idf = (1 + Math.log(N) - Math.log(df));
double w = tf * idf;
tf_Idf_Weights.put(term, w);
}
// System.out.println("Printing docFrequencies:");
// printMap(docFrequencies);
//
// System.out.println("Printing termFrequencies:");
// printMap(termFrequencies);
//
// System.out.println("Printing if/idf weights:");
// printMapDouble(tf_Idf_Weights);
return tf_Idf_Weights;
}
public RealVector toRealVector(Map<String, Double> map) {
RealVector vector = new ArrayRealVector(terms.size());
int i = 0;
double value = 0;
for (String term : terms) {
if (map.containsKey(term)) {
value = map.get(term);
} else {
value = 0;
}
vector.setEntry(i++, value);
}
return vector;
}
public static void printMap(Map<String, Integer> map) {
for (String key : map.keySet()) {
System.out.println("Term: " + key + ", value: " + map.get(key));
}
}
public static void printMapDouble(Map<String, Double> map) {
for (String key : map.keySet()) {
System.out.println("Term: " + key + ", value: " + map.get(key));
}
}
public void getVersionOfLucene(StandardAnalyzer analyzer) {
System.out.println("version : " + analyzer.getVersion());
}
}
What is the problem? How can I fix it?
Thanks in advance.
Sorry, I am a newbie. I have been looking for an example of how I can iterate over a HashMap and store the entries in a MySQL database. The below code downloads currency rates, which I would like to store in my database. The output is:
{CCY=USD, RATE=1.5875}
{CCY=EUR, RATE=1.1919}
{CCY=ALL, RATE=166.2959}
{CCY=AMD, RATE=645.4025}
How can I iterate over the HashMap and store its contents in my database? An illustration, or source code for a similar scenario, would be nice.
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
public class Rate {
/** list of string array containing IOSCurrencyCodes*/
final String[] CURRENCY = new String[] { "USD","EUR","ALL","AMD",};
@SuppressWarnings("rawtypes")
void checkRateAllAtEnd() throws Exception {
List<Callable<HashMap>> tasks = new ArrayList<Callable<HashMap>>();
for (final String ccy : CURRENCY) {
tasks.add(new Callable<HashMap>() {
public HashMap<String, Comparable> call() throws Exception {
return getRates(ccy);
}
});
}
ExecutorService executorPool = Executors.newCachedThreadPool();
final List<Future<HashMap>> listRates = executorPool.invokeAll(tasks, 3600, TimeUnit.SECONDS);
for (Future<HashMap> rate : listRates) {
HashMap ccyRate = rate.get();
System.out.println(ccyRate);
}
}
@SuppressWarnings("rawtypes")
public HashMap<String, Comparable> getRates(String ccy) throws Exception {
URL url = new URL("http://finance.yahoo.com/d/quotes.csv?e=.csv&f=sl1d1t1&s=GBP"
+ ccy + "=X");
BufferedReader reader = new BufferedReader(new InputStreamReader(
url.openStream()));
String data = reader.readLine();
String[] dataItems = data.split(",");
Double rate = Double.valueOf(dataItems[1]);
HashMap<String, Comparable> ccyRate = new HashMap<String, Comparable>();
ccyRate.put("CCY", ccy);
ccyRate.put("RATE", rate);
return ccyRate;
}
public static void main(String[] args) {
Rate ccyRate = new Rate();
try {
//ccyConverter.checkRateSequential();
ccyRate.checkRateAllAtEnd();
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
Map provides an entrySet method which is very useful for iterating over a map.
Map#entrySet returns a Set view of the mappings contained in the map. The set is backed by the map, so changes to the map are reflected in the set, and vice versa. If the map is modified while an iteration over the set is in progress (except through the iterator's own remove operation, or through the setValue operation on a map entry returned by the iterator), the results of the iteration are undefined.
ref - link
Code -
HashMap<String, Comparable> ccyRate = new HashMap<String, Comparable>();
for (Map.Entry<String, Comparable> entry : ccyRate.entrySet()) {
System.out.println(entry.getKey()+" - "+entry.getValue());
}
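To actually persist the rates, here is a hedged JDBC sketch; the connection URL, credentials, and the currency_rate table with columns (ccy, rate) are illustrative assumptions, not taken from the question. It would run inside checkRateAllAtEnd(), replacing the println loop over listRates:
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
try (Connection con = DriverManager.getConnection(
        "jdbc:mysql://localhost:3306/mydb", "user", "password");
     PreparedStatement ps = con.prepareStatement(
        "INSERT INTO currency_rate (ccy, rate) VALUES (?, ?)")) {
    for (Future<HashMap> rate : listRates) {
        HashMap ccyRate = rate.get();
        ps.setString(1, ccyRate.get("CCY").toString()); // e.g. "USD"
        ps.setDouble(2, (Double) ccyRate.get("RATE"));  // e.g. 1.5875
        ps.addBatch();                                  // queue one row per currency
    }
    ps.executeBatch(); // single batched round trip to MySQL
}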