public class BookmarkAdd extends AbstractSample {
public static JAXBContext context = org.docx4j.jaxb.Context.jc;
/**
* @param args
*/
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {
String inputfilepath = "Chapter_3.docx";
File file = new java.io.File(inputfilepath);
WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(file);
MainDocumentPart documentPart = wordMLPackage.getMainDocumentPart();
String outputfilepath = System.getProperty("user.dir")+"/ 1.docx";
ClassFinder finder = new ClassFinder(P.class); // <----- change this to suit
new TraversalUtil(documentPart.getContent(), finder);
int counter = 0;
System.out.println(finder.results.size());
for (Object o : finder.results)
{
P para = (P) o;
String name = "para" + counter;
bookmarkPara(para, 0, para.getParagraphContent().size(), name, counter);
counter++;
}
// Save once, after all paragraphs have been bookmarked
SaveToZipFile saver = new SaveToZipFile(wordMLPackage);
saver.save(outputfilepath);
// wordMLPackage.save(new java.io.File(inputfilepath));
}
/**
* Surround the specified content range in the specified p
* with a bookmark (with the specified name and id)
* @param p
* @param startIndex
* @param endIndex
* @param name
* @param id
*/
public static void bookmarkPara(P p, int startIndex, int endIndex, String name, int id) {
ObjectFactory factory = Context.getWmlObjectFactory();
BigInteger ID = BigInteger.valueOf(id);
// Add bookmark end first
CTMarkupRange mr = factory.createCTMarkupRange();
mr.setId(ID);
JAXBElement<CTMarkupRange> bmEnd = factory.createBodyBookmarkEnd(mr);
p.getParagraphContent().add(endIndex, bmEnd); // from 2.7.0, use getContent()
// Next, bookmark start
CTBookmark bm = factory.createCTBookmark();
bm.setId(ID);
bm.setName(name);
JAXBElement<CTBookmark> bmStart = factory.createBodyBookmarkStart(bm);
p.getParagraphContent().add(startIndex, bmStart);
}
public static List<Object> getAllElementFromObject(Object obj, Class<?> toSearch) {
List<Object> result = new ArrayList<Object>();
if (obj instanceof JAXBElement)
obj = ((JAXBElement<?>) obj).getValue();
if (obj.getClass().equals(toSearch))
result.add(obj);
else if (obj instanceof ContentAccessor) {
List<?> children = ((ContentAccessor) obj).getContent();
for (Object child : children) {
result.addAll(getAllElementFromObject(child, toSearch));
}
}
return result;
}
}
Using this code I bookmark each paragraph as para0 to paraN, and it works fine for most documents. But for two of my docx files I am not able to add bookmarks, and I don't know why; it shows the following error.
java.lang.IllegalArgumentException: obj parameter must not be null
at javax.xml.bind.helpers.AbstractMarshallerImpl.checkNotNull(Unknown Source)
at javax.xml.bind.helpers.AbstractMarshallerImpl.marshal(Unknown Source)
at org.docx4j.openpackaging.parts.JaxbXmlPart.marshal(JaxbXmlPart.java:361)
at org.docx4j.openpackaging.parts.JaxbXmlPart.marshal(JaxbXmlPart.java:330)
at org.docx4j.openpackaging.io.SaveToZipFile.saveRawXmlPart(SaveToZipFile.java:249)
at org.docx4j.openpackaging.io.SaveToZipFile.saveRawXmlPart(SaveToZipFile.java:198)
at org.docx4j.openpackaging.io.SaveToZipFile.savePart(SaveToZipFile.java:424)
at org.docx4j.openpackaging.io.SaveToZipFile.addPartsFromRelationships(SaveToZipFile.java:387)
at org.docx4j.openpackaging.io.SaveToZipFile.savePart(SaveToZipFile.java:442)
at org.docx4j.openpackaging.io.SaveToZipFile.addPartsFromRelationships(SaveToZipFile.java:387)
at org.docx4j.openpackaging.io.SaveToZipFile.save(SaveToZipFile.java:168)
at org.docx4j.openpackaging.io.SaveToZipFile.save(SaveToZipFile.java:97)
at Backup.BookmarkAdd.main(BookmarkAdd.java:64)
.....
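The trace says the marshaller was handed a null object, i.e. by the time SaveToZipFile runs, something in the package tree is null. A minimal diagnostic sketch to locate the offending entry before saving, assuming docx4j's TraversalUtil.CallbackImpl (the printed messages are illustrative):
public static void reportNullChildren(MainDocumentPart documentPart) {
    // Walk the body content and report any null entries that would make
    // the marshaller throw "obj parameter must not be null" on save
    new TraversalUtil(documentPart.getContent(), new TraversalUtil.CallbackImpl() {
        @Override
        public List<Object> apply(Object o) {
            if (o instanceof ContentAccessor) {
                List<Object> children = ((ContentAccessor) o).getContent();
                for (int i = 0; i < children.size(); i++) {
                    if (children.get(i) == null) {
                        System.err.println("null child at index " + i
                                + " under " + o.getClass().getName());
                    }
                }
            }
            return null;
        }
    });
}
Running this against the two failing documents should show which paragraph ends up with a null child after the bookmark insertion.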
Related
public static List<Object> getAllElementFromObject(Object obj, Class<?> toSearch) {
List<Object> result = new ArrayList<Object>();
if (obj instanceof JAXBElement)
obj = ((JAXBElement<?>) obj).getValue();
if (obj.getClass().equals(toSearch))
result.add(obj);
else if (obj instanceof ContentAccessor) {
List<?> children = ((ContentAccessor) obj).getContent();
for (Object child : children) {
result.addAll(getAllElementFromObject(child, toSearch));
}
}
return result;
}
public static void main(String[] args) throws Docx4JException {
String inputfilepath = "C:\\Users\\sugreev.sharma\\Desktop\\BMS\\test\\Ireland CTAg - Authority and CTI_July 2018.docx";
WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(new File(inputfilepath));
MainDocumentPart mainDocumentPart = wordMLPackage.getMainDocumentPart();
List<Object> paragraphs = getAllElementFromObject(mainDocumentPart, P.class);
for (Object par : paragraphs) {
P p = (P) par;
// Get all the runs in the paragraph
List<Object> allRuns = p.getContent();
for (Object run : allRuns) {
R r = (R) run;
// Get the Text in the Run
List<Object> allText = r.getContent();
for (Object text : allText) {
Text txt = (Text) text;
System.out.println("--> " + txt.getValue());
}
}
}
}
This is the code I am using in my project.
I am using docx4j for docx processing.
My Word document has two tables, and I am trying to delete the last table with the following code:
public static void removeTable() throws Docx4JException, JAXBException {
File doc = new File("D:\\Hello.docx");
WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(doc);
MainDocumentPart mainDocumentPart = wordMLPackage.getMainDocumentPart();
String xpath = "//w:tbl";
List<Object> list = mainDocumentPart.getJAXBNodesViaXPath(xpath, false);
if(list.size()==2){
Tbl tbl = (Tbl) XmlUtils.unwrap(list.get(list.size()-1));
mainDocumentPart.getContent().remove(tbl.getParent());
wordMLPackage.save(new java.io.File("D:\\Hello.docx"));
System.out.println(list.size());
}
}
But nothing is happening to my document. Can anybody help me in this regard? Thanks.
I used this code as a base.
A working solution:
public class RemoveLastTable {
public static void main(String[] args) throws Docx4JException {
File doc = new File("d:\\tmp\\tables.docx");
WordprocessingMLPackage pkg = WordprocessingMLPackage.load(doc);
removeLastTable(pkg, "d:\\tmp\\tables_updated.docx");
}
public static void removeLastTable(WordprocessingMLPackage wordMLPackage, String outFile) throws Docx4JException {
Body body = wordMLPackage.getMainDocumentPart().getContents().getBody();
List<Object> tables = getAllElementFromObject(body, Tbl.class);
int indexTableToRemove = tables.size() - 1;
Tbl tableToRemove = (Tbl) tables.get(indexTableToRemove);
body.getContent().remove(tableToRemove.getParent());
wordMLPackage.save(new File(outFile));
}
private static List<Object> getAllElementFromObject(Object obj, Class<?> toSearch) {
List<Object> result = new ArrayList<>();
if (obj instanceof JAXBElement) {
obj = ((JAXBElement<?>) obj).getValue();
}
if (obj.getClass().equals(toSearch)) {
result.add(obj);
}
if (obj instanceof ContentAccessor) {
List<?> children = ((ContentAccessor) obj).getContent();
for (Object child : children) {
result.addAll(getAllElementFromObject(child, toSearch));
}
}
return result;
}
}
However, saving the updated document is not perfect: my Word 2016 (Office 365) was not able to read the result until after running document recovery.
First, specify the item you want to delete (in the list of objects your XPath returned).
Object deleteMe = list.get(1);
Use the code:
Object parent = getParent(deleteMe);
if (parent instanceof ContentAccessor) {
boolean result = ((ContentAccessor)parent).getContent().remove(deleteMe);
System.out.println("Deleted? " + result);
} else {
System.out.println("TODO: get content list from " + parent.getClass().getName());
}
with a little helper method:
private Object getParent(Object o) {
return ((Child)XmlUtils.unwrap(o)).getParent();
}
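After the removal succeeds, remember to write the package back out; the change only affects the in-memory tree (path as in the question):
wordMLPackage.save(new java.io.File("D:\\Hello.docx"));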
While running a Storm topology I am getting this error. The topology runs perfectly for 5 minutes without any error, then it fails. I am using
Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS set to 300 seconds, i.e. 5 minutes.
This is my input stream:
{"_id":{"$oid":"556809dbe4b0ef41436f7515"},"body":{"ProductCount":NumberInt(1),"category":null,"correctedWord":"bbtp","field":null,"filter":{},"fromAutocomplete":false,"loggedIn":false,"pageNo":"1","pageSize":"64","percentageMatch":NumberInt(100),"searchTerm":"bbtp","sortOrder":null,"suggestedWords":[]},"envelope":{"IP":"115.115.115.98","actionType":"search","sessionId":"10536088910863418864","timestamp":{"$date":"2015-05-29T06:40:00.000Z"}}}
This is the complete error:
java.lang.RuntimeException: java.lang.ClassCastException: java.lang.Long cannot be cast to java.lang.String
    at backtype.storm.utils.DisruptorQueue.consumeBatchToCursor(DisruptorQueue.java:128)
    at backtype.storm.utils.DisruptorQueue.consumeBatchWhenAvailable(DisruptorQueue.java:99)
    at backtype.storm.disruptor$consume_batch_when_available.invoke(disruptor.clj:80)
    at backtype.storm.daemon.executor$fn__4722$fn__4734$fn__4781.invoke(executor.clj:748)
    at backtype.storm.util$async_loop$fn__458.invoke(util.clj:463)
    at clojure.lang.AFn.run(AFn.java:24)
    at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassCastException: java.lang.Long cannot be cast to java.lang.String
    at backtype.storm.tuple.TupleImpl.getString(TupleImpl.java:112)
    at com.inferlytics.InferlyticsStormConsumer.bolt.QueryNormalizer.execute(QueryNormalizer.java:40)
    at backtype.storm.topology.BasicBoltExecutor.execute(BasicBoltExecutor.java:50)
    at backtype.storm.daemon.executor$fn__4722$tuple_action_fn__4724.invoke(executor.clj:633)
    at backtype.storm.daemon.executor$mk_task_receiver$fn__4645.invoke(executor.clj:404)
    at backtype.storm.disruptor$clojure_handler$reify__1446.onEvent(disruptor.clj:58)
    at backtype.storm.utils.DisruptorQueue.consumeBatchToCursor(DisruptorQueue.java:125)
    ... 6 more
My topology:
public class TopologyMain {
private static final org.slf4j.Logger LOG = org.slf4j.LoggerFactory
.getLogger(TopologyMain.class);
private static final String SPOUT_ID = "Feed-Emitter";
/**
* @param args
* @throws AlreadyAliveException
* @throws InvalidTopologyException
*/
public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
int numSpoutExecutors = 1;
LOG.info("This is SpoutConfig");
KafkaSpout kspout = QueryCounter();
TopologyBuilder builder = new TopologyBuilder();
LOG.info("This is Set Spout");
builder.setSpout(SPOUT_ID, kspout, numSpoutExecutors);
LOG.info("This is Query-Normalizer bolt");
builder.setBolt("Query-normalizer", new QueryNormalizer())
.shuffleGrouping(SPOUT_ID);
LOG.info("This is Query-ProductCount bolt");
builder.setBolt("Query-ProductCount", new QueryProductCount(),1)
.shuffleGrouping("Query-normalizer", "stream1");
LOG.info("This is Query-SearchTerm bolt");
builder.setBolt("Query-SearchTerm", new QuerySearchTermCount(),1)
.shuffleGrouping("Query-normalizer", "stream2");
LOG.info("This is tick-tuple bolt");
builder.setBolt("Tick-Tuple", new TickTuple(),1)
.shuffleGrouping("Query-normalizer", "stream3");
/*
* Storm Constants
* */
String NIMBUS_HOST = FilePropertyManager.getProperty( ApplicationConstants.STORM_CONSTANTS_FILE,
ApplicationConstants.NIMBUS_HOST );
String NIMBUS_THRIFT_PORT = FilePropertyManager.getProperty( ApplicationConstants.STORM_CONSTANTS_FILE,
ApplicationConstants.NIMBUS_THRIFT_PORT );
String TOPOLOGY_TICK_TUPLE_FREQ_SECS = FilePropertyManager.getProperty( ApplicationConstants.STORM_CONSTANTS_FILE,
ApplicationConstants.TOPOLOGY_TICK_TUPLE_FREQ_SECS );
String STORM_JAR = FilePropertyManager.getProperty( ApplicationConstants.STORM_CONSTANTS_FILE,
ApplicationConstants.STORM_JAR );
String SET_NUM_WORKERS = FilePropertyManager.getProperty( ApplicationConstants.STORM_CONSTANTS_FILE,
ApplicationConstants.SET_NUM_WORKERS );
String SET_MAX_SPOUT_PENDING = FilePropertyManager.getProperty( ApplicationConstants.STORM_CONSTANTS_FILE,
ApplicationConstants.SET_MAX_SPOUT_PENDING );
final int setNumWorkers = Integer.parseInt(SET_NUM_WORKERS);
final int setMaxSpoutPending = Integer.parseInt(SET_MAX_SPOUT_PENDING);
final int nimbus_thrift_port = Integer.parseInt(NIMBUS_THRIFT_PORT);
final int topology_tick_tuple_freq_secs = Integer.parseInt(TOPOLOGY_TICK_TUPLE_FREQ_SECS);
/*
* Storm Configurations
*/
LOG.trace("Setting Configuration");
Config conf = new Config();
LocalCluster cluster = new LocalCluster();
conf.put(Config.NIMBUS_HOST, NIMBUS_HOST);
conf.put(Config.NIMBUS_THRIFT_PORT, nimbus_thrift_port);
conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, topology_tick_tuple_freq_secs);
System.setProperty("storm.jar",STORM_JAR );
conf.setNumWorkers(setNumWorkers);
conf.setMaxSpoutPending(setMaxSpoutPending);
if (args != null && args.length > 0) {
LOG.trace("Storm Topology Submitted On CLuster");
StormSubmitter. submitTopology(args[0], conf, builder.createTopology());
}
else
{
LOG.trace("Storm Topology Submitted On Local");
cluster.submitTopology("Query", conf, builder.createTopology());
Utils.sleep(10000);
cluster.killTopology("Query");
LOG.trace("This is ShutDown cluster");
cluster.shutdown();
}
LOG.trace("Method: main finished.");
}
private static KafkaSpout QueryCounter() {
//Build a kafka spout
/*
* Kafka Constants
*/
final String topic = FilePropertyManager.getProperty( ApplicationConstants.KAFKA_CONSTANTS_FILE,
ApplicationConstants.TOPIC );
String zkHostPort = FilePropertyManager.getProperty( ApplicationConstants.KAFKA_CONSTANTS_FILE,
ApplicationConstants.ZOOKEEPER_CONNECTION_STRING );
String zkRoot = "/Feed-Emitter";
String zkSpoutId = "Feed-Emitter-spout";
ZkHosts zkHosts = new ZkHosts(zkHostPort);
LOG.trace("This is Inside kafka spout ");
SpoutConfig spoutCfg = new SpoutConfig(zkHosts, topic, zkRoot, zkSpoutId);
spoutCfg.scheme = new SchemeAsMultiScheme(new StringScheme());
KafkaSpout kafkaSpout = new KafkaSpout(spoutCfg);
LOG.trace("Returning From kafka spout ");
return kafkaSpout;
}
}
My QueryNormalizer bolt:
public class QueryNormalizer extends BaseBasicBolt {
/**
*
*/
private static final org.slf4j.Logger LOG = org.slf4j.LoggerFactory
.getLogger(QueryNormalizer.class);
public void cleanup() {}
/**
* The bolt will receive the line from the
* feed file and process it to Normalize this line
*
* The normalize will be put the terms in lower case
* and split the line to get all terms.
*/
public void execute(Tuple input, BasicOutputCollector collector) {
LOG.trace("Method in QueryNormalizer: execute called.");
String feed = input.getString(0);
String searchTerm = null;
String pageNo = null;
boolean sortOrder = true;
boolean category = true;
boolean field = true;
boolean filter = true;
String pc = null;
int ProductCount = 0;
String timestamp = null;
String year = null;
String month = null;
String day = null;
String hour = null;
Calendar calendar = Calendar.getInstance();
int dayOfYear = calendar.get(Calendar.DAY_OF_YEAR);
int weekOfYear = calendar.get(Calendar.WEEK_OF_YEAR);
JSONObject obj = null;
try {
obj = new JSONObject(feed);
} catch (JSONException e1) {
LOG.error( "Json Exception in Query Normalizer", e1 );
}
try {
searchTerm = obj.getJSONObject("body").getString("correctedWord");
pageNo = obj.getJSONObject("body").getString("pageNo");
sortOrder = obj.getJSONObject("body").isNull("sortOrder");
category = obj.getJSONObject("body").isNull("category");
field = obj.getJSONObject("body").isNull("field");
filter = obj.getJSONObject("body").getJSONObject("filter").isNull("filters");
pc = obj.getJSONObject("body").getString("ProductCount").replaceAll("[^\\d]", "");
ProductCount = Integer.parseInt(pc);
timestamp = (obj.getJSONObject("envelope").get("timestamp")).toString().substring(10,29);
year = (obj.getJSONObject("envelope").get("timestamp")).toString().substring(10, 14);
month = (obj.getJSONObject("envelope").get("timestamp")).toString().substring(15, 17);
day = (obj.getJSONObject("envelope").get("timestamp")).toString().substring(18, 20);
hour = (obj.getJSONObject("envelope").get("timestamp")).toString().substring(21, 23);
} catch (JSONException e) {
LOG.error( "Parsing Value Exception in Query Normalizer", e );
}
searchTerm = searchTerm.trim();
//Condition to eliminate pagination
if(!searchTerm.isEmpty()){
if ((pageNo.equals("1")) && (sortOrder == true) && (category == true) && (field == true) && (filter == true)){
searchTerm = searchTerm.toLowerCase();
System.out.println("In QueryProductCount execute: "+searchTerm+","+year+","+month+","+day+","+hour+","+dayOfYear+","+weekOfYear+","+ProductCount);
System.out.println("Entire Json : "+feed);
System.out.println("In QuerySearchCount execute : "+searchTerm+","+year+","+month+","+day+","+hour);
LOG.trace("In QueryNormalizer execute : "+searchTerm+","+year+","+month+","+day+","+hour+","+dayOfYear+","+weekOfYear+","+ProductCount);
LOG.trace("In QueryNormalizer execute : "+searchTerm+","+year+","+month+","+day+","+hour);
collector.emit("stream1", new Values(searchTerm , year , month , day , hour , dayOfYear , weekOfYear , ProductCount ));
collector.emit("stream2", new Values(searchTerm , year , month , day , hour ));
collector.emit("stream3", new Values());
}
LOG.trace("Method in QueryNormalizer: execute finished.");
}
}
/**
* The bolt will only emit the specified streams in collector
*/
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declareStream("stream1", new Fields("searchTerm" ,"year" ,"month" ,"day" ,"hour" ,"dayOfYear" ,"weekOfYear" ,"ProductCount"));
declarer.declareStream("stream2", new Fields("searchTerm" ,"year" ,"month" ,"day" ,"hour"));
declarer.declareStream("stream3", new Fields());
}
}
In the QueryNormalizer class, the error is shown at this line:
String feed = input.getString(0);
public void execute(Tuple input, BasicOutputCollector collector) {
LOG.trace("Method in QueryNormalizer: execute called.");
String feed = input.getString(0);
String searchTerm = null;
Caused by: java.lang.ClassCastException: java.lang.Long cannot be cast to java.lang.String
    at backtype.storm.tuple.TupleImpl.getString(TupleImpl.java:112)
    at com.inferlytics.InferlyticsStormConsumer.bolt.QueryNormalizer.execute(QueryNormalizer.java:40)
EDIT:
After removing Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS from the config, the code works properly. But I have to implement tick tuples. How do I achieve that?
I guess there is some problem with my TickTuple class. Is this the right way to implement it?
TickTuple
public class TickTuple extends BaseBasicBolt {
private static final long serialVersionUID = 1L;
private static final org.slf4j.Logger LOG = org.slf4j.LoggerFactory
.getLogger(TickTuple.class);
private static final String KEYSPACE = FilePropertyManager.getProperty( ApplicationConstants.CASSANDRA_CONSTANTS_FILE,
ApplicationConstants.KEYSPACE );
private static final String MONGO_DB = FilePropertyManager.getProperty( ApplicationConstants.MONGO_CONSTANTS_FILE,
ApplicationConstants.MONGO_DBE );
private static final String TABLE_CASSANDRA_TOP_QUERY = FilePropertyManager.getProperty( ApplicationConstants.CASSANDRA_CONSTANTS_FILE,
ApplicationConstants.TABLE_CASSANDRA_TOP_QUERY );
private static final String MONGO_COLLECTION_E = FilePropertyManager.getProperty( ApplicationConstants.MONGO_CONSTANTS_FILE,
ApplicationConstants.MONGO_COLLECTION_E );
public void cleanup() {
}
protected static boolean isTickTuple(Tuple tuple) {
return tuple.getSourceComponent().equals(Constants.SYSTEM_COMPONENT_ID)
&& tuple.getSourceStreamId().equals(Constants.SYSTEM_TICK_STREAM_ID);
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {}
@Override
public void execute(Tuple input, BasicOutputCollector collector) {
try {
if (isTickTuple(input)) {
CassExport.cassExp(KEYSPACE, TABLE_CASSANDRA_TOP_QUERY, MONGO_DB, MONGO_COLLECTION_E);
TruncateCassandraTable.truncateData(TABLE_CASSANDRA_TOP_QUERY);
Log.trace("In Truncate");
return;
}
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
Can anyone please suggest the required changes in the code?
Now I understand: you have data tuples and tick tuples in the same input stream. Thus, for data tuples the first field is of type String, but for tick tuples it is of type Long. Thus, input.getString(0) runs into the ClassCastException for the first arriving tick tuple.
You need to update your bolt code like this:
Object field1 = input.getValue(0);
if (field1 instanceof Long) {
Long tick = (Long)field1;
// process tick tuple further
} else {
String feed = (String)field1;
// process data tuple as you did already
}
You need to differentiate between tick tuples and normal tuples within your execute method. Add this method to your bolt:
public boolean isTickTuple(Tuple tuple) {
return tuple.getSourceComponent().equals(Constants.SYSTEM_COMPONENT_ID)
&& tuple.getSourceStreamId().equals(Constants.SYSTEM_TICK_STREAM_ID);
}
Now in execute, you can do:
if(isTickTuple(tuple)){
doSomethingPeriodic()
} else {
executeLikeBefore()
}
The problem was with my TickTuple bolt implementation. I had added
conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, topology_tick_tuple_freq_secs)
in my main topology configuration. Instead, it should be added in the bolt where the tick tuple is implemented.
I edited my TickTuple code, added this snippet, and everything works fine:
@Override
public Map<String, Object> getComponentConfiguration() {
// configure how often a tick tuple will be sent to our bolt
Config conf = new Config();
conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, topology_tick_tuple_freq_secs);
return conf;
}
This has to be added in the corresponding bolt instead of the main topology.
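Putting the pieces together, a minimal sketch of a self-configuring tick bolt against the old backtype.storm API used in the question (the 300-second frequency and the comments are illustrative):
import java.util.Map;
import backtype.storm.Config;
import backtype.storm.Constants;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Tuple;

public class TickTupleBolt extends BaseBasicBolt {
    private static final long serialVersionUID = 1L;

    @Override
    public Map<String, Object> getComponentConfiguration() {
        // Ask Storm to deliver a tick tuple to this bolt every 300 seconds
        Config conf = new Config();
        conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 300);
        return conf;
    }

    private static boolean isTickTuple(Tuple tuple) {
        return tuple.getSourceComponent().equals(Constants.SYSTEM_COMPONENT_ID)
                && tuple.getSourceStreamId().equals(Constants.SYSTEM_TICK_STREAM_ID);
    }

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        if (isTickTuple(input)) {
            // periodic work (e.g. the Cassandra export) goes here
        } else {
            // regular tuples from upstream components go here
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // this bolt emits nothing
    }
}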
We have implemented the Java Lucene search engine 4.5. I am trying to make searches match even when the field value's case differs (e.g., if I search for a city named "Banglore" I get a result, but when I search for "banglore" I get 0 results).
I have used StandardAnalyzer for analyzing the data and WildcardQuery to match a Like condition (I tried as mentioned here without success).
I am not sure where I have gone wrong. I appreciate any guidance on fixing this case sensitivity problem.
public class SearchHelper
{
Analyzer analyzer;
Directory index;
public IndexSearcher searcher = null;
public IndexWriter indexWriter = null;
public QueryParser parser = null;
private static int hitsPerPage = 100;
/**
* @param indexFileLocation
* @throws IOException
*/
public SearchHelper(String indexFileLocation) throws IOException
{
// this.analyzer =new StandardAnalyzer();
this.analyzer = new CaseStandardAnalyzer();
// analyzer = new ThaiAnalyzer();
this.index = FSDirectory.open(java.nio.file.Paths.get(indexFileLocation));
}
/**
* @param create
* @return
* @throws IOException
*/
public IndexWriter getIndexWriter(boolean create) throws IOException
{
if (indexWriter == null)
{
IndexWriterConfig iwc = new IndexWriterConfig(this.analyzer);
this.indexWriter = new IndexWriter(this.index, iwc);
}
return this.indexWriter;
} //End of getIndexWriter
/**
* @throws IOException
*/
public void closeIndexWriter() throws IOException
{
if (this.indexWriter != null)
{
this.indexWriter.commit();//optimize(); LUCENE_36
this.indexWriter.close();
}
} //End closeIndexWriter
/**
* @param indexFileLocation
* @throws CorruptIndexException
* @throws IOException
*/
public void startSearch(String indexFileLocation) throws CorruptIndexException, IOException
{
// searcher = new IndexSearcher(FSDirectory.open(new File(indexFileLocation)));
IndexReader reader = DirectoryReader.open(FSDirectory.open(java.nio.file.Paths.get(indexFileLocation)));
// IndexReader.open(this.index);
// open(getIndexWriter(true), true);
this.searcher = new IndexSearcher(reader);
}
/**
* @param fieldNames
* @param fieldValues
* @return
* @throws IOException
* @throws ParseException
*
* <p></p>
* https://stackoverflow.com/questions/2005084/how-to-specify-two-fields-in-lucene-queryparser
*/
public ScoreDoc[] searchSEO(String[] fieldNames, String[] fieldValues, int limitSize) throws IOException, ParseException
{
this.analyzer = new StandardAnalyzer();
int searchFieldSize = (null == fieldNames) ? 0 : fieldNames.length;
BooleanQuery booleanQuery = new BooleanQuery();
for (int i = 0; i < searchFieldSize; i++)
{
Query query1 = searchIndexWithWildcardQuery(fieldNames[i], fieldValues[i]);
addQueries(booleanQuery, query1, 2);
}
TopScoreDocCollector collector = null; // falls back to hitsPerPage when limitSize <= 0
if (limitSize > 0)
{
collector = TopScoreDocCollector.create(limitSize);
} else {
collector = TopScoreDocCollector.create(hitsPerPage);
}
this.searcher.search(booleanQuery,collector);
return collector.topDocs().scoreDocs;
}
/**
* @param whichField
* @param searchString
* @return
* @throws IOException
* @throws ParseException
*/
public Query searchIndexWithWildcardQuery(String whichField, String searchString) throws IOException, ParseException
{
Term term = addTerm(whichField, "*" + searchString + "*");
Query query = new WildcardQuery(term);
return query;
}
/**
* @param whichField
* @param searchString
* @return
*/
public Term addTerm(String whichField, String searchString)
{
Term term = new Term(whichField, searchString);
return term;
}
/**
* @param searchString
* @param operation
* @return
* @throws ParseException
*/
public Query addConditionOperation(String searchString, String operation) throws ParseException
{
Query query = null;
if ("and".equals(operation))
{
parser.setDefaultOperator(QueryParser.AND_OPERATOR);
} else if("or".equals(operation)) {
parser.setDefaultOperator(QueryParser.OR_OPERATOR);
}
query = parser.parse(searchString);
return query;
}
/**
* @param booleanQuery <code>BooleanQuery</code>
* @param q <code>Query</code>
* @param type <code>int</code>, 1 --> Must, 2 --> Should, 3 --> Must Not
*/
public void addQueries(BooleanQuery booleanQuery, Query q, int type)
{
switch(type)
{
case 1: booleanQuery.add(q, Occur.MUST);
break;
case 2: booleanQuery.add(q, Occur.SHOULD);
break;
default:booleanQuery.add(q, Occur.MUST_NOT);
break;
} //End of switch
}
public QueryParser getParser()
{
return parser;
}
public void setParser(String fieldName)
{
this.parser = new QueryParser(fieldName, this.analyzer);
}
public void getDefaultByStatus(int status)
{
this.analyzer = new StandardAnalyzer();
this.parser = new QueryParser("status", this.analyzer);
}
protected void doClear(File dir,boolean deleteSubDir)
{
for (File file: dir.listFiles())
{
if (file.isDirectory() && deleteSubDir)
{
doClear(file,deleteSubDir);
}
file.delete();
}
} //End of doClear();
protected void doClose() throws IOException
{
this.searcher.getIndexReader().close();
}
public boolean add(Object Obj) throws Exception
{
User currentUser = (User)Obj;
boolean isAdded = false;
org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES));
luceneDoc.add(new IntField("status", currentUser.getStatus(), Field.Store.YES));
luceneDoc.add(new StringField("login", currentUser.getLogin(), Field.Store.YES));
luceneDoc.add(new StringField("fName", currentUser.getFirstName(), Field.Store.YES));
luceneDoc.add(new StringField("lName", currentUser.getLastName(), Field.Store.NO));
luceneDoc.add(new StringField("email", currentUser.getEmailId(), Field.Store.YES));
luceneDoc.add(new StringField("city", currentUser.getCity(), Field.Store.YES));
// addRelatedFields(luceneDoc,city.getStateCode());
IndexWriter writer = getIndexWriter(false);
writer.addDocument(luceneDoc);
closeIndexWriter();
isAdded = true;
System.out.println(isAdded);
return isAdded;
} // End of add
public boolean update(Object Obj) throws Exception
{
boolean isUpdated = false;
User currentUser = (User) Obj;
org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
// luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES));
luceneDoc.add(new IntField("oid", currentUser.getOid(), Field.Store.YES));
luceneDoc.add(new StringField("login", currentUser.getLogin(), Field.Store.YES));
luceneDoc.add(new IntField("status", currentUser.getStatus(), Field.Store.YES));
luceneDoc.add(new StringField("fName", currentUser.getFirstName(), Field.Store.YES));
luceneDoc.add(new StringField("lName", currentUser.getLastName(), Field.Store.NO));
luceneDoc.add(new StringField("email", currentUser.getEmailId(), Field.Store.YES));
luceneDoc.add(new StringField("city", currentUser.getCity(), Field.Store.YES));
// addRelatedFields(luceneDoc,city.getStateCode());
IndexWriter writer = getIndexWriter(false);
writer.updateDocument(new Term("login", currentUser.getLogin()),luceneDoc);
closeIndexWriter();
isUpdated = true;
return isUpdated;
} // End of update
public boolean delete(Object Obj) throws Exception
{
boolean isDeleted = false;
User currentUser = (User) Obj;
Term deleteTerm = new Term("login", currentUser.getLogin());
IndexWriter writer = getIndexWriter(false);
writer.deleteDocuments(deleteTerm); // Or use Query
writer.forceMergeDeletes();
closeIndexWriter();
isDeleted = true;
return isDeleted;
} // End of delete
@Override
public Object search(String[] fieldNames, String[] fieldValues, int returnType, int limit) throws Exception
{
Object obj = null;
org.apache.lucene.search.ScoreDoc[] hits = searchSEO(fieldNames,fieldValues, limit);
int hitSize = (null == hits) ? 0 : hits.length;
System.out.println("total:" + hitSize);
doClose();
return obj;
} // End of search
public void addThreadUser()
{
User user = new User();
addUserPojo(user);
add(user);
}
public void updateThreadUser()
{
User user = new User();
addUserPojo(user);
update(user);
}
public void deleteThreadUser()
{
User user = new User();
addUserPojo(user);
delete(user);
}
private void addUserPojo(User user)
{
user.setOid(3);
user.setLogin("senthil");
user.setFirstName("Semthil");
user.setLastName("Semthil");
user.setStatus(1);
user.setCity("Combiatore");
user.setEmailId("semthil#xyz.com");
}
public void searchUser()
{
searchUser(new String[] {"login"}, new String[] {"Se"}, null);
}
public static void main(String[] args)
{
SearchHelper test = new SearchHelper();
test.searchUser();
}
}
You are using StringField to index your data, but this field bypasses the analyzer chain and always indexes your term verbatim as one token, regardless of your analyzer. You should use TextField if you want your data analyzed, and the StandardAnalyzer already does lower-casing.
Other than that, the WildcardQuery does not analyze its term, so if you search for Banglore, it won't match the now-lower-case banglore in the index. You have to lower-case the search term yourself (or run an analyzer on it).
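A minimal sketch of both fixes applied to the add() method from the question, assuming Lucene 4.5 (the literal search string is illustrative):
// Index with TextField so the StandardAnalyzer tokenizes and lower-cases the value
luceneDoc.add(new TextField("city", currentUser.getCity(), Field.Store.YES));

// WildcardQuery bypasses the analyzer, so lower-case the input yourself
String searchString = "Banglore";
Query query = new WildcardQuery(
        new Term("city", "*" + searchString.toLowerCase() + "*"));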
Use the LowerCaseFilter as the post you referenced suggests:
TokenStream stream = new StandardFilter(Version.LUCENE_CURRENT, tokenizer);
stream = new LowerCaseFilter(Version.LUCENE_CURRENT, stream);
A more complete example is in this post.
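For reference, the same chain wrapped in a custom Analyzer, a sketch against the Lucene 4.x API (where createComponents still takes a Reader):
Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        // Tokenize, then apply standard and lower-case filters
        Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_45, reader);
        TokenStream stream = new StandardFilter(Version.LUCENE_45, tokenizer);
        stream = new LowerCaseFilter(Version.LUCENE_45, stream);
        return new TokenStreamComponents(tokenizer, stream);
    }
};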
You can use a custom comparator class:
class CaseIgnoreCompare extends FieldComparator<String> {
private String field;
private String bottom;
private String topValue;
private BinaryDocValues cache;
private String[] values;
public CaseIgnoreCompare(String field, int numHits) {
this.field = field;
this.values = new String[numHits];
}
@Override
public int compare(int arg0, int arg1) {
return compareValues(values[arg0], values[arg1]);
}
@Override
public int compareBottom(int arg0) throws IOException {
return compareValues(bottom, cache.get(arg0).utf8ToString());
}
@Override
public int compareTop(int arg0) throws IOException {
return compareValues(topValue, cache.get(arg0).utf8ToString());
}
public int compareValues(String first, String second) {
int val = first.length() - second.length();
return val == 0 ? first.compareToIgnoreCase(second) : val;
};
@Override
public void copy(int arg0, int arg1) throws IOException {
values[arg0] = cache.get(arg1).utf8ToString();
}
@Override
public void setBottom(int arg0) {
this.bottom = values[arg0];
}
@Override
public FieldComparator<String> setNextReader(AtomicReaderContext arg0)
throws IOException {
this.cache = FieldCache.DEFAULT.getTerms(arg0.reader(),
field , true);
return this;
}
@Override
public void setTopValue(String arg0) {
this.topValue = arg0;
}
@Override
public String value(int arg0) {
return values[arg0];
}
}
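To plug the comparator in, wrap it in a FieldComparatorSource and hand that to a SortField, a sketch assuming the Lucene 4.x sort API:
Sort sort = new Sort(new SortField("city", new FieldComparatorSource() {
    @Override
    public FieldComparator<String> newComparator(String fieldname, int numHits,
            int sortPos, boolean reversed) throws IOException {
        return new CaseIgnoreCompare(fieldname, numHits);
    }
}));
TopDocs docs = searcher.search(query, 10, sort);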
I'm using JAXB marshalling to create XML files, and they are created successfully. Now I want to display these files using JAXB unmarshalling. Here is the code I'm using:
public Object display(String fileName) throws IOException, JAXBException {
XmlStructure object;
File file = new File(fileName);
JAXBContext jaxbContext = JAXBContext.newInstance(XmlStructure.class);
Unmarshaller jaxbUnMarshaller = jaxbContext.createUnmarshaller();
object = (XmlStructure) jaxbUnMarshaller.unmarshal(file);
System.out.println(object.toString());
return object;
}
The previous code gives me this result:
com.nc.inotify.dp.xml.impl.XmlStructure#2916a6bf
Then I changed the code to this:
public Object display(String fileName) throws IOException, JAXBException {
XmlStructure object;
File file = new File(fileName);
JAXBContext jaxbContext = JAXBContext.newInstance(XmlStructure.class);
Unmarshaller jaxbMarshaller = jaxbContext.createUnmarshaller();
object = (XmlStructure) jaxbMarshaller.unmarshal(file);
Marshaller jaxbMarshallerz = jaxbContext.createMarshaller();
jaxbMarshallerz.marshal(object, System.out);
return object;
}
but it gives me this result:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?><XmlSource/>
and this is the XML file:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<XmlSource>
<XmlConf>
<hostName>weather.yahooapis.com</hostName>
<parameters>
<entry>
<key>w</key>
<value>2502265</value>
</entry>
</parameters>
<URLPath>/forecastrss</URLPath>
<urlProtocol>http</urlProtocol>
</XmlConf>
<XmlConf>
<hostName>weather.yahooapis.com</hostName>
<parameters>
<entry>
<key>w</key>
<value>2553822</value>
</entry>
</parameters>
<URLPath>/forecastrss</URLPath>
<urlProtocol>http</urlProtocol>
</XmlConf>
</XmlSource>
Update
NOTE: I'm using more than one class in the marshalling process in order to get that form.
The marshalling method:
public void add(String fileName) throws IOException, JAXBException,
ParserConfigurationException, SAXException, TransformerException {
this.fileName = fileName;
File temp = new File(tempName);
JAXBContext jaxbContext = JAXBContext.newInstance(XmlConfList.class);
Marshaller jaxbMarshaller = jaxbContext.createMarshaller();
jaxbMarshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
File source = new File(fileName);
if (source.exists()) {
jaxbMarshaller.marshal(object, temp);
MergeXml merge = new MergeXml();
merge.mergeXML(true, fileName, tempName, mainTag);
} else {
XmlStructure struct = new XmlStructure();
jaxbMarshaller.marshal(struct, source);
jaxbMarshaller.marshal(object, temp);
MergeXml merge = new MergeXml();
merge.mergeXML(true, fileName, tempName, mainTag);
}
temp.delete();
}
The MergeXml class:
public class MergeXml {
private static final String YES = "yes";
private static final String generalTag = "*";
/**
* This method is used to merge the old and new XML files together
*/
public void mergeXML(boolean condition, String fileName, String tempName, String mainTag)
throws ParserConfigurationException, SAXException, IOException,
TransformerException {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = null;
Document doc = null;
Document doc2 = null;
db = dbf.newDocumentBuilder();
doc = db.parse(new File(fileName));
doc2 = db.parse(new File(tempName));
NodeList elements = doc.getElementsByTagName(mainTag);
if (condition == true) {
NodeList nodeList = doc2.getElementsByTagName(generalTag);
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
Node childNode = doc.adoptNode(node);
elements.item(0).appendChild(childNode);
}
}
TransformerFactory tFactory = TransformerFactory.newInstance();
Transformer transformer = tFactory.newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, YES);
DOMSource source = new DOMSource(doc);
StreamResult result = new StreamResult(new StringWriter());
transformer.transform(source, result);
BufferedWriter output = new BufferedWriter(new FileWriter(fileName));
String xmlOutput = result.getWriter().toString();
output.write(xmlOutput);
output.close();
}
}
The XmlStructure class:
#XmlRootElement(name = "XmlSource")
public class XmlStructure{
}
The XmlConf class:
#XmlRootElement(name = "XmlConf")
public class XmlConf extends XmlStructure {
private String URLProtocol;
private List<String> path = new ArrayList<String>();
private String urlp;
private Map<String, String> parameters;
private String host;
/**
* This method is used to retrieve the specified URL protocol
* @return {@code String}
*/
public String getUrlProtocol() {
return URLProtocol;
}
/**
* This method is used to store the URL protocol as a String if the URL is a valid one
* @param URLProtocol
*
*/
@XmlElement
public void setUrlProtocol(String URLProtocol) {
this.URLProtocol = URLProtocol;
}
/**
* This method is used to retrieve all the paths selected
* by the user in order to save
* @return {@code List<String>}
*/
@XmlElement
public List<String> getPath() {
return path;
}
/**
* This method is used to store a new path added by the user
* @param path
*
*/
public void setPath(String path) {
this.path.add(path);
}
/**
* This method is used to set the path of the specified URL
* @param urlp
*
*/
@XmlElement(name = "URLPath")
public void setUrlPath(String urlp) {
this.urlp = urlp;
}
/**
* This method is used to retrieve the path of the specified URL
* @return {@code String}
*/
public String getUrlPath() {
return urlp;
}
/**
* This method is used to set the parameters of the specified URL
* @param parameters
*
*/
@XmlElementWrapper
public void setParameters(Map<String, String> parameters) {
this.parameters = parameters;
}
/**
* This method is used to retrieve the parameters
* of the specified URL
* @return {@code Map<String, String>}
*/
public Map<String, String> getParameters() {
return parameters;
}
/**
* This method is used to set the host name of the specified URL
* @param host
*
*/
public void setHostName(String host) {
this.host = host;
}
/**
* This method is used to retrieve the host name of the
* specified URL
* @return {@code String}
*/
public String getHostName() {
return host;
}
}
The XmlConfList class:
@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
public class XmlConfList {
#XmlElementWrapper(name = "XmlSource")
#XmlElement(name = "XmlConf")
private List<XmlConf> list = null;
public List<XmlConf> getList() {
if(this.list == null)
this.list = new ArrayList<>();
return this.list;
}
}