I use Elasticsearch 2.2.0 and created the data mapping below:
{
"DOCU": {
"_source": { "excludes" : ["fileAttachment.fileContent", "fileAttachmentFr.fileContent", "fileAttachmentEn.fileContent", "contentBody", "contentBodyFr", "contentBodyEn", "geoLocations.boundaries"] },
"properties":{
"id" : { "type" : "string", "store" : "true" },
"categoryId" : { "type" : "string", "store" : "true" } ,
"categoryCode" : { "type" : "string", "store" : "true" } ,
"categoryDesc" : { "type" : "string", "store" : "true" } ,
"typeId" : { "type" : "string", "store" : "true" } ,
"typeCode" : { "type" : "string", "store" : "true" } ,
"typeDesc" : { "type" : "string", "store" : "true" } ,
"domainId" : { "type" : "string", "store" : "true" } ,
"domainCode" : { "type" : "string", "store" : "true" } ,
"domainDesc" : { "type" : "string", "store" : "true" } ,
"groupId" : { "type" : "string", "store" : "true" } ,
"groupCode" : { "type" : "string", "store" : "true" } ,
"groupDesc" : { "type" : "string", "store" : "true" } ,
"title" : { "type" : "string", "store" : "true" } ,
"titleFr" : { "type" : "string", "store" : "true" } ,
"titleEn" : { "type" : "string", "store" : "true" } ,
"subTitle" : { "type" : "string", "store" : "true" } ,
"subTitleFr" : { "type" : "string", "store" : "true" } ,
"subTitleEn" : { "type" : "string", "store" : "true" } ,
"summary" : { "type" : "string", "store" : "true" } ,
"summaryFr" : { "type" : "string", "store" : "true" } ,
"summaryEn" : { "type" : "string", "store" : "true" } ,
"body" : { "type" : "string", "store" : "true" } ,
"bodyFr" : { "type" : "string", "store" : "true" } ,
"bodyKh" : { "type" : "string", "store" : "true" } ,
"contentBody" : {
"type" : "attachment",
"fields": {
"content": { "type": "string", "term_vector":"with_positions_offsets", "store": true, "analyzer" : "english"}
}
},
"contentBodyFr" : {
"type" : "attachment",
"fields": {
"content": { "type": "string", "term_vector":"with_positions_offsets", "store": true, "analyzer" : "french"}
}
},
"contentBodyEn" : {
"type" : "attachment",
"fields": {
"content": { "type": "string", "term_vector":"with_positions_offsets", "store": true, "analyzer" : "english"}
}
},
"wkfStatusId" : {"type" : "integer", "store" : "true"},
"owner" : {"type" : "string", "store" : "true"},
"createdDate": { "type" : "date", "store" : "true", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"},
"modifiedDate":{ "type" : "date", "store" : "true", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"},
"rank" : { "type" : "float", "store" : "true" },
"nbViewed" : { "type" : "integer", "store" : "true" },
"nbDownloaded" : { "type" : "integer", "store" : "true" },
"avgRating" : { "type" : "float", "store" : "true" },
"nbComment" : { "type" : "integer", "store" : "true" },
"contributors" : {
"properties":{
"id" : { "type" : "string", "store" : "true" },
"type" : { "type" : "string", "store" : "true" } ,
"contributorId" : { "type" : "string", "store" : "true" } ,
"contributorName" : { "type" : "string", "store" : "true" } ,
"orgCatId" : { "type" : "string", "store" : "true" } ,
"orgTypeId" : { "type" : "string", "store" : "true" } ,
"orgId" : { "type" : "string", "store" : "true" } ,
"orgName" : { "type" : "string", "store" : "true" }
}
},
"projectES" : {
"properties":{
"id" : { "type" : "string", "store" : "true" },
"orgId" : { "type" : "string", "store" : "true" } ,
"orgName" : { "type" : "string", "store" : "true" } ,
"startDate": { "type" : "date", "store" : "true", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"},
"endDate":{ "type" : "date", "store" : "true", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"},
"website" : { "type" : "string", "store" : "true" },
"budget" : { "type" : "string", "store" : "true" },
"funders" : { "type" : "string", "store" : "true", "term_vector": "with_positions_offsets" }
}
},
"organizationES" : {
"properties":{
"id" : { "type" : "string", "store" : "true" },
"categoryId" : { "type" : "string", "store" : "true" } ,
"categoryDesc" : { "type" : "string", "store" : "true" } ,
"typeId" : { "type" : "string", "store" : "true" } ,
"typeDesc" : { "type" : "string", "store" : "true" } ,
"shortName" : { "type" : "string", "store" : "true" },
"website" : { "type" : "string", "store" : "true" },
"email" : { "type" : "string", "store" : "true" },
"tel" : { "type" : "string", "store" : "true" },
"address" : { "type" : "string", "store" : "true" }
}
},
"employeeES" : {
"properties":{
"id" : { "type" : "string", "store" : "true" },
"categoryId" : { "type" : "string", "store" : "true" } ,
"categoryDesc" : { "type" : "string", "store" : "true" } ,
"genderId" : { "type" : "string", "store" : "true" } ,
"genderDesc" : { "type" : "string", "store" : "true" } ,
"email" : { "type" : "string", "store" : "true" },
"tel" : { "type" : "string", "store" : "true" },
"country" : { "type" : "string", "store" : "true" },
"city" : { "type" : "string", "store" : "true" },
"bio" : { "type" : "string", "store" : "true" }
}
},
"memberES" : {
"properties":{
"id" : { "type" : "string", "store" : "true" },
"memberTypeId" : { "type" : "string", "store" : "true" } ,
"memberTypeDesc" : { "type" : "string", "store" : "true" } ,
"isOnline" : { "type" : "string", "store" : "true" } ,
"isPrivate" : { "type" : "string", "store" : "true" } ,
"individu" : {
"properties":{
"id" : { "type" : "string", "store" : "true" },
"categoryId" : { "type" : "string", "store" : "true" } ,
"categoryDesc" : { "type" : "string", "store" : "true" } ,
"genderId" : { "type" : "string", "store" : "true" } ,
"genderDesc" : { "type" : "string", "store" : "true" } ,
"email" : { "type" : "string", "store" : "true" },
"tel" : { "type" : "string", "store" : "true" },
"country" : { "type" : "string", "store" : "true" },
"city" : { "type" : "string", "store" : "true" },
"bio" : { "type" : "string", "store" : "true" }
}
}
}
},
"urlContent" : {"type" : "string", "store" : "true"},
"isParsing" : { "type" : "boolean", "store" : "true" } ,
"isSelfPublished" : { "type" : "boolean", "store" : "true" } ,
"isPublished" : { "type" : "boolean", "store" : "true" } ,
"publicationDate" : { "type" : "date", "store" : "true", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"} ,
"isPublic" : { "type" : "boolean", "store" : "true" } ,
"isVisible" : { "type" : "boolean", "store" : "true" } ,
"isLocked" : { "type" : "boolean", "store" : "true" } ,
"tagNames" : { "type" : "string", "store" : "true", "term_vector": "with_positions_offsets" },
"geoLocationsNames": { "type" : "string", "store" : "true", "term_vector": "with_positions_offsets" },
"geoLocations" :{
"properties":{
"id" : { "type" : "integer", "store" : "true" },
"code" : { "type" : "string", "store" : "true" },
"desc" : { "type" : "string", "store" : "true" },
"descEn" : { "type" : "string", "store" : "true" },
"point" : { "type" : "geo_point"},
"boundaries" : { "type" : "geo_shape"}
},
"type" : "nested"
},
"fileAttachment" : {
"properties":{
"fileContent" : {
"type" : "attachment",
"fields": {
"content": { "type": "string", "term_vector":"with_positions_offsets", "store": true, "analyzer" : "english"}
}
},
"contentType" : { "type" : "string", "store" : "true", "index" : "not_analyzed" },
"lastModifiedDate":{ "type" : "date", "store" : "true", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"},
"indexing_date": { "type" : "date", "store" : "true", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"},
"filesize": { "type" : "long", "store" : "true" },
"indexed_chars": { "type" : "long", "store" : "true"},
"filename": { "type" : "string", "store" : "true", "index" : "not_analyzed" },
"url": { "type" : "string", "store" : "true", "index" : "no" }
}
},
"fileAttachmentEn" : {
"properties":{
"fileContent" : {
"type" : "attachment",
"fields": {
"content": { "type": "string", "term_vector":"with_positions_offsets", "store": true, "analyzer" : "english"}
}
},
"contentType" : { "type" : "string", "store" : "true", "index" : "not_analyzed" },
"lastModifiedDate":{ "type" : "date", "store" : "true", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"},
"indexing_date": { "type" : "date", "store" : "true", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"},
"filesize": { "type" : "long", "store" : "true" },
"indexed_chars": { "type" : "long", "store" : "true"},
"filename": { "type" : "string", "store" : "true", "index" : "not_analyzed" },
"url": { "type" : "string", "store" : "true", "index" : "no" }
}
},
"fileAttachmentFr" : {
"properties":{
"fileContent" : {
"type" : "attachment",
"fields": {
"content": { "type": "string", "term_vector":"with_positions_offsets", "store": true, "analyzer" : "english"}
}
},
"contentType" : { "type" : "string", "store" : "true", "index" : "not_analyzed" },
"lastModifiedDate":{ "type" : "date", "store" : "true", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"},
"indexing_date": { "type" : "date", "store" : "true", "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"},
"filesize": { "type" : "long", "store" : "true" },
"indexed_chars": { "type" : "long", "store" : "true"},
"filename": { "type" : "string", "store" : "true", "index" : "not_analyzed" },
"url": { "type" : "string", "store" : "true", "index" : "no" }
}
}
}
}}
How can I remove the excluded fields (the `_source` setting shown below) from the mapping:
"_source": { "excludes" : ["fileAttachment.fileContent", "fileAttachmentFr.fileContent", "fileAttachmentEn.fileContent", "contentBody", "contentBodyFr", "contentBodyEn", "geoLocations.boundaries"] }
I don't want to re-index my data because it will take all day to finish.
Please help :)
It is not possible to change an existing Elasticsearch mapping without reindexing.
However you can take a look at https://www.elastic.co/blog/changing-mapping-with-zero-downtime
which shows how you can reindex with virtually zero downtime
Related
Here is my test case. When I run this JUnit test individually it passes, but when I run the Maven build it fails with what looks like an encoding error in the JSON file. Please check the error message I provide below.
#Test
public void testRemoveEmoji() throws Exception {
Items items = getSampleItems("/json/ItemObjectWithEmojiContent.json");
List<Item> itemList = items.getItemsList();
for(Item item : itemList) {
assertNotNull(item);
this.starcImportProcessor.removeEmojis(item);
assertEquals("(Open -> In Progress -> Open -> Closed)", item.getDescription());
for(ItemComment itemComments : item.getItemComments()) {
assertEquals("Test comment", itemComments.getComment());
}
}
getSampleItems code :
/**
 * Reads the JSON array found at the given classpath location and wraps the
 * deserialized entries in an Items instance.
 *
 * @param path classpath location of the JSON file to load
 * @return an Items object whose item list holds the deserialized entries
 * @throws JsonParseException declared by the signature
 * @throws JsonMappingException declared by the signature
 * @throws IOException declared by the signature
 */
private Items getSampleItems(final String path) throws JsonParseException, JsonMappingException, IOException {
Items downloadedItems = new Items();
// NOTE(review): new String(byte[]) decodes with the JVM's default charset, so
// the decoded text depends on the environment the test runs in.
JSONArray inputWorkitem = new JSONArray(new String(getJSONFromFile(path)));
final ObjectMapper objectMapper = new ObjectMapper();
// The JSONArray is serialized back to text and then mapped onto List<Item>.
List<Item> items = objectMapper.readValue(inputWorkitem.toString(), new TypeReference<List<Item>>() {});
downloadedItems.setItemsList(items);
return downloadedItems;
}
/**
 * Returns the raw bytes of the classpath resource at the given location.
 *
 * @param jsonFilePath classpath location of the file to read
 * @return the complete file content as bytes (no charset decoding is applied)
 * @throws IOException if the resource cannot be resolved to a file or read
 */
private static byte[] getJSONFromFile(final String jsonFilePath) throws IOException {
    // Resolve the classpath resource to a file on disk and read it in one go.
    return Files.readAllBytes(new ClassPathResource(jsonFilePath).getFile().toPath());
}
Here is json file :
[
{
"id" : 233565,
"name" : "Hafmap DaimlerBosch Test4 CRL Reject",
"createdAt" : "2020-09-29T16:07:23.918",
"modifiedAt" : "2021-07-08T19:03:12.477",
"version" : "15",
"description" : "(Open -> 😀In Progress -> Open -> Closed)",
"descriptionFormat" : "Wiki",
"assignedAt" : "2020-10-01T15:09:26.022",
"typeName" : "Bug",
"endDate" : "2020-10-08T14:40:00.000",
"closedAt" : "2020-10-08T14:40:34.385",
"ordinal" : 0,
"createdBy" : {
"id" : 1255,
"name" : "CHRBOOT",
"type" : "UserReference",
"email" : "christian.bootz#daimler.com",
"value" : null
},
"modifiedBy" : {
"id" : 3,
"name" : "computed.update",
"type" : "UserReference",
"email" : "codebeamer#intland.com",
"value" : null
},
"tracker" : {
"id" : 269438,
"name" : "Change Request Light",
"type" : "TrackerReference",
"email" : null,
"value" : null
},
"priority" : {
"id" : 0,
"name" : "Unset",
"type" : "ChoiceOptionReference",
"email" : null,
"value" : null
},
"status" : {
"id" : 7,
"name" : "Closed",
"type" : "ChoiceOptionReference",
"email" : null,
"value" : null
},
"severities" : [ {
"id" : 3,
"name" : "VoCA Prio 3",
"type" : "ChoiceOptionReference",
"email" : null,
"value" : null
} ],
"resolutions" : [ ],
"children" : [ ],
"subjects" : [ ],
"assignedTo" : [ {
"id" : 3463,
"name" : "PID8CBD",
"type" : "UserReference",
"email" : "christian.bootz#daimler.com",
"value" : null
} ],
"owners" : null,
"comments" : [ {
"id" : 675988,
"name" : "attachment mit leerzeichen.txt",
"type" : "CommentReference",
"email" : null,
"value" : null
}, {
"id" : 675990,
"name" : "Comment-1601992066937",
"type" : "CommentReference",
"email" : null,
"value" : null
}, {
"id" : 675991,
"name" : "Comment-1601992069782",
"type" : "CommentReference",
"email" : null,
"value" : null
} ],
"categories" : [ ],
"platforms" : [ ],
"customFields" : [ {
"fieldId" : 1001,
"name" : "Sync Allowed",
"values" : [ {
"id" : 1,
"name" : "Supplier External Tool",
"type" : "ChoiceOptionReference",
"email" : null,
"value" : null
} ],
"type" : "ChoiceFieldValue",
"value" : null
}, {
"fieldId" : 1003,
"name" : "Top Ten",
"values" : [ {
"id" : 2,
"name" : "No",
"type" : "ChoiceOptionReference",
"email" : null,
"value" : null
} ],
"type" : "ChoiceFieldValue",
"value" : null
}, {
"fieldId" : 1012,
"name" : "Supplier Status",
"values" : [ {
"id" : 7,
"name" : "Supplier Closed",
"type" : "ChoiceOptionReference",
"email" : null,
"value" : null
} ],
"type" : "ChoiceFieldValue",
"value" : null
}, {
"fieldId" : 1015,
"name" : "Verification By Test Group",
"values" : [ {
"id" : 2,
"name" : "No",
"type" : "ChoiceOptionReference",
"email" : null,
"value" : null
} ],
"type" : "ChoiceFieldValue",
"value" : null
}, {
"fieldId" : 1019,
"name" : "Closing Reason",
"values" : [ {
"id" : 6,
"name" : "No Defect / Issue",
"type" : "ChoiceOptionReference",
"email" : null,
"value" : null
} ],
"type" : "ChoiceFieldValue",
"value" : null
}, {
"fieldId" : 1020,
"name" : "Reject Reason",
"values" : [ {
"id" : 4,
"name" : "Not Reproducible",
"type" : "ChoiceOptionReference",
"email" : null,
"value" : null
} ],
"type" : "ChoiceFieldValue",
"value" : null
}, {
"fieldId" : 1028,
"name" : "Supplier",
"values" : [ {
"id" : 594222,
"name" : "Supplier_Bosch_MAP_ECU",
"type" : "UserGroupReference",
"email" : null,
"value" : null
} ],
"type" : "ChoiceFieldValue",
"value" : null
}, {
"fieldId" : 10000,
"name" : "DEFAULT_PERMISSION_1",
"values" : null,
"type" : "TextFieldValue",
"value" : "1"
}, {
"fieldId" : 10002,
"name" : "Created On Date",
"values" : null,
"type" : "DateFieldValue",
"value" : "2020-09-29T02:00:00.000"
}, {
"fieldId" : 10006,
"name" : "Domain",
"values" : null,
"type" : "TextFieldValue",
"value" : "Automated driving"
}, {
"fieldId" : 10007,
"name" : "Supplier ID",
"values" : null,
"type" : "TextFieldValue",
"value" : "DYMAPDAIEX-155919"
}, {
"fieldId" : 10024,
"name" : "Last Status Change",
"values" : null,
"type" : "DateFieldValue",
"value" : "2020-09-29T18:07:23.000"
}, {
"fieldId" : 10026,
"name" : "DEFAULT_PERMISSION_READONLY_INPROGRESS",
"values" : null,
"type" : "TextFieldValue",
"value" : "1"
} ],
"attachmentData" : [ {
"id" : 675988,
"name" : "attachment mit leerzeichen.txt",
"description" : null,
"descriptionFormat" : "PlainText",
"createdAt" : "2020-10-06T13:47:45.956",
"createdBy" : {
"id" : 3463,
"name" : "PID8CBD",
"type" : "UserReference",
"email" : "christian.bootz#daimler.com",
"value" : null
},
"modifiedAt" : "2020-10-06T13:47:45.956",
"modifiedBy" : {
"id" : 3463,
"name" : "PID8CBD",
"type" : "UserReference",
"email" : "christian.bootz#daimler.com",
"value" : null
},
"version" : 1,
"size" : 317,
"md5" : "A2DE48D274A08AD80D19E8829D0093D4",
"filePath" : null
} ],
"itemComments" : [ {
"id" : 675990,
"name" : "Comment-1601992066937",
"createdAt" : "2020-10-06T13:47:46.937",
"modifiedAt" : null,
"version" : "1",
"createdBy" : {
"id" : 3463,
"name" : "PID8CBD",
"type" : "UserReference",
"email" : "christian.bootz#daimler.com",
"value" : null
},
"modifiedBy" : null,
"comment" : "😀Test comment",
"commentFormat" : "PlainText",
"parent" : null,
"attachments" : [ ]
} ]
}
]
Here is the error I get when I run the Maven build; it looks like an encoding problem.
Could you please help me resolve this issue?
testRemoveEmoji(com.bosch.edexas.starc.processor.STARCImportProcessorTest) Time elapsed: 0.008 sec <<< FAILURE!
org.junit.ComparisonFailure: expected:<(Open -> []In Progress -> Open ...> but was:<(Open -> **[😀]**In Progress -> Open ...>
at org.junit.Assert.assertEquals(Assert.java:125)
at org.junit.Assert.assertEquals(Assert.java:147)
at com.bosch.edexas.starc.processor.STARCImportProcessorTest.testRemoveEmoji(STARCImportProcessorTest.java:195)
OpenJDK 64-Bit Server VM warning: ignoring option UseSplitVerifier; support was removed in 8.0
Results :
Failed tests:
STARCImportProcessorTest.testRemoveEmoji:195 expected:<(Open -> []In Progress -> Open ...> but was:<(Open -> **[????]**In Progress -> Open ...>
getJSONFromFile(path) returns a byte[] array, and new String(byte[]) decodes it with the platform default charset, which may not be UTF-8 in the Maven build. Pass the charset explicitly when you create the String:
new String(getJSONFromFile(path), StandardCharsets.UTF_8);
I am using kafka-streams to transform XML messages to Avro format. I would like to know whether it is possible to keep the names of my union records when using a union type for records in my Avro schema, as in the example below, so that instead of the name "main_record" I would have "record1" or "record2" in my Avro message, depending on the input data I am receiving:
{
"namespace": "proj.avro",
"protocol": "app_messages",
"doc" : "application messages",
"name": "myRecord",
"type" : "record",
"fields": [
{
"name": "main_record",
"type": [
{
"name": "record1",
"type" : "record",
"fields":
[
{
"name" : "request_id",
"type" : "int"
},
{
"name" : "message_type",
"type" : "int"
},
{
"name" : "users",
"type" : "string"
}
]
},
{
"name" : "record2",
"type" : "record",
"fields" :
[
{
"name" : "request_id",
"type" : "int"
},
{
"name" : "response_code",
"type" : "string"
},
{
"name" : "response_count",
"type" : "int"
},
{
"name" : "reason_code",
"type" : "string"
}
]
}
]
}
]
}
I have written an Elasticsearch mapping, but it only works with alphabetic characters. How can I do the same for numeric values?
PUT /documents_test8
{
"settings" : {
"analysis" : {
"analyzer" : {
"filename_search" : {
"tokenizer" : "filename",
"filter" : ["lowercase"]
},
"filename_index" : {
"tokenizer" : "filename",
"filter" : ["lowercase","edge_ngram"]
}
},
"tokenizer" : {
"filename" : {
"pattern" : "[^\\p{L}\\d]+",
"type" : "pattern"
}
},
"filter" : {
"edge_ngram" : {
"side" : "front",
"max_gram" : 20,
"min_gram" : 1,
"type" : "edgeNGram"
}
}
}
},
"mappings" : {
"doc" : {
"properties" : {
"filename" : {
"type" : "text",
"search_analyzer" : "filename_search",
"index_analyzer" : "filename_index"
}
}
}
}
}
For numeric values, you can define the mapping using the type "long":
"type": "long"
And for floating-point numbers, use the type "float":
"type": "float"
I have created the following mapping in Elasticsearch, and it was created successfully. But when I add documents to this type, my nested field type
changes to a normal (non-nested) one. So when I execute a query on the nested object fields, it
shows unexpected results, while on the rest of the fields the results are as
expected. I am not sure what I am doing wrong. There must be something
I am missing while creating the nested object mapping.
request_body = {
"settings": {
"index": {
"analysis": {
"analyzer": {
"standard": {
"tokenizer": "standard",
"filter": ["lowercase", "stop"]
}
}
}
}
},
"mappings" : {
"userdetails" : {
"properties" : {
"accountType" : {
"type" : "long"
},
"addressLineOne" : {
"type" : "string"
},
"addressLineTwo" : {
"type" : "string"
},
"alternateEmailId" : {
"type" : "string"
},
"archiveId" : {
"type" : "string"
},
"blockedEmployerId" : {
"type" : "long"
},
"cityId" : {
"type" : "long"
},
"country" : {
"type" : "string"
},
"coverPicUrl" : {
"type" : "string"
},
"currentEmployer" : {
"type" : "string"
},
"currentPackageCurrencyId" : {
"type" : "long"
},
"currentPackageUnit" : {
"type" : "string"
},
"currentSalaryPackage" : {
"type" : "double"
},
"currentlocation" : {
"type" : "string"
},
"designation" : {
"type" : "string"
},
"emailId" : {
"type" : "string"
},
"employmentDetail" : {
"type":"nested",
"properties" : {
"companyName" : {
"type" : "string"
},
"designation" : {
"type" : "string"
},
"durationEndMonth" : {
"type" : "long"
},
"durationEndYear" : {
"type" : "long"
},
"durationStartMonth" : {
"type" : "long"
},
"durationStartYear" : {
"type" : "long"
},
"employmentStatus" : {
"type" : "string"
},
"id" : {
"type" : "long"
},
"isCurrent" : {
"type" : "long"
},
"jobProfile" : {
"type" : "string"
},
"location" : {
"type" : "string"
},
"timeDate" : {
"type" : "date",
"format" : "strict_date_optional_time||epoch_millis"
},
"userId" : {
"type" : "long"
}
}
},
"examscores" : {
"type":"nested",
"properties" : {
"attemptedCount" : {
"type" : "long"
},
"candidateId" : {
"type" : "long"
},
"examDate" : {
"type" : "date",
"format" : "strict_date_optional_time||epoch_millis"
},
"examId" : {
"type" : "long"
},
"examScoreDetailId" : {
"type" : "long"
},
"examTotalScore" : {
"type" : "double"
},
"examType" : {
"type" : "long"
},
"katExamEventId" : {
"type" : "long"
},
"rightAttempted" : {
"type" : "long"
},
"rptVisStatus" : {
"type" : "long"
},
"scorePercent" : {
"type" : "double"
},
"timeDate" : {
"type" : "date",
"format" : "strict_date_optional_time||epoch_millis"
},
"token" : {
"type" : "string"
},
"totalQuestion" : {
"type" : "long"
},
"totalScore" : {
"type" : "double"
}
}
},
"experienceInMonths" : {
"type" : "long"
},
"experienceInYears" : {
"type" : "long"
},
"firstName" : {
"type" : "string"
},
"gender" : {
"type" : "string"
},
"hQPS" : {
"type" : "long"
},
"higestQualInstitute" : {
"type" : "string"
},
"higestQualInstituteLoc" : {
"type" : "string"
},
"higestQualPassingYear" : {
"type" : "long"
},
"higestQualPercentCgpa" : {
"type" : "double"
},
"higestQualification" : {
"type" : "string"
},
"highestQualSpecialization" : {
"type" : "string"
},
"highestQualificationStream" : {
"type" : "string"
},
"industryDomain" : {
"type" : "string"
},
"keySkills" : {
"type" : "string"
},
"lastLoginDate" : {
"type" : "string"
},
"lastName" : {
"type" : "string"
},
"lastUpdateDate" : {
"type" : "string"
},
"locationName" : {
"type" : "string"
},
"maritalStatus" : {
"type" : "string"
},
"mobileNumber" : {
"type" : "long"
},
"password" : {
"type" : "string"
},
"paswdChangedDate" : {
"type" : "string"
},
"phoneNumber" : {
"type" : "long"
},
"preferredLocations" : {
"type" : "string"
},
"profileImageUrl" : {
"type" : "string"
},
"profileSummary" : {
"type" : "string"
},
"projectdetail" : {
"type":"nested",
"properties" : {
"clientName" : {
"type" : "string"
},
"designation" : {
"type" : "string"
},
"durationEndMonth" : {
"type" : "long"
},
"durationEndYear" : {
"type" : "long"
},
"durationStartMonth" : {
"type" : "long"
},
"durationStartYear" : {
"type" : "long"
},
"employmentNature" : {
"type" : "long"
},
"id" : {
"type" : "long"
},
"location" : {
"type" : "string"
},
"rolesResponsibilities" : {
"type" : "string"
},
"skillsUsed" : {
"type" : "string"
},
"teamSize" : {
"type" : "long"
},
"timeDate" : {
"type" : "date",
"format" : "strict_date_optional_time||epoch_millis"
},
"title" : {
"type" : "string"
},
"userId" : {
"type" : "long"
}
}
},
"qualification" : {
"type":"nested",
"properties" : {
"id" : {
"type" : "long"
},
"institute" : {
"type" : "string"
},
"isPrimary" : {
"type" : "long"
},
"passingYear" : {
"type" : "long"
},
"percentageCgpa" : {
"type" : "double"
},
"qualification" : {
"type" : "string"
},
"specialization" : {
"type" : "string"
},
"timeDate" : {
"type" : "date",
"format" : "strict_date_optional_time||epoch_millis"
},
"userId" : {
"type" : "long"
}
}
},
"registrationDate" : {
"type" : "string"
},
"resumeContentType" : {
"type" : "string"
},
"resumeTitle" : {
"type" : "string"
},
"s3DocKey" : {
"type" : "string"
},
"s3HtmlKey" : {
"type" : "string"
},
"sectionalscores" : {
"type":"nested",
"properties" : {
"attemptedCount" : {
"type" : "long"
},
"candidateId" : {
"type" : "long"
},
"examScoreId" : {
"type" : "long"
},
"examTime" : {
"type" : "long"
},
"examTitleId" : {
"type" : "long"
},
"katExamId" : {
"type" : "long"
},
"katScoreId" : {
"type" : "long"
},
"rightAttempted" : {
"type" : "long"
},
"score" : {
"type" : "double"
},
"scorePercent" : {
"type" : "double"
},
"timeDate" : {
"type" : "date",
"format" : "strict_date_optional_time||epoch_millis"
},
"totalQuestion" : {
"type" : "long"
},
"totalScore" : {
"type" : "double"
}
}
},
"stateId" : {
"type" : "long"
},
"status" : {
"type" : "long"
},
"timeDate" : {
"type" : "string"
},
"userId" : {
"type" : "long"
},
"uuid" : {
"type" : "string"
},
"videoIntroType" : {
"type" : "long"
},
"videoIntroUrl" : {
"type" : "string"
},
"videoProfileTimeDate" : {
"type" : "date",
"format" : "strict_date_optional_time||epoch_millis"
},
"videoProfileUrl" : {
"type" : "string"
},
"videoProfileVisibility" : {
"type" : "long"
},
"workStatusForOtherCountries" : {
"type" : "long"
},
"workStatusForUS" : {
"type" : "long"
},
"xIIPS" : {
"type" : "long"
},
"xIIPassingYear" : {
"type" : "long"
},
"xIIPercentageCgpa" : {
"type" : "double"
},
"xPS" : {
"type" : "long"
},
"xPassingYear" : {
"type" : "long"
},
"xPercentageCgpa" : {
"type" : "double"
}
}
}
}
}
# Announce the step, then create the index using the settings and mappings held in request_body.
print("creating 'example_index' index...")
# NOTE(review): the message above mentions 'example_index', but the index actually created is 'no-one'.
res = es1.indices.create(index = 'no-one', body = request_body)
After creating index using above code, I added the data into this index using following code.
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import MySQLdb
import json
# Open the MySQL connection that every source table is read from.
db = MySQLdb.connect(host='localhost', user='xxxx', passwd='xxxx', db='xxxxx')
cursor_mysql = db.cursor()


def _fetch_all(query):
    """Execute `query` on the shared cursor and return every row it yields."""
    cursor_mysql.execute(query)
    return cursor_mysql.fetchall()


# Load each source table completely into memory, in the same order as before.
userTable = _fetch_all("select * from USER ")
secondaryQualification = _fetch_all("select * from qualificationmapping")
examscore = _fetch_all("select * from examscoredetail")
sectionalScore = _fetch_all("select * from katsectionalscore")
proejctDetails = _fetch_all("select * from projectexperiencedetails")
employmentDetails = _fetch_all("select * from useremploymentdetails")

# Accumulator for the bulk actions, plus the per-user child-record lists.
data = []
listSecondaryQualification, listExamScores, listSectionalScores = [], [], []
listProjectDetails, listEmploymentDetails = [], []
sectionalscores = {}
indexdata = {}

# Elasticsearch client created with its default settings.
es1 = Elasticsearch()

# Number of users processed so far.
count = 0
# Build one bulk action per user (flat user fields plus five lists of child
# records), stop after 2000 users, then send everything in one bulk request.
#
# NOTE(review): the indentation of this snippet was lost when it was pasted.
# The nesting below is reconstructed from the logic -- confirm it against the
# original script, in particular where 'On my way' is printed.
for userid in userTable:
    count += 1

    # Bulk-action metadata for this user; "_source" is attached further down.
    indexdata = {
        "_index": "xxxxx",
        "_type": "xxxxx",
        "_id": userid[0]
    }

    # Flat user fields, taken from the USER row(s) whose first column matches.
    # Column 18 is not copied, exactly as in the original snippet.
    for item in userTable:
        if userid[0] == item[0]:
            userdata = {
                'userId': item[0],
                'accountType': item[1],
                'addressLineOne': item[2],
                'addressLineTwo': item[3],
                'alternateEmailId': item[4],
                'archiveId': item[5],
                'blockedEmployerId': item[6],
                'cityId': item[7],
                'classXIISchool': item[8],
                'classXSchool': item[9],
                'country': item[10],
                'coverPicUrl': item[11],
                'currentEmployer': item[12],
                'currentPackageCurrencyId': item[13],
                'currentPackageUnit': item[14],
                'currentSalaryPackage': item[15],
                'currentlocation': item[16],
                'designation': item[17],
                'emailId': item[19],
                'experienceInMonths': item[20],
                'experienceInYears': item[21],
                'firstName': item[22],
                'gender': item[23],
                'hQPS': item[24],
                'higestQualInstitute': item[25],
                'higestQualInstituteLoc': item[26],
                'higestQualPassingYear': item[27],
                'higestQualPercentCgpa': item[28],
                'higestQualification': item[29],
                'highestQualSpecialization': item[30],
                'highestQualificationStream': item[31],
                'industryDomain': item[32],
                'keySkills': item[33],
                'lastLoginDate': str(item[34]),
                'lastName': item[35],
                'lastUpdateDate': str(item[36]),
                'locationName': item[37],
                'maritalStatus': item[38],
                'middleName': item[39],
                'mobileNumber': item[40],
                'password': item[41],
                'paswdChangedDate': item[42],
                'phoneNumber': item[43],
                'preferredLocations': item[44],
                'previousEmployer': item[45],
                'profileImageUrl': item[46],
                'profileSummary': item[47],
                'registrationDate': str(item[48]),
                'resumeContentType': item[49],
                'resumeTitle': item[50],
                's3DocKey': item[51],
                's3HtmlKey': item[52],
                'stateId': item[53],
                'status': item[54],
                'timeDate': str(item[55]),
                'uuid': item[56],
                'videoIntroType': item[57],
                'videoIntroUrl': item[58],
                'videoProfileTimeDate': item[59],
                'videoProfileUrl': item[60],
                'videoProfileVisibility': item[61],
                'workStatusForOtherCountries': item[62],
                'workStatusForUS': item[63],
                'xIIPS': item[64],
                'xIIPassingYear': item[65],
                'xIIPercentageCgpa': item[66],
                'xPS': item[67],
                'xPassingYear': item[68],
                'xPercentageCgpa': item[69]
            }

    # Qualification rows whose column 8 (userId) matches this user.
    for quid in secondaryQualification:
        if userid[0] == quid[8]:
            indQualification = {
                'id': quid[0],
                'institute': quid[1],
                'isPrimary': quid[2],
                'passingYear': quid[3],
                'percentageCgpa': quid[4],
                'qualification': quid[5],
                'specialization': quid[6],
                'timeDate': str(quid[7]),
                'userId': quid[8]
            }
            listSecondaryQualification.append(indQualification)

    # Exam-score rows whose column 2 (candidateId) matches this user.
    for esdid in examscore:
        if userid[0] == esdid[2]:
            indExamscores = {
                'examScoreDetailId': esdid[0],
                'attemptedCount': esdid[1],
                'candidateId': esdid[2],
                'examDate': esdid[3],
                'examId': esdid[4],
                'examTotalScore': esdid[5],
                'examType': esdid[6],
                'katExamEventId': esdid[7],
                'rightAttempted': esdid[8],
                'rptVisStatus': esdid[9],
                'scorePercent': esdid[10],
                'timeDate': str(esdid[11]),
                'token': esdid[12],
                'totalQuestion': esdid[13],
                'totalScore': esdid[14]
            }
            listExamScores.append(indExamscores)

    # Sectional-score rows whose column 2 (candidateId) matches this user.
    for scid in sectionalScore:
        if userid[0] == scid[2]:
            indSectionalscores = {
                'katScoreId': scid[0],
                'attemptedCount': scid[1],
                'candidateId': scid[2],
                'examScoreId': scid[3],
                'examTime': scid[4],
                'examTitleId': scid[5],
                'katExamId': scid[6],
                'rightAttempted': scid[7],
                'score': scid[8],
                'scorePercent': scid[9],
                'scoreS3Key': scid[10],
                'timeDate': str(scid[11]),
                'totalQuestion': scid[12],
                'totalScore': scid[13]
            }
            listSectionalScores.append(indSectionalscores)

    # Project rows whose column 16 (userId) matches this user.
    for prid in proejctDetails:
        if userid[0] == prid[16]:
            indvidualProjectdetail = {
                'id': prid[0],
                'clientName': prid[1],
                'designation': prid[2],
                'durationEndMonth': prid[3],
                'durationEndYear': prid[4],
                'durationStartMonth': prid[5],
                'durationStartYear': prid[6],
                'employmentNature': prid[7],
                'location': prid[8],
                'projectDetails': prid[9],
                'rolesResponsibilities': prid[10],
                'site': prid[11],
                'skillsUsed': prid[12],
                'teamSize': prid[13],
                'timeDate': str(prid[14]),
                'title': prid[15],
                'userId': prid[16]
            }
            listProjectDetails.append(indvidualProjectdetail)

    # Employment rows whose column 12 (userId) matches this user.
    for userempid in employmentDetails:
        if userid[0] == userempid[12]:
            indEmploymentDetail = {
                'id': userempid[0],
                'companyName': userempid[1],
                'designation': userempid[2],
                'durationEndMonth': userempid[3],
                'durationEndYear': userempid[4],
                'durationStartMonth': userempid[5],
                'durationStartYear': userempid[6],
                'employmentStatus': userempid[7],
                'isCurrent': userempid[8],
                'jobProfile': userempid[9],
                'location': userempid[10],
                'timeDate': str(userempid[11]),
                'userId': userempid[12]
            }
            listEmploymentDetails.append(indEmploymentDetail)

    # Attach the child-record lists under the field names used by the mapping.
    userdata['qualification'] = listSecondaryQualification
    userdata['examscores'] = listExamScores
    userdata['sectionalscores'] = listSectionalScores
    userdata['projectdetail'] = listProjectDetails
    userdata['employmentDetail'] = listEmploymentDetails

    indexdata["_source"] = userdata
    data.append(indexdata)

    # Start the next user with fresh lists; rebinding (rather than clearing)
    # leaves the lists already stored in userdata untouched.
    listSecondaryQualification = []
    listExamScores = []
    listSectionalScores = []
    listProjectDetails = []
    listEmploymentDetails = []

    # Only the first 2000 users are indexed.
    if count == 2000:
        break

print('On my way')
# Send every accumulated action to Elasticsearch in a single bulk call.
helpers.bulk(es1, data)
print("bulk import done")
In my application I need to get all entries from my SQLite database and generate the JSON below to create a node in a Drupal site. I'm new to JSON; please tell me the best way to achieve this:
{ "body" : [ ],
"changed" : "1393522835",
"cid" : "0",
"comment" : "1",
"comment_count" : "0",
"created" : "1393522835",
"data" : "a:1:{s:7:\"contact\";i:0;}",
"field_associatedterm" : { "und" : [ { "target_id" : "2" } ] },
"field_fat" : { "und" : [ { "value" : "24.4" } ] },
"field_creationts" : { "und" : [ { "value" : "1391016769" } ] },
"field_key" : { "und" : [ { "format" : null,
"safe_value" : "12312312-1123123",
"value" : "12312312-1123123"
} ] },
"field_devicename" : { "und" : [ { "format" : null,
"safe_value" : "iPhone",
"value" : "iPhone"
} ] },
"field_devices" : [ ],
"field_editts" : [ ],
"field_entrydate" : { "und" : [ { "date_type" : "datetime",
"timezone" : "Europe/London",
"timezone_db" : "Europe/London",
"value" : "2013-06-05 09:45:00"
} ] },
"field_humor" : { "und" : [ { "value" : "1" } ] },
"field_synchts" : [ ],
"field_text" : { "und" : [ { "format" : null,
"safe_value" : """",
"value" : "\"\""
} ] },
"field_timezonename" : [ ],
"field_timezoneoffset" : [ ],
"field_useruniqueid" : [ ],
"field_number" : { "und" : [ { "value" : "12" } ] },
"language" : "pt-pt",
"last_comment_name" : null,
"last_comment_timestamp" : "1393522835",
"last_comment_uid" : "1",
"log" : "",
"metatags" : [ ],
"name" : "admin",
"nid" : "302",
"picture" : "0",
"promote" : "1",
"revision_timestamp" : "1393522835",
"revision_uid" : "1",
"status" : "1",
"sticky" : "0",
"title" : "2013-06-05 08:39:01 +0000",
"tnid" : "0",
"translate" : "0",
"type" : "entry",
"uid" : "1",
"vid" : "302"
},
This is not hard if you use a tool that generates the code from JSON strings, like this one: http://jsongen.byingtondesign.com/.
I'm sure there are more useful tools to do this work.