I have a sample JSON file and I have also come up with a schema to evaluate above file using below JSON file:
//[gcp_ingestion_parameters_schema.json]
{
...
"properties": {
"application": {
"$ref": "#/definitions/application"
},
"ingestion": {
"$ref": "#/definitions/ingestion"
}
},
"definitions": {
"applicaion": {
"type": "object",
"properties": {
"project_id": {
"type": "string"
},
"path_to_json_key_file": {
"type": "string"
}
},
"required": [
"project_id",
"path_to_json_key_file"
]
},
...
I am still not sure how to write the schema file. In my sample file both application and ingestion tags should occur once, but fileingestion-mappings inside ingestion can occur one or more than once.
I have written some java code to evaluate my JSON file (first file) based on the provided JSON schema file.
but I get exception as follow:
Exception in thread "main"
com.github.fge.jsonschema.core.exceptions.ProcessingException: fatal: JSON Reference "#/definitions/appl
ication" cannot be resolved
level: "fatal"
schema: {"loadingURI":"#","pointer":"/properties/application"}
ref: "#/definitions/application"
Can some with experience working wit above library answer my questions asked in this tread?
As suggested you have a typo in ur schema it should be below
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "http://json-schema.org/draft-07/schema#",
"title": "Core schema meta-schema",
"definitions": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$ref": "#" }
},
"nonNegativeInteger": {
"type": "integer",
"minimum": 0
},
"nonNegativeIntegerDefault0": {
"allOf": [
{ "$ref": "#/definitions/nonNegativeInteger" },
{ "default": 0 }
]
},
"simpleTypes": {
"enum": [
"array",
"boolean",
"integer",
"null",
"number",
"object",
"string"
]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"uniqueItems": true,
"default": []
}
},
"type": ["object", "boolean"],
"properties": {
"$id": {
"type": "string",
"format": "uri-reference"
},
"$schema": {
"type": "string",
"format": "uri"
},
"$ref": {
"type": "string",
"format": "uri-reference"
},
"$comment": {
"type": "string"
},
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": true,
"readOnly": {
"type": "boolean",
"default": false
},
"examples": {
"type": "array",
"items": true
},
"multipleOf": {
"type": "number",
"exclusiveMinimum": 0
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "number"
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "number"
},
"maxLength": { "$ref": "#/definitions/nonNegativeInteger" },
"minLength": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"additionalItems": { "$ref": "#" },
"items": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/schemaArray" }
],
"default": true
},
"maxItems": { "$ref": "#/definitions/nonNegativeInteger" },
"minItems": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"contains": { "$ref": "#" },
"maxProperties": { "$ref": "#/definitions/nonNegativeInteger" },
"minProperties": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"required": { "$ref": "#/definitions/stringArray" },
"additionalProperties": { "$ref": "#" },
"definitions": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"properties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"patternProperties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"propertyNames": { "format": "regex" },
"default": {}
},
"dependencies": {
"type": "object",
"additionalProperties": {
"anyOf": [
{ "$ref": "#" },
{ "$ref": "#/definitions/stringArray" }
]
}
},
"propertyNames": { "$ref": "#" },
"const": true,
"enum": {
"type": "array",
"items": true,
"minItems": 1,
"uniqueItems": true
},
"type": {
"anyOf": [
{ "$ref": "#/definitions/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/definitions/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
},
"format": { "type": "string" },
"contentMediaType": { "type": "string" },
"contentEncoding": { "type": "string" },
"if": {"$ref": "#"},
"then": {"$ref": "#"},
"else": {"$ref": "#"},
"allOf": { "$ref": "#/definitions/schemaArray" },
"anyOf": { "$ref": "#/definitions/schemaArray" },
"oneOf": { "$ref": "#/definitions/schemaArray" },
"not": { "$ref": "#" }
},
"default": true
}
This works perfectly fine with the JSON you have provided.
You have typo error in applicaion it should be application
Change this "definitions": { "applicaion": { to "definitions": { "application": {
also Refer this link to validate your schema https://www.liquid-technologies.com/online-json-schema-validator.
Related
I have a class the I need to converto to an avro schema:
public class MonitorStateSchema {
private MetaResponse c;
private Header a;
private MonitorStateEnvelope b;
public MonitorStateSchema(MetaResponse c, Header a, MonitorStateEnvelope b) {
this.c = c;
this.a = a;
this.b = b;
}
}
I use a generic method to get the schema from it
public static <D> void getFromAvro(Class<D> schemaType) {
Schema schema = ReflectData.get().getSchema(schemaType);
// other stuff
}
After doing it, I got a different order in the result than the expected:
EXPECTED:
{
"type": "record",
"name": "MonitorSchema",
"namespace": "mypackage.monitor",
"fields": [
{
"name": "c",
"type": {
"type": "record",
"name": "MetaResponse",
"namespace": "mypackage.monitor",
"fields": [
{
"name": "uuid",
"type": "string"
},
{
"name": "code",
"type": "int"
},
{
"name": "message",
"type": "string"
}
]
}
},
{
"name": "a",
"type": {
"type": "record",
"name": "Header",
"namespace": "mypackage.monitor",
"fields": [
{
"name": "apiKey",
"type": "string"
},
{
"name": "signature",
"type": "string"
},
{
"name": "nonce",
"type": "int"
}
]
}
},
{
"name": "b",
"type": {
"type": "record",
"name": "MonitorEnvelope",
"fields": [
{
"name": "fields",
"type": {
"type": "array",
"items": {
"type": "record",
"name": "Field",
"fields": [
{
"name": "name",
"type": "string"
},
{
"name": "value",
"type": "string"
}
]
},
"java-class": "[Lmypackage.monitor.Field;"
}
}
]
}
}
]
}
ACTUAL RESULT:
{
"type": "record",
"name": "MonitorStateSchema",
"namespace": "mypackage.monitor",
"fields": [
{
"name": "a",
"type": {
"type": "record",
"name": "Header",
"namespace": "mypackage.monitor",
"fields": [
{
"name": "apiKey",
"type": "string"
},
{
"name": "nonce",
"type": "int"
},
{
"name": "signature",
"type": "string"
}
]
}
},
{
"name": "b",
"type": {
"type": "record",
"name": "MonitorStateEnvelope",
"fields": [
{
"name": "fields",
"type": {
"type": "array",
"items": {
"type": "record",
"name": "Field",
"namespace": "mypackage.monitor",
"fields": [
{
"name": "name",
"type": "string"
},
{
"name": "value",
"type": "string"
}
]
},
"java-class": "[Lmypackage.monitor.Field;"
}
}
]
}
},
{
"name": "c",
"type": {
"type": "record",
"name": "MetaResponse",
"namespace": "mypackage.monitor",
"fields": [
{
"name": "code",
"type": "int"
},
{
"name": "message",
"type": "string"
},
{
"name": "uuid",
"type": "string"
}
]
}
}
]
}
Seems that it is ordering by alphabetical order on the name of the field and it is breaking the application when deserializing the byte array. Is there any reason for this to happen?
Avro version 1.10.0 changed ReflectData to sort fields into alphabetical order because the Class.getDeclaredFields() method API specification does not mandate a specific order of the fields that get returned. Some Java implementations return the fields in the order they appear in the Java source file. Some people complained that was a breaking change.
If you don't want the fields sorted into alphabetical order, then you must stay at Avro version 1.9.2 or lower.
I want convert json document into json schema. I googled it but not got the exact idea according to my requirement.
here is JSON
{
"empId":1001,
"firstName":"jonh",
"lastName":"Springer",
"title": "Engineer",
"address": {
"city": "Mumbai",
"street": "FadkeStreet",
"zipCode":"420125",
"privatePhoneNo":{
"privateMobile": "2564875421",
"privateLandLine":"251201546"
}
},
"salary": 150000,
"department":{
"departmentId": 10521,
"departmentName": "IT",
"companyPhoneNo":{
"cMobile": "8655340546",
"cLandLine": "10251215465"
},
"location":{
"name": "mulund",
"locationId": 14500
}
}
}
I want to generate like this
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object",
"title": "Employee",
"properties": {
"empId": {
"type": "integer"
},
"firstName":{
"type":"string"
},
"lastName": {
"type": "string"
},
"title": {
"type": "string"
},
"address": {
"type": "object",
"properties": {
"city": {
"type": "string"
},
"street": {
"type": "string"
},
"zipCode": {
"type": "string"
},
"privatePhoneNo": {
"type": "object",
"properties": {
"privateMobile": {
"type": "string"
},
"privateLandLine": {
"type": "string"
}
}
}
}
},
"salary": {
"type": "number"
},
"department": {
"type": "object",
"properties": {
"departmentId": {
"type": "integer"
},
"departmentName": {
"type": "string"
},
"companyPhoneNo": {
"type": "object",
"properties": {
"cMobile": {
"type": "string"
},
"cLandLine": {
"type": "string"
}
}
},
"location": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"locationId": {
"type": "integer"
}
}
}
}
}
}
}
Is there any library is doing like this or what is another way?
https://github.com/perenecabuto/json_schema_generator
http://jsonschema.net/#/
I'm think this maybe will help
It's been a while since this was asked but I was having the same issue. So far the best solution I have come across is this library:
https://github.com/saasquatch/json-schema-inferrer
I found this from the json-schema doc itself. It has links to implementations for other languages as well:
https://json-schema.org/implementations.html#from-data
Below is the original response:
{
"emails": [
{
"type": "work",
"value": "bjensen#example.com"
}
],
"id": "2819c223-7f76-453a-919d-413861904646",
"phoneNumbers": [
{
"type": "work",
"value": "555-555-8377"
},
{
"type": "business",
"value": "555-555-8377"
}
],
"schemas": [
"urn:scim:schemas:core:1.0"
],
"userName": "bjensen"
}
And in the above response I would pass excludedAttributes=phoneNumbers.type and the response should be like below:
{
"emails": [
{
"type": "work",
"value": "bjensen#example.com"
}
],
"id": "2819c223-7f76-453a-919d-413861904646",
"phoneNumbers": [
{
"value": "555-555-8377"
},
{
"value": "555-555-8377"
}
],
"schemas": [
"urn:scim:schemas:core:1.0"
],
"userName": "bjensen"
}
The ES index consists of 2 types that are implicitly mapped (default mapping). One type is "person" or an author, the 2nd type is "document".
The index has some 500k entries.
What I have to do is: implement an autocomplete (suggestions) functionality where only the fields "title", "classification" (document) and "name" (author) are relevant for the suggestions shown to the user.
Could it be done without changing the 500k docs in the index?
I found some tutorials that suggest preparing a specific mapping and also altering the documents (this I want to avoid if possible) and so on but I am new to this and I am not sure how to go about the this problem?
Below is the JSON for the index, and how the documents look:
//a Document
{
"rawsource": "Phys.Rev. D67 (2003) 084031",
"pubyear": 2003,
"citedFrom": 19,
"topics": [
{
"name": "General Relativity and Quantum Cosmology"
}
],
"cited": [
{
"ref": 0,
"id": "PN132433"
},
{
"ref": 1,
"id": "PN206900"
}
],
"id": "PN120001",
"collection": "PN",
"source": "Phys Rev D",
"classification": "Physics",
"title": "Observables in causal set cosmology",
"url": "http://arxiv.org/abs/gr-qc/0210061",
"authors": [
{
"name": "Brightwell, Graham"
},
{
"name": "Dowker, H. Fay"
},
{
"name": "Garcia, Raquel S."
},
{
"name": "Henson, Joe"
},
{
"name": "Sorkin, Rafael D."
}
]
}
//a Person (author)
{
"name": "Terasawa, M.",
"documents": [
{
"citedFrom": 0,
"id": "PN039187"
}
],
"coAuthors": [
{
"name": "Famiano, M. A.",
"count": "1"
},
{
"name": "Boyd, R. N.",
"count": "1"
}
],
"topics": [
{
"name": "Astrophysics",
"count": "1"
}
]
}
//the mapping (implicit/default)
{
"dlsnew": {
"aliases": {
},
"mappings": {
"person": {
"properties": {
"coAuthors": {
"properties": {
"count": {
"type": "string"
},
"name": {
"type": "string"
}
}
},
"documents": {
"properties": {
"citedFrom": {
"type": "long"
},
"id": {
"type": "string"
}
}
},
"name": {
"type": "string"
},
"referenced": {
"properties": {
"count": {
"type": "string"
},
"id": {
"type": "string"
}
}
},
"topics": {
"properties": {
"count": {
"type": "string"
},
"name": {
"type": "string"
}
}
}
}
},
"document": {
"properties": {
"abstract": {
"type": "string"
},
"authors": {
"properties": {
"name": {
"type": "string"
}
}
},
"cited": {
"properties": {
"id": {
"type": "string"
},
"ref": {
"type": "long"
}
}
},
"citedFrom": {
"type": "long"
},
"classification": {
"type": "string"
},
"collection": {
"type": "string"
},
"id": {
"type": "string"
},
"pubyear": {
"type": "long"
},
"rawsource": {
"type": "string"
},
"source": {
"type": "string"
},
"title": {
"type": "string"
},
"topics": {
"properties": {
"name": {
"type": "string"
}
}
},
"url": {
"type": "string"
}
}
}
},
"settings": {
"index": {
"creation_date": "1454247029258",
"number_of_shards": "5",
"uuid": "k_CyQaxwSAaae67wW98HyQ",
"version": {
"created": "1050299"
},
"number_of_replicas": "1"
}
},
"warmers": {
}
}
}
The implementation is to be done using JAVA and the Vaadin Framework (this is not relevant at this point, but examples in Java/Vaadin will be most welcomed).
Thanks.
So, I think I solved my problem on the Elasticsearch side or at least to a good enough extend for me and the task at hand. I followed this ruby example.
I had to re-index all documents to accommodate the new settings for my index and to change my mapping explicitly.
They key is in defining proper analyzers and an edgeNGram filter in this case, like so:
"settings": {
"index": {
"analysis": {
"filter": {
"def_ngram_filter": {
"min_gram": "1",
"side": "front",
"type": "edgeNGram",
"max_gram": "16"
}
},
"analyzer": {
"def_search_analyzer": {
"filter": [
"lowercase",
"asciifolding"
],
"type": "custom",
"tokenizer": "def_tokenizer"
},
"def_ngram_analyzer": {
"filter": [
"lowercase",
"asciifolding",
"def_ngram_filter"
],
"type": "custom",
"tokenizer": "def_tokenizer"
},
"def_shingle_analyzer": {
"filter": [
"shingle",
"lowercase",
"asciifolding"
],
"type": "custom",
"tokenizer": "def_tokenizer"
},
"def_default_analyzer": {
"filter": [
"lowercase",
"asciifolding"
],
"type": "custom",
"tokenizer": "def_tokenizer"
}
},
"tokenizer": {
"def_tokenizer": {
"type": "whitespace"
}
}
}
}
}
and the use these in the mapping for the fields to be searched, like so:
"mappings": {
"person": {
"properties": {
"coAuthors": {
"properties": {
"count": {
"type": "string"
},
"name": {
"type": "string"
}
}
},
"documents": {
"properties": {
"citedFrom": {
"type": "long"
},
"id": {
"type": "string"
}
}
},
"name": {
"type": "string",
"analyzer": "def_default_analyzer",
"fields": {
"ngrams": {
"type": "string",
"index_analyzer": "def_ngram_analyzer",
"search_analyzer": "def_search_analyzer"
},
"shingles": {
"type": "string",
"analyzer": "def_shingle_analyzer"
},
"stemmed": {
"type": "string",
"analyzer": "def_snowball_analyzer"
}
}
},
"referenced": {
"properties": {
"count": {
"type": "string"
},
"id": {
"type": "string"
}
}
},
"topics": {
"properties": {
"count": {
"type": "string"
},
"name": {
"type": "string"
}
}
}
}
},
"document": {
"properties": {
"abstract": {
"type": "string"
},
"authors": {
"properties": {
"name": {
"type": "string",
"analyzer": "def_default_analyzer",
"fields": {
"ngrams": {
"type": "string",
"index_analyzer": "def_ngram_analyzer",
"search_analyzer": "def_search_analyzer"
},
"shingles": {
"type": "string",
"analyzer": "def_shingle_analyzer"
},
"stemmed": {
"type": "string",
"analyzer": "def_snowball_analyzer"
}
}
}
}
},
"cited": {
"properties": {
"id": {
"type": "string"
},
"ref": {
"type": "long"
}
}
},
"citedFrom": {
"type": "long"
},
"classification": {
"type": "string"
},
"collection": {
"type": "string"
},
"id": {
"type": "string"
},
"pubyear": {
"type": "long"
},
"rawsource": {
"type": "string"
},
"source": {
"type": "string"
},
"title": {
"type": "string",
"analyzer": "def_default_analyzer",
"fields": {
"ngrams": {
"type": "string",
"index_analyzer": "def_ngram_analyzer",
"search_analyzer": "def_search_analyzer"
},
"shingles": {
"type": "string",
"analyzer": "def_shingle_analyzer"
},
"stemmed": {
"type": "string",
"analyzer": "def_snowball_analyzer"
}
}
},
"topics": {
"properties": {
"name": {
"type": "string",
"analyzer": "def_default_analyzer",
"fields": {
"ngrams": {
"type": "string",
"index_analyzer": "def_ngram_analyzer",
"search_analyzer": "def_search_analyzer"
},
"shingles": {
"type": "string",
"analyzer": "def_shingle_analyzer"
},
"stemmed": {
"type": "string",
"analyzer": "def_snowball_analyzer"
}
}
}
}
},
"url": {
"type": "string"
}
}
}
}
then querying the index with the following works as expected:
curl -XGET "http://localhost:9200/_search " -d'
{
"size": 5,
"query": {
"multi_match": {
"query": "physics",
"type": "most_fields",
"fields": [
"document.title^10",
"document.title.shingles^2",
"document.title.ngrams",
"person.name^10",
"person.name.shingles^2",
"person.name.ngrams",
"document.topics.name^10",
"document.topics.name.shingles^2",
"document.topics.name.ngrams"
],
"operator": "and"
}
}
}'
Hope this will help someone, it is probably not the best example as I am a complete noob to this, but it worked for me.
There exist different Autocomplete components for Vaadin.
Have a look at this link.
Depending on which Add-On you choose, the databinding is done differently, but you have to "connect" it to your index.
I want convert json document into json schema. I googled it but not got the exact idea according to my requirement.
here is JSON
{
"empId":1001,
"firstName":"jonh",
"lastName":"Springer",
"title": "Engineer",
"address": {
"city": "Mumbai",
"street": "FadkeStreet",
"zipCode":"420125",
"privatePhoneNo":{
"privateMobile": "2564875421",
"privateLandLine":"251201546"
}
},
"salary": 150000,
"department":{
"departmentId": 10521,
"departmentName": "IT",
"companyPhoneNo":{
"cMobile": "8655340546",
"cLandLine": "10251215465"
},
"location":{
"name": "mulund",
"locationId": 14500
}
}
}
I want to generate like this
{
"$schema": "http://json-schema.org/draft-04/schema#",
"type": "object",
"title": "Employee",
"properties": {
"empId": {
"type": "integer"
},
"firstName":{
"type":"string"
},
"lastName": {
"type": "string"
},
"title": {
"type": "string"
},
"address": {
"type": "object",
"properties": {
"city": {
"type": "string"
},
"street": {
"type": "string"
},
"zipCode": {
"type": "string"
},
"privatePhoneNo": {
"type": "object",
"properties": {
"privateMobile": {
"type": "string"
},
"privateLandLine": {
"type": "string"
}
}
}
}
},
"salary": {
"type": "number"
},
"department": {
"type": "object",
"properties": {
"departmentId": {
"type": "integer"
},
"departmentName": {
"type": "string"
},
"companyPhoneNo": {
"type": "object",
"properties": {
"cMobile": {
"type": "string"
},
"cLandLine": {
"type": "string"
}
}
},
"location": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"locationId": {
"type": "integer"
}
}
}
}
}
}
}
Is there any library is doing like this or what is another way?
https://github.com/perenecabuto/json_schema_generator
http://jsonschema.net/#/
I'm think this maybe will help
It's been a while since this was asked but I was having the same issue. So far the best solution I have come across is this library:
https://github.com/saasquatch/json-schema-inferrer
I found this from the json-schema doc itself. It has links to implementations for other languages as well:
https://json-schema.org/implementations.html#from-data