Revision 57507
Added by Claudio Atzori over 4 years ago
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/dataexport/ProtoConverterTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.dataexport; |
|
2 |
|
|
3 |
import com.fasterxml.jackson.databind.ObjectMapper;
import com.googlecode.protobuf.format.JsonFormat;
import eu.dnetlib.data.proto.OafProtos;
import eu.dnetlib.dhp.schema.oaf.*;
import org.apache.commons.io.IOUtils;
import org.junit.Test;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
|
14 |
|
|
15 |
public class ProtoConverterTest { |
|
16 |
|
|
17 |
|
|
18 |
@Test |
|
19 |
public void convertDatasourceTest() throws Exception { |
|
20 |
Oaf result = getOaf("/eu/dnetlib/data/mapreduce/hbase/dataexport/datasource.json"); |
|
21 |
assertNotNull(result); |
|
22 |
assertTrue(result instanceof Datasource); |
|
23 |
Datasource ds = (Datasource) result; |
|
24 |
assertNotNull(ds.getId()); |
|
25 |
|
|
26 |
System.out.println(ds.getId()); |
|
27 |
|
|
28 |
|
|
29 |
ObjectMapper mapper = new ObjectMapper(); |
|
30 |
System.out.println(mapper.writeValueAsString(result)); |
|
31 |
} |
|
32 |
|
|
33 |
|
|
34 |
@Test |
|
35 |
public void convertOrganizationTest() throws Exception { |
|
36 |
|
|
37 |
Oaf result = getOaf("/eu/dnetlib/data/mapreduce/hbase/dataexport/organization.json"); |
|
38 |
assertNotNull(result); |
|
39 |
assertTrue(result instanceof Organization); |
|
40 |
Organization ds = (Organization) result; |
|
41 |
assertNotNull(ds.getId()); |
|
42 |
|
|
43 |
System.out.println(ds.getId()); |
|
44 |
|
|
45 |
|
|
46 |
ObjectMapper mapper = new ObjectMapper(); |
|
47 |
System.out.println(mapper.writeValueAsString(result)); |
|
48 |
|
|
49 |
} |
|
50 |
|
|
51 |
@Test |
|
52 |
public void convertPublicationTest() throws Exception { |
|
53 |
Oaf result = getOaf("/eu/dnetlib/data/mapreduce/hbase/dataexport/publication.json"); |
|
54 |
|
|
55 |
assertNotNull(result); |
|
56 |
assertTrue(result instanceof Publication); |
|
57 |
Publication p = (Publication) result; |
|
58 |
|
|
59 |
ObjectMapper mapper = new ObjectMapper(); |
|
60 |
System.out.println(mapper.writeValueAsString(p)); |
|
61 |
|
|
62 |
} |
|
63 |
|
|
64 |
@Test |
|
65 |
public void convertDatasetTest() throws Exception { |
|
66 |
Oaf result = getOaf("/eu/dnetlib/data/mapreduce/hbase/dataexport/dataset.json"); |
|
67 |
|
|
68 |
assertNotNull(result); |
|
69 |
assertTrue(result instanceof Dataset); |
|
70 |
Dataset d = (Dataset) result; |
|
71 |
|
|
72 |
ObjectMapper mapper = new ObjectMapper(); |
|
73 |
System.out.println(mapper.writeValueAsString(d)); |
|
74 |
|
|
75 |
} |
|
76 |
|
|
77 |
@Test |
|
78 |
public void convertORPTest() throws Exception { |
|
79 |
Oaf result = getOaf("/eu/dnetlib/data/mapreduce/hbase/dataexport/orp.json"); |
|
80 |
|
|
81 |
assertNotNull(result); |
|
82 |
assertTrue(result instanceof OtherResearchProduct); |
|
83 |
OtherResearchProduct orp = (OtherResearchProduct) result; |
|
84 |
|
|
85 |
ObjectMapper mapper = new ObjectMapper(); |
|
86 |
System.out.println(mapper.writeValueAsString(orp)); |
|
87 |
|
|
88 |
} |
|
89 |
|
|
90 |
@Test |
|
91 |
public void convertSoftware() throws Exception { |
|
92 |
Oaf result = getOaf("/eu/dnetlib/data/mapreduce/hbase/dataexport/software.json"); |
|
93 |
|
|
94 |
assertNotNull(result); |
|
95 |
assertTrue(result instanceof Software); |
|
96 |
Software s = (Software) result; |
|
97 |
|
|
98 |
ObjectMapper mapper = new ObjectMapper(); |
|
99 |
System.out.println(mapper.writeValueAsString(s)); |
|
100 |
|
|
101 |
} |
|
102 |
|
|
103 |
private Oaf getOaf(String s2) throws IOException { |
|
104 |
final String json = IOUtils.toString(this.getClass().getResourceAsStream(s2)); |
|
105 |
|
|
106 |
final OafProtos.Oaf.Builder b = OafProtos.Oaf.newBuilder(); |
|
107 |
JsonFormat.merge(json, b); |
|
108 |
|
|
109 |
return ProtoConverter.convert(b.build()); |
|
110 |
} |
|
111 |
|
|
112 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/mapreduce/hbase/dataexport/publication.json | ||
---|---|---|
1 |
{"kind": "entity","entity": {"type": "result","result": {"metadata": {"title": [{"value": "SILK PRINTING WITH RECENT DEVELOPMENTS","qualifier": {"classid": "main title","classname": "main title","schemeid": "dnet:dataCite_title","schemename": "dnet:dataCite_title"}},{"value": "Son Gelişmelerle İpek Baskıcılığı","qualifier": {"classid": "main title","classname": "main title","schemeid": "dnet:dataCite_title","schemename": "dnet:dataCite_title"}}],"dateofacceptance": {"value": "1987-06-01"},"publisher": {"value": "Tekstil Mühendisleri Odası"},"resulttype": {"classid": "publication","classname": "publication","schemeid": "dnet:result_typologies","schemename": "dnet:result_typologies"},"language": {"classid": "tur","classname": "Turkish","schemeid": "dnet:languages","schemename": "dnet:languages"},"journal": {"name": "Tekstil ve Mühendis","issnPrinted": "1300-7599"},"format": [{"value": "application/pdf"},{"value": "application/pdf"}],"description": [{"value": " "},{"value": " "}],"source": [{"value": "Tekstil ve Mühendis; Yıl: 1987 Cilt: 1 Sayı: 4"},{"value": "2147-0510"},{"value": "1300-7599"}],"author": [{"fullname": "YAKARTEPE, Mehmet","name": "Mehmet","surname": "Yakartepe","rank": 1},{"fullname": "YAKARTEPE, Zerrin","name": "Zerrin","surname": "Yakartepe","rank": 2}]},"instance": [{"accessright": {"classid": "OPEN","classname": "Open Access","schemeid": "dnet:access_modes","schemename": "dnet:access_modes"},"instancetype": {"classid": "0001","classname": "Article","schemeid": "dnet:publication_resource","schemename": "dnet:publication_resource"},"hostedby": {"key": "10|tubitakulakb::34a91944da68f59ebc51994b4db64cda","value": "Tekstil ve Mühendis"},"url": ["http://dergi.tekstilvemuhendis.org.tr/article/view/5000000711"],"collectedfrom": {"key": "10|openaire____::85e51732975595215ae3c2514e272ce6","value": "TÜBİTAK ULAKBİM DergiPark"},"dateofacceptance": {"value": "1987-06-01"}}]},"originalId": ["oai:dergipark.ulakbim.gov.tr:record/124507"],"collectedfrom": [{"key": 
"10|openaire____::85e51732975595215ae3c2514e272ce6","value": "TÜBİTAK ULAKBİM DergiPark"}],"dateofcollection": "2019-07-29T15:35:19Z","id": "50|tubitakulakb::7fe767f5f1dfd5bbe0a3e5e9b2a10cc9","dateoftransformation": "","oaiprovenance": {"originDescription": {"harvestDate": "2018-10-13T09:48:19.806Z","altered": true,"baseURL": "http://dergipark.ulakbim.gov.tr/v2/harvester/index.php/oai","identifier": "oai:dergipark.ulakbim.gov.tr:record/124507","datestamp": "2018-10-13T09:48:19Z","metadataNamespace": "http://www.openarchives.org/OAI/2.0/oai_dc/"}}},"dataInfo": {"inferred": true,"deletedbyinference": true,"trust": "0.9","inferenceprovenance": "dedup-similarity-result-levenstein","provenanceaction": {"classid": "sysimport:crosswalk:repository","classname": "sysimport:crosswalk:repository","schemeid": "dnet:provenanceActions","schemename": "dnet:provenanceActions"},"invisible": false}} |
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/mapreduce/hbase/dataexport/datasource.json | ||
---|---|---|
1 |
{ |
|
2 |
"kind": "entity", |
|
3 |
"entity": { |
|
4 |
"type": "datasource", |
|
5 |
"datasource": { |
|
6 |
"metadata": { |
|
7 |
"officialname": { |
|
8 |
"value": "CRIS UNS (Current Research Information System University of Novi Sad)" |
|
9 |
}, |
|
10 |
"englishname": { |
|
11 |
"value": "CRIS UNS (Current Research Information System University of Novi Sad)" |
|
12 |
}, |
|
13 |
"websiteurl": { |
|
14 |
"value": "https://cris.uns.ac.rs/" |
|
15 |
}, |
|
16 |
"accessinfopackage": [ |
|
17 |
{ |
|
18 |
"value": "https://cris.uns.ac.rs/OAIHandlerOpenAIRECRIS" |
|
19 |
} |
|
20 |
], |
|
21 |
"namespaceprefix": { |
|
22 |
"value": "CrisUnsNoviS" |
|
23 |
}, |
|
24 |
"datasourcetype": { |
|
25 |
"classid": "crissystem", |
|
26 |
"classname": "CRIS System", |
|
27 |
"schemeid": "dnet:datasource_typologies", |
|
28 |
"schemename": "dnet:datasource_typologies" |
|
29 |
}, |
|
30 |
"openairecompatibility": { |
|
31 |
"classid": "openaire-cris_1.1", |
|
32 |
"classname": "OpenAIRE CRIS v1.1", |
|
33 |
"schemeid": "dnet:datasourceCompatibilityLevel", |
|
34 |
"schemename": "dnet:datasourceCompatibilityLevel" |
|
35 |
}, |
|
36 |
"latitude": { |
|
37 |
"value": "0.0" |
|
38 |
}, |
|
39 |
"longitude": { |
|
40 |
"value": "0.0" |
|
41 |
}, |
|
42 |
"journal": { |
|
43 |
"issnPrinted": "", |
|
44 |
"issnOnline": "", |
|
45 |
"issnLinking": "" |
|
46 |
} |
|
47 |
} |
|
48 |
}, |
|
49 |
"originalId": [ |
|
50 |
"CRIS_UNS____::openaire" |
|
51 |
], |
|
52 |
"collectedfrom": [ |
|
53 |
{ |
|
54 |
"key": "", |
|
55 |
"value": "" |
|
56 |
} |
|
57 |
], |
|
58 |
"dateofcollection": "2019-04-04", |
|
59 |
"id": "10|CRIS_UNS____::f66f1bd369679b5b077dcdf006089556", |
|
60 |
"dateoftransformation": "" |
|
61 |
}, |
|
62 |
"dataInfo": { |
|
63 |
"inferred": false, |
|
64 |
"deletedbyinference": false, |
|
65 |
"trust": "0.9", |
|
66 |
"provenanceaction": { |
|
67 |
"classid": "sysimport:crosswalk:entityregistry", |
|
68 |
"classname": "sysimport:crosswalk:entityregistry", |
|
69 |
"schemeid": "dnet:provenance_actions", |
|
70 |
"schemename": "dnet:provenance_actions" |
|
71 |
} |
|
72 |
} |
|
73 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/mapreduce/hbase/dataexport/software.json | ||
---|---|---|
1 |
{"kind":1,"dataInfo":{"deletedbyinference":true,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemename":"dnet:provenanceActions","schemeid":"dnet:provenanceActions"},"inferred":true,"inferenceprovenance":"dedup-similarity-result-levenstein","invisible":false,"trust":"0.9"},"entity":{"dateoftransformation":"","pid":[{"qualifier":{"classid":"doi","classname":"doi","schemename":"dnet:pid_types","schemeid":"dnet:pid_types"},"value":"https://doi.org/10.5281/zenodo.27315"},{"qualifier":{"classid":"doi","classname":"doi","schemename":"dnet:pid_types","schemeid":"dnet:pid_types"},"value":"10.5281/zenodo.27315"}],"originalId":["http://dx.doi.org/10.5281/zenodo.27315","https://zenodo.org/record/27315","10.5281/zenodo.27315","https://doi.org/10.5281/zenodo.27315"],"oaiprovenance":{"originDescription":{"metadataNamespace":"","altered":true,"baseURL":"http://ip-90-147-167-25.ct1.garrservices.it:5000","datestamp":"","harvestDate":"2019-08-06T15:52:43.503Z","identifier":"10.5281/zenodo.27315"}},"result":{"instance":[{"hostedby":{"value":"Unknown Repository","key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c"},"license":{"value":""},"url":["http://dx.doi.org/10.5281/zenodo.27315"],"distributionlocation":"","dateofacceptance":{"value":"2015-01-01"},"collectedfrom":{"value":"Datacite","key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254"},"accessright":{"classid":"OPEN","classname":"Open Access","schemename":"dnet:access_modes","schemeid":"dnet:access_modes"},"instancetype":{"classid":"0029","classname":"Software","schemename":"dnet:dataCite_resource","schemeid":"dnet:dataCite_resource"}}],"metadata":{"publisher":{"value":"Zenodo"},"description":[{"value":"Welcome to khmer: k-mer counting, filtering and graph traversal FTW!\n\nDocumentation Status PyPI Package Downloads Counter License http://ci.ged.msu.edu/job/khmer-master/badge/icon Coverity Scan Build Status\nThe official repository is 
at\n\nhttps://github.com/dib-lab/khmer\n\nand you can read the docs online here:\n\nhttp://khmer.readthedocs.org/\n\nThere are two mailing lists dedicated to khmer, an announcements-only list and a discussion list. To search their archives and sign-up for them, please visit the following URLs:\n\nDiscussion: http://lists.idyll.org/listinfo/khmer\nAnnouncements: http://lists.idyll.org/listinfo/khmer-announce\nWe chat at https://gitter.im/dib-lab/khmer and the maintainers can be contacted at khmer-project@idyll.org.\n\nFor getting help with please see this guide: http://khmer.readthedocs.org/user/getting-help.html\n\nIMPORTANT NOTE: CITE US!\n\nkhmer is research software, so you should cite us when you use it in scientific publications! Please see the CITATION file for citation information.\n\nINSTALL INSTRUCTIONS:\n\nkhmer requires a 64-bit operating system and Python 2.7.x. Linux users will need the Python development libraries and gcc. OS X users may need XCode installed.\n\nIn short:\n\npip install khmer to download, build, and install the latest stable version.\n\nFor more details see doc/install.txt\n\nThe use of a virtualenv is recommended, see https://virtualenv.readthedocs.org/en/latest/installation.html\n\nkhmer is under the BSD license; see doc/LICENSE.txt. Distribution, modification and redistribution, incorporation into other software, and pretty much everything else is allowed.\n\nMRC 2014-05-14"}],"license":[{"value":""}],"title":[{"qualifier":{"classid":"main title","classname":"main title","schemename":"dnet:dataCite_title","schemeid":"dnet:dataCite_title"},"value":"Khmer: Khmer/ The Khmer Software Package: Enabling Efficient Nucleotide Sequence Analysis"}],"programmingLanguage":{"classid":"","classname":"","schemename":"dnet:programming_languages","schemeid":"dnet:programming_languages"},"author":[{"fullname":"C. 
Titus Brown","surname":"Titus Brown","name":"C.","rank":1},{"fullname":"Crusoe, Michael R.","surname":"Crusoe","name":"Michael R.","rank":2},{"fullname":"Fenton, Jake","surname":"Fenton","name":"Jake","rank":3},{"fullname":"McDonald, Eric","surname":"Mcdonald","name":"Eric","rank":4},{"fullname":"Scott, Camille","surname":"Scott","name":"Camille","rank":5},{"fullname":"Luiz Irber","rank":6},{"fullname":"Murray, Kevin","surname":"Murray","name":"Kevin","rank":7},{"fullname":"Jasonpell","rank":8},{"fullname":"Mansour, Tamer","surname":"Mansour","name":"Tamer","rank":9},{"fullname":"Qingpeng Zhang","rank":10},{"fullname":", Jordan","rank":11},{"fullname":"Standage, Daniel","surname":"Standage","name":"Daniel","rank":12},{"fullname":"Kidd, Rhys","surname":"Kidd","name":"Rhys","rank":13},{"fullname":"Jessicamizzi","rank":14},{"fullname":"Fay, Scott A.","surname":"Fay","name":"Scott A.","rank":15},{"fullname":"Wright, Michael","surname":"Wright","name":"Michael","rank":16},{"fullname":"Guermond, Sarah","surname":"Guermond","name":"Sarah","rank":17},{"fullname":"Bucher, Elmar","surname":"Bucher","name":"Elmar","rank":18},{"fullname":"Lippi, Justin","surname":"Lippi","name":"Justin","rank":19},{"fullname":"Anotherthomas","rank":20},{"fullname":"Srinivasan, Ram","surname":"Srinivasan","name":"Ram","rank":21},{"fullname":"Härpfer, Andreas","surname":"Härpfer","name":"Andreas","rank":22},{"fullname":"Leogargu","rank":23},{"fullname":"Taylor, Ben","surname":"Taylor","name":"Ben","rank":24},{"fullname":"Garland, Phillip","surname":"Garland","name":"Phillip","rank":25},{"fullname":"Alameldin","rank":26},{"fullname":"Jiarong","rank":27},{"fullname":"Kaben Nanlohy","rank":28},{"fullname":"Aditi9783","rank":29},{"fullname":"Hyer, 
Alex","surname":"Hyer","name":"Alex","rank":30}],"resulttype":{"classid":"software","classname":"software","schemename":"dnet:result_typologies","schemeid":"dnet:result_typologies"},"version":{"value":"None"},"storagedate":{"value":"2015-08-10"},"dateofacceptance":{"value":"2015-01-01"},"size":{"value":""}}},"collectedfrom":[{"value":"Datacite","key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254"}],"dateofcollection":"2018-10-28T00:39:04.337Z","type":50,"id":"50|datacite____::00057c034f36d2d2990b3a2d2db56fc6"}} |
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/mapreduce/hbase/dataexport/orp.json | ||
---|---|---|
1 |
{"kind":1,"dataInfo":{"trust":"0.9","invisible":false,"deletedbyinference":false,"inferred":false,"provenanceaction":{"classid":"sysimport:crosswalk:datasetarchive","classname":"sysimport:crosswalk:datasetarchive","schemename":"dnet:provenanceActions","schemeid":"dnet:provenanceActions"}},"entity":{"dateoftransformation":"","pid":[{"qualifier":{"classid":"doi","classname":"doi","schemename":"dnet:pid_types","schemeid":"dnet:pid_types"},"value":"10.3203/iwf/c-13106eng"},{"qualifier":{"classid":"doi","classname":"doi","schemename":"dnet:pid_types","schemeid":"dnet:pid_types"},"value":"https://doi.org/10.3203/iwf/c-13106eng"}],"originalId":["https://doi.org/10.3203/iwf/c-13106eng","http://dx.doi.org/10.3203/iwf/c-13106eng","10.3203/iwf/c-13106eng"],"oaiprovenance":{"originDescription":{"metadataNamespace":"","altered":true,"baseURL":"https%3A%2F%2Foai.datacite.org%2Foai","datestamp":"","harvestDate":"2019-04-03T17:58:12.853Z","identifier":"10.3203/iwf/c-13106eng"}},"result":{"instance":[{"hostedby":{"value":"Unknown Repository","key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c"},"license":{"value":""},"url":["http://dx.doi.org/10.3203/iwf/c-13106eng"],"distributionlocation":"","dateofacceptance":{"value":"2007-01-01"},"collectedfrom":{"value":"Datacite","key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254"},"accessright":{"classid":"UNKNOWN","classname":"UNKNOWN","schemename":"dnet:access_modes","schemeid":"dnet:access_modes"},"instancetype":{"classid":"0000","classname":"Unknown","schemename":"dnet:dataCite_resource","schemeid":"dnet:dataCite_resource"}}],"metadata":{"publisher":{"value":"IWF (Göttingen)"},"license":[{"value":""}],"description":[{"value":"A 2D animation explains the molecular structure of histone octamers. From the CD-ROM: BEREITER-HAHN, JÜRGEN / PETERS, WINFRIED S. (Frankfurt a. M.). 
The Cell IV - Nucleus of Life - From Gene to Proteins (C 7103)"}],"language":{"classid":"eng","classname":"English","schemename":"dnet:languages","schemeid":"dnet:languages"},"title":[{"qualifier":{"classid":"main title","classname":"main title","schemename":"dnet:dataCite_title","schemeid":"dnet:dataCite_title"},"value":"Histone Octamer"}],"author":[{"fullname":"IWF","rank":1}],"resulttype":{"classid":"other","classname":"other","schemename":"dnet:result_typologies","schemeid":"dnet:result_typologies"},"version":{"value":"None"},"storagedate":{"value":"2007"},"dateofacceptance":{"value":"2007-01-01"},"size":{"value":""},"subject":[{"qualifier":{"classid":"keyword","classname":"keyword","schemename":"dnet:subject_classification_typologies","schemeid":"dnet:subject_classification_typologies"},"value":"Life Sciences"},{"qualifier":{"classid":"keyword","classname":"keyword","schemename":"dnet:subject_classification_typologies","schemeid":"dnet:subject_classification_typologies"},"value":"histone"},{"qualifier":{"classid":"keyword","classname":"keyword","schemename":"dnet:subject_classification_typologies","schemeid":"dnet:subject_classification_typologies"},"value":"nucleosome"}]}},"collectedfrom":[{"value":"Datacite","key":"10|openaire____::9e3be59865b2c1c335d32dae2fe7b254"}],"dateofcollection":"2018-10-28T00:39:04.337Z","type":50,"id":"50|datacite____::0000228dcefe42612ec4bd83810fe348"}} |
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/mapreduce/hbase/dataexport/organization.json | ||
---|---|---|
1 |
{ |
|
2 |
"kind": "entity", |
|
3 |
"entity": { |
|
4 |
"type": "organization", |
|
5 |
"organization": { |
|
6 |
"metadata": { |
|
7 |
"legalname": { |
|
8 |
"value": "University of Utrecht" |
|
9 |
}, |
|
10 |
"eclegalbody": { |
|
11 |
"value": "false" |
|
12 |
}, |
|
13 |
"eclegalperson": { |
|
14 |
"value": "false" |
|
15 |
}, |
|
16 |
"ecnonprofit": { |
|
17 |
"value": "false" |
|
18 |
}, |
|
19 |
"ecresearchorganization": { |
|
20 |
"value": "false" |
|
21 |
}, |
|
22 |
"echighereducation": { |
|
23 |
"value": "false" |
|
24 |
}, |
|
25 |
"ecinternationalorganizationeurinterests": { |
|
26 |
"value": "false" |
|
27 |
}, |
|
28 |
"ecinternationalorganization": { |
|
29 |
"value": "false" |
|
30 |
}, |
|
31 |
"ecenterprise": { |
|
32 |
"value": "false" |
|
33 |
}, |
|
34 |
"ecsmevalidated": { |
|
35 |
"value": "false" |
|
36 |
}, |
|
37 |
"ecnutscode": { |
|
38 |
"value": "false" |
|
39 |
}, |
|
40 |
"country": { |
|
41 |
"classid": "FI", |
|
42 |
"classname": "Finland", |
|
43 |
"schemeid": "dnet:countries", |
|
44 |
"schemename": "dnet:countries" |
|
45 |
} |
|
46 |
} |
|
47 |
}, |
|
48 |
"originalId": [ |
|
49 |
"aka_________::f88cc5f874ff27f0fd6e7cb24842e9fb" |
|
50 |
], |
|
51 |
"collectedfrom": [ |
|
52 |
{ |
|
53 |
"key": "10|openaire____::6ac933301a3933c8a22ceebea7000326", |
|
54 |
"value": "Academy of Finland" |
|
55 |
} |
|
56 |
], |
|
57 |
"dateofcollection": "2018-09-28", |
|
58 |
"id": "20|aka_________::0070a5080d7092f960fb33c8a9fca016", |
|
59 |
"dateoftransformation": "2019-04-16" |
|
60 |
}, |
|
61 |
"dataInfo": { |
|
62 |
"inferred": true, |
|
63 |
"deletedbyinference": true, |
|
64 |
"trust": "0.9", |
|
65 |
"inferenceprovenance": "dedup-similarity-organization-simple", |
|
66 |
"provenanceaction": { |
|
67 |
"classid": "sysimport:crosswalk:entityregistry", |
|
68 |
"classname": "sysimport:crosswalk:entityregistry", |
|
69 |
"schemeid": "dnet:provenance_actions", |
|
70 |
"schemename": "dnet:provenance_actions" |
|
71 |
} |
|
72 |
} |
|
73 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/mapreduce/hbase/dataexport/dataset.json | ||
---|---|---|
1 |
{"kind":1,"dataInfo":{"deletedbyinference":true,"provenanceaction":{"classid":"sysimport:actionset","classname":"sysimport:actionset","schemename":"dnet:provenanceActions","schemeid":"dnet:provenanceActions"},"inferred":true,"inferenceprovenance":"dedup-similarity-result-levenstein","invisible":false,"trust":"0.9"},"entity":{"pid":[{"qualifier":{"classid":"doi","classname":"doi","schemename":"dnet:pid_types","schemeid":"dnet:pid_types"},"value":"10.5517/cc11xr4v"}],"result":{"instance":[{"url":["http://dx.doi.org/10.5517/cc11xr4v"],"collectedfrom":{"value":"scholExplorer","key":"10|openaire____::e034d6a11054f5ade9221ebac484e864"},"hostedby":{"value":"Unknown Repository","key":"10|openaire____::55045bd2a65019fd8e6741a755395c8c"},"accessright":{"classid":"UNKNOWN","classname":"not available","schemename":"dnet:access_modes","schemeid":"dnet:access_modes"},"instancetype":{"classid":"0000","classname":"Unknown","schemename":"dnet:publication_resource","schemeid":"dnet:publication_resource"}}],"metadata":{"publisher":{"value":"Cambridge Crystallographic Data Centre"},"description":[{"value":"An entry from the Cambridge Structural Database, the world’s repository for small molecule crystal structures. The entry contains experimental data from a crystal diffraction study. 
The deposited dataset for this entry is freely available from the CCDC and typically includes 3D coordinates, cell parameters, space group, experimental conditions and quality measures."}],"language":{"classid":"und","classname":"Undetermined","schemename":"dent:languages","schemeid":"dent:languages"},"title":[{"qualifier":{"classid":"main title","classname":"main title","schemename":"dnet:dataCite_title","schemeid":"dnet:dataCite_title"},"value":"CCDC 980937: Experimental Crystal Structure Determination"}],"author":[{"fullname":"Yuan, Xian-You","rank":1},{"fullname":"Ou, Guang-Chuan","rank":2},{"fullname":"Yuan, Lin","rank":3},{"fullname":"Zhang, Xin-Yu","rank":4},{"fullname":"Zhang, Min","rank":5}],"resulttype":{"classid":"dataset","classname":"dataset","schemename":"dnet:result_typologies","schemeid":"dnet:result_typologies"},"relevantdate":[{"qualifier":{"classid":"dnet:date","classname":"dnet:date","schemename":"dnet:date","schemeid":"dnet:date"},"value":"2016-01-01"}],"subject":[{"qualifier":{"classid":"keyword","classname":"keyword","schemename":"dnet:subject","schemeid":"dnet:subject"},"value":"Experimental 3D Coordinates"},{"qualifier":{"classid":"keyword","classname":"keyword","schemename":"dnet:subject","schemeid":"dnet:subject"},"value":"Crystal Structure"},{"qualifier":{"classid":"keyword","classname":"keyword","schemename":"dnet:subject","schemeid":"dnet:subject"},"value":"(5-ethyl-2-methyl-2-phenyl-1,3-dioxan-5-yl)methanol"},{"qualifier":{"classid":"keyword","classname":"keyword","schemename":"dnet:subject","schemeid":"dnet:subject"},"value":"Crystal System"},{"qualifier":{"classid":"keyword","classname":"keyword","schemename":"dnet:subject","schemeid":"dnet:subject"},"value":"Space Group"},{"qualifier":{"classid":"keyword","classname":"keyword","schemename":"dnet:subject","schemeid":"dnet:subject"},"value":"Cell 
Parameters"},{"qualifier":{"classid":"keyword","classname":"keyword","schemename":"dnet:subject","schemeid":"dnet:subject"},"value":"Crystallography"}]}},"collectedfrom":[{"value":"scholExplorer","key":"10|openaire____::e034d6a11054f5ade9221ebac484e864"}],"dateofcollection":"2019-10-22T14:29:26+00:00","type":50,"id":"50|scholexplore::000023d184acb169596e3e6004abb421"}} |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dataexport/ProtoConverter.java | ||
---|---|---|
3 | 3 |
import com.googlecode.protobuf.format.JsonFormat; |
4 | 4 |
import eu.dnetlib.data.proto.*; |
5 | 5 |
import eu.dnetlib.dhp.schema.oaf.*; |
6 |
import eu.dnetlib.dhp.schema.util.ProtoUtils; |
|
7 | 6 |
|
8 | 7 |
import java.io.Serializable; |
9 | 8 |
import java.util.stream.Collectors; |
... | ... | |
209 | 208 |
.stream() |
210 | 209 |
.map(ProtoConverter::mapStructuredProperty) |
211 | 210 |
.collect(Collectors.toList())); |
212 |
software.setCodeRepositoryUrl(ProtoUtils.mapStringField(m.getCodeRepositoryUrl()));
|
|
213 |
software.setProgrammingLanguage(ProtoUtils.mapQualifier(m.getProgrammingLanguage()));
|
|
211 |
software.setCodeRepositoryUrl(mapStringField(m.getCodeRepositoryUrl())); |
|
212 |
software.setProgrammingLanguage(mapQualifier(m.getProgrammingLanguage())); |
|
214 | 213 |
return software; |
215 | 214 |
} |
216 | 215 |
|
217 |
private static OtherResearchProducts createORP(OafProtos.Oaf oaf) {
|
|
216 |
private static OtherResearchProduct createORP(OafProtos.Oaf oaf) { |
|
218 | 217 |
ResultProtos.Result.Metadata m = oaf.getEntity().getResult().getMetadata(); |
219 |
OtherResearchProducts otherResearchProducts = setOaf(new OtherResearchProducts(), oaf);
|
|
218 |
OtherResearchProduct otherResearchProducts = setOaf(new OtherResearchProduct(), oaf);
|
|
220 | 219 |
setEntity(otherResearchProducts, oaf); |
221 | 220 |
setResult(otherResearchProducts, oaf); |
222 | 221 |
otherResearchProducts.setContactperson(m.getContactpersonList() |
... | ... | |
251 | 250 |
Dataset dataset = setOaf(new Dataset(), oaf); |
252 | 251 |
setEntity(dataset, oaf); |
253 | 252 |
setResult(dataset, oaf); |
254 |
dataset.setStoragedate(ProtoUtils.mapStringField(m.getStoragedate()));
|
|
255 |
dataset.setDevice(ProtoUtils.mapStringField(m.getDevice()));
|
|
256 |
dataset.setSize(ProtoUtils.mapStringField(m.getSize()));
|
|
257 |
dataset.setVersion(ProtoUtils.mapStringField(m.getVersion()));
|
|
258 |
dataset.setLastmetadataupdate(ProtoUtils.mapStringField(m.getLastmetadataupdate()));
|
|
259 |
dataset.setMetadataversionnumber(ProtoUtils.mapStringField(m.getMetadataversionnumber()));
|
|
253 |
dataset.setStoragedate(mapStringField(m.getStoragedate())); |
|
254 |
dataset.setDevice(mapStringField(m.getDevice())); |
|
255 |
dataset.setSize(mapStringField(m.getSize())); |
|
256 |
dataset.setVersion(mapStringField(m.getVersion())); |
|
257 |
dataset.setLastmetadataupdate(mapStringField(m.getLastmetadataupdate())); |
|
258 |
dataset.setMetadataversionnumber(mapStringField(m.getMetadataversionnumber())); |
|
260 | 259 |
dataset.setGeolocation(m.getGeolocationList() |
261 | 260 |
.stream() |
262 | 261 |
.map(ProtoConverter::mapGeolocation) |
... | ... | |
300 | 299 |
.map(ProtoConverter::mapAuthor) |
301 | 300 |
.collect(Collectors.toList())); |
302 | 301 |
entity.setResulttype(mapQualifier(m.getResulttype())); |
303 |
entity.setLanguage(ProtoUtils.mapQualifier(m.getLanguage()));
|
|
302 |
entity.setLanguage(mapQualifier(m.getLanguage())); |
|
304 | 303 |
entity.setCountry(m.getCountryList() |
305 | 304 |
.stream() |
306 | 305 |
.map(ProtoConverter::mapQualifier) |
... | ... | |
321 | 320 |
.stream() |
322 | 321 |
.map(ProtoConverter::mapStringField) |
323 | 322 |
.collect(Collectors.toList())); |
324 |
entity.setDateofacceptance(ProtoUtils.mapStringField(m.getDateofacceptance()));
|
|
325 |
entity.setPublisher(ProtoUtils.mapStringField(m.getPublisher()));
|
|
326 |
entity.setEmbargoenddate(ProtoUtils.mapStringField(m.getEmbargoenddate()));
|
|
323 |
entity.setDateofacceptance(mapStringField(m.getDateofacceptance())); |
|
324 |
entity.setPublisher(mapStringField(m.getPublisher())); |
|
325 |
entity.setEmbargoenddate(mapStringField(m.getEmbargoenddate())); |
|
327 | 326 |
entity.setSource(m.getSourceList() |
328 | 327 |
.stream() |
329 | 328 |
.map(ProtoConverter::mapStringField) |
... | ... | |
340 | 339 |
.stream() |
341 | 340 |
.map(ProtoConverter::mapStringField) |
342 | 341 |
.collect(Collectors.toList())); |
343 |
entity.setResourcetype(ProtoUtils.mapQualifier(m.getResourcetype()));
|
|
342 |
entity.setResourcetype(mapQualifier(m.getResourcetype())); |
|
344 | 343 |
entity.setCoverage(m.getCoverageList() |
345 | 344 |
.stream() |
346 | 345 |
.map(ProtoConverter::mapStringField) |
modules/dnet-mapreduce-jobs/trunk/pom.xml | ||
---|---|---|
236 | 236 |
<dependency> |
237 | 237 |
<groupId>eu.dnetlib.dhp</groupId> |
238 | 238 |
<artifactId>dhp-schemas</artifactId> |
239 |
<version>1.0.2</version>
|
|
239 |
<version>[1.0.0,2.0.0)</version>
|
|
240 | 240 |
<exclusions> |
241 | 241 |
<exclusion> |
242 | 242 |
<groupId>com.google.protobuf</groupId> |
Also available in: Unified diff
added dhp mapping test