Project

General

Profile

« Previous | Next » 

Revision 54831

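Fixed DOIBoost bug: datasource lookups by provenance/collectedFrom are now case-insensitive (map keys and lookup keys are lower-cased). Also sets hostedBy from "hostedByOpenAire" when present, records the DOI as an original ID, and strips the URL prefix from ORCID and MAG identifiers.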

View differences:

modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/actions/DOIBoostToActionsTest.java
33 33

  
34 34
    @Test
35 35
    public void testSingleDOIBoostAction() throws IOException {
36
        doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/broken");
36
        doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/DOIBoostAction.json");
37 37
    }
38 38

  
39 39

  
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/mapreduce/actions/DOIBoostAction.json
1
  {"publisher": "Informa UK Limited", "doi": "10.3109/01443615.2016.1162772", "license": [], "title": ["Relationship between maternal c-reactive protein level and neonatal outcome in patients with preterm premature rupture of membranes treated with Ampicillin and Azithromycin"], "issued": "2016-8-17", "abstract": [{"provenance": "MAG", "value": "This historical cohort study aimed to assess the relationship between antenatal maternal C-reactive protein (CRP) level and neonatal outcome preterm premature rupture of membranes (PPROM). We reviewed the records of 70 singleton pregnancies with PPROM between 24 and 34 weeks. Maternal CRP levels of neonates with respiratory distress syndrome, neonatal sepsis, grade 3–4 intraventricular haemorrhage and stage 2–3 necrotizing enterocolitis, perinatal mortality were compared with those without these complications. Administration of corticosteroid, tocolysis for two days and prophylactic antibiotics (intravenous ampicillin/sulbactam, and oral azithromycin) were the standard management protocol. The mean age at PPROM was 29 weeks 2 days (±3 weeks), the mean age at birth was 30 weeks 5 days (±20 days). CRP levels were not different between groups. Uni/multivariate analysis showed that maternal CRP levels were not related with neonatal outcomes. 
Neonatal complications in PPROM are related with the degree of prema..."}], "issn": [{"type": "print", "value": "0144-3615"}, {"type": "electronic", "value": "1364-6893"}], "doi-url": "http://dx.doi.org/10.3109/01443615.2016.1162772", "instances": [{"url": "http://www.tandfonline.com/doi/pdf/10.3109/01443615.2016.1162772", "provenance": "Crossref", "access-rights": "UNKNOWN"}, {"url": "https://academic.microsoft.com/#/detail/2557067899", "provenance": "MAG", "access-rights": "UNKNOWN"}], "authors": [{"affiliations": [{"official-page": "http://www.erciyes.edu.tr/", "provenance": "MAG", "value": "Erciyes University", "identifiers": [{"value": "http://en.wikipedia.org/wiki/Erciyes_University", "schema": "wikpedia"}, {"value": "grid.411739.9", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/87673952", "schema": "URL"}]}], "given": "Mehmet", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2101755603", "schema": "URL"}], "fullname": "Mehmet Serdar Kutuk", "family": "Serdar Kutuk"}, {"affiliations": [{"official-page": "http://www.erciyes.edu.tr/", "provenance": "MAG", "value": "Erciyes University", "identifiers": [{"value": "http://en.wikipedia.org/wiki/Erciyes_University", "schema": "wikpedia"}, {"value": "grid.411739.9", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/87673952", "schema": "URL"}]}], "given": "Osman", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2136821361", "schema": "URL"}], "fullname": "Osman Bastug", "family": "Bastug"}, {"affiliations": [{"official-page": "http://www.erciyes.edu.tr/", "provenance": "MAG", "value": "Erciyes University", "identifiers": [{"value": "http://en.wikipedia.org/wiki/Erciyes_University", "schema": "wikpedia"}, {"value": "grid.411739.9", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/87673952", "schema": "URL"}]}], "given": "Ahmet", "identifiers": [{"provenance": "MAG", 
"value": "https://academic.microsoft.com/#/detail/2320913235", "schema": "URL"}], "fullname": "Ahmet Ozdemir", "family": "Ozdemir"}, {"affiliations": [{"official-page": "http://www.erciyes.edu.tr/", "provenance": "MAG", "value": "Erciyes University", "identifiers": [{"value": "http://en.wikipedia.org/wiki/Erciyes_University", "schema": "wikpedia"}, {"value": "grid.411739.9", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/87673952", "schema": "URL"}]}], "given": "Mehmet", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2146413771", "schema": "URL"}], "fullname": "Mehmet Adnan Ozturk", "family": "Adnan Ozturk"}, {"affiliations": [{"official-page": "http://www.erciyes.edu.tr/", "provenance": "MAG", "value": "Erciyes University", "identifiers": [{"value": "http://en.wikipedia.org/wiki/Erciyes_University", "schema": "wikpedia"}, {"value": "grid.411739.9", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/87673952", "schema": "URL"}]}], "given": "Mahmut", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2114775934", "schema": "URL"}], "fullname": "Mahmut Tuncay Ozgun", "family": "Tuncay Ozgun"}, {"affiliations": [{"official-page": "http://www.erciyes.edu.tr/", "provenance": "MAG", "value": "Erciyes University", "identifiers": [{"value": "http://en.wikipedia.org/wiki/Erciyes_University", "schema": "wikpedia"}, {"value": "grid.411739.9", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/87673952", "schema": "URL"}]}], "given": "Mustafa", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/1991766482", "schema": "URL"}], "fullname": "Mustafa Basbug", "family": "Basbug"}, {"affiliations": [{"official-page": "http://www.erciyes.edu.tr/", "provenance": "MAG", "value": "Erciyes University", "identifiers": [{"value": "http://en.wikipedia.org/wiki/Erciyes_University", "schema": "wikpedia"}, 
{"value": "grid.411739.9", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/87673952", "schema": "URL"}]}], "given": "Tamer", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/1982055276", "schema": "URL"}], "fullname": "Tamer Gunes", "family": "Gunes"}, {"affiliations": [{"official-page": "http://www.erciyes.edu.tr/", "provenance": "MAG", "value": "Erciyes University", "identifiers": [{"value": "http://en.wikipedia.org/wiki/Erciyes_University", "schema": "wikpedia"}, {"value": "grid.411739.9", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/87673952", "schema": "URL"}]}], "given": "Selim", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/130495355", "schema": "URL"}], "fullname": "Selim Kurtoglu", "family": "Kurtoglu"}], "published-print": "2016-8-17", "collectedFrom": ["Crossref", "MAG"], "accepted": null, "type": "journal-article", "published-online": "2016-11-9", "subject": ["Obstetrics and Gynaecology"]}
1
{"publisher": "Wiley", "doi": "10.1002/(sici)1098-0997(1999)7:5<248::aid-idog8>3.0.co;2-v", "hostedByOpenAire": {"name": "Infectious Diseases in Obstetrics and Gynecology", "id": "doajarticles::1064-7449"}, "license": [{"url": "http://doi.wiley.com/10.1002/tdm_license_1.1", "content-version": "tdm", "\"delay-in-days": null, "date-time": "2015-09-01T00:00:00Z"}, {"url": "https://doi.org/10.1002/(sici)1098-0997(1999)7:5<248::aid-idog8>3.0.co;2-v", "provenance": "UnpayWall", "access-rights": "OPEN"}], "title": ["Pelvic inflammatory disease in the postmenopausal woman"], "issued": "1999-1-1", "abstract": [{"provenance": "MAG", "value": "eJx9kk1vFDEMhv+KjyBtR2JFC7vXtqBKLUVcOHsynhmjTBzFzqzm3+PsAuUAvSRx5I/Hr/3c/6Bg\nvNIRvtHKdAJckSP2kSCyUUGrhUASZIorB+A0RlwWNCkbDKyESv4JWdQWSpKxKkY4iRsd3JHylI7w\ndH/3+PDl/u+U5VJuLLLAu8PNDZj4fTh0DqI1mh7h62slFWoKsiyO9t/yD9b8ssRt4VCkZ4w7kNEo\ntf8gKdRSKBmc2Gaw2ousWBgTYK+BVGGU4oVZ0g4wDS3qEo6qEhiNhkus2EzF4XBKoqQd3HryWNUj\nvY9/wF3CXhMVXaSe1MAKvRTilJ2nMWds6C5ndDTjPx3qmRRzLpJLQwRecOI0gVoduMF98hG3Gbjk\nhTSL+/uzZelZjIOCzlLjABuTnwhRTmCz+87itosCWstEZbvI4koqD22y3q8rBBgdLGFbrBdRfift\nCc7ghpxo8Cml0XfQl4Ud7blXIzc+b4mCxA4+HPfvP17tr/e7X+vx5vbt+fWdI21Xj6y68xSh+wkk\nbARZ\n"}], "issn": [{"type": "print", "value": "1064-7449"}, {"type": "electronic", "value": "1098-0997"}], "doi-url": "http://dx.doi.org/10.1002/(sici)1098-0997(1999)7:5<248::aid-idog8>3.0.co;2-v", "instances": [{"url": "https://api.wiley.com/onlinelibrary/tdm/v1/articles/10.1002%2F(SICI)1098-0997(1999)7:5%3C248::AID-IDOG8%3E3.0.CO;2-V", "provenance": "CrossRef", "access-rights": "UNKNOWN"}, {"url": "https://onlinelibrary.wiley.com/doi/full/10.1002/(SICI)1098-0997(1999)7:5%3C248::AID-IDOG8%3E3.0.CO;2-V", "provenance": "CrossRef", "access-rights": "UNKNOWN"}], "authors": [{"affiliations": [{"official-page": "http://www.musc.edu/", "provenance": "MAG", "value": "Medical University of South Carolina", "identifiers": [{"value": "http://en.wikipedia.org/wiki/Medical_University_of_South_Carolina", 
"schema": "wikpedia"}, {"value": "grid.259828.c", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/153297377", "schema": "URL"}]}], "given": "S.L.", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2517302922", "schema": "URL"}], "fullname": "S.L. Jackson", "family": "Jackson"}, {"affiliations": [{"official-page": "http://www.musc.edu/", "provenance": "MAG", "value": "Medical University of South Carolina", "identifiers": [{"value": "http://en.wikipedia.org/wiki/Medical_University_of_South_Carolina", "schema": "wikpedia"}, {"value": "grid.259828.c", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/153297377", "schema": "URL"}]}], "given": "D.E.", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2163996951", "schema": "URL"}], "fullname": "D.E. Soper", "family": "Soper"}], "published-print": "1999-1-1", "collectedFrom": ["CrossRef", "MAG", "UnpayWall"], "accepted": null, "type": "journal-article", "published-online": null, "subject": ["Obstetrics and Gynaecology", "Infectious Diseases", "Dermatology"]}
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dataimport/DOIBoostToActions.java
3 3
import com.google.gson.Gson;
4 4
import com.google.gson.JsonElement;
5 5
import com.google.gson.JsonObject;
6
import com.googlecode.protobuf.format.JsonFormat;
6 7
import eu.dnetlib.actionmanager.actions.ActionFactory;
7 8
import eu.dnetlib.actionmanager.actions.AtomicAction;
8 9
import eu.dnetlib.actionmanager.common.Agent;
......
42 43
    public static final String SEPARATOR = "::";
43 44

  
44 45
    private static Map<String, Pair<String, String>> datasources =  new HashMap<String, Pair<String, String>>() {{
45
        put(MAG, new Pair<>("Microsoft Academic Graph", OPENAIRE_PREFIX + SEPARATOR + "microsoft"));
46
        put(ORCID, new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
47
        put(CROSSREF, new Pair<>(CROSSREF, OPENAIRE_PREFIX + SEPARATOR + "crossref"));
48
        put(UNPAYWALL, new Pair<>(UNPAYWALL, OPENAIRE_PREFIX + SEPARATOR + "unpaywall"));
46
        put(MAG.toLowerCase(), new Pair<>("Microsoft Academic Graph", OPENAIRE_PREFIX + SEPARATOR + "microsoft"));
47
        put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
48
        put(CROSSREF.toLowerCase(), new Pair<>(CROSSREF, OPENAIRE_PREFIX + SEPARATOR + "crossref"));
49
        put(UNPAYWALL.toLowerCase(), new Pair<>(UNPAYWALL, OPENAIRE_PREFIX + SEPARATOR + "unpaywall"));
49 50

  
50 51
    }};
51 52

  
......
112 113
        //creating Result Proto
113 114
        final OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder().setType(TypeProtos.Type.result);
114 115

  
115
        entity.setDateofcollection("2018-10-10");
116
        entity.setDateofcollection("2019-02-15");
116 117

  
118

  
119

  
117 120
        if (rootElement.has("collectedFrom") && rootElement.get("collectedFrom").isJsonArray()){
118 121
            StreamUtils.toStream(rootElement.getAsJsonArray("collectedFrom").iterator())
119 122
                    .map(JsonElement::getAsString)
120 123
                    .forEach(cf -> {
121
                                final String id = datasources.get(cf).getValue();
122
                                final String name = datasources.get(cf).getKey();
124
                                final String id = datasources.get(cf.toLowerCase()).getValue();
125
                                final String name = datasources.get(cf.toLowerCase()).getKey();
123 126
                                if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
124 127
                                    final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
125 128
                                            .setValue(name)
......
134 137
        final String doi = getStringValue(rootElement, "doi");
135 138
        if (doi == null)
136 139
            return null;
140
        entity.addOriginalId(doi);
137 141
        final String sourceId = String.format("50|%s" + SEPARATOR + "%s", doiBoostNSPREFIX, AbstractDNetXsltFunctions.md5(doi));
138 142
        entity.setId(sourceId);
139 143

  
......
196 200
                    .setSchemename("dnet:access_modes")
197 201
                    .build());
198 202

  
199
            final String id =datasources.get(it.get("provenance").getAsString()).getValue();
200
            final String name =datasources.get(it.get("provenance").getAsString()).getKey();
203
            final String id =datasources.get(it.get("provenance").getAsString().toLowerCase()).getValue();
204
            final String name =datasources.get(it.get("provenance").getAsString().toLowerCase()).getKey();
201 205
            if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
202 206
                final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
203 207
                        .setValue(name)
......
212 216

  
213 217
        //Adding DOI URL as  Instance
214 218
        final String doiURL = getStringValue(rootElement, "doi-url");
219
        JsonObject hostedByOpenAire = null;
220
        if (rootElement.has("hostedByOpenAire")) {
221
            hostedByOpenAire = rootElement.getAsJsonObject("hostedByOpenAire");
222
        }
223

  
215 224
        if (StringUtils.isNotBlank(doiURL)) {
216 225

  
217 226

  
227

  
228

  
218 229
            final ResultProtos.Result.Instance.Builder instance = ResultProtos.Result.Instance.newBuilder();
219 230
            instance.addUrl(doiURL);
231
            instance.setInstancetype(FieldTypeProtos.Qualifier.newBuilder()
232
                    .setClassid(cobjValue)
233
                    .setClassname(typeValue)
234
                    .setSchemeid("dnet:publication_resource")
235
                    .setSchemename("dnet:publication_resource")
236
                    .build());
220 237
            instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder()
221 238
                    .setClassid("CLOSED")
222 239
                    .setClassname("Closed Access")
......
224 241
                    .setSchemename("dnet:access_modes")
225 242
                    .build());
226 243
            instance.setCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
227
                    .setValue("CrossRef")
244
                    .setValue(CROSSREF)
228 245
                    .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5("crossref"))
229 246
                    .build());
247

  
248
            if (hostedByOpenAire == null)
249
            instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
250
                    .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c")
251
                    .setValue("Unknown Repository")
252
                    .build());
253
            else{
254
                instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
255
                        .setKey(AbstractDNetXsltFunctions.oafSplitId("datasource",hostedByOpenAire.get("id").getAsString()))
256
                        .setValue(hostedByOpenAire.get("name").getAsString())
257
                        .build());
258
            }
259

  
230 260
            result.addInstance(instance);
231 261
        }
232 262

  
......
306 336
        result.setMetadata(metadata.build());
307 337
        entity.setResult(result.build());
308 338
        oaf.setEntity(entity.build());
339

  
340
        //System.out.println(JsonFormat.printToString(oaf.build()));
341

  
309 342
        final List<AtomicAction> actionList = new ArrayList<>();
310 343

  
311 344
        if (!onlyOrganization)
......
381 414
        rel.setResultOrganization(rel_instance.build());
382 415

  
383 416
        rel.addCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
384
                .setValue(datasources.get(MAG).getKey())
385
                .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(datasources.get(MAG).getValue(), SEPARATOR)))
417
                .setValue(datasources.get(MAG.toLowerCase()).getKey())
418
                .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(datasources.get(MAG.toLowerCase()).getValue(), SEPARATOR)))
386 419
                .build());
387 420

  
388 421

  
......
437 470
    public static FieldTypeProtos.KeyValue extractIdentifier(final String value) {
438 471
        FieldTypeProtos.KeyValue.Builder pid = FieldTypeProtos.KeyValue.newBuilder();
439 472
        if (StringUtils.contains(value, "orcid.org")){
440
            return pid.setValue(value)
473
            return pid.setValue(value.replaceAll("https://orcid.org/",""))
441 474
                    .setKey(ORCID).build();
442 475
        }
443 476
        if (StringUtils.contains(value, "academic.microsoft.com/#/detail")){
444
            return pid.setValue(value)
477
            return pid.setValue(value.replaceAll("https://academic.microsoft.com/#/detail/",""))
445 478
                    .setKey("MAG Identifier").build();
446 479
        }
447 480
        return pid.setValue(value)
......
467 500
            OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder();
468 501
            entity.setType(TypeProtos.Type.organization);
469 502
            entity.setId("20|microsoft___" + SEPARATOR +AbstractDNetXsltFunctions.md5(microsoftID));
470
            final String id =datasources.get(affiliation.get("provenance").getAsString()).getValue();
471
            final String name =datasources.get(affiliation.get("provenance").getAsString()).getKey();
503
            final String id =datasources.get(affiliation.get("provenance").getAsString().toLowerCase()).getValue();
504
            final String name =datasources.get(affiliation.get("provenance").getAsString().toLowerCase()).getKey();
472 505
            if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
473 506
                final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
474 507
                        .setValue(name)
......
512 545

  
513 546
        List<JsonObject> authors = getArrayObjects(root, "authors");
514 547

  
515
        final AtomicInteger counter = new AtomicInteger();
548
        final AtomicInteger counter = new AtomicInteger(1);
516 549

  
550

  
517 551
        List<FieldTypeProtos.Author> collect = authors.stream().map(author -> {
518 552
            final String given = getStringValue(author, "given");
519 553
            final String family = getStringValue(author, "family");

Also available in: Unified diff