Project

General

Profile

« Previous | Next » 

Revision 55246

Discard records without a valid author as requested in #4392, #4393, #4395, #4396.
If a record with invalid authors also has at least one valid author, the record is kept but the invalid authors are removed.

View differences:

modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/actions/DOIBoostToActionsTest.java
39 39
    public void testSingleDOIBoostAction() throws IOException {
40 40
        doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/DOIBoostAction.json");
41 41
    }
42

  
42 43
    @Test
43 44
    public void testSingleDOIBoostActionFilter() throws IOException {
44 45
        doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/DOIBoostAction_filterOut.json");
45 46
    }
46 47

  
48
    /**
     * Runs {@code doTestAllDOIBoostAction} against the {@code doiboost_discard_many.json} test resource.
     * NOTE(review): judging by the resource name this presumably exercises records whose authors are
     * invalid; what is actually asserted depends on the helper, which is not visible here - confirm.
     */
    @Test
49
    public void testMultipleDOIBoostActionDiscardMany() throws IOException {
50
        doTestAllDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/doiboost_discard_many.json");
51
    }
47 52

  
53

  
48 54
    @Test
49 55
    public void testDOIBoostActionToXML() throws Exception {
50 56
        doTestSingleDOIBoostActionToXML("/eu/dnetlib/data/mapreduce/actions/broken");
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/mapreduce/actions/doiboost_discard_many.json
1
{"publisher": "American Medical Association (AMA)", "doi": "10.1001/2013.jamadermatol.238", "license": [], "title": ["Comparison of Ustekinumab With Other Biological Agents for the Treatment of Moderate to Severe Plaque Psoriasis"], "issued": "2012-12-1", "abstract": [{"provenance": "MAG", "value": "eJylVU2P2zYQ/StzKRADsmrLWq/dnNJkA2yRdIPsAj0EPVDS2GIjkS5J2ev++r4hbVfGpkCKXmyD\n5My8j5nxQ/UH10HvmZ4s1bbfKccUWibebHSt6iPZDQ0+8Fdthl5VdNChxQMV5MLipaNK285u8boj\ntWUTPAK02cY0n7x1Wnnt6Y1jRco09Mh7djoc6d40/EyvPr15vJ+Q6i1CVDN0gXYq6JgnFuttw04F\nwLLkJZZp16k/B3ydk+f0TgVFj3ZwNXv6jUHFNEMduCFF/gj8PXLWiFeubgW6gNsBh23oF2UG5Y60\nmGc0X6+LTCq958rFUxwWszkOE6mPd+8+3P96B9hD9ZGbSUZ3faU8ZzHjW1u3ThmmD7pyiM4i47rT\nRuQJQNv5fGv3OT2GoTlCi07kt0Ywa1N3QwPIyNDYXv+Fn+ARnO06/EzRgv2l4CfnmqQY5Km5smTd\n9zgE+VF6JHprqVXNf9D97jk4lWgkWo2cHiSC0xWQabi9Y3yY0B2pOlKRWitLmBvt1dYx97gnx952\newThGRTwbPxwLjbOeNJLaHjIdfY1KpXRBrrZw3TYnYzOhLyIu40vz4Qzqc2wkJoBfMEio8qChR/c\nBkKSks7VZi+QBB6u8RqeJvGyJCEw7wRqsnzneK/t4IGFVYikIs0XTpxb92iAPM4J/ayO+KUMGQ4H\n675Sz0FNlVHdUR4clBdCG+v6pNBGhyBuLoABEnovRoh7nf8J7b/Rz9xMMc/oNJ/OgfHUZC/OTfMv\nd6dhFCijMrWNi0InKk8tfztW1a3m/cmqin0Q0DDIyYEoDw1yLASYot0Bdpw6WntIml0toAGXIoHy\n3tZahXPT+wDvfBBp0V9eb42WDRa7rdVbGQPbND4WTXBEM0W3Nz9AN1kWwgeDcBmK66FSjep0f4Hw\nKiaL/UJfHj7/jtWRr8rXtEa6GkG66mSiA7s9vP7y1t3HJ7P5dJHflFgbqmPprl1I6R4+43qWL1av\nCW8zWuRlOS1u82UxSa5AeMPuOqDIZ7epZIyZ52WB9LPlhKohfFMlDMRZiGt62mw6/awv9CQ94CxH\n6Wf5vJzO8lUxoehVHDXoqnY8yHKtO1QbGZdWYtSg4wH+TefFj8UCJ62udIAPsmbkTfQHTfG/DDpv\nvQh9uc7L1Qj7Ypmv1tM5vsrl5Lwa0viEoUdFw7WzMl4wRaBdQPqUsCzyohglhDXrcooyi3IS/RFU\nT9Oau25EMIbe5MvFlUuLm2lR5kU5yfGHIUssjtL7NA2jjYEl9R1L+Ho8eoXlyohzl//wtIdaLJR/\nejgb9dNp6i/9KL1jbBi1RP43lFHR7g==\n"}], "issn": [{"type": "print", "value": "0003-987X"}], "doi-url": "http://dx.doi.org/10.1001/2013.jamadermatol.238", "instances": [{"url": "http://jamanetwork.com/journals/jamadermatology/fullarticle/1377948", "provenance": "Crossref", "access-rights": "UNKNOWN"}], "authors": [{"affiliations": [{"official-page": 
"http://www.washington.edu/", "provenance": "MAG", "value": "University of Washington", "identifiers": [{"value": "http://en.wikipedia.org/wiki/University_of_Washington", "schema": "wikpedia"}, {"value": "grid.34477.33", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/201448701", "schema": "URL"}]}], "given": "Vincent W.", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2116346541", "schema": "URL"}], "fullname": "Vincent W. Lin", "family": "Lin"}, {"affiliations": [{"official-page": "http://www.childrenshospital.org/", "provenance": "MAG", "value": "Boston Children's Hospital", "identifiers": [{"value": "http://en.wikipedia.org/wiki/Boston_Children's_Hospital", "schema": "wikpedia"}, {"value": "grid.2515.3", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/1288882113", "schema": "URL"}]}], "given": "Sarah", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2011423229", "schema": "URL"}], "fullname": "Sarah Ringold", "family": "Ringold"}, {"affiliations": [{"official-page": "http://www.washington.edu/", "provenance": "MAG", "value": "University of Washington", "identifiers": [{"value": "http://en.wikipedia.org/wiki/University_of_Washington", "schema": "wikpedia"}, {"value": "grid.34477.33", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/201448701", "schema": "URL"}]}], "given": "Emily Beth", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/1950057684", "schema": "URL"}], "fullname": "Emily Beth Devine", "family": "Devine"}], "published-print": "2012-12-1", "collectedFrom": ["Crossref", "MAG"], "accepted": null, "type": "journal-article", "published-online": null, "subject": ["Dermatology", "General Medicine"]}
2
{"publisher": "American Medical Association (AMA)", "doi": "10.1001/2013.jamainternmed.335", "license": [], "title": [""], "issued": "2012-11-26", "abstract": [], "issn": [{"type": "print", "value": "0003-9926"}], "doi-url": "http://dx.doi.org/10.1001/2013.jamainternmed.335", "instances": [{"url": "http://jamanetwork.com/journals/jamainternalmedicine/fullarticle/1384244", "provenance": "Crossref", "access-rights": "UNKNOWN"}], "authors": [{"affiliations": [], "given": "Terrence", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2686390077", "schema": "URL"}], "fullname": "Terrence Shaneyfelt", "family": "Shaneyfelt"}], "published-print": "2012-11-26", "collectedFrom": ["Crossref", "MAG"], "accepted": null, "type": "journal-article", "published-online": null, "subject": ["Internal Medicine"]}
3
{"publisher": "American Medical Association (AMA)", "doi": "10.1001/2013.jamainternmed.82", "license": [], "title": ["[NO TITLE AVAILABLE"], "issued": "2012-12-10", "abstract": [], "issn": [{"type": "print", "value": "0003-9926"}], "doi-url": "http://dx.doi.org/10.1001/2013.jamainternmed.82", "instances": [{"url": "http://jamanetwork.com/journals/jamainternalmedicine/fullarticle/1389243", "provenance": "Crossref", "access-rights": "UNKNOWN"}], "authors": [{"affiliations": [{"official-page": "http://www.upenn.edu/", "provenance": "MAG", "value": "University of Pennsylvania", "identifiers": [{"value": "http://en.wikipedia.org/wiki/University_of_Pennsylvania", "schema": "wikpedia"}, {"value": "grid.25879.31", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/79576946", "schema": "URL"}]}], "given": "Shreya", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2092315005", "schema": "URL"}], "fullname": "Shreya Kangovi", "family": "Kangovi"}, {"affiliations": [{"official-page": "http://www.upenn.edu/", "provenance": "MAG", "value": "University of Pennsylvania", "identifiers": [{"value": "http://en.wikipedia.org/wiki/University_of_Pennsylvania", "schema": "wikpedia"}, {"value": "grid.25879.31", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/79576946", "schema": "URL"}]}], "given": "Judith A.", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2012187813", "schema": "URL"}], "fullname": "Judith A. 
Long", "family": "Long"}, {"affiliations": [{"official-page": "http://www.upenn.edu/", "provenance": "MAG", "value": "University of Pennsylvania", "identifiers": [{"value": "http://en.wikipedia.org/wiki/University_of_Pennsylvania", "schema": "wikpedia"}, {"value": "grid.25879.31", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/79576946", "schema": "URL"}]}], "given": "Ezekiel", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/47466971", "schema": "URL"}], "fullname": "Ezekiel Emanuel", "family": "Emanuel"}], "published-print": "2012-12-10", "collectedFrom": ["Crossref", "MAG"], "accepted": null, "type": "journal-article", "published-online": null, "subject": ["Internal Medicine"]}
4
{"publisher": "American Medical Association (AMA)", "doi": "10.1001/archderm.100.4.515", "license": [], "title": ["Tuberculoid leprosy in state of reaction"], "issued": "1969-10-1", "abstract": [], "issn": [{"type": "print", "value": "0003-987X"}], "doi-url": "http://dx.doi.org/10.1001/archderm.100.4.515", "instances": [], "authors": [{"affiliations": [], "given": "O.", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2639594543", "schema": "URL"}], "fullname": "O. Canizares", "family": "Canizares"}], "published-print": "1969-10-1", "collectedFrom": ["Crossref", "MAG"], "accepted": null, "type": "journal-article", "published-online": null, "subject": ["Dermatology", "General Medicine"]}
5
{"publisher": "American Medical Association (AMA)", "doi": "10.1001/archderm.101.6.679", "license": [], "title": ["Multiple palmar basal cell epitheliomas in basal cell nevus syndrome"], "issued": "1970-6-1", "abstract": [], "issn": [{"type": "print", "value": "0003-987X"}], "doi-url": "http://dx.doi.org/10.1001/archderm.101.6.679", "instances": [], "authors": [{"affiliations": null, "given": "K.", "identifiers": null, "fullname": "K. Holubar", "family": "Holubar"}], "published-print": "1970-6-1", "collectedFrom": ["Crossref"], "accepted": null, "type": "journal-article", "published-online": null, "subject": ["Dermatology", "General Medicine"]}
6
{"publisher": "American Medical Association (AMA)", "doi": "10.1001/archderm.103.5.494", "license": [], "title": ["Syringoma of the vulva"], "issued": "1971-5-1", "abstract": [], "issn": [{"type": "print", "value": "0003-987X"}], "doi-url": "http://dx.doi.org/10.1001/archderm.103.5.494", "instances": [], "authors": [{"affiliations": null, "given": "S. J.", "identifiers": null, "fullname": "S. J. Carneiro", "family": "Carneiro"}], "published-print": "1971-5-1", "collectedFrom": ["Crossref"], "accepted": null, "type": "journal-article", "published-online": null, "subject": ["Dermatology", "General Medicine"]}
7
{"publisher": "American Medical Association (AMA)", "doi": "10.1001/archderm.107.6.872", "license": [], "title": ["Atypical gingivostomatitis. Nineteen cases"], "issued": "1973-6-1", "abstract": [], "issn": [{"type": "print", "value": "0003-987X"}], "doi-url": "http://dx.doi.org/10.1001/archderm.107.6.872", "instances": [], "authors": [{"affiliations": null, "given": "none", "identifiers": null, "fullname": "", "family": "none"}], "published-print": "1973-6-1", "collectedFrom": ["Crossref"], "accepted": null, "type": "journal-article", "published-online": null, "subject": ["Dermatology", "General Medicine"]}
8
{"publisher": "American Medical Association (AMA)", "doi": "10.1001/archderm.108.2.215", "license": [], "title": ["Instantaneous decompression and the skin"], "issued": "1973-8-1", "abstract": [], "issn": [{"type": "print", "value": "0003-987X"}], "doi-url": "http://dx.doi.org/10.1001/archderm.108.2.215", "instances": [], "authors": [{"affiliations": null, "given": "", "identifiers": null, "fullname": "", "family": ""}], "published-print": "1973-8-1", "collectedFrom": ["Crossref"], "accepted": null, "type": "journal-article", "published-online": null, "subject": ["Dermatology", "General Medicine"]}
9
{"publisher": "American Medical Association (AMA)", "doi": "10.1001/archderm.108.2.215", "license": [], "title": ["Instantaneous decompression and the skin"], "issued": "1973-8-1", "abstract": [], "issn": [{"type": "print", "value": "0003-987X"}], "doi-url": "http://dx.doi.org/10.1001/archderm.108.2.215", "instances": [], "authors": [{"affiliations": null, "given": "", "identifiers": null, "fullname": "", "family": ""}, {"affiliations": [], "given": "O.", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2639594543", "schema": "URL"}], "fullname": "O. Canizares", "family": "Canizares"}], "published-print": "1973-8-1", "collectedFrom": ["Crossref"], "accepted": null, "type": "journal-article", "published-online": null, "subject": ["Dermatology", "General Medicine"]}
10

  
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dataimport/DOIBoostToActions.java
30 30

  
31 31
public class DOIBoostToActions {
32 32

  
33
    public static final String MAG = "MAG";
34
    public static final String ORCID = "ORCID";
35
    public static final String CROSSREF = "Crossref";
36
    public static final String UNPAYWALL = "UnpayWall";
33
	public static final String MAG = "MAG";
34
	public static final String ORCID = "ORCID";
35
	public static final String CROSSREF = "Crossref";
36
	public static final String UNPAYWALL = "UnpayWall";
37 37

  
38
    public static final String GRID_AC = "grid.ac";
39
    public static final String WIKPEDIA = "wikpedia";
38
	public static final String GRID_AC = "grid.ac";
39
	public static final String WIKPEDIA = "wikpedia";
40 40

  
41
    public final static String doiBoostNSPREFIX ="doiboost____";
42
    public static final String OPENAIRE_PREFIX = "openaire____";
41
	public final static String doiBoostNSPREFIX = "doiboost____";
42
	public static final String OPENAIRE_PREFIX = "openaire____";
43 43

  
44
    public static final String SEPARATOR = "::";
44
	public static final String SEPARATOR = "::";
45 45

  
46
    private static Map<String, Pair<String, String>> datasources =  new HashMap<String, Pair<String, String>>() {{
47
        put(MAG.toLowerCase(), new Pair<>("Microsoft Academic Graph", OPENAIRE_PREFIX + SEPARATOR + "microsoft"));
48
        put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
49
        put(CROSSREF.toLowerCase(), new Pair<>(CROSSREF, OPENAIRE_PREFIX + SEPARATOR + "crossref"));
50
        put(UNPAYWALL.toLowerCase(), new Pair<>(UNPAYWALL, OPENAIRE_PREFIX + SEPARATOR + "unpaywall"));
46
	private static Map<String, Pair<String, String>> datasources = new HashMap<String, Pair<String, String>>() {{
47
		put(MAG.toLowerCase(), new Pair<>("Microsoft Academic Graph", OPENAIRE_PREFIX + SEPARATOR + "microsoft"));
48
		put(ORCID.toLowerCase(), new Pair<>(ORCID, OPENAIRE_PREFIX + SEPARATOR + "orcid"));
49
		put(CROSSREF.toLowerCase(), new Pair<>(CROSSREF, OPENAIRE_PREFIX + SEPARATOR + "crossref"));
50
		put(UNPAYWALL.toLowerCase(), new Pair<>(UNPAYWALL, OPENAIRE_PREFIX + SEPARATOR + "unpaywall"));
51 51

  
52
    }};
52
	}};
53 53

  
54
    private static String decompressAbstract(final String abstractCompressed)  {
55
        try {
56
            byte[] byteArray = Base64.decodeBase64(abstractCompressed.getBytes());
57
            final Inflater decompresser = new Inflater();
58
            decompresser.setInput(byteArray);
59
            final ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length);
60
            byte[] buffer = new byte[8192];
61
            while (!decompresser.finished()) {
62
                int size = decompresser.inflate(buffer);
63
                bos.write(buffer, 0, size);
64
            }
65
            byte[] unzippeddata = bos.toByteArray();
66
            decompresser.end();
67
            return new String(unzippeddata);
68
        } catch (Throwable e) {
69
            System.out.println("Wrong abstract:"+ abstractCompressed);
70
            throw  new RuntimeException(e);
71
        }
72
    }
54
	/**
	 * Decodes the given Base64 text and inflates the decoded bytes with {@link Inflater}.
	 *
	 * @param abstractCompressed Base64 text of the compressed abstract; converted to bytes with the default charset
	 * @return the inflated bytes converted to a String with the default charset
	 * @throws RuntimeException wrapping any Throwable raised while decoding or inflating; the offending input
	 *                          is printed to standard output before the exception is thrown
	 */
	private static String decompressAbstract(final String abstractCompressed) {
55
		try {
56
			byte[] byteArray = Base64.decodeBase64(abstractCompressed.getBytes());
57
			final Inflater decompresser = new Inflater();
58
			decompresser.setInput(byteArray);
59
			// the compressed length is only used as the initial capacity of the output buffer
			final ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length);
60
			byte[] buffer = new byte[8192];
61
			// NOTE(review): the loop only stops once finished() is true; if inflate() returns 0 without
			// finishing (e.g. truncated input) this would presumably never terminate - confirm and guard.
			while (!decompresser.finished()) {
62
				int size = decompresser.inflate(buffer);
63
				bos.write(buffer, 0, size);
64
			}
65
			byte[] unzippeddata = bos.toByteArray();
66
			// NOTE(review): end() is reached only on the success path; it is skipped when an exception is thrown above
			decompresser.end();
67
			return new String(unzippeddata);
68
		} catch (Throwable e) {
69
			System.out.println("Wrong abstract:" + abstractCompressed);
70
			throw new RuntimeException(e);
71
		}
72
	}
73 73

  
74
    public static final String PID_TYPES = "dnet:pid_types";
75
    private static Map<String, FieldTypeProtos.Qualifier> affiliationPIDType =  new HashMap<String, FieldTypeProtos.Qualifier>() {{
76
        put(MAG, FieldTypeProtos.Qualifier.newBuilder().setClassid("mag_id" ).setClassname("Microsoft Academic Graph Identifier").setSchemename(PID_TYPES).setSchemeid(PID_TYPES).build());
77
        put(GRID_AC, getQualifier("grid", PID_TYPES));
78
        put(WIKPEDIA, getQualifier("urn", PID_TYPES));
79
    }};
74
	public static final String PID_TYPES = "dnet:pid_types";
75
	private static Map<String, FieldTypeProtos.Qualifier> affiliationPIDType = new HashMap<String, FieldTypeProtos.Qualifier>() {{
76
		put(MAG, FieldTypeProtos.Qualifier.newBuilder().setClassid("mag_id").setClassname("Microsoft Academic Graph Identifier").setSchemename(PID_TYPES)
77
				.setSchemeid(PID_TYPES).build());
78
		put(GRID_AC, getQualifier("grid", PID_TYPES));
79
		put(WIKPEDIA, getQualifier("urn", PID_TYPES));
80
	}};
80 81

  
81
    static Map<String, Map<String, String>> typologiesMapping;
82
	static Map<String, Map<String, String>> typologiesMapping;
82 83

  
83
    static {
84
        try {
85
            final InputStream is = DOIBoostToActions.class.getResourceAsStream("/eu/dnetlib/data/mapreduce/hbase/dataimport/mapping_typologies.json");
86
            final String tt =IOUtils.toString(is);
87
            typologiesMapping = new Gson().fromJson(tt, Map.class);
88
        } catch (IOException e) {
89
            e.printStackTrace();
90
        }
91
    }
84
	// Loads typologiesMapping from the classpath resource mapping_typologies.json during class initialization.
	static {
85
		try {
86
			final InputStream is = DOIBoostToActions.class.getResourceAsStream("/eu/dnetlib/data/mapreduce/hbase/dataimport/mapping_typologies.json");
87
			// NOTE(review): the stream is not closed in this block - confirm whether that is intended
			final String tt = IOUtils.toString(is);
88
			// parsed with the raw Map.class target, so the nested generic types are not checked here
			typologiesMapping = new Gson().fromJson(tt, Map.class);
89
		} catch (IOException e) {
90
			// the failure is only printed: typologiesMapping is left unassigned, so later calls such as
			// typologiesMapping.containsKey(...) in isValid would fail with a NullPointerException
			e.printStackTrace();
91
		}
92
	}
92 93

  
93
    protected static boolean isValid(final JsonObject rootElement, final Reporter context){
94
	protected static boolean isValid(final JsonObject rootElement, final Reporter context) {
94 95

  
95
        final String doi = getStringValue(rootElement, "doi");
96
        if (doi == null) {
97
            context.incrementCounter("filtered","no_doi", 1);
98
            return false;
99
        }
100
        final String type = getStringValue(rootElement,"type");
101
        if (!typologiesMapping.containsKey(type)){
102
            context.incrementCounter("filtered","unknowntype_"+type, 1);
103
            return false;
104
        }
105
        // fixes #4360 (test publisher)
106
        final String publisher = getStringValue(rootElement, "publisher");
107
        if(publisher.equalsIgnoreCase("Test accounts")){
108
            context.incrementCounter("filtered","test_publisher", 1);
109
            return false;
110
        }
96
		final String doi = getStringValue(rootElement, "doi");
97
		if (doi == null) {
98
			context.incrementCounter("filtered", "no_doi", 1);
99
			return false;
100
		}
101
		final String type = getStringValue(rootElement, "type");
102
		if (!typologiesMapping.containsKey(type)) {
103
			context.incrementCounter("filtered", "unknowntype_" + type, 1);
104
			return false;
105
		}
106
		// fixes #4360 (test publisher)
107
		final String publisher = getStringValue(rootElement, "publisher");
108
		if (publisher.equalsIgnoreCase("Test accounts")) {
109
			context.incrementCounter("filtered", "test_publisher", 1);
110
			return false;
111
		}
111 112

  
112
        List<JsonObject> authors = getArrayObjects(rootElement, "authors");
113
        boolean hasAuthor = false;
114
        for(JsonObject author : authors){
115
            final String given = getStringValue(author, "given");
116
            final String family = getStringValue(author, "family");
117
            final  String fullname = getStringValue(author, "fullname");
118
            if (StringUtils.isNotBlank(fullname) || (StringUtils.isNotBlank(given) && StringUtils.isNotBlank(family))) {
119
                hasAuthor = true;
120
            }
121
            // fixes #4368
122
            if(StringUtils.isNotBlank(given) && given.equalsIgnoreCase("Addie") && StringUtils.isNotBlank(family) && family.equalsIgnoreCase("Jackson") && publisher.equalsIgnoreCase("Elsevier BV")){
123
                context.incrementCounter("filtered","addieJackson", 1);
124
                return false;
125
            }
126
        }
127
        if(!hasAuthor){
128
            context.incrementCounter("filtered","no_authors", 1);
129
            return false;
130
        }
131
        // fixes #4360
132
        if(getCleanedTitles(rootElement).isEmpty()){
133
            context.incrementCounter("filtered","invalid_title", 1);
134
            return false;
135
        }
113
		List<JsonObject> authors = getArrayObjects(rootElement, "authors");
114
		boolean hasAuthors = false;
115
		for (JsonObject author : authors) {
116
			final String given = getStringValue(author, "given");
117
			final String family = getStringValue(author, "family");
118
			String fullname = getStringValue(author, "fullname");
119
			if (StringUtils.isBlank(fullname) && StringUtils.isNotBlank(given) && StringUtils.isNotBlank(family)) {
120
				fullname = String.format("%s %s", given, family);
121
			}
122
			// fixes #4368
123
			if (fullname.equalsIgnoreCase("Addie Jackson") && publisher.equalsIgnoreCase("Elsevier BV")) {
124
				context.incrementCounter("invalid_author", "addiejackson", 1);
125
				context.incrementCounter("filtered", "invalid_authors", 1);
126
				return false;
127
			}
128
			if (isValidAuthorName(fullname, context)) hasAuthors = true;
129
		}
136 130

  
137
        return true;
138
    }
131
		if (!hasAuthors) {
132
			context.incrementCounter("filtered", "invalid_authors", 1);
133
			return false;
134
		}
135
		// fixes #4360
136
		if (getCleanedTitles(rootElement).isEmpty()) {
137
			context.incrementCounter("filtered", "invalid_title", 1);
138
			return false;
139
		}
139 140

  
140
    private static List<String> getCleanedTitles(final JsonObject rootElement){
141
        List<String> titles = getArrayValues(rootElement, "title");
142
        return titles.stream().filter( t -> StringUtils.isNotBlank(t) && !t.equalsIgnoreCase("[NO TITLE AVAILABLE]")).collect(Collectors.toList());
143
    }
141
		return true;
142
	}
144 143

  
144
	/**
	 * Returns the usable titles of a record.
	 *
	 * @param rootElement the JSON record whose "title" values are read via getArrayValues
	 * @return the titles that are not blank and do not equal "[NO TITLE AVAILABLE]" (compared ignoring case);
	 *         an empty list when no title passes the filter
	 */
	private static List<String> getCleanedTitles(final JsonObject rootElement) {
145
		List<String> titles = getArrayValues(rootElement, "title");
146
		// only an exact (case-insensitive) match of the whole placeholder is dropped; variants of it are kept
		return titles.stream().filter(t -> StringUtils.isNotBlank(t) && !t.equalsIgnoreCase("[NO TITLE AVAILABLE]")).collect(Collectors.toList());
147
	}
145 148

  
146
    public static List<AtomicAction> generatePublicationActionsFromDump(final JsonObject rootElement, final ActionFactory factory, final String setName, final Agent agent, boolean invisible,
147
                                                                        final boolean onlyOrganization, final Reporter context) {
149
	/**
	 * Tells whether an author full name is acceptable.
	 * A name is rejected when it is blank or when it is exactly one of the placeholder values listed in the switch below.
	 *
	 * @param fullName the author full name to check
	 * @param context  counter sink; when not null, the "invalid_author" counter group is incremented for each rejected name
	 * @return false for blank names and for the listed placeholder values, true otherwise
	 */
	private static boolean isValidAuthorName(final String fullName, final Reporter context) {
150
		if (StringUtils.isBlank(fullName)) {
151
			if(context != null) context.incrementCounter("invalid_author", "blank", 1);
152
			return false;
153
		}
154
		// fixes #4391 and subtasks related to DOIBoost
155
		// the switch matches the whole string exactly and case-sensitively: differently cased or padded
		// variants of these placeholders fall through and are accepted as valid
		switch (fullName) {
156
		case ",":
157
		case "none none":
158
		case "none &na;":
159
		case "(:null)":
160
		case "&na; &na;": {
161
			// the rejected value itself becomes part of the counter name
			if(context != null) context.incrementCounter("invalid_author", "value_" + fullName, 1);
162
			return false;
163
			}
164
		}
165
		return true;
166
	}
148 167

  
149
        if(!isValid(rootElement, context)) return null;
168
	public static List<AtomicAction> generatePublicationActionsFromDump(final JsonObject rootElement,
169
			final ActionFactory factory,
170
			final String setName,
171
			final Agent agent,
172
			boolean invisible,
173
			final boolean onlyOrganization,
174
			final Reporter context) {
150 175

  
151
        //Create OAF Proto
176
		if (!isValid(rootElement, context)) return null;
152 177

  
153
        final OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
154
        //Add Data Info
155
        oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
156
                .setInvisible(invisible)
157
                .setDeletedbyinference(false)
158
                .setInferred(false)
159
                .setTrust("0.9")
160
                .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
161
                .build());
178
		//Create OAF Proto
162 179

  
163
        //Adding Kind
164
        oaf.setKind(KindProtos.Kind.entity);
180
		final OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
181
		//Add Data Info
182
		oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
183
				.setInvisible(invisible)
184
				.setDeletedbyinference(false)
185
				.setInferred(false)
186
				.setTrust("0.9")
187
				.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
188
				.build());
165 189

  
166
        //creating Result Proto
167
        final OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder().setType(TypeProtos.Type.result);
190
		//Adding Kind
191
		oaf.setKind(KindProtos.Kind.entity);
168 192

  
169
        entity.setDateofcollection("2019-02-15");
193
		//creating Result Proto
194
		final OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder().setType(TypeProtos.Type.result);
170 195

  
196
		entity.setDateofcollection("2019-02-15");
171 197

  
198
		if (rootElement.has("collectedFrom") && rootElement.get("collectedFrom").isJsonArray()) {
199
			StreamUtils.toStream(rootElement.getAsJsonArray("collectedFrom").iterator())
200
					.map(JsonElement::getAsString)
201
					.forEach(cf -> {
202
								final String id = datasources.get(cf.toLowerCase()).getValue();
203
								final String name = datasources.get(cf.toLowerCase()).getKey();
204
								if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
205
									final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
206
											.setValue(name)
207
											.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
208
											.build();
209
									entity.addCollectedfrom(collectedFrom);
210
								}
211
							}
212
					);
213
		}
214
		//Adding identifier
215
		final String doi = getStringValue(rootElement, "doi");
216
		entity.addOriginalId(doi);
172 217

  
173
        if (rootElement.has("collectedFrom") && rootElement.get("collectedFrom").isJsonArray()){
174
            StreamUtils.toStream(rootElement.getAsJsonArray("collectedFrom").iterator())
175
                    .map(JsonElement::getAsString)
176
                    .forEach(cf -> {
177
                                final String id = datasources.get(cf.toLowerCase()).getValue();
178
                                final String name = datasources.get(cf.toLowerCase()).getKey();
179
                                if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
180
                                    final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
181
                                            .setValue(name)
182
                                            .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
183
                                            .build();
184
                                    entity.addCollectedfrom(collectedFrom);
185
                                }
186
                            }
187
                    );
188
        }
189
        //Adding identifier
190
        final String doi = getStringValue(rootElement, "doi");
191
        entity.addOriginalId(doi);
218
		final String sourceId = String.format("50|%s" + SEPARATOR + "%s", doiBoostNSPREFIX, AbstractDNetXsltFunctions.md5(doi));
219
		entity.setId(sourceId);
192 220

  
193
        final String sourceId = String.format("50|%s" + SEPARATOR + "%s", doiBoostNSPREFIX, AbstractDNetXsltFunctions.md5(doi));
194
        entity.setId(sourceId);
221
		entity.addPid(FieldTypeProtos.StructuredProperty.newBuilder()
222
				.setValue(doi)
223
				.setQualifier(getQualifier("doi", PID_TYPES))
224
				.build());
195 225

  
196
        entity.addPid(FieldTypeProtos.StructuredProperty.newBuilder()
197
                .setValue(doi)
198
                .setQualifier(getQualifier("doi", PID_TYPES))
199
                .build());
226
		//Create Result Field
227
		ResultProtos.Result.Builder result = ResultProtos.Result.newBuilder();
200 228

  
229
		final String type = getStringValue(rootElement, "type");
201 230

  
202
        //Create Result Field
203
        ResultProtos.Result.Builder result = ResultProtos.Result.newBuilder();
231
		//Adding Instances
232
		final String typeValue = typologiesMapping.get(type).get("value");
233
		final String cobjValue = typologiesMapping.get(type).get("cobj");
204 234

  
205
        final String type = getStringValue(rootElement,"type");
235
		// TODO: workaround for #4362: remove it when UnpayWall is correctly mapped
236
		List<JsonObject> unpaywallLicenses = getArrayObjects(rootElement, "license").stream().filter(prov -> {
237
			String provS = getStringValue(prov, "provenance");
238
			if (StringUtils.isNotBlank(provS) && provS.equalsIgnoreCase(UNPAYWALL)) return true;
239
			else return false;
240
		}).collect(Collectors.toList());
206 241

  
207
        //Adding Instances
208
        final String typeValue = typologiesMapping.get(type).get("value");
209
        final String cobjValue = typologiesMapping.get(type).get("cobj");
242
		Stream.concat(unpaywallLicenses.stream(), getArrayObjects(rootElement, "instances").stream()).map(it ->
243
		{
244
			ResultProtos.Result.Instance.Builder instance = ResultProtos.Result.Instance.newBuilder();
245
			instance.setInstancetype(FieldTypeProtos.Qualifier.newBuilder()
246
					.setClassid(cobjValue)
247
					.setClassname(typeValue)
248
					.setSchemeid("dnet:publication_resource")
249
					.setSchemename("dnet:publication_resource")
250
					.build());
251
			instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
252
					.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c")
253
					.setValue("Unknown Repository")
254
					.build());
210 255

  
211
        // TODO: workaround for #4362: remove it when UnpayWall is correctly mapped
212
        List<JsonObject> unpaywallLicenses = getArrayObjects(rootElement, "license").stream().filter(prov -> {
213
            String provS = getStringValue(prov,"provenance");
214
            if(StringUtils.isNotBlank(provS) && provS.equalsIgnoreCase(UNPAYWALL)) return true;
215
            else return false;
216
        }).collect(Collectors.toList());
256
			final String acc_class_id = it.get("access-rights").getAsString();
257
			String acc_class_value;
258
			switch (acc_class_id) {
259
			case "OPEN": {
260
				acc_class_value = "open access";
261
				break;
262
			}
263
			case "CLOSED": {
264
				acc_class_value = "closed access";
265
				break;
266
			}
267
			default: {
268
				acc_class_value = "not available";
269
			}
217 270

  
218
        Stream.concat(unpaywallLicenses.stream(), getArrayObjects(rootElement, "instances").stream()).map(it ->
219
        {
220
            ResultProtos.Result.Instance.Builder instance= ResultProtos.Result.Instance.newBuilder();
221
            instance.setInstancetype(FieldTypeProtos.Qualifier.newBuilder()
222
                    .setClassid(cobjValue)
223
                    .setClassname(typeValue)
224
                    .setSchemeid("dnet:publication_resource")
225
                    .setSchemename("dnet:publication_resource")
226
                    .build());
227
            instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
228
                    .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c")
229
                    .setValue("Unknown Repository")
230
                    .build());
271
			}
231 272

  
232
            final String acc_class_id =it.get("access-rights").getAsString();
233
            String acc_class_value;
234
            switch (acc_class_id){
235
                case "OPEN": {
236
                    acc_class_value = "open access";
237
                    break;
238
                }
239
                case "CLOSED": {
240
                    acc_class_value = "closed access";
241
                    break;
242
                }
243
                default: {
244
                    acc_class_value = "not available";
245
                }
273
			instance.addUrl(it.get("url").getAsString());
274
			instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder()
275
					.setClassid(acc_class_id)
276
					.setClassname(acc_class_value)
277
					.setSchemeid("dnet:access_modes")
278
					.setSchemename("dnet:access_modes")
279
					.build());
246 280

  
247
            }
281
			final String id = datasources.get(it.get("provenance").getAsString().toLowerCase()).getValue();
282
			final String name = datasources.get(it.get("provenance").getAsString().toLowerCase()).getKey();
283
			if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
284
				final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
285
						.setValue(name)
286
						.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
287
						.build();
248 288

  
249
            instance.addUrl(it.get("url").getAsString());
250
            instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder()
251
                    .setClassid(acc_class_id)
252
                    .setClassname(acc_class_value)
253
                    .setSchemeid("dnet:access_modes")
254
                    .setSchemename("dnet:access_modes")
255
                    .build());
289
				instance.setCollectedfrom(collectedFrom);
290
			}
256 291

  
257
            final String id =datasources.get(it.get("provenance").getAsString().toLowerCase()).getValue();
258
            final String name =datasources.get(it.get("provenance").getAsString().toLowerCase()).getKey();
259
            if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
260
                final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
261
                        .setValue(name)
262
                        .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
263
                        .build();
292
			return instance.build();
293
		}).forEach(result::addInstance);
264 294

  
265
                instance.setCollectedfrom(collectedFrom);
266
            }
295
		//Adding DOI URL as  Instance
296
		final String doiURL = getStringValue(rootElement, "doi-url");
297
		JsonObject hostedByOpenAire = null;
298
		if (rootElement.has("hostedByOpenAire")) {
299
			hostedByOpenAire = rootElement.getAsJsonObject("hostedByOpenAire");
300
		}
267 301

  
268
            return  instance.build();
269
        }).forEach(result::addInstance);
302
		if (StringUtils.isNotBlank(doiURL)) {
303
			final ResultProtos.Result.Instance.Builder instance = ResultProtos.Result.Instance.newBuilder();
304
			instance.addUrl(doiURL);
305
			instance.setInstancetype(FieldTypeProtos.Qualifier.newBuilder()
306
					.setClassid(cobjValue)
307
					.setClassname(typeValue)
308
					.setSchemeid("dnet:publication_resource")
309
					.setSchemename("dnet:publication_resource")
310
					.build());
311
			instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder()
312
					.setClassid("CLOSED")
313
					.setClassname("Closed Access")
314
					.setSchemeid("dnet:access_modes")
315
					.setSchemename("dnet:access_modes")
316
					.build());
317
			instance.setCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
318
					.setValue(CROSSREF)
319
					.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5("crossref"))
320
					.build());
270 321

  
271
        //Adding DOI URL as  Instance
272
        final String doiURL = getStringValue(rootElement, "doi-url");
273
        JsonObject hostedByOpenAire = null;
274
        if (rootElement.has("hostedByOpenAire")) {
275
            hostedByOpenAire = rootElement.getAsJsonObject("hostedByOpenAire");
276
        }
322
			if (hostedByOpenAire == null)
323
				instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
324
						.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c")
325
						.setValue("Unknown Repository")
326
						.build());
327
			else {
328
				instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
329
						.setKey(AbstractDNetXsltFunctions.oafSplitId("datasource", hostedByOpenAire.get("id").getAsString()))
330
						.setValue(hostedByOpenAire.get("name").getAsString())
331
						.build());
332
			}
277 333

  
278
        if (StringUtils.isNotBlank(doiURL)) {
279
            final ResultProtos.Result.Instance.Builder instance = ResultProtos.Result.Instance.newBuilder();
280
            instance.addUrl(doiURL);
281
            instance.setInstancetype(FieldTypeProtos.Qualifier.newBuilder()
282
                    .setClassid(cobjValue)
283
                    .setClassname(typeValue)
284
                    .setSchemeid("dnet:publication_resource")
285
                    .setSchemename("dnet:publication_resource")
286
                    .build());
287
            instance.setAccessright(FieldTypeProtos.Qualifier.newBuilder()
288
                    .setClassid("CLOSED")
289
                    .setClassname("Closed Access")
290
                    .setSchemeid("dnet:access_modes")
291
                    .setSchemename("dnet:access_modes")
292
                    .build());
293
            instance.setCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
294
                    .setValue(CROSSREF)
295
                    .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5("crossref"))
296
                    .build());
334
			result.addInstance(instance);
335
		}
297 336

  
298
            if (hostedByOpenAire == null)
299
            instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
300
                    .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + "55045bd2a65019fd8e6741a755395c8c")
301
                    .setValue("Unknown Repository")
302
                    .build());
303
            else{
304
                instance.setHostedby(FieldTypeProtos.KeyValue.newBuilder()
305
                        .setKey(AbstractDNetXsltFunctions.oafSplitId("datasource",hostedByOpenAire.get("id").getAsString()))
306
                        .setValue(hostedByOpenAire.get("name").getAsString())
307
                        .build());
308
            }
337
		//Create Metadata Proto
338
		final ResultProtos.Result.Metadata.Builder metadata = ResultProtos.Result.Metadata.newBuilder();
309 339

  
310
            result.addInstance(instance);
311
        }
340
		Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>> authorsOrganizations = createAuthorsOrganization(rootElement);
312 341

  
313
        //Create Metadata Proto
314
        final ResultProtos.Result.Metadata.Builder metadata = ResultProtos.Result.Metadata.newBuilder();
342
		if (authorsOrganizations.getKey().size() > 0) {
343
			metadata.addAllAuthor(authorsOrganizations.getKey());
344
		} else {
345
			//Should never enter here because of the isValid method at the beginning.
346
			context.incrementCounter("filtered", "unexpected_no_authors", 1);
347
			return null;
348
		}
349
		//adding Language
350
		metadata.setLanguage(FieldTypeProtos.Qualifier.newBuilder()
351
				.setClassid("und")
352
				.setClassname("Undetermined")
353
				.setSchemeid("dent:languages")
354
				.setSchemename("dent:languages")
355
				.build());
315 356

  
357
		//Adding subjects
358
		List<String> subjects = getArrayValues(rootElement, "subject");
316 359

  
317
        Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>> authorsOrganizations = createAuthorsOrganization(rootElement);
360
		subjects.forEach(s -> metadata.addSubject(FieldTypeProtos.StructuredProperty.newBuilder()
361
				.setValue(s)
362
				.setQualifier(getQualifier("keyword", "dnet:subject"))
363
				.build()));
318 364

  
365
		List<String> titles = getCleanedTitles(rootElement);
366
		titles.forEach(t ->
367
				metadata.addTitle(FieldTypeProtos.StructuredProperty.newBuilder()
368
						.setValue(t)
369
						.setQualifier(getQualifier("main title", "dnet:dataCite_title"))
370
						.build()));
319 371

  
372
		settingRelevantDate(rootElement, metadata, "issued", "issued", true);
373
		settingRelevantDate(rootElement, metadata, "accepted", "accepted", false);
374
		settingRelevantDate(rootElement, metadata, "published-online", "published-online", false);
375
		settingRelevantDate(rootElement, metadata, "published-print", "published-print", false);
320 376

  
321
        if (authorsOrganizations.getKey().size() > 0) {
322
            metadata.addAllAuthor(authorsOrganizations.getKey());
323
        }
324
        else {
325
            //Should never enter here because of the isValid method at the beginning.
326
            context.incrementCounter("filtered","unexpected_no_authors", 1);
327
            return null;
328
        }
329
        //adding Language
330
        metadata.setLanguage(FieldTypeProtos.Qualifier.newBuilder()
331
                .setClassid("und")
332
                .setClassname("Undetermined")
333
                .setSchemeid("dent:languages")
334
                .setSchemename("dent:languages")
335
                .build());
377
		getArrayObjects(rootElement, "abstract").forEach(d ->
378
				{
379
					if (MAG.equals(d.get("provenance").getAsString()))
380
						metadata.addDescription(FieldTypeProtos.StringField.newBuilder().setValue(decompressAbstract(d.get("value").getAsString())).build());
381
					else
382
						metadata.addDescription(FieldTypeProtos.StringField.newBuilder().setValue(d.get("value").getAsString()).build());
383
				}
384
		);
336 385

  
337
        //Adding subjects
338
        List<String> subjects =getArrayValues(rootElement, "subject");
386
		//Adding Journal
387
		final String publisher = getStringValue(rootElement, "publisher");
388
		if (StringUtils.isNotBlank(publisher)) {
339 389

  
340
        subjects.forEach(s-> metadata.addSubject(FieldTypeProtos.StructuredProperty.newBuilder()
341
                .setValue(s)
342
                .setQualifier(getQualifier("keyword", "dnet:subject"))
343
                .build()));
390
			final ResultProtos.Result.Journal.Builder journal = ResultProtos.Result.Journal.newBuilder().setName(publisher);
344 391

  
345
        List<String> titles = getCleanedTitles(rootElement);
346
        titles.forEach(t->
347
                metadata.addTitle(FieldTypeProtos.StructuredProperty.newBuilder()
348
                        .setValue(t)
349
                        .setQualifier(getQualifier("main title", "dnet:dataCite_title"))
350
                        .build()));
392
			if (hasJSONArrayField(rootElement, "issn")) {
393
				StreamUtils.toStream(rootElement.getAsJsonArray("issn").iterator())
394
						.map(JsonElement::getAsJsonObject)
395
						.forEach(it -> {
396
							final String issntype = getStringValue(it, "type");
397
							final String value = getStringValue(it, "value");
398
							if ("electronic".equals(issntype)) {
399
								journal.setIssnOnline(value);
400
							}
401
							if ("print".equals(issntype))
402
								journal.setIssnPrinted(value);
403
						});
404
			}
405
			metadata.setJournal(journal.build());
406
		}
407
		metadata.setResulttype(getQualifier(getDefaultResulttype(cobjValue), "dnet:result_typologies"));
408
		result.setMetadata(metadata.build());
409
		entity.setResult(result.build());
410
		oaf.setEntity(entity.build());
351 411

  
352
        settingRelevantDate(rootElement, metadata, "issued", "issued", true);
353
        settingRelevantDate(rootElement, metadata, "accepted", "accepted", false);
354
        settingRelevantDate(rootElement, metadata, "published-online", "published-online", false);
355
        settingRelevantDate(rootElement, metadata, "published-print", "published-print", false);
412
		//System.out.println(JsonFormat.printToString(oaf.build()));
356 413

  
414
		final List<AtomicAction> actionList = new ArrayList<>();
357 415

  
358
        getArrayObjects(rootElement, "abstract").forEach(d ->
359
                {
360
                    if (MAG.equals(d.get("provenance").getAsString()))
361
                        metadata.addDescription(FieldTypeProtos.StringField.newBuilder().setValue(decompressAbstract(d.get("value").getAsString())).build());
362
                    else
363
                        metadata.addDescription(FieldTypeProtos.StringField.newBuilder().setValue(d.get("value").getAsString()).build());
364
                }
365
                );
416
		if (!onlyOrganization)
417
			actionList.add(factory.createAtomicAction(setName, agent, oaf.getEntity().getId(), "result", "body", oaf.build().toByteArray()));
366 418

  
419
		if (!authorsOrganizations.getValue().isEmpty()) {
367 420

  
421
			authorsOrganizations.getValue().forEach(o ->
422
			{
368 423

  
369
        //Adding Journal
370
        final String publisher = getStringValue(rootElement,"publisher");
371
        if (StringUtils.isNotBlank(publisher)){
424
				actionList.add(factory.createAtomicAction(setName, agent, o.getEntity().getId(), "organization", "body", o.toByteArray()));
425
				if (!onlyOrganization)
426
					actionList.addAll(createPublicationOrganizationRelation(oaf.build(), o, factory, setName, agent));
427
				final String gridOrganization = getSimilarGridOrganization(o.getEntity());
428
				if (gridOrganization != null) {
429
					actionList.add(factory
430
							.createAtomicAction(setName, agent, o.getEntity().getId(), "organizationOrganization_dedupSimilarity_isSimilarTo", gridOrganization,
431
									"".getBytes()));
432
					actionList.add(factory
433
							.createAtomicAction(setName, agent, gridOrganization, "organizationOrganization_dedupSimilarity_isSimilarTo", o.getEntity().getId(),
434
									"".getBytes()));
435
				}
436
			});
437
		}
438
		return actionList;
372 439

  
373
            final ResultProtos.Result.Journal.Builder journal = ResultProtos.Result.Journal.newBuilder().setName(publisher);
440
	}
374 441

  
375
            if (hasJSONArrayField(rootElement,"issn" )){
376
                StreamUtils.toStream(rootElement.getAsJsonArray("issn").iterator())
377
                        .map(JsonElement::getAsJsonObject)
378
                        .forEach(it -> {
379
                            final String issntype = getStringValue(it, "type");
380
                            final String value = getStringValue(it, "value");
381
                            if("electronic".equals(issntype)){
382
                                journal.setIssnOnline(value);
383
                            }
384
                            if ("print".equals(issntype))
385
                                journal.setIssnPrinted(value);
386
                        });
387
            }
388
            metadata.setJournal(journal.build());
389
        }
390
        metadata.setResulttype(getQualifier(getDefaultResulttype(cobjValue), "dnet:result_typologies"));
391
        result.setMetadata(metadata.build());
392
        entity.setResult(result.build());
393
        oaf.setEntity(entity.build());
442
	private static String getSimilarGridOrganization(final OafProtos.OafEntity organization) {
394 443

  
395
        //System.out.println(JsonFormat.printToString(oaf.build()));
444
		final List<FieldTypeProtos.StructuredProperty> pidList = organization.getPidList();
445
		if (pidList != null) {
446
			for (FieldTypeProtos.StructuredProperty p : pidList) {
447
				if (p.getQualifier().getClassname().equals("grid")) {
448
					return "20|grid________" + SEPARATOR + AbstractDNetXsltFunctions.md5(p.getValue());
449
				}
450
			}
451
		}
452
		return null;
396 453

  
397
        final List<AtomicAction> actionList = new ArrayList<>();
454
	}
398 455

  
399
        if (!onlyOrganization)
400
            actionList.add(factory.createAtomicAction(setName, agent, oaf.getEntity().getId(), "result", "body", oaf.build().toByteArray()));
456
	private static List<AtomicAction> createPublicationOrganizationRelation(final OafProtos.Oaf publication,
457
			final OafProtos.Oaf organization,
458
			final ActionFactory factory,
459
			final String setName,
460
			final Agent agent) {
401 461

  
402
        if (!authorsOrganizations.getValue().isEmpty()) {
462
		List<AtomicAction> result = new ArrayList<>();
403 463

  
404
            authorsOrganizations.getValue().forEach(o ->
405
            {
464
		final OafProtos.Oaf.Builder roaf = OafProtos.Oaf.newBuilder();
465
		roaf.setKind(KindProtos.Kind.relation);
406 466

  
407
                actionList.add(factory.createAtomicAction(setName, agent, o.getEntity().getId(), "organization", "body", o.toByteArray()));
408
                if (!onlyOrganization)
409
                    actionList.addAll(createPublicationOrganizationRelation(oaf.build(), o, factory, setName, agent));
410
                final String gridOrganization = getSimilarGridOrganization(o.getEntity());
411
                if (gridOrganization!= null) {
412
                    actionList.add(factory.createAtomicAction(setName, agent, o.getEntity().getId(), "organizationOrganization_dedupSimilarity_isSimilarTo", gridOrganization, "".getBytes()));
413
                    actionList.add(factory.createAtomicAction(setName, agent, gridOrganization, "organizationOrganization_dedupSimilarity_isSimilarTo", o.getEntity().getId(), "".getBytes()));
414
                }
415
            });
416
        }
417
        return actionList;
467
		roaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
468
				.setInvisible(false)
469
				.setDeletedbyinference(false)
470
				.setInferred(false)
471
				.setTrust("0.9")
472
				.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
473
				.build());
418 474

  
419
    }
475
		final OafProtos.OafRel.Builder rel = OafProtos.OafRel.newBuilder();
420 476

  
477
		rel.setRelType(RelTypeProtos.RelType.resultOrganization);
478
		rel.setSubRelType(RelTypeProtos.SubRelType.affiliation);
421 479

  
422
    private static String getSimilarGridOrganization(final OafProtos.OafEntity organization) {
480
		//Create a relation Result --> Organization
481
		rel.setSource(publication.getEntity().getId());
482
		rel.setTarget(organization.getEntity().getId());
483
		rel.setRelClass(ResultOrganization.Affiliation.RelName.hasAuthorInstitution.toString());
423 484

  
424
        final List<FieldTypeProtos.StructuredProperty> pidList = organization.getPidList();
425
        if (pidList!= null ) {
426
            for (FieldTypeProtos.StructuredProperty p: pidList) {
427
                if (p.getQualifier().getClassname().equals("grid")){
428
                    return "20|grid________" + SEPARATOR +AbstractDNetXsltFunctions.md5(p.getValue());
429
                }
430
            }
431
        }
432
        return null;
485
		final ResultOrganization.Builder rel_instance = ResultOrganization.newBuilder();
433 486

  
434
    }
487
		final ResultOrganization.Affiliation.Builder affiliationRel = ResultOrganization.Affiliation.newBuilder();
488
		affiliationRel.setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
489
				.setSemantics(getQualifier("hasAuthorInstitution", "dnet:result_organization_relations"))
490
				.build());
491
		rel_instance.setAffiliation(affiliationRel.build());
492
		rel.setResultOrganization(rel_instance.build());
435 493

  
436
    private static List<AtomicAction> createPublicationOrganizationRelation(final OafProtos.Oaf publication, final OafProtos.Oaf organization, final ActionFactory factory, final String setName, final Agent agent) {
494
		rel.addCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
495
				.setValue(datasources.get(MAG.toLowerCase()).getKey())
496
				.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions
497
						.md5(StringUtils.substringAfter(datasources.get(MAG.toLowerCase()).getValue(), SEPARATOR)))
498
				.build());
437 499

  
438
        List<AtomicAction> result = new ArrayList<>();
500
		rel.setChild(false);
501
		roaf.setRel(rel.build());
439 502

  
440
        final OafProtos.Oaf.Builder roaf = OafProtos.Oaf.newBuilder();
441
        roaf.setKind(KindProtos.Kind.relation);
503
		result.add(factory.createAtomicAction(setName, agent, publication.getEntity().getId(), "resultOrganization_affiliation_hasAuthorInstitution",
504
				organization.getEntity().getId(), roaf.build().toByteArray()));
442 505

  
443
        roaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
444
                .setInvisible(false)
445
                .setDeletedbyinference(false)
446
                .setInferred(false)
447
                .setTrust("0.9")
448
                .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
449
                .build());
506
		//Create a relation Organization --> Result
507
		rel.setTarget(publication.getEntity().getId());
508
		rel.setSource(organization.getEntity().getId());
509
		rel.setRelClass(ResultOrganization.Affiliation.RelName.isAuthorInstitutionOf.toString());
450 510

  
511
		affiliationRel.setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
512
				.setSemantics(getQualifier("isAuthorInstitutionOf", "dnet:result_organization_relations"))
513
				.build());
514
		rel_instance.setAffiliation(affiliationRel.build());
515
		rel.setResultOrganization(rel_instance.build());
516
		roaf.setRel(rel.build());
517
		result.add(factory.createAtomicAction(setName, agent, organization.getEntity().getId(), "resultOrganization_affiliation_isAuthorInstitutionOf",
518
				publication.getEntity().getId(), roaf.build().toByteArray()));
451 519

  
452
        final OafProtos.OafRel.Builder rel = OafProtos.OafRel.newBuilder();
520
		return result;
453 521

  
454
        rel.setRelType(RelTypeProtos.RelType.resultOrganization);
455
        rel.setSubRelType(RelTypeProtos.SubRelType.affiliation);
522
	}
456 523

  
457
        //Create a relation Result --> Organization
458
        rel.setSource(publication.getEntity().getId());
459
        rel.setTarget(organization.getEntity().getId());
460
        rel.setRelClass(ResultOrganization.Affiliation.RelName.hasAuthorInstitution.toString());
524
	private static boolean hasJSONArrayField(final JsonObject root, final String key) {
525
		return root.has(key) && root.get(key).isJsonArray();
526
	}
461 527

  
462
        final ResultOrganization.Builder rel_instance = ResultOrganization.newBuilder();
528
	private static void settingRelevantDate(JsonObject rootElement,
529
			ResultProtos.Result.Metadata.Builder metadata,
530
			final String jsonKey,
531
			final String dictionaryKey,
532
			final boolean addToDateOfAcceptance) {
533
		//Adding date
534
		String date = getStringValue(rootElement, jsonKey);
535
		if (date == null)
536
			return;
537
		if (date.length() == 4) {
538
			date += "-01-01";
539
		}
540
		if (isValidDate(date)) {
541
			if (addToDateOfAcceptance)
542
				metadata.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(date).build());
543
			metadata.addRelevantdate(FieldTypeProtos.StructuredProperty.newBuilder()
544
					.setValue(date)
545
					.setQualifier(getQualifier(dictionaryKey, "dnet:dataCite_date"))
546
					.build());
547
		}
548
	}
463 549

  
464
        final ResultOrganization.Affiliation.Builder affiliationRel = ResultOrganization.Affiliation.newBuilder();
465
        affiliationRel.setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
466
                .setSemantics(getQualifier("hasAuthorInstitution", "dnet:result_organization_relations"))
467
                .build());
468
        rel_instance.setAffiliation(affiliationRel.build());
469
        rel.setResultOrganization(rel_instance.build());
550
	public static FieldTypeProtos.KeyValue extractIdentifier(final String value) {
551
		FieldTypeProtos.KeyValue.Builder pid = FieldTypeProtos.KeyValue.newBuilder();
552
		if (StringUtils.contains(value, "orcid.org")) {
553
			return pid.setValue(value.replaceAll("https://orcid.org/", ""))
554
					.setKey(ORCID).build();
555
		}
556
		if (StringUtils.contains(value, "academic.microsoft.com/#/detail")) {
557
			return pid.setValue(value.replaceAll("https://academic.microsoft.com/#/detail/", ""))
558
					.setKey("MAG Identifier").build();
559
		}
560
		return pid.setValue(value)
561
				.setKey("URL").build();
562
	}
470 563

  
471
        rel.addCollectedfrom(FieldTypeProtos.KeyValue.newBuilder()
472
                .setValue(datasources.get(MAG.toLowerCase()).getKey())
473
                .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(datasources.get(MAG.toLowerCase()).getValue(), SEPARATOR)))
474
                .build());
564
	public static OafProtos.Oaf createOrganizationFromJSON(final JsonObject affiliation) {
565
		final Map<String, FieldTypeProtos.Qualifier> affiliationIdentifiers = new HashMap<>();
566
		final List<String> magId = new ArrayList<>();
567
		getArrayObjects(affiliation, "identifiers").forEach(it -> {
568
			if (StringUtils.contains(it.get("value").getAsString(), "academic.microsoft.com")) {
569
				affiliationIdentifiers.put(it.get("value").getAsString(), affiliationPIDType.get(MAG));
570
				magId.add(it.get("value").getAsString());
571
			} else
572
				affiliationIdentifiers.put(it.get("value").getAsString(), affiliationPIDType.get(it.get("schema").getAsString()));
573
		});
574
		if (magId.size() > 0) {
575
			final String microsoftID = magId.get(0);
576
			OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
577
			oaf.setKind(KindProtos.Kind.entity);
578
			OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder();
579
			entity.setType(TypeProtos.Type.organization);
580
			entity.setId("20|microsoft___" + SEPARATOR + AbstractDNetXsltFunctions.md5(microsoftID));
581
			final String id = datasources.get(affiliation.get("provenance").getAsString().toLowerCase()).getValue();
582
			final String name = datasources.get(affiliation.get("provenance").getAsString().toLowerCase()).getKey();
583
			if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
584
				final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
585
						.setValue(name)
586
						.setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
587
						.build();
588
				entity.addCollectedfrom(collectedFrom);
589
			} else {
590
				return null;
591
			}
592
			entity.addOriginalId(microsoftID);
475 593

  
594
			affiliationIdentifiers.forEach((key, value) -> entity.addPid(
595
					FieldTypeProtos.StructuredProperty.newBuilder()
596
							.setQualifier(value)
597
							.setValue(key)
598
							.build()));
476 599

  
600
			final OrganizationProtos.Organization.Builder organization = OrganizationProtos.Organization.newBuilder();
601
			organization.setMetadata(OrganizationProtos.Organization.Metadata.newBuilder()
602
					.setWebsiteurl(FieldTypeProtos.StringField.newBuilder().setValue(affiliation.get("official-page").getAsString()).build())
603
					.setLegalname(FieldTypeProtos.StringField.newBuilder().setValue(affiliation.get("value").getAsString()).build())
604
					.build());
477 605

  
478
        rel.setChild(false);
479
        roaf.setRel(rel.build());
606
			entity.setOrganization(organization);
607
			oaf.setEntity(entity);
608
			oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
609
					.setInvisible(false)
610
					.setDeletedbyinference(false)
611
					.setInferred(false)
612
					.setTrust("0.9")
613
					.setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
614
					.build());
615
			return oaf.build();
616
		}
617
		return null;
618
	}
480 619

  
481
        result.add(factory.createAtomicAction(setName, agent, publication.getEntity().getId(), "resultOrganization_affiliation_hasAuthorInstitution", organization.getEntity().getId(), roaf.build().toByteArray() ));
620
	public static Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>> createAuthorsOrganization(final JsonObject root) {
482 621

  
622
		final Map<String, OafProtos.Oaf> affiliations = new HashMap<>();
483 623

  
484
        //Create a relation Organization --> Result
485
        rel.setTarget(publication.getEntity().getId());
486
        rel.setSource(organization.getEntity().getId());
487
        rel.setRelClass(ResultOrganization.Affiliation.RelName.isAuthorInstitutionOf.toString());
624
		List<JsonObject> authors = getArrayObjects(root, "authors");
488 625

  
626
		final AtomicInteger counter = new AtomicInteger(1);
489 627

  
490
        affiliationRel.setRelMetadata(RelMetadataProtos.RelMetadata.newBuilder()
491
                .setSemantics(getQualifier("isAuthorInstitutionOf", "dnet:result_organization_relations"))
492
                .build());
493
        rel_instance.setAffiliation(affiliationRel.build());
494
        rel.setResultOrganization(rel_instance.build());
495
        roaf.setRel(rel.build());
496
        result.add(factory.createAtomicAction(setName, agent, organization.getEntity().getId(), "resultOrganization_affiliation_isAuthorInstitutionOf", publication.getEntity().getId(), roaf.build().toByteArray()));
628
		List<FieldTypeProtos.Author> collect = authors.stream().map(author -> {
629
			final String given = getStringValue(author, "given");
630
			final String family = getStringValue(author, "family");
631
			String fullname = getStringValue(author, "fullname");
497 632

  
498
        return result;
633
			if (StringUtils.isBlank(fullname) && StringUtils.isNotBlank(given) && StringUtils.isNotBlank(family)) {
634
				fullname = String.format("%s %s", given, family);
635
			}
499 636

  
500
    }
637
			if (!isValidAuthorName(fullname, null)) {
638
				return null;
639
			}
640
			final FieldTypeProtos.Author.Builder abuilder = FieldTypeProtos.Author.newBuilder();
501 641

  
502
    private static boolean hasJSONArrayField(final JsonObject root, final String key) {
503
        return root.has(key) && root.get(key).isJsonArray();
504
    }
642
			if (StringUtils.isNotBlank(given))
643
				abuilder.setName(given);
644
			if (StringUtils.isNotBlank(family))
645
				abuilder.setSurname(family);
646
			if (StringUtils.isNotBlank(fullname))
647
				abuilder.setFullname(fullname);
505 648

  
506
    private static void settingRelevantDate(JsonObject rootElement, ResultProtos.Result.Metadata.Builder metadata , final String jsonKey, final String dictionaryKey, final boolean addToDateOfAcceptance) {
507
        //Adding date
508
        String date = getStringValue(rootElement,jsonKey);
509
        if (date == null)
510
            return;
511
        if (date.length() == 4) {
512
            date += "-01-01";
513
        }
514
        if (isValidDate(date)) {
515
            if (addToDateOfAcceptance)
516
                metadata.setDateofacceptance(FieldTypeProtos.StringField.newBuilder().setValue(date).build());
517
            metadata.addRelevantdate(FieldTypeProtos.StructuredProperty.newBuilder()
518
                    .setValue(date)
519
                    .setQualifier(getQualifier(dictionaryKey,"dnet:dataCite_date"))
520
                    .build());
521
        }
522
    }
649
			final List<JsonObject> identifiers = getArrayObjects(author, "identifiers");
650
			final List<JsonObject> authorAffiliation = getArrayObjects(author, "affiliations");
523 651

  
652
			authorAffiliation.forEach(it ->
653
			{
654
				OafProtos.Oaf org = createOrganizationFromJSON(it);
655
				if (org != null) {
656
					affiliations.put(org.getEntity().getId(), org);
657
					abuilder.addAffiliation(org.getEntity().getOrganization().getMetadata().getLegalname());
658
				}
659
			});
660
			identifiers.stream().map(id -> {
661
				final String value = id.get("value").getAsString();
662
				return extractIdentifier(value);
663
			}).collect(
664
					Collectors.toMap(
665
							FieldTypeProtos.KeyValue::getKey,
666
							Function.identity(),
667
							(a, b) -> a
668
					)).values().forEach(abuilder::addPid);
669
			abuilder.setRank(counter.getAndIncrement());
524 670

  
525
    public static FieldTypeProtos.KeyValue extractIdentifier(final String value) {
526
        FieldTypeProtos.KeyValue.Builder pid = FieldTypeProtos.KeyValue.newBuilder();
527
        if (StringUtils.contains(value, "orcid.org")){
528
            return pid.setValue(value.replaceAll("https://orcid.org/",""))
529
                    .setKey(ORCID).build();
530
        }
531
        if (StringUtils.contains(value, "academic.microsoft.com/#/detail")){
532
            return pid.setValue(value.replaceAll("https://academic.microsoft.com/#/detail/",""))
533
                    .setKey("MAG Identifier").build();
534
        }
535
        return pid.setValue(value)
536
                .setKey("URL").build();
537
    }
671
			return abuilder.build();
538 672

  
673
		}).filter(Objects::nonNull).collect(Collectors.toList());
539 674

  
540
    /**
     * Builds an organization entity from one affiliation JSON object.
     *
     * <p>An organization is produced only when the affiliation carries at least one identifier
     * whose value contains {@code academic.microsoft.com}; the first such identifier determines
     * the entity id. {@code null} is returned when no such identifier exists, or when the
     * datasource looked up from the affiliation's {@code provenance} has a blank id or name.
     *
     * @param affiliation the affiliation JSON object; the code reads its {@code identifiers},
     *                    {@code provenance}, {@code official-page} and {@code value} members
     * @return the organization wrapped in an {@code Oaf}, or {@code null} as described above
     */
    public static OafProtos.Oaf createOrganizationFromJSON(final JsonObject affiliation) {
541
        final Map<String, FieldTypeProtos.Qualifier> affiliationIdentifiers = new HashMap<>();
542
        final List<String> magId = new ArrayList<>();
543
        // Index every identifier by its value; Microsoft Academic ones are also remembered in magId.
        getArrayObjects(affiliation, "identifiers").forEach(it -> {
544
            if (StringUtils.contains(it.get("value").getAsString(), "academic.microsoft.com")) {
545
                affiliationIdentifiers.put(it.get("value").getAsString(), affiliationPIDType.get(MAG));
546
                magId.add(it.get("value").getAsString());
547
            }
548
            else
549
                // Any other identifier takes the qualifier registered for its "schema" member.
                affiliationIdentifiers.put( it.get("value").getAsString(), affiliationPIDType.get(it.get("schema").getAsString()));
550
        });
551
        if (magId.size() > 0) {
552
            final String microsoftID = magId.get(0);
553
            OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
554
            oaf.setKind(KindProtos.Kind.entity);
555
            OafProtos.OafEntity.Builder entity = OafProtos.OafEntity.newBuilder();
556
            entity.setType(TypeProtos.Type.organization);
557
            // The entity id is derived from the md5 of the first Microsoft Academic identifier.
            entity.setId("20|microsoft___" + SEPARATOR +AbstractDNetXsltFunctions.md5(microsoftID));
558
            // The datasource entry is looked up by the lower-cased provenance; its value is used as id, its key as name.
            final String id =datasources.get(affiliation.get("provenance").getAsString().toLowerCase()).getValue();
559
            final String name =datasources.get(affiliation.get("provenance").getAsString().toLowerCase()).getKey();
560
            if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(name)) {
561
                final FieldTypeProtos.KeyValue collectedFrom = FieldTypeProtos.KeyValue.newBuilder()
562
                        .setValue(name)
563
                        .setKey("10|" + OPENAIRE_PREFIX + SEPARATOR + AbstractDNetXsltFunctions.md5(StringUtils.substringAfter(id, SEPARATOR)))
564
                        .build();
565
                entity.addCollectedfrom(collectedFrom);
566
            } else {
567
                // Without a usable datasource id and name no organization is produced.
                return null;
568
            }
569
            entity.addOriginalId(microsoftID);
675
		// NOTE(review): the tab-indented lines numbered 675-676 reference `collect` and `affiliations`,
		// which are not declared in this method; they look like interleaved lines from the other
		// revision shown in this diff view - confirm before relying on them.
		return new Pair<>(collect, affiliations.values());
676
	}
570 677

  
571
            // Every collected identifier becomes a pid on the entity, with its qualifier.
            affiliationIdentifiers.forEach((key, value) -> entity.addPid(
572
                    FieldTypeProtos.StructuredProperty.newBuilder()
573
                            .setQualifier(value)
574
                            .setValue(key)
575
                            .build()));
576

  
577
            final OrganizationProtos.Organization.Builder organization = OrganizationProtos.Organization.newBuilder();
578
            // Website URL comes from "official-page", legal name from "value".
            organization.setMetadata(OrganizationProtos.Organization.Metadata.newBuilder()
579
                    .setWebsiteurl(FieldTypeProtos.StringField.newBuilder().setValue(affiliation.get("official-page").getAsString()).build())
580
                    .setLegalname(FieldTypeProtos.StringField.newBuilder().setValue(affiliation.get("value").getAsString()).build())
581
                    .build());
582

  
583
            entity.setOrganization(organization);
584
            oaf.setEntity(entity);
585
            oaf.setDataInfo(FieldTypeProtos.DataInfo.newBuilder()
586
                    .setInvisible(false)
587
                    .setDeletedbyinference(false)
588
                    .setInferred(false)
589
                    .setTrust("0.9")
590
                    .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
591
                    .build());
592
            return oaf.build();
593
        }
594
        // No Microsoft Academic identifier was found among the affiliation's identifiers.
        return  null;
595
    }
596

  
597
    public static Pair<List<FieldTypeProtos.Author>, Collection<OafProtos.Oaf>>  createAuthorsOrganization(final JsonObject root) {
598

  
599
        final Map<String, OafProtos.Oaf> affiliations = new HashMap<>();
600

  
601
        List<JsonObject> authors = getArrayObjects(root, "authors");
602

  
603
        final AtomicInteger counter = new AtomicInteger(1);
604

  
605

  
606
        List<FieldTypeProtos.Author> collect = authors.stream().map(author -> {
607
            final String given = getStringValue(author, "given");
608
            final String family = getStringValue(author, "family");
609
            String fullname = getStringValue(author, "fullname");
610

  
611
            if (StringUtils.isBlank(fullname) && StringUtils.isNotBlank(given) && StringUtils.isNotBlank(family)) {
612
                fullname = String.format("%s %s", given, family);
613
            }
614

  
615
            if (StringUtils.isBlank(fullname)){
616
                return null;
617

  
618
            }
619
            final FieldTypeProtos.Author.Builder abuilder = FieldTypeProtos.Author.newBuilder();
620

  
621
            if (StringUtils.isNotBlank(given))
622
                abuilder.setName(given);
623
            if (StringUtils.isNotBlank(family))
624
                abuilder.setSurname(family);
625
            if (StringUtils.isNotBlank(fullname))
626
                abuilder.setFullname(fullname);
627

  
628
            final List<JsonObject> identifiers = getArrayObjects(author, "identifiers");
629
            final List<JsonObject> authorAffiliation = getArrayObjects(author, "affiliations");
630

  
631
            authorAffiliation.forEach(it ->
632
            {
633
                OafProtos.Oaf org = createOrganizationFromJSON(it);
634
                if (org != null) {
635
                    affiliations.put(org.getEntity().getId(), org);
636
                    abuilder.addAffiliation(org.getEntity().getOrganization().getMetadata().getLegalname());
637
                }
638
            });
639
            identifiers.stream().map(id -> {
640
                final String value = id.get("value").getAsString();
641
                return extractIdentifier(value);
642
            }).collect(
643
                    Collectors.toMap(
644
                            FieldTypeProtos.KeyValue::getKey,
645
                            Function.identity(),
646
                            (a,b) -> a
647
                    )).values().forEach(abuilder::addPid);
648
            abuilder.setRank(counter.getAndIncrement());
649

  
650
            return abuilder.build();
651

  
652
        }).filter(Objects::nonNull).collect(Collectors.toList());
653

  
654
        return new Pair<> ( collect,affiliations.values() );
655
    }
656

  
657

  
658

  
659

  
660

  
661

  
662 678
}

Also available in: Unified diff