Project

General

Profile

« Previous | Next » 

Revision 63084

Added by Claudio Atzori 9 months ago

[maven-release-plugin] copy for tag dnet-openaireplus-mapping-utils-7.0.1

View differences:

modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/pom.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
	<parent>
4
		<groupId>eu.dnetlib</groupId>
5
		<artifactId>dnet45-parent</artifactId>
6
		<version>1.0.0</version>
7
		<relativePath />
8
	</parent>
9
	<modelVersion>4.0.0</modelVersion>
10
	<groupId>eu.dnetlib</groupId>
11
	<artifactId>dnet-openaireplus-mapping-utils</artifactId>
12
	<packaging>jar</packaging>
13
	<version>7.0.1</version>
14
	<scm>
15
		<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1</developerConnection>
16
	</scm>
17

  
18
	<properties>
19
		<skipITs>true</skipITs>
20
	</properties>
21
	<build>
22
		<plugins>
23
			<plugin>
24
				<groupId>org.apache.maven.plugins</groupId>
25
				<artifactId>maven-failsafe-plugin</artifactId>
26
				<version>2.19.1</version>
27
				<executions>
28
					<execution>
29
						<id>integration-test</id>
30
						<goals>
31
							<goal>integration-test</goal>
32
						</goals>
33
					</execution>
34
					<execution>
35
						<id>verify</id>
36
						<goals>
37
							<goal>verify</goal>
38
						</goals>
39
					</execution>
40
				</executions>
41
				<configuration>
42
					<skipITs>${skipITs}</skipITs>
43
				</configuration>
44
			</plugin>
45
		</plugins>
46
	</build>
47

  
48
	<dependencies>
49
		<dependency>
50
			<groupId>com.google.guava</groupId>
51
			<artifactId>guava</artifactId>
52
			<version>${google.guava.version}</version>
53
		</dependency>		
54
		<dependency>
55
			<groupId>junit</groupId>
56
			<artifactId>junit</artifactId>
57
			<version>${junit.version}</version>
58
			<scope>test</scope>
59
		</dependency>
60
		<dependency>
61
			<groupId>com.ximpleware</groupId>
62
			<artifactId>vtd-xml</artifactId>
63
			<version>[2.12, 3.0.0)</version>
64
		</dependency>
65
		<dependency>
66
			<groupId>commons-codec</groupId>
67
			<artifactId>commons-codec</artifactId>
68
			<version>${commons.codec.version}</version>
69
		</dependency>
70
		<dependency>
71
			<groupId>dom4j</groupId>
72
			<artifactId>dom4j</artifactId>
73
			<version>${dom4j.version}</version>
74
			<exclusions>
75
				<exclusion>
76
					<artifactId>xml-apis</artifactId>
77
					<groupId>xml-apis</groupId>
78
				</exclusion>
79
			</exclusions>
80
		</dependency>
81
		<dependency>
82
			<groupId>net.sf.supercsv</groupId>
83
			<artifactId>super-csv</artifactId>
84
			<version>2.4.0</version>
85
		</dependency>
86
		<dependency>
87
			<groupId>eu.dnetlib</groupId>
88
			<artifactId>dnet-openaire-data-protos</artifactId>
89
			<version>[3.9.8]</version>
90
		</dependency>
91
		<dependency>
92
			<groupId>eu.dnetlib</groupId>
93
			<artifactId>dnet-pace-core</artifactId>
94
			<version>[3.0.0,4.0.0)</version>
95
		</dependency>
96
		<dependency>
97
			<groupId>eu.dnetlib</groupId>
98
			<artifactId>cnr-misc-utils</artifactId>
99
			<version>[1.0.0,2.0.0)</version>
100
		</dependency>
101
		<dependency>
102
			<groupId>eu.dnetlib</groupId>
103
			<artifactId>dnet-hadoop-commons</artifactId>
104
			<version>[2.0.0,3.0.0)</version>
105
		</dependency>
106
		<dependency>
107
			<groupId>eu.dnetlib</groupId>
108
			<artifactId>dnet-index-solr-common</artifactId>
109
			<version>[3.0.1,4.0.0)</version>
110
		</dependency>	
111
		<dependency>
112
			<groupId>com.googlecode.protobuf-java-format</groupId>
113
			<artifactId>protobuf-java-format</artifactId>
114
			<version>1.2</version>
115
		</dependency>
116
		<dependency>
117
			<groupId>org.apache.commons</groupId>
118
			<artifactId>commons-lang3</artifactId>
119
			<version>3.5</version>
120
		</dependency>
121

  
122
		<!-- test deps -->
123
		<dependency>
124
			<groupId>eu.dnetlib</groupId>
125
			<artifactId>dnet-openaireplus-profiles</artifactId>
126
			<version>[1.0.0,2.0.0)</version>
127
			<scope>test</scope>
128
		</dependency>
129
		<dependency>
130
			<groupId>org.mongodb</groupId>
131
			<artifactId>mongo-java-driver</artifactId>
132
			<version>${mongodb.driver.version}</version>
133
			<scope>test</scope>
134
		</dependency>
135
		<dependency>
136
			<groupId>org.springframework</groupId>
137
			<artifactId>spring-context</artifactId>
138
			<version>${spring.version}</version>
139
			<scope>test</scope>
140
		</dependency>
141
		<dependency>
142
			<groupId>org.springframework</groupId>
143
			<artifactId>spring-core</artifactId>
144
			<version>${spring.version}</version>
145
			<scope>test</scope>
146
		</dependency>
147
		<dependency>
148
			<groupId>org.springframework</groupId>
149
			<artifactId>spring-test</artifactId>
150
			<version>${spring.version}</version>
151
			<scope>test</scope>
152
		</dependency>
153

  
154
	</dependencies>
155
</project>
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/bulktag/Community.java
1
package eu.dnetlib.data.bulktag;
2

  
3
import com.google.gson.Gson;
4
import org.apache.commons.logging.Log;
5
import org.apache.commons.logging.LogFactory;
6

  
7
import java.util.ArrayList;
8
import java.util.List;
9

  
10
/**
11
 * Created by miriam on 01/08/2018.
12
 */
13
public class Community {
14

  
15
    private static final Log log = LogFactory.getLog(Community.class);
16

  
17
    private String id;
18
    private List<String> subjects = new ArrayList<>();
19
    private List<Datasource> datasources = new ArrayList<>();
20
    private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>();
21
    private List<Organization> organizationCommunity = new ArrayList<>();
22

  
23
    public List<Organization> getOrganizationCommunity() {
24
        return organizationCommunity;
25
    }
26

  
27
    public void setOrganizationCommunity(List<Organization> organizationCommunity) {
28
        this.organizationCommunity = organizationCommunity;
29
    }
30

  
31
    public String toJson() {
32
        final Gson g = new Gson();
33
        return g.toJson(this);
34
    }
35

  
36
    public boolean isValid() {
37
        return !getSubjects().isEmpty() || !getDatasources().isEmpty() || !getZenodoCommunities().isEmpty();
38
    }
39

  
40
    public String getId() {
41
        return id;
42
    }
43

  
44
    public void setId(String id) {
45
        this.id = id;
46
    }
47

  
48
    public List<String> getSubjects() {
49
        return subjects;
50
    }
51

  
52
    public void setSubjects(List<String> subjects) {
53
        this.subjects = subjects;
54
    }
55

  
56
    public List<Datasource> getDatasources() {
57
        return datasources;
58
    }
59

  
60
    public void setDatasources(List<Datasource> datasources) {
61
        this.datasources = datasources;
62
    }
63

  
64
    public List<ZenodoCommunity> getZenodoCommunities() {
65
        return zenodoCommunities;
66
    }
67

  
68
    public void setZenodoCommunities(List<ZenodoCommunity> zenodoCommunities) {
69
        this.zenodoCommunities = zenodoCommunities;
70
    }
71

  
72
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/index/CloudIndexClient.java
1
package eu.dnetlib.data.index;
2

  
3
import eu.dnetlib.functionality.index.solr.feed.StreamingInputDocumentFactory;
4
import eu.dnetlib.miscutils.datetime.HumanTime;
5
import eu.dnetlib.miscutils.functional.UnaryFunction;
6
import org.apache.commons.logging.Log;
7
import org.apache.commons.logging.LogFactory;
8
import org.apache.solr.client.solrj.SolrQuery;
9
import org.apache.solr.client.solrj.SolrServerException;
10
import org.apache.solr.client.solrj.impl.CloudSolrClient;
11
import org.apache.solr.client.solrj.response.QueryResponse;
12
import org.apache.solr.client.solrj.response.UpdateResponse;
13
import org.apache.solr.common.SolrInputDocument;
14

  
15
import java.io.Closeable;
16
import java.io.IOException;
17
import java.text.SimpleDateFormat;
18
import java.util.Date;
19
import java.util.List;
20

  
21
/**
22
 * Created by michele on 11/11/15.
23
 */
24
public class CloudIndexClient implements Closeable {
25

  
26
	private static final Log log = LogFactory.getLog(CloudIndexClient.class);
27
	private static final String INDEX_RECORD_RESULT_FIELD = "dnetResult";
28

  
29
	private final CloudSolrClient solrClient;
30

  
31
	protected CloudIndexClient(final CloudSolrClient solrServer) {
32
		this.solrClient = solrServer;
33
	}
34

  
35
	public int feed(final String record, final UnaryFunction<String, String> toIndexRecord) throws CloudIndexClientException {
36
		return feed(record, toIndexRecord, true);
37
	}
38

  
39
	public int feed(final String record, final UnaryFunction<String, String> toIndexRecord, final boolean commit)
40
			throws CloudIndexClientException {
41
		try {
42
			final SolrInputDocument doc = prepareSolrDocument(record, toIndexRecord);
43
			if ((doc == null) || doc.isEmpty()) throw new CloudIndexClientException("Invalid solr document");
44
			return feed(doc, commit);
45
		} catch (final Throwable e) {
46
			throw new CloudIndexClientException("Error feeding document", e);
47
		}
48
	}
49

  
50
	public int feed(final SolrInputDocument document) throws CloudIndexClientException {
51
		return feed(document, true);
52
	}
53

  
54
	public int feed(final SolrInputDocument document, final boolean commit) throws CloudIndexClientException {
55
		try {
56
			final UpdateResponse res = solrClient.add(document);
57
			log.debug("feed time for single records, elapsed time: " + HumanTime.exactly(res.getElapsedTime()));
58
			if (res.getStatus() != 0) { throw new CloudIndexClientException("bad status: " + res.getStatus()); }
59
			if (commit) {
60
				solrClient.commit();
61
			}
62
			return res.getStatus();
63
		} catch (final Throwable e) {
64
			throw new CloudIndexClientException("Error feeding document", e);
65
		}
66
	}
67

  
68
	public void feed(final List<SolrInputDocument> docs, final AfterFeedingCallback callback) throws CloudIndexClientException {
69
		feed(docs, callback, true);
70
	}
71

  
72
	public void feed(final List<SolrInputDocument> docs, final AfterFeedingCallback callback, final boolean commit) throws CloudIndexClientException {
73
		try {
74
			if (docs.isEmpty()) {
75
				log.debug("Empty list of documents. Calling callback, if needed.");
76
				if (callback != null) {
77
					callback.doAfterFeeding(null);
78
				}
79
				return;
80
			}
81
			final UpdateResponse res = solrClient.add(docs);
82

  
83
			log.debug("feed time for " + docs.size() + " records, elapsed tipe: : " + HumanTime.exactly(res.getElapsedTime()));
84

  
85
			if (commit) {
86
				solrClient.commit();
87
			}
88
			if (callback != null) {
89
				callback.doAfterFeeding(res);
90
			}
91
			if (res.getStatus() != 0) throw new CloudIndexClientException("bad status: " + res.getStatus());
92
		} catch (final Throwable e) {
93
			throw new CloudIndexClientException("Error feeding documents", e);
94
		}
95
	}
96

  
97
	public SolrInputDocument prepareSolrDocument(final String record, final UnaryFunction<String, String> toIndexRecord)
98
			throws CloudIndexClientException {
99
		try {
100
			final StreamingInputDocumentFactory documentFactory = new StreamingInputDocumentFactory();
101

  
102
			final String version = (new SimpleDateFormat("yyyy-MM-dd\'T\'hh:mm:ss\'Z\'")).format(new Date());
103
			final String indexRecord = toIndexRecord.evaluate(record);
104

  
105
			if (log.isDebugEnabled()) {
106
				log.debug("***************************************\nSubmitting index record:\n" + indexRecord + "\n***************************************\n");
107
			}
108

  
109
			return documentFactory.parseDocument(version, indexRecord, INDEX_RECORD_RESULT_FIELD);
110
		} catch (final Throwable e) {
111
			throw new CloudIndexClientException("Error creating solr document", e);
112
		}
113
	}
114

  
115
	public boolean isRecordIndexed(final String id) throws CloudIndexClientException {
116
		final QueryResponse res = query("objidentifier:\"" + id + "\"", null);
117
		return res.getResults().size() > 0;
118
	}
119

  
120
	public int remove(final String id) throws CloudIndexClientException {
121
		return remove(id, true);
122
	}
123

  
124
	public int remove(final String id, final boolean commit) throws CloudIndexClientException {
125
		String q = String.format("objidentifier:\"%s\" OR resultdupid:\"%s\"", id, id);
126
		try {
127
			final UpdateResponse res = solrClient.deleteByQuery(q);
128
			if (commit) {
129
				solrClient.commit();
130
			}
131
			return res.getResponse().size();
132
		} catch (final Throwable e) {
133
			throw new CloudIndexClientException("Error removing documents", e);
134
		}
135
	}
136

  
137
	public int count(final String query) throws CloudIndexClientException {
138
		final QueryResponse res = query(query, 0);
139
		return res.getResults().size();
140
	}
141

  
142
	public QueryResponse query(final String query, Integer rows) throws CloudIndexClientException {
143
		try {
144
			final SolrQuery solrQuery = new SolrQuery();
145
			solrQuery.setQuery(query);
146
			if(rows != null && rows >= 0) {
147
				solrQuery.setRows(rows);
148
			}
149
			return solrClient.query(solrQuery);
150
		} catch (final Throwable e) {
151
			throw new CloudIndexClientException("Error searching documents", e);
152
		}
153
	}
154

  
155
	public void close() throws IOException {
156
		if (solrClient != null) {
157
			solrClient.close();
158
		}
159
	}
160

  
161
	public void commit() throws CloudIndexClientException {
162
		if(solrClient != null) {
163
			try {
164
				solrClient.commit();
165
			} catch (SolrServerException | IOException e) {
166
				throw new CloudIndexClientException(e.getMessage());
167
			}
168
		}
169
	}
170

  
171
	public interface AfterFeedingCallback {
172

  
173
		void doAfterFeeding(final UpdateResponse response);
174
	}
175
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/index/CloudIndexClientFactory.java
1
package eu.dnetlib.data.index;
2

  
3
import eu.dnetlib.functionality.index.utils.ZkServers;
4
import org.apache.commons.logging.Log;
5
import org.apache.commons.logging.LogFactory;
6
import org.apache.solr.client.solrj.impl.CloudSolrClient;
7
import org.apache.solr.client.solrj.response.SolrPingResponse;
8

  
9
/**
10
 * Created by michele on 11/11/15.
11
 */
12
public class CloudIndexClientFactory {
13

  
14
	private static final Log log = LogFactory.getLog(CloudIndexClientFactory.class);
15

  
16
	public static CloudIndexClient newIndexClient(final String baseURL, final String collection, final boolean parallelUpdates)
17
			throws CloudIndexClientException {
18
		try {
19
			log.info(String.format("Initializing solr server (%s) ...", baseURL));
20

  
21
			final ZkServers zk = ZkServers.newInstance(baseURL);
22
			final CloudSolrClient client = new CloudSolrClient.Builder(zk.getHosts(), zk.getChroot())
23
					.withParallelUpdates(parallelUpdates)
24
					.build();
25

  
26
			client.connect();
27
			client.setDefaultCollection(collection);
28

  
29
			final SolrPingResponse rsp = client.ping();
30
			if (rsp.getStatus() != 0) {
31
				log.error("Invalid connection to solr Server (status = 0)");
32
				throw new CloudIndexClientException("Invalid connection to solr Server (status = 0)");
33
			}
34
			return new CloudIndexClient(client);
35
		} catch (Throwable e) {
36
			log.error("The initialization of indexClient is FAILED", e);
37
			throw new CloudIndexClientException("The initialization of indexClient is FAILED", e);
38
		}
39
	}
40

  
41
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/transform/AuthorMerger.java
1
package eu.dnetlib.data.transform;
2

  
3
import com.wcohen.ss.JaroWinkler;
4
import eu.dnetlib.data.bulktag.Pair;
5
import eu.dnetlib.data.proto.FieldTypeProtos.Author;
6
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
7
import eu.dnetlib.pace.model.Person;
8
import org.apache.commons.lang3.StringUtils;
9

  
10
import java.text.Normalizer;
11
import java.util.*;
12
import java.util.function.Function;
13

  
14
import static java.util.stream.Collectors.*;
15

  
16
public class AuthorMerger {
17

  
18
    private static final Double THRESHOLD = 0.95;
19
    private static final String ORCID = "orcid";
20
    private static final int MAX_AUTHORS = 200;
21

  
22
    public static List<Author> merge(final Collection<List<Author>> authors, final double threshold) {
23
        return merge(authors, THRESHOLD);
24
    }
25

  
26
    public static List<Author> merge(final Collection<List<Author>> authors) {
27
        return doMerge(
28
                authors.stream()
29
                        .map(group -> group.stream()
30
                                .map(AuthorMerger::fixORCID)
31
                                .collect(toList()))
32
                        .collect(toList()));
33
    }
34

  
35
    private static List<Author> doMerge(final Collection<List<Author>> authors) {
36
        final List<Author> res = new ArrayList<>();
37

  
38
        if (authors.isEmpty()) {
39
            return res;
40
        }
41

  
42
        if (authors.size() == 1) {
43
            return authors.iterator().next();
44
        }
45

  
46
        final TreeMap<Integer, List<List<Author>>> byOrcidCount = new TreeMap<>(
47
                authors.stream()
48
                        .collect(groupingBy(AuthorMerger::countOrcid))
49
                        .entrySet().stream()
50
                        .filter(e -> e.getKey() > 0)
51
                        .collect(toMap(
52
                                Map.Entry::getKey,
53
                                Map.Entry::getValue
54
                        )));
55

  
56
        if (byOrcidCount == null || byOrcidCount.isEmpty()) {
57
            return authors.iterator().next();
58
        }
59
        final Map.Entry<Integer, List<List<Author>>> mostOrcid = byOrcidCount.lastEntry();
60

  
61
        if (mostOrcid.getKey() > 0) {
62

  
63
            final List<Author> pivots = mostOrcid.getValue().iterator().next();
64

  
65
            res.addAll(mostOrcid.getValue().iterator().next().stream()
66
                    .filter(a -> hasOrcid(a))
67
                    .collect(toList()));
68

  
69
            if (pivots.size() == res.size()) {
70
                return res;
71
            }
72

  
73
            final Collection<Author> authorList = authors.stream()
74
                    .filter(g -> !g.equals(pivots))
75
                    .flatMap(List::stream)
76
                    .filter(a -> hasOrcid(a))
77
                    .limit(MAX_AUTHORS)
78
                    .map(a -> {
79
                        final String orcid = a.getPidList().stream()
80
                                .filter(p -> p.getKey().equalsIgnoreCase(ORCID))
81
                                .findFirst()
82
                                .get().getValue();
83
                        return new Pair<String, Author>(orcid, a);
84
                    })
85
                    .collect(toMap(
86
                            p -> p.getFst(),
87
                            p -> p.getSnd(),
88
                            (p1, p2) -> p2))
89
                    .values();
90

  
91
            pivots.stream().filter(a -> !hasOrcid(a)).forEach(pivot -> {
92
                final Author.Builder b = Author.newBuilder(pivot);
93
                authorList.parallelStream()
94
                        .map(a -> {
95
                            return new Pair<Double, Author>(sim(a, pivot), a);
96
                        })
97
                        .filter(p -> p.getFst() >= THRESHOLD)
98
                        .forEach(p -> {
99
                            b.mergeFrom(p.getSnd());
100
                        });
101

  
102
                Collection<KeyValue> pids = b.getPidList().stream()
103
                        .collect(toMap(
104
                                kv -> kv.getKey(),
105
                                Function.identity(),
106
                                (kv1, kv2) -> kv2
107
                        )).values();
108
                b.clearPid();
109
                b.addAllPid(pids);
110

  
111
                res.add(b.build());
112
            });
113
        }
114

  
115
        return res;
116
    }
117

  
118
    private static Author fixORCID(final Author author) {
119
        final Author.Builder b = Author.newBuilder(author);
120
        for(KeyValue.Builder pid : b.getPidBuilderList()) {
121
            if (pid.getKey().toLowerCase().contains(ORCID)) {
122
                pid.setKey("ORCID");
123
                if (pid.getValue().contains("orcid.org")) {
124
                    pid.setValue(StringUtils.substringAfterLast(pid.getValue(), "/"));
125

  
126
                }
127
            }
128
        }
129
        return b.build();
130
    }
131

  
132
    private static int countOrcid(final List<Author> authors) {
133
        return authors.stream()
134
                .map(a -> {
135
                    return hasOrcid(a) ? 1 : 0;
136
                })
137
                .mapToInt(Integer::intValue)
138
                .sum();
139
    }
140

  
141
    private static boolean hasOrcid(Author a) {
142
        return a.getPidList().stream().anyMatch(p -> p.getKey().equalsIgnoreCase(ORCID));
143
    }
144

  
145
    private static Double sim(Author a, Author b) {
146

  
147
        final Person pa = parse(a);
148
        final Person pb = parse(b);
149

  
150
        if (pa.isAccurate() & pb.isAccurate()) {
151
            return new JaroWinkler().score(
152
                    normalize(pa.getSurnameString()),
153
                    normalize(pb.getSurnameString()));
154
        } else {
155
            return new JaroWinkler().score(
156
                        normalize(pa.getNormalisedFullname()),
157
                        normalize(pb.getNormalisedFullname()));
158
        }
159
    }
160

  
161
    private static Person parse(Author author) {
162
        if (author.hasSurname()) {
163
            return new Person(author.getSurname() + ", " + author.getName(), false);
164
        } else {
165
            return new Person(author.getFullname(), false);
166
        }
167
    }
168

  
169
    private static String normalize(final String s) {
170
        return nfd(s).toLowerCase()
171
                // do not compact the regexes in a single expression, would cause StackOverflowError in case of large input strings
172
                .replaceAll("(\\W)+", " ")
173
                .replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
174
                .replaceAll("(\\p{Punct})+", " ")
175
                .replaceAll("(\\d)+", " ")
176
                .replaceAll("(\\n)+", " ")
177
                .trim();
178
    }
179

  
180
    private static String nfd(final String s) {
181
        return Normalizer.normalize(s, Normalizer.Form.NFD);
182
    }
183

  
184

  
185
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/index/CloudIndexClientException.java
1
package eu.dnetlib.data.index;
2

  
3
/**
 * Created by michele on 23/11/15.
 *
 * Checked exception signalling a failure of an index client operation.
 */
public class CloudIndexClientException extends Exception {

	/**
	 * Creates an exception wrapping the failure that triggered it.
	 *
	 * @param message description of the failure
	 * @param cause   the underlying error
	 */
	public CloudIndexClientException(final String message, final Throwable cause) {
		super(message, cause);
	}

	/**
	 * Creates an exception without an underlying cause.
	 *
	 * @param message description of the failure
	 */
	public CloudIndexClientException(final String message) {
		super(message);
	}
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/transform/DatePicker.java
1
package eu.dnetlib.data.transform;
2

  
3
import eu.dnetlib.data.proto.FieldTypeProtos;
4
import org.apache.commons.lang.StringUtils;
5

  
6
import java.time.Year;
7
import java.util.*;
8
import java.util.stream.Collectors;
9

  
10
import static java.util.Collections.reverseOrder;
11
import static java.util.Map.Entry.comparingByValue;
12
import static java.util.stream.Collectors.toMap;
13
import static org.apache.commons.lang.StringUtils.endsWith;
14
import static org.apache.commons.lang.StringUtils.substringBefore;
15

  
16
public class DatePicker {
17

  
18
    private static final String DATE_PATTERN = "\\d{4}-\\d{2}-\\d{2}";
19
    private static final String DATE_DEFAULT_SUFFIX = "01-01";
20
    private static final int YEAR_LB = 1300;
21
    private static final int YEAR_UB = Year.now().getValue() + 5;
22

  
23
    public static FieldTypeProtos.StringField pick(final Collection<String> dateofacceptance) {
24

  
25
        final Map<String, Integer> frequencies = dateofacceptance
26
                .parallelStream()
27
                .filter(StringUtils::isNotBlank)
28
                .collect(
29
                        Collectors.toConcurrentMap(
30
                                w -> w, w -> 1, Integer::sum));
31

  
32
        if (frequencies.isEmpty()) {
33
            return FieldTypeProtos.StringField.newBuilder().setValue("").build();
34
        }
35

  
36
        final FieldTypeProtos.StringField.Builder date = FieldTypeProtos.StringField.newBuilder().setValue(frequencies.keySet().iterator().next());
37

  
38
        // let's sort this map by values first, filtering out invalid dates
39
        final Map<String, Integer> sorted = frequencies
40
                .entrySet()
41
                .stream()
42
                .filter(d -> StringUtils.isNotBlank(d.getKey()))
43
                .filter(d -> d.getKey().matches(DATE_PATTERN))
44
                .filter(d -> inRange(d.getKey()))
45
                .sorted(reverseOrder(comparingByValue()))
46
                .collect(
47
                        toMap(
48
                                Map.Entry::getKey,
49
                                Map.Entry::getValue, (e1, e2) -> e2,
50
                                LinkedHashMap::new));
51

  
52
        // shortcut
53
        if (sorted.size() == 0) {
54
            return date.build();
55
        }
56

  
57
        // voting method (1/3 + 1) wins
58
        if (sorted.size() >= 3) {
59
            final int acceptThreshold = (sorted.size() / 3) + 1;
60
            final List<String> accepted = sorted.entrySet().stream()
61
                    .filter(e -> e.getValue() >= acceptThreshold)
62
                    .map(e -> e.getKey())
63
                    .collect(Collectors.toList());
64

  
65
            // cannot find strong majority
66
            if (accepted.isEmpty()) {
67
                final int max = sorted.values().iterator().next();
68
                Optional<String> first = sorted.entrySet().stream()
69
                        .filter(e -> e.getValue() == max && !endsWith(e.getKey(), DATE_DEFAULT_SUFFIX))
70
                        .map(Map.Entry::getKey)
71
                        .findFirst();
72
                if (first.isPresent()) {
73
                    return date.setValue(first.get()).build();
74
                }
75

  
76
                return date.setValue(sorted.keySet().iterator().next()).build();
77
            }
78

  
79
            if (accepted.size() == 1) {
80
                return date.setValue(accepted.get(0)).build();
81
            } else {
82
                final Optional<String> first = accepted.stream()
83
                        .filter(d -> !endsWith(d, DATE_DEFAULT_SUFFIX))
84
                        .findFirst();
85
                if (first.isPresent()) {
86
                    return date.setValue(first.get()).build();
87
                }
88

  
89
                return date.build();
90
            }
91

  
92
            //1st non YYYY-01-01 is returned
93
        } else {
94
            if (sorted.size() == 2) {
95
                for (Map.Entry<String, Integer> e : sorted.entrySet()) {
96
                    if (!endsWith(e.getKey(), DATE_DEFAULT_SUFFIX)) {
97
                        return date.setValue(e.getKey()).build();
98
                    }
99
                }
100
            }
101

  
102
            // none of the dates seems good enough, return the 1st one
103
            return date.setValue(sorted.keySet().iterator().next()).build();
104
        }
105
    }
106

  
107
    private static boolean inRange(final String date) {
108
        final int year = Integer.parseInt(substringBefore(date, "-"));
109
        return year >= YEAR_LB && year <= YEAR_UB;
110
    }
111

  
112
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/transform/TrustOrdering.java
1
package eu.dnetlib.data.transform;
2

  
3
import com.google.common.collect.ImmutableList;
4
import com.google.common.collect.Ordering;
5

  
6
import eu.dnetlib.data.proto.OafProtos.Oaf;
7
import eu.dnetlib.data.proto.SpecialTrustProtos.SpecialTrust;
8
import org.apache.commons.lang3.StringUtils;
9

  
10
public class TrustOrdering extends Ordering<Oaf> {
11

  
12
	@Override
13
	public int compare(Oaf left, Oaf right) {
14
		String lTrust = left.getDataInfo().getTrust();
15
		String rTrust = right.getDataInfo().getTrust();
16

  
17
		if (lTrust.equals(rTrust)) return 0;
18

  
19
		if (lTrust.equals(SpecialTrust.INFINITE.toString())) return 1;
20
		if (rTrust.equals(SpecialTrust.INFINITE.toString())) return -1;
21

  
22
		if (lTrust.equals(SpecialTrust.NEUTRAL.toString())) return 1;
23
		if (rTrust.equals(SpecialTrust.NEUTRAL.toString())) return -1;
24

  
25
		return Float.compare(
26
				Float.parseFloat(StringUtils.isBlank(lTrust) ? "0.9" : lTrust),
27
				Float.parseFloat(StringUtils.isBlank(rTrust) ? "0.9" : rTrust));
28
	}
29

  
30
	public static ImmutableList<Oaf> sort(Iterable<Oaf> entities) {
31
		return new TrustOrdering().immutableSortedCopy(entities);
32
	}
33

  
34
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/transform/OafToRowMapper.java
1
package eu.dnetlib.data.transform;
2

  
3
import java.util.List;
4
import java.util.function.Function;
5

  
6
import com.google.common.collect.Lists;
7
import eu.dnetlib.data.mapreduce.util.OafDecoder;
8
import eu.dnetlib.data.mapreduce.util.OafEntityDecoder;
9
import eu.dnetlib.data.proto.OafProtos.Oaf;
10

  
11
public class OafToRowMapper implements Function<Oaf, List<Row>> {
12

  
13
	public static final String BODY = "body";
14

  
15
	@Override
16
	public List<Row> apply(final Oaf oaf) {
17
		final List<Row> rows = Lists.newArrayList();
18

  
19
		final OafDecoder d = OafDecoder.decode(oaf);
20
		final OafEntityDecoder entity = d.decodeEntity();
21

  
22
		final Row r = new Row(d.getCFQ(), entity.getId());
23
		switch (entity.getType()) {
24

  
25
		case project:
26
			r.addColumn(new Column<>(BODY, oaf.toByteArray()));
27
			break;
28
		case result:
29
			oaf.getEntity().getCachedOafRelList().stream()
30
					.map(cachedRel -> {
31
						final Oaf.Builder oafRel = Oaf.newBuilder(cachedRel);
32
						oafRel.getRelBuilder().clearCachedOafTarget();
33
						return oafRel.build();
34
					}).forEach(oafRel -> r.addColumn(new Column<>(OafDecoder.decode(oafRel).getCFQ(), oafRel.toByteArray())));
35
			break;
36
		case datasource:
37

  
38
			break;
39
		case organization:
40

  
41
			break;
42
		}
43

  
44
		return rows;
45
	}
46

  
47
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/bulktag/Datasource.java
1
package eu.dnetlib.data.bulktag;
2

  
3

  
4
import com.google.gson.Gson;
5
import eu.dnetlib.data.bulktag.selectioncriteria.VerbResolver;
6
import org.apache.commons.logging.Log;
7
import org.apache.commons.logging.LogFactory;
8
import org.dom4j.Node;
9

  
10
/**
11
 * Created by miriam on 01/08/2018.
12
 */
13
public class Datasource {
14
    private static final Log log = LogFactory.getLog(Datasource.class);
15

  
16
    private String openaireId;
17

  
18
    private SelectionConstraints selectionConstraints;
19

  
20

  
21
    public SelectionConstraints getSelCriteria() {
22
        return selectionConstraints;
23
    }
24

  
25
    public SelectionConstraints getSelectionConstraints() {
26
        return selectionConstraints;
27
    }
28

  
29
    public void setSelectionConstraints(SelectionConstraints selectionConstraints) {
30
        this.selectionConstraints = selectionConstraints;
31
    }
32

  
33
    public void setSelCriteria(SelectionConstraints selCriteria) {
34
        this.selectionConstraints = selCriteria;
35
    }
36

  
37
    public String getOpenaireId() {
38
        return openaireId;
39
    }
40

  
41
    public void setOpenaireId(String openaireId) {
42
        this.openaireId = openaireId;
43
    }
44

  
45
    private void setSelCriteria(String json, VerbResolver resolver){
46
        log.info("Selection constraints for datasource = " + json);
47
        selectionConstraints = new Gson().fromJson(json, SelectionConstraints.class);
48

  
49
        selectionConstraints.setSelection(resolver);
50
    }
51

  
52
    public void setSelCriteria(Node n, VerbResolver resolver){
53
        try{
54
            setSelCriteria(n.getText(),resolver);
55
        }catch(Exception e) {
56
            log.info("not set selection criteria... ");
57
            selectionConstraints =null;
58
        }
59

  
60
    }
61

  
62

  
63

  
64
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/bulktag/Organization.java
1
package eu.dnetlib.data.bulktag;
2

  
3
import com.google.gson.Gson;
4
import eu.dnetlib.data.bulktag.selectioncriteria.VerbResolver;
5
import org.dom4j.Node;
6

  
7
public class Organization {
8
    private String organizationId;
9

  
10
    private SelectionConstraints selCriteria;
11

  
12
    public String getOrganizationId() {
13
        return organizationId;
14
    }
15

  
16
    public void setOrganizationId(String organizationId) {
17
        this.organizationId = organizationId;
18
    }
19

  
20
    public SelectionConstraints getSelCriteria() {
21
        return selCriteria;
22
    }
23

  
24
    public void setSelCriteria(SelectionConstraints selCriteria) {
25
        this.selCriteria = selCriteria;
26
    }
27

  
28
    private void setSelCriteria(String json){
29
        //Type collectionType = new TypeToken<Collection<Constraints>>(){}.getType();
30
        selCriteria = new Gson().fromJson(json, SelectionConstraints.class);
31

  
32
    }
33

  
34
    public void setSelCriteria(Node n){
35
        if (n==null){
36
            selCriteria = null;
37
        }else{
38
            setSelCriteria(n.getText());
39
        }
40
    }
41

  
42
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/bulktag/Constraints.java
1
package eu.dnetlib.data.bulktag;
2

  
3
import com.google.common.reflect.TypeToken;
4
import com.google.gson.Gson;
5
import eu.dnetlib.data.bulktag.selectioncriteria.VerbResolver;
6
import org.apache.commons.logging.Log;
7
import org.apache.commons.logging.LogFactory;
8

  
9

  
10
import java.io.Serializable;
11
import java.lang.reflect.InvocationTargetException;
12
import java.lang.reflect.Type;
13
import java.util.Collection;
14
import java.util.List;
15
import java.util.Map;
16

  
17
/**
18
 * Created by miriam on 02/08/2018.
19
 */
20
public class Constraints implements Serializable {
21
    private static final Log log = LogFactory.getLog(Constraints.class);
22
    //private ConstraintEncapsulator ce;
23
    private List<Constraint> constraint;
24

  
25

  
26
    public Constraints() {
27
    }
28
    public List<Constraint> getConstraint() {
29
        return constraint;
30
    }
31

  
32
    public void setConstraint(List<Constraint> constraint) {
33
        this.constraint = constraint;
34
    }
35

  
36
    public void setSc(String json){
37
        Type collectionType = new TypeToken<Collection<Constraint>>(){}.getType();
38
        constraint = new Gson().fromJson(json, collectionType);
39

  
40
    }
41

  
42
    void setSelection(VerbResolver resolver) {
43
        for(Constraint st: constraint){
44

  
45
            try {
46
                st.setSelection(resolver);
47
            } catch (NoSuchMethodException e) {
48
                log.error(e.getMessage());
49
            } catch (IllegalAccessException e) {
50
                log.error(e.getMessage());
51
            } catch (InvocationTargetException e) {
52
                log.error(e.getMessage());
53
            } catch (InstantiationException e) {
54
                log.error(e.getMessage());
55
            }
56
        }
57

  
58
    }
59

  
60

  
61
    //Constraint in and
62
    public boolean verifyCriteria(final Map<String, List<String>> param) {
63

  
64
        for(Constraint sc : constraint) {
65
            boolean verified = false;
66
            for(String value : param.get(sc.getField())){
67
                if (sc.verifyCriteria(value.trim())){
68
                    verified = true;
69
                }
70
            }
71
            if(!verified)
72
                return verified;
73
        }
74
        return true;
75
    }
76

  
77
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/NotEqualVerb.java
1
package eu.dnetlib.data.bulktag.selectioncriteria;
2

  
3

  
4
@VerbClass("not_equals")
5
public class NotEqualVerb implements Selection {
6

  
7
    private String param;
8

  
9

  
10
    public NotEqualVerb(final String param) {
11
        this.param = param;
12
    }
13

  
14
    public NotEqualVerb() {
15
    }
16

  
17
    public String getParam() {
18
        return param;
19
    }
20

  
21
    public void setParam(String param) {
22
        this.param = param;
23
    }
24

  
25
    @Override
26
    public boolean apply(String value) {
27
        return !value.equalsIgnoreCase(param);
28
    }
29
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/bulktag/selectioncriteria/VerbResolver.java
1
package eu.dnetlib.data.bulktag.selectioncriteria;
2

  
3
import org.reflections.Reflections;
4

  
5
import java.io.Serializable;
6
import java.lang.reflect.InvocationTargetException;
7
import java.util.Map;
8
import java.util.stream.Collectors;
9

  
10
public class VerbResolver implements Serializable {
11
    private final Map<String, Class<Selection>> map;
12

  
13
    public VerbResolver(){
14
        this.map = new Reflections("eu.dnetlib").getTypesAnnotatedWith(VerbClass.class).stream()
15
                .collect(Collectors.toMap(v -> v.getAnnotation(VerbClass.class).value(), v->(Class<Selection>)v));
16
    }
17

  
18

  
19
    public Selection getSelectionCriteria(String name, String param) throws NoSuchMethodException, IllegalAccessException, InvocationTargetException, InstantiationException {
20

  
21
        return map.get(name).getDeclaredConstructor((String.class)).newInstance(param);
22

  
23
    }
24
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/bulktag/Constraint.java
1
package eu.dnetlib.data.bulktag;
2

  
3
import eu.dnetlib.data.bulktag.selectioncriteria.Selection;
4
import eu.dnetlib.data.bulktag.selectioncriteria.VerbResolver;
5
import org.springframework.beans.factory.annotation.Autowired;
6

  
7
import java.io.Serializable;
8
import java.lang.reflect.InvocationTargetException;
9

  
10

  
11
public class Constraint implements Serializable {
12
    private String verb;
13
    private String field;
14
    private String value;
15
    private Selection selection;
16

  
17
    public Constraint() {
18
    }
19

  
20
    public String getVerb() {
21
        return verb;
22
    }
23

  
24
    public void setVerb(String verb) {
25
        this.verb = verb;
26
    }
27

  
28
    public String getField() {
29
        return field;
30
    }
31

  
32
    public void setField(String field) {
33
        this.field = field;
34
    }
35

  
36
    public String getValue() {
37
        return value;
38
    }
39

  
40
    public void setValue(String value) {
41
        this.value = value;
42
    }
43

  
44

  
45

  
46
    public void setSelection(Selection sel){
47
        selection = sel;
48
    }
49

  
50
    public void setSelection(VerbResolver resolver) throws InvocationTargetException, NoSuchMethodException, InstantiationException, IllegalAccessException {
51
        selection = resolver.getSelectionCriteria(verb,value);
52
    }
53

  
54

  
55
    public boolean verifyCriteria(String metadata){
56
        return selection.apply(metadata);
57
    }
58

  
59

  
60

  
61
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/mapreduce/util/RelDescriptor.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
4
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
5

  
6
public class RelDescriptor {
7

  
8
	public static final String SEPARATOR = "_";
9

  
10
	private final String it;
11

  
12
	// relType also corresponds to the Ontology code
13
	private final RelType relType;
14

  
15
	private final SubRelType subRelType;
16

  
17
	private final String relClass;
18

  
19

  
20
	// <TERM code="hasAmongTopNSimilarDocuments" encoding="resultResult_similarity_hasAmongTopNSimilarDocuments"
21
	//final String rd = oafRel.getRelType().toString() + "_" + oafRel.getSubRelType() + "_" + relClasses.getInverse(oafRel.getRelClass());
22
	//<ONTOLOGY_NAME code="dnet:result_result_relations">
23
	public RelDescriptor(final String value) {
24
		super();
25
		this.it = value;
26

  
27
		String[] s = value.split(SEPARATOR);
28

  
29
		this.relType = RelType.valueOf(s[0]);
30
		this.subRelType = SubRelType.valueOf(s[1]);
31
		this.relClass = s[2];
32

  
33
	}
34

  
35
	public SubRelType getSubRelType() {
36
		return subRelType;
37
	}
38

  
39
	public RelType getRelType() {
40
		return relType;
41
	}
42

  
43
	public String getRelClass() {
44
		return relClass;
45
	}
46

  
47
	public String getIt() {
48
		return it;
49
	}
50

  
51

  
52
	@Override
53
	public String toString() {
54
		return getIt();
55
	}
56

  
57
	@Override
58
	public int hashCode() {
59
		final int prime = 31;
60
		int result = 1;
61
		result = (prime * result) + ((it == null) ? 0 : it.hashCode());
62
		return result;
63
	}
64

  
65
	@Override
66
	public boolean equals(final Object obj) {
67
		if (this == obj) return true;
68
		if (obj == null) return false;
69
		if (getClass() != obj.getClass()) return false;
70
		RelDescriptor other = (RelDescriptor) obj;
71
		if (it == null) {
72
			if (other.it != null) return false;
73
		} else if (!it.equals(other.it)) return false;
74
		return true;
75
	}
76

  
77
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-7.0.1/src/main/java/eu/dnetlib/data/transform/xml/AbstractDNetXsltFunctions.java
1
package eu.dnetlib.data.transform.xml;
2

  
3
import java.nio.charset.Charset;
4
import java.security.MessageDigest;
5
import java.util.Collection;
6
import java.util.List;
7
import java.util.Map;
8
import java.util.Objects;
9
import java.util.Set;
10
import java.util.function.Function;
11
import java.util.stream.Collectors;
12

  
13
import org.apache.commons.codec.binary.Base64;
14
import org.apache.commons.codec.binary.Hex;
15
import org.apache.commons.lang.math.NumberUtils;
16
import org.apache.commons.lang3.StringUtils;
17
import org.w3c.dom.NamedNodeMap;
18
import org.w3c.dom.Node;
19
import org.w3c.dom.NodeList;
20

  
21
import com.google.common.base.Predicate;
22
import com.google.common.base.Splitter;
23
import com.google.common.collect.Lists;
24
import com.google.common.collect.Maps;
25
import com.google.common.collect.Sets;
26
import com.google.gson.JsonObject;
27
import com.google.protobuf.Descriptors.Descriptor;
28
import com.google.protobuf.Descriptors.FieldDescriptor;
29
import com.google.protobuf.InvalidProtocolBufferException;
30
import com.google.protobuf.Message;
31
import com.google.protobuf.Message.Builder;
32
import com.google.protobuf.ProtocolMessageEnum;
33

  
34
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization;
35
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision;
36
import eu.dnetlib.data.proto.DedupProtos.Dedup;
37
import eu.dnetlib.data.proto.DedupSimilarityProtos.DedupSimilarity;
38
import eu.dnetlib.data.proto.FieldTypeProtos.BoolField;
39
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo;
40
import eu.dnetlib.data.proto.FieldTypeProtos.IntField;
41
import eu.dnetlib.data.proto.FieldTypeProtos.Journal;
42
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
43
import eu.dnetlib.data.proto.FieldTypeProtos.OAIProvenance;
44
import eu.dnetlib.data.proto.FieldTypeProtos.OAIProvenance.OriginDescription;
45
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
46
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
47
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
48
import eu.dnetlib.data.proto.KindProtos.Kind;
49
import eu.dnetlib.data.proto.OafProtos.Oaf;
50
import eu.dnetlib.data.proto.OafProtos.OafEntity;
51
import eu.dnetlib.data.proto.OafProtos.OafRel;
52
import eu.dnetlib.data.proto.OrganizationOrganizationProtos.OrganizationOrganization;
53
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization;
54
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation;
55
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
56
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
57
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
58
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization;
59
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization.Affiliation;
60
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject;
61
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome;
62
import eu.dnetlib.data.proto.ResultProtos.Result.Metadata;
63
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
64
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Part;
65
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.PublicationDataset;
66
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Similarity;
67
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult.Supplement;
68
import eu.dnetlib.data.proto.TypeProtos.Type;
69
import eu.dnetlib.miscutils.collections.Pair;
70
import eu.dnetlib.miscutils.iterators.IterablePair;
71

  
72
public abstract class AbstractDNetXsltFunctions {
73

  
74
	public static final String URL_REGEX = "^(http|https|ftp)\\://.*";
75
	private static final int MAX_NSPREFIX_LEN = 12;
76
	public static Predicate<String> urlFilter = s -> s.trim().matches(URL_REGEX);
77
	public static Map<String, String> code2name = Maps.newHashMap();
78

  
79
	/*
80
	 * Obtained via COPY (select code, name from class) TO '/tmp/class_scheme.csv' (FORMAT csv, delimiter ',', FORCE_QUOTE *); on the
81
	 * relational db
82
	 */
83
	// code2name.put("openaire2.0_data","OpenAIRE Data (funded, referenced datasets)");
84
	static {
85
		code2name.put("MH", "Marshall Islands");
86
		code2name.put("CF", "Central African Republic");
87
		code2name.put("TD", "Chad");
88
		code2name.put("CN", "China (People's Republic of)");
89
		code2name.put("NG", "Nigeria");
90
		code2name.put("NF", "Norfolk Island");
91
		code2name.put("MP", "Northern Mariana Islands");
92
		code2name.put("PS", "Palestinian-administered areas");
93
		code2name.put("SZ", "Swaziland");
94
		code2name.put("max", "Manx");
95
		code2name.put("TW", "Taiwan");
96
		code2name.put("TJ", "Tajikistan");
97
		code2name.put("BSG", "Research for the benefit of specific groups");
98
		code2name.put("CP", "Collaborative project");
99
		code2name.put("12MONTHS", "12 Months Embargo");
100
		code2name.put("ace", "Achinese");
101
		code2name.put("egy", "Ancient Egyptian");
102
		code2name.put("ara", "Arabic");
103
		code2name.put("arc", "Aramaic");
104
		code2name.put("arp", "Arapaho");
105
		code2name.put("gon", "Gondi");
106
		code2name.put("ine", "Indo-European");
107
		code2name.put("ipk", "Inupiaq");
108
		code2name.put("ira", "Iranian");
109
		code2name.put("lim", "Limburgan; Limburger; Limburgish");
110
		code2name.put("mni", "Manipuri");
111
		code2name.put("mno", "Manobo");
112
		code2name.put("men", "Mende");
113
		code2name.put("CX", "Christmas Island");
114
		code2name.put("CC", "Cocos (Keeling) Islands");
115
		code2name.put("KM", "Comoros");
116
		code2name.put("CG", "Congo");
117
		code2name.put("CK", "Cook Islands");
118
		code2name.put("HR", "Croatia");
119
		code2name.put("arn", "Araucanian");
120
		code2name.put("art", "Artificial");
121
		code2name.put("nah", "Aztec");
122
		code2name.put("bug", "Buginese");
123
		code2name.put("chn", "Chinook jargon");
124
		code2name.put("chv", "Chuvash");
125
		code2name.put("mus", "Creek");
126
		code2name.put("mic", "Micmac");
127
		code2name.put("min", "Minangkabau");
128
		code2name.put("fro", "Old French");
129
		code2name.put("cpp", "Portuguese-based Creoles and Pidgins");
130
		code2name.put("som", "Somali");
131
		code2name.put("wen", "Sorbian");
132
		code2name.put("hrv", "Croatian");
133
		code2name.put("cus", "Cushitic");
134
		code2name.put("sot", "Sotho, Southern");
135
		code2name.put("sai", "South American Indian");
136
		code2name.put("esl/spa", "Spanish");
137
		code2name.put("CU", "Cuba");
138
		code2name.put("CW", "Curaçao");
139
		code2name.put("CZ", "Czech Republic");
140
		code2name.put("DK", "Denmark");
141
		code2name.put("ER", "Eritrea");
142
		code2name.put("TF", "French Southern Territories");
143
		code2name.put("GW", "Guinea-Bissau");
144
		code2name.put("VA", "Holy See (Vatican City State)");
145
		code2name.put("BO", "Bolivia");
146
		code2name.put("KY", "Cayman Islands");
147
		code2name.put("dra", "Dravidian");
148
		code2name.put("cpe", "English-based Creoles and Pidgins");
149
		code2name.put("oji", "Ojibwa");
150
		code2name.put("CIP-EIP-TN", "CIP-Eco-Innovation - CIP-Thematic Network");
151
		code2name.put("jav/jaw", "Javanese");
152
		code2name.put("ach", "Acoli");
153
		code2name.put("ada", "Adangme");
154
		code2name.put("afh", "Afrihili");
155
		code2name.put("afr", "Afrikaans");
156
		code2name.put("afa", "Afro-Asiatic");
157
		code2name.put("ale", "Aleut");
158
		code2name.put("alg", "Algonquian languages");
159
		code2name.put("arw", "Arawak");
160
		code2name.put("asm", "Assamese");
161
		code2name.put("ava", "Avaric");
162
		code2name.put("ave", "Avestan");
163
		code2name.put("bra", "Braj");
164
		code2name.put("bua", "Buriat");
165
		code2name.put("chr", "Cherokee");
166
		code2name.put("chy", "Cheyenne");
167
		code2name.put("jrb", "Judeo-Arabic");
168
		code2name.put("jpr", "Judeo-Persian");
169
		code2name.put("kab", "Kabyle");
170
		code2name.put("kac", "Kachin");
171
		code2name.put("kaa", "Kara-Kalpak");
172
		code2name.put("loz", "Lozi");
173
		code2name.put("mwr", "Marwari");
174
		code2name.put("DJ", "Djibouti");
175
		code2name.put("JM", "Jamaica");
176
		code2name.put("JP", "Japan");
177
		code2name.put("JE", "Jersey");
178
		code2name.put("JO", "Jordan");
179
		code2name.put("KZ", "Kazakhstan");
180
		code2name.put("KE", "Kenya");
181
		code2name.put("KI", "Kiribati");
182
		code2name.put("KR", "Korea (Republic of)");
183
		code2name.put("KP", "Korea, Democatric People's Republic of");
184
		code2name.put("XK", "Kosovo * UN resolution");
185
		code2name.put("KW", "Kuwait");
186
		code2name.put("NL", "Netherlands");
187
		code2name.put("PE", "Peru");
188
		code2name.put("PH", "Philippines");
189
		code2name.put("fre/fra", "French");
190
		code2name.put("PL", "Poland");
191
		code2name.put("PT", "Portugal");
192
		code2name.put("PR", "Puerto Rico");
193
		code2name.put("QA", "Qatar");
194
		code2name.put("RO", "Romania");
195
		code2name.put("RU", "Russian Federation");
196
		code2name.put("RW", "Rwanda");
197
		code2name.put("RE", "Réunion");
198
		code2name.put("sve/swe", "Swedish");
199
		code2name.put("myn", "Mayan");
200
		code2name.put("dum", "Middle Dutch");
201
		code2name.put("mun", "Munda");
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff