Project

General

Profile

« Previous | Next » 

Revision 55825

[maven-release-plugin] copy for tag dnet-openaireplus-mapping-utils-6.3.24

View differences:

modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.24/src/main/java/eu/dnetlib/data/mapreduce/util/OafDecoder.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import java.util.List;
4
import java.util.Map.Entry;
5

  
6
import com.google.protobuf.Descriptors.EnumValueDescriptor;
7
import com.google.protobuf.Descriptors.FieldDescriptor;
8
import com.google.protobuf.GeneratedMessage;
9
import com.google.protobuf.InvalidProtocolBufferException;
10
import com.google.protobuf.Message;
11

  
12
import eu.dnetlib.data.proto.KindProtos.Kind;
13
import eu.dnetlib.data.proto.OafProtos.Oaf;
14
import eu.dnetlib.data.proto.OafProtos.OafEntity;
15
import eu.dnetlib.data.proto.OafProtos.OafRel;
16
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
17

  
18
/**
19
 * Helper class, to be used as accessor helper over the Oaf structure.
20
 *
21
 * @author claudio
22
 *
23
 */
24
public class OafDecoder {
25

  
26
	/**
27
	 * Oaf object
28
	 */
29
	private Oaf oaf;
30

  
31
	/**
32
	 * Cached sub decoder
33
	 */
34
	private OafEntityDecoder entityDecoder = null;
35

  
36
	/**
37
	 * Cached sub decoder
38
	 */
39
	private OafRelDecoder relDecoder = null;
40

  
41
	public static OafDecoder decode(final Oaf oaf) {
42
		return new OafDecoder(oaf);
43
	}
44

  
45
	public static OafDecoder decode(final byte[] oaf) {
46
		return new OafDecoder(oaf);
47
	}
48

  
49
	/**
	 * Builds a decoder by parsing a serialized Oaf protocol buffer.
	 *
	 * @param value
	 *            the serialized Oaf message
	 * @throws RuntimeException
	 *             if the bytes cannot be parsed as an Oaf message; the parsing failure is attached as the cause
	 */
	protected OafDecoder(final byte[] value) {
		try {
			this.oaf = Oaf.parseFrom(value);
		} catch (InvalidProtocolBufferException e) {
			// Keep the protobuf failure as the cause: it carries the actual reason the payload was rejected.
			throw new RuntimeException("unable to deserialize proto: " + new String(value), e);
		}
	}
56

  
57
	private OafDecoder(final Oaf oaf) {
58
		this.oaf = oaf;
59
	}
60

  
61
	public Kind getKind() {
62
		return oaf.getKind();
63
	}
64

  
65
	public Oaf getOaf() {
66
		return oaf;
67
	}
68

  
69
	// / Entity
70

  
71
	public GeneratedMessage getMetadata() {
72
		return decodeEntity().getMetadata();
73
	}
74

  
75
	public GeneratedMessage getOafEntity() {
76
		return decodeEntity().getEntity();
77
	}
78

  
79
	public String getEntityId() {
80
		return decodeEntity().getId();
81
	}
82

  
83
	public OafEntity getEntity() {
84
		return oaf.getEntity();
85
	}
86

  
87
	public OafEntityDecoder decodeEntity() {
88
		if (entityDecoder == null) {
89
			entityDecoder = OafEntityDecoder.decode(getEntity());
90
		}
91
		return entityDecoder;
92
	}
93

  
94
	// / Rel
95

  
96
	public OafRel getOafRel() {
97
		return oaf.getRel();
98
	}
99

  
100
	public GeneratedMessage getRel() {
101
		return decodeRel().getRel();
102
	}
103

  
104
	public RelType relType() {
105
		return decodeRel().getRelType();
106
	}
107

  
108
	public String relTypeName() {
109
		return relType().toString();
110
	}
111

  
112
	public String relSourceId() {
113
		return decodeRel().getRelSourceId();
114
	}
115

  
116
	public String relTargetId() {
117
		return decodeRel().getRelTargetId();
118
	}
119

  
120
	public String getCFQ() {
121
		switch (getKind()) {
122
		case entity:
123
			return getEntity().getType().toString();
124
		case relation:
125
			return decodeRel().getCFQ();
126
		default:
127
			throw new IllegalArgumentException("Invalid Kind: " + getKind());
128
		}
129
	}
130

  
131
	public RelDescriptor getRelDescriptor() {
132
		if (!getKind().equals(Kind.relation)) return null;
133
		return decodeRel().getRelDescriptor();
134
	}
135

  
136
	private OafRelDecoder decodeRel() {
137
		if (relDecoder == null) {
138
			relDecoder = OafRelDecoder.decode(getOafRel());
139
		}
140
		return relDecoder;
141
	}
142

  
143
	public byte[] toByteArray() {
144
		return oaf.toByteArray();
145
	}
146

  
147
	public String asXml() {
148
		StringBuilder sb = new StringBuilder("<oaf>");
149

  
150
		for (Entry<FieldDescriptor, Object> e : oaf.getAllFields().entrySet()) {
151
			asXml(sb, e.getKey(), e.getValue());
152
		}
153
		sb.append("</oaf>");
154
		return sb.toString();
155
	}
156

  
157
	@SuppressWarnings("unchecked")
158
	private void asXml(final StringBuilder sb, final FieldDescriptor fd, final Object value) {
159

  
160
		if (fd.isRepeated() && (value instanceof List<?>)) {
161
			for (Object o : ((List<Object>) value)) {
162
				asXml(sb, fd, o);
163
			}
164
		} else if (fd.getType().equals(FieldDescriptor.Type.MESSAGE)) {
165
			sb.append("<" + fd.getName() + ">");
166
			for (Entry<FieldDescriptor, Object> e : ((Message) value).getAllFields().entrySet()) {
167
				asXml(sb, e.getKey(), e.getValue());
168
			}
169
			sb.append("</" + fd.getName() + ">");
170
		} else if (fd.getType().equals(FieldDescriptor.Type.ENUM)) {
171
			sb.append("<" + fd.getName() + ">");
172
			sb.append(((EnumValueDescriptor) value).getName());
173
			sb.append("</" + fd.getName() + ">");
174
		} else {
175
			sb.append("<" + fd.getName() + ">");
176
			sb.append(escapeXml(value.toString()));
177
			sb.append("</" + fd.getName() + ">");
178
		}
179
	}
180

  
181
	private static String escapeXml(final String value) {
182
		return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;");
183
	}
184

  
185
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.24/src/main/java/eu/dnetlib/data/index/CloudIndexClient.java
1
package eu.dnetlib.data.index;
2

  
3
import eu.dnetlib.functionality.index.solr.feed.StreamingInputDocumentFactory;
4
import eu.dnetlib.miscutils.datetime.HumanTime;
5
import eu.dnetlib.miscutils.functional.UnaryFunction;
6
import org.apache.commons.logging.Log;
7
import org.apache.commons.logging.LogFactory;
8
import org.apache.solr.client.solrj.SolrQuery;
9
import org.apache.solr.client.solrj.SolrServerException;
10
import org.apache.solr.client.solrj.impl.CloudSolrClient;
11
import org.apache.solr.client.solrj.response.QueryResponse;
12
import org.apache.solr.client.solrj.response.UpdateResponse;
13
import org.apache.solr.common.SolrInputDocument;
14

  
15
import java.io.Closeable;
16
import java.io.IOException;
17
import java.text.SimpleDateFormat;
18
import java.util.Date;
19
import java.util.List;
20

  
21
/**
22
 * Created by michele on 11/11/15.
23
 */
24
public class CloudIndexClient implements Closeable {
25

  
26
	private static final Log log = LogFactory.getLog(CloudIndexClient.class);
27
	private static final String INDEX_RECORD_RESULT_FIELD = "dnetResult";
28

  
29
	private final CloudSolrClient solrClient;
30

  
31
	protected CloudIndexClient(final CloudSolrClient solrServer) {
32
		this.solrClient = solrServer;
33
	}
34

  
35
	public int feed(final String record, final String indexDsId, final UnaryFunction<String, String> toIndexRecord) throws CloudIndexClientException {
36
		return feed(record, indexDsId, toIndexRecord, true);
37
	}
38

  
39
	public int feed(final String record, final String indexDsId, final UnaryFunction<String, String> toIndexRecord, final boolean commit)
40
			throws CloudIndexClientException {
41
		try {
42
			final SolrInputDocument doc = prepareSolrDocument(record, indexDsId, toIndexRecord);
43
			if ((doc == null) || doc.isEmpty()) throw new CloudIndexClientException("Invalid solr document");
44
			return feed(doc, commit);
45
		} catch (final Throwable e) {
46
			throw new CloudIndexClientException("Error feeding document", e);
47
		}
48
	}
49

  
50
	public int feed(final SolrInputDocument document) throws CloudIndexClientException {
51
		return feed(document, true);
52
	}
53

  
54
	public int feed(final SolrInputDocument document, final boolean commit) throws CloudIndexClientException {
55
		try {
56
			final UpdateResponse res = solrClient.add(document);
57
			log.debug("feed time for single records, elapsed time: " + HumanTime.exactly(res.getElapsedTime()));
58
			if (res.getStatus() != 0) { throw new CloudIndexClientException("bad status: " + res.getStatus()); }
59
			if (commit) {
60
				solrClient.commit();
61
			}
62
			return res.getStatus();
63
		} catch (final Throwable e) {
64
			throw new CloudIndexClientException("Error feeding document", e);
65
		}
66
	}
67

  
68
	public void feed(final List<SolrInputDocument> docs, final AfterFeedingCallback callback) throws CloudIndexClientException {
69
		feed(docs, callback, true);
70
	}
71

  
72
	public void feed(final List<SolrInputDocument> docs, final AfterFeedingCallback callback, final boolean commit) throws CloudIndexClientException {
73
		try {
74
			if (docs.isEmpty()) {
75
				log.debug("Empty list of documents. Calling callback, if needed.");
76
				if (callback != null) {
77
					callback.doAfterFeeding(null);
78
				}
79
				return;
80
			}
81
			final UpdateResponse res = solrClient.add(docs);
82

  
83
			log.debug("feed time for " + docs.size() + " records, elapsed tipe: : " + HumanTime.exactly(res.getElapsedTime()));
84

  
85
			if (commit) {
86
				solrClient.commit();
87
			}
88
			if (callback != null) {
89
				callback.doAfterFeeding(res);
90
			}
91
			if (res.getStatus() != 0) throw new CloudIndexClientException("bad status: " + res.getStatus());
92
		} catch (final Throwable e) {
93
			throw new CloudIndexClientException("Error feeding documents", e);
94
		}
95
	}
96

  
97
	public SolrInputDocument prepareSolrDocument(final String record, final String indexDsId, final UnaryFunction<String, String> toIndexRecord)
98
			throws CloudIndexClientException {
99
		try {
100
			final StreamingInputDocumentFactory documentFactory = new StreamingInputDocumentFactory();
101

  
102
			final String version = (new SimpleDateFormat("yyyy-MM-dd\'T\'hh:mm:ss\'Z\'")).format(new Date());
103
			final String indexRecord = toIndexRecord.evaluate(record);
104

  
105
			if (log.isDebugEnabled()) {
106
				log.debug("***************************************\nSubmitting index record:\n" + indexRecord + "\n***************************************\n");
107
			}
108

  
109
			return documentFactory.parseDocument(version, indexRecord, indexDsId, INDEX_RECORD_RESULT_FIELD);
110
		} catch (final Throwable e) {
111
			throw new CloudIndexClientException("Error creating solr document", e);
112
		}
113
	}
114

  
115
	public boolean isRecordIndexed(final String id) throws CloudIndexClientException {
116
		final QueryResponse res = query("objidentifier:\"" + id + "\"", null);
117
		return res.getResults().size() > 0;
118
	}
119

  
120
	public int remove(final String id) throws CloudIndexClientException {
121
		return remove(id, true);
122
	}
123

  
124
	public int remove(final String id, final boolean commit) throws CloudIndexClientException {
125
		try {
126
			final UpdateResponse res = solrClient.deleteByQuery("objidentifier:\"" + id + "\"");
127
			if (commit) {
128
				solrClient.commit();
129
			}
130
			return res.getResponse().size();
131
		} catch (final Throwable e) {
132
			throw new CloudIndexClientException("Error removing documents", e);
133
		}
134
	}
135

  
136
	public int count(final String query) throws CloudIndexClientException {
137
		final QueryResponse res = query(query, 0);
138
		return res.getResults().size();
139
	}
140

  
141
	public QueryResponse query(final String query, Integer rows) throws CloudIndexClientException {
142
		try {
143
			final SolrQuery solrQuery = new SolrQuery();
144
			solrQuery.setQuery(query);
145
			if(rows != null && rows >= 0) {
146
				solrQuery.setRows(rows);
147
			}
148
			return solrClient.query(solrQuery);
149
		} catch (final Throwable e) {
150
			throw new CloudIndexClientException("Error searching documents", e);
151
		}
152
	}
153

  
154
	public void close() throws IOException {
155
		if (solrClient != null) {
156
			solrClient.close();
157
		}
158
	}
159

  
160
	public void commit() throws CloudIndexClientException {
161
		if(solrClient != null) {
162
			try {
163
				solrClient.commit();
164
			} catch (SolrServerException | IOException e) {
165
				throw new CloudIndexClientException(e.getMessage());
166
			}
167
		}
168
	}
169

  
170
	public interface AfterFeedingCallback {
171

  
172
		void doAfterFeeding(final UpdateResponse response);
173
	}
174
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.24/src/main/java/eu/dnetlib/data/bulktag/CommunityConfiguration.java
1
package eu.dnetlib.data.bulktag;
2

  
3
import com.google.common.collect.Lists;
4
import com.google.common.collect.Maps;
5
import com.google.gson.Gson;
6
import org.apache.commons.logging.Log;
7
import org.apache.commons.logging.LogFactory;
8

  
9
import java.util.ArrayList;
10
import java.util.HashMap;
11
import java.util.List;
12
import java.util.Map;
13
import java.util.stream.Collectors;
14

  
15
/**
16
 * Created by miriam on 02/08/2018.
17
 */
18
public class CommunityConfiguration {
19

  
20
    private static final Log log = LogFactory.getLog(CommunityConfiguration.class);
21

  
22
    enum MapModes{
23
        SUBJECT_MAP,
24
        DATASOURCE_MAP,
25
        ZENODO_COMMUNITY_MAP
26
    }
27

  
28
    private Map<String,Community> communities;
29

  
30
    //map subject -> communityid
31
    private transient Map<String,List<Pair<String,SelectionCriteria>>> subjectMap = new HashMap<>();
32
    //map datasourceid -> communityid
33
    private transient Map<String,List<Pair<String,SelectionCriteria>>> datasourceMap = new HashMap<>();
34
    //map zenodocommunityid -> communityid
35
    private transient Map<String,List<Pair<String,SelectionCriteria>>> zenodocommunityMap = new HashMap<>();
36

  
37
    public CommunityConfiguration(final Map<String, Community> communities) {
38
        this.communities = communities;
39
        init();
40
    }
41

  
42
    public void init() {
43

  
44
        if (subjectMap == null) {
45
            subjectMap = Maps.newHashMap();
46
        }
47
        if (datasourceMap == null) {
48
            datasourceMap = Maps.newHashMap();
49
        }
50
        if (zenodocommunityMap == null) {
51
            zenodocommunityMap = Maps.newHashMap();
52
        }
53

  
54
        for(Community c : getCommunities().values()) {
55
            //get subjects
56
            final String id = c.getId();
57
            for(String sbj : c.getSubjects()){
58
                Pair<String,SelectionCriteria> p = new Pair<>(id,new SelectionCriteria(null));
59
                add(sbj.toLowerCase().trim() , p, subjectMap);
60
            }
61
            //get datasources
62
            for(Datasource d: c.getDatasources()){
63
                add(d.getOpenaireId(),new Pair<>(id,d.getSelCriteria()),datasourceMap);
64
            }
65
            //get zenodo communities
66
            for(ZenodoCommunity zc : c.getZenodoCommunities()){
67
                add(zc.getZenodoCommunityId(),new Pair<>(id,zc.getSelCriteria()),zenodocommunityMap);
68
            }
69

  
70
        }
71
    }
72

  
73
    private void add(String key,Pair<String,SelectionCriteria> value, Map<String,List<Pair<String,SelectionCriteria>>> map){
74
        List<Pair<String,SelectionCriteria>> values = map.get(key);
75

  
76
        if (values == null){
77
            values = new ArrayList<>();
78
            map.put(key,values);
79
        }
80
        values.add(value);
81
    }
82

  
83
    public List<Pair<String,SelectionCriteria>> getCommunityForSubject(String sbj){
84
        return subjectMap.get(sbj);
85
    }
86

  
87
    public List<Pair<String,SelectionCriteria>> getCommunityForDatasource(String dts){
88
        return datasourceMap.get(dts);
89
    }
90

  
91
    public List<Pair<String,SelectionCriteria>> getCommunityForZenodoCommunity(String zc){
92
        return zenodocommunityMap.get(zc);
93
    }
94

  
95
    public List<String> getCommunityForSubjectValue(String value) {
96

  
97
        return getContextIds(subjectMap.get(value));
98
    }
99

  
100
    public List<String> getCommunityForDatasourceValue(String value) {
101

  
102
        return getContextIds(datasourceMap.get(value.toLowerCase()));
103
    }
104

  
105
    public List<String> getCommunityForZenodoCommunityValue(String value){
106

  
107
        return getContextIds(zenodocommunityMap.get(value.toLowerCase()));
108
    }
109

  
110
    private List<String> getContextIds(List<Pair<String, SelectionCriteria>> list) {
111
        if (list != null) {
112
            return list.stream().map(p -> p.getFst()).collect(Collectors.toList());
113
        }
114
        return Lists.newArrayList();
115
    }
116

  
117
    /*
118
    public SelectionCriteria getSelCriteria(String value, String community, MapModes map_mode){
119

  
120
        Map<String,List<Pair<String,SelectionCriteria>>> map = null;
121
        if(map_mode == MapModes.DATASOURCE_MAP)
122
            map = datasourceMap;
123
        else
124
        if(map_mode == MapModes.ZENODO_COMMUNITY_MAP)
125
            map = zenodocommunityMap;
126
        else
127
            new Throwable("Impossible to have Selection Criteria over subjects");
128

  
129
        List<Pair<String, SelectionCriteria>> lst = map.get(value);
130
        List<SelectionCriteria> selectionList = lst.stream().map(p -> {
131
            if (p.fst == community)
132
                return p.snd;
133
            return null;
134
        }).collect(Collectors.toList());//for each community there will be only one Selection Criteria per datasource or zenodo community
135
        if(selectionList != null)
136
            if (selectionList.size()>0)
137
                return selectionList.get(0);
138
        return null;
139
    }
140
    */
141

  
142
    public Map<String, Community> getCommunities() {
143
        return communities;
144
    }
145

  
146
    public void setCommunities(Map<String, Community> communities) {
147
        this.communities = communities;
148
    }
149

  
150
    public String toJson() {
151
        final Gson g = new Gson();
152
        return g.toJson(this);
153
    }
154

  
155
    public int size() {
156
        return communities.keySet().size();
157
    }
158

  
159
    public Community getCommunityById(String id){
160
        return communities.get(id);
161
    }
162

  
163
    public List<Community> getCommunityList() {
164
        return Lists.newLinkedList(communities.values());
165
    }
166
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.24/pom.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3
	<parent>
4
		<groupId>eu.dnetlib</groupId>
5
		<artifactId>dnet45-parent</artifactId>
6
		<version>1.0.0</version>
7
		<relativePath />
8
	</parent>
9
	<modelVersion>4.0.0</modelVersion>
10
	<groupId>eu.dnetlib</groupId>
11
	<artifactId>dnet-openaireplus-mapping-utils</artifactId>
12
	<packaging>jar</packaging>
13
	<version>6.3.24</version>
14
	<scm>
15
		<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.24</developerConnection>
16
	</scm>
17

  
18
	<properties>
19
		<skipITs>true</skipITs>
20
	</properties>
21
	<build>
22
		<plugins>
23
			<plugin>
24
				<groupId>org.apache.maven.plugins</groupId>
25
				<artifactId>maven-failsafe-plugin</artifactId>
26
				<version>2.19.1</version>
27
				<executions>
28
					<execution>
29
						<id>integration-test</id>
30
						<goals>
31
							<goal>integration-test</goal>
32
						</goals>
33
					</execution>
34
					<execution>
35
						<id>verify</id>
36
						<goals>
37
							<goal>verify</goal>
38
						</goals>
39
					</execution>
40
				</executions>
41
				<configuration>
42
					<skipITs>${skipITs}</skipITs>
43
				</configuration>
44
			</plugin>
45
		</plugins>
46
	</build>
47

  
48
	<dependencies>
49
		<dependency>
50
			<groupId>com.google.guava</groupId>
51
			<artifactId>guava</artifactId>
52
			<version>${google.guava.version}</version>
53
		</dependency>		
54
		<dependency>
55
			<groupId>junit</groupId>
56
			<artifactId>junit</artifactId>
57
			<version>${junit.version}</version>
58
			<scope>test</scope>
59
		</dependency>
60
		<dependency>
61
			<groupId>com.ximpleware</groupId>
62
			<artifactId>vtd-xml</artifactId>
63
			<version>[2.12, 3.0.0)</version>
64
		</dependency>
65
		<dependency>
66
			<groupId>commons-codec</groupId>
67
			<artifactId>commons-codec</artifactId>
68
			<version>${commons.codec.version}</version>
69
		</dependency>
70
		<dependency>
71
			<groupId>dom4j</groupId>
72
			<artifactId>dom4j</artifactId>
73
			<version>${dom4j.version}</version>
74
			<exclusions>
75
				<exclusion>
76
					<artifactId>xml-apis</artifactId>
77
					<groupId>xml-apis</groupId>
78
				</exclusion>
79
			</exclusions>
80
		</dependency>
81
		<dependency>
82
			<groupId>net.sf.supercsv</groupId>
83
			<artifactId>super-csv</artifactId>
84
			<version>2.4.0</version>
85
		</dependency>
86
		<dependency>
87
			<groupId>eu.dnetlib</groupId>
88
			<artifactId>dnet-openaire-data-protos</artifactId>
89
			<version>[3.9.3]</version>
90
		</dependency>
91
		<dependency>
92
			<groupId>eu.dnetlib</groupId>
93
			<artifactId>dnet-pace-core</artifactId>
94
			<version>[3.0.0,4.0.0)</version>
95
		</dependency>
96
		<dependency>
97
			<groupId>eu.dnetlib</groupId>
98
			<artifactId>cnr-misc-utils</artifactId>
99
			<version>[1.0.0,2.0.0)</version>
100
		</dependency>
101
		<dependency>
102
			<groupId>eu.dnetlib</groupId>
103
			<artifactId>dnet-hadoop-commons</artifactId>
104
			<version>[2.0.0,3.0.0)</version>
105
		</dependency>
106
		<dependency>
107
			<groupId>eu.dnetlib</groupId>
108
			<artifactId>dnet-index-solr-common</artifactId>
109
			<version>[2.3.4,3.0.0)</version>
110
		</dependency>	
111
		<dependency>
112
			<groupId>com.googlecode.protobuf-java-format</groupId>
113
			<artifactId>protobuf-java-format</artifactId>
114
			<version>1.2</version>
115
		</dependency>
116
		<dependency>
117
			<groupId>org.apache.commons</groupId>
118
			<artifactId>commons-lang3</artifactId>
119
			<version>3.5</version>
120
		</dependency>
121

  
122
		<!-- test deps -->
123
		<dependency>
124
			<groupId>eu.dnetlib</groupId>
125
			<artifactId>dnet-openaireplus-profiles</artifactId>
126
			<version>[1.0.0,2.0.0)</version>
127
			<scope>test</scope>
128
		</dependency>
129
		<dependency>
130
			<groupId>org.mongodb</groupId>
131
			<artifactId>mongo-java-driver</artifactId>
132
			<version>${mongodb.driver.version}</version>
133
			<scope>test</scope>
134
		</dependency>
135
		<dependency>
136
			<groupId>org.springframework</groupId>
137
			<artifactId>spring-context</artifactId>
138
			<version>${spring.version}</version>
139
			<scope>test</scope>
140
		</dependency>
141
		<dependency>
142
			<groupId>org.springframework</groupId>
143
			<artifactId>spring-core</artifactId>
144
			<version>${spring.version}</version>
145
			<scope>test</scope>
146
		</dependency>
147
		<dependency>
148
			<groupId>org.springframework</groupId>
149
			<artifactId>spring-test</artifactId>
150
			<version>${spring.version}</version>
151
			<scope>test</scope>
152
		</dependency>
153

  
154
	</dependencies>
155
</project>
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.24/src/test/java/eu/dnetlib/data/transform/xml/vtd/VtdParserToProtoTest.java
1
package eu.dnetlib.data.transform.xml.vtd;
2

  
3
import java.io.IOException;
4
import java.util.function.Function;
5

  
6
import eu.dnetlib.data.proto.OafProtos.Oaf;
7
import eu.dnetlib.data.transform.xml2.DatasetToProto;
8
import eu.dnetlib.data.transform.xml2.PublicationToProto;
9
import org.apache.commons.io.IOUtils;
10
import org.apache.commons.logging.Log;
11
import org.apache.commons.logging.LogFactory;
12
import org.junit.Test;
13

  
14
import static org.junit.Assert.assertNotNull;
15

  
16
public class VtdParserToProtoTest {
17

  
18
	private static final Log log = LogFactory.getLog(VtdParserToProtoTest.class);
19

  
20
	@Test
21
	public void testParsePublication() throws IOException {
22
		doTest("/eu/dnetlib/data/transform/publication.xml", new PublicationToProto());
23
	}
24

  
25
	@Test
26
	public void testParseDataset() throws IOException {
27
		doTest("/eu/dnetlib/data/transform/dataset.xml", new DatasetToProto());
28
	}
29

  
30
	@Test
31
	public void testParseDataset2() throws IOException {
32
		doTest("/eu/dnetlib/data/transform/dataset2.xml", new DatasetToProto());
33
	}
34

  
35
	private void doTest(final String filePath, Function<String, Oaf> f) throws IOException {
36
		final String xml = IOUtils.toString(getClass().getResourceAsStream(filePath));
37

  
38
		assertNotNull(xml);
39

  
40
		final Oaf oaf = f.apply(xml);
41

  
42
		assertNotNull(oaf);
43

  
44
		log.info(oaf.toString());
45
	}
46

  
47
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.24/src/test/java/eu/dnetlib/data/transform/xml/vtd/ConfigurationTestConfig.java
1
package eu.dnetlib.data.transform.xml.vtd;
2

  
3
import java.io.IOException;
4
import java.io.InputStream;
5
import java.util.Properties;
6

  
7
import com.mongodb.MongoClient;
8
import com.mongodb.client.MongoDatabase;
9
import org.springframework.context.annotation.Bean;
10
import org.springframework.context.annotation.Configuration;
11
import org.springframework.core.io.ClassPathResource;
12

  
13
@Configuration
14
public class ConfigurationTestConfig {
15

  
16
	@Bean
17
	public MongoDatabase db() throws IOException {
18

  
19
		final Properties p = new Properties();
20
		final ClassPathResource cp = new ClassPathResource("/eu/dnetlib/data/transform/xml/vtd/test.properties");
21
		try (final InputStream stream = cp.getInputStream()) {
22
			p.load(stream);
23
		}
24

  
25
		final MongoClient mongo = new MongoClient(
26
				p.getProperty("mongodb.host"),
27
				Integer.valueOf(p.getProperty("mongodb.port")));
28
		return mongo.getDatabase(p.getProperty("mongodb.dbname"));
29
	}
30

  
31
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.24/src/test/java/eu/dnetlib/data/transform/xml/vtd/VtdParserToProtoIT.java
1
package eu.dnetlib.data.transform.xml.vtd;
2

  
3
import com.mongodb.client.MongoCollection;
4
import com.mongodb.client.MongoDatabase;
5
import eu.dnetlib.data.transform.xml2.DatasetToProto;
6
import org.apache.commons.lang3.time.StopWatch;
7
import org.apache.commons.logging.Log;
8
import org.apache.commons.logging.LogFactory;
9
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;
10
import org.bson.Document;
11
import org.junit.Test;
12
import org.junit.runner.RunWith;
13
import org.springframework.beans.factory.annotation.Autowired;
14
import org.springframework.test.context.ContextConfiguration;
15
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
16

  
17
import java.io.IOException;
18
import java.util.Objects;
19
import java.util.concurrent.atomic.AtomicInteger;
20
import java.util.stream.StreamSupport;
21

  
22
import static org.junit.Assert.assertNotNull;
23
import static org.junit.Assert.assertTrue;
24

  
25
@RunWith(SpringJUnit4ClassRunner.class)
26
@ContextConfiguration(classes = { ConfigurationTestConfig.class })
27
public class VtdParserToProtoIT {
28

  
29
	private static final Log log = LogFactory.getLog(VtdParserToProtoIT.class);
30
	public static final String COLLECTION_NAME = "datacite";
31
	private static final int BATCH_SIZE = 10000;
32
	public static final int LOG_FREQ = 5000;
33

  
34
	@Autowired
35
	private MongoDatabase db;
36

  
37
	@Test
38
	public void testParseDatacite() throws IOException {
39

  
40
		final MongoCollection<Document> collection = db.getCollection(COLLECTION_NAME);
41

  
42
		final long collectionSize = collection.count();
43
		log.info(String.format("found %s records in collection '%s'", collectionSize, COLLECTION_NAME));
44

  
45
		final AtomicInteger read = new AtomicInteger(0);
46
		final DescriptiveStatistics stats = new DescriptiveStatistics();
47

  
48
		final StopWatch timer = new StopWatch();
49

  
50
		final DatasetToProto mapper = new DatasetToProto();
51
		StreamSupport.stream(collection.find().batchSize(BATCH_SIZE).spliterator(), false)
52
				.peek(d -> {
53
					if (read.addAndGet(1) % LOG_FREQ == 0) {
54
						log.info(String.format("records read so far %s", read.get()));
55
						log.info(String.format("stats so far %s", stats.toString()));
56
					}
57
				})
58
				.map(d -> (String) d.get("body"))
59
				.filter(Objects::nonNull)
60
				.peek(s -> timer.start())
61
				.map(mapper)
62
				.forEach(oaf -> {
63
					assertNotNull(oaf);
64
					assertTrue(oaf.hasEntity());
65

  
66
					timer.stop();
67
					stats.addValue(timer.getTime());
68
					timer.reset();
69
				});
70

  
71
		log.info(String.format("processed %s/%s records", read.get(), collectionSize));
72
	}
73

  
74

  
75
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.24/src/test/java/eu/dnetlib/data/transform/xml/vtd/VtdUtilityParserTest.java
1
package eu.dnetlib.data.transform.xml.vtd;
2

  
3
import java.io.InputStream;
4
import java.util.List;
5

  
6
import com.ximpleware.AutoPilot;
7
import com.ximpleware.VTDGen;
8
import com.ximpleware.VTDNav;
9
import eu.dnetlib.data.transform.xml2.Node;
10
import eu.dnetlib.data.transform.xml2.VtdUtilityParser;
11
import org.apache.commons.io.IOUtils;
12
import org.junit.Assert;
13
import org.junit.Test;
14

  
15

  
16
public class VtdUtilityParserTest {
17

  
18
	@Test
19
	public void testUtils1() {
20
		String xpath = VtdUtilityParser.xpath("a", "b", "c");
21
		Assert.assertTrue("/*[local-name()='a']/*[local-name()='b']/*[local-name()='c']".equals(xpath));
22
	}
23

  
24
    @Test
25
    public void testPartser() throws Exception {
26
        final InputStream resource = this.getClass().getResourceAsStream("/eu/dnetlib/data/transform/publication.xml");
27
        final String record =IOUtils.toString(resource);
28
        final VTDGen vg = VtdUtilityParser.parseXml(record);
29
        final VTDNav vn = vg.getNav();
30
        final AutoPilot ap = new AutoPilot(vn);
31

  
32
        List<Node> nodes = VtdUtilityParser.getNodes(ap, vn, "//*[local-name()='referenceaa']");
33

  
34
        nodes.forEach(n -> Assert.assertTrue(n.getAttributes().keySet().size()>0));
35

  
36
        System.out.println(VtdUtilityParser.countNodes(ap, vn, "count(//*[local-name()='CobjIdentifier'])"));
37

  
38

  
39

  
40

  
41
    }
42

  
43
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.24/src/test/java/eu/dnetlib/data/transform/xml/OpenTrialsXsltFunctionsTest.java
1
package eu.dnetlib.data.transform.xml;
2

  
3
import java.util.List;
4

  
5
import eu.dnetlib.data.transform.xml.OpenTrialsXsltFunctions.JsonProv;
6
import org.junit.After;
7
import org.junit.Before;
8
import org.junit.Test;
9

  
10
import static org.junit.Assert.assertEquals;
11

  
12

  
13
/**
14
 * OpenTrialsXsltFunctions Tester.
15
 *
16
 */
17
public class OpenTrialsXsltFunctionsTest {
18

  
19
	private String jsonProv = "[{\"url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT00378508\", \"sourceID\" : \"77eb42c5-0ec7-4e31-963a-5736b66f2d26\", \"sourceName\" : \"ictrp\"},{\"url\" : \"https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true\", \"sourceID\" : \"b389497c-0833-432b-a09b-930526b7b4d4\", \"sourceName\" : \"nct\"}]";
20
	private String jsonProvWithNull = "[{\"url\" : \"\", \"sourceID\" : \"77eb42c5-0ec7-4e31-963a-5736b66f2d26\", \"sourceName\" : \"ictrp\"},{\"url\" : \"https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true\", \"sourceID\" : \"b389497c-0833-432b-a09b-930526b7b4d4\", \"sourceName\" : \"nct\"}]";
21
	private String jidentifiers = "{112683,NCT00920439}";
22

  
23

  
24
	private String jsonRecord = "[{\"source_id\" : \"nct\", \"source_url\" : \"https://clinicaltrials.gov/ct2/show/NCT02321059\", \"is_primary\" : true},{\"source_id\" : \"ictrp\", \"source_url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059\", \"is_primary\" : false}]";
25
	private String jsonRecordNull = "[{\"source_id\" : \"nct\", \"source_url\" : \"https://clinicaltrials.gov/ct2/show/NCT02321059\"},{\"source_id\" : \"ictrp\", \"source_url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059\", \"is_primary\" : false}]";
26
	private String jsonRecordVoid = "[{\"source_id\" : \"\", \"source_url\" : \"\", \"is_primary\" : \"\"}]";
27
	private String jsonRecondPrimary = "[{\"source_id\" : \"nct\", \"source_url\" : \"https://clinicaltrials.gov/ct2/show/NCT02321059\", \"is_primary\" : false},{\"source_id\" : \"ictrp\", \"source_url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059\", \"is_primary\" : false}]";
28

  
29
	private String jsonPeopleVoid ="[{\"person_name\" : null, \"person_id\" : null, \"person_role\" : null}]";
30
	private String jsonPeopleOne = "[{\"person_name\" : \"Henk Verheul, M.D., PhD\", \"person_id\" : \"116438e9-f8b1-46e5-a1f8-20f851cab73c\", \"person_role\" : \"principal_investigator\"}]";
31
	private String jsonPeopleMore = "[{\"person_name\" : \"Henk Verheul, M.D., PhD\", \"person_id\" : \"116438e9-f8b1-46e5-a1f8-20f851cab73c\", \"person_role\" : \"principal_investigator\"},{\"person_name\" : \"Miriam Pippolippo Baglioni, PhD\", \"person_id\" : \"fake\", \"person_role\" : \"principal_investigator\"}]";
32

  
33
	private String jsonOrganizationVoid = "[{\"organization_name\" : null, \"organization_id\" : null, \"organization_role\" : null}]";
34
	private String jsonOrganizationOne = "[{\"organization_name\" : \"Södertälje sjukhus AB\", \"organization_id\" : \"15f0d004-b82b-408c-8605-38a57352468d\", \"organization_role\" : \"sponsor\"}]";
35
	private String jsonOrganizationMore = "[{\"organization_name\" : \"Södertälje sjukhus AB\", \"organization_id\" : \"15f0d004-b82b-408c-8605-38a57352468d\", \"organization_role\" : \"sponsor\"},{\"organization_name\" : \"Miriam Baglioni AB\", \"organization_id\" : \"fake\", \"organization_role\" : \"primary_sponsor\"}]";
36

  
37
	private String jsonLocationVoid = "[{\"location_name\" : null}]";
38
	private String jsonLocationOne = "[{\"location_name\" : \"China\"}]";
39
	private String jsonLocationMore = "[{\"location_name\" : \"China\"},{\"location_name\" : \"North Korea\"}]";
40

  
41
	@Before
42
	public void before() throws Exception {
43
	}
44

  
45
	@After
46
	public void after() throws Exception {
47
	}
48

  
49
	/**
50
	 * Method: getProvs(String jsonProvList)
51
	 */
52
	@Test
53
	public void testGetProvs() throws Exception {
54
		List<JsonProv> list = OpenTrialsXsltFunctions.getProvs(jsonProv);
55
		assertEquals(2, list.size());
56
	}
57

  
58
	/**
59
	 * Method: getMainIdentifierURL(String jsonProvList)
60
	 */
61
	@Test
62
	public void testGetMainIdentifierURL() throws Exception {
63
		String url = OpenTrialsXsltFunctions.getMainIdentifierURL(jsonProv);
64
		assertEquals( "http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT00378508", url );
65
		url = OpenTrialsXsltFunctions.getMainIdentifierURL(jsonProvWithNull);
66
		assertEquals("https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true", url);
67
	}
68

  
69
	@Test
70
	public void testGetPrimaryRecordUrl(){
71
		String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecord);
72
		assertEquals("https://clinicaltrials.gov/ct2/show/NCT02321059", url);
73
	}
74

  
75
	@Test
76
	public void testGetPrimaryRecordID(){
77
		String id = OpenTrialsXsltFunctions.getPrimaryRecordIdentifier(jsonRecord);
78
		assertEquals("nct", id);
79
	}
80

  
81
	@Test
82
	public void testGetPrimaryRecordUrlNull(){
83
		String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecordNull);
84
		assertEquals("https://clinicaltrials.gov/ct2/show/NCT02321059", url);
85
	}
86

  
87
	@Test
88
	public void testGetPrimaryRecordUrlVoid(){
89
		String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecordVoid);
90
		assertEquals("", url);
91
	}
92

  
93
	@Test
94
	public void testGetPrimaryRecordUrlNoPrimary(){
95
		String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecondPrimary);
96
		assertEquals("https://clinicaltrials.gov/ct2/show/NCT02321059", url);
97
	}
98
	@Test
99
	public void testGetPrimaryRecordIDNoPrimary(){
100
		String id = OpenTrialsXsltFunctions.getPrimaryRecordIdentifier(jsonRecondPrimary);
101
		assertEquals("nct", id);
102
	}
103
	@Test
104
	public void testGetPrincipalInvestigatorsVoid(){
105
		String url = OpenTrialsXsltFunctions.getPrincipalInvestigators(jsonPeopleVoid);
106
		assertEquals("",url);
107
	}
108

  
109

  
110
	@Test
111
	public void testGetPrincipalInvestigatorsOne(){
112
		String url = OpenTrialsXsltFunctions.getPrincipalInvestigators(jsonPeopleOne);
113
		assertEquals("Verheul, Henk", url);
114
	}
115

  
116
	@Test
117
	public void testGetPrincipalInvestigatorsMore(){
118
		String url = OpenTrialsXsltFunctions.getPrincipalInvestigators(jsonPeopleMore);
119
		assertEquals("Verheul, Henk@@Baglioni, Miriam Pippolippo", url);
120
	}
121

  
122

  
123

  
124
	@Test
125
	public void testgGetTrialOrganizationsVoid(){
126
		String url = OpenTrialsXsltFunctions.getTrialOrganizations(jsonOrganizationVoid);
127
		assertEquals("",url);
128
	}
129

  
130

  
131
	@Test
132
	public void testgGetTrialOrganizationsOne(){
133
		String url = OpenTrialsXsltFunctions.getTrialOrganizations(jsonOrganizationOne);
134
		assertEquals("Södertälje sjukhus AB@sponsor", url);
135
	}
136

  
137
	@Test
138
	public void testgGetTrialOrganizationsMore(){
139
		String url = OpenTrialsXsltFunctions.getTrialOrganizations(jsonOrganizationMore);
140
		assertEquals("Södertälje sjukhus AB@sponsor@@Miriam Baglioni AB@sponsor", url);
141
	}
142

  
143
	@Test
144
	public void testgGetTrialLocationsVoid(){
145
		String url = OpenTrialsXsltFunctions.getTrialLocations(jsonLocationVoid);
146
		assertEquals("",url);
147
	}
148

  
149

  
150
	@Test
151
	public void testgGetTrialLocationsOne(){
152
		String url = OpenTrialsXsltFunctions.getTrialLocations(jsonLocationOne);
153
		assertEquals("China", url);
154
	}
155

  
156
	@Test
157
	public void testgGetTrialLocationsMore(){
158
		String url = OpenTrialsXsltFunctions.getTrialLocations(jsonLocationMore);
159
		assertEquals("China@@North Korea", url);
160
	}
161

  
162
	@Test
163
	public void testGetNotPrimaryRecordUrlPrimary(){
164
		String url = OpenTrialsXsltFunctions.getNotPrimaryRecordUrl(jsonRecondPrimary);
165
		assertEquals("http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059", url);
166
	}
167

  
168
	@Test
169
	public void testGetNotPrimaryRecordUrlVoid(){
170
		String url = OpenTrialsXsltFunctions.getNotPrimaryRecordUrl(jsonRecordVoid);
171
		assertEquals("", url);
172
	}
173

  
174
	@Test
175
	public void testGetNotPrimaryRecordUrl(){
176
		String url = OpenTrialsXsltFunctions.getNotPrimaryRecordUrl(jsonRecord);
177
		assertEquals("http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059", url);
178
	}
179

  
180

  
181
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.24/src/test/java/eu/dnetlib/data/transform/xml/FWFXsltFunctionsTest.java
1
package eu.dnetlib.data.transform.xml;
2

  
3
import org.junit.After;
4
import org.junit.Before;
5
import org.junit.Test;
6

  
7
import static org.junit.Assert.assertEquals;
8
import static org.junit.Assert.assertNotNull;
9

  
10
/**
11
 *
12
 * Created by miriam on 04/05/2017.
13
 */
14
public class FWFXsltFunctionsTest {
15
	private String namesurname ="Gerhard SOMMER";
16
	private String noSurname = "Gerhard";
17
	private String noName = "SOMMER";
18
	private String twoNames = "Gerhard Pippo SOMMER";
19
	private String twoSurname = "Gerhard PIPPO SOMMER";
20
	private String nonamesurname = "";
21
	private String organization ="Universität Linz - Institut für Computational Perception; Universität für Musik und darstellende Kunst Graz - Institut 1: Komposition, Musiktheorie, Musikgeschichte und Dirigieren; Universität Mozarteum Salzburg - Institut für Musikalische Rezeptions- und Interpretationsgeschichte; Anton Bruckner Privatuniversität - Institut für Theorie und Geschichte der Musik; Eliette und Herbert von Karajan Institut - Eliette und Herbert von Karajan Institut";
22

  
23
	@Before
24
	public void before() throws Exception {
25
	}
26

  
27
	@After
28
	public void after() throws Exception {
29
	}
30

  
31
	@Test
32
	public void testGetNamesNameNoNameSurname() throws Exception {
33
		String ret = FWFXsltFunctions.getName(nonamesurname,true);
34
		assertEquals("",ret );
35
	}
36

  
37
	@Test
38
	public void testGetNamesSurnameNoNameSurname() throws Exception {
39
		String ret = FWFXsltFunctions.getName(nonamesurname,false);
40
		assertEquals("",ret );
41
	}
42

  
43
	@Test
44
	public void testGetNamesNameTwoSurname() throws Exception {
45
		String ret = FWFXsltFunctions.getName(twoSurname,true);
46
		assertEquals("Gerhard",ret );
47
	}
48

  
49
	@Test
50
	public void testGetNamesSurnameTwoSurname() throws Exception {
51
		String ret = FWFXsltFunctions.getName(twoSurname,false);
52
		assertEquals("PIPPO SOMMER",ret );
53
	}
54

  
55
	@Test
56
	public void testGetNamesNameTwoNames() throws Exception {
57
		String ret = FWFXsltFunctions.getName(twoNames,true);
58
		assertEquals("Gerhard Pippo",ret );
59
	}
60

  
61
	@Test
62
	public void testGetNamesSurnameTwoNames() throws Exception {
63
		String ret = FWFXsltFunctions.getName(twoNames,false);
64
		assertEquals("SOMMER",ret );
65
	}
66

  
67
	/**
68
	 * Method: getProvs(String jsonProvList)
69
	 */
70
	@Test
71
	public void testGetNamesName() throws Exception {
72
		String ret = FWFXsltFunctions.getName(namesurname,true);
73
		assertEquals("Gerhard",ret );
74
	}
75

  
76
	@Test
77
	public void testGetNamesSurname() throws Exception {
78
		String ret = FWFXsltFunctions.getName(namesurname,false);
79
		assertEquals("SOMMER",ret );
80
	}
81

  
82
	@Test
83
	public void testGetNamesNameNoSurname() throws Exception {
84
		String ret = FWFXsltFunctions.getName(noSurname,true);
85
		assertEquals("Gerhard",ret );
86
	}
87

  
88
	@Test
89
	public void testGetNamesSurnameNoSurname() throws Exception {
90
		String ret = FWFXsltFunctions.getName(noSurname,false);
91
		assertEquals("",ret );
92
	}
93

  
94
	@Test
95
	public void testGetNamesNameNoName() throws Exception {
96
		String ret = FWFXsltFunctions.getName(noName,true);
97
		assertEquals("",ret );
98
	}
99

  
100
	@Test
101
	public void testGetNamesSurnameNoName() throws Exception {
102
		String ret = FWFXsltFunctions.getName(noName,false);
103
		assertEquals("SOMMER",ret );
104
	}
105

  
106
	@Test
107
	public void TestGetMd5()throws Exception{
108
		String md5 = FWFXsltFunctions.getMd5(organization);
109
		System.out.println(md5);
110
		assertNotNull(md5);
111
	}
112

  
113
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.24/src/main/java/eu/dnetlib/data/bulktag/Community.java
1
package eu.dnetlib.data.bulktag;
2

  
3
import com.google.gson.Gson;
4
import org.apache.commons.logging.Log;
5
import org.apache.commons.logging.LogFactory;
6

  
7
import java.util.ArrayList;
8
import java.util.List;
9

  
10
/**
11
 * Created by miriam on 01/08/2018.
12
 */
13
public class Community {
14

  
15
    private static final Log log = LogFactory.getLog(Community.class);
16

  
17
    private String id;
18
    private List<String> subjects = new ArrayList<>();
19
    private List<Datasource> datasources = new ArrayList<>();
20
    private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>();
21

  
22
    public String toJson() {
23
        final Gson g = new Gson();
24
        return g.toJson(this);
25
    }
26

  
27
    public boolean isValid() {
28
        return !getSubjects().isEmpty() || !getDatasources().isEmpty() || !getZenodoCommunities().isEmpty();
29
    }
30

  
31
    public String getId() {
32
        return id;
33
    }
34

  
35
    public void setId(String id) {
36
        this.id = id;
37
    }
38

  
39
    public List<String> getSubjects() {
40
        return subjects;
41
    }
42

  
43
    public void setSubjects(List<String> subjects) {
44
        this.subjects = subjects;
45
    }
46

  
47
    public List<Datasource> getDatasources() {
48
        return datasources;
49
    }
50

  
51
    public void setDatasources(List<Datasource> datasources) {
52
        this.datasources = datasources;
53
    }
54

  
55
    public List<ZenodoCommunity> getZenodoCommunities() {
56
        return zenodoCommunities;
57
    }
58

  
59
    public void setZenodoCommunities(List<ZenodoCommunity> zenodoCommunities) {
60
        this.zenodoCommunities = zenodoCommunities;
61
    }
62

  
63
}
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.24/src/main/java/eu/dnetlib/data/transform/xml/OafToHbaseXsltFunctions.java
1
package eu.dnetlib.data.transform.xml;
2

  
3
import java.util.HashMap;
4
import java.util.List;
5

  
6
import com.google.common.collect.Iterables;
7
import com.google.common.collect.Lists;
8
import com.google.protobuf.Descriptors.Descriptor;
9
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
10
import eu.dnetlib.data.proto.FieldTypeProtos.Author;
11
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue;
12
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
13
import eu.dnetlib.data.proto.OafProtos.Oaf;
14
import eu.dnetlib.data.proto.OafProtos.OafEntity;
15
import eu.dnetlib.data.proto.ResultProtos.Result;
16
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
17
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
18
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
19
import eu.dnetlib.data.proto.TypeProtos.Type;
20
import org.apache.commons.lang3.StringUtils;
21
import org.w3c.dom.NodeList;
22

  
23
public class OafToHbaseXsltFunctions extends CommonDNetXsltFunctions {
24

  
25
	public static String oafResult(
26
			final String resultId,
27
			final boolean invisible,
28
			final String provenance,
29
			final String trust,
30
			final NodeList about,
31
			final String originalId,
32
			final String dateOfCollection,
33
			final String dateOfTransformation,
34
			final NodeList metadata) {
35

  
36
		ValueMap values = null;
37
		try {
38
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
39
			values = ValueMap.parseNodeList(metadata);
40
			final Descriptor mDesc = Result.Metadata.getDescriptor();
41

  
42
			final List<KeyValue> collectedFrom = getKeyValues(values, "collectedfrom", Type.datasource);
43
			final List<KeyValue> hostedBy = getKeyValues(values, "hostedby", Type.datasource);
44

  
45
			final Result.Metadata.Builder metadataBuilder = buildMetadata(values, mDesc);
46
			final Result.Builder result = buildResult(metadataBuilder, values, collectedFrom, hostedBy);
47
			final OafEntity.Builder entity = buildOafEntity(result, entityId, metadata, collectedFrom, originalId);
48
			entity.setDateofcollection(dateOfCollection)
49
					.setDateoftransformation(dateOfTransformation).setOaiprovenance(getOAIProvenance(about));
50

  
51
			final Oaf oaf = getOaf(entity, getDataInfo(invisible, about, provenance, trust, false, false));
52
			return base64(oaf.toByteArray());
53
		} catch (final Throwable e) {
54
			handleException(e, resultId, values);
55
		}
56
		return null;
57
	}
58

  
59
	public static String oafResultUpdate(final String resultId,
60
			final String provenance,
61
			final String trust,
62
			final NodeList nodelist) {
63
		ValueMap values = null;
64
		try {
65
			final String entityId = OafRowKeyDecoder.decode(resultId).getKey();
66
			values = ValueMap.parseNodeList(nodelist);
67
			final List<KeyValue> hostedBy = getKeyValues(values, "hostedby", Type.datasource);
68

  
69
			final Descriptor mDesc = Result.Metadata.getDescriptor();
70

  
71
			final Result.Metadata.Builder metadata = buildMetadata(values, mDesc);
72
			final Result.Builder result = buildResult(metadata, values, null, hostedBy);
73

  
74
			final OafEntity.Builder entity = buildOafEntity(result, entityId, nodelist, null, null);
75
			final Oaf oaf = getOaf(entity, null);
76
			return base64(oaf.toByteArray());
77
		} catch (final Throwable e) {
78
			handleException(e, resultId, values);
79
		}
80
		return null;
81
	}
82

  
83
	private static OafEntity.Builder buildOafEntity(
84
			final Result.Builder result,
85
			final String entityId,
86
			final NodeList nodelist,
87
			final List<KeyValue> collectedFrom,
88
			final String originalId) {
89

  
90
		final List<StructuredProperty> pids = Lists.newArrayList();
91
		pids.addAll(parsePids(nodelist));
92

  
93
		final OafEntity.Builder entity =
94
				getEntity(Type.result, entityId, collectedFrom, StringUtils.isBlank(originalId) ? null : Lists.newArrayList(originalId), null, null, pids)
95
						.setResult(result);
96
		return entity;
97
	}
98

  
99
	private static Result.Metadata.Builder buildMetadata(final ValueMap values, final Descriptor mDesc) {
100
		final Result.Metadata.Builder metadata = Result.Metadata.newBuilder();
101

  
102
		if (values.get("creator") != null) {
103
			for (final Element e : values.get("creator")) {
104

  
105
				final Author.Builder author = Author.newBuilder();
106

  
107
				final String fullname = e.getText();
108
				author.setFullname(fullname);
109
				author.setRank(Integer.valueOf(e.getAttributeValue(ValueMap.IDX_ATTRIBUTE)));
110

  
111
				final String nameIdentifier = e.getAttributeValue("nameIdentifier");
112
				final String nameIdentifierScheme = e.getAttributeValue("nameIdentifierScheme");
113

  
114
				if (StringUtils.isNotBlank(nameIdentifier) && StringUtils.isNotBlank(nameIdentifierScheme)) {
115
					author.addPid(getKV(nameIdentifierScheme, nameIdentifier));
116
				}
117

  
118
				final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
119
				if (p.isAccurate()) {
120
					author.setName(p.getNormalisedFirstName());
121
					author.setSurname(p.getNormalisedSurname());
122
				}
123
				metadata.addAuthor(author);
124
			}
125
		}
126

  
127
		addStructuredProps(metadata, mDesc.findFieldByName("subject"), values.get("subject"), "keyword", "dnet:subject_classification_typologies");
128
		addStructuredProps(metadata, mDesc.findFieldByName("title"), values.get("title"), "main title", "dnet:dataCite_title");
129
		for (final String fieldname : Lists.newArrayList("description", "source", "contributor")) {
130
			if (values.get(fieldname) != null) {
131
				for (final String s : values.get(fieldname).listValues()) {
132
					addField(metadata, mDesc.findFieldByName(fieldname), s);
133
				}
134
			}
135
		}
136
		addField(metadata, mDesc.findFieldByName("language"), setQualifier(getDefaultQualifier("dnet:languages"), values.get("language").listValues()));
137
		addField(metadata, mDesc.findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
138
		addField(metadata, mDesc.findFieldByName("publisher"), values.get("publisher").listValues());
139
		addField(metadata, mDesc.findFieldByName("embargoenddate"), values.get("embargoenddate").listValues());
140
		addField(metadata, mDesc.findFieldByName("storagedate"), values.get("storagedate").listValues());
141

  
142
		String resulttype = getResultType(values);
143
		addField(metadata, mDesc.findFieldByName("resulttype"), getSimpleQualifier(resulttype, "dnet:result_typologies"));
144

  
145
		addField(metadata, mDesc.findFieldByName("fulltext"), values.get("fulltext").listValues());
146
		addField(metadata, mDesc.findFieldByName("format"), values.get("format").listValues());
147
		if (values.get("concept") != null) {
148
			for (final Element e : values.get("concept")) {
149
				final String id = e.getAttributes().get("id");
150
				if (StringUtils.isNotBlank(id)) {
151
					metadata.addContext(Context.newBuilder().setId(id));
152
				}
153
			}
154
		}
155
		if (values.get("journal") != null) {
156
			for (final Element e : values.get("journal")) {
157
				addJournal(metadata, e);
158
			}
159
		}
160
		return metadata;
161
	}
162

  
163
	private static String getResultType(final ValueMap values) {
164
			
165
		final Element cobjcategory = values.get("cobjcategory").stream()
166
				.map(e -> StringUtils.isNotBlank(e.getText()) ? e : new Element("0000", e.getAttributes()))
167
				.findFirst()
168
				.orElse(new Element("0000", new HashMap<>()));
169

  
170
		final String resulttype = cobjcategory.getAttributeValue("type");
171
		if (StringUtils.isNotBlank(resulttype)) {
172
			return resulttype;
173
		}
174

  
175
		return getDefaultResulttype(cobjcategory);
176
	}
177

  
178
	private static Result.Builder buildResult(final Result.Metadata.Builder metadata,
179
			final ValueMap values,
180
			final List<KeyValue> collectedFrom,
181
			final List<KeyValue> hostedBy) {
182
		final Result.Builder result = Result.newBuilder();
183

  
184
		final Instance.Builder instance = Instance.newBuilder();
185

  
186
		addField(instance, Instance.getDescriptor().findFieldByName("license"), values.get("license").listValues());
187

  
188
		addField(instance, Instance.getDescriptor().findFieldByName("accessright"),
189
				setQualifier(getDefaultQualifier("dnet:access_modes"), values.get("accessrights").listValues()));
190

  
191
		addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
192
				setQualifier(getDefaultQualifier("dnet:publication_resource"), values.get("cobjcategory").listValues()));
193

  
194
		addField(instance, Instance.getDescriptor().findFieldByName("hostedby"), hostedBy);
195
		addField(instance, Instance.getDescriptor().findFieldByName("collectedfrom"), collectedFrom);
196
		addField(instance, Instance.getDescriptor().findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues());
197

  
198
		if (values.get("identifier") != null) {
199
			addField(instance, Instance.getDescriptor().findFieldByName("url"),
200
					Lists.newArrayList(Iterables.filter(values.get("identifier").listValues(), urlFilter)));
201
		}
202

  
203
		result.addInstance(instance);
204

  
205
		final List<Element> extrefs = values.get("reference");
206
		if (!extrefs.isEmpty()) {
207
			final Descriptor extDesc = ExternalReference.getDescriptor();
208
			for (final Element element : extrefs) {
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff