Project

General

Profile

« Previous | Next » 

Revision 55760

reintegrated branch solr75 -r53766:HEAD

View differences:

modules/dnet-openaireplus-mapping-utils/trunk/src/test/java/eu/dnetlib/data/bulktag/CommunityConfigurationFactoryTest.java
27 27
    public void parseTest() throws DocumentException {
28 28

  
29 29
        final CommunityConfiguration cc = CommunityConfigurationFactory.newInstance(xml);
30
        assertEquals(cc.size(),4);
30
        assertEquals(5,cc.size());
31 31
        cc.getCommunityList().forEach(c -> assertTrue(StringUtils.isNoneBlank(c.getId())));
32 32

  
33 33

  
modules/dnet-openaireplus-mapping-utils/trunk/src/test/resources/eu/dnetlib/data/bulktag/community_configuration.xml
1 1
<communities>
2 2
    <community id="fet-fp7">
3
        <oacommunity/>
3 4
        <subjects/>
4 5
        <datasources/>
5 6
        <zenodocommunities/>
......
161 162
        </datasources>
162 163
        <zenodocommunities/>
163 164
    </community>
165
    <community id="clarin">
166
        <oacommunity>oac_clarin</oacommunity>
167
        <subjects/>
168
        <datasources>
169
            <datasource>
170
                <openaireId>re3data_____::a507cdacc5bbcc08761c92185dee5cab</openaireId>
171
                <selcriteria/>
172
            </datasource>
173
        </datasources>
174
        <zenodocommunities/>
175
    </community>
164 176
</communities>
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/mapreduce/util/OafEntityDecoder.java
51 51
		return field;
52 52
	}
53 53

  
54
	public List<String> getFieldValues(final String path) {
55
		return processPath(getOafEntity(), path, eu.dnetlib.pace.config.Type.String).stream()
56
				.map(o -> o.toString())
57
				.collect(Collectors.toCollection(LinkedList::new));
58
	}
59

  
60

  
61 54
	public String getDateOfCollection() {
62 55
		return oafEntity.getDateofcollection();
63 56
	}
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/index/CloudIndexClientFactory.java
1 1
package eu.dnetlib.data.index;
2 2

  
3
import eu.dnetlib.functionality.index.utils.ZkServers;
3 4
import org.apache.commons.logging.Log;
4 5
import org.apache.commons.logging.LogFactory;
5
import org.apache.solr.client.solrj.impl.CloudSolrServer;
6
import org.apache.solr.client.solrj.impl.CloudSolrClient;
6 7
import org.apache.solr.client.solrj.response.SolrPingResponse;
7 8

  
8 9
/**
......
15 16
	public static CloudIndexClient newIndexClient(final String baseURL, final String collection, final boolean parallelUpdates)
16 17
			throws CloudIndexClientException {
17 18
		try {
18
			final CloudSolrServer client = new CloudSolrServer(baseURL);
19

  
20 19
			log.info(String.format("Initializing solr server (%s) ...", baseURL));
21 20

  
21
			final ZkServers zk = ZkServers.newInstance(baseURL);
22
			final CloudSolrClient client = new CloudSolrClient.Builder(zk.getHosts(), zk.getChroot())
23
					.withParallelUpdates(parallelUpdates)
24
					.build();
25

  
22 26
			client.connect();
23

  
24
			client.setParallelUpdates(parallelUpdates);
25 27
			client.setDefaultCollection(collection);
26 28

  
27 29
			final SolrPingResponse rsp = client.ping();
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/index/CloudIndexClient.java
1 1
package eu.dnetlib.data.index;
2 2

  
3
import java.io.Closeable;
4
import java.io.IOException;
5
import java.text.SimpleDateFormat;
6
import java.util.Date;
7
import java.util.List;
8

  
9 3
import eu.dnetlib.functionality.index.solr.feed.StreamingInputDocumentFactory;
10 4
import eu.dnetlib.miscutils.datetime.HumanTime;
11 5
import eu.dnetlib.miscutils.functional.UnaryFunction;
......
13 7
import org.apache.commons.logging.LogFactory;
14 8
import org.apache.solr.client.solrj.SolrQuery;
15 9
import org.apache.solr.client.solrj.SolrServerException;
16
import org.apache.solr.client.solrj.impl.CloudSolrServer;
10
import org.apache.solr.client.solrj.impl.CloudSolrClient;
17 11
import org.apache.solr.client.solrj.response.QueryResponse;
18 12
import org.apache.solr.client.solrj.response.UpdateResponse;
19 13
import org.apache.solr.common.SolrInputDocument;
20 14

  
15
import java.io.Closeable;
16
import java.io.IOException;
17
import java.text.SimpleDateFormat;
18
import java.util.Date;
19
import java.util.List;
20

  
21 21
/**
22 22
 * Created by michele on 11/11/15.
23 23
 */
......
26 26
	private static final Log log = LogFactory.getLog(CloudIndexClient.class);
27 27
	private static final String INDEX_RECORD_RESULT_FIELD = "dnetResult";
28 28

  
29
	private final CloudSolrServer solrClient;
29
	private final CloudSolrClient solrClient;
30 30

  
31
	protected CloudIndexClient(final CloudSolrServer solrServer) {
31
	protected CloudIndexClient(final CloudSolrClient solrServer) {
32 32
		this.solrClient = solrServer;
33 33
	}
34 34

  
......
153 153

  
154 154
	public void close() throws IOException {
155 155
		if (solrClient != null) {
156
			solrClient.shutdown();
156
			solrClient.close();
157 157
		}
158 158
	}
159 159

  
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/bulktag/CommunityConfigurationFactory.java
99 99
    }
100 100

  
101 101
    private static List<ZenodoCommunity> parseZenodoCommunities(final Node node) {
102
        final Node oacommunitynode = node.selectSingleNode("./oacommunity");
103
        String oacommunity = null;
104
        if (oacommunitynode != null){
105
            String tmp = oacommunitynode.getText();
106
            if(StringUtils.isNotBlank(tmp))
107
                oacommunity = tmp;
108
        }
102 109

  
110

  
103 111
        final List<Node> list = node.selectNodes("./zenodocommunities/zenodocommunity");
104 112
        final List<ZenodoCommunity> zenodoCommunityList = new ArrayList<>();
105 113
        for(Node n : list){
......
109 117

  
110 118
            zenodoCommunityList.add(zc);
111 119
        }
120
        if(oacommunity != null){
121
            ZenodoCommunity zc = new ZenodoCommunity();
122
            zc.setZenodoCommunityId(oacommunity);
123
            zenodoCommunityList.add(zc);
124
        }
112 125
        log.info("size of the zenodo community list " + zenodoCommunityList.size());
113 126
        return zenodoCommunityList;
114 127
    }
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/bulktag/CommunityConfiguration.java
66 66
            for(ZenodoCommunity zc : c.getZenodoCommunities()){
67 67
                add(zc.getZenodoCommunityId(),new Pair<>(id,zc.getSelCriteria()),zenodocommunityMap);
68 68
            }
69

  
69 70
        }
70 71
    }
71 72

  
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/transform/xml/AbstractDNetXsltFunctions.java
1463 1463

  
1464 1464

  
1465 1465

  
1466
	private static final Set<String> invalidPidTypes = Sets.newHashSet("distributionlocation", "url", " ", "local accession id", "local", "landingpage");
1467

  
1466
	private static final Set<String> invalidPidTypes =
1467
			Sets.newHashSet("distributionlocation", "url", " ", "local accession id", "local", "local id", "a local accession number", "landingpage", "publisherid", "report number", "uri", "contract", "doc",
1468
					"issn", "issn (online)", "issn (print)", "eissn", "citation", "unknown", "other", "oai", "case number", "section", "series", "report",
1469
					"other numbers", "site id", "fulltext", "internal", "report numbers", "product number", "depositor id", "isbn13", "doe contract number", "revision",
1470
					"issue", "pages", "volume", "another identifier for this resource", "csvdownload", "hepdatarecord", "hepdatarecordalt", "rootdownload", "yamldownload", "yodadownload",
1471
					"md5", "firstid", "uuid", "poster number", "compactidentifiers", "sample_id", "source identifier", "lod-catalog", "internal id", "funder", "department",
1472
					"odin doi viewer", "odin matdb viewer", "bitstream", "dipartimento", "technical note (national research council of canada. division of building research) series",
1473
					"internal report (national research council canada. division of building research) series", "dk.dda.ddieditor.version", "extended kim id", "kim id", "ccin",
1474
					"dk.dda.study.annonymizeddata", "e-issn", "call number", "sequenza");
1468 1475
	protected static List<StructuredProperty> parsePids(final NodeList nodelist) {
1469 1476

  
1470 1477
		final List<StructuredProperty> pids = Lists.newArrayList();
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/transform/xml/OdfToHbaseXsltFunctions.java
17 17
import eu.dnetlib.data.proto.ResultProtos.Result;
18 18
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
19 19
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
20
import eu.dnetlib.data.proto.ResultProtos.Result.Metadata.Builder;
20 21
import eu.dnetlib.data.proto.TypeProtos.Type;
21 22
import org.apache.commons.lang3.StringUtils;
22 23
import org.w3c.dom.Element;
......
143 144

  
144 145
					final NodeList creatorNames = creator.getElementsByTagName("creatorName");
145 146
					if (creatorNames.getLength() > 0) {
146
						final Element creatorName = (Element) creatorNames.item(0);
147

  
148
						final Author.Builder author = Author.newBuilder();
149
						author.setRank(i+1);
150
						final String fullname = StringUtils.trim(creatorName.getTextContent());
151

  
152
						author.setFullname(fullname);
153

  
154
						final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
155
						if (p.isAccurate()) {
156
							author.setName(p.getNormalisedFirstName());
157
							author.setSurname(p.getNormalisedSurname());
147
						createAuthor(metadataProto, i, creator, creatorNames);
148
					} else{
149
						// handle authors whose creatorName element carries a namespace prefix
150
						final NodeList creatorNamesNs = creator.getElementsByTagNameNS("http://datacite.org/schema/kernel-4", "creatorName");
151
						if (creatorNamesNs.getLength() > 0) {
152
							createAuthor(metadataProto, i, creator, creatorNamesNs);
158 153
						}
159
						final NodeList nameIdentifiers = creator.getElementsByTagName("nameIdentifier");
160
						if (nameIdentifiers.getLength() > 0) {
161
							final Element nameIdentifier = (Element) nameIdentifiers.item(0);
162
							final String nameIdentifierScheme = nameIdentifier.getAttribute("nameIdentifierScheme");
163
							final String id = StringUtils.trim(nameIdentifier.getTextContent());
164
							if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(nameIdentifierScheme)) {
165
								author.addPid(getKV(nameIdentifierScheme, id));
166
							}
167
						}
168 154

  
169
						addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("author"), author);
170 155
					}
171 156
				}
172 157
			}
......
349 334
			addField(instance, Instance.getDescriptor().findFieldByName("instancetype"),
350 335
					setQualifier(getDefaultQualifier("dnet:dataCite_resource"), Lists.newArrayList(cobjcategoryCode)));
351 336

  
352
			addField(instance, Instance.getDescriptor().findFieldByName("url"), instanceUri);
353 337
			if (StringUtils.isNotBlank(landingPage)) {
354 338
				addField(instance, Instance.getDescriptor().findFieldByName("url"), landingPage);
355 339
			}
340
			// the instanceUri is sometimes blank, so add it as a url only when it has content
341
			if (StringUtils.isNotBlank(instanceUri)) {
342
				addField(instance, Instance.getDescriptor().findFieldByName("url"), instanceUri);
343
			}
344

  
356 345
			addField(instance, Instance.getDescriptor().findFieldByName("distributionlocation"), getFirstItem(distributionlocation));
357 346

  
358 347
			addField(instance, Instance.getDescriptor().findFieldByName("collectedfrom"), collectedFroms);
......
386 375

  
387 376
	}
388 377

  
378
	private static void createAuthor(final Builder metadataProto, final int i, final Element creator, final NodeList creatorNames) {
379
		final Element creatorName = (Element) creatorNames.item(0);
380

  
381
		final Author.Builder author = Author.newBuilder();
382
		author.setRank(i+1);
383
		final String fullname = StringUtils.trim(creatorName.getTextContent());
384

  
385
		author.setFullname(fullname);
386

  
387
		final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false);
388
		if (p.isAccurate()) {
389
			author.setName(p.getNormalisedFirstName());
390
			author.setSurname(p.getNormalisedSurname());
391
		}
392
		final NodeList nameIdentifiers = creator.getElementsByTagName("nameIdentifier");
393
		if (nameIdentifiers.getLength() > 0) {
394
			final Element nameIdentifier = (Element) nameIdentifiers.item(0);
395
			final String nameIdentifierScheme = nameIdentifier.getAttribute("nameIdentifierScheme");
396
			final String id = StringUtils.trim(nameIdentifier.getTextContent());
397
			if (StringUtils.isNotBlank(id) && StringUtils.isNotBlank(nameIdentifierScheme)) {
398
				author.addPid(getKV(nameIdentifierScheme, id));
399
			}
400
		}
401

  
402
		addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("author"), author);
403
	}
404

  
389 405
	private static String getResultType(final NodeList cobjcategoryNode) {
390 406

  
391 407
		final ValueMap values = ValueMap.parseNodeList(cobjcategoryNode);
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/transform/AbstractProtoMapper.java
50 50
	 *
51 51
	 * @param proto
52 52
	 *            the proto
53
	 * @param fieldDef
54
	 *            the field definition descriptor
55 53
	 * @param path
56
	 * 			  the path
54
	 *            the path
57 55
	 * @return the list
58 56
	 */
59 57
	protected List<Object> processPath(final GeneratedMessage proto, final FieldDef fieldDef, final String path) {
......
65 63
	 *
66 64
	 * @param proto
67 65
	 *            the proto
68
	 * @param path
69
	 *            the path
70
	 * @param type
71
	 * 			  the type
72
	 * @return the list
73
	 */
74
	protected List<Object> processPath(final GeneratedMessage proto, final String path, final Type type) {
75
		final FieldDef fieldDef = new FieldDef();
76
		fieldDef.setType(type);
77
		return processPath(proto, fieldDef, Lists.newLinkedList(Splitter.on(PATH_SEPARATOR).trimResults().split(path)));
78
	}
79

  
80
	/**
81
	 * Process path.
82
	 *
83
	 * @param proto
84
	 *            the proto
85 66
	 * @param pathElements
86 67
	 *            the list
87 68
	 * @return the list
modules/dnet-openaireplus-mapping-utils/trunk/pom.xml
10 10
	<groupId>eu.dnetlib</groupId>
11 11
	<artifactId>dnet-openaireplus-mapping-utils</artifactId>
12 12
	<packaging>jar</packaging>
13
	<version>6.2.29-SNAPSHOT</version>
13
	<version>6.2.24-SNAPSHOT</version>
14 14
	<scm>
15 15
		<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-mapping-utils/trunk</developerConnection>
16 16
	</scm>
......
106 106
		<dependency>
107 107
			<groupId>eu.dnetlib</groupId>
108 108
			<artifactId>dnet-index-solr-common</artifactId>
109
			<version>[1.0.0,1.3.3]</version>
110
			<!-- uncomment to include solrj 7.2.0 -->
111
			<!--<version>[1.0.0,2.0.0]</version>-->
109
			<version>[2.3.3-solr75]</version>
112 110
		</dependency>	
113 111
		<dependency>
114 112
			<groupId>com.googlecode.protobuf-java-format</groupId>

Also available in: Unified diff