Revision 48991
Added by Claudio Atzori over 6 years ago
modules/dnet-openaireplus-mapping-utils/trunk/src/test/java/eu/dnetlib/pace/model/ProtoDocumentBuilderTest.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.pace.model; |
2 | 2 |
|
3 |
import static org.junit.Assert.assertFalse; |
|
4 |
import static org.junit.Assert.assertTrue; |
|
5 |
|
|
6 |
import org.junit.Test; |
|
7 |
|
|
8 | 3 |
import com.google.common.collect.Iterables; |
9 | 4 |
import com.google.common.collect.Sets; |
10 | 5 |
import com.google.common.collect.Sets.SetView; |
11 |
|
|
12 | 6 |
import eu.dnetlib.pace.AbstractProtoPaceTest; |
13 | 7 |
import eu.dnetlib.pace.config.Config; |
8 |
import org.junit.Test; |
|
14 | 9 |
|
10 |
import static org.junit.Assert.assertFalse; |
|
11 |
import static org.junit.Assert.assertTrue; |
|
12 |
|
|
15 | 13 |
public class ProtoDocumentBuilderTest extends AbstractProtoPaceTest { |
16 | 14 |
|
17 | 15 |
@Test |
... | ... | |
41 | 39 |
System.out.println("decoded:\n" + decoded); |
42 | 40 |
} |
43 | 41 |
|
44 |
@Test |
|
45 |
public void test_serialise2() { |
|
46 |
|
|
47 |
final String id = "12345"; |
|
48 |
final String path = "/eu/dnetlib/pace/model/gt.author.manghi1.json"; |
|
49 |
|
|
50 |
final Config config = getPersonConf(); |
|
51 |
|
|
52 |
final MapDocument document = ProtoDocumentBuilder.newInstance(id, getPersonGT(path).getEntity(), config.model()); |
|
53 |
|
|
54 |
assertFalse(document.fieldNames().isEmpty()); |
|
55 |
assertFalse(Iterables.isEmpty(document.fields())); |
|
56 |
|
|
57 |
System.out.println("original:\n" + document); |
|
58 |
|
|
59 |
final String stringDoc = MapDocumentSerializer.toString(document); |
|
60 |
|
|
61 |
System.out.println("srialization:\n" + stringDoc); |
|
62 |
|
|
63 |
final MapDocument decoded = MapDocumentSerializer.decode(stringDoc.getBytes()); |
|
64 |
|
|
65 |
final SetView<String> diff = Sets.difference(document.fieldNames(), decoded.fieldNames()); |
|
66 |
|
|
67 |
assertTrue(diff.isEmpty()); |
|
68 |
|
|
69 |
System.out.println("decoded:\n" + decoded); |
|
70 |
} |
|
71 |
|
|
72 | 42 |
} |
modules/dnet-openaireplus-mapping-utils/trunk/src/test/java/eu/dnetlib/pace/distance/DetectorTest.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.pace.distance; |
2 | 2 |
|
3 |
import static org.junit.Assert.assertTrue; |
|
4 |
|
|
5 |
import java.util.ArrayList; |
|
6 | 3 |
import java.util.List; |
7 | 4 |
|
8 |
import eu.dnetlib.pace.distance.eval.ScoreResult; |
|
9 |
import org.junit.Test; |
|
10 |
|
|
11 | 5 |
import com.google.common.collect.Lists; |
12 |
|
|
13 | 6 |
import eu.dnetlib.pace.AbstractProtoPaceTest; |
14 | 7 |
import eu.dnetlib.pace.config.Config; |
8 |
import eu.dnetlib.pace.distance.eval.ScoreResult; |
|
15 | 9 |
import eu.dnetlib.pace.model.MapDocument; |
10 |
import org.junit.Test; |
|
16 | 11 |
|
12 |
import static org.junit.Assert.assertTrue; |
|
13 |
|
|
17 | 14 |
public class DetectorTest extends AbstractProtoPaceTest { |
18 | 15 |
|
19 | 16 |
@Test |
... | ... | |
409 | 406 |
assertTrue(d == 0.0); |
410 | 407 |
} |
411 | 408 |
|
412 |
@Test |
|
413 |
public void testDistancePersonConf1() { |
|
414 |
|
|
415 |
final Config config = getPersonConf(); |
|
416 |
|
|
417 |
final MapDocument p1 = person(config, "p1_id", getPersonGT("/eu/dnetlib/pace/model/gt.author.manghi1.fo.json")); |
|
418 |
final MapDocument p2 = person(config, "p2_id", getPersonGT("/eu/dnetlib/pace/model/gt.author.manghi2.fo.json")); |
|
419 |
|
|
420 |
final ScoreResult sr = new PaceDocumentDistance().between(p1, p2, config); |
|
421 |
final double d = sr.getScore(); |
|
422 |
System.out.println(String.format(" d ---> %s", d)); |
|
423 |
|
|
424 |
// assertTrue(d.getScore() == 0.0); |
|
425 |
} |
|
426 |
|
|
427 | 409 |
} |
modules/dnet-openaireplus-mapping-utils/trunk/src/test/java/eu/dnetlib/data/mapreduce/util/OafRelDecoderTest.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.mapreduce.util; |
2 | 2 |
|
3 |
import static org.junit.Assert.assertEquals; |
|
4 |
import static org.junit.Assert.assertNotNull; |
|
5 |
|
|
3 |
import eu.dnetlib.data.proto.DedupProtos.Dedup.RelName; |
|
4 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
|
5 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
6 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
|
6 | 7 |
import org.junit.Before; |
7 | 8 |
import org.junit.Test; |
8 | 9 |
|
9 |
import com.google.protobuf.Descriptors.FieldDescriptor; |
|
10 |
import static org.junit.Assert.assertEquals; |
|
11 |
import static org.junit.Assert.assertNotNull; |
|
10 | 12 |
|
11 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
|
12 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship; |
|
13 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
14 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
|
15 |
|
|
16 | 13 |
public class OafRelDecoderTest { |
17 | 14 |
|
18 | 15 |
private OafRel oafRel; |
19 | 16 |
|
20 | 17 |
@Before |
21 | 18 |
public void setUp() { |
22 |
oafRel = OafTest.getPersonResult("ID_1", "ID_2", "1", "isAuthor");
|
|
19 |
oafRel = OafTest.getDedupRel("ID_1", "ID_2", RelType.resultResult, "isMergedIn");
|
|
23 | 20 |
} |
24 | 21 |
|
25 | 22 |
@Test |
... | ... | |
28 | 25 |
OafRelDecoder d1 = OafRelDecoder.decode(oafRel); |
29 | 26 |
|
30 | 27 |
assertNotNull(d1); |
31 |
assertEquals("isAuthor", d1.getRelClass());
|
|
28 |
assertEquals("isMergedIn", d1.getRelClass());
|
|
32 | 29 |
|
33 |
OafRelDecoder d2 = OafRelDecoder.decode(d1.setClassId("hasAuthor").build());
|
|
30 |
OafRelDecoder d2 = OafRelDecoder.decode(d1.setClassId("isMergedIn").build());
|
|
34 | 31 |
|
35 |
assertEquals("hasAuthor", d2.getRelClass());
|
|
36 |
assertEquals("hasAuthor", d2.getRelMetadata().getSemantics().getClassid());
|
|
37 |
assertEquals("hasAuthor", d2.getRelMetadata().getSemantics().getClassname());
|
|
32 |
assertEquals("isMergedIn", d2.getRelClass());
|
|
33 |
assertEquals("isMergedIn", d2.getRelMetadata().getSemantics().getClassid());
|
|
34 |
assertEquals("isMergedIn", d2.getRelMetadata().getSemantics().getClassname());
|
|
38 | 35 |
|
39 |
FieldDescriptor fd = Authorship.getDescriptor().findFieldByName("ranking"); |
|
40 |
assertEquals(d1.getSubRel().getField(fd), d2.getSubRel().getField(fd)); |
|
41 | 36 |
} |
42 | 37 |
|
43 | 38 |
@Test |
44 | 39 |
public void testGetCF() { |
45 |
assertEquals("personResult_authorship_isAuthorOf", OafRelDecoder.getCFQ(RelType.personResult, SubRelType.authorship, Authorship.RelName.isAuthorOf));
|
|
46 |
assertEquals("personResult_authorship_isAuthorOf", OafRelDecoder.getCFQ(RelType.personResult, SubRelType.authorship, "isAuthorOf"));
|
|
40 |
assertEquals("resultResult_dedup_isMergedIn", OafRelDecoder.getCFQ(RelType.resultResult, SubRelType.dedup, RelName.isMergedIn));
|
|
41 |
assertEquals("resultResult_dedup_isMergedIn", OafRelDecoder.getCFQ(RelType.resultResult, SubRelType.dedup, "isMergedIn"));
|
|
47 | 42 |
} |
48 | 43 |
|
49 | 44 |
} |
modules/dnet-openaireplus-mapping-utils/trunk/src/test/resources/eu/dnetlib/data/transform/fields.xml | ||
---|---|---|
11 | 11 |
<FIELD name="organizationlegalshortname" path="entity/organization/metadata/legalshortname/value" type="ngramtext" stat="false" indexable="true" result="false"/> |
12 | 12 |
<FIELD name="organizationwebsiteurl" path="entity/organization/metadata/websiteurl/value" stat="false" indexable="true" result="false"/> |
13 | 13 |
|
14 |
<!-- PERSON FIELDS --> |
|
15 |
<FIELD name="personfirstname" path="entity/person/metadata/firstname/value" stat="false" indexable="true" result="false"/> |
|
16 |
<FIELD name="personsecondnames" path="entity/person/metadata/secondnames/value" stat="false" indexable="true" result="false"/> |
|
17 |
<FIELD name="personfullname" path="entity/person/metadata/fullname/value" stat="false" indexable="true" result="false"/> |
|
18 |
|
|
19 | 14 |
<!-- RESULT FIELDS --> |
20 | 15 |
<FIELD name="resulttitle" path="entity/result/metadata/title/value | entity/children/result/metadata/title/value" stat="false" result="false" indexable="true"/> |
21 | 16 |
<FIELD name="resultdescription" path="entity/result/metadata/description/value" result="false" indexable="true" stat="false"/> |
22 | 17 |
<FIELD name="resultauthor" path="entity/result/author/metadata/fullname/value | entity/children/result/author/metadata/fullname/value" result="false" indexable="true" stat="false"/> |
23 | 18 |
|
24 |
</FIELDS> |
|
19 |
</FIELDS> |
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/openaire/hadoop/utils/HBaseTableUtils.java | ||
---|---|---|
9 | 9 |
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision; |
10 | 10 |
import eu.dnetlib.data.proto.DedupProtos.Dedup; |
11 | 11 |
import eu.dnetlib.data.proto.DedupSimilarityProtos.DedupSimilarity; |
12 |
import eu.dnetlib.data.proto.PersonPersonProtos.PersonPerson.CoAuthorship; |
|
13 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship; |
|
14 | 12 |
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation; |
15 |
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson.ContactPerson; |
|
16 | 13 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
17 | 14 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
18 | 15 |
import eu.dnetlib.data.proto.ResultOrganizationProtos.ResultOrganization.Affiliation; |
... | ... | |
40 | 37 |
cfs.add(RelType.organizationOrganization + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn); |
41 | 38 |
cfs.add(RelType.organizationOrganization + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo); |
42 | 39 |
|
43 |
cfs.add(RelType.personPerson + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.merges); |
|
44 |
cfs.add(RelType.personPerson + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn); |
|
45 |
cfs.add(RelType.personPerson + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo); |
|
46 |
|
|
47 | 40 |
cfs.add(RelType.resultResult + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.merges); |
48 | 41 |
cfs.add(RelType.resultResult + SEPARATOR + SubRelType.dedup + SEPARATOR + Dedup.RelName.isMergedIn); |
49 | 42 |
cfs.add(RelType.resultResult + SEPARATOR + SubRelType.dedupSimilarity + SEPARATOR + DedupSimilarity.RelName.isSimilarTo); |
... | ... | |
66 | 59 |
cfs.add(RelType.datasourceOrganization + SEPARATOR + SubRelType.provision + SEPARATOR + Provision.RelName.isProvidedBy); |
67 | 60 |
cfs.add(RelType.datasourceOrganization + SEPARATOR + SubRelType.provision + SEPARATOR + Provision.RelName.provides); |
68 | 61 |
|
69 |
cfs.add(RelType.personPerson + SEPARATOR + SubRelType.coauthorship + SEPARATOR + CoAuthorship.RelName.isCoauthorOf); |
|
70 |
|
|
71 |
cfs.add(RelType.personResult + SEPARATOR + SubRelType.authorship + SEPARATOR + Authorship.RelName.isAuthorOf); |
|
72 |
cfs.add(RelType.personResult + SEPARATOR + SubRelType.authorship + SEPARATOR + Authorship.RelName.hasAuthor); |
|
73 |
|
|
74 | 62 |
cfs.add(RelType.projectOrganization + SEPARATOR + SubRelType.participation + SEPARATOR + Participation.RelName.hasParticipant); |
75 | 63 |
cfs.add(RelType.projectOrganization + SEPARATOR + SubRelType.participation + SEPARATOR + Participation.RelName.isParticipant); |
76 | 64 |
|
77 |
cfs.add(RelType.projectPerson + SEPARATOR + SubRelType.contactPerson + SEPARATOR + ContactPerson.RelName.isContact); |
|
78 |
cfs.add(RelType.projectPerson + SEPARATOR + SubRelType.contactPerson + SEPARATOR + ContactPerson.RelName.hasContact); |
|
79 |
|
|
80 | 65 |
cfs.add(RelType.resultProject + SEPARATOR + SubRelType.outcome + SEPARATOR + Outcome.RelName.isProducedBy); |
81 | 66 |
cfs.add(RelType.resultProject + SEPARATOR + SubRelType.outcome + SEPARATOR + Outcome.RelName.produces); |
82 | 67 |
|
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/pace/model/gt/GTAuthorMapper.java | ||
---|---|---|
33 | 33 |
final OafEntity.Builder entity = OafEntity.newBuilder(); |
34 | 34 |
entity.setType(Type.person); |
35 | 35 |
entity.setId(gta.getId()); |
36 |
entity.setPerson(getPerson(gta)); |
|
36 |
//entity.setPerson(getPerson(gta));
|
|
37 | 37 |
return entity.build(); |
38 | 38 |
} |
39 | 39 |
|
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/mapreduce/util/OafTest.java | ||
---|---|---|
14 | 14 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
15 | 15 |
import eu.dnetlib.data.proto.OrganizationOrganizationProtos.OrganizationOrganization; |
16 | 16 |
import eu.dnetlib.data.proto.OrganizationProtos.Organization; |
17 |
import eu.dnetlib.data.proto.PersonProtos.Person; |
|
18 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult; |
|
19 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship; |
|
20 | 17 |
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization; |
21 | 18 |
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation; |
22 |
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson; |
|
23 |
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson.ContactPerson; |
|
24 | 19 |
import eu.dnetlib.data.proto.ProjectProtos.Project; |
25 | 20 |
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; |
26 | 21 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
... | ... | |
131 | 126 |
|
132 | 127 |
public static OafRel getDedupRel(final String source, final String target, final RelType relType, final String relClass) { |
133 | 128 |
return OafRel.newBuilder().setSource(source).setTarget(target).setRelType(relType).setSubRelType(SubRelType.dedup).setRelClass(relClass) |
134 |
.setChild(false).setCachedTarget(getResult(target)).build(); |
|
135 |
} |
|
136 |
|
|
137 |
public static OafRel getPersonResult(final String source, final String target, final String ranking, final String relClass) { |
|
138 |
return OafRel |
|
139 |
.newBuilder() |
|
140 |
.setSource(source) |
|
141 |
.setTarget(target) |
|
142 |
.setRelType(RelType.personResult) |
|
143 |
.setSubRelType(SubRelType.authorship) |
|
144 |
.setRelClass(relClass) |
|
145 |
.setChild(false) |
|
146 |
.setPersonResult( |
|
147 |
PersonResult.newBuilder().setAuthorship( |
|
148 |
Authorship.newBuilder().setRanking(ranking).setRelMetadata(relMetadata(relClass, "dnet:personResult_relations")))) |
|
149 |
.setCachedTarget(getPerson()).build(); |
|
150 |
} |
|
151 |
|
|
152 |
public static OafEntity getPerson() { |
|
153 |
return OafEntity |
|
154 |
.newBuilder() |
|
155 |
.setType(Type.person) |
|
156 |
.setId("WOS:000277866500014_A._Shirazi-Adl") |
|
157 |
.setPerson( |
|
158 |
Person.newBuilder().setMetadata( |
|
159 |
Person.Metadata.newBuilder().addSecondnames(sf("Shirazi-Adl")).setFullname(sf("A. Shirazi-Adl")) |
|
160 |
.setEmail(sf("name.surname@gmail.com")).setPhone(sf("12345")).setNationality(getQualifier("EN", "dnet:countries")))) |
|
129 |
.setChild(false).setCachedTarget(getResult(target)) |
|
130 |
.setResultResult(ResultResult.newBuilder().setDedup(Dedup.newBuilder().setRelMetadata(RelMetadata.getDefaultInstance()))) |
|
161 | 131 |
.build(); |
162 | 132 |
} |
163 | 133 |
|
... | ... | |
238 | 208 |
return oafRel.build(); |
239 | 209 |
} |
240 | 210 |
|
241 |
public static OafRel getProjectPerson(final String sourceId, final String targetId, final String relClass) { |
|
242 |
return OafRel |
|
243 |
.newBuilder() |
|
244 |
.setSource(sourceId) |
|
245 |
.setTarget(targetId) |
|
246 |
.setRelType(RelType.projectPerson) |
|
247 |
.setSubRelType(SubRelType.contactPerson) |
|
248 |
.setRelClass(relClass) |
|
249 |
.setChild(true) |
|
250 |
.setProjectPerson( |
|
251 |
ProjectPerson.newBuilder().setContactPerson( |
|
252 |
ContactPerson.newBuilder().setRelMetadata(relMetadata(relClass, "dnet:project_result_relations")))) |
|
253 |
.setCachedTarget(getPerson()).build(); |
|
254 |
} |
|
255 |
|
|
256 | 211 |
public static OafRel getSimilarityRel(final String sourceId, final String targetId, final OafEntity result, final String relClass) { |
257 | 212 |
return OafRel |
258 | 213 |
.newBuilder() |
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/mapreduce/util/OafRelDecoder.java | ||
---|---|---|
115 | 115 |
return sourceType.equals(Type.datasource) ? Type.organization : Type.datasource; |
116 | 116 |
case organizationOrganization: |
117 | 117 |
return Type.organization; |
118 |
case personPerson: |
|
119 |
return Type.person; |
|
120 |
case personResult: |
|
121 |
return sourceType.equals(Type.person) ? Type.result : Type.person; |
|
122 | 118 |
case projectOrganization: |
123 | 119 |
return sourceType.equals(Type.project) ? Type.organization : Type.project; |
124 |
case projectPerson: |
|
125 |
return sourceType.equals(Type.project) ? Type.person : Type.project; |
|
126 | 120 |
case resultOrganization: |
127 | 121 |
return sourceType.equals(Type.result) ? Type.organization : Type.result; |
128 | 122 |
case resultProject: |
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/index/CloudIndexClientFactory.java | ||
---|---|---|
2 | 2 |
|
3 | 3 |
import org.apache.commons.logging.Log; |
4 | 4 |
import org.apache.commons.logging.LogFactory; |
5 |
import org.apache.solr.client.solrj.impl.CloudSolrServer;
|
|
5 |
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
|
6 | 6 |
import org.apache.solr.client.solrj.response.SolrPingResponse; |
7 | 7 |
|
8 | 8 |
/** |
... | ... | |
14 | 14 |
|
15 | 15 |
public static CloudIndexClient newIndexClient(final String baseURL, final String collection, final boolean parallelUpdates) |
16 | 16 |
throws CloudIndexClientException { |
17 |
CloudSolrServer solrServer = null; |
|
18 |
try { |
|
19 | 17 |
|
18 |
try(final CloudSolrClient solrServer = new CloudSolrClient.Builder().withZkHost(baseURL).build()) { |
|
19 |
|
|
20 | 20 |
log.info(String.format("Initializing solr server (%s) ...", baseURL)); |
21 |
solrServer = new CloudSolrServer(baseURL); |
|
22 | 21 |
|
23 | 22 |
solrServer.connect(); |
24 | 23 |
|
... | ... | |
30 | 29 |
log.error("Invalid connection to solr Server (status = 0)"); |
31 | 30 |
throw new CloudIndexClientException("Invalid connection to solr Server (status = 0)"); |
32 | 31 |
} |
33 |
|
|
34 | 32 |
return new CloudIndexClient(solrServer); |
35 | 33 |
} catch (Throwable e) { |
36 |
if (solrServer != null) { |
|
37 |
solrServer.shutdown(); |
|
38 |
} |
|
39 | 34 |
log.error("The initialization of indexClient is FAILED", e); |
40 | 35 |
throw new CloudIndexClientException("The initialization of indexClient is FAILED", e); |
41 | 36 |
} |
42 | 37 |
} |
38 |
|
|
43 | 39 |
} |
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/index/CloudIndexClient.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.index; |
2 | 2 |
|
3 |
import java.io.IOException; |
|
3 | 4 |
import java.text.SimpleDateFormat; |
4 | 5 |
import java.util.Date; |
5 | 6 |
import java.util.List; |
... | ... | |
7 | 8 |
import org.apache.commons.logging.Log; |
8 | 9 |
import org.apache.commons.logging.LogFactory; |
9 | 10 |
import org.apache.solr.client.solrj.SolrQuery; |
10 |
import org.apache.solr.client.solrj.impl.CloudSolrServer;
|
|
11 |
import org.apache.solr.client.solrj.impl.CloudSolrClient;
|
|
11 | 12 |
import org.apache.solr.client.solrj.response.QueryResponse; |
12 | 13 |
import org.apache.solr.client.solrj.response.UpdateResponse; |
13 | 14 |
import org.apache.solr.common.SolrInputDocument; |
... | ... | |
24 | 25 |
private static final Log log = LogFactory.getLog(CloudIndexClient.class); |
25 | 26 |
private static final String INDEX_RECORD_RESULT_FIELD = "dnetResult"; |
26 | 27 |
|
27 |
private final CloudSolrServer solrServer;
|
|
28 |
private final CloudSolrClient solrServer;
|
|
28 | 29 |
|
29 |
protected CloudIndexClient(final CloudSolrServer solrServer) {
|
|
30 |
protected CloudIndexClient(final CloudSolrClient solrServer) {
|
|
30 | 31 |
this.solrServer = solrServer; |
31 | 32 |
} |
32 | 33 |
|
... | ... | |
149 | 150 |
} |
150 | 151 |
} |
151 | 152 |
|
152 |
public void close() { |
|
153 |
public void close() throws IOException {
|
|
153 | 154 |
if (solrServer != null) { |
154 |
solrServer.shutdown();
|
|
155 |
solrServer.close();
|
|
155 | 156 |
} |
156 | 157 |
} |
157 | 158 |
|
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/transform/xml/AbstractDNetXsltFunctions.java | ||
---|---|---|
31 | 31 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
32 | 32 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
33 | 33 |
import eu.dnetlib.data.proto.OrganizationOrganizationProtos.OrganizationOrganization; |
34 |
import eu.dnetlib.data.proto.PersonPersonProtos.PersonPerson; |
|
35 |
import eu.dnetlib.data.proto.PersonPersonProtos.PersonPerson.CoAuthorship; |
|
36 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult; |
|
37 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship; |
|
38 | 34 |
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization; |
39 | 35 |
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation; |
40 |
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson; |
|
41 |
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson.ContactPerson; |
|
42 | 36 |
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; |
43 | 37 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
44 | 38 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
... | ... | |
1036 | 1030 |
|
1037 | 1031 |
case datasourceOrganization: |
1038 | 1032 |
return rel.setDatasourceOrganization(DatasourceOrganization.newBuilder().setProvision((Provision.Builder) subRel)); |
1039 |
case personResult: |
|
1040 |
final PersonResult.Builder pr = PersonResult.newBuilder(); |
|
1041 |
switch (subRelType) { |
|
1042 |
case authorship: |
|
1043 |
return rel.setPersonResult(pr.setAuthorship((Authorship.Builder) subRel)); |
|
1044 |
default: |
|
1045 |
throw new IllegalArgumentException("invalid subRelType for result_person relations: " + subRelType.toString()); |
|
1046 |
} |
|
1047 | 1033 |
case projectOrganization: |
1048 | 1034 |
return rel.setProjectOrganization(ProjectOrganization.newBuilder().setParticipation((Participation.Builder) subRel)); |
1049 |
case projectPerson: |
|
1050 |
return rel.setProjectPerson(ProjectPerson.newBuilder().setContactPerson(((ContactPerson.Builder) subRel))); |
|
1051 | 1035 |
case resultOrganization: |
1052 | 1036 |
return rel.setResultOrganization(ResultOrganization.newBuilder().setAffiliation((Affiliation.Builder) subRel)); |
1053 | 1037 |
case resultProject: |
... | ... | |
1071 | 1055 |
default: |
1072 | 1056 |
throw new IllegalArgumentException("invalid subRelType for result_result relations: " + subRelType.toString()); |
1073 | 1057 |
} |
1074 |
case personPerson: |
|
1075 |
final PersonPerson.Builder pp = PersonPerson.newBuilder(); |
|
1076 |
switch (subRelType) { |
|
1077 |
case dedup: |
|
1078 |
return rel.setPersonPerson(pp.setDedup((Dedup.Builder) subRel)); |
|
1079 |
case dedupSimilarity: |
|
1080 |
return rel.setPersonPerson(pp.setDedupSimilarity((DedupSimilarity.Builder) subRel)); |
|
1081 |
default: |
|
1082 |
throw new IllegalArgumentException("invalid subRelType for person_person relations: " + subRelType.toString()); |
|
1083 |
} |
|
1084 | 1058 |
case organizationOrganization: |
1085 | 1059 |
final OrganizationOrganization.Builder oo = OrganizationOrganization.newBuilder(); |
1086 | 1060 |
switch (subRelType) { |
... | ... | |
1101 | 1075 |
|
1102 | 1076 |
case provision: |
1103 | 1077 |
return Provision.newBuilder().setRelMetadata(metadata); |
1104 |
case coauthorship: |
|
1105 |
return CoAuthorship.newBuilder().setRelMetadata(metadata); |
|
1106 |
case authorship: |
|
1107 |
return Authorship.newBuilder().setRelMetadata(metadata).setRanking("" + params.get("rank")); |
|
1108 |
case participation: |
|
1109 |
return Participation.newBuilder().setRelMetadata(metadata); |
|
1110 |
case contactPerson: |
|
1111 |
return ContactPerson.newBuilder().setRelMetadata(metadata); |
|
1112 | 1078 |
case outcome: |
1113 | 1079 |
return Outcome.newBuilder().setRelMetadata(metadata); |
1114 | 1080 |
case similarity: |
... | ... | |
1134 | 1100 |
|
1135 | 1101 |
case datasourceOrganization: |
1136 | 1102 |
return "dnet:datasource_organization_relations"; |
1137 |
case personResult: |
|
1138 |
return "dnet:person_result_relations"; |
|
1139 | 1103 |
case projectOrganization: |
1140 | 1104 |
return "dnet:project_organization_relations"; |
1141 |
case projectPerson: |
|
1142 |
return "dnet:project_person_relations"; |
|
1143 | 1105 |
case resultOrganization: |
1144 | 1106 |
return "dnet:result_organization_relations"; |
1145 | 1107 |
case resultProject: |
1146 | 1108 |
return "dnet:result_project_relations"; |
1147 | 1109 |
case resultResult: |
1148 | 1110 |
return "dnet:result_result_relations"; |
1149 |
case personPerson: |
|
1150 |
return "dnet:person_person_relations"; |
|
1151 | 1111 |
case organizationOrganization: |
1152 | 1112 |
return "dnet:organization_organization_relations"; |
1153 | 1113 |
} |
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/transform/xml/DbmfToHbaseXsltFunctions.java | ||
---|---|---|
16 | 16 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
17 | 17 |
import eu.dnetlib.data.proto.OrganizationProtos.Organization; |
18 | 18 |
import eu.dnetlib.data.proto.PersonProtos.Person; |
19 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult; |
|
20 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship; |
|
21 | 19 |
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization; |
22 | 20 |
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation; |
23 |
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson; |
|
24 |
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson.ContactPerson; |
|
25 | 21 |
import eu.dnetlib.data.proto.ProjectProtos.Project; |
26 | 22 |
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; |
27 | 23 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
... | ... | |
105 | 101 |
DatasourceOrganization.Builder dorg = DatasourceOrganization.newBuilder().setProvision(provision); |
106 | 102 |
|
107 | 103 |
return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.provision, relClass, collectedFromId, collectedFromName, false, dorg, provision); |
108 |
case personResult: |
|
109 |
Authorship.Builder auth = Authorship.newBuilder().setRelMetadata( |
|
110 |
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Authorship.RelName.valueOf(relClass).toString(), relScheme))); |
|
111 |
PersonResult.Builder personResult = PersonResult.newBuilder().setAuthorship(auth); |
|
112 |
|
|
113 |
return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.authorship, relClass, collectedFromId, collectedFromName, false, personResult, auth); |
|
114 |
case projectPerson: |
|
115 |
ContactPerson.Builder contact = ContactPerson.newBuilder().setRelMetadata( |
|
116 |
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(ContactPerson.RelName.valueOf(relClass).toString(), relScheme))); |
|
117 |
ProjectPerson.Builder projectPerson = ProjectPerson.newBuilder().setContactPerson(contact); |
|
118 |
|
|
119 |
return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.contactPerson, relClass, collectedFromId, collectedFromName, false, projectPerson, contact); |
|
120 | 104 |
case projectOrganization: |
121 | 105 |
Participation.Builder participant = Participation.newBuilder().setRelMetadata( |
122 | 106 |
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Participation.RelName.valueOf(relClass).toString(), relScheme))); |
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/transform/xml/CommonDNetXsltFunctions.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.data.transform.xml; |
2 | 2 |
|
3 |
import java.util.List; |
|
4 | 3 |
import java.util.Map; |
5 | 4 |
|
6 |
import com.google.common.collect.Lists; |
|
7 |
import com.google.common.collect.Maps; |
|
8 | 5 |
import com.google.protobuf.Message.Builder; |
9 | 6 |
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder; |
10 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
11 | 7 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
12 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
13 | 8 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
14 |
import eu.dnetlib.data.proto.PersonProtos.Person; |
|
15 |
import eu.dnetlib.data.proto.PersonProtos.Person.CoAuthor; |
|
16 | 9 |
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; |
17 | 10 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
18 | 11 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
19 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
20 |
import org.apache.commons.lang.StringUtils; |
|
21 |
import org.w3c.dom.Node; |
|
22 | 12 |
import org.w3c.dom.NodeList; |
23 | 13 |
|
24 | 14 |
/** |
... | ... | |
28 | 18 |
|
29 | 19 |
private static final int MAX_COAUTHORS = 50; |
30 | 20 |
|
31 |
// dnet:oafPersonResultFromDMF($resultId, $oafPerson, position(), "sysimport:crosswalk:repository", "0.9") |
|
32 |
public static String personResult_Authorship( |
|
33 |
final String source, |
|
34 |
final String target, |
|
35 |
final int rank, |
|
36 |
final String relClass, |
|
37 |
final String collectedFromId, |
|
38 |
final String collectedFromName, |
|
39 |
final String provenanceAction, |
|
40 |
final String trust, |
|
41 |
final NodeList about) { |
|
42 |
|
|
43 |
final Map<String, String> params = Maps.newHashMap(); |
|
44 |
params.put("rank", "" + rank); |
|
45 |
return rel(source, target, RelType.personResult.name(), SubRelType.authorship.name(), relClass, collectedFromId, collectedFromName, provenanceAction, trust, about, params); |
|
46 |
} |
|
47 |
|
|
48 | 21 |
public static String rel( |
49 | 22 |
final String source, |
50 | 23 |
final String target, |
... | ... | |
117 | 90 |
} |
118 | 91 |
} |
119 | 92 |
|
120 |
public static String person( |
|
121 |
final String personId, |
|
122 |
final NodeList about, |
|
123 |
final String provenanceAction, |
|
124 |
final String trust, |
|
125 |
final String collectedFromId, |
|
126 |
final String collectedFromName, |
|
127 |
final String originalId, |
|
128 |
final String dateOfCollection, |
|
129 |
final String dateOfTransformation, |
|
130 |
final String fullname, |
|
131 |
final String nameIdentifier, |
|
132 |
final String nameIdentifierScheme) { |
|
133 |
return person(personId, fullname, nameIdentifier, nameIdentifierScheme, null, null, null, provenanceAction, trust, about, collectedFromId, |
|
134 |
collectedFromName, originalId, dateOfCollection, dateOfTransformation); |
|
135 |
} |
|
136 |
|
|
137 |
// $personId, $about, $provenance, $trust, $collectedfromid, $collectedfromname, $originalPersonId, $dateofcollection, normalize-space(.)) |
|
138 |
public static String person( |
|
139 |
final String personId, |
|
140 |
final String fullname, |
|
141 |
final String nameIdentifier, |
|
142 |
final String nameIdentifierScheme, |
|
143 |
final NodeList authors, |
|
144 |
final String namespaceprefix, |
|
145 |
final String objIdentifier, |
|
146 |
final String provenanceAction, |
|
147 |
final String trust, |
|
148 |
final NodeList about, |
|
149 |
final String collectedFromId, |
|
150 |
final String collectedFromName, |
|
151 |
final String originalId, |
|
152 |
final String dateOfCollection, |
|
153 |
final String dateOfTransformation) { |
|
154 |
try { |
|
155 |
final String entityId = OafRowKeyDecoder.decode(personId).getKey(); |
|
156 |
|
|
157 |
final Person.Builder person = Person.newBuilder(); |
|
158 |
final Person.Metadata.Builder metadata = getMetadata(fullname); |
|
159 |
|
|
160 |
if (authors != null) { |
|
161 |
for (int i = 0; (i < authors.getLength()) && (i < MAX_COAUTHORS); i++) { |
|
162 |
final Node node = authors.item(i); |
|
163 |
|
|
164 |
final String name = StringUtils.trim(node.getTextContent()); |
|
165 |
if (!name.equals(fullname)) { |
|
166 |
|
|
167 |
final CoAuthor.Builder coAuthor = CoAuthor.newBuilder(); |
|
168 |
|
|
169 |
coAuthor.setId(oafId("person", namespaceprefix, objIdentifier + "::" + name)); |
|
170 |
coAuthor.setMetadata(getMetadata(name)); |
|
171 |
|
|
172 |
person.addCoauthor(coAuthor); |
|
173 |
} |
|
174 |
} |
|
175 |
} |
|
176 |
|
|
177 |
final List<StructuredProperty> pids = Lists.newArrayList(); |
|
178 |
if (StringUtils.isNotBlank(nameIdentifier) && StringUtils.isNotBlank(nameIdentifierScheme)) { |
|
179 |
pids.add(getStructuredProperty(nameIdentifier, nameIdentifierScheme, nameIdentifierScheme, "dnet:pid_types", "dnet:pid_types")); |
|
180 |
} |
|
181 |
|
|
182 |
final OafEntity.Builder entity = |
|
183 |
getEntity(Type.person, entityId, getKV(collectedFromId, collectedFromName), Lists.newArrayList(originalId), dateOfCollection, |
|
184 |
dateOfTransformation, pids) |
|
185 |
.setPerson(person.setMetadata(metadata)); |
|
186 |
|
|
187 |
final Oaf oaf = getOaf(entity, getDataInfo(about, provenanceAction, trust, false, false)); |
|
188 |
return base64(oaf.toByteArray()); |
|
189 |
} catch (final Throwable e) { |
|
190 |
System.err.println("personId: " + personId); |
|
191 |
System.err.println("fullname: " + fullname); |
|
192 |
System.err.println("provenanceAction: " + provenanceAction); |
|
193 |
System.err.println("trust: " + trust); |
|
194 |
System.err.println("collectedFromId: " + collectedFromId); |
|
195 |
System.err.println("collectedFromName: " + collectedFromName); |
|
196 |
System.err.println("originalId: " + originalId); |
|
197 |
System.err.println("dateOfCollection: " + dateOfCollection); |
|
198 |
e.printStackTrace(); |
|
199 |
throw new RuntimeException(e); |
|
200 |
} |
|
201 |
} |
|
202 |
|
|
203 |
private static Person.Metadata.Builder getMetadata(final String fullname) { |
|
204 |
final Person.Metadata.Builder metadata = Person.Metadata.newBuilder(); |
|
205 |
|
|
206 |
metadata.setFullname(sf(fullname)); |
|
207 |
|
|
208 |
final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false); |
|
209 |
if (p.isAccurate()) { |
|
210 |
metadata.setFirstname(sf(p.getNormalisedFirstName())); |
|
211 |
metadata.clearSecondnames().addSecondnames(sf(p.getNormalisedSurname())); |
|
212 |
// metadata.setFullname(sf(p.getNormalisedFullname())); |
|
213 |
} |
|
214 |
return metadata; |
|
215 |
} |
|
216 |
|
|
217 | 93 |
} |
modules/dnet-openaireplus-mapping-utils/trunk/src/main/java/eu/dnetlib/data/transform/OafEntityMerger.java | ||
---|---|---|
18 | 18 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
19 | 19 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
20 | 20 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
21 |
import eu.dnetlib.data.proto.PersonProtos.Person; |
|
22 |
import eu.dnetlib.data.proto.PersonProtos.Person.CoAuthor; |
|
23 |
import eu.dnetlib.data.proto.PersonProtos.Person.MergedPerson; |
|
24 |
import eu.dnetlib.data.proto.PersonProtos.Person.Metadata; |
|
25 | 21 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
26 | 22 |
import eu.dnetlib.data.proto.ResultProtos.Result.Instance; |
27 | 23 |
import eu.dnetlib.data.proto.SpecialTrustProtos.SpecialTrust; |
... | ... | |
108 | 104 |
break; |
109 | 105 |
case organization: |
110 | 106 |
break; |
111 |
case person: |
|
112 |
final Person.Builder person = builder.getEntityBuilder().getPersonBuilder().setAnchor(true); |
|
113 |
|
|
114 |
for (final String field : OafUtils.getFieldNames(Person.Metadata.getDescriptor(), Metadata.SECONDNAMES_FIELD_NUMBER)) { |
|
115 |
setSingleString(person.getMetadataBuilder(), field); |
|
116 |
} |
|
117 |
|
|
118 |
final Map<String, MergedPerson> mergedMap = Maps.newHashMap(); |
|
119 |
for(MergedPerson merged : person.getMergedpersonList()) { |
|
120 |
mergedMap.put(merged.getId(), merged); |
|
121 |
} |
|
122 |
person.clearMergedperson().addAllMergedperson(mergedMap.values()); |
|
123 |
|
|
124 |
final Map<String, CoAuthor> coAuthorMap = Maps.newHashMap(); |
|
125 |
for(CoAuthor coAuthor : person.getCoauthorList()) { |
|
126 |
coAuthorMap.put(coAuthor.getId(), coAuthor); |
|
127 |
} |
|
128 |
person.clearCoauthor().addAllCoauthor(coAuthorMap.values()); |
|
129 |
|
|
130 |
break; |
|
131 | 107 |
case project: |
132 | 108 |
break; |
133 | 109 |
case result: |
Also available in: Unified diff
depending on protobuf specific version, getting rid of person entities, updated solrj version