1 |
26600
|
sandro.lab
|
package eu.dnetlib.data.mapreduce.dedup;
|
2 |
|
|
|
3 |
|
|
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;

import java.util.List;
import java.util.UUID;

import org.junit.Before;
import org.junit.Test;

import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

import eu.dnetlib.data.mapreduce.util.OafEntityMerger;
import eu.dnetlib.data.mapreduce.util.OafTest;
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
import eu.dnetlib.data.proto.KindProtos.Kind;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.OafProtos.OafEntity;
import eu.dnetlib.data.proto.OafProtos.OafEntity.Builder;
import eu.dnetlib.data.proto.ResultProtos.Result;
import eu.dnetlib.data.proto.TypeProtos.Type;
|
22 |
|
|
|
23 |
|
|
public class OafMergeTest {
|
24 |
|
|
|
25 |
|
|
private List<Oaf> oafList;
|
26 |
|
|
|
27 |
|
|
private OafEntityMerger merger;
|
28 |
|
|
|
29 |
|
|
@Before
|
30 |
|
|
public void setUp() throws Exception {
|
31 |
|
|
|
32 |
|
|
merger = new OafEntityMerger();
|
33 |
|
|
oafList = Lists.newArrayList();
|
34 |
|
|
oafList.add(getOaf("0.1").setEntity(
|
35 |
|
|
getEntity("id_1", null, "pid_1").setResult(
|
36 |
|
|
Result.newBuilder().setMetadata(
|
37 |
28094
|
claudio.at
|
Result.Metadata.newBuilder().setDateofacceptance(OafTest.sf("2012-01-01"))
|
38 |
|
|
.addTitle(OafTest.getStructuredproperty("vvvv Title", "main title", "dnet:dataCite_title"))
|
39 |
|
|
.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
|
40 |
26600
|
sandro.lab
|
oafList.add(getOaf("0.1").setEntity(
|
41 |
|
|
getEntity("id_2", "originalId_2", "pid_2").setResult(
|
42 |
|
|
Result.newBuilder().setMetadata(
|
43 |
28094
|
claudio.at
|
Result.Metadata.newBuilder().setDateofacceptance(OafTest.sf(""))
|
44 |
|
|
.addTitle(OafTest.getStructuredproperty("aaaa Title", "main title", "dnet:dataCite_title"))
|
45 |
|
|
.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
|
46 |
26600
|
sandro.lab
|
oafList.add(getOaf("0.2").setEntity(
|
47 |
|
|
getEntity("id_3", "originalId_2", "pid_2").setResult(
|
48 |
|
|
Result.newBuilder().setMetadata(
|
49 |
28094
|
claudio.at
|
Result.Metadata.newBuilder().addTitle(OafTest.getStructuredproperty("cccc Title", "sub title", "dnet:dataCite_title"))
|
50 |
|
|
.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
|
51 |
26600
|
sandro.lab
|
|
52 |
|
|
oafList.add(getOaf("0.3").setEntity(
|
53 |
|
|
getEntity("id_$", null, "pid_3").setResult(
|
54 |
|
|
Result.newBuilder().setMetadata(
|
55 |
28094
|
claudio.at
|
Result.Metadata.newBuilder().setPublisher(OafTest.sf("AMER CHEMICAL SOCXXXXXXXXXXXXXXXXX"))
|
56 |
|
|
.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
|
57 |
26600
|
sandro.lab
|
oafList.add(getOaf("0.5").setEntity(
|
58 |
|
|
getEntity("id_5", null, null).setResult(
|
59 |
|
|
Result.newBuilder().setMetadata(
|
60 |
28094
|
claudio.at
|
Result.Metadata.newBuilder().addTitle(OafTest.getStructuredproperty("hhhh title", "main title", "dnet:dataCite_title"))
|
61 |
|
|
.setPublisher(OafTest.sf("AMER CHEMICAL SOC X"))
|
62 |
|
|
.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies")).setStoragedate(OafTest.sf("2012-11-18"))
|
63 |
|
|
.setLanguage(OafTest.getQualifier("eng", "dnet:languages")).addDescription(OafTest.sf("original description")))))
|
64 |
|
|
.build());
|
65 |
26600
|
sandro.lab
|
oafList.add(getOaf("0.6").setEntity(
|
66 |
|
|
getEntity("id_6", null, "pid_6").setResult(
|
67 |
|
|
Result.newBuilder().setMetadata(
|
68 |
28094
|
claudio.at
|
Result.Metadata.newBuilder().setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))
|
69 |
|
|
.addDescription(OafTest.sf("new description"))))).build());
|
70 |
26600
|
sandro.lab
|
}
|
71 |
|
|
|
72 |
|
|
@Test
|
73 |
|
|
public void test_merge() {
|
74 |
|
|
|
75 |
|
|
Oaf.Builder builder = Oaf.newBuilder();
|
76 |
|
|
|
77 |
|
|
for (Oaf oaf : oafList) {
|
78 |
|
|
builder.mergeFrom(oaf);
|
79 |
|
|
}
|
80 |
|
|
|
81 |
|
|
Result.Metadata.Builder metadata = builder.getEntityBuilder().getResultBuilder().getMetadataBuilder();
|
82 |
|
|
Iterable<StructuredProperty> filter = Iterables.filter(metadata.getTitleList(), new Predicate<StructuredProperty>() {
|
83 |
|
|
|
84 |
|
|
@Override
|
85 |
|
|
public boolean apply(StructuredProperty sp) {
|
86 |
|
|
return sp.getQualifier() != null && sp.getQualifier().getClassname().equals("main title");
|
87 |
|
|
}
|
88 |
|
|
});
|
89 |
|
|
|
90 |
|
|
StructuredProperty last = Iterables.getLast(filter);
|
91 |
|
|
|
92 |
|
|
metadata.clearTitle().addAllTitle(Lists.newArrayList(last));
|
93 |
|
|
|
94 |
|
|
System.out.println(builder.build().toString());
|
95 |
|
|
}
|
96 |
|
|
|
97 |
|
|
@Test
|
98 |
|
|
public void test_merger() {
|
99 |
|
|
|
100 |
|
|
Oaf merge = merger.mergeEntities("id", oafList).build();
|
101 |
|
|
|
102 |
|
|
System.out.println(merge.toString());
|
103 |
|
|
}
|
104 |
|
|
|
105 |
|
|
// @Test
|
106 |
|
|
// public void test_sort() {
|
107 |
|
|
// Queue<Result> q = new PriorityQueue<Result>(3, DedupReducer.cmp);
|
108 |
|
|
// for (Oaf oaf : oafList) {
|
109 |
|
|
// q.add(oaf.getEntity().getResult());
|
110 |
|
|
// }
|
111 |
|
|
//
|
112 |
|
|
// while (!q.isEmpty()) {
|
113 |
|
|
// Result r = q.remove();
|
114 |
|
|
// List<StructuredProperty> titles = r.getMetadata().getTitleList();
|
115 |
|
|
// if (!titles.isEmpty()) {
|
116 |
|
|
// System.out.println(titles.get(0).getValue());
|
117 |
|
|
// }
|
118 |
|
|
// }
|
119 |
|
|
// }
|
120 |
|
|
|
121 |
|
|
private Oaf.Builder getOaf(String trust) {
|
122 |
28094
|
claudio.at
|
return Oaf.newBuilder().setKind(Kind.entity).setDataInfo(OafTest.getDataInfo(trust)).setTimestamp(System.currentTimeMillis());
|
123 |
26600
|
sandro.lab
|
}
|
124 |
|
|
|
125 |
|
|
private OafEntity.Builder getEntity(String id, String originalId, String pid) {
|
126 |
|
|
Builder entity = OafEntity.newBuilder().setType(Type.result).setId(id).addOriginalId(originalId != null ? originalId : UUID.randomUUID().toString());
|
127 |
|
|
|
128 |
|
|
if (pid != null) {
|
129 |
28094
|
claudio.at
|
entity.addPid(OafTest.getStructuredproperty(pid, "class", "scheme"));
|
130 |
26600
|
sandro.lab
|
}
|
131 |
|
|
|
132 |
|
|
return entity;
|
133 |
|
|
}
|
134 |
|
|
|
135 |
|
|
}
|