Project

General

Profile

1 26600 sandro.lab
package eu.dnetlib.data.mapreduce.dedup;
2
3
import java.util.List;
import java.util.UUID;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

import eu.dnetlib.data.mapreduce.util.OafEntityMerger;
import eu.dnetlib.data.mapreduce.util.OafTest;
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
import eu.dnetlib.data.proto.KindProtos.Kind;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.OafProtos.OafEntity;
import eu.dnetlib.data.proto.OafProtos.OafEntity.Builder;
import eu.dnetlib.data.proto.ResultProtos.Result;
import eu.dnetlib.data.proto.TypeProtos.Type;
22
23
public class OafMergeTest {
24
25
	private List<Oaf> oafList;
26
27
	private OafEntityMerger merger;
28
29
	@Before
30
	public void setUp() throws Exception {
31
32
		merger = new OafEntityMerger();
33
		oafList = Lists.newArrayList();
34
		oafList.add(getOaf("0.1").setEntity(
35
				getEntity("id_1", null, "pid_1").setResult(
36
						Result.newBuilder().setMetadata(
37 28094 claudio.at
								Result.Metadata.newBuilder().setDateofacceptance(OafTest.sf("2012-01-01"))
38
										.addTitle(OafTest.getStructuredproperty("vvvv Title", "main title", "dnet:dataCite_title"))
39
										.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
40 26600 sandro.lab
		oafList.add(getOaf("0.1").setEntity(
41
				getEntity("id_2", "originalId_2", "pid_2").setResult(
42
						Result.newBuilder().setMetadata(
43 28094 claudio.at
								Result.Metadata.newBuilder().setDateofacceptance(OafTest.sf(""))
44
										.addTitle(OafTest.getStructuredproperty("aaaa Title", "main title", "dnet:dataCite_title"))
45
										.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
46 26600 sandro.lab
		oafList.add(getOaf("0.2").setEntity(
47
				getEntity("id_3", "originalId_2", "pid_2").setResult(
48
						Result.newBuilder().setMetadata(
49 28094 claudio.at
								Result.Metadata.newBuilder().addTitle(OafTest.getStructuredproperty("cccc Title", "sub title", "dnet:dataCite_title"))
50
										.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
51 26600 sandro.lab
52
		oafList.add(getOaf("0.3").setEntity(
53
				getEntity("id_$", null, "pid_3").setResult(
54
						Result.newBuilder().setMetadata(
55 28094 claudio.at
								Result.Metadata.newBuilder().setPublisher(OafTest.sf("AMER CHEMICAL SOCXXXXXXXXXXXXXXXXX"))
56
										.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
57 26600 sandro.lab
		oafList.add(getOaf("0.5").setEntity(
58
				getEntity("id_5", null, null).setResult(
59
						Result.newBuilder().setMetadata(
60 28094 claudio.at
								Result.Metadata.newBuilder().addTitle(OafTest.getStructuredproperty("hhhh title", "main title", "dnet:dataCite_title"))
61
										.setPublisher(OafTest.sf("AMER CHEMICAL SOC X"))
62
										.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies")).setStoragedate(OafTest.sf("2012-11-18"))
63
										.setLanguage(OafTest.getQualifier("eng", "dnet:languages")).addDescription(OafTest.sf("original description")))))
64
				.build());
65 26600 sandro.lab
		oafList.add(getOaf("0.6").setEntity(
66
				getEntity("id_6", null, "pid_6").setResult(
67
						Result.newBuilder().setMetadata(
68 28094 claudio.at
								Result.Metadata.newBuilder().setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))
69
										.addDescription(OafTest.sf("new description"))))).build());
70 26600 sandro.lab
	}
71
72
	@Test
73
	public void test_merge() {
74
75
		Oaf.Builder builder = Oaf.newBuilder();
76
77
		for (Oaf oaf : oafList) {
78
			builder.mergeFrom(oaf);
79
		}
80
81
		Result.Metadata.Builder metadata = builder.getEntityBuilder().getResultBuilder().getMetadataBuilder();
82
		Iterable<StructuredProperty> filter = Iterables.filter(metadata.getTitleList(), new Predicate<StructuredProperty>() {
83
84
			@Override
85
			public boolean apply(StructuredProperty sp) {
86
				return sp.getQualifier() != null && sp.getQualifier().getClassname().equals("main title");
87
			}
88
		});
89
90
		StructuredProperty last = Iterables.getLast(filter);
91
92
		metadata.clearTitle().addAllTitle(Lists.newArrayList(last));
93
94
		System.out.println(builder.build().toString());
95
	}
96
97
	@Test
98
	public void test_merger() {
99
100
		Oaf merge = merger.mergeEntities("id", oafList).build();
101
102
		System.out.println(merge.toString());
103
	}
104
105
	// @Test
	// public void test_sort() {
	// Queue<Result> q = new PriorityQueue<Result>(3, DedupReducer.cmp);
	// for (Oaf oaf : oafList) {
	// q.add(oaf.getEntity().getResult());
	// }
	//
	// while (!q.isEmpty()) {
	// Result r = q.remove();
	// List<StructuredProperty> titles = r.getMetadata().getTitleList();
	// if (!titles.isEmpty()) {
	// System.out.println(titles.get(0).getValue());
	// }
	// }
	// }
120
121
	private Oaf.Builder getOaf(String trust) {
122 28094 claudio.at
		return Oaf.newBuilder().setKind(Kind.entity).setDataInfo(OafTest.getDataInfo(trust)).setTimestamp(System.currentTimeMillis());
123 26600 sandro.lab
	}
124
125
	private OafEntity.Builder getEntity(String id, String originalId, String pid) {
126
		Builder entity = OafEntity.newBuilder().setType(Type.result).setId(id).addOriginalId(originalId != null ? originalId : UUID.randomUUID().toString());
127
128
		if (pid != null) {
129 28094 claudio.at
			entity.addPid(OafTest.getStructuredproperty(pid, "class", "scheme"));
130 26600 sandro.lab
		}
131
132
		return entity;
133
	}
134
135
}