Project

General

Profile

1
package eu.dnetlib.data.mapreduce.dedup;

import static org.junit.Assert.assertEquals;

import java.util.List;
import java.util.UUID;

import org.junit.Before;
import org.junit.Test;

import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

import eu.dnetlib.data.mapreduce.util.OafEntityMerger;
import eu.dnetlib.data.mapreduce.util.OafTest;
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
import eu.dnetlib.data.proto.KindProtos.Kind;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.proto.OafProtos.OafEntity;
import eu.dnetlib.data.proto.OafProtos.OafEntity.Builder;
import eu.dnetlib.data.proto.ResultProtos.Result;
import eu.dnetlib.data.proto.TypeProtos.Type;

public class OafMergeTest {
24

    
25
	private List<Oaf> oafList;
26

    
27
	private OafEntityMerger merger;
28

    
29
	@Before
30
	public void setUp() throws Exception {
31

    
32
		merger = new OafEntityMerger();
33
		oafList = Lists.newArrayList();
34
		oafList.add(getOaf("0.1").setEntity(
35
				getEntity("id_1", null, "pid_1").setResult(
36
						Result.newBuilder().setMetadata(
37
								Result.Metadata.newBuilder().setDateofacceptance(OafTest.sf("2012-01-01"))
38
										.addTitle(OafTest.getStructuredproperty("vvvv Title", "main title", "dnet:dataCite_title"))
39
										.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
40
		oafList.add(getOaf("0.1").setEntity(
41
				getEntity("id_2", "originalId_2", "pid_2").setResult(
42
						Result.newBuilder().setMetadata(
43
								Result.Metadata.newBuilder().setDateofacceptance(OafTest.sf(""))
44
										.addTitle(OafTest.getStructuredproperty("aaaa Title", "main title", "dnet:dataCite_title"))
45
										.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
46
		oafList.add(getOaf("0.2").setEntity(
47
				getEntity("id_3", "originalId_2", "pid_2").setResult(
48
						Result.newBuilder().setMetadata(
49
								Result.Metadata.newBuilder().addTitle(OafTest.getStructuredproperty("cccc Title", "sub title", "dnet:dataCite_title"))
50
										.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
51

    
52
		oafList.add(getOaf("0.3").setEntity(
53
				getEntity("id_$", null, "pid_3").setResult(
54
						Result.newBuilder().setMetadata(
55
								Result.Metadata.newBuilder().setPublisher(OafTest.sf("AMER CHEMICAL SOCXXXXXXXXXXXXXXXXX"))
56
										.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
57
		oafList.add(getOaf("0.5").setEntity(
58
				getEntity("id_5", null, null).setResult(
59
						Result.newBuilder().setMetadata(
60
								Result.Metadata.newBuilder().addTitle(OafTest.getStructuredproperty("hhhh title", "main title", "dnet:dataCite_title"))
61
										.setPublisher(OafTest.sf("AMER CHEMICAL SOC X"))
62
										.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies")).setStoragedate(OafTest.sf("2012-11-18"))
63
										.setLanguage(OafTest.getQualifier("eng", "dnet:languages")).addDescription(OafTest.sf("original description")))))
64
				.build());
65
		oafList.add(getOaf("0.6").setEntity(
66
				getEntity("id_6", null, "pid_6").setResult(
67
						Result.newBuilder().setMetadata(
68
								Result.Metadata.newBuilder().setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))
69
										.addDescription(OafTest.sf("new description"))))).build());
70
	}
71

    
72
	@Test
73
	public void test_merge() {
74

    
75
		Oaf.Builder builder = Oaf.newBuilder();
76

    
77
		for (Oaf oaf : oafList) {
78
			builder.mergeFrom(oaf);
79
		}
80

    
81
		Result.Metadata.Builder metadata = builder.getEntityBuilder().getResultBuilder().getMetadataBuilder();
82
		Iterable<StructuredProperty> filter = Iterables.filter(metadata.getTitleList(), new Predicate<StructuredProperty>() {
83

    
84
			@Override
85
			public boolean apply(StructuredProperty sp) {
86
				return sp.getQualifier() != null && sp.getQualifier().getClassname().equals("main title");
87
			}
88
		});
89

    
90
		StructuredProperty last = Iterables.getLast(filter);
91

    
92
		metadata.clearTitle().addAllTitle(Lists.newArrayList(last));
93

    
94
		System.out.println(builder.build().toString());
95
	}
96

    
97
	@Test
98
	public void test_merger() {
99

    
100
		Oaf merge = merger.mergeEntities("id", oafList).build();
101

    
102
		System.out.println(merge.toString());
103
	}
104

    
105
	// @Test
106
	// public void test_sort() {
107
	// Queue<Result> q = new PriorityQueue<Result>(3, DedupReducer.cmp);
108
	// for (Oaf oaf : oafList) {
109
	// q.add(oaf.getEntity().getResult());
110
	// }
111
	//
112
	// while (!q.isEmpty()) {
113
	// Result r = q.remove();
114
	// List<StructuredProperty> titles = r.getMetadata().getTitleList();
115
	// if (!titles.isEmpty()) {
116
	// System.out.println(titles.get(0).getValue());
117
	// }
118
	// }
119
	// }
120

    
121
	private Oaf.Builder getOaf(String trust) {
122
		return Oaf.newBuilder().setKind(Kind.entity).setDataInfo(OafTest.getDataInfo(trust)).setTimestamp(System.currentTimeMillis());
123
	}
124

    
125
	private OafEntity.Builder getEntity(String id, String originalId, String pid) {
126
		Builder entity = OafEntity.newBuilder().setType(Type.result).setId(id).addOriginalId(originalId != null ? originalId : UUID.randomUUID().toString());
127

    
128
		if (pid != null) {
129
			entity.addPid(OafTest.getStructuredproperty(pid, "class", "scheme"));
130
		}
131

    
132
		return entity;
133
	}
134

    
135
}
(2-2/3)