Revision 52988
Added by Michele Artini over 5 years ago
modules/dnet-mapreduce-jobs/branches/broker_events/install.sh | ||
---|---|---|
1 |
#!/bin/bash
# Builds dnet-mapreduce-jobs and installs the jar-with-dependencies assembly
# into the local Maven repository under the dnet-mapreduce-jobs-assembly artifactId.
set -e  # fail fast: the original chained commands with ';' and kept going after a failed build

# Read the project version from the POM (xmlstarlet '_:' prefix matches the default POM namespace).
VERSION=$(xmlstarlet sel -t -m "/_:project/_:version" -v "." pom.xml)
echo "using version: $VERSION"

mvn clean install -DskipTests=true
# Drop any previously installed assembly so the install below is a clean replacement.
rm -rf ~/.m2/repository/eu/dnetlib/dnet-mapreduce-jobs-assembly
# Quote "$VERSION" so a malformed version string cannot word-split the -Dfile argument.
mvn assembly:assembly -DskipTests=true && mvn install:install-file -Dfile="target/dnet-mapreduce-jobs-$VERSION-jar-with-dependencies.jar" -DgroupId=eu.dnetlib -DartifactId=dnet-mapreduce-jobs-assembly -Dversion="$VERSION" -Dpackaging=jar
|
0 | 9 |
modules/dnet-mapreduce-jobs/branches/broker_events/deploy.info | ||
---|---|---|
1 |
{ |
|
2 |
"type_source": "SVN", |
|
3 |
"goal": "package -U source:jar", |
|
4 |
"url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet50/modules/dnet-mapreduce-jobs/trunk/", |
|
5 |
"deploy_repository": "dnet5-snapshots", |
|
6 |
"version": "5", |
|
7 |
"mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", |
|
8 |
"deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet5-snapshots", |
|
9 |
"name": "dnet-mapreduce-jobs" |
|
10 |
} |
modules/dnet-mapreduce-jobs/branches/broker_events/src/test/java/eu/dnetlib/data/mapreduce/dedup/cc/VertexWritableTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.dedup.cc;

import java.util.TreeSet;

import eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.io.Text;
import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

/**
 * Round-trip JSON serialisation test for {@link VertexWritable}.
 *
 * Created by claudio on 15/10/15.
 */
public class VertexWritableTest {

	@Test
	public void testSerialise() {
		final VertexWritable v = new VertexWritable();

		v.setActivated(true);
		v.setVertexId(new Text("a"));

		// small deterministic edge set: "0" .. "4"
		final TreeSet<Text> edges = new TreeSet<Text>();
		for (int i = 0; i < 5; i++) {
			edges.add(new Text(String.valueOf(i)));
		}
		v.setEdges(edges);

		// assertNotNull reports failures better than assertTrue(x != null)
		assertNotNull(v.toString());
		final String json = v.toJSON();
		assertTrue(StringUtils.isNotBlank(json));

		System.out.println(json);

		// deserialising and re-serialising must reproduce the same JSON
		final VertexWritable v1 = VertexWritable.fromJSON(json);
		final String json1 = v1.toJSON();
		// assertEquals shows both values on failure, unlike assertTrue(equals)
		assertEquals(json, json1);

		System.out.println(json1);
	}

}
modules/dnet-mapreduce-jobs/branches/broker_events/src/test/java/eu/dnetlib/data/mapreduce/dedup/TitleOrderingTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.dedup;

import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.UUID;

import eu.dnetlib.data.mapreduce.hbase.dedup.kv.DNGFKey;
import eu.dnetlib.data.mapreduce.hbase.dli.kv.DliKey;
import org.apache.commons.io.IOUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.springframework.core.io.ClassPathResource;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;

import eu.dnetlib.pace.clustering.NGramUtils;
import eu.dnetlib.pace.config.Type;
import eu.dnetlib.pace.model.Field;
import eu.dnetlib.pace.model.FieldListImpl;
import eu.dnetlib.pace.model.FieldValueImpl;
import eu.dnetlib.pace.model.MapDocument;
import eu.dnetlib.pace.model.MapDocumentComparator;

/**
 * Exercises title-based ordering of {@link MapDocument}s: documents are sorted by title
 * via {@link MapDocumentComparator}, then runs of documents sharing the same normalised
 * title are dropped when the run reaches {@link #MAX_GROUP_SIZE}.
 */
public class TitleOrderingTest {

	/** Groups of equal normalised titles of this size or larger are skipped by simplifyQueue. */
	private static final int MAX_GROUP_SIZE = 5;

	private List<MapDocument> results = Lists.newArrayList();

	@Before
	public void setUp() throws Exception {
		// one MapDocument per line of the fixture file, each carrying a single "title" field
		final List<String> lines = IOUtils.readLines(new ClassPathResource("eu/dnetlib/data/mapreduce/dedup/titles.txt").getInputStream());
		for (final String title : lines) {
			final Map<String, Field> fieldMap = Maps.newHashMap();
			final FieldListImpl list = new FieldListImpl();
			list.add(new FieldValueImpl(Type.String, "title", title));
			fieldMap.put("title", list);
			results.add(new MapDocument("id-" + UUID.randomUUID(), fieldMap));
		}
	}

	@Test
	public void test() {
		final Queue<MapDocument> queue = new PriorityQueue<MapDocument>(100, new MapDocumentComparator("title"));
		queue.addAll(results);

		final Queue<MapDocument> queue2 = simplifyQueue(queue);

		while (!queue2.isEmpty()) {
			final MapDocument doc = queue2.remove();
			System.out.println(doc.values("title").stringValue());
		}
	}

	/**
	 * Drains the given title-ordered queue (it is consumed by this method) and returns a FIFO
	 * queue in which runs of documents with the same normalised title are kept only when the
	 * run is shorter than {@link #MAX_GROUP_SIZE}; longer runs are reported and discarded.
	 *
	 * @param queue the title-ordered source queue
	 * @return the simplified FIFO queue
	 */
	private Queue<MapDocument> simplifyQueue(final Queue<MapDocument> queue) {
		final Queue<MapDocument> q = new LinkedList<MapDocument>();

		String fieldRef = "";
		final List<MapDocument> tempResults = Lists.newArrayList();

		while (!queue.isEmpty()) {
			final MapDocument result = queue.remove();

			if (!result.values("title").isEmpty()) {
				final String field = NGramUtils.cleanupForOrdering(result.values("title").stringValue());
				if (field.equals(fieldRef)) {
					tempResults.add(result);
				} else {
					// title changed: flush the accumulated group and start a new one
					flushGroup(q, tempResults, fieldRef);
					tempResults.clear();
					tempResults.add(result);
					fieldRef = field;
				}
			}
		}
		// flush the trailing group (this logic was duplicated inline in the original version)
		flushGroup(q, tempResults, fieldRef);

		return q;
	}

	/** Appends the group to {@code q} unless it reached {@link #MAX_GROUP_SIZE}, in which case it is logged and dropped. */
	private void flushGroup(final Queue<MapDocument> q, final List<MapDocument> group, final String fieldRef) {
		if (group.size() < MAX_GROUP_SIZE) {
			q.addAll(group);
		} else {
			System.out.println("Skipped field: " + fieldRef + " - size: " + group.size());
		}
	}

	/** DNGFKey ordering: a 'merges' relation sorts before any other relation on the same id. */
	@Test
	public void compareDLIKey() {
		final DNGFKey k1 = DNGFKey.mergesRel("a");
		final DNGFKey k2 = DNGFKey.otherRel("a");

		Assert.assertEquals(-1, k1.compareTo(k2));
		Assert.assertEquals(0, k1.compareTo(k1));
		Assert.assertEquals(1, k2.compareTo(k1));
	}

}
modules/dnet-mapreduce-jobs/branches/broker_events/src/test/java/eu/dnetlib/data/mapreduce/dedup/DNGFMergeTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.dedup; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.UUID; |
|
5 |
|
|
6 |
import com.google.common.base.Predicate; |
|
7 |
import com.google.common.collect.Iterables; |
|
8 |
import com.google.common.collect.Lists; |
|
9 |
import eu.dnetlib.data.proto.DatasetProtos.Dataset; |
|
10 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
11 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
12 |
import eu.dnetlib.data.proto.DNGFProtos.DNGF; |
|
13 |
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity; |
|
14 |
import eu.dnetlib.data.proto.DNGFProtos.DNGFEntity.Builder; |
|
15 |
import eu.dnetlib.data.proto.PublicationProtos.Publication; |
|
16 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
17 |
import eu.dnetlib.data.transform.DNGFEntityMerger; |
|
18 |
import org.junit.Before; |
|
19 |
import org.junit.Test; |
|
20 |
|
|
21 |
import static eu.dnetlib.data.graph.model.DNGFUtils.*; |
|
22 |
|
|
23 |
public class DNGFMergeTest { |
|
24 |
|
|
25 |
private List<DNGF> oafList; |
|
26 |
|
|
27 |
private DNGFEntityMerger merger; |
|
28 |
|
|
29 |
	/**
	 * Builds a fixture list of DNGF entities with different trust levels and partially
	 * overlapping metadata, used by both merge tests below.
	 */
	@Before
	public void setUp() throws Exception {

		merger = new DNGFEntityMerger();
		oafList = Lists.newArrayList();
		// publication, trust 0.1: acceptance date + a "main title"
		oafList.add(getDNGF("0.1").setEntity(
				getEntity("id_1", null, "pid_1").setPublication(
						Publication.newBuilder().setMetadata(
								Publication.Metadata.newBuilder().setDateofacceptance(sf("2012-01-01"))
										.addTitle(sp("vvvv Title", "main title", "dnet:dataCite_title"))
										.setResulttype(simpleQualifier("publication", "dnet:result_typologies"))))).build());
		// publication, trust 0.1: empty acceptance date; shares originalId/pid with the next entry
		oafList.add(getDNGF("0.1").setEntity(
				getEntity("id_2", "originalId_2", "pid_2").setPublication(
						Publication.newBuilder().setMetadata(
								Publication.Metadata.newBuilder().setDateofacceptance(sf(""))
										.addTitle(sp("aaaa Title", "main title", "dnet:dataCite_title"))
										.setResulttype(simpleQualifier("publication", "dnet:result_typologies"))))).build());
		// publication, trust 0.2: carries only a "sub title"
		oafList.add(getDNGF("0.2").setEntity(
				getEntity("id_3", "originalId_2", "pid_2").setPublication(
						Publication.newBuilder().setMetadata(
								Publication.Metadata.newBuilder().addTitle(sp("cccc Title", "sub title", "dnet:dataCite_title"))
										.setResulttype(simpleQualifier("publication", "dnet:result_typologies"))))).build());

		// publication, trust 0.3: publisher only, no title
		oafList.add(getDNGF("0.3").setEntity(
				getEntity("id_$", null, "pid_3").setPublication(
						Publication.newBuilder().setMetadata(
								Publication.Metadata.newBuilder().setPublisher(sf("AMER CHEMICAL SOCXXXXXXXXXXXXXXXXX"))
										.setResulttype(simpleQualifier("publication", "dnet:result_typologies"))))).build());
		// dataset (not publication), trust 0.5: richest metadata, no pid
		oafList.add(getDNGF("0.5").setEntity(
				getEntity("id_5", null, null).setDataset(
						Dataset.newBuilder().setMetadata(
								Dataset.Metadata.newBuilder().addTitle(sp("hhhh title", "main title", "dnet:dataCite_title"))
										.setPublisher(sf("AMER CHEMICAL SOC X"))
										.setResulttype(simpleQualifier("publication", "dnet:result_typologies")).setStoragedate(sf("2012-11-18"))
										.setLanguage(simpleQualifier("eng", "dnet:languages")).addDescription(sf("original description")))))
				.build());
		// publication, trust 0.6: description only
		oafList.add(getDNGF("0.6").setEntity(
				getEntity("id_6", null, "pid_6").setPublication(
						Publication.newBuilder().setMetadata(
								Publication.Metadata.newBuilder().setResulttype(simpleQualifier("publication", "dnet:result_typologies"))
										.addDescription(sf("new description"))))).build());
	}
|
71 |
|
|
72 |
	/**
	 * Merges all fixture messages with plain protobuf mergeFrom semantics, then reduces the
	 * accumulated title list to the last title classified as "main title".
	 */
	@Test
	public void test_merge() {

		final DNGF.Builder builder = DNGF.newBuilder();

		// field-wise protobuf merge: repeated fields (e.g. titles) are concatenated
		for (final DNGF oaf : oafList) {
			builder.mergeFrom(oaf);
		}

		final Publication.Metadata.Builder metadata = builder.getEntityBuilder().getPublicationBuilder().getMetadataBuilder();
		// keep only the titles whose qualifier classname is "main title"
		final Iterable<StructuredProperty> filter = Iterables.filter(metadata.getTitleList(), new Predicate<StructuredProperty>() {

			@Override
			public boolean apply(final StructuredProperty sp) {
				return (sp.getQualifier() != null) && sp.getQualifier().getClassname().equals("main title");
			}
		});

		final StructuredProperty last = Iterables.getLast(filter);

		// replace the merged title list with the single surviving main title
		metadata.clearTitle().addAllTitle(Lists.newArrayList(last));

		// NOTE(review): this test only prints the merged message and asserts nothing;
		// consider asserting on the expected surviving title.
		System.out.println(builder.build().toString());
	}
|
96 |
|
|
97 |
	/**
	 * Merges the fixture list through {@link DNGFEntityMerger#mergeEntities}.
	 * NOTE(review): prints the result without asserting on it.
	 */
	@Test
	public void test_merger() {

		final DNGF merge = merger.mergeEntities(null, "id", oafList).build();

		System.out.println(merge.toString());
	}
|
104 |
|
|
105 |
// @Test |
|
106 |
// public void test_sort() { |
|
107 |
// Queue<Publication> q = new PriorityQueue<Publication>(3, DedupReducer.cmp); |
|
108 |
// for (DNGF oaf : oafList) { |
|
109 |
// q.add(oaf.getEntity().getPublication()); |
|
110 |
// } |
|
111 |
// |
|
112 |
// while (!q.isEmpty()) { |
|
113 |
// Publication r = q.remove(); |
|
114 |
// List<StructuredProperty> titles = r.getMetadata().getTitleList(); |
|
115 |
// if (!titles.isEmpty()) { |
|
116 |
// System.out.println(titles.get(0).getValue()); |
|
117 |
// } |
|
118 |
// } |
|
119 |
// } |
|
120 |
|
|
121 |
	/**
	 * Creates a DNGF builder of kind 'entity' with the given trust level and the current
	 * timestamp as last-update time.
	 *
	 * @param trust trust value forwarded to the dataInfo (see {@code di(trust)})
	 */
	private DNGF.Builder getDNGF(final String trust) {
		return DNGF.newBuilder().setKind(Kind.entity).setDataInfo(di(trust)).setLastupdatetimestamp(System.currentTimeMillis());
	}
|
124 |
|
|
125 |
	/**
	 * Creates a publication-typed entity builder.
	 *
	 * @param id         entity identifier
	 * @param originalId original identifier; when null a random UUID is used instead
	 * @param pid        persistent identifier; when null no pid is added
	 */
	private DNGFEntity.Builder getEntity(final String id, final String originalId, final String pid) {
		final Builder entity =
				DNGFEntity.newBuilder().setType(Type.publication).setId(id).addOriginalId(originalId != null ? originalId : UUID.randomUUID().toString());

		if (pid != null) {
			// fixed qualifier values: only the pid value varies across fixtures
			entity.addPid(sp(pid, "class", "scheme"));
		}

		return entity;
	}
|
135 |
|
|
136 |
} |
modules/dnet-mapreduce-jobs/branches/broker_events/src/test/java/eu/dnetlib/data/mapreduce/hbase/index/config/ContextMapperTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.index.config;

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Map.Entry;

import org.apache.commons.io.IOUtils;
import org.dom4j.DocumentException;
import org.junit.Before;
import org.junit.Test;

/**
 * Sanity checks on a {@link ContextMapper} built from the 'context.xml' classpath fixture.
 */
public class ContextMapperTest {

	private ContextMapper contextMapper;

	/** Loads the raw XML of the test context definition from the classpath. */
	public static String loadContext() throws IOException {
		return IOUtils.toString(ContextMapperTest.class.getResource("context.xml").openStream());
	}

	@Before
	public void setUp() throws Exception {
		contextMapper = ContextMapper.fromXml(loadContext());
		assertNotNull(contextMapper);
	}

	@Test
	public void test() throws DocumentException {
		assertTrue(contextMapper.size() > 0);

		// BUG FIX: the original wrapped these assertions in 'catch (Throwable)' with a
		// println, which swallowed AssertionError and made the test pass unconditionally.
		for (final Entry<String, ContextDef> entry : contextMapper.entrySet()) {
			final ContextDef def = entry.getValue();

			assertNotNull(def.getId());
			assertNotNull(def.getLabel());
			assertNotNull(def.getName());

			assertFalse(def.getId().isEmpty());
			assertFalse(def.getLabel().isEmpty());
			assertFalse(def.getName().isEmpty());

			// only nodes named "context" are required to carry a non-empty type
			if (def.getName().equals("context")) {
				assertNotNull(def.getType());
				assertFalse(def.getType().isEmpty());
			}
		}
	}

}
modules/dnet-mapreduce-jobs/branches/broker_events/src/test/java/eu/dnetlib/data/mapreduce/hbase/index/config/IndexConfigTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.index.config;

import eu.dnetlib.data.graph.utils.RelDescriptor;
import eu.dnetlib.data.proto.TypeProtos.Type;
import eu.dnetlib.data.transform.OntologyLoader;
import org.apache.commons.io.IOUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.junit.Test;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import static org.junit.Assert.*;

/**
 * Verifies that the entity grouper configuration profile parses into a complete {@link IndexConfig}.
 */
public class IndexConfigTest {

	private final static String entityGrouperClassPath = "/eu/dnetlib/bootstrap/profiles/EntityGrouperConfigurationDSResources/EntityGrouperConfigurationDSResourceType/c2b7fa8b-a3a3-439b-9799-02ea17e4d598.xml";

	public static String loadConfiguration() {
		return loadConfiguration(entityGrouperClassPath);
	}

	/**
	 * Reads the grouper profile from the classpath and returns the trimmed text of its //GROUP element.
	 * BUG FIX: the original leaked the InputStream and printed the stack trace before rethrowing;
	 * the stream is now always closed and the cause is preserved in the RuntimeException.
	 *
	 * @param classPathResource absolute classpath location of the profile
	 */
	private static String loadConfiguration(final String classPathResource) {
		final InputStream profile = OntologyLoader.class.getResourceAsStream(classPathResource);
		try {
			final Document doc = new SAXReader().read(profile);
			return doc.valueOf("//GROUP").trim();
		} catch (DocumentException e) {
			throw new RuntimeException(e);
		} finally {
			IOUtils.closeQuietly(profile);
		}
	}

	/** Builds a ContextMapper from the 'context.xml' classpath fixture (stream closed on exit). */
	public static ContextMapper loadContextFromCp() throws IOException, DocumentException {
		final InputStream is = ContextMapper.class.getResourceAsStream("context.xml");
		try {
			return ContextMapper.fromXml(IOUtils.toString(new InputStreamReader(is)));
		} finally {
			IOUtils.closeQuietly(is);
		}
	}

	@Test
	public void testParse() {
		final String config = loadConfiguration(entityGrouperClassPath);
		final IndexConfig conf = IndexConfig.load(config);
		assertNotNull(conf);
	}

	@Test
	public void testLoadEntityConfTable() {
		final String config = loadConfiguration(entityGrouperClassPath);
		final IndexConfig conf = IndexConfig.load(config);

		final EntityConfigTable map = conf.getConfigMap();
		assertNotNull(map);
		assertTrue(map.size() > 0);
		// every entity type must be present, and each of its links fully described
		for (final Type t : Type.values()) {
			assertTrue(map.keySet().contains(t));
			assertNotNull(map.get(t).getIndexDuplicates());
			for (final LinkDescriptor ld : map.get(t).getLinks().values()) {
				assertNotNull(ld.getFields());
				assertNotNull(ld.getTarget());

				final RelDescriptor relDescriptor = ld.getRelDescriptor();
				assertNotNull(relDescriptor);
				assertNotNull(relDescriptor.getCode());
				assertFalse(relDescriptor.getCode().isEmpty());
				assertNotNull(relDescriptor.getOntologyCode());
				assertNotNull(relDescriptor.getTermCode());
			}
		}
	}

}
|
0 | 82 |
modules/dnet-mapreduce-jobs/branches/broker_events/src/test/java/eu/dnetlib/data/mapreduce/hbase/oai/OAIFeedMapperTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.oai; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertFalse; |
|
4 |
import static org.junit.Assert.assertNotNull; |
|
5 |
import static org.junit.Assert.assertTrue; |
|
6 |
|
|
7 |
import java.io.IOException; |
|
8 |
import java.text.ParseException; |
|
9 |
import java.util.Date; |
|
10 |
|
|
11 |
import org.apache.commons.io.IOUtils; |
|
12 |
import org.apache.hadoop.mapreduce.Counter; |
|
13 |
import org.apache.solr.common.util.DateUtil; |
|
14 |
import org.dom4j.DocumentException; |
|
15 |
import org.junit.Before; |
|
16 |
import org.junit.Test; |
|
17 |
import org.mockito.Matchers; |
|
18 |
import org.mockito.Mock; |
|
19 |
import org.mockito.Mockito; |
|
20 |
import org.mockito.MockitoAnnotations; |
|
21 |
import org.springframework.core.io.ClassPathResource; |
|
22 |
import org.springframework.core.io.Resource; |
|
23 |
|
|
24 |
import com.google.common.collect.ArrayListMultimap; |
|
25 |
import com.google.common.collect.Lists; |
|
26 |
import com.google.common.collect.Multimap; |
|
27 |
import com.mongodb.DBCollection; |
|
28 |
import com.mongodb.DBObject; |
|
29 |
|
|
30 |
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfiguration; |
|
31 |
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfigurationReader; |
|
32 |
import eu.dnetlib.data.mapreduce.hbase.oai.config.OAIConfigurationStringReader; |
|
33 |
import eu.dnetlib.data.mapreduce.hbase.oai.utils.MongoSetCollection; |
|
34 |
import eu.dnetlib.data.mapreduce.hbase.oai.utils.RecordFieldsExtractor; |
|
35 |
import eu.dnetlib.miscutils.datetime.DateUtils; |
|
36 |
|
|
37 |
public class OAIFeedMapperTest { |
|
38 |
|
|
39 |
private OaiFeedMapper oaiFeedMapper; |
|
40 |
|
|
41 |
/** File path where to find the XML configuration profile **/ |
|
42 |
private String pathToProfile = "eu/dnetlib/data/mapreduce/hbase/oai/config/OAIPublisherConfiguration-1.xml"; |
|
43 |
|
|
44 |
private OAIConfigurationStringReader oaiConfigurationReader; |
|
45 |
private OAIConfiguration oaiConfiguration; |
|
46 |
|
|
47 |
@Mock |
|
48 |
private MongoSetCollection mongoSetCollection; |
|
49 |
@Mock |
|
50 |
private DBCollection discardedCollection; |
|
51 |
@SuppressWarnings("rawtypes") |
|
52 |
@Mock |
|
53 |
private org.apache.hadoop.mapreduce.Mapper.Context context; |
|
54 |
@Mock |
|
55 |
private Counter counter; |
|
56 |
|
|
57 |
private Date feedDate; |
|
58 |
private String objId1 = "oai:dnet:openaire____::2fa6b215ace86e409dde3ba4b2a6b504"; |
|
59 |
private String goodRecord = "<?xml version=\"1.0\"?>\n<record>\n <result xmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n xmlns:dnet=\"eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions\"\n xmlns:dr=\"http://www.driver-repository.eu/namespace/dr\"\n xmlns:dri=\"http://www.driver-repository.eu/namespace/dri\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">\n <header>\n <dri:objIdentifier>openaire____::2fa6b215ace86e409dde3ba4b2a6b504</dri:objIdentifier>\n <dri:repositoryId/>\n <dri:dateOfCollection>2013-10-09</dri:dateOfCollection>\n </header>\n <metadata>\n <oaf:entity xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" \n\t\t xmlns:oaf=\"http://namespace.openaire.eu/oaf\" \n\t\t xsi:schemaLocation=\"http://namespace.openaire.eu/oaf http://www.openaire.eu/schema/0.1/oaf-0.1.xsd\">\n\t\t<oaf:datasource>\n\t\t\t<officialname>The Internet Journal of Orthopedic Surgery</officialname><englishname>The Internet Journal of Orthopedic Surgery</englishname><websiteurl>http://www.ispub.com/journal/the-internet-journal-of-orthopedic-surgery/</websiteurl><accessinfopackage/><namespaceprefix>issn15312968</namespaceprefix><datasourcetypeui classid=\"pubsrepository::journal\" classname=\"pubsrepository::journal\" schemeid=\"dnet:datasource_typologies\" schemename=\"dnet:datasource_typologies\"/><datasourcetype classid=\"pubsrepository::journal\" classname=\"pubsrepository::journal\" schemeid=\"dnet:datasource_typologies\" schemename=\"dnet:datasource_typologies\"/><openairecompatibility classid=\"notCompatible\" classname=\"notCompatible\" schemeid=\"dnet:compatibilityLevel\" schemename=\"dnet:compatibilityLevel\"/><latitude>0.0</latitude><longitude>0.0</longitude><subjects/><policies name=\"\" 
id=\"\"/><logourl/><contactemail/><dateofvalidation/><description/><odnumberofitems/><odnumberofitemsdate/><odpolicies/><odlanguages/><odcontenttypes/><releasestartdate/><releaseenddate/><missionstatementurl/><dataprovider>false</dataprovider><serviceprovider>false</serviceprovider><databaseaccesstype/><datauploadtype/><databaseaccessrestriction/><datauploadrestriction/><versioning>false</versioning><citationguidelineurl/><qualitymanagementkind/><pidsystems/><certificates/><originalId>openaire____::issn15312968</originalId><collectedfrom name=\"DOAJ-Articles\" id=\"driver______::bee53aa31dc2cbb538c10c2b65fa5824\"/><pid/><datainfo><inferred>false</inferred><deletedbyinference>false</deletedbyinference><trust>0.9</trust><inferenceprovenance/><provenanceaction classid=\"UNKNOWN\" classname=\"UNKNOWN\" schemeid=\"dnet:provenanceActions\" schemename=\"dnet:provenanceActions\"/></datainfo>\n\t\t <rels>\n\t\t </rels>\n\t\t <children>\n\t\t </children>\n\t\t</oaf:datasource>\n </oaf:entity>\n </metadata>\n </result>\n</record>"; |
|
60 |
private String dedupedRecord = "dedupedRecord.xml"; |
|
61 |
private String representativeRecord = "representativeRecord.xml"; |
|
62 |
|
|
63 |
	/**
	 * Wires an {@link OaiFeedMapper} with a real OAI configuration (read from the classpath
	 * profile) and Mockito mocks for the Mongo collections and the Hadoop context/counters.
	 */
	@Before
	public void setUp() throws Exception {
		// inject the @Mock fields declared above
		MockitoAnnotations.initMocks(this);
		oaiFeedMapper = new OaiFeedMapper();

		Resource resource = new ClassPathResource(pathToProfile);
		// setting up the parser and the profile as a string
		String configurationProfile = IOUtils.toString(resource.getInputStream());

		System.out.println("oaiConfiguration:\n" + configurationProfile);
		oaiConfigurationReader = new OAIConfigurationStringReader(configurationProfile);

		oaiConfiguration = oaiConfigurationReader.getOaiConfiguration();

		// feed date: parse "now" back from its ISO-8601 representation
		String feedDateString = DateUtils.now_ISO8601();
		try {
			feedDate = DateUtil.parseDate(feedDateString);
		} catch (ParseException e) {
			e.printStackTrace(System.err);
			throw new RuntimeException(e);
		}

		oaiFeedMapper.setFeedDate(feedDate);
		oaiFeedMapper.setMongoSetCollection(mongoSetCollection);
		oaiFeedMapper.setOaiConfiguration(oaiConfiguration);
		oaiFeedMapper.setOaiConfigurationReader(oaiConfigurationReader);
		oaiFeedMapper.setDiscardedCollection(discardedCollection);
		oaiFeedMapper.setSkipDuplicates(true);
		oaiFeedMapper.setDuplicateXPath("//entity//datainfo/deletedbyinference[./text() = 'true']");

		// mock behaviour: set-name normalisation, counters and the discard sink are all no-ops
		Mockito.when(mongoSetCollection.normalizeSetSpec(Matchers.anyString())).thenReturn("aNormalisedSetName");
		Mockito.doNothing().when(counter).increment(Matchers.anyLong());
		Mockito.when(context.getCounter(Matchers.anyString(), Matchers.anyString())).thenReturn(counter);
		Mockito.when(discardedCollection.insert((DBObject) Matchers.any())).thenReturn(null);

	}
|
99 |
|
|
100 |
	/** createBasicObject on the inline 'goodRecord' fixture must yield a non-null Mongo object. */
	@Test
	public void testCreateBasicObject() throws DocumentException, IOException, InterruptedException {
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
		Multimap<String, String> parsedRecord = parser.extractFields(goodRecord, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
		DBObject obj = oaiFeedMapper.createBasicObject(objId1, goodRecord, parsedRecord, context);
		// NOTE that LAST_COLLECTION_DATE_FIELD, DATESTAMP_FIELD, UPDATED_FIELD are not set by the method
		// under test but by its caller (handleRecord), because the values to set depend on the record
		// status (NEW|UPDATED|UNCHANGED)
		System.out.println(obj);
		assertNotNull(obj);
	}
|
110 |
|
|
111 |
	/** Same as testCreateBasicObject, but on the representative-record fixture loaded from the classpath. */
	@Test
	public void testCreateBasicObjectRep() throws DocumentException, IOException, InterruptedException {
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
		String repRecordString = IOUtils.toString(this.getClass().getResourceAsStream(representativeRecord));
		Multimap<String, String> parsedRecord = parser.extractFields(repRecordString, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
		DBObject obj = oaiFeedMapper.createBasicObject(objId1, repRecordString, parsedRecord, context);
		// NOTE that LAST_COLLECTION_DATE_FIELD, DATESTAMP_FIELD, UPDATED_FIELD are not set by the method
		// under test but by its caller (handleRecord), because the values to set depend on the record
		// status (NEW|UPDATED|UNCHANGED)
		System.out.println(obj);
		assertNotNull(obj);
	}
|
122 |
|
|
123 |
	/**
	 * A record matching the deleted-by-inference XPath must be rejected by checkRecordFields.
	 */
	@Test
	public void testParseDeduplicated() throws IOException {
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
		String dedupedRecordString = IOUtils.toString(this.getClass().getResourceAsStream(dedupedRecord));
		parser.setSkipDuplicates(true);
		// namespace-agnostic XPath matching records flagged deletedbyinference=true
		parser.setDuplicateXPath("//*[local-name()='entity']//*[local-name()='datainfo']/*[local-name()='deletedbyinference'][./text() = 'true']");
		Multimap<String, String> parsedRecord = parser.extractFields(dedupedRecordString, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
		assertFalse(oaiFeedMapper.checkRecordFields(parsedRecord, context, "x", dedupedRecordString));
	}
|
132 |
|
|
133 |
	/**
	 * With a duplicate XPath ('//x') that matches nothing, the same deduped record must be accepted.
	 */
	@Test
	public void testParseDeduplicated2() throws IOException {
		RecordFieldsExtractor parser = new RecordFieldsExtractor(Lists.newArrayList(oaiConfiguration.getFieldsFor("oaf", "index", "openaire")));
		String dedupedRecordString = IOUtils.toString(this.getClass().getResourceAsStream(dedupedRecord));
		parser.setSkipDuplicates(true);
		// deliberately non-matching XPath: the record is then not considered a duplicate
		parser.setDuplicateXPath("//x");
		Multimap<String, String> parsedRecord = parser.extractFields(dedupedRecordString, oaiConfiguration.getEnrichmentXPathsFor("oaf", "index", "openaire"));
		assertTrue(oaiFeedMapper.checkRecordFields(parsedRecord, context, "x", dedupedRecordString));
	}
|
142 |
|
|
143 |
@Test |
|
144 |
public void testCheckRecordFieldsDeduplicated() { |
|
145 |
Multimap<String, String> recordProps = ArrayListMultimap.create(); |
|
146 |
recordProps.put("duplicate", "true"); |
|
147 |
assertFalse(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord)); |
|
148 |
} |
|
149 |
|
|
150 |
@Test |
|
151 |
public void testCheckRecordFieldsNotDeduplicated() { |
|
152 |
Multimap<String, String> recordProps = ArrayListMultimap.create(); |
|
153 |
recordProps.put("duplicate", "false"); |
|
154 |
recordProps.put(OAIConfigurationReader.ID_FIELD, "10|xxxx"); |
|
155 |
assertTrue(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord)); |
|
156 |
} |
|
157 |
|
|
158 |
@Test |
|
159 |
public void testCheckRecordFieldsEmpty() { |
|
160 |
Multimap<String, String> recordProps = ArrayListMultimap.create(); |
|
161 |
assertFalse(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord)); |
|
162 |
} |
|
163 |
|
|
164 |
@Test |
|
165 |
public void testCheckRecordFieldsNull() { |
|
166 |
Multimap<String, String> recordProps = null; |
|
167 |
assertFalse(oaiFeedMapper.checkRecordFields(recordProps, context, objId1, goodRecord)); |
|
168 |
} |
|
169 |
} |
modules/dnet-mapreduce-jobs/branches/broker_events/src/test/java/eu/dnetlib/data/mapreduce/hbase/oai/RecordFieldsExtractorTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.oai;

import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.io.StringReader;
import java.util.Collection;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.junit.Before;
import org.junit.Test;

import com.google.common.collect.Lists;

import eu.dnetlib.data.mapreduce.hbase.oai.utils.RecordFieldsExtractor;

/**
 * Tests {@link RecordFieldsExtractor#getEnrichedSets}: records matching one of the enrichment
 * XPaths are assigned the '_enriched' variants of their OAI sets, plain records get none.
 */
public class RecordFieldsExtractorTest {

	private String record = "representativeRecord.xml";
	private String record2 = "dedupedRecord.xml";
	private String record3 = "originalRecord.xml";
	private RecordFieldsExtractor fieldExtractor;
	private List<String> enrichmentXPaths = Lists.newArrayList("//subject[./@inferred='true']", "//result/datainfo[./inferenceprovenance='dedup']",
			"//rel[./@inferred='true' and ./@inferenceprovenance != 'dedup']");

	@Before
	public void setUp() throws Exception {
		fieldExtractor = new RecordFieldsExtractor();
	}

	@Test
	public void testEnhanced() throws IOException, DocumentException {
		final Collection<String> sets = enrichedSetsOf(record);
		assertTrue(sets.contains("set1_enriched") && sets.contains("set2_enriched"));
	}

	@Test
	public void testEnhancedDeduped() throws IOException, DocumentException {
		final Collection<String> sets = enrichedSetsOf(record2);
		assertTrue(sets.contains("set1_enriched") && sets.contains("set2_enriched"));
	}

	@Test
	public void testNotEnhanced() throws IOException, DocumentException {
		// an original (non-inferred) record must not be assigned any enriched set
		final Collection<String> sets = enrichedSetsOf(record3);
		assertTrue(sets.isEmpty());
	}

	/**
	 * Loads the given classpath resource, parses it and returns the enriched sets computed for it.
	 * Extracted helper: the three tests above previously triplicated this load/parse/extract sequence.
	 */
	private Collection<String> enrichedSetsOf(final String resource) throws IOException, DocumentException {
		final String recordString = IOUtils.toString(this.getClass().getResourceAsStream(resource));
		final Document doc = new SAXReader().read(new StringReader(recordString));
		final Collection<String> sets = fieldExtractor.getEnrichedSets(doc, enrichmentXPaths, Lists.newArrayList("set1", "set2"));
		System.out.println(sets);
		return sets;
	}

}
modules/dnet-mapreduce-jobs/branches/broker_events/src/test/java/eu/dnetlib/data/mapreduce/hbase/oai/config/OAIConfigurationParserTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.oai.config; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertEquals; |
|
4 |
import static org.junit.Assert.assertNotNull; |
|
5 |
import static org.junit.Assert.assertTrue; |
|
6 |
|
|
7 |
import java.io.IOException; |
|
8 |
import java.util.List; |
|
9 |
import java.util.Map; |
|
10 |
import java.util.Map.Entry; |
|
11 |
|
|
12 |
import org.apache.commons.io.IOUtils; |
|
13 |
import org.apache.commons.lang3.StringUtils; |
|
14 |
import org.apache.commons.logging.Log; |
|
15 |
import org.apache.commons.logging.LogFactory; |
|
16 |
import org.junit.Before; |
|
17 |
import org.junit.Test; |
|
18 |
import org.springframework.core.io.ClassPathResource; |
|
19 |
import org.springframework.core.io.Resource; |
|
20 |
|
|
21 |
import com.google.common.collect.ArrayListMultimap; |
|
22 |
import com.google.common.collect.Lists; |
|
23 |
import com.google.common.collect.Maps; |
|
24 |
import com.google.common.collect.Multimap; |
|
25 |
|
|
26 |
import eu.dnetlib.data.mapreduce.hbase.oai.utils.MDFInfo; |
|
27 |
import eu.dnetlib.data.mapreduce.hbase.oai.utils.PublisherField; |
|
28 |
import eu.dnetlib.data.mapreduce.hbase.oai.utils.SetInfo; |
|
29 |
|
|
30 |
public class OAIConfigurationParserTest { |
|
31 |
|
|
32 |
private static final Log log = LogFactory.getLog(OAIConfigurationParserTest.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
33 |
|
|
34 |
/** Resource under test **/ |
|
35 |
private OAIConfigurationParser parser; |
|
36 |
/** Configuration profile to parse **/ |
|
37 |
private String configurationProfile; |
|
38 |
/** File path where to find the XML configuration profile **/ |
|
39 |
private String pathToProfile = "eu/dnetlib/data/mapreduce/hbase/oai/config/OAIPublisherConfiguration-1.xml"; |
|
40 |
|
|
41 |
/** Expected configuration based on the profile available at the given pathToProfile. **/ |
|
42 |
private OAIConfiguration expectedConfiguration; |
|
43 |
|
|
44 |
@Before |
|
45 |
public void setUp() throws Exception { |
|
46 |
|
|
47 |
// preparing the expected configuration that we will use to perform asserts on the result of parsing |
|
48 |
expectedConfiguration = new OAIConfiguration(); |
|
49 |
expectedConfiguration.setIdScheme("oai"); |
|
50 |
expectedConfiguration.setIdNamespace("dnet"); |
|
51 |
Map<String, SetInfo> setsMap = Maps.newHashMap(); |
|
52 |
setsMap.put("set1", new SetInfo("set1", "set number 1", "test set", "(set =\"aSet\")", true)); |
|
53 |
expectedConfiguration.setSetsMap(setsMap); |
|
54 |
expectedConfiguration.setFieldNames(Lists.newArrayList("title", "objIdentifier", "set")); |
|
55 |
Map<String, MDFInfo> formatsMap = Maps.newHashMap(); |
|
56 |
formatsMap.put("oai_dc", new MDFInfo("oai_dc", "SCHEMA", "NAMESPACE", "oai_dc", "store", "driver", "", true)); |
|
57 |
formatsMap.put("hope", new MDFInfo("hope", "SCHEMA", "NAMESPACE", "hope", "L", "I", "country = IT", "tdsRule", true)); |
|
58 |
expectedConfiguration.setMdFormatsMap(formatsMap); |
|
59 |
List<PublisherField> fields = Lists.newArrayList(); |
|
60 |
String oaidcKey = "oai_dc-store-driver"; |
|
61 |
String hopeKey = "hope-L-I"; |
|
62 |
|
|
63 |
PublisherField titleField = new PublisherField(); |
|
64 |
titleField.setFieldName("title"); |
|
65 |
titleField.setRepeatable(true); |
|
66 |
Multimap<String, String> titleSources = ArrayListMultimap.create(); |
|
67 |
titleSources.put(oaidcKey, "//*[local-name() = 'title']"); |
|
68 |
titleSources.put(oaidcKey, "//*[local-name() ='alternative']"); |
|
69 |
titleSources.put(hopeKey, "//title"); |
|
70 |
titleSources.put(hopeKey, "//titleinfo"); |
|
71 |
titleField.setSources(titleSources); |
|
72 |
|
|
73 |
fields.add(titleField); |
|
74 |
|
|
75 |
PublisherField objIdField = new PublisherField(); |
|
76 |
objIdField.setFieldName("objIdentifier"); |
|
77 |
objIdField.setRepeatable(false); |
|
78 |
Multimap<String, String> objIdSources = ArrayListMultimap.create(); |
|
79 |
objIdSources.put(oaidcKey, "//*[local-name() ='objIdentifier']"); |
|
80 |
objIdField.setSources(objIdSources); |
|
81 |
|
|
82 |
fields.add(objIdField); |
|
83 |
|
|
84 |
PublisherField setField = new PublisherField(); |
|
85 |
setField.setFieldName("set"); |
|
86 |
setField.setRepeatable(true); |
|
87 |
Multimap<String, String> setSources = ArrayListMultimap.create(); |
|
88 |
setSources.put(oaidcKey, "//*[local-name() ='repositoryId']"); |
|
89 |
setField.setSources(setSources); |
|
90 |
|
|
91 |
fields.add(setField); |
|
92 |
|
|
93 |
expectedConfiguration.setFields(fields); |
|
94 |
Resource resource = new ClassPathResource(pathToProfile); |
|
95 |
// setting up the parser and the profile as a string |
|
96 |
parser = new OAIConfigurationParser(); |
|
97 |
configurationProfile = IOUtils.toString(resource.getInputStream()); |
|
98 |
log.info("setUp completed"); |
|
99 |
} |
|
100 |
|
|
101 |
@Test |
|
102 |
public void testGetConfiguration() throws IOException { |
|
103 |
OAIConfiguration result = parser.getConfiguration(configurationProfile); |
|
104 |
assertNotNull(result); |
|
105 |
// asserts on sets |
|
106 |
assertTrue(result.getSetsMap().containsKey("set1")); |
|
107 |
SetInfo expected = expectedConfiguration.getSetsMap().get("set1"); |
|
108 |
SetInfo parsedSet = result.getSetsMap().get("set1"); |
|
109 |
assertEquals(expected.getQuery(), parsedSet.getQuery()); |
|
110 |
assertEquals(expected.getSetDescription(), parsedSet.getSetDescription()); |
|
111 |
assertEquals(expected.getSetName(), parsedSet.getSetName()); |
|
112 |
assertEquals(expected.getSetSpec(), parsedSet.getSetSpec()); |
|
113 |
// asserts on indices |
|
114 |
assertTrue(result.getFieldNames().containsAll(expectedConfiguration.getFieldNames())); |
|
115 |
assertEquals(3, result.getFieldsFor("oai_dc", "store", "driver").size()); |
|
116 |
assertEquals(1, result.getFieldsFor("hope", "L", "I").size()); |
|
117 |
|
|
118 |
// asserts on metadata formats |
|
119 |
assertTrue(result.getMdFormatsMap().containsKey("hope")); |
|
120 |
assertTrue(result.getMdFormatsMap().containsKey("oai_dc")); |
|
121 |
MDFInfo expectedFormat = expectedConfiguration.getMdFormatsMap().get("hope"); |
|
122 |
MDFInfo parsedFormat = result.getMdFormatsMap().get("hope"); |
|
123 |
assertEquals(expectedFormat.getBaseQuery(), parsedFormat.getBaseQuery()); |
|
124 |
assertEquals(expectedFormat.getNamespace(), parsedFormat.getNamespace()); |
|
125 |
assertEquals(expectedFormat.getPrefix(), parsedFormat.getPrefix()); |
|
126 |
assertEquals(expectedFormat.getSchema(), parsedFormat.getSchema()); |
|
127 |
|
|
128 |
assertEquals(expectedFormat.getSourceFormatInterpretation(), parsedFormat.getSourceFormatInterpretation()); |
|
129 |
assertEquals(expectedFormat.getSourceFormatLayout(), parsedFormat.getSourceFormatLayout()); |
|
130 |
assertEquals(expectedFormat.getSourceFormatName(), parsedFormat.getSourceFormatName()); |
|
131 |
assertEquals(expectedFormat.getTransformationRuleID(), parsedFormat.getTransformationRuleID()); |
|
132 |
|
|
133 |
assertEquals(expectedConfiguration.getIdScheme(), result.getIdScheme()); |
|
134 |
assertEquals(expectedConfiguration.getIdNamespace(), result.getIdNamespace()); |
|
135 |
|
|
136 |
for (PublisherField pf : result.getFields()) { |
|
137 |
for (Entry<String, String> e : pf.getSources().entries()) { |
|
138 |
assertTrue(!StringUtils.isBlank(e.getValue())); |
|
139 |
System.out.println(e.getValue()); |
|
140 |
} |
|
141 |
} |
|
142 |
} |
|
143 |
|
|
144 |
@Test |
|
145 |
public void testPrintConfiguration() throws IOException { |
|
146 |
OAIConfiguration result = parser.getConfiguration(configurationProfile); |
|
147 |
System.out.println(result); |
|
148 |
System.out.println("\n\n"); |
|
149 |
for (PublisherField pf : result.getFields()) { |
|
150 |
System.out.println(pf.toString()); |
|
151 |
} |
|
152 |
} |
|
153 |
} |
modules/dnet-mapreduce-jobs/branches/broker_events/src/test/java/eu/dnetlib/data/mapreduce/util/XmlRecordFactoryTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
|
|
5 |
import com.google.common.collect.Iterables; |
|
6 |
import com.google.common.collect.Lists; |
|
7 |
import com.google.protobuf.InvalidProtocolBufferException; |
|
8 |
import com.googlecode.protobuf.format.JsonFormat; |
|
9 |
import eu.dnetlib.data.graph.model.DNGFDecoder; |
|
10 |
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder; |
|
11 |
import eu.dnetlib.data.mapreduce.hbase.index.config.*; |
|
12 |
import eu.dnetlib.data.proto.*; |
|
13 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
14 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
15 |
import eu.dnetlib.data.transform.OntologyLoader; |
|
16 |
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions; |
|
17 |
import eu.dnetlib.miscutils.functional.xml.XMLIndenter; |
|
18 |
import org.apache.commons.codec.binary.Base64; |
|
19 |
import org.junit.Before; |
|
20 |
import org.junit.Test; |
|
21 |
|
|
22 |
import static org.junit.Assert.assertTrue; |
|
23 |
import static eu.dnetlib.data.graph.model.DNGFUtils.*; |
|
24 |
|
|
25 |
public class XmlRecordFactoryTest extends AbstractRecordFactoryTest { |
|
26 |
|
|
27 |
public static final String CITATION_XML = |
|
28 |
"<citations>\n <citation>\n <rawText>[10] M. Foret et al., Phys. Rev. B 66, 024204 (2002).</rawText>\n </citation>\n <citation>\n <rawText>[11] B. Ru\175404\264e et al., Phys. Rev. Lett. 90, 095502 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[12] U. Buchenau et al., Phys. Rev. B 34, 5665 (1986).</rawText>\n </citation>\n <citation>\n <rawText>[13] S.N. Taraskin and S.R. Elliott, J. Phys.: Condens. Mat- ter 11, A219 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[14] B. Hehlen et al., Phys. Rev. Lett. 84, 5355 (2000).</rawText>\n </citation>\n <citation>\n <rawText>[15] N.V. Surotsev et al., J. Phys.: Condens. Matter 10, L113 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[16] D.A. Parshin and C. Laermans, Phys. Rev. B 63, 132203 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[17] V.L. Gurevich et al., Phys. Rev. B 67, 094203 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[18] A. Matic et al., Phys. Rev. Lett. 86, 3803 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[19] E. Rat et al., arXiv:cond-mat/0505558, 23 May 2005.</rawText>\n </citation>\n <citation>\n <rawText>[1] R.C. Zeller and R.O. Pohl, Phys. Rev. B 4, 2029 (1971).</rawText>\n </citation>\n <citation>\n <rawText>[20] C.A. Angell, J. Non-Cryst. Solids 131\20023133, 13 (1991).</rawText>\n </citation>\n <citation>\n <rawText>[21] A.P. Sokolov et al., Phys. Rev. Lett. 71, 2062 (1993).</rawText>\n </citation>\n <citation>\n <rawText>[22] T. Matsuo et al., Solid State Ionics 154-155, 759 (2002).</rawText>\n </citation>\n <citation>\n <rawText>[23] V.K. Malinovsky et al., Europhys. Lett. 11, 43 (1990).</rawText>\n </citation>\n <citation>\n <rawText>[24] J. Lor\250osch et al., J. Non-Cryst. Solids 69, 1 (1984).</rawText>\n </citation>\n <citation>\n <rawText>[25] U. Buchenau, Z. Phys. B 58, 181 (1985).</rawText>\n </citation>\n <citation>\n <rawText>[26] A.F. Io\175400e and A.R. Regel, Prog. Semicond. 
4, 237 (1960).</rawText>\n </citation>\n <citation>\n <rawText>[27] R. Dell\20031Anna et al., Phys. Rev. Lett. 80, 1236 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[28] D. Fioretto et al., Phys. Rev. E 59, 4470 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[29] U. Buchenau et al., Phys. Rev. Lett. 77, 4035 (1996).</rawText>\n </citation>\n <citation>\n <rawText>[2] M. Rothenfusser et al., Phys. Rev. B 27, 5196 (1983).</rawText>\n </citation>\n <citation>\n <rawText>[30] J. Mattsson et al., J. Phys.: Condens. Matter 15, S1259 (2003).</rawText>\n </citation>\n <citation>\n <rawText>[31] T. Scopigno et al., Phys. Rev. Lett. 92, 025503 (2004).</rawText>\n </citation>\n <citation>\n <rawText>[32] M. Foret et al., Phys. Rev. Lett. 81, 2100 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[33] F. Sette et al., Science 280, 1550 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[34] J. Wuttke et al., Phys. Rev. E 52, 4026 (1995).</rawText>\n </citation>\n <citation>\n <rawText>[35] M.A. Ramos et al., Phys. Rev. Lett. 78, 82 (1997).</rawText>\n </citation>\n <citation>\n <rawText>[36] G. Monaco et al., Phys. Rev. Lett. 80, 2161 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[37] A. T\250olle, Rep. Prog. Phys. 64, 1473 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[38] As the straight lines do not cross the origin, this does not 2 imply \1623 \21035 \1651 .</rawText>\n </citation>\n <citation>\n <rawText>[39] A. Matic et al., Europhys. Lett. 54, 77 (2001).</rawText>\n </citation>\n <citation>\n <rawText>[3] S. Hunklinger and W. Arnold, in Physical Acoustics, Vol. XII, W.P. Mason and R.N. Thurston Eds. (Academic Press, N.Y. 1976), p. 155.</rawText>\n </citation>\n <citation>\n <rawText>[40] IXS data are usually not available below \1651co, mostly for experimental reasons. 
E.g., that the rapid onset was not evidenced in vitreous silica [27], is not indicative of its absence but rather of a low qco \21074 1 nm\210221.</rawText>\n </citation>\n <citation>\n <rawText>[41] G. Ruocco et al., Phys. Rev. Lett. 83, 5583 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[42] D. C\1307 iplys et al., J. Physique (Paris) 42, C6-184 (1981).</rawText>\n </citation>\n <citation>\n <rawText>[43] R. Vacher et al., Rev. Sci. Instrum. 51, 288 (1980).</rawText>\n </citation>\n <citation>\n <rawText>[44] R. Vacher et al., arXiv:cond-mat/0505560, 23 May 2005.</rawText>\n </citation>\n <citation>\n <rawText>[45] T.N. Claytor et al., Phys. Rev. B 18, 5842 (1978).</rawText>\n </citation>\n <citation>\n <rawText>[46] M. Arai et al., Physica B 263-264, 268 (1999).</rawText>\n </citation>\n <citation>\n <rawText>[4] R. Vacher et al., J. Non-Cryst. Solids 45, 397 (1981); T.C. Zhu et al., Phys. Rev. B 44, 4281 (1991).</rawText>\n </citation>\n <citation>\n <rawText>[5] J.E. Graebner et al., Phys. Rev. B 34, 5696 (1986).</rawText>\n </citation>\n <citation>\n <rawText>[6] E. Duval and A. Mermet, Phys. Rev. B 58, 8159 (1998).</rawText>\n </citation>\n <citation>\n <rawText>[7] A. Matic et al., Phys. Rev. Lett. 93, 145502 (2004).</rawText>\n </citation>\n <citation>\n <rawText>[8] Often alluded to, e.g. in the Encyclopedia of Materials: Science and Technology, K.H.J. Buschow et al., Eds., Vol. 1 (Elsevier, Oxford, 2001), articles by S.R. Elliott on pp. 171-174 and U. Buchenau on pp. 212-215.</rawText>\n </citation>\n <citation>\n <rawText>[9] E. Rat et al., Phys. Rev. Lett. 83, 1355 (1999).</rawText>\n </citation>\n</citations>"; |
|
29 |
|
|
30 |
public static final String STATISTICS_JSON = |
|
31 |
"[{ \"citationsPerYear\": \"many\", \"anotherCoolStatistic\": \"WoW\", \"nestedStat\": { \"firstNestedStat\" : \"value 1\", \"secondNestedStat\" : \"value 2\"}, \"listingStat\" : [ \"one\", \"two\" ] }]"; |
|
32 |
|
|
33 |
public static final String SCHEMA_LOCATION = "http://namespace.openaire.eu/DNGF http://www.openaire.eu/schema/0.2/DNGF-0.2.xsd"; |
|
34 |
|
|
35 |
private XmlRecordFactory builder; |
|
36 |
|
|
37 |
|
|
38 |
|
|
39 |
@Before |
|
40 |
public void setUp() throws Exception { |
|
41 |
builder = |
|
42 |
new XmlRecordFactory(IndexConfig.load(IndexConfigTest.loadConfiguration()).getConfigMap(), |
|
43 |
ContextMapper.fromXml(ContextMapperTest.loadContext()), OntologyLoader.loadOntologiesFromCp(), |
|
44 |
SCHEMA_LOCATION, true, false, false); |
|
45 |
} |
|
46 |
|
|
47 |
@Test |
|
48 |
public void testJsonProtobuf() { |
|
49 |
final DNGFDecoder decoder = embed(getPublication("id"), Kind.entity); |
|
50 |
final String json = JsonFormat.printToString(decoder.getDNGF()); |
|
51 |
System.out.println(json); |
|
52 |
System.out.println("json size: " + json.length()); |
|
53 |
System.out.println("binary size: " + decoder.getDNGF().toByteArray().length); |
|
54 |
|
|
55 |
final String base64String = Base64.encodeBase64String(decoder.getDNGF().toByteArray()); |
|
56 |
System.out.println("base64 size: " + base64String.length()); |
|
57 |
|
|
58 |
System.out.println("decoded " + JsonFormat.printToString(DNGFDecoder.decode(Base64.decodeBase64(base64String)).getDNGF())); |
|
59 |
} |
|
60 |
|
|
61 |
@Test |
|
62 |
public void testProjectFP7() throws InvalidProtocolBufferException { |
|
63 |
|
|
64 |
final String projectId = "40|ec__________::20012100000000000000000000000000"; |
|
65 |
final String orgId = "20|WOS_________::organizationId000000000000000000"; |
|
66 |
|
|
67 |
builder.setMainEntity(embed(getProjectFP7(projectId, "SP3"), Kind.entity, false, false, "", "corda")); |
|
68 |
builder.addRelation(Type.organization, embed(getProjectOrganization(orgId, projectId, "isParticipant"), Kind.relation)); |
|
69 |
DNGFProtos.DNGFRel projectPerson = getProjectPerson("30|WOS_________::personId000000000000000000000000", projectId, "isContact"); |
|
70 |
builder.addRelation(Type.person, embed(projectPerson, Kind.relation)); |
|
71 |
final String xml = builder.build(); |
|
72 |
|
|
73 |
System.out.println(XMLIndenter.indent(xml)); |
|
74 |
} |
|
75 |
|
|
76 |
@Test |
|
77 |
public void testOrganization() throws InvalidProtocolBufferException { |
|
78 |
|
|
79 |
final String projectId = "40|ec__________::20012100000000000000000000000000"; |
|
80 |
final String orgIdRoot = "20|org_________::organizationIdRoot00000000000000"; |
|
81 |
final String orgIdDup = "20|org_________::organizationIdDup000000000000000"; |
|
82 |
|
|
83 |
builder.setMainEntity(embed(getOrganization(orgIdRoot), Kind.entity, false, false, "", "corda")); |
|
84 |
builder.addRelation(Type.project, embed(getProjectOrganization(projectId, orgIdRoot, "hasParticipant"), Kind.relation)); |
|
85 |
builder.addRelation(Type.datasource, embed(getDatasourceOrganization("10|dts_________::datasourceId00000000000000000000", orgIdRoot, "provides"), Kind.relation)); |
|
86 |
|
|
87 |
builder.addChild(Type.organization, embed(getOrganizationOrganization(orgIdDup, orgIdRoot, "isMergedIn"), Kind.relation)); |
|
88 |
|
|
89 |
final String xml = builder.build(); |
|
90 |
|
|
91 |
System.out.println(XMLIndenter.indent(xml)); |
|
92 |
} |
|
93 |
|
|
94 |
@Test |
|
95 |
public void testResultFP7() throws InvalidProtocolBufferException { |
|
96 |
final String resultId = "50|WOS_________::00010000000000000000000000000000"; |
|
97 |
final String projectId1 = "40|ec__________::20012100000000000000000000000000"; |
|
98 |
final String projectId2 = "40|ec__________::20012200000000000000000000000000"; |
|
99 |
builder.setMainEntity(embed(getPublication(resultId), Kind.entity, false, false, "", "pubmed")); |
|
100 |
builder.addRelation(Type.person, embed(getPublicationPerson("30|WOS_________::0001name000000000000000000000000", resultId, "isAuthorOf"), Kind.relation)); |
|
101 |
builder.addRelation(Type.project, |
|
102 |
embed(getPublicationProject(projectId1, resultId, getProjectFP7(projectId1, "SP3"), "produces"), Kind.relation)); |
|
103 |
builder.addRelation(Type.project, |
|
104 |
embed(getPublicationProject(projectId2, resultId, getProjectFP7(projectId2, "SP2"), "produces"), Kind.relation)); |
|
105 |
builder.addRelation(Type.publication, |
|
106 |
embed(getSimilarityRel("50|WOS_________::00020000000000000000000000000000", resultId, getPublication(resultId), "isAmongTopNSimilarDocuments"), |
|
107 |
Kind.relation)); |
|
108 |
builder.addChild(Type.publication, embed(getDedupRel("50|WOS_________::anotherResultId00000000000000000", resultId, "publication_publication", "isMergedIn"), Kind.relation)); |
|
109 |
// System.err.println(builder.toString()); |
|
110 |
|
|
111 |
System.out.println(XMLIndenter.indent(builder.build())); |
|
112 |
// System.out.println(builder.build()); |
|
113 |
} |
|
114 |
|
|
115 |
@Test |
|
116 |
public void testResultMerged() throws InvalidProtocolBufferException { |
|
117 |
|
|
118 |
|
|
119 |
final String resultId = "50|WOS_________::00010000000000000000000000000000"; |
|
120 |
final String similarResultId = "50|WOS_________::00020000000000000000000000000000"; |
|
121 |
final String projectId1 = "40|EC__________::20012100000000000000000000000000"; |
|
122 |
final String projectId2 = "40|EC__________::99999900000000000000000000000000"; |
|
123 |
builder.setMainEntity(embed(getPublication(resultId), Kind.entity, false, true, "dedup", "pubmed")); |
|
124 |
builder.addRelation(Type.person, embed(getPublicationPerson("50|WOS_________::00010name00000000000000000000000", resultId, "isAuthorOf"), Kind.relation)); |
|
125 |
builder.addRelation(Type.person, embed(getPublicationPerson("50|WOS_________::00020name00000000000000000000000", resultId, "isAuthorOf"), Kind.relation)); |
|
126 |
builder.addRelation(Type.project, |
|
127 |
embed(getPublicationProject(projectId1, resultId, getProjectFP7(projectId1, "SP3"), "produces"), Kind.relation)); |
|
128 |
builder.addRelation(Type.project, |
|
129 |
embed(getPublicationProject(projectId2, resultId, getProjectFP7(projectId2, "SP3"), "produces"), Kind.relation)); |
|
130 |
|
|
131 |
builder.addRelation(Type.publication, embed( |
|
132 |
getSimilarityRel(similarResultId, resultId, getPublication(similarResultId), "isAmongTopNSimilarDocuments"), Kind.relation)); |
|
133 |
|
|
134 |
builder.addChild(Type.publication, embed(getDedupRel("50|WOS_________::anotherResultId00000000000000000", resultId, "publication_publication", "merges"), Kind.relation)); |
|
135 |
final String xml = builder.build(); |
|
136 |
|
|
137 |
System.out.println(XMLIndenter.indent(xml)); |
|
138 |
} |
|
139 |
|
|
140 |
@Test |
|
141 |
public void testDatasource() throws InvalidProtocolBufferException { |
|
142 |
final String datasourceId = "10|WOS_________::datasourceId00000000000000000000"; |
|
143 |
final String orgId = "20|WOS_________::organizationId000000000000000000"; |
|
144 |
|
|
145 |
builder.setMainEntity(embed(getDatasource(datasourceId), Kind.entity, false, false, "", "opendoar")); |
|
146 |
builder.addRelation(Type.organization, embed(getDatasourceOrganization(orgId, datasourceId, "isProvidedBy"), Kind.relation)); |
|
147 |
final String xml = builder.build(); |
|
148 |
|
|
149 |
System.out.println(XMLIndenter.indent(xml)); |
|
150 |
} |
|
151 |
|
|
152 |
@Test |
|
153 |
public void testProjectWT() throws InvalidProtocolBufferException { |
|
154 |
final String projectId = "40|ec__________::20012100000000000000000000000000"; |
|
155 |
builder.setMainEntity(embed(getProjectWT(), Kind.entity, false, false, "", "wellcometrust")); |
|
156 |
builder.addChild(Type.organization, embed(getProjectOrganization(projectId, "20|ec__________::organizationId000000000000000000", "isParticipant"), Kind.relation)); |
|
157 |
final String xml = builder.build(); |
|
158 |
|
|
159 |
System.out.println(XMLIndenter.indent(xml)); |
|
160 |
} |
|
161 |
|
|
162 |
@Test |
|
163 |
public void testResultWT() throws InvalidProtocolBufferException { |
|
164 |
final String resultId = "50|WOS_________::00001000000000000000000000000000"; |
|
165 |
builder.setMainEntity(embed(getPublication(resultId), Kind.entity, false, false, "", "arxiv")); |
|
166 |
builder.addRelation(Type.person, embed(getPublicationPerson( "50|WOS_________::00001name00000000000000000000000", resultId, "isAuthorOf"), Kind.relation)); |
|
167 |
builder.addRelation(Type.project, embed(getPublicationProject("40|wt__________::08753600000000000000000000000000", resultId, getProjectWT(), "produces"), Kind.relation)); |
|
168 |
builder.addChild(Type.publication, embed(getDedupRel( "50|wt__________::anotherResultId00000000000000000", resultId, "publication_publication", "merges"), Kind.relation)); |
|
169 |
final String xml = builder.build(); |
|
170 |
|
|
171 |
System.out.println(XMLIndenter.indent(xml)); |
|
172 |
} |
|
173 |
|
|
174 |
@Test |
|
175 |
public void testUrlFilter() throws InvalidProtocolBufferException { |
|
176 |
|
|
177 |
final List<String> filtered = |
|
178 |
Lists.newArrayList(Iterables.filter(Lists.newArrayList("http://www.google.com", "www.google.com"), AbstractDNetXsltFunctions.urlFilter)); |
|
179 |
|
|
180 |
assertTrue(filtered.size() == 1); |
|
181 |
} |
|
182 |
|
|
183 |
|
|
184 |
} |
|
185 |
|
modules/dnet-mapreduce-jobs/branches/broker_events/src/test/java/eu/dnetlib/data/mapreduce/util/AbstractRecordFactoryTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import com.google.protobuf.Descriptors; |
|
4 |
import com.google.protobuf.InvalidProtocolBufferException; |
|
5 |
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder; |
|
6 |
import eu.dnetlib.data.proto.*; |
|
7 |
|
|
8 |
import static eu.dnetlib.data.graph.model.DNGFUtils.*; |
|
9 |
import static eu.dnetlib.data.graph.model.DNGFUtils.sf; |
|
10 |
import static eu.dnetlib.data.graph.model.DNGFUtils.simpleQualifier; |
|
11 |
|
|
12 |
/** |
|
13 |
* Created by sandro on 1/10/17. |
|
14 |
*/ |
|
15 |
public class AbstractRecordFactoryTest { |
|
16 |
|
|
17 |
public static DNGFProtos.DNGFEntity getPublication(final String id) { |
|
18 |
return getPublicationBuilder(id).build(); |
|
19 |
} |
|
20 |
|
|
21 |
public static DNGFProtos.DNGFEntity.Builder getPublicationBuilder(final String id) { |
|
22 |
return DNGFProtos.DNGFEntity |
|
23 |
.newBuilder() |
|
24 |
.setType(TypeProtos.Type.publication) |
|
25 |
.setId(id) |
|
26 |
.setPublication( |
|
27 |
PublicationProtos.Publication.newBuilder() |
|
28 |
.setMetadata( |
|
29 |
PublicationProtos.Publication.Metadata |
|
30 |
.newBuilder() |
|
31 |
.addTitle( |
|
32 |
sp("Analysis of cell viability in intervertebral disc: Effect of endplate permeability on cell population", |
|
33 |
"main title", "dnet:result_titles")) |
|
34 |
.addTitle(sp("Another title", "alternative title", "dnet:result_titles")) |
|
35 |
.addSubject(sp("Biophysics", "subject", "dnet:result_sujects")) |
|
36 |
.setDateofacceptance(sf("2010-01-01")).addSource(sf("sourceA")).addSource(sf("sourceB")) |
|
37 |
.addContext(context("egi::virtual::970")) |
|
38 |
.addContext(context("egi::classification::natsc::math::applied")) |
|
39 |
.addContext(context("egi::classification::natsc::math")) |
|
40 |
.addContext(context("egi::classification::natsc")) |
|
41 |
.addContext(context("egi::classification")).addContext(context("egi")) |
|
42 |
.addDescription(sf("Responsible for making and maintaining the extracellular matrix ...")) |
|
43 |
.addDescription(sf("Another description ...")).setPublisher(sf("ELSEVIER SCI LTD")) |
|
44 |
.setResulttype(simpleQualifier("publication", "dnet:result_types")) |
|
45 |
.setLanguage(simpleQualifier("eng", "dnet:languages"))) |
|
46 |
.addInstance(instance("10|opendoar____::10", "Uk pubmed")) |
|
47 |
.addInstance(instance("10|opendoar____::10", "arxiv"))) |
|
48 |
.addCollectedfrom(kv("opendoar____::1064", "Oxford University Research Archive")) |
|
49 |
.addPid(sp("doi:74293", "doi", "dnet:pids")).addPid(sp("oai:74295", "oai", "dnet:pids")) |
|
50 |
.setDateofcollection(""); |
|
51 |
} |
|
52 |
|
|
53 |
public static FieldTypeProtos.Instance.Builder instance(final String hostedbyId, final String hostedbyName) { |
|
54 |
return FieldTypeProtos.Instance.newBuilder() |
|
55 |
.setHostedby(kv(hostedbyId, hostedbyName)) |
|
56 |
.setLicence(simpleQualifier("OpenAccess", "dnet:access_modes")) |
|
57 |
.setInstancetype(simpleQualifier("publication", "dnet:result_typologies")) |
|
58 |
.addUrl("webresource url"); |
|
59 |
|
|
60 |
} |
|
61 |
|
|
62 |
public static DNGFProtos.DNGFEntity getProjectFP7(final String projectId, final String fundingProgram) throws InvalidProtocolBufferException { |
|
63 |
return DNGFProtos.DNGFEntity |
|
64 |
.newBuilder() |
|
65 |
.setType(TypeProtos.Type.project) |
|
66 |
.setId(projectId) |
|
67 |
.addCollectedfrom(kv("opendoar_1234", "UK pubmed")) |
|
68 |
.setProject( |
|
69 |
ProjectProtos.Project.newBuilder() |
|
70 |
.setMetadata( |
|
71 |
ProjectProtos.Project.Metadata |
|
72 |
.newBuilder() |
|
73 |
.setAcronym(sf("5CYRQOL")) |
|
74 |
.setTitle(sf("Cypriot Researchers Contribute to our Quality of Life")) |
|
75 |
.setStartdate(sf("2007-05-01")) |
|
76 |
.setEnddate(sf("2007-10-31")) |
|
77 |
.setEcsc39(sf("false")) |
|
78 |
.setContracttype(simpleQualifier("CSA", "ec:FP7contractTypes")) |
|
79 |
.addFundingtree( |
|
80 |
sf("<fundingtree><funder><id>ec__________::EC</id><shortname>EC</shortname><name>European Commission</name></funder><funding_level_2><id>ec__________::EC::FP7::" |
|
81 |
+ fundingProgram |
|
82 |
+ "::PEOPLE</id><description>Marie-Curie Actions</description><name>PEOPLE</name><class>ec:program</class><parent><funding_level_1><id>ec__________::EC::FP7::" |
|
83 |
+ fundingProgram |
|
84 |
+ "</id><description>" |
|
85 |
+ fundingProgram |
|
86 |
+ "-People</description><name>" |
|
87 |
+ fundingProgram |
|
88 |
+ "</name><class>ec:specificprogram</class><parent><funding_level_0><id>ec__________::EC::FP7</id><description>SEVENTH FRAMEWORK PROGRAMME</description><name>FP7</name><parent/><class>ec:frameworkprogram</class></funding_level_0></parent></funding_level_1></parent></funding_level_2></fundingtree>")))) |
|
89 |
.build(); |
|
90 |
} |
|
91 |
|
|
92 |
public static DNGFProtos.DNGFRel getProjectOrganization(final String source, final String target, final String relClass) throws InvalidProtocolBufferException { |
|
93 |
TypeProtos.Type sourceType = DNGFRowKeyDecoder.decode(source).getType(); |
|
94 |
final DNGFProtos.DNGFRel.Builder dngfRel = DNGFProtos.DNGFRel.newBuilder() |
|
95 |
.setSource(source) |
|
96 |
.setSourceType(sourceType) |
|
97 |
.setTarget(target) |
|
98 |
.setTargetType(DNGFRowKeyDecoder.decode(target).getType()) |
|
99 |
.setRelType(simpleQualifier(relClass, "project_organization")) |
|
100 |
.setChild(false); |
|
101 |
|
|
102 |
switch (sourceType) { |
|
103 |
case project: |
|
104 |
dngfRel.setCachedTarget(getProjectFP7(target, "SP3")); |
|
105 |
break; |
|
106 |
case organization: |
|
107 |
dngfRel.setCachedTarget(getOrganization(target)); |
|
108 |
break; |
|
109 |
default: |
|
110 |
throw new IllegalStateException("dafuq"); |
|
111 |
} |
|
112 |
return dngfRel.build(); |
|
113 |
} |
|
114 |
|
|
115 |
public static DNGFProtos.DNGFEntity getOrganization(final String orgId) { |
|
116 |
return DNGFProtos.DNGFEntity |
|
117 |
.newBuilder() |
|
118 |
.setType(TypeProtos.Type.organization) |
|
119 |
.setId(orgId) |
|
120 |
.addCollectedfrom(kv("opendoar_1234", "UK pubmed")) |
|
121 |
.setOrganization( |
|
122 |
OrganizationProtos.Organization.newBuilder().setMetadata( |
|
123 |
OrganizationProtos.Organization.Metadata.newBuilder() |
|
124 |
.setLegalname(sf("CENTRE D'APPUI A LA RECHERCHE ET A LA FORMATION GIE")) |
|
125 |
.setLegalshortname(sf("CAREF")).setWebsiteurl(sf("www.caref-mali.org")) |
|
126 |
.setCountry(simpleQualifier("ML", "dnet:countries")))).build(); |
|
127 |
} |
|
128 |
|
|
129 |
public static DNGFProtos.DNGFRel getProjectPerson(final String sourceId, final String targetId, final String relClass) { |
|
130 |
return DNGFProtos.DNGFRel |
|
131 |
.newBuilder() |
|
132 |
.setSource(sourceId) |
|
133 |
.setTarget(targetId) |
|
134 |
.setSourceType(DNGFRowKeyDecoder.decode(sourceId).getType()) |
|
135 |
.setTargetType(DNGFRowKeyDecoder.decode(targetId).getType()) |
|
136 |
.setRelType(simpleQualifier(relClass, "person_project")) |
|
137 |
.setChild(true) |
|
138 |
.setCachedTarget(getPerson()).build(); |
|
139 |
} |
|
140 |
|
|
141 |
public static DNGFProtos.DNGFEntity getPerson() { |
|
142 |
return DNGFProtos.DNGFEntity |
|
143 |
.newBuilder() |
|
144 |
.setType(TypeProtos.Type.person) |
|
145 |
.setId("WOS:000277866500014_A._Shirazi-Adl") |
|
146 |
.setPerson( |
|
147 |
PersonProtos.Person.newBuilder().setMetadata( |
|
148 |
PersonProtos.Person.Metadata.newBuilder().addSecondnames(sf("Shirazi-Adl")).setFullname(sf("A. Shirazi-Adl")) |
|
149 |
.setEmail(sf("name.surname@gmail.com")).setPhone(sf("12345")).setNationality(simpleQualifier("EN", "dnet:countries")))) |
|
150 |
.build(); |
|
151 |
} |
|
152 |
|
|
153 |
public static DNGFProtos.DNGFRel getDatasourceOrganization(final String source, final String target, final String relClass) throws InvalidProtocolBufferException { |
|
154 |
final DNGFProtos.DNGFRel.Builder oafRel = DNGFProtos.DNGFRel |
|
155 |
.newBuilder() |
|
156 |
.setSource(source) |
|
157 |
.setTarget(target) |
|
158 |
.setSourceType(DNGFRowKeyDecoder.decode(source).getType()) |
|
159 |
.setTargetType(DNGFRowKeyDecoder.decode(target).getType()) |
|
160 |
.setRelType(simpleQualifier(relClass, "datasource_organization")) |
|
161 |
.setChild(false); |
|
162 |
switch (DNGFRowKeyDecoder.decode(source).getType()) { |
|
163 |
case datasource: |
|
164 |
oafRel.setCachedTarget(getOrganization(target)); |
|
165 |
break; |
|
166 |
case organization: |
|
167 |
oafRel.setCachedTarget(getDatasource(target)); |
|
168 |
break; |
|
169 |
default: |
|
170 |
break; |
|
171 |
} |
|
172 |
return oafRel.build(); |
|
173 |
} |
|
174 |
|
|
175 |
public static DNGFProtos.DNGFEntity getDatasource(final String datasourceId) { |
|
176 |
return DNGFProtos.DNGFEntity |
|
177 |
.newBuilder() |
|
178 |
.setType(TypeProtos.Type.datasource) |
|
179 |
.setId(datasourceId) |
|
180 |
.setDatasource( |
|
181 |
DatasourceProtos.Datasource.newBuilder().setMetadata( |
|
182 |
DatasourceProtos.Datasource.Metadata.newBuilder().setOfficialname(sf("officialname")).setEnglishname(sf("englishname")) |
|
183 |
.setWebsiteurl(sf("websiteurl")).setContactemail(sf("contactemail")).addAccessinfopackage(sf("accessinforpackage")) |
|
184 |
.setNamespaceprefix(sf("namespaceprofix")).setDescription(sf("description")).setOdnumberofitems(sf("numberofitems")) |
|
185 |
.setOdnumberofitemsdate(sf("numberofitems date")) |
|
186 |
// .addOdsubjects("subjects") |
|
187 |
.setOdpolicies(sf("policies")).addOdlanguages(sf("languages")).addOdcontenttypes(sf("contenttypes")) |
|
188 |
.setDatasourcetype(simpleQualifier("type class", "type scheme")))).build(); |
|
189 |
} |
|
190 |
|
|
191 |
////////////// |
|
192 |
|
|
193 |
public static DNGFProtos.DNGFRel getOrganizationOrganization(final String source, final String target, final String relClass) { |
|
194 |
final DNGFProtos.DNGFRel.Builder oafRel = DNGFProtos.DNGFRel |
|
195 |
.newBuilder() |
|
196 |
.setSource(source) |
|
197 |
.setTarget(target) |
|
198 |
.setSourceType(DNGFRowKeyDecoder.decode(source).getType()) |
|
199 |
.setTargetType(DNGFRowKeyDecoder.decode(target).getType()) |
|
200 |
.setRelType(simpleQualifier(relClass, "organization_organization")) |
|
201 |
.setChild(true); |
|
202 |
|
|
203 |
switch (relClass) { |
|
204 |
case "isMergedIn": |
|
205 |
oafRel.setCachedTarget(getOrganization(source)); |
|
206 |
break; |
|
207 |
case "merges": |
|
208 |
oafRel.setCachedTarget(getOrganization(target)); |
|
209 |
break; |
|
210 |
default: |
|
211 |
break; |
|
212 |
} |
|
213 |
return oafRel.build(); |
|
214 |
} |
|
215 |
|
|
216 |
public static DNGFProtos.DNGFRel getPublicationProject(final String from, final String to, final DNGFProtos.DNGFEntity project, final String relClass) |
|
217 |
throws InvalidProtocolBufferException { |
|
218 |
return DNGFProtos.DNGFRel |
|
219 |
.newBuilder() |
|
220 |
.setSource(from) |
|
221 |
.setTarget(to) |
|
222 |
.setSourceType(DNGFRowKeyDecoder.decode(from).getType()) |
|
223 |
.setTargetType(DNGFRowKeyDecoder.decode(to).getType()) |
|
224 |
.setRelType(simpleQualifier(relClass, "publication_project")) |
|
225 |
.setChild(false) |
|
226 |
.setCachedTarget(project).build(); |
|
227 |
} |
|
228 |
|
|
229 |
public static DNGFProtos.DNGFRel getPublicationPerson(final String from, final String to, final String relClass) |
|
230 |
throws InvalidProtocolBufferException { |
|
231 |
return DNGFProtos.DNGFRel |
|
232 |
.newBuilder() |
|
233 |
.setSource(from) |
|
234 |
.setTarget(to) |
|
235 |
.setSourceType(DNGFRowKeyDecoder.decode(from).getType()) |
|
236 |
.setTargetType(DNGFRowKeyDecoder.decode(to).getType()) |
|
237 |
.setRelType(simpleQualifier(relClass, "person_publication")) |
|
238 |
.setChild(false).build(); |
|
239 |
|
|
240 |
} |
|
241 |
|
|
242 |
public static DNGFProtos.DNGFRel getSimilarityRel(final String sourceId, final String targetId, final DNGFProtos.DNGFEntity result, final String relClass) { |
|
243 |
return DNGFProtos.DNGFRel |
|
244 |
.newBuilder() |
|
245 |
.setSource(sourceId) |
|
246 |
.setTarget(targetId) |
|
247 |
.setSourceType(DNGFRowKeyDecoder.decode(sourceId).getType()) |
|
248 |
.setTargetType(DNGFRowKeyDecoder.decode(targetId).getType()) |
|
249 |
.setRelType(simpleQualifier(relClass, "publication_publication")) |
|
250 |
.setChild(true) |
|
251 |
.setCachedTarget(result) |
|
252 |
.build(); |
|
253 |
} |
|
254 |
|
|
255 |
public static DNGFProtos.DNGFRel getDedupRel(final String source, final String target, final String relType, final String relClass) { |
|
256 |
return DNGFProtos.DNGFRel.newBuilder() |
|
257 |
.setSource(source) |
|
258 |
.setTarget(target) |
|
259 |
.setSourceType(DNGFRowKeyDecoder.decode(source).getType()) |
|
260 |
.setTargetType(DNGFRowKeyDecoder.decode(target).getType()) |
|
261 |
.setRelType(simpleQualifier(relClass, relType)) |
|
262 |
.setChild(true) |
|
263 |
.setCachedTarget(getPublication(target)).build(); |
|
264 |
} |
|
265 |
|
|
266 |
public static DNGFProtos.DNGFEntity getProjectWT() throws InvalidProtocolBufferException { |
|
267 |
return DNGFProtos.DNGFEntity |
|
268 |
.newBuilder() |
|
269 |
.setType(TypeProtos.Type.project) |
|
270 |
.setId("project|wt::087536") |
|
271 |
.addCollectedfrom(kv("wellcomeTrust", "wellcome trust")) |
|
272 |
.setProject( |
|
273 |
ProjectProtos.Project.newBuilder() |
|
274 |
.setMetadata( |
|
275 |
ProjectProtos.Project.Metadata |
|
276 |
.newBuilder() |
|
277 |
.setAcronym(sf("UNKNOWN")) |
|
278 |
.setTitle(sf("Research Institute for Infectious Diseases of Poverty (IIDP).")) |
|
279 |
.setStartdate(sf("2007-05-01")) |
|
280 |
.setEnddate(sf("2007-10-31")) |
|
281 |
.setEcsc39(sf("false")) |
|
282 |
.setContracttype(simpleQualifier("UNKNOWN", "wt:contractTypes")) |
|
283 |
.addFundingtree( |
|
284 |
sf("<fundingtree><funder><id>wt__________::WT</id><shortname>WT</shortname><name>Wellcome Trust</name></funder><funding_level_0><id>wt__________::WT::UNKNOWN</id><description>UNKNOWN</description><name>UNKNOWN</name><class>wt:fundingStream</class><parent/></funding_level_0></fundingtree>")) |
|
285 |
.addFundingtree( |
|
286 |
sf("<fundingtree><funder><id>wt__________::WT</id><shortname>WT</shortname><name>Wellcome Trust</name></funder><funding_level_0><id>wt__________::WT::Technology Transfer</id><description>Technology Transfer</description><name>Technology Transfer</name><class>wt:fundingStream</class><parent/></funding_level_0></fundingtree>")))) |
|
287 |
.build(); |
|
288 |
} |
|
289 |
|
|
290 |
|
|
291 |
|
|
292 |
} |
modules/dnet-mapreduce-jobs/branches/broker_events/src/test/java/eu/dnetlib/data/mapreduce/util/LicenseComparatorTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertTrue; |
|
4 |
|
|
5 |
import org.junit.Before; |
|
6 |
import org.junit.Test; |
|
7 |
|
|
8 |
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; |
|
9 |
|
|
10 |
public class LicenseComparatorTest { |
|
11 |
|
|
12 |
private LicenseComparator lc; |
|
13 |
|
|
14 |
@Before |
|
15 |
public void setUp() { |
|
16 |
lc = new LicenseComparator(); |
|
17 |
} |
|
18 |
|
|
19 |
@Test |
|
20 |
public void test() { |
|
21 |
|
|
22 |
assertTrue(lc.compare(qualifier("OPEN"), qualifier("OPEN")) == 0); |
|
23 |
assertTrue(lc.compare(qualifier("OPEN"), qualifier("EMBARGO")) < 0); |
|
24 |
assertTrue(lc.compare(qualifier("OPEN"), qualifier("RESTRICTED")) < 0); |
|
25 |
assertTrue(lc.compare(qualifier("OPEN"), qualifier("CLOSED")) < 0); |
|
26 |
assertTrue(lc.compare(qualifier("OPEN"), qualifier("UNKNOWN")) < 0); |
|
27 |
|
|
28 |
assertTrue(lc.compare(qualifier("EMBARGO"), qualifier("OPEN")) > 0); |
|
29 |
assertTrue(lc.compare(qualifier("EMBARGO"), qualifier("EMBARGO")) == 0); |
|
30 |
assertTrue(lc.compare(qualifier("EMBARGO"), qualifier("RESTRICTED")) < 0); |
|
31 |
assertTrue(lc.compare(qualifier("EMBARGO"), qualifier("CLOSED")) < 0); |
|
32 |
assertTrue(lc.compare(qualifier("EMBARGO"), qualifier("UNKNOWN")) < 0); |
|
33 |
|
|
34 |
assertTrue(lc.compare(qualifier("RESTRICTED"), qualifier("OPEN")) > 0); |
|
35 |
assertTrue(lc.compare(qualifier("RESTRICTED"), qualifier("EMBARGO")) > 0); |
|
36 |
assertTrue(lc.compare(qualifier("RESTRICTED"), qualifier("RESTRICTED")) == 0); |
|
37 |
assertTrue(lc.compare(qualifier("RESTRICTED"), qualifier("CLOSED")) < 0); |
|
38 |
assertTrue(lc.compare(qualifier("RESTRICTED"), qualifier("UNKNOWN")) < 0); |
|
39 |
|
|
40 |
assertTrue(lc.compare(qualifier("CLOSED"), qualifier("OPEN")) > 0); |
|
41 |
assertTrue(lc.compare(qualifier("CLOSED"), qualifier("EMBARGO")) > 0); |
|
42 |
assertTrue(lc.compare(qualifier("CLOSED"), qualifier("RESTRICTED")) > 0); |
|
43 |
assertTrue(lc.compare(qualifier("CLOSED"), qualifier("CLOSED")) == 0); |
|
44 |
assertTrue(lc.compare(qualifier("CLOSED"), qualifier("UNKNOWN")) < 0); |
|
45 |
|
|
46 |
assertTrue(lc.compare(qualifier("UNKNOWN"), qualifier("OPEN")) > 0); |
|
47 |
assertTrue(lc.compare(qualifier("UNKNOWN"), qualifier("EMBARGO")) > 0); |
|
48 |
assertTrue(lc.compare(qualifier("UNKNOWN"), qualifier("RESTRICTED")) > 0); |
|
49 |
assertTrue(lc.compare(qualifier("UNKNOWN"), qualifier("CLOSED")) > 0); |
|
50 |
assertTrue(lc.compare(qualifier("UNKNOWN"), qualifier("UNKNOWN")) == 0); |
|
51 |
|
|
52 |
} |
|
53 |
|
|
54 |
private Qualifier qualifier(String id) { |
|
55 |
return Qualifier.newBuilder().setClassid(id).build(); |
|
56 |
} |
|
57 |
|
|
58 |
} |
|
0 | 59 |
modules/dnet-mapreduce-jobs/branches/broker_events/src/test/java/eu/dnetlib/data/transform/DLIConvertertTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
import com.google.common.collect.Maps; |
|
5 |
import com.googlecode.protobuf.format.JsonFormat; |
|
6 |
import eu.dnetlib.data.graph.model.DNGFRowKeyDecoder; |
|
7 |
import eu.dnetlib.data.mapreduce.util.dao.HBaseTableDAO; |
|
8 |
import eu.dnetlib.data.proto.DNGFProtos.DNGF; |
|
9 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
10 |
|
|
11 |
import eu.dnetlib.data.proto.dli.Scholix2ObjectProtos; |
|
12 |
import eu.dnetlib.dli.proto.DNGF2Scholix2Converter; |
|
13 |
import eu.dnetlib.dli.proto.DNGFDLISummaryConverter; |
|
14 |
import org.apache.commons.lang3.StringUtils; |
|
15 |
import org.apache.commons.logging.Log; |
|
16 |
import org.apache.commons.logging.LogFactory; |
|
17 |
import org.junit.Before; |
|
18 |
import org.junit.Test; |
|
19 |
|
|
20 |
import java.util.Iterator; |
|
21 |
import java.util.List; |
|
22 |
import java.util.Map; |
|
23 |
import java.util.Map.Entry; |
|
24 |
|
|
25 |
import static eu.dnetlib.data.graph.utils.RelDescriptor.QUALIFIER_SEPARATOR; |
|
26 |
import static eu.dnetlib.data.proto.dli.Scholix2ObjectProtos.*; |
|
27 |
|
|
28 |
/** |
|
29 |
* Created by sandro on 2/13/17. |
|
30 |
*/ |
|
31 |
public class DLIConvertertTest extends AbstractTransformerTest { |
|
32 |
|
|
33 |
private static final Log log = LogFactory.getLog(DLIConvertertTest.class); |
|
34 |
|
|
35 |
private Ontologies ontologies; |
|
36 |
|
|
37 |
@Before
public void setUp() throws Exception {
    // Fresh XSLT transformer factory for each test; ontologies are loaded from the classpath.
    factory = new XsltRowTransformerFactory();
    ontologies = OntologyLoader.loadOntologiesFromCp();
}
|
42 |
|
|
43 |
@Test |
|
44 |
public void testLinkPangaeaDLI() throws Exception { |
|
45 |
|
|
46 |
final List<Row> rows = Lists.newArrayList(); |
Also available in: Unified diff