Revision 57227
Added by Claudio Atzori almost 5 years ago
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/deploy.info | ||
---|---|---|
1 |
{"type_source": "SVN", "goal": "package -U source:jar", |
|
2 |
"url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-openaireplus-mapping-utils/trunk/", "deploy_repository": "dnet45-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots", "name": "dnet-openaireplus-mapping-utils"} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/openaire/exporter/model/ProjectTest.java | ||
---|---|---|
1 |
package eu.dnetlib.openaire.exporter.model; |
|
2 |
|
|
3 |
import org.junit.Before; |
|
4 |
import org.junit.Test; |
|
5 |
|
|
6 |
import static org.junit.Assert.assertEquals; |
|
7 |
|
|
8 |
public class ProjectTest { |
|
9 |
|
|
10 |
Project pMZOS; |
|
11 |
Project pFP7; |
|
12 |
|
|
13 |
@Before |
|
14 |
public void setUp() throws Exception { |
|
15 |
pMZOS = new Project() |
|
16 |
.setFunder("MZOS") |
|
17 |
.setJurisdiction("HR") |
|
18 |
.setFundingpathid("irb_hr______::MZOS") |
|
19 |
.setAcronym("") |
|
20 |
.setTitle("Project Title") |
|
21 |
.setCode("115-1152437-2500") |
|
22 |
.setStartdate("2007-01-01") |
|
23 |
.setEnddate("2009-01-01"); |
|
24 |
|
|
25 |
pFP7 = new Project() |
|
26 |
.setFunder("EC") |
|
27 |
.setJurisdiction("EU") |
|
28 |
.setFundingpathid("ec__________::EC::FP7::SP1::NMP") |
|
29 |
.setAcronym("REFFIBRE") |
|
30 |
.setTitle("Project Title") |
|
31 |
.setCode("604187") |
|
32 |
.setStartdate("2013-11-01") |
|
33 |
.setEnddate("20015-01-01"); |
|
34 |
} |
|
35 |
|
|
36 |
@Test |
|
37 |
public void testIdNamespaceMZOS(){ |
|
38 |
String ns = pMZOS.getIdnamespace(); |
|
39 |
assertEquals("info:eu-repo/grantAgreement/MZOS//115-1152437-2500/HR", ns); |
|
40 |
} |
|
41 |
|
|
42 |
@Test |
|
43 |
public void testIdNamespaceFP7(){ |
|
44 |
String ns = pFP7.getIdnamespace(); |
|
45 |
assertEquals("info:eu-repo/grantAgreement/EC/FP7/604187/EU", ns); |
|
46 |
} |
|
47 |
|
|
48 |
|
|
49 |
|
|
50 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/openaire/exporter/model/ProjectDetailTest.java | ||
---|---|---|
1 |
package eu.dnetlib.openaire.exporter.model; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.io.StringReader; |
|
5 |
|
|
6 |
import com.google.common.collect.Lists; |
|
7 |
import org.apache.commons.io.IOUtils; |
|
8 |
import org.springframework.core.io.ClassPathResource; |
|
9 |
import org.springframework.core.io.Resource; |
|
10 |
|
|
11 |
import static org.junit.Assert.assertEquals; |
|
12 |
|
|
13 |
/** |
|
14 |
* Created by claudio on 22/09/16. |
|
15 |
*/ |
|
16 |
public class ProjectDetailTest { |
|
17 |
|
|
18 |
private Resource projectsCsv = new ClassPathResource("/eu/dnetlib/openaire/exporter/model/projectDetails.csv"); |
|
19 |
private Resource projectsJson = new ClassPathResource("/eu/dnetlib/openaire/exporter/model/projectDetails.json"); |
|
20 |
|
|
21 |
//@Test |
|
22 |
public void testSerialisationCSV() throws IOException { |
|
23 |
final String csv = IOUtils.toString(projectsCsv.getInputStream()); |
|
24 |
doTest(csv, "csv"); |
|
25 |
} |
|
26 |
|
|
27 |
//@Test |
|
28 |
public void testSerialisationCSV2() throws IOException { |
|
29 |
final ProjectDetail p = new ProjectDetail() |
|
30 |
.setAcronym("acro") |
|
31 |
.setCode("01") |
|
32 |
.setOptional1("op1") |
|
33 |
.setOptional2("op2") |
|
34 |
.setProjectId("project_01") |
|
35 |
.setJsonextrainfo("extraInfo") |
|
36 |
.setFundingPath(Lists.newArrayList("fundingpath1", "fundingpath2")); |
|
37 |
|
|
38 |
doTest(p.asCSV(), "csv"); |
|
39 |
} |
|
40 |
|
|
41 |
//@Test |
|
42 |
public void testSerialisationJSON() throws IOException { |
|
43 |
final String json = IOUtils.toString(projectsJson.getInputStream()); |
|
44 |
doTest(json, "json"); |
|
45 |
} |
|
46 |
|
|
47 |
private void doTest(final String data, final String format) throws IOException { |
|
48 |
final StringReader reader = new StringReader(data); |
|
49 |
for(String line : IOUtils.readLines(reader)) { |
|
50 |
System.out.println("line: " + line); |
|
51 |
ProjectDetail p; |
|
52 |
String s = ""; |
|
53 |
switch (format) { |
|
54 |
case "csv": |
|
55 |
p = ProjectDetail.fromCSV(line); |
|
56 |
s = p.asCSV(); |
|
57 |
break; |
|
58 |
case "json": |
|
59 |
p = ProjectDetail.fromJson(line); |
|
60 |
s = p.asJson(); |
|
61 |
break; |
|
62 |
default: throw new IllegalArgumentException("invalid format: " + format); |
|
63 |
} |
|
64 |
System.out.println("serialisation: " + s); |
|
65 |
assertEquals(s, line); |
|
66 |
} |
|
67 |
} |
|
68 |
|
|
69 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/mapreduce/util/OafDecoderTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertFalse; |
|
4 |
import static org.junit.Assert.assertNotNull; |
|
5 |
|
|
6 |
import java.util.List; |
|
7 |
|
|
8 |
import org.junit.Test; |
|
9 |
|
|
10 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
11 |
import eu.dnetlib.miscutils.functional.xml.IndentXmlString; |
|
12 |
|
|
13 |
public class OafDecoderTest { |
|
14 |
|
|
15 |
@Test |
|
16 |
public void testAsXml() { |
|
17 |
|
|
18 |
final OafDecoder decoder = OafTest.embed(OafTest.getResult("50|id_1"), Kind.entity); |
|
19 |
|
|
20 |
assertNotNull(decoder); |
|
21 |
|
|
22 |
assertNotNull(decoder.asXml()); |
|
23 |
|
|
24 |
System.out.println(IndentXmlString.apply(decoder.asXml())); |
|
25 |
|
|
26 |
} |
|
27 |
|
|
28 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/mapreduce/util/OafRowKeyDecoderTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import org.junit.Test; |
|
4 |
|
|
5 |
public class OafRowKeyDecoderTest { |
|
6 |
|
|
7 |
@Test |
|
8 |
public void test() { |
|
9 |
|
|
10 |
String id1 = "50|acnbad______::0a454baf9c61e63d42fb83ab549f8062"; |
|
11 |
|
|
12 |
OafRowKeyDecoder d = OafRowKeyDecoder.decode(id1); |
|
13 |
|
|
14 |
System.out.println(d.getId()); |
|
15 |
} |
|
16 |
|
|
17 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/mapreduce/util/OafRelDecoderTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import eu.dnetlib.data.proto.DedupProtos.Dedup.RelName; |
|
4 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
|
5 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
6 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
|
7 |
import org.junit.Before; |
|
8 |
import org.junit.Test; |
|
9 |
|
|
10 |
import static org.junit.Assert.assertEquals; |
|
11 |
import static org.junit.Assert.assertNotNull; |
|
12 |
|
|
13 |
public class OafRelDecoderTest { |
|
14 |
|
|
15 |
private OafRel oafRel; |
|
16 |
|
|
17 |
@Before |
|
18 |
public void setUp() { |
|
19 |
oafRel = OafTest.getDedupRel("ID_1", "ID_2", RelType.resultResult, "isMergedIn"); |
|
20 |
} |
|
21 |
|
|
22 |
@Test |
|
23 |
public void testSetClass() { |
|
24 |
|
|
25 |
OafRelDecoder d1 = OafRelDecoder.decode(oafRel); |
|
26 |
|
|
27 |
assertNotNull(d1); |
|
28 |
assertEquals("isMergedIn", d1.getRelClass()); |
|
29 |
|
|
30 |
OafRelDecoder d2 = OafRelDecoder.decode(d1.setClassId("isMergedIn").build()); |
|
31 |
|
|
32 |
assertEquals("isMergedIn", d2.getRelClass()); |
|
33 |
assertEquals("isMergedIn", d2.getRelMetadata().getSemantics().getClassid()); |
|
34 |
assertEquals("isMergedIn", d2.getRelMetadata().getSemantics().getClassname()); |
|
35 |
|
|
36 |
} |
|
37 |
|
|
38 |
@Test |
|
39 |
public void testGetCF() { |
|
40 |
assertEquals("resultResult_dedup_isMergedIn", OafRelDecoder.getCFQ(RelType.resultResult, SubRelType.dedup, RelName.isMergedIn)); |
|
41 |
assertEquals("resultResult_dedup_isMergedIn", OafRelDecoder.getCFQ(RelType.resultResult, SubRelType.dedup, "isMergedIn")); |
|
42 |
} |
|
43 |
|
|
44 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/bulktag/CommunityConfigurationFactoryTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.bulktag; |
|
2 |
|
|
3 |
import com.google.gson.Gson; |
|
4 |
|
|
5 |
import eu.dnetlib.data.bulktag.selectioncriteria.VerbResolver; |
|
6 |
import eu.dnetlib.data.bulktag.selectioncriteria.VerbResolverFactory; |
|
7 |
import junit.framework.Assert; |
|
8 |
import org.apache.commons.io.IOUtils; |
|
9 |
import org.apache.commons.lang3.StringUtils; |
|
10 |
import org.dom4j.DocumentException; |
|
11 |
import org.junit.Before; |
|
12 |
import org.junit.Ignore; |
|
13 |
import org.junit.Test; |
|
14 |
import java.io.IOException; |
|
15 |
import java.lang.reflect.InvocationTargetException; |
|
16 |
import java.util.*; |
|
17 |
|
|
18 |
import static org.junit.Assert.assertEquals; |
|
19 |
import static org.junit.Assert.assertTrue; |
|
20 |
|
|
21 |
/** |
|
22 |
* Created by miriam on 03/08/2018. |
|
23 |
*/ |
|
24 |
|
|
25 |
|
|
26 |
public class CommunityConfigurationFactoryTest { |
|
27 |
|
|
28 |
private String xml; |
|
29 |
private String xml1; |
|
30 |
|
|
31 |
private final VerbResolver resolver = new VerbResolver(); |
|
32 |
|
|
33 |
@Before |
|
34 |
public void setUp() throws IOException, DocumentException { |
|
35 |
xml = IOUtils.toString(getClass().getResourceAsStream("community_configuration.xml")); |
|
36 |
xml1 = IOUtils.toString(getClass().getResourceAsStream("community_configuration_selcrit.xml")); |
|
37 |
|
|
38 |
} |
|
39 |
|
|
40 |
@Test |
|
41 |
public void parseTest() throws DocumentException { |
|
42 |
|
|
43 |
final CommunityConfiguration cc = CommunityConfigurationFactory.newInstance(xml); |
|
44 |
assertEquals(5,cc.size()); |
|
45 |
cc.getCommunityList().forEach(c -> assertTrue(StringUtils.isNoneBlank(c.getId()))); |
|
46 |
|
|
47 |
|
|
48 |
} |
|
49 |
|
|
50 |
@Test |
|
51 |
public void applyVerb() throws InvocationTargetException, IllegalAccessException, NoSuchMethodException, InstantiationException { |
|
52 |
Constraint sc = new Constraint(); |
|
53 |
String verb = "not_contains"; |
|
54 |
sc.setVerb("not_contains"); |
|
55 |
sc.setField("contributor"); |
|
56 |
sc.setValue("DARIAH"); |
|
57 |
sc.setSelection(resolver.getSelectionCriteria(sc.getVerb(),sc.getValue())); |
|
58 |
String metadata = "This work has been partially supported by DARIAH-EU infrastructure"; |
|
59 |
|
|
60 |
Assert.assertFalse(sc.verifyCriteria(metadata)); |
|
61 |
} |
|
62 |
|
|
63 |
|
|
64 |
@Test |
|
65 |
public void loadSelCriteriaTest() throws DocumentException { |
|
66 |
final CommunityConfiguration cc = CommunityConfigurationFactory.newInstance(xml1); |
|
67 |
Map<String,List<String>> param = new HashMap<>(); |
|
68 |
param.put("author",new ArrayList<>(Collections.singletonList("Pippo Pippi"))); |
|
69 |
param.put("description",new ArrayList<>(Collections.singletonList("This work has been partially supported by DARIAH-EU infrastructure"))); |
|
70 |
param.put("contributor",new ArrayList<>(Collections.singletonList("Pallino ha aiutato a scrivere il paper. Pallino lavora per DARIAH"))); |
|
71 |
List<String> comm = cc.getCommunityForDatasource("openaire____::1cfdb2e14977f31a98e0118283401f32",param); |
|
72 |
Assert.assertEquals(1,comm.size()); |
|
73 |
Assert.assertEquals("dariah",comm.get(0)); |
|
74 |
|
|
75 |
} |
|
76 |
|
|
77 |
@Test |
|
78 |
public void test4() throws DocumentException, IOException { |
|
79 |
final CommunityConfiguration cc = CommunityConfigurationFactory.fromJson(IOUtils.toString(getClass().getResourceAsStream("community_configuration_selcrit.json"))); |
|
80 |
cc.toString(); |
|
81 |
|
|
82 |
|
|
83 |
} |
|
84 |
|
|
85 |
@Test |
|
86 |
public void test5() throws IOException, DocumentException { |
|
87 |
|
|
88 |
//final CommunityConfiguration cc = CommunityConfigurationFactory.newInstance(IOUtils.toString(getClass().getResourceAsStream("test.xml"))); |
|
89 |
final CommunityConfiguration cc = CommunityConfigurationFactory.fromJson(IOUtils.toString(getClass().getResourceAsStream("community_configuration.json"))); |
|
90 |
|
|
91 |
System.out.println(cc.toJson()); |
|
92 |
} |
|
93 |
|
|
94 |
|
|
95 |
@Test |
|
96 |
public void test6() { |
|
97 |
String json = "{\"criteria\":[{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}]}"; |
|
98 |
|
|
99 |
// String step1 = "{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}"; |
|
100 |
// |
|
101 |
// Constraint c = new Gson().fromJson(step1, Constraint.class); |
|
102 |
// |
|
103 |
// String step2 = "{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}"; |
|
104 |
// |
|
105 |
// ConstraintEncapsulator ce = new Gson().fromJson(step2,ConstraintEncapsulator.class); |
|
106 |
// |
|
107 |
// |
|
108 |
// String step3 = "{\"ce\":{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}}"; |
|
109 |
// |
|
110 |
// Constraints cons = new Gson().fromJson(step3,Constraints.class); |
|
111 |
// |
|
112 |
// String step4 = "{\"criteria\":[{\"ce\":{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}}]}"; |
|
113 |
// |
|
114 |
// ConstraintsList cl = new Gson().fromJson(step4,ConstraintsList.class); |
|
115 |
// |
|
116 |
// String step5 = "{\"cl\":{\"criteria\":[{\"ce\":{\"constraint\":[{\"verb\":\"contains\",\"field\":\"contributor\",\"value\":\"DARIAH\"}]}}]}}"; |
|
117 |
SelectionConstraints sl = new Gson().fromJson(json,SelectionConstraints.class); |
|
118 |
|
|
119 |
|
|
120 |
} |
|
121 |
|
|
122 |
|
|
123 |
|
|
124 |
|
|
125 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/transform/ParserToProtoIT.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.io.InputStream; |
|
5 |
import java.io.StringReader; |
|
6 |
import java.util.List; |
|
7 |
import java.util.Objects; |
|
8 |
import java.util.Properties; |
|
9 |
import java.util.concurrent.atomic.AtomicInteger; |
|
10 |
import java.util.function.Function; |
|
11 |
import java.util.stream.Collectors; |
|
12 |
import java.util.stream.StreamSupport; |
|
13 |
|
|
14 |
import com.google.protobuf.InvalidProtocolBufferException; |
|
15 |
import com.mongodb.client.MongoCollection; |
|
16 |
import com.mongodb.client.MongoDatabase; |
|
17 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
18 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
19 |
import eu.dnetlib.data.transform.xml2.DatasetToProto; |
|
20 |
import eu.dnetlib.miscutils.collections.Pair; |
|
21 |
import eu.dnetlib.miscutils.datetime.HumanTime; |
|
22 |
import org.apache.commons.io.IOUtils; |
|
23 |
import org.apache.commons.lang3.time.StopWatch; |
|
24 |
import org.apache.commons.logging.Log; |
|
25 |
import org.apache.commons.logging.LogFactory; |
|
26 |
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics; |
|
27 |
import org.bson.Document; |
|
28 |
import org.dom4j.DocumentException; |
|
29 |
import org.dom4j.io.SAXReader; |
|
30 |
import org.junit.Before; |
|
31 |
import org.junit.Ignore; |
|
32 |
import org.junit.Test; |
|
33 |
import org.junit.runner.RunWith; |
|
34 |
import org.springframework.beans.factory.annotation.Autowired; |
|
35 |
import org.springframework.test.context.ContextConfiguration; |
|
36 |
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; |
|
37 |
|
|
38 |
import static org.junit.Assert.assertFalse; |
|
39 |
import static org.junit.Assert.assertNotNull; |
|
40 |
import static org.junit.Assert.assertTrue; |
|
41 |
|
|
42 |
@RunWith(SpringJUnit4ClassRunner.class) |
|
43 |
@ContextConfiguration(classes = { ConfigurationTestConfig.class }) |
|
44 |
public class ParserToProtoIT { |
|
45 |
|
|
46 |
private static final Log log = LogFactory.getLog(ParserToProtoIT.class); |
|
47 |
|
|
48 |
private static final String DATACITE = "datacite"; |
|
49 |
private static final String NARCIS = "narcis"; |
|
50 |
|
|
51 |
private static final int BATCH_SIZE = 10000; |
|
52 |
private static final int LOG_FREQ = 5000; |
|
53 |
private static final int LIMIT = 10000; |
|
54 |
public static final String BODY = "body"; |
|
55 |
|
|
56 |
private static String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/"; |
|
57 |
|
|
58 |
private int batchSize = BATCH_SIZE; |
|
59 |
private int logFreq = LOG_FREQ; |
|
60 |
private int limit = LIMIT; |
|
61 |
|
|
62 |
@Autowired |
|
63 |
private MongoDatabase db; |
|
64 |
|
|
65 |
@Autowired() |
|
66 |
private Properties testProperties; |
|
67 |
|
|
68 |
@Before |
|
69 |
public void setUp() { |
|
70 |
logFreq = Integer.valueOf(testProperties.getProperty("test.logFreq", String.valueOf(LOG_FREQ))); |
|
71 |
batchSize = Integer.valueOf(testProperties.getProperty("test.batchSize", String.valueOf(BATCH_SIZE))); |
|
72 |
limit = Integer.valueOf(testProperties.getProperty("test.limit", String.valueOf(limit))); |
|
73 |
} |
|
74 |
|
|
75 |
@Test |
|
76 |
@Ignore |
|
77 |
public void testParseDataciteWithVTD() throws IOException { |
|
78 |
doTest(s -> new Pair<>(s, new DatasetToProto().apply(s)), DATACITE); |
|
79 |
} |
|
80 |
|
|
81 |
@Test |
|
82 |
@Ignore |
|
83 |
public void testParseDataciteWithXSLT() throws IOException { |
|
84 |
final String xslt = IOUtils.toString(loadFromTransformationProfile("odf2hbase.xml")); |
|
85 |
final XsltRowTransformer transformer = XsltRowTransformerFactory.newInstance(xslt); |
|
86 |
|
|
87 |
doTest(rowToOaf(transformer), DATACITE); |
|
88 |
} |
|
89 |
|
|
90 |
@Test |
|
91 |
@Ignore |
|
92 |
public void testParseNarcisWithXSLT() throws IOException { |
|
93 |
final String xslt = IOUtils.toString(loadFromTransformationProfile("oaf2hbase.xml")); |
|
94 |
final XsltRowTransformer transformer = XsltRowTransformerFactory.newInstance(xslt); |
|
95 |
|
|
96 |
doTest(rowToOaf(transformer), NARCIS); |
|
97 |
} |
|
98 |
|
|
99 |
//// HELPERS |
|
100 |
|
|
101 |
private void doTest(final Function<String, Pair<String, Oaf>> mapper, final String collectionName) { |
|
102 |
final MongoCollection<Document> collection = db.getCollection(collectionName); |
|
103 |
|
|
104 |
final long collectionSize = collection.count(); |
|
105 |
log.info(String.format("found %s records in collection '%s'", collectionSize, collectionName)); |
|
106 |
|
|
107 |
final AtomicInteger read = new AtomicInteger(0); |
|
108 |
final DescriptiveStatistics stats = new DescriptiveStatistics(); |
|
109 |
|
|
110 |
final StopWatch recordTimer = new StopWatch(); |
|
111 |
final StopWatch totalTimer = StopWatch.createStarted(); |
|
112 |
|
|
113 |
StreamSupport.stream(collection.find().batchSize(batchSize).spliterator(), false) |
|
114 |
.limit(limit) |
|
115 |
.peek(d -> { |
|
116 |
if (read.addAndGet(1) % logFreq == 0) { |
|
117 |
log.info(String.format("records read so far %s", read.get())); |
|
118 |
//log.info(String.format("stats so far %s", stats.toString())); |
|
119 |
} |
|
120 |
}) |
|
121 |
.map(d -> (String) d.get("body")) |
|
122 |
.filter(Objects::nonNull) |
|
123 |
.collect(Collectors.toList()) // load them in memory first |
|
124 |
.stream() |
|
125 |
.peek(s -> recordTimer.start()) |
|
126 |
.map(mapper) |
|
127 |
.forEach(pair -> { |
|
128 |
recordTimer.stop(); |
|
129 |
stats.addValue(recordTimer.getTime()); |
|
130 |
recordTimer.reset(); |
|
131 |
|
|
132 |
assertNotNull(pair); |
|
133 |
assertTrue(pair.getValue().hasEntity()); |
|
134 |
|
|
135 |
try { |
|
136 |
final org.dom4j.Document doc = new SAXReader().read(new StringReader(pair.getKey())); |
|
137 |
final OafEntity entity = pair.getValue().getEntity(); |
|
138 |
|
|
139 |
//TODO add more asserts |
|
140 |
assertTrue(entity.getId().contains(doc.valueOf("/*[local-name() = 'record']/*[local-name() = 'header']/*[local-name() = 'objIdentifier']/text()"))); |
|
141 |
|
|
142 |
} catch (DocumentException e) { |
|
143 |
throw new IllegalArgumentException("unable to parse record " + pair.getKey(), e); |
|
144 |
} |
|
145 |
}); |
|
146 |
|
|
147 |
totalTimer.stop(); |
|
148 |
log.info(String.format("processed %s/%s records in %s", read.get(), collectionSize, HumanTime.exactly(totalTimer.getTime()))); |
|
149 |
log.info(stats.toString()); |
|
150 |
} |
|
151 |
|
|
152 |
private Function<String, Pair<String, Oaf>> rowToOaf(final XsltRowTransformer transformer) { |
|
153 |
return xml -> { |
|
154 |
final List<Row> rows = transformer.apply(xml); |
|
155 |
if (rows.isEmpty()) { |
|
156 |
return null; |
|
157 |
} |
|
158 |
|
|
159 |
return rows.stream() |
|
160 |
.filter(row -> row.getColumn(BODY) != null) |
|
161 |
.map(row -> row.getColumn(BODY)) |
|
162 |
.map(c -> c.getValue()) |
|
163 |
.map(b -> { |
|
164 |
try { |
|
165 |
return Oaf.parseFrom(b); |
|
166 |
} catch (InvalidProtocolBufferException e) { |
|
167 |
throw new IllegalStateException(e); |
|
168 |
} |
|
169 |
}) |
|
170 |
.filter(Objects::nonNull) |
|
171 |
.map(oaf -> new Pair<>(xml, oaf)) |
|
172 |
.findFirst() |
|
173 |
.get(); |
|
174 |
}; |
|
175 |
} |
|
176 |
|
|
177 |
private InputStream loadFromTransformationProfile(final String profilePath) { |
|
178 |
log.info("Loading xslt from: " + basePathProfiles + profilePath); |
|
179 |
InputStream profile = getClass().getResourceAsStream(basePathProfiles + profilePath); |
|
180 |
SAXReader saxReader = new SAXReader(); |
|
181 |
org.dom4j.Document doc = null; |
|
182 |
try { |
|
183 |
doc = saxReader.read(profile); |
|
184 |
} catch (DocumentException e) { |
|
185 |
e.printStackTrace(); |
|
186 |
throw new RuntimeException(e); |
|
187 |
} |
|
188 |
String xslt = doc.selectSingleNode("//SCRIPT/CODE/*[local-name()='stylesheet']").asXML(); |
|
189 |
//log.info(xslt); |
|
190 |
return IOUtils.toInputStream(xslt); |
|
191 |
} |
|
192 |
|
|
193 |
|
|
194 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/transform/xml/OpenTrialsXsltFunctionsTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
|
|
5 |
import eu.dnetlib.data.transform.xml.OpenTrialsXsltFunctions.JsonProv; |
|
6 |
import org.junit.After; |
|
7 |
import org.junit.Before; |
|
8 |
import org.junit.Test; |
|
9 |
|
|
10 |
import static org.junit.Assert.assertEquals; |
|
11 |
|
|
12 |
|
|
13 |
/** |
|
14 |
* OpenTrialsXsltFunctions Tester. |
|
15 |
* |
|
16 |
*/ |
|
17 |
public class OpenTrialsXsltFunctionsTest { |
|
18 |
|
|
19 |
private String jsonProv = "[{\"url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT00378508\", \"sourceID\" : \"77eb42c5-0ec7-4e31-963a-5736b66f2d26\", \"sourceName\" : \"ictrp\"},{\"url\" : \"https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true\", \"sourceID\" : \"b389497c-0833-432b-a09b-930526b7b4d4\", \"sourceName\" : \"nct\"}]"; |
|
20 |
private String jsonProvWithNull = "[{\"url\" : \"\", \"sourceID\" : \"77eb42c5-0ec7-4e31-963a-5736b66f2d26\", \"sourceName\" : \"ictrp\"},{\"url\" : \"https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true\", \"sourceID\" : \"b389497c-0833-432b-a09b-930526b7b4d4\", \"sourceName\" : \"nct\"}]"; |
|
21 |
private String jidentifiers = "{112683,NCT00920439}"; |
|
22 |
|
|
23 |
|
|
24 |
private String jsonRecord = "[{\"source_id\" : \"nct\", \"source_url\" : \"https://clinicaltrials.gov/ct2/show/NCT02321059\", \"is_primary\" : true},{\"source_id\" : \"ictrp\", \"source_url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059\", \"is_primary\" : false}]"; |
|
25 |
private String jsonRecordNull = "[{\"source_id\" : \"nct\", \"source_url\" : \"https://clinicaltrials.gov/ct2/show/NCT02321059\"},{\"source_id\" : \"ictrp\", \"source_url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059\", \"is_primary\" : false}]"; |
|
26 |
private String jsonRecordVoid = "[{\"source_id\" : \"\", \"source_url\" : \"\", \"is_primary\" : \"\"}]"; |
|
27 |
private String jsonRecondPrimary = "[{\"source_id\" : \"nct\", \"source_url\" : \"https://clinicaltrials.gov/ct2/show/NCT02321059\", \"is_primary\" : false},{\"source_id\" : \"ictrp\", \"source_url\" : \"http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059\", \"is_primary\" : false}]"; |
|
28 |
|
|
29 |
private String jsonPeopleVoid ="[{\"person_name\" : null, \"person_id\" : null, \"person_role\" : null}]"; |
|
30 |
private String jsonPeopleOne = "[{\"person_name\" : \"Henk Verheul, M.D., PhD\", \"person_id\" : \"116438e9-f8b1-46e5-a1f8-20f851cab73c\", \"person_role\" : \"principal_investigator\"}]"; |
|
31 |
private String jsonPeopleMore = "[{\"person_name\" : \"Henk Verheul, M.D., PhD\", \"person_id\" : \"116438e9-f8b1-46e5-a1f8-20f851cab73c\", \"person_role\" : \"principal_investigator\"},{\"person_name\" : \"Miriam Pippolippo Baglioni, PhD\", \"person_id\" : \"fake\", \"person_role\" : \"principal_investigator\"}]"; |
|
32 |
|
|
33 |
private String jsonOrganizationVoid = "[{\"organization_name\" : null, \"organization_id\" : null, \"organization_role\" : null}]"; |
|
34 |
private String jsonOrganizationOne = "[{\"organization_name\" : \"Södertälje sjukhus AB\", \"organization_id\" : \"15f0d004-b82b-408c-8605-38a57352468d\", \"organization_role\" : \"sponsor\"}]"; |
|
35 |
private String jsonOrganizationMore = "[{\"organization_name\" : \"Södertälje sjukhus AB\", \"organization_id\" : \"15f0d004-b82b-408c-8605-38a57352468d\", \"organization_role\" : \"sponsor\"},{\"organization_name\" : \"Miriam Baglioni AB\", \"organization_id\" : \"fake\", \"organization_role\" : \"primary_sponsor\"}]"; |
|
36 |
|
|
37 |
private String jsonLocationVoid = "[{\"location_name\" : null}]"; |
|
38 |
private String jsonLocationOne = "[{\"location_name\" : \"China\"}]"; |
|
39 |
private String jsonLocationMore = "[{\"location_name\" : \"China\"},{\"location_name\" : \"North Korea\"}]"; |
|
40 |
|
|
41 |
@Before |
|
42 |
public void before() throws Exception { |
|
43 |
} |
|
44 |
|
|
45 |
@After |
|
46 |
public void after() throws Exception { |
|
47 |
} |
|
48 |
|
|
49 |
/** |
|
50 |
* Method: getProvs(String jsonProvList) |
|
51 |
*/ |
|
52 |
@Test |
|
53 |
public void testGetProvs() throws Exception { |
|
54 |
List<JsonProv> list = OpenTrialsXsltFunctions.getProvs(jsonProv); |
|
55 |
assertEquals(2, list.size()); |
|
56 |
} |
|
57 |
|
|
58 |
/** |
|
59 |
* Method: getMainIdentifierURL(String jsonProvList) |
|
60 |
*/ |
|
61 |
@Test |
|
62 |
public void testGetMainIdentifierURL() throws Exception { |
|
63 |
String url = OpenTrialsXsltFunctions.getMainIdentifierURL(jsonProv); |
|
64 |
assertEquals( "http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT00378508", url ); |
|
65 |
url = OpenTrialsXsltFunctions.getMainIdentifierURL(jsonProvWithNull); |
|
66 |
assertEquals("https://www.clinicaltrials.gov/ct2/show/NCT00378508?lup_e=02%2F04%2F2016&lup_s=01%2F01%2F2001&rank=175075&resultsxml=true", url); |
|
67 |
} |
|
68 |
|
|
69 |
@Test |
|
70 |
public void testGetPrimaryRecordUrl(){ |
|
71 |
String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecord); |
|
72 |
assertEquals("https://clinicaltrials.gov/ct2/show/NCT02321059", url); |
|
73 |
} |
|
74 |
|
|
75 |
@Test |
|
76 |
public void testGetPrimaryRecordID(){ |
|
77 |
String id = OpenTrialsXsltFunctions.getPrimaryRecordIdentifier(jsonRecord); |
|
78 |
assertEquals("nct", id); |
|
79 |
} |
|
80 |
|
|
81 |
@Test |
|
82 |
public void testGetPrimaryRecordUrlNull(){ |
|
83 |
String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecordNull); |
|
84 |
assertEquals("https://clinicaltrials.gov/ct2/show/NCT02321059", url); |
|
85 |
} |
|
86 |
|
|
87 |
@Test |
|
88 |
public void testGetPrimaryRecordUrlVoid(){ |
|
89 |
String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecordVoid); |
|
90 |
assertEquals("", url); |
|
91 |
} |
|
92 |
|
|
93 |
@Test |
|
94 |
public void testGetPrimaryRecordUrlNoPrimary(){ |
|
95 |
String url = OpenTrialsXsltFunctions.getPrimaryRecordUrl(jsonRecondPrimary); |
|
96 |
assertEquals("https://clinicaltrials.gov/ct2/show/NCT02321059", url); |
|
97 |
} |
|
98 |
@Test |
|
99 |
public void testGetPrimaryRecordIDNoPrimary(){ |
|
100 |
String id = OpenTrialsXsltFunctions.getPrimaryRecordIdentifier(jsonRecondPrimary); |
|
101 |
assertEquals("nct", id); |
|
102 |
} |
|
103 |
@Test |
|
104 |
public void testGetPrincipalInvestigatorsVoid(){ |
|
105 |
String url = OpenTrialsXsltFunctions.getPrincipalInvestigators(jsonPeopleVoid); |
|
106 |
assertEquals("",url); |
|
107 |
} |
|
108 |
|
|
109 |
|
|
110 |
@Test |
|
111 |
public void testGetPrincipalInvestigatorsOne(){ |
|
112 |
String url = OpenTrialsXsltFunctions.getPrincipalInvestigators(jsonPeopleOne); |
|
113 |
assertEquals("Verheul, Henk", url); |
|
114 |
} |
|
115 |
|
|
116 |
@Test |
|
117 |
public void testGetPrincipalInvestigatorsMore(){ |
|
118 |
String url = OpenTrialsXsltFunctions.getPrincipalInvestigators(jsonPeopleMore); |
|
119 |
assertEquals("Verheul, Henk@@Baglioni, Miriam Pippolippo", url); |
|
120 |
} |
|
121 |
|
|
122 |
|
|
123 |
|
|
124 |
@Test |
|
125 |
public void testgGetTrialOrganizationsVoid(){ |
|
126 |
String url = OpenTrialsXsltFunctions.getTrialOrganizations(jsonOrganizationVoid); |
|
127 |
assertEquals("",url); |
|
128 |
} |
|
129 |
|
|
130 |
|
|
131 |
@Test |
|
132 |
public void testgGetTrialOrganizationsOne(){ |
|
133 |
String url = OpenTrialsXsltFunctions.getTrialOrganizations(jsonOrganizationOne); |
|
134 |
assertEquals("Södertälje sjukhus AB@sponsor", url); |
|
135 |
} |
|
136 |
|
|
137 |
@Test |
|
138 |
public void testgGetTrialOrganizationsMore(){ |
|
139 |
String url = OpenTrialsXsltFunctions.getTrialOrganizations(jsonOrganizationMore); |
|
140 |
assertEquals("Södertälje sjukhus AB@sponsor@@Miriam Baglioni AB@sponsor", url); |
|
141 |
} |
|
142 |
|
|
143 |
@Test |
|
144 |
public void testgGetTrialLocationsVoid(){ |
|
145 |
String url = OpenTrialsXsltFunctions.getTrialLocations(jsonLocationVoid); |
|
146 |
assertEquals("",url); |
|
147 |
} |
|
148 |
|
|
149 |
|
|
150 |
@Test |
|
151 |
public void testgGetTrialLocationsOne(){ |
|
152 |
String url = OpenTrialsXsltFunctions.getTrialLocations(jsonLocationOne); |
|
153 |
assertEquals("China", url); |
|
154 |
} |
|
155 |
|
|
156 |
@Test |
|
157 |
public void testgGetTrialLocationsMore(){ |
|
158 |
String url = OpenTrialsXsltFunctions.getTrialLocations(jsonLocationMore); |
|
159 |
assertEquals("China@@North Korea", url); |
|
160 |
} |
|
161 |
|
|
162 |
@Test |
|
163 |
public void testGetNotPrimaryRecordUrlPrimary(){ |
|
164 |
String url = OpenTrialsXsltFunctions.getNotPrimaryRecordUrl(jsonRecondPrimary); |
|
165 |
assertEquals("http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059", url); |
|
166 |
} |
|
167 |
|
|
168 |
@Test |
|
169 |
public void testGetNotPrimaryRecordUrlVoid(){ |
|
170 |
String url = OpenTrialsXsltFunctions.getNotPrimaryRecordUrl(jsonRecordVoid); |
|
171 |
assertEquals("", url); |
|
172 |
} |
|
173 |
|
|
174 |
@Test |
|
175 |
public void testGetNotPrimaryRecordUrl(){ |
|
176 |
String url = OpenTrialsXsltFunctions.getNotPrimaryRecordUrl(jsonRecord); |
|
177 |
assertEquals("http://apps.who.int/trialsearch/Trial3.aspx?trialid=NCT02321059", url); |
|
178 |
} |
|
179 |
|
|
180 |
|
|
181 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/transform/xml/FWFXsltFunctionsTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml; |
|
2 |
|
|
3 |
import org.junit.After; |
|
4 |
import org.junit.Before; |
|
5 |
import org.junit.Test; |
|
6 |
|
|
7 |
import static org.junit.Assert.assertEquals; |
|
8 |
import static org.junit.Assert.assertNotNull; |
|
9 |
|
|
10 |
/** |
|
11 |
* |
|
12 |
* Created by miriam on 04/05/2017. |
|
13 |
*/ |
|
14 |
public class FWFXsltFunctionsTest { |
|
15 |
private String namesurname ="Gerhard SOMMER"; |
|
16 |
private String noSurname = "Gerhard"; |
|
17 |
private String noName = "SOMMER"; |
|
18 |
private String twoNames = "Gerhard Pippo SOMMER"; |
|
19 |
private String twoSurname = "Gerhard PIPPO SOMMER"; |
|
20 |
private String nonamesurname = ""; |
|
21 |
private String organization ="Universität Linz - Institut für Computational Perception; Universität für Musik und darstellende Kunst Graz - Institut 1: Komposition, Musiktheorie, Musikgeschichte und Dirigieren; Universität Mozarteum Salzburg - Institut für Musikalische Rezeptions- und Interpretationsgeschichte; Anton Bruckner Privatuniversität - Institut für Theorie und Geschichte der Musik; Eliette und Herbert von Karajan Institut - Eliette und Herbert von Karajan Institut"; |
|
22 |
|
|
23 |
@Before |
|
24 |
public void before() throws Exception { |
|
25 |
} |
|
26 |
|
|
27 |
@After |
|
28 |
public void after() throws Exception { |
|
29 |
} |
|
30 |
|
|
31 |
@Test |
|
32 |
public void testGetNamesNameNoNameSurname() throws Exception { |
|
33 |
String ret = FWFXsltFunctions.getName(nonamesurname,true); |
|
34 |
assertEquals("",ret ); |
|
35 |
} |
|
36 |
|
|
37 |
@Test |
|
38 |
public void testGetNamesSurnameNoNameSurname() throws Exception { |
|
39 |
String ret = FWFXsltFunctions.getName(nonamesurname,false); |
|
40 |
assertEquals("",ret ); |
|
41 |
} |
|
42 |
|
|
43 |
@Test |
|
44 |
public void testGetNamesNameTwoSurname() throws Exception { |
|
45 |
String ret = FWFXsltFunctions.getName(twoSurname,true); |
|
46 |
assertEquals("Gerhard",ret ); |
|
47 |
} |
|
48 |
|
|
49 |
@Test |
|
50 |
public void testGetNamesSurnameTwoSurname() throws Exception { |
|
51 |
String ret = FWFXsltFunctions.getName(twoSurname,false); |
|
52 |
assertEquals("PIPPO SOMMER",ret ); |
|
53 |
} |
|
54 |
|
|
55 |
@Test |
|
56 |
public void testGetNamesNameTwoNames() throws Exception { |
|
57 |
String ret = FWFXsltFunctions.getName(twoNames,true); |
|
58 |
assertEquals("Gerhard Pippo",ret ); |
|
59 |
} |
|
60 |
|
|
61 |
@Test |
|
62 |
public void testGetNamesSurnameTwoNames() throws Exception { |
|
63 |
String ret = FWFXsltFunctions.getName(twoNames,false); |
|
64 |
assertEquals("SOMMER",ret ); |
|
65 |
} |
|
66 |
|
|
67 |
/** |
|
68 |
* Method: getProvs(String jsonProvList) |
|
69 |
*/ |
|
70 |
@Test |
|
71 |
public void testGetNamesName() throws Exception { |
|
72 |
String ret = FWFXsltFunctions.getName(namesurname,true); |
|
73 |
assertEquals("Gerhard",ret ); |
|
74 |
} |
|
75 |
|
|
76 |
@Test |
|
77 |
public void testGetNamesSurname() throws Exception { |
|
78 |
String ret = FWFXsltFunctions.getName(namesurname,false); |
|
79 |
assertEquals("SOMMER",ret ); |
|
80 |
} |
|
81 |
|
|
82 |
@Test |
|
83 |
public void testGetNamesNameNoSurname() throws Exception { |
|
84 |
String ret = FWFXsltFunctions.getName(noSurname,true); |
|
85 |
assertEquals("Gerhard",ret ); |
|
86 |
} |
|
87 |
|
|
88 |
@Test |
|
89 |
public void testGetNamesSurnameNoSurname() throws Exception { |
|
90 |
String ret = FWFXsltFunctions.getName(noSurname,false); |
|
91 |
assertEquals("",ret ); |
|
92 |
} |
|
93 |
|
|
94 |
@Test |
|
95 |
public void testGetNamesNameNoName() throws Exception { |
|
96 |
String ret = FWFXsltFunctions.getName(noName,true); |
|
97 |
assertEquals("",ret ); |
|
98 |
} |
|
99 |
|
|
100 |
@Test |
|
101 |
public void testGetNamesSurnameNoName() throws Exception { |
|
102 |
String ret = FWFXsltFunctions.getName(noName,false); |
|
103 |
assertEquals("SOMMER",ret ); |
|
104 |
} |
|
105 |
|
|
106 |
@Test |
|
107 |
public void TestGetMd5()throws Exception{ |
|
108 |
String md5 = FWFXsltFunctions.getMd5(organization); |
|
109 |
System.out.println(md5); |
|
110 |
assertNotNull(md5); |
|
111 |
} |
|
112 |
|
|
113 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/transform/xml/vtd/VtdParserToProtoTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml.vtd; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.util.function.Function; |
|
5 |
|
|
6 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
7 |
import eu.dnetlib.data.transform.xml2.DatasetToProto; |
|
8 |
import eu.dnetlib.data.transform.xml2.PublicationToProto; |
|
9 |
import org.apache.commons.io.IOUtils; |
|
10 |
import org.apache.commons.logging.Log; |
|
11 |
import org.apache.commons.logging.LogFactory; |
|
12 |
import org.junit.Test; |
|
13 |
|
|
14 |
import static org.junit.Assert.assertNotNull; |
|
15 |
|
|
16 |
public class VtdParserToProtoTest { |
|
17 |
|
|
18 |
private static final Log log = LogFactory.getLog(VtdParserToProtoTest.class); |
|
19 |
|
|
20 |
@Test |
|
21 |
public void testParsePublication() throws IOException { |
|
22 |
doTest("/eu/dnetlib/data/transform/publication.xml", new PublicationToProto()); |
|
23 |
} |
|
24 |
|
|
25 |
@Test |
|
26 |
public void testParseDataset() throws IOException { |
|
27 |
doTest("/eu/dnetlib/data/transform/dataset.xml", new DatasetToProto()); |
|
28 |
} |
|
29 |
|
|
30 |
@Test |
|
31 |
public void testParseDataset2() throws IOException { |
|
32 |
doTest("/eu/dnetlib/data/transform/dataset2.xml", new DatasetToProto()); |
|
33 |
} |
|
34 |
|
|
35 |
private void doTest(final String filePath, Function<String, Oaf> f) throws IOException { |
|
36 |
final String xml = IOUtils.toString(getClass().getResourceAsStream(filePath)); |
|
37 |
|
|
38 |
assertNotNull(xml); |
|
39 |
|
|
40 |
final Oaf oaf = f.apply(xml); |
|
41 |
|
|
42 |
assertNotNull(oaf); |
|
43 |
|
|
44 |
log.info(oaf.toString()); |
|
45 |
} |
|
46 |
|
|
47 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/transform/xml/vtd/ConfigurationTestConfig.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml.vtd; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.io.InputStream; |
|
5 |
import java.util.Properties; |
|
6 |
|
|
7 |
import com.mongodb.MongoClient; |
|
8 |
import com.mongodb.client.MongoDatabase; |
|
9 |
import org.springframework.context.annotation.Bean; |
|
10 |
import org.springframework.context.annotation.Configuration; |
|
11 |
import org.springframework.core.io.ClassPathResource; |
|
12 |
|
|
13 |
@Configuration |
|
14 |
public class ConfigurationTestConfig { |
|
15 |
|
|
16 |
@Bean |
|
17 |
public MongoDatabase db() throws IOException { |
|
18 |
|
|
19 |
final Properties p = new Properties(); |
|
20 |
final ClassPathResource cp = new ClassPathResource("test.properties"); |
|
21 |
try (final InputStream stream = cp.getInputStream()) { |
|
22 |
p.load(stream); |
|
23 |
} |
|
24 |
|
|
25 |
final MongoClient mongo = new MongoClient( |
|
26 |
p.getProperty("mongodb.host"), |
|
27 |
Integer.valueOf(p.getProperty("mongodb.port"))); |
|
28 |
return mongo.getDatabase(p.getProperty("mongodb.dbname")); |
|
29 |
} |
|
30 |
|
|
31 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/transform/xml/vtd/VtdParserToProtoIT.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml.vtd; |
|
2 |
|
|
3 |
import com.mongodb.client.MongoCollection; |
|
4 |
import com.mongodb.client.MongoDatabase; |
|
5 |
import eu.dnetlib.data.transform.xml2.DatasetToProto; |
|
6 |
import org.apache.commons.lang3.time.StopWatch; |
|
7 |
import org.apache.commons.logging.Log; |
|
8 |
import org.apache.commons.logging.LogFactory; |
|
9 |
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics; |
|
10 |
import org.bson.Document; |
|
11 |
import org.junit.Ignore; |
|
12 |
import org.junit.Test; |
|
13 |
import org.junit.runner.RunWith; |
|
14 |
import org.springframework.beans.factory.annotation.Autowired; |
|
15 |
import org.springframework.test.context.ContextConfiguration; |
|
16 |
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; |
|
17 |
|
|
18 |
import java.io.IOException; |
|
19 |
import java.util.Objects; |
|
20 |
import java.util.concurrent.atomic.AtomicInteger; |
|
21 |
import java.util.stream.StreamSupport; |
|
22 |
|
|
23 |
import static org.junit.Assert.assertNotNull; |
|
24 |
import static org.junit.Assert.assertTrue; |
|
25 |
|
|
26 |
@RunWith(SpringJUnit4ClassRunner.class) |
|
27 |
@ContextConfiguration(classes = { ConfigurationTestConfig.class }) |
|
28 |
public class VtdParserToProtoIT { |
|
29 |
|
|
30 |
private static final Log log = LogFactory.getLog(VtdParserToProtoIT.class); |
|
31 |
public static final String COLLECTION_NAME = "datacite"; |
|
32 |
private static final int BATCH_SIZE = 10000; |
|
33 |
public static final int LOG_FREQ = 5000; |
|
34 |
|
|
35 |
@Autowired |
|
36 |
private MongoDatabase db; |
|
37 |
|
|
38 |
@Test |
|
39 |
@Ignore |
|
40 |
public void testParseDatacite() throws IOException { |
|
41 |
|
|
42 |
final MongoCollection<Document> collection = db.getCollection(COLLECTION_NAME); |
|
43 |
|
|
44 |
final long collectionSize = collection.count(); |
|
45 |
log.info(String.format("found %s records in collection '%s'", collectionSize, COLLECTION_NAME)); |
|
46 |
|
|
47 |
final AtomicInteger read = new AtomicInteger(0); |
|
48 |
final DescriptiveStatistics stats = new DescriptiveStatistics(); |
|
49 |
|
|
50 |
final StopWatch timer = new StopWatch(); |
|
51 |
|
|
52 |
final DatasetToProto mapper = new DatasetToProto(); |
|
53 |
StreamSupport.stream(collection.find().batchSize(BATCH_SIZE).spliterator(), false) |
|
54 |
.peek(d -> { |
|
55 |
if (read.addAndGet(1) % LOG_FREQ == 0) { |
|
56 |
log.info(String.format("records read so far %s", read.get())); |
|
57 |
log.info(String.format("stats so far %s", stats.toString())); |
|
58 |
} |
|
59 |
}) |
|
60 |
.map(d -> (String) d.get("body")) |
|
61 |
.filter(Objects::nonNull) |
|
62 |
.peek(s -> timer.start()) |
|
63 |
.map(mapper) |
|
64 |
.forEach(oaf -> { |
|
65 |
assertNotNull(oaf); |
|
66 |
assertTrue(oaf.hasEntity()); |
|
67 |
|
|
68 |
timer.stop(); |
|
69 |
stats.addValue(timer.getTime()); |
|
70 |
timer.reset(); |
|
71 |
}); |
|
72 |
|
|
73 |
log.info(String.format("processed %s/%s records", read.get(), collectionSize)); |
|
74 |
} |
|
75 |
|
|
76 |
|
|
77 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/transform/xml/vtd/VtdUtilityParserTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml.vtd; |
|
2 |
|
|
3 |
import java.io.InputStream; |
|
4 |
import java.util.List; |
|
5 |
|
|
6 |
import com.ximpleware.AutoPilot; |
|
7 |
import com.ximpleware.VTDGen; |
|
8 |
import com.ximpleware.VTDNav; |
|
9 |
import eu.dnetlib.data.transform.xml2.Node; |
|
10 |
import eu.dnetlib.data.transform.xml2.VtdUtilityParser; |
|
11 |
import org.apache.commons.io.IOUtils; |
|
12 |
import org.junit.Assert; |
|
13 |
import org.junit.Test; |
|
14 |
|
|
15 |
|
|
16 |
public class VtdUtilityParserTest { |
|
17 |
|
|
18 |
@Test |
|
19 |
public void testUtils1() { |
|
20 |
String xpath = VtdUtilityParser.xpath("a", "b", "c"); |
|
21 |
Assert.assertTrue("/*[local-name()='a']/*[local-name()='b']/*[local-name()='c']".equals(xpath)); |
|
22 |
} |
|
23 |
|
|
24 |
@Test |
|
25 |
public void testPartser() throws Exception { |
|
26 |
final InputStream resource = this.getClass().getResourceAsStream("/eu/dnetlib/data/transform/publication.xml"); |
|
27 |
final String record =IOUtils.toString(resource); |
|
28 |
final VTDGen vg = VtdUtilityParser.parseXml(record); |
|
29 |
final VTDNav vn = vg.getNav(); |
|
30 |
final AutoPilot ap = new AutoPilot(vn); |
|
31 |
|
|
32 |
List<Node> nodes = VtdUtilityParser.getNodes(ap, vn, "//*[local-name()='referenceaa']"); |
|
33 |
|
|
34 |
nodes.forEach(n -> Assert.assertTrue(n.getAttributes().keySet().size()>0)); |
|
35 |
|
|
36 |
System.out.println(VtdUtilityParser.countNodes(ap, vn, "count(//*[local-name()='CobjIdentifier'])")); |
|
37 |
|
|
38 |
|
|
39 |
|
|
40 |
|
|
41 |
} |
|
42 |
|
|
43 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/transform/AuthorMergerTestIT.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform; |
|
2 |
|
|
3 |
import com.google.common.base.Predicates; |
|
4 |
import com.mongodb.client.FindIterable; |
|
5 |
import com.mongodb.client.MongoDatabase; |
|
6 |
import com.mongodb.client.MongoIterable; |
|
7 |
import eu.dnetlib.data.transform.xml.vtd.ConfigurationTestConfig; |
|
8 |
import org.bson.Document; |
|
9 |
import org.junit.Test; |
|
10 |
import org.junit.runner.RunWith; |
|
11 |
import org.springframework.beans.factory.annotation.Autowired; |
|
12 |
import org.springframework.test.context.ContextConfiguration; |
|
13 |
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; |
|
14 |
|
|
15 |
import java.io.IOException; |
|
16 |
import java.util.List; |
|
17 |
|
|
18 |
import static java.util.stream.Collectors.toList; |
|
19 |
|
|
20 |
@RunWith(SpringJUnit4ClassRunner.class) |
|
21 |
@ContextConfiguration(classes = { ConfigurationTestConfig.class }) |
|
22 |
public class AuthorMergerTestIT extends AbstractAuthorMergerTest { |
|
23 |
|
|
24 |
@Autowired |
|
25 |
private MongoDatabase db; |
|
26 |
|
|
27 |
private String collection = "authors"; |
|
28 |
|
|
29 |
@Test |
|
30 |
public void test_mergeAuthorsGroup() throws IOException { |
|
31 |
|
|
32 |
final FindIterable<Document> docs = db.getCollection(collection).find(); |
|
33 |
|
|
34 |
MongoIterable<String> i = docs |
|
35 |
.map(d -> ((List<List<Document>>) d.get("authors")).stream() |
|
36 |
.filter(Predicates.notNull()) |
|
37 |
.map(g -> g.stream() |
|
38 |
.map(Document::toJson) |
|
39 |
.collect(toList())) |
|
40 |
.collect(toList())) |
|
41 |
.map(List::toString); |
|
42 |
|
|
43 |
doTestMergeAuthorGroup(i); |
|
44 |
|
|
45 |
} |
|
46 |
|
|
47 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/transform/xml2/VtdParserToProtoTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml2; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.util.function.Function; |
|
5 |
|
|
6 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
7 |
import org.apache.commons.io.IOUtils; |
|
8 |
import org.apache.commons.logging.Log; |
|
9 |
import org.apache.commons.logging.LogFactory; |
|
10 |
import org.junit.Test; |
|
11 |
|
|
12 |
import static org.junit.Assert.assertNotNull; |
|
13 |
|
|
14 |
public class VtdParserToProtoTest { |
|
15 |
|
|
16 |
private static final Log log = LogFactory.getLog(VtdParserToProtoTest.class); |
|
17 |
|
|
18 |
@Test |
|
19 |
public void testParsePublication() throws IOException { |
|
20 |
doTest("/eu/dnetlib/data/transform/publication.xml", new PublicationToProto()); |
|
21 |
} |
|
22 |
|
|
23 |
@Test |
|
24 |
public void testParseDataset() throws IOException { |
|
25 |
doTest("/eu/dnetlib/data/transform/dataset.xml", new DatasetToProto()); |
|
26 |
} |
|
27 |
|
|
28 |
@Test |
|
29 |
public void testParseDataset2() throws IOException { |
|
30 |
doTest("/eu/dnetlib/data/transform/dataset2.xml", new DatasetToProto()); |
|
31 |
} |
|
32 |
|
|
33 |
private void doTest(final String filePath, Function<String, Oaf> f) throws IOException { |
|
34 |
final String xml = IOUtils.toString(getClass().getResourceAsStream(filePath)); |
|
35 |
|
|
36 |
assertNotNull(xml); |
|
37 |
|
|
38 |
final Oaf oaf = f.apply(xml); |
|
39 |
|
|
40 |
assertNotNull(oaf); |
|
41 |
|
|
42 |
log.info(oaf.toString()); |
|
43 |
} |
|
44 |
|
|
45 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/transform/xml2/VtdUtilityParserTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml2; |
|
2 |
|
|
3 |
import java.io.InputStream; |
|
4 |
import java.util.List; |
|
5 |
|
|
6 |
import com.ximpleware.AutoPilot; |
|
7 |
import com.ximpleware.VTDGen; |
|
8 |
import com.ximpleware.VTDNav; |
|
9 |
import eu.dnetlib.data.transform.xml2.Node; |
|
10 |
import eu.dnetlib.data.transform.xml2.VtdUtilityParser; |
|
11 |
import org.apache.commons.io.IOUtils; |
|
12 |
import org.junit.Assert; |
|
13 |
import org.junit.Test; |
|
14 |
|
|
15 |
import static eu.dnetlib.data.transform.xml2.VtdUtilityParser.parseXml; |
|
16 |
|
|
17 |
public class VtdUtilityParserTest { |
|
18 |
|
|
19 |
@Test |
|
20 |
public void testUtils1() { |
|
21 |
String xpath = VtdUtilityParser.xpath("a", "b", "c"); |
|
22 |
Assert.assertTrue("/*[local-name()='a']/*[local-name()='b']/*[local-name()='c']".equals(xpath)); |
|
23 |
} |
|
24 |
|
|
25 |
@Test |
|
26 |
public void testPartser() throws Exception { |
|
27 |
final InputStream resource = this.getClass().getResourceAsStream("/eu/dnetlib/data/transform/publication.xml"); |
|
28 |
final String record =IOUtils.toString(resource); |
|
29 |
final VTDGen vg = parseXml(record); |
|
30 |
final VTDNav vn = vg.getNav(); |
|
31 |
final AutoPilot ap = new AutoPilot(vn); |
|
32 |
|
|
33 |
List<Node> nodes = VtdUtilityParser.getNodes(ap, vn, "//*[local-name()='referenceaa']"); |
|
34 |
|
|
35 |
nodes.forEach(n -> Assert.assertTrue(n.getAttributes().keySet().size()>0)); |
|
36 |
|
|
37 |
System.out.println(VtdUtilityParser.countNodes(ap, vn, "count(//*[local-name()='CobjIdentifier'])")); |
|
38 |
|
|
39 |
|
|
40 |
|
|
41 |
|
|
42 |
} |
|
43 |
|
|
44 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/transform/AuthorMergerTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform; |
|
2 |
|
|
3 |
import org.junit.Test; |
|
4 |
|
|
5 |
import java.io.BufferedReader; |
|
6 |
import java.io.IOException; |
|
7 |
import java.io.InputStreamReader; |
|
8 |
import java.util.Iterator; |
|
9 |
import java.util.zip.GZIPInputStream; |
|
10 |
|
|
11 |
public class AuthorMergerTest extends AbstractAuthorMergerTest { |
|
12 |
|
|
13 |
@Test |
|
14 |
public void test_mergeAuthorsGroup() throws IOException { |
|
15 |
|
|
16 |
try(GZIPInputStream gzip = new GZIPInputStream(getClass().getResourceAsStream("author_1000.json.gz"))) { |
|
17 |
final BufferedReader br = new BufferedReader(new InputStreamReader(gzip)); |
|
18 |
|
|
19 |
doTestMergeAuthorGroup(() -> new Iterator<String>() { |
|
20 |
|
|
21 |
String line = null; |
|
22 |
|
|
23 |
@Override |
|
24 |
public boolean hasNext() { |
|
25 |
line = doNext(); |
|
26 |
return line != null; |
|
27 |
} |
|
28 |
|
|
29 |
@Override |
|
30 |
public String next() { |
|
31 |
return line; |
|
32 |
} |
|
33 |
|
|
34 |
private String doNext() { |
|
35 |
try { |
|
36 |
return br.readLine(); |
|
37 |
} catch (IOException e) { |
|
38 |
throw new RuntimeException(e); |
|
39 |
} |
|
40 |
} |
|
41 |
}); |
|
42 |
} |
|
43 |
} |
|
44 |
|
|
45 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/transform/AbstractAuthorMergerTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
import com.google.common.collect.Sets; |
|
5 |
import com.google.gson.Gson; |
|
6 |
import com.google.gson.JsonElement; |
|
7 |
import com.google.gson.internal.StringMap; |
|
8 |
import com.googlecode.protobuf.format.JsonFormat; |
|
9 |
import org.apache.commons.lang3.StringUtils; |
|
10 |
import org.apache.commons.math.stat.descriptive.DescriptiveStatistics; |
|
11 |
import org.junit.Assert; |
|
12 |
|
|
13 |
import java.io.IOException; |
|
14 |
import java.util.*; |
|
15 |
import java.util.concurrent.atomic.AtomicInteger; |
|
16 |
import java.util.stream.Collectors; |
|
17 |
import java.util.stream.StreamSupport; |
|
18 |
|
|
19 |
import static eu.dnetlib.data.proto.FieldTypeProtos.Author; |
|
20 |
import static eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; |
|
21 |
import static java.lang.String.format; |
|
22 |
import static java.util.stream.Collectors.*; |
|
23 |
|
|
24 |
public abstract class AbstractAuthorMergerTest { |
|
25 |
|
|
26 |
protected void doTestMergeAuthorGroup(final Iterable<String> input) throws IOException { |
|
27 |
final DescriptiveStatistics groupSize = new DescriptiveStatistics(); |
|
28 |
final AtomicInteger authorCount = new AtomicInteger(0); |
|
29 |
final AtomicInteger authorCountMerged = new AtomicInteger(0); |
|
30 |
|
|
31 |
final AtomicInteger pidCount = new AtomicInteger(0); |
|
32 |
final AtomicInteger pidCountMerged = new AtomicInteger(0); |
|
33 |
final Map<String, Integer> pidTypes = new HashMap<>(); |
|
34 |
final Map<String, Integer> pidTypesMerged = new HashMap<>(); |
|
35 |
final AtomicInteger orcidCount = new AtomicInteger(0); |
|
36 |
|
|
37 |
final AtomicInteger equalGroupSizeCount = new AtomicInteger(0); |
|
38 |
final AtomicInteger lines = new AtomicInteger(0); |
|
39 |
|
|
40 |
final List<List<Author>> authorGroups = Lists.newArrayList(); |
|
41 |
|
|
42 |
final Set<String> orcidIn = new HashSet<>(); |
|
43 |
final Set<String> orcidOut= new HashSet<>(); |
|
44 |
|
|
45 |
for(String line : input) { |
|
46 |
lines.incrementAndGet(); |
|
47 |
final List<List<StringMap<Object>>> list = new Gson().fromJson(line, List.class); |
|
48 |
final List<List<Author>> groups = new ArrayList<>(); |
|
49 |
groups.addAll(list.stream() |
|
50 |
.filter(Objects::nonNull) |
|
51 |
.map(j -> { |
|
52 |
return j.stream().map(AbstractAuthorMergerTest::fixRank).collect(toList()); |
|
53 |
}) |
|
54 |
.map(AbstractAuthorMergerTest::asAuthors) |
|
55 |
.filter(Objects::nonNull) |
|
56 |
.collect(toList())); |
|
57 |
|
|
58 |
groups.stream().forEach(g -> { |
|
59 |
groupSize.addValue(g.size()); |
|
60 |
g.stream().forEach(a -> { |
|
61 |
pidCount.addAndGet(a.getPidCount()); |
|
62 |
authorCount.incrementAndGet(); |
|
63 |
|
|
64 |
countPids(pidTypes, a); |
|
65 |
orcidIn.addAll(getOrcid(a)); |
|
66 |
}); |
|
67 |
}); |
|
68 |
|
|
69 |
final HashSet<Integer> groupSizes = groups.stream() |
|
70 |
.map(a -> a.size()) |
|
71 |
.collect(toCollection(HashSet::new)); |
|
72 |
|
|
73 |
if (groupSizes.size() == 1) { |
|
74 |
equalGroupSizeCount.incrementAndGet(); |
|
75 |
} |
|
76 |
|
|
77 |
authorGroups.addAll(groups); |
|
78 |
|
|
79 |
final List<Author> merged = AuthorMerger.merge(groups); |
|
80 |
|
|
81 |
if (!groupSizes.isEmpty()) { |
|
82 |
Assert.assertTrue( |
|
83 |
format("the size of the merged group must be the size of one of the input groups. Merged size: %s, Sizes: %s ", |
|
84 |
merged.size(), groupSizes), |
|
85 |
groupSizes.contains(merged.size())); |
|
86 |
} |
|
87 |
|
|
88 |
authorCountMerged.addAndGet(merged.size()); |
|
89 |
merged.forEach(a -> { |
|
90 |
pidCountMerged.addAndGet(a.getPidCount()); |
|
91 |
countPids(pidTypesMerged, a); |
|
92 |
orcidOut.addAll(getOrcid(a)); |
|
93 |
}); |
|
94 |
} |
|
95 |
|
|
96 |
System.out.println(format("lines %s", lines.intValue())); |
|
97 |
System.out.println(format("author groups %s", authorGroups.size())); |
|
98 |
System.out.println(format("author count %s", authorCount.intValue())); |
|
99 |
System.out.println(format("avg authorGroups per group %s", authorCount.doubleValue() / authorGroups.size())); |
|
100 |
|
|
101 |
System.out.println(format("pid types %s", pidTypes)); |
|
102 |
System.out.println(format("pid count %s", pidCount.intValue())); |
|
103 |
System.out.println(format("pid rate among authors %s", pidCount.doubleValue() / authorCount.intValue())); |
|
104 |
|
|
105 |
System.out.println(format("equal group size %s", equalGroupSizeCount.incrementAndGet())); |
|
106 |
|
|
107 |
System.out.println(format("[merged] pid count %s", pidCountMerged.intValue())); |
|
108 |
System.out.println(format("[merged] avg pid per group %s", pidCountMerged.doubleValue() / authorCountMerged.intValue())); |
|
109 |
System.out.println(format("[merged] pid types %s", pidTypesMerged)); |
|
110 |
|
|
111 |
System.out.println(format("ORCIDs in %s", orcidIn.size())); |
|
112 |
System.out.println(format("ORCIDs out %s", orcidOut.size())); |
|
113 |
|
|
114 |
Sets.SetView<String> diff = Sets.difference(orcidIn, orcidOut); |
|
115 |
System.out.println(format("Difference between the number of distinct input ORCIDs and output (merged) ORCIDs: %s", diff.size())); |
|
116 |
|
|
117 |
System.out.println(format("\ngroup size %s", groupSize)); |
|
118 |
} |
|
119 |
|
|
120 |
private List<String> getOrcid(Author a) { |
|
121 |
return a.getPidList().stream() |
|
122 |
.filter(p -> p.getKey().equalsIgnoreCase("orcid")) |
|
123 |
.map(KeyValue::getValue) |
|
124 |
.collect(Collectors.toList()); |
|
125 |
} |
|
126 |
|
|
127 |
private void countPids(Map<String, Integer> pidTypes, Author a) { |
|
128 |
a.getPidList().stream() |
|
129 |
.collect(groupingBy(KeyValue::getKey, summingInt(e -> 1))) |
|
130 |
.forEach((k, v) -> pidTypes.merge(k, v, (v1, v2) -> v1 + v2)); |
|
131 |
} |
|
132 |
|
|
133 |
private static StringMap<Object> fixRank(final StringMap<Object> j) { |
|
134 |
final StringMap<Object> m = new StringMap<>(); |
|
135 |
|
|
136 |
m.putAll(j); |
|
137 |
|
|
138 |
Object rank = j.get("rank"); |
|
139 |
rank = StringUtils.substringBefore(rank.toString(), ".0"); |
|
140 |
m.put("rank", Integer.parseInt(rank.toString())); |
|
141 |
return m; |
|
142 |
} |
|
143 |
|
|
144 |
private static List<Author> asAuthors(Object o) { |
|
145 |
|
|
146 |
final JsonElement json = new Gson().toJsonTree(o); |
|
147 |
|
|
148 |
return StreamSupport.stream(json.getAsJsonArray().spliterator(), false) |
|
149 |
.map(j -> j.toString()) |
|
150 |
.filter(Objects::nonNull) |
|
151 |
.map(AbstractAuthorMergerTest::asAuthor) |
|
152 |
.filter(Objects::nonNull) |
|
153 |
.collect(toList()); |
|
154 |
} |
|
155 |
|
|
156 |
private static Author asAuthor(String json) { |
|
157 |
|
|
158 |
final Author.Builder a = Author.newBuilder(); |
|
159 |
try { |
|
160 |
JsonFormat.merge(json, a); |
|
161 |
} catch (JsonFormat.ParseException e) { |
|
162 |
throw new IllegalArgumentException(e); |
|
163 |
} |
|
164 |
return a.build(); |
|
165 |
} |
|
166 |
|
|
167 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.3.37/src/test/java/eu/dnetlib/data/transform/SolrProtoMapperTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertFalse; |
|
4 |
import static org.junit.Assert.assertNotNull; |
|
5 |
|
|
6 |
import java.io.IOException; |
|
7 |
import java.io.StringWriter; |
|
8 |
|
|
9 |
import org.apache.commons.codec.binary.Base64; |
|
10 |
import org.apache.commons.io.IOUtils; |
|
11 |
import org.apache.commons.logging.Log; |
|
12 |
import org.apache.commons.logging.LogFactory; |
|
13 |
import org.apache.solr.common.SolrInputDocument; |
|
14 |
import org.apache.solr.common.SolrInputField; |
|
15 |
import org.dom4j.DocumentException; |
|
16 |
import org.junit.Before; |
|
17 |
import org.junit.Test; |
|
18 |
|
|
19 |
import com.google.protobuf.InvalidProtocolBufferException; |
|
20 |
import com.googlecode.protobuf.format.JsonFormat; |
|
21 |
|
|
22 |
import eu.dnetlib.data.mapreduce.util.OafTest; |
|
23 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
24 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
25 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
26 |
import eu.dnetlib.functionality.index.solr.feed.InputDocumentFactory; |
|
27 |
|
|
28 |
public class SolrProtoMapperTest { |
|
29 |
|
Also available in: Unified diff
[maven-release-plugin] copy for tag dnet-openaireplus-mapping-utils-6.3.37