Revision 56498
Added by Miriam Baglioni over 4 years ago
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/install.sh | ||
---|---|---|
1 |
#!/bin/bash
# Build dnet-mapreduce-jobs and install its jar-with-dependencies into the
# local Maven repository under the separate "-assembly" artifactId.
#
# Fix: abort on the first failing command (previously a failed build still
# deleted the previously installed assembly artifact and kept going).
set -euo pipefail

# Read the project version from pom.xml (requires xmlstarlet on PATH).
VERSION=$(xmlstarlet sel -t -m "/_:project/_:version" -v "." pom.xml)
echo "using version: $VERSION"

mvn clean install

# Drop any previously installed assembly artifact so the fresh one replaces it.
rm -rf ~/.m2/repository/eu/dnetlib/dnet-mapreduce-jobs-assembly

# Build the fat jar and register it as its own artifact in the local repo.
mvn assembly:assembly -DskipTests=true && mvn install:install-file \
    -Dfile="target/dnet-mapreduce-jobs-$VERSION-jar-with-dependencies.jar" \
    -DgroupId=eu.dnetlib \
    -DartifactId=dnet-mapreduce-jobs-assembly \
    -Dversion="$VERSION" \
    -Dpackaging=jar
|
0 | 9 |
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/deploy.info | ||
---|---|---|
1 |
{"type_source": "SVN", "goal": "package -U source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-mapreduce-jobs/trunk", "deploy_repository": "dnet45-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots", "name": "dnet-mapreduce-jobs"} |
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/actions/OrcidToActionsTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.actions; |
|
2 |
|
|
3 |
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.getStringValue; |
|
4 |
import static org.junit.Assert.assertEquals; |
|
5 |
import static org.junit.Assert.assertNotNull; |
|
6 |
import static org.junit.Assert.assertTrue; |
|
7 |
|
|
8 |
import java.io.BufferedReader; |
|
9 |
import java.io.IOException; |
|
10 |
import java.io.InputStream; |
|
11 |
import java.io.InputStreamReader; |
|
12 |
import java.util.List; |
|
13 |
|
|
14 |
import org.apache.commons.lang3.StringUtils; |
|
15 |
import org.junit.Before; |
|
16 |
import org.junit.Test; |
|
17 |
|
|
18 |
import com.google.gson.JsonArray; |
|
19 |
import com.google.gson.JsonObject; |
|
20 |
import com.google.gson.JsonParser; |
|
21 |
import com.googlecode.protobuf.format.JsonFormat; |
|
22 |
import com.googlecode.protobuf.format.JsonFormat.ParseException; |
|
23 |
|
|
24 |
import eu.dnetlib.actionmanager.actions.ActionFactory; |
|
25 |
import eu.dnetlib.actionmanager.actions.AtomicAction; |
|
26 |
import eu.dnetlib.actionmanager.common.Agent; |
|
27 |
import eu.dnetlib.data.mapreduce.hbase.Reporter; |
|
28 |
import eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility; |
|
29 |
import eu.dnetlib.data.mapreduce.hbase.dataimport.OrcidToActions; |
|
30 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
31 |
|
|
32 |
public class OrcidToActionsTest { |
|
33 |
|
|
34 |
private String setName; |
|
35 |
private Agent agent; |
|
36 |
private Reporter reporter; |
|
37 |
|
|
38 |
private final String generatedJson = |
|
39 |
"{\"kind\": \"entity\",\"entity\": {\"type\": \"result\",\"result\": {\"metadata\": {\"title\": [{\"value\": \"Factors influencing accuracy of referral and the likelihood of false positive referral by optometrists in Bradford, United Kingdom.\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2015-11-01\",\"qualifier\": {\"classid\": \"issued\",\"classname\": \"issued\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"2015-11-01\"},\"resulttype\": {\"classid\": \"publication\",\"classname\": \"publication\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"resourcetype\": {\"classid\": \"journal-article\",\"classname\": \"journal-article\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"source\": [{\"value\": \"Europe PubMed Central\"}],\"author\": [{\"fullname\": \"Christopher Davey\",\"name\": \"Christopher\",\"surname\": \"Davey\",\"rank\": 1,\"pid\": [{\"key\": \"ORCID\",\"value\": \"0000-0002-9050-379X\"}]},{\"fullname\": \"Scally AJ\",\"rank\": 2},{\"fullname\": \"Green C\",\"rank\": 3},{\"fullname\": \"Mitchell ES\",\"rank\": 4},{\"fullname\": \"Elliott DB\",\"rank\": 5}]},\"instance\": [{\"accessright\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0001\",\"classname\": \"Article\",\"schemeid\": \"dnet:publication_resource\",\"schemename\": \"dnet:publication_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"http://europepmc.org/abstract/med/26614021\"],\"collectedfrom\": {\"key\": \"10|openaire____::806360c771262b4d6770e7cdf04b5c5a\",\"value\": \"ORCID\"},\"dateofacceptance\": 
{\"value\": \"2015-11-01\"}}]},\"originalId\": [\"24099977\"],\"collectedfrom\": [{\"key\": \"10|openaire____::806360c771262b4d6770e7cdf04b5c5a\",\"value\": \"ORCID\"}],\"pid\": [{\"value\": \"26614021\",\"qualifier\": {\"classid\": \"pmid\",\"classname\": \"pmid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2018-10-22\",\"id\": \"50|orcid____::57b8b813748049402c0900d3d17f1c23\",\"dateoftransformation\": \"2019-06-21T11:16:10+02:00\"},\"dataInfo\": {\"inferred\": false,\"deletedbyinference\": false,\"trust\": \"0.9\",\"provenanceaction\": {\"classid\": \"sysimport:actionset:orcidworks-no-doi\",\"classname\": \"sysimport:actionset:orcidworks-no-doi\",\"schemeid\": \"dnet:provenanceActions\",\"schemename\": \"dnet:provenanceActions\"}},\"lastupdatetimestamp\": 1561108570991}\n" |
|
40 |
+ |
|
41 |
""; |
|
42 |
|
|
43 |
@Before |
|
44 |
public void setup() { |
|
45 |
setName = "ORCID"; |
|
46 |
agent = new Agent("agentId", "agentName", Agent.AGENT_TYPE.service); |
|
47 |
reporter = |
|
48 |
(Reporter) (counterGroup, counterName, delta) -> System.out.println(String.format("COUNTER: %s - %s : %d", counterGroup, counterName, delta)); |
|
49 |
} |
|
50 |
|
|
51 |
@Test |
|
52 |
public void testUnicodeAction() throws IOException { |
|
53 |
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_1.json"); |
|
54 |
|
|
55 |
} |
|
56 |
|
|
57 |
@Test |
|
58 |
public void testSourceWorkIdAction() throws IOException { |
|
59 |
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_2.json"); |
|
60 |
|
|
61 |
} |
|
62 |
|
|
63 |
@Test |
|
64 |
public void testAuthorsRank() throws IOException { |
|
65 |
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_3.json"); |
|
66 |
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_4.json"); |
|
67 |
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_5.json"); |
|
68 |
|
|
69 |
} |
|
70 |
|
|
71 |
@Test |
|
72 |
public void testUrl() throws IOException { |
|
73 |
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_6.json"); |
|
74 |
|
|
75 |
} |
|
76 |
|
|
77 |
@Test |
|
78 |
public void testNullUrlAction() throws IOException { |
|
79 |
doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_7.json"); |
|
80 |
|
|
81 |
} |
|
82 |
|
|
83 |
@Test |
|
84 |
public void testMassiveOrcidAction() throws IOException { |
|
85 |
doTestAllOrcidAction("/eu/dnetlib/data/mapreduce/actions/part-100"); |
|
86 |
} |
|
87 |
|
|
88 |
@Test |
|
89 |
public void testJsonToProto() throws ParseException { |
|
90 |
final Oaf.Builder builder = Oaf.newBuilder(); |
|
91 |
JsonFormat.merge(generatedJson, builder); |
|
92 |
System.out.println(builder.build()); |
|
93 |
} |
|
94 |
|
|
95 |
private void doTestSingleAction(final String filePath) throws IOException { |
|
96 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
97 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
98 |
|
|
99 |
final String line = in.readLine(); |
|
100 |
|
|
101 |
final JsonParser parser = new JsonParser(); |
|
102 |
final JsonObject root = parser.parse(line).getAsJsonObject(); |
|
103 |
final List<AtomicAction> lista = OrcidToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, reporter); |
|
104 |
// System.out.println(new Gson().toJson(lista)); |
|
105 |
} |
|
106 |
|
|
107 |
private void doTestAllOrcidAction(final String filePath) throws IOException { |
|
108 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
109 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
110 |
String line = in.readLine(); |
|
111 |
int i = 0; |
|
112 |
while (StringUtils.isNotBlank(line)) { |
|
113 |
final JsonParser parser = new JsonParser(); |
|
114 |
final JsonObject root = parser.parse(line).getAsJsonObject(); |
|
115 |
try { |
|
116 |
OrcidToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, reporter); |
|
117 |
} catch (final Throwable e) { |
|
118 |
System.out.println("Exception parsing: " + line); |
|
119 |
throw new RuntimeException(e); |
|
120 |
} |
|
121 |
line = in.readLine(); |
|
122 |
i += 1; |
|
123 |
} |
|
124 |
} |
|
125 |
|
|
126 |
@Test |
|
127 |
public void testGeneratedJson() throws ParseException { |
|
128 |
final JsonParser parser = new JsonParser(); |
|
129 |
final JsonObject root = parser.parse(generatedJson).getAsJsonObject(); |
|
130 |
assertEquals(getStringValue(root, "kind"), "entity"); |
|
131 |
final JsonObject entity = root.getAsJsonObject("entity"); |
|
132 |
final List<JsonObject> pids = DumpToActionsUtility.getArrayObjects(entity, "pid"); |
|
133 |
pids.forEach(pid -> { |
|
134 |
final JsonObject qualifier = pid.getAsJsonObject("qualifier"); |
|
135 |
assertTrue(qualifier.get("classid").getAsString().matches("(ark|arxiv|pmc|pmid|source-work-id|urn)")); |
|
136 |
}); |
|
137 |
assertEquals(getStringValue(entity, "type"), "result"); |
|
138 |
assertNotNull(getStringValue(entity, "id")); |
|
139 |
assertNotNull(getStringValue(entity, "dateofcollection")); |
|
140 |
assertTrue(DumpToActionsUtility.isValidDate(getStringValue(entity, "dateofcollection"))); |
|
141 |
final JsonObject metadata = entity.getAsJsonObject("result").getAsJsonObject("metadata"); |
|
142 |
assertNotNull(metadata.getAsJsonArray("title")); |
|
143 |
assertNotNull(metadata.getAsJsonArray("relevantdate")); |
|
144 |
assertNotNull(metadata.getAsJsonObject("dateofacceptance")); |
|
145 |
assertTrue(DumpToActionsUtility.isValidDate(metadata.getAsJsonObject("dateofacceptance").get("value").getAsString())); |
|
146 |
assertNotNull(metadata.getAsJsonObject("resulttype")); |
|
147 |
assertNotNull(metadata.getAsJsonArray("author")); |
|
148 |
final JsonArray instance = entity.getAsJsonObject("result").getAsJsonArray("instance"); |
|
149 |
instance.forEach(i -> { |
|
150 |
assertNotNull(i.getAsJsonObject().getAsJsonObject("accessright")); |
|
151 |
assertNotNull(i.getAsJsonObject().getAsJsonObject("hostedby")); |
|
152 |
assertNotNull(i.getAsJsonObject().getAsJsonObject("collectedfrom")); |
|
153 |
assertNotNull(i.getAsJsonObject().getAsJsonObject("instancetype")); |
|
154 |
}); |
|
155 |
|
|
156 |
} |
|
157 |
} |
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/actions/DOIBoostToActionsTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.actions; |
|
2 |
|
|
3 |
import java.io.*; |
|
4 |
import java.util.List; |
|
5 |
import java.util.zip.DataFormatException; |
|
6 |
import java.util.zip.Inflater; |
|
7 |
|
|
8 |
import com.google.common.collect.Lists; |
|
9 |
import com.google.gson.JsonObject; |
|
10 |
import com.google.gson.JsonParser; |
|
11 |
import com.google.protobuf.InvalidProtocolBufferException; |
|
12 |
import eu.dnetlib.actionmanager.actions.ActionFactory; |
|
13 |
import eu.dnetlib.actionmanager.actions.AtomicAction; |
|
14 |
import eu.dnetlib.actionmanager.common.Agent; |
|
15 |
import eu.dnetlib.data.mapreduce.hbase.Reporter; |
|
16 |
import eu.dnetlib.data.mapreduce.hbase.dataimport.DOIBoostToActions; |
|
17 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
18 |
import eu.dnetlib.data.transform.Column; |
|
19 |
import eu.dnetlib.data.transform.Row; |
|
20 |
import org.apache.commons.codec.binary.Base64; |
|
21 |
import org.apache.commons.lang3.StringUtils; |
|
22 |
import org.junit.Before; |
|
23 |
import org.junit.Test; |
|
24 |
|
|
25 |
public class DOIBoostToActionsTest { |
|
26 |
private String setName; |
|
27 |
private Agent agent; |
|
28 |
private Reporter reporter; |
|
29 |
|
|
30 |
|
|
31 |
@Before |
|
32 |
public void setup() { |
|
33 |
setName = "DLI"; |
|
34 |
agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service); |
|
35 |
reporter = (Reporter) (counterGroup, counterName, delta) -> System.out.println(String.format("COUNTER: %s - %s : %d", counterGroup, counterName, delta)); |
|
36 |
} |
|
37 |
|
|
38 |
@Test |
|
39 |
public void testSingleDOIBoostAction() throws IOException { |
|
40 |
doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/DOIBoostAction.json"); |
|
41 |
} |
|
42 |
|
|
43 |
@Test |
|
44 |
public void testSingleDOIBoostActionFilter() throws IOException { |
|
45 |
doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/DOIBoostAction_filterOut.json"); |
|
46 |
} |
|
47 |
|
|
48 |
@Test |
|
49 |
public void testMultipleDOIBoostActionDiscardMany() throws IOException { |
|
50 |
doTestAllDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/doiboost_discard_many.json"); |
|
51 |
} |
|
52 |
|
|
53 |
|
|
54 |
@Test |
|
55 |
public void testDOIBoostActionToXML() throws Exception { |
|
56 |
doTestSingleDOIBoostActionToXML("/eu/dnetlib/data/mapreduce/actions/broken"); |
|
57 |
} |
|
58 |
|
|
59 |
|
|
60 |
|
|
61 |
@Test |
|
62 |
public void testMultipleDOIBoostAction() throws IOException { |
|
63 |
doTestAllDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/part-00070"); |
|
64 |
} |
|
65 |
|
|
66 |
|
|
67 |
|
|
68 |
@Test |
|
69 |
public void testDecompression() throws DataFormatException { |
|
70 |
final String s ="eJxtVM1u20YQfpW5pHAAWXCCIECORazKQYAEcHrpcbUckgvs7jA7S8vsye/QU4H25fwk/WYpKUbQ\n" + |
|
71 |
"mygOZ775fuaOXawjeVeY+HHi3IU6F1Ya3QNTCcqZvGQNHRd3iAuFTHVkmpxWen76i1gn9sHF9dWe\n" + |
|
72 |
"S3J5sZJlbeFiYdctaO6ZO+7ozc0rkr414VwD5g5FVKmTxFqDp6lIN/tKV193H2/p7c3N+9db+h3l\n" + |
|
73 |
"yWECuqlk6qWgRcBn/MBRpoRehEdHXXHJWZ+OkwzFTSN++9Hlga21Z9Ut7bKUJLNS4i54F+3NgLXR\n" + |
|
74 |
"IHe2cA15tveV/ZglytCqQkLdA9sw3VCVgbFGoWMAhY4m0VADdlbxQa4ZbSS17/D/DEySN2RERzAS\n" + |
|
75 |
"8oAGGEdusN+TTHM81RwBebRtOtDjK6gFtDlWqwMFzhg72kOe0wHzQSdHCITCBn8skm2uaRKxG9oa\n" + |
|
76 |
"4pVFU5O/z5w9r4R5wAGZkB7/LQYqCsgqNIpOoboY/my4Vm7Oo71oVZpQViFJhbDeKW/pTo5QpGxI\n" + |
|
77 |
"5tKIYAzXtnaWSvAQt71jlOOq4PPTP9bM7BfBoOTnp3+t5KQCen7KWkHZ5n83HyBIySYJiHa1cprq\n" + |
|
78 |
"Os8aBh+Mv178rPadGH1J7G3f46XZZq7hvOPFmY9BG91jy0eLBzTATvDPln6D+/jRpSny5qXtz4wp\n" + |
|
79 |
"HRlf1AKdrW2TonBIh7koqDoslHmQGhpxh7mDkVZ6e7ZAlevOLRvkw2Pi6g9HuoCF1dpwoWl0gfyz\n" + |
|
80 |
"Uk3pwgjJiu/Nhw/vV7K9s8L23dqPjk5Jx9AbkipHVzrIWvi64z7kk6wNWgtPh+PQQG7pWzALGVsH\n" + |
|
81 |
"BsTc6O0tsO8otDCu+cbqB252Ly5rz6Wga8hmgjMGK7qoc17mB2Et3AdnzKHiNrghwyhK9xwbgfsi\n" + |
|
82 |
"86R0dXu/19cowcSru92Xzzv6hb59vLvf/fF1v2/AcEk+GdMhGT5AuMx4eZBCvr6AOIVndXkDUhGv\n" + |
|
83 |
"ebDA9+GRm2S4Ne10YgWVaBvLeibVshDqcvbVqdvz098WbuUWmV/b2WpRPwX9pe3s4tj1XT1dBc6g\n" + |
|
84 |
"NOM+NBcnmOjBrdqdjNxWCeWHXwkKX9xuN0LtVGmwKJqwBgxWSVz+Aw5ePDg="; |
|
85 |
byte[] byteArray = Base64.decodeBase64(s.getBytes()); |
|
86 |
Inflater decompresser = new Inflater(); |
|
87 |
decompresser.setInput(byteArray); |
|
88 |
ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length); |
|
89 |
byte[] buffer = new byte[8192]; |
|
90 |
while (!decompresser.finished()) { |
|
91 |
int size = decompresser.inflate(buffer); |
|
92 |
bos.write(buffer, 0, size); |
|
93 |
} |
|
94 |
byte[] unzippeddata = bos.toByteArray(); |
|
95 |
decompresser.end(); |
|
96 |
|
|
97 |
System.out.println(new String(unzippeddata)); |
|
98 |
|
|
99 |
} |
|
100 |
|
|
101 |
|
|
102 |
private void doTestSingleDOIBoostActionToXML(final String filePath) throws Exception { |
|
103 |
final List<Row> rows = Lists.newArrayList(); |
|
104 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
105 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
106 |
|
|
107 |
String line = in.readLine(); |
|
108 |
|
|
109 |
final JsonParser parser = new JsonParser(); |
|
110 |
JsonObject root = parser.parse(line).getAsJsonObject(); |
|
111 |
List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false,reporter); |
|
112 |
|
|
113 |
if (actions!= null) { |
|
114 |
actions.forEach(action-> { |
|
115 |
if (action.getTargetColumn().equals("body") && action.getTargetColumnFamily().equals("result")) |
|
116 |
{ |
|
117 |
Column<String, byte[]> col = new Column<>("body" , action.getTargetValue()); |
|
118 |
rows.add(new Row("result",action.getTargetRowKey() , Lists.newArrayList(col))); |
|
119 |
} |
|
120 |
|
|
121 |
}); |
|
122 |
|
|
123 |
|
|
124 |
|
|
125 |
} |
|
126 |
|
|
127 |
|
|
128 |
|
|
129 |
} |
|
130 |
|
|
131 |
private void doTestSingleDOIBoostAction(final String filePath) throws IOException { |
|
132 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
133 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
134 |
|
|
135 |
String line = in.readLine(); |
|
136 |
|
|
137 |
final JsonParser parser = new JsonParser(); |
|
138 |
JsonObject root = parser.parse(line).getAsJsonObject(); |
|
139 |
List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false, reporter); |
|
140 |
if (actions!= null) { |
|
141 |
actions.forEach(it -> { |
|
142 |
try { |
|
143 |
System.out.println( |
|
144 |
String.format(" RowKey:%s TargetColumnFamily:%s TargetColumn: %s\n value:\n%s", it.getTargetRowKey(), it.getTargetColumnFamily(), |
|
145 |
it.getTargetColumn(), |
|
146 |
Oaf.parseFrom(it.getTargetValue()))); |
|
147 |
} catch (InvalidProtocolBufferException e) { |
|
148 |
e.printStackTrace(); |
|
149 |
} |
|
150 |
}); |
|
151 |
} |
|
152 |
} |
|
153 |
|
|
154 |
private void doTestAllDOIBoostAction(final String filePath) throws IOException { |
|
155 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
156 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
157 |
|
|
158 |
String line = in.readLine(); |
|
159 |
int i = 0; |
|
160 |
int cnt = 0; |
|
161 |
while(StringUtils.isNotBlank(line)) { |
|
162 |
cnt ++; |
|
163 |
|
|
164 |
final JsonParser parser = new JsonParser(); |
|
165 |
JsonObject root = parser.parse(line).getAsJsonObject(); |
|
166 |
try { |
|
167 |
List<AtomicAction> atomicActions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false, reporter); |
|
168 |
if (atomicActions!= null) |
|
169 |
{ |
|
170 |
i ++; |
|
171 |
} |
|
172 |
// atomicActions.forEach(it -> System.out.println(String.format(" RowKey:%s TargetColumnFamily:%s TargetColumn: %s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn()))); |
|
173 |
else{ |
|
174 |
System.out.println("SKIPPED Type "+ root.get("type").getAsString()); |
|
175 |
} |
|
176 |
|
|
177 |
} catch (Throwable e) { |
|
178 |
System.out.println(line); |
|
179 |
throw new RuntimeException(e); |
|
180 |
} |
|
181 |
line= in.readLine(); |
|
182 |
} |
|
183 |
|
|
184 |
System.out.println("total "+i+" / "+cnt); |
|
185 |
} |
|
186 |
|
|
187 |
|
|
188 |
|
|
189 |
} |
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/actions/CrossRefToActionTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.actions; |
|
2 |
|
|
3 |
import com.google.gson.JsonObject; |
|
4 |
import com.google.gson.JsonParser; |
|
5 |
import eu.dnetlib.actionmanager.actions.ActionFactory; |
|
6 |
import eu.dnetlib.actionmanager.actions.AtomicAction; |
|
7 |
import eu.dnetlib.actionmanager.common.Agent; |
|
8 |
import eu.dnetlib.data.mapreduce.hbase.dataimport.CrossRefToActions; |
|
9 |
import org.apache.commons.lang3.StringUtils; |
|
10 |
import org.junit.Before; |
|
11 |
import org.junit.Ignore; |
|
12 |
import org.junit.Test; |
|
13 |
|
|
14 |
import java.io.BufferedReader; |
|
15 |
import java.io.IOException; |
|
16 |
import java.io.InputStream; |
|
17 |
import java.io.InputStreamReader; |
|
18 |
|
|
19 |
public class CrossRefToActionTest { |
|
20 |
|
|
21 |
private String setName; |
|
22 |
private Agent agent; |
|
23 |
|
|
24 |
@Before |
|
25 |
public void setup() { |
|
26 |
setName = "DLI"; |
|
27 |
agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service); |
|
28 |
} |
|
29 |
|
|
30 |
|
|
31 |
@Ignore |
|
32 |
@Test |
|
33 |
public void testSingleScholixAction2() throws IOException { |
|
34 |
doTestSingleCrossRefAction("/eu/dnetlib/data/mapreduce/actions/broken"); |
|
35 |
} |
|
36 |
|
|
37 |
@Test |
|
38 |
public void testAllScholixAction() throws IOException { |
|
39 |
doTestAllCrossRefAction("/eu/dnetlib/data/mapreduce/actions/part-06036"); |
|
40 |
} |
|
41 |
|
|
42 |
|
|
43 |
private void doTestSingleCrossRefAction(final String filePath) throws IOException { |
|
44 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
45 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
46 |
|
|
47 |
String line = in.readLine(); |
|
48 |
System.out.println(line); |
|
49 |
final JsonParser parser = new JsonParser(); |
|
50 |
JsonObject root = parser.parse(line).getAsJsonObject(); |
|
51 |
AtomicAction actions = CrossRefToActions.generateActionsFromDump(root, new ActionFactory(), setName, agent, false); |
|
52 |
System.out.println(actions.toJSON()); |
|
53 |
} |
|
54 |
|
|
55 |
|
|
56 |
|
|
57 |
|
|
58 |
private void doTestAllCrossRefAction(final String filePath) throws IOException { |
|
59 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
60 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
61 |
|
|
62 |
String line = in.readLine(); |
|
63 |
while(StringUtils.isNotBlank(line)) { |
|
64 |
|
|
65 |
final JsonParser parser = new JsonParser(); |
|
66 |
JsonObject root = parser.parse(line).getAsJsonObject(); |
|
67 |
try { |
|
68 |
CrossRefToActions.generateActionsFromDump(root, new ActionFactory(), setName, agent, false); |
|
69 |
} catch (Throwable e) { |
|
70 |
System.out.println(line); |
|
71 |
throw new RuntimeException(e); |
|
72 |
} |
|
73 |
line= in.readLine(); |
|
74 |
} |
|
75 |
} |
|
76 |
|
|
77 |
|
|
78 |
|
|
79 |
} |
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/actions/ScholexplorerActionMapperTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.actions; |
|
2 |
|
|
3 |
import java.io.BufferedReader; |
|
4 |
import java.io.IOException; |
|
5 |
import java.io.InputStream; |
|
6 |
import java.io.InputStreamReader; |
|
7 |
import java.util.HashMap; |
|
8 |
import java.util.List; |
|
9 |
import java.util.Map; |
|
10 |
|
|
11 |
import com.google.gson.JsonObject; |
|
12 |
import com.google.gson.JsonParser; |
|
13 |
import eu.dnetlib.actionmanager.actions.ActionFactory; |
|
14 |
import eu.dnetlib.actionmanager.actions.AtomicAction; |
|
15 |
import eu.dnetlib.actionmanager.common.Agent; |
|
16 |
import eu.dnetlib.data.mapreduce.hbase.dataimport.ScholExplorerConfiguration; |
|
17 |
import eu.dnetlib.data.mapreduce.hbase.dataimport.ScholixToActions; |
|
18 |
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions; |
|
19 |
import eu.dnetlib.miscutils.datetime.DateUtils; |
|
20 |
import org.apache.commons.lang3.StringUtils; |
|
21 |
import org.junit.Assert; |
|
22 |
import org.junit.Before; |
|
23 |
import org.junit.Test; |
|
24 |
|
|
25 |
import static org.junit.Assert.assertEquals; |
|
26 |
|
|
27 |
public class ScholexplorerActionMapperTest { |
|
28 |
|
|
29 |
private Map<String, ScholExplorerConfiguration> configurationMap; |
|
30 |
private String setName; |
|
31 |
private Agent agent; |
|
32 |
private String nsPrefix; |
|
33 |
private String dsName; |
|
34 |
private String dsId; |
|
35 |
|
|
36 |
|
|
37 |
|
|
38 |
@Before |
|
39 |
public void initializeCofiguration(){ |
|
40 |
configurationMap = new HashMap<>(); |
|
41 |
configurationMap.put("issn", new ScholExplorerConfiguration(null, false)); |
|
42 |
configurationMap.put("openaire", new ScholExplorerConfiguration(null, false)); |
|
43 |
configurationMap.put("pmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s")); |
|
44 |
configurationMap.put("doi", new ScholExplorerConfiguration("doi", true,"http://dx.doi.org/%s")); |
|
45 |
configurationMap.put("pbmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s")); |
|
46 |
configurationMap.put("pmcid", new ScholExplorerConfiguration("pmc", true,"https://europepmc.org/articles/%s")); |
|
47 |
configurationMap.put("pubmedid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s")); |
|
48 |
configurationMap.put("icpsr", new ScholExplorerConfiguration(null, false)); |
|
49 |
configurationMap.put("dnet", new ScholExplorerConfiguration(null, false)); |
|
50 |
configurationMap.put("url", new ScholExplorerConfiguration(null, true,"%s")); |
|
51 |
|
|
52 |
setName = "DLI"; |
|
53 |
agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service); |
|
54 |
nsPrefix = "scholexplore"; |
|
55 |
dsName = "ScholExplorer"; |
|
56 |
dsId = "scholexplorer"; |
|
57 |
} |
|
58 |
|
|
59 |
@Test |
|
60 |
public void testSubString () { |
|
61 |
final String dnetId ="50|dli_resolver::7b7b9a57a40818d10cf2532d71f012fa"; |
|
62 |
assertEquals("7b7b9a57a40818d10cf2532d71f012fa", dnetId.substring(17)); |
|
63 |
|
|
64 |
System.out.println(AbstractDNetXsltFunctions.md5("SNSF - Swiss National Science Foundation")); |
|
65 |
} |
|
66 |
|
|
67 |
@Test |
|
68 |
public void testSingleScholixAction() throws IOException { |
|
69 |
doTestSingleScholixAction("/eu/dnetlib/data/mapreduce/actions/part-00000"); |
|
70 |
} |
|
71 |
|
|
72 |
@Test |
|
73 |
public void testSingleScholixAction2() throws IOException { |
|
74 |
doTestSingleScholixAction("/eu/dnetlib/data/mapreduce/actions/scholix.json"); |
|
75 |
} |
|
76 |
|
|
77 |
private void doTestSingleScholixAction(final String filePath) throws IOException { |
|
78 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
79 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
80 |
// in.readLine(); |
|
81 |
// in.readLine(); |
|
82 |
String line = in.readLine(); |
|
83 |
System.out.println(line); |
|
84 |
final JsonParser parser = new JsonParser(); |
|
85 |
JsonObject root = parser.parse(line).getAsJsonObject(); |
|
86 |
List<AtomicAction> actions = ScholixToActions |
|
87 |
.generateActionsFromScholix(root, configurationMap, setName, agent, new ActionFactory(), nsPrefix, dsName, dsId, DateUtils.now_ISO8601()); |
|
88 |
actions.forEach(it-> System.out.println(String.format("%s cf:%s qualifier:%s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn()))); |
|
89 |
|
|
90 |
System.out.println(actions.get(0).toJSON()); |
|
91 |
} |
|
92 |
|
|
93 |
@Test |
|
94 |
public void testScholixAction() throws IOException { |
|
95 |
|
|
96 |
doTestMultipleScholixActions("/eu/dnetlib/data/mapreduce/actions/part-00000"); |
|
97 |
} |
|
98 |
|
|
99 |
@Test |
|
100 |
public void testScholixAction2() throws IOException { |
|
101 |
|
|
102 |
doTestMultipleScholixActions("/eu/dnetlib/data/mapreduce/actions/scholix.json"); |
|
103 |
} |
|
104 |
|
|
105 |
private void doTestMultipleScholixActions(final String filePath) throws IOException { |
|
106 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
107 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
108 |
|
|
109 |
String line = in.readLine(); |
|
110 |
while (StringUtils.isNotEmpty(line)){ |
|
111 |
final JsonParser parser = new JsonParser(); |
|
112 |
JsonObject root = parser.parse(line).getAsJsonObject(); |
|
113 |
try { |
|
114 |
List<AtomicAction> actions = ScholixToActions |
|
115 |
.generateActionsFromScholix(root, configurationMap, setName, agent, new ActionFactory(), nsPrefix, dsName, dsId, DateUtils.now_ISO8601()); |
|
116 |
Assert.assertNotNull(actions); |
|
117 |
Assert.assertTrue(actions.size() > 0); |
|
118 |
} catch (Throwable e) { |
|
119 |
System.out.println(line); |
|
120 |
throw (new RuntimeException(e)); |
|
121 |
} |
|
122 |
line = in.readLine(); |
|
123 |
} |
|
124 |
} |
|
125 |
} |
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/dedup/OafMergeTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.dedup; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.UUID; |
|
5 |
|
|
6 |
import com.google.common.base.Predicate; |
|
7 |
import com.google.common.collect.Iterables; |
|
8 |
import com.google.common.collect.Lists; |
|
9 |
import eu.dnetlib.data.mapreduce.util.OafTest; |
|
10 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
11 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
12 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
13 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
14 |
import eu.dnetlib.data.proto.OafProtos.OafEntity.Builder; |
|
15 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
|
16 |
import eu.dnetlib.data.proto.ResultProtos.Result.Instance; |
|
17 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
18 |
import eu.dnetlib.data.transform.OafEntityMerger; |
|
19 |
import org.junit.Before; |
|
20 |
import org.junit.Test; |
|
21 |
|
|
22 |
public class OafMergeTest { |
|
23 |
|
|
24 |
private List<Oaf> oafList; |
|
25 |
|
|
26 |
private OafEntityMerger merger; |
|
27 |
|
|
28 |
    /**
     * Builds the fixture list of Oaf "result" entities with varying trust levels,
     * ids, pids and metadata, exercising the different merge scenarios below.
     * Kept byte-identical to the original: the merge tests depend on the exact
     * order and content of these records.
     */
    @Before
    public void setUp() throws Exception {

        merger = new OafEntityMerger();
        oafList = Lists.newArrayList();
        // trust 0.1: has a main title and an acceptance date.
        oafList.add(getOaf("0.1").setEntity(
                getEntity("id_1", null, "pid_1").setResult(
                        Result.newBuilder().setMetadata(
                                Result.Metadata.newBuilder().setDateofacceptance(OafTest.sf("2012-01-01"))
                                        .addTitle(OafTest.getStructuredproperty("vvvv Title", "main title", "dnet:dataCite_title"))
                                        .setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
        // trust 0.1: same trust as the first, empty acceptance date, different main title.
        oafList.add(getOaf("0.1").setEntity(
                getEntity("id_2", "originalId_2", "pid_2").setResult(
                        Result.newBuilder().setMetadata(
                                Result.Metadata.newBuilder().setDateofacceptance(OafTest.sf(""))
                                        .addTitle(OafTest.getStructuredproperty("aaaa Title", "main title", "dnet:dataCite_title"))
                                        .setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
        // trust 0.2: shares originalId/pid with the previous record, but only a sub title.
        oafList.add(getOaf("0.2").setEntity(
                getEntity("id_3", "originalId_2", "pid_2").setResult(
                        Result.newBuilder().setMetadata(
                                Result.Metadata.newBuilder().addTitle(OafTest.getStructuredproperty("cccc Title", "sub title", "dnet:dataCite_title"))
                                        .setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());

        // trust 0.3: publisher only, no title.
        // NOTE(review): "id_$" looks like a typo for "id_4" — confirm before changing,
        // the merge output may depend on it.
        oafList.add(getOaf("0.3").setEntity(
                getEntity("id_$", null, "pid_3").setResult(
                        Result.newBuilder().setMetadata(
                                Result.Metadata.newBuilder().setPublisher(OafTest.sf("AMER CHEMICAL SOCXXXXXXXXXXXXXXXXX"))
                                        .setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
        // trust 0.5: no pid; richest metadata (title, publisher, storage date, language, description).
        oafList.add(getOaf("0.5").setEntity(
                getEntity("id_5", null, null).setResult(
                        Result.newBuilder().setMetadata(
                                Result.Metadata.newBuilder().addTitle(OafTest.getStructuredproperty("hhhh title", "main title", "dnet:dataCite_title"))
                                        .setPublisher(OafTest.sf("AMER CHEMICAL SOC X"))
                                        .setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies")).setStoragedate(OafTest.sf("2012-11-18"))
                                        .setLanguage(OafTest.getQualifier("eng", "dnet:languages")).addDescription(OafTest.sf("original description")))))
                .build());
        // trust 0.6 (highest): a new description plus an instance.
        oafList.add(getOaf("0.6").setEntity(
                getEntity("id_6", null, "pid_6").setResult(
                        Result.newBuilder().setMetadata(
                                Result.Metadata.newBuilder().setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))
                                        .addDescription(OafTest.sf("new description"))).addInstance(OafTest.getInstance("id", "name")))).build());
    }
|
70 |
|
|
71 |
@Test |
|
72 |
public void test_merge() { |
|
73 |
|
|
74 |
final Oaf.Builder builder = Oaf.newBuilder(); |
|
75 |
|
|
76 |
for (final Oaf oaf : oafList) { |
|
77 |
builder.mergeFrom(oaf); |
|
78 |
} |
|
79 |
|
|
80 |
final Result.Metadata.Builder metadata = builder.getEntityBuilder().getResultBuilder().getMetadataBuilder(); |
|
81 |
final Iterable<StructuredProperty> filter = Iterables.filter(metadata.getTitleList(), new Predicate<StructuredProperty>() { |
|
82 |
|
|
83 |
@Override |
|
84 |
public boolean apply(final StructuredProperty sp) { |
|
85 |
return (sp.getQualifier() != null) && sp.getQualifier().getClassname().equals("main title"); |
|
86 |
} |
|
87 |
}); |
|
88 |
|
|
89 |
final StructuredProperty last = Iterables.getLast(filter); |
|
90 |
|
|
91 |
metadata.clearTitle().addAllTitle(Lists.newArrayList(last)); |
|
92 |
|
|
93 |
System.out.println(builder.build().toString()); |
|
94 |
} |
|
95 |
|
|
96 |
@Test |
|
97 |
public void test_merger() { |
|
98 |
|
|
99 |
final Oaf merge = merger.mergeEntities(null, "id", oafList).build(); |
|
100 |
|
|
101 |
System.out.println(merge.toString()); |
|
102 |
} |
|
103 |
|
|
104 |
// @Test |
|
105 |
// public void test_sort() { |
|
106 |
// Queue<Result> q = new PriorityQueue<Result>(3, DedupReducer.cmp); |
|
107 |
// for (Oaf oaf : oafList) { |
|
108 |
// q.add(oaf.getEntity().getResult()); |
|
109 |
// } |
|
110 |
// |
|
111 |
// while (!q.isEmpty()) { |
|
112 |
// Result r = q.remove(); |
|
113 |
// List<StructuredProperty> titles = r.getMetadata().getTitleList(); |
|
114 |
// if (!titles.isEmpty()) { |
|
115 |
// System.out.println(titles.get(0).getValue()); |
|
116 |
// } |
|
117 |
// } |
|
118 |
// } |
|
119 |
|
|
120 |
private Oaf.Builder getOaf(final String trust) { |
|
121 |
return Oaf.newBuilder().setKind(Kind.entity).setDataInfo(OafTest.getDataInfo(trust)).setLastupdatetimestamp(System.currentTimeMillis()); |
|
122 |
} |
|
123 |
|
|
124 |
private OafEntity.Builder getEntity(final String id, final String originalId, final String pid) { |
|
125 |
final Builder entity = |
|
126 |
OafEntity.newBuilder().setType(Type.result).setId(id).addOriginalId(originalId != null ? originalId : UUID.randomUUID().toString()); |
|
127 |
|
|
128 |
if (pid != null) { |
|
129 |
entity.addPid(OafTest.getStructuredproperty(pid, "class", "scheme")); |
|
130 |
} |
|
131 |
|
|
132 |
return entity; |
|
133 |
} |
|
134 |
|
|
135 |
} |
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/dedup/cc/VertexWritableTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.dedup.cc; |
|
2 |
|
|
3 |
import java.util.TreeSet; |
|
4 |
|
|
5 |
import eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable; |
|
6 |
import org.apache.commons.lang.StringUtils; |
|
7 |
import org.apache.hadoop.io.Text; |
|
8 |
import org.junit.Test; |
|
9 |
|
|
10 |
import static org.junit.Assert.assertTrue; |
|
11 |
|
|
12 |
/** |
|
13 |
* Created by claudio on 15/10/15. |
|
14 |
*/ |
|
15 |
public class VertexWritableTest { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testSerialise() { |
|
19 |
VertexWritable v = new VertexWritable(); |
|
20 |
|
|
21 |
v.setActivated(true); |
|
22 |
v.setVertexId(new Text("a")); |
|
23 |
|
|
24 |
TreeSet<Text> edges = new TreeSet<Text>(); |
|
25 |
for(int i=0; i<5; i++) { |
|
26 |
edges.add(new Text("" + i)); |
|
27 |
} |
|
28 |
v.setEdges(edges); |
|
29 |
|
|
30 |
assertTrue(v.toString() != null); |
|
31 |
final String json = v.toJSON(); |
|
32 |
assertTrue(StringUtils.isNotBlank(json)); |
|
33 |
|
|
34 |
System.out.println(json); |
|
35 |
|
|
36 |
final VertexWritable v1 = VertexWritable.fromJSON(json); |
|
37 |
final String json1 = v1.toJSON(); |
|
38 |
assertTrue(json.equals(json1)); |
|
39 |
|
|
40 |
System.out.println(json1); |
|
41 |
} |
|
42 |
|
|
43 |
} |
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/dedup/TitleOrderingTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.dedup; |
|
2 |
|
|
3 |
import java.util.LinkedList; |
|
4 |
import java.util.List; |
|
5 |
import java.util.Map; |
|
6 |
import java.util.PriorityQueue; |
|
7 |
import java.util.Queue; |
|
8 |
import java.util.UUID; |
|
9 |
|
|
10 |
import org.apache.commons.io.IOUtils; |
|
11 |
import org.junit.Before; |
|
12 |
import org.junit.Test; |
|
13 |
import org.springframework.core.io.ClassPathResource; |
|
14 |
|
|
15 |
import com.google.common.collect.Lists; |
|
16 |
import com.google.common.collect.Maps; |
|
17 |
|
|
18 |
import eu.dnetlib.pace.clustering.NGramUtils; |
|
19 |
import eu.dnetlib.pace.config.Type; |
|
20 |
import eu.dnetlib.pace.model.Field; |
|
21 |
import eu.dnetlib.pace.model.FieldListImpl; |
|
22 |
import eu.dnetlib.pace.model.FieldValueImpl; |
|
23 |
import eu.dnetlib.pace.model.MapDocument; |
|
24 |
import eu.dnetlib.pace.model.MapDocumentComparator; |
|
25 |
|
|
26 |
public class TitleOrderingTest { |
|
27 |
|
|
28 |
private List<MapDocument> results = Lists.newArrayList(); |
|
29 |
|
|
30 |
@Before |
|
31 |
public void setUp() throws Exception { |
|
32 |
|
|
33 |
final List<String> lines = IOUtils.readLines(new ClassPathResource("eu/dnetlib/data/mapreduce/dedup/titles.txt").getInputStream()); |
|
34 |
for (final String title : lines) { |
|
35 |
final Map<String, Field> fieldMap = Maps.newHashMap(); |
|
36 |
final FieldListImpl list = new FieldListImpl(); |
|
37 |
list.add(new FieldValueImpl(Type.String, "title", title)); |
|
38 |
fieldMap.put("title", list); |
|
39 |
results.add(new MapDocument("id-" + UUID.randomUUID(), fieldMap)); |
|
40 |
} |
|
41 |
} |
|
42 |
|
|
43 |
@Test |
|
44 |
public void test() { |
|
45 |
|
|
46 |
final Queue<MapDocument> queue = new PriorityQueue<MapDocument>(100, new MapDocumentComparator("title")); |
|
47 |
|
|
48 |
queue.addAll(results); |
|
49 |
|
|
50 |
final Queue<MapDocument> queue2 = simplifyQueue(queue); |
|
51 |
|
|
52 |
while (!queue2.isEmpty()) { |
|
53 |
final MapDocument doc = queue2.remove(); |
|
54 |
System.out.println(doc.values("title").stringValue()); |
|
55 |
} |
|
56 |
} |
|
57 |
|
|
58 |
private Queue<MapDocument> simplifyQueue(final Queue<MapDocument> queue) { |
|
59 |
final Queue<MapDocument> q = new LinkedList<MapDocument>(); |
|
60 |
|
|
61 |
String fieldRef = ""; |
|
62 |
final List<MapDocument> tempResults = Lists.newArrayList(); |
|
63 |
|
|
64 |
while (!queue.isEmpty()) { |
|
65 |
final MapDocument result = queue.remove(); |
|
66 |
|
|
67 |
if (!result.values("title").isEmpty()) { |
|
68 |
final String field = NGramUtils.cleanupForOrdering(result.values("title").stringValue()); |
|
69 |
if (field.equals(fieldRef)) { |
|
70 |
tempResults.add(result); |
|
71 |
} else { |
|
72 |
if (tempResults.size() < 5) { |
|
73 |
q.addAll(tempResults); |
|
74 |
} else { |
|
75 |
System.out.println("Skipped field: " + fieldRef + " - size: " + tempResults.size()); |
|
76 |
} |
|
77 |
tempResults.clear(); |
|
78 |
tempResults.add(result); |
|
79 |
fieldRef = field; |
|
80 |
} |
|
81 |
} |
|
82 |
} |
|
83 |
if (tempResults.size() < 5) { |
|
84 |
q.addAll(tempResults); |
|
85 |
} else { |
|
86 |
System.out.println("Skipped field: " + fieldRef + " - size: " + tempResults.size()); |
|
87 |
} |
|
88 |
|
|
89 |
return q; |
|
90 |
} |
|
91 |
|
|
92 |
} |
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/hbase/dedup/experiment/CsvEntryTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.dedup.experiment; |
|
2 |
|
|
3 |
import org.hsqldb.util.CSVWriter; |
|
4 |
import org.junit.Test; |
|
5 |
import org.junit.Before; |
|
6 |
import org.junit.After; |
|
7 |
|
|
8 |
import static org.junit.Assert.assertNotNull; |
|
9 |
import static org.junit.Assert.assertTrue; |
|
10 |
|
|
11 |
/** |
|
12 |
* CsvEntry Tester. |
|
13 |
* |
|
14 |
* @author <Authors name> |
|
15 |
* @version 1.0 |
|
16 |
* @since <pre>Apr 20, 2016</pre> |
|
17 |
*/ |
|
18 |
public class CsvEntryTest { |
|
19 |
|
|
20 |
private CsvEntry csvEntry; |
|
21 |
|
|
22 |
@Before |
|
23 |
public void before() throws Exception { |
|
24 |
csvEntry = new CsvEntry(); |
|
25 |
} |
|
26 |
|
|
27 |
/** |
|
28 |
* Method: addFeature(final String f) |
|
29 |
*/ |
|
30 |
@Test |
|
31 |
public void testAddFeature() throws Exception { |
|
32 |
csvEntry.addFeature("a"); |
|
33 |
assertTrue(csvEntry.getFeatures().contains("a")); |
|
34 |
} |
|
35 |
|
|
36 |
/** |
|
37 |
* Method: getFeatures() |
|
38 |
*/ |
|
39 |
@Test |
|
40 |
public void testGetFeatures() throws Exception { |
|
41 |
csvEntry.addFeature("a"); |
|
42 |
assertNotNull(csvEntry.getFeatures()); |
|
43 |
assertTrue(csvEntry.getFeatures().size() == 1); |
|
44 |
} |
|
45 |
|
|
46 |
/** |
|
47 |
* Method: fromJson(final String json) |
|
48 |
*/ |
|
49 |
@Test |
|
50 |
public void testFromJson() throws Exception { |
|
51 |
csvEntry.addFeature("a"); |
|
52 |
csvEntry.addFeature("b"); |
|
53 |
|
|
54 |
final String json = csvEntry.toString(); |
|
55 |
|
|
56 |
final CsvEntry another = CsvEntry.fromJson(json); |
|
57 |
|
|
58 |
assertTrue(another.equals(csvEntry)); |
|
59 |
} |
|
60 |
|
|
61 |
} |
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/hbase/dedup/experiment/CsvSerialiserTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.dedup.experiment; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertEquals; |
|
4 |
|
|
5 |
import java.util.Iterator; |
|
6 |
import java.util.List; |
|
7 |
import java.util.Set; |
|
8 |
import java.util.UUID; |
|
9 |
|
|
10 |
import com.google.common.base.Predicate; |
|
11 |
import com.google.common.base.Splitter; |
|
12 |
import com.google.common.collect.Iterables; |
|
13 |
import com.google.common.collect.Lists; |
|
14 |
import com.google.common.collect.Sets; |
|
15 |
import eu.dnetlib.pace.model.Person; |
|
16 |
import org.apache.commons.lang.RandomStringUtils; |
|
17 |
import org.apache.commons.lang.StringUtils; |
|
18 |
import org.apache.commons.lang.math.RandomUtils; |
|
19 |
import org.apache.commons.logging.Log; |
|
20 |
import org.apache.commons.logging.LogFactory; |
|
21 |
import org.junit.Before; |
|
22 |
import org.junit.Test; |
|
23 |
|
|
24 |
/**
 * Tests for CsvSerialiser: verifies that randomly generated CsvEntry rows are
 * rendered as CSV where every data line has the same number of columns as the
 * header line.
 *
 * Created by claudio on 26/04/16.
 */
public class CsvSerialiserTest {

	private static final Log log = LogFactory.getLog(CsvSerialiserTest.class);

	// instance under test, rebuilt before each test
	private CsvSerialiser csvSerialiser;

	// fixed universe of feature names used by getCsvEntry()
	private static List<String> ALL_FEATURES = Lists.newLinkedList();

	static {
		ALL_FEATURES.add("a");
		ALL_FEATURES.add("b");
		ALL_FEATURES.add("c");
		ALL_FEATURES.add("d");
		ALL_FEATURES.add("e");
		ALL_FEATURES.add("f");
	}

	@Before
	public void setUp() {
		csvSerialiser = new CsvSerialiser();
	}

	/** Serialises 3..12 fully random entries and checks CSV column consistency. */
	@Test
	public void testSerialiser() {
		final List<CsvEntry> list = Lists.newArrayList();

		// random row count in [3, 12]
		final int nRows = RandomUtils.nextInt(10) + 3;
		for(int i = 0; i<nRows; i++) {
			list.add(getRandomCsvEntry());
		}

		final String csv = csvSerialiser.asCSV(list);

		log.info("\n" + csv);

		verifyLength(csv);
	}

	/** Same as testSerialiser but with entries drawn from the fixed ALL_FEATURES set. */
	@Test
	public void testSerialiser2() {
		final List<CsvEntry> list = Lists.newArrayList();

		final int nRows = RandomUtils.nextInt(10) + 3;
		for(int i = 0; i<nRows; i++) {
			list.add(getCsvEntry(i));
		}

		final String csv = csvSerialiser.asCSV(list);
		log.info("\n" + csv);

		verifyLength(csv);
	}

	/**
	 * Exercises Person name parsing and prints the normalised "surname + initial"
	 * form; no assertion, output is inspected manually.
	 */
	@Test
	public void testSerialiser3() {
		final String name = "Manghi, Paolo as ";
		final Person p = new Person(name, false);
		final String s = p.getSurnameString() + StringUtils.substring(p.getNameString(), 0, 1);

		// strip non-letters and lowercase, as done by the clustering functions
		log.info(String.format("'%s'", s.replaceAll("[^a-zA-Z ]", "").toLowerCase().trim()));
	}

	/**
	 * Asserts that every non-empty CSV line has the same number of comma-separated
	 * cells as the header (first) line.
	 */
	private void verifyLength(final String csv) {
		final Iterator<String> lines = Splitter.on("\n").split(csv).iterator();
		final List<String> header = Lists.newArrayList(Splitter.on(",").split(lines.next()));

		while(lines.hasNext()) {
			List<String> line = Lists.newArrayList(Splitter.on(",").trimResults().omitEmptyStrings().split(lines.next()));
			if (!line.isEmpty()) {
				assertEquals(header.size(), line.size());
			}
		}
	}

	/**
	 * Builds an entry keyed by {@code i} whose features are a random subset of
	 * ALL_FEATURES (each feature kept with probability 1/2).
	 */
	private CsvEntry getCsvEntry(final int i) {
		final CsvEntry e = new CsvEntry();
		e.setKey(i+"");

		final Set<String> features = Sets.newLinkedHashSet(Iterables.filter(ALL_FEATURES, new Predicate<String>() {
			@Override
			public boolean apply(final String s) {
				return RandomUtils.nextBoolean();
			}
		}));

		e.setFeatures(features);

		log.info(String.format("%s - %s", i, features));
		//log.info(e.toString() + "\n");

		return e;
	}

	/**
	 * Builds an entry with a random UUID key and 1..4 random alphabetic features;
	 * features deliberately contain embedded double quotes to stress CSV escaping.
	 */
	public CsvEntry getRandomCsvEntry() {
		final CsvEntry e = new CsvEntry();
		e.setKey(UUID.randomUUID().toString());
		final int nFeatures = RandomUtils.nextInt(4) + 1;
		for(int i = 0; i<nFeatures; i++) {
			e.addFeature("ft." + RandomStringUtils.randomAlphabetic(2) + " \"" + RandomStringUtils.randomAlphabetic(2) + "\" " + RandomStringUtils.randomAlphabetic(2));
		}
		return e;
	}
}
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/hbase/propagation/country/institutionalrepositories/CountryPropagationTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories; |
|
2 |
|
|
3 |
import eu.dnetlib.data.mapreduce.hbase.propagation.NotValidResultSequenceException; |
|
4 |
import eu.dnetlib.data.mapreduce.hbase.propagation.ValueList; |
|
5 |
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder; |
|
6 |
import eu.dnetlib.data.proto.OafProtos; |
|
7 |
import eu.dnetlib.data.proto.TypeProtos; |
|
8 |
import org.apache.hadoop.io.Text; |
|
9 |
import org.elasticsearch.hadoop.util.Assert; |
|
10 |
import org.junit.Test; |
|
11 |
|
|
12 |
import java.io.IOException; |
|
13 |
import java.util.List; |
|
14 |
|
|
15 |
public class CountryPropagationTest { |
|
16 |
List<Text> list; |
|
17 |
ValueList vl; |
|
18 |
|
|
19 |
@Test |
|
20 |
public void testRegularValueList1() throws IOException { |
|
21 |
ValueList vl = new ValueList("pcountry.json"); |
|
22 |
|
|
23 |
list = vl.getValueToText();//new ArrayList<>(Arrays.asList(new Text(Value.newInstance( "p1,p2,p3","0.9", PropagationConstants.Type.fromsemrel).toJson()),new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromresult).toJson()) )); |
|
24 |
|
|
25 |
try { |
|
26 |
ResultCountryIterator rci = new ResultCountryIterator(list, TypeProtos.Type.datasource.getNumber()); |
|
27 |
|
|
28 |
Assert.isTrue(rci.hasNext()); |
|
29 |
while(rci.hasNext()){ |
|
30 |
System.out.println( rci.next().toString()); |
|
31 |
|
|
32 |
} |
|
33 |
|
|
34 |
} catch (NotValidResultSequenceException e) { |
|
35 |
//e.printStackTrace(); |
|
36 |
} |
|
37 |
} |
|
38 |
|
|
39 |
@Test |
|
40 |
public void testRegularValueList2() throws IOException { |
|
41 |
ValueList vl = new ValueList("pcountry2.json"); |
|
42 |
|
|
43 |
list = vl.getValueToText();//new ArrayList<>(Arrays.asList(new Text(Value.newInstance( "p1,p2,p3","0.9", PropagationConstants.Type.fromsemrel).toJson()),new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromresult).toJson()) )); |
|
44 |
|
|
45 |
try { |
|
46 |
ResultCountryIterator rci = new ResultCountryIterator(list, TypeProtos.Type.datasource.getNumber()); |
|
47 |
|
|
48 |
Assert.isTrue(rci.hasNext()); |
|
49 |
while(rci.hasNext()){ |
|
50 |
System.out.println( rci.next().toString()); |
|
51 |
|
|
52 |
} |
|
53 |
|
|
54 |
} catch (NotValidResultSequenceException e) { |
|
55 |
e.printStackTrace(); |
|
56 |
} |
|
57 |
} |
|
58 |
|
|
59 |
|
|
60 |
@Test |
|
61 |
public void differentCountriesTest() throws IOException { |
|
62 |
vl = new ValueList("pcountrynotvalid1.json"); |
|
63 |
|
|
64 |
list = vl.getValueToText();//new ArrayList<>(Arrays.asList(new Text(Value.newInstance( "p1,p2,p3","0.9", PropagationConstants.Type.fromsemrel).toJson()),new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromresult).toJson()) )); |
|
65 |
|
|
66 |
try { |
|
67 |
ResultCountryIterator rci = new ResultCountryIterator(list,TypeProtos.Type.datasource.getNumber()); |
|
68 |
|
|
69 |
Assert.isTrue(!rci.hasNext()); |
|
70 |
|
|
71 |
} catch (NotValidResultSequenceException e) { |
|
72 |
e.printStackTrace(); |
|
73 |
} |
|
74 |
} |
|
75 |
|
|
76 |
@Test |
|
77 |
public void notPresentDataSourceTest() throws IOException{ |
|
78 |
vl = new ValueList("pcountrynotvalid2.json"); |
|
79 |
|
|
80 |
common(); |
|
81 |
} |
|
82 |
|
|
83 |
@Test |
|
84 |
public void missingCountryTest() throws IOException{ |
|
85 |
vl = new ValueList("pcountrynotvalid3.json"); |
|
86 |
common (); |
|
87 |
|
|
88 |
} |
|
89 |
|
|
90 |
@Test |
|
91 |
public void missingResultTest()throws IOException{ |
|
92 |
vl = new ValueList("pcountrynotvalid4.json"); |
|
93 |
common(); |
|
94 |
} |
|
95 |
|
|
96 |
@Test |
|
97 |
public void notValidResultInSequence()throws IOException{ |
|
98 |
vl=new ValueList("pcountrynotvalid5.json"); |
|
99 |
try{ |
|
100 |
ResultCountryIterator rci = new ResultCountryIterator(vl.getValueToText(),TypeProtos.Type.datasource.getNumber()); |
|
101 |
while(rci.hasNext()){ |
|
102 |
try { |
|
103 |
OafProtos.Oaf oap = rci.next().get(0); |
|
104 |
OafRowKeyDecoder.decode(oap.getEntity().getId()); |
|
105 |
System.out.println(oap.toString()); |
|
106 |
}catch(Exception rse){ |
|
107 |
System.out.println(rse.getMessage()); |
|
108 |
} |
|
109 |
} |
|
110 |
}catch(NotValidResultSequenceException e){ |
|
111 |
|
|
112 |
} |
|
113 |
} |
|
114 |
|
|
115 |
private void common() throws IOException { |
|
116 |
list = vl.getValueToText(); |
|
117 |
|
|
118 |
try { |
|
119 |
ResultCountryIterator rci = new ResultCountryIterator(list,TypeProtos.Type.datasource.getNumber()); |
|
120 |
|
|
121 |
Assert.isTrue(false); |
|
122 |
|
|
123 |
} catch (NotValidResultSequenceException e) { |
|
124 |
System.out.println("e.getMessage() = " + e.getMessage()); |
|
125 |
Assert.isTrue(true); |
|
126 |
} |
|
127 |
} |
|
128 |
} |
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/hbase/propagation/ValueList.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.propagation; |
|
2 |
|
|
3 |
import com.google.gson.Gson; |
|
4 |
import org.apache.commons.io.IOUtils; |
|
5 |
import org.apache.hadoop.io.Text; |
|
6 |
|
|
7 |
import java.io.IOException; |
|
8 |
import java.util.List; |
|
9 |
import java.util.stream.Collectors; |
|
10 |
|
|
11 |
public class ValueList { |
|
12 |
private String jsonFile; |
|
13 |
public ValueList(String jsonFile) { |
|
14 |
this.jsonFile=jsonFile; |
|
15 |
} |
|
16 |
|
|
17 |
public List<Value> getValueList() { |
|
18 |
return valueList; |
|
19 |
} |
|
20 |
|
|
21 |
public void setValueList(List<Value> valueList) { |
|
22 |
this.valueList = valueList; |
|
23 |
} |
|
24 |
|
|
25 |
private List<Value> valueList ; |
|
26 |
|
|
27 |
public List<Text> getValueToText() throws IOException { |
|
28 |
ValueList tmp = new Gson().fromJson(IOUtils.toString(getClass().getResourceAsStream(jsonFile)), ValueList.class); |
|
29 |
//IOUtils.toString(getClass().getResourceAsStream("community_configuration.xml")); |
|
30 |
return tmp.getValueList().stream().map(Value::toJson).map(Text::new).collect(Collectors.toList()); |
|
31 |
} |
|
32 |
} |
|
33 |
|
|
34 |
|
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/hbase/propagation/projecttoresult/ProjectPropagationTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.propagation.projecttoresult; |
|
2 |
|
|
3 |
import com.google.gson.Gson; |
|
4 |
import com.googlecode.protobuf.format.JsonFormat; |
|
5 |
import eu.dnetlib.data.mapreduce.hbase.propagation.NotValidResultSequenceException; |
|
6 |
import eu.dnetlib.data.mapreduce.hbase.propagation.PropagationConstants; |
|
7 |
import eu.dnetlib.data.mapreduce.hbase.propagation.Value; |
|
8 |
import eu.dnetlib.data.mapreduce.hbase.propagation.ValueList; |
|
9 |
import eu.dnetlib.data.proto.OafProtos; |
|
10 |
import eu.dnetlib.data.proto.ResultProtos; |
|
11 |
import org.apache.commons.io.IOUtils; |
|
12 |
import org.apache.hadoop.io.Text; |
|
13 |
import org.elasticsearch.hadoop.util.Assert; |
|
14 |
import org.junit.Test; |
|
15 |
|
|
16 |
import java.io.IOException; |
|
17 |
import java.util.ArrayList; |
|
18 |
import java.util.Arrays; |
|
19 |
import java.util.List; |
|
20 |
import java.util.stream.Collectors; |
|
21 |
|
|
22 |
import static org.junit.Assert.*; |
|
23 |
|
|
24 |
public class ProjectPropagationTest { |
|
25 |
|
|
26 |
// ParseResult pr = new ParseResult(); |
|
27 |
List<Text> list; |
|
28 |
|
|
29 |
@Test |
|
30 |
public void testCompleteOverlap() throws IOException { |
|
31 |
// list = new ArrayList<>(Arrays.asList(new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromsemrel).toJson()),new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromresult).toJson()) )); |
|
32 |
ValueList vl = new ValueList("pprojectcompleteoverlap.json"); |
|
33 |
|
|
34 |
list = vl.getValueToText(); |
|
35 |
try { |
|
36 |
ResultProjectIterator it = new ResultProjectIterator(list, "fakeId"); |
|
37 |
Assert.isTrue(!it.hasNext()); |
|
38 |
} catch (NotValidResultSequenceException e) { |
|
39 |
e.printStackTrace(); |
|
40 |
} |
|
41 |
} |
|
42 |
|
|
43 |
@Test |
|
44 |
public void testSemIncludesResult() throws IOException { |
|
45 |
ValueList vl = new ValueList("pprojectsemincludesresult.json"); |
|
46 |
list = vl.getValueToText(); |
|
47 |
//list = new ArrayList<>(Arrays.asList(new Text(Value.newInstance( "p1,p2,p3","0.9", PropagationConstants.Type.fromsemrel).toJson()),new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromresult).toJson()) )); |
|
48 |
int count = 0; |
|
49 |
try { |
|
50 |
ResultProjectIterator it = new ResultProjectIterator(list, "fakeId"); |
|
51 |
Assert.isTrue(it.hasNext()); |
|
52 |
while(it.hasNext()){ |
|
53 |
it.next(); |
|
54 |
count +=1; |
|
55 |
} |
|
56 |
assertEquals(1,count); |
|
57 |
} catch (NotValidResultSequenceException e) { |
|
58 |
e.printStackTrace(); |
|
59 |
} |
|
60 |
} |
|
61 |
|
|
62 |
@Test |
|
63 |
public void testResultIncludesSem() throws IOException { |
|
64 |
ValueList vl = new ValueList("pprojectresultincludessem.json"); |
|
65 |
list = vl.getValueToText(); |
|
66 |
//list = new ArrayList<>(Arrays.asList(new Text(Value.newInstance( "p1,p2,p3","0.9", PropagationConstants.Type.fromsemrel).toJson()),new Text(Value.newInstance( "p1,p2,p3,p4","0.9", PropagationConstants.Type.fromresult).toJson()) )); |
|
67 |
|
|
68 |
try { |
|
69 |
ResultProjectIterator it = new ResultProjectIterator(list, "fakeId"); |
|
70 |
Assert.isTrue(!it.hasNext()); |
|
71 |
|
|
72 |
} catch (NotValidResultSequenceException e) { |
|
73 |
e.printStackTrace(); |
|
74 |
} |
|
75 |
} |
|
76 |
|
|
77 |
@Test |
|
78 |
public void testOafOneRelationBuilder() throws IOException { |
|
79 |
ValueList vl = new ValueList("valuetooaf.json"); |
|
80 |
|
|
81 |
list = vl.getValueToText();//new ArrayList<>(Arrays.asList(new Text(Value.newInstance( "p1,p2,p3","0.9", PropagationConstants.Type.fromsemrel).toJson()),new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromresult).toJson()) )); |
|
82 |
|
|
83 |
try { |
|
84 |
ResultProjectIterator it = new ResultProjectIterator(list, "fakeId"); |
|
85 |
Assert.isTrue(it.hasNext()); |
|
86 |
while(it.hasNext()){ |
|
87 |
System.out.println( it.next().toString()); |
|
88 |
|
|
89 |
} |
|
90 |
|
|
91 |
} catch (NotValidResultSequenceException e) { |
|
92 |
e.printStackTrace(); |
|
93 |
} |
|
94 |
} |
|
95 |
|
|
96 |
@Test |
|
97 |
public void testOafTwoRelationBuilder() throws IOException { |
|
98 |
ValueList vl = new ValueList("pproject2relationbuilder.json"); |
|
99 |
|
|
100 |
list = vl.getValueToText(); |
|
101 |
// list = new ArrayList<>(Arrays.asList( |
|
102 |
// new Text(Value.newInstance( "p1,p2,p3","0.9", PropagationConstants.Type.fromsemrel).toJson()), |
|
103 |
// new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromresult).toJson()), |
|
104 |
// new Text(Value.newInstance( "p3,p4","0.9", PropagationConstants.Type.fromsemrel).toJson()) ) |
|
105 |
// ); |
|
106 |
|
|
107 |
try { |
|
108 |
ResultProjectIterator it = new ResultProjectIterator(list, "fakeId"); |
|
109 |
Assert.isTrue(it.hasNext()); |
|
110 |
int count = 0; |
|
111 |
while(it.hasNext()){ |
|
112 |
System.out.println( it.next().toString()); |
|
113 |
count +=1; |
|
114 |
} |
|
115 |
assertEquals(2,count); |
|
116 |
|
|
117 |
} catch (NotValidResultSequenceException e) { |
|
118 |
e.printStackTrace(); |
|
119 |
} |
|
120 |
} |
|
121 |
|
|
122 |
@Test |
|
123 |
public void testEmptyProjectList() throws IOException{ |
|
124 |
ValueList vl = new ValueList("pprojectemptyprojectlist.json"); |
|
125 |
|
|
126 |
list = vl.getValueToText(); |
|
127 |
|
|
128 |
try { |
|
129 |
ResultProjectIterator it = new ResultProjectIterator(list, "fakeId"); |
|
130 |
Assert.isTrue(!it.hasNext()); |
|
131 |
|
|
132 |
|
|
133 |
} catch (NotValidResultSequenceException e) { |
|
134 |
e.printStackTrace(); |
|
135 |
} |
|
136 |
} |
|
137 |
|
|
138 |
@Test |
|
139 |
public void testParseResult1() throws IOException { |
|
140 |
OafProtos.Oaf oaf = getOaf("wxample.json"); |
|
141 |
System.out.println("oaf = " + oaf.getRel().getTarget()); |
|
142 |
System.out.println(oaf.getDataInfo().toString()); |
|
143 |
} |
|
144 |
|
|
145 |
@Test |
|
146 |
public void testParseResult2() throws IOException{ |
|
147 |
OafProtos.Oaf oaf = getOaf("example2.json"); |
|
148 |
//System.out.println("oaf = " + oaf.getRel().getTarget()); |
|
149 |
List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList(); |
|
150 |
|
|
151 |
|
|
152 |
} |
|
153 |
|
|
154 |
private OafProtos.Oaf getOaf(String oafjson) throws IOException { |
|
155 |
final String json = IOUtils.toString(getClass().getResourceAsStream(oafjson)); |
|
156 |
|
|
157 |
final OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder(); |
|
158 |
JsonFormat.merge(json, oaf); |
|
159 |
|
|
160 |
return oaf.build(); |
|
161 |
} |
|
162 |
|
|
163 |
|
|
164 |
} |
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/hbase/broker/EventWrapperTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.broker; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.io.InputStream; |
|
5 |
import java.util.List; |
|
6 |
import java.util.Map; |
|
7 |
import java.util.Set; |
|
8 |
|
|
9 |
import com.google.common.collect.Maps; |
|
10 |
import com.google.common.collect.Sets; |
|
11 |
import com.googlecode.protobuf.format.JsonFormat; |
|
12 |
import eu.dnetlib.data.proto.OafProtos; |
|
13 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
14 |
import eu.dnetlib.data.transform.Column; |
|
15 |
import eu.dnetlib.data.transform.Row; |
|
16 |
import eu.dnetlib.data.transform.XsltRowTransformer; |
|
17 |
import eu.dnetlib.data.transform.XsltRowTransformerFactory; |
|
18 |
import org.apache.commons.io.IOUtils; |
|
19 |
import org.apache.commons.logging.Log; |
|
20 |
import org.apache.commons.logging.LogFactory; |
|
21 |
import org.apache.hadoop.mapreduce.Counter; |
|
22 |
import org.apache.hadoop.mapreduce.Reducer.Context; |
|
23 |
import org.dom4j.Document; |
|
24 |
import org.dom4j.DocumentException; |
|
25 |
import org.dom4j.io.SAXReader; |
|
26 |
import org.junit.Before; |
|
27 |
import org.junit.Ignore; |
|
28 |
import org.junit.Test; |
|
29 |
import org.junit.runner.RunWith; |
|
30 |
import org.mockito.Mock; |
|
31 |
import org.mockito.junit.MockitoJUnitRunner; |
|
32 |
|
|
33 |
import static org.junit.Assert.*; |
|
34 |
import static org.mockito.ArgumentMatchers.anyString; |
|
35 |
import static org.mockito.Mockito.when; |
|
36 |
|
|
37 |
/** |
|
38 |
* Created by claudio on 22/07/16. |
|
39 |
*/ |
|
40 |
@RunWith(MockitoJUnitRunner.Silent.class) |
|
41 |
public class EventWrapperTest { |
|
42 |
|
|
43 |
private static final Log log = LogFactory.getLog(EventWrapperTest.class); |
|
44 |
|
|
45 |
private Oaf oaf1_1; |
|
46 |
private Oaf oaf1_2; |
|
47 |
private Oaf oaf2_1; |
|
48 |
private Oaf oaf2_2; |
|
49 |
private Oaf oaf3_1; |
|
50 |
private Oaf oaf3_2; |
|
51 |
|
|
52 |
private String xslt; |
|
53 |
|
|
54 |
@Mock |
|
55 |
private Context context; |
|
56 |
|
|
57 |
@Mock |
|
58 |
private Counter counter; |
|
59 |
|
|
60 |
private Map<String, String> baseUrlMap = Maps.newHashMap(); |
|
61 |
|
|
62 |
private static final String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/2hbase/"; |
|
63 |
|
|
64 |
@Before |
|
65 |
public void setUp() throws Exception { |
|
66 |
xslt = loadFromTransformationProfile("oaf2hbase.xml"); |
|
67 |
oaf1_1 = asOaf("recordOaf_1_1.xml"); |
|
68 |
oaf1_2 = asOaf("recordOaf_1_2.xml"); |
|
69 |
oaf2_1 = asOaf("recordOaf_2_1.xml"); |
|
70 |
oaf2_2 = asOaf("recordOaf_2_2.xml"); |
|
71 |
oaf3_1 = asOaf("recordOaf_3_1.xml"); |
|
72 |
oaf3_2 = asOaf("recordOaf_3_2.xml"); |
Also available in: Unified diff
[maven-release-plugin] copy for tag dnet-mapreduce-jobs-1.2.0