Revision 53800
Added by Claudio Atzori over 5 years ago
modules/dnet-mapreduce-jobs/trunk/install.sh | ||
---|---|---|
1 |
#!/bin/bash
# Builds the module and installs its jar-with-dependencies assembly into the
# local Maven repository under the artifactId dnet-mapreduce-jobs-assembly.
# Must be run from the directory that contains pom.xml.

# Read the project version from pom.xml (the "_" prefix is xmlstarlet's
# shorthand for the document's default namespace).
VERSION=$(xmlstarlet sel -t -m "/_:project/_:version" -v "." pom.xml)
if [ -z "$VERSION" ]; then
  # Without a version the -Dfile path below would point at a non-existent jar.
  echo "unable to read the project version from pom.xml" >&2
  exit 1
fi
echo "using version: $VERSION"

# NOTE(review): the outcome of this build is not checked, the script goes on
# even when it fails - confirm whether that is intentional.
mvn clean install
# Drop any previously installed assembly so that a stale copy is never picked up.
rm -rf ~/.m2/repository/eu/dnetlib/dnet-mapreduce-jobs-assembly
mvn assembly:assembly -DskipTests=true && mvn install:install-file "-Dfile=target/dnet-mapreduce-jobs-${VERSION}-jar-with-dependencies.jar" -DgroupId=eu.dnetlib -DartifactId=dnet-mapreduce-jobs-assembly "-Dversion=${VERSION}" -Dpackaging=jar
|
0 | 9 |
modules/dnet-mapreduce-jobs/trunk/deploy.info | ||
---|---|---|
1 |
{"type_source": "SVN", "goal": "package -U source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-mapreduce-jobs/branches/beta", "deploy_repository": "dnet45-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots", "name": "dnet-mapreduce-jobs-BETA"} |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/actions/DOIBoostToActionsTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.actions; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
import com.google.gson.JsonObject; |
|
5 |
import com.google.gson.JsonParser; |
|
6 |
import eu.dnetlib.actionmanager.actions.ActionFactory; |
|
7 |
import eu.dnetlib.actionmanager.actions.AtomicAction; |
|
8 |
import eu.dnetlib.actionmanager.common.Agent; |
|
9 |
import eu.dnetlib.data.mapreduce.hbase.dataimport.DOIBoostToActions; |
|
10 |
import eu.dnetlib.data.transform.Column; |
|
11 |
import eu.dnetlib.data.transform.Row; |
|
12 |
import org.apache.commons.codec.binary.Base64; |
|
13 |
import org.apache.commons.lang3.StringUtils; |
|
14 |
import org.junit.Before; |
|
15 |
import org.junit.Test; |
|
16 |
|
|
17 |
import java.io.*; |
|
18 |
import java.util.List; |
|
19 |
import java.util.zip.DataFormatException; |
|
20 |
import java.util.zip.Inflater; |
|
21 |
|
|
22 |
public class DOIBoostToActionsTest { |
|
23 |
private String setName; |
|
24 |
private Agent agent; |
|
25 |
|
|
26 |
|
|
27 |
|
|
28 |
@Before |
|
29 |
public void setup() { |
|
30 |
setName = "DLI"; |
|
31 |
agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service); |
|
32 |
} |
|
33 |
|
|
34 |
@Test |
|
35 |
public void testSingleDOIBoostAction() throws IOException { |
|
36 |
doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/broken"); |
|
37 |
} |
|
38 |
|
|
39 |
|
|
40 |
@Test |
|
41 |
public void testDOIBoostActionToXML() throws Exception { |
|
42 |
doTestSingleDOIBoostActionToXML("/eu/dnetlib/data/mapreduce/actions/broken"); |
|
43 |
} |
|
44 |
|
|
45 |
|
|
46 |
|
|
47 |
@Test |
|
48 |
public void testMultipleDOIBoostAction() throws IOException { |
|
49 |
doTestAllDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/part-00070"); |
|
50 |
} |
|
51 |
|
|
52 |
|
|
53 |
|
|
54 |
@Test |
|
55 |
public void testDecompression() throws DataFormatException { |
|
56 |
final String s ="eJxtVM1u20YQfpW5pHAAWXCCIECORazKQYAEcHrpcbUckgvs7jA7S8vsye/QU4H25fwk/WYpKUbQ\n" + |
|
57 |
"mygOZ775fuaOXawjeVeY+HHi3IU6F1Ya3QNTCcqZvGQNHRd3iAuFTHVkmpxWen76i1gn9sHF9dWe\n" + |
|
58 |
"S3J5sZJlbeFiYdctaO6ZO+7ozc0rkr414VwD5g5FVKmTxFqDp6lIN/tKV193H2/p7c3N+9db+h3l\n" + |
|
59 |
"yWECuqlk6qWgRcBn/MBRpoRehEdHXXHJWZ+OkwzFTSN++9Hlga21Z9Ut7bKUJLNS4i54F+3NgLXR\n" + |
|
60 |
"IHe2cA15tveV/ZglytCqQkLdA9sw3VCVgbFGoWMAhY4m0VADdlbxQa4ZbSS17/D/DEySN2RERzAS\n" + |
|
61 |
"8oAGGEdusN+TTHM81RwBebRtOtDjK6gFtDlWqwMFzhg72kOe0wHzQSdHCITCBn8skm2uaRKxG9oa\n" + |
|
62 |
"4pVFU5O/z5w9r4R5wAGZkB7/LQYqCsgqNIpOoboY/my4Vm7Oo71oVZpQViFJhbDeKW/pTo5QpGxI\n" + |
|
63 |
"5tKIYAzXtnaWSvAQt71jlOOq4PPTP9bM7BfBoOTnp3+t5KQCen7KWkHZ5n83HyBIySYJiHa1cprq\n" + |
|
64 |
"Os8aBh+Mv178rPadGH1J7G3f46XZZq7hvOPFmY9BG91jy0eLBzTATvDPln6D+/jRpSny5qXtz4wp\n" + |
|
65 |
"HRlf1AKdrW2TonBIh7koqDoslHmQGhpxh7mDkVZ6e7ZAlevOLRvkw2Pi6g9HuoCF1dpwoWl0gfyz\n" + |
|
66 |
"Uk3pwgjJiu/Nhw/vV7K9s8L23dqPjk5Jx9AbkipHVzrIWvi64z7kk6wNWgtPh+PQQG7pWzALGVsH\n" + |
|
67 |
"BsTc6O0tsO8otDCu+cbqB252Ly5rz6Wga8hmgjMGK7qoc17mB2Et3AdnzKHiNrghwyhK9xwbgfsi\n" + |
|
68 |
"86R0dXu/19cowcSru92Xzzv6hb59vLvf/fF1v2/AcEk+GdMhGT5AuMx4eZBCvr6AOIVndXkDUhGv\n" + |
|
69 |
"ebDA9+GRm2S4Ne10YgWVaBvLeibVshDqcvbVqdvz098WbuUWmV/b2WpRPwX9pe3s4tj1XT1dBc6g\n" + |
|
70 |
"NOM+NBcnmOjBrdqdjNxWCeWHXwkKX9xuN0LtVGmwKJqwBgxWSVz+Aw5ePDg="; |
|
71 |
byte[] byteArray = Base64.decodeBase64(s.getBytes()); |
|
72 |
Inflater decompresser = new Inflater(); |
|
73 |
decompresser.setInput(byteArray); |
|
74 |
ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length); |
|
75 |
byte[] buffer = new byte[8192]; |
|
76 |
while (!decompresser.finished()) { |
|
77 |
int size = decompresser.inflate(buffer); |
|
78 |
bos.write(buffer, 0, size); |
|
79 |
} |
|
80 |
byte[] unzippeddata = bos.toByteArray(); |
|
81 |
decompresser.end(); |
|
82 |
|
|
83 |
System.out.println(new String(unzippeddata)); |
|
84 |
|
|
85 |
} |
|
86 |
|
|
87 |
|
|
88 |
private void doTestSingleDOIBoostActionToXML(final String filePath) throws Exception { |
|
89 |
final List<Row> rows = Lists.newArrayList(); |
|
90 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
91 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
92 |
|
|
93 |
String line = in.readLine(); |
|
94 |
|
|
95 |
final JsonParser parser = new JsonParser(); |
|
96 |
JsonObject root = parser.parse(line).getAsJsonObject(); |
|
97 |
List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false); |
|
98 |
if (actions!= null) { |
|
99 |
actions.forEach(action-> { |
|
100 |
if (action.getTargetColumn().equals("body") && action.getTargetColumnFamily().equals("result")) |
|
101 |
{ |
|
102 |
Column<String, byte[]> col = new Column<>("body" , action.getTargetValue()); |
|
103 |
rows.add(new Row("result",action.getTargetRowKey() , Lists.newArrayList(col))); |
|
104 |
} |
|
105 |
|
|
106 |
}); |
|
107 |
|
|
108 |
|
|
109 |
|
|
110 |
} |
|
111 |
|
|
112 |
|
|
113 |
|
|
114 |
} |
|
115 |
|
|
116 |
private void doTestSingleDOIBoostAction(final String filePath) throws IOException { |
|
117 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
118 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
119 |
|
|
120 |
String line = in.readLine(); |
|
121 |
|
|
122 |
final JsonParser parser = new JsonParser(); |
|
123 |
JsonObject root = parser.parse(line).getAsJsonObject(); |
|
124 |
List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false); |
|
125 |
if (actions!= null) { |
|
126 |
|
|
127 |
actions.forEach(it -> System.out.println(String.format(" RowKey:%s TargetColumnFamily:%s TargetColumn: %s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn()))); |
|
128 |
} |
|
129 |
} |
|
130 |
|
|
131 |
private void doTestAllDOIBoostAction(final String filePath) throws IOException { |
|
132 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
133 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
134 |
|
|
135 |
String line = in.readLine(); |
|
136 |
int i = 0; |
|
137 |
int cnt = 0; |
|
138 |
while(StringUtils.isNotBlank(line)) { |
|
139 |
cnt ++; |
|
140 |
|
|
141 |
final JsonParser parser = new JsonParser(); |
|
142 |
JsonObject root = parser.parse(line).getAsJsonObject(); |
|
143 |
try { |
|
144 |
List<AtomicAction> atomicActions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false); |
|
145 |
if (atomicActions!= null) |
|
146 |
{ |
|
147 |
i ++; |
|
148 |
} |
|
149 |
// atomicActions.forEach(it -> System.out.println(String.format(" RowKey:%s TargetColumnFamily:%s TargetColumn: %s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn()))); |
|
150 |
else{ |
|
151 |
System.out.println("SKIPPED Type "+ root.get("type").getAsString()); |
|
152 |
} |
|
153 |
|
|
154 |
} catch (Throwable e) { |
|
155 |
System.out.println(line); |
|
156 |
throw new RuntimeException(e); |
|
157 |
} |
|
158 |
line= in.readLine(); |
|
159 |
} |
|
160 |
|
|
161 |
System.out.println("total "+i+" / "+cnt); |
|
162 |
} |
|
163 |
|
|
164 |
|
|
165 |
|
|
166 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/actions/CrossRefToActionTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.actions; |
|
2 |
|
|
3 |
import com.google.gson.JsonObject; |
|
4 |
import com.google.gson.JsonParser; |
|
5 |
import eu.dnetlib.actionmanager.actions.ActionFactory; |
|
6 |
import eu.dnetlib.actionmanager.actions.AtomicAction; |
|
7 |
import eu.dnetlib.actionmanager.common.Agent; |
|
8 |
import eu.dnetlib.data.mapreduce.hbase.dataimport.CrossRefToActions; |
|
9 |
import org.apache.commons.lang3.StringUtils; |
|
10 |
import org.junit.Before; |
|
11 |
import org.junit.Ignore; |
|
12 |
import org.junit.Test; |
|
13 |
|
|
14 |
import java.io.BufferedReader; |
|
15 |
import java.io.IOException; |
|
16 |
import java.io.InputStream; |
|
17 |
import java.io.InputStreamReader; |
|
18 |
|
|
19 |
public class CrossRefToActionTest { |
|
20 |
|
|
21 |
private String setName; |
|
22 |
private Agent agent; |
|
23 |
|
|
24 |
@Before |
|
25 |
public void setup() { |
|
26 |
setName = "DLI"; |
|
27 |
agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service); |
|
28 |
} |
|
29 |
|
|
30 |
|
|
31 |
@Ignore |
|
32 |
@Test |
|
33 |
public void testSingleScholixAction2() throws IOException { |
|
34 |
doTestSingleCrossRefAction("/eu/dnetlib/data/mapreduce/actions/broken"); |
|
35 |
} |
|
36 |
|
|
37 |
@Test |
|
38 |
public void testAllScholixAction() throws IOException { |
|
39 |
doTestAllCrossRefAction("/eu/dnetlib/data/mapreduce/actions/part-06036"); |
|
40 |
} |
|
41 |
|
|
42 |
|
|
43 |
private void doTestSingleCrossRefAction(final String filePath) throws IOException { |
|
44 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
45 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
46 |
|
|
47 |
String line = in.readLine(); |
|
48 |
System.out.println(line); |
|
49 |
final JsonParser parser = new JsonParser(); |
|
50 |
JsonObject root = parser.parse(line).getAsJsonObject(); |
|
51 |
AtomicAction actions = CrossRefToActions.generateActionsFromDump(root, new ActionFactory(), setName, agent, false); |
|
52 |
System.out.println(actions.toJSON()); |
|
53 |
} |
|
54 |
|
|
55 |
|
|
56 |
|
|
57 |
|
|
58 |
private void doTestAllCrossRefAction(final String filePath) throws IOException { |
|
59 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
60 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
61 |
|
|
62 |
String line = in.readLine(); |
|
63 |
while(StringUtils.isNotBlank(line)) { |
|
64 |
|
|
65 |
final JsonParser parser = new JsonParser(); |
|
66 |
JsonObject root = parser.parse(line).getAsJsonObject(); |
|
67 |
try { |
|
68 |
CrossRefToActions.generateActionsFromDump(root, new ActionFactory(), setName, agent, false); |
|
69 |
} catch (Throwable e) { |
|
70 |
System.out.println(line); |
|
71 |
throw new RuntimeException(e); |
|
72 |
} |
|
73 |
line= in.readLine(); |
|
74 |
} |
|
75 |
} |
|
76 |
|
|
77 |
|
|
78 |
|
|
79 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/actions/ScholexplorerActionMapperTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.actions; |
|
2 |
|
|
3 |
import java.io.BufferedReader; |
|
4 |
import java.io.IOException; |
|
5 |
import java.io.InputStream; |
|
6 |
import java.io.InputStreamReader; |
|
7 |
import java.util.HashMap; |
|
8 |
import java.util.List; |
|
9 |
import java.util.Map; |
|
10 |
|
|
11 |
import com.google.gson.JsonObject; |
|
12 |
import com.google.gson.JsonParser; |
|
13 |
import eu.dnetlib.actionmanager.actions.ActionFactory; |
|
14 |
import eu.dnetlib.actionmanager.actions.AtomicAction; |
|
15 |
import eu.dnetlib.actionmanager.common.Agent; |
|
16 |
import eu.dnetlib.data.mapreduce.hbase.dataimport.ScholExplorerConfiguration; |
|
17 |
import eu.dnetlib.data.mapreduce.hbase.dataimport.ScholixToActions; |
|
18 |
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions; |
|
19 |
import eu.dnetlib.miscutils.datetime.DateUtils; |
|
20 |
import org.apache.commons.lang3.StringUtils; |
|
21 |
import org.junit.Assert; |
|
22 |
import org.junit.Before; |
|
23 |
import org.junit.Test; |
|
24 |
|
|
25 |
import static org.junit.Assert.assertEquals; |
|
26 |
|
|
27 |
public class ScholexplorerActionMapperTest { |
|
28 |
|
|
29 |
private Map<String, ScholExplorerConfiguration> configurationMap; |
|
30 |
private String setName; |
|
31 |
private Agent agent; |
|
32 |
private String nsPrefix; |
|
33 |
private String dsName; |
|
34 |
private String dsId; |
|
35 |
|
|
36 |
|
|
37 |
|
|
38 |
@Before |
|
39 |
public void initializeCofiguration(){ |
|
40 |
configurationMap = new HashMap<>(); |
|
41 |
configurationMap.put("issn", new ScholExplorerConfiguration(null, false)); |
|
42 |
configurationMap.put("openaire", new ScholExplorerConfiguration(null, false)); |
|
43 |
configurationMap.put("pmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s")); |
|
44 |
configurationMap.put("doi", new ScholExplorerConfiguration("doi", true,"http://dx.doi.org/%s")); |
|
45 |
configurationMap.put("pbmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s")); |
|
46 |
configurationMap.put("pmcid", new ScholExplorerConfiguration("pmc", true,"https://europepmc.org/articles/%s")); |
|
47 |
configurationMap.put("pubmedid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s")); |
|
48 |
configurationMap.put("icpsr", new ScholExplorerConfiguration(null, false)); |
|
49 |
configurationMap.put("dnet", new ScholExplorerConfiguration(null, false)); |
|
50 |
configurationMap.put("url", new ScholExplorerConfiguration(null, true,"%s")); |
|
51 |
|
|
52 |
setName = "DLI"; |
|
53 |
agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service); |
|
54 |
nsPrefix = "scholexplore"; |
|
55 |
dsName = "ScholExplorer"; |
|
56 |
dsId = "scholexplorer"; |
|
57 |
} |
|
58 |
|
|
59 |
@Test |
|
60 |
public void testSubString () { |
|
61 |
final String dnetId ="50|dli_resolver::7b7b9a57a40818d10cf2532d71f012fa"; |
|
62 |
assertEquals("7b7b9a57a40818d10cf2532d71f012fa", dnetId.substring(17)); |
|
63 |
|
|
64 |
System.out.println(AbstractDNetXsltFunctions.md5("SNSF - Swiss National Science Foundation")); |
|
65 |
} |
|
66 |
|
|
67 |
@Test |
|
68 |
public void testSingleScholixAction() throws IOException { |
|
69 |
doTestSingleScholixAction("/eu/dnetlib/data/mapreduce/actions/part-00000"); |
|
70 |
} |
|
71 |
|
|
72 |
@Test |
|
73 |
public void testSingleScholixAction2() throws IOException { |
|
74 |
doTestSingleScholixAction("/eu/dnetlib/data/mapreduce/actions/scholix.json"); |
|
75 |
} |
|
76 |
|
|
77 |
private void doTestSingleScholixAction(final String filePath) throws IOException { |
|
78 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
79 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
80 |
// in.readLine(); |
|
81 |
// in.readLine(); |
|
82 |
String line = in.readLine(); |
|
83 |
System.out.println(line); |
|
84 |
final JsonParser parser = new JsonParser(); |
|
85 |
JsonObject root = parser.parse(line).getAsJsonObject(); |
|
86 |
List<AtomicAction> actions = ScholixToActions |
|
87 |
.generateActionsFromScholix(root, configurationMap, setName, agent, new ActionFactory(), nsPrefix, dsName, dsId, DateUtils.now_ISO8601()); |
|
88 |
actions.forEach(it-> System.out.println(String.format("%s cf:%s qualifier:%s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn()))); |
|
89 |
|
|
90 |
System.out.println(actions.get(0).toJSON()); |
|
91 |
} |
|
92 |
|
|
93 |
@Test |
|
94 |
public void testScholixAction() throws IOException { |
|
95 |
|
|
96 |
doTestMultipleScholixActions("/eu/dnetlib/data/mapreduce/actions/part-00000"); |
|
97 |
} |
|
98 |
|
|
99 |
@Test |
|
100 |
public void testScholixAction2() throws IOException { |
|
101 |
|
|
102 |
doTestMultipleScholixActions("/eu/dnetlib/data/mapreduce/actions/scholix.json"); |
|
103 |
} |
|
104 |
|
|
105 |
private void doTestMultipleScholixActions(final String filePath) throws IOException { |
|
106 |
final InputStream is = this.getClass().getResourceAsStream(filePath); |
|
107 |
final BufferedReader in = new BufferedReader(new InputStreamReader(is)); |
|
108 |
|
|
109 |
String line = in.readLine(); |
|
110 |
while (StringUtils.isNotEmpty(line)){ |
|
111 |
final JsonParser parser = new JsonParser(); |
|
112 |
JsonObject root = parser.parse(line).getAsJsonObject(); |
|
113 |
try { |
|
114 |
List<AtomicAction> actions = ScholixToActions |
|
115 |
.generateActionsFromScholix(root, configurationMap, setName, agent, new ActionFactory(), nsPrefix, dsName, dsId, DateUtils.now_ISO8601()); |
|
116 |
Assert.assertNotNull(actions); |
|
117 |
Assert.assertTrue(actions.size() > 0); |
|
118 |
} catch (Throwable e) { |
|
119 |
System.out.println(line); |
|
120 |
throw (new RuntimeException(e)); |
|
121 |
} |
|
122 |
line = in.readLine(); |
|
123 |
} |
|
124 |
} |
|
125 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/dedup/OafMergeTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.dedup; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.UUID; |
|
5 |
|
|
6 |
import com.google.common.base.Predicate; |
|
7 |
import com.google.common.collect.Iterables; |
|
8 |
import com.google.common.collect.Lists; |
|
9 |
import eu.dnetlib.data.mapreduce.util.OafTest; |
|
10 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
11 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
12 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
13 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
14 |
import eu.dnetlib.data.proto.OafProtos.OafEntity.Builder; |
|
15 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
|
16 |
import eu.dnetlib.data.proto.ResultProtos.Result.Instance; |
|
17 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
18 |
import eu.dnetlib.data.transform.OafEntityMerger; |
|
19 |
import org.junit.Before; |
|
20 |
import org.junit.Test; |
|
21 |
|
|
22 |
public class OafMergeTest { |
|
23 |
|
|
24 |
private List<Oaf> oafList; |
|
25 |
|
|
26 |
private OafEntityMerger merger; |
|
27 |
|
|
28 |
@Before |
|
29 |
public void setUp() throws Exception { |
|
30 |
|
|
31 |
merger = new OafEntityMerger(); |
|
32 |
oafList = Lists.newArrayList(); |
|
33 |
oafList.add(getOaf("0.1").setEntity( |
|
34 |
getEntity("id_1", null, "pid_1").setResult( |
|
35 |
Result.newBuilder().setMetadata( |
|
36 |
Result.Metadata.newBuilder().setDateofacceptance(OafTest.sf("2012-01-01")) |
|
37 |
.addTitle(OafTest.getStructuredproperty("vvvv Title", "main title", "dnet:dataCite_title")) |
|
38 |
.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build()); |
|
39 |
oafList.add(getOaf("0.1").setEntity( |
|
40 |
getEntity("id_2", "originalId_2", "pid_2").setResult( |
|
41 |
Result.newBuilder().setMetadata( |
|
42 |
Result.Metadata.newBuilder().setDateofacceptance(OafTest.sf("")) |
|
43 |
.addTitle(OafTest.getStructuredproperty("aaaa Title", "main title", "dnet:dataCite_title")) |
|
44 |
.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build()); |
|
45 |
oafList.add(getOaf("0.2").setEntity( |
|
46 |
getEntity("id_3", "originalId_2", "pid_2").setResult( |
|
47 |
Result.newBuilder().setMetadata( |
|
48 |
Result.Metadata.newBuilder().addTitle(OafTest.getStructuredproperty("cccc Title", "sub title", "dnet:dataCite_title")) |
|
49 |
.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build()); |
|
50 |
|
|
51 |
oafList.add(getOaf("0.3").setEntity( |
|
52 |
getEntity("id_$", null, "pid_3").setResult( |
|
53 |
Result.newBuilder().setMetadata( |
|
54 |
Result.Metadata.newBuilder().setPublisher(OafTest.sf("AMER CHEMICAL SOCXXXXXXXXXXXXXXXXX")) |
|
55 |
.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build()); |
|
56 |
oafList.add(getOaf("0.5").setEntity( |
|
57 |
getEntity("id_5", null, null).setResult( |
|
58 |
Result.newBuilder().setMetadata( |
|
59 |
Result.Metadata.newBuilder().addTitle(OafTest.getStructuredproperty("hhhh title", "main title", "dnet:dataCite_title")) |
|
60 |
.setPublisher(OafTest.sf("AMER CHEMICAL SOC X")) |
|
61 |
.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies")).setStoragedate(OafTest.sf("2012-11-18")) |
|
62 |
.setLanguage(OafTest.getQualifier("eng", "dnet:languages")).addDescription(OafTest.sf("original description"))))) |
|
63 |
.build()); |
|
64 |
oafList.add(getOaf("0.6").setEntity( |
|
65 |
getEntity("id_6", null, "pid_6").setResult( |
|
66 |
Result.newBuilder().setMetadata( |
|
67 |
Result.Metadata.newBuilder().setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies")) |
|
68 |
.addDescription(OafTest.sf("new description"))).addInstance(OafTest.getInstance("id", "name")))).build()); |
|
69 |
} |
|
70 |
|
|
71 |
@Test |
|
72 |
public void test_merge() { |
|
73 |
|
|
74 |
final Oaf.Builder builder = Oaf.newBuilder(); |
|
75 |
|
|
76 |
for (final Oaf oaf : oafList) { |
|
77 |
builder.mergeFrom(oaf); |
|
78 |
} |
|
79 |
|
|
80 |
final Result.Metadata.Builder metadata = builder.getEntityBuilder().getResultBuilder().getMetadataBuilder(); |
|
81 |
final Iterable<StructuredProperty> filter = Iterables.filter(metadata.getTitleList(), new Predicate<StructuredProperty>() { |
|
82 |
|
|
83 |
@Override |
|
84 |
public boolean apply(final StructuredProperty sp) { |
|
85 |
return (sp.getQualifier() != null) && sp.getQualifier().getClassname().equals("main title"); |
|
86 |
} |
|
87 |
}); |
|
88 |
|
|
89 |
final StructuredProperty last = Iterables.getLast(filter); |
|
90 |
|
|
91 |
metadata.clearTitle().addAllTitle(Lists.newArrayList(last)); |
|
92 |
|
|
93 |
System.out.println(builder.build().toString()); |
|
94 |
} |
|
95 |
|
|
96 |
@Test |
|
97 |
public void test_merger() { |
|
98 |
|
|
99 |
final Oaf merge = merger.mergeEntities(null, "id", oafList).build(); |
|
100 |
|
|
101 |
System.out.println(merge.toString()); |
|
102 |
} |
|
103 |
|
|
104 |
// @Test |
|
105 |
// public void test_sort() { |
|
106 |
// Queue<Result> q = new PriorityQueue<Result>(3, DedupReducer.cmp); |
|
107 |
// for (Oaf oaf : oafList) { |
|
108 |
// q.add(oaf.getEntity().getResult()); |
|
109 |
// } |
|
110 |
// |
|
111 |
// while (!q.isEmpty()) { |
|
112 |
// Result r = q.remove(); |
|
113 |
// List<StructuredProperty> titles = r.getMetadata().getTitleList(); |
|
114 |
// if (!titles.isEmpty()) { |
|
115 |
// System.out.println(titles.get(0).getValue()); |
|
116 |
// } |
|
117 |
// } |
|
118 |
// } |
|
119 |
|
|
120 |
private Oaf.Builder getOaf(final String trust) { |
|
121 |
return Oaf.newBuilder().setKind(Kind.entity).setDataInfo(OafTest.getDataInfo(trust)).setLastupdatetimestamp(System.currentTimeMillis()); |
|
122 |
} |
|
123 |
|
|
124 |
private OafEntity.Builder getEntity(final String id, final String originalId, final String pid) { |
|
125 |
final Builder entity = |
|
126 |
OafEntity.newBuilder().setType(Type.result).setId(id).addOriginalId(originalId != null ? originalId : UUID.randomUUID().toString()); |
|
127 |
|
|
128 |
if (pid != null) { |
|
129 |
entity.addPid(OafTest.getStructuredproperty(pid, "class", "scheme")); |
|
130 |
} |
|
131 |
|
|
132 |
return entity; |
|
133 |
} |
|
134 |
|
|
135 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/dedup/cc/VertexWritableTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.dedup.cc; |
|
2 |
|
|
3 |
import java.util.TreeSet; |
|
4 |
|
|
5 |
import eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable; |
|
6 |
import org.apache.commons.lang.StringUtils; |
|
7 |
import org.apache.hadoop.io.Text; |
|
8 |
import org.junit.Test; |
|
9 |
|
|
10 |
import static org.junit.Assert.assertTrue; |
|
11 |
|
|
12 |
/** |
|
13 |
* Created by claudio on 15/10/15. |
|
14 |
*/ |
|
15 |
public class VertexWritableTest { |
|
16 |
|
|
17 |
@Test |
|
18 |
public void testSerialise() { |
|
19 |
VertexWritable v = new VertexWritable(); |
|
20 |
|
|
21 |
v.setActivated(true); |
|
22 |
v.setVertexId(new Text("a")); |
|
23 |
|
|
24 |
TreeSet<Text> edges = new TreeSet<Text>(); |
|
25 |
for(int i=0; i<5; i++) { |
|
26 |
edges.add(new Text("" + i)); |
|
27 |
} |
|
28 |
v.setEdges(edges); |
|
29 |
|
|
30 |
assertTrue(v.toString() != null); |
|
31 |
final String json = v.toJSON(); |
|
32 |
assertTrue(StringUtils.isNotBlank(json)); |
|
33 |
|
|
34 |
System.out.println(json); |
|
35 |
|
|
36 |
final VertexWritable v1 = VertexWritable.fromJSON(json); |
|
37 |
final String json1 = v1.toJSON(); |
|
38 |
assertTrue(json.equals(json1)); |
|
39 |
|
|
40 |
System.out.println(json1); |
|
41 |
} |
|
42 |
|
|
43 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/dedup/TitleOrderingTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.dedup; |
|
2 |
|
|
3 |
import java.util.LinkedList; |
|
4 |
import java.util.List; |
|
5 |
import java.util.Map; |
|
6 |
import java.util.PriorityQueue; |
|
7 |
import java.util.Queue; |
|
8 |
import java.util.UUID; |
|
9 |
|
|
10 |
import org.apache.commons.io.IOUtils; |
|
11 |
import org.junit.Before; |
|
12 |
import org.junit.Test; |
|
13 |
import org.springframework.core.io.ClassPathResource; |
|
14 |
|
|
15 |
import com.google.common.collect.Lists; |
|
16 |
import com.google.common.collect.Maps; |
|
17 |
|
|
18 |
import eu.dnetlib.pace.clustering.NGramUtils; |
|
19 |
import eu.dnetlib.pace.config.Type; |
|
20 |
import eu.dnetlib.pace.model.Field; |
|
21 |
import eu.dnetlib.pace.model.FieldListImpl; |
|
22 |
import eu.dnetlib.pace.model.FieldValueImpl; |
|
23 |
import eu.dnetlib.pace.model.MapDocument; |
|
24 |
import eu.dnetlib.pace.model.MapDocumentComparator; |
|
25 |
|
|
26 |
public class TitleOrderingTest { |
|
27 |
|
|
28 |
private List<MapDocument> results = Lists.newArrayList(); |
|
29 |
|
|
30 |
@Before |
|
31 |
public void setUp() throws Exception { |
|
32 |
|
|
33 |
final List<String> lines = IOUtils.readLines(new ClassPathResource("eu/dnetlib/data/mapreduce/dedup/titles.txt").getInputStream()); |
|
34 |
for (final String title : lines) { |
|
35 |
final Map<String, Field> fieldMap = Maps.newHashMap(); |
|
36 |
final FieldListImpl list = new FieldListImpl(); |
|
37 |
list.add(new FieldValueImpl(Type.String, "title", title)); |
|
38 |
fieldMap.put("title", list); |
|
39 |
results.add(new MapDocument("id-" + UUID.randomUUID(), fieldMap)); |
|
40 |
} |
|
41 |
} |
|
42 |
|
|
43 |
@Test |
|
44 |
public void test() { |
|
45 |
|
|
46 |
final Queue<MapDocument> queue = new PriorityQueue<MapDocument>(100, new MapDocumentComparator("title")); |
|
47 |
|
|
48 |
queue.addAll(results); |
|
49 |
|
|
50 |
final Queue<MapDocument> queue2 = simplifyQueue(queue); |
|
51 |
|
|
52 |
while (!queue2.isEmpty()) { |
|
53 |
final MapDocument doc = queue2.remove(); |
|
54 |
System.out.println(doc.values("title").stringValue()); |
|
55 |
} |
|
56 |
} |
|
57 |
|
|
58 |
private Queue<MapDocument> simplifyQueue(final Queue<MapDocument> queue) { |
|
59 |
final Queue<MapDocument> q = new LinkedList<MapDocument>(); |
|
60 |
|
|
61 |
String fieldRef = ""; |
|
62 |
final List<MapDocument> tempResults = Lists.newArrayList(); |
|
63 |
|
|
64 |
while (!queue.isEmpty()) { |
|
65 |
final MapDocument result = queue.remove(); |
|
66 |
|
|
67 |
if (!result.values("title").isEmpty()) { |
|
68 |
final String field = NGramUtils.cleanupForOrdering(result.values("title").stringValue()); |
|
69 |
if (field.equals(fieldRef)) { |
|
70 |
tempResults.add(result); |
|
71 |
} else { |
|
72 |
if (tempResults.size() < 5) { |
|
73 |
q.addAll(tempResults); |
|
74 |
} else { |
|
75 |
System.out.println("Skipped field: " + fieldRef + " - size: " + tempResults.size()); |
|
76 |
} |
|
77 |
tempResults.clear(); |
|
78 |
tempResults.add(result); |
|
79 |
fieldRef = field; |
|
80 |
} |
|
81 |
} |
|
82 |
} |
|
83 |
if (tempResults.size() < 5) { |
|
84 |
q.addAll(tempResults); |
|
85 |
} else { |
|
86 |
System.out.println("Skipped field: " + fieldRef + " - size: " + tempResults.size()); |
|
87 |
} |
|
88 |
|
|
89 |
return q; |
|
90 |
} |
|
91 |
|
|
92 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/dedup/experiment/CsvEntryTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.dedup.experiment; |
|
2 |
|
|
3 |
import org.hsqldb.util.CSVWriter; |
|
4 |
import org.junit.Test; |
|
5 |
import org.junit.Before; |
|
6 |
import org.junit.After; |
|
7 |
|
|
8 |
import static org.junit.Assert.assertNotNull; |
|
9 |
import static org.junit.Assert.assertTrue; |
|
10 |
|
|
11 |
/** |
|
12 |
* CsvEntry Tester. |
|
13 |
* |
|
14 |
* @author <Authors name> |
|
15 |
* @version 1.0 |
|
16 |
* @since <pre>Apr 20, 2016</pre> |
|
17 |
*/ |
|
18 |
public class CsvEntryTest { |
|
19 |
|
|
20 |
private CsvEntry csvEntry; |
|
21 |
|
|
22 |
@Before |
|
23 |
public void before() throws Exception { |
|
24 |
csvEntry = new CsvEntry(); |
|
25 |
} |
|
26 |
|
|
27 |
/** |
|
28 |
* Method: addFeature(final String f) |
|
29 |
*/ |
|
30 |
@Test |
|
31 |
public void testAddFeature() throws Exception { |
|
32 |
csvEntry.addFeature("a"); |
|
33 |
assertTrue(csvEntry.getFeatures().contains("a")); |
|
34 |
} |
|
35 |
|
|
36 |
/** |
|
37 |
* Method: getFeatures() |
|
38 |
*/ |
|
39 |
@Test |
|
40 |
public void testGetFeatures() throws Exception { |
|
41 |
csvEntry.addFeature("a"); |
|
42 |
assertNotNull(csvEntry.getFeatures()); |
|
43 |
assertTrue(csvEntry.getFeatures().size() == 1); |
|
44 |
} |
|
45 |
|
|
46 |
/** |
|
47 |
* Method: fromJson(final String json) |
|
48 |
*/ |
|
49 |
@Test |
|
50 |
public void testFromJson() throws Exception { |
|
51 |
csvEntry.addFeature("a"); |
|
52 |
csvEntry.addFeature("b"); |
|
53 |
|
|
54 |
final String json = csvEntry.toString(); |
|
55 |
|
|
56 |
final CsvEntry another = CsvEntry.fromJson(json); |
|
57 |
|
|
58 |
assertTrue(another.equals(csvEntry)); |
|
59 |
} |
|
60 |
|
|
61 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/dedup/experiment/CsvSerialiserTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.dedup.experiment; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertEquals; |
|
4 |
|
|
5 |
import java.util.Iterator; |
|
6 |
import java.util.List; |
|
7 |
import java.util.Set; |
|
8 |
import java.util.UUID; |
|
9 |
|
|
10 |
import com.google.common.base.Predicate; |
|
11 |
import com.google.common.base.Splitter; |
|
12 |
import com.google.common.collect.Iterables; |
|
13 |
import com.google.common.collect.Lists; |
|
14 |
import com.google.common.collect.Sets; |
|
15 |
import eu.dnetlib.pace.model.Person; |
|
16 |
import org.apache.commons.lang.RandomStringUtils; |
|
17 |
import org.apache.commons.lang.StringUtils; |
|
18 |
import org.apache.commons.lang.math.RandomUtils; |
|
19 |
import org.apache.commons.logging.Log; |
|
20 |
import org.apache.commons.logging.LogFactory; |
|
21 |
import org.junit.Before; |
|
22 |
import org.junit.Test; |
|
23 |
|
|
24 |
/** |
|
25 |
* Created by claudio on 26/04/16. |
|
26 |
*/ |
|
27 |
public class CsvSerialiserTest { |
|
28 |
|
|
29 |
private static final Log log = LogFactory.getLog(CsvSerialiserTest.class); |
|
30 |
|
|
31 |
private CsvSerialiser csvSerialiser; |
|
32 |
|
|
33 |
private static List<String> ALL_FEATURES = Lists.newLinkedList(); |
|
34 |
|
|
35 |
static { |
|
36 |
ALL_FEATURES.add("a"); |
|
37 |
ALL_FEATURES.add("b"); |
|
38 |
ALL_FEATURES.add("c"); |
|
39 |
ALL_FEATURES.add("d"); |
|
40 |
ALL_FEATURES.add("e"); |
|
41 |
ALL_FEATURES.add("f"); |
|
42 |
} |
|
43 |
|
|
44 |
@Before |
|
45 |
public void setUp() { |
|
46 |
csvSerialiser = new CsvSerialiser(); |
|
47 |
} |
|
48 |
|
|
49 |
@Test |
|
50 |
public void testSerialiser() { |
|
51 |
final List<CsvEntry> list = Lists.newArrayList(); |
|
52 |
|
|
53 |
final int nRows = RandomUtils.nextInt(10) + 3; |
|
54 |
for(int i = 0; i<nRows; i++) { |
|
55 |
list.add(getRandomCsvEntry()); |
|
56 |
} |
|
57 |
|
|
58 |
final String csv = csvSerialiser.asCSV(list); |
|
59 |
|
|
60 |
log.info("\n" + csv); |
|
61 |
|
|
62 |
verifyLength(csv); |
|
63 |
} |
|
64 |
|
|
65 |
@Test |
|
66 |
public void testSerialiser2() { |
|
67 |
final List<CsvEntry> list = Lists.newArrayList(); |
|
68 |
|
|
69 |
final int nRows = RandomUtils.nextInt(10) + 3; |
|
70 |
for(int i = 0; i<nRows; i++) { |
|
71 |
list.add(getCsvEntry(i)); |
|
72 |
} |
|
73 |
|
|
74 |
final String csv = csvSerialiser.asCSV(list); |
|
75 |
log.info("\n" + csv); |
|
76 |
|
|
77 |
verifyLength(csv); |
|
78 |
} |
|
79 |
|
|
80 |
@Test |
|
81 |
public void testSerialiser3() { |
|
82 |
final String name = "Manghi, Paolo as "; |
|
83 |
final Person p = new Person(name, false); |
|
84 |
final String s = p.getSurnameString() + StringUtils.substring(p.getNameString(), 0, 1); |
|
85 |
|
|
86 |
log.info(String.format("'%s'", s.replaceAll("[^a-zA-Z ]", "").toLowerCase().trim())); |
|
87 |
} |
|
88 |
|
|
89 |
private void verifyLength(final String csv) { |
|
90 |
final Iterator<String> lines = Splitter.on("\n").split(csv).iterator(); |
|
91 |
final List<String> header = Lists.newArrayList(Splitter.on(",").split(lines.next())); |
|
92 |
|
|
93 |
while(lines.hasNext()) { |
|
94 |
List<String> line = Lists.newArrayList(Splitter.on(",").trimResults().omitEmptyStrings().split(lines.next())); |
|
95 |
if (!line.isEmpty()) { |
|
96 |
assertEquals(header.size(), line.size()); |
|
97 |
} |
|
98 |
} |
|
99 |
} |
|
100 |
|
|
101 |
private CsvEntry getCsvEntry(final int i) { |
|
102 |
final CsvEntry e = new CsvEntry(); |
|
103 |
e.setKey(i+""); |
|
104 |
|
|
105 |
final Set<String> features = Sets.newLinkedHashSet(Iterables.filter(ALL_FEATURES, new Predicate<String>() { |
|
106 |
@Override |
|
107 |
public boolean apply(final String s) { |
|
108 |
return RandomUtils.nextBoolean(); |
|
109 |
} |
|
110 |
})); |
|
111 |
|
|
112 |
e.setFeatures(features); |
|
113 |
|
|
114 |
log.info(String.format("%s - %s", i, features)); |
|
115 |
//log.info(e.toString() + "\n"); |
|
116 |
|
|
117 |
return e; |
|
118 |
} |
|
119 |
|
|
120 |
public CsvEntry getRandomCsvEntry() { |
|
121 |
final CsvEntry e = new CsvEntry(); |
|
122 |
e.setKey(UUID.randomUUID().toString()); |
|
123 |
final int nFeatures = RandomUtils.nextInt(4) + 1; |
|
124 |
for(int i = 0; i<nFeatures; i++) { |
|
125 |
e.addFeature("ft." + RandomStringUtils.randomAlphabetic(2) + " \"" + RandomStringUtils.randomAlphabetic(2) + "\" " + RandomStringUtils.randomAlphabetic(2)); |
|
126 |
} |
|
127 |
return e; |
|
128 |
} |
|
129 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/broker/EventWrapperTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.broker; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.io.InputStream; |
|
5 |
import java.util.List; |
|
6 |
import java.util.Map; |
|
7 |
import java.util.Set; |
|
8 |
|
|
9 |
import com.google.common.collect.Maps; |
|
10 |
import com.google.common.collect.Sets; |
|
11 |
import com.googlecode.protobuf.format.JsonFormat; |
|
12 |
import eu.dnetlib.data.proto.OafProtos; |
|
13 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
14 |
import eu.dnetlib.data.transform.Column; |
|
15 |
import eu.dnetlib.data.transform.Row; |
|
16 |
import eu.dnetlib.data.transform.XsltRowTransformer; |
|
17 |
import eu.dnetlib.data.transform.XsltRowTransformerFactory; |
|
18 |
import org.apache.commons.io.IOUtils; |
|
19 |
import org.apache.commons.logging.Log; |
|
20 |
import org.apache.commons.logging.LogFactory; |
|
21 |
import org.apache.hadoop.mapreduce.Counter; |
|
22 |
import org.apache.hadoop.mapreduce.Reducer.Context; |
|
23 |
import org.dom4j.Document; |
|
24 |
import org.dom4j.DocumentException; |
|
25 |
import org.dom4j.io.SAXReader; |
|
26 |
import org.junit.Before; |
|
27 |
import org.junit.Ignore; |
|
28 |
import org.junit.Test; |
|
29 |
import org.junit.runner.RunWith; |
|
30 |
import org.mockito.Mock; |
|
31 |
import org.mockito.junit.MockitoJUnitRunner; |
|
32 |
|
|
33 |
import static org.junit.Assert.*; |
|
34 |
import static org.mockito.ArgumentMatchers.anyString; |
|
35 |
import static org.mockito.Mockito.when; |
|
36 |
|
|
37 |
/** |
|
38 |
* Created by claudio on 22/07/16. |
|
39 |
*/ |
|
40 |
@RunWith(MockitoJUnitRunner.Silent.class) |
|
41 |
public class EventWrapperTest { |
|
42 |
|
|
43 |
private static final Log log = LogFactory.getLog(EventWrapperTest.class); |
|
44 |
|
|
45 |
private Oaf oaf1_1; |
|
46 |
private Oaf oaf1_2; |
|
47 |
private Oaf oaf2_1; |
|
48 |
private Oaf oaf2_2; |
|
49 |
private Oaf oaf3_1; |
|
50 |
private Oaf oaf3_2; |
|
51 |
|
|
52 |
private String xslt; |
|
53 |
|
|
54 |
@Mock |
|
55 |
private Context context; |
|
56 |
|
|
57 |
@Mock |
|
58 |
private Counter counter; |
|
59 |
|
|
60 |
private Map<String, String> baseUrlMap = Maps.newHashMap(); |
|
61 |
|
|
62 |
private static final String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/2hbase/"; |
|
63 |
|
|
64 |
@Before |
|
65 |
public void setUp() throws Exception { |
|
66 |
xslt = loadFromTransformationProfile("oaf2hbase.xml"); |
|
67 |
oaf1_1 = asOaf("recordOaf_1_1.xml"); |
|
68 |
oaf1_2 = asOaf("recordOaf_1_2.xml"); |
|
69 |
oaf2_1 = asOaf("recordOaf_2_1.xml"); |
|
70 |
oaf2_2 = asOaf("recordOaf_2_2.xml"); |
|
71 |
oaf3_1 = asOaf("recordOaf_3_1.xml"); |
|
72 |
oaf3_2 = asOaf("recordOaf_3_2.xml"); |
|
73 |
|
|
74 |
when(context.getCounter(anyString(), anyString())).thenReturn(counter); |
|
75 |
|
|
76 |
baseUrlMap.put("publication", "https://explore.openaire.eu/search/publication?articleId=%s"); |
|
77 |
baseUrlMap.put("dataset", "https://explore.openaire.eu/search/dataset?datasetId=%s"); |
|
78 |
baseUrlMap.put("software", "https://explore.openaire.eu/search/software?softwareId=%s"); |
|
79 |
baseUrlMap.put("other", "https://explore.openaire.eu/search/other?orpId=%s"); |
|
80 |
} |
|
81 |
|
|
82 |
@Test |
|
83 |
public void testAbstractEvent() throws Exception { |
|
84 |
final String id1 = AbstractEventFactory.process(oaf1_1, oaf1_2, 1.0f).get(0).asBrokerEvent().getEventId(); |
|
85 |
final String id2 = AbstractEventFactory.process(oaf2_1, oaf2_2, 1.0f).get(0).asBrokerEvent().getEventId(); |
|
86 |
final String id3 = AbstractEventFactory.process(oaf3_1, oaf3_2, 1.0f).get(0).asBrokerEvent().getEventId(); |
|
87 |
|
|
88 |
verifyEquals(id1, id2); |
|
89 |
verifyDifferent(id1, id3); |
|
90 |
} |
|
91 |
|
|
92 |
@Test |
|
93 |
public void testOAVersionEvent() throws Exception { |
|
94 |
final Set<String> untrustedOaDs = Sets.newHashSet(); |
|
95 |
final String id1 = OAVersionEventFactory.process(oaf1_1, oaf1_2, 1.0f, untrustedOaDs).get(0).asBrokerEvent().getEventId(); |
|
96 |
final String id2 = OAVersionEventFactory.process(oaf2_1, oaf2_2, 1.0f, untrustedOaDs).get(0).asBrokerEvent().getEventId(); |
|
97 |
|
|
98 |
verifyEquals(id1, id2); |
|
99 |
} |
|
100 |
|
|
101 |
@Test |
|
102 |
public void testPIDEvent() throws Exception { |
|
103 |
final String id1 = PIDEventFactory.process(oaf1_1, oaf1_2, 1.0f).get(0).asBrokerEvent().getEventId(); |
|
104 |
final String id2 = PIDEventFactory.process(oaf2_1, oaf2_2, 1.0f).get(0).asBrokerEvent().getEventId(); |
|
105 |
final String id3 = PIDEventFactory.process(oaf3_1, oaf3_2, 1.0f).get(0).asBrokerEvent().getEventId(); |
|
106 |
|
|
107 |
verifyEquals(id1, id2); |
|
108 |
verifyDifferent(id1, id3); |
|
109 |
} |
|
110 |
|
|
111 |
@Test |
|
112 |
//@Ignore |
|
113 |
public void testProjectEvent() throws Exception { |
|
114 |
|
|
115 |
Oaf oaf1_1_c = enrichWithCachedOafRel(oaf1_1, "oafRel_isProducedBy_1.json", "project1.json"); |
|
116 |
Oaf oaf1_2_c = enrichWithCachedOafRel(oaf1_2, "oafRel_isProducedBy_2.json", "project2.json"); |
|
117 |
|
|
118 |
final String id1 = ProjectEventFactory.process(context, oaf1_1_c, oaf1_2_c, 1.0f, baseUrlMap).get(0).asBrokerEvent().getEventId(); |
|
119 |
/*final String id2 = ProjectEventFactory.process(context, oaf2_1, oaf2_2, 1.0f, baseUrlMap).get(0).asBrokerEvent().getEventId(); |
|
120 |
final String id3 = ProjectEventFactory.process(context, oaf3_1, oaf3_2, 1.0f, baseUrlMap).get(0).asBrokerEvent().getEventId(); |
|
121 |
|
|
122 |
verifyEquals(id1, id2); |
|
123 |
verifyDifferent(id1, id3); |
|
124 |
*/ |
|
125 |
} |
|
126 |
|
|
127 |
private Oaf enrichWithCachedOafRel(final Oaf oaf, final String oafRel, final String oafProject) throws IOException { |
|
128 |
final Oaf.Builder cachedRel = Oaf.newBuilder(); |
|
129 |
JsonFormat.merge(IOUtils.toString(getClass().getResourceAsStream(oafRel)), cachedRel); |
|
130 |
|
|
131 |
final Oaf.Builder cachedTarget = Oaf.newBuilder(); |
|
132 |
JsonFormat.merge(IOUtils.toString(getClass().getResourceAsStream(oafProject)), cachedTarget); |
|
133 |
|
|
134 |
cachedRel.getRelBuilder().setCachedOafTarget(cachedTarget); |
|
135 |
|
|
136 |
Oaf.Builder builder = Oaf.newBuilder(oaf); |
|
137 |
builder.getEntityBuilder().addCachedOafRel(cachedRel); |
|
138 |
return builder.build(); |
|
139 |
} |
|
140 |
|
|
141 |
@Test |
|
142 |
public void testPublicationDateEvent() throws Exception { |
|
143 |
final String id1 = PublicationDateEventFactory.process(oaf1_1, oaf1_2, 1.0f).get(0).asBrokerEvent().getEventId(); |
|
144 |
final String id2 = PublicationDateEventFactory.process(oaf2_1, oaf2_2, 1.0f).get(0).asBrokerEvent().getEventId(); |
|
145 |
final String id3 = PublicationDateEventFactory.process(oaf3_1, oaf3_2, 1.0f).get(0).asBrokerEvent().getEventId(); |
|
146 |
|
|
147 |
verifyEquals(id1, id2); |
|
148 |
verifyDifferent(id1, id3); |
|
149 |
} |
|
150 |
|
|
151 |
@Test |
|
152 |
@Ignore |
|
153 |
public void testSoftwareEventFactory() throws Exception { |
|
154 |
final String id1 = SoftwareEventFactory.process(context, oaf1_1, oaf1_2, 1.0f, baseUrlMap).get(0).asBrokerEvent().getEventId(); |
|
155 |
final String id2 = SoftwareEventFactory.process(context, oaf2_1, oaf2_2, 1.0f, baseUrlMap).get(0).asBrokerEvent().getEventId(); |
|
156 |
final String id3 = SoftwareEventFactory.process(context, oaf3_1, oaf3_2, 1.0f, baseUrlMap).get(0).asBrokerEvent().getEventId(); |
|
157 |
|
|
158 |
verifyEquals(id1, id2); |
|
159 |
verifyDifferent(id1, id3); |
|
160 |
} |
|
161 |
|
|
162 |
@Test |
|
163 |
public void testSubjectEventFactory() throws Exception { |
|
164 |
final String id1 = SubjectEventFactory.process(context, oaf1_1, oaf1_2, 1.0f).get(0).asBrokerEvent().getEventId(); |
|
165 |
final String id2 = SubjectEventFactory.process(context, oaf2_1, oaf2_2, 1.0f).get(0).asBrokerEvent().getEventId(); |
|
166 |
final String id3 = SubjectEventFactory.process(context, oaf3_1, oaf3_2, 1.0f).get(0).asBrokerEvent().getEventId(); |
|
167 |
|
|
168 |
verifyEquals(id1, id2); |
|
169 |
verifyDifferent(id1, id3); |
|
170 |
} |
|
171 |
|
|
172 |
private void verifyEquals(final String x, final String y) { |
|
173 |
assertNotNull(x); |
|
174 |
assertNotNull(y); |
|
175 |
assertTrue(x.length() > 10); |
|
176 |
assertTrue(y.length() > 10); |
|
177 |
assertEquals(x, y); |
|
178 |
log.info(String.format("%s = %s", x, y)); |
|
179 |
} |
|
180 |
|
|
181 |
private void verifyDifferent(final String x, final String y) { |
|
182 |
log.info(String.format("%s != %s", x, y)); |
|
183 |
assertNotNull(x); |
|
184 |
assertNotNull(y); |
|
185 |
assertTrue(x.length() > 10); |
|
186 |
assertTrue(y.length() > 10); |
|
187 |
assertFalse(x.equals(y)); |
|
188 |
|
|
189 |
} |
|
190 |
|
|
191 |
private Oaf asOaf(final String xmlFile) throws Exception { |
|
192 |
for (final Row row : asRows(getClass().getResourceAsStream(xmlFile))) { |
|
193 |
for (final Column<String, byte[]> c : row.getColumns()) { |
|
194 |
final String cf = row.getColumnFamily(); |
|
195 |
if (cf.equals("result") && c.getName().equals("body")) { return Oaf.parseFrom(c.getValue()); } |
|
196 |
} |
|
197 |
} |
|
198 |
throw new RuntimeException("Oaf not found"); |
|
199 |
} |
|
200 |
|
|
201 |
private String loadFromTransformationProfile(final String profilePath) { |
|
202 |
log.info("Loading xslt from: " + basePathProfiles + profilePath); |
|
203 |
final InputStream profile = getClass().getResourceAsStream(basePathProfiles + profilePath); |
|
204 |
final SAXReader saxReader = new SAXReader(); |
|
205 |
try { |
|
206 |
final Document doc = saxReader.read(profile); |
|
207 |
return doc.selectSingleNode("//SCRIPT/CODE/*[local-name()='stylesheet']").asXML(); |
|
208 |
} catch (final DocumentException e) { |
|
209 |
e.printStackTrace(); |
|
210 |
throw new RuntimeException(e); |
|
211 |
} |
|
212 |
} |
|
213 |
|
|
214 |
private List<Row> asRows(final InputStream recordStream) |
|
215 |
throws Exception { |
|
216 |
|
|
217 |
final XsltRowTransformer transformer = new XsltRowTransformerFactory().getTransformer(xslt); |
|
218 |
assertNotNull(transformer); |
|
219 |
|
|
220 |
final String record = IOUtils.toString(recordStream); |
|
221 |
final List<Row> rows = transformer.apply(record); |
|
222 |
|
|
223 |
assertNotNull(rows); |
|
224 |
assertFalse(rows.isEmpty()); |
|
225 |
|
|
226 |
return rows; |
|
227 |
} |
|
228 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/broker/EventFactoryTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.broker; |
|
2 |
|
|
3 |
import java.util.UUID; |
|
4 |
|
|
5 |
import org.apache.commons.lang.math.RandomUtils; |
|
6 |
import org.apache.commons.logging.Log; |
|
7 |
import org.apache.commons.logging.LogFactory; |
|
8 |
import org.dom4j.DocumentException; |
|
9 |
import org.junit.Test; |
|
10 |
|
|
11 |
import com.googlecode.protobuf.format.JsonFormat; |
|
12 |
|
|
13 |
import eu.dnetlib.broker.objects.OpenAireEventPayload; |
|
14 |
import eu.dnetlib.data.mapreduce.hbase.broker.mapping.EventFactory; |
|
15 |
import eu.dnetlib.data.mapreduce.hbase.broker.mapping.HighlightFactory; |
|
16 |
import eu.dnetlib.data.mapreduce.hbase.broker.mapping.OpenAireEventPayloadFactory; |
|
17 |
import eu.dnetlib.data.mapreduce.hbase.broker.model.EventMessage; |
|
18 |
import eu.dnetlib.data.mapreduce.hbase.broker.model.EventWrapper; |
|
19 |
import eu.dnetlib.data.mapreduce.util.OafTest; |
|
20 |
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; |
|
21 |
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; |
|
22 |
import eu.dnetlib.data.proto.FieldTypeProtos.StringField; |
|
23 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
24 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
25 |
import eu.dnetlib.data.proto.OafProtos.OafEntity.Builder; |
|
26 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
|
27 |
import eu.dnetlib.data.proto.ResultProtos.Result.Instance; |
|
28 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
29 |
|
|
30 |
/** |
|
31 |
* Created by claudio on 22/07/16. |
|
32 |
*/ |
|
33 |
public class EventFactoryTest { |
|
34 |
|
|
35 |
private static final Log log = LogFactory.getLog(EventFactoryTest.class); |
|
36 |
|
|
37 |
@Test |
|
38 |
public void testEventFactory() throws DocumentException { |
|
39 |
|
|
40 |
final OafEntity source = getEntity("id_1", null, "pid_1").setResult( |
|
41 |
Result.newBuilder().setMetadata( |
|
42 |
Result.Metadata.newBuilder().setDateofacceptance(OafTest.sf("2012-01-01")) |
|
43 |
.addTitle(sp("vvvv Title", "main title", "dnet:dataCite_title")) |
|
44 |
.setResulttype(q("publication", "dnet:result_typologies"))) |
|
45 |
.addInstance(Instance.newBuilder() |
|
46 |
.setHostedby(kv("456", "PubMed")) |
|
47 |
.setAccessright(q("OPEN", "dnet:licenses")) |
|
48 |
.addUrl("http://456"))) |
|
49 |
.build(); |
|
50 |
|
|
51 |
final OafEntity oaf = getEntity("id_2", "originalId_2", "pid_2").setResult( |
|
52 |
Result.newBuilder().setMetadata( |
|
53 |
Result.Metadata.newBuilder().setDateofacceptance(OafTest.sf("")) |
|
54 |
.addTitle(sp("aaaa Title", "main title", "dnet:dataCite_title")) |
|
55 |
.setDateofacceptance(sf("2017-07-13")) |
|
56 |
.setResulttype(q("publication", "dnet:result_typologies"))) |
|
57 |
.addInstance( |
|
58 |
Instance.newBuilder() |
|
59 |
.setHostedby(kv("123", "Puma")) |
|
60 |
.setAccessright(q("CLOSED", "dnet:licenses")) |
|
61 |
.addUrl("http://123"))) |
|
62 |
.build(); |
|
63 |
|
|
64 |
log.info("oaf: " + JsonFormat.printToString(oaf)); |
|
65 |
|
|
66 |
final float trust = RandomUtils.nextFloat(); |
|
67 |
final OpenAireEventPayload openAireEventPayload = new OpenAireEventPayload(); |
|
68 |
final EventMessage event = EventFactory.asEvent(oaf, Topic.ENRICH_MISSING_PUBLICATION_DATE, openAireEventPayload, source, trust); |
|
69 |
|
|
70 |
final OpenAireEventPayload p = OpenAireEventPayloadFactory.fromOAF(oaf, source, trust); |
|
71 |
final OpenAireEventPayload payload = HighlightFactory.highlightEnrichPublicationDate(p, OafTest.sf("2012-01-01")); |
|
72 |
|
|
73 |
final String payloadJson = payload.toJSON(); |
|
74 |
|
|
75 |
log.info("payload: " + payloadJson); |
|
76 |
log.info("payload: " + OpenAireEventPayload.fromJSON(payloadJson).toJSON()); |
|
77 |
|
|
78 |
event.setPayload(payload.toJSON()); |
|
79 |
|
|
80 |
log.info("event message: " + event.toString()); |
|
81 |
|
|
82 |
final EventWrapper wrapper = new EventWrapper(event, "2012-01-01", "test"); |
|
83 |
|
|
84 |
log.info("broker event: " + wrapper.asBrokerEvent().toJson()); |
|
85 |
|
|
86 |
} |
|
87 |
|
|
88 |
private Qualifier.Builder q(final String classid, final String scheme) { |
|
89 |
return OafTest.getQualifier(classid, scheme); |
|
90 |
} |
|
91 |
|
|
92 |
private KeyValue.Builder kv(final String k, final String v) { |
|
93 |
return KeyValue.newBuilder().setKey(k).setValue(v); |
|
94 |
} |
|
95 |
|
|
96 |
private StringField.Builder sf(final String value) { |
|
97 |
return StringField.newBuilder().setValue(value); |
|
98 |
} |
|
99 |
|
|
100 |
private StructuredProperty.Builder sp(final String v, final String classid, final String scheme) { |
|
101 |
return OafTest.getStructuredproperty(v, classid, scheme); |
|
102 |
} |
|
103 |
|
|
104 |
private OafEntity.Builder getEntity(final String id, final String originalId, final String pid) { |
|
105 |
final Builder entity = |
|
106 |
OafEntity.newBuilder().setType(Type.result).setId(id).addOriginalId(originalId != null ? originalId : UUID.randomUUID().toString()); |
|
107 |
|
|
108 |
if (pid != null) { |
|
109 |
entity.addPid(OafTest.getStructuredproperty(pid, "doi", "scheme")); |
|
110 |
} |
|
111 |
|
|
112 |
return entity; |
|
113 |
} |
|
114 |
|
|
115 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/dataimport/GetInvalidXmlRecordsMapperTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.dataimport; |
|
2 |
|
|
3 |
import org.apache.commons.io.IOUtils; |
|
4 |
import org.junit.Assert; |
|
5 |
import org.junit.Test; |
|
6 |
|
|
7 |
import java.io.IOException; |
|
8 |
|
|
9 |
public class GetInvalidXmlRecordsMapperTest { |
|
10 |
|
|
11 |
@Test |
|
12 |
public void testParseDoi() throws IOException { |
|
13 |
|
|
14 |
final String xml = IOUtils.toString(getClass().getResourceAsStream("record.xml")); |
|
15 |
|
|
16 |
Assert.assertNotNull(xml); |
|
17 |
|
|
18 |
String doi = GetInvalidXmlRecordsMapper.getDoi(xml); |
|
19 |
|
|
20 |
Assert.assertNotNull(doi); |
|
21 |
|
|
22 |
System.out.println("doi = " + doi); |
|
23 |
|
|
24 |
} |
|
25 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/index/config/RelClassesTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.index.config; |
|
2 |
|
|
3 |
import org.junit.Test; |
|
4 |
|
|
5 |
import static org.junit.Assert.assertEquals; |
|
6 |
import static org.junit.Assert.assertNotNull; |
|
7 |
|
|
8 |
public class RelClassesTest { |
|
9 |
|
|
10 |
public static final String relClassesJson = |
|
11 |
"[\"isProvidedBy:provides\"," |
|
12 |
+ "\"provides:isProvidedBy\"," |
|
13 |
+ "\"merges:isMergedIn\"," |
|
14 |
+ "\"isMergedIn:merges\"," |
|
15 |
+ "\"isSimilarTo:isSimilarTo\"," |
|
16 |
+ "\"isCoAuthorOf:isCoAuthorOf\"," |
|
17 |
+ "\"isAuthorOf:hasAuthor\"," |
|
18 |
+ "\"hasAuthor:isAuthorOf\"," |
|
19 |
+ "\"isParticipant:hasParticipant\"," |
|
20 |
+ "\"hasParticipant:isParticipant\"," |
|
21 |
+ "\"isProducedBy:produces\"," |
|
22 |
+ "\"produces:isProducedBy\"," |
|
23 |
+ "\"hasAmongTopNSimilarDocuments:isAmongTopNSimilarDocuments\"," |
|
24 |
+ "\"isAmongTopNSimilarDocuments:hasAmongTopNSimilarDocuments\"," |
|
25 |
+ "\"isRelatedTo:isRelatedTo\"," |
|
26 |
+ "\"isContact:hasContact\"," |
|
27 |
+ "\"hasContact:isContact\"," |
|
28 |
+ "\"isContributorOf:hasContributor\"," |
|
29 |
+ "\"hasContributor:isContributorOf\"," |
|
30 |
+ "\"isPartOf:hasPart\"," |
|
31 |
+ "\"hasPart:isPartOf\"," |
|
32 |
+ "\"isSupplementedBy:isSupplementTo\"," |
|
33 |
+ "\"isSupplementTo:isSupplementedBy\"," |
|
34 |
+ "\"hasAuthorInstitution:isAuthorInstitutionOf\"," |
|
35 |
+ "\"isAuthorInstitutionOf:hasAuthorInstitution\"]"; |
|
36 |
|
|
37 |
@Test |
|
38 |
public void testRelClasses() { |
|
39 |
RelClasses relClasses = new RelClasses(relClassesJson); |
|
40 |
assertNotNull(relClasses); |
|
41 |
|
|
42 |
String providedBy = relClasses.getInverse("provides"); |
|
43 |
assertEquals(providedBy, "isProvidedBy"); |
|
44 |
|
|
45 |
String provides = relClasses.getInverse("isProvidedBy"); |
|
46 |
assertEquals(provides, "provides"); |
|
47 |
} |
|
48 |
} |
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/index/config/Context.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.hbase.index.config; |
|
2 |
|
|
3 |
public class Context { |
|
4 |
|
|
5 |
public final static String xml = "<ContextDSResources>" |
|
6 |
+ "<entry id=\"fam\" label=\"Fisheries and Acquacolture Management\" name=\"context\" type=\"community\"/>" |
|
7 |
+ "<entry id=\"clarin\" label=\"CLARIN\" name=\"context\" type=\"ri\"/>" |
|
8 |
+"<entry id=\"dh-ch\" label=\"DH-CH\" name=\"context\" type=\"community\"/>" |
|
9 |
+ "<entry id=\"dh-ch::subcommunity\" label=\"Subcommunitiess\" name=\"category\" type=\"\"/>" |
|
10 |
+ "<entry id=\"dh-ch::subcommunity::2\" label=\"CLARIN\" name=\"concept\" type=\"\"/>" |
|
11 |
+"<entry id=\"egi\" label=\"EGI\" name=\"context\" type=\"ri\"/>" |
|
12 |
+ "<entry id=\"egi::classification\" label=\"Scientific Disciplines\" name=\"category\" type=\"\"/>" |
|
13 |
+ "<entry id=\"egi::classification::natsc\" label=\"Natural Sciences\" name=\"concept\" type=\"\"/>" |
|
14 |
+ "<entry id=\"egi::classification::natsc::math\" label=\"Mathematics\" name=\"concept\" type=\"\"/>" |
|
15 |
+ "<entry id=\"egi::classification::natsc::math::applied\" label=\"Applied Mathematics\" name=\"concept\" type=\"\"/>" |
|
16 |
+ "<entry id=\"egi::classification::natsc::math::pure\" label=\"Pure Mathematics\" name=\"concept\" type=\"\"/>" |
|
17 |
+ "<entry id=\"egi::classification::natsc::math::stats\" label=\"Statistics and Probability\" name=\"concept\" type=\"\"/>" |
|
18 |
+ "<entry id=\"egi::classification::natsc::math::other\" label=\"Other\" name=\"concept\" type=\"\"/>" |
|
19 |
+ "<entry id=\"egi::classification::natsc::cosc\" label=\"Computer sciences\" name=\"concept\" type=\"\"/>" |
|
20 |
+ "<entry id=\"egi::classification::natsc::cosc::algo\" label=\"Algorithms\" name=\"concept\" type=\"\"/>" |
|
21 |
+ "<entry id=\"egi::classification::natsc::cosc::artii\" label=\"Artificial Intelligence\" name=\"concept\" type=\"\"/>" |
|
22 |
+ "<entry id=\"egi::classification::natsc::cosc::carc\" label=\"Computer architecture\" name=\"concept\" type=\"\"/>" |
|
23 |
+ "<entry id=\"egi::classification::natsc::cosc::ccomm\" label=\"Computer communications\" name=\"concept\" type=\"\"/>" |
|
24 |
+ "<entry id=\"egi::classification::natsc::cosc::cgraph\" label=\"Computer graphics\" name=\"concept\" type=\"\"/>" |
|
25 |
+ "<entry id=\"egi::classification::natsc::cosc::csecr\" label=\"Computer security and reliability\" name=\"concept\" type=\"\"/>" |
|
26 |
+ "<entry id=\"egi::classification::natsc::cosc::dstr\" label=\"Data structures\" name=\"concept\" type=\"\"/>" |
|
27 |
+ "<entry id=\"egi::classification::natsc::cosc::disc\" label=\"Distributed computing\" name=\"concept\" type=\"\"/>" |
|
28 |
+ "<entry id=\"egi::classification::natsc::cosc::hci\" label=\"Human-computer interaction\" name=\"concept\" type=\"\"/>" |
|
29 |
+ "<entry id=\"egi::classification::natsc::cosc::opes\" label=\"Operating systems\" name=\"concept\" type=\"\"/>" |
|
30 |
+ "<entry id=\"egi::classification::natsc::cosc::parc\" label=\"Parallel computing\" name=\"concept\" type=\"\"/>" |
|
31 |
+ "<entry id=\"egi::classification::natsc::cosc::plan\" label=\"Programming languages\" name=\"concept\" type=\"\"/>" |
|
32 |
+ "<entry id=\"egi::classification::natsc::cosc::qcom\" label=\"Quantum computing\" name=\"concept\" type=\"\"/>" |
|
33 |
+ "<entry id=\"egi::classification::natsc::cosc::softe\" label=\"software engineering\" name=\"concept\" type=\"\"/>" |
|
34 |
+ "<entry id=\"egi::classification::natsc::cosc::theo\" label=\"Theory of computation\" name=\"concept\" type=\"\"/>" |
|
35 |
+ "<entry id=\"egi::classification::natsc::cosc::other\" label=\"Other\" name=\"concept\" type=\"\"/>" |
|
36 |
+ "<entry id=\"egi::classification::natsc::infos\" label=\"Information sciences\" name=\"concept\" type=\"\"/>" |
|
37 |
+ "<entry id=\"egi::classification::natsc::infos::dman\" label=\"Data management\" name=\"concept\" type=\"\"/>" |
|
38 |
+ "<entry id=\"egi::classification::natsc::infos::dmin\" label=\"Data mining\" name=\"concept\" type=\"\"/>" |
|
39 |
+ "<entry id=\"egi::classification::natsc::infos::iret\" label=\"Information retrieval\" name=\"concept\" type=\"\"/>" |
|
40 |
+ "<entry id=\"egi::classification::natsc::infos::iman\" label=\"Information management\" name=\"concept\" type=\"\"/>" |
|
41 |
+ "<entry id=\"egi::classification::natsc::infos::kman\" label=\"Knowledge management\" name=\"concept\" type=\"\"/>" |
|
42 |
+ "<entry id=\"egi::classification::natsc::infos::mult\" label=\"Multimedia, hypermedia\" name=\"concept\" type=\"\"/>" |
|
43 |
+ "<entry id=\"egi::classification::natsc::infos::other\" label=\"Other\" name=\"concept\" type=\"\"/>" |
|
44 |
+ "<entry id=\"egi::classification::natsc::earths\" label=\"Earth sciences\" name=\"concept\" type=\"\"/>" |
|
45 |
+ "<entry id=\"egi::classification::natsc::earths::atms\" label=\"Atmospheric science\" name=\"concept\" type=\"\"/>" |
|
46 |
+ "<entry id=\"egi::classification::natsc::earths::clim\" label=\"Climate research\" name=\"concept\" type=\"\"/>" |
|
47 |
+ "<entry id=\"egi::classification::natsc::earths::geoc\" label=\"Geochemistry\" name=\"concept\" type=\"\"/>" |
|
48 |
+ "<entry id=\"egi::classification::natsc::earths::geol\" label=\"Geology\" name=\"concept\" type=\"\"/>" |
|
49 |
+ "<entry id=\"egi::classification::natsc::earths::geop\" label=\"Geophysics\" name=\"concept\" type=\"\"/>" |
|
50 |
+ "<entry id=\"egi::classification::natsc::earths::hydr\" label=\"Hydrology\" name=\"concept\" type=\"\"/>" |
|
51 |
+ "<entry id=\"egi::classification::natsc::earths::mine\" label=\"Mineralogy\" name=\"concept\" type=\"\"/>" |
|
52 |
+ "<entry id=\"egi::classification::natsc::earths::ocea\" label=\"Oceanography\" name=\"concept\" type=\"\"/>" |
|
53 |
+ "<entry id=\"egi::classification::natsc::earths::pala\" label=\"palaeontology\" name=\"concept\" type=\"\"/>" |
|
54 |
+ "<entry id=\"egi::classification::natsc::earths::pgeo\" label=\"Physical geography\" name=\"concept\" type=\"\"/>" |
|
55 |
+ "<entry id=\"egi::classification::natsc::earths::seis\" label=\"Seismology\" name=\"concept\" type=\"\"/>" |
|
56 |
+ "<entry id=\"egi::classification::natsc::earths::volc\" label=\"Volcanology\" name=\"concept\" type=\"\"/>" |
|
57 |
+ "<entry id=\"egi::classification::natsc::earths::other\" label=\"Other\" name=\"concept\" type=\"\"/>" |
|
58 |
+ "<entry id=\"egi::classification::natsc::biosc\" label=\"Biological sciences\" name=\"concept\" type=\"\"/>" |
|
59 |
+ "<entry id=\"egi::classification::natsc::biosc::aerob\" label=\"Aerobiology\" name=\"concept\" type=\"\"/>" |
|
60 |
+ "<entry id=\"egi::classification::natsc::biosc::bact\" label=\"Bacteriology\" name=\"concept\" type=\"\"/>" |
|
61 |
+ "<entry id=\"egi::classification::natsc::biosc::behav\" label=\"Behavioural biology\" name=\"concept\" type=\"\"/>" |
|
62 |
+ "<entry id=\"egi::classification::natsc::biosc::bimol\" label=\"Biochemistry and molecular biology\" name=\"concept\" type=\"\"/>" |
|
63 |
+ "<entry id=\"egi::classification::natsc::biosc::bcons\" label=\"Biodiversity conservation\" name=\"concept\" type=\"\"/>" |
|
64 |
+ "<entry id=\"egi::classification::natsc::biosc::binfo\" label=\"Bioinformatics\" name=\"concept\" type=\"\"/>" |
|
65 |
+ "<entry id=\"egi::classification::natsc::biosc::brhy\" label=\"Biological rhythm\" name=\"concept\" type=\"\"/>" |
|
66 |
+ "<entry id=\"egi::classification::natsc::biosc::biol\" label=\"Biology\" name=\"concept\" type=\"\"/>" |
|
67 |
+ "<entry id=\"egi::classification::natsc::biosc::bioph\" label=\"Biophysics\" name=\"concept\" type=\"\"/>" |
|
68 |
+ "<entry id=\"egi::classification::natsc::biosc::bota\" label=\"Botany\" name=\"concept\" type=\"\"/>" |
|
69 |
+ "<entry id=\"egi::classification::natsc::biosc::cellb\" label=\"Cell biology\" name=\"concept\" type=\"\"/>" |
|
70 |
+ "<entry id=\"egi::classification::natsc::biosc::cobio\" label=\"Computational biology\" name=\"concept\" type=\"\"/>" |
|
71 |
+ "<entry id=\"egi::classification::natsc::biosc::cryo\" label=\"Cryobiology\" name=\"concept\" type=\"\"/>" |
|
72 |
+ "<entry id=\"egi::classification::natsc::biosc::devb\" label=\"Developmental biology\" name=\"concept\" type=\"\"/>" |
|
73 |
+ "<entry id=\"egi::classification::natsc::biosc::ecol\" label=\"Ecology\" name=\"concept\" type=\"\"/>" |
|
74 |
+ "<entry id=\"egi::classification::natsc::biosc::evolb\" label=\"Evolutionary biology\" name=\"concept\" type=\"\"/>" |
|
75 |
+ "<entry id=\"egi::classification::natsc::biosc::gehe\" label=\"Genetics and heredity\" name=\"concept\" type=\"\"/>" |
|
76 |
+ "<entry id=\"egi::classification::natsc::biosc::marin\" label=\"Marine and Freshwater biology\" name=\"concept\" type=\"\"/>" |
|
77 |
+ "<entry id=\"egi::classification::natsc::biosc::mbio\" label=\"Mathematical biology\" name=\"concept\" type=\"\"/>" |
|
78 |
+ "<entry id=\"egi::classification::natsc::biosc::mibio\" label=\"Microbiology\" name=\"concept\" type=\"\"/>" |
|
79 |
+ "<entry id=\"egi::classification::natsc::biosc::myco\" label=\"Mycology\" name=\"concept\" type=\"\"/>" |
|
80 |
+ "<entry id=\"egi::classification::natsc::biosc::plant\" label=\"Plant science\" name=\"concept\" type=\"\"/>" |
|
81 |
+ "<entry id=\"egi::classification::natsc::biosc::repr\" label=\"Reproductive biology\" name=\"concept\" type=\"\"/>" |
|
82 |
+ "<entry id=\"egi::classification::natsc::biosc::strub\" label=\"Structural biology\" name=\"concept\" type=\"\"/>" |
|
83 |
+ "<entry id=\"egi::classification::natsc::biosc::taxo\" label=\"Taxonomy\" name=\"concept\" type=\"\"/>" |
|
84 |
+ "<entry id=\"egi::classification::natsc::biosc::theob\" label=\"Theoretical biology\" name=\"concept\" type=\"\"/>" |
|
85 |
+ "<entry id=\"egi::classification::natsc::biosc::therm\" label=\"Thermal biology\" name=\"concept\" type=\"\"/>" |
|
86 |
+ "<entry id=\"egi::classification::natsc::biosc::viro\" label=\"Virology\" name=\"concept\" type=\"\"/>" |
|
87 |
+ "<entry id=\"egi::classification::natsc::biosc::zool\" label=\"Zoology\" name=\"concept\" type=\"\"/>" |
|
88 |
+ "<entry id=\"egi::classification::natsc::biosc::other\" label=\"Other\" name=\"concept\" type=\"\"/>" |
|
89 |
+ "<entry id=\"egi::classification::natsc::physc\" label=\"Physical sciences\" name=\"concept\" type=\"\"/>" |
|
90 |
+ "<entry id=\"egi::classification::natsc::physc::acce\" label=\"Acceleration physics\" name=\"concept\" type=\"\"/>" |
|
91 |
+ "<entry id=\"egi::classification::natsc::physc::acou\" label=\"Acoustics\" name=\"concept\" type=\"\"/>" |
|
92 |
+ "<entry id=\"egi::classification::natsc::physc::aero\" label=\"Aerosol physics\" name=\"concept\" type=\"\"/>" |
|
93 |
+ "<entry id=\"egi::classification::natsc::physc::astrb\" label=\"Astrobiology\" name=\"concept\" type=\"\"/>" |
|
94 |
+ "<entry id=\"egi::classification::natsc::physc::astrn\" label=\"Astronomy\" name=\"concept\" type=\"\"/>" |
|
95 |
+ "<entry id=\"egi::classification::natsc::physc::astrp\" label=\"Astroparticle physics\" name=\"concept\" type=\"\"/>" |
|
96 |
+ "<entry id=\"egi::classification::natsc::physc::astrph\" label=\"Astrophysics\" name=\"concept\" type=\"\"/>" |
|
97 |
+ "<entry id=\"egi::classification::natsc::physc::atmc\" label=\"Atomic\" name=\"concept\" type=\"\"/>" |
|
98 |
+ "<entry id=\"egi::classification::natsc::physc::chph\" label=\"Chemical physics\" name=\"concept\" type=\"\"/>" |
|
99 |
+ "<entry id=\"egi::classification::natsc::physc::comph\" label=\"Computational physics\" name=\"concept\" type=\"\"/>" |
|
100 |
+ "<entry id=\"egi::classification::natsc::physc::conds\" label=\"Condensed matter physics\" name=\"concept\" type=\"\"/>" |
|
101 |
+ "<entry id=\"egi::classification::natsc::physc::cryog\" label=\"Cryogenics\" name=\"concept\" type=\"\"/>" |
|
102 |
+ "<entry id=\"egi::classification::natsc::physc::fluid\" label=\"Fluid mechanics\" name=\"concept\" type=\"\"/>" |
|
103 |
+ "<entry id=\"egi::classification::natsc::physc::fusi\" label=\"Fusion\" name=\"concept\" type=\"\"/>" |
|
104 |
+ "<entry id=\"egi::classification::natsc::physc::hene\" label=\"High energy physics\" name=\"concept\" type=\"\"/>" |
|
105 |
+ "<entry id=\"egi::classification::natsc::physc::mathp\" label=\"Mathematical physics\" name=\"concept\" type=\"\"/>" |
|
106 |
+ "<entry id=\"egi::classification::natsc::physc::medip\" label=\"Medical physics\" name=\"concept\" type=\"\"/>" |
|
107 |
+ "<entry id=\"egi::classification::natsc::physc::molep\" label=\"Molecular physics\" name=\"concept\" type=\"\"/>" |
|
108 |
+ "<entry id=\"egi::classification::natsc::physc::nuclp\" label=\"Nuclear physics\" name=\"concept\" type=\"\"/>" |
|
109 |
+ "<entry id=\"egi::classification::natsc::physc::optc\" label=\"Optics\" name=\"concept\" type=\"\"/>" |
|
110 |
+ "<entry id=\"egi::classification::natsc::physc::partp\" label=\"Particle physics\" name=\"concept\" type=\"\"/>" |
|
111 |
+ "<entry id=\"egi::classification::natsc::physc::phys\" label=\"Physics\" name=\"concept\" type=\"\"/>" |
|
112 |
+ "<entry id=\"egi::classification::natsc::physc::plans\" label=\"Planetary science\" name=\"concept\" type=\"\"/>" |
|
113 |
+ "<entry id=\"egi::classification::natsc::physc::plsm\" label=\"Plasma physics\" name=\"concept\" type=\"\"/>" |
|
114 |
+ "<entry id=\"egi::classification::natsc::physc::spac\" label=\"Space physics\" name=\"concept\" type=\"\"/>" |
|
115 |
+ "<entry id=\"egi::classification::natsc::physc::quant\" label=\"Quantum physics\" name=\"concept\" type=\"\"/>" |
|
116 |
+ "<entry id=\"egi::classification::natsc::physc::other\" label=\"Other\" name=\"concept\" type=\"\"/>" |
|
117 |
+ "<entry id=\"egi::classification::natsc::chesc\" label=\"Chemical sciences\" name=\"concept\" type=\"\"/>" |
|
118 |
+ "<entry id=\"egi::classification::natsc::chesc::analc\" label=\"Analytical chemistry\" name=\"concept\" type=\"\"/>" |
|
119 |
+ "<entry id=\"egi::classification::natsc::chesc::chem\" label=\"Chemistry\" name=\"concept\" type=\"\"/>" |
|
120 |
+ "<entry id=\"egi::classification::natsc::chesc::collc\" label=\"Colloid chemistry\" name=\"concept\" type=\"\"/>" |
|
121 |
+ "<entry id=\"egi::classification::natsc::chesc::compc\" label=\"Computational chemistry\" name=\"concept\" type=\"\"/>" |
|
122 |
+ "<entry id=\"egi::classification::natsc::chesc::elecc\" label=\"Electrochemistry\" name=\"concept\" type=\"\"/>" |
|
123 |
+ "<entry id=\"egi::classification::natsc::chesc::innuc\" label=\"Inorganic and nuclear chemistry\" name=\"concept\" type=\"\"/>" |
|
124 |
+ "<entry id=\"egi::classification::natsc::chesc::mathc\" label=\"Mathematical chemistry\" name=\"concept\" type=\"\"/>" |
|
125 |
+ "<entry id=\"egi::classification::natsc::chesc::orgc\" label=\"Organic chemistry\" name=\"concept\" type=\"\"/>" |
|
126 |
+ "<entry id=\"egi::classification::natsc::chesc::phyc\" label=\"Physical chemistry\" name=\"concept\" type=\"\"/>" |
|
127 |
+ "<entry id=\"egi::classification::natsc::chesc::polc\" label=\"Polymer chemistry\" name=\"concept\" type=\"\"/>" |
|
128 |
+ "<entry id=\"egi::classification::natsc::chesc::other\" label=\"Other\" name=\"concept\" type=\"\"/>" |
|
129 |
+ "<entry id=\"egi::classification::engt\" label=\"Engineering and Technology\" name=\"concept\" type=\"\"/>" |
|
130 |
+ "<entry id=\"egi::classification::engt::cieng\" label=\"Civil engineering\" name=\"concept\" type=\"\"/>" |
|
131 |
+ "<entry id=\"egi::classification::engt::civeng::arche\" label=\"Architecture engineering\" name=\"concept\" type=\"\"/>" |
|
132 |
+ "<entry id=\"egi::classification::engt::civeng::cieng\" label=\"Civil engineering\" name=\"concept\" type=\"\"/>" |
|
133 |
+ "<entry id=\"egi::classification::engt::civeng::cipro\" label=\"Civil protection\" name=\"concept\" type=\"\"/>" |
|
134 |
+ "<entry id=\"egi::classification::engt::civeng::costr\" label=\"Construction/Structural engineering\" name=\"concept\" type=\"\"/>" |
|
135 |
+ "<entry id=\"egi::classification::engt::civeng::treng\" label=\"Transport engineering\" name=\"concept\" type=\"\"/>" |
|
136 |
+ "<entry id=\"egi::classification::engt::civeng::other\" label=\"Other\" name=\"concept\" type=\"\"/>" |
|
137 |
+ "<entry id=\"egi::classification::engt::eeie\" label=\"Electrical, electronic and information engineering\" name=\"concept\" type=\"\"/>" |
|
138 |
+ "<entry id=\"egi::classification::engt::eeie::comes\" label=\"Communication engineering and systems\" name=\"concept\" type=\"\"/>" |
|
139 |
+ "<entry id=\"egi::classification::engt::eeie::comha\" label=\"Computer hardware and architecture\" name=\"concept\" type=\"\"/>" |
|
140 |
+ "<entry id=\"egi::classification::engt::eeie::elele\" label=\"Electrical and electronic engineering\" name=\"concept\" type=\"\"/>" |
|
141 |
+ "<entry id=\"egi::classification::engt::eeie::robaut\" label=\"Robotics, Automation and Control Systems\" name=\"concept\" type=\"\"/>" |
|
142 |
+ "<entry id=\"egi::classification::engt::eeie::other\" label=\"Other\" name=\"concept\" type=\"\"/>" |
|
143 |
+ "<entry id=\"egi::classification::engt::meng\" label=\"Mechanical engineering\" name=\"concept\" type=\"\"/>" |
|
144 |
+ "<entry id=\"egi::classification::engt::meng::apmec\" label=\"Applied mechanics\" name=\"concept\" type=\"\"/>" |
|
145 |
+ "<entry id=\"egi::classification::engt::meng::audm\" label=\"Audio engineering\" name=\"concept\" type=\"\"/>" |
|
146 |
+ "<entry id=\"egi::classification::engt::meng::nucl\" label=\"Nuclear related engineering\" name=\"concept\" type=\"\"/>" |
|
147 |
+ "<entry id=\"egi::classification::engt::meng::relia\" label=\"Reliability analysis\" name=\"concept\" type=\"\"/>" |
|
148 |
+ "<entry id=\"egi::classification::engt::meng::therd\" label=\"Thermodynamics\" name=\"concept\" type=\"\"/>" |
|
149 |
+ "<entry id=\"egi::classification::engt::meng::other\" label=\"other\" name=\"concept\" type=\"\"/>" |
|
150 |
+ "<entry id=\"egi::classification::engt::aeros\" label=\"Aerospace engineering\" name=\"concept\" type=\"\"/>" |
|
151 |
+ "<entry id=\"egi::classification::engt::aeros::aeren\" label=\"Aeronautical engineering\" name=\"concept\" type=\"\"/>" |
|
152 |
+ "<entry id=\"egi::classification::engt::aeros::astren\" label=\"Astronautical engineering\" name=\"concept\" type=\"\"/>" |
|
153 |
+ "<entry id=\"egi::classification::engt::aeros::other\" label=\"Other\" name=\"concept\" type=\"\"/>" |
|
154 |
+ "<entry id=\"egi::classification::engt::cheme\" label=\"Chemical engineering\" name=\"concept\" type=\"\"/>" |
|
155 |
+ "<entry id=\"egi::classification::engt::cheme::cheme\" label=\"Chemical engineering\" name=\"concept\" type=\"\"/>" |
|
156 |
+ "<entry id=\"egi::classification::engt::cheme::chempr\" label=\"Chemical process engineering\" name=\"concept\" type=\"\"/>" |
|
157 |
+ "<entry id=\"egi::classification::engt::cheme::other\" label=\"Other\" name=\"concept\" type=\"\"/>" |
|
158 |
+ "<entry id=\"egi::classification::engt::maten\" label=\"Materials engineering\" name=\"concept\" type=\"\"/>" |
|
159 |
+ "<entry id=\"egi::classification::engt::maten::ceram\" label=\"Ceramics\" name=\"concept\" type=\"\"/>" |
|
160 |
+ "<entry id=\"egi::classification::engt::maten::coatf\" label=\"Coating and films\" name=\"concept\" type=\"\"/>" |
|
161 |
+ "<entry id=\"egi::classification::engt::maten::compos\" label=\"Composites\" name=\"concept\" type=\"\"/>" |
|
162 |
+ "<entry id=\"egi::classification::engt::maten::paperw\" label=\"Paper and wood\" name=\"concept\" type=\"\"/>" |
Also available in: Unified diff
import from master branch