1 |
52912
|
sandro.lab
|
package eu.dnetlib.data.mapreduce.actions;
|
2 |
|
|
|
3 |
52935
|
claudio.at
|
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import eu.dnetlib.actionmanager.actions.ActionFactory;
import eu.dnetlib.actionmanager.actions.AtomicAction;
import eu.dnetlib.actionmanager.common.Agent;
import eu.dnetlib.data.mapreduce.hbase.dataimport.ScholExplorerConfiguration;
import eu.dnetlib.data.mapreduce.hbase.dataimport.ScholixToActions;
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
import eu.dnetlib.miscutils.datetime.DateUtils;
import org.apache.commons.lang3.StringUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import static org.junit.Assert.assertEquals;
|
26 |
|
|
|
27 |
|
|
public class ScholexplorerActionMapperTest {
|
28 |
|
|
|
29 |
|
|
private Map<String, ScholExplorerConfiguration> configurationMap;
|
30 |
|
|
private String setName;
|
31 |
|
|
private Agent agent;
|
32 |
|
|
private String nsPrefix;
|
33 |
|
|
private String dsName;
|
34 |
|
|
private String dsId;
|
35 |
|
|
|
36 |
|
|
|
37 |
|
|
|
38 |
|
|
@Before
|
39 |
|
|
public void initializeCofiguration(){
|
40 |
52935
|
claudio.at
|
configurationMap = new HashMap<>();
|
41 |
52912
|
sandro.lab
|
configurationMap.put("issn", new ScholExplorerConfiguration(null, false));
|
42 |
|
|
configurationMap.put("openaire", new ScholExplorerConfiguration(null, false));
|
43 |
|
|
configurationMap.put("pmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
|
44 |
|
|
configurationMap.put("doi", new ScholExplorerConfiguration("doi", true,"http://dx.doi.org/%s"));
|
45 |
|
|
configurationMap.put("pbmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
|
46 |
|
|
configurationMap.put("pmcid", new ScholExplorerConfiguration("pmc", true,"https://europepmc.org/articles/%s"));
|
47 |
|
|
configurationMap.put("pubmedid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
|
48 |
|
|
configurationMap.put("icpsr", new ScholExplorerConfiguration(null, false));
|
49 |
|
|
configurationMap.put("dnet", new ScholExplorerConfiguration(null, false));
|
50 |
52935
|
claudio.at
|
configurationMap.put("url", new ScholExplorerConfiguration(null, true,"%s"));
|
51 |
52912
|
sandro.lab
|
|
52 |
|
|
setName = "DLI";
|
53 |
|
|
agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service);
|
54 |
|
|
nsPrefix = "scholexplore";
|
55 |
|
|
dsName = "ScholExplorer";
|
56 |
|
|
dsId = "scholexplorer";
|
57 |
|
|
}
|
58 |
|
|
|
59 |
|
|
@Test
|
60 |
|
|
public void testSubString () {
|
61 |
|
|
final String dnetId ="50|dli_resolver::7b7b9a57a40818d10cf2532d71f012fa";
|
62 |
|
|
assertEquals("7b7b9a57a40818d10cf2532d71f012fa", dnetId.substring(17));
|
63 |
|
|
|
64 |
|
|
System.out.println(AbstractDNetXsltFunctions.md5("SNSF - Swiss National Science Foundation"));
|
65 |
|
|
}
|
66 |
|
|
|
67 |
|
|
@Test
|
68 |
|
|
public void testSingleScholixAction() throws IOException {
|
69 |
52935
|
claudio.at
|
doTestSingleScholixAction("/eu/dnetlib/data/mapreduce/actions/part-00000");
|
70 |
|
|
}
|
71 |
52912
|
sandro.lab
|
|
72 |
52935
|
claudio.at
|
@Test
|
73 |
|
|
public void testSingleScholixAction2() throws IOException {
|
74 |
|
|
doTestSingleScholixAction("/eu/dnetlib/data/mapreduce/actions/scholix.json");
|
75 |
|
|
}
|
76 |
|
|
|
77 |
|
|
private void doTestSingleScholixAction(final String filePath) throws IOException {
|
78 |
|
|
final InputStream is = this.getClass().getResourceAsStream(filePath);
|
79 |
52912
|
sandro.lab
|
final BufferedReader in = new BufferedReader(new InputStreamReader(is));
|
80 |
52935
|
claudio.at
|
// in.readLine();
|
81 |
|
|
// in.readLine();
|
82 |
52912
|
sandro.lab
|
String line = in.readLine();
|
83 |
|
|
System.out.println(line);
|
84 |
|
|
final JsonParser parser = new JsonParser();
|
85 |
|
|
JsonObject root = parser.parse(line).getAsJsonObject();
|
86 |
52935
|
claudio.at
|
List<AtomicAction> actions = ScholixToActions
|
87 |
|
|
.generateActionsFromScholix(root, configurationMap, setName, agent, new ActionFactory(), nsPrefix, dsName, dsId, DateUtils.now_ISO8601());
|
88 |
52912
|
sandro.lab
|
actions.forEach(it-> System.out.println(String.format("%s cf:%s qualifier:%s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn())));
|
89 |
|
|
|
90 |
52916
|
sandro.lab
|
System.out.println(actions.get(0).toJSON());
|
91 |
52912
|
sandro.lab
|
}
|
92 |
|
|
|
93 |
|
|
@Test
|
94 |
|
|
public void testScholixAction() throws IOException {
|
95 |
|
|
|
96 |
52935
|
claudio.at
|
doTestMultipleScholixActions("/eu/dnetlib/data/mapreduce/actions/part-00000");
|
97 |
|
|
}
|
98 |
|
|
|
99 |
|
|
@Test
|
100 |
|
|
public void testScholixAction2() throws IOException {
|
101 |
|
|
|
102 |
|
|
doTestMultipleScholixActions("/eu/dnetlib/data/mapreduce/actions/scholix.json");
|
103 |
|
|
}
|
104 |
|
|
|
105 |
|
|
private void doTestMultipleScholixActions(final String filePath) throws IOException {
|
106 |
|
|
final InputStream is = this.getClass().getResourceAsStream(filePath);
|
107 |
52912
|
sandro.lab
|
final BufferedReader in = new BufferedReader(new InputStreamReader(is));
|
108 |
|
|
|
109 |
|
|
String line = in.readLine();
|
110 |
|
|
while (StringUtils.isNotEmpty(line)){
|
111 |
|
|
final JsonParser parser = new JsonParser();
|
112 |
|
|
JsonObject root = parser.parse(line).getAsJsonObject();
|
113 |
|
|
try {
|
114 |
52935
|
claudio.at
|
List<AtomicAction> actions = ScholixToActions
|
115 |
|
|
.generateActionsFromScholix(root, configurationMap, setName, agent, new ActionFactory(), nsPrefix, dsName, dsId, DateUtils.now_ISO8601());
|
116 |
52912
|
sandro.lab
|
Assert.assertNotNull(actions);
|
117 |
|
|
Assert.assertTrue(actions.size() > 0);
|
118 |
|
|
} catch (Throwable e) {
|
119 |
|
|
System.out.println(line);
|
120 |
|
|
throw (new RuntimeException(e));
|
121 |
|
|
}
|
122 |
|
|
line = in.readLine();
|
123 |
|
|
}
|
124 |
|
|
}
|
125 |
|
|
}
|