1
|
package eu.dnetlib.data.mapreduce.actions;
|
2
|
|
3
|
import com.google.gson.JsonObject;
|
4
|
import com.google.gson.JsonParser;
|
5
|
import eu.dnetlib.actionmanager.actions.ActionFactory;
|
6
|
import eu.dnetlib.actionmanager.actions.AtomicAction;
|
7
|
import eu.dnetlib.actionmanager.common.Agent;
|
8
|
import eu.dnetlib.data.mapreduce.hbase.dataimport.ScholExplorerConfiguration;
|
9
|
import eu.dnetlib.data.mapreduce.hbase.dataimport.ScholixToActions;
|
10
|
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
|
11
|
import eu.dnetlib.miscutils.datetime.DateUtils;
|
12
|
import org.apache.commons.lang3.StringUtils;
|
13
|
import org.junit.Assert;
|
14
|
import org.junit.Before;
|
15
|
import org.junit.Test;
|
16
|
|
17
|
import java.io.BufferedReader;
|
18
|
import java.io.IOException;
|
19
|
import java.io.InputStream;
|
20
|
import java.io.InputStreamReader;
|
21
|
import java.util.HashMap;
|
22
|
import java.util.List;
|
23
|
import java.util.Map;
|
24
|
|
25
|
import static org.junit.Assert.assertEquals;
|
26
|
|
27
|
public class ScholexplorerActionMapperTest {
|
28
|
|
29
|
private Map<String, ScholExplorerConfiguration> configurationMap;
|
30
|
private String setName;
|
31
|
private Agent agent;
|
32
|
private String nsPrefix;
|
33
|
private String dsName;
|
34
|
private String dsId;
|
35
|
|
36
|
|
37
|
|
38
|
@Before
|
39
|
public void initializeCofiguration(){
|
40
|
configurationMap= new HashMap<>();
|
41
|
configurationMap.put("issn", new ScholExplorerConfiguration(null, false));
|
42
|
configurationMap.put("openaire", new ScholExplorerConfiguration(null, false));
|
43
|
configurationMap.put("pmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
|
44
|
configurationMap.put("doi", new ScholExplorerConfiguration("doi", true,"http://dx.doi.org/%s"));
|
45
|
configurationMap.put("pbmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
|
46
|
configurationMap.put("openaire ", new ScholExplorerConfiguration(null, false));
|
47
|
configurationMap.put("pmcid", new ScholExplorerConfiguration("pmc", true,"https://europepmc.org/articles/%s"));
|
48
|
configurationMap.put("pubmedid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
|
49
|
configurationMap.put("icpsr", new ScholExplorerConfiguration(null, false));
|
50
|
configurationMap.put("dnet", new ScholExplorerConfiguration(null, false));
|
51
|
configurationMap.put("url ", new ScholExplorerConfiguration(null, true,"%s"));
|
52
|
|
53
|
setName = "DLI";
|
54
|
agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service);
|
55
|
nsPrefix = "scholexplore";
|
56
|
dsName = "ScholExplorer";
|
57
|
dsId = "scholexplorer";
|
58
|
}
|
59
|
|
60
|
@Test
|
61
|
public void testSubString () {
|
62
|
final String dnetId ="50|dli_resolver::7b7b9a57a40818d10cf2532d71f012fa";
|
63
|
assertEquals("7b7b9a57a40818d10cf2532d71f012fa", dnetId.substring(17));
|
64
|
|
65
|
System.out.println(AbstractDNetXsltFunctions.md5("SNSF - Swiss National Science Foundation"));
|
66
|
}
|
67
|
|
68
|
|
69
|
|
70
|
|
71
|
@Test
|
72
|
public void testSingleScholixAction() throws IOException {
|
73
|
|
74
|
final InputStream is = this.getClass().getResourceAsStream("/eu/dnetlib/data/mapreduce/actions/part-00000");
|
75
|
final BufferedReader in = new BufferedReader(new InputStreamReader(is));
|
76
|
// in.readLine();
|
77
|
// in.readLine();
|
78
|
String line = in.readLine();
|
79
|
System.out.println(line);
|
80
|
final JsonParser parser = new JsonParser();
|
81
|
JsonObject root = parser.parse(line).getAsJsonObject();
|
82
|
List<AtomicAction> actions = ScholixToActions.generateActionsFromScholix(root, configurationMap, setName, agent, new ActionFactory(), nsPrefix, dsName, dsId, DateUtils.now_ISO8601());
|
83
|
actions.forEach(it-> System.out.println(String.format("%s cf:%s qualifier:%s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn())));
|
84
|
|
85
|
}
|
86
|
|
87
|
@Test
|
88
|
public void testScholixAction() throws IOException {
|
89
|
|
90
|
final InputStream is = this.getClass().getResourceAsStream("/eu/dnetlib/data/mapreduce/actions/part-00000");
|
91
|
final BufferedReader in = new BufferedReader(new InputStreamReader(is));
|
92
|
|
93
|
String line = in.readLine();
|
94
|
while (StringUtils.isNotEmpty(line)){
|
95
|
final JsonParser parser = new JsonParser();
|
96
|
JsonObject root = parser.parse(line).getAsJsonObject();
|
97
|
try {
|
98
|
List<AtomicAction> actions = ScholixToActions.generateActionsFromScholix(root, configurationMap, setName, agent, new ActionFactory(), nsPrefix, dsName, dsId, DateUtils.now_ISO8601());
|
99
|
Assert.assertNotNull(actions);
|
100
|
Assert.assertTrue(actions.size() > 0);
|
101
|
} catch (Throwable e) {
|
102
|
System.out.println(line);
|
103
|
throw (new RuntimeException(e));
|
104
|
}
|
105
|
line = in.readLine();
|
106
|
}
|
107
|
}
|
108
|
}
|