Project

General

Profile

1
package eu.dnetlib.data.mapreduce.actions;
2

    
3
import java.io.BufferedReader;
4
import java.io.IOException;
5
import java.io.InputStream;
6
import java.io.InputStreamReader;
7
import java.util.HashMap;
8
import java.util.List;
9
import java.util.Map;
10

    
11
import com.google.gson.JsonObject;
12
import com.google.gson.JsonParser;
13
import eu.dnetlib.actionmanager.actions.ActionFactory;
14
import eu.dnetlib.actionmanager.actions.AtomicAction;
15
import eu.dnetlib.actionmanager.common.Agent;
16
import eu.dnetlib.data.mapreduce.hbase.dataimport.ScholExplorerConfiguration;
17
import eu.dnetlib.data.mapreduce.hbase.dataimport.ScholixToActions;
18
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
19
import eu.dnetlib.miscutils.datetime.DateUtils;
20
import org.apache.commons.lang3.StringUtils;
21
import org.junit.Assert;
22
import org.junit.Before;
23
import org.junit.Test;
24

    
25
import static org.junit.Assert.assertEquals;
26

    
27
public class ScholexplorerActionMapperTest {
28

    
29
    private Map<String, ScholExplorerConfiguration> configurationMap;
30
    private String setName;
31
    private Agent agent;
32
    private String nsPrefix;
33
    private String dsName;
34
    private String dsId;
35

    
36

    
37

    
38
    @Before
39
    public void initializeCofiguration(){
40
        configurationMap = new HashMap<>();
41
        configurationMap.put("issn", new ScholExplorerConfiguration(null, false));
42
        configurationMap.put("openaire", new ScholExplorerConfiguration(null, false));
43
        configurationMap.put("pmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
44
        configurationMap.put("doi", new ScholExplorerConfiguration("doi", true,"http://dx.doi.org/%s"));
45
        configurationMap.put("pbmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
46
        configurationMap.put("pmcid", new ScholExplorerConfiguration("pmc", true,"https://europepmc.org/articles/%s"));
47
        configurationMap.put("pubmedid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
48
        configurationMap.put("icpsr", new ScholExplorerConfiguration(null, false));
49
        configurationMap.put("dnet", new ScholExplorerConfiguration(null, false));
50
        configurationMap.put("url", new ScholExplorerConfiguration(null, true,"%s"));
51

    
52
        setName = "DLI";
53
        agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service);
54
        nsPrefix = "scholexplore";
55
        dsName = "ScholExplorer";
56
        dsId = "scholexplorer";
57
    }
58

    
59
    @Test
60
    public void testSubString () {
61
        final String dnetId ="50|dli_resolver::7b7b9a57a40818d10cf2532d71f012fa";
62
        assertEquals("7b7b9a57a40818d10cf2532d71f012fa", dnetId.substring(17));
63

    
64
        System.out.println(AbstractDNetXsltFunctions.md5("SNSF - Swiss National Science Foundation"));
65
    }
66

    
67
    @Test
68
    public void testSingleScholixAction() throws IOException {
69
        doTestSingleScholixAction("/eu/dnetlib/data/mapreduce/actions/part-00000");
70
    }
71

    
72
    @Test
73
    public void testSingleScholixAction2() throws IOException {
74
        doTestSingleScholixAction("/eu/dnetlib/data/mapreduce/actions/scholix.json");
75
    }
76

    
77
    private void doTestSingleScholixAction(final String filePath) throws IOException {
78
        final InputStream is = this.getClass().getResourceAsStream(filePath);
79
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
80
        //        in.readLine();
81
        //        in.readLine();
82
        String line = in.readLine();
83
        System.out.println(line);
84
        final JsonParser parser = new JsonParser();
85
        JsonObject root = parser.parse(line).getAsJsonObject();
86
        List<AtomicAction> actions = ScholixToActions
87
                .generateActionsFromScholix(root, configurationMap, setName, agent, new ActionFactory(), nsPrefix, dsName, dsId, DateUtils.now_ISO8601());
88
        actions.forEach(it-> System.out.println(String.format("%s    cf:%s    qualifier:%s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn())));
89

    
90
        System.out.println(actions.get(0).toJSON());
91
    }
92

    
93
    @Test
94
    public void testScholixAction() throws IOException {
95

    
96
        doTestMultipleScholixActions("/eu/dnetlib/data/mapreduce/actions/part-00000");
97
    }
98

    
99
    @Test
100
    public void testScholixAction2() throws IOException {
101

    
102
        doTestMultipleScholixActions("/eu/dnetlib/data/mapreduce/actions/scholix.json");
103
    }
104

    
105
    private void doTestMultipleScholixActions(final String filePath) throws IOException {
106
        final InputStream is = this.getClass().getResourceAsStream(filePath);
107
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
108

    
109
        String line = in.readLine();
110
        while (StringUtils.isNotEmpty(line)){
111
            final JsonParser parser = new JsonParser();
112
            JsonObject root = parser.parse(line).getAsJsonObject();
113
            try {
114
                List<AtomicAction> actions = ScholixToActions
115
                        .generateActionsFromScholix(root, configurationMap, setName, agent, new ActionFactory(), nsPrefix, dsName, dsId, DateUtils.now_ISO8601());
116
//                Assert.assertNotNull(actions);
117
//                Assert.assertTrue(actions.size() > 0);
118
            } catch (Throwable e) {
119
                System.out.println(line);
120
                throw (new RuntimeException(e));
121
            }
122
            line = in.readLine();
123
        }
124
    }
125
}
(4-4/4)