Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.dataimport;
2

    
3

    
4
import com.google.gson.JsonObject;
5
import com.google.gson.JsonParser;
6
import eu.dnetlib.actionmanager.actions.ActionFactory;
7
import eu.dnetlib.actionmanager.actions.AtomicAction;
8
import eu.dnetlib.actionmanager.common.Agent;
9
import eu.dnetlib.miscutils.datetime.DateUtils;
10
import org.apache.hadoop.io.Text;
11
import org.apache.hadoop.mapreduce.Mapper;
12

    
13
import java.io.IOException;
14
import java.util.HashMap;
15
import java.util.List;
16
import java.util.Map;
17

    
18
public class ScholexplorerMapper extends Mapper<Text, Text, Text, Text> {
19

    
20
    private ActionFactory factory;
21
    private JsonParser parser;
22
    private String setName;
23
    private Agent agent;
24
    private String nsPrefix;
25
    private String dsName;
26
    private String dsId;
27
    private String dateOfCollection;
28
    private Text keyout;
29
    private Text valueOut;
30
    private Map<String, ScholExplorerConfiguration> configurationMap= new HashMap<>();
31

    
32
    @Override
33
    protected void setup(Context context) throws IOException, InterruptedException {
34
        factory = new ActionFactory();
35
        parser = new JsonParser();
36
        configurationMap.put("issn", new ScholExplorerConfiguration(null, false));
37
        configurationMap.put("pmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
38
        configurationMap.put("doi", new ScholExplorerConfiguration("doi", true,"http://dx.doi.org/%s"));
39
        configurationMap.put("pbmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
40
        configurationMap.put("openaire ", new ScholExplorerConfiguration(null, false));
41
        configurationMap.put("pmcid", new ScholExplorerConfiguration("pmc", true,"https://europepmc.org/articles/%s"));
42
        configurationMap.put("pubmedid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
43
        configurationMap.put("icpsr", new ScholExplorerConfiguration(null, false));
44
        configurationMap.put("dnet", new ScholExplorerConfiguration(null, false));
45
        configurationMap.put("url ", new ScholExplorerConfiguration(null, true,"%s"));
46
        configurationMap.put("openaire", new ScholExplorerConfiguration(null, false));
47
        setName = context.getConfiguration().get("setName");
48
        agent= new Agent(context.getConfiguration().get("agentId"), context.getConfiguration().get("agentName"), Agent.AGENT_TYPE.service);
49
        nsPrefix = context.getConfiguration().get("ns_prefix");
50
        dsName = context.getConfiguration().get("dsName");
51
        dsId = context.getConfiguration().get("dsId");
52
        dateOfCollection = context.getConfiguration().get("dateOfCollection", DateUtils.now_ISO8601());
53
        keyout = new Text("");
54
        valueOut = new Text("");
55

    
56

    
57
    }
58

    
59
    @Override
60
    protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
61

    
62
        final String inputJson =value.toString();
63
        final JsonObject rootElement = parser.parse(inputJson).getAsJsonObject();
64
        final List<AtomicAction> actions = ScholixToActions.generateActionsFromScholix(rootElement, configurationMap, setName,agent, factory, nsPrefix, dsName, dsId, dateOfCollection);
65
        for(final AtomicAction action : actions) {
66
            keyout.set(action.getRowKey());
67
            valueOut.set(action.toJSON());
68
            context.write(keyout, valueOut);
69
        }
70
    }
71

    
72

    
73

    
74

    
75

    
76

    
77

    
78

    
79
}
(6-6/8)