Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.dataimport;
2

    
3

    
4
import com.google.gson.JsonObject;
5
import com.google.gson.JsonParser;
6
import eu.dnetlib.actionmanager.actions.ActionFactory;
7
import eu.dnetlib.actionmanager.actions.AtomicAction;
8
import eu.dnetlib.actionmanager.common.Agent;
9
import eu.dnetlib.miscutils.datetime.DateUtils;
10
import org.apache.hadoop.io.LongWritable;
11
import org.apache.hadoop.io.Text;
12
import org.apache.hadoop.mapreduce.Mapper;
13

    
14
import java.io.IOException;
15
import java.util.HashMap;
16
import java.util.List;
17
import java.util.Map;
18

    
19
public class ScholexplorerMapper extends Mapper<LongWritable, Text, Text, Text> {
20

    
21
    private ActionFactory factory;
22
    private JsonParser parser;
23
    private String setName;
24
    private Agent agent;
25
    private String nsPrefix;
26
    private String dsName;
27
    private String dsId;
28
    private String dateOfCollection;
29
    private Text keyout;
30
    private Text valueOut;
31
    private Map<String, ScholExplorerConfiguration> configurationMap= new HashMap<>();
32

    
33
    @Override
34
    protected void setup(Context context) throws IOException, InterruptedException {
35
        factory = new ActionFactory();
36
        parser = new JsonParser();
37
        configurationMap.put("issn", new ScholExplorerConfiguration(null, false));
38
        configurationMap.put("pmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
39
        configurationMap.put("doi", new ScholExplorerConfiguration("doi", true,"http://dx.doi.org/%s"));
40
        configurationMap.put("pbmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
41
        configurationMap.put("openaire ", new ScholExplorerConfiguration(null, false));
42
        configurationMap.put("pmcid", new ScholExplorerConfiguration("pmc", true,"https://europepmc.org/articles/%s"));
43
        configurationMap.put("pubmedid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
44
        configurationMap.put("icpsr", new ScholExplorerConfiguration(null, false));
45
        configurationMap.put("dnet", new ScholExplorerConfiguration(null, false));
46
        configurationMap.put("url ", new ScholExplorerConfiguration(null, true,"%s"));
47
        configurationMap.put("openaire", new ScholExplorerConfiguration(null, false));
48
        setName = context.getConfiguration().get("setName");
49
        agent= new Agent(context.getConfiguration().get("agentId"), context.getConfiguration().get("agentName"), Agent.AGENT_TYPE.service);
50
        nsPrefix = context.getConfiguration().get("ns_prefix");
51
        dsName = context.getConfiguration().get("dsName");
52
        dsId = context.getConfiguration().get("dsId");
53
        dateOfCollection = context.getConfiguration().get("dateOfCollection", DateUtils.now_ISO8601());
54
        keyout = new Text("");
55
        valueOut = new Text("");
56

    
57

    
58
    }
59

    
60
    @Override
61
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
62

    
63
        final String inputJson =value.toString();
64
        final JsonObject rootElement = parser.parse(inputJson).getAsJsonObject();
65
        final List<AtomicAction> actions = ScholixToActions.generateActionsFromScholix(rootElement, configurationMap, setName,agent, factory, nsPrefix, dsName, dsId, dateOfCollection);
66
        for(final AtomicAction action : actions) {
67
            keyout.set(action.getRowKey());
68
            valueOut.set(action.toJSON());
69
            context.write(keyout, valueOut);
70
        }
71
    }
72

    
73

    
74

    
75

    
76

    
77

    
78

    
79

    
80
}
(6-6/8)