Project

General

Profile

« Previous | Next » 

Revision 52931

small fixes

View differences:

ScholexplorerMapper.java
1 1
package eu.dnetlib.data.mapreduce.hbase.dataimport;
2 2

  
3
import java.io.IOException;
4
import java.util.HashMap;
5
import java.util.List;
6
import java.util.Map;
3 7

  
4 8
import com.google.gson.JsonObject;
5 9
import com.google.gson.JsonParser;
......
7 11
import eu.dnetlib.actionmanager.actions.AtomicAction;
8 12
import eu.dnetlib.actionmanager.common.Agent;
9 13
import eu.dnetlib.miscutils.datetime.DateUtils;
14
import org.apache.hadoop.io.LongWritable;
10 15
import org.apache.hadoop.io.Text;
11 16
import org.apache.hadoop.mapreduce.Mapper;
12 17

  
13
import java.io.IOException;
14
import java.util.HashMap;
15
import java.util.List;
16
import java.util.Map;
18
public class ScholexplorerMapper extends Mapper<LongWritable, Text, Text, Text> {
17 19

  
18
public class ScholexplorerMapper extends Mapper<Text, Text, Text, Text> {
19

  
20 20
    private ActionFactory factory;
21 21
    private JsonParser parser;
22 22
    private String setName;
......
27 27
    private String dateOfCollection;
28 28
    private Text keyout;
29 29
    private Text valueOut;
30
    private Map<String, ScholExplorerConfiguration> configurationMap= new HashMap<>();
30
    private Map<String, ScholExplorerConfiguration> conf = new HashMap<>();
31 31

  
32 32
    @Override
33 33
    protected void setup(Context context) throws IOException, InterruptedException {
34 34
        factory = new ActionFactory();
35 35
        parser = new JsonParser();
36
        configurationMap.put("issn", new ScholExplorerConfiguration(null, false));
37
        configurationMap.put("pmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
38
        configurationMap.put("doi", new ScholExplorerConfiguration("doi", true,"http://dx.doi.org/%s"));
39
        configurationMap.put("pbmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
40
        configurationMap.put("openaire ", new ScholExplorerConfiguration(null, false));
41
        configurationMap.put("pmcid", new ScholExplorerConfiguration("pmc", true,"https://europepmc.org/articles/%s"));
42
        configurationMap.put("pubmedid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
43
        configurationMap.put("icpsr", new ScholExplorerConfiguration(null, false));
44
        configurationMap.put("dnet", new ScholExplorerConfiguration(null, false));
45
        configurationMap.put("url ", new ScholExplorerConfiguration(null, true,"%s"));
46
        configurationMap.put("openaire", new ScholExplorerConfiguration(null, false));
36
        conf.put("issn", new ScholExplorerConfiguration(null, false));
37
        conf.put("pmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
38
        conf.put("doi", new ScholExplorerConfiguration("doi", true,"http://dx.doi.org/%s"));
39
        conf.put("pbmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
40
        conf.put("openaire ", new ScholExplorerConfiguration(null, false));
41
        conf.put("pmcid", new ScholExplorerConfiguration("pmc", true,"https://europepmc.org/articles/%s"));
42
        conf.put("pubmedid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
43
        conf.put("icpsr", new ScholExplorerConfiguration(null, false));
44
        conf.put("dnet", new ScholExplorerConfiguration(null, false));
45
        conf.put("url ", new ScholExplorerConfiguration(null, true,"%s"));
46
        conf.put("openaire", new ScholExplorerConfiguration(null, false));
47 47
        setName = context.getConfiguration().get("setName");
48 48
        agent= new Agent(context.getConfiguration().get("agentId"), context.getConfiguration().get("agentName"), Agent.AGENT_TYPE.service);
49 49
        nsPrefix = context.getConfiguration().get("ns_prefix");
50 50
        dsName = context.getConfiguration().get("dsName");
51 51
        dsId = context.getConfiguration().get("dsId");
52 52
        dateOfCollection = context.getConfiguration().get("dateOfCollection", DateUtils.now_ISO8601());
53

  
53 54
        keyout = new Text("");
54 55
        valueOut = new Text("");
55

  
56

  
57 56
    }
58 57

  
59 58
    @Override
60
    protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
59
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
61 60

  
62
        final String inputJson =value.toString();
61
        final String inputJson = value.toString();
63 62
        final JsonObject rootElement = parser.parse(inputJson).getAsJsonObject();
64
        final List<AtomicAction> actions = ScholixToActions.generateActionsFromScholix(rootElement, configurationMap, setName,agent, factory, nsPrefix, dsName, dsId, dateOfCollection);
63
        final List<AtomicAction> actions = ScholixToActions.generateActionsFromScholix(rootElement, conf, setName,agent, factory, nsPrefix, dsName, dsId, dateOfCollection);
65 64
        for(final AtomicAction action : actions) {
66 65
            keyout.set(action.getRowKey());
67 66
            valueOut.set(action.toJSON());
......
69 68
        }
70 69
    }
71 70

  
72

  
73

  
74

  
75

  
76

  
77

  
78

  
79 71
}

Also available in: Unified diff