Project

General

Profile

« Previous | Next » 

Revision 52931

small fixes

View differences:

modules/dnet-mapreduce-jobs/branches/beta/src/main/java/eu/dnetlib/data/mapreduce/hbase/dataimport/ScholixToActions.java
1 1
package eu.dnetlib.data.mapreduce.hbase.dataimport;
2 2

  
3
import java.util.ArrayList;
4
import java.util.List;
5
import java.util.Map;
6
import java.util.Objects;
7

  
3 8
import com.google.gson.JsonArray;
4 9
import com.google.gson.JsonObject;
5
import com.googlecode.protobuf.format.JsonFormat;
6 10
import eu.dnetlib.actionmanager.actions.ActionFactory;
7 11
import eu.dnetlib.actionmanager.actions.AtomicAction;
8 12
import eu.dnetlib.actionmanager.common.Agent;
......
11 15
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
12 16
import org.apache.commons.lang3.StringUtils;
13 17

  
14
import java.util.ArrayList;
15
import java.util.List;
16
import java.util.Map;
17
import java.util.Objects;
18

  
19 18
import static eu.dnetlib.data.proto.FieldTypeProtos.*;
20 19
import static eu.dnetlib.data.proto.KindProtos.Kind;
21 20
import static eu.dnetlib.data.proto.OafProtos.*;
......
26 25

  
27 26
public class ScholixToActions {
28 27

  
29
    public static List<AtomicAction> generateActionsFromScholix(final JsonObject rootElement, final Map<String, ScholExplorerConfiguration> configurationMap,
28
    public static List<AtomicAction> generateActionsFromScholix(final JsonObject rootElement, final Map<String, ScholExplorerConfiguration> conf,
30 29
                                                                final String setName, final Agent agent, ActionFactory factory, String nsPrefix, final String dsName,
31 30
                                                                final String dsId, String dateOfCollection) {
32 31

  
......
40 39
        final JsonObject localIdentifier = rootElement.getAsJsonArray("localIdentifier").get(0).getAsJsonObject();
41 40
        final String dnetId = getStringValue(rootElement, "id").substring(17);
42 41

  
43

  
44
        String title;
42
        String title = "";
45 43
        if (rootElement.has("title") && rootElement.get("title").isJsonArray()) {
46 44
            StringBuilder ttl = new StringBuilder();
47 45
            getArrayValues(rootElement, "title").forEach(ttl::append);
......
50 48
            title = getStringValue(rootElement, "title");
51 49
        }
52 50

  
53
        if (title != null && title.charAt(0) == '"' && title.charAt(title.length() - 1) == '"')
51
        if (title != null && title.charAt(0) == '"' && title.charAt(title.length() - 1) == '"') {
54 52
            title = title.substring(1, title.length() - 1);
53
        }
55 54

  
56

  
57 55
        final Oaf.Builder oafBuilder = Oaf.newBuilder();
58
        final boolean isVisible = title != null && configurationMap.get(getStringValue(localIdentifier, "type")).isVisible();
56
        final boolean isVisible = StringUtils.isNotBlank(title) && conf.get(getStringValue(localIdentifier, "type")).isVisible();
59 57
        oafBuilder.setDataInfo(
60 58
                DataInfo.newBuilder()
61 59
                        .setInvisible(!isVisible)
......
65 63
                        .setProvenanceaction(getQualifier("sysimport:actionset", "dnet:provenanceActions"))
66 64
                        .build());
67 65
        oafBuilder.setKind(Kind.entity);
68
        final OafEntity.Builder oafEntityBuilder = OafEntity.newBuilder();
69
        oafEntityBuilder.setType(Type.result);
70 66
        final String sourceId = String.format("50|%s::%s", nsPrefix, dnetId);
71
        oafEntityBuilder.setId(sourceId);
72
        final StructuredProperty pid = getPid(localIdentifier, configurationMap);
73
        if (pid != null)
67
        final KeyValue collectedFrom = KeyValue.newBuilder()
68
                .setValue(dsName)
69
                .setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(dsId))
70
                .build();
71
        final OafEntity.Builder oafEntityBuilder = OafEntity.newBuilder()
72
                .setType(Type.result)
73
                .setDateofcollection(dateOfCollection)
74
                .addCollectedfrom(collectedFrom)
75
                .setId(sourceId);
76

  
77
        final StructuredProperty pid = getPid(localIdentifier, conf);
78
        if (pid != null) {
74 79
            oafEntityBuilder.addPid(pid);
80
        }
75 81
        final Result.Builder result = Result.newBuilder();
76
        final Metadata.Builder metadata = Metadata.newBuilder();
77
        metadata.setResulttype(getQualifier(typology, "dnet:result_typologies"));
78
        metadata.setLanguage(Qualifier.newBuilder()
82

  
83
        final Metadata.Builder metadata = Metadata.newBuilder()
84
            .setResulttype(getQualifier(typology, "dnet:result_typologies"))
85
            .setLanguage(Qualifier.newBuilder()
79 86
                .setClassid("und")
80 87
                .setClassname("Undetermined")
81 88
                .setSchemeid("dent:languages")
82 89
                .setSchemename("dent:languages")
83 90
                .build());
84
        if (title!= null)
85
        metadata.addTitle(StructuredProperty.newBuilder()
86
                .setValue(title)
87
                .setQualifier(getQualifier("main title", "dnet:dataCite_title"))
88
                .build());
91
        if (title != null) {
92
            metadata.addTitle(StructuredProperty.newBuilder()
93
                    .setValue(title)
94
                    .setQualifier(getQualifier("main title", "dnet:dataCite_title"))
95
                    .build());
96
        }
89 97
        if (publisher.size() > 0)
90 98
            metadata.setPublisher(StringField.newBuilder().setValue(publisher.get(0)).build());
91 99
        if (StringUtils.isNotEmpty(abstractValue)) {
......
127 135
        }
128 136
        result.setMetadata(metadata.build());
129 137

  
130
        KeyValue collectedFrom = KeyValue.newBuilder()
131
                .setValue(dsName)
132
                .setKey("10|openaire____::" + AbstractDNetXsltFunctions.md5(dsId))
133
                .build();
134
        oafEntityBuilder.addCollectedfrom(collectedFrom);
135

  
136
        oafEntityBuilder.setDateofcollection(dateOfCollection);
137

  
138 138
        final String pidType = getStringValue(localIdentifier, "type");
139
        final ScholExplorerConfiguration currentConfiguration = configurationMap.get(pidType);
140
        if (currentConfiguration.getGeneratedUrl() != null) {
139
        final ScholExplorerConfiguration currentConf = conf.get(pidType);
140
        if (currentConf.getGeneratedUrl() != null) {
141 141
            final Instance.Builder instance = Instance.newBuilder();
142 142
            final String pidValue = getStringValue(localIdentifier, "id");
143
            instance.addUrl(String.format(currentConfiguration.getGeneratedUrl(), pidValue));
143
            instance.addUrl(String.format(currentConf.getGeneratedUrl(), pidValue));
144 144
            instance.setAccessright(Qualifier.newBuilder()
145 145
                    .setClassid("UNKNOWN")
146 146
                    .setClassname("not available")
......
214 214
        ResultResult.Builder resultInverseRel = ResultResult.newBuilder();
215 215
        String relClass;
216 216
        String inverseRelClass;
217

  
217 218
        switch (relationSemantic) {
218 219
            case "isSupplementedBy": {
219 220
                cfRelation = "resultResult_supplement_isSupplementedBy";
......
264 265
            }
265 266
        }
266 267

  
267
        List<AtomicAction> actions = new ArrayList<>();
268
        final List<AtomicAction> actions = new ArrayList<>();
268 269
        actions.add(createResultResultRelation(sourceId, targetId, collectedFrom, resultRel.build(), relClass, cfRelation, factory, setName, agent));
269 270
        actions.add(createResultResultRelation(targetId, sourceId, collectedFrom, resultInverseRel.build(), inverseRelClass, cfInverseRelation, factory, setName, agent));
270 271

  
......
293 294
        return result;
294 295
    }
295 296

  
296

  
297 297
    private static List<JsonObject> extractRelations(final JsonObject rootElement, final String fieldType) {
298 298
        final List<JsonObject> result = new ArrayList<>();
299 299
        if (rootElement.has(fieldType) && rootElement.get(fieldType).isJsonArray()) {
......
303 303
        return result;
304 304
    }
305 305

  
306

  
307 306
    private static Qualifier getQualifier(final String classValue, final String schemeValue) {
308 307

  
309 308
        return Qualifier.newBuilder()
modules/dnet-mapreduce-jobs/branches/beta/src/main/java/eu/dnetlib/data/mapreduce/hbase/dataimport/ScholexplorerMapper.java
1 1
package eu.dnetlib.data.mapreduce.hbase.dataimport;
2 2

  
3
import java.io.IOException;
4
import java.util.HashMap;
5
import java.util.List;
6
import java.util.Map;
3 7

  
4 8
import com.google.gson.JsonObject;
5 9
import com.google.gson.JsonParser;
......
7 11
import eu.dnetlib.actionmanager.actions.AtomicAction;
8 12
import eu.dnetlib.actionmanager.common.Agent;
9 13
import eu.dnetlib.miscutils.datetime.DateUtils;
14
import org.apache.hadoop.io.LongWritable;
10 15
import org.apache.hadoop.io.Text;
11 16
import org.apache.hadoop.mapreduce.Mapper;
12 17

  
13
import java.io.IOException;
14
import java.util.HashMap;
15
import java.util.List;
16
import java.util.Map;
18
public class ScholexplorerMapper extends Mapper<LongWritable, Text, Text, Text> {
17 19

  
18
public class ScholexplorerMapper extends Mapper<Text, Text, Text, Text> {
19

  
20 20
    private ActionFactory factory;
21 21
    private JsonParser parser;
22 22
    private String setName;
......
27 27
    private String dateOfCollection;
28 28
    private Text keyout;
29 29
    private Text valueOut;
30
    private Map<String, ScholExplorerConfiguration> configurationMap= new HashMap<>();
30
    private Map<String, ScholExplorerConfiguration> conf = new HashMap<>();
31 31

  
32 32
    @Override
33 33
    protected void setup(Context context) throws IOException, InterruptedException {
34 34
        factory = new ActionFactory();
35 35
        parser = new JsonParser();
36
        configurationMap.put("issn", new ScholExplorerConfiguration(null, false));
37
        configurationMap.put("pmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
38
        configurationMap.put("doi", new ScholExplorerConfiguration("doi", true,"http://dx.doi.org/%s"));
39
        configurationMap.put("pbmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
40
        configurationMap.put("openaire ", new ScholExplorerConfiguration(null, false));
41
        configurationMap.put("pmcid", new ScholExplorerConfiguration("pmc", true,"https://europepmc.org/articles/%s"));
42
        configurationMap.put("pubmedid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
43
        configurationMap.put("icpsr", new ScholExplorerConfiguration(null, false));
44
        configurationMap.put("dnet", new ScholExplorerConfiguration(null, false));
45
        configurationMap.put("url ", new ScholExplorerConfiguration(null, true,"%s"));
46
        configurationMap.put("openaire", new ScholExplorerConfiguration(null, false));
36
        conf.put("issn", new ScholExplorerConfiguration(null, false));
37
        conf.put("pmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
38
        conf.put("doi", new ScholExplorerConfiguration("doi", true,"http://dx.doi.org/%s"));
39
        conf.put("pbmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
40
        conf.put("openaire ", new ScholExplorerConfiguration(null, false));
41
        conf.put("pmcid", new ScholExplorerConfiguration("pmc", true,"https://europepmc.org/articles/%s"));
42
        conf.put("pubmedid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
43
        conf.put("icpsr", new ScholExplorerConfiguration(null, false));
44
        conf.put("dnet", new ScholExplorerConfiguration(null, false));
45
        conf.put("url ", new ScholExplorerConfiguration(null, true,"%s"));
46
        conf.put("openaire", new ScholExplorerConfiguration(null, false));
47 47
        setName = context.getConfiguration().get("setName");
48 48
        agent= new Agent(context.getConfiguration().get("agentId"), context.getConfiguration().get("agentName"), Agent.AGENT_TYPE.service);
49 49
        nsPrefix = context.getConfiguration().get("ns_prefix");
50 50
        dsName = context.getConfiguration().get("dsName");
51 51
        dsId = context.getConfiguration().get("dsId");
52 52
        dateOfCollection = context.getConfiguration().get("dateOfCollection", DateUtils.now_ISO8601());
53

  
53 54
        keyout = new Text("");
54 55
        valueOut = new Text("");
55

  
56

  
57 56
    }
58 57

  
59 58
    @Override
60
    protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
59
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
61 60

  
62
        final String inputJson =value.toString();
61
        final String inputJson = value.toString();
63 62
        final JsonObject rootElement = parser.parse(inputJson).getAsJsonObject();
64
        final List<AtomicAction> actions = ScholixToActions.generateActionsFromScholix(rootElement, configurationMap, setName,agent, factory, nsPrefix, dsName, dsId, dateOfCollection);
63
        final List<AtomicAction> actions = ScholixToActions.generateActionsFromScholix(rootElement, conf, setName,agent, factory, nsPrefix, dsName, dsId, dateOfCollection);
65 64
        for(final AtomicAction action : actions) {
66 65
            keyout.set(action.getRowKey());
67 66
            valueOut.set(action.toJSON());
......
69 68
        }
70 69
    }
71 70

  
72

  
73

  
74

  
75

  
76

  
77

  
78

  
79 71
}

Also available in: Unified diff