Project

General

Profile

« Previous | Next » 

Revision 53588

refactored Action

View differences:

modules/dnet-mapreduce-jobs/branches/master/src/test/java/eu/dnetlib/data/mapreduce/actions/DOIBoostToActionsTest.java
1 1
package eu.dnetlib.data.mapreduce.actions;
2 2

  
3
import com.google.common.collect.Lists;
3 4
import com.google.gson.JsonObject;
4 5
import com.google.gson.JsonParser;
5 6
import eu.dnetlib.actionmanager.actions.ActionFactory;
6 7
import eu.dnetlib.actionmanager.actions.AtomicAction;
7 8
import eu.dnetlib.actionmanager.common.Agent;
8
import eu.dnetlib.data.mapreduce.hbase.dataimport.CrossRefToActions;
9 9
import eu.dnetlib.data.mapreduce.hbase.dataimport.DOIBoostToActions;
10
import eu.dnetlib.data.mapreduce.util.XmlRecordFactory;
11
import eu.dnetlib.data.transform.Column;
12
import eu.dnetlib.data.transform.Row;
13
import eu.dnetlib.data.transform.XsltRowTransformerFactoryTest;
10 14
import org.apache.commons.lang3.StringUtils;
15
import org.dom4j.Document;
16
import org.dom4j.io.DocumentResult;
17
import org.dom4j.io.DocumentSource;
18
import org.dom4j.io.SAXReader;
11 19
import org.junit.Before;
12 20
import org.junit.Test;
13 21

  
14
import java.io.BufferedReader;
15
import java.io.IOException;
16
import java.io.InputStream;
17
import java.io.InputStreamReader;
22
import javax.xml.transform.Transformer;
23
import javax.xml.transform.TransformerFactory;
24
import java.io.*;
18 25
import java.util.List;
26
import java.util.Map;
19 27

  
20
public class DOIBoostToActionsTest {
28
public class DOIBoostToActionsTest extends XsltRowTransformerFactoryTest {
21 29
    private String setName;
22 30
    private Agent agent;
23 31

  
32
    private final static String xslt =
33
            "<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">\n"
34
                    + "  <xsl:template match=\"@*|node()\">\n"
35
                    + "    <xsl:copy>\n"
36
                    + "      <xsl:apply-templates select=\"@*|node()\"/>\n"
37
                    + "    </xsl:copy>\n"
38
                    + "  </xsl:template>\n"
39
                    + "</xsl:stylesheet>";
40

  
41
    private Transformer transformer;
42

  
24 43
    @Before
25 44
    public void setup() {
26 45
        setName = "DLI";
......
29 48

  
30 49
    /**
     * Runs {@code doTestSingleDOIBoostAction} against a single classpath fixture.
     * This diff hunk shows the fixture path changing from {@code DOIBoostAction.json}
     * to the resource named {@code broken}.
     */
    @Test
31 50
    public void testSingleDOIBoostAction() throws IOException {
32
        doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/DOIBoostAction.json");
51
        doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/broken");
33 52
    }
34 53

  
35 54

  
36 55
    /**
     * Runs {@code doTestSingleDOIBoostActionToXML} against the classpath fixture named
     * {@code broken}; the helper prints the resulting XML to standard output.
     */
    @Test
56
    public void testDOIBoostActionToXML() throws Exception {
57
        doTestSingleDOIBoostActionToXML("/eu/dnetlib/data/mapreduce/actions/broken");
58
    }
59

  
60

  
61

  
62
    /**
     * Runs {@code doTestAllDOIBoostAction} against the classpath fixture {@code part-00070}.
     */
    @Test
37 63
    public void testMultipleDOIBoostAction() throws IOException {
38 64
        doTestAllDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/part-00070");
39 65
    }
40 66

  
41 67

  
68
    private void doTestSingleDOIBoostActionToXML(final String filePath) throws Exception {
69
        final List<Row> rows = Lists.newArrayList();
70
        final InputStream is = this.getClass().getResourceAsStream(filePath);
71
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
42 72

  
73
        String line = in.readLine();
74

  
75
        final JsonParser parser = new JsonParser();
76
        JsonObject root = parser.parse(line).getAsJsonObject();
77
        List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false);
78
        if (actions!= null) {
79
            actions.forEach(action-> {
80
                if (action.getTargetColumn().equals("body") && action.getTargetColumnFamily().equals("result"))
81
                {
82
                    Column<String, byte[]> col = new Column<>("body" , action.getTargetValue());
83
                    rows.add(new Row("result",action.getTargetRowKey() , Lists.newArrayList(col)));
84
                }
85

  
86
            });
87

  
88

  
89

  
90
        }
91

  
92
        Map<String, XmlRecordFactory> stringXmlRecordFactoryMap = mapAll(buildTable(rows));
93
        transformer = TransformerFactory.newInstance().newTransformer(new DocumentSource((new SAXReader()).read(new StringReader(xslt))));
94

  
95
        final SAXReader saxReader = new SAXReader();
96
        stringXmlRecordFactoryMap.values().forEach(it -> {
97
            try {
98
                final DocumentResult result = new DocumentResult();
99
                final Document document = saxReader.read(new StringReader(it.build()));
100
                transformer.transform(new DocumentSource(document), result);
101
                System.out.println(result.getDocument().asXML());
102
            }
103
            catch (Exception e) {
104
                e.printStackTrace();
105
            }
106

  
107
            });
108

  
109
    }
110

  
43 111
    private void doTestSingleDOIBoostAction(final String filePath) throws IOException {
44 112
        final InputStream is = this.getClass().getResourceAsStream(filePath);
45 113
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
......
48 116

  
49 117
        final JsonParser parser = new JsonParser();
50 118
        JsonObject root = parser.parse(line).getAsJsonObject();
51
        List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, true);
119
        List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false);
52 120
        if (actions!= null) {
53 121

  
54 122
            actions.forEach(it -> System.out.println(String.format(" RowKey:%s TargetColumnFamily:%s   TargetColumn: %s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn())));
modules/dnet-mapreduce-jobs/branches/master/src/test/java/eu/dnetlib/data/transform/XsltRowTransformerFactoryTest.java
679 679

  
680 680
	}
681 681

  
682
	private Map<String, Map<String, Map<String, byte[]>>> buildTable(final List<Row> rows) throws UnsupportedEncodingException {
682
	protected Map<String, Map<String, Map<String, byte[]>>> buildTable(final List<Row> rows) throws UnsupportedEncodingException {
683 683
		final Map<String, Map<String, Map<String, byte[]>>> table = Maps.newHashMap();
684 684

  
685 685
		for (final Row row : rows) {
......
705 705

  
706 706
	}
707 707

  
708
	private Map<String, XmlRecordFactory> mapAll(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
708
	protected Map<String, XmlRecordFactory> mapAll(final Map<String, Map<String, Map<String, byte[]>>> table) throws Exception {
709 709

  
710 710
		final Map<String, XmlRecordFactory> builders = Maps.newHashMap();
711 711
		for (final Entry<String, Map<String, Map<String, byte[]>>> e : table.entrySet()) {
modules/dnet-mapreduce-jobs/branches/master/src/test/resources/eu/dnetlib/data/mapreduce/actions/broken
1
{"best_oa_location_url": "", "issued": "2012-3-10", "abstract": [], "objIdentifier": "8201506ef2275362065a6b228c560e51", "pissn": "1220-1766", "subject": ["Electrical and Electronic Engineering", "Computer Science(all)"], "eissn": "1841-429X", "author": [{"affiliation": [], "given": "Diego", "family": "Soto"}, {"affiliation": [], "given": "Jaime", "family": "Borquez"}], "dateOfCollection": "2018-08-07 12:23:27Z", "type": {"cobj": "0001", "value": "Article"}, "is_oa": false, "published-online": "2012-3-10", "link": null, "published-print": "2012-3-10", "accepted": null, "publisher": null, "doi": "10.24846/v21i1y201210", "license": null, "url": "http://dx.doi.org/10.24846/v21i1y201210", "issn": [{"type": "print", "value": "1220-1766"}, {"type": "electronic", "value": "1841-429X"}], "collectedFrom": [{"name": "UnpayWall", "id": "openaire____::unpaywall"}, {"name": "CrossRef", "id": "openaire____::crossref"}], "title": ["Control of a Modular Multilevel Matrix Converter for High Power Applications"], "funder": null, "datasourcePrefix": "crossref____"}
1
{"publisher": "Elsevier BV", "doi": "10.1016/j.dib.2015.09.011", "license": [{"url": "http://www.elsevier.com/tdm/userlicense/1.0/", "content-version": "tdm", "\"delay-in-days": null, "date-time": "2015-12-01T00:00:00Z"}, {"url": "http://creativecommons.org/licenses/by/4.0/", "content-version": "vor", "\"delay-in-days": null, "date-time": "2015-09-15T00:00:00Z"}], "title": ["Data on individual PCR efficiency values as quality control for circulating miRNAs"], "issued": "2015-12-1", "abstract": [{"provenance": "MAG", "value": "This data article contains data related to the research article entitled “Variability in microRNA recovery from plasma: Comparison of five commercial kits, doi:10.1016/j.ab.2015.07.018” Brunet-Vega (2015) [1]. PCR efficiency, along with RNA and cDNA quality, are the most important factors affecting the quality of qPCR results. Constant amplification efficiency in all compared samples is indispensable when relative quantification is used to measure changes in gene expression. An easy way to measure PCR efficiency, without the need of a standard curve, is LinRegPCR software. Individual PCR efficiency can be determined as a part of qPCR quality control. This is especially important when the initial RNA quantity is so low that cannot be accurately quantified, such as in circulating RNA extractions. 
This data article reports the Cqs and PCR efficiencies of 5 miRNAs quantified in RNA isolated from 4 patients with colorectal cancer (CRC) and 4 healthy donors using five commercially available kits."}], "issn": [{"type": "print", "value": "2352-3409"}], "doi-url": "http://dx.doi.org/10.1016/j.dib.2015.09.011", "instances": [{"url": "http://api.elsevier.com/content/article/PII:S235234091500205X?httpAccept=text/xml", "provenance": "CrossRef", "access-rights": "UNKNOWN"}, {"url": "http://api.elsevier.com/content/article/PII:S235234091500205X?httpAccept=text/plain", "provenance": "CrossRef", "access-rights": "UNKNOWN"}, {"url": "http://doi.org/10.1016/j.dib.2015.09.011", "provenance": "UnpayWall", "access-rights": "OPEN"}, {"url": "https://academic.microsoft.com/#/detail/2115687396", "provenance": "MAG", "access-rights": "UNKNOWN"}], "published-online": null, "authors": [{"affiliations": [{"official-page": "http://www.uab.edu/", "provenance": "MAG", "value": "University of Alabama at Birmingham", "identifiers": [{"value": "http://en.wikipedia.org/wiki/University_of_Alabama_at_Birmingham", "schema": "wikpedia"}, {"value": "grid.265892.2", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/32389192", "schema": "URL"}]}], "given": "Anna", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2335602571", "schema": "URL"}], "fullname": "Anna Brunet-Vega", "family": "Brunet-Vega"}, {"affiliations": [{"official-page": "http://www.uab.edu/", "provenance": "MAG", "value": "University of Alabama at Birmingham", "identifiers": [{"value": "http://en.wikipedia.org/wiki/University_of_Alabama_at_Birmingham", "schema": "wikpedia"}, {"value": "grid.265892.2", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/32389192", "schema": "URL"}]}], "given": "Carles", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2692345643", "schema": "URL"}], "fullname": "Carles Pericay", 
"family": "Pericay"}, {"affiliations": [{"official-page": "http://www.uab.edu/", "provenance": "MAG", "value": "University of Alabama at Birmingham", "identifiers": [{"value": "http://en.wikipedia.org/wiki/University_of_Alabama_at_Birmingham", "schema": "wikpedia"}, {"value": "grid.265892.2", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/32389192", "schema": "URL"}]}], "given": "María Elisa", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2160096374", "schema": "URL"}], "fullname": "María Elisa Quílez", "family": "Quílez"}, {"affiliations": [{"official-page": "http://www.uab.edu/", "provenance": "MAG", "value": "University of Alabama at Birmingham", "identifiers": [{"value": "http://en.wikipedia.org/wiki/University_of_Alabama_at_Birmingham", "schema": "wikpedia"}, {"value": "grid.265892.2", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/32389192", "schema": "URL"}]}], "given": "María José", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2751674309", "schema": "URL"}], "fullname": "María José Ramírez-Lázaro", "family": "Ramírez-Lázaro"}, {"affiliations": [{"official-page": "http://www.uab.edu/", "provenance": "MAG", "value": "University of Alabama at Birmingham", "identifiers": [{"value": "http://en.wikipedia.org/wiki/University_of_Alabama_at_Birmingham", "schema": "wikpedia"}, {"value": "grid.265892.2", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/32389192", "schema": "URL"}]}], "given": "Xavier", "identifiers": [{"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2683017426", "schema": "URL"}], "fullname": "Xavier Calvet", "family": "Calvet"}, {"affiliations": [{"official-page": "http://www.uab.edu/", "provenance": "MAG", "value": "University of Alabama at Birmingham", "identifiers": [{"value": "http://en.wikipedia.org/wiki/University_of_Alabama_at_Birmingham", "schema": "wikpedia"}, 
{"value": "grid.265892.2", "schema": "grid.ac"}, {"value": "https://academic.microsoft.com/#/detail/32389192", "schema": "URL"}]}], "given": "Sergio", "identifiers": [{"provenance": "CrossRef", "value": "http://orcid.org/0000-0001-9294-1585", "schema": null}, {"provenance": "MAG", "value": "https://academic.microsoft.com/#/detail/2752779186", "schema": "URL"}, {"provenance": "ORCID", "value": "https://orcid.org/0000-0001-9294-1585", "schema": null}], "fullname": "Sergio Lario", "family": "Lario"}], "collectedFrom": ["CrossRef", "MAG", "ORCID", "UnpayWall"], "accepted": null, "type": "journal-article", "published-print": "2015-12-1", "subject": ["Multidisciplinary"]}
modules/dnet-mapreduce-jobs/branches/master/src/main/java/eu/dnetlib/data/mapreduce/hbase/dataimport/DOIBoostToActions.java
17 17
import java.io.InputStream;
18 18
import java.util.*;
19 19
import java.util.concurrent.atomic.AtomicInteger;
20
import java.util.function.Function;
20 21
import java.util.stream.Collectors;
21 22

  
22 23
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.*;
......
263 264
        result.setMetadata(metadata.build());
264 265
        entity.setResult(result.build());
265 266
        oaf.setEntity(entity.build());
267
        System.out.println(JsonFormat.printToString(oaf.build()));
266 268
        final List<AtomicAction> actionList = new ArrayList<>();
267 269
        if (!onlyOrganization)
268 270
            actionList.add(factory.createAtomicAction(setName, agent, oaf.getEntity().getId(), "result", "body", oaf.build().toByteArray()));
......
506 508
            identifiers.stream().map(id -> {
507 509
                final String value = id.get("value").getAsString();
508 510
                return extractIdentifier(value);
509
            }).forEach(abuilder::addPid);
511
            }).collect(
512
                    Collectors.toMap(
513
                            FieldTypeProtos.KeyValue::getKey,
514
                            Function.identity(),
515
                            (a,b) -> a
516
                    )).values().forEach(abuilder::addPid);
510 517
            abuilder.setRank(counter.getAndIncrement());
511 518

  
512 519
            return abuilder.build();
modules/dnet-mapreduce-jobs/branches/master/src/main/java/eu/dnetlib/data/mapreduce/hbase/dataexport/ExportDuplicatesMapper.java
1
package eu.dnetlib.data.mapreduce.hbase.dataexport;
2

  
3
import com.googlecode.protobuf.format.JsonFormat;
4
import eu.dnetlib.data.mapreduce.util.DedupUtils;
5
import eu.dnetlib.data.proto.OafProtos;
6
import eu.dnetlib.data.proto.TypeProtos;
7
import org.apache.hadoop.hbase.client.Result;
8
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
9
import org.apache.hadoop.hbase.mapreduce.TableMapper;
10
import org.apache.hadoop.io.Text;
11

  
12
import java.io.IOException;
13
import java.util.Map;
14

  
15
public class ExportDuplicatesMapper extends TableMapper<Text, Text> {
16

  
17
    private Text keyOut;
18
    private Text valueOut;
19

  
20
    @Override
21
    protected void setup(Context context) throws IOException, InterruptedException {
22
        keyOut = new Text("");
23
        valueOut = new Text();
24
    }
25

  
26
    @Override
27
    protected void map(ImmutableBytesWritable key, Result value, Context context) throws IOException, InterruptedException {
28

  
29
        final Map<byte[], byte[]> mergedInMap = value.getFamilyMap(DedupUtils.getDedupCF_mergedInBytes(TypeProtos.Type.result));
30

  
31
        if (mergedInMap != null && !mergedInMap.isEmpty()) {
32
            final byte[] body = value.getValue("result".getBytes(), DedupUtils.BODY_B);
33

  
34
            if (body != null) {
35
                OafProtos.Oaf oaf = OafProtos.Oaf.parseFrom(body);
36
                valueOut.set(JsonFormat.printToString(oaf));
37
                context.write(keyOut, valueOut);
38
            }
39
        }
40

  
41
    }
42
}

Also available in: Unified diff