Revision 56498

[maven-release-plugin] copy for tag dnet-mapreduce-jobs-1.2.0

View differences:

modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/install.sh
#!/bin/bash

VERSION=`xmlstarlet sel -t -m "/_:project/_:version" -v "." pom.xml`
echo "using version: $VERSION"

mvn clean install;
rm -rf ~/.m2/repository/eu/dnetlib/dnet-mapreduce-jobs-assembly;
mvn assembly:assembly -DskipTests=true && mvn install:install-file -Dfile=target/dnet-mapreduce-jobs-$VERSION-jar-with-dependencies.jar -DgroupId=eu.dnetlib -DartifactId=dnet-mapreduce-jobs-assembly -Dversion=$VERSION -Dpackaging=jar
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/deploy.info
{"type_source": "SVN", "goal": "package -U source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-mapreduce-jobs/trunk", "deploy_repository": "dnet45-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots", "name": "dnet-mapreduce-jobs"}
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/actions/OrcidToActionsTest.java
1
package eu.dnetlib.data.mapreduce.actions;
2

  
3
import static eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility.getStringValue;
4
import static org.junit.Assert.assertEquals;
5
import static org.junit.Assert.assertNotNull;
6
import static org.junit.Assert.assertTrue;
7

  
8
import java.io.BufferedReader;
9
import java.io.IOException;
10
import java.io.InputStream;
11
import java.io.InputStreamReader;
12
import java.util.List;
13

  
14
import org.apache.commons.lang3.StringUtils;
15
import org.junit.Before;
16
import org.junit.Test;
17

  
18
import com.google.gson.JsonArray;
19
import com.google.gson.JsonObject;
20
import com.google.gson.JsonParser;
21
import com.googlecode.protobuf.format.JsonFormat;
22
import com.googlecode.protobuf.format.JsonFormat.ParseException;
23

  
24
import eu.dnetlib.actionmanager.actions.ActionFactory;
25
import eu.dnetlib.actionmanager.actions.AtomicAction;
26
import eu.dnetlib.actionmanager.common.Agent;
27
import eu.dnetlib.data.mapreduce.hbase.Reporter;
28
import eu.dnetlib.data.mapreduce.hbase.dataimport.DumpToActionsUtility;
29
import eu.dnetlib.data.mapreduce.hbase.dataimport.OrcidToActions;
30
import eu.dnetlib.data.proto.OafProtos.Oaf;
31

  
32
public class OrcidToActionsTest {
33

  
34
	private String setName;
35
	private Agent agent;
36
	private Reporter reporter;
37

  
38
	private final String generatedJson =
39
			"{\"kind\": \"entity\",\"entity\": {\"type\": \"result\",\"result\": {\"metadata\": {\"title\": [{\"value\": \"Factors influencing accuracy of referral and the likelihood of false positive referral by optometrists in Bradford, United Kingdom.\",\"qualifier\": {\"classid\": \"main title\",\"classname\": \"main title\",\"schemeid\": \"dnet:dataCite_title\",\"schemename\": \"dnet:dataCite_title\"}}],\"relevantdate\": [{\"value\": \"2015-11-01\",\"qualifier\": {\"classid\": \"issued\",\"classname\": \"issued\",\"schemeid\": \"dnet:dataCite_date\",\"schemename\": \"dnet:dataCite_date\"}}],\"dateofacceptance\": {\"value\": \"2015-11-01\"},\"resulttype\": {\"classid\": \"publication\",\"classname\": \"publication\",\"schemeid\": \"dnet:result_typologies\",\"schemename\": \"dnet:result_typologies\"},\"resourcetype\": {\"classid\": \"journal-article\",\"classname\": \"journal-article\",\"schemeid\": \"dnet:dataCite_resource\",\"schemename\": \"dnet:dataCite_resource\"},\"source\": [{\"value\": \"Europe PubMed Central\"}],\"author\": [{\"fullname\": \"Christopher Davey\",\"name\": \"Christopher\",\"surname\": \"Davey\",\"rank\": 1,\"pid\": [{\"key\": \"ORCID\",\"value\": \"0000-0002-9050-379X\"}]},{\"fullname\": \"Scally AJ\",\"rank\": 2},{\"fullname\": \"Green C\",\"rank\": 3},{\"fullname\": \"Mitchell ES\",\"rank\": 4},{\"fullname\": \"Elliott DB\",\"rank\": 5}]},\"instance\": [{\"accessright\": {\"classid\": \"UNKNOWN\",\"classname\": \"UNKNOWN\",\"schemeid\": \"dnet:access_modes\",\"schemename\": \"dnet:access_modes\"},\"instancetype\": {\"classid\": \"0001\",\"classname\": \"Article\",\"schemeid\": \"dnet:publication_resource\",\"schemename\": \"dnet:publication_resource\"},\"hostedby\": {\"key\": \"10|openaire____::55045bd2a65019fd8e6741a755395c8c\",\"value\": \"Unknown Repository\"},\"url\": [\"http://europepmc.org/abstract/med/26614021\"],\"collectedfrom\": {\"key\": \"10|openaire____::806360c771262b4d6770e7cdf04b5c5a\",\"value\": \"ORCID\"},\"dateofacceptance\": {\"value\": \"2015-11-01\"}}]},\"originalId\": [\"24099977\"],\"collectedfrom\": [{\"key\": \"10|openaire____::806360c771262b4d6770e7cdf04b5c5a\",\"value\": \"ORCID\"}],\"pid\": [{\"value\": \"26614021\",\"qualifier\": {\"classid\": \"pmid\",\"classname\": \"pmid\",\"schemeid\": \"dnet:pid_types\",\"schemename\": \"dnet:pid_types\"}}],\"dateofcollection\": \"2018-10-22\",\"id\": \"50|orcid____::57b8b813748049402c0900d3d17f1c23\",\"dateoftransformation\": \"2019-06-21T11:16:10+02:00\"},\"dataInfo\": {\"inferred\": false,\"deletedbyinference\": false,\"trust\": \"0.9\",\"provenanceaction\": {\"classid\": \"sysimport:actionset:orcidworks-no-doi\",\"classname\": \"sysimport:actionset:orcidworks-no-doi\",\"schemeid\": \"dnet:provenanceActions\",\"schemename\": \"dnet:provenanceActions\"}},\"lastupdatetimestamp\": 1561108570991}\n"
40
					+
41
					"";
42

  
43
	@Before
44
	public void setup() {
45
		setName = "ORCID";
46
		agent = new Agent("agentId", "agentName", Agent.AGENT_TYPE.service);
47
		reporter =
48
				(Reporter) (counterGroup, counterName, delta) -> System.out.println(String.format("COUNTER: %s - %s : %d", counterGroup, counterName, delta));
49
	}
50

  
51
	@Test
52
	public void testUnicodeAction() throws IOException {
53
		doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_1.json");
54

  
55
	}
56

  
57
	@Test
58
	public void testSourceWorkIdAction() throws IOException {
59
		doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_2.json");
60

  
61
	}
62

  
63
	@Test
64
	public void testAuthorsRank() throws IOException {
65
		doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_3.json");
66
		doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_4.json");
67
		doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_5.json");
68

  
69
	}
70

  
71
	@Test
72
	public void testUrl() throws IOException {
73
		doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_6.json");
74

  
75
	}
76

  
77
	@Test
78
	public void testNullUrlAction() throws IOException {
79
		doTestSingleAction("/eu/dnetlib/data/mapreduce/actions/OrcidAction_7.json");
80

  
81
	}
82

  
83
	@Test
84
	public void testMassiveOrcidAction() throws IOException {
85
		doTestAllOrcidAction("/eu/dnetlib/data/mapreduce/actions/part-100");
86
	}
87

  
88
	@Test
89
	public void testJsonToProto() throws ParseException {
90
		final Oaf.Builder builder = Oaf.newBuilder();
91
		JsonFormat.merge(generatedJson, builder);
92
		System.out.println(builder.build());
93
	}
94

  
95
	private void doTestSingleAction(final String filePath) throws IOException {
96
		final InputStream is = this.getClass().getResourceAsStream(filePath);
97
		final BufferedReader in = new BufferedReader(new InputStreamReader(is));
98

  
99
		final String line = in.readLine();
100

  
101
		final JsonParser parser = new JsonParser();
102
		final JsonObject root = parser.parse(line).getAsJsonObject();
103
		final List<AtomicAction> lista = OrcidToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, reporter);
104
		// System.out.println(new Gson().toJson(lista));
105
	}
106

  
107
	private void doTestAllOrcidAction(final String filePath) throws IOException {
108
		final InputStream is = this.getClass().getResourceAsStream(filePath);
109
		final BufferedReader in = new BufferedReader(new InputStreamReader(is));
110
		String line = in.readLine();
111
		int i = 0;
112
		while (StringUtils.isNotBlank(line)) {
113
			final JsonParser parser = new JsonParser();
114
			final JsonObject root = parser.parse(line).getAsJsonObject();
115
			try {
116
				OrcidToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, reporter);
117
			} catch (final Throwable e) {
118
				System.out.println("Exception parsing: " + line);
119
				throw new RuntimeException(e);
120
			}
121
			line = in.readLine();
122
			i += 1;
123
		}
124
	}
125

  
126
	@Test
127
	public void testGeneratedJson() throws ParseException {
128
		final JsonParser parser = new JsonParser();
129
		final JsonObject root = parser.parse(generatedJson).getAsJsonObject();
130
		assertEquals(getStringValue(root, "kind"), "entity");
131
		final JsonObject entity = root.getAsJsonObject("entity");
132
		final List<JsonObject> pids = DumpToActionsUtility.getArrayObjects(entity, "pid");
133
		pids.forEach(pid -> {
134
			final JsonObject qualifier = pid.getAsJsonObject("qualifier");
135
			assertTrue(qualifier.get("classid").getAsString().matches("(ark|arxiv|pmc|pmid|source-work-id|urn)"));
136
		});
137
		assertEquals(getStringValue(entity, "type"), "result");
138
		assertNotNull(getStringValue(entity, "id"));
139
		assertNotNull(getStringValue(entity, "dateofcollection"));
140
		assertTrue(DumpToActionsUtility.isValidDate(getStringValue(entity, "dateofcollection")));
141
		final JsonObject metadata = entity.getAsJsonObject("result").getAsJsonObject("metadata");
142
		assertNotNull(metadata.getAsJsonArray("title"));
143
		assertNotNull(metadata.getAsJsonArray("relevantdate"));
144
		assertNotNull(metadata.getAsJsonObject("dateofacceptance"));
145
		assertTrue(DumpToActionsUtility.isValidDate(metadata.getAsJsonObject("dateofacceptance").get("value").getAsString()));
146
		assertNotNull(metadata.getAsJsonObject("resulttype"));
147
		assertNotNull(metadata.getAsJsonArray("author"));
148
		final JsonArray instance = entity.getAsJsonObject("result").getAsJsonArray("instance");
149
		instance.forEach(i -> {
150
			assertNotNull(i.getAsJsonObject().getAsJsonObject("accessright"));
151
			assertNotNull(i.getAsJsonObject().getAsJsonObject("hostedby"));
152
			assertNotNull(i.getAsJsonObject().getAsJsonObject("collectedfrom"));
153
			assertNotNull(i.getAsJsonObject().getAsJsonObject("instancetype"));
154
		});
155

  
156
	}
157
}
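The doTestSingleAction and doTestAllOrcidAction helpers above read a newline-delimited JSON dump from the test classpath and hand each line to Gson's JsonParser before invoking the mapper. A minimal, self-contained sketch of that reading pattern, using try-with-resources so the streams are closed; the resource path is reused from the test above and no dnet-specific classes are involved:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import com.google.gson.JsonObject;
import com.google.gson.JsonParser;

public class DumpReaderSketch {

    // Reads a newline-delimited JSON dump from the test classpath and parses each record.
    public static void main(String[] args) throws IOException {
        // Path taken from the test above; assumes the resource is on the test classpath.
        final String resource = "/eu/dnetlib/data/mapreduce/actions/OrcidAction_1.json";
        try (InputStream is = DumpReaderSketch.class.getResourceAsStream(resource);
             BufferedReader in = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
            String line;
            while ((line = in.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    continue; // skip blank lines instead of stopping, unlike the isNotBlank loop above
                }
                final JsonObject root = new JsonParser().parse(line).getAsJsonObject();
                System.out.println(root.get("kind")); // each record is expected to carry a "kind" field
            }
        }
    }
}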
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/actions/DOIBoostToActionsTest.java
1
package eu.dnetlib.data.mapreduce.actions;
2

  
3
import java.io.*;
4
import java.util.List;
5
import java.util.zip.DataFormatException;
6
import java.util.zip.Inflater;
7

  
8
import com.google.common.collect.Lists;
9
import com.google.gson.JsonObject;
10
import com.google.gson.JsonParser;
11
import com.google.protobuf.InvalidProtocolBufferException;
12
import eu.dnetlib.actionmanager.actions.ActionFactory;
13
import eu.dnetlib.actionmanager.actions.AtomicAction;
14
import eu.dnetlib.actionmanager.common.Agent;
15
import eu.dnetlib.data.mapreduce.hbase.Reporter;
16
import eu.dnetlib.data.mapreduce.hbase.dataimport.DOIBoostToActions;
17
import eu.dnetlib.data.proto.OafProtos.Oaf;
18
import eu.dnetlib.data.transform.Column;
19
import eu.dnetlib.data.transform.Row;
20
import org.apache.commons.codec.binary.Base64;
21
import org.apache.commons.lang3.StringUtils;
22
import org.junit.Before;
23
import org.junit.Test;
24

  
25
public class DOIBoostToActionsTest  {
26
    private String setName;
27
    private Agent agent;
28
    private Reporter reporter;
29

  
30

  
31
    @Before
32
    public void setup() {
33
        setName = "DLI";
34
        agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service);
35
        reporter = (Reporter) (counterGroup, counterName, delta) -> System.out.println(String.format("COUNTER: %s - %s : %d", counterGroup, counterName, delta));
36
    }
37

  
38
    @Test
39
    public void testSingleDOIBoostAction() throws IOException {
40
        doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/DOIBoostAction.json");
41
    }
42

  
43
    @Test
44
    public void testSingleDOIBoostActionFilter() throws IOException {
45
        doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/DOIBoostAction_filterOut.json");
46
    }
47

  
48
    @Test
49
    public void testMultipleDOIBoostActionDiscardMany() throws IOException {
50
        doTestAllDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/doiboost_discard_many.json");
51
    }
52

  
53

  
54
    @Test
55
    public void testDOIBoostActionToXML() throws Exception {
56
        doTestSingleDOIBoostActionToXML("/eu/dnetlib/data/mapreduce/actions/broken");
57
    }
58

  
59

  
60

  
61
    @Test
62
    public void testMultipleDOIBoostAction() throws IOException {
63
        doTestAllDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/part-00070");
64
    }
65

  
66

  
67

  
68
    @Test
69
    public void testDecompression() throws DataFormatException {
70
        final String s ="eJxtVM1u20YQfpW5pHAAWXCCIECORazKQYAEcHrpcbUckgvs7jA7S8vsye/QU4H25fwk/WYpKUbQ\n" +
71
                "mygOZ775fuaOXawjeVeY+HHi3IU6F1Ya3QNTCcqZvGQNHRd3iAuFTHVkmpxWen76i1gn9sHF9dWe\n" +
72
                "S3J5sZJlbeFiYdctaO6ZO+7ozc0rkr414VwD5g5FVKmTxFqDp6lIN/tKV193H2/p7c3N+9db+h3l\n" +
73
                "yWECuqlk6qWgRcBn/MBRpoRehEdHXXHJWZ+OkwzFTSN++9Hlga21Z9Ut7bKUJLNS4i54F+3NgLXR\n" +
74
                "IHe2cA15tveV/ZglytCqQkLdA9sw3VCVgbFGoWMAhY4m0VADdlbxQa4ZbSS17/D/DEySN2RERzAS\n" +
75
                "8oAGGEdusN+TTHM81RwBebRtOtDjK6gFtDlWqwMFzhg72kOe0wHzQSdHCITCBn8skm2uaRKxG9oa\n" +
76
                "4pVFU5O/z5w9r4R5wAGZkB7/LQYqCsgqNIpOoboY/my4Vm7Oo71oVZpQViFJhbDeKW/pTo5QpGxI\n" +
77
                "5tKIYAzXtnaWSvAQt71jlOOq4PPTP9bM7BfBoOTnp3+t5KQCen7KWkHZ5n83HyBIySYJiHa1cprq\n" +
78
                "Os8aBh+Mv178rPadGH1J7G3f46XZZq7hvOPFmY9BG91jy0eLBzTATvDPln6D+/jRpSny5qXtz4wp\n" +
79
                "HRlf1AKdrW2TonBIh7koqDoslHmQGhpxh7mDkVZ6e7ZAlevOLRvkw2Pi6g9HuoCF1dpwoWl0gfyz\n" +
80
                "Uk3pwgjJiu/Nhw/vV7K9s8L23dqPjk5Jx9AbkipHVzrIWvi64z7kk6wNWgtPh+PQQG7pWzALGVsH\n" +
81
                "BsTc6O0tsO8otDCu+cbqB252Ly5rz6Wga8hmgjMGK7qoc17mB2Et3AdnzKHiNrghwyhK9xwbgfsi\n" +
82
                "86R0dXu/19cowcSru92Xzzv6hb59vLvf/fF1v2/AcEk+GdMhGT5AuMx4eZBCvr6AOIVndXkDUhGv\n" +
83
                "ebDA9+GRm2S4Ne10YgWVaBvLeibVshDqcvbVqdvz098WbuUWmV/b2WpRPwX9pe3s4tj1XT1dBc6g\n" +
84
                "NOM+NBcnmOjBrdqdjNxWCeWHXwkKX9xuN0LtVGmwKJqwBgxWSVz+Aw5ePDg=";
85
        byte[] byteArray = Base64.decodeBase64(s.getBytes());
86
        Inflater decompresser = new Inflater();
87
        decompresser.setInput(byteArray);
88
        ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length);
89
        byte[] buffer = new byte[8192];
90
        while (!decompresser.finished()) {
91
            int size = decompresser.inflate(buffer);
92
            bos.write(buffer, 0, size);
93
        }
94
        byte[] unzippeddata = bos.toByteArray();
95
        decompresser.end();
96

  
97
        System.out.println(new String(unzippeddata));
98

  
99
    }
100

  
101

  
102
    private void doTestSingleDOIBoostActionToXML(final String filePath) throws Exception {
103
        final List<Row> rows = Lists.newArrayList();
104
        final InputStream is = this.getClass().getResourceAsStream(filePath);
105
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
106

  
107
        String line = in.readLine();
108

  
109
        final JsonParser parser = new JsonParser();
110
        JsonObject root = parser.parse(line).getAsJsonObject();
111
        List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false,reporter);
112

  
113
        if (actions!= null) {
114
            actions.forEach(action-> {
115
                if (action.getTargetColumn().equals("body") && action.getTargetColumnFamily().equals("result"))
116
                {
117
                    Column<String, byte[]> col = new Column<>("body" , action.getTargetValue());
118
                    rows.add(new Row("result",action.getTargetRowKey() , Lists.newArrayList(col)));
119
                }
120

  
121
            });
122

  
123

  
124

  
125
        }
126

  
127

  
128

  
129
    }
130

  
131
    private void doTestSingleDOIBoostAction(final String filePath) throws IOException {
132
        final InputStream is = this.getClass().getResourceAsStream(filePath);
133
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
134

  
135
        String line = in.readLine();
136

  
137
        final JsonParser parser = new JsonParser();
138
        JsonObject root = parser.parse(line).getAsJsonObject();
139
        List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false, reporter);
140
        if (actions!= null) {
141
            actions.forEach(it -> {
142
                try {
143
                    System.out.println(
144
                            String.format(" RowKey:%s TargetColumnFamily:%s   TargetColumn: %s\n value:\n%s", it.getTargetRowKey(), it.getTargetColumnFamily(),
145
                                    it.getTargetColumn(),
146
                                    Oaf.parseFrom(it.getTargetValue())));
147
                } catch (InvalidProtocolBufferException e) {
148
                    e.printStackTrace();
149
                }
150
            });
151
        }
152
    }
153

  
154
    private void doTestAllDOIBoostAction(final String filePath) throws IOException {
155
        final InputStream is = this.getClass().getResourceAsStream(filePath);
156
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
157

  
158
        String line = in.readLine();
159
        int i = 0;
160
        int cnt = 0;
161
        while(StringUtils.isNotBlank(line)) {
162
            cnt ++;
163

  
164
            final JsonParser parser = new JsonParser();
165
            JsonObject root = parser.parse(line).getAsJsonObject();
166
            try {
167
                List<AtomicAction> atomicActions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false, reporter);
168
                if (atomicActions!= null)
169
                {
170
                    i ++;
171
                }
172
//                    atomicActions.forEach(it -> System.out.println(String.format(" RowKey:%s TargetColumnFamily:%s   TargetColumn: %s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn())));
173
                else{
174
                    System.out.println("SKIPPED Type "+ root.get("type").getAsString());
175
                }
176

  
177
            } catch (Throwable e) {
178
                System.out.println(line);
179
                throw new RuntimeException(e);
180
            }
181
            line= in.readLine();
182
        }
183

  
184
        System.out.println("total "+i+" / "+cnt);
185
    }
186

  
187

  
188

  
189
}
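testDecompression above Base64-decodes a blob with commons-codec and inflates it with java.util.zip.Inflater. For orientation, here is a minimal sketch of the opposite direction (Deflater plus Base64 encoding), under the assumption that fixtures like the one above were produced this way; the input string is invented:

import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.zip.Deflater;

import org.apache.commons.codec.binary.Base64;

public class CompressSketch {

    // Deflates a string and Base64-encodes it, mirroring what testDecompression undoes.
    public static String compress(final String text) {
        final Deflater deflater = new Deflater();
        deflater.setInput(text.getBytes(StandardCharsets.UTF_8));
        deflater.finish();

        final ByteArrayOutputStream bos = new ByteArrayOutputStream();
        final byte[] buffer = new byte[8192];
        while (!deflater.finished()) {
            final int size = deflater.deflate(buffer);
            bos.write(buffer, 0, size);
        }
        deflater.end();
        return Base64.encodeBase64String(bos.toByteArray());
    }

    public static void main(String[] args) {
        System.out.println(compress("{\"type\": \"example\"}"));
    }
}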
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/actions/CrossRefToActionTest.java
package eu.dnetlib.data.mapreduce.actions;

import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import eu.dnetlib.actionmanager.actions.ActionFactory;
import eu.dnetlib.actionmanager.actions.AtomicAction;
import eu.dnetlib.actionmanager.common.Agent;
import eu.dnetlib.data.mapreduce.hbase.dataimport.CrossRefToActions;
import org.apache.commons.lang3.StringUtils;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

public class CrossRefToActionTest {

    private String setName;
    private Agent agent;

    @Before
    public void setup() {
        setName = "DLI";
        agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service);
    }

    @Ignore
    @Test
    public void testSingleScholixAction2() throws IOException {
        doTestSingleCrossRefAction("/eu/dnetlib/data/mapreduce/actions/broken");
    }

    @Test
    public void testAllScholixAction() throws IOException {
        doTestAllCrossRefAction("/eu/dnetlib/data/mapreduce/actions/part-06036");
    }

    private void doTestSingleCrossRefAction(final String filePath) throws IOException {
        final InputStream is = this.getClass().getResourceAsStream(filePath);
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));

        String line = in.readLine();
        System.out.println(line);
        final JsonParser parser = new JsonParser();
        JsonObject root = parser.parse(line).getAsJsonObject();
        AtomicAction actions = CrossRefToActions.generateActionsFromDump(root, new ActionFactory(), setName, agent, false);
        System.out.println(actions.toJSON());
    }

    private void doTestAllCrossRefAction(final String filePath) throws IOException {
        final InputStream is = this.getClass().getResourceAsStream(filePath);
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));

        String line = in.readLine();
        while(StringUtils.isNotBlank(line)) {

            final JsonParser parser = new JsonParser();
            JsonObject root = parser.parse(line).getAsJsonObject();
            try {
                CrossRefToActions.generateActionsFromDump(root, new ActionFactory(), setName, agent, false);
            } catch (Throwable e) {
                System.out.println(line);
                throw new RuntimeException(e);
            }
            line= in.readLine();
        }
    }

}
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/actions/ScholexplorerActionMapperTest.java
1
package eu.dnetlib.data.mapreduce.actions;
2

  
3
import java.io.BufferedReader;
4
import java.io.IOException;
5
import java.io.InputStream;
6
import java.io.InputStreamReader;
7
import java.util.HashMap;
8
import java.util.List;
9
import java.util.Map;
10

  
11
import com.google.gson.JsonObject;
12
import com.google.gson.JsonParser;
13
import eu.dnetlib.actionmanager.actions.ActionFactory;
14
import eu.dnetlib.actionmanager.actions.AtomicAction;
15
import eu.dnetlib.actionmanager.common.Agent;
16
import eu.dnetlib.data.mapreduce.hbase.dataimport.ScholExplorerConfiguration;
17
import eu.dnetlib.data.mapreduce.hbase.dataimport.ScholixToActions;
18
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions;
19
import eu.dnetlib.miscutils.datetime.DateUtils;
20
import org.apache.commons.lang3.StringUtils;
21
import org.junit.Assert;
22
import org.junit.Before;
23
import org.junit.Test;
24

  
25
import static org.junit.Assert.assertEquals;
26

  
27
public class ScholexplorerActionMapperTest {
28

  
29
    private Map<String, ScholExplorerConfiguration> configurationMap;
30
    private String setName;
31
    private Agent agent;
32
    private String nsPrefix;
33
    private String dsName;
34
    private String dsId;
35

  
36

  
37

  
38
    @Before
39
    public void initializeCofiguration(){
40
        configurationMap = new HashMap<>();
41
        configurationMap.put("issn", new ScholExplorerConfiguration(null, false));
42
        configurationMap.put("openaire", new ScholExplorerConfiguration(null, false));
43
        configurationMap.put("pmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
44
        configurationMap.put("doi", new ScholExplorerConfiguration("doi", true,"http://dx.doi.org/%s"));
45
        configurationMap.put("pbmid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
46
        configurationMap.put("pmcid", new ScholExplorerConfiguration("pmc", true,"https://europepmc.org/articles/%s"));
47
        configurationMap.put("pubmedid", new ScholExplorerConfiguration("pmid", true,"https://www.ncbi.nlm.nih.gov/pubmed/%s"));
48
        configurationMap.put("icpsr", new ScholExplorerConfiguration(null, false));
49
        configurationMap.put("dnet", new ScholExplorerConfiguration(null, false));
50
        configurationMap.put("url", new ScholExplorerConfiguration(null, true,"%s"));
51

  
52
        setName = "DLI";
53
        agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service);
54
        nsPrefix = "scholexplore";
55
        dsName = "ScholExplorer";
56
        dsId = "scholexplorer";
57
    }
58

  
59
    @Test
60
    public void testSubString () {
61
        final String dnetId ="50|dli_resolver::7b7b9a57a40818d10cf2532d71f012fa";
62
        assertEquals("7b7b9a57a40818d10cf2532d71f012fa", dnetId.substring(17));
63

  
64
        System.out.println(AbstractDNetXsltFunctions.md5("SNSF - Swiss National Science Foundation"));
65
    }
66

  
67
    @Test
68
    public void testSingleScholixAction() throws IOException {
69
        doTestSingleScholixAction("/eu/dnetlib/data/mapreduce/actions/part-00000");
70
    }
71

  
72
    @Test
73
    public void testSingleScholixAction2() throws IOException {
74
        doTestSingleScholixAction("/eu/dnetlib/data/mapreduce/actions/scholix.json");
75
    }
76

  
77
    private void doTestSingleScholixAction(final String filePath) throws IOException {
78
        final InputStream is = this.getClass().getResourceAsStream(filePath);
79
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
80
        //        in.readLine();
81
        //        in.readLine();
82
        String line = in.readLine();
83
        System.out.println(line);
84
        final JsonParser parser = new JsonParser();
85
        JsonObject root = parser.parse(line).getAsJsonObject();
86
        List<AtomicAction> actions = ScholixToActions
87
                .generateActionsFromScholix(root, configurationMap, setName, agent, new ActionFactory(), nsPrefix, dsName, dsId, DateUtils.now_ISO8601());
88
        actions.forEach(it-> System.out.println(String.format("%s    cf:%s    qualifier:%s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn())));
89

  
90
        System.out.println(actions.get(0).toJSON());
91
    }
92

  
93
    @Test
94
    public void testScholixAction() throws IOException {
95

  
96
        doTestMultipleScholixActions("/eu/dnetlib/data/mapreduce/actions/part-00000");
97
    }
98

  
99
    @Test
100
    public void testScholixAction2() throws IOException {
101

  
102
        doTestMultipleScholixActions("/eu/dnetlib/data/mapreduce/actions/scholix.json");
103
    }
104

  
105
    private void doTestMultipleScholixActions(final String filePath) throws IOException {
106
        final InputStream is = this.getClass().getResourceAsStream(filePath);
107
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
108

  
109
        String line = in.readLine();
110
        while (StringUtils.isNotEmpty(line)){
111
            final JsonParser parser = new JsonParser();
112
            JsonObject root = parser.parse(line).getAsJsonObject();
113
            try {
114
                List<AtomicAction> actions = ScholixToActions
115
                        .generateActionsFromScholix(root, configurationMap, setName, agent, new ActionFactory(), nsPrefix, dsName, dsId, DateUtils.now_ISO8601());
116
                Assert.assertNotNull(actions);
117
                Assert.assertTrue(actions.size() > 0);
118
            } catch (Throwable e) {
119
                System.out.println(line);
120
                throw (new RuntimeException(e));
121
            }
122
            line = in.readLine();
123
        }
124
    }
125
}
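initializeCofiguration above associates each pid type with a URL template containing %s. Presumably the mapper expands the template with the identifier value, e.g. via String.format; a tiny illustrative sketch with made-up identifiers:

public class PidUrlSketch {

    public static void main(String[] args) {
        // Templates copied from the configuration above; identifier values are illustrative only.
        System.out.println(String.format("https://www.ncbi.nlm.nih.gov/pubmed/%s", "26614021"));
        System.out.println(String.format("http://dx.doi.org/%s", "10.1000/xyz123"));
        System.out.println(String.format("https://europepmc.org/articles/%s", "PMC1234567"));
    }
}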
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/dedup/OafMergeTest.java
1
package eu.dnetlib.data.mapreduce.dedup;
2

  
3
import java.util.List;
4
import java.util.UUID;
5

  
6
import com.google.common.base.Predicate;
7
import com.google.common.collect.Iterables;
8
import com.google.common.collect.Lists;
9
import eu.dnetlib.data.mapreduce.util.OafTest;
10
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
11
import eu.dnetlib.data.proto.KindProtos.Kind;
12
import eu.dnetlib.data.proto.OafProtos.Oaf;
13
import eu.dnetlib.data.proto.OafProtos.OafEntity;
14
import eu.dnetlib.data.proto.OafProtos.OafEntity.Builder;
15
import eu.dnetlib.data.proto.ResultProtos.Result;
16
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
17
import eu.dnetlib.data.proto.TypeProtos.Type;
18
import eu.dnetlib.data.transform.OafEntityMerger;
19
import org.junit.Before;
20
import org.junit.Test;
21

  
22
public class OafMergeTest {
23

  
24
	private List<Oaf> oafList;
25

  
26
	private OafEntityMerger merger;
27

  
28
	@Before
29
	public void setUp() throws Exception {
30

  
31
		merger = new OafEntityMerger();
32
		oafList = Lists.newArrayList();
33
		oafList.add(getOaf("0.1").setEntity(
34
				getEntity("id_1", null, "pid_1").setResult(
35
						Result.newBuilder().setMetadata(
36
								Result.Metadata.newBuilder().setDateofacceptance(OafTest.sf("2012-01-01"))
37
										.addTitle(OafTest.getStructuredproperty("vvvv Title", "main title", "dnet:dataCite_title"))
38
										.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
39
		oafList.add(getOaf("0.1").setEntity(
40
				getEntity("id_2", "originalId_2", "pid_2").setResult(
41
						Result.newBuilder().setMetadata(
42
								Result.Metadata.newBuilder().setDateofacceptance(OafTest.sf(""))
43
										.addTitle(OafTest.getStructuredproperty("aaaa Title", "main title", "dnet:dataCite_title"))
44
										.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
45
		oafList.add(getOaf("0.2").setEntity(
46
				getEntity("id_3", "originalId_2", "pid_2").setResult(
47
						Result.newBuilder().setMetadata(
48
								Result.Metadata.newBuilder().addTitle(OafTest.getStructuredproperty("cccc Title", "sub title", "dnet:dataCite_title"))
49
										.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
50

  
51
		oafList.add(getOaf("0.3").setEntity(
52
				getEntity("id_$", null, "pid_3").setResult(
53
						Result.newBuilder().setMetadata(
54
								Result.Metadata.newBuilder().setPublisher(OafTest.sf("AMER CHEMICAL SOCXXXXXXXXXXXXXXXXX"))
55
										.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))))).build());
56
		oafList.add(getOaf("0.5").setEntity(
57
				getEntity("id_5", null, null).setResult(
58
						Result.newBuilder().setMetadata(
59
								Result.Metadata.newBuilder().addTitle(OafTest.getStructuredproperty("hhhh title", "main title", "dnet:dataCite_title"))
60
										.setPublisher(OafTest.sf("AMER CHEMICAL SOC X"))
61
										.setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies")).setStoragedate(OafTest.sf("2012-11-18"))
62
										.setLanguage(OafTest.getQualifier("eng", "dnet:languages")).addDescription(OafTest.sf("original description")))))
63
				.build());
64
		oafList.add(getOaf("0.6").setEntity(
65
				getEntity("id_6", null, "pid_6").setResult(
66
						Result.newBuilder().setMetadata(
67
								Result.Metadata.newBuilder().setResulttype(OafTest.getQualifier("publication", "dnet:result_typologies"))
68
										.addDescription(OafTest.sf("new description"))).addInstance(OafTest.getInstance("id", "name")))).build());
69
	}
70

  
71
	@Test
72
	public void test_merge() {
73

  
74
		final Oaf.Builder builder = Oaf.newBuilder();
75

  
76
		for (final Oaf oaf : oafList) {
77
			builder.mergeFrom(oaf);
78
		}
79

  
80
		final Result.Metadata.Builder metadata = builder.getEntityBuilder().getResultBuilder().getMetadataBuilder();
81
		final Iterable<StructuredProperty> filter = Iterables.filter(metadata.getTitleList(), new Predicate<StructuredProperty>() {
82

  
83
			@Override
84
			public boolean apply(final StructuredProperty sp) {
85
				return (sp.getQualifier() != null) && sp.getQualifier().getClassname().equals("main title");
86
			}
87
		});
88

  
89
		final StructuredProperty last = Iterables.getLast(filter);
90

  
91
		metadata.clearTitle().addAllTitle(Lists.newArrayList(last));
92

  
93
		System.out.println(builder.build().toString());
94
	}
95

  
96
	@Test
97
	public void test_merger() {
98

  
99
		final Oaf merge = merger.mergeEntities(null, "id", oafList).build();
100

  
101
		System.out.println(merge.toString());
102
	}
103

  
104
	// @Test
105
	// public void test_sort() {
106
	// Queue<Result> q = new PriorityQueue<Result>(3, DedupReducer.cmp);
107
	// for (Oaf oaf : oafList) {
108
	// q.add(oaf.getEntity().getResult());
109
	// }
110
	//
111
	// while (!q.isEmpty()) {
112
	// Result r = q.remove();
113
	// List<StructuredProperty> titles = r.getMetadata().getTitleList();
114
	// if (!titles.isEmpty()) {
115
	// System.out.println(titles.get(0).getValue());
116
	// }
117
	// }
118
	// }
119

  
120
	private Oaf.Builder getOaf(final String trust) {
121
		return Oaf.newBuilder().setKind(Kind.entity).setDataInfo(OafTest.getDataInfo(trust)).setLastupdatetimestamp(System.currentTimeMillis());
122
	}
123

  
124
	private OafEntity.Builder getEntity(final String id, final String originalId, final String pid) {
125
		final Builder entity =
126
				OafEntity.newBuilder().setType(Type.result).setId(id).addOriginalId(originalId != null ? originalId : UUID.randomUUID().toString());
127

  
128
		if (pid != null) {
129
			entity.addPid(OafTest.getStructuredproperty(pid, "class", "scheme"));
130
		}
131

  
132
		return entity;
133
	}
134

  
135
}
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/dedup/cc/VertexWritableTest.java
package eu.dnetlib.data.mapreduce.dedup.cc;

import java.util.TreeSet;

import eu.dnetlib.data.mapreduce.hbase.dedup.cc.VertexWritable;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.Text;
import org.junit.Test;

import static org.junit.Assert.assertTrue;

/**
 * Created by claudio on 15/10/15.
 */
public class VertexWritableTest {

	@Test
	public void testSerialise() {
		VertexWritable v = new VertexWritable();

		v.setActivated(true);
		v.setVertexId(new Text("a"));

		TreeSet<Text> edges = new TreeSet<Text>();
		for(int i=0; i<5; i++) {
			edges.add(new Text("" + i));
		}
		v.setEdges(edges);

		assertTrue(v.toString() != null);
		final String json = v.toJSON();
		assertTrue(StringUtils.isNotBlank(json));

		System.out.println(json);

		final VertexWritable v1 = VertexWritable.fromJSON(json);
		final String json1 = v1.toJSON();
		assertTrue(json.equals(json1));

		System.out.println(json1);
	}

}
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/dedup/TitleOrderingTest.java
1
package eu.dnetlib.data.mapreduce.dedup;
2

  
3
import java.util.LinkedList;
4
import java.util.List;
5
import java.util.Map;
6
import java.util.PriorityQueue;
7
import java.util.Queue;
8
import java.util.UUID;
9

  
10
import org.apache.commons.io.IOUtils;
11
import org.junit.Before;
12
import org.junit.Test;
13
import org.springframework.core.io.ClassPathResource;
14

  
15
import com.google.common.collect.Lists;
16
import com.google.common.collect.Maps;
17

  
18
import eu.dnetlib.pace.clustering.NGramUtils;
19
import eu.dnetlib.pace.config.Type;
20
import eu.dnetlib.pace.model.Field;
21
import eu.dnetlib.pace.model.FieldListImpl;
22
import eu.dnetlib.pace.model.FieldValueImpl;
23
import eu.dnetlib.pace.model.MapDocument;
24
import eu.dnetlib.pace.model.MapDocumentComparator;
25

  
26
public class TitleOrderingTest {
27

  
28
	private List<MapDocument> results = Lists.newArrayList();
29

  
30
	@Before
31
	public void setUp() throws Exception {
32

  
33
		final List<String> lines = IOUtils.readLines(new ClassPathResource("eu/dnetlib/data/mapreduce/dedup/titles.txt").getInputStream());
34
		for (final String title : lines) {
35
			final Map<String, Field> fieldMap = Maps.newHashMap();
36
			final FieldListImpl list = new FieldListImpl();
37
			list.add(new FieldValueImpl(Type.String, "title", title));
38
			fieldMap.put("title", list);
39
			results.add(new MapDocument("id-" + UUID.randomUUID(), fieldMap));
40
		}
41
	}
42

  
43
	@Test
44
	public void test() {
45

  
46
		final Queue<MapDocument> queue = new PriorityQueue<MapDocument>(100, new MapDocumentComparator("title"));
47

  
48
		queue.addAll(results);
49

  
50
		final Queue<MapDocument> queue2 = simplifyQueue(queue);
51

  
52
		while (!queue2.isEmpty()) {
53
			final MapDocument doc = queue2.remove();
54
			System.out.println(doc.values("title").stringValue());
55
		}
56
	}
57

  
58
	private Queue<MapDocument> simplifyQueue(final Queue<MapDocument> queue) {
59
		final Queue<MapDocument> q = new LinkedList<MapDocument>();
60

  
61
		String fieldRef = "";
62
		final List<MapDocument> tempResults = Lists.newArrayList();
63

  
64
		while (!queue.isEmpty()) {
65
			final MapDocument result = queue.remove();
66

  
67
			if (!result.values("title").isEmpty()) {
68
				final String field = NGramUtils.cleanupForOrdering(result.values("title").stringValue());
69
				if (field.equals(fieldRef)) {
70
					tempResults.add(result);
71
				} else {
72
					if (tempResults.size() < 5) {
73
						q.addAll(tempResults);
74
					} else {
75
						System.out.println("Skipped field: " + fieldRef + " - size: " + tempResults.size());
76
					}
77
					tempResults.clear();
78
					tempResults.add(result);
79
					fieldRef = field;
80
				}
81
			}
82
		}
83
		if (tempResults.size() < 5) {
84
			q.addAll(tempResults);
85
		} else {
86
			System.out.println("Skipped field: " + fieldRef + " - size: " + tempResults.size());
87
		}
88

  
89
		return q;
90
	}
91

  
92
}
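simplifyQueue above drains a PriorityQueue ordered by MapDocumentComparator so that near-identical titles come out adjacent. A minimal sketch of the same JDK mechanism with plain strings and a case-insensitive comparator (no pace classes involved):

import java.util.Comparator;
import java.util.PriorityQueue;
import java.util.Queue;

public class TitleQueueSketch {

    public static void main(String[] args) {
        // Orders titles case-insensitively; elements come out smallest-first via remove().
        final Queue<String> queue = new PriorityQueue<>(Comparator.comparing(String::toLowerCase));
        queue.add("Factors influencing accuracy of referral");
        queue.add("A study of optometrist referrals");
        queue.add("factors influencing accuracy of referral");

        while (!queue.isEmpty()) {
            System.out.println(queue.remove());
        }
    }
}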
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/hbase/dedup/experiment/CsvEntryTest.java
package eu.dnetlib.data.mapreduce.hbase.dedup.experiment;

import org.hsqldb.util.CSVWriter;
import org.junit.Test;
import org.junit.Before;
import org.junit.After;

import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

/**
 * CsvEntry Tester.
 *
 * @author <Authors name>
 * @version 1.0
 * @since <pre>Apr 20, 2016</pre>
 */
public class CsvEntryTest {

	private CsvEntry csvEntry;

	@Before
	public void before() throws Exception {
		csvEntry = new CsvEntry();
	}

	/**
	 * Method: addFeature(final String f)
	 */
	@Test
	public void testAddFeature() throws Exception {
		csvEntry.addFeature("a");
		assertTrue(csvEntry.getFeatures().contains("a"));
	}

	/**
	 * Method: getFeatures()
	 */
	@Test
	public void testGetFeatures() throws Exception {
		csvEntry.addFeature("a");
		assertNotNull(csvEntry.getFeatures());
		assertTrue(csvEntry.getFeatures().size() == 1);
	}

	/**
	 * Method: fromJson(final String json)
	 */
	@Test
	public void testFromJson() throws Exception {
		csvEntry.addFeature("a");
		csvEntry.addFeature("b");

		final String json = csvEntry.toString();

		final CsvEntry another = CsvEntry.fromJson(json);

		assertTrue(another.equals(csvEntry));
	}

}
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/hbase/dedup/experiment/CsvSerialiserTest.java
1
package eu.dnetlib.data.mapreduce.hbase.dedup.experiment;
2

  
3
import static org.junit.Assert.assertEquals;
4

  
5
import java.util.Iterator;
6
import java.util.List;
7
import java.util.Set;
8
import java.util.UUID;
9

  
10
import com.google.common.base.Predicate;
11
import com.google.common.base.Splitter;
12
import com.google.common.collect.Iterables;
13
import com.google.common.collect.Lists;
14
import com.google.common.collect.Sets;
15
import eu.dnetlib.pace.model.Person;
16
import org.apache.commons.lang.RandomStringUtils;
17
import org.apache.commons.lang.StringUtils;
18
import org.apache.commons.lang.math.RandomUtils;
19
import org.apache.commons.logging.Log;
20
import org.apache.commons.logging.LogFactory;
21
import org.junit.Before;
22
import org.junit.Test;
23

  
24
/**
25
 * Created by claudio on 26/04/16.
26
 */
27
public class CsvSerialiserTest {
28

  
29
	private static final Log log = LogFactory.getLog(CsvSerialiserTest.class);
30

  
31
	private CsvSerialiser csvSerialiser;
32

  
33
	private static List<String> ALL_FEATURES = Lists.newLinkedList();
34

  
35
	static {
36
		ALL_FEATURES.add("a");
37
		ALL_FEATURES.add("b");
38
		ALL_FEATURES.add("c");
39
		ALL_FEATURES.add("d");
40
		ALL_FEATURES.add("e");
41
		ALL_FEATURES.add("f");
42
	}
43

  
44
	@Before
45
	public void setUp() {
46
		csvSerialiser = new CsvSerialiser();
47
	}
48

  
49
	@Test
50
	public void testSerialiser() {
51
		final List<CsvEntry> list = Lists.newArrayList();
52

  
53
		final int nRows = RandomUtils.nextInt(10) + 3;
54
		for(int i = 0; i<nRows; i++) {
55
			list.add(getRandomCsvEntry());
56
		}
57

  
58
		final String csv = csvSerialiser.asCSV(list);
59

  
60
		log.info("\n" + csv);
61

  
62
		verifyLength(csv);
63
	}
64

  
65
	@Test
66
	public void testSerialiser2() {
67
		final List<CsvEntry> list = Lists.newArrayList();
68

  
69
		final int nRows = RandomUtils.nextInt(10) + 3;
70
		for(int i = 0; i<nRows; i++) {
71
			list.add(getCsvEntry(i));
72
		}
73

  
74
		final String csv = csvSerialiser.asCSV(list);
75
		log.info("\n" + csv);
76

  
77
		verifyLength(csv);
78
	}
79

  
80
	@Test
81
	public void testSerialiser3() {
82
		final String name = "Manghi, Paolo as ";
83
		final Person p = new Person(name, false);
84
		final String s = p.getSurnameString() + StringUtils.substring(p.getNameString(), 0, 1);
85

  
86
		log.info(String.format("'%s'", s.replaceAll("[^a-zA-Z ]", "").toLowerCase().trim()));
87
	}
88

  
89
	private void verifyLength(final String csv) {
90
		final Iterator<String> lines = Splitter.on("\n").split(csv).iterator();
91
		final List<String> header = Lists.newArrayList(Splitter.on(",").split(lines.next()));
92

  
93
		while(lines.hasNext()) {
94
			List<String> line = Lists.newArrayList(Splitter.on(",").trimResults().omitEmptyStrings().split(lines.next()));
95
			if (!line.isEmpty()) {
96
				assertEquals(header.size(), line.size());
97
			}
98
		}
99
	}
100

  
101
	private CsvEntry getCsvEntry(final int i) {
102
		final CsvEntry e = new CsvEntry();
103
		e.setKey(i+"");
104

  
105
		final Set<String> features = Sets.newLinkedHashSet(Iterables.filter(ALL_FEATURES, new Predicate<String>() {
106
			@Override
107
			public boolean apply(final String s) {
108
				return RandomUtils.nextBoolean();
109
			}
110
		}));
111

  
112
		e.setFeatures(features);
113

  
114
		log.info(String.format("%s - %s", i, features));
115
		//log.info(e.toString() + "\n");
116

  
117
		return e;
118
	}
119

  
120
	public CsvEntry getRandomCsvEntry() {
121
		final CsvEntry e = new CsvEntry();
122
		e.setKey(UUID.randomUUID().toString());
123
		final int nFeatures = RandomUtils.nextInt(4) + 1;
124
		for(int i = 0; i<nFeatures; i++) {
125
			e.addFeature("ft." + RandomStringUtils.randomAlphabetic(2) + " \"" + RandomStringUtils.randomAlphabetic(2) + "\" " + RandomStringUtils.randomAlphabetic(2));
126
		}
127
		return e;
128
	}
129
}
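verifyLength above asserts that every CSV row splits into as many cells as the header, using Guava's Splitter. A minimal standalone sketch of that check (the CSV literal is invented):

import java.util.Iterator;
import java.util.List;

import com.google.common.base.Splitter;
import com.google.common.collect.Lists;

public class CsvLengthSketch {

    public static void main(String[] args) {
        final String csv = "key,a,b,c\nrow1,1,0,1\nrow2,0,1,1\n";

        final Iterator<String> lines = Splitter.on("\n").split(csv).iterator();
        final List<String> header = Lists.newArrayList(Splitter.on(",").split(lines.next()));

        while (lines.hasNext()) {
            // Mirrors verifyLength: trim cells, drop empty trailing lines, compare widths.
            final List<String> cells = Lists.newArrayList(Splitter.on(",").trimResults().omitEmptyStrings().split(lines.next()));
            if (!cells.isEmpty() && cells.size() != header.size()) {
                throw new IllegalStateException("row has " + cells.size() + " cells, header has " + header.size());
            }
        }
        System.out.println("all rows match the header width");
    }
}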
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/hbase/propagation/country/institutionalrepositories/CountryPropagationTest.java
1
package eu.dnetlib.data.mapreduce.hbase.propagation.country.institutionalrepositories;
2

  
3
import eu.dnetlib.data.mapreduce.hbase.propagation.NotValidResultSequenceException;
4
import eu.dnetlib.data.mapreduce.hbase.propagation.ValueList;
5
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder;
6
import eu.dnetlib.data.proto.OafProtos;
7
import eu.dnetlib.data.proto.TypeProtos;
8
import org.apache.hadoop.io.Text;
9
import org.elasticsearch.hadoop.util.Assert;
10
import org.junit.Test;
11

  
12
import java.io.IOException;
13
import java.util.List;
14

  
15
public class CountryPropagationTest {
16
    List<Text> list;
17
    ValueList vl;
18

  
19
    @Test
20
    public void testRegularValueList1() throws IOException {
21
        ValueList vl = new ValueList("pcountry.json");
22

  
23
        list  = vl.getValueToText();//new ArrayList<>(Arrays.asList(new Text(Value.newInstance( "p1,p2,p3","0.9", PropagationConstants.Type.fromsemrel).toJson()),new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromresult).toJson()) ));
24

  
25
        try {
26
            ResultCountryIterator rci = new ResultCountryIterator(list, TypeProtos.Type.datasource.getNumber());
27

  
28
            Assert.isTrue(rci.hasNext());
29
            while(rci.hasNext()){
30
                System.out.println( rci.next().toString());
31

  
32
            }
33

  
34
        } catch (NotValidResultSequenceException e) {
35
           //e.printStackTrace();
36
        }
37
    }
38

  
39
    @Test
40
    public void testRegularValueList2() throws IOException {
41
        ValueList vl = new ValueList("pcountry2.json");
42

  
43
        list  = vl.getValueToText();//new ArrayList<>(Arrays.asList(new Text(Value.newInstance( "p1,p2,p3","0.9", PropagationConstants.Type.fromsemrel).toJson()),new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromresult).toJson()) ));
44

  
45
        try {
46
            ResultCountryIterator rci = new ResultCountryIterator(list, TypeProtos.Type.datasource.getNumber());
47

  
48
            Assert.isTrue(rci.hasNext());
49
            while(rci.hasNext()){
50
                System.out.println( rci.next().toString());
51

  
52
            }
53

  
54
        } catch (NotValidResultSequenceException e) {
55
            e.printStackTrace();
56
        }
57
    }
58

  
59

  
60
    @Test
61
    public void differentCountriesTest() throws IOException {
62
        vl = new ValueList("pcountrynotvalid1.json");
63

  
64
        list  = vl.getValueToText();//new ArrayList<>(Arrays.asList(new Text(Value.newInstance( "p1,p2,p3","0.9", PropagationConstants.Type.fromsemrel).toJson()),new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromresult).toJson()) ));
65

  
66
        try {
67
            ResultCountryIterator rci = new ResultCountryIterator(list,TypeProtos.Type.datasource.getNumber());
68

  
69
            Assert.isTrue(!rci.hasNext());
70

  
71
        } catch (NotValidResultSequenceException e) {
72
            e.printStackTrace();
73
        }
74
    }
75

  
76
    @Test
77
    public void notPresentDataSourceTest() throws IOException{
78
        vl = new ValueList("pcountrynotvalid2.json");
79

  
80
        common();
81
    }
82

  
83
    @Test
84
    public void missingCountryTest() throws IOException{
85
        vl = new ValueList("pcountrynotvalid3.json");
86
        common ();
87

  
88
    }
89

  
90
    @Test
91
    public void missingResultTest()throws IOException{
92
        vl = new ValueList("pcountrynotvalid4.json");
93
        common();
94
    }
95

  
96
    @Test
97
    public void notValidResultInSequence()throws IOException{
98
        vl=new ValueList("pcountrynotvalid5.json");
99
        try{
100
            ResultCountryIterator rci = new ResultCountryIterator(vl.getValueToText(),TypeProtos.Type.datasource.getNumber());
101
            while(rci.hasNext()){
102
                try {
103
                    OafProtos.Oaf oap = rci.next().get(0);
104
                    OafRowKeyDecoder.decode(oap.getEntity().getId());
105
                    System.out.println(oap.toString());
106
                } catch (Exception rse) {
107
                    System.out.println(rse.getMessage());
108
                }
109
            }
110
        }catch(NotValidResultSequenceException e){
111

  
112
        }
113
    }
114

  
115
private void common() throws IOException {
116
    list  = vl.getValueToText();
117

  
118
    try {
119
        ResultCountryIterator rci = new ResultCountryIterator(list,TypeProtos.Type.datasource.getNumber());
120

  
121
        Assert.isTrue(false);
122

  
123
    } catch (NotValidResultSequenceException e) {
124
        System.out.println("e.getMessage() = " + e.getMessage());
125
        Assert.isTrue(true);
126
    }
127
}
128
}
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/hbase/propagation/ValueList.java
package eu.dnetlib.data.mapreduce.hbase.propagation;

import com.google.gson.Gson;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.io.Text;

import java.io.IOException;
import java.util.List;
import java.util.stream.Collectors;

public class ValueList {

    private String jsonFile;

    public ValueList(String jsonFile) {
        this.jsonFile = jsonFile;
    }

    public List<Value> getValueList() {
        return valueList;
    }

    public void setValueList(List<Value> valueList) {
        this.valueList = valueList;
    }

    private List<Value> valueList;

    public List<Text> getValueToText() throws IOException {
        ValueList tmp = new Gson().fromJson(IOUtils.toString(getClass().getResourceAsStream(jsonFile)), ValueList.class);
        //IOUtils.toString(getClass().getResourceAsStream("community_configuration.xml"));
        return tmp.getValueList().stream().map(Value::toJson).map(Text::new).collect(Collectors.toList());
    }
}
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/hbase/propagation/projecttoresult/ProjectPropagationTest.java
1
package eu.dnetlib.data.mapreduce.hbase.propagation.projecttoresult;
2

  
3
import com.google.gson.Gson;
4
import com.googlecode.protobuf.format.JsonFormat;
5
import eu.dnetlib.data.mapreduce.hbase.propagation.NotValidResultSequenceException;
6
import eu.dnetlib.data.mapreduce.hbase.propagation.PropagationConstants;
7
import eu.dnetlib.data.mapreduce.hbase.propagation.Value;
8
import eu.dnetlib.data.mapreduce.hbase.propagation.ValueList;
9
import eu.dnetlib.data.proto.OafProtos;
10
import eu.dnetlib.data.proto.ResultProtos;
11
import org.apache.commons.io.IOUtils;
12
import org.apache.hadoop.io.Text;
13
import org.elasticsearch.hadoop.util.Assert;
14
import org.junit.Test;
15

  
16
import java.io.IOException;
17
import java.util.ArrayList;
18
import java.util.Arrays;
19
import java.util.List;
20
import java.util.stream.Collectors;
21

  
22
import static org.junit.Assert.*;
23

  
24
public class ProjectPropagationTest {
25

  
26
    // ParseResult pr = new ParseResult();
27
    List<Text> list;
28

  
29
    @Test
30
    public void testCompleteOverlap() throws IOException {
31
       // list  = new ArrayList<>(Arrays.asList(new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromsemrel).toJson()),new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromresult).toJson()) ));
32
        ValueList vl = new ValueList("pprojectcompleteoverlap.json");
33

  
34
        list  = vl.getValueToText();
35
        try {
36
            ResultProjectIterator it = new ResultProjectIterator(list, "fakeId");
37
            Assert.isTrue(!it.hasNext());
38
        } catch (NotValidResultSequenceException e) {
39
            e.printStackTrace();
40
        }
41
    }
42

  
43
    @Test
44
    public void testSemIncludesResult() throws IOException {
45
        ValueList vl = new ValueList("pprojectsemincludesresult.json");
46
        list  = vl.getValueToText();
47
        //list  = new ArrayList<>(Arrays.asList(new Text(Value.newInstance( "p1,p2,p3","0.9", PropagationConstants.Type.fromsemrel).toJson()),new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromresult).toJson()) ));
48
        int count = 0;
49
        try {
50
            ResultProjectIterator it = new ResultProjectIterator(list, "fakeId");
51
            Assert.isTrue(it.hasNext());
52
            while(it.hasNext()){
53
                it.next();
54
                count +=1;
55
            }
56
            assertEquals(1,count);
57
        } catch (NotValidResultSequenceException e) {
58
            e.printStackTrace();
59
        }
60
    }
61

  
62
    @Test
63
    public void testResultIncludesSem() throws IOException {
64
        ValueList vl = new ValueList("pprojectresultincludessem.json");
65
        list  = vl.getValueToText();
66
        //list  = new ArrayList<>(Arrays.asList(new Text(Value.newInstance( "p1,p2,p3","0.9", PropagationConstants.Type.fromsemrel).toJson()),new Text(Value.newInstance( "p1,p2,p3,p4","0.9", PropagationConstants.Type.fromresult).toJson()) ));
67

  
68
        try {
69
            ResultProjectIterator it = new ResultProjectIterator(list, "fakeId");
70
            Assert.isTrue(!it.hasNext());
71

  
72
        } catch (NotValidResultSequenceException e) {
73
            e.printStackTrace();
74
        }
75
    }
76

  
77
    @Test
78
    public void testOafOneRelationBuilder() throws IOException {
79
        ValueList vl = new ValueList("valuetooaf.json");
80

  
81
        list  = vl.getValueToText();//new ArrayList<>(Arrays.asList(new Text(Value.newInstance( "p1,p2,p3","0.9", PropagationConstants.Type.fromsemrel).toJson()),new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromresult).toJson()) ));
82

  
83
        try {
84
            ResultProjectIterator it = new ResultProjectIterator(list, "fakeId");
85
            Assert.isTrue(it.hasNext());
86
            while(it.hasNext()){
87
                System.out.println( it.next().toString());
88

  
89
            }
90

  
91
        } catch (NotValidResultSequenceException e) {
92
            e.printStackTrace();
93
        }
94
    }
95

  
96
    @Test
97
    public void testOafTwoRelationBuilder() throws IOException {
98
        ValueList vl = new ValueList("pproject2relationbuilder.json");
99

  
100
        list  = vl.getValueToText();
101
//        list  = new ArrayList<>(Arrays.asList(
102
//                    new Text(Value.newInstance( "p1,p2,p3","0.9", PropagationConstants.Type.fromsemrel).toJson()),
103
//                    new Text(Value.newInstance( "p1,p2","0.9", PropagationConstants.Type.fromresult).toJson()),
104
//                    new Text(Value.newInstance( "p3,p4","0.9", PropagationConstants.Type.fromsemrel).toJson()) )
105
//                );
106

  
107
        try {
108
            ResultProjectIterator it = new ResultProjectIterator(list, "fakeId");
109
            Assert.isTrue(it.hasNext());
110
            int count = 0;
111
            while(it.hasNext()){
112
                System.out.println( it.next().toString());
113
                count +=1;
114
            }
115
            assertEquals(2,count);
116

  
117
        } catch (NotValidResultSequenceException e) {
118
            e.printStackTrace();
119
        }
120
    }
121

  
122
    @Test
123
    public void testEmptyProjectList() throws IOException{
124
        ValueList vl = new ValueList("pprojectemptyprojectlist.json");
125

  
126
        list  = vl.getValueToText();
127

  
128
        try {
129
            ResultProjectIterator it = new ResultProjectIterator(list, "fakeId");
130
            Assert.isTrue(!it.hasNext());
131

  
132

  
133
        } catch (NotValidResultSequenceException e) {
134
            e.printStackTrace();
135
        }
136
    }
137

  
138
    @Test
139
    public void testParseResult1() throws IOException {
140
        OafProtos.Oaf oaf = getOaf("wxample.json");
141
        System.out.println("oaf = " + oaf.getRel().getTarget());
142
        System.out.println(oaf.getDataInfo().toString());
143
    }
144

  
145
    @Test
146
    public void testParseResult2() throws IOException{
147
        OafProtos.Oaf oaf = getOaf("example2.json");
148
        //System.out.println("oaf = " + oaf.getRel().getTarget());
149
        List<ResultProtos.Result.Context> contextList = oaf.getEntity().getResult().getMetadata().getContextList();
150

  
151

  
152
    }
153

  
154
    private OafProtos.Oaf getOaf(String oafjson) throws IOException {
155
        final String json = IOUtils.toString(getClass().getResourceAsStream(oafjson));
156

  
157
        final OafProtos.Oaf.Builder oaf = OafProtos.Oaf.newBuilder();
158
        JsonFormat.merge(json, oaf);
159

  
160
        return oaf.build();
161
    }
162

  
163

  
164
}
modules/dnet-mapreduce-jobs/tags/dnet-mapreduce-jobs-1.2.0/src/test/java/eu/dnetlib/data/mapreduce/hbase/broker/EventWrapperTest.java
1
package eu.dnetlib.data.mapreduce.hbase.broker;
2

  
3
import java.io.IOException;
4
import java.io.InputStream;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.Set;
8

  
9
import com.google.common.collect.Maps;
10
import com.google.common.collect.Sets;
11
import com.googlecode.protobuf.format.JsonFormat;
12
import eu.dnetlib.data.proto.OafProtos;
13
import eu.dnetlib.data.proto.OafProtos.Oaf;
14
import eu.dnetlib.data.transform.Column;
15
import eu.dnetlib.data.transform.Row;
16
import eu.dnetlib.data.transform.XsltRowTransformer;
17
import eu.dnetlib.data.transform.XsltRowTransformerFactory;
18
import org.apache.commons.io.IOUtils;
19
import org.apache.commons.logging.Log;
20
import org.apache.commons.logging.LogFactory;
21
import org.apache.hadoop.mapreduce.Counter;
22
import org.apache.hadoop.mapreduce.Reducer.Context;
23
import org.dom4j.Document;
24
import org.dom4j.DocumentException;
25
import org.dom4j.io.SAXReader;
26
import org.junit.Before;
27
import org.junit.Ignore;
28
import org.junit.Test;
29
import org.junit.runner.RunWith;
30
import org.mockito.Mock;
31
import org.mockito.junit.MockitoJUnitRunner;
32

  
33
import static org.junit.Assert.*;
34
import static org.mockito.ArgumentMatchers.anyString;
35
import static org.mockito.Mockito.when;
36

  
37
/**
38
 * Created by claudio on 22/07/16.
39
 */
40
@RunWith(MockitoJUnitRunner.Silent.class)
41
public class EventWrapperTest {
42

  
43
	private static final Log log = LogFactory.getLog(EventWrapperTest.class);
44

  
45
	private Oaf oaf1_1;
46
	private Oaf oaf1_2;
47
	private Oaf oaf2_1;
48
	private Oaf oaf2_2;
49
	private Oaf oaf3_1;
50
	private Oaf oaf3_2;
51

  
52
	private String xslt;
53

  
54
	@Mock
55
	private Context context;
56

  
57
	@Mock
58
	private Counter counter;
59

  
60
	private Map<String, String> baseUrlMap = Maps.newHashMap();
61

  
62
	private static final String basePathProfiles = "/eu/dnetlib/test/profiles/TransformationRuleDSResources/TransformationRuleDSResourceType/2hbase/";
63

  
64
	@Before
65
	public void setUp() throws Exception {
66
		xslt = loadFromTransformationProfile("oaf2hbase.xml");
67
		oaf1_1 = asOaf("recordOaf_1_1.xml");
68
		oaf1_2 = asOaf("recordOaf_1_2.xml");
69
		oaf2_1 = asOaf("recordOaf_2_1.xml");
70
		oaf2_2 = asOaf("recordOaf_2_2.xml");
71
		oaf3_1 = asOaf("recordOaf_3_1.xml");
72
		oaf3_2 = asOaf("recordOaf_3_2.xml");
... This diff was truncated because it exceeds the maximum size that can be displayed.
