1 |
53554
|
sandro.lab
|
package eu.dnetlib.data.mapreduce.actions;
|
2 |
|
|
|
3 |
55238
|
alessia.ba
|
import java.io.*;
|
4 |
|
|
import java.util.List;
|
5 |
|
|
import java.util.zip.DataFormatException;
|
6 |
|
|
import java.util.zip.Inflater;
|
7 |
|
|
|
8 |
53588
|
sandro.lab
|
import com.google.common.collect.Lists;
|
9 |
53554
|
sandro.lab
|
import com.google.gson.JsonObject;
|
10 |
|
|
import com.google.gson.JsonParser;
|
11 |
|
|
import eu.dnetlib.actionmanager.actions.ActionFactory;
|
12 |
|
|
import eu.dnetlib.actionmanager.actions.AtomicAction;
|
13 |
|
|
import eu.dnetlib.actionmanager.common.Agent;
|
14 |
55238
|
alessia.ba
|
import eu.dnetlib.data.mapreduce.hbase.Reporter;
|
15 |
53554
|
sandro.lab
|
import eu.dnetlib.data.mapreduce.hbase.dataimport.DOIBoostToActions;
|
16 |
53588
|
sandro.lab
|
import eu.dnetlib.data.transform.Column;
|
17 |
|
|
import eu.dnetlib.data.transform.Row;
|
18 |
53736
|
sandro.lab
|
import org.apache.commons.codec.binary.Base64;
|
19 |
53554
|
sandro.lab
|
import org.apache.commons.lang3.StringUtils;
|
20 |
|
|
import org.junit.Before;
|
21 |
|
|
import org.junit.Test;
|
22 |
|
|
|
23 |
53592
|
sandro.lab
|
public class DOIBoostToActionsTest {
|
24 |
53554
|
sandro.lab
|
private String setName;
|
25 |
|
|
private Agent agent;
|
26 |
55238
|
alessia.ba
|
private Reporter reporter;
|
27 |
53554
|
sandro.lab
|
|
28 |
53588
|
sandro.lab
|
|
29 |
53554
|
sandro.lab
|
@Before
|
30 |
|
|
public void setup() {
|
31 |
|
|
setName = "DLI";
|
32 |
|
|
agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service);
|
33 |
55238
|
alessia.ba
|
reporter = (Reporter) (counterGroup, counterName, delta) -> System.out.println(String.format("COUNTER: %s - %s : %d", counterGroup, counterName, delta));
|
34 |
53554
|
sandro.lab
|
}
|
35 |
|
|
|
36 |
|
|
@Test
|
37 |
|
|
public void testSingleDOIBoostAction() throws IOException {
|
38 |
54831
|
sandro.lab
|
doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/DOIBoostAction.json");
|
39 |
53554
|
sandro.lab
|
}
|
40 |
55238
|
alessia.ba
|
@Test
|
41 |
|
|
public void testSingleDOIBoostActionFilter() throws IOException {
|
42 |
|
|
doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/DOIBoostAction_filterOut.json");
|
43 |
|
|
}
|
44 |
53554
|
sandro.lab
|
|
45 |
|
|
|
46 |
|
|
@Test
|
47 |
53588
|
sandro.lab
|
public void testDOIBoostActionToXML() throws Exception {
|
48 |
|
|
doTestSingleDOIBoostActionToXML("/eu/dnetlib/data/mapreduce/actions/broken");
|
49 |
|
|
}
|
50 |
|
|
|
51 |
|
|
|
52 |
|
|
|
53 |
|
|
@Test
|
54 |
53554
|
sandro.lab
|
public void testMultipleDOIBoostAction() throws IOException {
|
55 |
|
|
doTestAllDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/part-00070");
|
56 |
|
|
}
|
57 |
|
|
|
58 |
|
|
|
59 |
53592
|
sandro.lab
|
|
60 |
53736
|
sandro.lab
|
@Test
|
61 |
|
|
public void testDecompression() throws DataFormatException {
|
62 |
|
|
final String s ="eJxtVM1u20YQfpW5pHAAWXCCIECORazKQYAEcHrpcbUckgvs7jA7S8vsye/QU4H25fwk/WYpKUbQ\n" +
|
63 |
|
|
"mygOZ775fuaOXawjeVeY+HHi3IU6F1Ya3QNTCcqZvGQNHRd3iAuFTHVkmpxWen76i1gn9sHF9dWe\n" +
|
64 |
|
|
"S3J5sZJlbeFiYdctaO6ZO+7ozc0rkr414VwD5g5FVKmTxFqDp6lIN/tKV193H2/p7c3N+9db+h3l\n" +
|
65 |
|
|
"yWECuqlk6qWgRcBn/MBRpoRehEdHXXHJWZ+OkwzFTSN++9Hlga21Z9Ut7bKUJLNS4i54F+3NgLXR\n" +
|
66 |
|
|
"IHe2cA15tveV/ZglytCqQkLdA9sw3VCVgbFGoWMAhY4m0VADdlbxQa4ZbSS17/D/DEySN2RERzAS\n" +
|
67 |
|
|
"8oAGGEdusN+TTHM81RwBebRtOtDjK6gFtDlWqwMFzhg72kOe0wHzQSdHCITCBn8skm2uaRKxG9oa\n" +
|
68 |
|
|
"4pVFU5O/z5w9r4R5wAGZkB7/LQYqCsgqNIpOoboY/my4Vm7Oo71oVZpQViFJhbDeKW/pTo5QpGxI\n" +
|
69 |
|
|
"5tKIYAzXtnaWSvAQt71jlOOq4PPTP9bM7BfBoOTnp3+t5KQCen7KWkHZ5n83HyBIySYJiHa1cprq\n" +
|
70 |
|
|
"Os8aBh+Mv178rPadGH1J7G3f46XZZq7hvOPFmY9BG91jy0eLBzTATvDPln6D+/jRpSny5qXtz4wp\n" +
|
71 |
|
|
"HRlf1AKdrW2TonBIh7koqDoslHmQGhpxh7mDkVZ6e7ZAlevOLRvkw2Pi6g9HuoCF1dpwoWl0gfyz\n" +
|
72 |
|
|
"Uk3pwgjJiu/Nhw/vV7K9s8L23dqPjk5Jx9AbkipHVzrIWvi64z7kk6wNWgtPh+PQQG7pWzALGVsH\n" +
|
73 |
|
|
"BsTc6O0tsO8otDCu+cbqB252Ly5rz6Wga8hmgjMGK7qoc17mB2Et3AdnzKHiNrghwyhK9xwbgfsi\n" +
|
74 |
|
|
"86R0dXu/19cowcSru92Xzzv6hb59vLvf/fF1v2/AcEk+GdMhGT5AuMx4eZBCvr6AOIVndXkDUhGv\n" +
|
75 |
|
|
"ebDA9+GRm2S4Ne10YgWVaBvLeibVshDqcvbVqdvz098WbuUWmV/b2WpRPwX9pe3s4tj1XT1dBc6g\n" +
|
76 |
|
|
"NOM+NBcnmOjBrdqdjNxWCeWHXwkKX9xuN0LtVGmwKJqwBgxWSVz+Aw5ePDg=";
|
77 |
|
|
byte[] byteArray = Base64.decodeBase64(s.getBytes());
|
78 |
|
|
Inflater decompresser = new Inflater();
|
79 |
|
|
decompresser.setInput(byteArray);
|
80 |
|
|
ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length);
|
81 |
|
|
byte[] buffer = new byte[8192];
|
82 |
|
|
while (!decompresser.finished()) {
|
83 |
|
|
int size = decompresser.inflate(buffer);
|
84 |
|
|
bos.write(buffer, 0, size);
|
85 |
|
|
}
|
86 |
|
|
byte[] unzippeddata = bos.toByteArray();
|
87 |
|
|
decompresser.end();
|
88 |
53592
|
sandro.lab
|
|
89 |
53736
|
sandro.lab
|
System.out.println(new String(unzippeddata));
|
90 |
|
|
|
91 |
|
|
}
|
92 |
|
|
|
93 |
|
|
|
94 |
53588
|
sandro.lab
|
private void doTestSingleDOIBoostActionToXML(final String filePath) throws Exception {
|
95 |
|
|
final List<Row> rows = Lists.newArrayList();
|
96 |
|
|
final InputStream is = this.getClass().getResourceAsStream(filePath);
|
97 |
|
|
final BufferedReader in = new BufferedReader(new InputStreamReader(is));
|
98 |
53554
|
sandro.lab
|
|
99 |
53588
|
sandro.lab
|
String line = in.readLine();
|
100 |
|
|
|
101 |
|
|
final JsonParser parser = new JsonParser();
|
102 |
|
|
JsonObject root = parser.parse(line).getAsJsonObject();
|
103 |
55238
|
alessia.ba
|
List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false,reporter);
|
104 |
|
|
|
105 |
53588
|
sandro.lab
|
if (actions!= null) {
|
106 |
|
|
actions.forEach(action-> {
|
107 |
|
|
if (action.getTargetColumn().equals("body") && action.getTargetColumnFamily().equals("result"))
|
108 |
|
|
{
|
109 |
|
|
Column<String, byte[]> col = new Column<>("body" , action.getTargetValue());
|
110 |
|
|
rows.add(new Row("result",action.getTargetRowKey() , Lists.newArrayList(col)));
|
111 |
|
|
}
|
112 |
|
|
|
113 |
|
|
});
|
114 |
|
|
|
115 |
|
|
|
116 |
|
|
|
117 |
|
|
}
|
118 |
|
|
|
119 |
|
|
|
120 |
|
|
|
121 |
|
|
}
|
122 |
|
|
|
123 |
53554
|
sandro.lab
|
private void doTestSingleDOIBoostAction(final String filePath) throws IOException {
|
124 |
|
|
final InputStream is = this.getClass().getResourceAsStream(filePath);
|
125 |
|
|
final BufferedReader in = new BufferedReader(new InputStreamReader(is));
|
126 |
|
|
|
127 |
|
|
String line = in.readLine();
|
128 |
|
|
|
129 |
|
|
final JsonParser parser = new JsonParser();
|
130 |
|
|
JsonObject root = parser.parse(line).getAsJsonObject();
|
131 |
55238
|
alessia.ba
|
List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false, reporter);
|
132 |
53556
|
sandro.lab
|
if (actions!= null) {
|
133 |
53554
|
sandro.lab
|
|
134 |
53556
|
sandro.lab
|
actions.forEach(it -> System.out.println(String.format(" RowKey:%s TargetColumnFamily:%s TargetColumn: %s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn())));
|
135 |
|
|
}
|
136 |
53554
|
sandro.lab
|
}
|
137 |
|
|
|
138 |
|
|
private void doTestAllDOIBoostAction(final String filePath) throws IOException {
|
139 |
|
|
final InputStream is = this.getClass().getResourceAsStream(filePath);
|
140 |
|
|
final BufferedReader in = new BufferedReader(new InputStreamReader(is));
|
141 |
|
|
|
142 |
|
|
String line = in.readLine();
|
143 |
|
|
int i = 0;
|
144 |
|
|
int cnt = 0;
|
145 |
|
|
while(StringUtils.isNotBlank(line)) {
|
146 |
|
|
cnt ++;
|
147 |
|
|
|
148 |
|
|
final JsonParser parser = new JsonParser();
|
149 |
|
|
JsonObject root = parser.parse(line).getAsJsonObject();
|
150 |
|
|
try {
|
151 |
55238
|
alessia.ba
|
List<AtomicAction> atomicActions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false, reporter);
|
152 |
53554
|
sandro.lab
|
if (atomicActions!= null)
|
153 |
|
|
{
|
154 |
|
|
i ++;
|
155 |
|
|
}
|
156 |
|
|
// atomicActions.forEach(it -> System.out.println(String.format(" RowKey:%s TargetColumnFamily:%s TargetColumn: %s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn())));
|
157 |
|
|
else{
|
158 |
|
|
System.out.println("SKIPPED Type "+ root.get("type").getAsString());
|
159 |
|
|
}
|
160 |
|
|
|
161 |
|
|
} catch (Throwable e) {
|
162 |
|
|
System.out.println(line);
|
163 |
|
|
throw new RuntimeException(e);
|
164 |
|
|
}
|
165 |
|
|
line= in.readLine();
|
166 |
|
|
}
|
167 |
|
|
|
168 |
|
|
System.out.println("total "+i+" / "+cnt);
|
169 |
|
|
}
|
170 |
|
|
|
171 |
|
|
|
172 |
|
|
|
173 |
|
|
}
|