Project

General

Profile

1
package eu.dnetlib.data.mapreduce.actions;
2

    
3
import java.io.*;
4
import java.util.List;
5
import java.util.zip.DataFormatException;
6
import java.util.zip.Inflater;
7

    
8
import com.google.common.collect.Lists;
9
import com.google.gson.JsonObject;
10
import com.google.gson.JsonParser;
11
import eu.dnetlib.actionmanager.actions.ActionFactory;
12
import eu.dnetlib.actionmanager.actions.AtomicAction;
13
import eu.dnetlib.actionmanager.common.Agent;
14
import eu.dnetlib.data.mapreduce.hbase.Reporter;
15
import eu.dnetlib.data.mapreduce.hbase.dataimport.DOIBoostToActions;
16
import eu.dnetlib.data.transform.Column;
17
import eu.dnetlib.data.transform.Row;
18
import org.apache.commons.codec.binary.Base64;
19
import org.apache.commons.lang3.StringUtils;
20
import org.junit.Before;
21
import org.junit.Test;
22

    
23
public class DOIBoostToActionsTest  {
24
    private String setName;
25
    private Agent agent;
26
    private Reporter reporter;
27

    
28

    
29
    @Before
30
    public void setup() {
31
        setName = "DLI";
32
        agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service);
33
        reporter = (Reporter) (counterGroup, counterName, delta) -> System.out.println(String.format("COUNTER: %s - %s : %d", counterGroup, counterName, delta));
34
    }
35

    
36
    @Test
37
    public void testSingleDOIBoostAction() throws IOException {
38
        doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/DOIBoostAction.json");
39
    }
40
    @Test
41
    public void testSingleDOIBoostActionFilter() throws IOException {
42
        doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/DOIBoostAction_filterOut.json");
43
    }
44

    
45

    
46
    @Test
47
    public void testDOIBoostActionToXML() throws Exception {
48
        doTestSingleDOIBoostActionToXML("/eu/dnetlib/data/mapreduce/actions/broken");
49
    }
50

    
51

    
52

    
53
    @Test
54
    public void testMultipleDOIBoostAction() throws IOException {
55
        doTestAllDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/part-00070");
56
    }
57

    
58

    
59

    
60
    @Test
61
    public void testDecompression() throws DataFormatException {
62
        final String s ="eJxtVM1u20YQfpW5pHAAWXCCIECORazKQYAEcHrpcbUckgvs7jA7S8vsye/QU4H25fwk/WYpKUbQ\n" +
63
                "mygOZ775fuaOXawjeVeY+HHi3IU6F1Ya3QNTCcqZvGQNHRd3iAuFTHVkmpxWen76i1gn9sHF9dWe\n" +
64
                "S3J5sZJlbeFiYdctaO6ZO+7ozc0rkr414VwD5g5FVKmTxFqDp6lIN/tKV193H2/p7c3N+9db+h3l\n" +
65
                "yWECuqlk6qWgRcBn/MBRpoRehEdHXXHJWZ+OkwzFTSN++9Hlga21Z9Ut7bKUJLNS4i54F+3NgLXR\n" +
66
                "IHe2cA15tveV/ZglytCqQkLdA9sw3VCVgbFGoWMAhY4m0VADdlbxQa4ZbSS17/D/DEySN2RERzAS\n" +
67
                "8oAGGEdusN+TTHM81RwBebRtOtDjK6gFtDlWqwMFzhg72kOe0wHzQSdHCITCBn8skm2uaRKxG9oa\n" +
68
                "4pVFU5O/z5w9r4R5wAGZkB7/LQYqCsgqNIpOoboY/my4Vm7Oo71oVZpQViFJhbDeKW/pTo5QpGxI\n" +
69
                "5tKIYAzXtnaWSvAQt71jlOOq4PPTP9bM7BfBoOTnp3+t5KQCen7KWkHZ5n83HyBIySYJiHa1cprq\n" +
70
                "Os8aBh+Mv178rPadGH1J7G3f46XZZq7hvOPFmY9BG91jy0eLBzTATvDPln6D+/jRpSny5qXtz4wp\n" +
71
                "HRlf1AKdrW2TonBIh7koqDoslHmQGhpxh7mDkVZ6e7ZAlevOLRvkw2Pi6g9HuoCF1dpwoWl0gfyz\n" +
72
                "Uk3pwgjJiu/Nhw/vV7K9s8L23dqPjk5Jx9AbkipHVzrIWvi64z7kk6wNWgtPh+PQQG7pWzALGVsH\n" +
73
                "BsTc6O0tsO8otDCu+cbqB252Ly5rz6Wga8hmgjMGK7qoc17mB2Et3AdnzKHiNrghwyhK9xwbgfsi\n" +
74
                "86R0dXu/19cowcSru92Xzzv6hb59vLvf/fF1v2/AcEk+GdMhGT5AuMx4eZBCvr6AOIVndXkDUhGv\n" +
75
                "ebDA9+GRm2S4Ne10YgWVaBvLeibVshDqcvbVqdvz098WbuUWmV/b2WpRPwX9pe3s4tj1XT1dBc6g\n" +
76
                "NOM+NBcnmOjBrdqdjNxWCeWHXwkKX9xuN0LtVGmwKJqwBgxWSVz+Aw5ePDg=";
77
        byte[] byteArray = Base64.decodeBase64(s.getBytes());
78
        Inflater decompresser = new Inflater();
79
        decompresser.setInput(byteArray);
80
        ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length);
81
        byte[] buffer = new byte[8192];
82
        while (!decompresser.finished()) {
83
            int size = decompresser.inflate(buffer);
84
            bos.write(buffer, 0, size);
85
        }
86
        byte[] unzippeddata = bos.toByteArray();
87
        decompresser.end();
88

    
89
        System.out.println(new String(unzippeddata));
90

    
91
    }
92

    
93

    
94
    private void doTestSingleDOIBoostActionToXML(final String filePath) throws Exception {
95
        final List<Row> rows = Lists.newArrayList();
96
        final InputStream is = this.getClass().getResourceAsStream(filePath);
97
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
98

    
99
        String line = in.readLine();
100

    
101
        final JsonParser parser = new JsonParser();
102
        JsonObject root = parser.parse(line).getAsJsonObject();
103
        List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false,reporter);
104

    
105
        if (actions!= null) {
106
            actions.forEach(action-> {
107
                if (action.getTargetColumn().equals("body") && action.getTargetColumnFamily().equals("result"))
108
                {
109
                    Column<String, byte[]> col = new Column<>("body" , action.getTargetValue());
110
                    rows.add(new Row("result",action.getTargetRowKey() , Lists.newArrayList(col)));
111
                }
112

    
113
            });
114

    
115

    
116

    
117
        }
118

    
119

    
120

    
121
    }
122

    
123
    private void doTestSingleDOIBoostAction(final String filePath) throws IOException {
124
        final InputStream is = this.getClass().getResourceAsStream(filePath);
125
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
126

    
127
        String line = in.readLine();
128

    
129
        final JsonParser parser = new JsonParser();
130
        JsonObject root = parser.parse(line).getAsJsonObject();
131
        List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false, reporter);
132
        if (actions!= null) {
133

    
134
            actions.forEach(it -> System.out.println(String.format(" RowKey:%s TargetColumnFamily:%s   TargetColumn: %s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn())));
135
        }
136
    }
137

    
138
    private void doTestAllDOIBoostAction(final String filePath) throws IOException {
139
        final InputStream is = this.getClass().getResourceAsStream(filePath);
140
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
141

    
142
        String line = in.readLine();
143
        int i = 0;
144
        int cnt = 0;
145
        while(StringUtils.isNotBlank(line)) {
146
            cnt ++;
147

    
148
            final JsonParser parser = new JsonParser();
149
            JsonObject root = parser.parse(line).getAsJsonObject();
150
            try {
151
                List<AtomicAction> atomicActions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false, reporter);
152
                if (atomicActions!= null)
153
                {
154
                    i ++;
155
                }
156
//                    atomicActions.forEach(it -> System.out.println(String.format(" RowKey:%s TargetColumnFamily:%s   TargetColumn: %s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn())));
157
                else{
158
                    System.out.println("SKIPPED Type "+ root.get("type").getAsString());
159
                }
160

    
161
            } catch (Throwable e) {
162
                System.out.println(line);
163
                throw new RuntimeException(e);
164
            }
165
            line= in.readLine();
166
        }
167

    
168
        System.out.println("total "+i+" / "+cnt);
169
    }
170

    
171

    
172

    
173
}
(2-2/3)