Project

General

Profile

1
package eu.dnetlib.data.mapreduce.actions;
2

    
3
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;

import com.google.common.collect.Lists;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.protobuf.InvalidProtocolBufferException;
import eu.dnetlib.actionmanager.actions.ActionFactory;
import eu.dnetlib.actionmanager.actions.AtomicAction;
import eu.dnetlib.actionmanager.common.Agent;
import eu.dnetlib.data.mapreduce.hbase.Reporter;
import eu.dnetlib.data.mapreduce.hbase.dataimport.DOIBoostToActions;
import eu.dnetlib.data.proto.OafProtos.Oaf;
import eu.dnetlib.data.transform.Column;
import eu.dnetlib.data.transform.Row;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import org.junit.Before;
import org.junit.Test;
24

    
25
public class DOIBoostToActionsTest  {
26
    private String setName;
27
    private Agent agent;
28
    private Reporter reporter;
29

    
30

    
31
    @Before
32
    public void setup() {
33
        setName = "DLI";
34
        agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service);
35
        reporter = (Reporter) (counterGroup, counterName, delta) -> System.out.println(String.format("COUNTER: %s - %s : %d", counterGroup, counterName, delta));
36
    }
37

    
38
    @Test
39
    public void testSingleDOIBoostAction() throws IOException {
40
        doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/DOIBoostAction.json");
41
    }
42

    
43
    @Test
44
    public void testSingleDOIBoostActionFilter() throws IOException {
45
        doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/DOIBoostAction_filterOut.json");
46
    }
47

    
48
    @Test
49
    public void testMultipleDOIBoostActionDiscardMany() throws IOException {
50
        doTestAllDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/doiboost_discard_many.json");
51
    }
52

    
53

    
54
    @Test
55
    public void testDOIBoostActionToXML() throws Exception {
56
        doTestSingleDOIBoostActionToXML("/eu/dnetlib/data/mapreduce/actions/broken");
57
    }
58

    
59

    
60

    
61
    @Test
62
    public void testMultipleDOIBoostAction() throws IOException {
63
        doTestAllDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/part-00070");
64
    }
65

    
66

    
67

    
68
    @Test
69
    public void testDecompression() throws DataFormatException {
70
        final String s ="eJxtVM1u20YQfpW5pHAAWXCCIECORazKQYAEcHrpcbUckgvs7jA7S8vsye/QU4H25fwk/WYpKUbQ\n" +
71
                "mygOZ775fuaOXawjeVeY+HHi3IU6F1Ya3QNTCcqZvGQNHRd3iAuFTHVkmpxWen76i1gn9sHF9dWe\n" +
72
                "S3J5sZJlbeFiYdctaO6ZO+7ozc0rkr414VwD5g5FVKmTxFqDp6lIN/tKV193H2/p7c3N+9db+h3l\n" +
73
                "yWECuqlk6qWgRcBn/MBRpoRehEdHXXHJWZ+OkwzFTSN++9Hlga21Z9Ut7bKUJLNS4i54F+3NgLXR\n" +
74
                "IHe2cA15tveV/ZglytCqQkLdA9sw3VCVgbFGoWMAhY4m0VADdlbxQa4ZbSS17/D/DEySN2RERzAS\n" +
75
                "8oAGGEdusN+TTHM81RwBebRtOtDjK6gFtDlWqwMFzhg72kOe0wHzQSdHCITCBn8skm2uaRKxG9oa\n" +
76
                "4pVFU5O/z5w9r4R5wAGZkB7/LQYqCsgqNIpOoboY/my4Vm7Oo71oVZpQViFJhbDeKW/pTo5QpGxI\n" +
77
                "5tKIYAzXtnaWSvAQt71jlOOq4PPTP9bM7BfBoOTnp3+t5KQCen7KWkHZ5n83HyBIySYJiHa1cprq\n" +
78
                "Os8aBh+Mv178rPadGH1J7G3f46XZZq7hvOPFmY9BG91jy0eLBzTATvDPln6D+/jRpSny5qXtz4wp\n" +
79
                "HRlf1AKdrW2TonBIh7koqDoslHmQGhpxh7mDkVZ6e7ZAlevOLRvkw2Pi6g9HuoCF1dpwoWl0gfyz\n" +
80
                "Uk3pwgjJiu/Nhw/vV7K9s8L23dqPjk5Jx9AbkipHVzrIWvi64z7kk6wNWgtPh+PQQG7pWzALGVsH\n" +
81
                "BsTc6O0tsO8otDCu+cbqB252Ly5rz6Wga8hmgjMGK7qoc17mB2Et3AdnzKHiNrghwyhK9xwbgfsi\n" +
82
                "86R0dXu/19cowcSru92Xzzv6hb59vLvf/fF1v2/AcEk+GdMhGT5AuMx4eZBCvr6AOIVndXkDUhGv\n" +
83
                "ebDA9+GRm2S4Ne10YgWVaBvLeibVshDqcvbVqdvz098WbuUWmV/b2WpRPwX9pe3s4tj1XT1dBc6g\n" +
84
                "NOM+NBcnmOjBrdqdjNxWCeWHXwkKX9xuN0LtVGmwKJqwBgxWSVz+Aw5ePDg=";
85
        byte[] byteArray = Base64.decodeBase64(s.getBytes());
86
        Inflater decompresser = new Inflater();
87
        decompresser.setInput(byteArray);
88
        ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length);
89
        byte[] buffer = new byte[8192];
90
        while (!decompresser.finished()) {
91
            int size = decompresser.inflate(buffer);
92
            bos.write(buffer, 0, size);
93
        }
94
        byte[] unzippeddata = bos.toByteArray();
95
        decompresser.end();
96

    
97
        System.out.println(new String(unzippeddata));
98

    
99
    }
100

    
101

    
102
    private void doTestSingleDOIBoostActionToXML(final String filePath) throws Exception {
103
        final List<Row> rows = Lists.newArrayList();
104
        final InputStream is = this.getClass().getResourceAsStream(filePath);
105
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
106

    
107
        String line = in.readLine();
108

    
109
        final JsonParser parser = new JsonParser();
110
        JsonObject root = parser.parse(line).getAsJsonObject();
111
        List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false,reporter);
112

    
113
        if (actions!= null) {
114
            actions.forEach(action-> {
115
                if (action.getTargetColumn().equals("body") && action.getTargetColumnFamily().equals("result"))
116
                {
117
                    Column<String, byte[]> col = new Column<>("body" , action.getTargetValue());
118
                    rows.add(new Row("result",action.getTargetRowKey() , Lists.newArrayList(col)));
119
                }
120

    
121
            });
122

    
123

    
124

    
125
        }
126

    
127

    
128

    
129
    }
130

    
131
    private void doTestSingleDOIBoostAction(final String filePath) throws IOException {
132
        final InputStream is = this.getClass().getResourceAsStream(filePath);
133
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
134

    
135
        String line = in.readLine();
136

    
137
        final JsonParser parser = new JsonParser();
138
        JsonObject root = parser.parse(line).getAsJsonObject();
139
        List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false, reporter);
140
        if (actions!= null) {
141
            actions.forEach(it -> {
142
                try {
143
                    System.out.println(
144
                            String.format(" RowKey:%s TargetColumnFamily:%s   TargetColumn: %s\n value:\n%s", it.getTargetRowKey(), it.getTargetColumnFamily(),
145
                                    it.getTargetColumn(),
146
                                    Oaf.parseFrom(it.getTargetValue())));
147
                } catch (InvalidProtocolBufferException e) {
148
                    e.printStackTrace();
149
                }
150
            });
151
        }
152
    }
153

    
154
    private void doTestAllDOIBoostAction(final String filePath) throws IOException {
155
        final InputStream is = this.getClass().getResourceAsStream(filePath);
156
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
157

    
158
        String line = in.readLine();
159
        int i = 0;
160
        int cnt = 0;
161
        while(StringUtils.isNotBlank(line)) {
162
            cnt ++;
163

    
164
            final JsonParser parser = new JsonParser();
165
            JsonObject root = parser.parse(line).getAsJsonObject();
166
            try {
167
                List<AtomicAction> atomicActions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false, reporter);
168
                if (atomicActions!= null)
169
                {
170
                    i ++;
171
                }
172
//                    atomicActions.forEach(it -> System.out.println(String.format(" RowKey:%s TargetColumnFamily:%s   TargetColumn: %s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn())));
173
                else{
174
                    System.out.println("SKIPPED Type "+ root.get("type").getAsString());
175
                }
176

    
177
            } catch (Throwable e) {
178
                System.out.println(line);
179
                throw new RuntimeException(e);
180
            }
181
            line= in.readLine();
182
        }
183

    
184
        System.out.println("total "+i+" / "+cnt);
185
    }
186

    
187

    
188

    
189
}
(2-2/4)