Project

General

Profile

1
package eu.dnetlib.data.mapreduce.actions;
2

    
3
import com.google.common.collect.Lists;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import eu.dnetlib.actionmanager.actions.ActionFactory;
import eu.dnetlib.actionmanager.actions.AtomicAction;
import eu.dnetlib.actionmanager.common.Agent;
import eu.dnetlib.data.mapreduce.hbase.dataimport.DOIBoostToActions;
import eu.dnetlib.data.transform.Column;
import eu.dnetlib.data.transform.Row;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.StringUtils;
import org.junit.Before;
import org.junit.Test;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
21

    
22
public class DOIBoostToActionsTest  {
23
    private String setName;
24
    private Agent agent;
25

    
26

    
27

    
28
    @Before
29
    public void setup() {
30
        setName = "DLI";
31
        agent= new Agent("agentId","agentName", Agent.AGENT_TYPE.service);
32
    }
33

    
34
    @Test
35
    public void testSingleDOIBoostAction() throws IOException {
36
        doTestSingleDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/DOIBoostAction.json");
37
    }
38

    
39

    
40
    @Test
41
    public void testDOIBoostActionToXML() throws Exception {
42
        doTestSingleDOIBoostActionToXML("/eu/dnetlib/data/mapreduce/actions/broken");
43
    }
44

    
45

    
46

    
47
    @Test
48
    public void testMultipleDOIBoostAction() throws IOException {
49
        doTestAllDOIBoostAction("/eu/dnetlib/data/mapreduce/actions/part-00070");
50
    }
51

    
52

    
53

    
54
    @Test
55
    public void testDecompression() throws DataFormatException {
56
        final String s ="eJxtVM1u20YQfpW5pHAAWXCCIECORazKQYAEcHrpcbUckgvs7jA7S8vsye/QU4H25fwk/WYpKUbQ\n" +
57
                "mygOZ775fuaOXawjeVeY+HHi3IU6F1Ya3QNTCcqZvGQNHRd3iAuFTHVkmpxWen76i1gn9sHF9dWe\n" +
58
                "S3J5sZJlbeFiYdctaO6ZO+7ozc0rkr414VwD5g5FVKmTxFqDp6lIN/tKV193H2/p7c3N+9db+h3l\n" +
59
                "yWECuqlk6qWgRcBn/MBRpoRehEdHXXHJWZ+OkwzFTSN++9Hlga21Z9Ut7bKUJLNS4i54F+3NgLXR\n" +
60
                "IHe2cA15tveV/ZglytCqQkLdA9sw3VCVgbFGoWMAhY4m0VADdlbxQa4ZbSS17/D/DEySN2RERzAS\n" +
61
                "8oAGGEdusN+TTHM81RwBebRtOtDjK6gFtDlWqwMFzhg72kOe0wHzQSdHCITCBn8skm2uaRKxG9oa\n" +
62
                "4pVFU5O/z5w9r4R5wAGZkB7/LQYqCsgqNIpOoboY/my4Vm7Oo71oVZpQViFJhbDeKW/pTo5QpGxI\n" +
63
                "5tKIYAzXtnaWSvAQt71jlOOq4PPTP9bM7BfBoOTnp3+t5KQCen7KWkHZ5n83HyBIySYJiHa1cprq\n" +
64
                "Os8aBh+Mv178rPadGH1J7G3f46XZZq7hvOPFmY9BG91jy0eLBzTATvDPln6D+/jRpSny5qXtz4wp\n" +
65
                "HRlf1AKdrW2TonBIh7koqDoslHmQGhpxh7mDkVZ6e7ZAlevOLRvkw2Pi6g9HuoCF1dpwoWl0gfyz\n" +
66
                "Uk3pwgjJiu/Nhw/vV7K9s8L23dqPjk5Jx9AbkipHVzrIWvi64z7kk6wNWgtPh+PQQG7pWzALGVsH\n" +
67
                "BsTc6O0tsO8otDCu+cbqB252Ly5rz6Wga8hmgjMGK7qoc17mB2Et3AdnzKHiNrghwyhK9xwbgfsi\n" +
68
                "86R0dXu/19cowcSru92Xzzv6hb59vLvf/fF1v2/AcEk+GdMhGT5AuMx4eZBCvr6AOIVndXkDUhGv\n" +
69
                "ebDA9+GRm2S4Ne10YgWVaBvLeibVshDqcvbVqdvz098WbuUWmV/b2WpRPwX9pe3s4tj1XT1dBc6g\n" +
70
                "NOM+NBcnmOjBrdqdjNxWCeWHXwkKX9xuN0LtVGmwKJqwBgxWSVz+Aw5ePDg=";
71
        byte[] byteArray = Base64.decodeBase64(s.getBytes());
72
        Inflater decompresser = new Inflater();
73
        decompresser.setInput(byteArray);
74
        ByteArrayOutputStream bos = new ByteArrayOutputStream(byteArray.length);
75
        byte[] buffer = new byte[8192];
76
        while (!decompresser.finished()) {
77
            int size = decompresser.inflate(buffer);
78
            bos.write(buffer, 0, size);
79
        }
80
        byte[] unzippeddata = bos.toByteArray();
81
        decompresser.end();
82

    
83
        System.out.println(new String(unzippeddata));
84

    
85
    }
86

    
87

    
88
    private void doTestSingleDOIBoostActionToXML(final String filePath) throws Exception {
89
        final List<Row> rows = Lists.newArrayList();
90
        final InputStream is = this.getClass().getResourceAsStream(filePath);
91
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
92

    
93
        String line = in.readLine();
94

    
95
        final JsonParser parser = new JsonParser();
96
        JsonObject root = parser.parse(line).getAsJsonObject();
97
        List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false);
98
        if (actions!= null) {
99
            actions.forEach(action-> {
100
                if (action.getTargetColumn().equals("body") && action.getTargetColumnFamily().equals("result"))
101
                {
102
                    Column<String, byte[]> col = new Column<>("body" , action.getTargetValue());
103
                    rows.add(new Row("result",action.getTargetRowKey() , Lists.newArrayList(col)));
104
                }
105

    
106
            });
107

    
108

    
109

    
110
        }
111

    
112

    
113

    
114
    }
115

    
116
    private void doTestSingleDOIBoostAction(final String filePath) throws IOException {
117
        final InputStream is = this.getClass().getResourceAsStream(filePath);
118
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
119

    
120
        String line = in.readLine();
121

    
122
        final JsonParser parser = new JsonParser();
123
        JsonObject root = parser.parse(line).getAsJsonObject();
124
        List<AtomicAction> actions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false);
125
        if (actions!= null) {
126

    
127
            actions.forEach(it -> System.out.println(String.format(" RowKey:%s TargetColumnFamily:%s   TargetColumn: %s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn())));
128
        }
129
    }
130

    
131
    private void doTestAllDOIBoostAction(final String filePath) throws IOException {
132
        final InputStream is = this.getClass().getResourceAsStream(filePath);
133
        final BufferedReader in = new BufferedReader(new InputStreamReader(is));
134

    
135
        String line = in.readLine();
136
        int i = 0;
137
        int cnt = 0;
138
        while(StringUtils.isNotBlank(line)) {
139
            cnt ++;
140

    
141
            final JsonParser parser = new JsonParser();
142
            JsonObject root = parser.parse(line).getAsJsonObject();
143
            try {
144
                List<AtomicAction> atomicActions = DOIBoostToActions.generatePublicationActionsFromDump(root, new ActionFactory(), setName, agent, false, false);
145
                if (atomicActions!= null)
146
                {
147
                    i ++;
148
                }
149
//                    atomicActions.forEach(it -> System.out.println(String.format(" RowKey:%s TargetColumnFamily:%s   TargetColumn: %s", it.getTargetRowKey(), it.getTargetColumnFamily(), it.getTargetColumn())));
150
                else{
151
                    System.out.println("SKIPPED Type "+ root.get("type").getAsString());
152
                }
153

    
154
            } catch (Throwable e) {
155
                System.out.println(line);
156
                throw new RuntimeException(e);
157
            }
158
            line= in.readLine();
159
        }
160

    
161
        System.out.println("total "+i+" / "+cnt);
162
    }
163

    
164

    
165

    
166
}
(2-2/3)