Revision 46238

Changed the way the DLIObject is created, to avoid Java heap space errors.
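The commented-out DNGFDLIConverter in this changeset built, for every record, a DLIObject in which each relation embeds a fully built target object (makeDLIObject(relation.getCachedTarget())), so the whole graph sat on the heap before serialization; presumably this is the heap pressure the commit message refers to, while DNGFDLISummaryConverter now gains a convertAsJson() that serializes a compact per-record DLIObjectSummary straight to a JSON string. The sketch below only illustrates that general difference; Record, convertFullObject, convertAsSummary and sink are hypothetical names for illustration, not classes from this codebase.

    import java.util.List;
    import java.util.function.Consumer;
    import java.util.stream.Collectors;

    // Illustrative sketch only: "Record" is a placeholder type, not one of the
    // project's DNGF/DLIObject protobuf classes.
    final class RecordConversionSketch {

        static final class Record {
            final String id;
            final List<Record> relatedTargets;
            Record(final String id, final List<Record> relatedTargets) {
                this.id = id;
                this.relatedTargets = relatedTargets;
            }
        }

        // Old shape: build one large object per record, embedding a copy of every
        // related target, and only then serialise it (memory grows with the graph).
        static String convertFullObject(final Record r) {
            final String nested = r.relatedTargets.stream()
                    .map(t -> "{\"id\":\"" + t.id + "\"}")
                    .collect(Collectors.joining(","));
            return "{\"id\":\"" + r.id + "\",\"relations\":[" + nested + "]}";
        }

        // New shape: emit a compact per-record summary as JSON and hand it to the
        // sink immediately, so nothing larger than one summary stays on the heap.
        static void convertAsSummary(final Record r, final Consumer<String> sink) {
            sink.accept("{\"id\":\"" + r.id + "\",\"relatedCount\":" + r.relatedTargets.size() + "}");
        }
    }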

View differences:

modules/dnet-dli/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/index.hbase.test.xml
@@ -37,26 +37,6 @@
                         <PARAM name="cluster" ref="cluster"/>
                     </PARAMETERS>
                     <ARCS>
-                        <ARC to="prepareData"/>
-                    </ARCS>
-                </NODE>
-
-
-                <NODE name="prepareData" type="SubmitHadoopJob">
-                    <DESCRIPTION>index data on ES</DESCRIPTION>
-                    <PARAMETERS>
-                        <PARAM name="hadoopJob" value="prepareDLIDataJob"/>
-                        <PARAM name="cluster" ref="cluster"/>
-                        <PARAM name="jobParams">
-                            <MAP>
-                                <ENTRY key="hbase.mapred.inputtable" ref="hbaseTable"/>
-                                <ENTRY key="mapred.output.dir" ref="workDirObject"/>
-                                <ENTRY key="hbase.mapreduce.inputtable" ref="hbaseTable"/>
-                                <ENTRY key="ontologies" env="ontologies"/>
-                            </MAP>
-                        </PARAM>
-                    </PARAMETERS>
-                    <ARCS>
                         <ARC to="deleteIndex"/>
                     </ARCS>
                 </NODE>
@@ -66,37 +46,25 @@
                     <PARAMETERS>
                         <PARAM name="index" value="dli"/>
                         <PARAM name="type" value="object"/>
-                        <PARAM name="host" value="node0-d-dli.d4science.org"/>
+                        <PARAM name="host" value="ip-90-147-167-27.ct1.garrservices.it"/>
                     </PARAMETERS>
                     <ARCS>
-                        <ARC to="indexDataObject"/>
+                        <ARC to="prepareData"/>
                     </ARCS>
                 </NODE>
 
-                <NODE name="indexDataObject" type="SubmitHadoopJob">
-                    <DESCRIPTION>index data on ES</DESCRIPTION>
-                    <PARAMETERS>
-                        <PARAM name="hadoopJob" value="elasticsearchFeedJob"/>
-                        <PARAM name="cluster" ref="cluster"/>
-                        <PARAM name="jobParams">
-                            <MAP>
-                                <ENTRY key="mapred.input.dir" ref="workDirObject"/>
-                            </MAP>
-                        </PARAM>
-                    </PARAMETERS>
-                    <ARCS>
-                        <ARC to="indexDataScholix"/>
-                    </ARCS>
-                </NODE>
 
-                <NODE name="indexDataScholix" type="SubmitHadoopJob">
+                <NODE name="prepareData" type="SubmitHadoopJob">
                     <DESCRIPTION>index data on ES</DESCRIPTION>
                     <PARAMETERS>
-                        <PARAM name="hadoopJob" value="elasticsearchFeedScholixJob"/>
+                        <PARAM name="hadoopJob" value="prepareDLIDataJob"/>
                         <PARAM name="cluster" ref="cluster"/>
                         <PARAM name="jobParams">
                             <MAP>
-                                <ENTRY key="mapred.input.dir" ref="workDirObject"/>
+                                <ENTRY key="hbase.mapred.inputtable" ref="hbaseTable"/>
+                                <ENTRY key="mapred.output.dir" ref="workDirObject"/>
+                                <ENTRY key="hbase.mapreduce.inputtable" ref="hbaseTable"/>
+                                <ENTRY key="ontologies" env="ontologies"/>
                             </MAP>
                         </PARAM>
                     </PARAMETERS>
@@ -105,6 +73,39 @@
                     </ARCS>
                 </NODE>
 
+
+                <!--<NODE name="indexDataObject" type="SubmitHadoopJob">-->
+                <!--<DESCRIPTION>index data on ES</DESCRIPTION>-->
+                <!--<PARAMETERS>-->
+                <!--<PARAM name="hadoopJob" value="elasticsearchFeedJob"/>-->
+                <!--<PARAM name="cluster" ref="cluster"/>-->
+                <!--<PARAM name="jobParams">-->
+                <!--<MAP>-->
+                <!--<ENTRY key="mapred.input.dir" ref="workDirObject"/>-->
+                <!--</MAP>-->
+                <!--</PARAM>-->
+                <!--</PARAMETERS>-->
+                <!--<ARCS>-->
+                <!--<ARC to="success"/>-->
+                <!--</ARCS>-->
+                <!--</NODE>-->
+
+                <!--<NODE name="indexDataScholix" type="SubmitHadoopJob">-->
+                <!--<DESCRIPTION>index data on ES</DESCRIPTION>-->
+                <!--<PARAMETERS>-->
+                <!--<PARAM name="hadoopJob" value="elasticsearchFeedScholixJob"/>-->
+                <!--<PARAM name="cluster" ref="cluster"/>-->
+                <!--<PARAM name="jobParams">-->
+                <!--<MAP>-->
+                <!--<ENTRY key="mapred.input.dir" ref="workDirObject"/>-->
+                <!--</MAP>-->
+                <!--</PARAM>-->
+                <!--</PARAMETERS>-->
+                <!--<ARCS>-->
+                <!--<ARC to="success"/>-->
+                <!--</ARCS>-->
+                <!--</NODE>-->
+
             </WORKFLOW>
         </CONFIGURATION>
     </BODY>
modules/dnet-dli-domain/trunk/src/main/java/eu/dnetlib/dli/proto/DNGFDLIConverter.java
package eu.dnetlib.dli.proto;

import java.util.*;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import eu.dnetlib.data.proto.*;
import eu.dnetlib.data.proto.KindProtos.Kind;

import static eu.dnetlib.data.proto.DNGFProtos.*;
import static eu.dnetlib.data.proto.dli.DLIObjectProtos.*;


/**
 * Created by sandro on 2/13/17.
 */
//public class DNGFDLIConverter {
//
//    private DNGF mainDNGFEntity;
//
//    private List<DNGF> relations = new ArrayList<>();
//
//    public void setMainEntity(final DNGF mainEntity) {
//        this.mainDNGFEntity = mainEntity;
//    }
//
//    public void addRelation(final DNGF relation) {
//        this.relations.add(relation);
//    }
//
//    public boolean isValid() {
//        return !relations.isEmpty() && mainDNGFEntity != null && mainDNGFEntity.getKind().equals(Kind.entity);
//    }
//
//    public DLIObject convert() {
//        if (relations.isEmpty() || mainDNGFEntity == null) {
//            throw new IllegalStateException("missing relations or mainEntity");
//        }
//        if (mainDNGFEntity.getKind().equals(KindProtos.Kind.relation)) {
//            throw new IllegalStateException("the main Entity must be an Entity and not a relation");
//        }
//
//        final DNGFEntity entity = mainDNGFEntity.getEntity();
//
//        final DLIObject.Builder builder = makeDLIObject(entity);
//
//
//        this.relations.forEach(r -> manageRelation(r.getRel(), builder));
//
//        //Setting Titles
//        return builder.build();
//    }
//
//    public DLIObjectSummary summaryOf(final DLIObject input) {
//        final DLIObjectSummary.Builder summary = DLIObjectSummary.newBuilder();
//        summary.setId(input.getId());
//        if (input.getMetadata().getTitleList() != null)
//            summary.addAllTitle(input.getMetadata().getTitleList());
//        if (input.getMetadata().getAuthorList() != null)
//            summary.addAllAuthor(input.getMetadata().getAuthorList());
//        if (input.getMetadata().getDateList() != null)
//            summary.addAllDate(input.getMetadata().getDateList());
//
//        summary.setAbstract(input.getMetadata().getAbstract());
//
//        if (input.getMetadata().getSubjectList() != null)
//            summary.addAllSubject(input.getMetadata().getSubjectList());
//
//        if (input.getMetadata().getPublisherList() != null)
//            summary.addAllPublisher(input.getMetadata().getPublisherList());
//
//        if (input.getLocalIdentifierList() != null)
//            summary.addAllLocalIdentifier(input.getLocalIdentifierList());
//
//        final Map<String, CollectedFromType> datasources = new HashMap<>();
//
//        final List<CollectedFromType> collectedFromList = input.getMetadata().getCollectedFromList();
//        if (collectedFromList != null) {
//            collectedFromList.forEach(c -> {
//                if (c.getDatasourceId() != null) {
//                    if (!datasources.containsKey(c.getDatasourceId()))
//                        datasources.put(c.getDatasourceId(), c);
//                    else
//                        datasources.put(c.getDatasourceId(), CollectedFromType.newBuilder(c).setCompletionStatus(CompletionStatus.incomplete).build());
//                }
//            });
//        }
//
//        if (input.getRelationsList() != null) {
//
//            Map<Typology, Integer> countRelations = new HashMap<>();
//            countRelations.put(Typology.publication, 0);
//            countRelations.put(Typology.dataset, 0);
//            countRelations.put(Typology.unknown, 0);
//
//            summary.setTypology(input.getMetadata().getType());
//            input.getRelationsList().forEach(rel -> {
//                Typology t = rel.getTarget().getMetadata().getType();
//                switch (t) {
//                    case publication:
//                        countRelations.put(Typology.publication, countRelations.get(Typology.publication) + 1);
//                        break;
//                    case dataset:
//                        countRelations.put(Typology.dataset, countRelations.get(Typology.dataset) + 1);
//                        break;
//                    case unknown:
//                        countRelations.put(Typology.unknown, countRelations.get(Typology.unknown) + 1);
//                        break;
//                }
//            });
//            summary.setRelatedPublications(countRelations.get(Typology.publication));
//            summary.setRelatedDatasets(countRelations.get(Typology.dataset));
//            summary.setRelatedUnknown(countRelations.get(Typology.unknown));
//
//
//            final List<ResolvedFromType> resolvedFromTypeList = input.getMetadata().getResolvedFromList();
//            resolvedFromTypeList.forEach(r -> {
//                if (r.getDatasourceId() != null) {
//                    datasources.put(r.getDatasourceId(),
//                            CollectedFromType.newBuilder()
//                                    .setDatasourceId(r.getDatasourceId())
//                                    .setDatasourceName(r.getDatasourceName())
//                                    .setCompletionStatus(CompletionStatus.complete)
//                                    .build());
//                }
//            });
//
//
//            summary.addAllDatasources(datasources.values());
//
//
//        }
//
//        return summary.build();
//
//    }
//
//    private DLIObject.Builder makeDLIObject(DNGFEntity entity) {
//        //Set main Identifier
//        final DLIObject.Builder builder = DLIObject.newBuilder();
//        final Set<String> authorList = new HashSet<>();
//        builder.setId(entity.getId());
//
//        //Set all typed Identifier
//        final List<FieldTypeProtos.StructuredProperty> identifiers = entity.getExtension(DliProtos.typedIdentifier);
//        if (identifiers == null || identifiers.isEmpty()) {
//            throw new IllegalStateException(String.format("missing typedIdentifiers on main Entity \n%s", entity));
//
//        }
//
//        identifiers.stream()
//                .collect(
//                        Collectors.groupingBy(s -> String.format("%s::%s", s.getValue().toLowerCase().trim(), s.getQualifier().getClassid().toLowerCase().trim())))
//                .values()
//                .forEach(
//                        v -> builder.addLocalIdentifier(
//                        TypedIdentifier.newBuilder()
//                                .setId(v.get(0).getValue())
//                                .setType(v.get(0).getQualifier().getClassid())
//                ));
//
//        Metadata.Builder dliMetadata = Metadata.newBuilder();
//
//        //ADD Resolved From
//        final List<FieldTypeProtos.KeyValue> resolvedFromList = entity.getExtension(DliProtos.resolvedfrom);
//        if (resolvedFromList != null) {
//            Map<String, FieldTypeProtos.KeyValue> setRFrom = new HashMap<>();
//            resolvedFromList.forEach(rfrom -> setRFrom.put(rfrom.getKey(), rfrom));
//            setRFrom.values().forEach(r -> dliMetadata.addResolvedFrom(
//                    ResolvedFromType.newBuilder()
//                            .setDatasourceId(r.getKey())
//                            .setDatasourceName(r.getValue())
//                            .build()
//            ));
//        }
//
//        final List<FieldTypeProtos.KeyValue> collectedFromList = entity.getCollectedfromList();
//        if (collectedFromList != null) {
//            Map<String, FieldTypeProtos.KeyValue> setCFrom = new HashMap<>();
//            collectedFromList.forEach(cfrom -> setCFrom.put(cfrom.getKey(), cfrom));
//            setCFrom.values().forEach(c -> {
//                final String completionStatus = c.getExtension(DliFieldTypeProtos.completionStatus);
//                final CollectedFromType.Builder cType = CollectedFromType.newBuilder();
//                cType.setDatasourceId(c.getKey());
//                cType.setDatasourceName(c.getValue());
//                cType.setCompletionStatus(CompletionStatus.valueOf(completionStatus));
//                dliMetadata.addCollectedFrom(cType);
//            });
//        }
//
//        final String completionStatus = entity.getExtension(DliProtos.completionStatus);
//        dliMetadata.setCompletionStatus(CompletionStatus.valueOf(completionStatus));
//
//        //populateAuthor(entity, authorList);
//        //Setting Metadata
//        switch (entity.getType()) {
//            case dataset:
//                dliMetadata.setType(Typology.dataset);
//                setMetadataFromDataset(entity.getDataset(), dliMetadata, authorList);
//                break;
//            case publication:
//                dliMetadata.setType(Typology.publication);
//                setMetadataFromPublication(entity.getPublication(), dliMetadata, authorList);
//                break;
//            case unknown:
//                dliMetadata.setType(Typology.unknown);
//                break;
//            default:
//                throw new IllegalStateException(String.format("wrong Type for mainEntity: \n%s", entity));
//        }
//        return builder.setMetadata(dliMetadata);
//    }
//
////    private void populateAuthor(final DNGFEntity entity, final Set<String> authorList) {
////        if (entity.getChildrenList() != null) {
////            entity.getChildrenList().forEach(dedupItem -> {
////                List<PersonProtos.Person> personList = null;
////                List<FieldTypeProtos.StringField> contributorList = null;
////                switch (dedupItem.getType()) {
////                    case publication:
////                        personList = dedupItem.getPublication().getAuthorList();
////                        contributorList = entity.getDataset().getMetadata().getContributorList();
////                        break;
////                    case dataset:
////                        personList = dedupItem.getDataset().getAuthorList();
////                        contributorList = entity.getDataset().getMetadata().getContributorList();
////                        break;
////                }
////                if (contributorList != null) {
////                    contributorList.forEach(c -> authorList.add(c.getValue()));
////                }
////                if (personList != null) {
////                    personList.forEach(person -> {
////                        authorList.add(person.getMetadata().getFullname().getValue());
////                    });
////                }
////            });
////        }
////    }
//
//    private void manageRelation(final DNGFRel relation, final DLIObject.Builder dliObj) {
//        final DLIObject.Builder targetObject = makeDLIObject(relation.getCachedTarget());
//
//        final DLIRelation.Builder relBuilder = DLIRelation.newBuilder()
//                .setTarget(targetObject)
//                .setRelationSemantic(SchemeValue.newBuilder().setScheme("datacite").setValue(relation.getRelType().getClassid()));
//
//        final List<FieldTypeProtos.KeyValue> collectedFromList = relation.getCollectedfromList();
//        if (collectedFromList != null) {
//            collectedFromList.forEach(c ->
//                    relBuilder.addCollectedFrom(
//                            CollectedFromType.newBuilder()
//                                    .setDatasourceName(c.getValue())
//                                    .setDatasourceId(c.getKey())
//                                    .setCompletionStatus(CompletionStatus.complete)
//                                    .build()
//                    )
//            );
//        }
//
//        dliObj.addRelations(relBuilder.build());
//    }
//
//
//
//    private void managePublicationDates(final PublicationProtos.Publication publication, final Metadata.Builder metadata) {
//        if (publication.getMetadata().getDateofacceptance() != null) {
//            metadata.addDate(publication.getMetadata().getDateofacceptance().getValue());
//        }
//
//        if (publication.getMetadata().getRelevantdateList() != null && !publication.getMetadata().getRelevantdateList().isEmpty()) {
//            publication.getMetadata().getRelevantdateList().forEach(
//                    date -> metadata.addDate(date.getValue())
//            );
//        }
//    }
//
//    private void manageDatasetDates(final DatasetProtos.Dataset dataset, final Metadata.Builder metadata) {
//        if (dataset.getMetadata().getDateofacceptance() != null) {
//            metadata.addDate(dataset.getMetadata().getDateofacceptance().getValue());
//        }
//
//        if (dataset.getMetadata().getRelevantdateList() != null && !dataset.getMetadata().getRelevantdateList().isEmpty()) {
//            dataset.getMetadata().getRelevantdateList().forEach(
//                    date -> metadata.addDate(date.getValue())
//            );
//        }
//
//    }
//
//    public DNGF getMainDNGFEntity() {
//        return mainDNGFEntity;
//    }
//
//    public void setMainDNGFEntity(DNGF mainDNGFEntity) {
//        this.mainDNGFEntity = mainDNGFEntity;
//    }
//
//    public List<DNGF> getRelations() {
//        return relations;
//    }
//
//    public void setRelations(List<DNGF> relations) {
//        this.relations = relations;
//    }
//}
modules/dnet-dli-domain/trunk/src/main/java/eu/dnetlib/dli/proto/DNGFDLISummaryConverter.java
@@ -1,5 +1,6 @@
 package eu.dnetlib.dli.proto;
 
+import com.googlecode.protobuf.format.JsonFormat;
 import eu.dnetlib.data.proto.*;
 import eu.dnetlib.data.proto.dli.DLIObjectProtos;
 
@@ -45,7 +46,13 @@
 
     }
 
+    public String convertAsJson() {
+        return JsonFormat.printToString(convert());
 
+
+    }
+
+
     public DLIObjectSummary convert() {
         if (mainDNGFEntity == null) {
             return null;
@@ -149,9 +156,7 @@
                     contributorList.forEach(c -> authorList.add(c.getValue()));
                 }
                 if (personList != null) {
-                    personList.forEach(person -> {
-                        authorList.add(person.getMetadata().getFullname().getValue());
-                    });
+                    personList.forEach(person -> authorList.add(person.getMetadata().getFullname().getValue()));
                 }
             });
         }
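
For reference, the convertAsJson() added above uses the static JsonFormat.printToString(...) call from the protobuf-java-format library (com.googlecode.protobuf.format). Below is a minimal, self-contained sketch of that same call applied to an arbitrary protobuf message; ProtoJson and toJson are hypothetical names used only for illustration and are not part of this codebase.

    import com.google.protobuf.Message;
    import com.googlecode.protobuf.format.JsonFormat;

    // Hypothetical helper for illustration: serialise any protobuf message
    // (for example the DLIObjectSummary returned by convert()) to a JSON string,
    // mirroring the JsonFormat.printToString(convert()) call added in this revision.
    public final class ProtoJson {

        private ProtoJson() {
        }

        public static String toJson(final Message message) {
            // JsonFormat.printToString(Message) is provided by protobuf-java-format.
            return JsonFormat.printToString(message);
        }
    }

Serialising each summary to JSON as soon as it is built means the caller can write the string out per record instead of keeping converted objects around, which appears consistent with the heap-space motivation stated in the commit message.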
