Project

General

Profile

1 27955 claudio.at
package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;
2
3 42734 eri.katsar
import com.google.common.collect.Multimap;
4 47072 tsampikos.
5 27955 claudio.at
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
6
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
7
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
8 34194 eri.katsar
import eu.dnetlib.data.proto.FieldTypeProtos;
9 29712 eri.katsar
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
10
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
11
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
12 27955 claudio.at
import eu.dnetlib.data.proto.OafProtos.Oaf;
13
import eu.dnetlib.data.proto.OafProtos.OafEntity;
14
import eu.dnetlib.data.proto.OafProtos.OafRel;
15
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
16
import eu.dnetlib.data.proto.ProjectProtos.Project;
17
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
18
import eu.dnetlib.data.proto.ResultProtos.Result;
19
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
20 34084 eri.katsar
import org.apache.log4j.Logger;
21 27955 claudio.at
22 54431 tsampikos.
import java.text.DateFormat;
23
import java.text.ParseException;
24 34084 eri.katsar
import java.text.SimpleDateFormat;
25
import java.util.Date;
26
import java.util.List;
27
28 47072 tsampikos.
import org.w3c.dom.Element;
29
import org.w3c.dom.NodeList;
30
import org.xml.sax.InputSource;
31
import com.sun.org.apache.xerces.internal.parsers.DOMParser;
32
import org.w3c.dom.Document;
33
34 27955 claudio.at
/**
 * Simple serializer that parses input Oaf protos and prepares them for sqoop.
 *
 * @author eri
 */
38 28471 eri.katsar
public class Serializer {
39 55644 antonis.le
    /** Class-wide log4j logger. */
    private static final Logger logger = Logger.getLogger(Serializer.class);

    /** Separator appended after every serialized field. */
    private final String DELIM;

    /** String placed immediately before and after every serialized field value. */
    private final String ENCLOSING;

    /**
     * Creates a serializer that emits enclosed, delimiter-terminated fields.
     *
     * @param DELIM     separator appended after every field
     * @param ENCLOSING string placed before and after every field value
     */
    public Serializer(String DELIM, String ENCLOSING) {
        this.DELIM = DELIM;
        this.ENCLOSING = ENCLOSING;
    }
48
49
    public String serialize(Oaf oaf) {
50
51 41790 eri.katsar
        switch (oaf.getKind()) {
52
            case entity:
53
                OafEntity valueEntity = oaf.getEntity();
54 27955 claudio.at
55 41790 eri.katsar
                switch (valueEntity.getType()) {
56
                    case datasource:
57 27955 claudio.at
58 56504 antonis.le
                        return buildDatasource(oaf);
59 27955 claudio.at
60 41790 eri.katsar
                    case organization:
61 27955 claudio.at
62 56504 antonis.le
                        return buildOrganization(oaf);
63 27955 claudio.at
64 41790 eri.katsar
                    case project:
65 36921 eri.katsar
66 56504 antonis.le
                        return buildProject(oaf);
67 41790 eri.katsar
                    case result:
68 36921 eri.katsar
69 56504 antonis.le
                        return buildResult(oaf);
70 41790 eri.katsar
                    default:
71
                        break;
72
                }
73
                break;
74
            case relation:
75 56504 antonis.le
                return buildRel(oaf.getRel());
76 56505 antonis.le
        }
77 27955 claudio.at
78 41790 eri.katsar
        return null;
79
    }
80 27955 claudio.at
81 56504 antonis.le
    public String serialize(OafRel oaf) {
82 27955 claudio.at
83 41790 eri.katsar
        switch (oaf.getRelType()) {
84
            case resultProject:
85 56504 antonis.le
                return getResultProject(oaf);
86 41790 eri.katsar
            default:
87 56504 antonis.le
                return buildRel(oaf);
88 41790 eri.katsar
        }
89
    }
90 27955 claudio.at
91 56504 antonis.le
    private String buildRel(OafRel Rel) {
92
        return cleanId(Rel.getTarget()) + DELIM;
93 41790 eri.katsar
    }
94 27955 claudio.at
95 56504 antonis.le
    public void extractRelations(Oaf oaf, Multimap<String, String> relations) {
96 42734 eri.katsar
        OafEntity valueEntity = oaf.getEntity();
97 56504 antonis.le
        getOriginalId(valueEntity, relations);
98 36689 eri.katsar
99 41790 eri.katsar
        switch (valueEntity.getType()) {
100
            case datasource:
101 56504 antonis.le
                getDatasourceLanguages(valueEntity, relations);
102 57089 antonis.le
                break;
103 41790 eri.katsar
            case result:
104 56504 antonis.le
                getResultTopics(valueEntity, relations);
105
                getResultLanguages(valueEntity, relations);
106
                getResultClassifications(valueEntity, relations);
107
                getResultDatasources(valueEntity, relations);
108
                getResultConcepts(valueEntity, relations);
109
                getResultDois(valueEntity, relations);
110
                getResultCitations(valueEntity, relations);
111 57089 antonis.le
                break;
112 42734 eri.katsar
113
            case project:
114 56504 antonis.le
                getProjectKeywords(valueEntity, relations);
115
                getProjectSubjects(valueEntity, relations);
116 57089 antonis.le
                break;
117 42734 eri.katsar
        }
118 29637 eri.katsar
119 42734 eri.katsar
    }
120
121 56504 antonis.le
    private void getOriginalId(OafEntity oafEntity, Multimap<String, String> relations) {
122 42734 eri.katsar
123
        String relName = oafEntity.getType().toString().toLowerCase() + "Oid";
124
        for (String oid : oafEntity.getOriginalIdList()) {
125 56504 antonis.le
            relations.put(relName, cleanId(oid));
126 41790 eri.katsar
        }
127 29739 eri.katsar
128 41790 eri.katsar
    }
129 27955 claudio.at
130 56504 antonis.le
    private void getProjectKeywords(OafEntity oafEntity, Multimap<String, String> relations) {
131
        relations.put("projectKeyword", getStringField(oafEntity.getProject().getMetadata().getKeywords().getValue()));
132 29637 eri.katsar
133 42734 eri.katsar
    }
134 27955 claudio.at
135 56504 antonis.le
    private void getProjectSubjects(OafEntity oafEntity, Multimap<String, String> relations) {
136 42734 eri.katsar
        for (StructuredProperty subj : oafEntity.getProject().getMetadata().getSubjectsList()) {
137 56504 antonis.le
            relations.put("projectSubject", getStringField(subj.getValue()));
138 42734 eri.katsar
        }
139
    }
140
141 56504 antonis.le
    private String getResultProject(OafRel oaf) {
142 42734 eri.katsar
        StringBuilder buff = new StringBuilder();
143 56508 antonis.le
        buff.append(cleanId(oaf.getTarget())).append(DELIM);
144
        // is declared as int!!!
145 41790 eri.katsar
        long diff = DATEDIFF(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate());
146 56508 antonis.le
147 41790 eri.katsar
        if (diff < 0) {
148
            diff = 0;
149
        }
150 27955 claudio.at
151 56504 antonis.le
        buff.append(getNumericField(String.valueOf(diff)));
152 42734 eri.katsar
        return buff.toString();
153 41790 eri.katsar
    }
154 27955 claudio.at
155
156 56504 antonis.le
    private void getDatasourceLanguages(OafEntity valueEntity, Multimap<String, String> rels) {
157 41790 eri.katsar
        Datasource d = valueEntity.getDatasource();
158
        Metadata metadata = d.getMetadata();
159 27955 claudio.at
160 41790 eri.katsar
        for (StringField lang : metadata.getOdlanguagesList()) {
161 56504 antonis.le
            rels.put("datasourceLanguage", getStringField(lang.getValue()));
162 41790 eri.katsar
        }
163
    }
164 27955 claudio.at
165 56504 antonis.le
    private void getResultLanguages(OafEntity valueEntity, Multimap<String, String> rels) {
166 42734 eri.katsar
167 41790 eri.katsar
        Result d = valueEntity.getResult();
168 42734 eri.katsar
        Result.Metadata metadata = d.getMetadata();
169
        if (metadata.getLanguage().getClassname() != null && !metadata.getLanguage().getClassname().isEmpty()) {
170 56504 antonis.le
            rels.put("resultLanguage", getStringField(metadata.getLanguage().getClassname()));
171 42734 eri.katsar
        }
172 27955 claudio.at
173 42734 eri.katsar
    }
174 27955 claudio.at
175 56504 antonis.le
    private void getResultDois(OafEntity valueEntity, Multimap<String, String> rels) {
176 27955 claudio.at
177 42734 eri.katsar
        for (StructuredProperty pid : valueEntity.getPidList()) {
178 56504 antonis.le
            rels.put("resultPid", getStringField(pid.getQualifier().getClassname()) + getStringField(pid.getValue()));
179 41790 eri.katsar
        }
180
    }
181 27955 claudio.at
182 56504 antonis.le
    private void getResultClassifications(OafEntity valueEntity, Multimap<String, String> rels) {
183 27955 claudio.at
184 41790 eri.katsar
        Result result = valueEntity.getResult();
185 29735 eri.katsar
186 41790 eri.katsar
        for (Instance instance : (result.getInstanceList())) {
187
            String classification = instance.getInstancetype().getClassname();
188 56504 antonis.le
189 41790 eri.katsar
            if (classification != null && !classification.isEmpty()) {
190 56504 antonis.le
                rels.put("resultClassification", getStringField(instance.getInstancetype().getClassname()));
191 41790 eri.katsar
            }
192
        }
193 42734 eri.katsar
    }
194 27955 claudio.at
195 56504 antonis.le
    private void getResultConcepts(OafEntity valueEntity, Multimap<String, String> rels) {
196 41790 eri.katsar
        Result result = valueEntity.getResult();
197 27955 claudio.at
198 42734 eri.katsar
        for (Result.Context context : result.getMetadata().getContextList()) {
199 56504 antonis.le
            rels.put("resultConcept", cleanId(context.getId()));
200 41790 eri.katsar
        }
201
    }
202 27955 claudio.at
203 56504 antonis.le
    private void getResultDatasources(OafEntity valueEntity, Multimap<String, String> rels) {
204 41790 eri.katsar
        Result result = valueEntity.getResult();
205 29382 eri.katsar
206 56505 antonis.le
    // hosted by
207 41790 eri.katsar
        for (Instance instance : (result.getInstanceList())) {
208
            String hostedBy = instance.getHostedby().getKey();
209 56504 antonis.le
210 41790 eri.katsar
            if (hostedBy != null && !hostedBy.isEmpty()) {
211 56504 antonis.le
                rels.put("resultDatasource", cleanId(hostedBy) + DELIM);
212 41790 eri.katsar
            }
213
        }
214 34084 eri.katsar
215 56505 antonis.le
    // collected from
216 41790 eri.katsar
        for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) {
217 56504 antonis.le
            String collectedFrom = collectedFromValue.getKey();
218 34084 eri.katsar
219 42734 eri.katsar
            if (collectedFrom != null && !collectedFrom.isEmpty()) {
220 56504 antonis.le
                rels.put("resultDatasource", cleanId(collectedFrom) + DELIM);
221 42734 eri.katsar
            }
222 41790 eri.katsar
        }
223
    }
224 29386 eri.katsar
225 56504 antonis.le
    private void getResultTopics(OafEntity valueEntity, Multimap<String, String> rels) {
226 42734 eri.katsar
        Result d = valueEntity.getResult();
227
        Result.Metadata metadata = d.getMetadata();
228
        List<StructuredProperty> Topics = metadata.getSubjectList();
229 29754 eri.katsar
230 42734 eri.katsar
        for (StructuredProperty topic : Topics) {
231 56504 antonis.le
            rels.put("resultTopic", getStringField(topic.getValue()));
232 41790 eri.katsar
        }
233
    }
234 29957 eri.katsar
235 27955 claudio.at
236 56504 antonis.le
    private void getResultCitations(OafEntity oafEntity, Multimap<String, String> rels) {
237 42734 eri.katsar
        for (FieldTypeProtos.ExtraInfo extraInfo : oafEntity.getExtraInfoList()) {
238
            if (extraInfo.getName().equals("result citations")) {
239 47072 tsampikos.
                DOMParser parser = new DOMParser();
240
                try {
241
                    parser.parse(new InputSource(new java.io.StringReader(extraInfo.getValue())));
242
                    Document doc = parser.getDocument();
243
                    doc.getDocumentElement().normalize();
244
245
                    NodeList citations = doc.getElementsByTagName("citation");
246
                    for (int temp = 0; temp < citations.getLength(); temp++) {
247
                        Element citation = (Element) citations.item(temp);
248
                        NodeList ids = citation.getElementsByTagName("id");
249
                        for(int temp1 = 0; temp1 < ids.getLength(); temp1++){
250
                            Element id = (Element) ids.item(temp1);
251
                            if(id.getAttribute("type").equals("openaire")){
252
                                //System.out.println(id.getAttribute("value"));
253
                                rels.put("resultCitation", id.getAttribute("value"));
254
                            }
255
                        }
256
                    }
257
                } catch (Exception e) {
258 56508 antonis.le
                    logger.error("Error getting result citations", e);
259 47072 tsampikos.
                }
260 41790 eri.katsar
            }
261
        }
262
    }
263 27955 claudio.at
264 56504 antonis.le
    private String buildDatasource(Oaf oaf) {
265 42734 eri.katsar
        Metadata metadata = oaf.getEntity().getDatasource().getMetadata();
266
        StringBuilder buff = new StringBuilder();
267 27955 claudio.at
268 41790 eri.katsar
        // name
269
        if (metadata.getOfficialname().getValue().equalsIgnoreCase("unknown")) {
270 56504 antonis.le
            buff.append(getStringField("Unknown Repository"));
271 41790 eri.katsar
        } else {
272 56504 antonis.le
            buff.append(getStringField(metadata.getOfficialname().getValue()));
273 41790 eri.katsar
        }
274 54431 tsampikos.
275 41790 eri.katsar
        // type
276 42734 eri.katsar
        if (metadata.hasDatasourcetype()) {
277 56504 antonis.le
            buff.append(getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", "")));
278 41790 eri.katsar
        }
279 27955 claudio.at
280 41790 eri.katsar
        // compatibility,
281 56504 antonis.le
        buff.append(getStringField(metadata.getOpenairecompatibility().getClassname()));
282 27955 claudio.at
283 55644 antonis.le
        // latitude
284 56504 antonis.le
        buff.append(getLatLongField(metadata.getLatitude().getValue()));
285 55644 antonis.le
286
        // longtitude
287 56504 antonis.le
        buff.append(getLatLongField(metadata.getLongitude().getValue()));
288 55644 antonis.le
289 41790 eri.katsar
        // dateofvalidation,
290 56504 antonis.le
        buff.append(getStringDateField(metadata.getDateofvalidation().getValue()));
291 27955 claudio.at
292 41790 eri.katsar
        // yearofvalidation,
293 56504 antonis.le
        buff.append(getYearInt(metadata.getDateofvalidation().getValue()));
294 27955 claudio.at
295 54431 tsampikos.
        //harvested
296 56504 antonis.le
        buff.append(getStringField("false"));
297 27955 claudio.at
298 45523 tsampikos.
        //piwik_id
299
        String piwik_id = "";
300
        for (String oid : oaf.getEntity().getOriginalIdList()) {
301
            if (oid.contains("piwik")) {
302
                piwik_id = oid.split(":")[1];
303
                break;
304
            }
305
        }
306 56504 antonis.le
        buff.append(getStringField(cleanNumber(piwik_id)));
307 45523 tsampikos.
308 57521 antonis.le
        buff.append(getStringField(metadata.getWebsiteurl().getValue()));
309
310 42734 eri.katsar
        return buff.toString();
311 27955 claudio.at
312 41790 eri.katsar
    }
313 27955 claudio.at
314 56504 antonis.le
    private String buildOrganization(Oaf oaf) {
315 27955 claudio.at
316 42734 eri.katsar
        StringBuilder buff = new StringBuilder();
317
        Organization.Metadata metadata = oaf.getEntity().getOrganization().getMetadata();
318 27955 claudio.at
319 41790 eri.katsar
        // `name`,
320 56504 antonis.le
        buff.append(getStringField(metadata.getLegalname().getValue()));
321 42734 eri.katsar
322 41790 eri.katsar
        // `country`,
323 56504 antonis.le
        buff.append(getStringField(metadata.getCountry().getClassid()));
324 29323 eri.katsar
325 42734 eri.katsar
        return buff.toString();
326 41790 eri.katsar
    }
327 27955 claudio.at
328 56504 antonis.le
    private String buildResult(Oaf oaf) {
329 42734 eri.katsar
        StringBuilder buff = new StringBuilder();
330 27955 claudio.at
331 42734 eri.katsar
        Result.Metadata metadata = oaf.getEntity().getResult().getMetadata();
332 27955 claudio.at
333 57521 antonis.le
        // originalId
334
        buff.append(getId(oaf)).append(DELIM);
335
336 56508 antonis.le
        String titleString = "";
337 56483 antonis.le
338 56508 antonis.le
        if (metadata.getTitleList().size() > 0) {
339
            StructuredProperty title = metadata.getTitleList().get(0);
340 56483 antonis.le
341 56508 antonis.le
            titleString = title.getValue().replaceAll("\\s+", " ");
342
            titleString = titleString.replaceAll("\n", " ");
343 56483 antonis.le
        }
344
345
        //  pubtitle
346 56504 antonis.le
        buff.append(getStringField(titleString));
347 56483 antonis.le
348 42734 eri.katsar
        //  publisher
349 56504 antonis.le
        buff.append(getStringField(metadata.getPublisher().getValue()));
350 42734 eri.katsar
351
        //  journal
352 56504 antonis.le
        buff.append(getStringField(metadata.getJournal().getName()));  //#null#!
353 42734 eri.katsar
354 41790 eri.katsar
        // year
355 56504 antonis.le
        buff.append(getYearInt(metadata.getDateofacceptance().getValue()));
356 27955 claudio.at
357 54431 tsampikos.
        // date
358 56504 antonis.le
        buff.append(getStringDateField(metadata.getDateofacceptance().getValue()));
359 29211 eri.katsar
360 41790 eri.katsar
        // bestlicense
361 56504 antonis.le
        buff.append(getStringField(getBestLicense(oaf.getEntity().getResult())));
362 29735 eri.katsar
363 41790 eri.katsar
        // type
364 56504 antonis.le
        buff.append(getStringField(metadata.getResulttype().getClassname()));
365 42734 eri.katsar
366 41790 eri.katsar
        // embargo_end_date
367 56504 antonis.le
        buff.append(getStringDateField(metadata.getEmbargoenddate().getValue()));
368 29637 eri.katsar
369 41790 eri.katsar
        // `authors`,
370 54431 tsampikos.
        int authors = metadata.getAuthorCount();
371 41790 eri.katsar
        String delayed = "no";
372 27955 claudio.at
373 42734 eri.katsar
        for (OafRel rel : oaf.getEntity().getCachedRelList()) {
374 48302 tsampikos.
            if (rel.getRelType().equals(RelType.resultProject))
375 56505 antonis.le
            // remember : in result Project, first id is project, second is result.
376 41790 eri.katsar
            {
377 42734 eri.katsar
                String daysfromend = getYearDifferenceInteger(rel.getResultProject().getOutcome().getRelMetadata().getEnddate(),
378 56504 antonis.le
                        rel.getResultProject().getOutcome().getRelMetadata().getStartdate());
379 41790 eri.katsar
                if (Integer.parseInt(daysfromend) > 0) {
380
                    delayed = "yes";
381
                }
382
            }
383
        }
384 42734 eri.katsar
385 41790 eri.katsar
        // `delayed`,
386 56504 antonis.le
        buff.append(getStringField(delayed));
387 42734 eri.katsar
        //authors
388 56504 antonis.le
        buff.append(getNumericField(String.valueOf(authors)));
389 29336 eri.katsar
390 57521 antonis.le
        String authorNames = "";
391
        for (FieldTypeProtos.Author author:metadata.getAuthorList()) {
392
            authorNames += author.getFullname() + ";";
393
        }
394
395
        buff.append(getStringField(authorNames));
396
397 56508 antonis.le
        String sources = "";
398 56483 antonis.le
399
        for (Instance instance : (oaf.getEntity().getResult().getInstanceList())) {
400
            List<String> urls = instance.getUrlList();
401
            for (String url : urls) {
402 56504 antonis.le
                sources += cleanUrl(url) + " ;";
403 56483 antonis.le
            }
404
        }
405
406
        //sources
407
        sources = ENCLOSING + sources + ENCLOSING + DELIM;
408
409
        buff.append(sources);
410
411 57089 antonis.le
        boolean hasAbstract = false;
412
        for (StringField desc:metadata.getDescriptionList()) {
413
            if (desc != null && desc.getValue() != null && !desc.getValue().trim().isEmpty())
414
                hasAbstract = true;
415
        }
416
417
        buff.append(getStringField(Boolean.toString(hasAbstract)));
418
419 42734 eri.katsar
        return buff.toString();
420
421 41790 eri.katsar
    }
422 31183 eri.katsar
423 56504 antonis.le
    private String getBestLicense(Result result) {
424 53034 tsampikos.
        Qualifier bestLicense = null;
425
        LicenseComparator lc = new LicenseComparator();
426
        for (Instance instance : (result.getInstanceList())) {
427 50242 tsampikos.
            if (lc.compare(bestLicense, instance.getAccessright()) > 0) {
428
                bestLicense = instance.getAccessright();
429 41790 eri.katsar
            }
430
        }
431
        if (bestLicense != null) {
432
            return bestLicense.getClassname();
433
        } else {
434 55644 antonis.le
            return "";
435 41790 eri.katsar
        }
436
    }
437 27955 claudio.at
438 56504 antonis.le
    private String buildProject(Oaf oaf) {
439 27955 claudio.at
440 56504 antonis.le
        FundingParser fundingParser = new FundingParser(DELIM, ENCLOSING);
441 42734 eri.katsar
        StringBuilder buff = new StringBuilder();
442
        Project.Metadata metadata = oaf.getEntity().getProject().getMetadata();
443 43392 tsampikos.
444 41790 eri.katsar
        // `acronym`,
445
        String acronym = metadata.getAcronym().getValue();
446
        if (acronym.equalsIgnoreCase("UNKNOWN")) {
447
            acronym = metadata.getTitle().getValue();
448
        }
449 56504 antonis.le
        buff.append(getStringField(acronym));
450 31183 eri.katsar
451 54431 tsampikos.
        //title
452 56504 antonis.le
        buff.append(getStringField(metadata.getTitle().getValue()));
453 41790 eri.katsar
454 54431 tsampikos.
        //funding_lvl
455 41790 eri.katsar
        List<StringField> fundList = metadata.getFundingtreeList();
456
        if (!fundList.isEmpty()) // `funding_lvl0`,
457
        {
458 56505 antonis.le
            //funder + 3 funding levels
459 56504 antonis.le
            buff.append(fundingParser.getFundingInfo(fundList.get(0).getValue()));
460 41790 eri.katsar
        } else {
461 56504 antonis.le
            buff.append(fundingParser.getFundingInfo(""));
462 41790 eri.katsar
        }
463 36995 eri.katsar
464 54431 tsampikos.
        //sc39
465 56508 antonis.le
        String sc39 = metadata.getEcsc39().getValue();
466 41790 eri.katsar
        if (sc39.equalsIgnoreCase("true") || sc39.equalsIgnoreCase("t") || sc39.contains("yes")) {
467
            sc39 = "yes";
468
        } else if (sc39.equalsIgnoreCase("false") || sc39.equalsIgnoreCase("f") || sc39.contains("no")) {
469
            sc39 = "no";
470
        }
471 56504 antonis.le
        buff.append(getStringField(sc39));
472 27955 claudio.at
473 45523 tsampikos.
        //project_type
474 56504 antonis.le
        buff.append(getStringField(metadata.getContracttype().getClassid()));
475 45523 tsampikos.
476 41790 eri.katsar
        // start_year
477 56504 antonis.le
        buff.append(getYearInt(metadata.getStartdate().getValue()));
478 27955 claudio.at
479 41790 eri.katsar
        // end_year
480 56504 antonis.le
        buff.append(getYearInt(metadata.getEnddate().getValue()));
481 27955 claudio.at
482 41790 eri.katsar
        // duration enddate-startdate
483 56504 antonis.le
        buff.append(getYearDifferenceInteger(metadata.getEnddate().getValue(), metadata.getStartdate().getValue()));
484 27955 claudio.at
485 41790 eri.katsar
        // haspubs
486 56504 antonis.le
        buff.append(getStringField("no"));
487 27955 claudio.at
488 41790 eri.katsar
        // numpubs
489 56504 antonis.le
        buff.append(getNumericField("0"));
490 42734 eri.katsar
491 41790 eri.katsar
        // enddate
492 56504 antonis.le
        buff.append(getStringDateField(metadata.getEnddate().getValue()));
493 42734 eri.katsar
494 41790 eri.katsar
        // startdate
495 56504 antonis.le
        buff.append(getStringDateField(metadata.getStartdate().getValue()));
496 27955 claudio.at
497 41790 eri.katsar
        // `daysforlastpub`,
498 56504 antonis.le
        buff.append(getNumericField(""));
499 42734 eri.katsar
500 41790 eri.katsar
        // `delayedpubs`,
501 56504 antonis.le
        buff.append(getNumericField(""));
502 42734 eri.katsar
503
        //call identifier
504 56504 antonis.le
        buff.append(getStringField(metadata.getCallidentifier().getValue()));
505 54431 tsampikos.
506 42734 eri.katsar
        //code
507 56504 antonis.le
        buff.append(getStringField(metadata.getCode().getValue()));
508 42734 eri.katsar
509
        return buff.toString();
510 41790 eri.katsar
    }
511 27955 claudio.at
512
513 56504 antonis.le
    private String getYearDifferenceInteger(String enddate, String startdate) {
514 34202 eri.katsar
515 41790 eri.katsar
        if (enddate != null && !enddate.isEmpty() && startdate != null && !startdate.isEmpty()) {
516 29754 eri.katsar
517 41790 eri.katsar
            String[] split = startdate.split("-");
518 27955 claudio.at
519 56508 antonis.le
            if (split.length == 0) {
520 42734 eri.katsar
                return ENCLOSING + "0" + ENCLOSING + DELIM;
521 41790 eri.katsar
            }
522 27955 claudio.at
523 41790 eri.katsar
            int Startdate = Integer.parseInt(split[0]);
524 27955 claudio.at
525 41790 eri.katsar
            split = enddate.split("-");
526 27955 claudio.at
527 56508 antonis.le
            if (split.length == 0) {
528 42734 eri.katsar
                return ENCLOSING + "0" + ENCLOSING + DELIM;
529 41790 eri.katsar
            }
530 27955 claudio.at
531 41790 eri.katsar
            int Enddate = Integer.parseInt(split[0]);
532 29211 eri.katsar
533 41790 eri.katsar
            int diff = Enddate - Startdate;
534 29211 eri.katsar
535 42734 eri.katsar
            return ENCLOSING + diff + ENCLOSING + DELIM;
536 29384 eri.katsar
537 41790 eri.katsar
        }
538 31279 eri.katsar
539 42734 eri.katsar
        return ENCLOSING + "0" + ENCLOSING + DELIM;
540 41790 eri.katsar
    }
541 29211 eri.katsar
542 56504 antonis.le
    private String getYearInt(String data) {
543 41790 eri.katsar
        if (data == null || data.isEmpty() || data.equals("-1")) {
544 42734 eri.katsar
            return ENCLOSING + "0" + ENCLOSING + DELIM;
545 41790 eri.katsar
        }
546 27955 claudio.at
547 41790 eri.katsar
        String[] split = data.split("-");
548 29384 eri.katsar
549 56508 antonis.le
        if (split.length == 0) {
550 42734 eri.katsar
            return ENCLOSING + "0" + ENCLOSING + DELIM;
551 41790 eri.katsar
        }
552 29336 eri.katsar
553 41790 eri.katsar
        String year = split[0];
554 29336 eri.katsar
555 41790 eri.katsar
        year = cleanNumber(year);
556 27955 claudio.at
557 42734 eri.katsar
        if (year == null || year.isEmpty()) year = "0";
558 30977 eri.katsar
559 42734 eri.katsar
        return ENCLOSING + year + ENCLOSING + DELIM;
560 27955 claudio.at
561 41790 eri.katsar
    }
562 30043 eri.katsar
563 56504 antonis.le
    private String cleanNumber(String number) {
564 56508 antonis.le
        number = number.replaceAll("[^A-Za-z0-9:,_]", "");
565 41790 eri.katsar
        return number;
566
    }
567 30043 eri.katsar
568 56504 antonis.le
    private String getLatLongField(String data) {
569 43739 tsampikos.
570 56504 antonis.le
        if (data == null || data.isEmpty())
571
            return ENCLOSING + "null" + ENCLOSING + DELIM;
572 43739 tsampikos.
573
        return ENCLOSING + data.replaceAll("[^-0-9.]+", "")  + ENCLOSING + DELIM;
574
575
    }
576
577 56504 antonis.le
    private String getStringField(String data) {
578 30977 eri.katsar
579 56504 antonis.le
        if (data == null || data.isEmpty())
580
            return ENCLOSING + "null" + ENCLOSING + DELIM;
581 27955 claudio.at
582 56504 antonis.le
        return ENCLOSING + clean(data) + ENCLOSING + DELIM;
583 27955 claudio.at
584 41790 eri.katsar
    }
585 27955 claudio.at
586 56504 antonis.le
    private String getStringDateField(String data) {
587 56508 antonis.le
        if (data == null || data.isEmpty() || data.equals("-1")) {
588 42734 eri.katsar
            return ENCLOSING + "0" + ENCLOSING + DELIM;
589 41790 eri.katsar
        } else {
590 42734 eri.katsar
            data = data.replace(DELIM, " ");
591
            data = data.replace(ENCLOSING, " ");
592 53034 tsampikos.
            data = data.replaceAll("\\r\\n|\\r|\\n", "");
593 54431 tsampikos.
            try {
594
                DateFormat format = new SimpleDateFormat("yyyy-MM-dd");
595
                data = format.format(format.parse(data));
596
                return ENCLOSING + data + ENCLOSING + DELIM;
597
            } catch (ParseException e) {
598
                return ENCLOSING + "0" + ENCLOSING + DELIM;
599
            }
600 41790 eri.katsar
        }
601
    }
602 27955 claudio.at
603 56504 antonis.le
    private String getNumericField(String data) {
604 56508 antonis.le
        if (data == null || data.isEmpty()) {
605 42734 eri.katsar
            return ENCLOSING + "0" + ENCLOSING + DELIM;
606 41790 eri.katsar
        } else {
607 42734 eri.katsar
            return ENCLOSING + data + ENCLOSING + DELIM;
608 41790 eri.katsar
        }
609
    }
610 29634 eri.katsar
611 56504 antonis.le
    public String getId(Oaf oaf) {
612 41790 eri.katsar
        switch (oaf.getKind()) {
613
            case entity:
614 56504 antonis.le
                return cleanId(oaf.getEntity().getId());
615 41790 eri.katsar
            case relation:
616 56504 antonis.le
                return cleanId(oaf.getRel().getSource());
617 36689 eri.katsar
618 41790 eri.katsar
        }
619
        return null;
620 27955 claudio.at
621 41790 eri.katsar
    }
622 27955 claudio.at
623 56504 antonis.le
    public String getId(OafRel relOaf) {
624
        return cleanId(relOaf.getSource());
625 41790 eri.katsar
    }
626 27955 claudio.at
627 56508 antonis.le
    private String clean(String value) {
628 41790 eri.katsar
        if (value != null) {
629 36689 eri.katsar
630 42734 eri.katsar
            value = value.replaceAll("[\"\\r\\\\;]", "");
631
            value = value.replace(DELIM, " ");
632
            value = value.replace(ENCLOSING, " ");
633 47371 tsampikos.
            value = value.replaceAll("\\r\\n|\\r|\\n", " ");
634 36689 eri.katsar
635 42734 eri.katsar
            return value;
636
        } else {
637
            return "";
638 41790 eri.katsar
        }
639 27955 claudio.at
640 41790 eri.katsar
    }
641 27955 claudio.at
642 56508 antonis.le
    private String cleanId(String value) {
643 41790 eri.katsar
        if (value != null) {
644 56505 antonis.le
            // DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( "5|datacite____::" to "datacite____::")
645
            // AND REPLACES OCCURRENCES OF DELIM CHARS IN DATA
646 41790 eri.katsar
            value = value.replaceFirst(".*\\|", "");
647 42734 eri.katsar
            value = value.replace("\n", "");
648
            value = value.replace(ENCLOSING, "");
649
            value = value.replace(DELIM, "");
650
            value = value.replace("\"", "");
651
            value = value.replace("«", " ");
652
            value = value.replace("»", " ");
653 41790 eri.katsar
        }
654 29336 eri.katsar
655 42734 eri.katsar
        return ENCLOSING + value + ENCLOSING;
656 41790 eri.katsar
    }
657 31900 eri.katsar
658 56508 antonis.le
    private String cleanUrl(String value) {
659 42734 eri.katsar
        value = value.replace(DELIM, " ");
660
        value = value.replace(ENCLOSING, " ");
661
        value = value.replace(" ", "");
662
        value = value.replace("\n", "");
663 57089 antonis.le
        value = value.replace("\r", "");
664
        value = value.replace("\\n", "");
665
        value = value.replace("\\r", "");
666 42734 eri.katsar
        return value;
667
    }
668
669 56508 antonis.le
    private long DATEDIFF(String startDate, String endDate) {
670
        long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000L;
671
        long days;
672 41790 eri.katsar
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); // "dd/MM/yyyy HH:mm:ss");
673
        // <startdate>2011-09-01</startdate>
674
        // <enddate>2015-08-31</enddate>
675 56508 antonis.le
        Date dateIni;
676
        Date dateFin;
677 37693 eri.katsar
678 41790 eri.katsar
        if (startDate == null || startDate.isEmpty() || endDate == null || endDate.isEmpty()) {
679
            return 0;
680
        }
681
        try {
682 56508 antonis.le
            dateIni = format.parse(startDate);
683
            dateFin = format.parse(endDate);
684 41790 eri.katsar
            days = (dateFin.getTime() - dateIni.getTime()) / MILLISECS_PER_DAY;
685
        } catch (Exception e) {
686 42734 eri.katsar
687 41790 eri.katsar
            return 0;
688
        }
689 37693 eri.katsar
690 41790 eri.katsar
        return days;
691
    }
692 27955 claudio.at
}