1 |
27955
|
claudio.at
|
package eu.dnetlib.data.mapreduce.hbase.statsExport.utils;
|
2 |
|
|
|
3 |
42734
|
eri.katsar
|
import com.google.common.collect.Multimap;
|
4 |
47072
|
tsampikos.
|
|
5 |
|
|
|
6 |
27955
|
claudio.at
|
import eu.dnetlib.data.mapreduce.util.LicenseComparator;
|
7 |
|
|
import eu.dnetlib.data.proto.DatasourceProtos.Datasource;
|
8 |
|
|
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata;
|
9 |
34194
|
eri.katsar
|
import eu.dnetlib.data.proto.FieldTypeProtos;
|
10 |
29712
|
eri.katsar
|
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
|
11 |
|
|
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
|
12 |
|
|
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty;
|
13 |
27955
|
claudio.at
|
import eu.dnetlib.data.proto.OafProtos.Oaf;
|
14 |
|
|
import eu.dnetlib.data.proto.OafProtos.OafEntity;
|
15 |
|
|
import eu.dnetlib.data.proto.OafProtos.OafRel;
|
16 |
|
|
import eu.dnetlib.data.proto.OrganizationProtos.Organization;
|
17 |
48302
|
tsampikos.
|
//import eu.dnetlib.data.proto.PersonProtos;
|
18 |
27955
|
claudio.at
|
import eu.dnetlib.data.proto.ProjectProtos.Project;
|
19 |
|
|
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
|
20 |
|
|
import eu.dnetlib.data.proto.ResultProtos.Result;
|
21 |
|
|
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
|
22 |
34084
|
eri.katsar
|
import org.apache.log4j.Logger;
|
23 |
42734
|
eri.katsar
|
import org.jsoup.Jsoup;
|
24 |
27955
|
claudio.at
|
|
25 |
54431
|
tsampikos.
|
import java.text.DateFormat;
|
26 |
|
|
import java.text.ParseException;
|
27 |
34084
|
eri.katsar
|
import java.text.SimpleDateFormat;
|
28 |
|
|
import java.util.ArrayList;
|
29 |
|
|
import java.util.Date;
|
30 |
|
|
import java.util.HashMap;
|
31 |
|
|
import java.util.List;
|
32 |
|
|
|
33 |
47072
|
tsampikos.
|
import org.w3c.dom.Element;
|
34 |
|
|
import org.w3c.dom.NodeList;
|
35 |
|
|
import org.xml.sax.InputSource;
|
36 |
|
|
import com.sun.org.apache.xerces.internal.parsers.DOMParser;
|
37 |
|
|
import org.w3c.dom.Document;
|
38 |
|
|
|
39 |
27955
|
claudio.at
|
/**
 * Simple serializer that parses input Oaf protos and prepares them for Sqoop.
 *
 * @author eri
 */
|
43 |
28471
|
eri.katsar
|
public class Serializer {
|
44 |
27955
|
claudio.at
|
|
45 |
42734
|
eri.katsar
|
public static String serialize(Oaf oaf, String DELIM, String ENCLOSING) {
|
46 |
29375
|
eri.katsar
|
|
47 |
41790
|
eri.katsar
|
switch (oaf.getKind()) {
|
48 |
|
|
case entity:
|
49 |
|
|
OafEntity valueEntity = oaf.getEntity();
|
50 |
27955
|
claudio.at
|
|
51 |
41790
|
eri.katsar
|
switch (valueEntity.getType()) {
|
52 |
|
|
case datasource:
|
53 |
27955
|
claudio.at
|
|
54 |
42734
|
eri.katsar
|
return buildDatasource(oaf, DELIM, ENCLOSING);
|
55 |
27955
|
claudio.at
|
|
56 |
41790
|
eri.katsar
|
case organization:
|
57 |
27955
|
claudio.at
|
|
58 |
42734
|
eri.katsar
|
return buildOrganization(oaf, DELIM, ENCLOSING);
|
59 |
27955
|
claudio.at
|
|
60 |
41790
|
eri.katsar
|
case project:
|
61 |
36921
|
eri.katsar
|
|
62 |
42734
|
eri.katsar
|
return buildProject(oaf, DELIM, ENCLOSING);
|
63 |
41790
|
eri.katsar
|
case result:
|
64 |
36921
|
eri.katsar
|
|
65 |
42734
|
eri.katsar
|
return buildResult(oaf, DELIM, ENCLOSING);
|
66 |
48302
|
tsampikos.
|
/*
|
67 |
42734
|
eri.katsar
|
case person:
|
68 |
|
|
return buildPerson(oaf, DELIM, ENCLOSING);
|
69 |
48302
|
tsampikos.
|
*/
|
70 |
41790
|
eri.katsar
|
default:
|
71 |
|
|
break;
|
72 |
|
|
}
|
73 |
|
|
break;
|
74 |
|
|
case relation:
|
75 |
34202
|
eri.katsar
|
|
76 |
42734
|
eri.katsar
|
return buildRel(oaf.getRel(), DELIM, ENCLOSING);
|
77 |
27955
|
claudio.at
|
|
78 |
41790
|
eri.katsar
|
}
|
79 |
|
|
return null;
|
80 |
|
|
}
|
81 |
27955
|
claudio.at
|
|
82 |
42734
|
eri.katsar
|
public static String serialize(OafRel oaf, String DELIM, String ENCLOSING) {
|
83 |
27955
|
claudio.at
|
|
84 |
41790
|
eri.katsar
|
switch (oaf.getRelType()) {
|
85 |
|
|
case resultProject:
|
86 |
42734
|
eri.katsar
|
return getResultProject(oaf, DELIM, ENCLOSING);
|
87 |
41790
|
eri.katsar
|
default:
|
88 |
42734
|
eri.katsar
|
return buildRel(oaf, DELIM, ENCLOSING);
|
89 |
41790
|
eri.katsar
|
}
|
90 |
|
|
}
|
91 |
27955
|
claudio.at
|
|
92 |
42734
|
eri.katsar
|
private static String buildRel(OafRel Rel, String DELIM, String ENCLOSING) {
|
93 |
|
|
|
94 |
|
|
return cleanId(Rel.getTarget(), DELIM, ENCLOSING) + DELIM;
|
95 |
|
|
|
96 |
41790
|
eri.katsar
|
}
|
97 |
27955
|
claudio.at
|
|
98 |
42734
|
eri.katsar
|
public static void extractRelations(Oaf oaf, String DELIM, String ENCLOSING, Multimap<String, String> relations) {
|
99 |
|
|
OafEntity valueEntity = oaf.getEntity();
|
100 |
|
|
getOriginalId(valueEntity, relations, DELIM, ENCLOSING);
|
101 |
36689
|
eri.katsar
|
|
102 |
41790
|
eri.katsar
|
switch (valueEntity.getType()) {
|
103 |
|
|
case datasource:
|
104 |
42734
|
eri.katsar
|
getDatasourceLanguages(valueEntity, relations, DELIM, ENCLOSING);
|
105 |
54431
|
tsampikos.
|
getDatasourceWebsite(valueEntity, relations, DELIM, ENCLOSING);
|
106 |
41790
|
eri.katsar
|
case result:
|
107 |
42734
|
eri.katsar
|
getResultTopics(valueEntity, relations, DELIM, ENCLOSING);
|
108 |
|
|
getResultLanguages(valueEntity, relations, DELIM, ENCLOSING);
|
109 |
|
|
getResultClassifications(valueEntity, relations, DELIM, ENCLOSING);
|
110 |
|
|
getResultDatasources(valueEntity, relations, DELIM, ENCLOSING);
|
111 |
|
|
getResultConcepts(valueEntity, relations, DELIM, ENCLOSING);
|
112 |
|
|
getResultDois(valueEntity, relations, DELIM, ENCLOSING);
|
113 |
|
|
getResultCitations(valueEntity, relations, DELIM, ENCLOSING);
|
114 |
|
|
getResultDescriptions(valueEntity, relations, DELIM, ENCLOSING);
|
115 |
54431
|
tsampikos.
|
getResultExtra(valueEntity, relations, DELIM, ENCLOSING);
|
116 |
42734
|
eri.katsar
|
|
117 |
|
|
case project:
|
118 |
|
|
getProjectKeywords(valueEntity, relations, DELIM, ENCLOSING);
|
119 |
|
|
getProjectSubjects(valueEntity, relations, DELIM, ENCLOSING);
|
120 |
|
|
|
121 |
41790
|
eri.katsar
|
default:
|
122 |
42734
|
eri.katsar
|
}
|
123 |
29637
|
eri.katsar
|
|
124 |
42734
|
eri.katsar
|
}
|
125 |
|
|
|
126 |
54431
|
tsampikos.
|
private static void getDatasourceWebsite(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
|
127 |
|
|
Datasource d = valueEntity.getDatasource();
|
128 |
|
|
Metadata metadata = d.getMetadata();
|
129 |
42734
|
eri.katsar
|
|
130 |
54431
|
tsampikos.
|
relations.put("datasourceWebsite", getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING));
|
131 |
|
|
}
|
132 |
|
|
|
133 |
|
|
private static void getResultExtra(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
|
134 |
|
|
Result result = valueEntity.getResult();
|
135 |
|
|
Result.Metadata metadata = result.getMetadata();
|
136 |
|
|
|
137 |
|
|
StringBuilder buff = new StringBuilder();
|
138 |
|
|
String titleString = "";
|
139 |
|
|
|
140 |
|
|
for (int i = 0; i < metadata.getTitleList().size(); i++) {
|
141 |
|
|
StructuredProperty title = metadata.getTitleList().get(i);
|
142 |
|
|
|
143 |
|
|
titleString = title.getValue().replaceAll("\\s+", " ");
|
144 |
|
|
titleString = titleString.replaceAll("\n", " ");
|
145 |
|
|
break;
|
146 |
|
|
}
|
147 |
|
|
|
148 |
|
|
// pubtitle
|
149 |
|
|
buff.append(getStringField(titleString, DELIM, ENCLOSING));
|
150 |
|
|
|
151 |
|
|
String sources = "";
|
152 |
|
|
for (Instance instance : (result.getInstanceList())) {
|
153 |
|
|
List<String> urls = instance.getUrlList();
|
154 |
|
|
for (String url : urls) {
|
155 |
|
|
sources += cleanUrl(url, DELIM, ENCLOSING) + " ;";
|
156 |
|
|
}
|
157 |
|
|
}
|
158 |
|
|
|
159 |
|
|
//sources
|
160 |
|
|
sources = ENCLOSING + sources + ENCLOSING + DELIM;
|
161 |
|
|
buff.append(sources);
|
162 |
|
|
|
163 |
|
|
relations.put("resultExtra", buff.toString());
|
164 |
|
|
}
|
165 |
|
|
|
166 |
42734
|
eri.katsar
|
private static void getOriginalId(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
|
167 |
|
|
|
168 |
|
|
String relName = oafEntity.getType().toString().toLowerCase() + "Oid";
|
169 |
|
|
for (String oid : oafEntity.getOriginalIdList()) {
|
170 |
|
|
relations.put(relName, cleanId(oid, DELIM, ENCLOSING));
|
171 |
41790
|
eri.katsar
|
}
|
172 |
29739
|
eri.katsar
|
|
173 |
41790
|
eri.katsar
|
}
|
174 |
27955
|
claudio.at
|
|
175 |
42734
|
eri.katsar
|
private static void getProjectKeywords(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
|
176 |
|
|
relations.put("projectKeyword", getStringField(oafEntity.getProject().getMetadata().getKeywords().getValue(), DELIM, ENCLOSING));
|
177 |
29637
|
eri.katsar
|
|
178 |
42734
|
eri.katsar
|
}
|
179 |
27955
|
claudio.at
|
|
180 |
42734
|
eri.katsar
|
private static void getProjectSubjects(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
|
181 |
|
|
for (StructuredProperty subj : oafEntity.getProject().getMetadata().getSubjectsList()) {
|
182 |
|
|
relations.put("projectSubject", getStringField(subj.getValue(), DELIM, ENCLOSING));
|
183 |
|
|
}
|
184 |
|
|
}
|
185 |
|
|
|
186 |
|
|
private static String getResultProject(OafRel oaf, String DELIM, String ENCLOSING) {
|
187 |
|
|
StringBuilder buff = new StringBuilder();
|
188 |
|
|
buff.append(cleanId(oaf.getTarget(), DELIM, ENCLOSING) + DELIM);
|
189 |
41790
|
eri.katsar
|
// TODO is declared as int!!!
|
190 |
|
|
long diff = DATEDIFF(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate());
|
191 |
|
|
if (diff < 0) {
|
192 |
|
|
diff = 0;
|
193 |
|
|
}
|
194 |
27955
|
claudio.at
|
|
195 |
42734
|
eri.katsar
|
buff.append(getNumericField(String.valueOf(diff), DELIM, ENCLOSING));
|
196 |
|
|
return buff.toString();
|
197 |
41790
|
eri.katsar
|
}
|
198 |
27955
|
claudio.at
|
|
199 |
|
|
|
200 |
42734
|
eri.katsar
|
private static void getDatasourceLanguages(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
201 |
41790
|
eri.katsar
|
Datasource d = valueEntity.getDatasource();
|
202 |
|
|
Metadata metadata = d.getMetadata();
|
203 |
27955
|
claudio.at
|
|
204 |
41790
|
eri.katsar
|
for (StringField lang : metadata.getOdlanguagesList()) {
|
205 |
42734
|
eri.katsar
|
rels.put("datasourceLanguage", getStringField(lang.getValue(), DELIM, ENCLOSING));
|
206 |
41790
|
eri.katsar
|
}
|
207 |
|
|
}
|
208 |
27955
|
claudio.at
|
|
209 |
42734
|
eri.katsar
|
private static void getResultLanguages(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
210 |
|
|
|
211 |
41790
|
eri.katsar
|
Result d = valueEntity.getResult();
|
212 |
42734
|
eri.katsar
|
Result.Metadata metadata = d.getMetadata();
|
213 |
|
|
if (metadata.getLanguage().getClassname() != null && !metadata.getLanguage().getClassname().isEmpty()) {
|
214 |
|
|
rels.put("resultLanguage", getStringField(metadata.getLanguage().getClassname(), DELIM, ENCLOSING));
|
215 |
|
|
}
|
216 |
27955
|
claudio.at
|
|
217 |
42734
|
eri.katsar
|
}
|
218 |
27955
|
claudio.at
|
|
219 |
42734
|
eri.katsar
|
private static void getResultDois(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
220 |
27955
|
claudio.at
|
|
221 |
42734
|
eri.katsar
|
for (StructuredProperty pid : valueEntity.getPidList()) {
|
222 |
|
|
|
223 |
|
|
rels.put("resultPid",
|
224 |
|
|
getStringField(pid.getQualifier().getClassname(), DELIM, ENCLOSING) + getStringField(pid.getValue(), DELIM, ENCLOSING));
|
225 |
41790
|
eri.katsar
|
}
|
226 |
|
|
}
|
227 |
27955
|
claudio.at
|
|
228 |
42734
|
eri.katsar
|
private static void getResultClassifications(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
229 |
27955
|
claudio.at
|
|
230 |
41790
|
eri.katsar
|
Result result = valueEntity.getResult();
|
231 |
29735
|
eri.katsar
|
|
232 |
41790
|
eri.katsar
|
for (Instance instance : (result.getInstanceList())) {
|
233 |
|
|
String classification = instance.getInstancetype().getClassname();
|
234 |
|
|
if (classification != null && !classification.isEmpty()) {
|
235 |
42734
|
eri.katsar
|
rels.put("resultClassification", getStringField(instance.getInstancetype().getClassname(), DELIM, ENCLOSING));
|
236 |
41790
|
eri.katsar
|
// TODO HERE KEEP ONLY ONE CLASSIFICATIONS PER RESULT
|
237 |
|
|
break;
|
238 |
|
|
}
|
239 |
|
|
}
|
240 |
42734
|
eri.katsar
|
}
|
241 |
27955
|
claudio.at
|
|
242 |
42734
|
eri.katsar
|
private static void getResultDescriptions(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
243 |
|
|
Result result = valueEntity.getResult();
|
244 |
|
|
//description
|
245 |
|
|
for (StringField s : result.getMetadata().getDescriptionList()) {
|
246 |
|
|
|
247 |
|
|
rels.put("resultDescription", getStringField(Jsoup.parse(s.getValue()).text(), DELIM, ENCLOSING));
|
248 |
|
|
}
|
249 |
41790
|
eri.katsar
|
}
|
250 |
27955
|
claudio.at
|
|
251 |
42734
|
eri.katsar
|
private static void getResultConcepts(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
252 |
27955
|
claudio.at
|
|
253 |
41790
|
eri.katsar
|
Result result = valueEntity.getResult();
|
254 |
27955
|
claudio.at
|
|
255 |
47072
|
tsampikos.
|
|
256 |
42734
|
eri.katsar
|
for (Result.Context context : result.getMetadata().getContextList()) {
|
257 |
27955
|
claudio.at
|
|
258 |
42734
|
eri.katsar
|
rels.put("resultConcept", cleanId(context.getId(), DELIM, ENCLOSING));
|
259 |
41790
|
eri.katsar
|
}
|
260 |
|
|
}
|
261 |
27955
|
claudio.at
|
|
262 |
42734
|
eri.katsar
|
private static void getResultDatasources(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
263 |
41790
|
eri.katsar
|
Result result = valueEntity.getResult();
|
264 |
29382
|
eri.katsar
|
|
265 |
34200
|
eri.katsar
|
//TODO hosted by
|
266 |
41790
|
eri.katsar
|
for (Instance instance : (result.getInstanceList())) {
|
267 |
|
|
String hostedBy = instance.getHostedby().getKey();
|
268 |
|
|
if (hostedBy != null && !hostedBy.isEmpty()) {
|
269 |
42734
|
eri.katsar
|
rels.put("resultDatasource", cleanId(hostedBy, DELIM, ENCLOSING) + DELIM);
|
270 |
41790
|
eri.katsar
|
}
|
271 |
|
|
}
|
272 |
34084
|
eri.katsar
|
|
273 |
34200
|
eri.katsar
|
//TODO collected froms
|
274 |
41790
|
eri.katsar
|
for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) {
|
275 |
34084
|
eri.katsar
|
|
276 |
41790
|
eri.katsar
|
String collectedFrom = collectedFromValue.getKey();
|
277 |
42734
|
eri.katsar
|
if (collectedFrom != null && !collectedFrom.isEmpty()) {
|
278 |
|
|
rels.put("resultDatasource", cleanId(collectedFrom, DELIM, ENCLOSING) + DELIM);
|
279 |
|
|
}
|
280 |
41790
|
eri.katsar
|
}
|
281 |
|
|
}
|
282 |
29386
|
eri.katsar
|
|
283 |
42734
|
eri.katsar
|
private static void getResultTopics(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
284 |
29380
|
eri.katsar
|
|
285 |
42734
|
eri.katsar
|
Result d = valueEntity.getResult();
|
286 |
|
|
Result.Metadata metadata = d.getMetadata();
|
287 |
27955
|
claudio.at
|
|
288 |
42734
|
eri.katsar
|
List<StructuredProperty> Topics = metadata.getSubjectList();
|
289 |
29754
|
eri.katsar
|
|
290 |
42734
|
eri.katsar
|
for (StructuredProperty topic : Topics) {
|
291 |
|
|
// TODO result topics
|
292 |
|
|
rels.put("resultTopic", getStringField(topic.getValue(), DELIM, ENCLOSING));
|
293 |
41790
|
eri.katsar
|
}
|
294 |
|
|
}
|
295 |
29957
|
eri.katsar
|
|
296 |
27955
|
claudio.at
|
|
297 |
42734
|
eri.katsar
|
private static void getResultCitations(OafEntity oafEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
298 |
|
|
for (FieldTypeProtos.ExtraInfo extraInfo : oafEntity.getExtraInfoList()) {
|
299 |
|
|
if (extraInfo.getName().equals("result citations")) {
|
300 |
47072
|
tsampikos.
|
DOMParser parser = new DOMParser();
|
301 |
|
|
try {
|
302 |
|
|
parser.parse(new InputSource(new java.io.StringReader(extraInfo.getValue())));
|
303 |
|
|
Document doc = parser.getDocument();
|
304 |
|
|
doc.getDocumentElement().normalize();
|
305 |
|
|
|
306 |
|
|
NodeList citations = doc.getElementsByTagName("citation");
|
307 |
|
|
for (int temp = 0; temp < citations.getLength(); temp++) {
|
308 |
|
|
Element citation = (Element) citations.item(temp);
|
309 |
|
|
NodeList ids = citation.getElementsByTagName("id");
|
310 |
|
|
for(int temp1 = 0; temp1 < ids.getLength(); temp1++){
|
311 |
|
|
Element id = (Element) ids.item(temp1);
|
312 |
|
|
if(id.getAttribute("type").equals("openaire")){
|
313 |
|
|
//System.out.println(id.getAttribute("value"));
|
314 |
|
|
rels.put("resultCitation", id.getAttribute("value"));
|
315 |
|
|
}
|
316 |
|
|
}
|
317 |
|
|
}
|
318 |
|
|
} catch (Exception e) {
|
319 |
|
|
|
320 |
|
|
}
|
321 |
|
|
|
322 |
|
|
/*
|
323 |
42734
|
eri.katsar
|
rels.put("resultCitation", getStringField(extraInfo.getTrust(), DELIM, ENCLOSING) +
|
324 |
|
|
getStringField(extraInfo.getProvenance(), DELIM, ENCLOSING) + getStringField(extraInfo.getValue(), DELIM, ENCLOSING));
|
325 |
47072
|
tsampikos.
|
*/
|
326 |
41790
|
eri.katsar
|
}
|
327 |
27955
|
claudio.at
|
|
328 |
41790
|
eri.katsar
|
}
|
329 |
|
|
}
|
330 |
27955
|
claudio.at
|
|
331 |
42734
|
eri.katsar
|
private static String buildDatasource(Oaf oaf, String DELIM, String ENCLOSING) {
|
332 |
|
|
Metadata metadata = oaf.getEntity().getDatasource().getMetadata();
|
333 |
|
|
StringBuilder buff = new StringBuilder();
|
334 |
27955
|
claudio.at
|
|
335 |
41790
|
eri.katsar
|
// name
|
336 |
|
|
if (metadata.getOfficialname().getValue().equalsIgnoreCase("unknown")) {
|
337 |
42734
|
eri.katsar
|
buff.append(getStringField("Unknown Repository", DELIM, ENCLOSING));
|
338 |
41790
|
eri.katsar
|
} else {
|
339 |
42734
|
eri.katsar
|
buff.append(getStringField(metadata.getOfficialname().getValue(), DELIM, ENCLOSING));
|
340 |
41790
|
eri.katsar
|
}
|
341 |
54431
|
tsampikos.
|
|
342 |
41790
|
eri.katsar
|
// type
|
343 |
42734
|
eri.katsar
|
if (metadata.hasDatasourcetype()) {
|
344 |
|
|
buff.append(getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""), DELIM, ENCLOSING));
|
345 |
41790
|
eri.katsar
|
}
|
346 |
27955
|
claudio.at
|
|
347 |
41790
|
eri.katsar
|
// compatibility,
|
348 |
42734
|
eri.katsar
|
buff.append(getStringField(metadata.getOpenairecompatibility().getClassname(), DELIM, ENCLOSING));
|
349 |
27955
|
claudio.at
|
|
350 |
41790
|
eri.katsar
|
// dateofvalidation,
|
351 |
42734
|
eri.katsar
|
buff.append(getStringDateField(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING));
|
352 |
27955
|
claudio.at
|
|
353 |
41790
|
eri.katsar
|
// yearofvalidation,
|
354 |
42734
|
eri.katsar
|
buff.append(getYearInt(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING));
|
355 |
27955
|
claudio.at
|
|
356 |
54431
|
tsampikos.
|
//harvested
|
357 |
|
|
buff.append(getStringField("false", DELIM, ENCLOSING));
|
358 |
27955
|
claudio.at
|
|
359 |
45523
|
tsampikos.
|
//piwik_id
|
360 |
|
|
String piwik_id = "";
|
361 |
|
|
for (String oid : oaf.getEntity().getOriginalIdList()) {
|
362 |
|
|
if (oid.contains("piwik")) {
|
363 |
|
|
piwik_id = oid.split(":")[1];
|
364 |
|
|
break;
|
365 |
|
|
}
|
366 |
|
|
}
|
367 |
|
|
buff.append(getStringField(cleanNumber(piwik_id), DELIM, ENCLOSING));
|
368 |
|
|
|
369 |
42734
|
eri.katsar
|
return buff.toString();
|
370 |
27955
|
claudio.at
|
|
371 |
41790
|
eri.katsar
|
}
|
372 |
27955
|
claudio.at
|
|
373 |
42734
|
eri.katsar
|
private static String buildOrganization(Oaf oaf, String DELIM, String ENCLOSING) {
|
374 |
27955
|
claudio.at
|
|
375 |
42734
|
eri.katsar
|
StringBuilder buff = new StringBuilder();
|
376 |
|
|
Organization.Metadata metadata = oaf.getEntity().getOrganization().getMetadata();
|
377 |
27955
|
claudio.at
|
|
378 |
41790
|
eri.katsar
|
// `name`,
|
379 |
42734
|
eri.katsar
|
buff.append(getStringField(metadata.getLegalname().getValue(), DELIM, ENCLOSING));
|
380 |
|
|
|
381 |
41790
|
eri.katsar
|
// `country`,
|
382 |
53471
|
tsampikos.
|
buff.append(getStringField(metadata.getCountry().getClassid(), DELIM, ENCLOSING));
|
383 |
29323
|
eri.katsar
|
|
384 |
42734
|
eri.katsar
|
return buff.toString();
|
385 |
41790
|
eri.katsar
|
}
|
386 |
27955
|
claudio.at
|
|
387 |
42734
|
eri.katsar
|
private static String buildResult(Oaf oaf, String DELIM, String ENCLOSING) {
|
388 |
|
|
StringBuilder buff = new StringBuilder();
|
389 |
27955
|
claudio.at
|
|
390 |
42734
|
eri.katsar
|
Result.Metadata metadata = oaf.getEntity().getResult().getMetadata();
|
391 |
27955
|
claudio.at
|
|
392 |
42734
|
eri.katsar
|
// publisher
|
393 |
|
|
buff.append(getStringField(metadata.getPublisher().getValue(), DELIM, ENCLOSING));
|
394 |
|
|
|
395 |
|
|
// journal
|
396 |
|
|
buff.append(getStringField(metadata.getJournal().getName(), DELIM, ENCLOSING)); //#null#!
|
397 |
|
|
|
398 |
41790
|
eri.katsar
|
// year
|
399 |
42734
|
eri.katsar
|
buff.append(getYearInt(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING));
|
400 |
27955
|
claudio.at
|
|
401 |
54431
|
tsampikos.
|
// date
|
402 |
42734
|
eri.katsar
|
buff.append(getStringDateField(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING));
|
403 |
29211
|
eri.katsar
|
|
404 |
41790
|
eri.katsar
|
// bestlicense
|
405 |
42734
|
eri.katsar
|
buff.append(getStringField(getBestLicense(oaf.getEntity().getResult()), DELIM, ENCLOSING));
|
406 |
29735
|
eri.katsar
|
|
407 |
41790
|
eri.katsar
|
// type
|
408 |
42734
|
eri.katsar
|
buff.append(getStringField(metadata.getResulttype().getClassname(), DELIM, ENCLOSING));
|
409 |
|
|
|
410 |
41790
|
eri.katsar
|
// embargo_end_date
|
411 |
42734
|
eri.katsar
|
buff.append(getStringDateField(metadata.getEmbargoenddate().getValue(), DELIM, ENCLOSING));
|
412 |
29637
|
eri.katsar
|
|
413 |
41790
|
eri.katsar
|
// `authors`,
|
414 |
54431
|
tsampikos.
|
int authors = metadata.getAuthorCount();
|
415 |
42734
|
eri.katsar
|
|
416 |
54431
|
tsampikos.
|
|
417 |
41790
|
eri.katsar
|
String delayed = "no";
|
418 |
27955
|
claudio.at
|
|
419 |
42734
|
eri.katsar
|
for (OafRel rel : oaf.getEntity().getCachedRelList()) {
|
420 |
27955
|
claudio.at
|
|
421 |
48302
|
tsampikos.
|
/*
|
422 |
41790
|
eri.katsar
|
if (rel.getRelType().equals(RelType.personResult)) {
|
423 |
29634
|
eri.katsar
|
|
424 |
41790
|
eri.katsar
|
authors++;
|
425 |
48302
|
tsampikos.
|
} else
|
426 |
|
|
*/
|
427 |
|
|
if (rel.getRelType().equals(RelType.resultProject))
|
428 |
41790
|
eri.katsar
|
// TODO remember : in result Project, first id is project, second is
|
429 |
|
|
// result.
|
430 |
|
|
{
|
431 |
42734
|
eri.katsar
|
String daysfromend = getYearDifferenceInteger(rel.getResultProject().getOutcome().getRelMetadata().getEnddate(),
|
432 |
|
|
rel.getResultProject().getOutcome().getRelMetadata().getStartdate(), DELIM, ENCLOSING);
|
433 |
41790
|
eri.katsar
|
if (Integer.parseInt(daysfromend) > 0) {
|
434 |
|
|
delayed = "yes";
|
435 |
|
|
}
|
436 |
|
|
}
|
437 |
|
|
}
|
438 |
42734
|
eri.katsar
|
|
439 |
41790
|
eri.katsar
|
// `delayed`,
|
440 |
42734
|
eri.katsar
|
buff.append(getStringField(delayed, DELIM, ENCLOSING));
|
441 |
|
|
//authors
|
442 |
|
|
buff.append(getNumericField(String.valueOf(authors), DELIM, ENCLOSING));
|
443 |
29336
|
eri.katsar
|
|
444 |
42734
|
eri.katsar
|
return buff.toString();
|
445 |
|
|
|
446 |
41790
|
eri.katsar
|
}
|
447 |
31183
|
eri.katsar
|
|
448 |
42734
|
eri.katsar
|
|
449 |
53034
|
tsampikos.
|
/*
|
450 |
42734
|
eri.katsar
|
private static String getBestLicense(Result result) {
|
451 |
41790
|
eri.katsar
|
Qualifier bestLicense = null;
|
452 |
|
|
LicenseComparator lc = new LicenseComparator();
|
453 |
|
|
for (Instance instance : (result.getInstanceList())) {
|
454 |
53034
|
tsampikos.
|
if (lc.compare(bestLicense, instance.getLicence()) > 0) {
|
455 |
|
|
bestLicense = instance.getLicence();
|
456 |
|
|
}
|
457 |
|
|
}
|
458 |
|
|
if (bestLicense != null) {
|
459 |
|
|
return bestLicense.getClassname();
|
460 |
|
|
} else {
|
461 |
|
|
return null;
|
462 |
|
|
}
|
463 |
|
|
}
|
464 |
|
|
|
465 |
|
|
// TODO here iterate over all values
|
466 |
|
|
private static String getAccessMode(Result result) {
|
467 |
|
|
String accessMode = " ";
|
468 |
|
|
for (Instance instance : (result.getInstanceList())) {
|
469 |
|
|
if (instance.getLicence().getClassname() != null && !instance.getLicence().getClassname().isEmpty()) {
|
470 |
|
|
accessMode = instance.getLicence().getClassname();
|
471 |
|
|
break;
|
472 |
|
|
}
|
473 |
|
|
|
474 |
|
|
}
|
475 |
|
|
|
476 |
|
|
return accessMode;
|
477 |
|
|
}
|
478 |
|
|
*/
|
479 |
|
|
|
480 |
|
|
private static String getBestLicense(Result result) {
|
481 |
|
|
Qualifier bestLicense = null;
|
482 |
|
|
LicenseComparator lc = new LicenseComparator();
|
483 |
|
|
for (Instance instance : (result.getInstanceList())) {
|
484 |
50242
|
tsampikos.
|
if (lc.compare(bestLicense, instance.getAccessright()) > 0) {
|
485 |
|
|
bestLicense = instance.getAccessright();
|
486 |
41790
|
eri.katsar
|
}
|
487 |
|
|
}
|
488 |
|
|
if (bestLicense != null) {
|
489 |
|
|
return bestLicense.getClassname();
|
490 |
|
|
} else {
|
491 |
|
|
return null;
|
492 |
|
|
}
|
493 |
|
|
}
|
494 |
27955
|
claudio.at
|
|
495 |
41790
|
eri.katsar
|
// TODO here iterate over all values
|
496 |
42734
|
eri.katsar
|
private static String getAccessMode(Result result) {
|
497 |
|
|
String accessMode = " ";
|
498 |
41790
|
eri.katsar
|
for (Instance instance : (result.getInstanceList())) {
|
499 |
50242
|
tsampikos.
|
if (instance.getAccessright().getClassname() != null && !instance.getAccessright().getClassname().isEmpty()) {
|
500 |
|
|
accessMode = instance.getAccessright().getClassname();
|
501 |
41790
|
eri.katsar
|
break;
|
502 |
|
|
}
|
503 |
27955
|
claudio.at
|
|
504 |
41790
|
eri.katsar
|
}
|
505 |
31279
|
eri.katsar
|
|
506 |
41790
|
eri.katsar
|
return accessMode;
|
507 |
|
|
}
|
508 |
27955
|
claudio.at
|
|
509 |
42734
|
eri.katsar
|
private static String buildProject(Oaf oaf, String DELIM, String ENCLOSING) {
|
510 |
27955
|
claudio.at
|
|
511 |
42734
|
eri.katsar
|
StringBuilder buff = new StringBuilder();
|
512 |
|
|
Project.Metadata metadata = oaf.getEntity().getProject().getMetadata();
|
513 |
43392
|
tsampikos.
|
|
514 |
41790
|
eri.katsar
|
// `acronym`,
|
515 |
|
|
String acronym = metadata.getAcronym().getValue();
|
516 |
|
|
if (acronym.equalsIgnoreCase("UNKNOWN")) {
|
517 |
|
|
acronym = metadata.getTitle().getValue();
|
518 |
|
|
}
|
519 |
42734
|
eri.katsar
|
buff.append(getStringField(acronym, DELIM, ENCLOSING));
|
520 |
31183
|
eri.katsar
|
|
521 |
54431
|
tsampikos.
|
//title
|
522 |
42734
|
eri.katsar
|
buff.append(getStringField(metadata.getTitle().getValue(), DELIM, ENCLOSING));
|
523 |
41790
|
eri.katsar
|
|
524 |
54431
|
tsampikos.
|
//funding_lvl
|
525 |
41790
|
eri.katsar
|
List<StringField> fundList = metadata.getFundingtreeList();
|
526 |
|
|
if (!fundList.isEmpty()) // `funding_lvl0`,
|
527 |
|
|
{
|
528 |
|
|
//TODO funder + 3 funding levels
|
529 |
|
|
/* funder text,
|
530 |
|
|
funding_lvl0 text,
|
531 |
37706
|
eri.katsar
|
funding_lvl1 text,
|
532 |
|
|
funding_lvl2 text,
|
533 |
|
|
funding_lvl3 text,*/
|
534 |
42734
|
eri.katsar
|
buff.append(FundingParser.getFundingInfo(fundList.get(0).getValue(), DELIM, ENCLOSING));
|
535 |
41790
|
eri.katsar
|
} else {
|
536 |
42734
|
eri.katsar
|
buff.append(FundingParser.getFundingInfo("", DELIM, ENCLOSING));
|
537 |
41790
|
eri.katsar
|
}
|
538 |
36995
|
eri.katsar
|
|
539 |
54431
|
tsampikos.
|
//sc39
|
540 |
41790
|
eri.katsar
|
String sc39 = metadata.getEcsc39().getValue().toString();
|
541 |
|
|
if (sc39.equalsIgnoreCase("true") || sc39.equalsIgnoreCase("t") || sc39.contains("yes")) {
|
542 |
|
|
sc39 = "yes";
|
543 |
|
|
} else if (sc39.equalsIgnoreCase("false") || sc39.equalsIgnoreCase("f") || sc39.contains("no")) {
|
544 |
|
|
sc39 = "no";
|
545 |
|
|
}
|
546 |
42734
|
eri.katsar
|
buff.append(getStringField(sc39, DELIM, ENCLOSING));
|
547 |
27955
|
claudio.at
|
|
548 |
45523
|
tsampikos.
|
//project_type
|
549 |
|
|
buff.append(getStringField(metadata.getContracttype().getClassid(),DELIM, ENCLOSING));
|
550 |
|
|
|
551 |
41790
|
eri.katsar
|
// start_year
|
552 |
42734
|
eri.katsar
|
buff.append(getYearInt(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
|
553 |
27955
|
claudio.at
|
|
554 |
41790
|
eri.katsar
|
// end_year
|
555 |
42734
|
eri.katsar
|
buff.append(getYearInt(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
|
556 |
27955
|
claudio.at
|
|
557 |
41790
|
eri.katsar
|
// duration enddate-startdate
|
558 |
42734
|
eri.katsar
|
buff.append(getYearDifferenceInteger(metadata.getEnddate().getValue(), metadata.getStartdate().getValue(), DELIM, ENCLOSING));
|
559 |
27955
|
claudio.at
|
|
560 |
41790
|
eri.katsar
|
// haspubs
|
561 |
42734
|
eri.katsar
|
buff.append(getStringField("no", DELIM, ENCLOSING));
|
562 |
27955
|
claudio.at
|
|
563 |
41790
|
eri.katsar
|
// numpubs
|
564 |
42734
|
eri.katsar
|
buff.append(getNumericField("0", DELIM, ENCLOSING));
|
565 |
|
|
|
566 |
41790
|
eri.katsar
|
// enddate
|
567 |
54431
|
tsampikos.
|
buff.append(getStringDateField(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
|
568 |
42734
|
eri.katsar
|
|
569 |
41790
|
eri.katsar
|
// startdate
|
570 |
54431
|
tsampikos.
|
buff.append(getStringDateField(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
|
571 |
27955
|
claudio.at
|
|
572 |
41790
|
eri.katsar
|
// `daysforlastpub`,
|
573 |
42734
|
eri.katsar
|
buff.append(getNumericField("", DELIM, ENCLOSING));
|
574 |
|
|
|
575 |
41790
|
eri.katsar
|
// `delayedpubs`,
|
576 |
42734
|
eri.katsar
|
buff.append(getNumericField("", DELIM, ENCLOSING));
|
577 |
|
|
|
578 |
|
|
//call identifier
|
579 |
|
|
buff.append(getStringField(metadata.getCallidentifier().getValue(), DELIM, ENCLOSING));
|
580 |
54431
|
tsampikos.
|
|
581 |
42734
|
eri.katsar
|
//code
|
582 |
|
|
buff.append(getStringField(metadata.getCode().getValue(), DELIM, ENCLOSING));
|
583 |
|
|
|
584 |
|
|
return buff.toString();
|
585 |
27955
|
claudio.at
|
|
586 |
41790
|
eri.katsar
|
}
|
587 |
27955
|
claudio.at
|
|
588 |
48302
|
tsampikos.
|
/*
|
589 |
42734
|
eri.katsar
|
private static String buildPerson(Oaf oaf, String DELIM, String ENCLOSING) {
|
590 |
|
|
StringBuilder buff = new StringBuilder();
|
591 |
27955
|
claudio.at
|
|
592 |
42734
|
eri.katsar
|
PersonProtos.Person.Metadata metadata = oaf.getEntity().getPerson().getMetadata();
|
593 |
27955
|
claudio.at
|
|
594 |
41790
|
eri.katsar
|
// `person_id`,
|
595 |
43392
|
tsampikos.
|
//buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
|
596 |
|
|
|
597 |
|
|
|
598 |
41790
|
eri.katsar
|
// person_result
|
599 |
42734
|
eri.katsar
|
buff.append(cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING) + DELIM);
|
600 |
27955
|
claudio.at
|
|
601 |
42734
|
eri.katsar
|
|
602 |
|
|
//firstName
|
603 |
|
|
buff.append(getStringField(metadata.getFirstname().getValue(), DELIM, ENCLOSING));
|
604 |
|
|
|
605 |
|
|
//LastName
|
606 |
|
|
|
607 |
|
|
String secondName = new String();
|
608 |
|
|
for (StringField s : metadata.getSecondnamesList()) {
|
609 |
|
|
secondName += s.getValue().replace("\n", " ").replace(",", " ") + " ";
|
610 |
|
|
}
|
611 |
|
|
|
612 |
|
|
buff.append(getStringField(secondName, DELIM, ENCLOSING));
|
613 |
|
|
|
614 |
41790
|
eri.katsar
|
// `fullname`,
|
615 |
42734
|
eri.katsar
|
buff.append(getStringField(metadata.getFullname().getValue(), DELIM, ENCLOSING));
|
616 |
34202
|
eri.katsar
|
|
617 |
41790
|
eri.katsar
|
// `Nationality`,
|
618 |
42734
|
eri.katsar
|
buff.append(getStringField(metadata.getNationality().getClassid(), DELIM, ENCLOSING));
|
619 |
|
|
|
620 |
41790
|
eri.katsar
|
// `Email`,
|
621 |
42734
|
eri.katsar
|
buff.append(getStringField(metadata.getEmail().getValue(), DELIM, ENCLOSING));
|
622 |
|
|
|
623 |
41790
|
eri.katsar
|
// `Phone`,
|
624 |
42734
|
eri.katsar
|
buff.append(getStringField(metadata.getPhone().getValue(), DELIM, ENCLOSING));
|
625 |
|
|
|
626 |
|
|
//deletedByInference
|
627 |
|
|
|
628 |
|
|
buff.append(getStringField(String.valueOf(oaf.getDataInfo().getDeletedbyinference()), DELIM, ENCLOSING));
|
629 |
|
|
|
630 |
41790
|
eri.katsar
|
// `number`,
|
631 |
42734
|
eri.katsar
|
buff.append(getStringField("1", DELIM, ENCLOSING));
|
632 |
34202
|
eri.katsar
|
|
633 |
42734
|
eri.katsar
|
return buff.toString();
|
634 |
|
|
|
635 |
41790
|
eri.katsar
|
}
|
636 |
48302
|
tsampikos.
|
*/
|
637 |
34202
|
eri.katsar
|
|
638 |
|
|
|
639 |
42734
|
eri.katsar
|
private static String getYearDifferenceInteger(String enddate, String startdate, String DELIM, String ENCLOSING) {
|
640 |
34202
|
eri.katsar
|
|
641 |
41790
|
eri.katsar
|
if (enddate != null && !enddate.isEmpty() && startdate != null && !startdate.isEmpty()) {
|
642 |
29754
|
eri.katsar
|
|
643 |
41790
|
eri.katsar
|
String[] split = startdate.split("-");
|
644 |
27955
|
claudio.at
|
|
645 |
41790
|
eri.katsar
|
if (split == null || split.length == 0) {
|
646 |
42734
|
eri.katsar
|
return ENCLOSING + "0" + ENCLOSING + DELIM;
|
647 |
41790
|
eri.katsar
|
}
|
648 |
27955
|
claudio.at
|
|
649 |
41790
|
eri.katsar
|
int Startdate = Integer.parseInt(split[0]);
|
650 |
27955
|
claudio.at
|
|
651 |
41790
|
eri.katsar
|
split = enddate.split("-");
|
652 |
27955
|
claudio.at
|
|
653 |
41790
|
eri.katsar
|
if (split == null || split.length == 0) {
|
654 |
42734
|
eri.katsar
|
return ENCLOSING + "0" + ENCLOSING + DELIM;
|
655 |
41790
|
eri.katsar
|
}
|
656 |
27955
|
claudio.at
|
|
657 |
41790
|
eri.katsar
|
int Enddate = Integer.parseInt(split[0]);
|
658 |
29211
|
eri.katsar
|
|
659 |
41790
|
eri.katsar
|
int diff = Enddate - Startdate;
|
660 |
29211
|
eri.katsar
|
|
661 |
42734
|
eri.katsar
|
return ENCLOSING + diff + ENCLOSING + DELIM;
|
662 |
29384
|
eri.katsar
|
|
663 |
41790
|
eri.katsar
|
}
|
664 |
31279
|
eri.katsar
|
|
665 |
42734
|
eri.katsar
|
return ENCLOSING + "0" + ENCLOSING + DELIM;
|
666 |
41790
|
eri.katsar
|
}
|
667 |
29211
|
eri.katsar
|
|
668 |
42734
|
eri.katsar
|
private static String getYearInt(String data, String DELIM, String ENCLOSING) {
|
669 |
41790
|
eri.katsar
|
if (data == null || data.isEmpty() || data.equals("-1")) {
|
670 |
42734
|
eri.katsar
|
return ENCLOSING + "0" + ENCLOSING + DELIM;
|
671 |
41790
|
eri.katsar
|
}
|
672 |
27955
|
claudio.at
|
|
673 |
41790
|
eri.katsar
|
String[] split = data.split("-");
|
674 |
29384
|
eri.katsar
|
|
675 |
41790
|
eri.katsar
|
if (split == null || split.length == 0) {
|
676 |
42734
|
eri.katsar
|
return ENCLOSING + "0" + ENCLOSING + DELIM;
|
677 |
41790
|
eri.katsar
|
}
|
678 |
29336
|
eri.katsar
|
|
679 |
41790
|
eri.katsar
|
String year = split[0];
|
680 |
29336
|
eri.katsar
|
|
681 |
41790
|
eri.katsar
|
year = cleanNumber(year);
|
682 |
27955
|
claudio.at
|
|
683 |
42734
|
eri.katsar
|
if (year == null || year.isEmpty()) year = "0";
|
684 |
30977
|
eri.katsar
|
|
685 |
42734
|
eri.katsar
|
return ENCLOSING + year + ENCLOSING + DELIM;
|
686 |
27955
|
claudio.at
|
|
687 |
41790
|
eri.katsar
|
}
|
688 |
30043
|
eri.katsar
|
|
689 |
42734
|
eri.katsar
|
private static String cleanNumber(String number) {
|
690 |
41790
|
eri.katsar
|
number = number.replaceAll("[^A-Za-z0-9:,____]", "");
|
691 |
|
|
return number;
|
692 |
|
|
}
|
693 |
30043
|
eri.katsar
|
|
694 |
43739
|
tsampikos.
|
private static String getLatLongField(String data, String DELIM, String ENCLOSING) {
|
695 |
|
|
|
696 |
|
|
if (data == null || data.isEmpty()) return ENCLOSING + "null" + ENCLOSING + DELIM;
|
697 |
|
|
|
698 |
|
|
return ENCLOSING + data.replaceAll("[^-0-9.]+", "") + ENCLOSING + DELIM;
|
699 |
|
|
|
700 |
|
|
}
|
701 |
|
|
|
702 |
42734
|
eri.katsar
|
private static String getStringField(String data, String DELIM, String ENCLOSING) {
|
703 |
30977
|
eri.katsar
|
|
704 |
42734
|
eri.katsar
|
if (data == null || data.isEmpty()) return ENCLOSING + "null" + ENCLOSING + DELIM;
|
705 |
27955
|
claudio.at
|
|
706 |
42734
|
eri.katsar
|
return ENCLOSING + clean(data, DELIM, ENCLOSING) + ENCLOSING + DELIM;
|
707 |
27955
|
claudio.at
|
|
708 |
41790
|
eri.katsar
|
}
|
709 |
27955
|
claudio.at
|
|
710 |
42734
|
eri.katsar
|
private static String getStringDateField(String data, String DELIM, String ENCLOSING) {
|
711 |
41790
|
eri.katsar
|
if (data == null || data.isEmpty() || data.equals("") || data.equals("-1")) {
|
712 |
42734
|
eri.katsar
|
return ENCLOSING + "0" + ENCLOSING + DELIM;
|
713 |
41790
|
eri.katsar
|
} else {
|
714 |
42734
|
eri.katsar
|
data = data.replace(DELIM, " ");
|
715 |
|
|
data = data.replace(ENCLOSING, " ");
|
716 |
53034
|
tsampikos.
|
data = data.replaceAll("\\r\\n|\\r|\\n", "");
|
717 |
54431
|
tsampikos.
|
try {
|
718 |
|
|
DateFormat format = new SimpleDateFormat("yyyy-MM-dd");
|
719 |
|
|
data = format.format(format.parse(data));
|
720 |
|
|
return ENCLOSING + data + ENCLOSING + DELIM;
|
721 |
|
|
} catch (ParseException e) {
|
722 |
|
|
return ENCLOSING + "0" + ENCLOSING + DELIM;
|
723 |
|
|
}
|
724 |
41790
|
eri.katsar
|
}
|
725 |
|
|
}
|
726 |
27955
|
claudio.at
|
|
727 |
42734
|
eri.katsar
|
private static String getNumericField(String data, String DELIM, String ENCLOSING) {
|
728 |
41790
|
eri.katsar
|
if (data == null || data.isEmpty() || data.equals("")) {
|
729 |
42734
|
eri.katsar
|
return ENCLOSING + "0" + ENCLOSING + DELIM;
|
730 |
41790
|
eri.katsar
|
} else {
|
731 |
42734
|
eri.katsar
|
return ENCLOSING + data + ENCLOSING + DELIM;
|
732 |
41790
|
eri.katsar
|
}
|
733 |
|
|
}
|
734 |
29634
|
eri.katsar
|
|
735 |
42734
|
eri.katsar
|
public static String getId(Oaf oaf, String DELIM, String ENCLOSING) {
|
736 |
41790
|
eri.katsar
|
switch (oaf.getKind()) {
|
737 |
|
|
case entity:
|
738 |
42734
|
eri.katsar
|
return cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING);
|
739 |
41790
|
eri.katsar
|
case relation:
|
740 |
36689
|
eri.katsar
|
|
741 |
42734
|
eri.katsar
|
return cleanId(oaf.getRel().getSource(), DELIM, ENCLOSING);
|
742 |
36689
|
eri.katsar
|
|
743 |
41790
|
eri.katsar
|
}
|
744 |
|
|
return null;
|
745 |
27955
|
claudio.at
|
|
746 |
41790
|
eri.katsar
|
}
|
747 |
27955
|
claudio.at
|
|
748 |
42734
|
eri.katsar
|
public static String getId(OafRel relOaf, String DELIM, String ENCLOSING) {
|
749 |
|
|
return cleanId(relOaf.getSource(), DELIM, ENCLOSING);
|
750 |
41790
|
eri.katsar
|
}
|
751 |
27955
|
claudio.at
|
|
752 |
42734
|
eri.katsar
|
public static String clean(String value, String DELIM, String ENCLOSING) {
|
753 |
41790
|
eri.katsar
|
if (value != null) {
|
754 |
36689
|
eri.katsar
|
|
755 |
42734
|
eri.katsar
|
value = value.replaceAll("[\"\\r\\\\;]", "");
|
756 |
|
|
value = value.replace(DELIM, " ");
|
757 |
|
|
value = value.replace(ENCLOSING, " ");
|
758 |
47371
|
tsampikos.
|
value = value.replaceAll("\\r\\n|\\r|\\n", " ");
|
759 |
|
|
//value = value.replace("\"", "");
|
760 |
|
|
//value = value.replace("'", "");
|
761 |
|
|
//value = value.replace("«", " ");
|
762 |
|
|
//value = value.replace("»", " ");
|
763 |
36689
|
eri.katsar
|
|
764 |
47371
|
tsampikos.
|
//value = value.replaceAll("[^a-zA-Z0-9 .-_:/!@+=]+", " ");
|
765 |
27955
|
claudio.at
|
|
766 |
42734
|
eri.katsar
|
return value;
|
767 |
27955
|
claudio.at
|
|
768 |
42734
|
eri.katsar
|
} else {
|
769 |
|
|
return "";
|
770 |
|
|
|
771 |
41790
|
eri.katsar
|
}
|
772 |
27955
|
claudio.at
|
|
773 |
41790
|
eri.katsar
|
}
|
774 |
27955
|
claudio.at
|
|
775 |
42734
|
eri.katsar
|
public static String cleanId(String value, String DELIM, String ENCLOSING) {
|
776 |
41790
|
eri.katsar
|
if (value != null) {
|
777 |
|
|
// TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____::
|
778 |
27955
|
claudio.at
|
|
779 |
41790
|
eri.katsar
|
// to datacite____:: )
|
780 |
|
|
// AND REPLACES OCCURENCES OF DELIM CHARS IN DATA
|
781 |
|
|
value = value.replaceFirst(".*\\|", "");
|
782 |
42734
|
eri.katsar
|
value = value.replace("\n", "");
|
783 |
|
|
value = value.replace(ENCLOSING, "");
|
784 |
|
|
value = value.replace(DELIM, "");
|
785 |
|
|
value = value.replace("\"", "");
|
786 |
|
|
value = value.replace("«", " ");
|
787 |
|
|
value = value.replace("»", " ");
|
788 |
41790
|
eri.katsar
|
}
|
789 |
29336
|
eri.katsar
|
|
790 |
42734
|
eri.katsar
|
return ENCLOSING + value + ENCLOSING;
|
791 |
|
|
|
792 |
41790
|
eri.katsar
|
}
|
793 |
31900
|
eri.katsar
|
|
794 |
42734
|
eri.katsar
|
|
795 |
|
|
public static String cleanUrl(String value, String DELIM, String ENCLOSING) {
|
796 |
|
|
value = value.replace(DELIM, " ");
|
797 |
|
|
value = value.replace(ENCLOSING, " ");
|
798 |
|
|
value = value.replace(" ", "");
|
799 |
|
|
value = value.replace("\n", "");
|
800 |
|
|
return value;
|
801 |
|
|
}
|
802 |
|
|
|
803 |
|
|
|
804 |
|
|
public static long DATEDIFF(String startDate, String endDate) {
|
805 |
41790
|
eri.katsar
|
long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000;
|
806 |
|
|
long days = 0l;
|
807 |
|
|
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); // "dd/MM/yyyy HH:mm:ss");
|
808 |
|
|
// <startdate>2011-09-01</startdate>
|
809 |
|
|
// <enddate>2015-08-31</enddate>
|
810 |
|
|
Date dateIni = null;
|
811 |
|
|
Date dateFin = null;
|
812 |
37693
|
eri.katsar
|
|
813 |
41790
|
eri.katsar
|
if (startDate == null || startDate.isEmpty() || endDate == null || endDate.isEmpty()) {
|
814 |
|
|
return 0;
|
815 |
|
|
}
|
816 |
|
|
try {
|
817 |
|
|
dateIni = (Date) format.parse(startDate);
|
818 |
|
|
dateFin = (Date) format.parse(endDate);
|
819 |
|
|
days = (dateFin.getTime() - dateIni.getTime()) / MILLISECS_PER_DAY;
|
820 |
|
|
} catch (Exception e) {
|
821 |
42734
|
eri.katsar
|
|
822 |
41790
|
eri.katsar
|
return 0;
|
823 |
|
|
}
|
824 |
37693
|
eri.katsar
|
|
825 |
41790
|
eri.katsar
|
return days;
|
826 |
|
|
}
|
827 |
37693
|
eri.katsar
|
|
828 |
|
|
|
829 |
27955
|
claudio.at
|
}
|