Revision 56504
Added by Antonis Lempesis over 4 years ago
Serializer.java | ||
---|---|---|
2 | 2 |
|
3 | 3 |
import com.google.common.collect.Multimap; |
4 | 4 |
|
5 |
|
|
6 | 5 |
import eu.dnetlib.data.mapreduce.util.LicenseComparator; |
7 | 6 |
import eu.dnetlib.data.proto.DatasourceProtos.Datasource; |
8 | 7 |
import eu.dnetlib.data.proto.DatasourceProtos.Datasource.Metadata; |
... | ... | |
14 | 13 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
15 | 14 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
16 | 15 |
import eu.dnetlib.data.proto.OrganizationProtos.Organization; |
17 |
//import eu.dnetlib.data.proto.PersonProtos; |
|
18 | 16 |
import eu.dnetlib.data.proto.ProjectProtos.Project; |
19 | 17 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
20 | 18 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
... | ... | |
25 | 23 |
import java.text.DateFormat; |
26 | 24 |
import java.text.ParseException; |
27 | 25 |
import java.text.SimpleDateFormat; |
28 |
import java.util.ArrayList; |
|
29 | 26 |
import java.util.Date; |
30 |
import java.util.HashMap; |
|
31 | 27 |
import java.util.List; |
32 | 28 |
|
33 | 29 |
import org.w3c.dom.Element; |
... | ... | |
43 | 39 |
public class Serializer { |
44 | 40 |
private static Logger logger = Logger.getLogger(Serializer.class); |
45 | 41 |
|
46 |
public static String serialize(Oaf oaf, String DELIM, String ENCLOSING) { |
|
42 |
private String DELIM; |
|
43 |
private String ENCLOSING; |
|
47 | 44 |
|
45 |
public Serializer(String DELIM, String ENCLOSING) { |
|
46 |
this.DELIM = DELIM; |
|
47 |
this.ENCLOSING = ENCLOSING; |
|
48 |
} |
|
49 |
|
|
50 |
public String serialize(Oaf oaf) { |
|
51 |
|
|
48 | 52 |
switch (oaf.getKind()) { |
49 | 53 |
case entity: |
50 | 54 |
OafEntity valueEntity = oaf.getEntity(); |
... | ... | |
52 | 56 |
switch (valueEntity.getType()) { |
53 | 57 |
case datasource: |
54 | 58 |
|
55 |
return buildDatasource(oaf, DELIM, ENCLOSING);
|
|
59 |
return buildDatasource(oaf); |
|
56 | 60 |
|
57 | 61 |
case organization: |
58 | 62 |
|
59 |
return buildOrganization(oaf, DELIM, ENCLOSING);
|
|
63 |
return buildOrganization(oaf); |
|
60 | 64 |
|
61 | 65 |
case project: |
62 | 66 |
|
63 |
return buildProject(oaf, DELIM, ENCLOSING);
|
|
67 |
return buildProject(oaf); |
|
64 | 68 |
case result: |
65 | 69 |
|
66 |
return buildResult(oaf, DELIM, ENCLOSING);
|
|
70 |
return buildResult(oaf); |
|
67 | 71 |
/* |
68 | 72 |
case person: |
69 | 73 |
return buildPerson(oaf, DELIM, ENCLOSING); |
... | ... | |
74 | 78 |
break; |
75 | 79 |
case relation: |
76 | 80 |
|
77 |
return buildRel(oaf.getRel(), DELIM, ENCLOSING);
|
|
81 |
return buildRel(oaf.getRel()); |
|
78 | 82 |
|
79 | 83 |
} |
80 | 84 |
return null; |
81 | 85 |
} |
82 | 86 |
|
83 |
public static String serialize(OafRel oaf, String DELIM, String ENCLOSING) {
|
|
87 |
public String serialize(OafRel oaf) {
|
|
84 | 88 |
|
85 | 89 |
switch (oaf.getRelType()) { |
86 | 90 |
case resultProject: |
87 |
return getResultProject(oaf, DELIM, ENCLOSING);
|
|
91 |
return getResultProject(oaf); |
|
88 | 92 |
default: |
89 |
return buildRel(oaf, DELIM, ENCLOSING);
|
|
93 |
return buildRel(oaf); |
|
90 | 94 |
} |
91 | 95 |
} |
92 | 96 |
|
93 |
private static String buildRel(OafRel Rel, String DELIM, String ENCLOSING) {
|
|
97 |
private String buildRel(OafRel Rel) {
|
|
94 | 98 |
|
95 |
return cleanId(Rel.getTarget(), DELIM, ENCLOSING) + DELIM;
|
|
99 |
return cleanId(Rel.getTarget()) + DELIM; |
|
96 | 100 |
|
97 | 101 |
} |
98 | 102 |
|
99 |
public static void extractRelations(Oaf oaf, String DELIM, String ENCLOSING, Multimap<String, String> relations) {
|
|
103 |
public void extractRelations(Oaf oaf, Multimap<String, String> relations) {
|
|
100 | 104 |
OafEntity valueEntity = oaf.getEntity(); |
101 |
getOriginalId(valueEntity, relations, DELIM, ENCLOSING);
|
|
105 |
getOriginalId(valueEntity, relations); |
|
102 | 106 |
|
103 | 107 |
switch (valueEntity.getType()) { |
104 | 108 |
case datasource: |
105 |
getDatasourceLanguages(valueEntity, relations, DELIM, ENCLOSING);
|
|
106 |
// getDatasourceWebsite(valueEntity, relations, DELIM, ENCLOSING);
|
|
109 |
getDatasourceLanguages(valueEntity, relations); |
|
110 |
// getDatasourceWebsite(valueEntity, relations); |
|
107 | 111 |
case result: |
108 |
getResultTopics(valueEntity, relations, DELIM, ENCLOSING);
|
|
109 |
getResultLanguages(valueEntity, relations, DELIM, ENCLOSING);
|
|
110 |
getResultClassifications(valueEntity, relations, DELIM, ENCLOSING);
|
|
111 |
getResultDatasources(valueEntity, relations, DELIM, ENCLOSING);
|
|
112 |
getResultConcepts(valueEntity, relations, DELIM, ENCLOSING);
|
|
113 |
getResultDois(valueEntity, relations, DELIM, ENCLOSING);
|
|
114 |
getResultCitations(valueEntity, relations, DELIM, ENCLOSING);
|
|
112 |
getResultTopics(valueEntity, relations); |
|
113 |
getResultLanguages(valueEntity, relations); |
|
114 |
getResultClassifications(valueEntity, relations); |
|
115 |
getResultDatasources(valueEntity, relations); |
|
116 |
getResultConcepts(valueEntity, relations); |
|
117 |
getResultDois(valueEntity, relations); |
|
118 |
getResultCitations(valueEntity, relations); |
|
115 | 119 |
// getResultDescriptions(valueEntity, relations, DELIM, ENCLOSING); |
116 | 120 |
// getResultExtra(valueEntity, relations, DELIM, ENCLOSING); |
117 | 121 |
|
118 | 122 |
case project: |
119 |
getProjectKeywords(valueEntity, relations, DELIM, ENCLOSING);
|
|
120 |
getProjectSubjects(valueEntity, relations, DELIM, ENCLOSING);
|
|
123 |
getProjectKeywords(valueEntity, relations); |
|
124 |
getProjectSubjects(valueEntity, relations); |
|
121 | 125 |
|
122 | 126 |
default: |
123 | 127 |
} |
124 | 128 |
|
125 | 129 |
} |
126 | 130 |
|
127 |
private static void getDatasourceWebsite(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
|
|
131 |
private void getDatasourceWebsite(OafEntity valueEntity, Multimap<String, String> relations) {
|
|
128 | 132 |
Datasource d = valueEntity.getDatasource(); |
129 | 133 |
Metadata metadata = d.getMetadata(); |
130 | 134 |
|
131 |
relations.put("datasourceWebsite", getStringField(metadata.getWebsiteurl().getValue(), DELIM, ENCLOSING));
|
|
135 |
relations.put("datasourceWebsite", getStringField(metadata.getWebsiteurl().getValue())); |
|
132 | 136 |
} |
133 | 137 |
|
134 |
private static void getResultExtra(OafEntity valueEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
|
|
138 |
private void getResultExtra(OafEntity valueEntity, Multimap<String, String> relations) {
|
|
135 | 139 |
Result result = valueEntity.getResult(); |
136 | 140 |
Result.Metadata metadata = result.getMetadata(); |
137 | 141 |
|
... | ... | |
147 | 151 |
} |
148 | 152 |
|
149 | 153 |
// pubtitle |
150 |
buff.append(getStringField(titleString, DELIM, ENCLOSING));
|
|
154 |
buff.append(getStringField(titleString)); |
|
151 | 155 |
|
152 | 156 |
String sources = ""; |
153 | 157 |
for (Instance instance : (result.getInstanceList())) { |
154 | 158 |
List<String> urls = instance.getUrlList(); |
155 | 159 |
for (String url : urls) { |
156 |
sources += cleanUrl(url, DELIM, ENCLOSING) + " ;";
|
|
160 |
sources += cleanUrl(url) + " ;"; |
|
157 | 161 |
} |
158 | 162 |
} |
159 | 163 |
|
... | ... | |
164 | 168 |
relations.put("resultExtra", buff.toString()); |
165 | 169 |
} |
166 | 170 |
|
167 |
private static void getOriginalId(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
|
|
171 |
private void getOriginalId(OafEntity oafEntity, Multimap<String, String> relations) {
|
|
168 | 172 |
|
169 | 173 |
String relName = oafEntity.getType().toString().toLowerCase() + "Oid"; |
170 | 174 |
for (String oid : oafEntity.getOriginalIdList()) { |
171 |
relations.put(relName, cleanId(oid, DELIM, ENCLOSING));
|
|
175 |
relations.put(relName, cleanId(oid)); |
|
172 | 176 |
} |
173 | 177 |
|
174 | 178 |
} |
175 | 179 |
|
176 |
private static void getProjectKeywords(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
|
|
177 |
relations.put("projectKeyword", getStringField(oafEntity.getProject().getMetadata().getKeywords().getValue(), DELIM, ENCLOSING));
|
|
180 |
private void getProjectKeywords(OafEntity oafEntity, Multimap<String, String> relations) {
|
|
181 |
relations.put("projectKeyword", getStringField(oafEntity.getProject().getMetadata().getKeywords().getValue())); |
|
178 | 182 |
|
179 | 183 |
} |
180 | 184 |
|
181 |
private static void getProjectSubjects(OafEntity oafEntity, Multimap<String, String> relations, String DELIM, String ENCLOSING) {
|
|
185 |
private void getProjectSubjects(OafEntity oafEntity, Multimap<String, String> relations) {
|
|
182 | 186 |
for (StructuredProperty subj : oafEntity.getProject().getMetadata().getSubjectsList()) { |
183 |
relations.put("projectSubject", getStringField(subj.getValue(), DELIM, ENCLOSING));
|
|
187 |
relations.put("projectSubject", getStringField(subj.getValue())); |
|
184 | 188 |
} |
185 | 189 |
} |
186 | 190 |
|
187 |
private static String getResultProject(OafRel oaf, String DELIM, String ENCLOSING) {
|
|
191 |
private String getResultProject(OafRel oaf) {
|
|
188 | 192 |
StringBuilder buff = new StringBuilder(); |
189 |
buff.append(cleanId(oaf.getTarget(), DELIM, ENCLOSING) + DELIM);
|
|
193 |
buff.append(cleanId(oaf.getTarget()) + DELIM); |
|
190 | 194 |
// TODO is declared as int!!! |
191 | 195 |
long diff = DATEDIFF(oaf.getResultProject().getOutcome().getRelMetadata().getEnddate(), oaf.getResultProject().getOutcome().getRelMetadata().getStartdate()); |
192 | 196 |
if (diff < 0) { |
193 | 197 |
diff = 0; |
194 | 198 |
} |
195 | 199 |
|
196 |
buff.append(getNumericField(String.valueOf(diff), DELIM, ENCLOSING));
|
|
200 |
buff.append(getNumericField(String.valueOf(diff))); |
|
197 | 201 |
return buff.toString(); |
198 | 202 |
} |
199 | 203 |
|
200 | 204 |
|
201 |
private static void getDatasourceLanguages(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
|
205 |
private void getDatasourceLanguages(OafEntity valueEntity, Multimap<String, String> rels) {
|
|
202 | 206 |
Datasource d = valueEntity.getDatasource(); |
203 | 207 |
Metadata metadata = d.getMetadata(); |
204 | 208 |
|
205 | 209 |
for (StringField lang : metadata.getOdlanguagesList()) { |
206 |
rels.put("datasourceLanguage", getStringField(lang.getValue(), DELIM, ENCLOSING));
|
|
210 |
rels.put("datasourceLanguage", getStringField(lang.getValue())); |
|
207 | 211 |
} |
208 | 212 |
} |
209 | 213 |
|
210 |
private static void getResultLanguages(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
|
214 |
private void getResultLanguages(OafEntity valueEntity, Multimap<String, String> rels) {
|
|
211 | 215 |
|
212 | 216 |
Result d = valueEntity.getResult(); |
213 | 217 |
Result.Metadata metadata = d.getMetadata(); |
214 | 218 |
if (metadata.getLanguage().getClassname() != null && !metadata.getLanguage().getClassname().isEmpty()) { |
215 |
rels.put("resultLanguage", getStringField(metadata.getLanguage().getClassname(), DELIM, ENCLOSING));
|
|
219 |
rels.put("resultLanguage", getStringField(metadata.getLanguage().getClassname())); |
|
216 | 220 |
} |
217 | 221 |
|
218 | 222 |
} |
219 | 223 |
|
220 |
private static void getResultDois(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
|
224 |
private void getResultDois(OafEntity valueEntity, Multimap<String, String> rels) {
|
|
221 | 225 |
|
222 | 226 |
for (StructuredProperty pid : valueEntity.getPidList()) { |
223 |
|
|
224 |
rels.put("resultPid", |
|
225 |
getStringField(pid.getQualifier().getClassname(), DELIM, ENCLOSING) + getStringField(pid.getValue(), DELIM, ENCLOSING)); |
|
227 |
rels.put("resultPid", getStringField(pid.getQualifier().getClassname()) + getStringField(pid.getValue())); |
|
226 | 228 |
} |
227 | 229 |
} |
228 | 230 |
|
229 |
private static void getResultClassifications(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
|
231 |
private void getResultClassifications(OafEntity valueEntity, Multimap<String, String> rels) {
|
|
230 | 232 |
|
231 | 233 |
Result result = valueEntity.getResult(); |
232 | 234 |
|
233 | 235 |
for (Instance instance : (result.getInstanceList())) { |
234 | 236 |
String classification = instance.getInstancetype().getClassname(); |
237 |
|
|
235 | 238 |
if (classification != null && !classification.isEmpty()) { |
236 |
rels.put("resultClassification", getStringField(instance.getInstancetype().getClassname(), DELIM, ENCLOSING)); |
|
237 |
// TODO HERE KEEP ONLY ONE CLASSIFICATIONS PER RESULT |
|
238 |
// break; |
|
239 |
rels.put("resultClassification", getStringField(instance.getInstancetype().getClassname())); |
|
239 | 240 |
} |
240 | 241 |
} |
241 | 242 |
} |
242 | 243 |
|
243 |
private static void getResultDescriptions(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
|
244 |
private void getResultDescriptions(OafEntity valueEntity, Multimap<String, String> rels) {
|
|
244 | 245 |
Result result = valueEntity.getResult(); |
245 |
//description |
|
246 |
|
|
246 | 247 |
for (StringField s : result.getMetadata().getDescriptionList()) { |
247 |
|
|
248 |
rels.put("resultDescription", getStringField(Jsoup.parse(s.getValue()).text(), DELIM, ENCLOSING)); |
|
248 |
rels.put("resultDescription", getStringField(Jsoup.parse(s.getValue()).text())); |
|
249 | 249 |
} |
250 | 250 |
} |
251 | 251 |
|
252 |
private static void getResultConcepts(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) { |
|
253 |
|
|
252 |
private void getResultConcepts(OafEntity valueEntity, Multimap<String, String> rels) { |
|
254 | 253 |
Result result = valueEntity.getResult(); |
255 | 254 |
|
256 |
|
|
257 | 255 |
for (Result.Context context : result.getMetadata().getContextList()) { |
258 |
|
|
259 |
rels.put("resultConcept", cleanId(context.getId(), DELIM, ENCLOSING)); |
|
256 |
rels.put("resultConcept", cleanId(context.getId())); |
|
260 | 257 |
} |
261 | 258 |
} |
262 | 259 |
|
263 |
private static void getResultDatasources(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
|
260 |
private void getResultDatasources(OafEntity valueEntity, Multimap<String, String> rels) {
|
|
264 | 261 |
Result result = valueEntity.getResult(); |
265 | 262 |
|
266 | 263 |
//TODO hosted by |
267 | 264 |
for (Instance instance : (result.getInstanceList())) { |
268 | 265 |
String hostedBy = instance.getHostedby().getKey(); |
266 |
|
|
269 | 267 |
if (hostedBy != null && !hostedBy.isEmpty()) { |
270 |
rels.put("resultDatasource", cleanId(hostedBy, DELIM, ENCLOSING) + DELIM);
|
|
268 |
rels.put("resultDatasource", cleanId(hostedBy) + DELIM); |
|
271 | 269 |
} |
272 | 270 |
} |
273 | 271 |
|
274 | 272 |
//TODO collected froms |
275 | 273 |
for (FieldTypeProtos.KeyValue collectedFromValue : (valueEntity.getCollectedfromList())) { |
274 |
String collectedFrom = collectedFromValue.getKey(); |
|
276 | 275 |
|
277 |
String collectedFrom = collectedFromValue.getKey(); |
|
278 | 276 |
if (collectedFrom != null && !collectedFrom.isEmpty()) { |
279 |
rels.put("resultDatasource", cleanId(collectedFrom, DELIM, ENCLOSING) + DELIM);
|
|
277 |
rels.put("resultDatasource", cleanId(collectedFrom) + DELIM); |
|
280 | 278 |
} |
281 | 279 |
} |
282 | 280 |
} |
283 | 281 |
|
284 |
private static void getResultTopics(OafEntity valueEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) { |
|
285 |
|
|
282 |
private void getResultTopics(OafEntity valueEntity, Multimap<String, String> rels) { |
|
286 | 283 |
Result d = valueEntity.getResult(); |
287 | 284 |
Result.Metadata metadata = d.getMetadata(); |
288 |
|
|
289 | 285 |
List<StructuredProperty> Topics = metadata.getSubjectList(); |
290 | 286 |
|
291 | 287 |
for (StructuredProperty topic : Topics) { |
292 |
// TODO result topics |
|
293 |
rels.put("resultTopic", getStringField(topic.getValue(), DELIM, ENCLOSING)); |
|
288 |
rels.put("resultTopic", getStringField(topic.getValue())); |
|
294 | 289 |
} |
295 | 290 |
} |
296 | 291 |
|
297 | 292 |
|
298 |
private static void getResultCitations(OafEntity oafEntity, Multimap<String, String> rels, String DELIM, String ENCLOSING) {
|
|
293 |
private void getResultCitations(OafEntity oafEntity, Multimap<String, String> rels) {
|
|
299 | 294 |
for (FieldTypeProtos.ExtraInfo extraInfo : oafEntity.getExtraInfoList()) { |
300 | 295 |
if (extraInfo.getName().equals("result citations")) { |
301 | 296 |
DOMParser parser = new DOMParser(); |
... | ... | |
329 | 324 |
} |
330 | 325 |
} |
331 | 326 |
|
332 |
private static String buildDatasource(Oaf oaf, String DELIM, String ENCLOSING) {
|
|
327 |
private String buildDatasource(Oaf oaf) {
|
|
333 | 328 |
Metadata metadata = oaf.getEntity().getDatasource().getMetadata(); |
334 | 329 |
StringBuilder buff = new StringBuilder(); |
335 | 330 |
|
336 | 331 |
// name |
337 | 332 |
if (metadata.getOfficialname().getValue().equalsIgnoreCase("unknown")) { |
338 |
buff.append(getStringField("Unknown Repository", DELIM, ENCLOSING));
|
|
333 |
buff.append(getStringField("Unknown Repository")); |
|
339 | 334 |
} else { |
340 |
buff.append(getStringField(metadata.getOfficialname().getValue(), DELIM, ENCLOSING));
|
|
335 |
buff.append(getStringField(metadata.getOfficialname().getValue())); |
|
341 | 336 |
} |
342 | 337 |
|
343 | 338 |
// type |
344 | 339 |
if (metadata.hasDatasourcetype()) { |
345 |
buff.append(getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""), DELIM, ENCLOSING));
|
|
340 |
buff.append(getStringField(metadata.getDatasourcetype().getClassname().replaceFirst(".*::", ""))); |
|
346 | 341 |
} |
347 | 342 |
|
348 | 343 |
// compatibility, |
349 |
buff.append(getStringField(metadata.getOpenairecompatibility().getClassname(), DELIM, ENCLOSING));
|
|
344 |
buff.append(getStringField(metadata.getOpenairecompatibility().getClassname())); |
|
350 | 345 |
|
351 | 346 |
// latitude |
352 |
buff.append(getLatLongField(metadata.getLatitude().getValue(), DELIM, ENCLOSING));
|
|
347 |
buff.append(getLatLongField(metadata.getLatitude().getValue())); |
|
353 | 348 |
|
354 | 349 |
// longtitude |
355 |
buff.append(getLatLongField(metadata.getLongitude().getValue(), DELIM, ENCLOSING));
|
|
350 |
buff.append(getLatLongField(metadata.getLongitude().getValue())); |
|
356 | 351 |
|
357 | 352 |
// dateofvalidation, |
358 |
buff.append(getStringDateField(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING));
|
|
353 |
buff.append(getStringDateField(metadata.getDateofvalidation().getValue())); |
|
359 | 354 |
|
360 | 355 |
// yearofvalidation, |
361 |
buff.append(getYearInt(metadata.getDateofvalidation().getValue(), DELIM, ENCLOSING));
|
|
356 |
buff.append(getYearInt(metadata.getDateofvalidation().getValue())); |
|
362 | 357 |
|
363 | 358 |
//harvested |
364 |
buff.append(getStringField("false", DELIM, ENCLOSING));
|
|
359 |
buff.append(getStringField("false")); |
|
365 | 360 |
|
366 | 361 |
//piwik_id |
367 | 362 |
String piwik_id = ""; |
... | ... | |
371 | 366 |
break; |
372 | 367 |
} |
373 | 368 |
} |
374 |
buff.append(getStringField(cleanNumber(piwik_id), DELIM, ENCLOSING));
|
|
369 |
buff.append(getStringField(cleanNumber(piwik_id))); |
|
375 | 370 |
|
376 | 371 |
return buff.toString(); |
377 | 372 |
|
378 | 373 |
} |
379 | 374 |
|
380 |
private static String buildOrganization(Oaf oaf, String DELIM, String ENCLOSING) {
|
|
375 |
private String buildOrganization(Oaf oaf) {
|
|
381 | 376 |
|
382 | 377 |
StringBuilder buff = new StringBuilder(); |
383 | 378 |
Organization.Metadata metadata = oaf.getEntity().getOrganization().getMetadata(); |
384 | 379 |
|
385 | 380 |
// `name`, |
386 |
buff.append(getStringField(metadata.getLegalname().getValue(), DELIM, ENCLOSING));
|
|
381 |
buff.append(getStringField(metadata.getLegalname().getValue())); |
|
387 | 382 |
|
388 | 383 |
// `country`, |
389 |
buff.append(getStringField(metadata.getCountry().getClassid(), DELIM, ENCLOSING));
|
|
384 |
buff.append(getStringField(metadata.getCountry().getClassid())); |
|
390 | 385 |
|
391 | 386 |
return buff.toString(); |
392 | 387 |
} |
393 | 388 |
|
394 |
private static String buildResult(Oaf oaf, String DELIM, String ENCLOSING) {
|
|
389 |
private String buildResult(Oaf oaf) {
|
|
395 | 390 |
StringBuilder buff = new StringBuilder(); |
396 | 391 |
|
397 | 392 |
Result.Metadata metadata = oaf.getEntity().getResult().getMetadata(); |
... | ... | |
409 | 404 |
} |
410 | 405 |
|
411 | 406 |
// pubtitle |
412 |
buff.append(getStringField(titleString, DELIM, ENCLOSING));
|
|
407 |
buff.append(getStringField(titleString)); |
|
413 | 408 |
|
414 | 409 |
|
415 | 410 |
// publisher |
416 |
buff.append(getStringField(metadata.getPublisher().getValue(), DELIM, ENCLOSING));
|
|
411 |
buff.append(getStringField(metadata.getPublisher().getValue())); |
|
417 | 412 |
|
418 | 413 |
// journal |
419 |
buff.append(getStringField(metadata.getJournal().getName(), DELIM, ENCLOSING)); //#null#!
|
|
414 |
buff.append(getStringField(metadata.getJournal().getName())); //#null#! |
|
420 | 415 |
|
421 | 416 |
// year |
422 |
buff.append(getYearInt(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING));
|
|
417 |
buff.append(getYearInt(metadata.getDateofacceptance().getValue())); |
|
423 | 418 |
|
424 | 419 |
// date |
425 |
buff.append(getStringDateField(metadata.getDateofacceptance().getValue(), DELIM, ENCLOSING));
|
|
420 |
buff.append(getStringDateField(metadata.getDateofacceptance().getValue())); |
|
426 | 421 |
|
427 | 422 |
// bestlicense |
428 |
buff.append(getStringField(getBestLicense(oaf.getEntity().getResult()), DELIM, ENCLOSING));
|
|
423 |
buff.append(getStringField(getBestLicense(oaf.getEntity().getResult()))); |
|
429 | 424 |
|
430 | 425 |
// type |
431 |
buff.append(getStringField(metadata.getResulttype().getClassname(), DELIM, ENCLOSING));
|
|
426 |
buff.append(getStringField(metadata.getResulttype().getClassname())); |
|
432 | 427 |
|
433 | 428 |
// embargo_end_date |
434 |
buff.append(getStringDateField(metadata.getEmbargoenddate().getValue(), DELIM, ENCLOSING));
|
|
429 |
buff.append(getStringDateField(metadata.getEmbargoenddate().getValue())); |
|
435 | 430 |
|
436 | 431 |
// `authors`, |
437 | 432 |
int authors = metadata.getAuthorCount(); |
... | ... | |
452 | 447 |
// result. |
453 | 448 |
{ |
454 | 449 |
String daysfromend = getYearDifferenceInteger(rel.getResultProject().getOutcome().getRelMetadata().getEnddate(), |
455 |
rel.getResultProject().getOutcome().getRelMetadata().getStartdate(), DELIM, ENCLOSING);
|
|
450 |
rel.getResultProject().getOutcome().getRelMetadata().getStartdate()); |
|
456 | 451 |
if (Integer.parseInt(daysfromend) > 0) { |
457 | 452 |
delayed = "yes"; |
458 | 453 |
} |
... | ... | |
460 | 455 |
} |
461 | 456 |
|
462 | 457 |
// `delayed`, |
463 |
buff.append(getStringField(delayed, DELIM, ENCLOSING));
|
|
458 |
buff.append(getStringField(delayed)); |
|
464 | 459 |
//authors |
465 |
buff.append(getNumericField(String.valueOf(authors), DELIM, ENCLOSING));
|
|
460 |
buff.append(getNumericField(String.valueOf(authors))); |
|
466 | 461 |
|
467 | 462 |
String sources = new String(); |
468 | 463 |
|
... | ... | |
470 | 465 |
for (Instance instance : (oaf.getEntity().getResult().getInstanceList())) { |
471 | 466 |
List<String> urls = instance.getUrlList(); |
472 | 467 |
for (String url : urls) { |
473 |
sources += cleanUrl(url, DELIM, ENCLOSING) + " ;";
|
|
468 |
sources += cleanUrl(url) + " ;"; |
|
474 | 469 |
} |
475 | 470 |
} |
476 | 471 |
|
... | ... | |
515 | 510 |
} |
516 | 511 |
*/ |
517 | 512 |
|
518 |
private static String getBestLicense(Result result) {
|
|
513 |
private String getBestLicense(Result result) { |
|
519 | 514 |
Qualifier bestLicense = null; |
520 | 515 |
LicenseComparator lc = new LicenseComparator(); |
521 | 516 |
for (Instance instance : (result.getInstanceList())) { |
... | ... | |
531 | 526 |
} |
532 | 527 |
|
533 | 528 |
// TODO here iterate over all values |
534 |
private static String getAccessMode(Result result) {
|
|
529 |
private String getAccessMode(Result result) { |
|
535 | 530 |
String accessMode = " "; |
536 | 531 |
for (Instance instance : (result.getInstanceList())) { |
537 | 532 |
if (instance.getAccessright().getClassname() != null && !instance.getAccessright().getClassname().isEmpty()) { |
... | ... | |
544 | 539 |
return accessMode; |
545 | 540 |
} |
546 | 541 |
|
547 |
private static String buildProject(Oaf oaf, String DELIM, String ENCLOSING) {
|
|
542 |
private String buildProject(Oaf oaf) {
|
|
548 | 543 |
|
544 |
FundingParser fundingParser = new FundingParser(DELIM, ENCLOSING); |
|
549 | 545 |
StringBuilder buff = new StringBuilder(); |
550 | 546 |
Project.Metadata metadata = oaf.getEntity().getProject().getMetadata(); |
551 | 547 |
|
... | ... | |
554 | 550 |
if (acronym.equalsIgnoreCase("UNKNOWN")) { |
555 | 551 |
acronym = metadata.getTitle().getValue(); |
556 | 552 |
} |
557 |
buff.append(getStringField(acronym, DELIM, ENCLOSING));
|
|
553 |
buff.append(getStringField(acronym)); |
|
558 | 554 |
|
559 | 555 |
//title |
560 |
buff.append(getStringField(metadata.getTitle().getValue(), DELIM, ENCLOSING));
|
|
556 |
buff.append(getStringField(metadata.getTitle().getValue())); |
|
561 | 557 |
|
562 | 558 |
//funding_lvl |
563 | 559 |
List<StringField> fundList = metadata.getFundingtreeList(); |
... | ... | |
569 | 565 |
funding_lvl1 text, |
570 | 566 |
funding_lvl2 text, |
571 | 567 |
funding_lvl3 text,*/ |
572 |
buff.append(FundingParser.getFundingInfo(fundList.get(0).getValue(), DELIM, ENCLOSING));
|
|
568 |
buff.append(fundingParser.getFundingInfo(fundList.get(0).getValue()));
|
|
573 | 569 |
} else { |
574 |
buff.append(FundingParser.getFundingInfo("", DELIM, ENCLOSING));
|
|
570 |
buff.append(fundingParser.getFundingInfo(""));
|
|
575 | 571 |
} |
576 | 572 |
|
577 | 573 |
//sc39 |
... | ... | |
581 | 577 |
} else if (sc39.equalsIgnoreCase("false") || sc39.equalsIgnoreCase("f") || sc39.contains("no")) { |
582 | 578 |
sc39 = "no"; |
583 | 579 |
} |
584 |
buff.append(getStringField(sc39, DELIM, ENCLOSING));
|
|
580 |
buff.append(getStringField(sc39)); |
|
585 | 581 |
|
586 | 582 |
//project_type |
587 |
buff.append(getStringField(metadata.getContracttype().getClassid(),DELIM, ENCLOSING));
|
|
583 |
buff.append(getStringField(metadata.getContracttype().getClassid())); |
|
588 | 584 |
|
589 | 585 |
// start_year |
590 |
buff.append(getYearInt(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
|
|
586 |
buff.append(getYearInt(metadata.getStartdate().getValue())); |
|
591 | 587 |
|
592 | 588 |
// end_year |
593 |
buff.append(getYearInt(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
|
|
589 |
buff.append(getYearInt(metadata.getEnddate().getValue())); |
|
594 | 590 |
|
595 | 591 |
// duration enddate-startdate |
596 |
buff.append(getYearDifferenceInteger(metadata.getEnddate().getValue(), metadata.getStartdate().getValue(), DELIM, ENCLOSING));
|
|
592 |
buff.append(getYearDifferenceInteger(metadata.getEnddate().getValue(), metadata.getStartdate().getValue())); |
|
597 | 593 |
|
598 | 594 |
// haspubs |
599 |
buff.append(getStringField("no", DELIM, ENCLOSING));
|
|
595 |
buff.append(getStringField("no")); |
|
600 | 596 |
|
601 | 597 |
// numpubs |
602 |
buff.append(getNumericField("0", DELIM, ENCLOSING));
|
|
598 |
buff.append(getNumericField("0")); |
|
603 | 599 |
|
604 | 600 |
// enddate |
605 |
buff.append(getStringDateField(metadata.getEnddate().getValue(), DELIM, ENCLOSING));
|
|
601 |
buff.append(getStringDateField(metadata.getEnddate().getValue())); |
|
606 | 602 |
|
607 | 603 |
// startdate |
608 |
buff.append(getStringDateField(metadata.getStartdate().getValue(), DELIM, ENCLOSING));
|
|
604 |
buff.append(getStringDateField(metadata.getStartdate().getValue())); |
|
609 | 605 |
|
610 | 606 |
// `daysforlastpub`, |
611 |
buff.append(getNumericField("", DELIM, ENCLOSING));
|
|
607 |
buff.append(getNumericField("")); |
|
612 | 608 |
|
613 | 609 |
// `delayedpubs`, |
614 |
buff.append(getNumericField("", DELIM, ENCLOSING));
|
|
610 |
buff.append(getNumericField("")); |
|
615 | 611 |
|
616 | 612 |
//call identifier |
617 |
buff.append(getStringField(metadata.getCallidentifier().getValue(), DELIM, ENCLOSING));
|
|
613 |
buff.append(getStringField(metadata.getCallidentifier().getValue())); |
|
618 | 614 |
|
619 | 615 |
//code |
620 |
buff.append(getStringField(metadata.getCode().getValue(), DELIM, ENCLOSING));
|
|
616 |
buff.append(getStringField(metadata.getCode().getValue())); |
|
621 | 617 |
|
622 | 618 |
return buff.toString(); |
623 |
|
|
624 | 619 |
} |
625 | 620 |
|
626 | 621 |
/* |
... | ... | |
674 | 669 |
*/ |
675 | 670 |
|
676 | 671 |
|
677 |
private static String getYearDifferenceInteger(String enddate, String startdate, String DELIM, String ENCLOSING) {
|
|
672 |
private String getYearDifferenceInteger(String enddate, String startdate) {
|
|
678 | 673 |
|
679 | 674 |
if (enddate != null && !enddate.isEmpty() && startdate != null && !startdate.isEmpty()) { |
680 | 675 |
|
... | ... | |
703 | 698 |
return ENCLOSING + "0" + ENCLOSING + DELIM; |
704 | 699 |
} |
705 | 700 |
|
706 |
private static String getYearInt(String data, String DELIM, String ENCLOSING) {
|
|
701 |
private String getYearInt(String data) {
|
|
707 | 702 |
if (data == null || data.isEmpty() || data.equals("-1")) { |
708 | 703 |
return ENCLOSING + "0" + ENCLOSING + DELIM; |
709 | 704 |
} |
... | ... | |
724 | 719 |
|
725 | 720 |
} |
726 | 721 |
|
727 |
private static String cleanNumber(String number) {
|
|
722 |
private String cleanNumber(String number) { |
|
728 | 723 |
number = number.replaceAll("[^A-Za-z0-9:,____]", ""); |
729 | 724 |
return number; |
730 | 725 |
} |
731 | 726 |
|
732 |
private static String getLatLongField(String data, String DELIM, String ENCLOSING) {
|
|
727 |
private String getLatLongField(String data) {
|
|
733 | 728 |
|
734 |
if (data == null || data.isEmpty()) return ENCLOSING + "null" + ENCLOSING + DELIM; |
|
729 |
if (data == null || data.isEmpty()) |
|
730 |
return ENCLOSING + "null" + ENCLOSING + DELIM; |
|
735 | 731 |
|
736 | 732 |
return ENCLOSING + data.replaceAll("[^-0-9.]+", "") + ENCLOSING + DELIM; |
737 | 733 |
|
738 | 734 |
} |
739 | 735 |
|
740 |
private static String getStringField(String data, String DELIM, String ENCLOSING) {
|
|
736 |
private String getStringField(String data) {
|
|
741 | 737 |
|
742 |
if (data == null || data.isEmpty()) return ENCLOSING + "null" + ENCLOSING + DELIM; |
|
738 |
if (data == null || data.isEmpty()) |
|
739 |
return ENCLOSING + "null" + ENCLOSING + DELIM; |
|
743 | 740 |
|
744 |
return ENCLOSING + clean(data, DELIM, ENCLOSING) + ENCLOSING + DELIM;
|
|
741 |
return ENCLOSING + clean(data) + ENCLOSING + DELIM; |
|
745 | 742 |
|
746 | 743 |
} |
747 | 744 |
|
748 |
private static String getStringDateField(String data, String DELIM, String ENCLOSING) {
|
|
745 |
private String getStringDateField(String data) {
|
|
749 | 746 |
if (data == null || data.isEmpty() || data.equals("") || data.equals("-1")) { |
750 | 747 |
return ENCLOSING + "0" + ENCLOSING + DELIM; |
751 | 748 |
} else { |
... | ... | |
762 | 759 |
} |
763 | 760 |
} |
764 | 761 |
|
765 |
private static String getNumericField(String data, String DELIM, String ENCLOSING) {
|
|
762 |
private String getNumericField(String data) {
|
|
766 | 763 |
if (data == null || data.isEmpty() || data.equals("")) { |
767 | 764 |
return ENCLOSING + "0" + ENCLOSING + DELIM; |
768 | 765 |
} else { |
... | ... | |
770 | 767 |
} |
771 | 768 |
} |
772 | 769 |
|
773 |
public static String getId(Oaf oaf, String DELIM, String ENCLOSING) {
|
|
770 |
public String getId(Oaf oaf) {
|
|
774 | 771 |
switch (oaf.getKind()) { |
775 | 772 |
case entity: |
776 |
return cleanId(oaf.getEntity().getId(), DELIM, ENCLOSING);
|
|
773 |
return cleanId(oaf.getEntity().getId()); |
|
777 | 774 |
case relation: |
778 |
return cleanId(oaf.getRel().getSource(), DELIM, ENCLOSING);
|
|
775 |
return cleanId(oaf.getRel().getSource()); |
|
779 | 776 |
|
780 | 777 |
} |
781 | 778 |
return null; |
782 | 779 |
|
783 | 780 |
} |
784 | 781 |
|
785 |
public static String getId(OafRel relOaf, String DELIM, String ENCLOSING) {
|
|
786 |
return cleanId(relOaf.getSource(), DELIM, ENCLOSING);
|
|
782 |
public String getId(OafRel relOaf) {
|
|
783 |
return cleanId(relOaf.getSource()); |
|
787 | 784 |
} |
788 | 785 |
|
789 |
public static String clean(String value, String DELIM, String ENCLOSING) {
|
|
786 |
public String clean(String value) {
|
|
790 | 787 |
if (value != null) { |
791 | 788 |
|
792 | 789 |
value = value.replaceAll("[\"\\r\\\\;]", ""); |
... | ... | |
801 | 798 |
//value = value.replaceAll("[^a-zA-Z0-9 .-_:/!@+=]+", " "); |
802 | 799 |
|
803 | 800 |
return value; |
804 |
|
|
805 | 801 |
} else { |
806 | 802 |
return ""; |
807 |
|
|
808 | 803 |
} |
809 | 804 |
|
810 | 805 |
} |
811 | 806 |
|
812 |
public static String cleanId(String value, String DELIM, String ENCLOSING) {
|
|
807 |
public String cleanId(String value) {
|
|
813 | 808 |
if (value != null) { |
814 | 809 |
// TODO DO NOT CHANGE THIS: IT REMOVES ID PREFIX ( 5|datacite____:: |
815 | 810 |
|
... | ... | |
829 | 824 |
} |
830 | 825 |
|
831 | 826 |
|
832 |
public static String cleanUrl(String value, String DELIM, String ENCLOSING) {
|
|
827 |
public String cleanUrl(String value) {
|
|
833 | 828 |
value = value.replace(DELIM, " "); |
834 | 829 |
value = value.replace(ENCLOSING, " "); |
835 | 830 |
value = value.replace(" ", ""); |
... | ... | |
838 | 833 |
} |
839 | 834 |
|
840 | 835 |
|
841 |
public static long DATEDIFF(String startDate, String endDate) {
|
|
836 |
public long DATEDIFF(String startDate, String endDate) { |
|
842 | 837 |
long MILLISECS_PER_DAY = 24 * 60 * 60 * 1000; |
843 | 838 |
long days = 0l; |
844 | 839 |
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd"); // "dd/MM/yyyy HH:mm:ss"); |
... | ... | |
861 | 856 |
|
862 | 857 |
return days; |
863 | 858 |
} |
864 |
|
|
865 |
|
|
866 | 859 |
} |
Also available in: Unified diff
- Made Serializer's methods non-static to improve readability and maintainability.
- Code cleanup