1
|
/**
|
2
|
*
|
3
|
*/
|
4
|
package eu.dnetlib.data.collective.transformation.engine.functions;
|
5
|
|
6
|
import java.text.ParseException;
|
7
|
import java.text.SimpleDateFormat;
|
8
|
import java.util.Arrays;
|
9
|
import java.util.Date;
|
10
|
//import java.util.HashMap;
|
11
|
import java.util.LinkedList;
|
12
|
import java.util.List;
|
13
|
//import java.util.Map;
|
14
|
|
15
|
|
16
|
|
17
|
|
18
|
import org.apache.oro.text.perl.Perl5Util;
|
19
|
|
20
|
|
21
|
/**
|
22
|
* @author jochen
|
23
|
*
|
24
|
*/
|
25
|
public class DateVocabulary extends Vocabulary{
|
26
|
|
27
|
private static final String filterFuncMin = "min()";
|
28
|
private String pattern_1 = "/^(\\d{4,4}-\\d{1,2}-\\d{1,2})/";
|
29
|
private String pattern_2 = "/^(\\d{4,4}-\\d{1,2})$/";
|
30
|
private String pattern_3 = "/^(\\d{4,4})$/";
|
31
|
private String pattern_4 = "/^(\\d{1,2}.\\d{1,2}.\\d{4,4})$/";
|
32
|
private SimpleDateFormat df;
|
33
|
|
34
|
private transient Perl5Util perl5 = new Perl5Util();
|
35
|
|
36
|
public String encoding(List<String> aKeys) throws ProcessingException{
|
37
|
String tempKey_1 = null;
|
38
|
String tempKey_2 = null;
|
39
|
String tempKey_3 = null;
|
40
|
String currentKey = null;
|
41
|
String twoDigitFormat = String.format("%%0%dd", 2);
|
42
|
|
43
|
try{
|
44
|
for (String key: aKeys){
|
45
|
key = key.trim();
|
46
|
currentKey = key;
|
47
|
if (perl5.match(pattern_1, key)){
|
48
|
String[] dateSplitted = perl5.getMatch().toString().split("-");
|
49
|
String dateNormalized = dateSplitted[0] + "-" + String.format(twoDigitFormat, Integer.parseInt(dateSplitted[1])) + "-" + String.format(twoDigitFormat, Integer.parseInt(dateSplitted[2]));
|
50
|
return dateNormalized;
|
51
|
}else if (perl5.match(pattern_2, key)){
|
52
|
String[] dateSplitted = key.split("-");
|
53
|
tempKey_1 = dateSplitted[0] + "-" + String.format(twoDigitFormat, Integer.parseInt(dateSplitted[1])) + "-01";
|
54
|
}else if (perl5.match(pattern_3, key)){
|
55
|
tempKey_2 = key + "-01-01";
|
56
|
}else if (perl5.match(pattern_4, key)){
|
57
|
String[] components = key.split("[\\-\\/\\.]");
|
58
|
// ignore this key if it has less than 3 components
|
59
|
if (components.length >= 3)
|
60
|
tempKey_3 = components[2] + "-" + String.format(twoDigitFormat, Integer.parseInt(components[1])) + "-" + String.format(twoDigitFormat, Integer.parseInt(components[0]));
|
61
|
}
|
62
|
}
|
63
|
}catch(Throwable e){
|
64
|
throw new ProcessingException("Exception thrown in Datevocabulary (tried to match for value '" + currentKey + "'):", e);
|
65
|
}
|
66
|
if (tempKey_1 != null){
|
67
|
return tempKey_1;
|
68
|
}else if (tempKey_2 != null){
|
69
|
return tempKey_2;
|
70
|
}else if (tempKey_3 != null){
|
71
|
return tempKey_3;
|
72
|
}else{
|
73
|
return "";
|
74
|
}
|
75
|
}
|
76
|
|
77
|
@Override
|
78
|
public List<String> encoding(List<String> aKeys, String aDefaultPattern,
|
79
|
String aFilterFunction) throws ProcessingException {
|
80
|
List<String> evList = new LinkedList<String>();
|
81
|
df = new SimpleDateFormat(aDefaultPattern);
|
82
|
for (String v: aKeys){
|
83
|
String ev = encoding(Arrays.asList(new String[]{v}));
|
84
|
if (ev.length() > 0){
|
85
|
try {
|
86
|
if (aFilterFunction.trim().length() > 0 && !evList.isEmpty())
|
87
|
evList.add( filter(df.parse(ev), df.parse(evList.remove(0)), aFilterFunction) );
|
88
|
else
|
89
|
evList.add(df.format(df.parse(ev)));
|
90
|
} catch (ParseException e) {
|
91
|
throw new ProcessingException("invalid date format: " + ev);
|
92
|
}
|
93
|
}
|
94
|
}
|
95
|
return evList;
|
96
|
}
|
97
|
|
98
|
private String filter(Date d1, Date d2, String filter) throws ProcessingException{
|
99
|
if (filter.equals(filterFuncMin))
|
100
|
if (d1.before(d2))
|
101
|
return df.format(d1);
|
102
|
else
|
103
|
return df.format(d2);
|
104
|
else
|
105
|
throw new ProcessingException("unsupported filter function: " + filter);
|
106
|
}
|
107
|
|
108
|
}
|