package eu.dnetlib.data.mdstore.plugins.objects;

import java.io.StringReader;
import java.net.URI;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.impl.client.HttpClientBuilder;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
import org.springframework.web.client.RestTemplate;
import org.xml.sax.SAXException;

@XmlAccessorType(XmlAccessType.FIELD)
|
24
|
public class Project {
|
25
|
|
26
|
@XmlElement(name = "infoId")
|
27
|
private String infoId;
|
28
|
@XmlElement(name = "openaireId")
|
29
|
private String openaireId;
|
30
|
@XmlElement(name = "code")
|
31
|
private String code;
|
32
|
@XmlElement(name = "name")
|
33
|
private String name;
|
34
|
@XmlElement(name = "acronym")
|
35
|
private String acronym;
|
36
|
@XmlElement(name = "funder")
|
37
|
private String funder;
|
38
|
@XmlElement(name = "program")
|
39
|
private String program;
|
40
|
@XmlElement(name = "jurisdiction")
|
41
|
private String jurisdiction;
|
42
|
|
43
|
private static final Log log = LogFactory.getLog(Project.class);
|
44
|
|
45
|
private static final int MAX_NUMBER_OF_ATTEMPTS = 10;
|
46
|
private static final int INTERVAL_MILLIS = 20000;
|
47
|
private static final String PROJECT_REGEX = "info:eu-repo\\/grantAgreement\\/(.*)\\/(.*)\\/(.*)\\/(.*)\\/(.*)\\/(.*)";
|
48
|
|
49
|
private static final HttpComponentsClientHttpRequestFactory httpRequestFactory = new HttpComponentsClientHttpRequestFactory(HttpClientBuilder
|
50
|
.create()
|
51
|
.setConnectionTimeToLive(0, TimeUnit.MILLISECONDS)
|
52
|
.setMaxConnPerRoute(1)
|
53
|
.setMaxConnTotal(1)
|
54
|
.disableAutomaticRetries()
|
55
|
.disableConnectionState()
|
56
|
.build());
|
57
|
|
58
|
public static Project newInstance(final URI url) {
|
59
|
|
60
|
try {
|
61
|
final SAXReader reader = new SAXReader();
|
62
|
final Document doc = reader.read(new StringReader(fetchUrl(url, MAX_NUMBER_OF_ATTEMPTS)));
|
63
|
|
64
|
final String openaireId = doc.valueOf("//*[local-name()='objIdentifier']");
|
65
|
|
66
|
if (StringUtils.isBlank(openaireId)) {
|
67
|
log.warn("Project not found using OpenAIRE API, url: " + url);
|
68
|
return null;
|
69
|
}
|
70
|
|
71
|
final String code = doc.valueOf("//code");
|
72
|
final String name = doc.valueOf("//title");
|
73
|
final String acronym = doc.valueOf("//acronym");
|
74
|
final String funder = doc.valueOf("//funder/shortname");
|
75
|
final String program = doc.valueOf("//funding_level_0/name");
|
76
|
final String jurisdiction = doc.valueOf("//funder/jurisdiction");
|
77
|
|
78
|
return new Project(openaireId, code, name, acronym, funder, program, jurisdiction);
|
79
|
} catch (final DocumentException e) {
|
80
|
log.error("Error parsing document from url " + url);
|
81
|
throw new RuntimeException(e);
|
82
|
}
|
83
|
|
84
|
}
|
85
|
|
86
|
public static Project newInstance(final String infoId) {
|
87
|
final Pattern pattern = Pattern.compile(PROJECT_REGEX);
|
88
|
final Matcher matcher = pattern.matcher(infoId);
|
89
|
if (matcher.find()) {
|
90
|
final String openaireId = "";
|
91
|
final String funder = matcher.group(1);
|
92
|
final String program = matcher.group(2);
|
93
|
final String code = matcher.group(3);
|
94
|
final String jurisdiction = matcher.group(4);
|
95
|
final String name = StringUtils.defaultIfBlank(matcher.group(5), funder + "/" + program + "/" + code);
|
96
|
final String acronym = StringUtils.defaultIfBlank(matcher.group(6), name);
|
97
|
|
98
|
if (StringUtils.isNotEmpty(code) && StringUtils.isNotEmpty(program)
|
99
|
&& StringUtils.isNotEmpty(funder)) { return new Project(openaireId, code, name, acronym, funder, program, jurisdiction); }
|
100
|
}
|
101
|
|
102
|
log.warn("Invalid project ID: " + infoId);
|
103
|
return null;
|
104
|
|
105
|
}
|
106
|
|
107
|
public static boolean isValid(final String infoId) {
|
108
|
return Project.newInstance(infoId) != null;
|
109
|
}
|
110
|
|
111
|
private static String fetchUrl(final URI url, final int attempts) {
|
112
|
if (attempts == 0) { throw new RuntimeException("Max number of attempts reached, downloading url: " + url); }
|
113
|
|
114
|
/*
|
115
|
* try (CloseableHttpClient client = HttpClientBuilder.create().build()) { log.debug("Invoking url: " + url); final HttpGet request
|
116
|
* = new HttpGet(url); request.setHeader("Connection", "close"); request.setProtocolVersion(HttpVersion.HTTP_1_0); try (final
|
117
|
* CloseableHttpResponse response = client.execute(request)) { return EntityUtils.toString(response.getEntity()); }
|
118
|
*/
|
119
|
|
120
|
try {
|
121
|
final RestTemplate restTemplate = new RestTemplate();
|
122
|
restTemplate.setRequestFactory(httpRequestFactory);
|
123
|
return restTemplate.getForObject(url, String.class);
|
124
|
} catch (
|
125
|
|
126
|
final Exception e) {
|
127
|
try {
|
128
|
log.error("Error downloading url: " + url + " - " + e.getMessage());
|
129
|
Thread.sleep(INTERVAL_MILLIS);
|
130
|
return fetchUrl(url, attempts - 1);
|
131
|
} catch (final InterruptedException e1) {
|
132
|
throw new RuntimeException(e1);
|
133
|
}
|
134
|
}
|
135
|
}
|
136
|
|
137
|
public Project() {}
|
138
|
|
139
|
public Project(final String openaireId, final String code, final String name, final String acronym, final String funder, final String program,
|
140
|
final String jurisdiction) {
|
141
|
this.openaireId = openaireId;
|
142
|
this.code = code;
|
143
|
this.name = name;
|
144
|
this.acronym = acronym;
|
145
|
this.funder = funder;
|
146
|
this.program = program;
|
147
|
this.jurisdiction = jurisdiction;
|
148
|
}
|
149
|
|
150
|
public String getOpenaireId() {
|
151
|
return openaireId;
|
152
|
}
|
153
|
|
154
|
public String getCode() {
|
155
|
return code;
|
156
|
}
|
157
|
|
158
|
public String getName() {
|
159
|
return name;
|
160
|
}
|
161
|
|
162
|
public String getAcronym() {
|
163
|
return acronym;
|
164
|
}
|
165
|
|
166
|
public String getFunder() {
|
167
|
return funder;
|
168
|
}
|
169
|
|
170
|
public String getProgram() {
|
171
|
return program;
|
172
|
}
|
173
|
|
174
|
public String getJurisdiction() {
|
175
|
return jurisdiction;
|
176
|
}
|
177
|
|
178
|
public boolean match(final String infoId) {
|
179
|
final Project p = Project.newInstance(infoId);
|
180
|
return ((p != null) && funder.equals(p.funder) && program.equals(p.program) && code.equals(p.code));
|
181
|
}
|
182
|
|
183
|
@Override
|
184
|
public String toString() {
|
185
|
return "Project [openaireId=" + openaireId + ", code=" + code + ", name=" + name + ", acronym=" + acronym + ", funder=" + funder + ", program="
|
186
|
+ program + ", jurisdiction=" + jurisdiction + "]";
|
187
|
}
|
188
|
|
189
|
@Override
|
190
|
public int hashCode() {
|
191
|
final int prime = 31;
|
192
|
int result = 1;
|
193
|
result = (prime * result) + ((code == null) ? 0 : code.hashCode());
|
194
|
result = (prime * result) + ((funder == null) ? 0 : funder.hashCode());
|
195
|
result = (prime * result) + ((program == null) ? 0 : program.hashCode());
|
196
|
return result;
|
197
|
}
|
198
|
|
199
|
@Override
|
200
|
public boolean equals(final Object obj) {
|
201
|
if (this == obj) { return true; }
|
202
|
if (obj == null) { return false; }
|
203
|
if (getClass() != obj.getClass()) { return false; }
|
204
|
final Project other = (Project) obj;
|
205
|
if (code == null) {
|
206
|
if (other.code != null) { return false; }
|
207
|
} else if (!code.equals(other.code)) { return false; }
|
208
|
if (funder == null) {
|
209
|
if (other.funder != null) { return false; }
|
210
|
} else if (!funder.equals(other.funder)) { return false; }
|
211
|
if (program == null) {
|
212
|
if (other.program != null) { return false; }
|
213
|
} else if (!program.equals(other.program)) { return false; }
|
214
|
return true;
|
215
|
}
|
216
|
|
217
|
public String getInfoId() {
|
218
|
return infoId;
|
219
|
}
|
220
|
|
221
|
public void setInfoId(final String infoId) {
|
222
|
this.infoId = infoId;
|
223
|
}
|
224
|
|
225
|
public void setOpenaireId(final String openaireId) {
|
226
|
this.openaireId = openaireId;
|
227
|
}
|
228
|
|
229
|
public void setCode(final String code) {
|
230
|
this.code = code;
|
231
|
}
|
232
|
|
233
|
public void setName(final String name) {
|
234
|
this.name = name;
|
235
|
}
|
236
|
|
237
|
public void setAcronym(final String acronym) {
|
238
|
this.acronym = acronym;
|
239
|
}
|
240
|
|
241
|
public void setFunder(final String funder) {
|
242
|
this.funder = funder;
|
243
|
}
|
244
|
|
245
|
public void setProgram(final String program) {
|
246
|
this.program = program;
|
247
|
}
|
248
|
|
249
|
public void setJurisdiction(final String jurisdiction) {
|
250
|
this.jurisdiction = jurisdiction;
|
251
|
}
|
252
|
}
|