1 |
57029
|
argiro.kok
|
package eu.dnetlib.data.claims.utils;
|
2 |
40946
|
argiro.kok
|
|
3 |
57029
|
argiro.kok
|
import eu.dnetlib.data.claims.entity.*;
|
4 |
42291
|
katerina.i
|
import org.apache.log4j.Logger;
|
5 |
41003
|
argiro.kok
|
|
6 |
40946
|
argiro.kok
|
import java.io.BufferedWriter;
|
7 |
|
|
import java.io.FileWriter;
|
8 |
|
|
import java.io.PrintWriter;
|
9 |
|
|
|
10 |
|
|
/**
|
11 |
|
|
* Created by argirok on 21/1/2016.
|
12 |
|
|
*/
|
13 |
|
|
public class ClaimValidation {
|
14 |
|
|
private PrintWriter out;
|
15 |
47008
|
argiro.kok
|
String pathToSaveReport = null;
|
16 |
40946
|
argiro.kok
|
|
17 |
47297
|
argiro.kok
|
private static Logger logger = Logger.getLogger(ClaimValidation.class);
|
18 |
40946
|
argiro.kok
|
|
19 |
|
|
/*
|
20 |
|
|
Claim:
|
21 |
|
|
Mandatory Fields : id, sourceType,targetType, User e-mail, Date, Target, Source
|
22 |
|
|
|
23 |
|
|
*/
|
24 |
|
|
public boolean validateClaim(Claim claim){
|
25 |
|
|
boolean isValid=true;
|
26 |
45974
|
argiro.kok
|
try {
|
27 |
47297
|
argiro.kok
|
out = new PrintWriter(new BufferedWriter(new FileWriter(pathToSaveReport+"/validationFile.txt", true)));
|
28 |
47215
|
katerina.i
|
|
29 |
45974
|
argiro.kok
|
} catch (Exception e) {
|
30 |
|
|
out = null;
|
31 |
|
|
logger.error("Couldn't write to file " + "validationFile.txt", e);
|
32 |
|
|
}
|
33 |
|
|
|
34 |
|
|
if(claim==null){
|
35 |
40946
|
argiro.kok
|
isValid=false;
|
36 |
45974
|
argiro.kok
|
if(out!=null) {
|
37 |
|
|
out.println("Claim invalid (null)...");
|
38 |
|
|
}
|
39 |
42291
|
katerina.i
|
logger.warn("Claim invalid (null)...");
|
40 |
41350
|
argiro.kok
|
// }else if(claim.getId()==null||claim.getId().isEmpty()){
|
41 |
|
|
// isValid=false;
|
42 |
|
|
// out.println("Claim invalid (id)..." );
|
43 |
40946
|
argiro.kok
|
}else if(claim.getTargetType()==null||claim.getTargetType().isEmpty()){
|
44 |
|
|
isValid=false;
|
45 |
45974
|
argiro.kok
|
if(out!=null) {
|
46 |
|
|
out.println("Claim invalid (target type): " + claim.getId());
|
47 |
|
|
}
|
48 |
42291
|
katerina.i
|
logger.warn("Claim invalid (target type): "+claim.getId());
|
49 |
|
|
|
50 |
40946
|
argiro.kok
|
}else if(claim.getSourceType()==null||claim.getSourceType().isEmpty()){
|
51 |
|
|
isValid=false;
|
52 |
45974
|
argiro.kok
|
if(out!=null) {
|
53 |
|
|
out.println("Claim invalid (source type): " + claim.getId());
|
54 |
|
|
}
|
55 |
42291
|
katerina.i
|
logger.warn("Claim invalid (source type): "+claim.getId());
|
56 |
|
|
|
57 |
40946
|
argiro.kok
|
}else if(claim.getDate()==null){
|
58 |
|
|
isValid=false;
|
59 |
45974
|
argiro.kok
|
if(out!=null) {
|
60 |
|
|
out.println("Claim invalid (date): " + claim.getId());
|
61 |
|
|
}
|
62 |
42291
|
katerina.i
|
logger.warn("Claim invalid (date): "+claim.getId());
|
63 |
|
|
|
64 |
40946
|
argiro.kok
|
}else if(claim.getUserMail()==null||claim.getUserMail().isEmpty()){
|
65 |
|
|
isValid=false;
|
66 |
45974
|
argiro.kok
|
if(out!=null) {
|
67 |
|
|
out.println("Claim invalid (mail): " + claim.getId());
|
68 |
|
|
}
|
69 |
42291
|
katerina.i
|
logger.warn("Claim invalid (mail): "+claim.getId());
|
70 |
|
|
|
71 |
40946
|
argiro.kok
|
}else if(claim.getSource()==null || claim.getTarget()==null){
|
72 |
|
|
isValid=false;
|
73 |
45974
|
argiro.kok
|
if(out!=null) {
|
74 |
|
|
out.println("Result invalid : " + claim.getId());
|
75 |
|
|
}
|
76 |
42291
|
katerina.i
|
logger.warn("Result invalid : "+claim.getId());
|
77 |
|
|
|
78 |
41621
|
argiro.kok
|
}else if(claim.getSource()==null || claim.getTarget()==null){
|
79 |
|
|
isValid=false;
|
80 |
45974
|
argiro.kok
|
if(out!=null) {
|
81 |
|
|
out.println("Result invalid : " + claim.getId());
|
82 |
|
|
}
|
83 |
42291
|
katerina.i
|
logger.warn("Result invalid : "+claim.getId());
|
84 |
|
|
|
85 |
41621
|
argiro.kok
|
}else if(claim.getSource().getOpenaireId()!=null && claim.getTarget().getOpenaireId()!=null && claim.getSource().getOpenaireId().equals(claim.getTarget().getOpenaireId())){
|
86 |
|
|
isValid=false;
|
87 |
45974
|
argiro.kok
|
if(out!=null) {
|
88 |
|
|
out.println("Claim invalid (source == target): " + claim.getId());
|
89 |
|
|
}
|
90 |
42291
|
katerina.i
|
logger.warn("Claim invalid (source == target): "+claim.getId());
|
91 |
|
|
|
92 |
40946
|
argiro.kok
|
}else{
|
93 |
|
|
boolean sourceIsValid=validateByType(claim.getSource(),claim.getSourceType(),claim.getId());
|
94 |
|
|
boolean targetIsValid=validateByType(claim.getTarget(),claim.getTargetType(),claim.getId());
|
95 |
|
|
isValid= isValid&&sourceIsValid&&targetIsValid;
|
96 |
41003
|
argiro.kok
|
if(!isValid){
|
97 |
45974
|
argiro.kok
|
if(out!=null) {
|
98 |
|
|
out.println("Claim invalid (source /target): " + claim.getId() + "\n");
|
99 |
|
|
}
|
100 |
42291
|
katerina.i
|
logger.warn("Claim invalid (source /target): "+claim.getId()+"\n");
|
101 |
41003
|
argiro.kok
|
}
|
102 |
40946
|
argiro.kok
|
}
|
103 |
45974
|
argiro.kok
|
if(out!=null) {
|
104 |
|
|
out.close();
|
105 |
|
|
}
|
106 |
|
|
|
107 |
40946
|
argiro.kok
|
return isValid;
|
108 |
|
|
}
|
109 |
|
|
/*
|
110 |
|
|
Result:
|
111 |
|
|
Mandatory Fields : id, type, Collected from
|
112 |
|
|
|
113 |
|
|
*/
|
114 |
|
|
public boolean validateResult(Result result, String claimId){
|
115 |
|
|
boolean isValid=true;
|
116 |
|
|
if(result==null){
|
117 |
|
|
isValid=false;
|
118 |
41003
|
argiro.kok
|
out.println("Result invalid (null): - claimId:"+claimId);
|
119 |
41350
|
argiro.kok
|
// }else if(result.getOpenaireId()==null||result.getOpenaireId().isEmpty()){
|
120 |
|
|
// isValid=false;
|
121 |
|
|
// out.println("Result invalid (Id): - claimId:"+claimId);
|
122 |
40946
|
argiro.kok
|
}else if(result.getResultType()==null||result.getResultType().isEmpty()){
|
123 |
|
|
isValid=false;
|
124 |
41003
|
argiro.kok
|
out.println("Result invalid (Type): "+result.getOpenaireId()+"- claimId:"+claimId);
|
125 |
40946
|
argiro.kok
|
}/*
|
126 |
|
|
//http://services.openaire.eu:8380/search/search?action=search&sTransformer=results_openaire&query=%28%28%28deletedbyinference+%3D+false%29+AND+%28oaftype+exact+result%29%29+and+%28%28resulttypeid+exact+publication%29+or+%28resulttypeid+exact+dataset%29%29%29+and+%28%28objIdentifier+
|
127 |
|
|
//exact+od______1389%3A%3A6ba133c8460d5a67dfb18f34cc5303fb%29+or+%28resultdupid+exact+od______1389%3A%3A6ba133c8460d5a67dfb18f34cc5303fb%29%29&size=10&locale=en_GB
|
128 |
|
|
//There are resuts with no title
|
129 |
|
|
//so I am
|
130 |
|
|
else if(result.getTitle()==null||result.getTitle().isEmpty()){
|
131 |
|
|
isValid=false;
|
132 |
41003
|
argiro.kok
|
out.println("Result invalid (title): "+result.getOpenaireId()+"- claimId:"+claimId);
|
133 |
40946
|
argiro.kok
|
}*/else if(result.getCollectedFrom()==null||result.getCollectedFrom().isEmpty()){
|
134 |
|
|
isValid=false;
|
135 |
41003
|
argiro.kok
|
out.println("Result invalid (collectedFrom) : "+result.getOpenaireId()+"- claimId:"+claimId);
|
136 |
41350
|
argiro.kok
|
}else if (result.getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_OPENAIRE) && result.getOpenaireId()==null) {
|
137 |
|
|
isValid = false;
|
138 |
|
|
out.println("Result invalid (Openaire - no id): - claimId:" + claimId);
|
139 |
|
|
}else if ((result.getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_DATACITE)|| result.getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_CROSSREF) ) && result.getDoi()==null) {
|
140 |
|
|
isValid = false;
|
141 |
|
|
out.println("Result invalid (Crossref/Datacite - no doi): - claimId:" + claimId);
|
142 |
|
|
}else if (result.getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_ORCID) && result.getOrcidworkid()==null) {
|
143 |
|
|
isValid = false;
|
144 |
|
|
out.println("Result invalid (Orcid - no work id): - claimId:" + claimId);
|
145 |
40946
|
argiro.kok
|
}
|
146 |
41350
|
argiro.kok
|
|
147 |
40946
|
argiro.kok
|
return isValid;
|
148 |
|
|
}
|
149 |
|
|
/*
|
150 |
|
|
Project:
|
151 |
|
|
Mandatory Fields : id, name or acronym, funder id, funder name
|
152 |
|
|
|
153 |
|
|
*/
|
154 |
|
|
public boolean validateProject(Project project, String claimId){
|
155 |
|
|
boolean isValid=true;
|
156 |
41003
|
argiro.kok
|
if(project==null){
|
157 |
40946
|
argiro.kok
|
isValid=false;
|
158 |
41003
|
argiro.kok
|
out.println("Project invalid (null): - claimId:"+claimId);
|
159 |
40946
|
argiro.kok
|
}else if(project.getOpenaireId()==null||project.getOpenaireId().isEmpty()){
|
160 |
|
|
isValid=false;
|
161 |
41003
|
argiro.kok
|
out.println("Project invalid (id): - claimId:"+claimId);
|
162 |
41157
|
argiro.kok
|
}else if((project.getName()==null||project.getName().isEmpty())&&(project.getAcronym()==null||project.getAcronym().isEmpty())){
|
163 |
40946
|
argiro.kok
|
isValid=false;
|
164 |
41157
|
argiro.kok
|
out.println("Project invalid (name/acronym): "+project.getOpenaireId()+"- claimId:"+claimId);
|
165 |
40946
|
argiro.kok
|
}else if(project.getFunderId()==null||project.getFunderId().isEmpty()){
|
166 |
|
|
isValid=false;
|
167 |
41003
|
argiro.kok
|
out.println("Project invalid (funder id): "+project.getOpenaireId()+"- claimId:"+claimId);
|
168 |
40946
|
argiro.kok
|
}
|
169 |
|
|
else if(project.getFunderName()==null||project.getFunderName().isEmpty()){
|
170 |
|
|
isValid=false;
|
171 |
41003
|
argiro.kok
|
out.println("Project invalid (funder name): "+project.getOpenaireId()+"- claimId:"+claimId);
|
172 |
40946
|
argiro.kok
|
}
|
173 |
41003
|
argiro.kok
|
|
174 |
40946
|
argiro.kok
|
return isValid;
|
175 |
|
|
}
|
176 |
|
|
/*
|
177 |
|
|
Context:
|
178 |
|
|
Mandatory Fields : id (the title check is currently disabled)
|
179 |
|
|
|
180 |
|
|
*/
|
181 |
|
|
public boolean validateContext(Context context, String claimId){
|
182 |
|
|
boolean isValid=true;
|
183 |
|
|
if(context==null){
|
184 |
|
|
isValid=false;
|
185 |
41003
|
argiro.kok
|
out.println("Context invalid (null): - claimId:"+claimId);
|
186 |
40946
|
argiro.kok
|
}else if(context.getOpenaireId()==null||context.getOpenaireId().isEmpty()){
|
187 |
|
|
isValid=false;
|
188 |
41003
|
argiro.kok
|
out.println("Context invalid (id): - claimId:"+claimId);
|
189 |
40946
|
argiro.kok
|
}/*
|
190 |
|
|
//This is commented since There isn't a way to fill this field right now.
|
191 |
|
|
else if(context.getTitle()==null||context.getTitle().isEmpty()){
|
192 |
|
|
isValid=false;
|
193 |
|
|
out.println("Context invalid : "+context.getOpenaireId()+"- claimId:"+claimId);
|
194 |
|
|
}*/
|
195 |
|
|
return isValid;
|
196 |
|
|
}
|
197 |
57029
|
argiro.kok
|
private boolean validateByType(OpenaireEntity entity, String type , String claimId ){
|
198 |
40946
|
argiro.kok
|
//TODO add more types (e.g. patends) when they are available
|
199 |
52857
|
argiro.kok
|
if(type.equals(ClaimUtils.PUBLICATION)||type.equals(ClaimUtils.DATASET)||type.equals(ClaimUtils.SOFTWARE) ||type.equals(ClaimUtils.OTHER)){
|
200 |
41003
|
argiro.kok
|
return validateResult((Result)entity,claimId);
|
201 |
|
|
} else if(type.equals(ClaimUtils.CONTEXT)){
|
202 |
|
|
return validateContext((Context)entity,claimId);
|
203 |
|
|
}else if(type.equals(ClaimUtils.PROJECT)){
|
204 |
|
|
return validateProject((Project)entity,claimId);
|
205 |
|
|
}else{
|
206 |
|
|
if(entity.getOpenaireId()!=null){
|
207 |
|
|
out.println("Entity invalid type ("+type+"):"+entity.getOpenaireId()+" - claimId:"+claimId);
|
208 |
|
|
}else {
|
209 |
|
|
out.println("Entity invalid type (" + type + "): - claimId:" + claimId);
|
210 |
|
|
}
|
211 |
40946
|
argiro.kok
|
}
|
212 |
|
|
return false;
|
213 |
|
|
}
|
214 |
41166
|
argiro.kok
|
|
215 |
42291
|
katerina.i
|
public static boolean validateCollectedFrom(String collectedFrom) throws ClaimValidationException {
|
216 |
|
|
//TODO add more collected from when they are available
|
217 |
|
|
if (collectedFrom==null || collectedFrom.isEmpty()) {
|
218 |
47297
|
argiro.kok
|
logger.error("CollectedFrom field of source and/or target is null or empty");
|
219 |
42291
|
katerina.i
|
throw new ClaimValidationException("CollectedFrom field of source and/or target cannot be empty.");
|
220 |
|
|
|
221 |
|
|
} else if (!collectedFrom.equals(ClaimUtils.OPENAIRE) &&
|
222 |
|
|
!collectedFrom.equals(ClaimUtils.DATACITE) &&
|
223 |
|
|
!collectedFrom.equals(ClaimUtils.CROSSREF) &&
|
224 |
|
|
!collectedFrom.equals(ClaimUtils.ORCID)) {
|
225 |
47297
|
argiro.kok
|
logger.error("CollectedFrom field of source and/or target is invalid. Value is:"+collectedFrom);
|
226 |
42291
|
katerina.i
|
throw new ClaimValidationException("CollectedFrom field of source and/or target is invalid. The allowed " +
|
227 |
|
|
"types are: " + ClaimUtils.OPENAIRE + ", " + ClaimUtils.DATACITE
|
228 |
|
|
+ ", " + ClaimUtils.CROSSREF + ", " + ClaimUtils.ORCID +".");
|
229 |
|
|
|
230 |
47297
|
argiro.kok
|
|
231 |
42291
|
katerina.i
|
}
|
232 |
|
|
|
233 |
|
|
return true;
|
234 |
|
|
}
|
235 |
|
|
|
236 |
|
|
public static boolean validateType(String type) throws ClaimValidationException {
|
237 |
|
|
if (type == null || type.isEmpty()) {
|
238 |
|
|
throw new ClaimValidationException("The type of source and/or target cannot be empty.");
|
239 |
|
|
|
240 |
|
|
} else if (!type.equals(ClaimUtils.PUBLICATION) && !type.equals(ClaimUtils.DATASET) &&
|
241 |
|
|
!type.equals(ClaimUtils.CONTEXT) && !type.equals(ClaimUtils.PROJECT)) {
|
242 |
|
|
throw new ClaimValidationException("The given type is invalid. The allowed types are: " +
|
243 |
|
|
ClaimUtils.PUBLICATION + ", " + ClaimUtils.DATASET + ", " + ClaimUtils.CONTEXT + ", " +
|
244 |
|
|
ClaimUtils.PROJECT);
|
245 |
|
|
}
|
246 |
|
|
|
247 |
|
|
return true;
|
248 |
|
|
}
|
249 |
|
|
|
250 |
41444
|
argiro.kok
|
// /*
|
251 |
|
|
// Checks if the doi is valid. If it is not then replaces the existing DOI with the valid one.
|
252 |
|
|
// */
|
253 |
|
|
// public static void checkForDOI(Result r) throws Exception {
|
254 |
47219
|
argiro.kok
|
// if(r.getDOI()==null){
|
255 |
41444
|
argiro.kok
|
// return ;
|
256 |
|
|
// }
|
257 |
47219
|
argiro.kok
|
// String validDOI=isValidDoi(r.getDOI());
|
258 |
|
|
// if(!validDOI.equals(r.getDOI())){
|
259 |
41444
|
argiro.kok
|
// if(r.getMetadataRecord()!=null) {
|
260 |
47219
|
argiro.kok
|
// r.setMetadataRecord(r.getMetadataRecord().replace(r.getDOI(), validDOI));
|
261 |
41444
|
argiro.kok
|
// }
|
262 |
|
|
// if(r.getExternalUrl()!=null){
|
263 |
47219
|
argiro.kok
|
// r.setExternalUrl(r.getExternalUrl().replace(r.getDOI(), validDOI));
|
264 |
41444
|
argiro.kok
|
// }
|
265 |
|
|
// r.setDoi(validDOI);
|
266 |
|
|
// }
|
267 |
|
|
// }
|
268 |
|
|
// public static String isValidDoi(String doi) throws Exception {
|
269 |
|
|
// if(doi==null){
|
270 |
|
|
// return null;
|
271 |
|
|
// }
|
272 |
|
|
// try{
|
273 |
|
|
// PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("invalid_dois.txt", true)));
|
274 |
|
|
// boolean valid=SearchUtils.isDoiValid(doi);
|
275 |
|
|
// if(!valid&&(doi.length() - (doi.replace("_", "")).length() > 1)){
|
276 |
|
|
// String transformDoi = doi.replace("_", "-");
|
277 |
|
|
// valid=SearchUtils.isDoiValid(transformDoi);
|
278 |
|
|
// if(valid){
|
279 |
|
|
// Date date= new java.util.Date();
|
280 |
|
|
// out.println(new Timestamp(date.getTime())+" - Invalid doi: "+doi+" replaced with : "+transformDoi);
|
281 |
|
|
// doi=transformDoi;
|
282 |
|
|
// }
|
283 |
|
|
// }else if(!valid&&(doi.length() - (doi.replace("_", "")).length() == 1)) {
|
284 |
|
|
// String transformDoi = doi.replace("_", "-");
|
285 |
|
|
// valid=SearchUtils.isDoiValid(transformDoi);
|
286 |
|
|
// if(valid){
|
287 |
|
|
// Date date= new java.util.Date();
|
288 |
|
|
// out.println(new Timestamp(date.getTime())+" - Invalid doi: "+doi+" replaced with : "+transformDoi);
|
289 |
|
|
// doi = transformDoi;
|
290 |
|
|
// }
|
291 |
|
|
// }else if(!valid) {
|
292 |
|
|
// String transformDoi =doi.replaceAll("\\p{C}", "");
|
293 |
|
|
// valid=SearchUtils.isDoiValid(transformDoi);
|
294 |
|
|
// if(valid){
|
295 |
|
|
// Date date= new java.util.Date();
|
296 |
|
|
// out.println(new Timestamp(date.getTime())+" - Invalid doi: "+doi+" replaced with (zero-width-char): "+transformDoi);
|
297 |
|
|
// doi = transformDoi;
|
298 |
|
|
// }
|
299 |
|
|
// }
|
300 |
|
|
// out.close();
|
301 |
|
|
// }catch (IOException e) {
|
302 |
|
|
// e.printStackTrace();
|
303 |
|
|
// System.err.println("Couldn't write to file " + "invalid_dois.txt");
|
304 |
|
|
// }
|
305 |
|
|
// return doi;
|
306 |
|
|
// }
|
307 |
47008
|
argiro.kok
|
|
308 |
|
|
|
309 |
|
|
public String getPathToSaveReport() {
|
310 |
|
|
return pathToSaveReport;
|
311 |
|
|
}
|
312 |
|
|
|
313 |
|
|
public void setPathToSaveReport(String pathToSaveReport) {
|
314 |
|
|
this.pathToSaveReport = pathToSaveReport;
|
315 |
|
|
}
|
316 |
40946
|
argiro.kok
|
}
|