1 |
41200
|
katerina.i
|
package eu.dnetlib.data.claims.migration;
|
2 |
|
|
|
3 |
41791
|
argiro.kok
|
import eu.dnetlib.data.claims.migration.entity.*;
|
4 |
41213
|
katerina.i
|
import eu.dnetlib.data.claims.migration.handler.*;
|
5 |
41200
|
katerina.i
|
import eu.dnetlib.data.claimsDemo.ClaimUtils;
|
6 |
47219
|
argiro.kok
|
import eu.dnetlib.data.claimsDemo.SQLStoreException;
|
7 |
49865
|
argiro.kok
|
import org.apache.log4j.BasicConfigurator;
|
8 |
|
|
import org.apache.log4j.Level;
|
9 |
47219
|
argiro.kok
|
import org.apache.log4j.Logger;
|
10 |
41200
|
katerina.i
|
import org.springframework.context.ApplicationContext;
|
11 |
|
|
import org.springframework.context.support.ClassPathXmlApplicationContext;
|
12 |
47008
|
argiro.kok
|
import org.springframework.context.support.PropertySourcesPlaceholderConfigurer;
|
13 |
41200
|
katerina.i
|
|
14 |
41350
|
argiro.kok
|
import java.io.*;
|
15 |
46977
|
argiro.kok
|
import java.math.BigInteger;
|
16 |
|
|
import java.security.MessageDigest;
|
17 |
41254
|
argiro.kok
|
import java.sql.Timestamp;
|
18 |
|
|
import java.util.ArrayList;
|
19 |
|
|
import java.util.Date;
|
20 |
41200
|
katerina.i
|
import java.util.List;
|
21 |
|
|
|
22 |
|
|
/**
|
23 |
|
|
* Created by kiatrop on 4/2/2016.
|
24 |
|
|
*/
|
25 |
|
|
|
26 |
|
|
public class Migration {
|
27 |
47219
|
argiro.kok
|
/** Log4j logger used by every method of the migration tool. */
private static final Logger logger = Logger.getLogger(Migration.class);

// Handlers looked up from the Spring context in the constructor.
// Reference fields default to null, so no explicit initialiser is needed.
RelationHandler relationHandler;
ContextRelationHandler contextRelationHandler;
ProjectHandler projectHandler;
DMFContextHandler dmfContextHandler;
DMFResultHandler dmfResultHandler;
IndexResultHandler indexResultHandler;
ExternalRecordHandler externalRecordHandler;
ResultHandler resultHandler;

ClaimHandler claimHandler;
// Instantiated eagerly here and then replaced by the Spring-managed bean in the constructor.
ClaimValidation claimValidation = new ClaimValidation();

/**
 * Loads the claims Spring context from the classpath and resolves, by type,
 * every handler bean this class works with.
 */
public Migration() {
    final ApplicationContext springContext =
            new ClassPathXmlApplicationContext("eu/dnetlib/data/claims/migration/springContext-claimsDemo.xml");

    this.relationHandler = springContext.getBean(RelationHandler.class);
    this.contextRelationHandler = springContext.getBean(ContextRelationHandler.class);
    this.projectHandler = springContext.getBean(ProjectHandler.class);
    this.dmfContextHandler = springContext.getBean(DMFContextHandler.class);
    this.dmfResultHandler = springContext.getBean(DMFResultHandler.class);
    this.externalRecordHandler = springContext.getBean(ExternalRecordHandler.class);
    this.indexResultHandler = springContext.getBean(IndexResultHandler.class);
    this.resultHandler = springContext.getBean(ResultHandler.class);
    this.claimHandler = springContext.getBean(ClaimHandler.class);
    this.claimValidation = springContext.getBean(ClaimValidation.class);
}
|
56 |
|
|
|
57 |
|
|
|
58 |
47219
|
argiro.kok
|
public List<Claim> createRelationsClaims() throws Exception, SQLStoreException {
|
59 |
41200
|
katerina.i
|
|
60 |
41254
|
argiro.kok
|
|
61 |
41200
|
katerina.i
|
List<Relation> relations = relationHandler.fetchAllRelations();
|
62 |
41254
|
argiro.kok
|
List<Claim> claims = new ArrayList<Claim>();
|
63 |
41213
|
katerina.i
|
for (Relation relation : relations) {
|
64 |
|
|
|
65 |
41254
|
argiro.kok
|
Claim claim= new Claim();
|
66 |
|
|
|
67 |
|
|
claim.setUserMail(relation.getClaimedBy());
|
68 |
|
|
claim.setDate(relation.getClaimDate());
|
69 |
41350
|
argiro.kok
|
claim.setId(relation.getClaimId());
|
70 |
46977
|
argiro.kok
|
System.out.println("Claim id:" + claim.getId());
|
71 |
41254
|
argiro.kok
|
OpenaireEntity source = null;
|
72 |
41213
|
katerina.i
|
//first work with the Source
|
73 |
41200
|
katerina.i
|
if (relation.getSourceType().equals(ClaimUtils.PROJECT)) {
|
74 |
41621
|
argiro.kok
|
source= buildProject(relation.getSourceId(),relation.getClaimId());
|
75 |
41254
|
argiro.kok
|
claim.setSourceType(ClaimUtils.PROJECT);
|
76 |
41200
|
katerina.i
|
} else if (relation.getSourceType().equals(ClaimUtils.CONTEXT)) {
|
77 |
41791
|
argiro.kok
|
Context context = dmfContextHandler.fetchContextByIdFromDmf(relation.getSourceId());
|
78 |
41254
|
argiro.kok
|
source = context;
|
79 |
|
|
claim.setSourceType(ClaimUtils.CONTEXT);
|
80 |
41200
|
katerina.i
|
} else if (relation.getSourceType().equals(ClaimUtils.DATASET) || relation.getSourceType().equals(ClaimUtils.PUBLICATION)) {
|
81 |
41350
|
argiro.kok
|
Result result = buildResult(relation.getDmf(), relation.getCollectedFrom(), relation.getSourceId(), relation.getClaimId());
|
82 |
41254
|
argiro.kok
|
source = result;
|
83 |
|
|
claim.setSourceType(result.getResultType());
|
84 |
|
|
}
|
85 |
|
|
claim.setSource(source);
|
86 |
41213
|
katerina.i
|
|
87 |
41254
|
argiro.kok
|
Result target = null;
|
88 |
|
|
if (relation.getTargetType().equals(ClaimUtils.DATASET) || relation.getTargetType().equals(ClaimUtils.PUBLICATION)) {
|
89 |
41350
|
argiro.kok
|
target = buildResult(relation.getDmf(), relation.getCollectedFrom(), relation.getTargetId(), relation.getClaimId());
|
90 |
41213
|
katerina.i
|
|
91 |
41254
|
argiro.kok
|
}
|
92 |
41350
|
argiro.kok
|
if( target != null) {
|
93 |
|
|
claim.setTarget(target);
|
94 |
|
|
claim.setTargetType(target.getResultType());
|
95 |
|
|
}
|
96 |
|
|
if(claimValidation.validateClaim(claim)){ // is valid
|
97 |
41621
|
argiro.kok
|
// System.out.println(claim);
|
98 |
41350
|
argiro.kok
|
claims.add(claim);
|
99 |
|
|
}
|
100 |
41213
|
katerina.i
|
|
101 |
41350
|
argiro.kok
|
}
|
102 |
|
|
return claims;
|
103 |
|
|
|
104 |
|
|
}
|
105 |
49865
|
argiro.kok
|
public void createAndSaveRelationsClaims(boolean save) throws Exception, SQLStoreException {
|
106 |
|
|
/*for stats*/
|
107 |
|
|
List<Claim> claimsOp= new ArrayList<Claim>();
|
108 |
|
|
List<Claim> claimsCr= new ArrayList<Claim>();
|
109 |
|
|
List<Claim> claimsDat= new ArrayList<Claim>();
|
110 |
|
|
List<Claim> claimsOrc= new ArrayList<Claim>();
|
111 |
|
|
Integer targetOp=0;
|
112 |
|
|
Integer targetCr=0;
|
113 |
|
|
Integer targetDat=0;
|
114 |
|
|
Integer targetOrc=0;
|
115 |
|
|
Integer sourceRes=0;
|
116 |
|
|
Integer sourceOp=0;
|
117 |
|
|
Integer sourceCr=0;
|
118 |
|
|
Integer sourceDat=0;
|
119 |
|
|
Integer sourceOrc=0;
|
120 |
|
|
Integer projects=0;
|
121 |
|
|
Integer contexts=0;
|
122 |
|
|
Integer totalClaims=0;
|
123 |
|
|
|
124 |
|
|
|
125 |
|
|
List<Relation> relations = relationHandler.fetchAllRelations();
|
126 |
|
|
List<Claim> claims = new ArrayList<Claim>();
|
127 |
|
|
logger.info("Relation size: "+relations.size());
|
128 |
|
|
for (Relation relation : relations) {
|
129 |
|
|
try {
|
130 |
|
|
Claim claim = new Claim();
|
131 |
|
|
|
132 |
|
|
claim.setUserMail(relation.getClaimedBy());
|
133 |
|
|
claim.setDate(relation.getClaimDate());
|
134 |
|
|
claim.setId(relation.getClaimId());
|
135 |
|
|
logger.info("Claim id:" + claim.getId());
|
136 |
|
|
OpenaireEntity source = null;
|
137 |
|
|
//first work with the Source
|
138 |
|
|
if (relation.getSourceType().equals(ClaimUtils.PROJECT)) {
|
139 |
|
|
source = buildProject(relation.getSourceId(), relation.getClaimId());
|
140 |
|
|
claim.setSourceType(ClaimUtils.PROJECT);
|
141 |
|
|
} else if (relation.getSourceType().equals(ClaimUtils.CONTEXT)) {
|
142 |
|
|
Context context = dmfContextHandler.fetchContextByIdFromDmf(relation.getSourceId());
|
143 |
|
|
source = context;
|
144 |
|
|
claim.setSourceType(ClaimUtils.CONTEXT);
|
145 |
|
|
} else if (relation.getSourceType().equals(ClaimUtils.DATASET) || relation.getSourceType().equals(ClaimUtils.PUBLICATION)) {
|
146 |
|
|
Result result = buildResult(relation.getDmf(), relation.getCollectedFrom(), relation.getSourceId(), relation.getClaimId());
|
147 |
|
|
source = result;
|
148 |
|
|
claim.setSourceType(result.getResultType());
|
149 |
|
|
}
|
150 |
|
|
|
151 |
|
|
claim.setSource(source);
|
152 |
|
|
|
153 |
|
|
Result target = null;
|
154 |
|
|
if (relation.getTargetType().equals(ClaimUtils.DATASET) || relation.getTargetType().equals(ClaimUtils.PUBLICATION)) {
|
155 |
|
|
target = buildResult(relation.getDmf(), relation.getCollectedFrom(), relation.getTargetId(), relation.getClaimId());
|
156 |
|
|
|
157 |
|
|
}
|
158 |
|
|
if (target != null) {
|
159 |
|
|
claim.setTarget(target);
|
160 |
|
|
claim.setTargetType(target.getResultType());
|
161 |
|
|
}
|
162 |
|
|
|
163 |
|
|
if (claimValidation.validateClaim(claim)) { // is valid
|
164 |
|
|
if(save) {
|
165 |
|
|
claim = claimHandler.exportMedatataForClaim(claim);
|
166 |
|
|
claimHandler.saveClaim(claim);
|
167 |
|
|
}
|
168 |
|
|
/*for stats*/
|
169 |
|
|
if(((Result)claim.getTarget()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_CROSSREF)){
|
170 |
|
|
targetCr++;
|
171 |
|
|
}else if(((Result)claim.getTarget()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_DATACITE)){
|
172 |
|
|
targetDat++;
|
173 |
|
|
}else if(((Result)claim.getTarget()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_ORCID)){
|
174 |
|
|
targetOrc++;
|
175 |
|
|
}else {
|
176 |
|
|
targetOp++;
|
177 |
|
|
}
|
178 |
|
|
if(claim.getSourceType().equals(ClaimUtils.PUBLICATION)||claim.getSourceType().equals(ClaimUtils.DATASET)){
|
179 |
|
|
sourceRes++;
|
180 |
|
|
if(((Result)claim.getSource()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_CROSSREF)){
|
181 |
|
|
sourceCr++;
|
182 |
|
|
}else if(((Result)claim.getSource()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_DATACITE)){
|
183 |
|
|
sourceDat++;
|
184 |
|
|
}else if(((Result)claim.getSource()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_ORCID)){
|
185 |
|
|
sourceOrc++;
|
186 |
|
|
}else {
|
187 |
|
|
sourceOp++;
|
188 |
|
|
}
|
189 |
|
|
}else if(claim.getSourceType().equals(ClaimUtils.PROJECT)) {
|
190 |
|
|
projects++;
|
191 |
|
|
}else if(claim.getSourceType().equals(ClaimUtils.CONTEXT)){
|
192 |
|
|
contexts++;
|
193 |
|
|
}
|
194 |
|
|
|
195 |
|
|
}
|
196 |
|
|
}catch (Exception e){
|
197 |
|
|
e.printStackTrace();
|
198 |
|
|
logger.error("Exception: "+e.getMessage());
|
199 |
|
|
|
200 |
|
|
}
|
201 |
|
|
}
|
202 |
|
|
|
203 |
|
|
/*for stats*/
|
204 |
|
|
System.out.println("\n\nTargets:\n\nCrossref : "+targetCr+ " DataCite :"+targetDat+" Orcid : "+targetOrc+" OpenAire : "+targetOp);
|
205 |
|
|
System.out.println("\n\nSources:\n\nAll : "+sourceRes+" Crossref : "+sourceCr+ " DataCite :"+sourceDat+" Orcid : "+sourceOrc+" OpenAire : "+sourceOp);
|
206 |
|
|
System.out.println("\n\nTotalClaims: "+totalClaims+" Projects: "+projects+ " Contexts :"+contexts);
|
207 |
|
|
|
208 |
|
|
|
209 |
|
|
}
|
210 |
47219
|
argiro.kok
|
public List<Claim> createContextRelationClaims() throws Exception, SQLStoreException {
|
211 |
41350
|
argiro.kok
|
|
212 |
|
|
|
213 |
|
|
List<ContextRelation> relations = contextRelationHandler.fetchAllConceptRelations();
|
214 |
|
|
List<Claim> claims = new ArrayList<Claim>();
|
215 |
|
|
for (ContextRelation relation : relations) {
|
216 |
|
|
|
217 |
|
|
Claim claim= new Claim();
|
218 |
|
|
claim.setId(relation.getClaimId());
|
219 |
|
|
claim.setUserMail(relation.getClaimedBy());
|
220 |
|
|
claim.setDate(relation.getClaimDate());
|
221 |
47008
|
argiro.kok
|
System.out.println("Claim id:" + claim.getId());
|
222 |
41350
|
argiro.kok
|
|
223 |
|
|
//first work with the Source
|
224 |
41793
|
argiro.kok
|
Context context = dmfContextHandler.extractContextFromDMF(relation.getContextDmf());
|
225 |
41350
|
argiro.kok
|
claim.setSourceType(ClaimUtils.CONTEXT);
|
226 |
|
|
claim.setSource(context);
|
227 |
41444
|
argiro.kok
|
|
228 |
41350
|
argiro.kok
|
Result target = buildResult(relation.getResultDmf(), relation.getCollectedFrom(), relation.getResultId(), relation.getClaimId());
|
229 |
|
|
if( target != null){
|
230 |
|
|
claim.setTarget(target);
|
231 |
|
|
claim.setTargetType(target.getResultType());
|
232 |
|
|
}
|
233 |
|
|
if(claimValidation.validateClaim(claim)){ // is valid
|
234 |
41254
|
argiro.kok
|
claims.add(claim);
|
235 |
|
|
}
|
236 |
41213
|
katerina.i
|
|
237 |
41254
|
argiro.kok
|
}
|
238 |
41350
|
argiro.kok
|
return claims;
|
239 |
41213
|
katerina.i
|
|
240 |
|
|
|
241 |
41254
|
argiro.kok
|
}
|
242 |
49865
|
argiro.kok
|
public void createAndSaveContextRelationClaims(Boolean save) throws Exception, SQLStoreException {
|
243 |
|
|
/*for stats*/
|
244 |
|
|
List<Claim> claimsOp= new ArrayList<Claim>();
|
245 |
|
|
List<Claim> claimsCr= new ArrayList<Claim>();
|
246 |
|
|
List<Claim> claimsDat= new ArrayList<Claim>();
|
247 |
|
|
List<Claim> claimsOrc= new ArrayList<Claim>();
|
248 |
|
|
Integer targetOp=0;
|
249 |
|
|
Integer targetCr=0;
|
250 |
|
|
Integer targetDat=0;
|
251 |
|
|
Integer targetOrc=0;
|
252 |
|
|
Integer sourceRes=0;
|
253 |
|
|
Integer sourceOp=0;
|
254 |
|
|
Integer sourceCr=0;
|
255 |
|
|
Integer sourceDat=0;
|
256 |
|
|
Integer sourceOrc=0;
|
257 |
|
|
Integer projects=0;
|
258 |
|
|
Integer contexts=0;
|
259 |
|
|
Integer totalClaims=0;
|
260 |
41621
|
argiro.kok
|
|
261 |
49865
|
argiro.kok
|
List<ContextRelation> relations = contextRelationHandler.fetchAllConceptRelations();
|
262 |
|
|
// List<Claim> claims = new ArrayList<Claim>();
|
263 |
|
|
for (ContextRelation relation : relations) {
|
264 |
|
|
|
265 |
|
|
Claim claim= new Claim();
|
266 |
|
|
claim.setId(relation.getClaimId());
|
267 |
|
|
claim.setUserMail(relation.getClaimedBy());
|
268 |
|
|
claim.setDate(relation.getClaimDate());
|
269 |
|
|
logger.info("Claim id:" + claim.getId());
|
270 |
|
|
|
271 |
|
|
//first work with the Source
|
272 |
|
|
Context context = dmfContextHandler.extractContextFromDMF(relation.getContextDmf());
|
273 |
|
|
claim.setSourceType(ClaimUtils.CONTEXT);
|
274 |
|
|
claim.setSource(context);
|
275 |
|
|
|
276 |
|
|
Result target = buildResult(relation.getResultDmf(), relation.getCollectedFrom(), relation.getResultId(), relation.getClaimId());
|
277 |
|
|
if( target != null){
|
278 |
|
|
claim.setTarget(target);
|
279 |
|
|
claim.setTargetType(target.getResultType());
|
280 |
|
|
}
|
281 |
|
|
if(claimValidation.validateClaim(claim)){ // is valid
|
282 |
|
|
if(save){
|
283 |
|
|
claim = claimHandler.exportMedatataForClaim(claim);
|
284 |
|
|
claimHandler.saveClaim(claim);
|
285 |
|
|
}
|
286 |
|
|
|
287 |
|
|
/*for stats*/
|
288 |
|
|
if(((Result)claim.getTarget()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_CROSSREF)){
|
289 |
|
|
targetCr++;
|
290 |
|
|
}else if(((Result)claim.getTarget()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_DATACITE)){
|
291 |
|
|
targetDat++;
|
292 |
|
|
}else if(((Result)claim.getTarget()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_ORCID)){
|
293 |
|
|
targetOrc++;
|
294 |
|
|
}else {
|
295 |
|
|
targetOp++;
|
296 |
|
|
}
|
297 |
|
|
if(claim.getSourceType().equals(ClaimUtils.PUBLICATION)||claim.getSourceType().equals(ClaimUtils.DATASET)){
|
298 |
|
|
sourceRes++;
|
299 |
|
|
if(((Result)claim.getSource()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_CROSSREF)){
|
300 |
|
|
sourceCr++;
|
301 |
|
|
}else if(((Result)claim.getSource()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_DATACITE)){
|
302 |
|
|
sourceDat++;
|
303 |
|
|
}else if(((Result)claim.getSource()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_ORCID)){
|
304 |
|
|
sourceOrc++;
|
305 |
|
|
}else {
|
306 |
|
|
sourceOp++;
|
307 |
|
|
}
|
308 |
|
|
}else if(claim.getSourceType().equals(ClaimUtils.PROJECT)) {
|
309 |
|
|
projects++;
|
310 |
|
|
}else if(claim.getSourceType().equals(ClaimUtils.CONTEXT)){
|
311 |
|
|
contexts++;
|
312 |
|
|
}
|
313 |
|
|
}
|
314 |
|
|
|
315 |
|
|
|
316 |
|
|
}
|
317 |
|
|
|
318 |
|
|
/*for stats*/
|
319 |
|
|
System.out.println("\n\nTargets:\n\nCrossref : "+targetCr+ " DataCite :"+targetDat+" Orcid : "+targetOrc+" OpenAire : "+targetOp);
|
320 |
|
|
System.out.println("\n\nSources:\n\nAll : "+sourceRes+" Crossref : "+sourceCr+ " DataCite :"+sourceDat+" Orcid : "+sourceOrc+" OpenAire : "+sourceOp);
|
321 |
|
|
System.out.println("\n\nTotalClaims: "+totalClaims+" Projects: "+projects+ " Contexts :"+contexts);
|
322 |
|
|
|
323 |
|
|
}
|
324 |
|
|
|
325 |
41254
|
argiro.kok
|
/**
|
326 |
41621
|
argiro.kok
|
*Returns a project from Openaire.
|
327 |
|
|
* In case the project id has a "welcometrust" prefix changes it to "wt__________", before search for it in the index.
|
328 |
|
|
*
|
329 |
|
|
* @param projectId
|
330 |
|
|
* @param claimId
|
331 |
|
|
* @return Project
|
332 |
|
|
* @throws Exception
|
333 |
|
|
*/
|
334 |
46918
|
argiro.kok
|
public Project buildProject(String projectId, String claimId) throws Exception {
|
335 |
47219
|
argiro.kok
|
String id = projectId;
|
336 |
41621
|
argiro.kok
|
if(projectId != null && projectId.contains("welcometrust")){
|
337 |
47219
|
argiro.kok
|
id=projectId.replace("welcometrust","wt__________");
|
338 |
41621
|
argiro.kok
|
}
|
339 |
49865
|
argiro.kok
|
Project project = projectHandler.fetchProjectByID(id, false);
|
340 |
41621
|
argiro.kok
|
if (project == null) {
|
341 |
49865
|
argiro.kok
|
project = projectHandler.fetchProjectByID(id, true);
|
342 |
|
|
}
|
343 |
|
|
if (project == null) {
|
344 |
41621
|
argiro.kok
|
try{
|
345 |
49865
|
argiro.kok
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(claimValidation.getPathToSaveReport()+"missing_projects.txt", true)));
|
346 |
41621
|
argiro.kok
|
Date date= new java.util.Date();
|
347 |
47219
|
argiro.kok
|
out.println(new Timestamp(date.getTime())+" ProjectId: "+id+ " ClaimId: "+claimId);
|
348 |
41621
|
argiro.kok
|
out.close();
|
349 |
|
|
}catch (IOException e) {
|
350 |
47219
|
argiro.kok
|
logger.error("Couldn't write to file " + "missing_projects.txt",e);
|
351 |
41621
|
argiro.kok
|
}
|
352 |
|
|
}
|
353 |
|
|
return project;
|
354 |
|
|
|
355 |
|
|
}
|
356 |
|
|
/**
|
357 |
41350
|
argiro.kok
|
* *If there is a dmf, the result is extracted from the external sources (crossRef, Datacite, Orcid)
|
358 |
41254
|
argiro.kok
|
* Otherwise Result is extracted from Openaire
|
359 |
41350
|
argiro.kok
|
* @param dmf
|
360 |
|
|
* @param collectedFrom
|
361 |
|
|
* @param resultId
|
362 |
|
|
* @param claimId
|
363 |
41254
|
argiro.kok
|
* @return Result or null
|
364 |
|
|
*/
|
365 |
41350
|
argiro.kok
|
private Result buildResult(String dmf, String collectedFrom, String resultId, String claimId){
|
366 |
41254
|
argiro.kok
|
Result result = null;
|
367 |
41444
|
argiro.kok
|
String external_id= null; // testing
|
368 |
41621
|
argiro.kok
|
if((dmf !=null && collectedFrom != null)&& !collectedFrom.equals(ClaimUtils.COLLECTED_FROM_OPENAIRE)){
|
369 |
41213
|
katerina.i
|
|
370 |
41350
|
argiro.kok
|
if (collectedFrom.equals(ClaimUtils.COLLECTED_FROM_CROSSREF)) {
|
371 |
41254
|
argiro.kok
|
try {
|
372 |
41350
|
argiro.kok
|
String doi= dmfResultHandler.fetchDoiByDMF(dmf);
|
373 |
41444
|
argiro.kok
|
external_id=doi;
|
374 |
41254
|
argiro.kok
|
if(doi != null) {
|
375 |
|
|
result = externalRecordHandler.fetchResultfromCrossref(doi);
|
376 |
|
|
}
|
377 |
|
|
} catch (Exception e) {
|
378 |
47219
|
argiro.kok
|
logger.error("Error fetching result from Crossref",e);
|
379 |
41444
|
argiro.kok
|
}
|
380 |
41213
|
katerina.i
|
|
381 |
41350
|
argiro.kok
|
} else if (collectedFrom.equals(ClaimUtils.COLLECTED_FROM_ORCID)) {
|
382 |
41254
|
argiro.kok
|
try {
|
383 |
41350
|
argiro.kok
|
String orcidwork= dmfResultHandler.fetchOrcidWorkByDMF(dmf);
|
384 |
41444
|
argiro.kok
|
external_id=orcidwork;
|
385 |
41350
|
argiro.kok
|
if(orcidwork != null) {
|
386 |
41254
|
argiro.kok
|
result = externalRecordHandler.fetchResultfromOrcid(orcidwork);
|
387 |
|
|
}
|
388 |
|
|
} catch (Exception e) {
|
389 |
47219
|
argiro.kok
|
logger.error("Error fetching result from Orcid",e);
|
390 |
41254
|
argiro.kok
|
}
|
391 |
41350
|
argiro.kok
|
} else if (collectedFrom.equals(ClaimUtils.COLLECTED_FROM_DATACITE)) {
|
392 |
41254
|
argiro.kok
|
try {
|
393 |
41350
|
argiro.kok
|
String doi= dmfResultHandler.fetchDoiByDMF(dmf);
|
394 |
41444
|
argiro.kok
|
external_id=doi;
|
395 |
41254
|
argiro.kok
|
if(doi != null) {
|
396 |
|
|
result = externalRecordHandler.fetchResultfromDatacite(doi);
|
397 |
|
|
}
|
398 |
|
|
} catch (Exception e) {
|
399 |
47219
|
argiro.kok
|
logger.error("Error fetching result from Datacite",e);
|
400 |
41254
|
argiro.kok
|
}
|
401 |
|
|
}
|
402 |
41449
|
argiro.kok
|
if(result == null){
|
403 |
|
|
//report missing external results
|
404 |
41350
|
argiro.kok
|
try{
|
405 |
47008
|
argiro.kok
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(claimValidation.getPathToSaveReport()+"missing_results_external_sources.txt", true)));
|
406 |
41350
|
argiro.kok
|
Date date= new java.util.Date();
|
407 |
41444
|
argiro.kok
|
out.println(new Timestamp(date.getTime())+" OpenaireId: "+resultId+ " collectedFrom: "+collectedFrom +" externalId: "+external_id+" claimId: "+claimId);
|
408 |
41350
|
argiro.kok
|
out.close();
|
409 |
|
|
}catch (IOException e) {
|
410 |
47219
|
argiro.kok
|
logger.error("Couldn't write to file " + "missing_results_external_sources.txt",e);
|
411 |
41350
|
argiro.kok
|
}
|
412 |
41449
|
argiro.kok
|
//give a second chance - search index
|
413 |
|
|
result = buildOpenaireResult(resultId,claimId);
|
414 |
41450
|
argiro.kok
|
if(result!=null) {
|
415 |
41449
|
argiro.kok
|
try {
|
416 |
47008
|
argiro.kok
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(claimValidation.getPathToSaveReport()+"external_results_found_inOpenaire.txt", true)));
|
417 |
41449
|
argiro.kok
|
Date date = new java.util.Date();
|
418 |
|
|
out.println(new Timestamp(date.getTime()) + " OpenaireId: " + resultId + " collectedFrom: " + collectedFrom + " externalId: " + external_id + " claimId: " + claimId);
|
419 |
|
|
out.close();
|
420 |
|
|
} catch (IOException e) {
|
421 |
47219
|
argiro.kok
|
logger.error("Couldn't write to file " + "external_results_found_inOpenaire.txt",e);
|
422 |
41449
|
argiro.kok
|
}
|
423 |
|
|
}
|
424 |
41350
|
argiro.kok
|
|
425 |
41254
|
argiro.kok
|
}
|
426 |
41449
|
argiro.kok
|
if(result!=null){
|
427 |
|
|
// if result found enriched with access rights/ embargo date from DMF
|
428 |
|
|
try {
|
429 |
|
|
result.setAccessRights(dmfResultHandler.fetchAccessRights(dmf));
|
430 |
|
|
result.setEmbargoEndDate(dmfResultHandler.fetchEmbargoEndDateByDMF(dmf));
|
431 |
|
|
} catch (Exception e) {
|
432 |
47219
|
argiro.kok
|
logger.error("Error fetching Access or Embargo end date from DMF",e);
|
433 |
41449
|
argiro.kok
|
}
|
434 |
|
|
}
|
435 |
|
|
} else { //If dmf/ collecteFrom is null or is collected from openaire search in the index
|
436 |
41350
|
argiro.kok
|
result = buildOpenaireResult(resultId,claimId);
|
437 |
41254
|
argiro.kok
|
}
|
438 |
41200
|
katerina.i
|
|
439 |
|
|
|
440 |
41254
|
argiro.kok
|
return result;
|
441 |
|
|
}
|
442 |
|
|
|
443 |
|
|
/**
|
444 |
|
|
*
|
445 |
41350
|
argiro.kok
|
* @param resultId
|
446 |
|
|
* @param claimId
|
447 |
41254
|
argiro.kok
|
* @return target Result from Openaire or null
|
448 |
|
|
*/
|
449 |
41350
|
argiro.kok
|
public Result buildOpenaireResult(String resultId, String claimId ){
|
450 |
41254
|
argiro.kok
|
|
451 |
|
|
Result result = null;
|
452 |
41350
|
argiro.kok
|
try {
|
453 |
49865
|
argiro.kok
|
result = indexResultHandler.fetchPublicationById(resultId, false);
|
454 |
41350
|
argiro.kok
|
} catch (Exception e) {
|
455 |
47219
|
argiro.kok
|
logger.error("Error fetching result from Openaire",e);
|
456 |
49865
|
argiro.kok
|
System.out.println("Error fetching result from Openaire");
|
457 |
41350
|
argiro.kok
|
}
|
458 |
|
|
|
459 |
41254
|
argiro.kok
|
//If it is not found in the index
|
460 |
|
|
if (result == null) {
|
461 |
41450
|
argiro.kok
|
//give a second chance as dedup
|
462 |
49865
|
argiro.kok
|
// try {
|
463 |
|
|
// result = indexResultHandler.fetchDedupResultById(resultId);
|
464 |
|
|
// } catch (Exception e) {
|
465 |
|
|
// logger.error("Error fetching dedup Result",e);
|
466 |
|
|
// }
|
467 |
|
|
//give a second chance in production
|
468 |
41450
|
argiro.kok
|
try {
|
469 |
49865
|
argiro.kok
|
result = indexResultHandler.fetchPublicationById(resultId, true);
|
470 |
41450
|
argiro.kok
|
} catch (Exception e) {
|
471 |
49865
|
argiro.kok
|
logger.error("Error fetching result from Openaire",e);
|
472 |
|
|
System.out.println("Error fetching result from Openaire");
|
473 |
41254
|
argiro.kok
|
}
|
474 |
49865
|
argiro.kok
|
if (result == null) {
|
475 |
|
|
try {
|
476 |
|
|
result = indexResultHandler.fetchDatasetById(resultId, false);
|
477 |
|
|
} catch (Exception e) {
|
478 |
|
|
logger.error("Error fetching result from Openaire",e);
|
479 |
|
|
System.out.println("Error fetching result from Openaire");
|
480 |
|
|
}
|
481 |
|
|
}
|
482 |
|
|
if (result == null) {
|
483 |
|
|
try {
|
484 |
|
|
result = indexResultHandler.fetchDatasetById(resultId, true);
|
485 |
|
|
} catch (Exception e) {
|
486 |
|
|
logger.error("Error fetching result from Openaire",e);
|
487 |
|
|
System.out.println("Error fetching result from Openaire");
|
488 |
|
|
}
|
489 |
|
|
}
|
490 |
|
|
if (result == null) {
|
491 |
|
|
try {
|
492 |
|
|
result = indexResultHandler.fetchDedupById(resultId, false);
|
493 |
|
|
} catch (Exception e) {
|
494 |
|
|
logger.error("Error fetching result from Openaire",e);
|
495 |
|
|
System.out.println("Error fetching result from Openaire");
|
496 |
|
|
}
|
497 |
|
|
}
|
498 |
|
|
// if (result == null) {
|
499 |
|
|
// try {
|
500 |
|
|
// result = indexResultHandler.fetchDedupById(resultId, true);
|
501 |
|
|
// } catch (Exception e) {
|
502 |
|
|
// logger.error("Error fetching result from Openaire",e);
|
503 |
|
|
// System.out.println("Error fetching result from Openaire");
|
504 |
|
|
// }
|
505 |
|
|
// }
|
506 |
41450
|
argiro.kok
|
if (result != null) {
|
507 |
|
|
try {
|
508 |
47008
|
argiro.kok
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(claimValidation.getPathToSaveReport()+"dedup_results_found.txt", true)));
|
509 |
41450
|
argiro.kok
|
Date date = new java.util.Date();
|
510 |
|
|
out.println(new Timestamp(date.getTime()) + " OpenaireId: " + resultId + " claimId: " + claimId);
|
511 |
|
|
out.close();
|
512 |
|
|
} catch (IOException e) {
|
513 |
47219
|
argiro.kok
|
logger.error("Couldn't write to file " + "external_results_found_inOpenaire.txt",e);
|
514 |
41450
|
argiro.kok
|
}
|
515 |
|
|
}else {
|
516 |
|
|
//report it as missing
|
517 |
|
|
try {
|
518 |
47008
|
argiro.kok
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(claimValidation.getPathToSaveReport()+"missing_results.txt", true)));
|
519 |
41450
|
argiro.kok
|
Date date = new java.util.Date();
|
520 |
|
|
out.println(new Timestamp(date.getTime()) + " OpenaireId: " + resultId + " ClaimId: " + claimId);
|
521 |
|
|
out.close();
|
522 |
|
|
} catch (IOException e) {
|
523 |
47219
|
argiro.kok
|
logger.error("Couldn't write to file " + "missing_results.txt",e);
|
524 |
41450
|
argiro.kok
|
}
|
525 |
|
|
}
|
526 |
|
|
|
527 |
41350
|
argiro.kok
|
}else{
|
528 |
|
|
//FOUND report results that came from datacite but are publications
|
529 |
|
|
//TODO deal with them as externals?
|
530 |
|
|
|
531 |
|
|
if(result.getProvenanceaction() != null && result.getProvenanceaction().equals("user:claim:datacite")&&result.getOai()!=null){
|
532 |
|
|
try{
|
533 |
47008
|
argiro.kok
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(claimValidation.getPathToSaveReport()+"datacite_claim_results.txt", true)));
|
534 |
41350
|
argiro.kok
|
Date date= new java.util.Date();
|
535 |
41444
|
argiro.kok
|
out.println(new Timestamp(date.getTime())+" openaireId:"+resultId+" aoi: " +result.getOai() +" provenanceactions: "+result.getProvenanceaction()+" type: "+result.getResultType()+ " ClaimId: "+claimId);
|
536 |
41350
|
argiro.kok
|
out.close();
|
537 |
|
|
try {
|
538 |
|
|
result= externalRecordHandler.fetchResultfromDatacite(result.getOai());
|
539 |
|
|
result.setOpenaireId(resultId);
|
540 |
|
|
}catch(Exception e){
|
541 |
47219
|
argiro.kok
|
logger.error("Couldn't get Result from Datacite "+result.getOai(),e);
|
542 |
41350
|
argiro.kok
|
}
|
543 |
|
|
}catch (IOException e) {
|
544 |
47219
|
argiro.kok
|
logger.error("Couldn't write to file " + "datacite_claim_results.txt",e);
|
545 |
41350
|
argiro.kok
|
}
|
546 |
|
|
|
547 |
|
|
}
|
548 |
41200
|
katerina.i
|
}
|
549 |
|
|
|
550 |
41254
|
argiro.kok
|
return result;
|
551 |
41200
|
katerina.i
|
}
|
552 |
|
|
|
553 |
41350
|
argiro.kok
|
|
554 |
|
|
/**
|
555 |
|
|
* For each claim of the list
|
556 |
|
|
* if there are results in the relation exports their metadaa file
|
557 |
|
|
* Save the claim in DB
|
558 |
|
|
* @param claims
|
559 |
|
|
* @throws Exception
|
560 |
|
|
*/
|
561 |
47219
|
argiro.kok
|
private void saveClaims(List<Claim> claims) throws Exception, SQLStoreException {
|
562 |
41350
|
argiro.kok
|
for(Claim claim : claims){
|
563 |
41791
|
argiro.kok
|
/* if(claim.getTargetType().equals(ClaimUtils.DATASET)||claim.getTargetType().equals(ClaimUtils.PUBLICATION)){
|
564 |
41350
|
argiro.kok
|
String path = resultHandler.exportMetadataFileForResult((Result)claim.getTarget());
|
565 |
41408
|
argiro.kok
|
((Result) claim.getTarget()).setRecordPath(path);
|
566 |
41350
|
argiro.kok
|
}
|
567 |
|
|
if(claim.getSourceType().equals(ClaimUtils.DATASET)||claim.getSourceType().equals(ClaimUtils.PUBLICATION)){
|
568 |
|
|
String path = resultHandler.exportMetadataFileForResult((Result)claim.getSource());
|
569 |
41408
|
argiro.kok
|
((Result) claim.getSource()).setRecordPath(path);
|
570 |
41791
|
argiro.kok
|
}*/
|
571 |
|
|
claim = claimHandler.exportMedatataForClaim(claim);
|
572 |
41350
|
argiro.kok
|
claimHandler.saveClaim(claim);
|
573 |
|
|
}
|
574 |
|
|
|
575 |
|
|
}
|
576 |
|
|
|
577 |
41791
|
argiro.kok
|
|
578 |
|
|
|
579 |
47008
|
argiro.kok
|
public static void main(String[] args) throws IOException {
|
580 |
49865
|
argiro.kok
|
BasicConfigurator.configure();
|
581 |
|
|
logger.setLevel(Level.DEBUG);
|
582 |
41350
|
argiro.kok
|
Migration migration = new Migration();
|
583 |
|
|
try {
|
584 |
49865
|
argiro.kok
|
// migration.claimHandler.getQueryGenerator().setMigrationTable("claims_view");
|
585 |
|
|
// migration.createAndSaveRelationsClaims(true);
|
586 |
|
|
// migration.createAndSaveContextRelationClaims(true);
|
587 |
41444
|
argiro.kok
|
|
588 |
49865
|
argiro.kok
|
migration.claimHandler.getQueryGenerator().setMigrationTable("export_last_claims");
|
589 |
|
|
migration.createAndSaveRelationsClaims(true);
|
590 |
|
|
migration.createAndSaveContextRelationClaims(true);
|
591 |
41350
|
argiro.kok
|
|
592 |
49865
|
argiro.kok
|
|
593 |
41350
|
argiro.kok
|
} catch (Exception e) {
|
594 |
47219
|
argiro.kok
|
logger.error("Error in migration",e);
|
595 |
|
|
} catch (SQLStoreException e) {
|
596 |
41350
|
argiro.kok
|
e.printStackTrace();
|
597 |
|
|
}
|
598 |
47008
|
argiro.kok
|
|
599 |
41200
|
katerina.i
|
}
|
600 |
41350
|
argiro.kok
|
public void printStatistics(List<Claim> claims){
|
601 |
41254
|
argiro.kok
|
|
602 |
41350
|
argiro.kok
|
//testing
|
603 |
|
|
List<Claim> claimsOp= new ArrayList<Claim>();
|
604 |
|
|
List<Claim> claimsCr= new ArrayList<Claim>();
|
605 |
|
|
List<Claim> claimsDat= new ArrayList<Claim>();
|
606 |
|
|
List<Claim> claimsOrc= new ArrayList<Claim>();
|
607 |
|
|
Integer targetOp=0;
|
608 |
|
|
Integer targetCr=0;
|
609 |
|
|
Integer targetDat=0;
|
610 |
|
|
Integer targetOrc=0;
|
611 |
|
|
Integer sourceRes=0;
|
612 |
|
|
Integer sourceOp=0;
|
613 |
|
|
Integer sourceCr=0;
|
614 |
|
|
Integer sourceDat=0;
|
615 |
|
|
Integer sourceOrc=0;
|
616 |
|
|
Integer projects=0;
|
617 |
|
|
Integer contexts=0;
|
618 |
|
|
Integer totalClaims=0;
|
619 |
|
|
for(Claim claim: claims){
|
620 |
41444
|
argiro.kok
|
// System.out.println(claim.toString());
|
621 |
41350
|
argiro.kok
|
if(((Result)claim.getTarget()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_CROSSREF)){
|
622 |
|
|
targetCr++;
|
623 |
|
|
}else if(((Result)claim.getTarget()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_DATACITE)){
|
624 |
|
|
targetDat++;
|
625 |
|
|
}else if(((Result)claim.getTarget()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_ORCID)){
|
626 |
|
|
targetOrc++;
|
627 |
|
|
}else {
|
628 |
|
|
targetOp++;
|
629 |
|
|
}
|
630 |
|
|
if(claim.getSourceType().equals(ClaimUtils.PUBLICATION)||claim.getSourceType().equals(ClaimUtils.DATASET)){
|
631 |
|
|
sourceRes++;
|
632 |
|
|
if(((Result)claim.getSource()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_CROSSREF)){
|
633 |
|
|
sourceCr++;
|
634 |
|
|
}else if(((Result)claim.getSource()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_DATACITE)){
|
635 |
|
|
sourceDat++;
|
636 |
|
|
}else if(((Result)claim.getSource()).getCollectedFrom().equals(ClaimUtils.COLLECTED_FROM_ORCID)){
|
637 |
|
|
sourceOrc++;
|
638 |
|
|
}else {
|
639 |
|
|
sourceOp++;
|
640 |
|
|
}
|
641 |
|
|
}else if(claim.getSourceType().equals(ClaimUtils.PROJECT)) {
|
642 |
|
|
projects++;
|
643 |
|
|
}else if(claim.getSourceType().equals(ClaimUtils.CONTEXT)){
|
644 |
|
|
contexts++;
|
645 |
|
|
}
|
646 |
|
|
}
|
647 |
|
|
totalClaims=claims.size();
|
648 |
|
|
System.out.println("\n\nTargets:\n\nCrossref : "+targetCr+ " DataCite :"+targetDat+" Orcid : "+targetOrc+" OpenAire : "+targetOp);
|
649 |
|
|
System.out.println("\n\nSources:\n\nAll : "+sourceRes+" Crossref : "+sourceCr+ " DataCite :"+sourceDat+" Orcid : "+sourceOrc+" OpenAire : "+sourceOp);
|
650 |
|
|
System.out.println("\n\nTotalClaims: "+totalClaims+" Projects: "+projects+ " Contexts :"+contexts);
|
651 |
41254
|
argiro.kok
|
|
652 |
41350
|
argiro.kok
|
}
|
653 |
|
|
|
654 |
|
|
|
655 |
41200
|
katerina.i
|
}
|