1 |
41200
|
katerina.i
|
package eu.dnetlib.data.claims.migration.handler;
|
2 |
|
|
|
3 |
49865
|
argiro.kok
|
import eu.dnetlib.data.claims.migration.ClaimValidation;
|
4 |
41791
|
argiro.kok
|
import eu.dnetlib.data.claims.migration.entity.Result;
|
5 |
41200
|
katerina.i
|
import eu.dnetlib.data.claims.migration.parser.DMFParser;
|
6 |
41350
|
argiro.kok
|
import eu.dnetlib.data.claimsDemo.*;
|
7 |
47219
|
argiro.kok
|
import org.apache.log4j.Logger;
|
8 |
41200
|
katerina.i
|
import org.xml.sax.SAXException;
|
9 |
|
|
|
10 |
|
|
import javax.xml.parsers.ParserConfigurationException;
|
11 |
41254
|
argiro.kok
|
import javax.xml.xpath.XPathExpressionException;
|
12 |
|
|
import java.io.BufferedWriter;
|
13 |
|
|
import java.io.FileWriter;
|
14 |
41200
|
katerina.i
|
import java.io.IOException;
|
15 |
41254
|
argiro.kok
|
import java.io.PrintWriter;
|
16 |
41200
|
katerina.i
|
import java.sql.ResultSet;
|
17 |
47219
|
argiro.kok
|
import java.sql.SQLException;
|
18 |
41254
|
argiro.kok
|
import java.sql.Timestamp;
|
19 |
|
|
import java.util.Date;
|
20 |
41200
|
katerina.i
|
|
21 |
|
|
/**
|
22 |
|
|
* Created by kiatrop on 5/2/2016.
|
23 |
|
|
*/
|
24 |
|
|
public class DMFResultHandler {
|
25 |
47219
|
argiro.kok
|
private static final Logger logger = Logger.getLogger(DMFResultHandler.class);
|
26 |
41200
|
katerina.i
|
|
27 |
|
|
SqlDAO sqlDAO = null;
|
28 |
|
|
QueryGenerator queryGenerator = null;
|
29 |
|
|
DMFParser dmfParser = null;
|
30 |
|
|
|
31 |
41350
|
argiro.kok
|
/**
|
32 |
|
|
* Given an openaire id decides the datasource that the result was collected from
|
33 |
|
|
* Default datasource is openaire
|
34 |
|
|
* @param openaireId
|
35 |
|
|
* @return collectedFrom datasource
|
36 |
|
|
*/
|
37 |
|
|
public static String identifyCollectedFrom(String openaireId){
|
38 |
|
|
if(openaireId==null){
|
39 |
|
|
return null;
|
40 |
|
|
}
|
41 |
|
|
if(openaireId.contains("crossref")){
|
42 |
|
|
return ClaimUtils.COLLECTED_FROM_CROSSREF;
|
43 |
|
|
}else if (openaireId.contains("datacite")){
|
44 |
|
|
return ClaimUtils.COLLECTED_FROM_DATACITE;
|
45 |
|
|
} else if (openaireId.contains("orcid")){
|
46 |
|
|
return ClaimUtils.COLLECTED_FROM_ORCID;
|
47 |
|
|
}else{
|
48 |
|
|
return ClaimUtils.COLLECTED_FROM_OPENAIRE;
|
49 |
|
|
}
|
50 |
|
|
|
51 |
|
|
}
|
52 |
|
|
|
53 |
47219
|
argiro.kok
|
private String fetchDMFResult(String id) throws SQLStoreException, SQLException {
|
54 |
41200
|
katerina.i
|
ResultSet rs = sqlDAO.executePreparedQuery(queryGenerator.generateSelectDMFByIdQuery(id));
|
55 |
|
|
return rs.getString("xml");
|
56 |
|
|
}
|
57 |
|
|
|
58 |
|
|
public Result fetchResultByDMF(String dmf) throws IOException, SAXException, ParserConfigurationException {
|
59 |
41350
|
argiro.kok
|
return dmfParser.dmf2Result(dmf);
|
60 |
41200
|
katerina.i
|
}
|
61 |
|
|
|
62 |
47219
|
argiro.kok
|
public Result fetchResultById(String id) throws SQLStoreException, SQLException, ParserConfigurationException, SAXException, IOException {
|
63 |
41350
|
argiro.kok
|
//TODO check
|
64 |
|
|
return fetchResultByDMF(fetchDMFResult(id));
|
65 |
|
|
// return ParsingClaimUtils.getResultFromDMF(fetchDMFResult(id));
|
66 |
41200
|
katerina.i
|
}
|
67 |
|
|
|
68 |
41254
|
argiro.kok
|
/**
|
69 |
|
|
*
|
70 |
|
|
* @param dmf
|
71 |
|
|
* @return DOI from DMF - a *valid* one
|
72 |
|
|
* @throws IOException
|
73 |
|
|
* @throws SAXException
|
74 |
|
|
* @throws ParserConfigurationException
|
75 |
|
|
* @throws XPathExpressionException
|
76 |
|
|
*/
|
77 |
|
|
public String fetchDoiByDMF(String dmf) throws IOException, SAXException, ParserConfigurationException, XPathExpressionException {
|
78 |
|
|
|
79 |
41350
|
argiro.kok
|
return checkDoiValidity(dmfParser.getDOIIdentifierFromDMF(dmf));
|
80 |
41254
|
argiro.kok
|
}
|
81 |
|
|
public String fetchOrcidWorkByDMF(String dmf) throws IOException, SAXException, ParserConfigurationException, XPathExpressionException {
|
82 |
41350
|
argiro.kok
|
return dmfParser.getOrcidworkIdentifierFromDMF(dmf);
|
83 |
41254
|
argiro.kok
|
}
|
84 |
|
|
public String fetchEmbargoEndDateByDMF(String dmf) throws IOException, SAXException, ParserConfigurationException, XPathExpressionException {
|
85 |
41350
|
argiro.kok
|
return dmfParser.getEmbargoEndDateFromDMF(dmf);
|
86 |
41254
|
argiro.kok
|
}
|
87 |
|
|
public String fetchAccessRights(String dmf) throws IOException, SAXException, ParserConfigurationException, XPathExpressionException {
|
88 |
41350
|
argiro.kok
|
return dmfParser.getAccessRightsFromDMF(dmf);
|
89 |
41254
|
argiro.kok
|
}
|
90 |
|
|
|
91 |
|
|
/**
|
92 |
|
|
*
|
93 |
|
|
*
|
94 |
|
|
* @param doi
|
95 |
|
|
* @return DOI String : the given one if it's valid
|
96 |
|
|
* @throws Exception
|
97 |
|
|
*/
|
98 |
41444
|
argiro.kok
|
public static String checkDoiValidity(String doi) {
|
99 |
41254
|
argiro.kok
|
if(doi==null){
|
100 |
|
|
return null;
|
101 |
|
|
}
|
102 |
47219
|
argiro.kok
|
String newDoi = doi;
|
103 |
41254
|
argiro.kok
|
try{
|
104 |
|
|
PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("invalid_dois.txt", true)));
|
105 |
|
|
boolean isValid= SearchUtils.isDoiValid(doi);
|
106 |
|
|
if(!isValid&&(doi.length() - (doi.replace("_", "")).length() > 1)){
|
107 |
|
|
String transformDoi = doi.replace("_", "-");
|
108 |
|
|
isValid=SearchUtils.isDoiValid(transformDoi);
|
109 |
|
|
if(isValid){
|
110 |
|
|
Date date= new java.util.Date();
|
111 |
|
|
out.println(new Timestamp(date.getTime())+" - Invalid doi: "+doi+" replaced with : "+transformDoi);
|
112 |
47219
|
argiro.kok
|
newDoi=transformDoi;
|
113 |
41254
|
argiro.kok
|
}
|
114 |
|
|
}else if(!isValid&&(doi.length() - (doi.replace("_", "")).length() == 1)) {
|
115 |
|
|
String transformDoi = doi.replace("_", "-");
|
116 |
|
|
isValid=SearchUtils.isDoiValid(transformDoi);
|
117 |
|
|
if(isValid){
|
118 |
|
|
Date date= new java.util.Date();
|
119 |
|
|
out.println(new Timestamp(date.getTime())+" - Invalid doi: "+doi+" replaced with : "+transformDoi);
|
120 |
47219
|
argiro.kok
|
newDoi = transformDoi;
|
121 |
41254
|
argiro.kok
|
}
|
122 |
|
|
}else if(!isValid) {
|
123 |
|
|
String transformDoi =doi.replaceAll("\\p{C}", "");
|
124 |
|
|
isValid=SearchUtils.isDoiValid(transformDoi);
|
125 |
|
|
if(isValid){
|
126 |
|
|
Date date= new java.util.Date();
|
127 |
|
|
out.println(new Timestamp(date.getTime())+" - Invalid doi: "+doi+" replaced with (zero-width-char): "+transformDoi);
|
128 |
47219
|
argiro.kok
|
newDoi = transformDoi;
|
129 |
41254
|
argiro.kok
|
}
|
130 |
|
|
}
|
131 |
|
|
out.close();
|
132 |
|
|
}catch (IOException e) {
|
133 |
47219
|
argiro.kok
|
logger.error("Couldn't write to file " + "invalid_dois.txt",e);
|
134 |
41254
|
argiro.kok
|
}
|
135 |
47219
|
argiro.kok
|
return newDoi;
|
136 |
41254
|
argiro.kok
|
}
|
137 |
41350
|
argiro.kok
|
|
138 |
|
|
public SqlDAO getSqlDAO() {
|
139 |
|
|
return sqlDAO;
|
140 |
|
|
}
|
141 |
|
|
|
142 |
|
|
public void setSqlDAO(SqlDAO sqlDAO) {
|
143 |
|
|
this.sqlDAO = sqlDAO;
|
144 |
|
|
}
|
145 |
|
|
|
146 |
|
|
public QueryGenerator getQueryGenerator() {
|
147 |
|
|
return queryGenerator;
|
148 |
|
|
}
|
149 |
|
|
|
150 |
|
|
public void setQueryGenerator(QueryGenerator queryGenerator) {
|
151 |
|
|
this.queryGenerator = queryGenerator;
|
152 |
|
|
}
|
153 |
|
|
|
154 |
|
|
public DMFParser getDmfParser() {
|
155 |
|
|
return dmfParser;
|
156 |
|
|
}
|
157 |
|
|
|
158 |
|
|
public void setDmfParser(DMFParser dmfParser) {
|
159 |
|
|
this.dmfParser = dmfParser;
|
160 |
|
|
}
|
161 |
49865
|
argiro.kok
|
|
162 |
41200
|
katerina.i
|
}
|