Revision 36745
Added by Sandro La Bruzzo over 9 years ago
modules/dnet-openaire_dli-workflows/trunk/src/test/java/eu/dnetlib/dli/openaire/transform/ContentDatabaseTest.java | ||
---|---|---|
1 |
package eu.dnetlib.dli.openaire.transform; |
|
2 |
|
|
3 |
import java.util.Iterator; |
|
4 |
import java.util.Map; |
|
5 |
|
|
6 |
import org.junit.Ignore; |
|
7 |
import org.junit.Test; |
|
8 |
import org.junit.runner.RunWith; |
|
9 |
import org.springframework.beans.factory.annotation.Autowired; |
|
10 |
import org.springframework.test.context.ContextConfiguration; |
|
11 |
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; |
|
12 |
|
|
13 |
import com.google.common.collect.Maps; |
|
14 |
|
|
15 |
import eu.dnetlib.msro.workflows.dli.manager.DOIManager; |
|
16 |
import eu.dnetlib.msro.workflows.dli.model.DLIObject; |
|
17 |
|
|
18 |
@RunWith(SpringJUnit4ClassRunner.class) |
|
19 |
@ContextConfiguration(classes = ContentDatabaseConfiguration.class) |
|
20 |
public class ContentDatabaseTest { |
|
21 |
|
|
22 |
@Autowired |
|
23 |
private DOIManager doiManager; |
|
24 |
|
|
25 |
private Map<String, Integer> datasourcesMap; |
|
26 |
|
|
27 |
@Test |
|
28 |
@Ignore |
|
29 |
public void test() { |
|
30 |
|
|
31 |
datasourcesMap = Maps.newHashMap(); |
|
32 |
Iterator<DLIObject> it = doiManager.getAllDLIRecord().iterator(); |
|
33 |
|
|
34 |
int i = 0; |
|
35 |
long start = System.currentTimeMillis(); |
|
36 |
long end = System.currentTimeMillis(); |
|
37 |
double average = 0; |
|
38 |
while (it.hasNext()) { |
|
39 |
|
|
40 |
DLIObject obj = it.next(); |
|
41 |
|
|
42 |
if ((i++ % 1000) == 0) { |
|
43 |
end = System.currentTimeMillis(); |
|
44 |
long total = end - start; |
|
45 |
System.out.println(i + "record done in " + (total / 1000) + " average: " + (total / i) + " ms"); |
|
46 |
|
|
47 |
} |
|
48 |
|
|
49 |
//TODO FIX DATA MODEL |
|
50 |
// String[] datasources = obj.getDatasources(); |
|
51 |
// for (String s : datasources) { |
|
52 |
// if (datasourcesMap.containsKey(s) == false) { |
|
53 |
// datasourcesMap.put(s, 0); |
|
54 |
// } |
|
55 |
// |
|
56 |
// datasourcesMap.put(s, datasourcesMap.get(s) + 1); |
|
57 |
// } |
|
58 |
} |
|
59 |
|
|
60 |
for (String s : datasourcesMap.keySet()) { |
|
61 |
System.out.println(s + " ->" + datasourcesMap.get(s)); |
|
62 |
} |
|
63 |
} |
|
64 |
|
|
65 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/test/java/eu/dnetlib/dli/openaire/transform/ContentDatabaseConfiguration.java | ||
---|---|---|
1 |
package eu.dnetlib.dli.openaire.transform; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
|
|
5 |
import org.apache.commons.dbcp.BasicDataSource; |
|
6 |
import org.springframework.context.annotation.Bean; |
|
7 |
import org.springframework.context.annotation.Configuration; |
|
8 |
import org.springframework.core.io.ClassPathResource; |
|
9 |
import org.springframework.core.io.Resource; |
|
10 |
|
|
11 |
import eu.dnetlib.msro.workflows.dli.manager.DOIManager; |
|
12 |
|
|
13 |
@Configuration |
|
14 |
public class ContentDatabaseConfiguration { |
|
15 |
|
|
16 |
@Bean |
|
17 |
public BasicDataSource dliInterlinkingDataSource() { |
|
18 |
BasicDataSource ds = new BasicDataSource(); |
|
19 |
|
|
20 |
ds.setDriverClassName("org.postgresql.Driver"); |
|
21 |
ds.setUrl("jdbc:postgresql://db0.d.dli.research-infrastructures.eu:5432/dlidb"); |
|
22 |
ds.setUsername("dnet"); |
|
23 |
ds.setPassword("dnetPwd"); |
|
24 |
ds.setMinIdle(5); |
|
25 |
ds.setMaxIdle(20); |
|
26 |
ds.setMaxOpenPreparedStatements(180); |
|
27 |
|
|
28 |
return ds; |
|
29 |
} |
|
30 |
|
|
31 |
@Bean |
|
32 |
public DOIManager publicationManagar() throws IOException { |
|
33 |
DOIManager publicationManager = new DOIManager(); |
|
34 |
publicationManager.setTemplateTable(new ClassPathResource("eu/dnetlib/templates/DOIResolver.sql")); |
|
35 |
Resource r = new ClassPathResource("/eu/dnetlib/msro/workflows/doimanager/xmlTemplate.st"); |
|
36 |
publicationManager.setTemplate(r); |
|
37 |
return publicationManager; |
|
38 |
|
|
39 |
} |
|
40 |
|
|
41 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/test/java/eu/dnetlib/dli/openaire/transform/TestDOIManager.java | ||
---|---|---|
1 |
package eu.dnetlib.dli.openaire.transform; |
|
2 |
|
|
3 |
import static org.junit.Assert.fail; |
|
4 |
|
|
5 |
import java.io.ByteArrayInputStream; |
|
6 |
import java.util.Iterator; |
|
7 |
|
|
8 |
import org.apache.commons.logging.Log; |
|
9 |
import org.apache.commons.logging.LogFactory; |
|
10 |
import org.junit.Assert; |
|
11 |
import org.junit.Ignore; |
|
12 |
import org.junit.Test; |
|
13 |
import org.junit.runner.RunWith; |
|
14 |
import org.springframework.beans.factory.annotation.Autowired; |
|
15 |
import org.springframework.test.context.ContextConfiguration; |
|
16 |
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; |
|
17 |
import org.xml.sax.InputSource; |
|
18 |
import org.xml.sax.XMLReader; |
|
19 |
import org.xml.sax.helpers.DefaultHandler; |
|
20 |
import org.xml.sax.helpers.XMLReaderFactory; |
|
21 |
|
|
22 |
import eu.dnetlib.msro.workflows.dli.manager.DOIManager; |
|
23 |
|
|
24 |
/** |
|
25 |
* The Class TestPublicationManager. |
|
26 |
*/ |
|
27 |
@RunWith(SpringJUnit4ClassRunner.class) |
|
28 |
@ContextConfiguration(classes = ConfigurationTestConfig.class) |
|
29 |
public class TestDOIManager { |
|
30 |
|
|
31 |
private static final Log log = LogFactory.getLog(TestDOIManager.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
32 |
|
|
33 |
/** The publication manager. */ |
|
34 |
@Autowired |
|
35 |
private DOIManager doiManager; |
|
36 |
|
|
37 |
private final String doi = "10.1002/etc.2522"; |
|
38 |
|
|
39 |
private void testInit() { |
|
40 |
Assert.assertNotNull(doiManager); |
|
41 |
try { |
|
42 |
doiManager.init(); |
|
43 |
} catch (Exception e) { |
|
44 |
fail("Exception has been raised on init database" + e.getMessage()); |
|
45 |
} |
|
46 |
} |
|
47 |
|
|
48 |
/** |
|
49 |
* Test insert same record multiple time on the table |
|
50 |
*/ |
|
51 |
@Test |
|
52 |
@Ignore |
|
53 |
public void populateElement() { |
|
54 |
|
|
55 |
|
|
56 |
testInit(); |
|
57 |
//TODO FIX DATA MODEL |
|
58 |
// List<Pair<String, String>> datasources = Lists.newArrayList(); |
|
59 |
// datasources.add(new Pair<String, String>("ds1", "datasource1")); |
|
60 |
// datasources.add(new Pair<String, String>("ds2", "datasource2")); |
|
61 |
// datasources.add(new Pair<String, String>("ds3", "datasource3")); |
|
62 |
// datasources.add(new Pair<String, String>("ds4", "datasource4")); |
|
63 |
// datasources.add(new Pair<String, String>("ds5", "datasource5")); |
|
64 |
// doiManager.addDatasources(datasources); |
|
65 |
// |
|
66 |
// DLIObject object = new DLIObject(); |
|
67 |
// object.setPid("aeiouy"); |
|
68 |
// object.setTitles(new String[] { "Titolo 1" }); |
|
69 |
// object.setDate("2012-11-20"); |
|
70 |
// object.setType(DLIObjectType.publication); |
|
71 |
// object.setDatasources(new String[] { "ds1", "ds3" }); |
|
72 |
// |
|
73 |
// List<DLIRelation> relations = Lists.newArrayList(); |
|
74 |
// DLIRelation rel = new DLIRelation(); |
|
75 |
// rel.setRelationProvenance(Lists.newArrayList("ds1", "ds4")); |
|
76 |
// rel.setSourcePid("aeiouy"); |
|
77 |
// PID pid = new PID(); |
|
78 |
// pid.setId("pid1"); |
|
79 |
// pid.setType("pid_a_cazzo"); |
|
80 |
// rel.setTargetPID(pid); |
|
81 |
// relations.add(rel); |
|
82 |
// |
|
83 |
// DLIRelation rel1 = new DLIRelation(); |
|
84 |
// rel1.setRelationProvenance(Lists.newArrayList("ds2", "ds3")); |
|
85 |
// rel1.setSourcePid("aeiouy"); |
|
86 |
// PID pid1 = new PID(); |
|
87 |
// pid1.setId("pid2"); |
|
88 |
// pid1.setType("DOI"); |
|
89 |
// rel1.setTargetPID(pid1); |
|
90 |
// rel1.setTargetTitle("A title"); |
|
91 |
// rel1.setRelationSemantics("relation1"); |
|
92 |
// relations.add(rel1); |
|
93 |
// |
|
94 |
// object.setRelations(relations); |
|
95 |
// long start = System.currentTimeMillis(); |
|
96 |
// doiManager.upsertRecord(object); |
|
97 |
// long end = System.currentTimeMillis(); |
|
98 |
// long total = end - start; |
|
99 |
// log.info("Total time for upserting record " + total); |
|
100 |
} |
|
101 |
|
|
102 |
|
|
103 |
public void deleteElement() { |
|
104 |
log.info("Start deleting record"); |
|
105 |
doiManager.removeRecord("aeiouy"); |
|
106 |
log.info("deleted"); |
|
107 |
} |
|
108 |
|
|
109 |
@Test |
|
110 |
@Ignore |
|
111 |
public void testGetObject() throws Exception { |
|
112 |
|
|
113 |
Iterator<String> items = doiManager.getAllRecord().iterator(); |
|
114 |
|
|
115 |
long totalTime = 0; |
|
116 |
|
|
117 |
int i = 1; |
|
118 |
while (items.hasNext()) { |
|
119 |
long start = System.currentTimeMillis(); |
|
120 |
doValidate(items.next()); |
|
121 |
totalTime += System.currentTimeMillis() - start; |
|
122 |
if ((i % 100) == 0) { |
|
123 |
System.out.println("Read " + i + " Average time :" + (totalTime / i) + " ms"); |
|
124 |
} |
|
125 |
i++; |
|
126 |
} |
|
127 |
System.out.println("Total time for " + i + " items: " + totalTime + " ms"); |
|
128 |
System.out.println("Average Time: " + (totalTime / 1000) + " ms"); |
|
129 |
} |
|
130 |
|
|
131 |
private void doValidate(final String xml) throws Exception { |
|
132 |
try { |
|
133 |
XMLReader parser = XMLReaderFactory.createXMLReader(); |
|
134 |
parser.setContentHandler(new DefaultHandler()); |
|
135 |
InputSource source = new InputSource(new ByteArrayInputStream(xml.getBytes())); |
|
136 |
parser.parse(source); |
|
137 |
} catch (Exception e) { |
|
138 |
System.out.println(xml); |
|
139 |
e.printStackTrace(); |
|
140 |
throw new Exception(); |
|
141 |
} |
|
142 |
} |
|
143 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/dli/workflows/nodes/transform/DOIWorker.java | ||
---|---|---|
1 |
/* |
|
2 |
* |
|
3 |
*/ |
|
4 |
package eu.dnetlib.msro.dli.workflows.nodes.transform; |
|
5 |
|
|
6 |
import java.util.List; |
|
7 |
import java.util.concurrent.BlockingQueue; |
|
8 |
import java.util.concurrent.Callable; |
|
9 |
|
|
10 |
import org.apache.commons.logging.Log; |
|
11 |
import org.apache.commons.logging.LogFactory; |
|
12 |
|
|
13 |
import com.google.common.collect.Lists; |
|
14 |
|
|
15 |
import eu.dnetlib.msro.workflows.dli.manager.DOIManager; |
|
16 |
import eu.dnetlib.msro.workflows.dli.model.DLICompletionStatus; |
|
17 |
import eu.dnetlib.msro.workflows.dli.model.DLIObject; |
|
18 |
import eu.dnetlib.msro.workflows.dli.model.DLIPID; |
|
19 |
import eu.dnetlib.msro.workflows.dli.model.DLIProvenance; |
|
20 |
import eu.dnetlib.msro.workflows.dli.model.DLIProvisionMode; |
|
21 |
import eu.dnetlib.msro.workflows.dli.model.DLIRelation; |
|
22 |
import eu.dnetlib.msro.workflows.dli.resolver.DOIResolver; |
|
23 |
|
|
24 |
/** |
|
25 |
* The Class DOIWorker. |
|
26 |
*/ |
|
27 |
public class DOIWorker implements Callable<Boolean> { |
|
28 |
|
|
29 |
/** The Constant log. */ |
|
30 |
private static final Log log = LogFactory.getLog(DOIWorker.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
31 |
|
|
32 |
/** The queue. */ |
|
33 |
private final BlockingQueue<String> queue; |
|
34 |
|
|
35 |
/** The terminator queue. */ |
|
36 |
private final String terminatorQueue; |
|
37 |
|
|
38 |
/** The doi resolvers. */ |
|
39 |
private List<DOIResolver> doiResolvers; |
|
40 |
|
|
41 |
/** The parser. */ |
|
42 |
private final DLIRecordParser parser; |
|
43 |
|
|
44 |
/** The namespace prefix. */ |
|
45 |
private final String namespacePrefix; |
|
46 |
|
|
47 |
/** The DOI manager. */ |
|
48 |
private final DOIManager manager; |
|
49 |
|
|
50 |
private final int threadCount; |
|
51 |
|
|
52 |
private final List<DLIObject> toInsert; |
|
53 |
|
|
54 |
/**
 * Creates a worker bound to the given record queue.
 *
 * @param queue
 *            the queue the records are taken from
 * @param terminatorQueue
 *            the marker record that signals the end of the queue
 * @param doiResolvers
 *            the DOI resolvers (stored by this constructor)
 * @param namespacePrefix
 *            the namespace prefix used when a default provenance has to be created
 * @param manager
 *            the DOI manager used to store the records
 * @param threadCount
 *            the number prefixed to the log messages of this worker
 */
public DOIWorker(final BlockingQueue<String> queue, final String terminatorQueue, final List<DOIResolver> doiResolvers, final String namespacePrefix,
        final DOIManager manager, final int threadCount) {
    // collaborators received from the caller
    this.queue = queue;
    this.terminatorQueue = terminatorQueue;
    this.doiResolvers = doiResolvers;
    this.namespacePrefix = namespacePrefix;
    this.manager = manager;
    this.threadCount = threadCount;

    // state owned by this worker
    this.parser = new DLIRecordParser();
    this.toInsert = Lists.newArrayList();
}
|
81 |
|
|
82 |
/* |
|
83 |
* (non-Javadoc) |
|
84 |
* |
|
85 |
* @see java.lang.Runnable#run() |
|
86 |
*/ |
|
87 |
|
|
88 |
@Override |
|
89 |
public Boolean call() { |
|
90 |
|
|
91 |
String nextRecord = null; |
|
92 |
try { |
|
93 |
nextRecord = this.queue.take(); |
|
94 |
if (nextRecord == this.terminatorQueue) { |
|
95 |
log.debug(this.threadCount + ":Found terminator record"); |
|
96 |
this.queue.put(terminatorQueue); |
|
97 |
return true; |
|
98 |
} |
|
99 |
} catch (InterruptedException e) { |
|
100 |
log.error(this.threadCount + ":Error on taking an element on queue", e); |
|
101 |
} |
|
102 |
int extracted = 0; |
|
103 |
while ((nextRecord != null) && (nextRecord != terminatorQueue)) { |
|
104 |
try { |
|
105 |
log.debug(this.threadCount + ":Extracted " + extracted++); |
|
106 |
final DLIObject extractedObject = parser.parseRecord(nextRecord); |
|
107 |
if (extractedObject != null) { |
|
108 |
if ((extractedObject.getDatasourceProvenance() == null) || (extractedObject.getDatasourceProvenance().size() == 0)) { |
|
109 |
DLIProvenance prov = new DLIProvenance(namespacePrefix, DLIProvisionMode.collected.toString(), DLICompletionStatus.complete.toString()); |
|
110 |
extractedObject.setDatasourceProvenance(Lists.newArrayList(prov)); |
|
111 |
} |
|
112 |
final List<DLIObject> resolvedRelation = Lists.newArrayList(); |
|
113 |
List<DLIRelation> relations = extractedObject.getRelations(); |
|
114 |
if (relations != null) { |
|
115 |
for (DLIRelation relation : relations) { |
|
116 |
if ((relation.getTargetPID() != null) |
|
117 |
&& (relation.getTargetPID().getType() != null) |
|
118 |
&& ("doi".equals(relation.getTargetPID().getType().toLowerCase().trim()) || "openaire".equals(relation.getTargetPID() |
|
119 |
.getType().toLowerCase().trim()))) { |
|
120 |
DLIObject currentRelation = extractDOI(extractedObject, relation); |
|
121 |
if (currentRelation != null) { |
|
122 |
resolvedRelation.add(currentRelation); |
|
123 |
} |
|
124 |
} |
|
125 |
|
|
126 |
if (relation.getRelationProvenance() == null) { |
|
127 |
DLIProvenance prov = new DLIProvenance(namespacePrefix, DLIProvisionMode.unknown.toString(), DLICompletionStatus.incomplete.toString()); |
|
128 |
relation.setRelationProvenance(Lists.newArrayList(prov)); |
|
129 |
} |
|
130 |
} |
|
131 |
} |
|
132 |
try { |
|
133 |
int i = 1; |
|
134 |
long start = System.currentTimeMillis(); |
|
135 |
extractedObject.setCompletionStatus("complete"); |
|
136 |
toInsert.add(extractedObject); |
|
137 |
manager.upsertRecord(extractedObject); |
|
138 |
for (DLIObject obj : resolvedRelation) { |
|
139 |
obj.setCompletionStatus("incomplete"); |
|
140 |
toInsert.add(obj); |
|
141 |
i++; |
|
142 |
} |
|
143 |
long end = System.currentTimeMillis(); |
|
144 |
long average = (end - start) / i; |
|
145 |
log.debug(this.threadCount + ": average of inserting object: " + average); |
|
146 |
if(toInsert.size()>1000) { |
|
147 |
manager.upsertRecords(toInsert); |
|
148 |
toInsert.clear(); |
|
149 |
} |
|
150 |
|
|
151 |
} catch (Exception e) { |
|
152 |
log.error(this.threadCount + ": Error on iserting data", e); |
|
153 |
} |
|
154 |
} |
|
155 |
log.debug(this.threadCount + ":Getting item"); |
|
156 |
nextRecord = this.queue.take(); |
|
157 |
log.debug(this.threadCount + ":Taken item"); |
|
158 |
if (nextRecord == this.terminatorQueue) { |
|
159 |
log.debug(this.threadCount + ":Found terminator record"); |
|
160 |
this.queue.put(terminatorQueue); |
|
161 |
} |
|
162 |
|
|
163 |
} catch (Exception e) { |
|
164 |
DOIWorker.log.error(this.threadCount + ":Error on taking an element on queue", e); |
|
165 |
return false; |
|
166 |
} |
|
167 |
|
|
168 |
} |
|
169 |
log.debug(this.threadCount + ":Exit from while " + extracted); |
|
170 |
return true; |
|
171 |
|
|
172 |
} |
|
173 |
|
|
174 |
private DLIObject extractDOI(final DLIObject extractedObject, final DLIRelation relation) { |
|
175 |
DLIPID currentPIDRelation = relation.getTargetPID(); |
|
176 |
DLIObject dbObject = new DLIObject(); |
|
177 |
dbObject.setPid(currentPIDRelation.getId()); |
|
178 |
dbObject.setPidType(currentPIDRelation.getType()); |
|
179 |
DLIRelation rel = new DLIRelation(); |
|
180 |
rel.setSourcePid(currentPIDRelation.getId()); |
|
181 |
rel.setSourceRecordId(extractedObject.getIdentifier()); |
|
182 |
rel.setTargetPID(new DLIPID(extractedObject.getPid(), extractedObject.getPidType())); |
|
183 |
rel.setTargetType(extractedObject.getType()); |
|
184 |
rel.setCompletionStatus("complete"); |
|
185 |
rel.setRelationSemantics(DLIUtils.getInverse(relation.getRelationSemantics())); |
|
186 |
if((extractedObject.getTitles()!= null) && (extractedObject.getTitles().length>0)) { |
|
187 |
rel.setTargetTitle(extractedObject.getTitles()[0]); |
|
188 |
} |
|
189 |
dbObject.setRelations(Lists.newArrayList(rel)); |
|
190 |
return dbObject; |
|
191 |
|
|
192 |
} |
|
193 |
|
|
194 |
/** |
|
195 |
* This methods extract and Try to resolve DOI related from extracedobject if it can resolve the DOI then return a new DLIObject and |
|
196 |
* creates the relations between these object else fill a simple relation only with the PID |
|
197 |
* |
|
198 |
* @param extractedObject |
|
199 |
* the extracted object |
|
200 |
* @param relation |
|
201 |
* the relation |
|
202 |
* @return the DLI object |
|
203 |
*/ |
|
204 |
// private DLIObject extractDOI(final DLIObject extractedObject, final DLIRelation relation) { |
|
205 |
// PID currentPIDRelation = relation.getTargetPID(); |
|
206 |
// relation.setRelationProvenance(Lists.newArrayList(namespacePrefix)); |
|
207 |
// log.debug(this.threadCount + ":Resolving " + currentPIDRelation.getId()); |
|
208 |
// long start = System.currentTimeMillis(); |
|
209 |
// DLIObject dbObject = null; |
|
210 |
// // this.manager.getObjectNoRel(currentPIDRelation.getId()); |
|
211 |
// long end = System.currentTimeMillis(); |
|
212 |
// log.debug(threadCount + ": Time for getting object " + (end - start)); |
|
213 |
// |
|
214 |
// if (dbObject != null) { |
|
215 |
// |
|
216 |
// // Enrich from relation with target TYpe and title |
|
217 |
// relation.setTargetType(dbObject.getType()); |
|
218 |
// if (dbObject.getTitles() != null && dbObject.getTitles().length > 0) { |
|
219 |
// relation.setTargetTitle(dbObject.getTitles()[0]); |
|
220 |
// } |
|
221 |
// |
|
222 |
// // Creating opposite relation |
|
223 |
// DLIRelation relationToExtractedObject = new DLIRelation(); |
|
224 |
// relationToExtractedObject.setSourceDOI(dbObject.getPid()); |
|
225 |
// relationToExtractedObject.setTargetPID(new PID(extractedObject.getPid(), extractedObject.getPidType())); |
|
226 |
// relationToExtractedObject.setTargetType(extractedObject.getType()); |
|
227 |
// relationToExtractedObject.setRelationProvenance(new ArrayList<String>()); |
|
228 |
// relationToExtractedObject.getRelationProvenance().add(namespacePrefix); |
|
229 |
// if (extractedObject.getTitles() != null && extractedObject.getTitles().length > 0) { |
|
230 |
// relationToExtractedObject.setTargetTitle(extractedObject.getTitles()[0]); |
|
231 |
// } |
|
232 |
// relationToExtractedObject.setRelationSemantics(DOIUtils.getInverse(relation.getRelationSemantics())); |
|
233 |
// dbObject.setRelations(Lists.newArrayList(relationToExtractedObject)); |
|
234 |
// return dbObject; |
|
235 |
// } |
|
236 |
// |
|
237 |
// for (DOIResolver resolver : this.doiResolvers) { |
|
238 |
// DLIObject obj = resolver.retrieveDOI(currentPIDRelation.getId()); |
|
239 |
// if (obj != null) { |
|
240 |
// // FOUND RELATION! |
|
241 |
// // ADD INFO RELATION EXTRACTED_OBJ -> RESOLVED OBJECT |
|
242 |
// |
|
243 |
// // Enrich from relation with target TYpe and title |
|
244 |
// relation.setTargetType(obj.getType()); |
|
245 |
// if (obj.getTitles() != null && obj.getTitles().length > 0) { |
|
246 |
// relation.setTargetTitle(obj.getTitles()[0]); |
|
247 |
// } |
|
248 |
// |
|
249 |
// DLIRelation relationResolvedSource = new DLIRelation(); |
|
250 |
// relationResolvedSource.setSourceDOI(obj.getPid()); |
|
251 |
// relationResolvedSource.setTargetPID(new PID(extractedObject.getPid(), extractedObject.getPidType())); |
|
252 |
// relationResolvedSource.setRelationProvenance(Lists.newArrayList(namespacePrefix)); |
|
253 |
// if (extractedObject.getTitles() != null && extractedObject.getTitles().length > 0) { |
|
254 |
// relationResolvedSource.setTargetTitle(extractedObject.getTitles()[0]); |
|
255 |
// } |
|
256 |
// relationResolvedSource.setRelationSemantics(DOIUtils.getInverse(relation.getRelationSemantics())); |
|
257 |
// relationResolvedSource.setTargetType(extractedObject.getType()); |
|
258 |
// obj.setRelations(Lists.newArrayList(relationResolvedSource)); |
|
259 |
// return obj; |
|
260 |
// } |
|
261 |
// } |
|
262 |
// return null; |
|
263 |
// } |
|
264 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/workflows/dli/resolver/RCBSParser.java | ||
---|---|---|
1 |
/** |
|
2 |
* |
|
3 |
*/ |
|
4 |
package eu.dnetlib.msro.workflows.dli.resolver; |
|
5 |
|
|
6 |
import java.io.ByteArrayInputStream; |
|
7 |
import java.util.Stack; |
|
8 |
|
|
9 |
import javax.xml.stream.XMLInputFactory; |
|
10 |
import javax.xml.stream.XMLStreamConstants; |
|
11 |
import javax.xml.stream.XMLStreamException; |
|
12 |
import javax.xml.stream.XMLStreamReader; |
|
13 |
|
|
14 |
import org.apache.commons.logging.Log; |
|
15 |
import org.apache.commons.logging.LogFactory; |
|
16 |
|
|
17 |
import com.google.common.collect.Lists; |
|
18 |
|
|
19 |
import eu.dnetlib.msro.workflows.dli.model.DLICompletionStatus; |
|
20 |
import eu.dnetlib.msro.workflows.dli.model.DLIObject; |
|
21 |
import eu.dnetlib.msro.workflows.dli.model.DLIObjectType; |
|
22 |
import eu.dnetlib.msro.workflows.dli.model.DLIProvenance; |
|
23 |
|
|
24 |
|
|
25 |
/** |
|
26 |
* @author sandro |
|
27 |
* |
|
28 |
*/ |
|
29 |
public class RCBSParser { |
|
30 |
|
|
31 |
/** The Constant log. */ |
|
32 |
private static final Log log = LogFactory.getLog(RCBSParser.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
33 |
|
|
34 |
/** The Constant CROSSREF_NS_PREFIX. */ |
|
35 |
public static final String RCBS_NS_PREFIX = "rcbs" ; |
|
36 |
|
|
37 |
/** |
|
38 |
* Parses the record. |
|
39 |
* |
|
40 |
* @param record the record |
|
41 |
* @return the DLI object |
|
42 |
*/ |
|
43 |
public DLIObject parseRecord(final String record) { |
|
44 |
try { |
|
45 |
XMLInputFactory factory = XMLInputFactory.newInstance(); |
|
46 |
XMLStreamReader parser = factory.createXMLStreamReader(new ByteArrayInputStream(record.getBytes())); |
|
47 |
DLIObject object = new DLIObject(); |
|
48 |
Stack<String> elementStack = new Stack<String>(); |
|
49 |
elementStack.push("/"); |
|
50 |
while (parser.hasNext()) { |
|
51 |
int event = parser.next(); |
|
52 |
if (event == XMLStreamConstants.END_ELEMENT) { |
|
53 |
|
|
54 |
elementStack.pop(); |
|
55 |
} else if (event == XMLStreamConstants.START_ELEMENT) { |
|
56 |
final String localName = parser.getLocalName(); |
|
57 |
elementStack.push(localName); |
|
58 |
if("dimStructure.structureId".equals(localName)) { |
|
59 |
log.debug("found open tag structureId"); |
|
60 |
parser.next(); |
|
61 |
if (parser.hasText()) { |
|
62 |
object.setPid(parser.getText().trim()); |
|
63 |
object.setPidType("pdb"); |
|
64 |
} |
|
65 |
} else if ("dimStructure.structureTitle".equals(localName)) { |
|
66 |
log.debug("found open tag structureTitle"); |
|
67 |
parser.next(); |
|
68 |
if (parser.hasText()) { |
|
69 |
String title =parser.getText().trim(); |
|
70 |
object.setTitles(new String[]{title}); |
|
71 |
} |
|
72 |
} else if ("dimStructure.structureAuthor".equals(localName)) { |
|
73 |
log.debug("found open tag structureAuthor"); |
|
74 |
parser.next(); |
|
75 |
if (parser.hasText()) { |
|
76 |
String author =parser.getText().trim(); |
|
77 |
String [] splittedAuthrs = author.split("#"); |
|
78 |
if (splittedAuthrs != null) { |
|
79 |
object.setAuthors(splittedAuthrs); |
|
80 |
} |
|
81 |
} |
|
82 |
} else if ("dimStructure.releaseDate".equals(localName)) { |
|
83 |
log.debug("found open tag releaseDate"); |
|
84 |
parser.next(); |
|
85 |
if (parser.hasText()) { |
|
86 |
String date =parser.getText().trim(); |
|
87 |
object.setDate(date); |
|
88 |
} |
|
89 |
} |
|
90 |
|
|
91 |
} |
|
92 |
} |
|
93 |
if((object.getPid()!= null) && (object.getPidType()!= null)) { |
|
94 |
DLIProvenance provenance = new DLIProvenance(RCBS_NS_PREFIX, "resolved", "complete"); |
|
95 |
object.setDatasourceProvenance(Lists.newArrayList(provenance)); |
|
96 |
object.setType(DLIObjectType.dataset); |
|
97 |
object.setCompletionStatus(DLICompletionStatus.complete.toString()); |
|
98 |
return object; |
|
99 |
} |
|
100 |
return null; |
|
101 |
|
|
102 |
} catch (XMLStreamException e) { |
|
103 |
log.error("error during parsing record " + record, e); |
|
104 |
return null; |
|
105 |
} |
|
106 |
|
|
107 |
|
|
108 |
} |
|
109 |
|
|
110 |
|
|
111 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/workflows/dli/resolver/RCBSResolver.java | ||
---|---|---|
1 |
/** |
|
2 |
* |
|
3 |
*/ |
|
4 |
package eu.dnetlib.msro.workflows.dli.resolver; |
|
5 |
|
|
6 |
import java.io.IOException; |
|
7 |
import java.net.MalformedURLException; |
|
8 |
import java.net.URL; |
|
9 |
|
|
10 |
import org.apache.commons.io.IOUtils; |
|
11 |
import org.apache.commons.logging.Log; |
|
12 |
import org.apache.commons.logging.LogFactory; |
|
13 |
import org.springframework.beans.factory.annotation.Autowired; |
|
14 |
|
|
15 |
import eu.dnetlib.msro.workflows.dli.model.DLIObject; |
|
16 |
|
|
17 |
|
|
18 |
|
|
19 |
/** |
|
20 |
* The Class RCBSResolver. |
|
21 |
* |
|
22 |
* @author sandro |
|
23 |
*/ |
|
24 |
public class RCBSResolver implements DOIResolver { |
|
25 |
|
|
26 |
|
|
27 |
/** The Constant log. */ |
|
28 |
private static final Log log = LogFactory.getLog(RCBSResolver.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
29 |
|
|
30 |
|
|
31 |
/** The template url. */ |
|
32 |
private static String TEMPLATE_URL = "http://www.rcsb.org/pdb/rest/customReport?pdbids=%s&customReportColumns=structureTitle,structureAuthor,releaseDate&service=wsfile"; |
|
33 |
|
|
34 |
|
|
35 |
/** The rcbs parser. */ |
|
36 |
@Autowired |
|
37 |
private RCBSParser rcbsParser; |
|
38 |
|
|
39 |
|
|
40 |
|
|
41 |
/** |
|
42 |
* Request doi. |
|
43 |
* |
|
44 |
* @param URL the url |
|
45 |
* @return the string |
|
46 |
*/ |
|
47 |
private String requestDOI(final String URL) { |
|
48 |
|
|
49 |
|
|
50 |
try { |
|
51 |
final URL myURl = new URL(URL); |
|
52 |
final String response = IOUtils.toString(myURl.openStream()); |
|
53 |
return response; |
|
54 |
} catch (MalformedURLException e) { |
|
55 |
log.error("Error on request DOI, request :" + URL, e); |
|
56 |
return null; |
|
57 |
} catch (IOException e) { |
|
58 |
log.error("Error on request DOI, request :" + URL, e); |
|
59 |
return null; |
|
60 |
} |
|
61 |
|
|
62 |
} |
|
63 |
|
|
64 |
|
|
65 |
|
|
66 |
/** |
|
67 |
* {@inheritDoc} |
|
68 |
* @see eu.dnetlib.msro.workflows.dli.resolver.DOIResolver#retrieveDOI(java.lang.String, java.lang.String) |
|
69 |
*/ |
|
70 |
@Override |
|
71 |
public DLIObject retrieveDOI(final String doi, final String doiType) { |
|
72 |
if(doiType== null) return null; |
|
73 |
if (doiType.toLowerCase().trim().equals("pdb")) { |
|
74 |
String URL = String.format(TEMPLATE_URL,doi); |
|
75 |
String response = requestDOI(URL); |
|
76 |
if(response== null) return null; |
|
77 |
return rcbsParser.parseRecord(response); |
|
78 |
} |
|
79 |
return null; |
|
80 |
} |
|
81 |
|
|
82 |
|
|
83 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/workflows/dli/manager/DLIIterator.java | ||
---|---|---|
1 |
/* |
|
2 |
* |
|
3 |
*/ |
|
4 |
package eu.dnetlib.msro.workflows.dli.manager; |
|
5 |
|
|
6 |
import java.sql.Array; |
|
7 |
import java.sql.Connection; |
|
8 |
import java.sql.PreparedStatement; |
|
9 |
import java.sql.ResultSet; |
|
10 |
import java.sql.SQLException; |
|
11 |
import java.util.Iterator; |
|
12 |
import java.util.List; |
|
13 |
|
|
14 |
import javax.sql.DataSource; |
|
15 |
|
|
16 |
import org.apache.commons.logging.Log; |
|
17 |
import org.apache.commons.logging.LogFactory; |
|
18 |
|
|
19 |
import com.google.common.collect.Lists; |
|
20 |
|
|
21 |
import eu.dnetlib.msro.workflows.dli.model.DLIObject; |
|
22 |
import eu.dnetlib.msro.workflows.dli.model.DLIObjectType; |
|
23 |
import eu.dnetlib.msro.workflows.dli.model.DLIPID; |
|
24 |
import eu.dnetlib.msro.workflows.dli.model.DLIRelation; |
|
25 |
|
|
26 |
public class DLIIterator implements Iterator<String> { |
|
27 |
|
|
28 |
private static final Log log = LogFactory.getLog(DLIIterator.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
29 |
|
|
30 |
private ResultSet inputResutlSet; |
|
31 |
|
|
32 |
private final String getAllObject = "select * from dois"; |
|
33 |
|
|
34 |
private final DataSource inputDatasource; |
|
35 |
|
|
36 |
private Connection currentConnection; |
|
37 |
|
|
38 |
private final DOIManager parentDOIManager; |
|
39 |
|
|
40 |
private String nextObject; |
|
41 |
|
|
42 |
/**
 * Creates the iterator and immediately prepares the first element.
 *
 * @param inputDatasource
 *            the data source the connections are obtained from
 * @param parentDOIManager
 *            the DOI manager that owns this iterator
 */
public DLIIterator(final DataSource inputDatasource, final DOIManager parentDOIManager) {
    this.parentDOIManager = parentDOIManager;
    this.inputDatasource = inputDatasource;
    // look-ahead: the element returned by the first next() is built here
    this.nextObject = constructNextObject();
}
|
48 |
|
|
49 |
@Override |
|
50 |
public boolean hasNext() { |
|
51 |
if (nextObject == null) { |
|
52 |
tryToCloseConnection(); |
|
53 |
} |
|
54 |
return nextObject != null; |
|
55 |
} |
|
56 |
|
|
57 |
@Override |
|
58 |
public String next() { |
|
59 |
String tmp = nextObject; |
|
60 |
nextObject = constructNextObject(); |
|
61 |
return tmp; |
|
62 |
} |
|
63 |
|
|
64 |
@Override |
|
65 |
public void remove() { |
|
66 |
// TODO Auto-generated method stub |
|
67 |
|
|
68 |
} |
|
69 |
|
|
70 |
private void tryToCloseConnection() { |
|
71 |
if (currentConnection != null) { |
|
72 |
try { |
|
73 |
currentConnection.close(); |
|
74 |
} catch (SQLException e1) { |
|
75 |
log.error("Error on closing connection", e1); |
|
76 |
} |
|
77 |
} |
|
78 |
|
|
79 |
} |
|
80 |
|
|
81 |
private void createInputResultSet() { |
|
82 |
try { |
|
83 |
currentConnection = this.inputDatasource.getConnection(); |
|
84 |
PreparedStatement st = currentConnection.prepareStatement(getAllObject); |
|
85 |
this.inputResutlSet = st.executeQuery(); |
|
86 |
} catch (Exception e) { |
|
87 |
log.error("Error on iterating on all objects", e); |
|
88 |
if (currentConnection != null) { |
|
89 |
tryToCloseConnection(); |
|
90 |
} |
|
91 |
} |
|
92 |
} |
|
93 |
|
|
94 |
private String constructNextObject() { |
|
95 |
Connection connection = null; |
|
96 |
if (inputResutlSet == null) { |
|
97 |
createInputResultSet(); |
|
98 |
} |
|
99 |
|
|
100 |
try { |
|
101 |
if ((this.inputResutlSet == null) || (inputResutlSet.next() == false)) { |
|
102 |
tryToCloseConnection(); |
|
103 |
return null; |
|
104 |
} |
|
105 |
connection = inputDatasource.getConnection(); |
|
106 |
final String dliRelationQuery = "select array_to_string(array_agg(DISTINCT COALESCE(r.relation, 'NONE')||'@@@'||r.pid || '@@@' ||r.pidtype|| '@@@' ||COALESCE(r.title, 'NONE')|| '@@@' ||COALESCE(r.target_type, 'NONE')|| '@@@' ||COALESCE(r.datasources, 'NONE') ),'<-->') as relations from dois o LEFT OUTER JOIN reldoi r ON o.doi=r.source_doi where o.doi=? group by o.doi"; |
|
107 |
DLIObject result = new DLIObject(); |
|
108 |
result.setPid(this.inputResutlSet.getString("doi")); |
|
109 |
result.setPidType(this.inputResutlSet.getString("doi_type")); |
|
110 |
Array titlesArray = this.inputResutlSet.getArray("titles"); |
|
111 |
result.setTitles((String[]) titlesArray.getArray()); |
|
112 |
Array authorsArray = this.inputResutlSet.getArray("authors"); |
|
113 |
result.setAuthors((String[]) authorsArray.getArray()); |
|
114 |
|
|
115 |
result.setDate(this.inputResutlSet.getString("date")); |
|
116 |
String type = this.inputResutlSet.getString("type"); |
|
117 |
if (type!= null) { |
|
118 |
result.setType(DLIObjectType.valueOf(type)); |
|
119 |
} else { |
|
120 |
result.setType(DLIObjectType.unknown); |
|
121 |
} |
|
122 |
String datasources = this.inputResutlSet.getString("datasources"); |
|
123 |
//TODO FIX THE DATASOURCE PROVENANCE |
|
124 |
// if (datasources != null) { |
|
125 |
// result.setDatasources(datasources.split(",")); |
|
126 |
// } |
|
127 |
|
|
128 |
PreparedStatement statement = connection.prepareStatement(dliRelationQuery); |
|
129 |
statement.setString(1, result.getPid()); |
|
130 |
ResultSet resultset = statement.executeQuery(); |
|
131 |
if (resultset.next()) { |
|
132 |
String resultString = resultset.getString(1); |
|
133 |
|
|
134 |
List<DLIRelation> rels = Lists.newArrayList(); |
|
135 |
|
|
136 |
if (resultString != null) { |
|
137 |
|
|
138 |
String[] data = resultString.split("<-->"); |
|
139 |
if (data != null) { |
|
140 |
for (String itm : data) { |
|
141 |
DLIRelation currentRel = constructRelation(itm, result.getPid()); |
|
142 |
if (currentRel != null) { |
|
143 |
rels.add(currentRel); |
|
144 |
} |
|
145 |
} |
|
146 |
} |
|
147 |
result.setRelations(rels); |
|
148 |
} |
|
149 |
} |
|
150 |
return parentDOIManager.extractXMLfromDLIObject(result); |
|
151 |
|
|
152 |
} catch (Exception e) { |
|
153 |
log.error("Error on iterating on all objects", e); |
|
154 |
return null; |
|
155 |
} finally { |
|
156 |
if (connection != null) { |
|
157 |
try { |
|
158 |
connection.close(); |
|
159 |
} catch (SQLException e) { |
|
160 |
log.error("Erron on closing connection", e); |
|
161 |
} |
|
162 |
} |
|
163 |
} |
|
164 |
|
|
165 |
} |
|
166 |
|
|
167 |
private DLIRelation constructRelation(final String input, final String doi) { |
|
168 |
if (input.contains("@@@")) { |
|
169 |
|
|
170 |
String[] data = input.split("@@@"); |
|
171 |
if ((data != null) && (data.length == 6)) { |
|
172 |
|
|
173 |
String relsemantic = data[0]; |
|
174 |
String pid = data[1]; |
|
175 |
String pidtype = data[2]; |
|
176 |
String title = data[3]; |
|
177 |
String entityType = data[4]; |
|
178 |
String provenance = data[5]; |
|
179 |
|
|
180 |
DLIPID curretPID = new DLIPID(pid, pidtype); |
|
181 |
DLIRelation rel = new DLIRelation(); |
|
182 |
rel.setTargetPID(curretPID); |
|
183 |
//TODO IMPLEMENT the CHANGES OF THE DATA MODEL |
|
184 |
//rel.setSourceDOI(doi); |
|
185 |
// if (provenance.toLowerCase() != "none") { |
|
186 |
// String[] splittedProv = provenance.split(","); |
|
187 |
// if (splittedProv != null) { |
|
188 |
// rel.setRelationProvenance(Lists.newArrayList(splittedProv)); |
|
189 |
// } |
|
190 |
// } |
|
191 |
if (!title.trim().toLowerCase().equals("none")) { |
|
192 |
rel.setTargetTitle(title); |
|
193 |
} |
|
194 |
|
|
195 |
if (!entityType.trim().toLowerCase().equals("none")) { |
|
196 |
try { |
|
197 |
rel.setTargetType(DLIObjectType.valueOf(entityType.replace("@", ""))); |
|
198 |
} catch (Exception e) { |
|
199 |
log.info("Unable to cast entity type: " + entityType + " \n input value:" + input + " DOI: " + doi); |
|
200 |
} |
|
201 |
} |
|
202 |
|
|
203 |
if (!relsemantic.trim().toLowerCase().equals("none")) { |
|
204 |
rel.setRelationSemantics(relsemantic); |
|
205 |
} |
|
206 |
|
|
207 |
return rel; |
|
208 |
} else { |
|
209 |
log.error("unespected data on relation field"); |
|
210 |
} |
|
211 |
|
|
212 |
} else { |
|
213 |
log.info("input does not contains expected splitter charachter"); |
|
214 |
} |
|
215 |
return null; |
|
216 |
|
|
217 |
} |
|
218 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/workflows/dli/manager/DLIIteratorMock.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.workflows.dli.manager; |
|
2 |
|
|
3 |
import java.sql.Array; |
|
4 |
import java.sql.Connection; |
|
5 |
import java.sql.PreparedStatement; |
|
6 |
import java.sql.ResultSet; |
|
7 |
import java.sql.SQLException; |
|
8 |
import java.util.Iterator; |
|
9 |
import java.util.List; |
|
10 |
|
|
11 |
import javax.sql.DataSource; |
|
12 |
|
|
13 |
import org.apache.commons.logging.Log; |
|
14 |
import org.apache.commons.logging.LogFactory; |
|
15 |
|
|
16 |
import com.google.common.collect.Lists; |
|
17 |
|
|
18 |
import eu.dnetlib.msro.workflows.dli.model.DLIObject; |
|
19 |
import eu.dnetlib.msro.workflows.dli.model.DLIObjectType; |
|
20 |
import eu.dnetlib.msro.workflows.dli.model.DLIPID; |
|
21 |
import eu.dnetlib.msro.workflows.dli.model.DLIRelation; |
|
22 |
|
|
23 |
public class DLIIteratorMock implements Iterator<DLIObject> { |
|
24 |
|
|
25 |
private static final Log log = LogFactory.getLog(DLIIterator.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
26 |
|
|
27 |
private ResultSet inputResutlSet; |
|
28 |
|
|
29 |
private final String getAllObject = "select * from dois"; |
|
30 |
|
|
31 |
private final DataSource inputDatasource; |
|
32 |
|
|
33 |
private Connection currentConnection; |
|
34 |
|
|
35 |
private final DOIManager parentDOIManager; |
|
36 |
|
|
37 |
private DLIObject nextObject; |
|
38 |
|
|
39 |
public DLIIteratorMock(final DataSource inputDatasource, final DOIManager parentDOIManager) { |
|
40 |
this.inputDatasource = inputDatasource; |
|
41 |
this.parentDOIManager = parentDOIManager; |
|
42 |
nextObject = constructNextObject(); |
|
43 |
|
|
44 |
} |
|
45 |
|
|
46 |
@Override |
|
47 |
public boolean hasNext() { |
|
48 |
if (nextObject == null) { |
|
49 |
tryToCloseConnection(); |
|
50 |
} |
|
51 |
return nextObject != null; |
|
52 |
} |
|
53 |
|
|
54 |
@Override |
|
55 |
public DLIObject next() { |
|
56 |
DLIObject tmp = nextObject; |
|
57 |
nextObject = constructNextObject(); |
|
58 |
return tmp; |
|
59 |
} |
|
60 |
|
|
61 |
@Override |
|
62 |
public void remove() { |
|
63 |
// TODO Auto-generated method stub |
|
64 |
|
|
65 |
} |
|
66 |
|
|
67 |
private void tryToCloseConnection() { |
|
68 |
if (currentConnection != null) { |
|
69 |
try { |
|
70 |
currentConnection.close(); |
|
71 |
} catch (SQLException e1) { |
|
72 |
log.error("Error on closing connection", e1); |
|
73 |
} |
|
74 |
} |
|
75 |
|
|
76 |
} |
|
77 |
|
|
78 |
private void createInputResultSet() { |
|
79 |
try { |
|
80 |
currentConnection = this.inputDatasource.getConnection(); |
|
81 |
PreparedStatement st = currentConnection.prepareStatement(getAllObject); |
|
82 |
this.inputResutlSet = st.executeQuery(); |
|
83 |
} catch (Exception e) { |
|
84 |
log.error("Error on iterating on all objects", e); |
|
85 |
if (currentConnection != null) { |
|
86 |
tryToCloseConnection(); |
|
87 |
} |
|
88 |
} |
|
89 |
} |
|
90 |
|
|
91 |
private DLIObject constructNextObject() { |
|
92 |
Connection connection = null; |
|
93 |
if (inputResutlSet == null) { |
|
94 |
createInputResultSet(); |
|
95 |
} |
|
96 |
|
|
97 |
try { |
|
98 |
if ((this.inputResutlSet == null) || (inputResutlSet.next() == false)) { |
|
99 |
tryToCloseConnection(); |
|
100 |
return null; |
|
101 |
} |
|
102 |
connection = inputDatasource.getConnection(); |
|
103 |
final String dliRelationQuery = "select array_to_string(array_agg(DISTINCT COALESCE(r.relation, 'NONE')||'@@@'||r.pid || '@@@' ||r.pidtype|| '@@@' ||COALESCE(r.title, 'NONE')|| '@@@' ||COALESCE(r.target_type, 'NONE')|| '@@@' ||COALESCE(r.datasources, 'NONE') ),'<-->') as relations from dois o LEFT OUTER JOIN reldoi r ON o.doi=r.source_doi where o.doi=? group by o.doi"; |
|
104 |
|
|
105 |
DLIObject result = new DLIObject(); |
|
106 |
result.setPid(this.inputResutlSet.getString("doi")); |
|
107 |
result.setPidType(this.inputResutlSet.getString("doi_type")); |
|
108 |
Array titlesArray = this.inputResutlSet.getArray("titles"); |
|
109 |
result.setTitles((String[]) titlesArray.getArray()); |
|
110 |
Array authorsArray = this.inputResutlSet.getArray("authors"); |
|
111 |
result.setAuthors((String[]) authorsArray.getArray()); |
|
112 |
|
|
113 |
result.setDate(this.inputResutlSet.getString("date")); |
|
114 |
String type = this.inputResutlSet.getString("type"); |
|
115 |
result.setType(DLIObjectType.valueOf(type)); |
|
116 |
String datasources = this.inputResutlSet.getString("datasources"); |
|
117 |
// if (datasources != null) { |
|
118 |
// result.setDatasources(datasources.split(",")); |
|
119 |
// } |
|
120 |
|
|
121 |
PreparedStatement statement = connection.prepareStatement(dliRelationQuery); |
|
122 |
statement.setString(1, result.getPid()); |
|
123 |
ResultSet resultset = statement.executeQuery(); |
|
124 |
if (resultset.next()) { |
|
125 |
String resultString = resultset.getString(1); |
|
126 |
|
|
127 |
List<DLIRelation> rels = Lists.newArrayList(); |
|
128 |
|
|
129 |
if (resultString != null) { |
|
130 |
|
|
131 |
String[] data = resultString.split("<-->"); |
|
132 |
if (data != null) { |
|
133 |
for (String itm : data) { |
|
134 |
DLIRelation currentRel = constructRelation(itm, result.getPid()); |
|
135 |
if (currentRel != null) { |
|
136 |
rels.add(currentRel); |
|
137 |
} |
|
138 |
} |
|
139 |
} |
|
140 |
result.setRelations(rels); |
|
141 |
} |
|
142 |
} |
|
143 |
return result; |
|
144 |
|
|
145 |
} catch (Exception e) { |
|
146 |
log.error("Error on iterating on all objects", e); |
|
147 |
return null; |
|
148 |
} finally { |
|
149 |
if (connection != null) { |
|
150 |
try { |
|
151 |
connection.close(); |
|
152 |
} catch (SQLException e) { |
|
153 |
log.error("Erron on closing connection", e); |
|
154 |
} |
|
155 |
} |
|
156 |
} |
|
157 |
|
|
158 |
} |
|
159 |
|
|
160 |
private DLIRelation constructRelation(final String input, final String doi) { |
|
161 |
if (input.contains("@@@")) { |
|
162 |
|
|
163 |
String[] data = input.split("@@@"); |
|
164 |
if ((data != null) && (data.length == 6)) { |
|
165 |
|
|
166 |
String relsemantic = data[0]; |
|
167 |
String pid = data[1]; |
|
168 |
String pidtype = data[2]; |
|
169 |
String title = data[3]; |
|
170 |
String entityType = data[4]; |
|
171 |
String provenance = data[5]; |
|
172 |
|
|
173 |
DLIPID curretPID = new DLIPID(pid, pidtype); |
|
174 |
DLIRelation rel = new DLIRelation(); |
|
175 |
rel.setTargetPID(curretPID); |
|
176 |
// //rel.setSourceDOI(doi); |
|
177 |
// if (provenance.toLowerCase() != "none") { |
|
178 |
// String[] splittedProv = provenance.split(","); |
|
179 |
// if (splittedProv != null) { |
|
180 |
// rel.setRelationProvenance(Lists.newArrayList(splittedProv)); |
|
181 |
// } |
|
182 |
// } |
|
183 |
if (!title.trim().toLowerCase().equals("none")) { |
|
184 |
rel.setTargetTitle(title); |
|
185 |
} |
|
186 |
|
|
187 |
if (!entityType.trim().toLowerCase().equals("none")) { |
|
188 |
rel.setTargetType(DLIObjectType.valueOf(entityType)); |
|
189 |
} |
|
190 |
|
|
191 |
if (!relsemantic.trim().toLowerCase().equals("none")) { |
|
192 |
rel.setRelationSemantics(relsemantic); |
|
193 |
} |
|
194 |
|
|
195 |
return rel; |
|
196 |
} else { |
|
197 |
log.error("unespected data on relation field"); |
|
198 |
} |
|
199 |
|
|
200 |
} else { |
|
201 |
log.info("input does not contains expected splitter charachter"); |
|
202 |
} |
|
203 |
return null; |
|
204 |
|
|
205 |
} |
|
206 |
|
|
207 |
} |
modules/dnet-openaire_dli-workflows/trunk/src/main/java/eu/dnetlib/msro/workflows/dli/manager/DOIManager.java | ||
---|---|---|
1 |
/* |
|
2 |
* |
|
3 |
*/ |
|
4 |
package eu.dnetlib.msro.workflows.dli.manager; |
|
5 |
|
|
6 |
import java.io.IOException; |
|
7 |
import java.io.InputStream; |
|
8 |
import java.sql.Array; |
|
9 |
import java.sql.Connection; |
|
10 |
import java.sql.DatabaseMetaData; |
|
11 |
import java.sql.PreparedStatement; |
|
12 |
import java.sql.ResultSet; |
|
13 |
import java.sql.SQLException; |
|
14 |
import java.util.Iterator; |
|
15 |
import java.util.List; |
|
16 |
import java.util.Map; |
|
17 |
|
|
18 |
import javax.sql.DataSource; |
|
19 |
|
|
20 |
import org.antlr.stringtemplate.StringTemplate; |
|
21 |
import org.apache.commons.dbcp.BasicDataSource; |
|
22 |
import org.apache.commons.io.IOUtils; |
|
23 |
import org.apache.commons.logging.Log; |
|
24 |
import org.apache.commons.logging.LogFactory; |
|
25 |
import org.springframework.beans.factory.annotation.Required; |
|
26 |
import org.springframework.core.io.Resource; |
|
27 |
|
|
28 |
import com.google.common.collect.Lists; |
|
29 |
import com.google.common.collect.Maps; |
|
30 |
|
|
31 |
import eu.dnetlib.miscutils.collections.Pair; |
|
32 |
import eu.dnetlib.msro.workflows.dli.model.DLIObject; |
|
33 |
import eu.dnetlib.msro.workflows.dli.model.DLIObjectType; |
|
34 |
import eu.dnetlib.msro.workflows.dli.model.DLIPID; |
|
35 |
import eu.dnetlib.msro.workflows.dli.model.DLIRelation; |
|
36 |
|
|
37 |
/** |
|
38 |
* The Class DOIManager. |
|
39 |
*/ |
|
40 |
public class DOIManager { |
|
41 |
|
|
42 |
/** The datasource. */ |
|
43 |
@javax.annotation.Resource(name = "dliInterlinkingDataSource") |
|
44 |
private BasicDataSource datasource; |
|
45 |
|
|
46 |
/** The template table. */ |
|
47 |
private Resource templateTable; |
|
48 |
|
|
49 |
/** The Constant DLI_TABLE. */ |
|
50 |
private final static String DLI_TABLE = "dli_object"; |
|
51 |
|
|
52 |
/** The template. */ |
|
53 |
private Resource template; |
|
54 |
|
|
55 |
private StringTemplate xmlTemplate; |
|
56 |
|
|
57 |
/** The Constant log. */ |
|
58 |
private static final Log log = LogFactory.getLog(DOIManager.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
59 |
|
|
60 |
/** |
|
61 |
* Inits the. |
|
62 |
* |
|
63 |
* @throws SQLException |
|
64 |
* the SQL exception |
|
65 |
* @throws IOException |
|
66 |
* Signals that an I/O exception has occurred. |
|
67 |
*/ |
|
68 |
public void init() throws SQLException, IOException { |
|
69 |
Connection conn = datasource.getConnection(); |
|
70 |
if (existsTable(DLI_TABLE, conn)) return; |
|
71 |
initializeTables(conn); |
|
72 |
} |
|
73 |
|
|
74 |
/** |
|
75 |
* Initialize tables. |
|
76 |
* |
|
77 |
* @param connection |
|
78 |
* the connection |
|
79 |
* @throws IOException |
|
80 |
* Signals that an I/O exception has occurred. |
|
81 |
* @throws SQLException |
|
82 |
* the SQL exception |
|
83 |
*/ |
|
84 |
private void initializeTables(final Connection connection) throws IOException, SQLException { |
|
85 |
InputStream is = getTemplateTable().getInputStream(); |
|
86 |
String createTableQuery = IOUtils.toString(is); |
|
87 |
PreparedStatement ps = connection.prepareStatement(createTableQuery); |
|
88 |
ps.executeUpdate(); |
|
89 |
} |
|
90 |
|
|
91 |
/** |
|
92 |
* Exists doi. |
|
93 |
* |
|
94 |
* @param DOI |
|
95 |
* the doi |
|
96 |
* @return true, if successful |
|
97 |
*/ |
|
98 |
public boolean existsPid(final String Pid) { |
|
99 |
final String sqlSearch = "select pid from dli_object where pid=?"; |
|
100 |
Connection connection = null; |
|
101 |
try { |
|
102 |
connection = datasource.getConnection(); |
|
103 |
final PreparedStatement statement = connection.prepareStatement(sqlSearch); |
|
104 |
statement.setString(1, Pid); |
|
105 |
ResultSet result = statement.executeQuery(); |
|
106 |
return result.next(); |
|
107 |
|
|
108 |
} catch (SQLException e) { |
|
109 |
log.error("Error on executing query" + sqlSearch, e); |
|
110 |
return false; |
|
111 |
} finally { |
|
112 |
if (connection != null) { |
|
113 |
try { |
|
114 |
connection.close(); |
|
115 |
} catch (SQLException e) { |
|
116 |
log.error("Erron on closing connection", e); |
|
117 |
} |
|
118 |
} |
|
119 |
} |
|
120 |
} |
|
121 |
|
|
122 |
/** |
|
123 |
* Adds the datasources. |
|
124 |
* |
|
125 |
* @param datasources |
|
126 |
* the datasources |
|
127 |
*/ |
|
128 |
public void addDatasources(final List<Pair<String, String>> datasources) { |
|
129 |
String sql = "WITH upsert AS (UPDATE dli_datasource SET ds_id=? WHERE ds_id=? RETURNING *) " |
|
130 |
+ " INSERT INTO dli_datasource (ds_id, name) SELECT ?,? WHERE NOT EXISTS (SELECT * FROM upsert) "; |
|
131 |
Connection connection = null; |
|
132 |
try { |
|
133 |
connection = datasource.getConnection(); |
|
134 |
for (Pair<String, String> currentDatasource : datasources) { |
|
135 |
PreparedStatement statement = connection.prepareStatement(sql); |
|
136 |
statement.setString(1, currentDatasource.getKey()); |
|
137 |
statement.setString(2, currentDatasource.getKey()); |
|
138 |
statement.setString(3, currentDatasource.getKey()); |
|
139 |
statement.setString(4, currentDatasource.getValue()); |
|
140 |
log.debug("executing statement " + statement.toString()); |
|
141 |
int totalUpdate = statement.executeUpdate(); |
|
142 |
log.debug("Updated " + totalUpdate + " record"); |
|
143 |
|
|
144 |
} |
|
145 |
|
|
146 |
} catch (SQLException e) { |
|
147 |
log.error("Error on executing query", e); |
|
148 |
} finally { |
|
149 |
if (connection != null) { |
|
150 |
try { |
|
151 |
connection.close(); |
|
152 |
} catch (SQLException e) { |
|
153 |
log.error("Erron on closing connection", e); |
|
154 |
} |
|
155 |
} |
|
156 |
} |
|
157 |
|
|
158 |
} |
|
159 |
|
|
160 |
|
|
161 |
/**
 * Single-record upsert entry point. The body is empty, so calling this method currently has
 * no effect.
 * NOTE(review): presumably superseded by upsertRecords(List) - confirm before relying on it.
 *
 * @param input
 *            the record to store (currently ignored)
 */
public void upsertRecord(final DLIObject input) { |

162 |
|

163 |
} |
|
164 |
|
|
165 |
public void upsertRecords(final List<DLIObject> records) { |
|
166 |
|
|
167 |
Connection connection = null; |
|
168 |
DLIObject lastRecord = null; |
|
169 |
try { |
|
170 |
|
|
171 |
connection = datasource.getConnection(); |
|
172 |
connection.setAutoCommit(false); |
|
173 |
|
|
174 |
for(DLIObject input: records) { |
|
175 |
lastRecord = input; |
|
176 |
if (input.getPid() == null) return; |
|
177 |
insertObjectInfoMock(input, connection); |
|
178 |
|
|
179 |
// ADDING RELATION |
|
180 |
if (input.getRelations() != null) { |
|
181 |
for (DLIRelation relation : input.getRelations()) { |
|
182 |
insertRelationMock(connection, relation); |
|
183 |
} |
|
184 |
} |
|
185 |
} |
|
186 |
connection.commit(); |
|
187 |
connection.setAutoCommit(true); |
|
188 |
} catch (Exception e) { |
|
189 |
log.error("Error on executing query", e); |
|
190 |
try { |
|
191 |
connection.rollback(); |
|
192 |
} catch (SQLException e1) { |
|
193 |
log.error("Error on rollback ", e); |
|
194 |
} |
|
195 |
} finally { |
|
196 |
if (connection != null) { |
|
197 |
try { |
|
198 |
connection.close(); |
|
199 |
} catch (SQLException e) { |
|
200 |
log.error("Erron on closing connection", e); |
|
201 |
} |
|
202 |
} |
|
203 |
} |
|
204 |
|
|
205 |
} |
|
206 |
|
|
207 |
/** |
|
208 |
* Upsert record. |
|
209 |
* |
|
210 |
* @param input |
|
211 |
* the input |
|
212 |
*/ |
|
213 |
private void upsertRecordRight(final DLIObject input) { |
|
214 |
String sql = "WITH upsert AS (UPDATE dli_object SET pid=?, pid_type=? WHERE pid=? RETURNING *) " |
|
215 |
+ " INSERT INTO dli_object (pid, pid_type, title, authors, creation_date, type) SELECT ?,?,?,?,?,? WHERE NOT EXISTS (SELECT * FROM upsert) "; |
|
216 |
Connection connection = null; |
|
217 |
try { |
|
218 |
|
|
219 |
connection = datasource.getConnection(); |
|
220 |
connection.setAutoCommit(false); |
|
221 |
if (input.getPid() == null) return; |
|
222 |
insertObjectInfo(input, sql, connection); |
|
223 |
|
|
224 |
// ADDING RECORD PROVENANCE |
|
225 |
//TODO IMPLEMENT CHANGE DATA MODEL |
|
226 |
// if (input.getDatasources() != null) { |
|
227 |
// for (String datasource : input.getDatasources()) { |
|
228 |
// insertObjectProvenance(input, connection, datasource); |
|
229 |
// } |
|
230 |
// } |
|
231 |
// ADDING RELATION |
|
232 |
//TODO IMPLEMENT CHANGE DATA MODEL |
|
233 |
// if (input.getRelations() != null) { |
|
234 |
// for (DLIRelation relation : input.getRelations()) { |
|
235 |
// insertRelation(connection, relation); |
|
236 |
// if (relation.getRelationProvenance() != null) { |
|
237 |
// for (String datasouce : relation.getRelationProvenance()) { |
|
238 |
// insertRelationProvenance(connection, relation, datasouce); |
|
239 |
// } |
|
240 |
// } |
|
241 |
// } |
|
242 |
// } |
|
243 |
connection.commit(); |
|
244 |
connection.setAutoCommit(true); |
|
245 |
|
|
246 |
} catch (SQLException e) { |
|
247 |
log.error("Error on executing query", e); |
|
248 |
try { |
|
249 |
connection.rollback(); |
|
250 |
} catch (SQLException e1) { |
|
251 |
log.error("Error on rollback ", e); |
|
252 |
} |
|
253 |
} finally { |
|
254 |
if (connection != null) { |
|
255 |
try { |
|
256 |
connection.close(); |
|
257 |
} catch (SQLException e) { |
|
258 |
log.error("Erron on closing connection", e); |
|
259 |
} |
|
260 |
} |
|
261 |
} |
|
262 |
|
|
263 |
} |
|
264 |
|
|
265 |
public Map<String, StatsInfo> getStats() { |
|
266 |
String sql = "select d.name as id, 'publication' as type, count(*) as total from dli_object o left outer join record_provenance r on (o.pid= r.doi) left outer join dli_datasource d on (r.ds_id =d.ds_id) where o.type='publication' group by r.ds_id, d.name union (select d.name as id, 'dataset' as type, count(*) as total from dli_object o left outer join record_provenance r on (o.pid= r.doi) left outer join dli_datasource d on (r.ds_id =d.ds_id) where o.type='dataset' group by r.ds_id, d.name) union (select d.name as id,'relations' as type, count(*) as total from relation_provenance r left outer join dli_datasource d on (r.ds_id =d.ds_id) group by r.ds_id, d.name)"; |
|
267 |
Map<String, StatsInfo> stats = Maps.newHashMap(); |
|
268 |
|
|
269 |
Connection connection = null; |
|
270 |
try { |
|
271 |
connection = datasource.getConnection(); |
|
272 |
PreparedStatement statement = connection.prepareStatement(sql); |
|
273 |
log.debug("executing query: " + statement.toString()); |
|
274 |
ResultSet resultset = statement.executeQuery(); |
|
275 |
while (resultset.next()) { |
|
276 |
String dsId = resultset.getString("id"); |
|
277 |
if (!stats.containsKey(dsId)) { |
|
278 |
StatsInfo info = new StatsInfo(); |
|
279 |
info.setDatasource(dsId); |
|
280 |
stats.put(dsId, info); |
|
281 |
} |
|
282 |
StatsInfo currentInfo = stats.get(dsId); |
|
283 |
String type = resultset.getString("type"); |
|
284 |
if ("publication".equals(type)) { |
|
285 |
currentInfo.setNumberOfPublication(resultset.getInt("total")); |
|
286 |
} else if ("relations".equals(type)) { |
|
287 |
currentInfo.setNumberOfRelations(resultset.getInt("total")); |
|
288 |
} else if ("dataset".equals(type)) { |
|
289 |
currentInfo.setNumberOfDatasets(resultset.getInt("total")); |
|
290 |
} |
|
291 |
|
|
292 |
} |
|
293 |
return stats; |
|
294 |
|
|
295 |
} catch (SQLException e) { |
|
296 |
log.error("Error on getting stats ", e); |
|
297 |
|
|
298 |
return null; |
|
299 |
} finally { |
|
300 |
if (connection != null) { |
|
301 |
try { |
|
302 |
connection.close(); |
|
303 |
} catch (SQLException e) { |
|
304 |
log.error("Erron on closing connection", e); |
|
305 |
} |
|
306 |
} |
|
307 |
} |
|
308 |
|
|
309 |
} |
|
310 |
|
|
311 |
/** |
|
312 |
* Insert relation provenance. |
|
313 |
* |
|
314 |
* @param connection |
|
315 |
* the connection |
|
316 |
* @param relation |
|
317 |
* the relation |
|
318 |
* @param datasouce |
|
319 |
* the datasouce |
|
320 |
* @throws SQLException |
|
321 |
* the SQL exception |
|
322 |
*/ |
|
323 |
private void insertRelationProvenance(final Connection connection, final DLIRelation relation, final String datasouce) throws SQLException { |
|
324 |
PreparedStatement statement; |
Also available in: Unified diff
Changed the data model: added a contribution field for each repository