1
|
package eu.dnetlib.data.mapreduce.hbase.lodImport.utils;
|
2
|
|
3
|
import java.net.URI;
|
4
|
import java.sql.Connection;
|
5
|
import java.sql.Statement;
|
6
|
import java.util.List;
|
7
|
|
8
|
import org.apache.hadoop.conf.Configuration;
|
9
|
import org.apache.log4j.Logger;
|
10
|
import org.json.JSONArray;
|
11
|
import org.json.JSONObject;
|
12
|
|
13
|
import com.jolbox.bonecp.BoneCPDataSource;
|
14
|
|
15
|
public class RDFizer {
|
16
|
|
17
|
/** Class-wide log4j logger; declared final so it cannot be reassigned after class initialisation. */
private static final Logger log = Logger.getLogger(RDFizer.class);
|
18
|
|
19
|
/**
 * Creates an {@code RDFizer}. The instance carries no state: every operation
 * of this class is a static method.
 */
public RDFizer() {
    super();
}
|
21
|
|
22
|
// public static String RDFizeEntityRow(List<String> row,VirtGraph graph, JSONObject mappings, Configuration conf, MapCountries mapCountries,MapLanguages mapLanguages){
|
23
|
public static String RDFizeEntityRow(List<String> row,BoneCPDataSource ds, JSONObject mappings, Configuration conf, MapCountries mapCountries,MapLanguages mapLanguages){
|
24
|
|
25
|
// VirtModel model = new VirtModel(graph);
|
26
|
|
27
|
String graph = conf.get("lod.defaultGraph");
|
28
|
|
29
|
String insertQuery ="";
|
30
|
|
31
|
String baseURI = conf.get("lod.baseURI");
|
32
|
String id = row.get(1).toString();
|
33
|
String type = row.get(0).toString();
|
34
|
String resourceURI = baseURI+type+"/"+id;
|
35
|
// String askQuery = "SELECT ?o FROM <"+conf.get("lod.defaultGraph")+"> WHERE {<"+resourceURI+"> ?p ?o}";
|
36
|
// VirtuosoQueryExecution vqe = VirtuosoQueryExecutionFactory.create (askQuery, graph);
|
37
|
boolean ask = false;
|
38
|
try{
|
39
|
ask = resourceExists(ds.getConnection(), resourceURI, graph);
|
40
|
}catch(Exception e){
|
41
|
log.error("Could not ASK "+e.toString(),e);
|
42
|
}
|
43
|
// ResultSet rs = vqe.execSelect();
|
44
|
// if(rs.hasNext()) ask=true;
|
45
|
// vqe.close();
|
46
|
if(ask){
|
47
|
try{
|
48
|
deleteResource(ds.getConnection(), resourceURI, graph);
|
49
|
}catch(Exception e){
|
50
|
log.error("Could not DELETE "+resourceURI+" "+e.toString(),e);
|
51
|
}
|
52
|
// String deleteQueryString ="DELETE FROM <"+conf.get("lod.defaultGraph")+"> {<"+resourceURI+"> ?p ?o} WHERE {<"+resourceURI+"> ?p ?o}" ;
|
53
|
// VirtuosoUpdateRequest vur = VirtuosoUpdateFactory.create(deleteQueryString, graph);
|
54
|
//// System.out.println("DELETED:\t"+deleteQueryString);
|
55
|
// vur.exec();
|
56
|
}
|
57
|
|
58
|
JSONArray typeMappings = mappings.getJSONArray(type);
|
59
|
|
60
|
for(int i=0; i<row.size(); i++){
|
61
|
JSONObject propertyObject = typeMappings.getJSONObject(0);
|
62
|
String index = new Integer(i).toString();
|
63
|
String propertyString;
|
64
|
|
65
|
|
66
|
if(type.equals("project") && i>24) continue;
|
67
|
if(type.equals("organization") && i>11) continue;
|
68
|
|
69
|
try{
|
70
|
propertyString = propertyObject.getString(index);
|
71
|
}catch(Exception e){
|
72
|
log.error("Could not get the property for type "+type+" : "+e.toString(),e);
|
73
|
continue;
|
74
|
}
|
75
|
|
76
|
if(i==0){
|
77
|
String resourceType = propertyObject.getString(propertyString);
|
78
|
insertQuery+="<"+resourceURI+"> <"+propertyString+"> <"+resourceType+">";
|
79
|
continue;
|
80
|
}
|
81
|
|
82
|
String value = row.get(i).trim();
|
83
|
if(value.trim().equals("null") || value==null || value.trim().equals("")) continue;
|
84
|
|
85
|
if(i==16 && type.equals("result") && !value.equals("und") || i==11 && type.equals("person") && !value.equals("und") || i==9 && type.equals("organization") && !value.equals("und")){
|
86
|
try{
|
87
|
String countryURI = mapCountries.getCountryURI(value);
|
88
|
URI uri = new URI(countryURI);
|
89
|
if(countryURI.equals("")){
|
90
|
insertQuery+="; <"+propertyString+"> \""+value+"\"";
|
91
|
continue;
|
92
|
}
|
93
|
insertQuery+="; <"+propertyString+"> <"+uri+">";
|
94
|
// log.info("COUNTRY "+countryURI+" FROM "+value);
|
95
|
}catch(Exception e){
|
96
|
insertQuery+="; <"+propertyString+"> \""+value+"\"";
|
97
|
log.error("No country URI for: "+e.toString(),e);
|
98
|
continue;
|
99
|
}
|
100
|
continue;
|
101
|
}
|
102
|
|
103
|
if(i==9 && type.equals("result") && !value.equals("und")){
|
104
|
try{
|
105
|
String langURI = mapLanguages.getLangURI(value);
|
106
|
URI uri = new URI(langURI);
|
107
|
if(langURI.equals("")){
|
108
|
insertQuery+="; <"+propertyString+"> \""+value+"\"";
|
109
|
continue;
|
110
|
}
|
111
|
insertQuery+="; <"+propertyString+"> <"+uri+">";
|
112
|
// log.info("LANG "+langURI+" FROM "+value.trim());
|
113
|
}catch(Exception e){
|
114
|
insertQuery+="; <"+propertyString+"> \""+value+"\"";
|
115
|
log.error("No Language URI for: "+e.toString(),e);
|
116
|
continue;
|
117
|
}
|
118
|
continue;
|
119
|
}
|
120
|
|
121
|
if(value.contains(conf.get("lod.seperator"))){
|
122
|
String[] splittedValue = value.split(conf.get("lod.seperator"));
|
123
|
for(String v:splittedValue){
|
124
|
v= v.replace(conf.get("lod.seperator"), "").trim();
|
125
|
insertQuery+="; <"+propertyString+"> \""+v+"\"";
|
126
|
}
|
127
|
}else{
|
128
|
if(value.startsWith("http://")){
|
129
|
try{
|
130
|
URI uri = new URI(value);
|
131
|
insertQuery+="; <"+propertyString+"> <"+uri+">";
|
132
|
}catch(Exception e){
|
133
|
insertQuery+="; <"+propertyString+"> \""+value+"\"";
|
134
|
log.error("NOT URI "+e.toString(),e);
|
135
|
continue;
|
136
|
}
|
137
|
}
|
138
|
else insertQuery+="; <"+propertyString+"> \""+value+"\"";
|
139
|
}
|
140
|
|
141
|
}
|
142
|
|
143
|
insertQuery+=".";
|
144
|
|
145
|
return insertQuery;
|
146
|
|
147
|
}
|
148
|
|
149
|
// public static String RDFizeRelationRow(List<String> row, VirtGraph graph, JSONObject mappings, Configuration conf){
|
150
|
public static String RDFizeRelationRow(List<String> row, JSONObject mappings, Configuration conf){
|
151
|
|
152
|
String insertQuery = "";
|
153
|
String baseURI = conf.get("lod.baseURI");
|
154
|
JSONObject typeMappings = mappings.getJSONArray(row.get(0)).getJSONObject(0);
|
155
|
String sourceType = typeMappings.getString("sourceType");
|
156
|
String sourceId = typeMappings.getString("sourceId");
|
157
|
String targetType = typeMappings.getString("targetType");
|
158
|
String targetId = typeMappings.getString("targetId");
|
159
|
String property = typeMappings.getString("property");
|
160
|
String sourceURI = baseURI+row.get(Integer.parseInt(sourceType))+"/"+row.get(Integer.parseInt(sourceId));
|
161
|
String targetURI = baseURI+row.get(Integer.parseInt(targetType))+"/"+row.get(Integer.parseInt(targetId));
|
162
|
insertQuery= "<"+sourceURI+"> <"+property+"> <"+targetURI+">. ";
|
163
|
return insertQuery;
|
164
|
}
|
165
|
|
166
|
public static boolean resourceExists(Connection conn, String resourceURI, String graph){
|
167
|
boolean exists = false;
|
168
|
String askQuery = "SPARQL SELECT ?o FROM <"+graph+"> WHERE {<"+resourceURI+"> ?p ?o}";
|
169
|
Statement stmt;
|
170
|
try {
|
171
|
stmt = conn.createStatement();
|
172
|
// ResultSet rs= stmt.executeQuery(askQuery);
|
173
|
exists = stmt.execute(askQuery);
|
174
|
// if(rs.next()) exists=true;
|
175
|
stmt.close();
|
176
|
conn.commit();
|
177
|
conn.close();
|
178
|
} catch (Exception e) {
|
179
|
log.error("Virtuoso ask Query failed. Checkpoint was "+ askQuery +"\n" + e.toString(), e);
|
180
|
}
|
181
|
return exists;
|
182
|
}
|
183
|
|
184
|
public static void deleteResource(Connection conn, String resourceURI, String graph){
|
185
|
String deleteQueryString ="SPARQL DELETE FROM <"+graph+"> {<"+resourceURI+"> ?p ?o} WHERE {<"+resourceURI+"> ?p ?o}" ;
|
186
|
Statement stmt;
|
187
|
try {
|
188
|
stmt = conn.createStatement();
|
189
|
stmt.execute(deleteQueryString);
|
190
|
stmt.close();
|
191
|
conn.commit();
|
192
|
conn.close();
|
193
|
} catch (Exception e) {
|
194
|
log.error("Virtuoso ask Query failed. Checkpoint was "+ deleteQueryString +"\n" + e.toString(), e);
|
195
|
}
|
196
|
}
|
197
|
|
198
|
public static void clearGraph(String graph, Connection conn){
|
199
|
String clearQuery="SPARQL DEFINE sql:log-enable 3 CLEAR GRAPH <"+graph+">";
|
200
|
Statement stmt;
|
201
|
try {
|
202
|
stmt = conn.createStatement();
|
203
|
stmt.execute(clearQuery);
|
204
|
stmt.close();
|
205
|
conn.commit();
|
206
|
conn.close();
|
207
|
} catch (Exception e) {
|
208
|
log.error("Virtuoso FAILED TO CLEAR graph "+ clearQuery +"\n" + e.toString(), e);
|
209
|
}
|
210
|
}
|
211
|
|
212
|
public static void setCheckpoint(Connection conn, String checkpointValue){
|
213
|
Statement stmt;
|
214
|
try {
|
215
|
stmt = conn.createStatement();
|
216
|
stmt.execute("checkpoint_interval("+checkpointValue+");");
|
217
|
stmt.close();
|
218
|
conn.commit();
|
219
|
conn.close();
|
220
|
} catch (Exception e) {
|
221
|
log.error("Virtuoso set checkpoint failed. Checkpoint was "+ checkpointValue +"\n" + e.toString(), e);
|
222
|
}
|
223
|
}
|
224
|
}
|