1
|
package eu.dnetlib.data.mapreduce.hbase.lodImport.utils;
|
2
|
|
3
|
import java.net.URI;
|
4
|
import java.sql.Connection;
|
5
|
import java.sql.ResultSet;
|
6
|
import java.sql.Statement;
|
7
|
import java.util.List;
|
8
|
|
9
|
import org.apache.hadoop.conf.Configuration;
|
10
|
import org.apache.log4j.Logger;
|
11
|
import org.json.JSONArray;
|
12
|
import org.json.JSONObject;
|
13
|
|
14
|
import com.jolbox.bonecp.BoneCPDataSource;
|
15
|
|
16
|
public class RDFizer {
|
17
|
|
18
|
private static Logger log = Logger.getLogger(RDFizer.class);
|
19
|
|
20
|
public RDFizer() {
|
21
|
}
|
22
|
|
23
|
// public static String RDFizeEntityRow(List<String> row,VirtGraph graph, JSONObject mappings, Configuration conf, MapCountries mapCountries,MapLanguages mapLanguages){
|
24
|
public static String[] RDFizeEntityRow(List<String> row,BoneCPDataSource ds, JSONObject mappings, Configuration conf, MapCountries mapCountries,MapLanguages mapLanguages, String defaultGraph){
|
25
|
|
26
|
// VirtModel model = new VirtModel(graph);
|
27
|
|
28
|
// String graph = conf.get("lod.defaultGraph");
|
29
|
|
30
|
String[] buildQuery = new String[2];
|
31
|
buildQuery[0]="";
|
32
|
buildQuery[1]="";
|
33
|
|
34
|
// String insertQuery ="";
|
35
|
// String deleteQuery="";
|
36
|
|
37
|
String baseURI = conf.get("lod.baseURI");
|
38
|
String id = row.get(1).toString();
|
39
|
String type = row.get(0).toString();
|
40
|
String resourceURI = baseURI+type+"/"+id;
|
41
|
|
42
|
// ####################################################################################
|
43
|
// THIS IS FOR INCREMENTAL UPDATE ON-HOLD FOR THE BATCH
|
44
|
// boolean ask = false;
|
45
|
// try{
|
46
|
// ask = resourceExists(ds.getConnection(), resourceURI, defaultGraph);
|
47
|
// }catch(Exception e){
|
48
|
// log.error("Could not ASK "+e.toString(),e);
|
49
|
// }
|
50
|
|
51
|
|
52
|
|
53
|
// buildQuery[1]+= "<"+resourceURI+"> ?p ?o. ";
|
54
|
|
55
|
// if(ask){
|
56
|
// try{
|
57
|
// deleteResource(ds.getConnection(), resourceURI, defaultGraph);
|
58
|
// }catch(Exception e){
|
59
|
// log.error("Could not DELETE "+resourceURI+" "+e.toString(),e);
|
60
|
// }
|
61
|
//####################################################################################
|
62
|
|
63
|
JSONArray typeMappings = new JSONArray();
|
64
|
JSONObject propertyObject = new JSONObject();
|
65
|
try{
|
66
|
typeMappings = mappings.getJSONArray(type);
|
67
|
propertyObject = typeMappings.getJSONObject(0);
|
68
|
}catch(Exception e){
|
69
|
log.error("ROW "+row.toString()+" "+e.toString(),e);
|
70
|
}
|
71
|
|
72
|
for(int i=0; i<row.size(); i++){
|
73
|
String index = new Integer(i).toString();
|
74
|
String propertyString;
|
75
|
|
76
|
|
77
|
if(type.equals("project") && i>24) continue;
|
78
|
if(type.equals("organization") && i>11) continue;
|
79
|
|
80
|
try{
|
81
|
propertyString = propertyObject.getString(index);
|
82
|
}catch(Exception e){
|
83
|
// log.error("Could not get the property for type "+type+" and ID"+row.get(1).toString()+" : "+e.toString(),e);
|
84
|
continue;
|
85
|
}
|
86
|
|
87
|
if(i==0){
|
88
|
String resourceType = propertyObject.getString(propertyString);
|
89
|
buildQuery[0]+="<"+resourceURI+"> <"+propertyString+"> <"+resourceType+">";
|
90
|
continue;
|
91
|
}
|
92
|
|
93
|
String value = row.get(i).trim();
|
94
|
value = value.replace("\\", "");
|
95
|
value = value.replace("\""," ");
|
96
|
|
97
|
if(value.trim().equals("null") || value==null || value.trim().equals("")) continue;
|
98
|
|
99
|
if(i==16 && type.equals("result") && !value.equals("und") || i==11 && type.equals("person") && !value.equals("und") || i==9 && type.equals("organization") && !value.equals("und")){
|
100
|
try{
|
101
|
// log.info("Country code is "+value);
|
102
|
String countryURI = mapCountries.getCountryURI(value);
|
103
|
URI uri = new URI(countryURI);
|
104
|
if(countryURI.equals("")){
|
105
|
buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
|
106
|
continue;
|
107
|
}
|
108
|
buildQuery[0]+="; <"+propertyString+"> <"+uri+">";
|
109
|
}catch(Exception e){
|
110
|
buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
|
111
|
continue;
|
112
|
}
|
113
|
continue;
|
114
|
}
|
115
|
|
116
|
if(type.equals("result") && i==9 && !value.equals("und")){
|
117
|
try{
|
118
|
String langURI = mapLanguages.getLangURI(value);
|
119
|
URI uri = new URI(langURI);
|
120
|
if(langURI.equals("")){
|
121
|
buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
|
122
|
continue;
|
123
|
}
|
124
|
buildQuery[0]+="; <"+propertyString+"> <"+uri+">";
|
125
|
}catch(Exception e){
|
126
|
buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
|
127
|
continue;
|
128
|
}
|
129
|
continue;
|
130
|
}
|
131
|
|
132
|
if(type.equals("datasource") && i==21 && !value.equals("und") && !value.isEmpty()){
|
133
|
if(value.contains(conf.get("lod.seperator"))){
|
134
|
String[] splittedValue = value.split(conf.get("lod.seperator"));
|
135
|
for(String v:splittedValue){
|
136
|
try{
|
137
|
String langURI = mapLanguages.getLangURI(v);
|
138
|
URI uri = new URI(langURI);
|
139
|
if(langURI.equals("")){
|
140
|
buildQuery[0]+="; <"+propertyString+"> \""+v+"\"";
|
141
|
continue;
|
142
|
}
|
143
|
buildQuery[0]+="; <"+propertyString+"> <"+uri+">";
|
144
|
}catch(Exception e){
|
145
|
buildQuery[0]+="; <"+propertyString+"> \""+v+"\"";
|
146
|
continue;
|
147
|
}
|
148
|
}
|
149
|
continue;
|
150
|
}
|
151
|
}
|
152
|
|
153
|
if(value.contains(conf.get("lod.seperator"))){
|
154
|
String[] splittedValue = value.split(conf.get("lod.seperator"));
|
155
|
for(String v:splittedValue){
|
156
|
v= v.replace(conf.get("lod.seperator"), "").trim();
|
157
|
buildQuery[0]+="; <"+propertyString+"> \""+v+"\"";
|
158
|
}
|
159
|
}else{
|
160
|
if(value.startsWith("http://")){
|
161
|
try{
|
162
|
if(value.contains(" ")){
|
163
|
buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
|
164
|
}else{
|
165
|
value = value.replaceAll("\\s","-");
|
166
|
URI uri = new URI(value);
|
167
|
buildQuery[0]+="; <"+propertyString+"> <"+uri+">";
|
168
|
}
|
169
|
}catch(Exception e){
|
170
|
buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
|
171
|
continue;
|
172
|
}
|
173
|
}
|
174
|
else buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
|
175
|
}
|
176
|
|
177
|
}
|
178
|
|
179
|
buildQuery[0]+=". ";
|
180
|
|
181
|
return buildQuery;
|
182
|
|
183
|
}
|
184
|
|
185
|
public static String[] RDFizeRelationRow(List<String> row, JSONObject mappings, Configuration conf){
|
186
|
|
187
|
// String insertQuery = "";
|
188
|
String[] buildQuery = new String[1];
|
189
|
buildQuery[0]="";
|
190
|
String baseURI = conf.get("lod.baseURI");
|
191
|
JSONObject typeMappings = mappings.getJSONArray(row.get(0)).getJSONObject(0);
|
192
|
String sourceType = typeMappings.getString("sourceType");
|
193
|
String sourceId = typeMappings.getString("sourceId");
|
194
|
String targetType = typeMappings.getString("targetType");
|
195
|
String targetId = typeMappings.getString("targetId");
|
196
|
String property = typeMappings.getString("property");
|
197
|
String sourceURI = baseURI+row.get(Integer.parseInt(sourceType))+"/"+row.get(Integer.parseInt(sourceId));
|
198
|
String targetURI = baseURI+row.get(Integer.parseInt(targetType))+"/"+row.get(Integer.parseInt(targetId));
|
199
|
buildQuery[0]="<"+sourceURI+"> <"+property+"> <"+targetURI+">. ";
|
200
|
return buildQuery;
|
201
|
}
|
202
|
|
203
|
public static boolean resourceExists(Connection conn, String resourceURI, String graph){
|
204
|
boolean exists = false;
|
205
|
String askQuery = "SPARQL SELECT ?o FROM <"+graph+"> WHERE {<"+resourceURI+"> ?p ?o}";
|
206
|
Statement stmt;
|
207
|
try {
|
208
|
stmt = conn.createStatement();
|
209
|
ResultSet rs=stmt.executeQuery(askQuery);
|
210
|
if(rs.next()) exists=true;
|
211
|
rs.close();
|
212
|
stmt.close();
|
213
|
conn.close();
|
214
|
} catch (Exception e) {
|
215
|
log.error("Virtuoso ask Query failed. Query was: "+ askQuery +"\n" + e.toString(), e);
|
216
|
}
|
217
|
return exists;
|
218
|
}
|
219
|
|
220
|
public static void deleteResource(Connection conn, String resourceURI, String graph){
|
221
|
String deleteQueryString ="SPARQL DELETE FROM <"+graph+"> {<"+resourceURI+"> ?p ?o} WHERE {<"+resourceURI+"> ?p ?o}" ;
|
222
|
Statement stmt;
|
223
|
try {
|
224
|
stmt = conn.createStatement();
|
225
|
stmt.execute(deleteQueryString);
|
226
|
conn.commit();
|
227
|
stmt.close();
|
228
|
conn.close();
|
229
|
} catch (Exception e) {
|
230
|
log.error("Virtuoso ask Query failed. "+ deleteQueryString +"\n" + e.toString(), e);
|
231
|
}
|
232
|
}
|
233
|
|
234
|
public static void clearGraph(String graph, Connection conn){
|
235
|
String clearQuery="SPARQL DEFINE sql:log-enable 3 CLEAR GRAPH <"+graph+">";
|
236
|
Statement stmt;
|
237
|
try {
|
238
|
stmt = conn.createStatement();
|
239
|
stmt.execute(clearQuery);
|
240
|
conn.commit();
|
241
|
stmt.close();
|
242
|
} catch (Exception e) {
|
243
|
log.error("Virtuoso FAILED TO CLEAR graph "+ clearQuery +"\n" + e.toString(), e);
|
244
|
}
|
245
|
}
|
246
|
|
247
|
public static void setCheckpoint(Connection conn, int checkpointValue){
|
248
|
Statement stmt;
|
249
|
try {
|
250
|
|
251
|
stmt = conn.createStatement();
|
252
|
stmt.execute("checkpoint_interval("+checkpointValue+")");
|
253
|
if(checkpointValue==120){
|
254
|
log.info("CheckPoint Started");
|
255
|
stmt.execute("checkpoint");
|
256
|
// stmt.execute("log_enable(3,1)");
|
257
|
conn.commit();
|
258
|
log.info("CheckPoint Finished");
|
259
|
}
|
260
|
|
261
|
stmt.close();
|
262
|
} catch (Exception e) {
|
263
|
log.error("Virtuoso set checkpoint failed. Checkpoint was "+ checkpointValue +"\n" + e.toString(), e);
|
264
|
}
|
265
|
}
|
266
|
|
267
|
public static void autoIndexing(Connection conn, boolean state){
|
268
|
Statement stmt;
|
269
|
String query="";
|
270
|
if(state) query = "DB.DBA.RDF_OBJ_FT_RULE_ADD (null, null, 'All')";
|
271
|
else query = "DB.DBA.VT_BATCH_UPDATE ('DB.DBA.RDF_OBJ', 'ON', NULL)";
|
272
|
try {
|
273
|
stmt = conn.createStatement();
|
274
|
stmt.execute(query);
|
275
|
log.info("autoIndex "+query);
|
276
|
stmt.close();
|
277
|
} catch (Exception e) {
|
278
|
log.error("Virtuoso autoIndexing failed. State was "+ query +"\n" + e.toString(), e);
|
279
|
}
|
280
|
|
281
|
}
|
282
|
}
|