1 |
40545
|
eri.katsar
|
package eu.dnetlib.data.mapreduce.hbase.lodImport.utils;
|
2 |
|
|
|
3 |
41057
|
giorgos.al
|
import java.net.URI;
|
4 |
41076
|
giorgos.al
|
import java.sql.Connection;
|
5 |
41421
|
giorgos.al
|
import java.sql.ResultSet;
|
6 |
40966
|
giorgos.al
|
import java.sql.Statement;
|
7 |
|
|
import java.util.List;
|
8 |
|
|
|
9 |
40841
|
giorgos.al
|
import org.apache.hadoop.conf.Configuration;
|
10 |
40966
|
giorgos.al
|
import org.apache.log4j.Logger;
|
11 |
40836
|
giorgos.al
|
import org.json.JSONArray;
|
12 |
|
|
import org.json.JSONObject;
|
13 |
|
|
|
14 |
41076
|
giorgos.al
|
import com.jolbox.bonecp.BoneCPDataSource;
|
15 |
40841
|
giorgos.al
|
|
16 |
40545
|
eri.katsar
|
public class RDFizer {
|
17 |
41057
|
giorgos.al
|
|
18 |
40966
|
giorgos.al
|
private static Logger log = Logger.getLogger(RDFizer.class);
|
19 |
41002
|
giorgos.al
|
|
20 |
40545
|
eri.katsar
|
public RDFizer() {
|
21 |
|
|
}
|
22 |
|
|
|
23 |
41076
|
giorgos.al
|
// public static String RDFizeEntityRow(List<String> row,VirtGraph graph, JSONObject mappings, Configuration conf, MapCountries mapCountries,MapLanguages mapLanguages){
|
24 |
42719
|
giorgos.al
|
public static String[] RDFizeEntityRow(List<String> row,BoneCPDataSource ds, JSONObject mappings, Configuration conf, MapCountries mapCountries,MapLanguages mapLanguages, String defaultGraph){
|
25 |
40836
|
giorgos.al
|
|
26 |
41076
|
giorgos.al
|
// VirtModel model = new VirtModel(graph);
|
27 |
|
|
|
28 |
42240
|
giorgos.al
|
// String graph = conf.get("lod.defaultGraph");
|
29 |
41076
|
giorgos.al
|
|
30 |
42719
|
giorgos.al
|
String[] buildQuery = new String[2];
|
31 |
|
|
buildQuery[0]="";
|
32 |
|
|
buildQuery[1]="";
|
33 |
|
|
|
34 |
|
|
// String insertQuery ="";
|
35 |
|
|
// String deleteQuery="";
|
36 |
40843
|
giorgos.al
|
|
37 |
40728
|
giorgos.al
|
String baseURI = conf.get("lod.baseURI");
|
38 |
|
|
String id = row.get(1).toString();
|
39 |
40841
|
giorgos.al
|
String type = row.get(0).toString();
|
40 |
|
|
String resourceURI = baseURI+type+"/"+id;
|
41 |
41421
|
giorgos.al
|
|
42 |
|
|
// ####################################################################################
|
43 |
|
|
// THIS IS FOR INCREMENTAL UPDATE ON-HOLD FOR THE BATCH
|
44 |
|
|
// boolean ask = false;
|
45 |
|
|
// try{
|
46 |
|
|
// ask = resourceExists(ds.getConnection(), resourceURI, graph);
|
47 |
|
|
// }catch(Exception e){
|
48 |
|
|
// log.error("Could not ASK "+e.toString(),e);
|
49 |
|
|
// }
|
50 |
|
|
//
|
51 |
42719
|
giorgos.al
|
|
52 |
|
|
|
53 |
|
|
buildQuery[1]+= "<"+resourceURI+"> ?p ?o. ";
|
54 |
|
|
|
55 |
41421
|
giorgos.al
|
// if(ask){
|
56 |
42719
|
giorgos.al
|
// try{
|
57 |
|
|
// deleteResource(ds.getConnection(), resourceURI, defaultGraph);
|
58 |
|
|
// }catch(Exception e){
|
59 |
|
|
// log.error("Could not DELETE "+resourceURI+" "+e.toString(),e);
|
60 |
41421
|
giorgos.al
|
// }
|
61 |
|
|
//####################################################################################
|
62 |
40802
|
giorgos.al
|
|
63 |
40843
|
giorgos.al
|
JSONArray typeMappings = mappings.getJSONArray(type);
|
64 |
41286
|
giorgos.al
|
JSONObject propertyObject = typeMappings.getJSONObject(0);
|
65 |
40854
|
giorgos.al
|
|
66 |
40778
|
giorgos.al
|
for(int i=0; i<row.size(); i++){
|
67 |
40819
|
eri.katsar
|
String index = new Integer(i).toString();
|
68 |
40843
|
giorgos.al
|
String propertyString;
|
69 |
41057
|
giorgos.al
|
|
70 |
41076
|
giorgos.al
|
|
71 |
|
|
if(type.equals("project") && i>24) continue;
|
72 |
|
|
if(type.equals("organization") && i>11) continue;
|
73 |
|
|
|
74 |
40843
|
giorgos.al
|
try{
|
75 |
|
|
propertyString = propertyObject.getString(index);
|
76 |
|
|
}catch(Exception e){
|
77 |
42240
|
giorgos.al
|
// log.error("Could not get the property for type "+type+" and ID"+row.get(1).toString()+" : "+e.toString(),e);
|
78 |
40843
|
giorgos.al
|
continue;
|
79 |
|
|
}
|
80 |
41057
|
giorgos.al
|
|
81 |
40778
|
giorgos.al
|
if(i==0){
|
82 |
|
|
String resourceType = propertyObject.getString(propertyString);
|
83 |
42719
|
giorgos.al
|
// insertQuery+="<"+resourceURI+"> <"+propertyString+"> <"+resourceType+">";
|
84 |
|
|
buildQuery[0]+="<"+resourceURI+"> <"+propertyString+"> <"+resourceType+">";
|
85 |
40836
|
giorgos.al
|
continue;
|
86 |
40778
|
giorgos.al
|
}
|
87 |
41002
|
giorgos.al
|
|
88 |
41057
|
giorgos.al
|
String value = row.get(i).trim();
|
89 |
42714
|
giorgos.al
|
value = value.replace("\\", "");
|
90 |
|
|
value = value.replace("\""," ");
|
91 |
|
|
|
92 |
41002
|
giorgos.al
|
if(value.trim().equals("null") || value==null || value.trim().equals("")) continue;
|
93 |
|
|
|
94 |
41057
|
giorgos.al
|
if(i==16 && type.equals("result") && !value.equals("und") || i==11 && type.equals("person") && !value.equals("und") || i==9 && type.equals("organization") && !value.equals("und")){
|
95 |
41000
|
giorgos.al
|
try{
|
96 |
42714
|
giorgos.al
|
// log.info("Country code is "+value);
|
97 |
41019
|
giorgos.al
|
String countryURI = mapCountries.getCountryURI(value);
|
98 |
41076
|
giorgos.al
|
URI uri = new URI(countryURI);
|
99 |
41057
|
giorgos.al
|
if(countryURI.equals("")){
|
100 |
42719
|
giorgos.al
|
// insertQuery+="; <"+propertyString+"> \""+value+"\"";
|
101 |
|
|
buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
|
102 |
41057
|
giorgos.al
|
continue;
|
103 |
|
|
}
|
104 |
42719
|
giorgos.al
|
// insertQuery+="; <"+propertyString+"> <"+uri+">";
|
105 |
|
|
buildQuery[0]+="; <"+propertyString+"> <"+uri+">";
|
106 |
41057
|
giorgos.al
|
// log.info("COUNTRY "+countryURI+" FROM "+value);
|
107 |
41000
|
giorgos.al
|
}catch(Exception e){
|
108 |
42719
|
giorgos.al
|
buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
|
109 |
42240
|
giorgos.al
|
// log.error("No country URI for: "+e.toString(),e);
|
110 |
41057
|
giorgos.al
|
continue;
|
111 |
41000
|
giorgos.al
|
}
|
112 |
41057
|
giorgos.al
|
continue;
|
113 |
41000
|
giorgos.al
|
}
|
114 |
41057
|
giorgos.al
|
|
115 |
|
|
if(i==9 && type.equals("result") && !value.equals("und")){
|
116 |
41000
|
giorgos.al
|
try{
|
117 |
41019
|
giorgos.al
|
String langURI = mapLanguages.getLangURI(value);
|
118 |
41076
|
giorgos.al
|
URI uri = new URI(langURI);
|
119 |
41057
|
giorgos.al
|
if(langURI.equals("")){
|
120 |
42719
|
giorgos.al
|
// insertQuery+="; <"+propertyString+"> \""+value+"\"";
|
121 |
|
|
buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
|
122 |
41057
|
giorgos.al
|
continue;
|
123 |
|
|
}
|
124 |
42719
|
giorgos.al
|
// insertQuery+="; <"+propertyString+"> <"+uri+">";
|
125 |
|
|
buildQuery[0]+="; <"+propertyString+"> <"+uri+">";
|
126 |
41057
|
giorgos.al
|
// log.info("LANG "+langURI+" FROM "+value.trim());
|
127 |
41000
|
giorgos.al
|
}catch(Exception e){
|
128 |
42719
|
giorgos.al
|
// insertQuery+="; <"+propertyString+"> \""+value+"\"";
|
129 |
|
|
buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
|
130 |
42240
|
giorgos.al
|
// log.error("No Language URI for: "+e.toString(),e);
|
131 |
41057
|
giorgos.al
|
continue;
|
132 |
41000
|
giorgos.al
|
}
|
133 |
|
|
continue;
|
134 |
|
|
}
|
135 |
|
|
|
136 |
40841
|
giorgos.al
|
if(value.contains(conf.get("lod.seperator"))){
|
137 |
41057
|
giorgos.al
|
String[] splittedValue = value.split(conf.get("lod.seperator"));
|
138 |
40778
|
giorgos.al
|
for(String v:splittedValue){
|
139 |
40843
|
giorgos.al
|
v= v.replace(conf.get("lod.seperator"), "").trim();
|
140 |
42719
|
giorgos.al
|
// insertQuery+="; <"+propertyString+"> \""+v+"\"";
|
141 |
|
|
buildQuery[0]+="; <"+propertyString+"> \""+v+"\"";
|
142 |
40778
|
giorgos.al
|
}
|
143 |
|
|
}else{
|
144 |
41057
|
giorgos.al
|
if(value.startsWith("http://")){
|
145 |
|
|
try{
|
146 |
41421
|
giorgos.al
|
if(value.contains(" ")){
|
147 |
42719
|
giorgos.al
|
// insertQuery+="; <"+propertyString+"> \""+value+"\"";
|
148 |
|
|
buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
|
149 |
|
|
|
150 |
41421
|
giorgos.al
|
}else{
|
151 |
|
|
value = value.replaceAll("\\s","-");
|
152 |
|
|
URI uri = new URI(value);
|
153 |
42719
|
giorgos.al
|
// insertQuery+="; <"+propertyString+"> <"+uri+">";
|
154 |
|
|
buildQuery[0]+="; <"+propertyString+"> <"+uri+">";
|
155 |
41421
|
giorgos.al
|
}
|
156 |
41057
|
giorgos.al
|
}catch(Exception e){
|
157 |
42719
|
giorgos.al
|
// insertQuery+="; <"+propertyString+"> \""+value+"\"";
|
158 |
|
|
buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
|
159 |
42240
|
giorgos.al
|
// log.error("NOT URI "+e.toString(),e);
|
160 |
41057
|
giorgos.al
|
continue;
|
161 |
|
|
}
|
162 |
|
|
}
|
163 |
42719
|
giorgos.al
|
else buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
|
164 |
40843
|
giorgos.al
|
}
|
165 |
40728
|
giorgos.al
|
|
166 |
|
|
}
|
167 |
40723
|
giorgos.al
|
|
168 |
42719
|
giorgos.al
|
// insertQuery+=". ";
|
169 |
|
|
buildQuery[0]+=". ";
|
170 |
40728
|
giorgos.al
|
|
171 |
42719
|
giorgos.al
|
return buildQuery;
|
172 |
40843
|
giorgos.al
|
|
173 |
40719
|
giorgos.al
|
}
|
174 |
|
|
|
175 |
41076
|
giorgos.al
|
// public static String RDFizeRelationRow(List<String> row, VirtGraph graph, JSONObject mappings, Configuration conf){
|
176 |
42719
|
giorgos.al
|
public static String[] RDFizeRelationRow(List<String> row, JSONObject mappings, Configuration conf){
|
177 |
41076
|
giorgos.al
|
|
178 |
42719
|
giorgos.al
|
// String insertQuery = "";
|
179 |
|
|
String[] buildQuery = new String[1];
|
180 |
|
|
buildQuery[0]="";
|
181 |
40966
|
giorgos.al
|
String baseURI = conf.get("lod.baseURI");
|
182 |
|
|
JSONObject typeMappings = mappings.getJSONArray(row.get(0)).getJSONObject(0);
|
183 |
|
|
String sourceType = typeMappings.getString("sourceType");
|
184 |
|
|
String sourceId = typeMappings.getString("sourceId");
|
185 |
|
|
String targetType = typeMappings.getString("targetType");
|
186 |
|
|
String targetId = typeMappings.getString("targetId");
|
187 |
|
|
String property = typeMappings.getString("property");
|
188 |
|
|
String sourceURI = baseURI+row.get(Integer.parseInt(sourceType))+"/"+row.get(Integer.parseInt(sourceId));
|
189 |
|
|
String targetURI = baseURI+row.get(Integer.parseInt(targetType))+"/"+row.get(Integer.parseInt(targetId));
|
190 |
42719
|
giorgos.al
|
// insertQuery= "<"+sourceURI+"> <"+property+"> <"+targetURI+">. ";
|
191 |
|
|
buildQuery[0]="<"+sourceURI+"> <"+property+"> <"+targetURI+">. ";
|
192 |
|
|
// return insertQuery;
|
193 |
|
|
return buildQuery;
|
194 |
40966
|
giorgos.al
|
}
|
195 |
|
|
|
196 |
41076
|
giorgos.al
|
public static boolean resourceExists(Connection conn, String resourceURI, String graph){
|
197 |
|
|
boolean exists = false;
|
198 |
|
|
String askQuery = "SPARQL SELECT ?o FROM <"+graph+"> WHERE {<"+resourceURI+"> ?p ?o}";
|
199 |
|
|
Statement stmt;
|
200 |
|
|
try {
|
201 |
|
|
stmt = conn.createStatement();
|
202 |
41421
|
giorgos.al
|
ResultSet rs=stmt.executeQuery(askQuery);
|
203 |
|
|
if(rs.next())exists=true;
|
204 |
|
|
rs.close();
|
205 |
41076
|
giorgos.al
|
// if(rs.next()) exists=true;
|
206 |
|
|
stmt.close();
|
207 |
41140
|
giorgos.al
|
// conn.commit();
|
208 |
41076
|
giorgos.al
|
conn.close();
|
209 |
|
|
} catch (Exception e) {
|
210 |
41140
|
giorgos.al
|
log.error("Virtuoso ask Query failed. Query was "+ askQuery +"\n" + e.toString(), e);
|
211 |
41076
|
giorgos.al
|
}
|
212 |
|
|
return exists;
|
213 |
40719
|
giorgos.al
|
}
|
214 |
40917
|
giorgos.al
|
|
215 |
41076
|
giorgos.al
|
public static void deleteResource(Connection conn, String resourceURI, String graph){
|
216 |
|
|
String deleteQueryString ="SPARQL DELETE FROM <"+graph+"> {<"+resourceURI+"> ?p ?o} WHERE {<"+resourceURI+"> ?p ?o}" ;
|
217 |
40966
|
giorgos.al
|
Statement stmt;
|
218 |
|
|
try {
|
219 |
41076
|
giorgos.al
|
stmt = conn.createStatement();
|
220 |
|
|
stmt.execute(deleteQueryString);
|
221 |
41140
|
giorgos.al
|
conn.commit();
|
222 |
41076
|
giorgos.al
|
stmt.close();
|
223 |
|
|
conn.close();
|
224 |
|
|
} catch (Exception e) {
|
225 |
41421
|
giorgos.al
|
log.error("Virtuoso ask Query failed. "+ deleteQueryString +"\n" + e.toString(), e);
|
226 |
41076
|
giorgos.al
|
}
|
227 |
|
|
}
|
228 |
|
|
|
229 |
|
|
public static void clearGraph(String graph, Connection conn){
|
230 |
41421
|
giorgos.al
|
String clearQuery="SPARQL DEFINE sql:log-enable 0 CLEAR GRAPH <"+graph+">";
|
231 |
41076
|
giorgos.al
|
Statement stmt;
|
232 |
|
|
try {
|
233 |
|
|
stmt = conn.createStatement();
|
234 |
|
|
stmt.execute(clearQuery);
|
235 |
41140
|
giorgos.al
|
conn.commit();
|
236 |
41076
|
giorgos.al
|
stmt.close();
|
237 |
41140
|
giorgos.al
|
// conn.commit();
|
238 |
|
|
// conn.close();
|
239 |
41076
|
giorgos.al
|
} catch (Exception e) {
|
240 |
|
|
log.error("Virtuoso FAILED TO CLEAR graph "+ clearQuery +"\n" + e.toString(), e);
|
241 |
|
|
}
|
242 |
|
|
}
|
243 |
|
|
|
244 |
41140
|
giorgos.al
|
public static void setCheckpoint(Connection conn, int checkpointValue){
|
245 |
41076
|
giorgos.al
|
Statement stmt;
|
246 |
|
|
try {
|
247 |
42240
|
giorgos.al
|
|
248 |
41421
|
giorgos.al
|
stmt = conn.createStatement();
|
249 |
42240
|
giorgos.al
|
stmt.execute("checkpoint_interval("+checkpointValue+")");
|
250 |
41145
|
giorgos.al
|
if(checkpointValue==120){
|
251 |
42240
|
giorgos.al
|
log.info("CheckPoint Started");
|
252 |
41146
|
giorgos.al
|
stmt.execute("checkpoint");
|
253 |
42240
|
giorgos.al
|
stmt.execute("log_enable(3,1)");
|
254 |
41145
|
giorgos.al
|
conn.commit();
|
255 |
42240
|
giorgos.al
|
log.info("CheckPoint Finished");
|
256 |
41145
|
giorgos.al
|
}
|
257 |
|
|
|
258 |
40966
|
giorgos.al
|
stmt.close();
|
259 |
41140
|
giorgos.al
|
//
|
260 |
|
|
// conn.close();
|
261 |
41002
|
giorgos.al
|
} catch (Exception e) {
|
262 |
41057
|
giorgos.al
|
log.error("Virtuoso set checkpoint failed. Checkpoint was "+ checkpointValue +"\n" + e.toString(), e);
|
263 |
40966
|
giorgos.al
|
}
|
264 |
|
|
}
|
265 |
40725
|
eri.katsar
|
}
|