Project

General

Profile

1 40545 eri.katsar
package eu.dnetlib.data.mapreduce.hbase.lodImport.utils;
2
3 41057 giorgos.al
import java.net.URI;
4 41076 giorgos.al
import java.sql.Connection;
5 41421 giorgos.al
import java.sql.ResultSet;
6 40966 giorgos.al
import java.sql.Statement;
7
import java.util.List;
8
9 40841 giorgos.al
import org.apache.hadoop.conf.Configuration;
10 40966 giorgos.al
import org.apache.log4j.Logger;
11 40836 giorgos.al
import org.json.JSONArray;
12
import org.json.JSONObject;
13
14 41076 giorgos.al
import com.jolbox.bonecp.BoneCPDataSource;
15 40841 giorgos.al
16 40545 eri.katsar
public class RDFizer {
17 41057 giorgos.al
18 40966 giorgos.al
	/** Class-wide log4j logger; final because it is never reassigned. */
	private static final Logger log = Logger.getLogger(RDFizer.class);

	/**
	 * Creates an instance. Every operation this class offers is a static
	 * method, so the constructor has nothing to initialise; it is kept
	 * public so that existing callers that instantiate the class still compile.
	 */
	public RDFizer() {
		// Intentionally empty.
	}
22
23 41076 giorgos.al
//	public static String RDFizeEntityRow(List<String> row,VirtGraph graph, JSONObject mappings, Configuration conf, MapCountries mapCountries,MapLanguages mapLanguages){
24 42719 giorgos.al
	public static String[] RDFizeEntityRow(List<String> row,BoneCPDataSource ds, JSONObject mappings, Configuration conf, MapCountries mapCountries,MapLanguages mapLanguages, String defaultGraph){
25 40836 giorgos.al
26 41076 giorgos.al
		//		VirtModel model = new VirtModel(graph);
27
28 42240 giorgos.al
//		String graph = conf.get("lod.defaultGraph");
29 41076 giorgos.al
30 42719 giorgos.al
		String[] buildQuery = new String[2];
31
		buildQuery[0]="";
32
		buildQuery[1]="";
33
34
//		String insertQuery ="";
35
//		String deleteQuery="";
36 40843 giorgos.al
37 40728 giorgos.al
		String baseURI = conf.get("lod.baseURI");
38
		String id = row.get(1).toString();
39 40841 giorgos.al
		String type = row.get(0).toString();
40
		String resourceURI = baseURI+type+"/"+id;
41 41421 giorgos.al
42
//	####################################################################################
43
//	THIS IS FOR INCREMENTAL UPDATE ON-HOLD FOR THE BATCH
44
//		boolean ask = false;
45
//		try{
46
//			ask = resourceExists(ds.getConnection(), resourceURI, graph);
47
//		}catch(Exception e){
48
//			log.error("Could not ASK "+e.toString(),e);
49
//		}
50
//
51 42719 giorgos.al
52
53
		buildQuery[1]+= "<"+resourceURI+"> ?p ?o. ";
54
55 41421 giorgos.al
//		if(ask){
56 42719 giorgos.al
//			try{
57
//				deleteResource(ds.getConnection(), resourceURI, defaultGraph);
58
//			}catch(Exception e){
59
//				log.error("Could not DELETE  "+resourceURI+" "+e.toString(),e);
60 41421 giorgos.al
//		}
61
		//####################################################################################
62 40802 giorgos.al
63 40843 giorgos.al
	    JSONArray typeMappings = mappings.getJSONArray(type);
64 41286 giorgos.al
	    JSONObject propertyObject = typeMappings.getJSONObject(0);
65 40854 giorgos.al
66 40778 giorgos.al
		for(int i=0; i<row.size(); i++){
67 40819 eri.katsar
            String index = new Integer(i).toString();
68 40843 giorgos.al
            String propertyString;
69 41057 giorgos.al
70 41076 giorgos.al
71
            if(type.equals("project") && i>24) continue;
72
            if(type.equals("organization") && i>11) continue;
73
74 40843 giorgos.al
            try{
75
            	propertyString = propertyObject.getString(index);
76
            }catch(Exception e){
77 42240 giorgos.al
//            	log.error("Could not get the property for type "+type+" and ID"+row.get(1).toString()+" :  "+e.toString(),e);
78 40843 giorgos.al
            	continue;
79
            }
80 41057 giorgos.al
81 40778 giorgos.al
			if(i==0){
82
				String resourceType = propertyObject.getString(propertyString);
83 42719 giorgos.al
//				insertQuery+="<"+resourceURI+"> <"+propertyString+"> <"+resourceType+">";
84
				buildQuery[0]+="<"+resourceURI+"> <"+propertyString+"> <"+resourceType+">";
85 40836 giorgos.al
				continue;
86 40778 giorgos.al
			}
87 41002 giorgos.al
88 41057 giorgos.al
			String value = row.get(i).trim();
89 42714 giorgos.al
			value = value.replace("\\", "");
90
			value = value.replace("\""," ");
91
92 41002 giorgos.al
			if(value.trim().equals("null") || value==null || value.trim().equals("")) continue;
93
94 41057 giorgos.al
			if(i==16 && type.equals("result")  && !value.equals("und") || i==11 && type.equals("person")  && !value.equals("und") || i==9 && type.equals("organization") && !value.equals("und")){
95 41000 giorgos.al
				try{
96 42714 giorgos.al
//					log.info("Country code is  "+value);
97 41019 giorgos.al
					String countryURI = mapCountries.getCountryURI(value);
98 41076 giorgos.al
					URI uri = new URI(countryURI);
99 41057 giorgos.al
					if(countryURI.equals("")){
100 42719 giorgos.al
//						insertQuery+="; <"+propertyString+"> \""+value+"\"";
101
						buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
102 41057 giorgos.al
						continue;
103
					}
104 42719 giorgos.al
//					insertQuery+="; <"+propertyString+"> <"+uri+">";
105
					buildQuery[0]+="; <"+propertyString+"> <"+uri+">";
106 41057 giorgos.al
//					log.info("COUNTRY "+countryURI+"  FROM  "+value);
107 41000 giorgos.al
				}catch(Exception e){
108 42719 giorgos.al
					buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
109 42240 giorgos.al
//					log.error("No country URI for: "+e.toString(),e);
110 41057 giorgos.al
					continue;
111 41000 giorgos.al
				}
112 41057 giorgos.al
				continue;
113 41000 giorgos.al
			}
114 41057 giorgos.al
115
			if(i==9 && type.equals("result") && !value.equals("und")){
116 41000 giorgos.al
				try{
117 41019 giorgos.al
					String langURI = mapLanguages.getLangURI(value);
118 41076 giorgos.al
					URI uri = new URI(langURI);
119 41057 giorgos.al
					if(langURI.equals("")){
120 42719 giorgos.al
//						insertQuery+="; <"+propertyString+"> \""+value+"\"";
121
						buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
122 41057 giorgos.al
						continue;
123
					}
124 42719 giorgos.al
//					insertQuery+="; <"+propertyString+"> <"+uri+">";
125
					buildQuery[0]+="; <"+propertyString+"> <"+uri+">";
126 41057 giorgos.al
//					log.info("LANG "+langURI+"  FROM  "+value.trim());
127 41000 giorgos.al
				}catch(Exception e){
128 42719 giorgos.al
//					insertQuery+="; <"+propertyString+"> \""+value+"\"";
129
					buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
130 42240 giorgos.al
//					log.error("No Language URI for: "+e.toString(),e);
131 41057 giorgos.al
					continue;
132 41000 giorgos.al
				}
133
				continue;
134
			}
135
136 40841 giorgos.al
			if(value.contains(conf.get("lod.seperator"))){
137 41057 giorgos.al
				String[] splittedValue = value.split(conf.get("lod.seperator"));
138 40778 giorgos.al
				for(String v:splittedValue){
139 40843 giorgos.al
					v= v.replace(conf.get("lod.seperator"), "").trim();
140 42719 giorgos.al
//					insertQuery+="; <"+propertyString+"> \""+v+"\"";
141
					buildQuery[0]+="; <"+propertyString+"> \""+v+"\"";
142 40778 giorgos.al
				}
143
			}else{
144 41057 giorgos.al
				if(value.startsWith("http://")){
145
					try{
146 41421 giorgos.al
						if(value.contains(" ")){
147 42719 giorgos.al
//							insertQuery+="; <"+propertyString+"> \""+value+"\"";
148
							buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
149
150 41421 giorgos.al
						}else{
151
							value = value.replaceAll("\\s","-");
152
							URI uri = new URI(value);
153 42719 giorgos.al
//							insertQuery+="; <"+propertyString+"> <"+uri+">";
154
							buildQuery[0]+="; <"+propertyString+"> <"+uri+">";
155 41421 giorgos.al
						}
156 41057 giorgos.al
					}catch(Exception e){
157 42719 giorgos.al
//						insertQuery+="; <"+propertyString+"> \""+value+"\"";
158
						buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
159 42240 giorgos.al
//						log.error("NOT URI "+e.toString(),e);
160 41057 giorgos.al
						continue;
161
					}
162
				}
163 42719 giorgos.al
				else buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
164 40843 giorgos.al
			}
165 40728 giorgos.al
166
		}
167 40723 giorgos.al
168 42719 giorgos.al
//		insertQuery+=". ";
169
		buildQuery[0]+=". ";
170 40728 giorgos.al
171 42719 giorgos.al
		return buildQuery;
172 40843 giorgos.al
173 40719 giorgos.al
	}
174
175 41076 giorgos.al
//	public static String RDFizeRelationRow(List<String> row, VirtGraph graph, JSONObject mappings, Configuration conf){
176 42719 giorgos.al
	public static String[] RDFizeRelationRow(List<String> row, JSONObject mappings, Configuration conf){
177 41076 giorgos.al
178 42719 giorgos.al
//			String insertQuery = "";
179
			String[] buildQuery = new String[1];
180
			buildQuery[0]="";
181 40966 giorgos.al
			String baseURI = conf.get("lod.baseURI");
182
			JSONObject typeMappings = mappings.getJSONArray(row.get(0)).getJSONObject(0);
183
			String sourceType = typeMappings.getString("sourceType");
184
			String sourceId = typeMappings.getString("sourceId");
185
			String targetType = typeMappings.getString("targetType");
186
			String targetId = typeMappings.getString("targetId");
187
			String property = typeMappings.getString("property");
188
			String sourceURI = baseURI+row.get(Integer.parseInt(sourceType))+"/"+row.get(Integer.parseInt(sourceId));
189
			String targetURI = baseURI+row.get(Integer.parseInt(targetType))+"/"+row.get(Integer.parseInt(targetId));
190 42719 giorgos.al
//			insertQuery= "<"+sourceURI+"> <"+property+"> <"+targetURI+">. ";
191
			buildQuery[0]="<"+sourceURI+"> <"+property+"> <"+targetURI+">. ";
192
//			return insertQuery;
193
			return buildQuery;
194 40966 giorgos.al
		}
195
196 41076 giorgos.al
	public static boolean resourceExists(Connection conn, String resourceURI, String graph){
197
		boolean exists = false;
198
		String askQuery = "SPARQL SELECT ?o FROM <"+graph+"> WHERE {<"+resourceURI+"> ?p ?o}";
199
		Statement stmt;
200
		try {
201
			stmt = conn.createStatement();
202 41421 giorgos.al
			ResultSet rs=stmt.executeQuery(askQuery);
203
			if(rs.next())exists=true;
204
			rs.close();
205 41076 giorgos.al
//			if(rs.next()) exists=true;
206
			stmt.close();
207 41140 giorgos.al
//			conn.commit();
208 41076 giorgos.al
			conn.close();
209
		} catch (Exception e) {
210 41140 giorgos.al
			log.error("Virtuoso ask Query failed. Query was "+ askQuery +"\n" + e.toString(), e);
211 41076 giorgos.al
		}
212
		return exists;
213 40719 giorgos.al
	}
214 40917 giorgos.al
215 41076 giorgos.al
	public static void deleteResource(Connection conn, String resourceURI, String graph){
216
		String deleteQueryString ="SPARQL DELETE FROM <"+graph+"> {<"+resourceURI+"> ?p ?o} WHERE {<"+resourceURI+"> ?p ?o}" ;
217 40966 giorgos.al
		Statement stmt;
218
		try {
219 41076 giorgos.al
			stmt = conn.createStatement();
220
			stmt.execute(deleteQueryString);
221 41140 giorgos.al
			conn.commit();
222 41076 giorgos.al
			stmt.close();
223
			conn.close();
224
		} catch (Exception e) {
225 41421 giorgos.al
			log.error("Virtuoso ask Query failed.  "+ deleteQueryString +"\n" + e.toString(), e);
226 41076 giorgos.al
		}
227
	}
228
229
	public static void clearGraph(String graph, Connection conn){
230 41421 giorgos.al
		String clearQuery="SPARQL DEFINE sql:log-enable 0 CLEAR GRAPH  <"+graph+">";
231 41076 giorgos.al
		Statement stmt;
232
		try {
233
			stmt = conn.createStatement();
234
			stmt.execute(clearQuery);
235 41140 giorgos.al
			conn.commit();
236 41076 giorgos.al
			stmt.close();
237 41140 giorgos.al
//			conn.commit();
238
//			conn.close();
239 41076 giorgos.al
		} catch (Exception e) {
240
			log.error("Virtuoso FAILED TO CLEAR graph "+ clearQuery +"\n" + e.toString(), e);
241
		}
242
	}
243
244 41140 giorgos.al
	public static void setCheckpoint(Connection conn, int checkpointValue){
245 41076 giorgos.al
		Statement stmt;
246
		try {
247 42240 giorgos.al
248 41421 giorgos.al
			stmt = conn.createStatement();
249 42240 giorgos.al
			stmt.execute("checkpoint_interval("+checkpointValue+")");
250 41145 giorgos.al
			if(checkpointValue==120){
251 42240 giorgos.al
				log.info("CheckPoint Started");
252 41146 giorgos.al
				stmt.execute("checkpoint");
253 42240 giorgos.al
				stmt.execute("log_enable(3,1)");
254 41145 giorgos.al
				conn.commit();
255 42240 giorgos.al
				log.info("CheckPoint Finished");
256 41145 giorgos.al
			}
257
258 40966 giorgos.al
			stmt.close();
259 41140 giorgos.al
//
260
//			conn.close();
261 41002 giorgos.al
		} catch (Exception e) {
262 41057 giorgos.al
			log.error("Virtuoso set checkpoint failed. Checkpoint was "+ checkpointValue +"\n" + e.toString(), e);
263 40966 giorgos.al
		}
264
	}
265 40725 eri.katsar
}