Project

General

Profile

1
package eu.dnetlib.data.mapreduce.hbase.lodImport.utils;
2

    
3
import java.net.URI;
4
import java.sql.Connection;
5
import java.sql.ResultSet;
6
import java.sql.Statement;
7
import java.util.List;
8

    
9
import org.apache.hadoop.conf.Configuration;
10
import org.apache.log4j.Logger;
11
import org.json.JSONArray;
12
import org.json.JSONObject;
13

    
14
import com.jolbox.bonecp.BoneCPDataSource;
15

    
16
public class RDFizer {
17
	
18
	private static Logger log = Logger.getLogger(RDFizer.class);
19
	
20
	public RDFizer() {
21
	}
22

    
23
//	public static String RDFizeEntityRow(List<String> row,VirtGraph graph, JSONObject mappings, Configuration conf, MapCountries mapCountries,MapLanguages mapLanguages){
24
	public static String[] RDFizeEntityRow(List<String> row,BoneCPDataSource ds, JSONObject mappings, Configuration conf, MapCountries mapCountries,MapLanguages mapLanguages, String defaultGraph){
25
		
26
		//		VirtModel model = new VirtModel(graph);
27
		
28
//		String graph = conf.get("lod.defaultGraph");
29
		
30
		String[] buildQuery = new String[2];
31
		buildQuery[0]="";
32
		buildQuery[1]="";
33

    
34
//		String insertQuery ="";
35
//		String deleteQuery="";
36
		
37
		String baseURI = conf.get("lod.baseURI");
38
		String id = row.get(1).toString();
39
		String type = row.get(0).toString();
40
		String resourceURI = baseURI+type+"/"+id;
41
	
42
//	####################################################################################	
43
//	THIS IS FOR INCREMENTAL UPDATE ON-HOLD FOR THE BATCH	
44
//		boolean ask = false;
45
//		try{
46
//			ask = resourceExists(ds.getConnection(), resourceURI, defaultGraph);
47
//		}catch(Exception e){
48
//			log.error("Could not ASK "+e.toString(),e);
49
//		}
50

    
51
		
52
		
53
//		buildQuery[1]+= "<"+resourceURI+"> ?p ?o. ";
54
		
55
//		if(ask){
56
//			try{
57
//				deleteResource(ds.getConnection(), resourceURI, defaultGraph);
58
//			}catch(Exception e){
59
//				log.error("Could not DELETE  "+resourceURI+" "+e.toString(),e);
60
//		}
61
		//####################################################################################
62
		
63
		JSONArray typeMappings = new JSONArray();
64
		JSONObject propertyObject = new JSONObject();
65
		try{
66
	    	typeMappings = mappings.getJSONArray(type);
67
	    	propertyObject = typeMappings.getJSONObject(0);
68
		}catch(Exception e){
69
			log.error("ROW "+row.toString()+"  "+e.toString(),e);
70
		}
71
	    
72
		for(int i=0; i<row.size(); i++){
73
            String index = new Integer(i).toString();
74
            String propertyString;
75
            
76
            
77
            if(type.equals("project") && i>24) continue;
78
            if(type.equals("organization") && i>11) continue;
79
            
80
            try{
81
            	propertyString = propertyObject.getString(index);
82
            }catch(Exception e){
83
//            	log.error("Could not get the property for type "+type+" and ID"+row.get(1).toString()+" :  "+e.toString(),e);
84
            	continue;
85
            }
86
            
87
			if(i==0){
88
				String resourceType = propertyObject.getString(propertyString);
89
				buildQuery[0]+="<"+resourceURI+"> <"+propertyString+"> <"+resourceType+">";
90
				continue;
91
			}
92
			
93
			String value = row.get(i).trim();
94
			value = value.replace("\\", "");
95
			value = value.replace("\""," ");
96
			
97
			if(value.trim().equals("null") || value==null || value.trim().equals("")) continue;
98
			
99
			if(i==16 && type.equals("result")  && !value.equals("und") || i==11 && type.equals("person")  && !value.equals("und") || i==9 && type.equals("organization") && !value.equals("und")){
100
				try{
101
//					log.info("Country code is  "+value);
102
					String countryURI = mapCountries.getCountryURI(value);
103
					URI uri = new URI(countryURI);
104
					if(countryURI.equals("")){
105
						buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
106
						continue;
107
					}
108
					buildQuery[0]+="; <"+propertyString+"> <"+uri+">";
109
				}catch(Exception e){
110
					buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
111
					continue;
112
				}
113
				continue;	
114
			}
115
			
116
			if(type.equals("result") && i==9  && !value.equals("und")){
117
				try{
118
					String langURI = mapLanguages.getLangURI(value);
119
					URI uri = new URI(langURI);
120
					if(langURI.equals("")){
121
						buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
122
						continue;
123
					}
124
					buildQuery[0]+="; <"+propertyString+"> <"+uri+">";
125
				}catch(Exception e){
126
					buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
127
					continue;
128
				}
129
				continue;	
130
			}
131
			
132
			if(type.equals("datasource") && i==21 && !value.equals("und") && !value.isEmpty()){
133
				if(value.contains(conf.get("lod.seperator"))){
134
					String[] splittedValue = value.split(conf.get("lod.seperator"));
135
					for(String v:splittedValue){
136
						try{
137
							String langURI = mapLanguages.getLangURI(v);
138
							URI uri = new URI(langURI);
139
							if(langURI.equals("")){
140
								buildQuery[0]+="; <"+propertyString+"> \""+v+"\"";
141
								continue;
142
							}
143
							buildQuery[0]+="; <"+propertyString+"> <"+uri+">";
144
						}catch(Exception e){
145
							buildQuery[0]+="; <"+propertyString+"> \""+v+"\"";
146
							continue;
147
						}		
148
					 }
149
					continue;
150
				}
151
			}
152
			
153
			if(value.contains(conf.get("lod.seperator"))){
154
				String[] splittedValue = value.split(conf.get("lod.seperator"));
155
				for(String v:splittedValue){
156
					v= v.replace(conf.get("lod.seperator"), "").trim();
157
					buildQuery[0]+="; <"+propertyString+"> \""+v+"\"";
158
				}
159
			}else{
160
				if(value.startsWith("http://")){
161
					try{
162
						if(value.contains(" ")){
163
							buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
164
						}else{
165
							value = value.replaceAll("\\s","-");
166
							URI uri = new URI(value);
167
							buildQuery[0]+="; <"+propertyString+"> <"+uri+">";
168
						}
169
					}catch(Exception e){
170
						buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
171
						continue;
172
					}
173
				}
174
				else buildQuery[0]+="; <"+propertyString+"> \""+value+"\"";
175
			}	
176
			
177
		}
178
		
179
		buildQuery[0]+=". ";
180
		
181
		return buildQuery;
182
		
183
	}
184
	
185
	public static String[] RDFizeRelationRow(List<String> row, JSONObject mappings, Configuration conf){
186
		
187
//			String insertQuery = "";
188
			String[] buildQuery = new String[1];
189
			buildQuery[0]="";
190
			String baseURI = conf.get("lod.baseURI");
191
			JSONObject typeMappings = mappings.getJSONArray(row.get(0)).getJSONObject(0);
192
			String sourceType = typeMappings.getString("sourceType");
193
			String sourceId = typeMappings.getString("sourceId");
194
			String targetType = typeMappings.getString("targetType");
195
			String targetId = typeMappings.getString("targetId");
196
			String property = typeMappings.getString("property");
197
			String sourceURI = baseURI+row.get(Integer.parseInt(sourceType))+"/"+row.get(Integer.parseInt(sourceId));
198
			String targetURI = baseURI+row.get(Integer.parseInt(targetType))+"/"+row.get(Integer.parseInt(targetId));
199
			buildQuery[0]="<"+sourceURI+"> <"+property+"> <"+targetURI+">. ";
200
			return buildQuery;
201
		}
202
	
203
	public static boolean resourceExists(Connection conn, String resourceURI, String graph){
204
		boolean exists = false;
205
		String askQuery = "SPARQL SELECT ?o FROM <"+graph+"> WHERE {<"+resourceURI+"> ?p ?o}";
206
		Statement stmt;
207
		try {
208
			stmt = conn.createStatement();
209
			ResultSet rs=stmt.executeQuery(askQuery);
210
			if(rs.next()) exists=true;
211
			rs.close();
212
			stmt.close();
213
			conn.close();
214
		} catch (Exception e) {
215
			log.error("Virtuoso ask Query failed. Query was: "+ askQuery +"\n" + e.toString(), e);
216
		}
217
		return exists;
218
	}
219
	
220
	public static void deleteResource(Connection conn, String resourceURI, String graph){
221
		String deleteQueryString ="SPARQL DELETE FROM <"+graph+"> {<"+resourceURI+"> ?p ?o} WHERE {<"+resourceURI+"> ?p ?o}" ;
222
		Statement stmt;
223
		try {
224
			stmt = conn.createStatement();
225
			stmt.execute(deleteQueryString);
226
			conn.commit();
227
			stmt.close();
228
			conn.close();
229
		} catch (Exception e) {
230
			log.error("Virtuoso ask Query failed.  "+ deleteQueryString +"\n" + e.toString(), e);
231
		}
232
	}
233
	
234
	public static void clearGraph(String graph, Connection conn){
235
		String clearQuery="SPARQL DEFINE sql:log-enable 3 CLEAR GRAPH  <"+graph+">";
236
		Statement stmt;
237
		try {
238
			stmt = conn.createStatement();
239
			stmt.execute(clearQuery);
240
			conn.commit();
241
			stmt.close();
242
		} catch (Exception e) {
243
			log.error("Virtuoso FAILED TO CLEAR graph "+ clearQuery +"\n" + e.toString(), e);
244
		}
245
	}
246

    
247
	public static void setCheckpoint(Connection conn, int checkpointValue){
248
		Statement stmt;
249
		try {
250

    
251
			stmt = conn.createStatement();			
252
			stmt.execute("checkpoint_interval("+checkpointValue+")");			
253
			if(checkpointValue==120){
254
				log.info("CheckPoint Started");
255
				stmt.execute("checkpoint");
256
//				stmt.execute("log_enable(3,1)");
257
				conn.commit();
258
				log.info("CheckPoint Finished");
259
			}
260
			
261
			stmt.close();
262
		} catch (Exception e) {
263
			log.error("Virtuoso set checkpoint failed. Checkpoint was "+ checkpointValue +"\n" + e.toString(), e);
264
		}
265
	}
266
	
267
	public static void autoIndexing(Connection conn, boolean state){
268
		Statement stmt;
269
		String query="";
270
		if(state) query = "DB.DBA.RDF_OBJ_FT_RULE_ADD (null, null, 'All')";
271
		else query = "DB.DBA.VT_BATCH_UPDATE ('DB.DBA.RDF_OBJ', 'ON', NULL)";
272
		try {
273
			stmt = conn.createStatement();			
274
			stmt.execute(query);	
275
			log.info("autoIndex "+query);		
276
			stmt.close();
277
		} catch (Exception e) {
278
			log.error("Virtuoso autoIndexing failed. State was "+ query +"\n" + e.toString(), e);
279
		}
280
		
281
	}
282
}
(4-4/4)