1
|
package eu.dnetlib.parthenos.workflows.nodes;
|
2
|
|
3
|
import java.io.IOException;
|
4
|
import java.nio.charset.Charset;
|
5
|
import java.nio.file.FileVisitResult;
|
6
|
import java.nio.file.Files;
|
7
|
import java.nio.file.Path;
|
8
|
import java.nio.file.SimpleFileVisitor;
|
9
|
import java.nio.file.attribute.BasicFileAttributes;
|
10
|
import java.util.List;
|
11
|
import java.util.Map;
|
12
|
import java.util.stream.Collectors;
|
13
|
|
14
|
import com.google.common.collect.Lists;
|
15
|
import com.google.common.collect.Maps;
|
16
|
import eu.dnetlib.clients.enabling.ISLookUpClient;
|
17
|
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
|
18
|
import eu.dnetlib.miscutils.datetime.DateUtils;
|
19
|
import eu.dnetlib.rmi.datasource.DatasourceManagerService;
|
20
|
import eu.dnetlib.rmi.datasource.DatasourceManagerServiceException;
|
21
|
import eu.dnetlib.rmi.datasource.IfaceDesc;
|
22
|
import eu.dnetlib.rmi.enabling.ISLookUpException;
|
23
|
import eu.dnetlib.rmi.enabling.ISRegistryException;
|
24
|
import eu.dnetlib.rmi.enabling.ISRegistryService;
|
25
|
import org.antlr.stringtemplate.StringTemplate;
|
26
|
import org.apache.commons.io.IOUtils;
|
27
|
import org.apache.commons.lang3.StringUtils;
|
28
|
import org.apache.commons.logging.Log;
|
29
|
import org.apache.commons.logging.LogFactory;
|
30
|
|
31
|
import static java.nio.file.FileVisitResult.CONTINUE;
|
32
|
|
33
|
/**
|
34
|
* Created by Alessia Bardi on 12/01/2018.
|
35
|
*
|
36
|
* @author Alessia Bardi
|
37
|
*/
|
38
|
public class ClarinFileVisitor extends SimpleFileVisitor<Path> {
|
39
|
|
40
|
private static final Log log = LogFactory.getLog(ClarinFileVisitor.class);
|
41
|
private static final String API_PREFIX = "api_________::";
|
42
|
private static final String TDS_TEMPLATE ="/eu/dnetlib/parthenos/workflows/nodes/clarin_tds.xml.st";
|
43
|
|
44
|
private List<String> interfaces = Lists.newArrayList();
|
45
|
private String clarinDatasourceProfileID;
|
46
|
private String clarinDatasourceOriginalId;
|
47
|
private DatasourceManagerService dsMan = null;
|
48
|
private String inputBaseUrlPrefix;
|
49
|
private String metadataIdentifierPath;
|
50
|
private ISLookUpClient lookupClient;
|
51
|
private ISRegistryService registryService;
|
52
|
|
53
|
private int countVisitedFiles = 0;
|
54
|
private int countCreatedTDS = 0;
|
55
|
private int countUpdatedTDS = 0;
|
56
|
private int countVisitedFolders = 0;
|
57
|
private int countCreatedInterfaces = 0;
|
58
|
|
59
|
|
60
|
@Override
|
61
|
public FileVisitResult preVisitDirectory(final Path dir, final BasicFileAttributes attrs) throws IOException {
|
62
|
log.info("Processing " + dir.toString());
|
63
|
countVisitedFolders++;
|
64
|
String dirName = dir.getFileName().toString();
|
65
|
if(dirName.equalsIgnoreCase("x3ml-mappings")) return CONTINUE;
|
66
|
String apiId = API_PREFIX + getClarinDatasourceOriginalId() + "::" + dirName;
|
67
|
if (!interfaces.contains(apiId)) {
|
68
|
//API TO BE CREATED
|
69
|
IfaceDesc iface = new IfaceDesc();
|
70
|
iface.setActive(false);
|
71
|
iface.setCompliance("metadata");
|
72
|
iface.setContentDescription("metadata");
|
73
|
iface.setId(apiId);
|
74
|
iface.setRemovable(true);
|
75
|
iface.setTypology("dnet:repository::clarin");
|
76
|
iface.setAccessProtocol("filesystem");
|
77
|
Map<String, String> accessParams = Maps.newHashMap();
|
78
|
accessParams.put("extensions", "xml");
|
79
|
iface.setAccessParams(accessParams);
|
80
|
iface.setBaseUrl(StringUtils.appendIfMissing(getInputBaseUrlPrefix(), "/") + dirName);
|
81
|
Map<String, String> extraFields = Maps.newHashMap();
|
82
|
extraFields.put("metadata_identifier_path", getMetadataIdentifierPath());
|
83
|
iface.setExtraFields(extraFields);
|
84
|
try {
|
85
|
dsMan.addInterface(getClarinDatasourceProfileID(), iface);
|
86
|
countCreatedInterfaces++;
|
87
|
log.info("CREATED NEW INTERFACE " + iface.getId() + " for " + getClarinDatasourceOriginalId() + "(" + getClarinDatasourceProfileID() + ")");
|
88
|
} catch (DatasourceManagerServiceException e) {
|
89
|
log.error("Can't add interface " + iface.getId() + " to " + getClarinDatasourceOriginalId() + "(" + getClarinDatasourceProfileID() + ")", e);
|
90
|
return CONTINUE;
|
91
|
}
|
92
|
} else {
|
93
|
if (log.isDebugEnabled()) {
|
94
|
log.debug("Interface " + apiId + " already exists");
|
95
|
}
|
96
|
}
|
97
|
return CONTINUE;
|
98
|
}
|
99
|
|
100
|
@Override
|
101
|
public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) throws IOException {
|
102
|
String filename = file.getFileName().toString();
|
103
|
log.info("Processing " + file.toString());
|
104
|
countVisitedFiles++;
|
105
|
String tdsTitle = filename;
|
106
|
//call Files.lines which will use a stream to iterate over each line of the file.
|
107
|
//Next we will convert the stream to a string by calling Collectors.joining() which will join all the strings together.
|
108
|
String updatedCode = Files.lines(file).collect(Collectors.joining()).replace("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>", "");
|
109
|
try {
|
110
|
List<String> res = this.lookupClient.search("//RESOURCE_PROFILE[.//RESOURCE_TYPE/@value=\"TransformationRuleDSResourceType\" and .//CONFIGURATION/SCRIPT/TITLE/string()=\""+tdsTitle+"\"]/HEADER/RESOURCE_IDENTIFIER/@value/string()");
|
111
|
if(res == null || res.isEmpty()){
|
112
|
log.debug("Creating new TDS profile for "+filename);
|
113
|
final String template = IOUtils.toString(getClass().getResourceAsStream(TDS_TEMPLATE), Charset.forName("UTF-8"));
|
114
|
final StringTemplate st = new StringTemplate(template);
|
115
|
st.setAttribute("date", DateUtils.calculate_ISO8601(DateUtils.now()));
|
116
|
st.setAttribute("title", tdsTitle);
|
117
|
st.setAttribute("mapping", "<![CDATA["+updatedCode+"]]>");
|
118
|
String profId = this.registryService.registerProfile(st.toString());
|
119
|
countCreatedTDS++;
|
120
|
log.info("REGISTERED NEW TDS FOR "+filename+": "+profId);
|
121
|
}
|
122
|
else{
|
123
|
String tdsProfileId = res.get(0);
|
124
|
log.debug("Updating TDS profile "+tdsProfileId+"for "+filename);
|
125
|
|
126
|
boolean done = this.registryService.updateProfileNode(tdsProfileId, "//CONFIGURATION/SCRIPT/CODE", "<CODE><![CDATA["+updatedCode+"]]></CODE>");
|
127
|
if(done){
|
128
|
log.info("TDS PROFILE "+tdsProfileId+" UPDATED with contents from "+filename);
|
129
|
countUpdatedTDS++;
|
130
|
}
|
131
|
if(!done){
|
132
|
log.error("!!! TDS PROFILE "+tdsProfileId+" COULD NOT BE UPDATED with contents from "+filename);
|
133
|
}
|
134
|
}
|
135
|
} catch (ISLookUpException | ISRegistryException e) {
|
136
|
log.error("CANNOT UPDATE/CREATE TDS PROFILE FOR "+filename, e);
|
137
|
}
|
138
|
|
139
|
return CONTINUE;
|
140
|
}
|
141
|
|
142
|
protected ClarinFileVisitor() {
|
143
|
super();
|
144
|
}
|
145
|
|
146
|
public ClarinFileVisitor(final String clarinDatasourceProfileID, final String clarinDatasourceOriginalId,
|
147
|
final String inputBaseUrlPrefix, final String metadataIdentifierPath, final List<String> interfaces,
|
148
|
final UniqueServiceLocator locator, final ISLookUpClient lookupClient) {
|
149
|
super();
|
150
|
this.clarinDatasourceOriginalId = clarinDatasourceOriginalId;
|
151
|
this.clarinDatasourceProfileID = clarinDatasourceProfileID;
|
152
|
this.inputBaseUrlPrefix = inputBaseUrlPrefix;
|
153
|
this.metadataIdentifierPath = metadataIdentifierPath;
|
154
|
this.interfaces = interfaces;
|
155
|
this.dsMan = locator.getService(DatasourceManagerService.class);
|
156
|
this.registryService = locator.getService(ISRegistryService.class);
|
157
|
this.lookupClient = lookupClient;
|
158
|
}
|
159
|
|
160
|
public List<String> getInterfaces() {
|
161
|
return interfaces;
|
162
|
}
|
163
|
|
164
|
public void setInterfaces(final List<String> interfaces) {
|
165
|
this.interfaces = interfaces;
|
166
|
}
|
167
|
|
168
|
public String getClarinDatasourceOriginalId() {
|
169
|
return clarinDatasourceOriginalId;
|
170
|
}
|
171
|
|
172
|
public void setClarinDatasourceOriginalId(final String clarinDatasourceOriginalId) {
|
173
|
this.clarinDatasourceOriginalId = clarinDatasourceOriginalId;
|
174
|
}
|
175
|
|
176
|
public DatasourceManagerService getDsMan() {
|
177
|
return dsMan;
|
178
|
}
|
179
|
|
180
|
public void setDsMan(final DatasourceManagerService dsMan) {
|
181
|
this.dsMan = dsMan;
|
182
|
}
|
183
|
|
184
|
public String getInputBaseUrlPrefix() {
|
185
|
return inputBaseUrlPrefix;
|
186
|
}
|
187
|
|
188
|
public void setInputBaseUrlPrefix(final String inputBaseUrlPrefix) {
|
189
|
this.inputBaseUrlPrefix = inputBaseUrlPrefix;
|
190
|
}
|
191
|
|
192
|
public String getMetadataIdentifierPath() {
|
193
|
return metadataIdentifierPath;
|
194
|
}
|
195
|
|
196
|
public void setMetadataIdentifierPath(final String metadataIdentifierPath) {
|
197
|
this.metadataIdentifierPath = metadataIdentifierPath;
|
198
|
}
|
199
|
|
200
|
public String getClarinDatasourceProfileID() {
|
201
|
return clarinDatasourceProfileID;
|
202
|
}
|
203
|
|
204
|
public void setClarinDatasourceProfileID(final String clarinDatasourceProfileID) {
|
205
|
this.clarinDatasourceProfileID = clarinDatasourceProfileID;
|
206
|
}
|
207
|
|
208
|
public ISLookUpClient getLookupClient() {
|
209
|
return lookupClient;
|
210
|
}
|
211
|
|
212
|
public void setLookupClient(final ISLookUpClient lookupClient) {
|
213
|
this.lookupClient = lookupClient;
|
214
|
}
|
215
|
|
216
|
public ISRegistryService getRegistryService() {
|
217
|
return registryService;
|
218
|
}
|
219
|
|
220
|
public void setRegistryService(final ISRegistryService registryService) {
|
221
|
this.registryService = registryService;
|
222
|
}
|
223
|
|
224
|
public int getCountVisitedFiles() {
|
225
|
return countVisitedFiles;
|
226
|
}
|
227
|
|
228
|
public void setCountVisitedFiles(final int countVisitedFiles) {
|
229
|
this.countVisitedFiles = countVisitedFiles;
|
230
|
}
|
231
|
|
232
|
public int getCountCreatedTDS() {
|
233
|
return countCreatedTDS;
|
234
|
}
|
235
|
|
236
|
public void setCountCreatedTDS(final int countCreatedTDS) {
|
237
|
this.countCreatedTDS = countCreatedTDS;
|
238
|
}
|
239
|
|
240
|
public int getCountUpdatedTDS() {
|
241
|
return countUpdatedTDS;
|
242
|
}
|
243
|
|
244
|
public void setCountUpdatedTDS(final int countUpdatedTDS) {
|
245
|
this.countUpdatedTDS = countUpdatedTDS;
|
246
|
}
|
247
|
|
248
|
public int getCountVisitedFolders() {
|
249
|
return countVisitedFolders;
|
250
|
}
|
251
|
|
252
|
public void setCountVisitedFolders(final int countVisitedFolders) {
|
253
|
this.countVisitedFolders = countVisitedFolders;
|
254
|
}
|
255
|
|
256
|
public int getCountCreatedInterfaces() {
|
257
|
return countCreatedInterfaces;
|
258
|
}
|
259
|
|
260
|
public void setCountCreatedInterfaces(final int countCreatedInterfaces) {
|
261
|
this.countCreatedInterfaces = countCreatedInterfaces;
|
262
|
}
|
263
|
}
|