1 |
50297
|
alessia.ba
|
package eu.dnetlib.parthenos.workflows.nodes;
|
2 |
|
|
|
3 |
|
|
import java.io.IOException;
|
4 |
|
|
import java.nio.charset.Charset;
|
5 |
|
|
import java.nio.file.FileVisitResult;
|
6 |
|
|
import java.nio.file.Files;
|
7 |
|
|
import java.nio.file.Path;
|
8 |
|
|
import java.nio.file.SimpleFileVisitor;
|
9 |
|
|
import java.nio.file.attribute.BasicFileAttributes;
|
10 |
|
|
import java.util.List;
|
11 |
|
|
import java.util.Map;
|
12 |
|
|
import java.util.stream.Collectors;
|
13 |
|
|
|
14 |
|
|
import com.google.common.collect.Lists;
|
15 |
|
|
import com.google.common.collect.Maps;
|
16 |
|
|
import eu.dnetlib.clients.enabling.ISLookUpClient;
|
17 |
|
|
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
|
18 |
|
|
import eu.dnetlib.miscutils.datetime.DateUtils;
|
19 |
|
|
import eu.dnetlib.rmi.datasource.DatasourceManagerService;
|
20 |
|
|
import eu.dnetlib.rmi.datasource.DatasourceManagerServiceException;
|
21 |
|
|
import eu.dnetlib.rmi.datasource.IfaceDesc;
|
22 |
|
|
import eu.dnetlib.rmi.enabling.ISLookUpException;
|
23 |
|
|
import eu.dnetlib.rmi.enabling.ISRegistryException;
|
24 |
|
|
import eu.dnetlib.rmi.enabling.ISRegistryService;
|
25 |
|
|
import org.antlr.stringtemplate.StringTemplate;
|
26 |
|
|
import org.apache.commons.io.IOUtils;
|
27 |
|
|
import org.apache.commons.lang3.StringUtils;
|
28 |
|
|
import org.apache.commons.logging.Log;
|
29 |
|
|
import org.apache.commons.logging.LogFactory;
|
30 |
|
|
|
31 |
|
|
import static java.nio.file.FileVisitResult.CONTINUE;
|
32 |
|
|
|
33 |
|
|
/**
|
34 |
|
|
* Created by Alessia Bardi on 12/01/2018.
|
35 |
|
|
*
|
36 |
|
|
* @author Alessia Bardi
|
37 |
|
|
*/
|
38 |
|
|
public class ClarinFileVisitor extends SimpleFileVisitor<Path> {
|
39 |
|
|
|
40 |
|
|
private static final Log log = LogFactory.getLog(ClarinFileVisitor.class);
|
41 |
|
|
private static final String API_PREFIX = "api_________::";
|
42 |
|
|
private static final String TDS_TEMPLATE ="/eu/dnetlib/parthenos/workflows/nodes/clarin_tds.xml.st";
|
43 |
|
|
|
44 |
|
|
private List<String> interfaces = Lists.newArrayList();
|
45 |
|
|
private String clarinDatasourceProfileID;
|
46 |
|
|
private String clarinDatasourceOriginalId;
|
47 |
|
|
private DatasourceManagerService dsMan = null;
|
48 |
|
|
private String inputBaseUrlPrefix;
|
49 |
|
|
private String metadataIdentifierPath;
|
50 |
|
|
private ISLookUpClient lookupClient;
|
51 |
|
|
private ISRegistryService registryService;
|
52 |
|
|
|
53 |
|
|
private int countVisitedFiles = 0;
|
54 |
|
|
private int countCreatedTDS = 0;
|
55 |
|
|
private int countUpdatedTDS = 0;
|
56 |
|
|
private int countVisitedFolders = 0;
|
57 |
|
|
private int countCreatedInterfaces = 0;
|
58 |
|
|
|
59 |
|
|
|
60 |
|
|
@Override
|
61 |
|
|
public FileVisitResult preVisitDirectory(final Path dir, final BasicFileAttributes attrs) throws IOException {
|
62 |
|
|
log.info("Processing " + dir.toString());
|
63 |
|
|
countVisitedFolders++;
|
64 |
|
|
String dirName = dir.getFileName().toString();
|
65 |
50403
|
alessia.ba
|
if(dirName.equalsIgnoreCase("x3ml-mappings")) return CONTINUE;
|
66 |
50297
|
alessia.ba
|
String apiId = API_PREFIX + getClarinDatasourceOriginalId() + "::" + dirName;
|
67 |
|
|
if (!interfaces.contains(apiId)) {
|
68 |
|
|
//API TO BE CREATED
|
69 |
|
|
IfaceDesc iface = new IfaceDesc();
|
70 |
|
|
iface.setActive(false);
|
71 |
|
|
iface.setCompliance("metadata");
|
72 |
|
|
iface.setContentDescription("metadata");
|
73 |
|
|
iface.setId(apiId);
|
74 |
|
|
iface.setRemovable(true);
|
75 |
50300
|
alessia.ba
|
iface.setTypology("dnet:repository::clarin");
|
76 |
50297
|
alessia.ba
|
iface.setAccessProtocol("filesystem");
|
77 |
|
|
Map<String, String> accessParams = Maps.newHashMap();
|
78 |
|
|
accessParams.put("extensions", "xml");
|
79 |
|
|
iface.setAccessParams(accessParams);
|
80 |
|
|
iface.setBaseUrl(StringUtils.appendIfMissing(getInputBaseUrlPrefix(), "/") + dirName);
|
81 |
|
|
Map<String, String> extraFields = Maps.newHashMap();
|
82 |
|
|
extraFields.put("metadata_identifier_path", getMetadataIdentifierPath());
|
83 |
|
|
iface.setExtraFields(extraFields);
|
84 |
|
|
try {
|
85 |
|
|
dsMan.addInterface(getClarinDatasourceProfileID(), iface);
|
86 |
|
|
countCreatedInterfaces++;
|
87 |
50404
|
alessia.ba
|
log.info("CREATED NEW INTERFACE " + iface.getId() + " for " + getClarinDatasourceOriginalId() + "(" + getClarinDatasourceProfileID() + ")");
|
88 |
50297
|
alessia.ba
|
} catch (DatasourceManagerServiceException e) {
|
89 |
|
|
log.error("Can't add interface " + iface.getId() + " to " + getClarinDatasourceOriginalId() + "(" + getClarinDatasourceProfileID() + ")", e);
|
90 |
|
|
return CONTINUE;
|
91 |
|
|
}
|
92 |
|
|
} else {
|
93 |
|
|
if (log.isDebugEnabled()) {
|
94 |
|
|
log.debug("Interface " + apiId + " already exists");
|
95 |
|
|
}
|
96 |
|
|
}
|
97 |
|
|
return CONTINUE;
|
98 |
|
|
}
|
99 |
|
|
|
100 |
|
|
@Override
|
101 |
|
|
public FileVisitResult visitFile(final Path file, final BasicFileAttributes attrs) throws IOException {
|
102 |
|
|
String filename = file.getFileName().toString();
|
103 |
|
|
log.info("Processing " + file.toString());
|
104 |
|
|
countVisitedFiles++;
|
105 |
|
|
String tdsTitle = filename;
|
106 |
|
|
//call Files.lines which will use a stream to iterate over each line of the file.
|
107 |
|
|
//Next we will convert the stream to a string by calling Collectors.joining() which will join all the strings together.
|
108 |
|
|
String updatedCode = Files.lines(file).collect(Collectors.joining()).replace("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>", "");
|
109 |
|
|
try {
|
110 |
|
|
List<String> res = this.lookupClient.search("//RESOURCE_PROFILE[.//RESOURCE_TYPE/@value=\"TransformationRuleDSResourceType\" and .//CONFIGURATION/SCRIPT/TITLE/string()=\""+tdsTitle+"\"]/HEADER/RESOURCE_IDENTIFIER/@value/string()");
|
111 |
|
|
if(res == null || res.isEmpty()){
|
112 |
|
|
log.debug("Creating new TDS profile for "+filename);
|
113 |
|
|
final String template = IOUtils.toString(getClass().getResourceAsStream(TDS_TEMPLATE), Charset.forName("UTF-8"));
|
114 |
|
|
final StringTemplate st = new StringTemplate(template);
|
115 |
|
|
st.setAttribute("date", DateUtils.calculate_ISO8601(DateUtils.now()));
|
116 |
|
|
st.setAttribute("title", tdsTitle);
|
117 |
|
|
st.setAttribute("mapping", "<![CDATA["+updatedCode+"]]>");
|
118 |
|
|
String profId = this.registryService.registerProfile(st.toString());
|
119 |
|
|
countCreatedTDS++;
|
120 |
50404
|
alessia.ba
|
log.info("REGISTERED NEW TDS FOR "+filename+": "+profId);
|
121 |
50297
|
alessia.ba
|
}
|
122 |
|
|
else{
|
123 |
|
|
String tdsProfileId = res.get(0);
|
124 |
|
|
log.debug("Updating TDS profile "+tdsProfileId+"for "+filename);
|
125 |
|
|
|
126 |
|
|
boolean done = this.registryService.updateProfileNode(tdsProfileId, "//CONFIGURATION/SCRIPT/CODE", "<CODE><![CDATA["+updatedCode+"]]></CODE>");
|
127 |
|
|
if(done){
|
128 |
50404
|
alessia.ba
|
log.info("TDS PROFILE "+tdsProfileId+" UPDATED with contents from "+filename);
|
129 |
50297
|
alessia.ba
|
countUpdatedTDS++;
|
130 |
|
|
}
|
131 |
|
|
if(!done){
|
132 |
|
|
log.error("!!! TDS PROFILE "+tdsProfileId+" COULD NOT BE UPDATED with contents from "+filename);
|
133 |
|
|
}
|
134 |
|
|
}
|
135 |
|
|
} catch (ISLookUpException | ISRegistryException e) {
|
136 |
|
|
log.error("CANNOT UPDATE/CREATE TDS PROFILE FOR "+filename, e);
|
137 |
|
|
}
|
138 |
|
|
|
139 |
|
|
return CONTINUE;
|
140 |
|
|
}
|
141 |
|
|
|
142 |
|
|
protected ClarinFileVisitor() {
|
143 |
|
|
super();
|
144 |
|
|
}
|
145 |
|
|
|
146 |
|
|
public ClarinFileVisitor(final String clarinDatasourceProfileID, final String clarinDatasourceOriginalId,
|
147 |
|
|
final String inputBaseUrlPrefix, final String metadataIdentifierPath, final List<String> interfaces,
|
148 |
|
|
final UniqueServiceLocator locator, final ISLookUpClient lookupClient) {
|
149 |
|
|
super();
|
150 |
|
|
this.clarinDatasourceOriginalId = clarinDatasourceOriginalId;
|
151 |
|
|
this.clarinDatasourceProfileID = clarinDatasourceProfileID;
|
152 |
|
|
this.inputBaseUrlPrefix = inputBaseUrlPrefix;
|
153 |
|
|
this.metadataIdentifierPath = metadataIdentifierPath;
|
154 |
|
|
this.interfaces = interfaces;
|
155 |
|
|
this.dsMan = locator.getService(DatasourceManagerService.class);
|
156 |
|
|
this.registryService = locator.getService(ISRegistryService.class);
|
157 |
|
|
this.lookupClient = lookupClient;
|
158 |
|
|
}
|
159 |
|
|
|
160 |
|
|
public List<String> getInterfaces() {
|
161 |
|
|
return interfaces;
|
162 |
|
|
}
|
163 |
|
|
|
164 |
|
|
public void setInterfaces(final List<String> interfaces) {
|
165 |
|
|
this.interfaces = interfaces;
|
166 |
|
|
}
|
167 |
|
|
|
168 |
|
|
public String getClarinDatasourceOriginalId() {
|
169 |
|
|
return clarinDatasourceOriginalId;
|
170 |
|
|
}
|
171 |
|
|
|
172 |
|
|
public void setClarinDatasourceOriginalId(final String clarinDatasourceOriginalId) {
|
173 |
|
|
this.clarinDatasourceOriginalId = clarinDatasourceOriginalId;
|
174 |
|
|
}
|
175 |
|
|
|
176 |
|
|
public DatasourceManagerService getDsMan() {
|
177 |
|
|
return dsMan;
|
178 |
|
|
}
|
179 |
|
|
|
180 |
|
|
public void setDsMan(final DatasourceManagerService dsMan) {
|
181 |
|
|
this.dsMan = dsMan;
|
182 |
|
|
}
|
183 |
|
|
|
184 |
|
|
public String getInputBaseUrlPrefix() {
|
185 |
|
|
return inputBaseUrlPrefix;
|
186 |
|
|
}
|
187 |
|
|
|
188 |
|
|
public void setInputBaseUrlPrefix(final String inputBaseUrlPrefix) {
|
189 |
|
|
this.inputBaseUrlPrefix = inputBaseUrlPrefix;
|
190 |
|
|
}
|
191 |
|
|
|
192 |
|
|
public String getMetadataIdentifierPath() {
|
193 |
|
|
return metadataIdentifierPath;
|
194 |
|
|
}
|
195 |
|
|
|
196 |
|
|
public void setMetadataIdentifierPath(final String metadataIdentifierPath) {
|
197 |
|
|
this.metadataIdentifierPath = metadataIdentifierPath;
|
198 |
|
|
}
|
199 |
|
|
|
200 |
|
|
public String getClarinDatasourceProfileID() {
|
201 |
|
|
return clarinDatasourceProfileID;
|
202 |
|
|
}
|
203 |
|
|
|
204 |
|
|
public void setClarinDatasourceProfileID(final String clarinDatasourceProfileID) {
|
205 |
|
|
this.clarinDatasourceProfileID = clarinDatasourceProfileID;
|
206 |
|
|
}
|
207 |
|
|
|
208 |
|
|
public ISLookUpClient getLookupClient() {
|
209 |
|
|
return lookupClient;
|
210 |
|
|
}
|
211 |
|
|
|
212 |
|
|
public void setLookupClient(final ISLookUpClient lookupClient) {
|
213 |
|
|
this.lookupClient = lookupClient;
|
214 |
|
|
}
|
215 |
|
|
|
216 |
|
|
public ISRegistryService getRegistryService() {
|
217 |
|
|
return registryService;
|
218 |
|
|
}
|
219 |
|
|
|
220 |
|
|
public void setRegistryService(final ISRegistryService registryService) {
|
221 |
|
|
this.registryService = registryService;
|
222 |
|
|
}
|
223 |
|
|
|
224 |
|
|
public int getCountVisitedFiles() {
|
225 |
|
|
return countVisitedFiles;
|
226 |
|
|
}
|
227 |
|
|
|
228 |
|
|
public void setCountVisitedFiles(final int countVisitedFiles) {
|
229 |
|
|
this.countVisitedFiles = countVisitedFiles;
|
230 |
|
|
}
|
231 |
|
|
|
232 |
|
|
public int getCountCreatedTDS() {
|
233 |
|
|
return countCreatedTDS;
|
234 |
|
|
}
|
235 |
|
|
|
236 |
|
|
public void setCountCreatedTDS(final int countCreatedTDS) {
|
237 |
|
|
this.countCreatedTDS = countCreatedTDS;
|
238 |
|
|
}
|
239 |
|
|
|
240 |
|
|
public int getCountUpdatedTDS() {
|
241 |
|
|
return countUpdatedTDS;
|
242 |
|
|
}
|
243 |
|
|
|
244 |
|
|
public void setCountUpdatedTDS(final int countUpdatedTDS) {
|
245 |
|
|
this.countUpdatedTDS = countUpdatedTDS;
|
246 |
|
|
}
|
247 |
|
|
|
248 |
|
|
public int getCountVisitedFolders() {
|
249 |
|
|
return countVisitedFolders;
|
250 |
|
|
}
|
251 |
|
|
|
252 |
|
|
public void setCountVisitedFolders(final int countVisitedFolders) {
|
253 |
|
|
this.countVisitedFolders = countVisitedFolders;
|
254 |
|
|
}
|
255 |
|
|
|
256 |
|
|
public int getCountCreatedInterfaces() {
|
257 |
|
|
return countCreatedInterfaces;
|
258 |
|
|
}
|
259 |
|
|
|
260 |
|
|
public void setCountCreatedInterfaces(final int countCreatedInterfaces) {
|
261 |
|
|
this.countCreatedInterfaces = countCreatedInterfaces;
|
262 |
|
|
}
|
263 |
|
|
}
|