1
|
package eu.dnetlib.data.mdstore.plugins;
|
2
|
|
3
|
import java.io.ByteArrayInputStream;
|
4
|
import java.io.IOException;
|
5
|
import java.io.InputStream;
|
6
|
import java.util.Arrays;
|
7
|
import java.util.List;
|
8
|
import java.util.Map;
|
9
|
import java.util.Set;
|
10
|
import java.util.stream.Collectors;
|
11
|
|
12
|
import org.apache.commons.lang3.StringUtils;
|
13
|
import org.apache.commons.logging.Log;
|
14
|
import org.apache.commons.logging.LogFactory;
|
15
|
import org.apache.commons.net.ftp.FTPClient;
|
16
|
import org.apache.commons.net.ftp.FTPFile;
|
17
|
import org.apache.commons.net.ftp.FTPSClient;
|
18
|
|
19
|
import com.google.common.net.UrlEscapers;
|
20
|
|
21
|
import eu.dnetlib.data.mdstore.plugins.objects.MdRecord;
|
22
|
import eu.dnetlib.data.mdstore.plugins.objects.MyURL;
|
23
|
|
24
|
public class EnrichLocalLinksPlugin extends MdRecordPlugin {
|
25
|
|
26
|
private static final String DEFAULT_RIGHTS = "Open Access";
|
27
|
|
28
|
private static final String INFO_FILENAME = "info.txt";
|
29
|
|
30
|
private static final Log log = LogFactory.getLog(EnrichLocalLinksPlugin.class);
|
31
|
|
32
|
private FTPClient ftpClient;
|
33
|
|
34
|
private String hostedBy;
|
35
|
private String baseUrl;
|
36
|
|
37
|
private String ftpServer;
|
38
|
private String ftpUser;
|
39
|
private String ftpPassword;
|
40
|
private String ftpBaseDir;
|
41
|
|
42
|
@Override
|
43
|
protected void reconfigure(final Map<String, String> params) {
|
44
|
setHostedBy(params.get("hostedBy"));
|
45
|
setBaseUrl(params.get("baseUrl"));
|
46
|
setFtpServer(params.get("ftpServer"));
|
47
|
setFtpUser(params.get("ftpUser"));
|
48
|
setFtpPassword(params.get("ftpPassword"));
|
49
|
setFtpBaseDir(params.get("ftpBaseDir"));
|
50
|
|
51
|
if (!getFtpBaseDir().startsWith("/")) {
|
52
|
setFtpBaseDir("/" + getFtpBaseDir());
|
53
|
}
|
54
|
|
55
|
try {
|
56
|
ftpClient = new FTPSClient();
|
57
|
ftpClient.connect(getFtpServer());
|
58
|
// Set protection buffer size
|
59
|
((FTPSClient) ftpClient).execPBSZ(0);
|
60
|
// Set data channel protection to private
|
61
|
((FTPSClient) ftpClient).execPROT("P");
|
62
|
|
63
|
if (!ftpClient.login(getFtpUser(), getFtpPassword())) {
|
64
|
// I RETRY USING not secure FTP
|
65
|
ftpClient = new FTPClient();
|
66
|
ftpClient.connect(getFtpServer());
|
67
|
if (!ftpClient.login(getFtpUser(), getFtpPassword())) { throw new RuntimeException("FTP login failed"); }
|
68
|
}
|
69
|
|
70
|
ftpClient.setFileType(FTPClient.BINARY_FILE_TYPE);
|
71
|
ftpClient.enterLocalPassiveMode();
|
72
|
ftpClient.setBufferSize(1024);
|
73
|
|
74
|
log.info("Connected to " + ftpServer);
|
75
|
} catch (final IOException e) {
|
76
|
log.error("Connection Failed");
|
77
|
throw new RuntimeException(e);
|
78
|
}
|
79
|
}
|
80
|
|
81
|
@Override
|
82
|
protected void resetConfiguration() {
|
83
|
if (ftpClient.isConnected()) {
|
84
|
try {
|
85
|
ftpClient.disconnect();
|
86
|
log.info("Disconnected from " + ftpServer);
|
87
|
} catch (final IOException e) {
|
88
|
log.error("Disconnection Failed");
|
89
|
throw new RuntimeException(e);
|
90
|
}
|
91
|
}
|
92
|
setHostedBy(null);
|
93
|
setBaseUrl(null);
|
94
|
setFtpServer(null);
|
95
|
setFtpUser(null);
|
96
|
setFtpPassword(null);
|
97
|
setFtpBaseDir(null);
|
98
|
setFtpClient(null);
|
99
|
}
|
100
|
|
101
|
@Override
|
102
|
protected boolean updateRecord(final String recordId, final MdRecord doc) {
|
103
|
|
104
|
final int year = doc.getDate();
|
105
|
final String code = StringUtils.substringAfterLast(doc.getId(), ":");
|
106
|
|
107
|
log.info(String.format("Processing record: %s (%s/%s)", doc.getId(), year, code));
|
108
|
|
109
|
final List<String> files = touchAndListDir(doc.getTitle(), doc.getCreators(), doc.getType(), year, code);
|
110
|
|
111
|
if (files.isEmpty()) {
|
112
|
return false;
|
113
|
} else {
|
114
|
log.info(" - adding new urls: " + files.size());
|
115
|
doc.setBestRights(DEFAULT_RIGHTS);
|
116
|
for (final String f : files) {
|
117
|
doc.getUrls().add(new MyURL(calculateUrl(recordId, code, year, f), getHostedBy(), DEFAULT_RIGHTS));
|
118
|
}
|
119
|
}
|
120
|
return true;
|
121
|
}
|
122
|
|
123
|
private List<String> touchAndListDir(final String title, final Set<String> authors, final String type, final int year, final String code) {
|
124
|
final String content = String.format(
|
125
|
"TITLE : %s\nAUTHOR(S) : %s\nTYPE : %s\nYEAR : %s\nCODE : %s\n\n*** DO NOT EDIT THIS FILE ***\n\n",
|
126
|
title,
|
127
|
StringUtils.join(authors, ", "),
|
128
|
type,
|
129
|
year,
|
130
|
code);
|
131
|
|
132
|
if (ftpChangeDir(getFtpBaseDir()) && ftpChangeDir(Integer.toString(year)) && ftpChangeDir(code)) {
|
133
|
|
134
|
try (InputStream is = new ByteArrayInputStream(content.getBytes())) {
|
135
|
if (log.isDebugEnabled()) {
|
136
|
log.debug(String.format(" - Saving file %s/%s/%s/%s", getFtpBaseDir(), year, code, INFO_FILENAME));
|
137
|
log.debug(content);
|
138
|
}
|
139
|
if (!ftpClient.storeFile(INFO_FILENAME, is)) {
|
140
|
log.error("Error saving file: " + ftpClient.getReplyCode() + " - " + ftpClient.getReplyString());
|
141
|
throw new RuntimeException("Error saving file: " + ftpClient.getReplyString());
|
142
|
}
|
143
|
} catch (final IOException e) {
|
144
|
log.error("Error saving info file");
|
145
|
throw new RuntimeException("Error saving info file", e);
|
146
|
}
|
147
|
|
148
|
try {
|
149
|
return Arrays.stream(ftpClient.listFiles())
|
150
|
.map(FTPFile::getName)
|
151
|
.filter(s -> s.toLowerCase().endsWith(".pdf"))
|
152
|
.sorted()
|
153
|
.collect(Collectors.toList());
|
154
|
} catch (final IOException e) {
|
155
|
log.error("Error listing files");
|
156
|
throw new RuntimeException("Error listing files", e);
|
157
|
}
|
158
|
} else {
|
159
|
log.error(String.format("Directory not found: %s/%s/%s", getFtpBaseDir(), year, code));
|
160
|
throw new RuntimeException(String.format("Directory not found: %s/%s/%s", getFtpBaseDir(), year, code));
|
161
|
}
|
162
|
|
163
|
}
|
164
|
|
165
|
private boolean ftpChangeDir(final String dir) {
|
166
|
try {
|
167
|
if (!ftpClient.changeWorkingDirectory(dir)) {
|
168
|
ftpClient.makeDirectory(dir);
|
169
|
return ftpClient.changeWorkingDirectory(dir);
|
170
|
}
|
171
|
return true;
|
172
|
} catch (final IOException e) {
|
173
|
log.error("Error changing or create dir: " + dir);
|
174
|
throw new RuntimeException("Error changing or create dir: " + dir, e);
|
175
|
}
|
176
|
}
|
177
|
|
178
|
private String calculateUrl(final String id, final String code, final int year, final String f) {
|
179
|
// the parameter ID is necessary for a better integration with OpenAIRE
|
180
|
return String.format("%s/%s/%s/%s?id=%s", getBaseUrl(), year, code, UrlEscapers.urlPathSegmentEscaper().escape(f),
|
181
|
UrlEscapers.urlFormParameterEscaper().escape(id));
|
182
|
}
|
183
|
|
184
|
public String getHostedBy() {
|
185
|
return hostedBy;
|
186
|
}
|
187
|
|
188
|
public void setHostedBy(final String hostedBy) {
|
189
|
this.hostedBy = hostedBy;
|
190
|
}
|
191
|
|
192
|
public String getBaseUrl() {
|
193
|
return baseUrl;
|
194
|
}
|
195
|
|
196
|
public void setBaseUrl(final String baseUrl) {
|
197
|
this.baseUrl = baseUrl;
|
198
|
}
|
199
|
|
200
|
public String getFtpBaseDir() {
|
201
|
return ftpBaseDir;
|
202
|
}
|
203
|
|
204
|
public void setFtpBaseDir(final String ftpBaseDir) {
|
205
|
this.ftpBaseDir = ftpBaseDir;
|
206
|
}
|
207
|
|
208
|
public FTPClient getFtpClient() {
|
209
|
return ftpClient;
|
210
|
}
|
211
|
|
212
|
public void setFtpClient(final FTPClient ftpClient) {
|
213
|
this.ftpClient = ftpClient;
|
214
|
}
|
215
|
|
216
|
public String getFtpServer() {
|
217
|
return ftpServer;
|
218
|
}
|
219
|
|
220
|
public void setFtpServer(final String ftpServer) {
|
221
|
this.ftpServer = ftpServer;
|
222
|
}
|
223
|
|
224
|
public String getFtpUser() {
|
225
|
return ftpUser;
|
226
|
}
|
227
|
|
228
|
public void setFtpUser(final String ftpUser) {
|
229
|
this.ftpUser = ftpUser;
|
230
|
}
|
231
|
|
232
|
public String getFtpPassword() {
|
233
|
return ftpPassword;
|
234
|
}
|
235
|
|
236
|
public void setFtpPassword(final String ftpPassword) {
|
237
|
this.ftpPassword = ftpPassword;
|
238
|
}
|
239
|
|
240
|
}
|