Project

General

Profile

1
package eu.dnetlib.msro.workflows.nodes;
2

    
3
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.stream.StreamSupport;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.springframework.beans.factory.annotation.Autowired;

import eu.dnetlib.data.transformation.service.DataTransformerFactory;
import eu.dnetlib.data.transformation.service.SimpleDataTransformer;
import eu.dnetlib.enabling.resultset.client.ResultSetClient;
import eu.dnetlib.msro.workflows.graph.Arc;
import eu.dnetlib.msro.workflows.procs.Env;
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
import eu.dnetlib.rmi.common.ResultSet;
30

    
31
public class GeneratePersonReportJobNode extends SimpleJobNode {
32

    
33
	private String inputEprParam;
34

    
35
	private String ruleId;
36

    
37
	@Autowired
38
	private DataTransformerFactory dataTransformerFactory;
39

    
40
	@Autowired
41
	private ResultSetClient resultSetClient;
42

    
43
	private static final Log log = LogFactory.getLog(GeneratePersonReportJobNode.class);
44

    
45
	protected class SinglePersonReport {
46

    
47
		private final String id;
48
		private final String title;
49
		private final List<String> oldCreators = new ArrayList<>();
50
		private final List<String> newCreators = new ArrayList<>();
51

    
52
		public SinglePersonReport(final String id, final String title) {
53
			this.id = id;
54
			this.title = title;
55
		}
56

    
57
		public String getId() {
58
			return id;
59
		}
60

    
61
		public String getTitle() {
62
			return title;
63
		}
64

    
65
		public List<String> getOldCreators() {
66
			return oldCreators;
67
		}
68

    
69
		public List<String> getNewCreators() {
70
			return newCreators;
71
		}
72

    
73
		public SinglePersonReport removeDuplicates() {
74

    
75
			final List<String> list = new ArrayList<>();
76
			list.addAll(oldCreators);
77
			list.addAll(newCreators);
78

    
79
			for (final String s : list) {
80
				if (oldCreators.contains(s) && newCreators.contains(s)) {
81
					newCreators.remove(s);
82
					oldCreators.remove(s);
83
				}
84
			}
85

    
86
			return this;
87
		}
88

    
89
		public boolean hasCorrections() {
90
			return (oldCreators.size() > 0) || (newCreators.size() > 0);
91
		}
92

    
93
		@Override
94
		public String toString() {
95
			final StringWriter sw = new StringWriter();
96
			sw.write("** ");
97
			sw.write(id);
98
			sw.write(" - ");
99
			sw.write(title);
100
			sw.write(" **\n");
101

    
102
			if (oldCreators.size() > 0) {
103
				sw.write("ORIGINAL:\n");
104
				for (int i = 0; i < oldCreators.size(); i++) {
105
					final String s = oldCreators.get(i);
106
					sw.write(String.format("%5d) %s\n", i + 1, StringUtils.isNotBlank(s) ? s : "[empty]"));
107
				}
108
			}
109
			if (newCreators.size() > 0) {
110
				sw.write("CORRECTED:\n");
111
				for (int i = 0; i < newCreators.size(); i++) {
112
					final String s = newCreators.get(i);
113
					sw.write(String.format("%5d) %s\n", i + 1, StringUtils.isNotBlank(s) ? s : "[empty]"));
114
				}
115
			}
116
			return sw.toString();
117
		}
118

    
119
	}
120

    
121
	@Override
122
	protected String execute(final Env env) throws Exception {
123

    
124
		@SuppressWarnings("unchecked")
125
		final ResultSet<String> rsIn = env.getAttribute(inputEprParam, ResultSet.class);
126

    
127
		final SimpleDataTransformer f = dataTransformerFactory.createTransformer(ruleId);
128
		final SAXReader reader = new SAXReader();
129

    
130
		final String fileName = "/tmp/report_" + (new SimpleDateFormat("yyyyMMdd_HHmmss_S")).format(new Date()) + ".txt";
131

    
132
		try (PrintWriter pw = new PrintWriter(Files.newBufferedWriter(Paths.get(fileName), StandardCharsets.UTF_8))) {
133
			StreamSupport.stream(resultSetClient.iter(rsIn, String.class).spliterator(), false).map(
134
					xml -> {
135
						try {
136
							final Document docOld = reader.read(new StringReader(xml));
137
							final Document docNew = reader.read(new StringReader(f.apply(xml)));
138

    
139
							final String id = docNew.valueOf("//*[local-name() = 'recordIdentifier']");
140
							final String title = docNew.valueOf("//*[local-name() = 'title']");
141

    
142
							final SinglePersonReport report = new SinglePersonReport(id, title);
143
							for (final Object o : docOld.selectNodes("//*[local-name() = 'creator']")) {
144
								final String p1 = ((Node) o).valueOf("./*[local-name() = 'surname']").trim();
145
								final String p2 = ((Node) o).valueOf("./*[local-name() = 'name']").trim();
146
								final String author = StringUtils.isEmpty(p1) && StringUtils.isEmpty(p2) ? ((Node) o).getText().trim() : (p1 + " " + p2).trim();
147
								report.getOldCreators().add(author);
148
							}
149
							for (final Object o : docNew.selectNodes("//*[local-name() = 'creator']")) {
150
								final String author = ((Node) o).valueOf("./*[local-name() = 'creatorName']").trim();
151
								report.getNewCreators().add(author);
152
							}
153
							return report;
154
						} catch (final Exception e) {
155
							throw new RuntimeException(e);
156
						}
157
					})
158
					.map(SinglePersonReport::removeDuplicates)
159
					.filter(SinglePersonReport::hasCorrections)
160
					.forEach(pw::println);
161
		}
162

    
163
		env.setAttribute(WorkflowsConstants.MAIN_LOG_PREFIX + "report", fileName);
164

    
165
		log.info("Saved a person report in " + fileName);
166

    
167
		return Arc.DEFAULT_ARC;
168
	}
169

    
170
	public String getInputEprParam() {
171
		return inputEprParam;
172
	}
173

    
174
	public void setInputEprParam(final String inputEprParam) {
175
		this.inputEprParam = inputEprParam;
176
	}
177

    
178
	public String getRuleId() {
179
		return ruleId;
180
	}
181

    
182
	public void setRuleId(final String ruleId) {
183
		this.ruleId = ruleId;
184
	}
185

    
186
}
(2-2/4)