Revision 57188
Added by Claudio Atzori over 4 years ago
modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/mapreduce/hbase/dataexport/RecordFilterTest.java | ||
---|---|---|
package eu.dnetlib.data.mapreduce.hbase.dataexport;

import com.google.gson.Gson;
import eu.dnetlib.data.mapreduce.hbase.bulktag.ProtoMap;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.junit.Assert;
import org.junit.Test;

import java.text.ParseException;
import java.util.Map;

/**
 * Unit tests for {@link RecordFilter}: verifies that a record XML document is accepted or
 * rejected depending on the configured XPath/regex criteria map and the acceptance-year range.
 */
public class RecordFilterTest {

	// NOTE(review): "\\\\s" in Java source compiles to the regex token "\\s" — a literal
	// backslash followed by 's', NOT the whitespace class "\s". This can only work if
	// RecordFilter unescapes the pattern before compiling it; TODO confirm against RecordFilter.
	private static final String REGEX = ".*\\\\s(interdisciplinar.*|transdisciplinary.*)\\\\s.*";

	// Baseline criteria shared by both tests: the record must not be deleted by inference
	// and the entity child element must be a result.
	private static final String DEFAULT_CRITERIA = "{ " +
			"\"/*[local-name() ='record']/*[local-name() ='result']/*[local-name() ='metadata']/*[local-name() ='entity']/*[local-name() ='result']/*[local-name() ='datainfo']/*[local-name() ='deletedbyinference']/text()\" : \"false\", " +
			"\"local-name(//*[local-name()='entity']/*)\" : \"result\" " +
			"}";

	// Topic criteria: at least one of title, description or subject text must match REGEX.
	private static final String CRITERIA = "{ " +
			"\"/*[local-name() ='record']/*[local-name() ='result']/*[local-name() ='metadata']/*[local-name() ='entity']/*[local-name() ='result']/*[local-name() ='title']/text()\" : \"" + REGEX + "\", " +
			"\"/*[local-name() ='record']/*[local-name() ='result']/*[local-name() ='metadata']/*[local-name() ='entity']/*[local-name() ='result']/*[local-name() ='description']/text()\" : \"" + REGEX + "\", " +
			"\"/*[local-name() ='record']/*[local-name() ='result']/*[local-name() ='metadata']/*[local-name() ='entity']/*[local-name() ='result']/*[local-name() ='subject']/text()\" : \"" + REGEX + "\" " +
			"}";

	// XPath locating the acceptance date used for the year-range check.
	private static final String YEAR_XPATH = "/*[local-name() ='record']/*[local-name() ='result']/*[local-name() ='metadata']/*[local-name() ='entity']/*[local-name() ='result']/*[local-name() ='dateofacceptance']";

	// Year range applied by every filter built in these tests (was inlined at each call site).
	private static final int FROM_YEAR = 1990;
	private static final int TO_YEAR = 2019;

	@Test
	public void recordNonMatchFilterTest() throws DocumentException, ParseException {
		final Document doc = readResource("non_match_record.xml");

		// Not deleted by inference -> accepted by the default criteria.
		Assert.assertTrue(defaultFilter().matches(doc, true));

		// No title/description/subject matches REGEX -> rejected by the topic criteria.
		Assert.assertFalse(topicFilter().matches(doc, false));
	}

	@Test
	public void recordMatchFilterTest() throws DocumentException, ParseException {
		final Document doc = readResource("match_record.xml");

		// Not deleted by inference -> accepted by the default criteria.
		Assert.assertTrue(defaultFilter().matches(doc, true));

		// Title/description contain INTERDISCIPLINAR* tokens -> accepted by the topic criteria.
		Assert.assertTrue(topicFilter().matches(doc, false));
	}

	/** Builds a filter from the shared default (non-topic) criteria. */
	private RecordFilter defaultFilter() throws ParseException {
		final Map<String, String> defaultCriteria = new Gson().fromJson(DEFAULT_CRITERIA, ProtoMap.class);
		return new RecordFilter(defaultCriteria, YEAR_XPATH, FROM_YEAR, TO_YEAR);
	}

	/** Builds a filter from the topic (REGEX-based) criteria. */
	private RecordFilter topicFilter() throws ParseException {
		return new RecordFilter(new Gson().fromJson(CRITERIA, ProtoMap.class), YEAR_XPATH, FROM_YEAR, TO_YEAR);
	}

	/** Reads a classpath test resource (relative to this class's package) into a dom4j Document. */
	private Document readResource(final String name) throws DocumentException {
		return new SAXReader().read(getClass().getResourceAsStream(name));
	}
}
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/mapreduce/hbase/dataexport/non_match_record.xml | ||
---|---|---|
1 |
<record rank="null"> |
|
2 |
<result xmlns:dri="http://www.driver-repository.eu/namespace/dri"> |
|
3 |
<header xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> |
|
4 |
<dri:objIdentifier>dedup_wf_001::3ed91398db93e83c7f2a2f09c1e229ce</dri:objIdentifier> |
|
5 |
<dri:dateOfCollection>2019-03-21T11:39:15.746Z</dri:dateOfCollection> |
|
6 |
<dri:dateOfTransformation>2019-03-21T11:46:25.499Z</dri:dateOfTransformation> |
|
7 |
<counters> |
|
8 |
<counter_similarity_inferred value="6"/> |
|
9 |
<counter_similarity value="6"/> |
|
10 |
<counter_dedup value="2"/> |
|
11 |
<counter_doi value="1"/> |
|
12 |
</counters> |
|
13 |
</header> |
|
14 |
<metadata> |
|
15 |
<oaf:entity xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:oaf="http://namespace.openaire.eu/oaf" xsi:schemaLocation="http://namespace.openaire.eu/oaf https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"> |
|
16 |
<oaf:result> |
|
17 |
<subject classid="keyword" classname="keyword" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies">T</subject> |
|
18 |
<subject classid="keyword" classname="keyword" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies">G</subject> |
|
19 |
<subject classid="keyword" classname="keyword" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies">GE1-350</subject> |
|
20 |
<subject classid="keyword" classname="keyword" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies">Geography. Anthropology. Recreation</subject> |
|
21 |
<subject classid="keyword" classname="keyword" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies">Environmental technology. Sanitary engineering</subject> |
|
22 |
<subject classid="keyword" classname="keyword" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies">Environmental sciences</subject> |
|
23 |
<subject classid="keyword" classname="keyword" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies">Technology</subject> |
|
24 |
<subject classid="keyword" classname="keyword" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies">TD1-1066</subject> |
|
25 |
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title"> |
|
26 |
Impacts of climate change under CMIP5 RCP scenarios on the streamflow in the Dinder River and ecosystem habitats in Dinder National Park, Sudan |
|
27 |
</title> |
|
28 |
<publisher>Copernicus Publications</publisher> |
|
29 |
<journal issn="1607-7938" eissn="1607-7938" lissn="" ep="" iss="" sp="" vol=""/> |
|
30 |
<dateofacceptance>2018-09-27</dateofacceptance> |
|
31 |
<language classid="eng" classname="English" schemeid="dnet:languages" schemename="dnet:languages"/> |
|
32 |
<resulttype classid="publication" classname="publication" schemeid="dnet:result_typologies" schemename="dnet:result_typologies"/> |
|
33 |
<creator rank="1" name="Amir K." surname="Basheer">Basheer, Amir K.</creator> |
|
34 |
<creator rank="2" name="Haishen" surname="Lu">Lu, Haishen</creator> |
|
35 |
<creator rank="3" name="Abubaker" surname="Omer">Omer, Abubaker</creator> |
|
36 |
<creator rank="4" name="Abubaker B." surname="Ali">Ali, Abubaker B.</creator> |
|
37 |
<creator rank="5" name="Abdeldime M. S." surname="Abdelgader">Abdelgader, Abdeldime M. S.</creator> |
|
38 |
<fulltext> |
|
39 |
file:///mnt/uploaded_dumps/copernicus/upload/hess-20-1331-2016.pdf |
|
40 |
</fulltext> |
|
41 |
<description> |
|
42 |
The fate of seasonal river ecosystem habitats under climate change essentially depends on the changes in annual recharge of the river, which are related to alterations in precipitation and evaporation over the river basin. Therefore, the change in climate conditions is expected to significantly affect hydrological and ecological components, particularly in fragmented ecosystems. This study aims to assess the impacts of climate change on the streamflow in the Dinder River basin (DRB) and to infer its relative possible effects on the Dinder National Park (DNP) ecosystem habitats in Sudan. Four global circulation models (GCMs) from Coupled Model Intercomparison Project Phase 5 and two statistical downscaling approaches combined with a hydrological model (SWAT ndash; the Soil and Water Assessment Tool) were used to project the climate change conditions over the study periods 2020s, 2050s, and 2080s. The results indicated that the climate over the DRB will become warmer and wetter under most scenarios. The projected precipitation variability mainly depends on the selected GCM and downscaling approach. Moreover, the projected streamflow is quite sensitive to rainfall and temperature variation, and will likely increase in this century. In contrast to drought periods during the 1960s, 1970s, and 1980s, the predicted climate change is likely to affect ecosystems in DNP positively and promote the ecological restoration for the habitats of flora and fauna. |
|
43 |
</description> |
|
44 |
<format>application/pdf</format> |
|
45 |
<source> |
|
46 |
Hydrology and Earth System Sciences, Vol 20, Iss 4, Pp 1331-1353 (2016) |
|
47 |
</source> |
|
48 |
<source>eISSN: 1607-7938</source> |
|
49 |
<country classid="" classname="" schemeid="" schemename=""/> |
|
50 |
<relevantdate classid="" classname="" schemeid="" schemename=""/> |
|
51 |
<embargoenddate/> |
|
52 |
<contributor/> |
|
53 |
<resourcetype classid="" classname="" schemeid="" schemename=""/> |
|
54 |
<coverage/> |
|
55 |
<refereed/> |
|
56 |
<storagedate/> |
|
57 |
<device/> |
|
58 |
<size/> |
|
59 |
<version/> |
|
60 |
<lastmetadataupdate/> |
|
61 |
<metadataversionnumber/> |
|
62 |
<documentationUrl/> |
|
63 |
<codeRepositoryUrl/> |
|
64 |
<programmingLanguage classid="" classname="" schemeid="" schemename=""/> |
|
65 |
<contactperson/> |
|
66 |
<contactgroup/> |
|
67 |
<tool/> |
|
68 |
<originalId> |
|
69 |
oai:doaj.org/article:8956bc6de28f4705af47f57ae01a0d6e |
|
70 |
</originalId> |
|
71 |
<originalId>oai:publications.copernicus.org:hess32404</originalId> |
|
72 |
<collectedfrom name="DOAJ-Articles" id="driver______::bee53aa31dc2cbb538c10c2b65fa5824"/> |
|
73 |
<collectedfrom name="Copernicus Publications" id="openaire____::5a38cb462ac487bf26bdb86009fe3e74"/> |
|
74 |
<pid classid="doi" classname="doi" schemeid="dnet:pid_types" schemename="dnet:pid_types">10.5194/hess-20-1331-2016</pid> |
|
75 |
<bestaccessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes" schemename="dnet:access_modes"/> |
|
76 |
<datainfo> |
|
77 |
<inferred>true</inferred> |
|
78 |
<deletedbyinference>false</deletedbyinference> |
|
79 |
<trust>0.9</trust> |
|
80 |
<inferenceprovenance>dedup-similarity-result-levenstein</inferenceprovenance> |
|
81 |
<provenanceaction classid="sysimport:dedup" classname="sysimport:dedup" schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/> |
|
82 |
</datainfo> |
|
83 |
<rels> |
|
84 |
<rel inferred="true" trust="0.9" inferenceprovenance="iis::document_similarities_standard" provenanceaction="iis"> |
|
85 |
<to class="hasAmongTopNSimilarDocuments" scheme="dnet:result_result_relations" type="result">copernicuspu::86069783ef5f47bf6b4b23a26f9b632c</to> |
|
86 |
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title"> |
|
87 |
Investigation of the long-term variations in hydro-climatology of the Dinder and Rahad basins and its implications on ecosystems of the Dinder National Park, Sudan |
|
88 |
</title> |
|
89 |
<resulttype classid="publication" classname="publication" schemeid="dnet:result_typologies" schemename="dnet:result_typologies"/> |
|
90 |
<pid classid="doi" classname="doi" schemeid="dnet:pid_types" schemename="dnet:pid_types">10.5194/hess-2016-407</pid> |
|
91 |
<similarity>0.86955255</similarity> |
|
92 |
<type>STANDARD</type> |
|
93 |
<dateofacceptance>2018-09-27</dateofacceptance> |
|
94 |
</rel> |
|
95 |
<rel inferred="true" trust="0.9" inferenceprovenance="iis::document_similarities_standard" provenanceaction="iis"> |
|
96 |
<to class="isAmongTopNSimilarDocuments" scheme="dnet:result_result_relations" type="result">copernicuspu::86069783ef5f47bf6b4b23a26f9b632c</to> |
|
97 |
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title"> |
|
98 |
Investigation of the long-term variations in hydro-climatology of the Dinder and Rahad basins and its implications on ecosystems of the Dinder National Park, Sudan |
|
99 |
</title> |
|
100 |
<resulttype classid="publication" classname="publication" schemeid="dnet:result_typologies" schemename="dnet:result_typologies"/> |
|
101 |
<pid classid="doi" classname="doi" schemeid="dnet:pid_types" schemename="dnet:pid_types">10.5194/hess-2016-407</pid> |
|
102 |
<similarity>0.86955255</similarity> |
|
103 |
<type>STANDARD</type> |
|
104 |
<dateofacceptance>2018-09-27</dateofacceptance> |
|
105 |
</rel> |
|
106 |
<rel inferred="true" trust="0.9" inferenceprovenance="iis::document_similarities_standard" provenanceaction="iis"> |
|
107 |
<to class="hasAmongTopNSimilarDocuments" scheme="dnet:result_result_relations" type="result">dedup_wf_001::959d58f74f5139ae3e8dd4b02c7314c0</to> |
|
108 |
<similarity>0.7040297</similarity> |
|
109 |
<resulttype classid="publication" classname="publication" schemeid="dnet:result_typologies" schemename="dnet:result_typologies"/> |
|
110 |
<publisher>Copernicus Publications</publisher> |
|
111 |
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title"> |
|
112 |
Analysis of streamflow response to land use and land cover changes using satellite data and hydrological modelling: case study of Dinder and Rahad tributaries of the Blue Nile (Ethiopia–Sudan) |
|
113 |
</title> |
|
114 |
<type>STANDARD</type> |
|
115 |
<dateofacceptance>2018-09-27</dateofacceptance> |
|
116 |
<pid classid="doi" classname="doi" schemeid="dnet:pid_types" schemename="dnet:pid_types">10.5194/hess-21-5217-2017</pid> |
|
117 |
<dateofacceptance>2017-10-01</dateofacceptance> |
|
118 |
</rel> |
|
119 |
<rel inferred="true" trust="0.9" inferenceprovenance="iis::document_similarities_standard" provenanceaction="iis"> |
|
120 |
<to class="isAmongTopNSimilarDocuments" scheme="dnet:result_result_relations" type="result">dedup_wf_001::959d58f74f5139ae3e8dd4b02c7314c0</to> |
|
121 |
<similarity>0.7040297</similarity> |
|
122 |
<resulttype classid="publication" classname="publication" schemeid="dnet:result_typologies" schemename="dnet:result_typologies"/> |
|
123 |
<publisher>Copernicus Publications</publisher> |
|
124 |
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title"> |
|
125 |
Analysis of streamflow response to land use and land cover changes using satellite data and hydrological modelling: case study of Dinder and Rahad tributaries of the Blue Nile (Ethiopia–Sudan) |
|
126 |
</title> |
|
127 |
<type>STANDARD</type> |
|
128 |
<dateofacceptance>2018-09-27</dateofacceptance> |
|
129 |
<pid classid="doi" classname="doi" schemeid="dnet:pid_types" schemename="dnet:pid_types">10.5194/hess-21-5217-2017</pid> |
|
130 |
<dateofacceptance>2017-10-01</dateofacceptance> |
|
131 |
</rel> |
|
132 |
<rel inferred="true" trust="0.9" inferenceprovenance="iis::document_similarities_standard" provenanceaction="iis"> |
|
133 |
<to class="hasAmongTopNSimilarDocuments" scheme="dnet:result_result_relations" type="result">dedup_wf_001::df4969fc6413f99334a2451f5f4a3221</to> |
|
134 |
<publisher> |
|
135 |
Freie Universität Berlin Universitätsbibliothek, Garystr. 39, 14195 Berlin |
|
136 |
</publisher> |
|
137 |
<dateofacceptance>2010-02-19</dateofacceptance> |
|
138 |
<similarity>0.7040297</similarity> |
|
139 |
<resulttype classid="publication" classname="publication" schemeid="dnet:result_typologies" schemename="dnet:result_typologies"/> |
|
140 |
<type>STANDARD</type> |
|
141 |
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title"> |
|
142 |
integration of socio-economic development and conservation as a challange for the protected area management ; the cases of Wadi Howar National Park and Dinder National Park |
|
143 |
</title> |
|
144 |
</rel> |
|
145 |
<rel inferred="true" trust="0.9" inferenceprovenance="iis::document_similarities_standard" provenanceaction="iis"> |
|
146 |
<to class="isAmongTopNSimilarDocuments" scheme="dnet:result_result_relations" type="result">dedup_wf_001::df4969fc6413f99334a2451f5f4a3221</to> |
|
147 |
<publisher> |
|
148 |
Freie Universität Berlin Universitätsbibliothek, Garystr. 39, 14195 Berlin |
|
149 |
</publisher> |
|
150 |
<dateofacceptance>2010-02-19</dateofacceptance> |
|
151 |
<similarity>0.7040297</similarity> |
|
152 |
<resulttype classid="publication" classname="publication" schemeid="dnet:result_typologies" schemename="dnet:result_typologies"/> |
|
153 |
<type>STANDARD</type> |
|
154 |
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title"> |
|
155 |
integration of socio-economic development and conservation as a challange for the protected area management ; the cases of Wadi Howar National Park and Dinder National Park |
|
156 |
</title> |
|
157 |
</rel> |
|
158 |
</rels> |
|
159 |
<children> |
|
160 |
<result objidentifier="copernicuspu::3ed91398db93e83c7f2a2f09c1e229ce"> |
|
161 |
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title"> |
|
162 |
Impacts of climate change under CMIP5 RCP scenarios on the streamflow in the Dinder River and ecosystem habitats in Dinder National Park, Sudan |
|
163 |
</title> |
|
164 |
<dateofacceptance>2018-09-27</dateofacceptance> |
|
165 |
<resulttype classid="publication" classname="publication" schemeid="dnet:result_typologies" schemename="dnet:result_typologies"/> |
|
166 |
</result> |
|
167 |
<result objidentifier="doajarticles::7cca16547fba04de50f609f0f7a2b34f"> |
|
168 |
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title"> |
|
169 |
Impacts of climate change under CMIP5 RCP scenarios on the streamflow in the Dinder River and ecosystem habitats in Dinder National Park, Sudan |
|
170 |
</title> |
|
171 |
<publisher>Copernicus Publications</publisher> |
|
172 |
<dateofacceptance>2016-04-01</dateofacceptance> |
|
173 |
<resulttype classid="publication" classname="publication" schemeid="dnet:result_typologies" schemename="dnet:result_typologies"/> |
|
174 |
</result> |
|
175 |
<instance id="copernicuspu::7e1a08d9f8c386b160529380308fb039"> |
|
176 |
<accessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes" schemename="dnet:access_modes"/> |
|
177 |
<dateofacceptance>2018-09-27</dateofacceptance> |
|
178 |
<instancetype classid="0038" classname="Other literature type" schemeid="dnet:publication_resource" schemename="dnet:publication_resource"/> |
|
179 |
<hostedby name="Hydrology and Earth System Sciences (HESS)" id="copernicuspu::7e1a08d9f8c386b160529380308fb039"/> |
|
180 |
<collectedfrom name="Copernicus Publications" id="openaire____::5a38cb462ac487bf26bdb86009fe3e74"/> |
|
181 |
<webresource> |
|
182 |
<url> |
|
183 |
https://www.hydrol-earth-syst-sci.net/20/1331/2016/ |
|
184 |
</url> |
|
185 |
</webresource> |
|
186 |
</instance> |
|
187 |
<instance id="doajarticles::e11c3334d13cac1b2a8a89976026f695"> |
|
188 |
<accessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes" schemename="dnet:access_modes"/> |
|
189 |
<dateofacceptance>2016-04-01</dateofacceptance> |
|
190 |
<instancetype classid="0001" classname="Article" schemeid="dnet:publication_resource" schemename="dnet:publication_resource"/> |
|
191 |
<hostedby name="Hydrology and Earth System Sciences" id="doajarticles::e11c3334d13cac1b2a8a89976026f695"/> |
|
192 |
<collectedfrom name="DOAJ-Articles" id="driver______::bee53aa31dc2cbb538c10c2b65fa5824"/> |
|
193 |
<webresource> |
|
194 |
<url>https://doaj.org/toc/1027-5606</url> |
|
195 |
</webresource> |
|
196 |
<webresource> |
|
197 |
<url>https://doaj.org/toc/1607-7938</url> |
|
198 |
</webresource> |
|
199 |
<webresource> |
|
200 |
<url> |
|
201 |
http://www.hydrol-earth-syst-sci.net/20/1331/2016/hess-20-1331-2016.pdf |
|
202 |
</url> |
|
203 |
</webresource> |
|
204 |
</instance> |
|
205 |
</children> |
|
206 |
</oaf:result> |
|
207 |
</oaf:entity> |
|
208 |
</metadata> |
|
209 |
</result> |
|
210 |
</record> |
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/mapreduce/hbase/dataexport/match_record.xml | ||
---|---|---|
1 |
<record rank="null"> |
|
2 |
<result> |
|
3 |
<header> |
|
4 |
<objIdentifier>od________65::3a1124fd7b91a4c18376ae175b59b4ce</objIdentifier> |
|
5 |
<dateOfCollection>2019-05-23T20:02:01.021Z</dateOfCollection> |
|
6 |
<dateOfTransformation>2019-06-17T16:16:05.741Z</dateOfTransformation> |
|
7 |
<counters></counters> |
|
8 |
</header> |
|
9 |
<metadata> |
|
10 |
<entity schemaLocation="http://namespace.openaire.eu/oaf https://www.openaire.eu/schema/1.0/oaf-1.0.xsd"> |
|
11 |
<result> |
|
12 |
<subject classid="arxiv" classname="arxiv" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies" inferred="true" inferenceprovenance="iis::document_classes" provenanceaction="iis" trust="0.7245">Physics::Instrumentation and Detectors</subject> |
|
13 |
<subject classid="keyword" classname="keyword" schemeid="dnet:result_subject" schemename="dnet:result_subject">Particle Physics - Experiment</subject> |
|
14 |
<subject classid="arxiv" classname="arxiv" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies" inferred="true" inferenceprovenance="iis::document_classes" provenanceaction="iis" trust="0.8055">High Energy Physics::Phenomenology</subject> |
|
15 |
<subject classid="arxiv" classname="arxiv" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies" inferred="true" inferenceprovenance="iis::document_classes" provenanceaction="iis" trust="0.7542">Nuclear Experiment</subject> |
|
16 |
<subject classid="arxiv" classname="arxiv" schemeid="dnet:subject_classification_typologies" schemename="dnet:subject_classification_typologies" inferred="true" inferenceprovenance="iis::document_classes" provenanceaction="iis" trust="0.882">High Energy Physics::Experiment</subject> |
|
17 |
<resulttype classid="publication" classname="publication" schemeid="dnet:result_typologies" schemename="dnet:result_typologies" /> |
|
18 |
<journal issn="" eissn="" lissn="" ep="" iss="" sp="" vol="" /> |
|
19 |
<language classid="und" classname="Undetermined" schemeid="dnet:languages" schemename="dnet:languages" /> |
|
20 |
<creator rank="1" name="Collaboration" surname="Cms">CMS Collaboration</creator> |
|
21 |
<embargoenddate /> |
|
22 |
<dateofacceptance>2010-01-01</dateofacceptance> |
|
23 |
<description>Proton--proton collision INTERDISCIPLINA events collected with the CMS experiment at LHC at a center--of--mass energy of $\sqrt{s} = 7$~TeV in 2010 are used to commission the algorithms for reconstruction and identification of tau lepton hadronic decays. Four different types of algorithms are considered: three based on particle--flow event reconstruction and one based on combinations of tracks and calorimeter clusters. Probabilities for quark and gluon jets to pass the tau identification criteria of the different algorithms are measured in data dominated by QCD multi--jet events and compared to predictions of Monte Carlo simulations.</description> |
|
24 |
<title classid="main title" classname="main title" schemeid="dnet:dataCite_title" schemename="dnet:dataCite_title">Study INTERDISCIPLINARY of tau reconstruction algorithms using pp collisions data collected at sqrt(s) = 7 TeV</title> |
|
25 |
<country classid="" classname="" schemeid="" schemename="" /> |
|
26 |
<relevantdate classid="" classname="" schemeid="" schemename="" /> |
|
27 |
<publisher /> |
|
28 |
<source /> |
|
29 |
<fulltext /> |
|
30 |
<format /> |
|
31 |
<contributor /> |
|
32 |
<resourcetype classid="" classname="" schemeid="" schemename="" /> |
|
33 |
<coverage /> |
|
34 |
<refereed /> |
|
35 |
<storagedate /> |
|
36 |
<device /> |
|
37 |
<size /> |
|
38 |
<version /> |
|
39 |
<lastmetadataupdate /> |
|
40 |
<metadataversionnumber /> |
|
41 |
<documentationUrl /> |
|
42 |
<codeRepositoryUrl /> |
|
43 |
<programmingLanguage classid="" classname="" schemeid="" schemename="" /> |
|
44 |
<contactperson /> |
|
45 |
<contactgroup /> |
|
46 |
<tool /> |
|
47 |
<collectedfrom name="CERN Document Server" id="opendoar____::fc490ca45c00b1249bbe3554a4fdf6fb" /> |
|
48 |
<originalId>oai:cds.cern.ch:1279358</originalId> |
|
49 |
<pid classid="" classname="" schemeid="" schemename="" /> |
|
50 |
<bestaccessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes" schemename="dnet:access_modes" /> |
|
51 |
<context id="egi" label="EGI Federation" type="community"> |
|
52 |
<category id="egi::virtual" label="EGI virtual organizations"> |
|
53 |
<concept id="egi::virtual::4" label="cms" /> |
|
54 |
</category> |
|
55 |
</context> |
|
56 |
<datainfo> |
|
57 |
<inferred>false</inferred> |
|
58 |
<deletedbyinference>false</deletedbyinference> |
|
59 |
<trust>0.9</trust> |
|
60 |
<inferenceprovenance /> |
|
61 |
<provenanceaction classid="sysimport:crosswalk:repository" classname="sysimport:crosswalk:repository" schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions" /> |
|
62 |
</datainfo> |
|
63 |
<rels></rels> |
|
64 |
<children> |
|
65 |
<instance id="opendoar____::fc490ca45c00b1249bbe3554a4fdf6fb"> |
|
66 |
<collectedfrom name="CERN Document Server" id="opendoar____::fc490ca45c00b1249bbe3554a4fdf6fb" /> |
|
67 |
<accessright classid="OPEN" classname="Open Access" schemeid="dnet:access_modes" schemename="dnet:access_modes" /> |
|
68 |
<hostedby name="CERN Document Server" id="opendoar____::fc490ca45c00b1249bbe3554a4fdf6fb" /> |
|
69 |
<instancetype classid="0038" classname="Other literature type" schemeid="dnet:publication_resource" schemename="dnet:publication_resource" /> |
|
70 |
<dateofacceptance>2010-01-01</dateofacceptance> |
|
71 |
<webresource> |
|
72 |
<url>http://cds.cern.ch/record/1279358</url> |
|
73 |
</webresource> |
|
74 |
</instance> |
|
75 |
</children> |
|
76 |
</result> |
|
77 |
</entity> |
|
78 |
</metadata> |
|
79 |
</result> |
|
80 |
</record> |
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/odf.xml | ||
---|---|---|
26 | 26 |
<dc:creator>Corso, Mariano</dc:creator> |
27 | 27 |
<dc:title>(Re-)Designing the Business Model of a Digital Ecosystem: An Example in the Socio-Care Context</dc:title> |
28 | 28 |
<dc:date>2018</dc:date> |
29 |
<oaf:projectid>corda__h2020::643588</oaf:projectid> |
|
30 | 29 |
</metadata> |
31 | 30 |
<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd"> |
32 | 31 |
<originDescription harvestDate="2019-04-11T14:51:27.828+02:00" altered="true"> |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/SoftwareEventFactory.java | ||
---|---|---|
44 | 44 |
|
45 | 45 |
public static List<EventWrapper> process(final Context context, final Oaf current, final Oaf other, final Float trust, final Map<String, String> baseUrlMap) |
46 | 46 |
throws IOException, InterruptedException, DocumentException { |
47 |
|
|
48 |
/* |
|
49 |
* if (!current.getRel().hasCachedOafTarget() || (other != null && !other.getRel().hasCachedOafTarget())) { |
|
50 |
* context.getCounter(COUNTER_GROUP, "events skipped: missing project 2nd step").increment(1); return; } |
|
51 |
*/ |
|
52 |
|
|
53 | 47 |
return new SoftwareEventFactory(baseUrlMap).processSoftware(context, current, other, trust); |
54 | 48 |
} |
55 | 49 |
|
... | ... | |
67 | 61 |
|
68 | 62 |
final String provenance = oafRel.getDataInfo().getProvenanceaction().getClassid(); |
69 | 63 |
if (inferenceProvenance.contains(provenance)) { |
70 |
final OafEntity result = oafRel.getRel().getCachedOafTarget().getEntity();
|
|
71 |
events.add(doProcessSoftware(context, current, current, result, provenance, Topic.ENRICH_MISSING_SOFTWARE, trust(trust, oafRel)));
|
|
64 |
final Software software = mapRelatedSoftware(oafRel.getRel().getCachedOafTarget().getEntity().getResult());
|
|
65 |
events.add(doProcessSoftware(context, current, current, software, provenance, Topic.ENRICH_MISSING_SOFTWARE, trust(trust, oafRel)));
|
|
72 | 66 |
} |
73 | 67 |
} |
74 | 68 |
} else { |
... | ... | |
82 | 76 |
|
83 | 77 |
final String provenance = otherOafRel.getDataInfo().getProvenanceaction().getClassid(); |
84 | 78 |
|
85 |
final OafEntity software = otherOafRel.getRel().getCachedOafTarget().getEntity(); |
|
79 |
final OafEntity swEntity = otherOafRel.getRel().getCachedOafTarget().getEntity(); |
|
80 |
final Software software = mapRelatedSoftware(swEntity.getResult()); |
|
86 | 81 |
|
87 | 82 |
final boolean currentHasSw = Iterables.tryFind(current.getEntity().getCachedOafRelList(), oaf -> { |
88 | 83 |
final String currentSwId = oaf.getRel().getCachedOafTarget().getEntity().getId(); |
89 |
// System.out.println(String.format("%s = %s ? %s", currentProjectId, project.getId(), |
|
90 |
// currentProjectId.equals(project.getId()))); |
|
91 |
return currentSwId.equals(software.getId()); |
|
84 |
return currentSwId.equals(swEntity.getId()); |
|
92 | 85 |
}).isPresent(); |
93 | 86 |
|
94 | 87 |
if (!currentHasSw) { |
95 |
// System.out.println(String.format("generating event for other = %s\n\nproject = %s", other, project)); |
|
96 |
events.add(doProcessSoftware(context, current, other, software, provenance, Topic.ENRICH_MISSING_PROJECT, |
|
88 |
events.add(doProcessSoftware(context, current, other, software, provenance, Topic.ENRICH_MISSING_SOFTWARE, |
|
97 | 89 |
trust(trust, currentOafRel))); |
98 | 90 |
} |
99 | 91 |
} |
... | ... | |
106 | 98 |
private EventWrapper doProcessSoftware(final Context context, |
107 | 99 |
final Oaf current, |
108 | 100 |
final Oaf other, |
109 |
final OafEntity software,
|
|
101 |
final Software software,
|
|
110 | 102 |
final String provenance, |
111 | 103 |
final Topic topic, |
112 | 104 |
final Float trust) |
... | ... | |
117 | 109 |
|
118 | 110 |
final Provenance prov = getProvenance(otherEntity, provenance); |
119 | 111 |
|
120 |
final OpenAireEventPayload payload = addSoftware(OpenAireEventPayloadFactory.fromOAF(currentEntity, trust, prov), software);
|
|
112 |
final OpenAireEventPayload payload = OpenAireEventPayloadFactory.fromOAF(currentEntity, trust, prov);
|
|
121 | 113 |
|
122 |
final EventMessage event = asEvent(currentEntity, topic, payload, otherEntity, trust); |
|
123 |
event.setPayload(HighlightFactory.highlightEnrichSoftware(payload, software, provenance).toJSON()); |
|
124 |
return EventWrapper.newInstance(event, |
|
125 |
payload.getHighlight().getSoftwares().stream().filter(Objects::nonNull).map(s -> s.getName()).sorted() |
|
126 |
.collect(Collectors.joining(", ")), |
|
127 |
topic.getValue()); |
|
128 |
} |
|
129 |
|
|
130 |
private OpenAireEventPayload addSoftware(final OpenAireEventPayload payload, final OafEntity software) { |
|
131 | 114 |
final Map<String, Software> swMap = Maps.newHashMap(); |
132 | 115 |
for (final Software s : payload.getPublication().getSoftwares()) { |
133 | 116 |
swMap.put(s.getLandingPage(), s); |
134 | 117 |
} |
135 |
final Software hlSw = mapRelatedSoftware(software.getResult()); |
|
136 |
swMap.put(hlSw.getLandingPage(), hlSw); |
|
137 | 118 |
|
119 |
swMap.put(software.getLandingPage(), software); |
|
120 |
|
|
138 | 121 |
payload.getPublication().setSoftwares(Lists.newArrayList(swMap.values())); |
122 |
payload.getHighlight().setSoftwares(Lists.newArrayList(software)); |
|
139 | 123 |
|
140 |
return payload; |
|
124 |
final EventMessage event = asEvent(currentEntity, topic, payload, otherEntity, trust); |
|
125 |
|
|
126 |
event.setPayload(payload.toJSON()); |
|
127 |
return EventWrapper.newInstance(event, |
|
128 |
payload.getHighlight().getSoftwares().stream().filter(Objects::nonNull).map(s -> s.getName()).sorted() |
|
129 |
.collect(Collectors.joining(", ")), |
|
130 |
topic.getValue()); |
|
141 | 131 |
} |
142 | 132 |
|
143 | 133 |
private Provenance getProvenance(final OafEntity entity, final String provenance) { |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/SoftwareEnrichmentReducer.java | ||
---|---|---|
26 | 26 |
|
27 | 27 |
@Override |
28 | 28 |
protected String counterGroup() { |
29 |
return "Broker Enrichment projects";
|
|
29 |
return "Broker Enrichment Software";
|
|
30 | 30 |
} |
31 | 31 |
|
32 | 32 |
@Override |
... | ... | |
70 | 70 |
.collect(Collectors.toList()); |
71 | 71 |
|
72 | 72 |
if (valid.isEmpty()) { |
73 |
context.getCounter(counterGroup(), "Events Skipped - Missing project").increment(1);
|
|
73 |
context.getCounter(counterGroup(), "Events Skipped - Missing software").increment(1);
|
|
74 | 74 |
return; |
75 | 75 |
} |
76 | 76 |
|
... | ... | |
116 | 116 |
|
117 | 117 |
} |
118 | 118 |
|
119 |
private Oaf addSoftware(final Oaf current, final Map<String, Oaf> software) { |
|
119 |
private Oaf addSoftware(final Oaf current, final Map<String, Oaf> softwareMap) {
|
|
120 | 120 |
|
121 | 121 |
final Oaf.Builder oafBuilder = Oaf.newBuilder(current); |
122 | 122 |
final List<Oaf> cachedRels = Lists.newArrayList(); |
123 | 123 |
|
124 | 124 |
for (final Oaf.Builder cachedOafRel : oafBuilder.getEntityBuilder().getCachedOafRelBuilderList()) { |
125 |
final String projectId = cachedOafRel.getRel().getTarget();
|
|
125 |
final String softwareId = cachedOafRel.getRel().getTarget();
|
|
126 | 126 |
|
127 |
if (software.containsKey(projectId)) {
|
|
128 |
final Oaf project = software.get(projectId);
|
|
127 |
if (softwareMap.containsKey(softwareId)) {
|
|
128 |
final Oaf software = softwareMap.get(softwareId);
|
|
129 | 129 |
|
130 |
cachedOafRel.getRelBuilder().setCachedOafTarget(project);
|
|
130 |
cachedOafRel.getRelBuilder().setCachedOafTarget(software);
|
|
131 | 131 |
cachedRels.add(cachedOafRel.build()); |
132 | 132 |
} |
133 | 133 |
} |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/EnrichmentReducer.java | ||
---|---|---|
72 | 72 |
|
73 | 73 |
final float trust = scale(similarity); |
74 | 74 |
if (!DedupUtils.isRoot(currentId) && !DedupUtils.isRoot(otherId)) { |
75 |
events.addAll(PIDEventFactory.process(current, other, trust)); |
|
76 |
events.addAll(OAVersionEventFactory.process(current, other, trust, untrustedOaDsList)); |
|
77 |
events.addAll(AbstractEventFactory.process(current, other, trust)); |
|
78 |
events.addAll(PublicationDateEventFactory.process(current, other, trust)); |
|
75 |
//events.addAll(PIDEventFactory.process(current, other, trust));
|
|
76 |
//events.addAll(OAVersionEventFactory.process(current, other, trust, untrustedOaDsList));
|
|
77 |
//events.addAll(AbstractEventFactory.process(current, other, trust));
|
|
78 |
//events.addAll(PublicationDateEventFactory.process(current, other, trust));
|
|
79 | 79 |
events.addAll(OrcidEventFactory.process(current, other, trust)); |
80 | 80 |
} |
81 | 81 |
|
82 |
events.addAll(SubjectEventFactory.process(context, current, other, trust)); |
|
82 |
//events.addAll(SubjectEventFactory.process(context, current, other, trust));
|
|
83 | 83 |
} else { |
84 | 84 |
context.getCounter(counterGroup(), "d < " + dedupConf.getWf().getThreshold()).increment(1); |
85 | 85 |
} |
86 | 86 |
|
87 | 87 |
} else if (oafList.size() == 1) { |
88 |
events.addAll(SubjectEventFactory.process(context, current)); |
|
88 |
//events.addAll(SubjectEventFactory.process(context, current));
|
|
89 | 89 |
} |
90 | 90 |
} |
91 | 91 |
emit(events, context); |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/enrich/SoftwareEnrichmentMapper.java | ||
---|---|---|
69 | 69 |
emit(context, key.copyBytes(), oafBuilder.build().toByteArray(), PUBLICATION); |
70 | 70 |
break; |
71 | 71 |
} |
72 |
break; |
|
72 | 73 |
default: |
73 | 74 |
throw new IllegalArgumentException("invalid type: " + type); |
74 | 75 |
} |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/mapping/HighlightFactory.java | ||
---|---|---|
68 | 68 |
} |
69 | 69 |
|
70 | 70 |
public static OpenAireEventPayload highlightEnrichSoftware(final OpenAireEventPayload p, final OafEntity software, final String provenance) { |
71 |
// TODO: this can wait. Think about generating the openaire string for project links: it will be easier for subscribers to integrate |
|
72 |
// it back to their records! |
|
73 | 71 |
|
74 | 72 |
p.getHighlight().setSoftwares(Lists.newArrayList(mapRelatedSoftware(software.getResult()))); |
75 | 73 |
|
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/broker/mapping/ProtoMapping.java | ||
---|---|---|
100 | 100 |
return p; |
101 | 101 |
} |
102 | 102 |
|
103 |
protected static final List<Software> mapRelatedSoftwares(final OafEntity entity) { |
|
104 |
final Map<String, Oaf> softwareMap = Maps.newHashMap(); |
|
105 |
for(Oaf rel : entity.getCachedOafRelList()) { |
|
106 |
final OafEntity p = rel.getRel().getCachedOafTarget().getEntity(); |
|
107 |
softwareMap.put(p.getId(), Oaf.newBuilder(rel).build()); |
|
108 |
} |
|
109 |
|
|
110 |
return softwareMap.values().stream() |
|
111 |
.map(o -> mapRelatedSoftware(o.getRel().getCachedOafTarget().getEntity().getResult())) |
|
112 |
.collect(Collectors.toList()); |
|
113 |
} |
|
114 |
|
|
103 | 115 |
protected static final Software mapRelatedSoftware(final ResultProtos.Result result) { |
104 | 116 |
final Software s = new Software(); |
105 | 117 |
final ResultProtos.Result.Metadata rp = result.getMetadata(); |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dataexport/ExportFilteredResultMapper.java | ||
---|---|---|
2 | 2 |
|
3 | 3 |
import com.google.gson.Gson; |
4 | 4 |
import eu.dnetlib.data.mapreduce.hbase.bulktag.ProtoMap; |
5 |
import org.apache.commons.logging.Log; |
|
6 |
import org.apache.commons.logging.LogFactory; |
|
7 | 5 |
import org.apache.hadoop.io.Text; |
8 | 6 |
import org.apache.hadoop.mapreduce.Mapper; |
9 | 7 |
import org.dom4j.Document; |
... | ... | |
14 | 12 |
import java.time.Year; |
15 | 13 |
|
16 | 14 |
/** |
17 |
* Exports the result matching the criteria found in the confguration. |
|
15 |
* Exports the result matching the criteria found in the configuration.
|
|
18 | 16 |
* |
19 | 17 |
* @author claudio |
20 | 18 |
*/ |
21 | 19 |
public class ExportFilteredResultMapper extends Mapper<Text, Text, Text, Text> { |
22 | 20 |
|
23 |
/** |
|
24 |
* logger. |
|
25 |
*/ |
|
26 |
private static final Log log = LogFactory.getLog(ExportFilteredResultMapper.class); // NOPMD by marko on 11/24/08 5:02 PM |
|
21 |
private final static String RESULT_TYPE_XPATH = "/*[local-name() ='record']/*[local-name() ='result']/*[local-name() ='metadata']/*[local-name() ='entity']/*[local-name() ='result']/*[local-name() ='resulttype']/@classid"; |
|
27 | 22 |
|
28 | 23 |
private Text keyOut; |
29 | 24 |
|
... | ... | |
35 | 30 |
|
36 | 31 |
@Override |
37 | 32 |
protected void setup(final Context context) throws IOException, InterruptedException { |
38 |
super.setup(context); |
|
39 |
|
|
40 | 33 |
keyOut = new Text(""); |
41 | 34 |
valueOut = new Text(); |
42 | 35 |
|
... | ... | |
60 | 53 |
|
61 | 54 |
final Document doc = new SAXReader().read(new StringReader(record)); |
62 | 55 |
|
63 |
if (defaultFilter.matches(doc)) { |
|
56 |
if (defaultFilter.matches(doc, true)) {
|
|
64 | 57 |
|
65 |
if (userFilter.matches(doc)) { |
|
58 |
if (userFilter.matches(doc, false)) {
|
|
66 | 59 |
keyOut.set(keyIn.toString()); |
67 | 60 |
valueOut.set(value.toString()); |
68 | 61 |
|
69 | 62 |
context.write(keyOut, valueOut); |
70 |
context.getCounter("filter", "matched criteria").increment(1);
|
|
63 |
context.getCounter("filter", "matched criteria " +doc.valueOf(RESULT_TYPE_XPATH)).increment(1);
|
|
71 | 64 |
} else { |
72 | 65 |
context.getCounter("filter", "filtered by criteria").increment(1); |
73 | 66 |
} |
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/dataexport/RecordFilter.java | ||
---|---|---|
3 | 3 |
import eu.dnetlib.miscutils.functional.xml.DnetXsltFunctions; |
4 | 4 |
import org.apache.commons.lang3.StringUtils; |
5 | 5 |
import org.dom4j.Document; |
6 |
import org.dom4j.tree.DefaultText; |
|
6 | 7 |
|
7 | 8 |
import java.text.ParseException; |
8 |
import java.util.List; |
|
9 | 9 |
import java.util.Map; |
10 | 10 |
import java.util.Objects; |
11 | 11 |
|
... | ... | |
24 | 24 |
this.toYear = toYear; |
25 | 25 |
} |
26 | 26 |
|
27 |
public boolean matches(final Document record) throws ParseException { |
|
27 |
public boolean matches(final Document record, final boolean strict) throws ParseException {
|
|
28 | 28 |
|
29 | 29 |
final String date = record.valueOf(yearXpath); |
30 | 30 |
if (StringUtils.isBlank(date)) { |
31 | 31 |
return false; |
32 | 32 |
} |
33 | 33 |
|
34 |
final Integer year = Integer.valueOf(DnetXsltFunctions.extractYear(date)); |
|
34 |
final String yyyy = DnetXsltFunctions.extractYear(date); |
|
35 |
if (StringUtils.isBlank(yyyy)) { |
|
36 |
return false; |
|
37 |
} |
|
38 |
final Integer year = Integer.valueOf(yyyy); |
|
35 | 39 |
|
36 | 40 |
if (year < fromYear | year > toYear) { |
37 | 41 |
return false; |
38 | 42 |
} |
39 | 43 |
|
44 |
|
|
45 |
boolean matched = false; |
|
40 | 46 |
for(final Map.Entry<String, String> c : criteria.entrySet()) { |
41 | 47 |
|
42 |
List<String> nodes = record.selectNodes(c.getKey()); |
|
43 |
if (nodes != null) { |
|
44 |
boolean matches = nodes.stream() |
|
45 |
.filter(Objects::nonNull) |
|
46 |
.map(s -> s.toLowerCase()) |
|
47 |
.map(s -> s.trim()) |
|
48 |
.anyMatch(s -> s.matches(c.getValue())); |
|
49 |
if (matches) { |
|
50 |
return true; |
|
51 |
} |
|
48 |
boolean matches = matched = record.selectNodes(c.getKey()).stream() |
|
49 |
.filter(Objects::nonNull) |
|
50 |
.map(o -> textOf(o)) |
|
51 |
.map(s -> ((String) s).toLowerCase()) |
|
52 |
.map(s -> ((String) s).trim()) |
|
53 |
.anyMatch(s -> { |
|
54 |
return ((String) s).matches(c.getValue().toLowerCase()); |
|
55 |
}); |
|
56 |
|
|
57 |
if (matches && !strict) { |
|
58 |
return true; |
|
52 | 59 |
} |
53 | 60 |
} |
54 |
return false;
|
|
61 |
return matched;
|
|
55 | 62 |
} |
56 | 63 |
|
64 |
private String textOf(final Object o) { |
|
65 |
if (o instanceof org.dom4j.tree.DefaultText) { |
|
66 |
return ((DefaultText) o).getText(); |
|
67 |
} |
|
68 |
return o.toString(); |
|
69 |
} |
|
70 |
|
|
57 | 71 |
} |
modules/dnet-mapreduce-jobs/trunk/src/main/resources/log4j.properties | ||
---|---|---|
27 | 27 |
log4j.logger.eu.dnetlib.conf.PropertyFetcher=WARN |
28 | 28 |
#log4j.logger.eu.dnetlib.data.transform.XsltRowTransformerFactory=DEBUG |
29 | 29 |
|
30 |
log4j.logger.org.reflections.Reflections=OFF |
|
31 |
|
|
30 | 32 |
log4j.logger.eu.dnetlib.enabling.is.sn.ISSNServiceImpl=OFF |
31 | 33 |
log4j.logger.eu.dnetlib.enabling.datasources.DatasourceManagerClients=FATAL |
32 | 34 |
log4j.logger.eu.dnetlib.data.mdstore.modular.mongodb.utils.MetadataCheckJob=DEBUG |
Also available in: Unified diff
removed project reference from src/test/resources/eu/dnetlib/data/transform/odf.xml, the test didn't include any check against it