Project

General

Profile

« Previous | Next » 

Revision 48697

Some Java 8 refactorings; added more tests for the software entities mapping.

View differences:

modules/dnet-mapreduce-jobs/trunk/src/test/java/eu/dnetlib/data/transform/XsltRowTransformerFactoryTest.java
340 340
		duplicates.add(getOafBody(puma2));
341 341
		final Oaf.Builder oafMerge = OafEntityMerger.merge(mergeId, duplicates);
342 342

  
343
		final Row mergeRow = new Row("result", mergeId, Lists.<Column<String,byte[]>>newArrayList(new Column("body", oafMerge.build().toByteArray())));
343
		final Row mergeRow = new Row("result", mergeId, Lists.newArrayList(new Column("body", oafMerge.build().toByteArray())));
344 344

  
345 345
		rows.add(mergeRow);
346 346

  
......
432 432
		printAll(mapAll(buildTable(rows)));
433 433
	}
434 434

  
435
	@Test
436
	public void testParseSoftwareFromODF() throws Exception {
437
		final List<Row> rows = Lists.newArrayList();
438
		rows.addAll(asRows(loadFromTransformationProfile("odf2hbase.xml"), load("softwareODF.xml")));
439
		rows.addAll(asRows(loadFromTransformationProfile("projects_2_hbase.xsl"), load("projectRecordCorda.xml")));
440
		printAll(mapAll(buildTable(rows)));
441
	}
442

  
443
	@Test(expected = AssertionError.class)
444
	public void testParseSoftwareFromOAF() throws Exception {
445
		final List<Row> rows = Lists.newArrayList();
446
		rows.addAll(asRows(loadFromTransformationProfile("oaf2hbase.xml"), load("recordOAFsoftware.xml")));
447
		printAll(mapAll(buildTable(rows)));
448
	}
449

  
435 450
	private void doTest(final InputStream xsltStream, final InputStream recordStream) throws Exception {
436 451
		try {
437 452
			final List<Row> rows = asRows(xsltStream, recordStream);
......
498 513
	}
499 514

  
500 515
	private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream, final Function<Row, Row> p) throws Exception {
501
		return asRows(xsltStream, new HashMap<String, Object>(), recordStream, p);
516
		return asRows(xsltStream, new HashMap<>(), recordStream, p);
502 517
	}
503 518

  
504 519
	private List<Row> asRows(final InputStream xsltStream, final InputStream recordStream) throws Exception {
505
		return asRows(xsltStream, new HashMap<String, Object>(), recordStream);
520
		return asRows(xsltStream, new HashMap<>(), recordStream);
506 521
	}
507 522

  
508 523
	private List<Row> asRows(final InputStream xsltStream, final Map<String, Object> params, final InputStream recordStream) throws Exception {
......
579 594
			final String rowKey = row.getKey();
580 595
			final String cf = row.getColumnFamily();
581 596
			if (!table.containsKey(rowKey)) {
582
				table.put(rowKey, new HashMap<String, Map<String, byte[]>>());
597
				table.put(rowKey, new HashMap<>());
583 598
			}
584 599
			if (!table.get(rowKey).containsKey(cf)) {
585
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<String, byte[]>());
600
				table.get(rowKey).put(row.getColumnFamily(), new HashMap<>());
586 601
			}
587 602
			for (final Column<String, byte[]> c : row.getColumns()) {
588 603
				// System.out.println(String.format("ADDING K:%s CF:%s Q:%s", rowKey, cf, c.getName()));
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/softwareODF.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<record xmlns:dr="http://www.driver-repository.eu/namespace/dr"
3
        xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
4
	<oai:header xmlns:dri="http://www.driver-repository.eu/namespace/dri"
5
	            xmlns:oai="http://www.openarchives.org/OAI/2.0/"
6
	            xmlns="http://namespace.openaire.eu/">
7
		<dri:objIdentifier>r3c903d4c5a8::00a0cdbbc0c27cca1ccfafb33e7a862d</dri:objIdentifier>
8
		<dri:recordIdentifier/>
9
		<dri:dateOfCollection>2014-10-08T13:12:50.337Z</dri:dateOfCollection>
10
		<dri:repositoryId>19365384-4f97-4446-9a81-8c414d2ca27a_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU=</dri:repositoryId>
11
		<oaf:datasourceprefix>r3c903d4c5a8</oaf:datasourceprefix>
12
		<dr:dateOfTransformation>2016-05-10T17:26:38.15+02:00</dr:dateOfTransformation>
13
		<identifier xmlns="http://www.openarchives.org/OAI/2.0/">oai:ucd.ie:ucdlib:31727</identifier>
14
		<datestamp xmlns="http://www.openarchives.org/OAI/2.0/">2014-10-07T14:28:12Z</datestamp>
15
		<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">ucd_digital_library</setSpec>
16
		<setSpec xmlns="http://www.openarchives.org/OAI/2.0/">openaire_data</setSpec>
17
	</oai:header>
18
	<metadata>
19
		<resource xmlns:dri="http://www.driver-repository.eu/namespace/dri"
20
		          xmlns:oai="http://www.openarchives.org/OAI/2.0/"
21
		          xmlns="http://datacite.org/schema/kernel-3"
22
		          xsi:schemaLocation="http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd">
23
			<identifier identifierType="DOI">10.7925/drs1.ucdlib_31727</identifier>
24
			<creators>
25
				<creator>
26
					<creatorName>Laefer, Debra L.</creatorName>
27
					<nameIdentifier nameIdentifierScheme="ORCID">0000-0001-5134-5322</nameIdentifier>
28
				</creator>
29
				<creator>
30
					<creatorName>University College, Dublin. Library</creatorName>
31
					<nameIdentifier nameIdentifierScheme="VIAF" schemeURI="http://www.viaf.org">261450561</nameIdentifier>
32
				</creator>
33
			</creators>
34
			<titles>
35
				<title>39 Grafton Street, Dublin</title>
36
			</titles>
37
			<publisher>UCD School of Architecture Landscape Civil Engineering</publisher>
38
			<publicationYear>2013</publicationYear>
39
			<subjects>
40
				<subject
41
						schemeURI="http://id.loc.gov/authorities/subjects/sh99004820#concept" subjectScheme="NAF">Buildings, structures, etc</subject>
42
			</subjects>
43
			<contributors>
44
				<contributor contributorType="Other">
45
					<contributorName>Clarke, Julie</contributorName>
46
					<nameIdentifier nameIdentifierScheme="ORCID">0000-0001-6092-9741</nameIdentifier>
47
				</contributor>
48
				<contributor contributorType="Sponsor">
49
					<contributorName>UCD School of Architecture, Landscape, and Civil Engineering</contributorName>
50
				</contributor>
51
				<contributor contributorType="HostingInstitution">
52
					<contributorName>University College, Dublin. Library</contributorName>
53
					<nameIdentifier nameIdentifierScheme="VIAF" schemeURI="http://www.viaf.org">261450561</nameIdentifier>
54
				</contributor>
55
				<contributor contributorType="Funder">
56
					<contributorName>Irish Research Council for Science, Engineering and Technology</contributorName>
57
					<nameIdentifier nameIdentifierScheme="info">info:eu-repo/grantAgreement/IRCSET//RS-2011-435</nameIdentifier>
58
				</contributor>
59
			</contributors>
60
			<dates>
61
				<date dateType="Issued">2013</date>
62
				<date dateType="Available">2014-09-29</date>
63
			</dates>
64
			<resourceType resourceTypeGeneral="Dataset">Dataset</resourceType>
65
			<alternateIdentifiers>
66
				<alternateIdentifier alternateIdentifierType="URL">http://digital.ucd.ie/view/ucdlib:31727</alternateIdentifier>
67
			</alternateIdentifiers>
68
			<relatedIdentifiers>
69
				<relatedIdentifier relatedIdentifierType="DOI" relationType="IsPartOf">10.7925/drs1.ucdlib_31100</relatedIdentifier>
70
				<relatedIdentifier relatedIdentifierType="DOI" relationType="IsReferencedBy">10.7925/drs1.ucdlib_31057</relatedIdentifier>
71
			</relatedIdentifiers>
72
			<sizes>
73
				<size>1 still image</size>
74
				<size>26859085 bytes</size>
75
				<size>1545x2946 pixels</size>
76
			</sizes>
77
			<formats>
78
				<format>image/vnd.adobe.photoshop</format>
79
				<format>image/jpeg</format>
80
			</formats>
81
			<rightsList>
82
				<rights rightsURI="info:eu-repo/semantics/openAccess"/>
83
				<rights rightsURI="http://creativecommons.org/publicdomain/zero/1.0/">Use and reproduction: CC0 1.0 Universal</rights>
84
			</rightsList>
85
			<descriptions>
86
				<description descriptionType="Abstract">Consists of an orthorectified image of the structure in Adobe PhotoShop (PSD) format. The orthorectified image has been derived from one or more photographic images that are included as layers in the original Adobe PhotoShop (PSD) file. The composite and all other image layers have been extracted and saved in TIFF image files, and JPEG files in various resolutions have been derived from the flattened image layer to facilitate inspection.</description>
87
				<description descriptionType="Other">Brickwork structure, 4 levels above ground, constructed 1864.</description>
88
				<description descriptionType="Other">Funded by an award from the Irish Research Council for Science, Engineering and Technology (IRCSET), Embark Initiative Postgraduate Scholarship, grant identifier RS/2011/435.</description>
89
			</descriptions>
90
			<geoLocations>
91
				<geoLocation>
92
					<geoLocationPoint>53.3407951 -6.26054559383235</geoLocationPoint>
93
				</geoLocation>
94
			</geoLocations>
95
		</resource>
96
		<oaf:projectid>corda__h2020::632927</oaf:projectid>
97
		<oaf:embargoenddate>2014-09-29</oaf:embargoenddate>
98
		<dr:CobjCategory>0029</dr:CobjCategory>
99
		<oaf:dateAccepted>2013-01-01</oaf:dateAccepted>
100
		<oaf:accessrights>OPEN</oaf:accessrights>
101
		<oaf:language>und</oaf:language>
102
		<oaf:hostedBy id="re3data_____::r3d100010742" name="UCD Digital Library"/>
103
		<oaf:collectedFrom id="re3data_____::r3d100010742" name="UCD Digital Library"/>
104
	</metadata>
105
	<about xmlns:dri="http://www.driver-repository.eu/namespace/dri"
106
	       xmlns:oai="http://www.openarchives.org/OAI/2.0/"
107
	       xmlns="http://namespace.openaire.eu/">
108
		<oaf:datainfo>
109
			<oaf:inferred>false</oaf:inferred>
110
			<oaf:deletedbyinference>false</oaf:deletedbyinference>
111
			<oaf:trust>0.9</oaf:trust>
112
			<oaf:inferenceprovenance/>
113
			<oaf:provenanceaction classid="sysimport:crosswalk:datasetarchive"
114
			                      classname="sysimport:crosswalk:datasetarchive"
115
			                      schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
116
		</oaf:datainfo>
117
	</about>
118
</record>
modules/dnet-mapreduce-jobs/trunk/src/test/resources/eu/dnetlib/data/transform/recordOAFsoftware.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<record xmlns:dc="http://purl.org/dc/elements/1.1/"
3
        xmlns:dr="http://www.driver-repository.eu/namespace/dr"
4
        xmlns:dri="http://www.driver-repository.eu/namespace/dri"
5
        xmlns:oaf="http://namespace.openaire.eu/oaf"
6
        >
7
    <header xmlns="http://namespace.openaire.eu/">
8
        <dri:objIdentifier>acm_________::02a8fbd0aa341df6dbb8323f453091f8</dri:objIdentifier>
9
        <dri:recordIdentifier>2043516</dri:recordIdentifier>
10
        <dri:dateOfCollection/>
11
        <dri:mdFormat/>
12
        <dri:mdFormatInterpretation/>
13
        <dri:repositoryId/>
14
        <dr:objectIdentifier/>
15
        <dr:dateOfCollection>2015-01-20T00:00:00Z</dr:dateOfCollection>
16
        <oaf:datasourceprefix>openaire____</oaf:datasourceprefix>
17
    </header>
18
    <metadata xmlns="http://namespace.openaire.eu/">
19
        <dc:title>Traffic-centric modeling of future wireless internet access technologies</dc:title>
20
        <dc:creator>Paolo Pileggi</dc:creator>
21
        <dc:creator>Giuseppe Bianchi</dc:creator>
22
        <dc:date/>
23
        <dc:identifier>http://dl.acm.org/citation.cfm?id=2043516</dc:identifier>
24
        <dc:language>und</dc:language>
25
        <dc:type/>
26
        <dr:CobjCategory>0021</dr:CobjCategory>
27
        <dr:CobjIdentifier/>
28
        <oaf:dateAccepted>2011-09-06</oaf:dateAccepted>
29
        <oaf:embargoenddate/>
30
        <oaf:accessrights>CLOSED</oaf:accessrights>
31
        <oaf:hostedBy id="openaire____::acm" name="ACM Digital Library"/>
32
        <oaf:collectedFrom id="openaire____::acm" name="ACM Digital Library"/>
33
        <oaf:identifier identifierType="doi"/>
34
    </metadata>
35
    <about>
36
        <oaf:datainfo>
37
            <oaf:inferred>false</oaf:inferred>
38
            <oaf:deletedbyinference>false</oaf:deletedbyinference>
39
            <oaf:trust>0.9</oaf:trust>
40
            <oaf:inferenceprovenance/>
41
            <oaf:provenanceaction classid="sysimport:mining:repository"
42
                                  classname="sysimport:mining:repository"
43
                                  schemeid="dnet:provenanceActions" schemename="dnet:provenanceActions"/>
44
        </oaf:datainfo>
45
    </about>
46
</record>
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/index/config/EntityConfigTable.java
27 27

  
28 28
	public Set<String> getFilter(final Type type, final RelDescriptor relDescriptor) {
29 29
		final LinkDescriptor ld = getDescriptor(type, relDescriptor);
30
		return ld != null ? Sets.newHashSet(ld.getFields()) : new HashSet<String>();
30
		return ld != null ? Sets.newHashSet(ld.getFields()) : new HashSet<>();
31 31
	}
32 32

  
33 33
	public boolean includeDuplicates(final Type type) {
......
35 35
	}
36 36

  
37 37
	public boolean hasIncludeFields(final Type type){
38
		return CollectionUtils.isNotEmpty(super.get(type).getIncludeFields());
38
		final EntityConfig config = super.get(type);
39
		if (config == null) {
40
			throw new IllegalArgumentException("the configuration table does not contain type: " + type.toString());
41
		}
42
		return CollectionUtils.isNotEmpty(config.getIncludeFields());
39 43
	}
40 44

  
41 45
	public boolean hasExcludeFields(final Type type){
......
80 84
	}
81 85

  
82 86

  
83
}
87
}
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/hbase/index/config/IndexConfig.java
6 6
import java.util.Map;
7 7
import java.util.Set;
8 8

  
9
import com.google.common.base.Function;
10 9
import com.google.common.collect.Maps;
11 10
import com.google.common.collect.Sets;
12 11
import com.google.gson.Gson;
13 12
import com.typesafe.config.Config;
14 13
import com.typesafe.config.ConfigFactory;
15 14
import com.typesafe.config.ConfigObject;
16
import com.typesafe.config.ConfigValue;
17 15
import eu.dnetlib.data.mapreduce.OptionalConfig;
18 16
import eu.dnetlib.data.mapreduce.util.RelDescriptor;
19 17
import eu.dnetlib.data.proto.TypeProtos.Type;
......
32 30
	@SuppressWarnings("unchecked")
33 31
	public EntityConfigTable getConfigMap() {
34 32
		final ConfigObject modelMap = getConfig().getObject("index.conf");
35
		final Map<String, EntityConfig> map = Maps.transformValues(modelMap, new Function<ConfigValue, EntityConfig>() {
33
		final Map<String, EntityConfig> map = Maps.transformValues(modelMap, value -> {
36 34

  
37
			@Override
38
			public EntityConfig apply(final ConfigValue value) {
35
			final HashMap<RelDescriptor, LinkDescriptor> links = Maps.newHashMap();
36
			for (final Object o : (List<?>) ((Map<String, ?>) value.unwrapped()).get("links")) {
37
				final Map<String, ?> map1 = (Map<String, ?>) o;
39 38

  
40
				final HashMap<RelDescriptor, LinkDescriptor> links = Maps.newHashMap();
41
				for (final Object o : (List<?>) ((Map<String, ?>) value.unwrapped()).get("links")) {
42
					final Map<String, ?> map = (Map<String, ?>) o;
39
				final RelDescriptor relDescriptor = new RelDescriptor((String) map1.get("relType"));
40
				final Set<String> fields = Sets.newHashSet((List<String>) map1.get("fields"));
41
				final Type target = Type.valueOf((String) map1.get("targetEntity"));
42
				final Boolean isChild = ((String) map1.get("expandAs")).equals("child");
43
				final Boolean symmetric = (Boolean) map1.get("symmetric");
43 44

  
44
					final RelDescriptor relDescriptor = new RelDescriptor((String) map.get("relType"));
45
					final Set<String> fields = Sets.newHashSet((List<String>) map.get("fields"));
46
					final Type target = Type.valueOf((String) map.get("targetEntity"));
47
					final Boolean isChild = ((String) map.get("expandAs")).equals("child");
48
					final Boolean symmetric = (Boolean) map.get("symmetric");
45
				final LinkDescriptor ld = new LinkDescriptor(relDescriptor, target, isChild, symmetric, fields);
49 46

  
50
					final LinkDescriptor ld = new LinkDescriptor(relDescriptor, target, isChild, symmetric, fields);
47
				if (map1.containsKey("max")) {
48
					ld.setMax((Integer) map1.get("max"));
49
				}
51 50

  
52
					if (map.containsKey("max")) {
53
						ld.setMax((Integer) map.get("max"));
54
					}
55

  
56
					links.put(relDescriptor, ld);
57
				}
58
				final List<String> includeFields = (List<String>) ((Map<String, ?>) value.unwrapped()).get("includeFields");
59
				final List<String> excludeFields = (List<String>) ((Map<String, ?>) value.unwrapped()).get("excludeFields");
60
				if(CollectionUtils.isNotEmpty(includeFields) & CollectionUtils.isNotEmpty(excludeFields)){
61
					throw new RuntimeException("Cannot create the index configuration: includeFields and excludeFields are both not null");
62
				}
63
				return new EntityConfig((Boolean) ((Map<String, ?>) value.unwrapped()).get("dups"), links, includeFields, excludeFields);
51
				links.put(relDescriptor, ld);
64 52
			}
53
			final List<String> includeFields = (List<String>) ((Map<String, ?>) value.unwrapped()).get("includeFields");
54
			final List<String> excludeFields = (List<String>) ((Map<String, ?>) value.unwrapped()).get("excludeFields");
55
			if(CollectionUtils.isNotEmpty(includeFields) & CollectionUtils.isNotEmpty(excludeFields)){
56
				throw new RuntimeException("Cannot create the index configuration: includeFields and excludeFields are both not null");
57
			}
58
			return new EntityConfig((Boolean) ((Map<String, ?>) value.unwrapped()).get("dups"), links, includeFields, excludeFields);
65 59
		});
66 60

  
67 61
		final EntityConfigTable res = new EntityConfigTable();
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/util/TemplateFactory.java
2 2

  
3 3
import java.io.IOException;
4 4
import java.io.StringWriter;
5
import java.util.Collection;
5 6
import java.util.List;
6 7

  
7 8
import eu.dnetlib.data.proto.TypeProtos.Type;
......
63 64

  
64 65
	public String getRel(final Type type,
65 66
			final String objIdentifier,
66
			final List<String> fields,
67
			final Collection<String> fields,
67 68
			final String semanticclass,
68 69
			final String semantischeme,
69 70
			final boolean inferred,
modules/dnet-mapreduce-jobs/trunk/src/main/java/eu/dnetlib/data/mapreduce/util/XmlRecordFactory.java
36 36
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
37 37
import eu.dnetlib.data.proto.TypeProtos;
38 38
import eu.dnetlib.data.proto.TypeProtos.Type;
39
import eu.dnetlib.miscutils.functional.UnaryFunction;
40 39
import org.apache.commons.lang.StringUtils;
41 40
import org.dom4j.Document;
42 41
import org.dom4j.DocumentException;
......
70 69
	protected Transformer transformer;
71 70

  
72 71
	protected static Predicate<String> instanceFilter = new Predicate<String>() {
73
		final Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "licence");
72
		final Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "licence", "collectedfrom", "dateofacceptance");
74 73
		@Override
75 74
		public boolean apply(final String s) {
76 75
			return instanceFieldFilter.contains(s);
......
264 263
					metadata.addAll(decodeType(d, entityConfigTable.getIncludeFilter(targetType, relDescriptor), relDefaults, true));
265 264
					if (d.getType().equals(Type.result)) {
266 265
						for(Instance i : cachedTarget.getResult().getInstanceList()) {
267
							metadata.addAll(listFields(i, entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true));
266
							final List<String> fields = listFields(i, entityConfigTable.getIncludeFilter(targetType, relDecoder.getRelDescriptor()), false, true);
267
							metadata.addAll(fields);
268 268
						}
269 269
					}
270 270
				}
......
295 295

  
296 296
				final String relId = (ld != null) && !ld.isSymmetric() ? relDecoder.getRelTargetId() : relDecoder.getRelSourceId();
297 297

  
298
				rels.add(templateFactory.getRel(targetType, relId, metadata, semanticclass, semanticscheme, info.getInferred(), info.getTrust(),
298
				rels.add(templateFactory.getRel(targetType, relId, Sets.newHashSet(metadata), semanticclass, semanticscheme, info.getInferred(), info.getTrust(),
299 299
						info.getInferenceprovenance(), info.getProvenanceaction().getClassid()));
300 300
			}
301 301
		}
......
314 314
		final OafEntityDecoder entity = mainEntity.decodeEntity();
315 315
		if (entity.getType().equals(Type.result)) {
316 316
			for (final Instance instance : ((Result) entity.getEntity()).getInstanceList()) {
317

  
318

  
319 317
				children.add(templateFactory.getInstance(instance.getHostedby().getKey(), listFields(instance, instanceFilter, false, false),
320
						listMap(instance.getUrlList(), new UnaryFunction<String, String>() {
321

  
322
							@Override
323
							public String evaluate(final String identifier) {
324
								return templateFactory.getWebResource(identifier);
325
							}
326
						})));
318
						listMap(instance.getUrlList(), identifier -> templateFactory.getWebResource(identifier))));
327 319
			}
328 320
			for (final ExternalReference er : ((Result) entity.getEntity()).getExternalReferenceList()) {
329 321
				// Set<String> filters = entityConfigTable.getFilter(Type.result, RelType.resultResult);
......
505 497
			}
506 498

  
507 499
			if (Context.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
508
				contextes.add(((Result.Context) o).getId());
500
				contextes.add(((Context) o).getId());
509 501
			}
510 502

  
511 503
			if (ExtraInfo.getDescriptor().equals(fd.getMessageType()) && (o != null)) {

Also available in: Unified diff