Revision 44910
Added by Alessia Bardi almost 8 years ago
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.0.0/src/main/java/eu/dnetlib/openaire/exporter/model/ProjectDetail.java | ||
---|---|---|
1 |
package eu.dnetlib.openaire.exporter.model; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.io.StringReader; |
|
5 |
import java.io.StringWriter; |
|
6 |
import java.util.List; |
|
7 |
|
|
8 |
import com.google.gson.Gson; |
|
9 |
import org.supercsv.cellprocessor.Optional; |
|
10 |
import org.supercsv.cellprocessor.ift.CellProcessor; |
|
11 |
import org.supercsv.cellprocessor.ift.StringCellProcessor; |
|
12 |
import org.supercsv.io.CsvBeanReader; |
|
13 |
import org.supercsv.io.CsvBeanWriter; |
|
14 |
import org.supercsv.io.ICsvBeanReader; |
|
15 |
import org.supercsv.io.ICsvBeanWriter; |
|
16 |
import org.supercsv.prefs.CsvPreference; |
|
17 |
import org.supercsv.util.CsvContext; |
|
18 |
|
|
19 |
/** |
|
20 |
* Created by claudio on 22/09/16. |
|
21 |
*/ |
|
22 |
public class ProjectDetail { |
|
23 |
|
|
24 |
private static final String[] NAMEMAPPING = { "projectId", "acronym", "code", "optional1", "optional2", "jsonextrainfo", "fundingPath" }; |
|
25 |
|
|
26 |
private String projectId; |
|
27 |
private String acronym; |
|
28 |
private String code; |
|
29 |
private String optional1; |
|
30 |
private String optional2; |
|
31 |
private String jsonextrainfo; |
|
32 |
private List<String> fundingPath; |
|
33 |
|
|
34 |
public ProjectDetail() {} |
|
35 |
|
|
36 |
public static ProjectDetail fromJson(final String json) { |
|
37 |
return new Gson().fromJson(json, ProjectDetail.class); |
|
38 |
} |
|
39 |
|
|
40 |
public static ProjectDetail fromCSV(final String csv) throws IOException { |
|
41 |
ICsvBeanReader beanReader = null; |
|
42 |
try { |
|
43 |
beanReader = new CsvBeanReader(new StringReader(csv), CsvPreference.STANDARD_PREFERENCE); |
|
44 |
return beanReader.read(ProjectDetail.class, NAMEMAPPING, getProcessors(new StringCellProcessor() { |
|
45 |
@Override |
|
46 |
public Object execute(final Object value, final CsvContext context) { |
|
47 |
return new Gson().fromJson(value.toString(), List.class); |
|
48 |
} |
|
49 |
})); |
|
50 |
} finally { |
|
51 |
if (beanReader != null) { |
|
52 |
beanReader.close(); |
|
53 |
} |
|
54 |
} |
|
55 |
} |
|
56 |
|
|
57 |
/** |
|
58 |
* Sets up the processors used for the examples. There are 10 CSV columns, so 10 processors are defined. Empty |
|
59 |
* columns are read as null (hence the NotNull() for mandatory columns). |
|
60 |
* |
|
61 |
* @return the cell processors |
|
62 |
*/ |
|
63 |
private static CellProcessor[] getProcessors(final CellProcessor fundingPathProcessor) { |
|
64 |
return new CellProcessor[] { |
|
65 |
new Optional(), // projectId |
|
66 |
new Optional(), // acronym |
|
67 |
new Optional(), // code |
|
68 |
new Optional(), // optional1 |
|
69 |
new Optional(), // optional2 |
|
70 |
new Optional(), // jsonextrainfo |
|
71 |
fundingPathProcessor |
|
72 |
}; |
|
73 |
} |
|
74 |
|
|
75 |
public String asJson() { |
|
76 |
return new Gson().toJson(this) + '\n'; |
|
77 |
} |
|
78 |
|
|
79 |
public String asCSV() throws IOException { |
|
80 |
final StringWriter sb = new StringWriter(); |
|
81 |
try (ICsvBeanWriter beanWriter = new CsvBeanWriter(sb, CsvPreference.STANDARD_PREFERENCE)) { |
|
82 |
beanWriter.write(this, NAMEMAPPING, getProcessors(new StringCellProcessor() { |
|
83 |
@Override |
|
84 |
public Object execute(final Object value, final CsvContext context) { |
|
85 |
return new Gson().toJson(value); |
|
86 |
} |
|
87 |
})); |
|
88 |
beanWriter.flush(); |
|
89 |
} |
|
90 |
|
|
91 |
return sb.toString(); |
|
92 |
} |
|
93 |
|
|
94 |
public String getProjectId() { |
|
95 |
return projectId; |
|
96 |
} |
|
97 |
|
|
98 |
public ProjectDetail setProjectId(final String projectId) { |
|
99 |
this.projectId = projectId; |
|
100 |
return this; |
|
101 |
} |
|
102 |
|
|
103 |
public String getAcronym() { |
|
104 |
return acronym; |
|
105 |
} |
|
106 |
|
|
107 |
public ProjectDetail setAcronym(final String acronym) { |
|
108 |
this.acronym = acronym; |
|
109 |
return this; |
|
110 |
} |
|
111 |
|
|
112 |
public String getCode() { |
|
113 |
return code; |
|
114 |
} |
|
115 |
|
|
116 |
public ProjectDetail setCode(final String code) { |
|
117 |
this.code = code; |
|
118 |
return this; |
|
119 |
} |
|
120 |
|
|
121 |
public String getOptional1() { |
|
122 |
return optional1; |
|
123 |
} |
|
124 |
|
|
125 |
public ProjectDetail setOptional1(final String optional1) { |
|
126 |
this.optional1 = optional1; |
|
127 |
return this; |
|
128 |
} |
|
129 |
|
|
130 |
public String getOptional2() { |
|
131 |
return optional2; |
|
132 |
} |
|
133 |
|
|
134 |
public ProjectDetail setOptional2(final String optional2) { |
|
135 |
this.optional2 = optional2; |
|
136 |
return this; |
|
137 |
} |
|
138 |
|
|
139 |
public String getJsonextrainfo() { |
|
140 |
return jsonextrainfo; |
|
141 |
} |
|
142 |
|
|
143 |
public ProjectDetail setJsonextrainfo(final String jsonextrainfo) { |
|
144 |
this.jsonextrainfo = jsonextrainfo; |
|
145 |
return this; |
|
146 |
} |
|
147 |
|
|
148 |
public List<String> getFundingPath() { |
|
149 |
return fundingPath; |
|
150 |
} |
|
151 |
|
|
152 |
public ProjectDetail setFundingPath(final List<String> fundingPath) { |
|
153 |
this.fundingPath = fundingPath; |
|
154 |
return this; |
|
155 |
} |
|
156 |
|
|
157 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.0.0/src/test/java/eu/dnetlib/openaire/exporter/model/ProjectTest.java | ||
---|---|---|
1 |
package eu.dnetlib.openaire.exporter.model; |
|
2 |
|
|
3 |
import org.junit.Before; |
|
4 |
import org.junit.Test; |
|
5 |
|
|
6 |
import static org.junit.Assert.assertEquals; |
|
7 |
|
|
8 |
public class ProjectTest { |
|
9 |
|
|
10 |
Project pMZOS; |
|
11 |
Project pFP7; |
|
12 |
|
|
13 |
@Before |
|
14 |
public void setUp() throws Exception { |
|
15 |
pMZOS = new Project() |
|
16 |
.setFunder("MZOS") |
|
17 |
.setJurisdiction("HR") |
|
18 |
.setFundingpathid("irb_hr______::MZOS") |
|
19 |
.setAcronym("") |
|
20 |
.setTitle("Project Title") |
|
21 |
.setCode("115-1152437-2500") |
|
22 |
.setStartdate("2007-01-01") |
|
23 |
.setEnddate("2009-01-01"); |
|
24 |
|
|
25 |
pFP7 = new Project() |
|
26 |
.setFunder("EC") |
|
27 |
.setJurisdiction("EU") |
|
28 |
.setFundingpathid("ec__________::EC::FP7::SP1::NMP") |
|
29 |
.setAcronym("REFFIBRE") |
|
30 |
.setTitle("Project Title") |
|
31 |
.setCode("604187") |
|
32 |
.setStartdate("2013-11-01") |
|
33 |
.setEnddate("20015-01-01"); |
|
34 |
} |
|
35 |
|
|
36 |
@Test |
|
37 |
public void testIdNamespaceMZOS(){ |
|
38 |
String ns = pMZOS.getIdnamespace(); |
|
39 |
assertEquals("info:eu-repo/grantAgreement/MZOS//115-1152437-2500/HR", ns); |
|
40 |
} |
|
41 |
|
|
42 |
@Test |
|
43 |
public void testIdNamespaceFP7(){ |
|
44 |
String ns = pFP7.getIdnamespace(); |
|
45 |
assertEquals("info:eu-repo/grantAgreement/EC/FP7/604187/EU", ns); |
|
46 |
} |
|
47 |
|
|
48 |
|
|
49 |
|
|
50 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.0.0/src/test/java/eu/dnetlib/data/mapreduce/util/OafDecoderTest.java | ||
---|---|---|
1 |
package eu.dnetlib.data.mapreduce.util; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertFalse; |
|
4 |
import static org.junit.Assert.assertNotNull; |
|
5 |
|
|
6 |
import java.util.List; |
|
7 |
|
|
8 |
import org.junit.Test; |
|
9 |
|
|
10 |
import eu.dnetlib.data.proto.KindProtos.Kind; |
|
11 |
import eu.dnetlib.miscutils.functional.xml.IndentXmlString; |
|
12 |
|
|
13 |
public class OafDecoderTest { |
|
14 |
|
|
15 |
@Test |
|
16 |
public void testAsXml() { |
|
17 |
|
|
18 |
final OafDecoder decoder = OafTest.embed(OafTest.getResult("50|id_1"), Kind.entity); |
|
19 |
|
|
20 |
assertNotNull(decoder); |
|
21 |
|
|
22 |
assertNotNull(decoder.asXml()); |
|
23 |
|
|
24 |
System.out.println(IndentXmlString.apply(decoder.asXml())); |
|
25 |
|
|
26 |
} |
|
27 |
|
|
28 |
@Test |
|
29 |
public void testGetFieldValues() { |
|
30 |
final OafDecoder decoder = OafTest.embed(OafTest.getResult("50|id_1"), Kind.entity); |
|
31 |
|
|
32 |
final String path = "result/metadata/title/value"; |
|
33 |
final List<String> titles = decoder.decodeEntity().getFieldValues(path); |
|
34 |
|
|
35 |
assertNotNull(titles); |
|
36 |
assertFalse(titles.isEmpty()); |
|
37 |
} |
|
38 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.0.0/src/test/java/eu/dnetlib/pace/AbstractProtoPaceTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace; |
|
2 |
|
|
3 |
import java.io.IOException; |
|
4 |
import java.io.StringWriter; |
|
5 |
import java.util.ArrayList; |
|
6 |
import java.util.List; |
|
7 |
|
|
8 |
import com.google.common.collect.Lists; |
|
9 |
import org.apache.commons.io.IOUtils; |
|
10 |
import org.apache.commons.lang.RandomStringUtils; |
|
11 |
import org.apache.commons.lang.StringUtils; |
|
12 |
|
|
13 |
import com.google.gson.Gson; |
|
14 |
|
|
15 |
import eu.dnetlib.data.mapreduce.util.OafTest; |
|
16 |
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier; |
|
17 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
18 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty.Builder; |
|
19 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
20 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
21 |
import eu.dnetlib.data.proto.OrganizationProtos.Organization; |
|
22 |
import eu.dnetlib.data.proto.PersonProtos.Person; |
|
23 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
|
24 |
import eu.dnetlib.pace.config.Config; |
|
25 |
import eu.dnetlib.pace.config.DedupConfig; |
|
26 |
import eu.dnetlib.pace.config.Type; |
|
27 |
import eu.dnetlib.pace.model.Field; |
|
28 |
import eu.dnetlib.pace.model.FieldValueImpl; |
|
29 |
import eu.dnetlib.pace.model.MapDocument; |
|
30 |
import eu.dnetlib.pace.model.ProtoDocumentBuilder; |
|
31 |
import eu.dnetlib.pace.model.gt.GTAuthor; |
|
32 |
import eu.dnetlib.pace.model.gt.GTAuthorMapper; |
|
33 |
|
|
34 |
public abstract class AbstractProtoPaceTest extends OafTest { |
|
35 |
|
|
36 |
protected DedupConfig getResultFullConf() { |
|
37 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.full.pace.conf")); |
|
38 |
} |
|
39 |
|
|
40 |
protected DedupConfig getResultSimpleConf() { |
|
41 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.simple.pace.conf")); |
|
42 |
} |
|
43 |
|
|
44 |
protected DedupConfig getResultConf() { |
|
45 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.pace.conf")); |
|
46 |
} |
|
47 |
|
|
48 |
protected DedupConfig getOrganizationSimpleConf() { |
|
49 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/organization.pace.conf")); |
|
50 |
} |
|
51 |
|
|
52 |
protected DedupConfig getResultAuthorsConf() { |
|
53 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.authors.pace.conf")); |
|
54 |
} |
|
55 |
|
|
56 |
protected DedupConfig getPersonConf() { |
|
57 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/person.pace.conf")); |
|
58 |
} |
|
59 |
|
|
60 |
protected DedupConfig getResultProdConf() { |
|
61 |
return DedupConfig.load(readFromClasspath("/eu/dnetlib/pace/result.prod.pace.conf")); |
|
62 |
} |
|
63 |
|
|
64 |
protected MapDocument person(final Config conf, final String id, final Oaf oaf) { |
|
65 |
return ProtoDocumentBuilder.newInstance(id, oaf.getEntity(), conf.model()); |
|
66 |
} |
|
67 |
|
|
68 |
protected Oaf getPersonGT(final String path) { |
|
69 |
return new GTAuthorMapper().map(getGTAuthor(path)); |
|
70 |
} |
|
71 |
|
|
72 |
protected GTAuthor getGTAuthor(final String path) { |
|
73 |
|
|
74 |
final Gson gson = new Gson(); |
|
75 |
|
|
76 |
final String json = readFromClasspath(path); |
|
77 |
|
|
78 |
final GTAuthor gta = gson.fromJson(json, GTAuthor.class); |
|
79 |
|
|
80 |
return gta; |
|
81 |
} |
|
82 |
|
|
83 |
private String readFromClasspath(final String filename) { |
|
84 |
final StringWriter sw = new StringWriter(); |
|
85 |
try { |
|
86 |
IOUtils.copy(getClass().getResourceAsStream(filename), sw); |
|
87 |
return sw.toString(); |
|
88 |
} catch (final IOException e) { |
|
89 |
throw new RuntimeException("cannot load resource from classpath: " + filename); |
|
90 |
} |
|
91 |
} |
|
92 |
|
|
93 |
protected MapDocument result(final Config config, final String id, final String title) { |
|
94 |
return result(config, id, title, null, new ArrayList<String>(), null); |
|
95 |
} |
|
96 |
|
|
97 |
protected MapDocument result(final Config config, final String id, final String title, final String date) { |
|
98 |
return result(config, id, title, date, new ArrayList<String>(), null); |
|
99 |
} |
|
100 |
|
|
101 |
protected MapDocument result(final Config config, final String id, final String title, final String date, final List<String> pid) { |
|
102 |
return result(config, id, title, date, pid, null); |
|
103 |
} |
|
104 |
|
|
105 |
protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid) { |
|
106 |
return result(config, id, title, date, pid, null); |
|
107 |
} |
|
108 |
|
|
109 |
protected MapDocument result(final Config config, final String id, final String title, final String date, final String pid, final List<String> authors) { |
|
110 |
return result(config, id, title, date, Lists.newArrayList(pid), authors); |
|
111 |
} |
|
112 |
|
|
113 |
protected MapDocument result(final Config config, final String id, final String title, final String date, final List<String> pid, final List<String> authors) { |
|
114 |
final Result.Metadata.Builder metadata = Result.Metadata.newBuilder(); |
|
115 |
if (!StringUtils.isBlank(title)) { |
|
116 |
metadata.addTitle(getStruct(title, getQualifier("main title", "dnet:titles"))); |
|
117 |
metadata.addTitle(getStruct(RandomStringUtils.randomAlphabetic(10), getQualifier("alternative title", "dnet:titles"))); |
|
118 |
} |
|
119 |
if (!StringUtils.isBlank(date)) { |
|
120 |
metadata.setDateofacceptance(sf(date)); |
|
121 |
} |
|
122 |
|
|
123 |
final OafEntity.Builder entity = oafEntity(id, eu.dnetlib.data.proto.TypeProtos.Type.result); |
|
124 |
final Result.Builder result = Result.newBuilder().setMetadata(metadata); |
|
125 |
|
|
126 |
if (authors != null) { |
|
127 |
for (final String author : authors) { |
|
128 |
result.addAuthor(person(author)); |
|
129 |
} |
|
130 |
} |
|
131 |
|
|
132 |
entity.setResult(result); |
|
133 |
|
|
134 |
if (pid != null) { |
|
135 |
for(String p : pid) { |
|
136 |
if (!StringUtils.isBlank(p)) { |
|
137 |
entity.addPid(sp(p, "doi")); |
|
138 |
//entity.addPid(sp(RandomStringUtils.randomAlphabetic(10), "oai")); |
|
139 |
} |
|
140 |
} |
|
141 |
} |
|
142 |
|
|
143 |
final OafEntity build = entity.build(); |
|
144 |
return ProtoDocumentBuilder.newInstance(id, build, config.model()); |
|
145 |
} |
|
146 |
|
|
147 |
private Person.Builder person(final String author) { |
|
148 |
final Person.Builder person = Person.newBuilder(); |
|
149 |
|
|
150 |
final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(author, false); |
|
151 |
final Person.Metadata.Builder metadata = Person.Metadata.newBuilder(); |
|
152 |
if (p.isAccurate()) { |
|
153 |
metadata.setFirstname(sf(p.getNormalisedFirstName())); |
|
154 |
metadata.addSecondnames(sf(p.getNormalisedSurname())); |
|
155 |
metadata.setFullname(sf(p.getNormalisedFullname())); |
|
156 |
} else { |
|
157 |
metadata.setFullname(sf(p.getOriginal())); |
|
158 |
} |
|
159 |
|
|
160 |
return person.setMetadata(metadata); |
|
161 |
} |
|
162 |
|
|
163 |
private OafEntity.Builder oafEntity(final String id, final eu.dnetlib.data.proto.TypeProtos.Type type) { |
|
164 |
final OafEntity.Builder entity = OafEntity.newBuilder().setId(id).setType(type); |
|
165 |
return entity; |
|
166 |
} |
|
167 |
|
|
168 |
protected MapDocument organization(final Config config, final String id, final String legalName) { |
|
169 |
return organization(config, id, legalName, null); |
|
170 |
} |
|
171 |
|
|
172 |
protected MapDocument organization(final Config config, final String id, final String legalName, final String legalShortName) { |
|
173 |
final Organization.Metadata.Builder metadata = Organization.Metadata.newBuilder(); |
|
174 |
if (legalName != null) { |
|
175 |
metadata.setLegalname(sf(legalName)); |
|
176 |
} |
|
177 |
if (legalShortName != null) { |
|
178 |
metadata.setLegalshortname(sf(legalShortName)); |
|
179 |
} |
|
180 |
|
|
181 |
final OafEntity.Builder entity = oafEntity(id, eu.dnetlib.data.proto.TypeProtos.Type.result); |
|
182 |
entity.setOrganization(Organization.newBuilder().setMetadata(metadata)); |
|
183 |
|
|
184 |
return ProtoDocumentBuilder.newInstance(id, entity.build(), config.model()); |
|
185 |
} |
|
186 |
|
|
187 |
private StructuredProperty sp(final String pid, final String type) { |
|
188 |
final Builder pidSp = |
|
189 |
StructuredProperty.newBuilder().setValue(pid) |
|
190 |
.setQualifier(Qualifier.newBuilder().setClassid(type).setClassname(type).setSchemeid("dnet:pid_types").setSchemename("dnet:pid_types")); |
|
191 |
return pidSp.build(); |
|
192 |
} |
|
193 |
|
|
194 |
protected Field title(final String s) { |
|
195 |
return new FieldValueImpl(Type.String, "title", s); |
|
196 |
} |
|
197 |
|
|
198 |
protected static StructuredProperty.Builder getStruct(final String value, final Qualifier.Builder qualifier) { |
|
199 |
return StructuredProperty.newBuilder().setValue(value).setQualifier(qualifier); |
|
200 |
} |
|
201 |
|
|
202 |
/* |
|
203 |
* protected static StringField.Builder sf(final String s) { return StringField.newBuilder().setValue(s); } |
|
204 |
* |
|
205 |
* protected static Qualifier.Builder getQualifier(final String classname, final String schemename) { return |
|
206 |
* Qualifier.newBuilder().setClassid(classname).setClassname(classname).setSchemeid(schemename).setSchemename(schemename); } |
|
207 |
*/ |
|
208 |
|
|
209 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.0.0/pom.xml | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> |
|
3 |
<parent> |
|
4 |
<groupId>eu.dnetlib</groupId> |
|
5 |
<artifactId>dnet-parent</artifactId> |
|
6 |
<version>1.0.0</version> |
|
7 |
<relativePath /> |
|
8 |
</parent> |
|
9 |
<modelVersion>4.0.0</modelVersion> |
|
10 |
<groupId>eu.dnetlib</groupId> |
|
11 |
<artifactId>dnet-openaireplus-mapping-utils</artifactId> |
|
12 |
<packaging>jar</packaging> |
|
13 |
<version>6.0.0</version> |
|
14 |
<scm> |
|
15 |
<developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet40/modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.0.0</developerConnection> |
|
16 |
</scm> |
|
17 |
<dependencies> |
|
18 |
<dependency> |
|
19 |
<groupId>com.google.guava</groupId> |
|
20 |
<artifactId>guava</artifactId> |
|
21 |
<version>${google.guava.version}</version> |
|
22 |
</dependency> |
|
23 |
<dependency> |
|
24 |
<groupId>junit</groupId> |
|
25 |
<artifactId>junit</artifactId> |
|
26 |
<version>${junit.version}</version> |
|
27 |
<scope>test</scope> |
|
28 |
</dependency> |
|
29 |
<dependency> |
|
30 |
<groupId>commons-codec</groupId> |
|
31 |
<artifactId>commons-codec</artifactId> |
|
32 |
<version>${commons.codec.version}</version> |
|
33 |
</dependency> |
|
34 |
<dependency> |
|
35 |
<groupId>dom4j</groupId> |
|
36 |
<artifactId>dom4j</artifactId> |
|
37 |
<version>${dom4j.version}</version> |
|
38 |
<exclusions> |
|
39 |
<exclusion> |
|
40 |
<artifactId>xml-apis</artifactId> |
|
41 |
<groupId>xml-apis</groupId> |
|
42 |
</exclusion> |
|
43 |
</exclusions> |
|
44 |
</dependency> |
|
45 |
<dependency> |
|
46 |
<groupId>net.sf.supercsv</groupId> |
|
47 |
<artifactId>super-csv</artifactId> |
|
48 |
<version>2.4.0</version> |
|
49 |
</dependency> |
|
50 |
<dependency> |
|
51 |
<groupId>eu.dnetlib</groupId> |
|
52 |
<artifactId>dnet-openaire-data-protos</artifactId> |
|
53 |
<version>[3.7.4]</version> |
|
54 |
</dependency> |
|
55 |
<dependency> |
|
56 |
<groupId>eu.dnetlib</groupId> |
|
57 |
<artifactId>dnet-pace-core</artifactId> |
|
58 |
<version>[2.0.0,3.0.0)</version> |
|
59 |
</dependency> |
|
60 |
<dependency> |
|
61 |
<groupId>eu.dnetlib</groupId> |
|
62 |
<artifactId>cnr-misc-utils</artifactId> |
|
63 |
<version>[1.0.0,2.0.0)</version> |
|
64 |
</dependency> |
|
65 |
<dependency> |
|
66 |
<groupId>eu.dnetlib</groupId> |
|
67 |
<artifactId>dnet-hadoop-commons</artifactId> |
|
68 |
<version>[2.0.0,3.0.0)</version> |
|
69 |
</dependency> |
|
70 |
<dependency> |
|
71 |
<groupId>eu.dnetlib</groupId> |
|
72 |
<artifactId>dnet-index-solr-common</artifactId> |
|
73 |
<version>[1.0.0,2.0.0)</version> |
|
74 |
</dependency> |
|
75 |
<dependency> |
|
76 |
<groupId>com.googlecode.protobuf-java-format</groupId> |
|
77 |
<artifactId>protobuf-java-format</artifactId> |
|
78 |
<version>1.2</version> |
|
79 |
</dependency> |
|
80 |
<dependency> |
|
81 |
<groupId>eu.dnetlib</groupId> |
|
82 |
<artifactId>dnet-openaireplus-profiles</artifactId> |
|
83 |
<version>[1.0.0,2.0.0)</version> |
|
84 |
<scope>test</scope> |
|
85 |
</dependency> |
|
86 |
</dependencies> |
|
87 |
</project> |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.0.0/src/test/java/eu/dnetlib/pace/model/gt/AuthorTest.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.model.gt; |
|
2 |
|
|
3 |
import static org.junit.Assert.assertTrue; |
|
4 |
|
|
5 |
import java.util.Set; |
|
6 |
|
|
7 |
import org.junit.Test; |
|
8 |
|
|
9 |
import com.google.common.collect.Sets; |
|
10 |
|
|
11 |
import eu.dnetlib.pace.model.gt.Author; |
|
12 |
import eu.dnetlib.pace.model.gt.Authors; |
|
13 |
|
|
14 |
public class AuthorTest { |
|
15 |
|
|
16 |
@Test |
|
17 |
public void test() { |
|
18 |
final Set<Author> s1 = getAuthors(3); |
|
19 |
final Set<Author> s2 = getAuthors(3); |
|
20 |
|
|
21 |
final Set<Author> i = Sets.intersection(s1, s2); |
|
22 |
|
|
23 |
System.out.println(i); |
|
24 |
|
|
25 |
assertTrue(i.size() == 3); |
|
26 |
|
|
27 |
} |
|
28 |
|
|
29 |
@Test |
|
30 |
public void test1() { |
|
31 |
final Authors a1 = new Authors(a("1", "Wang, M.")); |
|
32 |
final Authors a2 = new Authors(a("1", "Wang, M.")); |
|
33 |
|
|
34 |
final Set<Author> i = Sets.intersection(a1, a2); |
|
35 |
|
|
36 |
assertTrue(i.size() == 1); |
|
37 |
|
|
38 |
} |
|
39 |
|
|
40 |
private Set<Author> getAuthors(final int n) { |
|
41 |
final Set<Author> s = Sets.newHashSet(); |
|
42 |
|
|
43 |
for (int i = 0; i < n; i++) { |
|
44 |
s.add(a(i + "", "name" + i)); |
|
45 |
} |
|
46 |
return s; |
|
47 |
} |
|
48 |
|
|
49 |
private Author a(final String id, final String fullname) { |
|
50 |
final Author a = new Author(); |
|
51 |
a.setId(id); |
|
52 |
a.setFullname(fullname); |
|
53 |
return a; |
|
54 |
} |
|
55 |
|
|
56 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.0.0/src/main/java/eu/dnetlib/pace/model/ProtoDocumentBuilder.java | ||
---|---|---|
1 |
package eu.dnetlib.pace.model; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.Map; |
|
5 |
|
|
6 |
import com.google.common.collect.Maps; |
|
7 |
import com.google.protobuf.GeneratedMessage; |
|
8 |
|
|
9 |
import eu.dnetlib.data.transform.AbstractProtoMapper; |
|
10 |
|
|
11 |
public class ProtoDocumentBuilder extends AbstractProtoMapper { |
|
12 |
|
|
13 |
public static MapDocument newInstance(final String id, final GeneratedMessage proto, final List<FieldDef> fields) { |
|
14 |
final Map<String, Field> fieldMap = new ProtoDocumentBuilder().generateFieldMap(proto, fields); |
|
15 |
return new MapDocument(id, fieldMap); |
|
16 |
} |
|
17 |
|
|
18 |
private Map<String, Field> generateFieldMap(final GeneratedMessage proto, final List<FieldDef> fields) { |
|
19 |
final Map<String, Field> fieldMap = Maps.newHashMap(); |
|
20 |
|
|
21 |
for (final FieldDef fd : fields) { |
|
22 |
|
|
23 |
final FieldList fl = new FieldListImpl(fd.getName(), fd.getType()); |
|
24 |
|
|
25 |
for (final Object o : processPath(proto, fd.getPathList(), fd.getType())) { |
|
26 |
|
|
27 |
fl.add(new FieldValueImpl(fd.getType(), fd.getName(), o)); |
|
28 |
} |
|
29 |
|
|
30 |
fieldMap.put(fd.getName(), fl); |
|
31 |
} |
|
32 |
|
|
33 |
return fieldMap; |
|
34 |
} |
|
35 |
|
|
36 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.0.0/src/main/resources/eu/dnetlib/actionmanager/xslt/rels2actions.xslt | ||
---|---|---|
1 |
<?xml version="1.0" encoding="UTF-8"?> |
|
2 |
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" |
|
3 |
xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dr="http://www.driver-repository.eu/namespace/dr" |
|
4 |
xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:oaa="http://namespace.openaire.eu/oaa" |
|
5 |
xmlns:oaf="http://namespace.openaire.eu/oaf" xmlns:dnet="eu.dnetlib.data.transform.xml.OafToHbaseXsltFunctions" |
|
6 |
xmlns:exslt="http://exslt.org/common" |
|
7 |
xmlns:action="http://namespace.openaire.eu/action" version="1.0" |
|
8 |
extension-element-prefixes="exslt" |
|
9 |
exclude-result-prefixes="xsl dc dr dri oaa oaf dnet exslt"> |
|
10 |
|
|
11 |
<xsl:output omit-xml-declaration="yes" indent="yes" /> |
|
12 |
|
|
13 |
<xsl:param name="trust" select="string('0.9')" /> |
|
14 |
<xsl:param name="provenance" select="string('UNKNOWN')" /> |
|
15 |
|
|
16 |
<xsl:template match="/*">

	<!-- Always emits a single ACTIONS element: when the input holds no RELATION the for-each below
	     produces nothing and ACTIONS stays empty. Each resultProject RELATION yields two ACTION
	     elements, one per direction of the relationship. -->
	<ACTIONS>
		<xsl:for-each select="//RELATION">
			<xsl:choose>
				<xsl:when test="@type = 'resultProject'">
					<xsl:variable name="resultId" select="@source" />
					<!-- Targets already starting with '40|' are used as they are; any other target is
					     converted through dnet:oafSplitId. -->
					<xsl:variable name="projectId">
						<xsl:choose>
							<xsl:when test="starts-with(@target, '40|')">
								<xsl:value-of select="@target" />
							</xsl:when>
							<xsl:otherwise>
								<xsl:value-of select="dnet:oafSplitId('project', normalize-space(@target))" />
							</xsl:otherwise>
						</xsl:choose>
					</xsl:variable>

					<ACTION targetKey="{$resultId}" targetColumnFamily="resultProject_outcome_isProducedBy" targetColumn="{$projectId}">
						<xsl:value-of select="dnet:rel($resultId, $projectId, 'resultProject', 'outcome', 'isProducedBy', $provenance, $trust)" />
					</ACTION>
					<ACTION targetKey="{$projectId}" targetColumnFamily="resultProject_outcome_produces" targetColumn="{$resultId}">
						<xsl:value-of select="dnet:rel($projectId, $resultId, 'resultProject', 'outcome', 'produces', $provenance, $trust)" />
					</ACTION>
				</xsl:when>

				<!-- TODO: check this block, it caused problems:
				Cannot convert argument/return type in call to method 'eu.dnetlib.actionmanager.actions.infopackage.DMFInfoPackageToHbaseXsltFunctions.oafPersonResultFromInfoPackage(node-set, node-set, result-tree, reference, reference)'
				FATAL ERROR: 'Could not compile stylesheet'

				<xsl:when test="./@type = 'personResult'">
					<xsl:variable name="personId" select="./@source" />
					<xsl:variable name="resultId" select="./@target" />

					<xsl:variable name="pos">
						<xsl:choose>
							<xsl:when test="@position"><xsl:value-of select="@position"/></xsl:when>
							<xsl:otherwise>1000</xsl:otherwise>
						</xsl:choose>
					</xsl:variable>

					<ACTION targetKey="{$personId}" targetColumnFamily="personResult" targetColumn="{$resultId}">
						<xsl:value-of select="dnet:oafPersonResultFromInfoPackage($personId, $resultId, $pos, $provenance, $trust)" />
					</ACTION>
					<ACTION targetKey="{$resultId}" targetColumnFamily="personResult" targetColumn="{$personId}">
						<xsl:value-of select="dnet:oafPersonResultFromInfoPackage($resultId, $personId, $pos, $provenance, $trust)" />
					</ACTION>
				</xsl:when>
				-->
			</xsl:choose>
		</xsl:for-each>
	</ACTIONS>
</xsl:template>
|
76 |
|
|
77 |
</xsl:stylesheet> |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.0.0/src/main/java/eu/dnetlib/data/transform/xml/DbmfToHbaseXsltFunctions.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
|
|
5 |
import com.google.common.collect.Lists; |
|
6 |
import com.google.protobuf.Descriptors.FieldDescriptor; |
|
7 |
import com.google.protobuf.Message.Builder; |
|
8 |
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder; |
|
9 |
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization; |
|
10 |
import eu.dnetlib.data.proto.DatasourceOrganizationProtos.DatasourceOrganization.Provision; |
|
11 |
import eu.dnetlib.data.proto.DatasourceProtos.Datasource; |
|
12 |
import eu.dnetlib.data.proto.FieldTypeProtos.DataInfo; |
|
13 |
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; |
|
14 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
15 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
16 |
import eu.dnetlib.data.proto.OafProtos.OafRel; |
|
17 |
import eu.dnetlib.data.proto.OrganizationProtos.Organization; |
|
18 |
import eu.dnetlib.data.proto.PersonProtos.Person; |
|
19 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult; |
|
20 |
import eu.dnetlib.data.proto.PersonResultProtos.PersonResult.Authorship; |
|
21 |
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization; |
|
22 |
import eu.dnetlib.data.proto.ProjectOrganizationProtos.ProjectOrganization.Participation; |
|
23 |
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson; |
|
24 |
import eu.dnetlib.data.proto.ProjectPersonProtos.ProjectPerson.ContactPerson; |
|
25 |
import eu.dnetlib.data.proto.ProjectProtos.Project; |
|
26 |
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata; |
|
27 |
import eu.dnetlib.data.proto.RelTypeProtos.RelType; |
|
28 |
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType; |
|
29 |
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject; |
|
30 |
import eu.dnetlib.data.proto.ResultProjectProtos.ResultProject.Outcome; |
|
31 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
|
32 |
import eu.dnetlib.data.proto.ResultProtos.Result.Instance; |
|
33 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
34 |
import org.apache.commons.lang.StringUtils; |
|
35 |
import org.w3c.dom.Node; |
|
36 |
import org.w3c.dom.NodeList; |
|
37 |
|
|
38 |
public class DbmfToHbaseXsltFunctions extends CommonDNetXsltFunctions { |
|
39 |
|
|
40 |
public static String oafEntity(final String type, |
|
41 |
final String id, |
|
42 |
final String collectedFromId, |
|
43 |
final String collectedFromName, |
|
44 |
final NodeList identities, |
|
45 |
final String dateOfCollection, |
|
46 |
final String dateOfTransformation, |
|
47 |
final NodeList nodeList) { |
|
48 |
|
|
49 |
final String entityId = OafRowKeyDecoder.decode(id).getKey(); |
|
50 |
List<String> ids = Lists.newArrayList(); |
|
51 |
for(int i = 0; i < identities.getLength(); i++){ |
|
52 |
Node n = identities.item(i); |
|
53 |
String s = n.getTextContent(); |
|
54 |
ids.add(s); |
|
55 |
} |
|
56 |
switch (Type.valueOf(type)) { |
|
57 |
case datasource: |
|
58 |
return serializeOafEntity(nodeList, Type.datasource, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection, |
|
59 |
dateOfTransformation, Datasource.newBuilder()); |
|
60 |
case organization: |
|
61 |
return serializeOafEntity(nodeList, Type.organization, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection, |
|
62 |
dateOfTransformation, Organization.newBuilder()); |
|
63 |
case person: |
|
64 |
return serializeOafEntity(nodeList, Type.person, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection, |
|
65 |
dateOfTransformation, Person.newBuilder()); |
|
66 |
case project: |
|
67 |
return serializeOafEntity(nodeList, Type.project, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection, |
|
68 |
dateOfTransformation, Project.newBuilder()); |
|
69 |
case result: |
|
70 |
return serializeOafEntity(nodeList, Type.result, entityId, getKV(collectedFromId, collectedFromName), ids, dateOfCollection, |
|
71 |
dateOfTransformation ,Result.newBuilder()); |
|
72 |
default: |
|
73 |
throw new IllegalArgumentException("Invalid entity type: " + type); |
|
74 |
} |
|
75 |
} |
|
76 |
|
|
77 |
public static String oafRel(final String relationType, |
|
78 |
final String source, |
|
79 |
final String target, |
|
80 |
final NodeList nodeList, |
|
81 |
final String relClass, |
|
82 |
final String relScheme) { |
|
83 |
return oafRel(relationType, source, target, nodeList, relClass, relScheme, null, null); |
|
84 |
} |
|
85 |
|
|
86 |
public static String oafRel(final String relationType, |
|
87 |
final String source, |
|
88 |
final String target, |
|
89 |
final NodeList nodeList, |
|
90 |
final String relClass, |
|
91 |
final String relScheme, |
|
92 |
final String collectedFromId, |
|
93 |
final String collectedFromName) { |
|
94 |
|
|
95 |
final String eSource = OafRowKeyDecoder.decode(source).getKey(); |
|
96 |
final String eTarget = OafRowKeyDecoder.decode(target).getKey(); |
|
97 |
final RelType relType = RelType.valueOf(relationType); |
|
98 |
|
|
99 |
switch (relType) { |
|
100 |
case datasourceOrganization: |
|
101 |
Provision.Builder provision = Provision.newBuilder().setRelMetadata( |
|
102 |
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Provision.RelName.valueOf(relClass).toString(), relScheme))); |
|
103 |
DatasourceOrganization.Builder dorg = DatasourceOrganization.newBuilder().setProvision(provision); |
|
104 |
|
|
105 |
return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.provision, relClass, collectedFromId, collectedFromName, false, dorg, provision); |
|
106 |
case personResult: |
|
107 |
Authorship.Builder auth = Authorship.newBuilder().setRelMetadata( |
|
108 |
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Authorship.RelName.valueOf(relClass).toString(), relScheme))); |
|
109 |
PersonResult.Builder personResult = PersonResult.newBuilder().setAuthorship(auth); |
|
110 |
|
|
111 |
return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.authorship, relClass, collectedFromId, collectedFromName, false, personResult, auth); |
|
112 |
case projectPerson: |
|
113 |
ContactPerson.Builder contact = ContactPerson.newBuilder().setRelMetadata( |
|
114 |
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(ContactPerson.RelName.valueOf(relClass).toString(), relScheme))); |
|
115 |
ProjectPerson.Builder projectPerson = ProjectPerson.newBuilder().setContactPerson(contact); |
|
116 |
|
|
117 |
return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.contactPerson, relClass, collectedFromId, collectedFromName, false, projectPerson, contact); |
|
118 |
case projectOrganization: |
|
119 |
Participation.Builder participant = Participation.newBuilder().setRelMetadata( |
|
120 |
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Participation.RelName.valueOf(relClass).toString(), relScheme))); |
|
121 |
ProjectOrganization.Builder projectOrganization = ProjectOrganization.newBuilder().setParticipation(participant); |
|
122 |
|
|
123 |
return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.participation, relClass, collectedFromId, collectedFromName, false, projectOrganization, participant); |
|
124 |
case resultProject: |
|
125 |
Outcome.Builder outcome = Outcome.newBuilder().setRelMetadata( |
|
126 |
RelMetadata.newBuilder().setSemantics(getSimpleQualifier(Outcome.RelName.valueOf(relClass).toString(), relScheme))); |
|
127 |
ResultProject.Builder resultProject = ResultProject.newBuilder().setOutcome(outcome); |
|
128 |
|
|
129 |
return serializeOafRel(nodeList, eSource, eTarget, relType, SubRelType.outcome, relClass, collectedFromId, collectedFromName, false, resultProject, outcome); |
|
130 |
default: |
|
131 |
throw new IllegalArgumentException("unhandled relType: " + relationType); |
|
132 |
} |
|
133 |
} |
|
134 |
|
|
135 |
// //////////////////////////////////////////////////////// |
|
136 |
|
|
137 |
protected static String serializeOafEntity(final NodeList nodelist, |
|
138 |
final Type type, |
|
139 |
final String id, |
|
140 |
final KeyValue collectedFrom, |
|
141 |
final List<String> identities, |
|
142 |
final String dateOfCollection, |
|
143 |
final String dateOfTransformation, |
|
144 |
final Builder entity) { |
|
145 |
try { |
|
146 |
final FieldDescriptor md = entity.getDescriptorForType().findFieldByName("metadata"); |
|
147 |
|
|
148 |
final OafEntity.Builder parent = getEntity(type, id, collectedFrom, identities, dateOfCollection, dateOfTransformation, null); |
|
149 |
final Builder metadata = entity.newBuilderForField(md); |
|
150 |
final DataInfo.Builder dataInfo = DataInfo.newBuilder(); |
|
151 |
|
|
152 |
if (type.equals(Type.result)) { |
|
153 |
final Instance.Builder instance = Instance.newBuilder(); |
|
154 |
parseNodelist(nodelist, instance); |
|
155 |
FieldDescriptor instanceDescriptor = Result.getDescriptor().findFieldByName(Instance.getDescriptor().getName()); |
|
156 |
if (instanceDescriptor != null) { |
|
157 |
entity.setField(instanceDescriptor, instance); |
|
158 |
} |
|
159 |
} |
|
160 |
parseNodelist(nodelist, parent, entity, metadata, dataInfo); |
|
161 |
|
|
162 |
final FieldDescriptor entityDescriptor = OafEntity.getDescriptor().findFieldByName(type.toString()); |
|
163 |
|
|
164 |
final Oaf build = getOaf(parent.setField(entityDescriptor, entity.setField(md, metadata.build()).build()), dataInfo); |
|
165 |
|
|
166 |
return base64(build.toByteArray()); |
|
167 |
} catch (Exception e) { |
|
168 |
e.printStackTrace(System.err); |
|
169 |
throw new RuntimeException(e); |
|
170 |
} |
|
171 |
} |
|
172 |
|
|
173 |
protected static String serializeOafRel(final NodeList nodeList, |
|
174 |
final String sourceId, |
|
175 |
final String targetId, |
|
176 |
final RelType relType, |
|
177 |
final SubRelType subRelType, |
|
178 |
final String relClass, |
|
179 |
final String collectedFromId, |
|
180 |
final String collectedFromName, |
|
181 |
final boolean isChild, |
|
182 |
final Builder rel, |
|
183 |
final Builder subRel) { |
|
184 |
try { |
|
185 |
|
|
186 |
final DataInfo.Builder dataInfo = DataInfo.newBuilder(); |
|
187 |
|
|
188 |
parseNodelist(nodeList, rel, subRel, dataInfo); |
|
189 |
|
|
190 |
OafRel.Builder builder = getRel(sourceId, targetId, relType, subRelType, relClass, collectedFromId, collectedFromName, isChild); |
|
191 |
|
|
192 |
FieldDescriptor subRelDescriptor = rel.getDescriptorForType().findFieldByName(subRelType.toString()); |
|
193 |
rel.setField(subRelDescriptor, subRel.build()); |
|
194 |
|
|
195 |
FieldDescriptor relDescriptor = OafRel.getDescriptor().findFieldByName(relType.toString()); |
|
196 |
builder.setField(relDescriptor, rel.build()); |
|
197 |
|
|
198 |
Oaf build = getOaf(builder, dataInfo); |
|
199 |
return base64(build.toByteArray()); |
|
200 |
} catch (Exception e) { |
|
201 |
e.printStackTrace(System.err); |
|
202 |
throw new RuntimeException(e); |
|
203 |
} |
|
204 |
} |
|
205 |
|
|
206 |
private static void parseNodelist(final NodeList nodeList, final Builder... builders) { |
|
207 |
|
|
208 |
for (int i = 0; i < nodeList.getLength(); i++) { |
|
209 |
|
|
210 |
final Node fieldNode = nodeList.item(i); |
|
211 |
final Node attr = fieldNode.getAttributes().getNamedItem("name"); |
|
212 |
|
|
213 |
final String fieldName = attr.getNodeValue(); |
|
214 |
final NodeList children = fieldNode.getChildNodes(); |
|
215 |
|
|
216 |
for (int j = 0; j < children.getLength(); j++) { |
|
217 |
|
|
218 |
final Node child = children.item(j); |
|
219 |
final String childName = child.getLocalName(); |
|
220 |
if ("ITEM".equals(childName) || StringUtils.isBlank(childName)) { |
|
221 |
for (Builder builder : builders) { |
|
222 |
FieldDescriptor desc = builder.getDescriptorForType().findFieldByName(fieldName); |
|
223 |
if (desc != null) { |
|
224 |
String text = getText((StringUtils.isBlank(childName)) ? fieldNode : child); |
|
225 |
if (!StringUtils.isBlank(text)) { |
|
226 |
addField(builder, desc, text); |
|
227 |
} |
|
228 |
} |
|
229 |
} |
|
230 |
} |
|
231 |
} |
|
232 |
} |
|
233 |
} |
|
234 |
|
|
235 |
private static String getText(final Node node) { |
|
236 |
StringBuffer result = new StringBuffer(); |
|
237 |
if (!node.hasChildNodes()) { return ""; } |
|
238 |
|
|
239 |
NodeList list = node.getChildNodes(); |
|
240 |
for (int i = 0; i < list.getLength(); i++) { |
|
241 |
Node subnode = list.item(i); |
|
242 |
if (subnode.getNodeType() == Node.TEXT_NODE) { |
|
243 |
result.append(subnode.getNodeValue()); |
|
244 |
} else if (subnode.getNodeType() == Node.CDATA_SECTION_NODE) { |
|
245 |
result.append(subnode.getNodeValue()); |
|
246 |
} else if (subnode.getNodeType() == Node.ENTITY_REFERENCE_NODE) { |
|
247 |
result.append(getText(subnode)); |
|
248 |
} |
|
249 |
} |
|
250 |
return result.toString().trim(); |
|
251 |
} |
|
252 |
|
|
253 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.0.0/src/test/resources/eu/dnetlib/openaire/exporter/model/projectDetails.csv | ||
---|---|---|
1 |
nih_________::3R01GM073898-02S1,,3R01GM073898-02S1,23188 $,,"{""orgname"":""UNIVERSITY OF CALIFORNIA SAN DIEGO"", ""activity"":""R01"", ""administeringic"":""GM"", ""serialnumber"":""73898"", ""coreprojectnum"":""R01GM073898""}","[""\u003cfundingtree\u003e\n \u003cfunder\u003e\n \u003cid\u003enih_________::NIH\u003c/id\u003e\n \u003cshortname\u003eNIH\u003c/shortname\u003e\n \u003cname\u003eNational Institutes of Health\u003c/name\u003e\n \u003cjurisdiction\u003eUS\u003c/jurisdiction\u003e\n \u003c/funder\u003e\n \u003cfunding_level_0\u003e\n \u003cid\u003enih_________::NIH::NATIONAL_INSTITUTE_OF_GENERAL_MEDICAL_SCIENCES\u003c/id\u003e\n \u003cname\u003eNATIONAL INSTITUTE OF GENERAL MEDICAL SCIENCES\u003c/name\u003e\n \u003cdescription\u003eNATIONAL INSTITUTE OF GENERAL MEDICAL SCIENCES\u003c/description\u003e\n \u003cparent/\u003e\n \u003cclass\u003enih:fundingStream\u003c/class\u003e\n \u003c/funding_level_0\u003e\n \u003c/fundingtree\u003e""]" |
|
2 |
corda_______::100202,RECOMP,100202,JTI-CP-ARTEMIS,http://cordis.europa.eu/fp7/home_en.html,{},"[""\u003cfundingtree\u003e\u003cfunder\u003e\u003cid\u003eec__________::EC\u003c/id\u003e\u003cshortname\u003eEC\u003c/shortname\u003e\u003cname\u003eEuropean Commission\u003c/name\u003e\u003cjurisdiction\u003eEU\u003c/jurisdiction\u003e\u003c/funder\u003e\u003cfunding_level_2\u003e\u003cid\u003eec__________::EC::FP7::SP1::SP1-JTI\u003c/id\u003e\u003cdescription\u003eJoint Technology Initiatives (Annex IV-SP1)\u003c/description\u003e\u003cname\u003eSP1-JTI\u003c/name\u003e\u003cclass\u003eec:program\u003c/class\u003e\u003cparent\u003e\u003cfunding_level_1\u003e\u003cid\u003eec__________::EC::FP7::SP1\u003c/id\u003e\u003cdescription\u003eSP1-Cooperation\u003c/description\u003e\u003cname\u003eSP1\u003c/name\u003e\u003cclass\u003eec:specificprogram\u003c/class\u003e\u003cparent\u003e\u003cfunding_level_0\u003e\u003cid\u003eec__________::EC::FP7\u003c/id\u003e\u003cdescription\u003eSEVENTH FRAMEWORK PROGRAMME\u003c/description\u003e\u003cname\u003eFP7\u003c/name\u003e\u003cparent/\u003e\u003cclass\u003eec:frameworkprogram\u003c/class\u003e\u003c/funding_level_0\u003e\u003c/parent\u003e\u003c/funding_level_1\u003e\u003c/parent\u003e\u003c/funding_level_2\u003e\u003c/fundingtree\u003e""]" |
|
3 |
corda__h2020::633080,MACC-III,633080,SPACE,SPACE,{},"[""\u003cfundingtree\u003e\u003cfunder\u003e\u003cid\u003eec__________::EC\u003c/id\u003e\u003cshortname\u003eEC\u003c/shortname\u003e\u003cname\u003eEuropean Commission\u003c/name\u003e\u003cjurisdiction\u003eEU\u003c/jurisdiction\u003e\u003c/funder\u003e\u003cfunding_level_1\u003e\u003cid\u003eec__________::EC::H2020::CSA\u003c/id\u003e\u003cdescription\u003eCoordination and support action\u003c/description\u003e\u003cname\u003eCSA\u003c/name\u003e\u003cclass\u003eec:h2020toas\u003c/class\u003e\u003cparent\u003e\u003cfunding_level_0\u003e\u003cid\u003eec__________::EC::H2020\u003c/id\u003e\u003cname\u003eH2020\u003c/name\u003e\u003cdescription\u003eHorizon 2020 Framework Programme\u003c/description\u003e\u003cparent/\u003e\u003cclass\u003eec:h2020fundings\u003c/class\u003e\u003c/funding_level_0\u003e\u003c/parent\u003e\u003c/funding_level_1\u003e\u003c/fundingtree\u003e""]" |
|
4 |
nsf_________::0000096,,0000096,,,{},"[""\u003cfundingtree\u003e\u003cfunder\u003e\u003cid\u003ensf_________::NSF\u003c/id\u003e\u003cshortname\u003eNSF\u003c/shortname\u003e\u003cname\u003eNational Science Foundation\u003c/name\u003e\u003cjurisdiction\u003eUS\u003c/jurisdiction\u003e\u003c/funder\u003e\u003cfunding_level_1\u003e\u003cid\u003ensf_________::NSF::OD::OD/OIA\u003c/id\u003e\u003cdescription\u003eOffice of Integrative Activities\u003c/description\u003e\u003cname\u003eOffice of Integrative Activities\u003c/name\u003e\u003cparent\u003e\u003cfunding_level_0\u003e\u003cid\u003ensf_________::NSF::OD\u003c/id\u003e\u003cdescription\u003eOffice of the Director\u003c/description\u003e\u003cname\u003eOffice of the Director\u003c/name\u003e\u003cparent/\u003e\u003cclass\u003ensf:fundingStream\u003c/class\u003e\u003c/funding_level_0\u003e\u003c/parent\u003e\u003c/funding_level_1\u003e\u003c/fundingtree\u003e""]" |
|
5 |
fct_________::100107,PTDC/SAU-ESA/100107/2008,100107,,,{},"[""\u003cfundingtree\u003e\u003cfunder\u003e\u003cid\u003efct_________::FCT\u003c/id\u003e\u003cshortname\u003eFCT\u003c/shortname\u003e\u003cname\u003eFundação para a Ciência e a Tecnologia, I.P.\u003c/name\u003e\u003cjurisdiction\u003ePT\u003c/jurisdiction\u003e\u003c/funder\u003e\u003cfunding_level_0\u003e\u003cid\u003efct_________::FCT::5876-PPCDTI\u003c/id\u003e\u003cdescription\u003e5876-PPCDTI\u003c/description\u003e\u003cname\u003e5876-PPCDTI\u003c/name\u003e\u003cparent/\u003e\u003cclass\u003efct:program\u003c/class\u003e\u003c/funding_level_0\u003e\u003c/fundingtree\u003e""]" |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.0.0/src/main/java/eu/dnetlib/data/transform/xml/OafToHbaseXsltFunctions.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.Map; |
|
5 |
|
|
6 |
import com.google.common.collect.Iterables; |
|
7 |
import com.google.common.collect.Lists; |
|
8 |
import com.google.protobuf.Descriptors.Descriptor; |
|
9 |
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder; |
|
10 |
import eu.dnetlib.data.proto.FieldTypeProtos.KeyValue; |
|
11 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
12 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
13 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
14 |
import eu.dnetlib.data.proto.PersonProtos.Person; |
|
15 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
|
16 |
import eu.dnetlib.data.proto.ResultProtos.Result.Context; |
|
17 |
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference; |
|
18 |
import eu.dnetlib.data.proto.ResultProtos.Result.Instance; |
|
19 |
import eu.dnetlib.data.proto.ResultProtos.Result.Journal; |
|
20 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
21 |
import org.apache.commons.lang.StringUtils; |
|
22 |
import org.w3c.dom.NodeList; |
|
23 |
|
|
24 |
public class OafToHbaseXsltFunctions extends CommonDNetXsltFunctions { |
|
25 |
|
|
26 |
public static String oafResult( |
|
27 |
final String resultId, |
|
28 |
final String provenance, |
|
29 |
final String trust, |
|
30 |
final NodeList about, |
|
31 |
final String hostedbyId, |
|
32 |
final String hostedbyName, |
|
33 |
final String collectedFromId, |
|
34 |
final String collectedFromName, |
|
35 |
final String originalId, |
|
36 |
final String dateOfCollection, |
|
37 |
final String dateOfTransformation, |
|
38 |
final NodeList nodelist) { |
|
39 |
try { |
|
40 |
final String entityId = OafRowKeyDecoder.decode(resultId).getKey(); |
|
41 |
final ValueMap values = ValueMap.parseNodeList(nodelist); |
|
42 |
final Descriptor mDesc = Result.Metadata.getDescriptor(); |
|
43 |
|
|
44 |
final Result.Metadata.Builder metadata = buildMetadata(values, mDesc); |
|
45 |
final Result.Builder result = buildResult(metadata, values, mDesc, hostedbyId, hostedbyName); |
|
46 |
final OafEntity.Builder entity = buildOafEntity(result, entityId, nodelist, getKV(collectedFromId, collectedFromName), originalId); |
|
47 |
entity.setDateofcollection(dateOfCollection) |
|
48 |
.setDateoftransformation(dateOfTransformation).setOaiprovenance(getOAIProvenance(about)); |
|
49 |
|
|
50 |
final Oaf oaf = getOaf(entity, getDataInfo(about, provenance, trust, false, false)); |
|
51 |
return base64(oaf.toByteArray()); |
|
52 |
} catch (final Throwable e) { |
|
53 |
handleException(e, resultId, hostedbyId, hostedbyName, provenance, trust, collectedFromId, collectedFromName, originalId, dateOfCollection); |
|
54 |
} |
|
55 |
return null; |
|
56 |
} |
|
57 |
|
|
58 |
public static String oafResultUpdate(final String resultId, |
|
59 |
final String provenance, |
|
60 |
final String trust, |
|
61 |
final NodeList nodelist, |
|
62 |
final String hostedbyId, |
|
63 |
final String hostedbyName) { |
|
64 |
try { |
|
65 |
final String entityId = OafRowKeyDecoder.decode(resultId).getKey(); |
|
66 |
final ValueMap values = ValueMap.parseNodeList(nodelist); |
|
67 |
final Descriptor mDesc = Result.Metadata.getDescriptor(); |
|
68 |
|
|
69 |
final Result.Metadata.Builder metadata = buildMetadata(values, mDesc); |
|
70 |
final Result.Builder result = buildResult(metadata, values, mDesc, hostedbyId, hostedbyName); |
|
71 |
|
|
72 |
final OafEntity.Builder entity = buildOafEntity(result, entityId, nodelist, null, null); |
|
73 |
final Oaf oaf = getOaf(entity, null); |
|
74 |
return base64(oaf.toByteArray()); |
|
75 |
} catch (final Throwable e) { |
|
76 |
handleException(e, resultId, hostedbyId, hostedbyName, provenance, trust, null, null, null, null); |
|
77 |
} |
|
78 |
return null; |
|
79 |
} |
|
80 |
|
|
81 |
private static OafEntity.Builder buildOafEntity(final Result.Builder result, |
|
82 |
final String entityId, |
|
83 |
final NodeList nodelist, |
|
84 |
KeyValue collectedFrom, |
|
85 |
String originalId) { |
|
86 |
|
|
87 |
final List<StructuredProperty> pids = Lists.newArrayList(); |
|
88 |
pids.addAll(parsePids(nodelist)); |
|
89 |
|
|
90 |
final OafEntity.Builder entity = |
|
91 |
getEntity(Type.result, entityId, collectedFrom, StringUtils.isBlank(originalId) ? null : Lists.newArrayList(originalId), null, null, pids) |
|
92 |
.setResult(result); |
|
93 |
return entity; |
|
94 |
} |
|
95 |
|
|
96 |
private static Result.Metadata.Builder buildMetadata(final ValueMap values, final Descriptor mDesc) { |
|
97 |
final Result.Metadata.Builder metadata = Result.Metadata.newBuilder(); |
|
98 |
|
|
99 |
addStructuredProps(metadata, mDesc.findFieldByName("subject"), values.get("subject"), "keyword", "dnet:subject_classification_typologies"); |
|
100 |
addStructuredProps(metadata, mDesc.findFieldByName("title"), values.get("title"), "main title", "dnet:dataCite_title"); |
|
101 |
for (final String fieldname : Lists.newArrayList("description", "source", "contributor")) { |
|
102 |
if (values.get(fieldname) != null) { |
|
103 |
for (final String s : values.get(fieldname).listValues()) { |
|
104 |
addField(metadata, mDesc.findFieldByName(fieldname), s); |
|
105 |
} |
|
106 |
} |
|
107 |
} |
|
108 |
addField(metadata, mDesc.findFieldByName("language"), setQualifier(getDefaultQualifier("dnet:languages"), values.get("language").listValues())); |
|
109 |
addField(metadata, mDesc.findFieldByName("dateofacceptance"), values.get("dateaccepted").listValues()); |
|
110 |
addField(metadata, mDesc.findFieldByName("publisher"), values.get("publisher").listValues()); |
|
111 |
addField(metadata, mDesc.findFieldByName("embargoenddate"), values.get("embargoenddate").listValues()); |
|
112 |
addField(metadata, mDesc.findFieldByName("storagedate"), values.get("storagedate").listValues()); |
|
113 |
|
|
114 |
addField(metadata, mDesc.findFieldByName("resulttype"), getSimpleQualifier("publication", "dnet:result_typologies")); |
|
115 |
|
|
116 |
addField(metadata, mDesc.findFieldByName("fulltext"), values.get("fulltext").listValues()); |
|
117 |
addField(metadata, mDesc.findFieldByName("format"), values.get("format").listValues()); |
|
118 |
if (values.get("concept") != null) { |
|
119 |
for (final Element e : values.get("concept")) { |
|
120 |
final String id = e.getAttributes().get("id"); |
|
121 |
if (StringUtils.isBlank(id)) throw new IllegalArgumentException("Context id cannot be blank"); |
|
122 |
metadata.addContext(Context.newBuilder().setId(id)); |
|
123 |
} |
|
124 |
} |
|
125 |
if (values.get("journal") != null) { |
|
126 |
for (final Element e : values.get("journal")) { |
|
127 |
|
|
128 |
final Journal.Builder journal = Journal.newBuilder(); |
|
129 |
if (e.getText() != null) { |
|
130 |
journal.setName(e.getText()); |
|
131 |
} |
|
132 |
|
|
133 |
final Map<String, String> attr = e.getAttributes(); |
|
134 |
if (attr != null) { |
|
135 |
if (attr.get("issn") != null) { |
|
136 |
journal.setIssnPrinted(attr.get("issn")); |
|
137 |
} |
|
138 |
if (attr.get("eissn") != null) { |
|
139 |
journal.setIssnOnline(attr.get("eissn")); |
|
140 |
} |
|
141 |
if (attr.get("lissn") != null) { |
|
142 |
journal.setIssnLinking(attr.get("lissn")); |
|
143 |
} |
|
144 |
} |
|
145 |
metadata.setJournal(journal.build()); |
|
146 |
} |
|
147 |
} |
|
148 |
return metadata; |
|
149 |
} |
|
150 |
|
|
151 |
private static Result.Builder buildResult(final Result.Metadata.Builder metadata, |
|
152 |
final ValueMap values, |
|
153 |
final Descriptor mDesc, |
|
154 |
final String hostedbyId, |
|
155 |
final String hostedbyName) { |
|
156 |
final Result.Builder result = Result.newBuilder(); |
|
157 |
if (values.get("creator") != null) { |
|
158 |
for (final String fullname : Iterables.limit(values.get("creator").listValues(), 10)) { |
|
159 |
|
|
160 |
final Person.Metadata.Builder authorMetadata = Person.Metadata.newBuilder(); |
|
161 |
|
|
162 |
authorMetadata.setFullname(sf(fullname)); |
|
163 |
|
|
164 |
final eu.dnetlib.pace.model.Person p = new eu.dnetlib.pace.model.Person(fullname, false); |
|
165 |
if (p.isAccurate()) { |
|
166 |
authorMetadata.setFirstname(sf(p.getNormalisedFirstName())); |
|
167 |
authorMetadata.clearSecondnames().addSecondnames(sf(p.getNormalisedSurname())); |
|
168 |
authorMetadata.setFullname(sf(p.getNormalisedFullname())); |
|
169 |
} |
|
170 |
|
|
171 |
result.addAuthor(Person.newBuilder().setMetadata(authorMetadata)); |
|
172 |
} |
|
173 |
} |
|
174 |
|
|
175 |
final Instance.Builder instance = Instance.newBuilder().setHostedby(getKV(hostedbyId, hostedbyName)); |
|
176 |
|
|
177 |
addField(instance, Instance.getDescriptor().findFieldByName("licence"), |
|
178 |
setQualifier(getDefaultQualifier("dnet:access_modes"), values.get("accessrights").listValues())); |
|
179 |
addField(instance, Instance.getDescriptor().findFieldByName("instancetype"), |
|
180 |
setQualifier(getDefaultQualifier("dnet:publication_resource"), values.get("cobjcategory").listValues())); |
|
181 |
|
|
182 |
if (values.get("identifier") != null) { |
|
183 |
addField(instance, Instance.getDescriptor().findFieldByName("url"), |
|
184 |
Lists.newArrayList(Iterables.filter(values.get("identifier").listValues(), urlFilter))); |
|
185 |
} |
|
186 |
|
|
187 |
result.addInstance(instance); |
|
188 |
|
|
189 |
final List<Element> extrefs = values.get("reference"); |
|
190 |
if (!extrefs.isEmpty()) { |
|
191 |
final Descriptor extDesc = ExternalReference.getDescriptor(); |
|
192 |
for (final Element element : extrefs) { |
|
193 |
final ExternalReference.Builder extref = ExternalReference.newBuilder(); |
|
194 |
addField(extref, extDesc.findFieldByName("url"), element.getText()); |
|
195 |
addField(extref, extDesc.findFieldByName("sitename"), element.getAttributes().get("source")); |
|
196 |
addField(extref, extDesc.findFieldByName("refidentifier"), element.getAttributes().get("identifier")); |
|
197 |
addField(extref, extDesc.findFieldByName("label"), element.getAttributes().get("title")); |
|
198 |
addField(extref, extDesc.findFieldByName("query"), element.getAttributes().get("query")); |
|
199 |
addField(extref, extDesc.findFieldByName("qualifier"), |
|
200 |
setQualifier(getDefaultQualifier("dnet:externalReference_typologies"), Lists.newArrayList(element.getAttributes().get("type"))) |
|
201 |
.build()); |
|
202 |
|
|
203 |
result.addExternalReference(extref); |
|
204 |
} |
|
205 |
} |
|
206 |
|
|
207 |
return result.setMetadata(metadata); |
|
208 |
} |
|
209 |
|
|
210 |
private static void handleException(Throwable e, final String resultId, final String hostedbyId, final String hostedbyName, |
|
211 |
final String provenance, final String trust, final String collectedFromId, final String collectedFromName, |
|
212 |
final String originalId, final String dateOfCollection) { |
|
213 |
System.err.println("resultId: " + resultId); |
|
214 |
if (StringUtils.isNotBlank(hostedbyId)) System.err.println("hostedbyId: " + hostedbyId); |
|
215 |
if (StringUtils.isNotBlank(hostedbyName)) System.err.println("hostedbyName: " + hostedbyName); |
|
216 |
if (StringUtils.isNotBlank(provenance)) System.err.println("provenance: " + provenance); |
|
217 |
if (StringUtils.isNotBlank(trust)) System.err.println("trust: " + trust); |
|
218 |
if (StringUtils.isNotBlank(collectedFromId)) System.err.println("collectedFromId: " + collectedFromId); |
|
219 |
if (StringUtils.isNotBlank(collectedFromName)) System.err.println("collectedFromName: " + collectedFromName); |
|
220 |
if (StringUtils.isNotBlank(originalId)) System.err.println("originalId: " + originalId); |
|
221 |
if (StringUtils.isNotBlank(dateOfCollection)) System.err.println("dateOfCollection: " + dateOfCollection); |
|
222 |
e.printStackTrace(); |
|
223 |
throw new RuntimeException(e); |
|
224 |
} |
|
225 |
} |
modules/dnet-openaireplus-mapping-utils/tags/dnet-openaireplus-mapping-utils-6.0.0/src/main/java/eu/dnetlib/data/transform/xml/OdfToHbaseXsltFunctions.java | ||
---|---|---|
1 |
package eu.dnetlib.data.transform.xml; |
|
2 |
|
|
3 |
import java.util.List; |
|
4 |
import java.util.Map; |
|
5 |
|
|
6 |
import com.google.common.collect.Lists; |
|
7 |
import com.google.common.collect.Maps; |
|
8 |
import eu.dnetlib.data.mapreduce.util.OafRowKeyDecoder; |
|
9 |
import eu.dnetlib.data.proto.FieldTypeProtos.StructuredProperty; |
|
10 |
import eu.dnetlib.data.proto.OafProtos.Oaf; |
|
11 |
import eu.dnetlib.data.proto.OafProtos.OafEntity; |
|
12 |
import eu.dnetlib.data.proto.ResultProtos.Result; |
|
13 |
import eu.dnetlib.data.proto.ResultProtos.Result.Instance; |
|
14 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
15 |
import org.w3c.dom.NamedNodeMap; |
|
16 |
import org.w3c.dom.Node; |
|
17 |
import org.w3c.dom.NodeList; |
|
18 |
|
|
19 |
public class OdfToHbaseXsltFunctions extends CommonDNetXsltFunctions { |
|
20 |
|
|
21 |
/**
 * Lookup table from access-rights values to the codes OPEN, CLOSED, RESTRICTED and EMBARGO.
 * The reference is final so the table cannot be swapped after class initialization.
 */
private static final Map<String, String> mappingAccess = Maps.newHashMap();

static {

    // info:eu-repo vocabulary terms
    mappingAccess.put("info:eu-repo/semantics/openAccess", "OPEN");
    mappingAccess.put("info:eu-repo/semantics/closedAccess", "CLOSED");
    mappingAccess.put("info:eu-repo/semantics/restrictedAccess", "RESTRICTED");
    mappingAccess.put("info:eu-repo/semantics/embargoedAccess", "EMBARGO");

    // Transformator now maps the access rights into proper values, not sure if it does for all datasets.
    // The already-mapped codes therefore map to themselves.
    mappingAccess.put("OPEN", "OPEN");
    mappingAccess.put("CLOSED", "CLOSED");
    mappingAccess.put("RESTRICTED", "RESTRICTED");
    mappingAccess.put("EMBARGO", "EMBARGO");

}
|
37 |
|
|
38 |
public static String odfResult( |
|
39 |
final String resultId, |
|
40 |
final NodeList about, |
|
41 |
final NodeList metadata, |
|
42 |
final NodeList titles, |
|
43 |
final NodeList subjects, |
|
44 |
final NodeList publisher, |
|
45 |
final NodeList descriptions, |
|
46 |
final NodeList dates, |
|
47 |
final NodeList dateaccepted, |
|
48 |
final NodeList resourceTypes, |
|
49 |
final NodeList formats, |
|
50 |
final NodeList sizes, |
|
51 |
final NodeList languages, |
|
52 |
final NodeList cobjcategory, |
|
53 |
final NodeList contributors, |
|
54 |
final NodeList rights, |
|
55 |
final NodeList version, |
|
56 |
final NodeList pidList, |
|
57 |
final String provenance, |
|
58 |
final String trust, |
|
59 |
final String hostedbyId, |
|
60 |
final String hostedbyName, |
|
61 |
final String collectedfromId, |
|
62 |
final String collectedfromName, |
|
63 |
final NodeList originalIds, |
|
64 |
final String instanceUri, |
|
65 |
final String dateOfCollection, |
|
66 |
final String dateOfTransformation) { |
|
67 |
|
|
68 |
try { |
|
69 |
final String entityId = OafRowKeyDecoder.decode(resultId).getKey(); |
|
70 |
|
|
71 |
final Result.Builder result = Result.newBuilder(); |
|
72 |
Result.Metadata.Builder metadataProto = Result.Metadata.newBuilder(); |
|
73 |
|
|
74 |
// subject |
|
75 |
for (int i = 0; i < subjects.getLength(); i++) { |
|
76 |
Node currentNode = subjects.item(i); |
|
77 |
NodeList childNodes = currentNode.getChildNodes(); |
|
78 |
if (childNodes.getLength() > 0) { |
|
79 |
String subjectValue = childNodes.item(0).getNodeValue(); |
|
80 |
addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("subject"), |
|
81 |
getStructuredProperty(subjectValue, "keyword", "keyword", "dnet:subject_classification_typologies", "dnet:subject_classification_typologies")); |
|
82 |
} |
|
83 |
} |
|
84 |
|
|
85 |
// title |
|
86 |
for (int i = 0; i < titles.getLength(); i++) { |
|
87 |
Node currentNode = titles.item(i); |
|
88 |
NodeList childNodes = currentNode.getChildNodes(); |
|
89 |
if (childNodes.getLength() > 0) { |
|
90 |
String titleValue = childNodes.item(0).getNodeValue(); |
|
91 |
String classname = "main title"; |
|
92 |
String classid = "main title"; |
|
93 |
if (currentNode.hasAttributes()) { |
|
94 |
NamedNodeMap attributes = currentNode.getAttributes(); |
|
95 |
Node titleType = attributes.getNamedItem("titleType"); |
|
96 |
|
|
97 |
if (titleType != null && titleType.getNodeValue().equals("AlternativeTitle")) { |
|
98 |
classname = "alternative title"; |
|
99 |
classid = "alternative title"; |
|
100 |
} |
|
101 |
if (titleType != null && titleType.getNodeValue().equals("Subtitle")) { |
|
102 |
classname = "subtitle"; |
|
103 |
classid = "subtitle"; |
|
104 |
} |
|
105 |
if (titleType != null && titleType.getNodeValue().equals("TranslatedTitle")) { |
|
106 |
classname = "translated title"; |
|
107 |
classid = "translated title"; |
|
108 |
} |
|
109 |
} |
|
110 |
addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("title"), |
|
111 |
getStructuredProperty(titleValue, classname, classid, "dnet:dataCite_title", "dnet:dataCite_title")); |
|
112 |
} |
|
113 |
} |
|
114 |
|
|
115 |
// description |
|
116 |
for (int i = 0; i < descriptions.getLength(); i++) { |
|
117 |
Node currentNode = descriptions.item(i); |
|
118 |
if (currentNode != null && currentNode.hasChildNodes()) { |
|
119 |
String descriptionValue = currentNode.getChildNodes().item(0).getNodeValue(); |
|
120 |
addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("description"), descriptionValue); |
|
121 |
} |
|
122 |
} |
|
123 |
|
|
124 |
// contributors |
|
125 |
for (int i = 0; i < contributors.getLength(); i++) { |
|
126 |
Node currentNode = contributors.item(i); |
|
127 |
if (currentNode != null && currentNode.hasChildNodes()) { |
|
128 |
String contributorValue = currentNode.getChildNodes().item(0).getNodeValue(); |
|
129 |
addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("contributor"), contributorValue); |
|
130 |
} |
|
131 |
} |
|
132 |
|
|
133 |
// publisher |
|
134 |
addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("publisher"), getFirstItem(publisher)); |
|
135 |
|
|
136 |
// dates |
|
137 |
for (int i = 0; i < dates.getLength(); i++) { |
|
138 |
Node currentNode = dates.item(i); |
|
139 |
if (currentNode != null && currentNode.hasAttributes() && currentNode.hasChildNodes()) { |
|
140 |
String dateAttribute = currentNode.getAttributes().getNamedItem("dateType").getNodeValue(); |
|
141 |
String dateValue = currentNode.getChildNodes().item(0).getNodeValue(); |
|
142 |
String protoAttribute = "relevantdate"; |
|
143 |
if ("Accepted".equals(dateAttribute)) { |
|
144 |
protoAttribute = "dateofacceptance"; |
|
145 |
} else if ("Issued".equals(dateAttribute)) { |
|
146 |
protoAttribute = "storagedate"; |
|
147 |
} else if ("Updated".equals(dateAttribute)) { |
|
148 |
protoAttribute = "lastmetadataupdate"; |
|
149 |
} else if ("Available".equals(dateAttribute)) { |
|
150 |
protoAttribute = "embargoenddate"; |
|
151 |
} |
|
152 |
if (protoAttribute.equals("relevantdate") == false) { |
|
153 |
addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName(protoAttribute), dateValue); |
|
154 |
} else { |
|
155 |
addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName(protoAttribute), |
|
156 |
getStructuredProperty(dateValue, "UNKNOWN", "UNKNOWN", "dnet:dataCite_date", "dnet:dataCite_date")); |
|
157 |
} |
|
158 |
} |
|
159 |
} |
|
160 |
|
|
161 |
// dateofacceptance |
|
162 |
addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("dateofacceptance"), getFirstItem(dateaccepted)); |
|
163 |
|
|
164 |
// size |
|
165 |
addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("size"), getFirstItem(sizes)); |
|
166 |
|
|
167 |
// format |
|
168 |
addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("format"), getFirstItem(formats)); |
|
169 |
|
|
170 |
// version |
|
171 |
addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("version"), getFirstItem(version)); |
|
172 |
|
|
173 |
// language |
|
174 |
addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("language"), |
|
175 |
setQualifier(getDefaultQualifier("dnet:languages"), Lists.newArrayList(getFirstItem(languages)))); |
|
176 |
|
|
177 |
//resource type |
|
178 |
addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("resourcetype"), |
|
179 |
setQualifier(getDefaultQualifier("dnet:dataCite_resource"), Lists.newArrayList(getFirstItem(resourceTypes)))); |
|
180 |
|
|
181 |
// resultType |
|
182 |
addField(metadataProto, Result.Metadata.getDescriptor().findFieldByName("resulttype"), getSimpleQualifier("dataset", "dnet:result_typologies")); |
|
183 |
|
|
184 |
String tmpID; |
|
185 |
String TmpName; |
|
186 |
|
|
187 |
if (hostedbyId == null && hostedbyName == null) { |
|
188 |
tmpID = collectedfromId; |
|
189 |
TmpName = collectedfromName; |
|
190 |
} else { |
|
191 |
tmpID = hostedbyId; |
|
192 |
TmpName = hostedbyName; |
|
193 |
} |
|
194 |
|
|
195 |
final Instance.Builder instance = Instance.newBuilder().setHostedby(getKV(tmpID, TmpName)); |
|
196 |
|
|
197 |
String tmpRigths = "UNKNOWN"; |
|
198 |
final String firstRight = getFirstItem(rights); |
|
199 |
if (mappingAccess.containsKey(firstRight)) { |
|
200 |
tmpRigths = mappingAccess.get(firstRight); |
|
201 |
} |
|
202 |
|
|
203 |
addField(instance, Instance.getDescriptor().findFieldByName("licence"), |
|
204 |
setQualifier(getDefaultQualifier("dnet:access_modes"), Lists.newArrayList(tmpRigths))); |
|
205 |
|
|
206 |
addField(instance, Instance.getDescriptor().findFieldByName("instancetype"), |
|
207 |
setQualifier(getDefaultQualifier("dnet:dataCite_resource"), Lists.newArrayList(getFirstItem(cobjcategory)))); |
|
208 |
|
|
209 |
addField(instance, Instance.getDescriptor().findFieldByName("url"), instanceUri); |
|
210 |
|
|
211 |
result.addInstance(instance); |
|
212 |
|
|
213 |
List<StructuredProperty> pids = parsePids(pidList); |
|
214 |
|
|
215 |
// original ids |
|
216 |
final List<String> originalIdList = Lists.newArrayList(); |
|
217 |
for (int i = 0; i < originalIds.getLength(); i++) { |
|
218 |
Node currentNode = originalIds.item(i); |
|
219 |
if (currentNode != null && currentNode.hasChildNodes()) { |
|
220 |
originalIdList.add(currentNode.getChildNodes().item(0).getNodeValue()); |
|
221 |
} |
|
222 |
} |
|
223 |
|
|
224 |
OafEntity.Builder entity = |
|
225 |
getEntity(Type.result, entityId, getKV(collectedfromId, collectedfromName), originalIdList, dateOfCollection, dateOfTransformation, pids).setResult( |
|
226 |
result.setMetadata(metadataProto)); |
|
227 |
|
|
228 |
entity.setOaiprovenance(getOAIProvenance(about)); |
|
229 |
|
|
230 |
Oaf oaf = getOaf(entity, getDataInfo(about, provenance, trust, false, false)); |
|
231 |
return base64(oaf.toByteArray()); |
|
232 |
} catch (Exception e) { |
|
233 |
e.printStackTrace(System.err); |
|
234 |
throw new RuntimeException(e); |
|
235 |
} |
|
236 |
|
|
237 |
} |
|
238 |
|
|
239 |
public static String getFirstItem(final NodeList list) { |
|
240 |
String out = ""; |
|
241 |
if (list != null) { |
|
242 |
|
|
243 |
if (list.getLength() > 0 && list.item(0).getChildNodes() != null && list.item(0).getChildNodes().getLength() > 0) { |
|
244 |
out = list.item(0).getChildNodes().item(0).getNodeValue(); |
|
245 |
} |
|
246 |
} |
|
247 |
return out; |
|
248 |
} |
Also available in: Unified diff
[maven-release-plugin] copy for tag dnet-openaireplus-mapping-utils-6.0.0