Project

General

Profile

« Previous | Next » 

Revision 51215

cleanup

View differences:

modules/dnet-mapreduce-jobs/branches/scoreResult/deploy.info
1
{"type_source": "SVN", "goal": "package -U -T 4C source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet40/modules/dnet-mapreduce-jobs/trunk/", "deploy_repository": "dnet4-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet4-snapshots", "name": "dnet-mapreduce-jobs"}
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/resources/META-INF/services/javax.xml.transform.TransformerFactory
1
net.sf.saxon.TransformerFactoryImpl
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/resources/eu/dnetlib/data/mapreduce/hbase/dedup/blacklist/title_blacklist.txt
1
^(Corpus Oral Dialectal \(COD\)\.).*$
2
^(Kiri Karl Morgensternile).*$
3
^(\[Eksliibris Aleksandr).*\]$
4
^(Kiri A\. de Vignolles).*$
5
^(2 kirja Karl Morgensternile).*$
6
^(Pirita kloostri idaosa arheoloogilised).*$
7
^(Kiri tundmatule).*$
8
^(Kiri Jenaer Allgemeine Literaturzeitung toimetusele).*$
9
^(Eksliibris Nikolai Birukovile).*$
10
^(Eksliibris Nikolai Issakovile).*$
11
^(\[Eksliibris Aleksandr).*$
12
^(WHP Cruise Summary Information of section).*$
13
^(Measurement of the top quark\-pair production cross section with ATLAS in pp collisions at).*$
14
^(Measurement of the spin\-dependent structure function).*
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/resources/eu/dnetlib/data/mapreduce/util/entity.st
1
<oaf:$name$>
2
	$metadata:{$it$}$
3
  <rels>
4
    $rels:{$it$}$
5
  </rels>
6
  <children>
7
	$children:{$it$}$
8
  </children>
9
</oaf:$name$>
10
$inference:{$it$}$
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/resources/eu/dnetlib/data/mapreduce/util/record.st
1
<?xml version="1.0"?>
2
<record>
3
  <result xmlns:dri="http://www.driver-repository.eu/namespace/dri" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
4
    <header>
5
      <dri:objIdentifier>$id$</dri:objIdentifier>
6
      <dri:dateOfCollection>$dateofcollection$</dri:dateOfCollection>
7
      <dri:dateOfTransformation>$dateoftransformation$</dri:dateOfTransformation>
8
      <counters>
9
	  $counters:{$it$}$
10
	  </counters>
11
    </header>
12
    <metadata>
13
      <oaf:entity xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
14
		    xmlns:oaf="http://namespace.openaire.eu/oaf" 
15
		    xsi:schemaLocation="http://namespace.openaire.eu/oaf $schemaLocation$">
16
		$it$
17
      </oaf:entity>
18
    </metadata>
19
  </result>
20
</record>
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/resources/eu/dnetlib/data/mapreduce/util/childresult.st
1
<result>
2
    <dri:objIdentifier>$objIdentifier$</dri:objIdentifier>
3
	$metadata:{$it$}$
4
</result>
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/resources/eu/dnetlib/data/mapreduce/util/rel.st
1
<rel inferred="$inferred$" trust="$trust$" inferenceprovenance="$inferenceprovenance$" provenanceaction="$provenanceaction$">
2
  <to class="$class$" scheme="$scheme$" type="$type$">$objIdentifier$</to>
3
  $metadata:{$it$}$
4
</rel>
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/resources/eu/dnetlib/data/mapreduce/util/child.st
1
<$name$$if(hasId)$ objidentifier="$id$"$else$$endif$>
2
	$metadata:{$it$}$
3
</$name$>
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/resources/eu/dnetlib/data/mapreduce/util/instance.st
1
<instance id="$instanceId$">
2
    $metadata:{$it$}$
3
	$webresources:{$it$}$
4
</instance>
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/resources/eu/dnetlib/data/mapreduce/util/webresource.st
1
<webresource>
2
  <url>$identifier$</url>
3
</webresource>
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/resources/log4j.properties
1
### Root Level ###
2
log4j.rootLogger=WARN, CONSOLE
3

  
4
### Configuration for the CONSOLE appender ###
5
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
6
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
7
log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c - %m%n
8

  
9
org.apache.cxf.Logger=org.apache.cxf.common.logging.Log4jLogger
10

  
11
### Application Level ###
12
log4j.logger.eu.dnetlib=INFO
13
log4j.logger.eu.dnetlib.pace=DEBUG
14
log4j.logger.eu.dnetlib.data.transform.OafEntityMerger=DEBUG
15

  
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/java/eu/dnetlib/data/mapreduce/OptionalConfig.java
1
package eu.dnetlib.data.mapreduce;
2

  
3
import java.util.List;
4

  
5
public class OptionalConfig {
6

  
7
	private com.typesafe.config.Config config;
8

  
9
	public OptionalConfig(com.typesafe.config.Config config) {
10
		this.config = config;
11
	}
12

  
13
	protected Object safe(String path) {
14
		return config.hasPath(path) ? config.getAnyRef(path) : null;
15
	}
16

  
17
	public Integer getInt(String path) {
18
		return (Integer) safe(path);
19
	}
20

  
21
	public String getString(String path) {
22
		return (String) safe(path);
23
	}
24
	
25
	@SuppressWarnings("unchecked")
26
	public <T> List<T> getList(String path) {
27
		return (List<T>) safe(path);
28
	}	
29

  
30
	public Double getDouble(String path) {
31
		Object safe = safe(path);
32
		if (safe instanceof Integer) {
33
			return Double.parseDouble(safe.toString());
34
		}
35
		return (Double) safe;
36
	}
37

  
38
	public Object getObject(String path) {
39
		return safe(path);
40
	}
41

  
42
	public Boolean getBoolean(String path) {
43
		return (Boolean) safe(path);
44
	}
45
	
46
	public Boolean hasPath(String path) {
47
		return getConfig().hasPath(path);
48
	}
49

  
50
	public com.typesafe.config.Config getConfig() {
51
		return config;
52
	}
53

  
54
}
55 0

  
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/java/eu/dnetlib/data/mapreduce/util/UpdateMerger.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import java.util.Collections;
4
import java.util.List;
5
import java.util.Map;
6
import java.util.Map.Entry;
7

  
8
import com.google.common.collect.Maps;
9
import org.apache.hadoop.hbase.util.Bytes;
10
import org.apache.hadoop.mapreduce.Mapper.Context;
11

  
12
import com.google.common.collect.Lists;
13
import com.google.protobuf.InvalidProtocolBufferException;
14

  
15
import eu.dnetlib.data.proto.OafProtos.Oaf;
16
import eu.dnetlib.data.transform.OafEntityMerger;
17

  
18
public class UpdateMerger {
19

  
20
	private static final String UPDATE_MERGER = "UPDATE_MERGE";
21
	private static final String N_MERGES = "N_MERGES";
22

  
23
	@SuppressWarnings("rawtypes")
24
	public static Oaf mergeBodyUpdates(final Context context, final Map<byte[], byte[]> map) throws InvalidProtocolBufferException {
25

  
26
		final Map<String, byte[]> stringMap = Maps.newHashMap();
27
		for(Entry<byte[], byte[]> e : map.entrySet()) {
28
			stringMap.put(Bytes.toString(e.getKey()), e.getValue());
29
		}
30

  
31
		return doMerge(context, stringMap);
32
	}
33

  
34
	public static Oaf mergeBodyUpdates(final Map<String, byte[]> map) throws InvalidProtocolBufferException {
35
		return doMerge(null, map);
36
	}
37

  
38
	private static Oaf doMerge(final Context context, final Map<String, byte[]> map)
39
			throws InvalidProtocolBufferException {
40

  
41
		final byte[] value = map.get(DedupUtils.BODY_S);
42
		if (value == null) return null;
43

  
44
		Oaf.Builder builder = Oaf.newBuilder(Oaf.parseFrom(value));
45
		final List<String> keys = Lists.newArrayList();
46

  
47
		// we fetch all the body updates
48
		for (final String o : map.keySet()) {
49
			if (o.startsWith("update_")) {
50
				keys.add(o);
51
			}
52
		}
53
		if (!keys.isEmpty()) {
54
			// we merge all the sorted updates with the body
55
			Collections.sort(keys);
56
			for (final String k : keys) {
57
				final Oaf update = Oaf.parseFrom(map.get(k));
58
				// System.out.println("\n\nBODY: \n" + body.build().toString());
59
				// System.out.println("UPDATE: \n" + update.toString());
60
				builder.mergeFrom(update);
61
				// System.out.println("UDPATED BODY: \n" + body.build().toString() + "\n\n");
62
			}
63

  
64
			builder = OafEntityMerger.merge(builder);
65

  
66
			if (context != null) {
67
				context.getCounter(UPDATE_MERGER, N_MERGES).increment(keys.size());
68
			}
69
		}
70
		return builder.build();
71
	}
72

  
73

  
74
}
75 0

  
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/java/eu/dnetlib/data/mapreduce/util/TemplateResources.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import org.springframework.core.io.ClassPathResource;
4
import org.springframework.core.io.Resource;
5

  
6
public class TemplateResources {
7
	
8
	private Resource record = new ClassPathResource("/eu/dnetlib/data/mapreduce/util/record.st");
9
		
10
	private Resource instance = new ClassPathResource("/eu/dnetlib/data/mapreduce/util/instance.st");
11
	
12
	private Resource rel = new ClassPathResource("/eu/dnetlib/data/mapreduce/util/rel.st");
13
	
14
	private Resource webresource = new ClassPathResource("/eu/dnetlib/data/mapreduce/util/webresource.st");
15
	
16
	private Resource child = new ClassPathResource("/eu/dnetlib/data/mapreduce/util/child.st");
17
	
18
	private Resource tree = new ClassPathResource("/eu/dnetlib/data/mapreduce/util/tree.st");
19
	
20
	private Resource entity = new ClassPathResource("/eu/dnetlib/data/mapreduce/util/entity.st");
21
	
22
	public Resource getEntity() {
23
		return entity;
24
	}	
25
	
26
	public Resource getRecord() {
27
		return record;
28
	}
29
	
30
	public Resource getInstance() {
31
		return instance;
32
	}
33

  
34
	public Resource getRel() {
35
		return rel;
36
	}
37

  
38
	public Resource getWebresource() {
39
		return webresource;
40
	}
41

  
42
	public Resource getChild() {
43
		return child;
44
	}
45
	
46
	public Resource getTree() {
47
		return tree;
48
	}
49

  
50
}
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/java/eu/dnetlib/data/mapreduce/util/XmlRecordFactory.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import java.io.StringReader;
4
import java.io.StringWriter;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.Map.Entry;
8
import java.util.Set;
9
import javax.xml.transform.*;
10
import javax.xml.transform.dom.DOMSource;
11
import javax.xml.transform.stream.StreamResult;
12

  
13
import com.google.common.base.Joiner;
14
import com.google.common.base.Predicate;
15
import com.google.common.base.Splitter;
16
import com.google.common.collect.Lists;
17
import com.google.common.collect.Maps;
18
import com.google.common.collect.Sets;
19
import com.google.protobuf.Descriptors.EnumValueDescriptor;
20
import com.google.protobuf.Descriptors.FieldDescriptor;
21
import com.google.protobuf.GeneratedMessage;
22
import com.mycila.xmltool.XMLDoc;
23
import com.mycila.xmltool.XMLTag;
24
import eu.dnetlib.data.mapreduce.hbase.index.config.*;
25
import eu.dnetlib.data.proto.FieldTypeProtos.*;
26
import eu.dnetlib.data.proto.OafProtos.OafEntity;
27
import eu.dnetlib.data.proto.OafProtos.OafRel;
28
import eu.dnetlib.data.proto.PersonProtos.Person;
29
import eu.dnetlib.data.proto.ProjectProtos.Project;
30
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
31
import eu.dnetlib.data.proto.ResultProtos.Result;
32
import eu.dnetlib.data.proto.ResultProtos.Result.Context;
33
import eu.dnetlib.data.proto.ResultProtos.Result.ExternalReference;
34
import eu.dnetlib.data.proto.ResultProtos.Result.Instance;
35
import eu.dnetlib.data.proto.ResultProtos.Result.Journal;
36
import eu.dnetlib.data.proto.TypeProtos.Type;
37
import eu.dnetlib.miscutils.functional.UnaryFunction;
38
import org.apache.commons.lang.StringUtils;
39
import org.dom4j.Document;
40
import org.dom4j.DocumentException;
41
import org.dom4j.Element;
42
import org.dom4j.Node;
43
import org.dom4j.io.SAXReader;
44

  
45
import static eu.dnetlib.miscutils.collections.MappedCollection.listMap;
46

  
47
public class XmlRecordFactory {
48

  
49
	// private static final Log log = LogFactory.getLog(XmlRecordFactory.class); // NOPMD by marko on 11/24/08 5:02 PM
50

  
51
	// Number of accepted relations/children per relation descriptor string; maintained by addRelOrChild.
	private final Map<String, Integer> relCounters = Maps.newHashMap();
52
	// Datasource type class ids that addFieldValue rewrites to "other" in the derived datasourcetypeui element.
	protected Set<String> specialDatasourceTypes = Sets.newHashSet("scholarcomminfra", "infospace", "pubsrepository::mock", "entityregistry");
53
	// Renders bodies, records, rels, children, instances and web resources.
	protected TemplateFactory templateFactory = new TemplateFactory();
54
	// Entity the record is built for; assigned by setMainEntity, null until then (see isValid).
	protected OafDecoder mainEntity = null;
55
	// Id of the main entity; assigned together with mainEntity.
	protected String key = null;
56
	// Relations accepted through addRelation.
	protected List<OafDecoder> relations = Lists.newLinkedList();
57
	// Children accepted through addChild.
	protected List<OafDecoder> children = Lists.newLinkedList();
58
	// Source of link descriptors and field filters per entity type / relation descriptor.
	protected EntityConfigTable entityConfigTable;
59
	// Resolves context ids to their definitions in buildContexts; may be null.
	protected ContextMapper contextMapper;
60
	// Used to obtain the inverse of a relation class name.
	protected RelClasses relClasses;
61
	// Passed as-is to the record template by build().
	protected String schemaLocation;
62
	// "defaults" flag used when listing the main entity fields.
	protected boolean entityDefaults;
63
	// "defaults" flag used when listing the fields of a relation's cached target.
	protected boolean relDefaults;
64
	// "defaults" flag used when listing the fields of a child entity.
	protected boolean childDefaults;
65
	// Context ids collected by guessType and consumed by buildContexts.
	protected Set<String> contextes = Sets.newHashSet();
66
	// Serialized ExtraInfo elements collected by guessType and handed to the body template by build().
	protected List<String> extraInfo = Lists.newArrayList();
67
	// NOTE(review): presumably backs incrementCounter/countersAsXml, which are outside the visible code - confirm.
	protected Map<String, Integer> counters = Maps.newHashMap();
68
	// Created in the constructor with OMIT_XML_DECLARATION=yes.
	protected Transformer transformer;
69

  
70
	public XmlRecordFactory(final EntityConfigTable entityConfigTable, final ContextMapper contextMapper, final RelClasses relClasses,
71
			final String schemaLocation, final boolean entityDefaults, final boolean relDefaults, final boolean childDefeaults)
72
			throws TransformerConfigurationException, TransformerFactoryConfigurationError {
73
		this.entityConfigTable = entityConfigTable;
74
		this.contextMapper = contextMapper;
75
		this.relClasses = relClasses;
76
		this.schemaLocation = schemaLocation;
77
		this.entityDefaults = entityDefaults;
78
		this.relDefaults = relDefaults;
79
		this.childDefaults = childDefeaults;
80

  
81
		transformer = TransformerFactory.newInstance().newTransformer();
82
		transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
83
	}
84

  
85
	public static String removePrefix(final String s) {
86
		if (s.contains("|")) return StringUtils.substringAfter(s, "|");
87
		return s;
88
	}
89

  
90
	public static String escapeXml(final String value) {
91
		return value.replaceAll("&", "&amp;").replaceAll("<", "&lt;").replaceAll(">", "&gt;").replaceAll("\"", "&quot;").replaceAll("'", "&apos;");
92
	}
93

  
94
	public Map<String, Integer> getRelCounters() {
95
		return relCounters;
96
	}
97

  
98
	public RelClasses getRelClasses() {
99
		return relClasses;
100
	}
101

  
102
	public String getId() {
103
		return key;
104
	}
105

  
106
	public boolean isValid() {
107
		return mainEntity != null;
108
	}
109

  
110
	public void setMainEntity(final OafDecoder mainEntity) {
111
		this.mainEntity = mainEntity;
112
		this.key = mainEntity.decodeEntity().getId();
113
	}
114

  
115
	public void addRelation(final Type type, final OafDecoder rel) {
116
		addRelOrChild(type, relations, rel);
117
	}
118

  
119
	public void addChild(final Type type, final OafDecoder child) {
120
		addRelOrChild(type, children, child);
121
	}
122

  
123
	private void addRelOrChild(final Type type, final List<OafDecoder> list, final OafDecoder decoder) {
124

  
125
		final OafRel oafRel = decoder.getOafRel();
126
		final String rd = oafRel.getRelType().toString() + "_" + oafRel.getSubRelType() + "_" + relClasses.getInverse(oafRel.getRelClass());
127
		final LinkDescriptor ld = entityConfigTable.getDescriptor(type, new RelDescriptor(rd));
128

  
129
		if (getRelCounters().get(rd) == null) {
130
			getRelCounters().put(rd, 0);
131
		}
132

  
133
		if (ld == null) {
134
			list.add(decoder);
135
			return;
136
		}
137

  
138
		if (ld.getMax() < 0) {
139
			list.add(decoder);
140
			return;
141
		}
142

  
143
		if (getRelCounters().get(rd) < ld.getMax()) {
144
			getRelCounters().put(rd, getRelCounters().get(rd) + 1);
145
			list.add(decoder);
146
		}
147
	}
148

  
149
	public String build() {
150
		try {
151
			final OafEntityDecoder entity = mainEntity.decodeEntity();
152
			// log.info("building");
153
			// log.info("main: " + mainEntity);
154
			// log.info("rel:  " + relations);
155
			// log.info("chi:  " + children);
156
			// log.info("=============");
157

  
158
			final Type type = entity.getType();
159
			final List<String> metadata = decodeType(entity, null, entityDefaults, false);
160

  
161
			// rels has to be processed before the contexts because they enrich the contextMap with the funding info.
162
			final List<String> rels = listRelations();
163
			metadata.addAll(buildContexts(type));
164
			metadata.add(parseDataInfo(mainEntity));
165

  
166
			final String body = templateFactory.buildBody(type, metadata, rels, listChildren(), extraInfo);
167

  
168
			return templateFactory
169
					.buildRecord(type, key, entity.getDateOfCollection(), entity.getDateOfTransformation(), schemaLocation, body, countersAsXml());
170
		} catch (final Throwable e) {
171
			throw new RuntimeException(String.format("error building record '%s'", this.key), e);
172
		}
173
	}
174

  
175
	private String parseDataInfo(final OafDecoder decoder) {
176
		final DataInfo dataInfo = decoder.getOaf().getDataInfo();
177

  
178
		final StringBuilder sb = new StringBuilder();
179
		sb.append("<datainfo>");
180
		sb.append(asXmlElement("inferred", dataInfo.getInferred() + "", null, null));
181
		sb.append(asXmlElement("deletedbyinference", dataInfo.getDeletedbyinference() + "", null, null));
182
		sb.append(asXmlElement("trust", dataInfo.getTrust() + "", null, null));
183
		sb.append(asXmlElement("inferenceprovenance", dataInfo.getInferenceprovenance() + "", null, null));
184
		sb.append(asXmlElement("provenanceaction", null, dataInfo.getProvenanceaction(), null));
185
		sb.append("</datainfo>");
186

  
187
		return sb.toString();
188
	}
189

  
190
	private List<String> decodeType(final OafEntityDecoder decoder, final Set<String> filter, final boolean defaults, final boolean expandingRel) {
191

  
192
		final List<String> metadata = Lists.newArrayList();
193
		metadata.addAll(listFields(decoder.getMetadata(), filter, defaults, expandingRel));
194
		metadata.addAll(listFields(decoder.getOafEntity(), filter, defaults, expandingRel));
195

  
196
		if ((decoder.getEntity() instanceof Result) && !expandingRel) {
197
			metadata.add(asXmlElement("bestlicense", "", getBestLicense(), null));
198

  
199
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
200
		}
201
		if ((decoder.getEntity() instanceof Person) && !expandingRel) {
202
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
203
		}
204
		if ((decoder.getEntity() instanceof Project) && !expandingRel) {
205
			metadata.addAll(listFields(decoder.getEntity(), filter, defaults, expandingRel));
206
		}
207

  
208
		return metadata;
209
	}
210

  
211
	private Qualifier getBestLicense() {
212
		Qualifier bestLicense = getQualifier("UNKNOWN", "not available", "dnet:access_modes");
213
		final LicenseComparator lc = new LicenseComparator();
214
		for (final Instance instance : ((Result) mainEntity.decodeEntity().getEntity()).getInstanceList()) {
215
			if (lc.compare(bestLicense, instance.getLicence()) > 0) {
216
				bestLicense = instance.getLicence();
217
			}
218
		}
219
		return bestLicense;
220
	}
221

  
222
	public Qualifier getQualifier(final String classid, final String classname, final String schemename) {
223
		return Qualifier.newBuilder().setClassid(classid).setClassname(classname).setSchemeid(schemename).setSchemename(schemename).build();
224
	}
225

  
226
	private List<String> listRelations() {
227

  
228
		final List<String> rels = Lists.newArrayList();
229

  
230
		for (final OafDecoder decoder : this.relations) {
231

  
232
			final OafRel rel = decoder.getOafRel();
233
			final OafEntity cachedTarget = rel.getCachedTarget();
234
			final OafRelDecoder relDecoder = OafRelDecoder.decode(rel);
235

  
236
			// if (!relDecoder.getRelType().equals(RelType.personResult) || relDecoder.getRelTargetId().equals(key)) {
237
			if (relDecoder.getRelSourceId().equals(key) || relDecoder.getRelTargetId().equals(key)) {
238

  
239
				final List<String> metadata = Lists.newArrayList();
240
				final Type targetType = relDecoder.getTargetType(mainEntity.getEntity().getType());
241
				final Set<String> relFilter = entityConfigTable.getFilter(targetType, relDecoder.getRelDescriptor());
242
				metadata.addAll(listFields(relDecoder.getSubRel(), relFilter, false, true));
243

  
244
				String semanticclass = "";
245
				String semanticscheme = "";
246

  
247
				final RelDescriptor relDescriptor = relDecoder.getRelDescriptor();
248

  
249
				if ((cachedTarget != null) && cachedTarget.isInitialized()) {
250

  
251
					final Set<String> filter = entityConfigTable.getFilter(targetType, relDescriptor);
252
					metadata.addAll(decodeType(OafEntityDecoder.decode(cachedTarget), filter, relDefaults, true));
253
				}
254

  
255
				final RelMetadata relMetadata = relDecoder.getRelMetadata();
256
				// debug
257
				if (relMetadata == null) {
258
					// System.err.println(this);
259
					semanticclass = semanticscheme = "UNKNOWN";
260
				} else {
261
					semanticclass = relClasses.getInverse(relMetadata.getSemantics().getClassname());
262
					semanticscheme = relMetadata.getSemantics().getSchemename();
263
				}
264

  
265
				incrementCounter(relDescriptor.getSubRelType().toString());
266

  
267
				final LinkDescriptor ld = entityConfigTable.getDescriptor(relDecoder.getTargetType(mainEntity.getEntity().getType()), relDescriptor);
268

  
269
				final String relId = (ld != null) && !ld.isSymmetric() ? relDecoder.getRelTargetId() : relDecoder.getRelSourceId();
270

  
271
				final DataInfo info = decoder.getOaf().getDataInfo();
272

  
273
				rels.add(templateFactory.getRel(targetType, relId, metadata, semanticclass, semanticscheme, info.getInferred(), info.getTrust(),
274
						info.getInferenceprovenance(), info.getProvenanceaction().getClassid()));
275
			}
276
		}
277
		return rels;
278
	}
279

  
280
	// //////////////////////////////////
281

  
282
	private List<String> listChildren() {
283

  
284
		final List<String> children = Lists.newArrayList();
285
		for (final OafDecoder decoder : this.children) {
286
			final OafEntity cachedTarget = decoder.getOafRel().getCachedTarget();
287
			addChildren(children, cachedTarget, decoder.getRelDescriptor());
288
		}
289
		final OafEntityDecoder entity = mainEntity.decodeEntity();
290
		if (entity.getType().equals(Type.result)) {
291
			for (final Instance instance : ((Result) entity.getEntity()).getInstanceList()) {
292
				final Set<String> instanceFieldFilter = Sets.newHashSet("instancetype", "hostedby", "licence");
293
				children.add(templateFactory.getInstance(instance.getHostedby().getKey(), listFields(instance, instanceFieldFilter, false, false),
294
						listMap(instance.getUrlList(), new UnaryFunction<String, String>() {
295

  
296
							@Override
297
							public String evaluate(final String identifier) {
298
								return templateFactory.getWebResource(identifier);
299
							}
300
						})));
301
			}
302
			for (final ExternalReference er : ((Result) entity.getEntity()).getExternalReferenceList()) {
303
				// Set<String> filters = entityConfigTable.getFilter(Type.result, RelType.resultResult);
304
				final List<String> fields = listFields(er, null, false, false);
305
				children.add(templateFactory.getChild("externalreference", null, fields));
306
			}
307
		}
308

  
309
		return children;
310
	}
311

  
312
	private void addChildren(final List<String> children, final OafEntity target, final RelDescriptor relDescriptor) {
313
		final OafEntityDecoder decoder = OafEntityDecoder.decode(target);
314
		incrementCounter(relDescriptor.getSubRelType().toString());
315
		final Set<String> filters = entityConfigTable.getFilter(target.getType(), relDescriptor);
316
		children.add(templateFactory.getChild(decoder.getType().toString(), decoder.getId(), listFields(decoder.getMetadata(), filters, childDefaults, false)));
317
	}
318

  
319
	private List<String> listFields(final GeneratedMessage fields, final Set<String> filter, final boolean defaults, final boolean expandingRel) {
320

  
321
		final List<String> metadata = Lists.newArrayList();
322

  
323
		if (fields != null) {
324

  
325
			final Set<String> seen = Sets.newHashSet();
326
			for (final Entry<FieldDescriptor, Object> e : filterFields(fields, filter)) {
327

  
328
				// final String name = getFieldName(e.getKey().getName());
329
				final String name = e.getKey().getName();
330
				seen.add(name);
331

  
332
				addFieldValue(metadata, e.getKey(), e.getValue(), expandingRel);
333
			}
334

  
335
			if (defaults) {
336
				for (final FieldDescriptor fd : fields.getDescriptorForType().getFields()) {
337
					if (!seen.contains(fd.getName())) {
338
						addFieldValue(metadata, fd, getDefault(fd), expandingRel);
339
					}
340
				}
341
			}
342
		}
343
		return metadata;
344
	}
345

  
346
	private Object getDefault(final FieldDescriptor fd) {
347
		switch (fd.getType()) {
348
		case BOOL:
349
			return false;
350
		case BYTES:
351
			return "".getBytes();
352
		case MESSAGE: {
353
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) return defaultQualifier();
354
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType()))
355
				return StructuredProperty.newBuilder().setValue("").setQualifier(defaultQualifier()).build();
356
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) return KeyValue.newBuilder().setKey("").setValue("").build();
357
			if (StringField.getDescriptor().equals(fd.getMessageType())) return StringField.newBuilder().setValue("").build();
358
			if (BoolField.getDescriptor().equals(fd.getMessageType())) return BoolField.newBuilder().buildPartial();
359
			return null;
360
		}
361
		case SFIXED32:
362
		case SFIXED64:
363
		case SINT32:
364
		case SINT64:
365
		case INT32:
366
		case INT64:
367
		case DOUBLE:
368
		case FIXED32:
369
		case FIXED64:
370
		case FLOAT:
371
			return 0;
372
		case STRING:
373
			return "";
374
		default:
375
			return null;
376
		}
377
	}
378

  
379
	private Qualifier defaultQualifier() {
380
		return Qualifier.newBuilder().setClassid("").setClassname("").setSchemeid("").setSchemename("").build();
381
	}
382

  
383
	@SuppressWarnings("unchecked")
384
	private void addFieldValue(final List<String> metadata, final FieldDescriptor fd, final Object value, final boolean expandingRel) {
385
		if (fd.getName().equals("dateofcollection") || fd.getName().equals("id") || /* fd.getName().equals("url") || */(value == null)) return;
386

  
387
		if (fd.getName().equals("datasourcetype")) {
388
			final String classid = ((Qualifier) value).getClassid();
389

  
390
			final Qualifier.Builder q = Qualifier.newBuilder((Qualifier) value);
391
			if (specialDatasourceTypes.contains(classid)) {
392
				q.setClassid("other").setClassname("other");
393
			}
394
			metadata.add(asXmlElement("datasourcetypeui", "", q.build(), null));
395
		}
396

  
397
		if (fd.isRepeated() && (value instanceof List<?>)) {
398
			for (final Object o : (List<Object>) value) {
399
				guessType(metadata, fd, o, expandingRel);
400
			}
401
		} else {
402
			guessType(metadata, fd, value, expandingRel);
403
		}
404
	}
405

  
406
	private void guessType(final List<String> metadata, final FieldDescriptor fd, final Object o, final boolean expandingRel) {
407

  
408
		if (fd.getType().equals(FieldDescriptor.Type.MESSAGE)) {
409

  
410
			if (Qualifier.getDescriptor().equals(fd.getMessageType())) {
411
				final Qualifier qualifier = (Qualifier) o;
412
				metadata.add(asXmlElement(fd.getName(), "", qualifier, null));
413
			}
414

  
415
			if (StructuredProperty.getDescriptor().equals(fd.getMessageType())) {
416
				final StructuredProperty sp = (StructuredProperty) o;
417
				metadata.add(asXmlElement(fd.getName(), sp.getValue(), sp.getQualifier(), sp.hasDataInfo() ? sp.getDataInfo() : null));
418

  
419
				if (!expandingRel && fd.getName().equals("pid")) {
420
					if (sp.getQualifier().getClassid().equalsIgnoreCase("doi")) {
421
						incrementCounter("doi");
422
					}
423
				}
424
			}
425

  
426
			if (KeyValue.getDescriptor().equals(fd.getMessageType())) {
427
				final KeyValue kv = (KeyValue) o;
428
				metadata.add("<" + fd.getName() + " name=\"" + escapeXml(kv.getValue()) + "\" id=\"" + escapeXml(removePrefix(kv.getKey())) + "\"/>");
429
			}
430

  
431
			if (StringField.getDescriptor().equals(fd.getMessageType())) {
432
				final String fieldName = fd.getName();
433

  
434
				if (fieldName.equals("fundingtree")) {
435
					final String xmlTree = o instanceof StringField ? ((StringField) o).getValue() : o.toString();
436

  
437
					if (expandingRel) {
438
						metadata.add(getRelFundingTree(xmlTree));
439
						fillContextMap(xmlTree);
440
					} else {
441
						metadata.add(xmlTree);
442
					}
443
				} else {
444
					final StringField sf = (StringField) o;
445
					final StringBuilder sb = new StringBuilder("<" + fd.getName());
446
					if (sf.hasDataInfo()) {
447
						final DataInfo dataInfo = sf.getDataInfo();
448
						dataInfoAsAttributes(sb, dataInfo);
449
					}
450
					sb.append(">" + escapeXml(sf.getValue()) + "</" + fd.getName() + ">");
451
					metadata.add(sb.toString());
452
				}
453
			}
454

  
455
			if (BoolField.getDescriptor().equals(fd.getMessageType())) {
456
				final BoolField bf = (BoolField) o;
457
				final StringBuilder sb = new StringBuilder("<" + fd.getName());
458
				if (bf.hasDataInfo()) {
459
					final DataInfo dataInfo = bf.getDataInfo();
460
					dataInfoAsAttributes(sb, dataInfo);
461
				}
462

  
463
				sb.append(">" + (bf.hasValue() ? bf.getValue() : "") + "</" + fd.getName() + ">");
464
				metadata.add(sb.toString());
465
			}
466

  
467
			if (Journal.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
468
				final Journal j = (Journal) o;
469
				metadata.add("<journal " + "issn=\"" + escapeXml(j.getIssnPrinted()) + "\" " + "eissn=\"" + escapeXml(j.getIssnOnline()) + "\" " + "lissn=\""
470
						+ escapeXml(j.getIssnLinking()) + "\">" + escapeXml(j.getName()) + "</journal>");
471
			}
472

  
473
			if (Context.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
474
				contextes.add(((Result.Context) o).getId());
475
			}
476

  
477
			if (ExtraInfo.getDescriptor().equals(fd.getMessageType()) && (o != null)) {
478

  
479
				final ExtraInfo e = (ExtraInfo) o;
480
				final StringBuilder sb = new StringBuilder("<" + fd.getName() + " ");
481

  
482
				sb.append("name=\"" + e.getName() + "\" ");
483
				sb.append("typology=\"" + e.getTypology() + "\" ");
484
				sb.append("provenance=\"" + e.getProvenance() + "\" ");
485
				sb.append("trust=\"" + e.getTrust() + "\"");
486
				sb.append(">");
487
				sb.append(e.getValue());
488
				sb.append("</" + fd.getName() + ">");
489

  
490
				extraInfo.add(sb.toString());
491
			}
492

  
493
		} else if (fd.getType().equals(FieldDescriptor.Type.ENUM)) {
494
			if (fd.getFullName().equals("eu.dnetlib.data.proto.OafEntity.type")) return;
495
			metadata.add(asXmlElement(fd.getName(), ((EnumValueDescriptor) o).getName(), null, null));
496
		} else {
497
			metadata.add(asXmlElement(fd.getName(), o.toString(), null, null));
498
		}
499
	}
500

  
501
	private StringBuilder dataInfoAsAttributes(final StringBuilder sb, final DataInfo dataInfo) {
502
		sb.append(" inferred=\"" + dataInfo.getInferred() + "\"");
503
		sb.append(" inferenceprovenance=\"" + dataInfo.getInferenceprovenance() + "\"");
504
		sb.append(" provenanceaction=\"" + dataInfo.getProvenanceaction().getClassid() + "\"");
505
		sb.append(" trust=\"" + dataInfo.getTrust() + "\" ");
506
		return sb;
507
	}
508

  
509
	private List<String> buildContexts(final Type type) {
510
		final List<String> res = Lists.newArrayList();
511

  
512
		if ((contextMapper != null) && !contextMapper.isEmpty() && type.equals(Type.result)) {
513

  
514
			XMLTag document = XMLDoc.newDocument(true).addRoot("contextRoot");
515

  
516
			for (final String context : contextes) {
517

  
518
				String id = "";
519
				for (final String token : Splitter.on("::").split(context)) {
520
					id += token;
521

  
522
					final ContextDef def = contextMapper.get(id);
523

  
524
					if (def == null) throw new IllegalStateException(String.format("cannot find context for id '%s'", id));
525

  
526
					if (def.getName().equals("context")) {
527
						final String xpath = "//context/@id='" + def.getId() + "'";
528
						if (!document.gotoRoot().rawXpathBoolean(xpath, new Object())) {
529
							document = addContextDef(document.gotoRoot(), def);
530
						}
531
					}
532

  
533
					if (def.getName().equals("category")) {
534
						final String rootId = StringUtils.substringBefore(def.getId(), "::");
535
						document = addContextDef(document.gotoRoot().gotoTag("//context[./@id='" + rootId + "']", new Object()), def);
536
					}
537

  
538
					if (def.getName().equals("concept")) {
539
						document = addContextDef(document, def).gotoParent();
540
					}
541
					id += "::";
542
				}
543
			}
544

  
545
			for (final org.w3c.dom.Element x : document.gotoRoot().getChildElement()) {
546
				try {
547
					res.add(asStringElement(x));
548
				} catch (final TransformerException e) {
549
					throw new RuntimeException(e);
550
				}
551
			}
552
		}
553

  
554
		return res;
555
	}
556

  
557
	private XMLTag addContextDef(final XMLTag tag, final ContextDef def) {
558
		tag.addTag(def.getName()).addAttribute("id", def.getId()).addAttribute("label", def.getLabel());
559
		if ((def.getType() != null) && !def.getType().isEmpty()) {
560
			tag.addAttribute("type", def.getType());
561
		}
562
		return tag;
563
	}
564

  
565
	private String asStringElement(final org.w3c.dom.Element element) throws TransformerException {
566
		final StringWriter buffer = new StringWriter();
567
		transformer.transform(new DOMSource(element), new StreamResult(buffer));
568
		return buffer.toString();
569
	}
570

  
571
	@SuppressWarnings("unchecked")
572
	private String getRelFundingTree(final String xmlTree) {
573
		String funding = "<funding>";
574
		try {
575
			final Document ftree = new SAXReader().read(new StringReader(xmlTree));
576
			funding = "<funding>";
577
			// String _id = "";
578

  
579
			funding += getFunderElement(ftree);
580

  
581
			for (final Object o : Lists.reverse(ftree.selectNodes("//fundingtree//*[starts-with(local-name(),'funding_level_')]"))) {
582
				final Element e = (Element) o;
583
				final String _id = e.valueOf("./id");
584
				funding += "<" + e.getName() + " name=\"" + escapeXml(e.valueOf("./name")) + "\">" + escapeXml(_id) + "</" + e.getName() + ">";
585
				// _id += "::";
586
			}
587
		} catch (final DocumentException e) {
588
			throw new IllegalArgumentException("unable to parse funding tree: " + xmlTree + "\n" + e.getMessage());
589
		} finally {
590
			funding += "</funding>";
591
		}
592
		return funding;
593
	}
594

  
595
	private String getFunderElement(final Document ftree) {
596
		final String funderId = ftree.valueOf("//fundingtree/funder/id/text()");
597
		final String funderShortName = ftree.valueOf("//fundingtree/funder/shortname/text()");
598
		final String funderName = ftree.valueOf("//fundingtree/funder/name/text()");
599
		final String funderJurisdiction = ftree.valueOf("//fundingtree/funder/jurisdiction/text()");
600

  
601
		return "<funder id=\"" + escapeXml(funderId) + "\" shortname=\"" + escapeXml(funderShortName) + "\" name=\"" + escapeXml(funderName)
602
				+ "\" jurisdiction=\"" + escapeXml(funderJurisdiction) + "\" />";
603
	}
604

  
605
	private void fillContextMap(final String xmlTree) {
606

  
607
		Document fundingPath;
608
		try {
609
			fundingPath = new SAXReader().read(new StringReader(xmlTree));
610
		} catch (final DocumentException e) {
611
			throw new RuntimeException(e);
612
		}
613
		try {
614
			final Node funder = fundingPath.selectSingleNode("//funder");
615

  
616
			if (funder != null) {
617

  
618
				final String funderShortName = funder.valueOf("./shortname");
619
				contextes.add(funderShortName);
620

  
621
				contextMapper.put(funderShortName, new ContextDef(funderShortName, funder.valueOf("./name"), "context", "funding"));
622
				final Node level0 = fundingPath.selectSingleNode("//funding_level_0");
623
				if (level0 != null) {
624
					final String level0Id = Joiner.on("::").join(funderShortName, level0.valueOf("./name"));
625
					contextMapper.put(level0Id, new ContextDef(level0Id, level0.valueOf("./description"), "category", ""));
626
					final Node level1 = fundingPath.selectSingleNode("//funding_level_1");
627
					if (level1 == null) {
628
						contextes.add(level0Id);
629
					} else {
630
						final String level1Id = Joiner.on("::").join(level0Id, level1.valueOf("./name"));
631
						contextMapper.put(level1Id, new ContextDef(level1Id, level1.valueOf("./description"), "concept", ""));
632
						final Node level2 = fundingPath.selectSingleNode("//funding_level_2");
633
						if (level2 == null) {
634
							contextes.add(level1Id);
635
						} else {
636
							final String level2Id = Joiner.on("::").join(level1Id, level2.valueOf("./name"));
637
							contextMapper.put(level2Id, new ContextDef(level2Id, level2.valueOf("./description"), "concept", ""));
638
							contextes.add(level2Id);
639
						}
640
					}
641
				}
642
			}
643
		} catch (final NullPointerException e) {
644
			throw new IllegalArgumentException("malformed funding path: " + xmlTree, e);
645
		}
646
	}
647

  
648
	private String asXmlElement(final String name, final String value, final Qualifier q, final DataInfo dataInfo) {
649
		StringBuilder sb = new StringBuilder();
650
		sb.append("<");
651
		sb.append(name);
652
		if (q != null) {
653
			sb.append(getAttributes(q));
654
		}
655
		if (dataInfo != null) {
656
			sb = dataInfoAsAttributes(sb, dataInfo);
657
		}
658
		if ((value == null) || value.isEmpty()) {
659
			sb.append("/>");
660
			return sb.toString();
661
			// return "<" + name + getAttributes(q) + "/>";
662
		}
663

  
664
		sb.append(">");
665
		// sb.append(escapeXml(Normalizer.normalize(value, Normalizer.Form.NFD)));
666
		sb.append(escapeXml(value));
667
		sb.append("</");
668
		sb.append(name);
669
		sb.append(">");
670

  
671
		return sb.toString();
672
		// return "<" + name + getAttributes(q) + ">" + escapeXml(value) + "</" + name + ">";
673
	}
674

  
675
	private String getAttributes(final Qualifier q) {
676
		if (q == null) return "";
677

  
678
		final StringBuilder sb = new StringBuilder();
679
		for (final Entry<FieldDescriptor, Object> e : q.getAllFields().entrySet()) {
680
			// sb.append(" " + e.getKey().getName() + "=\"" + escapeXml(e.getValue().toString()) + "\"");
681
			sb.append(" ");
682
			sb.append(e.getKey().getName());
683
			sb.append("=\"");
684
			sb.append(escapeXml(e.getValue().toString()));
685
			sb.append("\"");
686
		}
687
		return sb.toString();
688
	}
689

  
690
	private Set<Entry<FieldDescriptor, Object>> filterFields(final GeneratedMessage fields, final Set<String> filter) {
691

  
692
		if (filter != null) {
693
			final Predicate<FieldDescriptor> p = new Predicate<FieldDescriptor>() {
694

  
695
				@Override
696
				public boolean apply(final FieldDescriptor descriptor) {
697
					if (fields == null) return false;
698
					final String name = descriptor.getName();
699
					return filter.contains(name);
700
				}
701
			};
702
			final Map<FieldDescriptor, Object> filtered = Maps.filterKeys(fields.getAllFields(), p);
703
			// log.info(
704
			// "filtered " + type.toString() + ": " + toString(filterEntries.keySet()) + "\n" +
705
			// "builder  " + fields.getDescriptorForType().getFullName() + ": " + toString(fields.getAllFields().keySet()));
706
			return filtered.entrySet();
707
		}
708
		return fields.getAllFields().entrySet();
709
	}
710

  
711
	private List<String> countersAsXml() {
712
		final List<String> out = Lists.newArrayList();
713
		for (final Entry<String, Integer> e : counters.entrySet()) {
714
			out.add(String.format("<counter_%s value=\"%s\"/>", e.getKey(), e.getValue()));
715
		}
716
		return out;
717
	}
718

  
719
	private void incrementCounter(final String type) {
720
		if (!counters.containsKey(type)) {
721
			counters.put(type, 1);
722
		} else {
723
			counters.put(type, counters.get(type) + 1);
724
		}
725
	}
726

  
727
	@Override
728
	public String toString() {
729
		final StringBuilder sb = new StringBuilder();
730
		sb.append("################################################\n");
731
		sb.append("ID: ").append(key).append("\n");
732
		if (mainEntity != null) {
733
			sb.append("MAIN ENTITY:\n").append(mainEntity.getEntity().toString() + "\n");
734
		}
735
		if (relations != null) {
736
			sb.append("\nRELATIONS:\n");
737
			for (final OafDecoder decoder : relations) {
738
				sb.append(decoder.getOafRel().toString() + "\n");
739
			}
740
		}
741
		if (children != null) {
742
			sb.append("\nCHILDREN:\n");
743
			for (final OafDecoder decoder : children) {
744
				sb.append(decoder.getOafRel().toString() + "\n");
745
			}
746
		}
747
		return sb.toString();
748
	}
749

  
750
}
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/java/eu/dnetlib/data/mapreduce/util/OafHbaseUtils.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
4

  
5
import com.google.common.base.Function;
6
import com.google.protobuf.InvalidProtocolBufferException;
7

  
8
import eu.dnetlib.data.proto.OafProtos.Oaf;
9
import eu.dnetlib.data.transform.OafUtils;
10

  
11
public class OafHbaseUtils extends OafUtils {
12

  
13
	public static OafDecoder decode(final ImmutableBytesWritable oaf) {
14
		return new OafDecoder(oaf.copyBytes());
15
	}
16

  
17
	public static Function<ImmutableBytesWritable, OafDecoder> decoder() {
18
		return new Function<ImmutableBytesWritable, OafDecoder>() {
19

  
20
			@Override
21
			public OafDecoder apply(final ImmutableBytesWritable input) {
22
				return OafDecoder.decode(input.copyBytes());
23
			}
24
		};
25
	}
26

  
27
	public static Function<ImmutableBytesWritable, Oaf> oafDecoder() {
28
		return new Function<ImmutableBytesWritable, Oaf>() {
29

  
30
			@Override
31
			public Oaf apply(final ImmutableBytesWritable input) {
32
				return parse(input);
33
			}
34
		};
35
	}
36

  
37
	public static Oaf parse(final ImmutableBytesWritable input) {
38
		try {
39
			return Oaf.parseFrom(input.copyBytes());
40
		} catch (final InvalidProtocolBufferException e) {
41
			throw new IllegalArgumentException(e);
42
		}
43
	}
44

  
45
}
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/java/eu/dnetlib/data/mapreduce/util/LicenseComparator.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import java.util.Comparator;
4

  
5
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
6

  
7
public class LicenseComparator implements Comparator<Qualifier> {
8

  
9
	@Override
10
	public int compare(Qualifier left, Qualifier right) {
11

  
12
		if (left == null && right == null) return 0;
13
		if (left == null) return 1;
14
		if (right == null) return -1;
15

  
16
		String lClass = left.getClassid();
17
		String rClass = right.getClassid();
18

  
19
		if (lClass.equals(rClass)) return 0;
20

  
21
		if (lClass.equals("OPEN")) return -1;
22
		if (rClass.equals("OPEN")) return 1;
23

  
24
		if (lClass.equals("6MONTHS")) return -1;
25
		if (rClass.equals("6MONTHS")) return 1;
26

  
27
		if (lClass.equals("12MONTHS")) return -1;
28
		if (rClass.equals("12MONTHS")) return 1;
29

  
30
		if (lClass.equals("EMBARGO")) return -1;
31
		if (rClass.equals("EMBARGO")) return 1;
32

  
33
		if (lClass.equals("RESTRICTED")) return -1;
34
		if (rClass.equals("RESTRICTED")) return 1;
35

  
36
		if (lClass.equals("CLOSED")) return -1;
37
		if (rClass.equals("CLOSED")) return 1;
38

  
39
		if (lClass.equals("UNKNOWN")) return -1;
40
		if (rClass.equals("UNKNOWN")) return 1;
41

  
42
		// Else (but unlikely), lexicographical ordering will do.
43
		return Float.compare(Float.parseFloat(lClass), Float.parseFloat(rClass));
44
	}
45

  
46
}
47 0

  
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/java/eu/dnetlib/data/mapreduce/util/PersonResultFilter.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import com.google.common.base.Predicate;
4

  
5
import eu.dnetlib.data.proto.OafProtos.Oaf;
6
import eu.dnetlib.data.proto.OafProtos.OafRel;
7
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
8

  
9
public class PersonResultFilter implements Predicate<Oaf> {
10
	private String key;
11

  
12
	public PersonResultFilter(String key) {
13
		this.key = key;
14
	}
15

  
16
	@Override
17
	public boolean apply(Oaf oaf) {
18
		final OafRel oafRel = oaf.getRel();
19
		if (RelType.personResult.equals(oafRel.getRelType())) {
20
			return !key.contains(oafRel.getSource().replaceAll("^.*\\:\\:", ""));
21
		}
22
		return true;
23
	}
24
}
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/java/eu/dnetlib/data/mapreduce/util/DedupUtils.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import java.nio.ByteBuffer;
4

  
5
import eu.dnetlib.data.proto.DedupDissimilarityProtos.DedupDissimilarity;
6
import eu.dnetlib.data.proto.DedupDissimilarityProtos.DedupDissimilarity.RelName;
7
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
8
import org.apache.hadoop.hbase.util.Bytes;
9

  
10
import eu.dnetlib.data.proto.DedupProtos.Dedup;
11
import eu.dnetlib.data.proto.DedupSimilarityProtos.DedupSimilarity;
12
import eu.dnetlib.data.proto.FieldTypeProtos.Qualifier;
13
import eu.dnetlib.data.proto.OafProtos.OafRel;
14
import eu.dnetlib.data.proto.OafProtos.OafRel.Builder;
15
import eu.dnetlib.data.proto.OrganizationOrganizationProtos.OrganizationOrganization;
16
import eu.dnetlib.data.proto.PersonPersonProtos.PersonPerson;
17
import eu.dnetlib.data.proto.RelMetadataProtos.RelMetadata;
18
import eu.dnetlib.data.proto.RelTypeProtos.RelType;
19
import eu.dnetlib.data.proto.RelTypeProtos.SubRelType;
20
import eu.dnetlib.data.proto.ResultResultProtos.ResultResult;
21
import eu.dnetlib.data.proto.TypeProtos.Type;
22
import eu.dnetlib.pace.config.DedupConfig;
23

  
24
public class DedupUtils {
25

  
26
	public static final String CF_SEPARATOR = "_";
27

  
28
	public static final String ROOT = "dedup_wf";
29

  
30
	public static final String BODY_S = "body";
31

  
32
	public static final byte[] BODY_B = Bytes.toBytes(BODY_S);
33

  
34
	public static String dedupPrefix(final String dedupRun) {
35
		return "|" + ROOT + "_" + dedupRun + "::";
36
	}
37

  
38
	public static String newId(final String id, final String dedupRun) {
39
		if ((dedupRun == null) || (dedupRun.length() != 3)) throw new IllegalArgumentException("wrong dedupRun param");
40

  
41
		return id.replaceFirst("\\|.*\\:\\:", dedupPrefix(dedupRun));
42
	}
43

  
44
	public static byte[] newIdBytes(final String s, final String dedupRun) {
45
		return newId(s, dedupRun).getBytes();
46
	}
47

  
48
	public static byte[] newIdBytes(final ByteBuffer b, final String dedupRun) {
49
		return newId(new String(b.array()), dedupRun).getBytes();
50
	}
51

  
52
	public static boolean isRoot(final String s) {
53
		return s.contains(ROOT);
54
	}
55

  
56
	public static boolean isRoot(final ImmutableBytesWritable s) {
57
		return isRoot(s.copyBytes());
58
	}
59

  
60
	public static boolean isRoot(final byte[] s) {
61
		return isRoot(new String(s));
62
	}
63

  
64
	public static String getDedupCF_merges(final Type type) {
65
		return getRelType(type) + CF_SEPARATOR + SubRelType.dedup + CF_SEPARATOR + Dedup.RelName.merges;
66
	}
67

  
68
	public static String getDedupCF_merges(final String type) {
69
		return getDedupCF_merges(Type.valueOf(type));
70
	}
71

  
72
	public static byte[] getDedupCF_mergesBytes(final Type type) {
73
		return Bytes.toBytes(getDedupCF_merges(type));
74
	}
75

  
76
	public static byte[] getDedupCF_mergesBytes(final String type) {
77
		return getDedupCF_mergesBytes(Type.valueOf(type));
78
	}
79

  
80
	public static String getDedupCF_mergedIn(final Type type) {
81
		return getRelType(type) + CF_SEPARATOR + SubRelType.dedup + CF_SEPARATOR + Dedup.RelName.isMergedIn;
82
	}
83

  
84
	public static String getDedupCF_mergedIn(final String type) {
85
		return getDedupCF_mergedIn(Type.valueOf(type));
86
	}
87

  
88
	public static byte[] getDedupCF_mergedInBytes(final Type type) {
89
		return Bytes.toBytes(getDedupCF_mergedIn(type));
90
	}
91

  
92
	public static byte[] getDedupCF_mergedInBytes(final String type) {
93
		return getDedupCF_mergedInBytes(Type.valueOf(type));
94
	}
95

  
96
	public static String getSimilarityCF(final Type type) {
97
		return getRelType(type) + CF_SEPARATOR + SubRelType.dedupSimilarity + CF_SEPARATOR + DedupSimilarity.RelName.isSimilarTo;
98
	}
99

  
100
	public static String getSimilarityCF(final String type) {
101
		return getSimilarityCF(Type.valueOf(type));
102
	}
103

  
104
	public static byte[] getSimilarityCFBytes(final Type type) {
105
		return Bytes.toBytes(getSimilarityCF(type));
106
	}
107

  
108
	public static byte[] getSimilarityCFBytes(final String type) {
109
		return getSimilarityCFBytes(Type.valueOf(type));
110
	}
111

  
112
	public static String getRelTypeString(final Type type) {
113
		return getRelType(type).toString();
114
	}
115

  
116
	public static byte[] getDissimilarityCFBytes(final Type type) {
117
		return Bytes.toBytes(getDissimilarityCF(type));
118
	}
119

  
120
	public static String getDissimilarityCF(final Type type) {
121
		return getRelType(type) + CF_SEPARATOR + SubRelType.dedupDissimilarity + CF_SEPARATOR + DedupDissimilarity.RelName.isDissimilarFrom;
122
	}
123

  
124
	public static RelType getRelType(final Type type) {
125
		switch (type) {
126
		case organization:
127
			return RelType.organizationOrganization;
128
		case person:
129
			return RelType.personPerson;
130
		case result:
131
			return RelType.resultResult;
132
		default:
133
			throw new IllegalArgumentException("Deduplication not supported for entity type: " + type);
134
		}
135
	}
136

  
137
	public static ColumnFamily decodeCF(final byte[] b) {
138
		final String[] s = new String(b).split(CF_SEPARATOR);
139
		return new DedupUtils().getCF(RelType.valueOf(s[0]), SubRelType.valueOf(s[1]));
140
	}
141

  
142
	private ColumnFamily getCF(final RelType relType, final SubRelType subRelType) {
143
		return new ColumnFamily(relType, subRelType);
144
	}
145

  
146
	public static OafRel.Builder getDedup(final DedupConfig dedupConf, final String from, final String to, final Dedup.RelName relClass) {
147
		final Type type = Type.valueOf(dedupConf.getWf().getEntityType());
148
		final RelType relType = DedupUtils.getRelType(type);
149
		final Builder oafRel =
150
				OafRel.newBuilder().setRelType(relType).setSubRelType(SubRelType.dedup).setRelClass(relClass.toString()).setChild(false)
151
						.setSource(new String(from)).setTarget(new String(to));
152
		switch (type) {
153
		case organization:
154
			oafRel.setOrganizationOrganization(OrganizationOrganization.newBuilder().setDedup(
155
					DedupUtils.dedup(relClass, "dnet:organization_organization_relations")));
156
			break;
157
		case person:
158
			oafRel.setPersonPerson(PersonPerson.newBuilder().setDedup(DedupUtils.dedup(relClass, "dnet:person_person_relations")));
159
			break;
160
		case result:
161
			oafRel.setResultResult(ResultResult.newBuilder().setDedup(DedupUtils.dedup(relClass, "dnet:result_result_relations")));
162
			break;
163
		default:
164
			throw new IllegalArgumentException("Deduplication not supported for entity type: " + dedupConf.getWf().getEntityType());
165
		}
166
		return oafRel;
167
	}
168

  
169
	private static Dedup.Builder dedup(final Dedup.RelName relClass, final String scheme) {
170
		return Dedup.newBuilder().setRelMetadata(
171
				RelMetadata.newBuilder().setSemantics(
172
						Qualifier.newBuilder().setClassid(relClass.toString()).setClassname(relClass.toString()).setSchemeid(scheme).setSchemename(scheme)));
173
	}
174

  
175
	class ColumnFamily {
176

  
177
		private final RelType relType;
178
		private final SubRelType subRelType;
179

  
180
		public ColumnFamily(final RelType relType, final SubRelType subRelType) {
181
			this.relType = relType;
182
			this.subRelType = subRelType;
183
		}
184

  
185
		@Override
186
		public String toString() {
187
			return getRelType() + CF_SEPARATOR + getSubRelType();
188
		}
189

  
190
		public RelType getRelType() {
191
			return relType;
192
		}
193

  
194
		public SubRelType getSubRelType() {
195
			return subRelType;
196
		}
197

  
198
	}
199

  
200
}
modules/dnet-mapreduce-jobs/branches/scoreResult/src/main/java/eu/dnetlib/data/mapreduce/util/TemplateFactory.java
1
package eu.dnetlib.data.mapreduce.util;
2

  
3
import java.io.IOException;
4
import java.io.StringWriter;
5
import java.util.List;
6

  
7
import eu.dnetlib.data.proto.TypeProtos.Type;
8
import org.antlr.stringtemplate.StringTemplate;
9
import org.apache.commons.io.IOUtils;
10
import org.apache.commons.logging.Log;
11
import org.apache.commons.logging.LogFactory;
12
import org.springframework.core.io.Resource;
13

  
14
public class TemplateFactory {
15

  
16
	private static final Log log = LogFactory.getLog(TemplateFactory.class); // NOPMD by marko on 11/24/08 5:02 PM
17

  
18
	protected TemplateResources resources = new TemplateResources();
19

  
20
	public String buildBody(final Type type, final List<String> metadata, final List<String> rels, final List<String> children, final List<String> inference) {
21

  
22
		StringTemplate body = getTemplate(resources.getEntity());
23

  
24
		body.setAttribute("name", type.toString());
25
		body.setAttribute("metadata", metadata);
26
		body.setAttribute("rels", rels);
27
		body.setAttribute("children", children);
28
		body.setAttribute("inference", inference);
29

  
30
		return body.toString();
31
	}
32

  
33
	public String getChild(final String name, final String id, final List<String> metadata) {
34
		StringTemplate child = getTemplate(resources.getChild());
35

  
36
		child.setAttribute("name", name);
37
		child.setAttribute("hasId", !(id == null));
38
		child.setAttribute("id", id != null ? XmlRecordFactory.escapeXml(XmlRecordFactory.removePrefix(id)) : "");
39
		child.setAttribute("metadata", metadata);
40

  
41
		String string = child.toString();
42
		return string;
43
	}
44

  
45
	public String buildRecord(final Type type,
46
			final String objIdentifier,
47
			final String dateOfCollection,
48
			final String dateOfTransformation,
49
			final String schemaLocation,
50
			final String body,
51
			final List<String> counters) {
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff