Project

General

Profile

« Previous | Next » 

Revision 51263

first version paving the road for the project structure

View differences:

dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Process.java
1
package eu.dnetlib.dhp.common.java;
2

  
3
import java.util.Map;
4

  
5
import org.apache.hadoop.conf.Configuration;
6

  
7
import eu.dnetlib.dhp.common.java.porttype.PortType;
8

  
9
/** Workflow node written in Java.
10
 * 
11
 * The implementing class has to define a constructor with no parameters 
12
 * (possibly the default one) or a constructor with String[] as a single
13
 * parameter.
14
 * @author Mateusz Kobos
15
 */
16
public interface Process {
17
	/**
18
	 * Run the process.
19
	 * 
20
	 * The process ends with a success status if no exception is thrown, 
21
	 * otherwise it ends with an error status.
22
	 *
23
	 * @param parameters parameters of the process. Each parameter
24
	 * corresponds to a single entry in the map, its name is the key, its
25
	 * value is the value.
26
	 * @throws Exception if thrown, it means that the process finished
27
	 * with an error status
28
	 */
29
	void run(PortBindings portBindings, Configuration conf, 
30
			Map<String, String> parameters) throws Exception;
31
	
32
	/**
33
	 * @return map containing as the key: name of the port, as the value: type
34
	 * of the port 
35
	 */
36
	Map<String, PortType> getInputPorts();
37
	
38
	/**
39
	 * @return map containing as the key: name of the port, as the value: type
40
	 * of the port 
41
	 */
42
	Map<String, PortType> getOutputPorts();
43
}
dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/oozie/property/ConditionalPropertySetter.java
1
package eu.dnetlib.dhp.common.oozie.property;
2

  
3
import java.io.File;
4
import java.io.FileOutputStream;
5
import java.io.OutputStream;
6
import java.util.Collections;
7
import java.util.Map;
8
import java.util.Properties;
9

  
10
import eu.dnetlib.dhp.common.java.PortBindings;
11
import eu.dnetlib.dhp.common.java.Process;
12
import eu.dnetlib.dhp.common.java.porttype.PortType;
13
import org.apache.hadoop.conf.Configuration;
14

  
15
import static eu.dnetlib.dhp.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME;
16

  
17
/**
18
 * This process is a solution for setting dynamic properties in oozie workflow definition.
19
 * 
20
 * Expects three parameters to be provided: the first 'condition' parameter is boolean value 
21
 * based on which either first 'inCaseOfTrue' or second 'elseCase' parameter value is set as 
22
 * the 'result' property.
23
 *  
24
 * This can be understood as the: 
25
 * 
26
 * condition ? inCaseOfTrue : elseCase
27
 * 
28
 * java syntax equivalent.
29
 * 
30
 * @author mhorst
31
 *
32
 */
33
public class ConditionalPropertySetter implements Process {
34

  
35
	public static final String PARAM_CONDITION = "condition";
36
	public static final String PARAM_INCASEOFTRUE = "inCaseOfTrue";
37
	public static final String PARAM_ELSECASE = "elseCase";
38
	
39
	public static final String OUTPUT_PROPERTY_RESULT = "result";
40
	
41
	@Override
42
	public Map<String, PortType> getInputPorts() {
43
		return Collections.emptyMap();
44
	}
45

  
46
	@Override
47
	public Map<String, PortType> getOutputPorts() {
48
		return Collections.emptyMap();
49
	}
50

  
51
	@Override
52
	public void run(PortBindings portBindings, Configuration conf,
53
			Map<String, String> parameters) throws Exception {
54

  
55
		String condition = parameters.get(PARAM_CONDITION);
56
		if (condition == null) {
57
			throw new RuntimeException("unable to make decision: " + 
58
					PARAM_CONDITION + " parameter was not set!");
59
		}
60

  
61
		Properties props = new Properties();
62
        props.setProperty(OUTPUT_PROPERTY_RESULT, 
63
        		Boolean.parseBoolean(condition)?
64
        				parameters.get(PARAM_INCASEOFTRUE):
65
        					parameters.get(PARAM_ELSECASE));
66
        OutputStream os = new FileOutputStream(
67
        		new File(System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME)));
68
        try {
69
        	props.store(os, "");	
70
        } finally {
71
        	os.close();	
72
        }
73

  
74
	}
75

  
76
}
dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/CharSequenceUtils.java
1
package eu.dnetlib.dhp.common.string;
2

  
3
/**
 * Static helper operations on {@link CharSequence} values.
 *
 * @author Łukasz Dumiszewski
 */
public final class CharSequenceUtils {

    //------------------------ CONSTRUCTORS --------------------------

    /**
     * Utility class, not meant to be instantiated; always throws.
     */
    private CharSequenceUtils() {
        throw new IllegalStateException("may not be initialized");
    }

    //------------------------ LOGIC --------------------------

    /**
     * Turns <code>value</code> into a {@link String} via {@link CharSequence#toString()}.
     *
     * @param value the character sequence to convert, may be null
     * @return the string form of <code>value</code>, or an empty string when <code>value</code> is null
     */
    public static String toStringWithNullToEmpty(CharSequence value) {
        if (value != null) {
            return value.toString();
        }
        return "";
    }

}
dnet-hadoop/dhp-common/dhp-common.iml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
3
  <component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_8">
4
    <output url="file://$MODULE_DIR$/target/classes" />
5
    <output-test url="file://$MODULE_DIR$/target/test-classes" />
6
    <content url="file://$MODULE_DIR$">
7
      <sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
8
      <sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
9
      <sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
10
      <excludeFolder url="file://$MODULE_DIR$/target" />
11
    </content>
12
    <orderEntry type="inheritedJdk" />
13
    <orderEntry type="sourceFolder" forTests="false" />
14
    <orderEntry type="module" module-name="dhp-schemas" />
15
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.oozie:oozie-core:4.1.0-cdh5.9.0" level="project" />
16
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.oozie:oozie-hbase:1.2.0-cdh5.9.0.oozie-4.1.0-cdh5.9.0" level="project" />
17
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hbase:hbase-server:1.2.0-cdh5.9.0" level="project" />
18
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hbase:hbase-procedure:1.2.0-cdh5.9.0" level="project" />
19
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hbase:hbase-common:tests:1.2.0-cdh5.9.0" level="project" />
20
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hbase:hbase-prefix-tree:1.2.0-cdh5.9.0" level="project" />
21
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.lmax:disruptor:3.3.0" level="project" />
22
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.oozie:oozie-hcatalog:1.1.0-cdh5.9.0.oozie-4.1.0-cdh5.9.0" level="project" />
23
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hive.hcatalog:hive-hcatalog-server-extensions:1.1.0-cdh5.9.0" level="project" />
24
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hive.hcatalog:hive-hcatalog-core:1.1.0-cdh5.9.0" level="project" />
25
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hive.hcatalog:hive-webhcat-java-client:1.1.0-cdh5.9.0" level="project" />
26
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hive:hive-common:1.1.0-cdh5.9.0" level="project" />
27
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.eclipse.jetty.aggregate:jetty-all:7.6.0.v20120127" level="project" />
28
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.geronimo.specs:geronimo-jaspic_1.0_spec:1.0" level="project" />
29
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.geronimo.specs:geronimo-annotation_1.0_spec:1.1.1" level="project" />
30
    <orderEntry type="library" scope="PROVIDED" name="Maven: asm:asm-commons:3.1" level="project" />
31
    <orderEntry type="library" scope="PROVIDED" name="Maven: asm:asm-tree:3.1" level="project" />
32
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.ant:ant:1.9.1" level="project" />
33
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.ant:ant-launcher:1.9.1" level="project" />
34
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hive:hive-metastore:1.1.0-cdh5.9.0" level="project" />
35
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.jolbox:bonecp:0.8.0.RELEASE" level="project" />
36
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.datanucleus:datanucleus-api-jdo:3.2.6" level="project" />
37
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.datanucleus:datanucleus-core:3.2.10" level="project" />
38
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.datanucleus:datanucleus-rdbms:3.2.9" level="project" />
39
    <orderEntry type="library" scope="PROVIDED" name="Maven: javax.jdo:jdo-api:3.0.1" level="project" />
40
    <orderEntry type="library" scope="PROVIDED" name="Maven: javax.transaction:jta:1.1" level="project" />
41
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.antlr:antlr-runtime:3.4" level="project" />
42
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.antlr:stringtemplate:3.2.1" level="project" />
43
    <orderEntry type="library" scope="PROVIDED" name="Maven: antlr:antlr:2.7.7" level="project" />
44
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hive:hive-exec:1.1.0-cdh5.9.0" level="project" />
45
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hive:hive-ant:1.1.0-cdh5.9.0" level="project" />
46
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.cloudera.logredactor:logredactor:1.0.3" level="project" />
47
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.antlr:ST4:4.0.4" level="project" />
48
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.codehaus.groovy:groovy-all:2.4.4" level="project" />
49
    <orderEntry type="library" scope="PROVIDED" name="Maven: stax:stax-api:1.0.1" level="project" />
50
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hive:hive-serde:1.1.0-cdh5.9.0" level="project" />
51
    <orderEntry type="library" scope="PROVIDED" name="Maven: net.sf.opencsv:opencsv:2.3" level="project" />
52
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.twitter:parquet-hadoop-bundle:1.5.0-cdh5.9.0" level="project" />
53
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.sun.jersey:jersey-servlet:1.14" level="project" />
54
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.thrift:libfb303:0.9.2" level="project" />
55
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.oozie:oozie-client:4.1.0-cdh5.9.0" level="project" />
56
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.activemq:activemq-client:5.13.2" level="project" />
57
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.geronimo.specs:geronimo-jms_1.1_spec:1.1.1" level="project" />
58
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.fusesource.hawtbuf:hawtbuf:1.11" level="project" />
59
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.geronimo.specs:geronimo-j2ee-management_1.1_spec:1.0.1" level="project" />
60
    <orderEntry type="library" scope="PROVIDED" name="Maven: xerces:xercesImpl:2.10.0" level="project" />
61
    <orderEntry type="library" scope="PROVIDED" name="Maven: xml-apis:xml-apis:1.4.01" level="project" />
62
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.openjpa:openjpa-persistence:2.2.2" level="project" />
63
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.openjpa:openjpa-kernel:2.2.2" level="project" />
64
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.openjpa:openjpa-lib:2.2.2" level="project" />
65
    <orderEntry type="library" scope="PROVIDED" name="Maven: net.sourceforge.serp:serp:1.14.1" level="project" />
66
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.geronimo.specs:geronimo-jta_1.1_spec:1.1.1" level="project" />
67
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.geronimo.specs:geronimo-jpa_2.0_spec:1.1" level="project" />
68
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.openjpa:openjpa-jdbc:2.2.2" level="project" />
69
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.openjpa:openjpa-persistence-jdbc:2.2.2" level="project" />
70
    <orderEntry type="library" name="Maven: commons-lang:commons-lang:2.4" level="project" />
71
    <orderEntry type="library" scope="PROVIDED" name="Maven: net.sf.ehcache:ehcache-core:2.6.3" level="project" />
72
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.googlecode.json-simple:json-simple:1.1.1" level="project" />
73
    <orderEntry type="library" scope="PROVIDED" name="Maven: log4j:apache-log4j-extras:1.2.17" level="project" />
74
    <orderEntry type="library" scope="PROVIDED" name="Maven: commons-el:commons-el:1.0" level="project" />
75
    <orderEntry type="library" scope="PROVIDED" name="Maven: commons-dbcp:commons-dbcp:1.4" level="project" />
76
    <orderEntry type="library" scope="PROVIDED" name="Maven: commons-pool:commons-pool:1.5.4" level="project" />
77
    <orderEntry type="library" scope="PROVIDED" name="Maven: hsqldb:hsqldb:1.8.0.10" level="project" />
78
    <orderEntry type="library" name="Maven: commons-logging:commons-logging:1.1" level="project" />
79
    <orderEntry type="library" scope="PROVIDED" name="Maven: commons-httpclient:commons-httpclient:3.1" level="project" />
80
    <orderEntry type="library" scope="PROVIDED" name="Maven: javax.servlet.jsp:jsp-api:2.0" level="project" />
81
    <orderEntry type="library" name="Maven: org.mortbay.jetty:jetty:6.1.26.cloudera.2" level="project" />
82
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.derby:derby:10.10.1.1" level="project" />
83
    <orderEntry type="library" scope="PROVIDED" name="Maven: javax.mail:mail:1.4" level="project" />
84
    <orderEntry type="library" scope="RUNTIME" name="Maven: javax.activation:activation:1.1" level="project" />
85
    <orderEntry type="library" scope="PROVIDED" name="Maven: postgresql:postgresql:9.0-801.jdbc4" level="project" />
86
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.oozie:oozie-sharelib-oozie:4.1.0-cdh5.9.0" level="project" />
87
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.oozie:oozie-hadoop-utils:2.6.0-cdh5.9.0.oozie-4.1.0-cdh5.9.0" level="project" />
88
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.oozie:oozie-sharelib-hcatalog:4.1.0-cdh5.9.0" level="project" />
89
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hive.hcatalog:hive-hcatalog-pig-adapter:1.1.0-cdh5.9.0" level="project" />
90
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.oozie:oozie-sharelib-distcp:4.1.0-cdh5.9.0" level="project" />
91
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.oozie:oozie-hadoop-distcp:2.6.0-cdh5.9.0.oozie-4.1.0-cdh5.9.0" level="project" />
92
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-distcp:2.6.0-cdh5.9.0" level="project" />
93
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.quartz-scheduler:quartz:2.1.7" level="project" />
94
    <orderEntry type="library" scope="PROVIDED" name="Maven: c3p0:c3p0:0.9.1.1" level="project" />
95
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.curator:curator-recipes:2.7.1" level="project" />
96
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.curator:curator-framework:2.7.1" level="project" />
97
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.curator:curator-x-discovery:2.7.1" level="project" />
98
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.google.guava:guava:12.0" level="project" />
99
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.codahale.metrics:metrics-core:3.0.2" level="project" />
100
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.codahale.metrics:metrics-jvm:3.0.2" level="project" />
101
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.codahale.metrics:metrics-json:3.0.2" level="project" />
102
    <orderEntry type="library" scope="PROVIDED" name="Maven: net.sf.jung:jung-graph-impl:2.0.1" level="project" />
103
    <orderEntry type="library" scope="PROVIDED" name="Maven: net.sf.jung:jung-api:2.0.1" level="project" />
104
    <orderEntry type="library" scope="PROVIDED" name="Maven: net.sf.jung:jung-visualization:2.0.1" level="project" />
105
    <orderEntry type="library" scope="PROVIDED" name="Maven: net.sf.jung:jung-algorithms:2.0.1" level="project" />
106
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.cloudera:collections-generic:4.01-cloudera" level="project" />
107
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.eclipse.jgit:org.eclipse.jgit:3.3.1.201403241930-r" level="project" />
108
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.googlecode.javaewah:JavaEWAH:0.7.9" level="project" />
109
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hive:hive-jdbc:1.1.0-cdh5.9.0" level="project" />
110
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hive:hive-service:1.1.0-cdh5.9.0" level="project" />
111
    <orderEntry type="library" scope="PROVIDED" name="Maven: net.sf.jpam:jpam:1.1" level="project" />
112
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.jamon:jamon-runtime:2.3.1" level="project" />
113
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hive:hive-shims:1.1.0-cdh5.9.0" level="project" />
114
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hive.shims:hive-shims-common:1.1.0-cdh5.9.0" level="project" />
115
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hive.shims:hive-shims-0.23:1.1.0-cdh5.9.0" level="project" />
116
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-yarn-server-resourcemanager:2.6.0-cdh5.9.0" level="project" />
117
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-yarn-server-applicationhistoryservice:2.6.0-cdh5.9.0" level="project" />
118
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-yarn-server-web-proxy:2.6.0-cdh5.9.0" level="project" />
119
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hive.shims:hive-shims-scheduler:1.1.0-cdh5.9.0" level="project" />
120
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.httpcomponents:httpclient:4.2.5" level="project" />
121
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.httpcomponents:httpcore:4.2.5" level="project" />
122
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.thrift:libthrift:0.9.3" level="project" />
123
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-core:2.6.0-cdh5.9.0" level="project" />
124
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-yarn-common:2.6.0-cdh5.9.0" level="project" />
125
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-yarn-api:2.6.0-cdh5.9.0" level="project" />
126
    <orderEntry type="library" scope="RUNTIME" name="Maven: javax.xml.bind:jaxb-api:2.2.2" level="project" />
127
    <orderEntry type="library" scope="RUNTIME" name="Maven: javax.xml.stream:stax-api:1.0-2" level="project" />
128
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.sun.jersey:jersey-client:1.9" level="project" />
129
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.codehaus.jackson:jackson-jaxrs:1.8.8" level="project" />
130
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.codehaus.jackson:jackson-xc:1.8.8" level="project" />
131
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.google.inject:guice:3.0" level="project" />
132
    <orderEntry type="library" scope="PROVIDED" name="Maven: javax.inject:javax.inject:1" level="project" />
133
    <orderEntry type="library" scope="PROVIDED" name="Maven: aopalliance:aopalliance:1.0" level="project" />
134
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.sun.jersey.contribs:jersey-guice:1.9" level="project" />
135
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.google.protobuf:protobuf-java:2.5.0" level="project" />
136
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-annotations:2.6.0-cdh5.9.0" level="project" />
137
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.google.inject.extensions:guice-servlet:3.0" level="project" />
138
    <orderEntry type="library" scope="PROVIDED" name="Maven: io.netty:netty:3.10.5.Final" level="project" />
139
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-common:2.6.0-cdh5.9.0" level="project" />
140
    <orderEntry type="library" scope="PROVIDED" name="Maven: commons-cli:commons-cli:1.3.1" level="project" />
141
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.commons:commons-math3:3.1.1" level="project" />
142
    <orderEntry type="library" scope="PROVIDED" name="Maven: xmlenc:xmlenc:0.52" level="project" />
143
    <orderEntry type="library" scope="PROVIDED" name="Maven: commons-codec:commons-codec:1.4" level="project" />
144
    <orderEntry type="library" scope="PROVIDED" name="Maven: commons-net:commons-net:3.1" level="project" />
145
    <orderEntry type="library" name="Maven: commons-collections:commons-collections:3.2.2" level="project" />
146
    <orderEntry type="library" name="Maven: org.mortbay.jetty:jetty-util:6.1.26.cloudera.4" level="project" />
147
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.sun.jersey:jersey-core:1.9" level="project" />
148
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.sun.jersey:jersey-json:1.9" level="project" />
149
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.codehaus.jettison:jettison:1.1" level="project" />
150
    <orderEntry type="library" scope="RUNTIME" name="Maven: com.sun.xml.bind:jaxb-impl:2.2.7" level="project" />
151
    <orderEntry type="library" scope="RUNTIME" name="Maven: com.sun.xml.bind:jaxb-core:2.2.7" level="project" />
152
    <orderEntry type="library" scope="RUNTIME" name="Maven: com.sun.istack:istack-commons-runtime:2.16" level="project" />
153
    <orderEntry type="library" scope="RUNTIME" name="Maven: com.sun.xml.fastinfoset:FastInfoset:1.2.12" level="project" />
154
    <orderEntry type="library" scope="RUNTIME" name="Maven: javax.xml.bind:jsr173_api:1.0" level="project" />
155
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.sun.jersey:jersey-server:1.9" level="project" />
156
    <orderEntry type="library" scope="PROVIDED" name="Maven: asm:asm:3.1" level="project" />
157
    <orderEntry type="library" scope="PROVIDED" name="Maven: tomcat:jasper-compiler:5.5.23" level="project" />
158
    <orderEntry type="library" scope="PROVIDED" name="Maven: tomcat:jasper-runtime:5.5.23" level="project" />
159
    <orderEntry type="library" scope="PROVIDED" name="Maven: net.java.dev.jets3t:jets3t:0.9.0" level="project" />
160
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.jamesmurty.utils:java-xmlbuilder:0.4" level="project" />
161
    <orderEntry type="library" scope="PROVIDED" name="Maven: commons-configuration:commons-configuration:1.6" level="project" />
162
    <orderEntry type="library" scope="PROVIDED" name="Maven: commons-digester:commons-digester:1.8" level="project" />
163
    <orderEntry type="library" scope="PROVIDED" name="Maven: commons-beanutils:commons-beanutils-core:1.8.0" level="project" />
164
    <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-core-asl:1.8.8" level="project" />
165
    <orderEntry type="library" name="Maven: org.codehaus.jackson:jackson-mapper-asl:1.8.8" level="project" />
166
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.google.code.gson:gson:2.8.0" level="project" />
167
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-auth:2.6.0-cdh5.9.0" level="project" />
168
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.directory.server:apacheds-kerberos-codec:2.0.0-M15" level="project" />
169
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.directory.server:apacheds-i18n:2.0.0-M15" level="project" />
170
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.directory.api:api-asn1-api:1.0.0-M20" level="project" />
171
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.directory.api:api-util:1.0.0-M20" level="project" />
172
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.jcraft:jsch:0.1.42" level="project" />
173
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.curator:curator-client:2.7.1" level="project" />
174
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.htrace:htrace-core4:4.0.1-incubating" level="project" />
175
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.zookeeper:zookeeper:3.4.5-cdh5.9.0" level="project" />
176
    <orderEntry type="library" name="Maven: org.apache.commons:commons-compress:1.4.1" level="project" />
177
    <orderEntry type="library" name="Maven: org.tukaani:xz:1.0" level="project" />
178
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-core_2.10:1.6.0-cdh5.9.0" level="project" />
179
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.twitter:chill_2.10:0.5.0" level="project" />
180
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.esotericsoftware.kryo:kryo:2.21" level="project" />
181
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.esotericsoftware.reflectasm:reflectasm:shaded:1.07" level="project" />
182
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.esotericsoftware.minlog:minlog:1.2" level="project" />
183
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.twitter:chill-java:0.5.0" level="project" />
184
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.xbean:xbean-asm5-shaded:4.4" level="project" />
185
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-client:2.6.0-cdh5.9.0" level="project" />
186
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-hdfs:2.6.0-cdh5.9.0" level="project" />
187
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-app:2.6.0-cdh5.9.0" level="project" />
188
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-common:2.6.0-cdh5.9.0" level="project" />
189
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-yarn-client:2.6.0-cdh5.9.0" level="project" />
190
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-yarn-server-common:2.6.0-cdh5.9.0" level="project" />
191
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-shuffle:2.6.0-cdh5.9.0" level="project" />
192
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-mapreduce-client-jobclient:2.6.0-cdh5.9.0" level="project" />
193
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-aws:2.6.0-cdh5.9.0" level="project" />
194
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.amazonaws:aws-java-sdk-s3:1.10.6" level="project" />
195
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.amazonaws:aws-java-sdk-kms:1.10.6" level="project" />
196
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.amazonaws:aws-java-sdk-core:1.10.6" level="project" />
197
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.amazonaws:aws-java-sdk-sts:1.10.6" level="project" />
198
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-launcher_2.10:1.6.0-cdh5.9.0" level="project" />
199
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-network-common_2.10:1.6.0-cdh5.9.0" level="project" />
200
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-network-shuffle_2.10:1.6.0-cdh5.9.0" level="project" />
201
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.fusesource.leveldbjni:leveldbjni-all:1.8" level="project" />
202
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.fasterxml.jackson.core:jackson-annotations:2.2.3" level="project" />
203
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-unsafe_2.10:1.6.0-cdh5.9.0" level="project" />
204
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.eclipse.jetty.orbit:javax.servlet:3.0.0.v201112011016" level="project" />
205
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.slf4j:jul-to-slf4j:1.7.5" level="project" />
206
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.slf4j:jcl-over-slf4j:1.7.5" level="project" />
207
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.ning:compress-lzf:1.0.3" level="project" />
208
    <orderEntry type="library" name="Maven: org.xerial.snappy:snappy-java:1.0.4.1" level="project" />
209
    <orderEntry type="library" scope="PROVIDED" name="Maven: net.jpountz.lz4:lz4:1.3.0" level="project" />
210
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.roaringbitmap:RoaringBitmap:0.5.11" level="project" />
211
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.spark-project.akka:akka-remote_2.10:2.2.3-shaded-protobuf" level="project" />
212
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.spark-project.akka:akka-actor_2.10:2.2.3-shaded-protobuf" level="project" />
213
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.typesafe:config:1.0.2" level="project" />
214
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.spark-project.protobuf:protobuf-java:2.4.1-shaded" level="project" />
215
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.uncommons.maths:uncommons-maths:1.2.2a" level="project" />
216
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.spark-project.akka:akka-slf4j_2.10:2.2.3-shaded-protobuf" level="project" />
217
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.scala-lang:scala-library:2.10.6" level="project" />
218
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.json4s:json4s-jackson_2.10:3.2.10" level="project" />
219
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.json4s:json4s-core_2.10:3.2.10" level="project" />
220
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.json4s:json4s-ast_2.10:3.2.10" level="project" />
221
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.scala-lang:scalap:2.10.0" level="project" />
222
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.scala-lang:scala-compiler:2.10.0" level="project" />
223
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.mesos:mesos:shaded-protobuf:0.21.1" level="project" />
224
    <orderEntry type="library" scope="PROVIDED" name="Maven: io.netty:netty-all:4.0.29.Final" level="project" />
225
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.clearspring.analytics:stream:2.7.0" level="project" />
226
    <orderEntry type="library" scope="PROVIDED" name="Maven: io.dropwizard.metrics:metrics-core:3.1.2" level="project" />
227
    <orderEntry type="library" scope="PROVIDED" name="Maven: io.dropwizard.metrics:metrics-jvm:3.1.2" level="project" />
228
    <orderEntry type="library" scope="PROVIDED" name="Maven: io.dropwizard.metrics:metrics-json:3.1.2" level="project" />
229
    <orderEntry type="library" scope="PROVIDED" name="Maven: io.dropwizard.metrics:metrics-graphite:3.1.2" level="project" />
230
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.fasterxml.jackson.core:jackson-databind:2.2.3" level="project" />
231
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.fasterxml.jackson.core:jackson-core:2.2.3" level="project" />
232
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.fasterxml.jackson.module:jackson-module-scala_2.10:2.2.3" level="project" />
233
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.ivy:ivy:2.4.0" level="project" />
234
    <orderEntry type="library" scope="PROVIDED" name="Maven: oro:oro:2.0.8" level="project" />
235
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.tachyonproject:tachyon-client:0.8.2" level="project" />
236
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.tachyonproject:tachyon-underfs-hdfs:0.8.2" level="project" />
237
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.tachyonproject:tachyon-underfs-s3:0.8.2" level="project" />
238
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.tachyonproject:tachyon-underfs-local:0.8.2" level="project" />
239
    <orderEntry type="library" scope="PROVIDED" name="Maven: net.razorvine:pyrolite:4.9" level="project" />
240
    <orderEntry type="library" scope="PROVIDED" name="Maven: net.sf.py4j:py4j:0.9" level="project" />
241
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.intel.chimera:chimera:0.9.2" level="project" />
242
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.spark-project.spark:unused:1.0.0" level="project" />
243
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-sql_2.10:1.6.0-cdh5.9.0" level="project" />
244
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.spark:spark-catalyst_2.10:1.6.0-cdh5.9.0" level="project" />
245
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.scala-lang:scala-reflect:2.10.5" level="project" />
246
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.codehaus.janino:janino:2.7.8" level="project" />
247
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.codehaus.janino:commons-compiler:2.7.8" level="project" />
248
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.twitter:parquet-column:1.5.0-cdh5.9.0" level="project" />
249
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.twitter:parquet-common:1.5.0-cdh5.9.0" level="project" />
250
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.twitter:parquet-encoding:1.5.0-cdh5.9.0" level="project" />
251
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.twitter:parquet-hadoop:1.5.0-cdh5.9.0" level="project" />
252
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.twitter:parquet-format:2.1.0-cdh5.9.0" level="project" />
253
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.twitter:parquet-jackson:1.5.0-cdh5.9.0" level="project" />
254
    <orderEntry type="library" name="Maven: org.apache.avro:avro:1.7.6-cdh5.9.0" level="project" />
255
    <orderEntry type="library" name="Maven: com.thoughtworks.paranamer:paranamer:2.8" level="project" />
256
    <orderEntry type="library" name="Maven: org.apache.avro:avro-mapred:hadoop2:1.7.6-cdh5.9.0" level="project" />
257
    <orderEntry type="library" name="Maven: org.apache.avro:avro-ipc:1.7.6-cdh5.9.0" level="project" />
258
    <orderEntry type="library" name="Maven: org.apache.velocity:velocity:1.7" level="project" />
259
    <orderEntry type="library" name="Maven: org.apache.avro:avro-ipc:tests:1.7.6-cdh5.9.0" level="project" />
260
    <orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.5" level="project" />
261
    <orderEntry type="library" name="Maven: org.springframework:spring-beans:4.2.5.RELEASE" level="project" />
262
    <orderEntry type="library" name="Maven: org.springframework:spring-core:4.2.5.RELEASE" level="project" />
263
    <orderEntry type="library" name="Maven: com.beust:jcommander:1.60" level="project" />
264
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.pig:pig:0.12.0-cdh5.9.0" level="project" />
265
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hbase:hbase-client:1.2.0-cdh5.9.0" level="project" />
266
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hbase:hbase-annotations:1.2.0-cdh5.9.0" level="project" />
267
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hbase:hbase-common:1.2.0-cdh5.9.0" level="project" />
268
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hadoop:hadoop-core:2.6.0-mr1-cdh5.9.0" level="project" />
269
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hbase:hbase-protocol:1.2.0-cdh5.9.0" level="project" />
270
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.htrace:htrace-core:3.2.0-incubating" level="project" />
271
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.jruby.jcodings:jcodings:1.0.8" level="project" />
272
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.jruby.joni:joni:2.1.2" level="project" />
273
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.yammer.metrics:metrics-core:2.2.0" level="project" />
274
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.github.stephenc.findbugs:findbugs-annotations:1.3.9-1" level="project" />
275
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.apache.hbase:hbase-client:tests:1.2.0-cdh5.9.0" level="project" />
276
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.mortbay.jetty:jsp-api-2.1:6.1.14" level="project" />
277
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.mortbay.jetty:jsp-2.1:6.1.14" level="project" />
278
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.eclipse.jdt:core:3.1.1" level="project" />
279
    <orderEntry type="library" scope="PROVIDED" name="Maven: ant:ant:1.6.5" level="project" />
280
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.python:jython-standalone:2.5.2" level="project" />
281
    <orderEntry type="library" scope="PROVIDED" name="Maven: net.sf.kosmosfs:kfs:0.3" level="project" />
282
    <orderEntry type="library" scope="PROVIDED" name="Maven: jline:jline:2.11" level="project" />
283
    <orderEntry type="library" name="Maven: joda-time:joda-time:1.6" level="project" />
284
    <orderEntry type="library" scope="PROVIDED" name="Maven: dk.brics.automaton:automaton:1.11-8" level="project" />
285
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.fusesource.jansi:jansi:1.9" level="project" />
286
    <orderEntry type="library" name="Maven: com.linkedin.datafu:datafu:1.2.0" level="project" />
287
    <orderEntry type="library" name="Maven: commons-beanutils:commons-beanutils:1.9.3" level="project" />
288
    <orderEntry type="library" name="Maven: commons-io:commons-io:2.5" level="project" />
289
    <orderEntry type="library" name="Maven: org.jdom:jdom:1.1.3" level="project" />
290
    <orderEntry type="library" name="Maven: org.slf4j:slf4j-api:1.7.22" level="project" />
291
    <orderEntry type="library" name="Maven: org.slf4j:slf4j-log4j12:1.7.22" level="project" />
292
    <orderEntry type="library" name="Maven: log4j:log4j:1.2.17" level="project" />
293
    <orderEntry type="library" scope="RUNTIME" name="Maven: javax.servlet:javax.servlet-api:3.1.0" level="project" />
294
    <orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.12" level="project" />
295
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" />
296
    <orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-all:1.3" level="project" />
297
    <orderEntry type="library" scope="TEST" name="Maven: org.mockito:mockito-all:1.10.19" level="project" />
298
    <orderEntry type="library" scope="TEST" name="Maven: org.powermock:powermock-core:1.6.6" level="project" />
299
    <orderEntry type="library" scope="TEST" name="Maven: org.powermock:powermock-reflect:1.6.6" level="project" />
300
    <orderEntry type="library" scope="PROVIDED" name="Maven: org.objenesis:objenesis:2.4" level="project" />
301
    <orderEntry type="library" scope="TEST" name="Maven: org.javassist:javassist:3.21.0-GA" level="project" />
302
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.google.code.findbugs:annotations:3.0.1" level="project" />
303
    <orderEntry type="library" scope="PROVIDED" name="Maven: net.jcip:jcip-annotations:1.0" level="project" />
304
    <orderEntry type="library" scope="PROVIDED" name="Maven: com.google.code.findbugs:jsr305:3.0.1" level="project" />
305
  </component>
306
</module>
dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersFileWriter.java
1
package eu.dnetlib.dhp.common.counter;
2

  
3
import java.io.File;
4
import java.io.FileOutputStream;
5
import java.io.IOException;
6
import java.io.OutputStream;
7
import java.util.Properties;
8

  
9
/**
10
 * Writer of {@link NamedCounters} object into a properties file.
11
 * 
12
 * @author madryk
13
 */
14
public class NamedCountersFileWriter {
15
    
16
    
17
    //------------------------ LOGIC --------------------------
18
    
19
    /**
20
     * Writes {@link NamedCounters} as a properties file located under
21
     * provided filePath.
22
     * 
23
     * @throws IOException if writing to properties file resulted in an error
24
     */
25
    public void writeCounters(NamedCounters counters, String filePath) throws IOException {
26
        
27
        Properties counterProperties = buildPropertiesFromCounters(counters);
28
        
29
        File file = new File(filePath);
30
        try (OutputStream os = new FileOutputStream(file)) {
31
            
32
            counterProperties.store(os, null);
33
            
34
        }
35
        
36
    }
37
    
38
    
39
    //------------------------ PRIVATE --------------------------
40
    
41
    private Properties buildPropertiesFromCounters(NamedCounters counters) {
42
        
43
        Properties properties = new Properties();
44
        
45
        for (String counterName : counters.counterNames()) {
46
            long count = counters.currentValue(counterName);
47
            properties.put(counterName, String.valueOf(count));
48
        }
49
        
50
        return properties;
51
    }
52
}
dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCountersAccumulableParam.java
1
package eu.dnetlib.dhp.common.counter;
2

  
3
import org.apache.spark.AccumulableParam;
4

  
5
import scala.Tuple2;
6

  
7
/**
8
 * Spark {@link AccumulableParam} for tracking multiple counter values using {@link NamedCounters}.
9
 * 
10
 * @author madryk
11
 */
12
public class NamedCountersAccumulableParam implements AccumulableParam<NamedCounters, Tuple2<String,Long>> {
13

  
14
    private static final long serialVersionUID = 1L;
15

  
16
    
17
    //------------------------ LOGIC --------------------------
18
    
19
    /**
20
     * Increments {@link NamedCounters} counter with the name same as the first element of passed incrementValue tuple
21
     * by value defined in the second element of incrementValue tuple.
22
     */
23
    @Override
24
    public NamedCounters addAccumulator(NamedCounters counters, Tuple2<String, Long> incrementValue) {
25
        counters.increment(incrementValue._1, incrementValue._2);
26
        return counters;
27
    }
28

  
29
    /**
30
     * Merges two passed {@link NamedCounters}.
31
     */
32
    @Override
33
    public NamedCounters addInPlace(NamedCounters counters1, NamedCounters counters2) {
34
        for (String counterName2 : counters2.counterNames()) {
35
            counters1.increment(counterName2, counters2.currentValue(counterName2));
36
        }
37
        return counters1;
38
    }
39

  
40
    /**
41
     * Returns passed initialCounters value without any modifications.
42
     */
43
    @Override
44
    public NamedCounters zero(NamedCounters initialCounters) {
45
        return initialCounters;
46
    }
47

  
48
}
dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/Ports.java
1
package eu.dnetlib.dhp.common.java;
2

  
3
import java.util.Map;
4

  
5
import eu.dnetlib.dhp.common.java.porttype.PortType;
6

  
7
/**
8
 * A class that groups information about input and output ports, i.e.
9
 * their (name of the port -> type of the port) mappings. 
10
 * @author Mateusz Kobos
11
 */
12
public class Ports {
13
	private final Map<String, PortType> input;
14
	private final Map<String, PortType> output;
15
	
16
	public Ports(Map<String, PortType> input, Map<String, PortType> output){
17
		this.input = input;
18
		this.output = output;
19
	}
20

  
21
	public Map<String, PortType> getInput() {
22
		return input;
23
	}
24
	public Map<String, PortType> getOutput() {
25
		return output;
26
	}
27
}
dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessRunParameters.java
1
package eu.dnetlib.dhp.common.java;
2

  
3
import java.util.HashMap;
4
import java.util.Map;
5
import java.util.Map.Entry;
6
import java.util.Properties;
7
import java.util.Set;
8

  
9
import org.apache.commons.cli.CommandLine;
10
import org.apache.hadoop.fs.Path;
11

  
12
/**
13
 * Handles parsing parameters passed to the {@link Process}
14
 * @author Mateusz Kobos
15
 *
16
 */
17
public class CmdLineParserForProcessRunParameters {
18
	/** Parse the command line arguments.
19
	 * 
20
	 * @param cmdLine command line arguments
21
	 * @param ports names of ports that ought to be extracted from command line
22
	 */
23
	public ProcessParameters run(CommandLine cmdLine, Ports ports) {
24

  
25
		Properties inputProperties = cmdLine.getOptionProperties(
26
				CmdLineParser.inputPrefix);
27
		assumePortNamesMatch(CmdLineParser.inputPrefix, inputProperties, 
28
				ports.getInput().keySet());
29
		Map<String, Path> inputBindings = getBindings(
30
				inputProperties, ports.getInput().keySet());
31

  
32
		Properties outputProperties = cmdLine.getOptionProperties(
33
				CmdLineParser.outputPrefix);
34
		assumePortNamesMatch(CmdLineParser.outputPrefix, outputProperties, 
35
				ports.getOutput().keySet());
36
		Map<String, Path> outputBindings = getBindings(
37
				outputProperties, ports.getOutput().keySet());
38

  
39
		PortBindings bindings = new PortBindings(inputBindings, outputBindings);
40

  
41
		Properties specialProperties = cmdLine.getOptionProperties(
42
				CmdLineParser.specialParametersPrefix);
43
		assumeContainAllMandatoryParameters(
44
				specialProperties, CmdLineParser.mandatorySpecialParameters);
45

  
46
		Properties rawProperties = cmdLine.getOptionProperties(
47
				CmdLineParser.processParametersPrefix);
48
		Map<String, String> processParameters = new HashMap<String, String>();
49
		for(Entry<Object, Object> entry: rawProperties.entrySet()){
50
			processParameters.put(
51
					(String)entry.getKey(),	(String)entry.getValue());
52
		}
53
		
54
		return new ProcessParameters(bindings, processParameters);
55
	}
56
	
57
	private static void assumeContainAllMandatoryParameters(
58
			Properties properties, String[] mandatoryParameters){
59
		for(String otherParameter: mandatoryParameters){
60
			if(!properties.containsKey(otherParameter)){
61
				throw new CmdLineParserException(String.format(
62
						"Not all mandatory properties are set using the \"%s\" "
63
						+ "option are given, e.g. \"-%s\" parameter is missing",
64
						CmdLineParser.specialParametersPrefix, otherParameter));
65
			}
66
		}
67
	}
68
	
69
	private static void assumePortNamesMatch(String cmdLineParamPrefix,
70
			Properties cmdLineProperties, Set<String> portNames) {
71
		for (String name : portNames) {
72
			if (!cmdLineProperties.containsKey(name)) {
73
				throw new CmdLineParserException(String.format(
74
					"The port with name \"%s\" is not specified in "
75
					+ "command line (command line option \"-%s\" is missing)",
76
					name, cmdLineParamPrefix + name));
77
			}
78
		}
79
		for (Object cmdLineKeyObject : cmdLineProperties.keySet()) {
80
			String name = (String) cmdLineKeyObject;
81
			if (!portNames.contains(name)) {
82
				throw new CmdLineParserException(String.format(
83
						"A port name \"%s\" which is not specified is given "
84
						+ "in the command line "
85
						+ "(command line option \"%s\" is excess)",
86
						name, cmdLineParamPrefix + name));
87
			}
88
		}
89
	}
90

  
91
	private static Map<String, Path> getBindings(
92
			Properties cmdLineProperties, Set<String> portNames) {
93
		Map<String, Path> bindings = new HashMap<String, Path>();
94
		for (String name : portNames) {
95
			Path path = new Path((String) cmdLineProperties.get(name));
96
			bindings.put(name, path);
97
		}
98
		return bindings;
99
	}
100
}
dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/CmdLineParserForProcessConstruction.java
1
package eu.dnetlib.dhp.common.java;
2

  
3
import java.lang.reflect.Constructor;
4

  
5
import org.apache.commons.cli.CommandLine;
6

  
7
/**
8
 * Handles parsing the command line arguments provided by the Oozie
9
 * to create a {@link Process}
10
 * @author Mateusz Kobos
11
 *
12
 */
13
public class CmdLineParserForProcessConstruction {
14
	public Process run(CommandLine cmdLine){
15
		String[] args = cmdLine.getArgs();
16
		if(args.length != 1){
17
			throw new CmdLineParserException("The name of the class has "+
18
					"to be specified as the first agrument");
19
		}
20
		String className = args[0];
21
		
22
		String[] constructorParams = cmdLine.getOptionValues(
23
				CmdLineParser.constructorPrefix);
24
		if(constructorParams == null){
25
			constructorParams = new String[0];
26
		}
27
		try {
28
			Class<?> processClass = Class.forName(className);
29
			Constructor<?> processConstructor = null;
30
			if(constructorParams.length == 0){
31
				try{
32
					processConstructor = processClass.getConstructor();
33
					return (Process) processConstructor.newInstance();
34
				} catch(NoSuchMethodException ex){
35
				}
36
			}
37
			processConstructor = processClass.getConstructor(String[].class);
38
			return (Process) processConstructor.newInstance(
39
						(Object)constructorParams);
40
		} catch (Exception e) {
41
			throw new CmdLineParserException(String.format(
42
					"Problem while creating class \"%s\"", className), e);
43
		}
44
	}
45
}
dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/jsonworkflownodes/PortSpecifications.java
1
package eu.dnetlib.dhp.common.java.jsonworkflownodes;
2

  
3
import java.util.HashMap;
4
import java.util.Map;
5

  
6
import org.apache.avro.Schema;
7

  
8
import eu.dnetlib.dhp.common.java.jsonworkflownodes.StringPortSpecificationExtractor.PortSpecification;
9
import eu.dnetlib.dhp.common.java.porttype.AvroPortType;
10
import eu.dnetlib.dhp.common.java.porttype.PortType;
11
import eu.dnetlib.dhp.common.utils.AvroUtils;
12

  
13
/**
14
 * @author Mateusz Kobos
15
 */
16
public class PortSpecifications {
17
	private static final String[] propertyRegexps = 
18
			new String[]{"[\\w\\.]+", "[\\w\\./_\\-]+"};
19
	private final Map<String, SpecificationValues> specs;
20
	
21
    public static class SpecificationValues {
22

  
23
        private final Schema schema;
24

  
25
        private final String jsonFilePath;
26

  
27
        public SpecificationValues(Schema schema, String jsonFilePath) {
28
            this.schema = schema;
29
            this.jsonFilePath = jsonFilePath;
30
        }
31

  
32
        public Schema getSchema() {
33
            return schema;
34
        }
35

  
36
        public String getJsonFilePath() {
37
            return jsonFilePath;
38
        }
39

  
40
    }
41
	
42
	public PortSpecifications(String[] portSpecifications){
43
		StringPortSpecificationExtractor portSpecExtractor = 
44
				new StringPortSpecificationExtractor(propertyRegexps);
45
		specs = new HashMap<String, SpecificationValues>();
46
		for(int i = 0; i < portSpecifications.length; i++){
47
			PortSpecification portSpec = portSpecExtractor.getSpecification(portSpecifications[i]);
48
			Schema schema = AvroUtils.toSchema(portSpec.getProperties()[0]);
49
			String jsonPath = portSpec.getProperties()[1];
50
			specs.put(portSpec.getName(), new SpecificationValues(schema, jsonPath));
51
		}
52
	}
53
	
54
	public SpecificationValues get(String portName){
55
		return specs.get(portName);
56
	}
57
	
58
	public Map<String, PortType> getPortTypes(){
59
		Map<String, PortType> ports = new HashMap<String, PortType>();
60
		for(Map.Entry<String, SpecificationValues> e: specs.entrySet()){
61
			Schema schema = e.getValue().schema;
62
			ports.put(e.getKey(), new AvroPortType(schema));
63
		}
64
		return ports;	
65
	}
66
}
dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/java/ProcessException.java
1
package eu.dnetlib.dhp.common.java;
2

  
3
/**
 * Unchecked exception signalling a problem related to a process.
 *
 * @author Dominika Tkaczyk
 */
public class ProcessException extends RuntimeException {

	private static final long serialVersionUID = 2758953138374438377L;

	/**
	 * Creates the exception with a description only.
	 *
	 * @param message description of the problem
	 */
	public ProcessException(final String message){
		super(message);
	}

	/**
	 * Creates the exception with a description and the underlying cause.
	 *
	 * @param message description of the problem
	 * @param cause exception that led to this one
	 */
	public ProcessException(final String message, final Throwable cause){
		super(message, cause);
	}

}
dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/InfoSpaceConstants.java
1
package eu.dnetlib.dhp.common;
2

  
3
import java.io.UnsupportedEncodingException;
4

  
5
/**
 * InfoSpaceConstants constants.
 *
 * Non-instantiable holder of string, character and byte constants:
 * separators, row prefixes and semantic scheme / class identifiers.
 *
 * @author mhorst
 *
 */
public final class InfoSpaceConstants {

    public static final float CONFIDENCE_TO_TRUST_LEVEL_FACTOR = 0.9f;

    public static final String ENCODING_UTF8 = "utf-8";

    public static final char ROW_PREFIX_SEPARATOR = '|';

    public static final String ID_NAMESPACE_SEPARATOR = "::";
    public static final String CLASSIFICATION_HIERARCHY_SEPARATOR = ID_NAMESPACE_SEPARATOR;
    public static final String INFERENCE_PROVENANCE_SEPARATOR = ID_NAMESPACE_SEPARATOR;

    public static final String ROW_PREFIX_RESULT = "50|";
    public static final String ROW_PREFIX_PROJECT = "40|";
    public static final String ROW_PREFIX_PERSON = "30|";
    public static final String ROW_PREFIX_ORGANIZATION = "20|";
    public static final String ROW_PREFIX_DATASOURCE = "10|";

    public static final String QUALIFIER_BODY_STRING = "body";

    /**
     * UTF-8 bytes of {@link #QUALIFIER_BODY_STRING}. UTF-8 is always
     * available on the JVM, so the charset constant is used directly and no
     * checked exception has to be handled.
     */
    public static final byte[] QUALIFIER_BODY =
            QUALIFIER_BODY_STRING.getBytes(java.nio.charset.StandardCharsets.UTF_8);

    public static final String SEMANTIC_CLASS_MAIN_TITLE = "main title";
    public static final String SEMANTIC_CLASS_PUBLICATION = "publication";
    public static final String SEMANTIC_CLASS_UNKNOWN = "UNKNOWN";

    public static final String SEMANTIC_SCHEME_DNET_PERSON_ROLES = "dnet:personroles";
    public static final String SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_RESULT = "dnet:result_result_relations";
    public static final String SEMANTIC_SCHEME_DNET_RELATIONS_RESULT_PROJECT = "dnet:result_project_relations";

    public static final String SEMANTIC_SCHEME_DNET_TITLE = "dnet:dataCite_title";
    public static final String SEMANTIC_SCHEME_DNET_TITLE_TYPOLOGIES = "dnet:title_typologies";
    public static final String SEMANTIC_SCHEME_DNET_RESULT_TYPOLOGIES = "dnet:result_typologies";
    public static final String SEMANTIC_SCHEME_DNET_PROVENANCE_ACTIONS = "dnet:provenanceActions";
    public static final String SEMANTIC_SCHEME_DNET_LANGUAGES = "dnet:languages";
    public static final String SEMANTIC_SCHEME_DNET_PID_TYPES = "dnet:pid_types";
    public static final String SEMANTIC_SCHEME_DNET_CLASSIFICATION_TAXONOMIES = "dnet:subject_classification_typologies";

    // resultResult citation and similarity related
    public static final String SEMANTIC_SCHEME_DNET_DATASET_PUBLICATION_RELS = "dnet:dataset_publication_rels";

    public static final String SEMANTIC_CLASS_TAXONOMIES_ARXIV = "arxiv";
    public static final String SEMANTIC_CLASS_TAXONOMIES_WOS = "wos";
    public static final String SEMANTIC_CLASS_TAXONOMIES_DDC = "ddc";
    public static final String SEMANTIC_CLASS_TAXONOMIES_MESHEUROPMC = "mesheuropmc";
    public static final String SEMANTIC_CLASS_TAXONOMIES_ACM = "acm";

    public static final String EXTERNAL_ID_TYPE_INSTANCE_URL = "dnet:instance-url";
    public static final String EXTERNAL_ID_TYPE_UNKNOWN = "unknown";

    // publication types class ids
    public static final String SEMANTIC_CLASS_INSTANCE_TYPE_ARTICLE = "0001";
    public static final String SEMANTIC_CLASS_INSTANCE_TYPE_DATASET = "0021";

    /** Not meant to be instantiated. */
    private InfoSpaceConstants() {
    }
}
dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/lock/LockManagingProcess.java
1
package eu.dnetlib.dhp.common.lock;
2

  
3
import java.security.InvalidParameterException;
4
import java.util.Collections;
5
import java.util.Map;
6
import java.util.concurrent.Semaphore;
7

  
8
import org.apache.commons.lang.StringUtils;
9
import org.apache.hadoop.conf.Configuration;
10
import org.apache.hadoop.ha.ZKFailoverController;
11
import org.apache.log4j.Logger;
12
import org.apache.zookeeper.CreateMode;
13
import org.apache.zookeeper.KeeperException;
14
import org.apache.zookeeper.Watcher.Event;
15
import org.apache.zookeeper.ZooDefs;
16
import org.apache.zookeeper.ZooKeeper;
17

  
18
import com.google.common.base.Preconditions;
19
import com.google.common.base.Stopwatch;
20

  
21
import eu.dnetlib.dhp.common.java.PortBindings;
22
import eu.dnetlib.dhp.common.java.porttype.PortType;
23

  
24
/**
25
 * Zookeeper lock managing process. Blocks until lock is released.
26
 * 
27
 * @author mhorst
28
 *
29
 */
30
public class LockManagingProcess implements eu.dnetlib.dhp.common.java.Process {
31

  
32
	public static final String DEFAULT_ROOT_NODE = "/cache";
33
	
34
	public static final String NODE_SEPARATOR = "/";
35
	
36
	public static final String PARAM_ZK_SESSION_TIMEOUT = "zk_session_timeout";
37
	
38
	public static final String PARAM_NODE_ID = "node_id";
39
	
40
	public static final String PARAM_LOCK_MODE = "mode";
41
	
42
	public static enum LockMode {
43
		obtain,
44
		release
45
	}
46
	
47
	public static final int DEFAULT_SESSION_TIMEOUT = 60000;
48
	
49
	public static final Logger log = Logger.getLogger(LockManagingProcess.class);
50
	
51
	@Override
52
	public Map<String, PortType> getInputPorts() {
53
		return Collections.emptyMap();
54
	}
55

  
56
	@Override
57
	public Map<String, PortType> getOutputPorts() {
58
		return Collections.emptyMap();
59
	}
60

  
61
	@Override
62
	public void run(PortBindings portBindings, Configuration conf,
63
			Map<String, String> parameters) throws Exception {
64
	    
65
		Preconditions.checkArgument(parameters.containsKey(PARAM_NODE_ID), "node id not provided!");
66
		Preconditions.checkArgument(parameters.containsKey(PARAM_LOCK_MODE), "lock mode not provided!");
67

  
68
		String zkConnectionString = conf.get(ZKFailoverController.ZK_QUORUM_KEY);
69
		Preconditions.checkArgument(StringUtils.isNotBlank(zkConnectionString), 
70
		        "zookeeper quorum is unknown, invalid '%s' property value: %s", ZKFailoverController.ZK_QUORUM_KEY, zkConnectionString);
71

  
72
		int sessionTimeout = parameters.containsKey(PARAM_ZK_SESSION_TIMEOUT)?
73
		        Integer.valueOf(parameters.get(PARAM_ZK_SESSION_TIMEOUT)) : DEFAULT_SESSION_TIMEOUT;
74

  
75
		final ZooKeeper zooKeeper = new ZooKeeper(zkConnectionString, sessionTimeout, (e) -> {
76
		 // we are not interested in generic events
77
		});
78
		
79
//		initializing root node if does not exist
80
		if (zooKeeper.exists(DEFAULT_ROOT_NODE, false) == null) {
81
			log.info("initializing root node: " + DEFAULT_ROOT_NODE);
82
			zooKeeper.create(DEFAULT_ROOT_NODE, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
83
			log.info("root node initialized");
84
		}
85

  
86
		final String nodePath = generatePath(parameters.get(PARAM_NODE_ID), DEFAULT_ROOT_NODE);
87
		
88
		final Semaphore semaphore = new Semaphore(1);
89
		semaphore.acquire();
90
		
91
		switch(LockMode.valueOf(parameters.get(PARAM_LOCK_MODE))) {
92
		    case obtain: {
93
		        obtain(zooKeeper, nodePath, semaphore);
94
		        break;
95
		    }
96
		    case release: {
97
		        release(zooKeeper, nodePath);
98
		        break;
99
		    }
100
		    default: {
101
		        throw new InvalidParameterException("unsupported lock mode: " + parameters.get(PARAM_LOCK_MODE));
102
		    }
103
		}
104
	}
105
	
106
	// ------------------------- PRIVATE --------------------------
107
	
108
	private void obtain(final ZooKeeper zooKeeper, final String nodePath, final Semaphore semaphore) throws KeeperException, InterruptedException {
109
        log.info("trying to obtain lock: " + nodePath);
110
        if (zooKeeper.exists(nodePath, (event) -> {
111
            if (Event.EventType.NodeDeleted == event.getType()) {
112
                try {
113
                    log.info(nodePath + " lock release detected");
114
                    log.info("creating new lock instance: " + nodePath + "...");
115
                    zooKeeper.create(nodePath, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
116
                    log.info("lock" + nodePath + " created");
117
                    semaphore.release();
118
                } catch (KeeperException e) {
119
                    throw new RuntimeException(e);
120
                } catch (InterruptedException e) {
121
                    throw new RuntimeException(e);
122
                }
123
            }
124
        }) == null) {
125
            log.info("lock not found, creating new lock instance: " + nodePath);
126
            zooKeeper.create(nodePath, new byte[0], ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
127
            log.info("lock" + nodePath + " created");
128
            semaphore.release();
129
        } else {
130
            // waiting until node is removed by other lock manager
131
            log.info("waiting until lock is released");
132
            Stopwatch timer = new Stopwatch().start();
133
            semaphore.acquire();
134
            log.info("lock released, waited for " + timer.elapsedMillis() + " ms");
135
            semaphore.release();
136
        }
137
	}
138
	
139
	private void release(final ZooKeeper zooKeeper, final String nodePath) throws InterruptedException, KeeperException {
140
	    log.info("removing lock" + nodePath + "...");
141
        zooKeeper.delete(nodePath, -1);
142
        log.info("lock" + nodePath + " removed");
143
	}
144
	
145
	private static final String generatePath(String nodeId, String rootNode) {
146
		return rootNode + NODE_SEPARATOR + nodeId.replace('/', '_');
147
	}
148
	
149
}
dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/LenientComparisonStringNormalizer.java
1
/*
2
 * This file is part of CoAnSys project.
3
 * Copyright (c) 2012-2015 ICM-UW
4
 * 
5
 * CoAnSys is free software: you can redistribute it and/or modify
6
 * it under the terms of the GNU Affero General Public License as published by
7
 * the Free Software Foundation, either version 3 of the License, or
8
 * (at your option) any later version.
9

  
10
 * CoAnSys is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU Affero General Public License for more details.
14
 * 
15
 * You should have received a copy of the GNU Affero General Public License
16
 * along with CoAnSys. If not, see <http://www.gnu.org/licenses/>.
17
 */
18
package eu.dnetlib.dhp.common.string;
19

  
20
import java.io.Serializable;
21
import java.util.List;
22

  
23
import org.apache.commons.lang3.StringUtils;
24

  
25
import com.google.common.collect.ImmutableList;
26

  
27
/**
28
 * An implementation of {@link StringNormalizer} that normalizes strings for non-strict comparisons
29
 * in which one does not care about characters other than letters and digits or about differently written diacritics.
30
 *
31
 * @author Łukasz Dumiszewski
32
 *
33
 */
34
public final class LenientComparisonStringNormalizer implements StringNormalizer, Serializable {
35

  
36

  
37
    private static final long serialVersionUID = 1L;
38
    
39
    
40
    private List<Character> whitelistCharacters;
41
    
42
    
43
    //------------------------ CONSTRUCTORS --------------------------
44
    
45
    public LenientComparisonStringNormalizer() {
46
        this(ImmutableList.of());
47
    }
48
    
49
    /**
50
     * @param whitelistCharacters - non alphanumeric characters that will not be removed
51
     *      during normalization
52
     */
53
    public LenientComparisonStringNormalizer(List<Character> whitelistCharacters) {
54
        this.whitelistCharacters = whitelistCharacters;
55
    }
56
    
57
    
58
    //------------------------ LOGIC --------------------------
59

  
60
    
61
    
62
    /**
63
     * Normalizes the given value. <br/>
64
     * The normalized strings are better suited for non-strict comparisons, in which one does NOT care about characters that are
65
     * neither letters nor digits; about accidental spaces or different diacritics etc. <br/><br/>
66
     * This method:
67
     * <ul>
68
     * <li>Replaces all characters that are not letters or digits with spaces (except those on whitelist characters list)</li>
69
     * <li>Replaces white spaces with spaces </li>
70
     * <li>Trims</li>
71
     * <li>Compacts multi-space gaps to one-space gaps</li>
72
     * <li>Removes diacritics</li>
73
     * <li>Changes characters to lower case</li>
74
     * </ul>
75
     * Returns "" if the passed value is null or blank
76
     *
77
     * @param value the string to normalize 
78
     * @see DiacriticsRemover#removeDiacritics(String, boolean)
79
     *
80
     *
81
     */
82
    public String normalize(String value) {
83
        
84
        if (StringUtils.isBlank(value)) {
85
        
86
            return "";
87

  
88
        }
89
        
90
        
91
        String result = value;
92
        
93
        result = DiacriticsRemover.removeDiacritics(result);
94
        
95
        result = removeNonLetterDigitCharacters(result);
96
        
97
        result = result.toLowerCase();
98
        
99
        result = result.trim().replaceAll(" +", " ");
100
        
101
        return result;
102
    }
103
    
104
    
105
    
106
    
107
    //------------------------ PRIVATE --------------------------
108

  
109
    
110
    private String removeNonLetterDigitCharacters(final String value) {
111
        
112
        StringBuilder sb = new StringBuilder();
113
        
114
        for (int i = 0; i < value.length(); ++i) {
115
   
116
            char c = value.charAt(i);
117
            
118
            if (Character.isLetterOrDigit(c) || whitelistCharacters.contains(c)) {
119
                sb.append(c);
120
            } else {
121
                sb.append(' ');
122
            }
123
        }
124
        
125
        return sb.toString();
126
    }
127

  
128
 
129

  
130
}
dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/string/DiacriticsRemover.java
1
/*
2
 * This file is part of CoAnSys project.
3
 * Copyright (c) 2012-2015 ICM-UW
4
 * 
5
 * CoAnSys is free software: you can redistribute it and/or modify
6
 * it under the terms of the GNU Affero General Public License as published by
7
 * the Free Software Foundation, either version 3 of the License, or
8
 * (at your option) any later version.
9

  
10
 * CoAnSys is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
 * GNU Affero General Public License for more details.
14
 * 
15
 * You should have received a copy of the GNU Affero General Public License
16
 * along with CoAnSys. If not, see <http://www.gnu.org/licenses/>.
17
 */
18

  
19
package eu.dnetlib.dhp.common.string;
20

  
21
import java.text.Normalizer;
22
import java.util.HashMap;
23
import java.util.Map;
24

  
25
/**
 * Mapping to the basic Latin alphabet (a-z, A-Z). In most cases, a character is
 * mapped to the closest visual form, rather than functional one, e.g.: "ö" is
 * mapped to "o" rather than "oe", and "đ" is mapped to "d" rather than "dj" or
 * "gj". Notable exceptions include: "ĸ" mapped to "q", "ß" mapped to "ss", and
 * "Þ", "þ" mapped to "Y", "y".
 *
 * <p> The text is processed as follows: <ol> <li>the text is compatibility
 * decomposed (NFKD),</li> <li>all the non-spacing combining marks are
 * removed,</li> <li>additional "manual" substitutions are applied to the
 * remaining characters.</li> </ol> The result is NOT recomposed afterwards. </p>
 *
 * <p> The manual substitutions cover letters of the "Latin-1 Supplement" and
 * "Latin Extended-A" Unicode blocks listed in the table below. Characters from
 * other alphabets are generally left intact, although the decomposable ones may
 * be affected by the procedure. </p>
 *
 * @author Lukasz Bolikowski (bolo@icm.edu.pl)
 *
 * @author Łukasz Dumiszewski /just copied from coansys-commons/
 *
 */
public final class DiacriticsRemover {

    /** Characters replaced "manually"; {@code from[i]} is replaced with {@code to[i]}. */
    private static final Character[] from = {
        'Æ', 'Ð', 'Ø', 'Þ', 'ß', 'æ', 'ð', 'ø', 'þ', 'Đ', 'đ', 'Ħ',
        'ħ', 'ı', 'ĸ', 'Ł', 'ł', 'Ŋ', 'ŋ', 'Œ', 'œ', 'Ŧ', 'ŧ'};
    private static final String[] to = {
        "AE", "D", "O", "Y", "ss", "ae", "d", "o", "y", "D", "d", "H",
        "h", "i", "q", "L", "l", "N", "n", "OE", "oe", "T", "t"};

    /** Substitution table built once from {@link #from} and {@link #to}; never modified afterwards. */
    private static final Map<Character, String> lookup = buildLookup();


    //------------------------ CONSTRUCTORS -------------------


    private DiacriticsRemover() {}


    //------------------------ LOGIC --------------------------


    /**
     * Removes diacritics from a text.
     *
     * <p> The text is iterated code point by code point, so non-spacing marks
     * encoded as surrogate pairs (outside the Basic Multilingual Plane) are
     * removed just like the ones inside it. </p>
     *
     * @param text Text to process.
     * @return Text without diacritics, or null if the text was null.
     */
    public static String removeDiacritics(String text) {
        if (text == null) {
            return null;
        }

        final String decomposed = Normalizer.normalize(text, Normalizer.Form.NFKD);
        final int length = decomposed.length();

        final StringBuilder builder = new StringBuilder(length);
        int index = 0;
        while (index < length) {
            final int codePoint = decomposed.codePointAt(index);
            index += Character.charCount(codePoint);

            if (Character.getType(codePoint) == Character.NON_SPACING_MARK) {
                continue;
            }

            // The substitution table only holds BMP characters, so anything
            // above U+FFFF can skip the lookup.
            final String replacement = Character.isBmpCodePoint(codePoint)
                    ? lookup.get((char) codePoint)
                    : null;

            if (replacement != null) {
                builder.append(replacement);
            } else {
                builder.appendCodePoint(codePoint);
            }
        }

        return builder.toString();
    }


    //------------------------ PRIVATE --------------------------

    /**
     * Builds the substitution table from the parallel {@link #from} and {@link #to} arrays.
     *
     * @throws IllegalStateException if the two arrays differ in length
     */
    private static Map<Character, String> buildLookup() {
        if (from.length != to.length) {
            throw new IllegalStateException(
                    "substitution tables differ in length: " + from.length + " != " + to.length);
        }

        final Map<Character, String> table = new HashMap<Character, String>();
        for (int i = 0; i < from.length; i++) {
            table.put(from[i], to[i]);
        }

        return table;
    }
}
dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/report/ReportEntryFactory.java
1
package eu.dnetlib.dhp.common.report;
2

  
3
import eu.dnetlib.dhp.common.schemas.ReportEntry;
4
import eu.dnetlib.dhp.common.schemas.ReportEntryType;
5

  
6
/**
7
 * Factory of {@link ReportEntry} objects.
8
 * 
9
 * @author madryk
10
 */
11
public final class ReportEntryFactory {
12

  
13
    // ----------------------- CONSTRUCTORS -----------------------------
14
    
15
    private ReportEntryFactory() {}
16

  
17
    // ----------------------- LOGIC ------------------------------------
18
    
19
    /**
20
     * Creates {@link ReportEntry} with {@link ReportEntryType#COUNTER} type
21
     */
22
    public static ReportEntry createCounterReportEntry(String key, long count) {
23
        return new ReportEntry(key, ReportEntryType.COUNTER, String.valueOf(count));
24
    }
25
    
26
    /**
27
     * Creates {@link ReportEntry} with {@link ReportEntryType#DURATION} type
28
     */
29
    public static ReportEntry createDurationReportEntry(String key, long duration) {
30
        return new ReportEntry(key, ReportEntryType.DURATION, String.valueOf(duration));
31
    }
32
}
dnet-hadoop/dhp-common/src/main/java/eu/dnetlib/dhp/common/counter/NamedCounters.java
1
package eu.dnetlib.dhp.common.counter;
2

  
3
import java.io.Serializable;
4
import java.util.Collection;
5
import java.util.Map;
6

  
7
import com.google.common.base.Preconditions;
8
import com.google.common.collect.Maps;
9

  
10
/**
11
 * Class that groups several counters which are identified by name (<code>String</code> value).
12
 * 
13
 * @author madryk
14
 */
15
public class NamedCounters implements Serializable {
16

  
17
    private static final long serialVersionUID = 1L;
18
    
19
    
20
    private final Map<String, Long> counters;
21
    
22
    
23
    //------------------------ CONSTRUCTORS --------------------------
24
    
25
    /**
26
     * Creates {@link NamedCounters} with empty initial counters.
27
     */
28
    public NamedCounters() {
29
        this.counters = Maps.newHashMap();
30
    }
31
    
32
    /**
33
     * Creates {@link NamedCounters} with initial counters.<br/>
34
     * Starting value of initial counters is zero.
35
     * 
36
     * @param initialCounterNames - names of initial counters
37
     */
38
    public NamedCounters(String[] initialCounterNames) {
39
        Preconditions.checkNotNull(initialCounterNames);
40
        
41
        this.counters = Maps.newHashMap();
42
        
43
        for (String initialCounterName : initialCounterNames) {
44
            this.counters.put(initialCounterName, 0L);
45
        }
46
    }
47
    
48
    /**
49
     * Creates {@link NamedCounters} with initial counters.<br/>
50
     * Starting value of initial counters is zero.
51
     * 
52
     * @param initialCounterNamesEnumClass - enum class providing names of initial counters
53
     */
54
    public <E extends Enum<E>> NamedCounters(Class<E> initialCounterNamesEnumClass) {
55
        Preconditions.checkNotNull(initialCounterNamesEnumClass);
56
        
57
        this.counters = Maps.newHashMap();
58
        Enum<?>[] enumConstants = initialCounterNamesEnumClass.getEnumConstants();
59
        
60
        for (int i=0; i<enumConstants.length; ++i) {
61
            this.counters.put(enumConstants[i].name(), 0L);
62
        }
63
        
64
    }
65
    
66
    
67
    //------------------------ LOGIC --------------------------
68
    
69
    /**
70
     * Increments value by 1 of a counter with the name specified as parameter.<br/>
71
     * Internally uses {@link #increment(String, Long)} 
72
     */
73
    public void increment(String counterName) {
74
        increment(counterName, 1L);
75
    }
76
    
77
    /**
78
     * Increments value of a counter with the name specified as parameter by the given value.<br/>
79
     * If current instance of {@link NamedCounters} does not contain counter
80
     * with provided name, then before incrementing counter will be created with starting
81
     * value equal to zero.
82
     */
83
    public void increment(String counterName, Long incrementValue) {
84
        
85
        long oldValue = counters.getOrDefault(counterName, 0L);
86
        counters.put(counterName, oldValue + incrementValue);
87
    }
88
    
89
    /**
90
     * Returns current value of a counter with the name specified as parameter.
91
     * 
92
     * @throws IllegalArgumentException when {@link NamedCounters} does not contain counter
93
     *      with provided name
94
     */
95
    public long currentValue(String counterName) {
96
        
97
        if (!counters.containsKey(counterName)) {
98
            throw new IllegalArgumentException("Couldn't find counter with name: " + counterName);
99
        }
100
        
101
        return counters.get(counterName);
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff