Revision 47956
Added by Claudio Atzori almost 7 years ago

[maven-release-plugin] copy for tag dnet-hadoop-commons-2.0.1
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/deploy.info

```json
{"type_source": "SVN", "goal": "package -U -T 4C source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-hadoop-commons/trunk/", "deploy_repository": "dnet45-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots", "name": "dnet-hadoop-commons"}
```
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/java/eu/dnetlib/data/hadoop/hdfs/SequenceFileIterable.java

```java
package eu.dnetlib.data.hadoop.hdfs;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;

import eu.dnetlib.miscutils.collections.Pair;

/**
 * <p>
 * {@link Iterable} counterpart to {@link SequenceFileIterator}.
 * </p>
 */
public final class SequenceFileIterable<K extends Writable, V extends Writable> implements Iterable<Pair<K, V>> {

    private final Path path;
    private final boolean reuseKeyValueInstances;
    private final Configuration conf;

    /**
     * Like {@link #SequenceFileIterable(Path, boolean, Configuration)} but key and value instances are not reused by
     * default.
     *
     * @param path
     *            file to iterate over
     */
    public SequenceFileIterable(Path path, Configuration conf) {
        this(path, false, conf);
    }

    /**
     * @param path
     *            file to iterate over
     * @param reuseKeyValueInstances
     *            if true, reuses instances of the key and value object instead of creating a new one for each read from
     *            the file
     */
    public SequenceFileIterable(Path path, boolean reuseKeyValueInstances, Configuration conf) {
        this.path = path;
        this.reuseKeyValueInstances = reuseKeyValueInstances;
        this.conf = conf;
    }

    @Override
    public Iterator<Pair<K, V>> iterator() {
        try {
            return new SequenceFileIterator<K, V>(path, reuseKeyValueInstances, conf);
        } catch (IOException ioe) {
            throw new IllegalStateException(path.toString(), ioe);
        }
    }

}
```
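For orientation, a minimal usage sketch. The input path is invented, and the `getKey()`/`getValue()` accessors on `eu.dnetlib.miscutils.collections.Pair` are an assumption, since that class is not part of this revision:

```java
// Hypothetical sketch: iterate a SequenceFile of Text/Text records.
Configuration conf = new Configuration();
Path input = new Path("file:///tmp/sample.seq"); // hypothetical input file

for (Pair<Text, Text> pair : new SequenceFileIterable<Text, Text>(input, conf)) {
    System.out.println(pair.getKey() + " -> " + pair.getValue()); // assumes Pair accessors
}
```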
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/java/eu/dnetlib/data/hadoop/hdfs/SequenceFileIterator.java

```java
package eu.dnetlib.data.hadoop.hdfs;

import java.io.Closeable;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

import com.google.common.collect.AbstractIterator;
import com.google.common.io.Closeables;

import eu.dnetlib.miscutils.collections.Pair;

/**
 * <p>
 * {@link java.util.Iterator} over a {@link SequenceFile}'s keys and values, as a {@link Pair} containing key and value.
 * </p>
 */
public final class SequenceFileIterator<K extends Writable, V extends Writable> extends AbstractIterator<Pair<K, V>> implements Closeable {

    private final SequenceFile.Reader reader;
    private final Configuration conf;
    private final Class<K> keyClass;
    private final Class<V> valueClass;
    private final boolean noValue;
    private K key;
    private V value;
    private final boolean reuseKeyValueInstances;

    /**
     * @throws IOException
     *             if path can't be read, or its key or value class can't be instantiated
     */
    @SuppressWarnings("unchecked")
    public SequenceFileIterator(Path path, boolean reuseKeyValueInstances, Configuration conf) throws IOException {
        key = null;
        value = null;
        FileSystem fs = path.getFileSystem(conf);
        path = path.makeQualified(fs);
        reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
        this.conf = conf;
        keyClass = (Class<K>) reader.getKeyClass();
        valueClass = (Class<V>) reader.getValueClass();
        noValue = NullWritable.class.equals(valueClass);
        this.reuseKeyValueInstances = reuseKeyValueInstances;
    }

    public Class<K> getKeyClass() {
        return keyClass;
    }

    public Class<V> getValueClass() {
        return valueClass;
    }

    @Override
    public void close() {
        key = null;
        value = null;
        try {
            Closeables.close(reader, false);
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
        endOfData();
    }

    @Override
    protected Pair<K, V> computeNext() {
        if (!reuseKeyValueInstances || value == null) {
            key = ReflectionUtils.newInstance(keyClass, conf);
            if (!noValue) {
                value = ReflectionUtils.newInstance(valueClass, conf);
            }
        }
        try {
            boolean available;
            if (noValue) {
                available = reader.next(key);
            } else {
                available = reader.next(key, value);
            }
            if (!available) {
                close();
                return null;
            }
            return new Pair<K, V>(key, value);
        } catch (IOException ioe) {
            close();
            throw new IllegalStateException(ioe);
        }
    }

}
```
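One caveat worth spelling out: when `reuseKeyValueInstances` is true, `computeNext()` recycles the same `Writable` instances, so every returned `Pair` aliases the record most recently read. A hedged sketch of the defensive copy this implies (again assuming a `Pair.getKey()` accessor):

```java
// With instance reuse enabled, each Pair wraps the same recycled Text objects:
// copy anything that must outlive the current iteration step.
List<Text> keys = new ArrayList<Text>();
SequenceFileIterator<Text, Text> it = new SequenceFileIterator<Text, Text>(input, true, conf);
while (it.hasNext()) {
    keys.add(new Text(it.next().getKey())); // without the copy, every element would hold the last record read
}
```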
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/java/eu/dnetlib/data/hadoop/hdfs/SequenceFileUtils.java

```java
package eu.dnetlib.data.hadoop.hdfs;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.Text;

import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

import eu.dnetlib.miscutils.collections.Pair;

public class SequenceFileUtils {

    public static final int NO_LIMIT = -1;

    public static Iterable<Pair<Text, Text>> read(Path path, final Configuration conf) throws IOException {
        return Iterables.concat(Iterables.transform(new SequenceFileUtils().doRead(path, conf), new Function<Path, SequenceFileIterable<Text, Text>>() {
            @Override
            public SequenceFileIterable<Text, Text> apply(Path path) {
                return new SequenceFileIterable<Text, Text>(path, conf);
            }
        }));
    }

    public static Iterable<Pair<Text, Text>> read(Path path, final Configuration conf, final int readMax) throws IOException {
        if (readMax != NO_LIMIT)
            return Iterables.limit(read(path, conf), readMax);
        else
            return read(path, conf);
    }

    private final PathFilter f = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("part-r");
        }
    };

    private Iterable<Path> doRead(Path path, final Configuration conf) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        return Iterables.transform(Lists.newArrayList(fs.listStatus(path, f)), new Function<FileStatus, Path>() {
            @Override
            public Path apply(FileStatus fs) {
                Path path = fs.getPath();
                System.out.println("downloading xml files from path: " + path.toString());
                return path;
            }
        });
    }

}
```
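A hedged usage sketch, assuming a job output directory containing `part-r-*` files of `Text` pairs (directory name and `Pair` accessors are invented for illustration):

```java
// Read at most 10 pairs across all part-r-* files under the given directory.
Configuration conf = new Configuration();
Path output = new Path("/user/dnet/job-output"); // hypothetical output directory

for (Pair<Text, Text> p : SequenceFileUtils.read(output, conf, 10)) {
    System.out.println(p.getKey() + " -> " + p.getValue());
}
```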
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/java/eu/dnetlib/data/hadoop/hdfs/SequenceFileWriterFactory.java

```java
package eu.dnetlib.data.hadoop.hdfs;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Writer.Option;
import org.apache.hadoop.io.Writable;
import org.springframework.beans.factory.annotation.Required;

/**
 * Factory for SequenceFile.Writer instances
 *
 * @author claudio
 *
 */
public class SequenceFileWriterFactory {

    private Class<? extends Writable> keyClass;

    private Class<? extends Writable> valueClass;

    private String compressionType;

    private String blockSize = "64M";

    private short replication = 3;

    public SequenceFile.Writer getSequenceFileWriter(
            final Class<? extends Writable> keyClass,
            final Class<? extends Writable> valueClass,
            final Configuration conf,
            final Path sequenceFilePath) throws IOException {

        Configuration newConf = new Configuration(conf);
        newConf.set("dfs.blocksize", getBlockSize());

        Option oFile = SequenceFile.Writer.file(sequenceFilePath);
        Option oKey = SequenceFile.Writer.keyClass(keyClass);
        Option oValue = SequenceFile.Writer.valueClass(valueClass);
        Option oCmp = SequenceFile.Writer.compression(CompressionType.valueOf(getCompressionType()));
        Option oRpl = SequenceFile.Writer.replication(getReplication());

        return SequenceFile.createWriter(newConf, oFile, oKey, oValue, oCmp, oRpl);
    }

    public SequenceFile.Writer getSequenceFileWriter(final Configuration conf, final Path sequenceFilePath) throws IOException {
        return getSequenceFileWriter(getKeyClass(), getValueClass(), conf, sequenceFilePath);
    }

    public Class<? extends Writable> getKeyClass() {
        return keyClass;
    }

    @Required
    public void setKeyClass(final Class<? extends Writable> keyClass) {
        this.keyClass = keyClass;
    }

    public Class<? extends Writable> getValueClass() {
        return valueClass;
    }

    @Required
    public void setValueClass(final Class<? extends Writable> valueClass) {
        this.valueClass = valueClass;
    }

    public String getCompressionType() {
        return compressionType;
    }

    @Required
    public void setCompressionType(final String compressionType) {
        this.compressionType = compressionType;
    }

    public String getBlockSize() {
        return blockSize;
    }

    public void setBlockSize(final String blockSize) {
        this.blockSize = blockSize;
    }

    public short getReplication() {
        return replication;
    }

    public void setReplication(final short replication) {
        this.replication = replication;
    }

}
```
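A hedged sketch of programmatic use (the Spring context further down wires the same properties). The output path is an assumption; `compressionType` must name a `SequenceFile.CompressionType` constant (`NONE`, `RECORD`, or `BLOCK`) since it is parsed with `CompressionType.valueOf(...)`:

```java
SequenceFileWriterFactory factory = new SequenceFileWriterFactory();
factory.setKeyClass(Text.class);
factory.setValueClass(Text.class);
factory.setCompressionType("BLOCK");

// Write a single Text pair to a hypothetical local path.
SequenceFile.Writer writer = factory.getSequenceFileWriter(new Configuration(), new Path("file:///tmp/out.seq"));
try {
    writer.append(new Text("key"), new Text("value"));
} finally {
    writer.close();
}
```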
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/java/eu/dnetlib/data/hadoop/config/ConfigurationFactory.java

```java
package eu.dnetlib.data.hadoop.config;

import java.io.IOException;
import java.util.Map.Entry;
import java.util.Properties;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.springframework.beans.factory.FactoryBean;
import org.springframework.beans.factory.annotation.Required;
import org.springframework.core.io.Resource;

/**
 * Factory bean for hadoop cluster configuration object
 *
 * @author claudio
 *
 */
public class ConfigurationFactory implements FactoryBean<Configuration> {

    private static final Log log = LogFactory.getLog(ConfigurationFactory.class); // NOPMD by marko on 11/24/08 5:02 PM

    private Resource defaults;

    public Configuration getConfiguration() {
        try {
            return getObject();
        } catch (Exception e) {
            throw new IllegalStateException("Unable to load hadoop configuration", e);
        }
    }

    @Override
    public Configuration getObject() throws Exception {
        return initConfiguration(defaultProperties());
    }

    @Override
    public Class<?> getObjectType() {
        return Configuration.class;
    }

    @Override
    public boolean isSingleton() {
        return true;
    }

    private Configuration initConfiguration(final Properties p) {
        final Configuration conf = new Configuration(false);
        for (Entry<Object, Object> e : p.entrySet()) {
            conf.set(e.getKey().toString(), e.getValue().toString());
            log.info(e.getKey().toString() + ": " + conf.get(e.getKey().toString()));
        }
        return conf;
    }

    private Properties defaultProperties() throws IOException {
        Properties p = new Properties();
        p.load(defaults.getInputStream());
        return p;
    }

    public Resource getDefaults() {
        return defaults;
    }

    @Required
    public void setDefaults(Resource defaults) {
        this.defaults = defaults;
    }

}
```
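A hedged sketch of direct use outside Spring, pointing the factory at one of the property files bundled later in this revision:

```java
// Build a Hadoop Configuration from a bundled defaults file on the classpath.
ConfigurationFactory factory = new ConfigurationFactory();
factory.setDefaults(new ClassPathResource("eu/dnetlib/data/hadoop/config/hadoop-default.dm.cnr.properties"));

Configuration conf = factory.getConfiguration();
System.out.println(conf.get("fs.defaultFS")); // hdfs://nmis-hadoop-cluster
```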
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/java/eu/dnetlib/data/hadoop/config/ClusterName.java

```java
package eu.dnetlib.data.hadoop.config;

import java.util.Arrays;
import java.util.List;

import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

/**
 * Enumeration of the managed clusters.
 *
 * @author claudio
 *
 */
public enum ClusterName {
    DM, // Data Management
    IIS; // Information Inference Service(s)

    public static List<String> asStringList() {
        return Lists.newArrayList(Iterables.transform(asList(), new Function<ClusterName, String>() {

            @Override
            public String apply(final ClusterName clusterName) {
                return clusterName.toString();
            }
        }));
    }

    public static List<ClusterName> asList() {
        return Arrays.asList(ClusterName.values());
    }

}
```
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/java/eu/dnetlib/data/hadoop/config/ConfigurationEnumerator.java

```java
package eu.dnetlib.data.hadoop.config;

import java.util.Map;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.springframework.beans.BeansException;
import org.springframework.beans.factory.BeanFactory;
import org.springframework.beans.factory.BeanFactoryAware;
import org.springframework.beans.factory.ListableBeanFactory;
import org.springframework.beans.factory.NoSuchBeanDefinitionException;

import com.google.common.base.Function;
import com.google.common.collect.Maps;

public class ConfigurationEnumerator implements BeanFactoryAware {

    private static final Log log = LogFactory.getLog(ConfigurationEnumerator.class); // NOPMD by marko on 11/24/08 5:02 PM

    /**
     * bean factory.
     */
    private ListableBeanFactory beanFactory;

    private final Function<ConfigurationFactory, Configuration> asConfiguration = new Function<ConfigurationFactory, Configuration>() {

        @Override
        public Configuration apply(ConfigurationFactory factory) {
            return factory.getConfiguration();
        }
    };

    /**
     * Get all the cluster configurations built by the registered {@link ConfigurationFactory} beans.
     *
     * @return a map of bean name to Configuration
     */
    public Map<String, Configuration> getAll() {
        return Maps.transformValues(beanFactory.getBeansOfType(ConfigurationFactory.class), asConfiguration);
    }

    @Override
    public void setBeanFactory(final BeanFactory beanFactory) throws BeansException {
        this.beanFactory = (ListableBeanFactory) beanFactory;
    }

    public ListableBeanFactory getBeanFactory() {
        return beanFactory;
    }

    /**
     * Get the given cluster's Configuration, or null when the corresponding bean is undefined.
     *
     * @param name the cluster name
     * @return the Configuration, or null
     */
    public Configuration get(final ClusterName name) {
        try {
            return (beanFactory.getBean(name.toString(), Configuration.class));
        } catch (final NoSuchBeanDefinitionException e) {
            log.error("undefined bean: " + name, e);
            return null;
        }
    }
}
```
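A hedged sketch of looking up a cluster configuration from a Spring-managed bean, assuming the application context below (which defines the `DM` and `IIS` factory beans) has been loaded:

```java
@Autowired
private ConfigurationEnumerator configurationEnumerator;

public void printDmNameNode() {
    // get() resolves the "DM" FactoryBean and returns the Configuration it produces.
    Configuration dm = configurationEnumerator.get(ClusterName.DM);
    if (dm != null) {
        System.out.println(dm.get("fs.defaultFS"));
    }
}
```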
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/java/eu/dnetlib/data/transform/ColumnType.java

```java
package eu.dnetlib.data.transform;

public enum ColumnType {
    base64,
    text,
}
```
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/java/eu/dnetlib/data/transform/XsltRowTransformer.java

```java
package eu.dnetlib.data.transform;

import java.io.StringReader;
import java.util.List;

import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;

import org.apache.commons.codec.binary.Base64;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.DocumentResult;
import org.dom4j.io.DocumentSource;
import org.dom4j.io.SAXReader;

import com.google.common.base.Function;
import com.google.common.collect.Lists;

public class XsltRowTransformer implements Function<String, List<Row>> {

    private static final Log log = LogFactory.getLog(XsltRowTransformer.class); // NOPMD by marko on 11/24/08 5:02 PM

    private Transformer transformer;

    private SAXReader reader = new SAXReader();

    public XsltRowTransformer(final Transformer transformer) {
        this.transformer = transformer;
        log.info(String.format("using transformer: '%s'", getTransformerClassName()));
    }

    private Document transform(final Document doc) {
        if (transformer == null)
            return doc;
        final DocumentResult result = new DocumentResult();

        try {
            transformer.transform(new DocumentSource(doc), result);
            return result.getDocument();
        } catch (final TransformerException e) {
            throw new RuntimeException("Unable to transform document:\n" + doc.asXML(), e);
        }
    }

    private Document transform(final String xml) {
        try {
            return transform(reader.read(new StringReader(xml)));
        } catch (final DocumentException e) {
            log.error("Error parsing xml:\n" + xml, e);
            throw new RuntimeException("Unable to parse document:\n" + xml, e);
        }
    }

    private List<Row> transformAsListRow(final String xml) {
        final Document doc = transform(xml);

        final List<Row> rows = Lists.newArrayList();

        if (doc == null)
            return rows;

        for (final Object or : doc.selectNodes("//ROW")) {
            final Element row = (Element) or;

            final String columnFamily = row.valueOf("@columnFamily");
            final String key = row.valueOf("@key");

            if ((key == null) || key.isEmpty())
                throw new RuntimeException("Attribute 'key' is missing in XSLT");

            if ((columnFamily == null) || columnFamily.isEmpty())
                throw new RuntimeException("Attribute 'columnFamily' is missing in XSLT");

            final List<Column<String, byte[]>> cols = Lists.newArrayList();

            for (final Object of : row.selectNodes("./QUALIFIER")) {
                final Node node = (Node) of;

                final String name = node.valueOf("@name");
                final String type = node.valueOf("@type");

                final byte[] value = decode(node.getText().trim(), type);

                cols.add(new Column<String, byte[]>(name, value));
            }
            rows.add(new Row(columnFamily, key, cols));
        }
        return rows;
    }

    public String getTransformerClassName() {
        return transformer != null ? transformer.getClass().getName() : "null";
    }

    private byte[] decode(final String value, final String type) {

        if ("base64".equals(type))
            return Base64.decodeBase64(value);

        return value.getBytes();
    }

    @Override
    public List<Row> apply(final String xml) {
        return transformAsListRow(xml);
    }

}
```
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/java/eu/dnetlib/data/transform/Row.java

```java
package eu.dnetlib.data.transform;

import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import com.google.common.collect.Maps;

public class Row implements Iterable<Column<String, byte[]>> {

    private String columnFamily;

    private String key;

    private Map<String, Column<String, byte[]>> columns;

    public Row(final Row row) {
        this.columnFamily = row.getColumnFamily();
        this.key = row.getKey();
        this.columns = row.columns;
    }

    public Row(final String columnFamily, final String key) {
        this.columnFamily = columnFamily;
        this.key = key;
        this.columns = Maps.newHashMap();
    }

    public Row(final String columnFamily, final String key, final List<Column<String, byte[]>> columns) {
        this(columnFamily, key);
        this.setColumns(columns);
    }

    public void addColumn(final Column<String, byte[]> column) {
        // store by name in the backing map: the values() view returned by
        // getColumns() does not support add()
        setColumn(column.getName(), column);
    }

    public String getKey() {
        return key;
    }

    public Collection<Column<String, byte[]>> getColumns() {
        return columns.values();
    }

    public Column<String, byte[]> getColumn(final String name) {
        return columns.get(name);
    }

    public void setColumn(final String name, final Column<String, byte[]> newColumn) {
        columns.put(name, newColumn);
    }

    public void setColumns(final List<Column<String, byte[]>> columns) {
        this.columns.clear();
        for (final Column<String, byte[]> col : columns) {
            this.columns.put(col.getName(), col);
        }
    }

    public String getColumnFamily() {
        return columnFamily;
    }

    @Override
    public Iterator<Column<String, byte[]>> iterator() {
        return getColumns().iterator();
    }

    @Override
    public String toString() {
        return "\nRow [ KEY: " + key + ",\n\tCF: " + columnFamily + "\n\tCOLS: {" + columns + "}]";
    }

}
```
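A hedged sketch of assembling a Row by hand; the column family and key are illustrative only:

```java
List<Column<String, byte[]>> cols = Lists.newArrayList();
cols.add(Column.newInstance("title", "An example record".getBytes()));

Row row = new Row("result", "id-123", cols); // hypothetical column family and key
for (Column<String, byte[]> c : row) {
    System.out.println(c.getName() + " = " + new String(c.getValue()));
}
```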
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/java/eu/dnetlib/data/transform/Column.java

```java
package eu.dnetlib.data.transform;

public class Column<N, V> {

    private N name;

    private V value;

    public static <N, V> Column<N, V> newInstance(final N name, final V value) {
        return new Column<N, V>(name, value);
    }

    public Column(final N name, final V value) {
        this.name = name;
        this.value = value;
    }

    public N getName() {
        return name;
    }

    public V getValue() {
        return value;
    }

}
```
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/java/eu/dnetlib/data/transform/XsltRowTransformerFactory.java

```java
package eu.dnetlib.data.transform;

import java.io.StringReader;
import java.util.Map;
import java.util.Map.Entry;

import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;

import org.apache.commons.collections.MapUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dom4j.io.DocumentSource;
import org.dom4j.io.SAXReader;

public class XsltRowTransformerFactory {

    private static final Log log = LogFactory.getLog(XsltRowTransformerFactory.class); // NOPMD by marko on 11/24/08 5:02 PM

    public XsltRowTransformer getTransformer(final String xslt) {
        return doGetTransformer(xslt, null);
    }

    public XsltRowTransformer getTransformer(final String xslt, final Map<String, Object> params) {
        return doGetTransformer(xslt, params);
    }

    private XsltRowTransformer doGetTransformer(final String xslt, final Map<String, Object> params) {
        try {
            if ((xslt == null) || xslt.isEmpty()) return new XsltRowTransformer(null);

            final TransformerFactory factory = TransformerFactory.newInstance();

            if (log.isDebugEnabled()) {
                log.debug("using transformer factory: '" + factory.getClass().getCanonicalName() + "'");
            }

            final Transformer t = factory.newTransformer(new DocumentSource(new SAXReader().read(new StringReader(xslt))));
            if (!MapUtils.isEmpty(params)) {
                for (final Entry<String, Object> e : params.entrySet()) {
                    log.debug(String.format("using xslt param: %s - %s", e.getKey(), e.getValue()));
                    t.setParameter(e.getKey(), e.getValue());
                }
            }
            return new XsltRowTransformer(t);
        } catch (final Exception e) {
            throw new RuntimeException("Error generating transformer from xslt:\n" + xslt, e);
        }
    }

    public static XsltRowTransformer newInstance(final String xslt) {
        return new XsltRowTransformerFactory().getTransformer(xslt);
    }

    public static XsltRowTransformer newInstance(final String xslt, final Map<String, Object> params) {
        return new XsltRowTransformerFactory().getTransformer(xslt, params);
    }

}
```
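A hedged end-to-end sketch: the stylesheet must emit `ROW` elements carrying `columnFamily` and `key` attributes, with `QUALIFIER` children typed `text` or `base64`, which `XsltRowTransformer.apply()` maps to `Row`/`Column` instances. The `record` input schema here is invented for illustration:

```java
String xslt =
        "<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">"
      + "  <xsl:template match=\"/record\">"
      + "    <ROW columnFamily=\"result\" key=\"{@id}\">"
      + "      <QUALIFIER name=\"title\" type=\"text\"><xsl:value-of select=\"title\"/></QUALIFIER>"
      + "    </ROW>"
      + "  </xsl:template>"
      + "</xsl:stylesheet>";

List<Row> rows = XsltRowTransformerFactory.newInstance(xslt)
        .apply("<record id=\"r1\"><title>Example</title></record>");
System.out.println(rows); // one Row in family 'result' with key 'r1' and a 'title' column
```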
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/resources/eu/dnetlib/data/hadoop/config/applicationContext-hadoop.xml

```xml
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:p="http://www.springframework.org/schema/p"
    xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans-2.5.xsd">

    <bean id="configurationEnumerator" class="eu.dnetlib.data.hadoop.config.ConfigurationEnumerator" />

    <bean id="DM" class="eu.dnetlib.data.hadoop.config.ConfigurationFactory"
        p:defaults="${services.data.hadoop.dm.properties}" />

    <bean id="IIS" class="eu.dnetlib.data.hadoop.config.ConfigurationFactory"
        p:defaults="${services.data.hadoop.iis.properties}" />

    <bean id="sequenceFileWriterFactory"
        class="eu.dnetlib.data.hadoop.hdfs.SequenceFileWriterFactory"
        scope="prototype" p:keyClass="${services.data.hadoop.hdfs.seqfilewriterfactory.keyclass}"
        p:valueClass="${services.data.hadoop.hdfs.seqfilewriterfactory.valueclass}"
        p:compressionType="${services.data.hadoop.hdfs.seqfilewriterfactory.compressiontype}"
        p:blockSize="${services.data.hadoop.hdfs.seqfilewriterfactory.blocksize}"
        p:replication="${services.data.hadoop.hdfs.seqfilewriterfactory.replication}"/>

</beans>
```
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/resources/eu/dnetlib/data/hadoop/config/hadoop-default.dm.local.properties

```properties
dnet.clustername = DM

#CORE-SITE
fs.defaultFS = hdfs://localhost:8020

hadoop.security.authentication = simple
hadoop.security.auth_to_local = DEFAULT

hadoop.rpc.socket.factory.class.default = org.apache.hadoop.net.StandardSocketFactory

#HBASE-SITE
hbase.rootdir = hdfs://localhost:8020/hbase

hbase.security.authentication = simple
zookeeper.znode.rootserver = root-region-server
hbase.zookeeper.quorum = localhost
hbase.zookeeper.property.clientPort = 2181
hbase.zookeeper.client.port = 2181
zookeeper.znode.parent = /hbase

#HDFS-SITE
dfs.replication = 2
dfs.namenode.servicerpc-address = localhost:8022
dfs.namenode.http-address = localhost:50070

#MAPRED-SITE
mapred.job.tracker = localhost:8021

#OOZIE SERVER
oozie.service.loc = http://localhost:11000/oozie

######### CDH QUICKSTART

#HDFS-SITE
#dfs.namenode.servicerpc-address=cloudera-vm:8022
#dfs.https.port=50470
#dfs.namenode.http-address=cloudera-vm:50070
#dfs.replication=3
#dfs.blocksize=134217728
#dfs.client.use.datanode.hostname=false
#fs.permissions.umask-mode=022
#dfs.client.read.shortcircuit=false
#dfs.domain.socket.path=/var/run/hdfs-sockets/dn
#dfs.client.read.shortcircuit.skip.checksum=false
#dfs.client.domain.socket.data.traffic=false
#dfs.datanode.hdfs-blocks-metadata.enabled=true
#
#
##HBASE-SITE
#hbase.rootdir=hdfs://cloudera-vm:8020/hbase
#hbase.client.write.buffer=2097152
#hbase.client.pause=1000
#hbase.client.retries.number=10
#hbase.client.scanner.caching=1
#hbase.client.keyvalue.maxsize=10485760
#hbase.rpc.timeout=60000
#hbase.security.authentication=simple
#zookeeper.session.timeout=60000
#zookeeper.znode.parent=/hbase
#zookeeper.znode.rootserver=root-region-server
#hbase.zookeeper.quorum=cloudera-vm
#hbase.zookeeper.property.clientPort=2181
#
##CORE-SITE
#fs.defaultFS=hdfs://cloudera-vm:8020
#fs.trash.interval=1
#hadoop.security.authentication=simple
#hadoop.rpc.protection=authentication
#hadoop.security.auth_to_local=DEFAULT
```
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/resources/eu/dnetlib/data/hadoop/config/hadoop-default.iis.icm.properties

```properties
dnet.clustername = IIS

#CORE-SITE
fs.defaultFS = hdfs://iis-cluster-nn

hadoop.security.authentication = simple
hadoop.security.auth_to_local = DEFAULT

#HBASE-SITE
hbase.rootdir = hdfs://iis-cluster-nn/hbase

hbase.security.authentication = simple
zookeeper.znode.rootserver = root-region-server
hbase.zookeeper.quorum = master1.hadoop.iis.openaire.eu,master2.hadoop.iis.openaire.eu,master3.hadoop.iis.openaire.eu
hbase.zookeeper.property.clientPort = 2181
hbase.zookeeper.client.port = 2181
zookeeper.znode.parent = /hbase

#HDFS-SITE
dfs.replication = 2
dfs.nameservices = iis-cluster-nn
dfs.ha.namenodes.iis-cluster-nn = nn1,nn2

dfs.namenode.rpc-address.iis-cluster-nn.nn1=namenode1.hadoop.iis.openaire.eu:8020
dfs.namenode.http-address.iis-cluster-nn.nn1=namenode1.hadoop.iis.openaire.eu:50070
dfs.namenode.rpc-address.iis-cluster-nn.nn2=namenode2.hadoop.iis.openaire.eu:8020
dfs.namenode.http-address.iis-cluster-nn.nn2=namenode2.hadoop.iis.openaire.eu:50070

dfs.client.failover.proxy.provider.iis-cluster-nn=org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider

#MAPRED-SITE
mapred.job.tracker = iis-cluster-jt
mapred.jobtrackers.iis-cluster-jt = jt1,jt2

mapred.jobtracker.rpc-address.iis-cluster-jt.jt1 = master1.hadoop.iis.openaire.eu:8021
mapred.jobtracker.rpc-address.iis-cluster-jt.jt2 = master1.hadoop.iis.openaire.eu:8022

mapred.client.failover.proxy.provider.iis-cluster-jt = org.apache.hadoop.mapred.ConfiguredFailoverProxyProvider

mapred.mapper.new-api = true
mapred.reducer.new-api = true

#OOZIE SERVER
oozie.service.loc = http://oozie.hadoop.iis.openaire.eu:11000/oozie
```
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/resources/eu/dnetlib/data/hadoop/config/hadoop-default.iis-cdh5.icm.properties

```properties
dnet.clustername = IIS

#CORE-SITE
fs.defaultFS = hdfs://nameservice1

#HDFS-SITE
dfs.replication = 2

#MAPRED-SITE
jobTracker = yarnRM
mapred.job.tracker = yarnRM
mapred.mapper.new-api = true
mapred.reducer.new-api = true

#OOZIE SERVER
oozie.service.loc = http://iis-cdh5-test-m3.ocean.icm.edu.pl:11000/oozie

#MISC
importerQueueName = dm_import
oozieLauncherQueueName = default
```
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/resources/eu/dnetlib/data/hadoop/config/hadoop-default.dm.cnr.properties

```properties
dnet.clustername = DM

#CORE-SITE
fs.defaultFS = hdfs://nmis-hadoop-cluster

hadoop.security.authentication = simple
hadoop.security.auth_to_local = DEFAULT

hadoop.rpc.socket.factory.class.default = org.apache.hadoop.net.StandardSocketFactory

#HBASE-SITE
hbase.rootdir = hdfs://nmis-hadoop-cluster/hbase

hbase.security.authentication = simple
zookeeper.znode.rootserver = root-region-server
hbase.zookeeper.quorum = quorum1.t.hadoop.research-infrastructures.eu,quorum2.t.hadoop.research-infrastructures.eu,quorum3.t.hadoop.research-infrastructures.eu,quorum4.t.hadoop.research-infrastructures.eu,jobtracker.t.hadoop.research-infrastructures.eu
hbase.zookeeper.property.clientPort = 2182
hbase.zookeeper.client.port = 2182
zookeeper.znode.parent = /hbase

#HDFS-SITE
dfs.replication = 2
dfs.nameservices = nmis-hadoop-cluster
dfs.ha.namenodes.nmis-hadoop-cluster = nn1,nn2

dfs.namenode.rpc-address.nmis-hadoop-cluster.nn1=quorum1.t.hadoop.research-infrastructures.eu:8020
dfs.namenode.http-address.nmis-hadoop-cluster.nn1=quorum1.t.hadoop.research-infrastructures.eu:50070
dfs.namenode.rpc-address.nmis-hadoop-cluster.nn2=quorum2.t.hadoop.research-infrastructures.eu:8020
dfs.namenode.http-address.nmis-hadoop-cluster.nn2=quorum2.t.hadoop.research-infrastructures.eu:50070

dfs.client.failover.proxy.provider.nmis-hadoop-cluster=org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider

#MAPRED-SITE
mapred.job.tracker = nmis-hadoop-jt
mapred.jobtrackers.nmis-hadoop-jt = jt1,jt2

mapred.jobtracker.rpc-address.nmis-hadoop-jt.jt1 = jobtracker.t.hadoop.research-infrastructures.eu:8021
mapred.jobtracker.rpc-address.nmis-hadoop-jt.jt2 = quorum4.t.hadoop.research-infrastructures.eu:8022

mapred.client.failover.proxy.provider.nmis-hadoop-jt = org.apache.hadoop.mapred.ConfiguredFailoverProxyProvider

mapred.mapper.new-api = true
mapred.reducer.new-api = true

#OOZIE SERVER
oozie.service.loc = http://oozie.t.hadoop.research-infrastructures.eu:11000/oozie
```
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/resources/eu/dnetlib/data/hadoop/config/applicationContext-hadoop.properties

```properties
services.data.hadoop.dm.properties = classpath:/eu/dnetlib/data/hadoop/config/hadoop-default.dm.cnr.properties
services.data.hadoop.iis.properties = classpath:/eu/dnetlib/data/hadoop/config/hadoop-default.iis.icm.properties

services.data.hadoop.hdfs.seqfilewriterfactory.keyclass = org.apache.hadoop.io.Text
services.data.hadoop.hdfs.seqfilewriterfactory.valueclass = org.apache.hadoop.io.Text
services.data.hadoop.hdfs.seqfilewriterfactory.compressiontype = BLOCK
services.data.hadoop.hdfs.seqfilewriterfactory.blocksize = 16M
services.data.hadoop.hdfs.seqfilewriterfactory.replication = 3
```
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/resources/eu/dnetlib/data/hadoop/config/hadoop-default.dm.stdl.properties

```properties
dnet.clustername = DM

#CORE-SITE
fs.defaultFS = hdfs://d-net2.house.cnr.it:8020

hadoop.security.authentication = simple
hadoop.security.auth_to_local = DEFAULT

hadoop.rpc.socket.factory.class.default = org.apache.hadoop.net.StandardSocketFactory

#HBASE-SITE
hbase.rootdir = hdfs://d-net2.house.cnr.it:8020/hbase

hbase.security.authentication = simple
zookeeper.znode.rootserver = root-region-server
hbase.zookeeper.quorum = d-net2.house.cnr.it
hbase.zookeeper.property.clientPort = 2181
hbase.zookeeper.client.port = 2181
zookeeper.znode.parent = /hbase

#HDFS-SITE
dfs.replication = 2
dfs.namenode.servicerpc-address = d-net2.house.cnr.it:8022
dfs.namenode.http-address = d-net2.house.cnr.it:50070
dfs.replication = 1

#MAPRED-SITE
mapred.job.tracker = d-net2.house.cnr.it:8021
mapred.child.java.opts = -Djava.net.preferIPv4Stack=true -Xmx1073741824

mapred.mapper.new-api = true
mapred.reducer.new-api = true

#OOZIE SERVER
#oozie.service.loc = http://oozie.t.hadoop.research-infrastructures.eu:11000/oozie
```
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/src/main/resources/eu/dnetlib/data/hadoop/config/hadoop-default.dm.icm.properties

```properties
dnet.clustername = DM

#CORE-SITE
fs.defaultFS = hdfs://dm-cluster-nn

hadoop.security.authentication = simple
hadoop.security.auth_to_local = DEFAULT

hadoop.rpc.socket.factory.class.default = org.apache.hadoop.net.StandardSocketFactory

#HBASE-SITE
hbase.rootdir = hdfs://dm-cluster-nn/hbase

hbase.security.authentication = simple
zookeeper.znode.rootserver = root-region-server
hbase.zookeeper.quorum = namenode1.hadoop.dm.openaire.eu,namenode2.hadoop.dm.openaire.eu,jobtracker1.hadoop.dm.openaire.eu,jobtracker2.hadoop.dm.openaire.eu,hbase-master1.hadoop.dm.openaire.eu
hbase.zookeeper.property.clientPort = 2181
hbase.zookeeper.client.port = 2181
zookeeper.znode.parent = /hbase

#HDFS-SITE
dfs.replication = 2
dfs.nameservices = dm-cluster-nn
dfs.ha.namenodes.dm-cluster-nn = nn1,nn2

dfs.namenode.rpc-address.dm-cluster-nn.nn1=namenode1.hadoop.dm.openaire.eu:8020
dfs.namenode.http-address.dm-cluster-nn.nn1=namenode1.hadoop.dm.openaire.eu:50070
dfs.namenode.rpc-address.dm-cluster-nn.nn2=namenode2.hadoop.dm.openaire.eu:8020
dfs.namenode.http-address.dm-cluster-nn.nn2=namenode2.hadoop.dm.openaire.eu:50070

dfs.client.failover.proxy.provider.dm-cluster-nn=org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider

#MAPRED-SITE
mapred.job.tracker = dm-cluster-jt
mapred.jobtrackers.dm-cluster-jt = jt1,jt2

mapred.jobtracker.rpc-address.dm-cluster-jt.jt1 = jobtracker1.hadoop.dm.openaire.eu:8021
mapred.jobtracker.rpc-address.dm-cluster-jt.jt2 = jobtracker2.hadoop.dm.openaire.eu:8021

mapred.client.failover.proxy.provider.dm-cluster-jt = org.apache.hadoop.mapred.ConfiguredFailoverProxyProvider

mapred.mapper.new-api = true
mapred.reducer.new-api = true

#OOZIE SERVER
oozie.service.loc = http://oozie.hadoop.dm.openaire.eu:11000/oozie
```
modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1/pom.xml

```xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <parent>
        <groupId>eu.dnetlib</groupId>
        <artifactId>dnet45-parent</artifactId>
        <version>1.0.0</version>
        <relativePath />
    </parent>
    <modelVersion>4.0.0</modelVersion>
    <groupId>eu.dnetlib</groupId>
    <artifactId>dnet-hadoop-commons</artifactId>
    <packaging>jar</packaging>
    <version>2.0.1</version>
    <scm>
        <developerConnection>scm:svn:https://svn.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-hadoop-commons/tags/dnet-hadoop-commons-2.0.1</developerConnection>
    </scm>
    <repositories>
        <!-- Cloudera Repositories -->
        <repository>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
            <id>cloudera-central</id>
            <name>cloudera-libs-release</name>
            <url>http://maven.research-infrastructures.eu/nexus/content/repositories/cloudera-central</url>
        </repository>
        <repository>
            <id>cloudera-snapshots</id>
            <name>cloudera-libs-snapshot</name>
            <url>http://maven.research-infrastructures.eu/nexus/content/repositories/cloudera-snapshots</url>
        </repository>
        <repository>
            <id>typesafe</id>
            <name>typesafe-releases</name>
            <url>http://maven.research-infrastructures.eu/nexus/content/repositories/typesafe</url>
        </repository>
    </repositories>

    <dependencies>
        <dependency>
            <groupId>eu.dnetlib</groupId>
            <artifactId>cnr-misc-utils</artifactId>
            <version>[1.0.0,2.0.0)</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-api</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.common.version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>mockito-all</artifactId>
                    <groupId>org.mockito</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>jetty</artifactId>
                    <groupId>org.mortbay.jetty</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>jetty-util</artifactId>
                    <groupId>org.mortbay.jetty</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>core</artifactId>
                    <groupId>org.eclipse.jdt</groupId>
                </exclusion>
                <exclusion>
                    <groupId>javax.servlet</groupId>
                    <artifactId>servlet-api</artifactId>
                </exclusion>
                <exclusion>
                    <artifactId>slf4j-api</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>slf4j-log4j12</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>jsp-api</artifactId>
                    <groupId>javax.servlet.jsp</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>guava</artifactId>
                    <groupId>com.google.guava</groupId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-core</artifactId>
            <version>${hadoop.core.version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>core</artifactId>
                    <groupId>org.eclipse.jdt</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>jetty</artifactId>
                    <groupId>org.mortbay.jetty</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>jetty-util</artifactId>
                    <groupId>org.mortbay.jetty</groupId>
                </exclusion>
                <exclusion>
                    <groupId>tomcat</groupId>
                    <artifactId>jasper-runtime</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>tomcat</groupId>
                    <artifactId>jasper-compiler</artifactId>
                </exclusion>
                <exclusion>
                    <artifactId>jsp-api</artifactId>
                    <groupId>javax.servlet.jsp</groupId>
                </exclusion>
                <exclusion>
                    <groupId>javax.servlet</groupId>
                    <artifactId>servlet-api</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.springframework</groupId>
            <artifactId>spring-beans</artifactId>
            <version>${spring.version}</version>
        </dependency>

        <!-- DO NOT REMOVE THIS DEPENDENCY!!! MODULES ACCESSING HDFS NEED IT EVEN
            IF mvn dependency:analyze SUGGESTS REMOVING IT -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.hdfs.version}</version>
            <exclusions>
                <exclusion>
                    <artifactId>jasper-runtime</artifactId>
                    <groupId>tomcat</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>jetty</artifactId>
                    <groupId>org.mortbay.jetty</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>jetty-util</artifactId>
                    <groupId>org.mortbay.jetty</groupId>
                </exclusion>
                <exclusion>
                    <groupId>javax.servlet</groupId>
                    <artifactId>servlet-api</artifactId>
                </exclusion>
                <exclusion>
                    <artifactId>jsp-api</artifactId>
                    <groupId>javax.servlet.jsp</groupId>
                </exclusion>
                <exclusion>
                    <artifactId>guava</artifactId>
                    <groupId>com.google.guava</groupId>
                </exclusion>
            </exclusions>
        </dependency>
    </dependencies>
</project>
```