Revision 55136

[maven-release-plugin] copy for tag dnet-hadoop-service-2.7.7

modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/deploy.info
{"type_source": "SVN", "goal": "package -U -T 4C source:jar", "url": "http://svn-public.driver.research-infrastructures.eu/driver/dnet45/modules/dnet-hadoop-service/trunk/", "deploy_repository": "dnet45-snapshots", "version": "4", "mail": "sandro.labruzzo@isti.cnr.it,michele.artini@isti.cnr.it, claudio.atzori@isti.cnr.it, alessia.bardi@isti.cnr.it", "deploy_repository_url": "http://maven.research-infrastructures.eu/nexus/content/repositories/dnet45-snapshots", "name": "dnet-hadoop-service"}

modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/test/java/eu/dnetlib/data/hadoop/utils/CopyTableTest.java
package eu.dnetlib.data.hadoop.utils;

import static org.junit.Assert.assertNotNull;

import java.io.IOException;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.mapreduce.CopyTable;
import org.apache.hadoop.mapreduce.Job;
import org.junit.Test;

public class CopyTableTest {

	@Test
	public void testCopyTable() throws IOException {

		Job job =
				CopyTable.createSubmittableJob(new Configuration(), new String[] { "--peer.adr=server1,server2,server3:2181:/hbase",
					"--families=myOldCf:myNewCf,cf2,cf3", "tableName" });
		assertNotNull(job);

		Configuration conf = job.getConfiguration();

		for (Entry<String, String> e : conf) {
			System.out.println(String.format("<PROPERTY key=\"%s\" value=\"%s\"/>", e.getKey(), e.getValue()));
		}

	}
}

modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/test/java/eu/dnetlib/data/hadoop/utils/ReadSequenceFileTest.java
package eu.dnetlib.data.hadoop.utils;

import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.math.stat.descriptive.SummaryStatistics;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.springframework.core.io.ClassPathResource;

import com.google.common.collect.Maps;

import eu.dnetlib.data.hadoop.config.ConfigurationFactory;
import eu.dnetlib.data.hadoop.hdfs.SequenceFileUtils;
import eu.dnetlib.miscutils.collections.Pair;

public class ReadSequenceFileTest {

	private static final Path SEQUENCE_FILE_PATH = new Path("hdfs://nmis-hadoop-cluster/tmp/indexrecords_db_openaireplus_sesam_SESAMALL.seq");
	private final String HADOOP_CONF_FILE = "/eu/dnetlib/data/hadoop/config/hadoop-default.dm.cnr.properties";

	private Configuration conf;

	@Before
	public void setUp() {
		final ConfigurationFactory confFactory = new ConfigurationFactory();
		confFactory.setDefaults(new ClassPathResource(HADOOP_CONF_FILE));
		conf = confFactory.getConfiguration();
	}

	@Test
	@Ignore
	public void testReadSequenceFile() throws Exception {
		final SummaryStatistics statsAll = new SummaryStatistics();

		final Map<String, SummaryStatistics> stats = Maps.newHashMap();

		int i = 0;
		for (Pair<Text, Text> pair : SequenceFileUtils.read(SEQUENCE_FILE_PATH, conf)) {
			final String id = pair.getKey().toString();
			final String record = pair.getValue().toString();
			final int length = record.getBytes().length;

			final String type = id.substring(0, 2);
			if (!stats.containsKey(type)) {
				stats.put(type, new SummaryStatistics());
			}
			statsAll.addValue(length);
			stats.get(type).addValue(length);

			if (++i % 10000 == 0) {
				System.out.println("Read " + i);
			}
		}

		printStats("ALL", statsAll);
		for (Entry<String, SummaryStatistics> e : stats.entrySet()) {
			printStats(e.getKey(), e.getValue());
		}
	}

	private void printStats(final String type, final SummaryStatistics stats) {
		System.out.println("************************************");
		System.out.println("Type: " + type);
		System.out.println(String.format("\tmin    : %.2f KBytes", stats.getMin() / 1024));
		System.out.println(String.format("\tmax    : %.2f KBytes", stats.getMax() / 1024));
		System.out.println(String.format("\tavg    : %.2f KBytes", stats.getMean() / 1024));
		System.out.println(String.format("\tstdDev : %.2f", stats.getStandardDeviation() / 1024));
	}
}

modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/test/java/eu/dnetlib/data/hadoop/hbase/HBaseTestContextConfiguration.java
package eu.dnetlib.data.hadoop.hbase;

import eu.dnetlib.data.hadoop.HadoopClientMap;
import eu.dnetlib.data.hadoop.HadoopServiceCore;
import eu.dnetlib.data.hadoop.config.ConfigurationEnumerator;
import eu.dnetlib.data.hadoop.config.ConfigurationFactory;
import eu.dnetlib.data.hadoop.mapred.JobClientFactory;
import eu.dnetlib.data.hadoop.oozie.OozieClientFactory;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Profile;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;

@Configuration
@Profile(value = "test")
public class HBaseTestContextConfiguration {

	public static final String ENABLED_CLIENTS = "{"
			+ "\"DM\":{\"oozie\":\"false\",\"mapred\":\"false\",\"hbase\":\"true\"},"
			+ "\"IIS\":{\"oozie\":\"false\",\"mapred\":\"false\",\"hbase\":\"false\"}"
			+ "}";

	public static final int MAX_VERSIONS = 10;

	@Bean
	public HadoopServiceCore hadoopServiceCore() {
		final HadoopServiceCore core = new HadoopServiceCore();

		core.setMaxVersions(MAX_VERSIONS);

		System.out.println("using hbase max versions: " + MAX_VERSIONS);
		return core;
	}

	@Bean
	public HadoopClientMap hadoopClientMap() throws InterruptedException {
		final HadoopClientMap clientMap = new HadoopClientMap();
		clientMap.setEnabledClients(ENABLED_CLIENTS);

		return clientMap;
	}

	@Bean
	public HBaseAdminFactory hBaseAdminFactory() {
		return new HBaseAdminFactory();
	}

	@Bean
	public OozieClientFactory oozieClientFactory() {
		return new OozieClientFactory();
	}

	@Bean
	public JobClientFactory jobClientFactory() {
		return new JobClientFactory();
	}

	@Bean
	public ConfigurationEnumerator configurationEnumerator() {
		return new ConfigurationEnumerator();
	}

	@Bean
	public ConfigurationFactory DM() {
		return get(new ClassPathResource("/eu/dnetlib/data/hadoop/config/hadoop-default.dm.cnr.properties"));
	}

	@Bean
	public ConfigurationFactory IIS() {
		return get(new ClassPathResource("/eu/dnetlib/data/hadoop/config/hadoop-default.iis.icm.properties"));
	}

	protected ConfigurationFactory get(final Resource props) {
		final ConfigurationFactory configurationFactory = new ConfigurationFactory();
		configurationFactory.setDefaults(props);
		return configurationFactory;
	}

}

modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/test/java/eu/dnetlib/data/hadoop/hbase/HBaseTest.java
package eu.dnetlib.data.hadoop.hbase;

import java.io.IOException;
import java.util.Map.Entry;
import java.util.NavigableMap;
import java.util.Set;

import com.google.common.collect.Sets;
import eu.dnetlib.data.hadoop.HadoopServiceCore;
import eu.dnetlib.data.hadoop.config.ClusterName;
import eu.dnetlib.data.hadoop.config.ConfigurationEnumerator;
import eu.dnetlib.data.hadoop.rmi.HadoopServiceException;
import eu.dnetlib.miscutils.datetime.DateUtils;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.test.context.ActiveProfiles;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;

import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

@ActiveProfiles("test")
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(classes = HBaseTestContextConfiguration.class)
public class HBaseTest {

	protected static final String TEST_TABLE = "dnet_test_table";

	protected static final int NUM_VERSIONS = 10;

	@Autowired
	private HadoopServiceCore hadoopServiceCore;

	@Autowired
	private ConfigurationEnumerator configurationEnumerator;

	@Before
	public void setUp() throws HadoopServiceException, IOException, InterruptedException {
		assertNotNull(hadoopServiceCore);

		ensureDropTable();
	}

	@After
	public void tearDown() throws HadoopServiceException, IOException {
		ensureDropTable();
	}

	@Test
	@Ignore
	// TODO allow testing on a dev cluster instance
	public void testReadWrite() throws HadoopServiceException, IOException, InterruptedException {

		hadoopServiceCore.createTable(ClusterName.DM, TEST_TABLE, testSchema());
		assertTrue(hadoopServiceCore.existTable(ClusterName.DM, TEST_TABLE));

		final HTable htable = new HTable(configurationEnumerator.get(ClusterName.DM), TEST_TABLE);

		final Put put = new Put(Bytes.toBytes("1"));

		for (int i = 0; i < NUM_VERSIONS; i++) {
			put.add(Bytes.toBytes("result"), Bytes.toBytes("body"), Bytes.toBytes(i + ""));
			htable.put(put);
			Thread.sleep(1000);
		}
		final Get get = new Get(Bytes.toBytes("1"));
		get.setMaxVersions(HBaseTestContextConfiguration.MAX_VERSIONS);

		final Result r = htable.get(get);

		// Map<family,Map<qualifier,Map<timestamp,value>>>
		final NavigableMap<Long, byte[]> versions = r.getMap().get(Bytes.toBytes("result")).get(Bytes.toBytes("body"));

		for (final Entry<Long, byte[]> e : versions.entrySet()) {
			System.out.println("t: " + DateUtils.calculate_ISO8601(e.getKey()) + ", v: " + Bytes.toString(e.getValue()));
		}

		htable.close();

	}

	protected void ensureDropTable() throws HadoopServiceException, IOException {
		if (hadoopServiceCore.existTable(ClusterName.DM, TEST_TABLE)) {
			hadoopServiceCore.dropTable(ClusterName.DM, TEST_TABLE);
		}
	}

	protected Set<String> testSchema() {
		final Set<String> schema = Sets.newHashSet();

		schema.add("result");

		return schema;
	}

}

modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/test/resources/log4j.properties
### Root Level ###
log4j.rootLogger=WARN, LOGFILE, CONSOLE

### Configuration for the LOGFILE appender ###
log4j.appender.LOGFILE=org.apache.log4j.RollingFileAppender
log4j.appender.LOGFILE.MaxFileSize=25MB
log4j.appender.LOGFILE.MaxBackupIndex=10
log4j.appender.LOGFILE.File=logs/dnet.log
log4j.appender.LOGFILE.Append=true
log4j.appender.LOGFILE.layout=org.apache.log4j.PatternLayout
log4j.appender.LOGFILE.layout.ConversionPattern=[%-5p] %d %c - %m%n

### Configuration for the CONSOLE appender ###
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
log4j.appender.CONSOLE.layout.ConversionPattern=[%-5p] %d %c - %m%n

org.apache.cxf.Logger=org.apache.cxf.common.logging.Log4jLogger

### Application Level ###
log4j.logger.eu.dnetlib=INFO
log4j.logger.eu.dnetlib.enabling.is.sn=INFO
log4j.logger.org.apache.cxf.interceptor=FATAL
log4j.logger.org.apache.cxf.ws.addressing.ContextUtils=FATAL
log4j.logger.eu.dnetlib.enabling.tools.AbstractBaseService=INFO
log4j.logger.eu.dnetlib.enabling.inspector=DEBUG
log4j.logger.eu.dnetlib.xml.database.LoggingTrigger=WARN
log4j.logger.eu.dnetlib.enabling.tools.registration.ServiceRegistrator=INFO
log4j.logger.eu.dnetlib.enabling.inspector=FATAL
log4j.logger.eu.dnetlib.enabling.inspector.SubscriptionController=DEBUG
log4j.logger.eu.dnetlib.springutils.stringtemplate.StringTemplateViewResolver=FATAL
log4j.logger.eu.dnetlib.enabling.is.sn.SynchronousNotificationSenderImpl=WARN
log4j.logger.eu.dnetlib.enabling.tools.LocalServiceResolverImpl=WARN
log4j.logger.eu.dnetlib.enabling.is.sn.NotificationInvokerImpl=WARN
log4j.logger.eu.dnetlib.data.collective=INFO
log4j.logger.eu.dnetlib.data.hadoop.utils.ScanFactory=DEBUG
log4j.logger.org.apache.xerces.parsers.SAXParser=OFF
log4j.logger.eu.dnetlib.conf.PropertyFetcher=WARN
#log4j.logger.eu.dnetlib.data.transform.XsltRowTransformerFactory=DEBUG

log4j.logger.eu.dnetlib.enabling.is.sn.ISSNServiceImpl=OFF
log4j.logger.eu.dnetlib.enabling.datasources.DatasourceManagerClients=FATAL
log4j.logger.eu.dnetlib.data.mdstore.modular.mongodb.utils.MetadataCheckJob=DEBUG
log4j.logger.eu.dnetlib.enabling.is.sn.ISSNServiceCore=WARN
log4j.logger.eu.dnetlib.xml.database.exist.ExistDatabase=WARN
log4j.logger.eu.dnetlib.enabling.is.store.AbstractContentInitializer=FATAL

log4j.logger.org.apache.hadoop.hbase.mapreduce.TableInputFormatBase=FATAL

### Spring ###
log4j.logger.org.springframework=ERROR

modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/main/java/eu/dnetlib/data/hadoop/utils/ScanProperties.java
package eu.dnetlib.data.hadoop.utils;

import java.util.Set;

import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;

import com.google.common.collect.Sets;

public class ScanProperties {

	private static final Operator DEFAULT_OPERATOR = Operator.MUST_PASS_ALL;

	private int caching = 100;
	private FilterList filterList;
	private Set<String> families = Sets.newHashSet();

	public ScanProperties(final String op) {
		Operator operator = DEFAULT_OPERATOR;
		if ((op != null) && !op.isEmpty()) {
			operator = Operator.valueOf(op);
		}
		filterList = new FilterList(operator);
	}

	public FilterList getFilterList() {
		return filterList;
	}

	public void setFilterList(final FilterList filterList) {
		this.filterList = filterList;
	}

	public Set<String> getFamilies() {
		return families;
	}

	public void setFamilies(final Set<String> families) {
		this.families = families;
	}

	public int getCaching() {
		return caching;
	}

	public void setCaching(int caching) {
		this.caching = caching;
	}
}

modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/main/java/eu/dnetlib/data/hadoop/utils/HadoopUtils.java
package eu.dnetlib.data.hadoop.utils;

import eu.dnetlib.data.hadoop.HadoopJob;
import eu.dnetlib.data.hadoop.rmi.HadoopJobDescriptor;
import eu.dnetlib.data.hadoop.rmi.HadoopServiceException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class HadoopUtils {

	private static final Log log = LogFactory.getLog(HadoopUtils.class);

	public static java.util.function.Function<HadoopJob, HadoopJobDescriptor> asDescriptor() {
		return d -> {
			try {
				return d.asDescriptor();
			} catch (HadoopServiceException e) {
				log.warn(e);
				return null;
			}
		};
	}

}

modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/main/java/eu/dnetlib/data/hadoop/utils/JobProfile.java
package eu.dnetlib.data.hadoop.utils;

import java.util.Map;
import java.util.Set;

import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

import eu.dnetlib.data.hadoop.rmi.HadoopJobType;

public class JobProfile {

	private final Map<String, String> jobDefinition = Maps.newHashMap();
	private final Set<String> requiredParams = Sets.newHashSet();
	private ScanProperties scanProperties;

	private String name;

	private String description = "";

	private HadoopJobType jobType;

	public boolean isEmpty() {
		return getJobDefinition().isEmpty();
	}

	public Map<String, String> getJobDefinition() {
		return jobDefinition;
	}

	public Set<String> getRequiredParams() {
		return requiredParams;
	}

	public String getName() {
		return name;
	}

	public void setName(String name) {
		this.name = name;
	}

	public String getDescription() {
		return description;
	}

	public void setDescription(String description) {
		this.description = description;
	}

	public ScanProperties getScanProperties() {
		return scanProperties;
	}

	public void setScanProperties(ScanProperties scanProperties) {
		this.scanProperties = scanProperties;
	}

	public HadoopJobType getJobType() {
		return jobType;
	}

	public void setJobType(HadoopJobType jobType) {
		this.jobType = jobType;
	}

}

modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/main/java/eu/dnetlib/data/hadoop/utils/ScanFactory.java
package eu.dnetlib.data.hadoop.utils;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Map;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Base64;
import org.dom4j.Document;
import org.dom4j.Node;

public class ScanFactory {

	private static final Log log = LogFactory.getLog(ScanFactory.class); // NOPMD by marko on 11/24/08 5:02 PM

	public static String getScan(final ScanProperties scanProperties) throws IOException {
		Scan scan = new Scan();

		scan.setCaching(scanProperties.getCaching());
		scan.setCacheBlocks(false); // don't set to true for MR jobs

		scan.setFilter(scanProperties.getFilterList());
		for (String family : scanProperties.getFamilies()) {
			scan.addFamily(family.getBytes());
		}

		log.debug("serializing scan");
		return convertScanToString(scan);
	}

	public static ScanProperties parseScanProperties(final Document doc, final Map<String, String> bbParams) {
		log.debug("setting job scanner");

		ScanProperties scanProperties = new ScanProperties(doc.valueOf("//FILTERS/@operator"));

		String caching = doc.valueOf("//SCAN/@caching");
		if (!StringUtils.isBlank(caching)) {
			log.info("overriding default scan caching with: " + caching);
			scanProperties.setCaching(Integer.valueOf(caching));
		}

		for (Object o : doc.selectNodes("//SCAN/FAMILIES/FAMILY")) {
			Node node = (Node) o;
			String value = node.valueOf("./@value");
			if ((value == null) || value.isEmpty()) {
				value = bbParams.get(node.valueOf("./@param"));
			}
			log.debug("scanner family value: " + value);
			scanProperties.getFamilies().add(value);
		}
		for (Object o : doc.selectNodes("//SCAN/FILTERS/FILTER")) {
			Node node = (Node) o;
			String filterType = node.valueOf("./@type");

			String value = node.valueOf("./@value");
			if ((value == null) || value.isEmpty()) {
				value = bbParams.get(node.valueOf("./@param"));
			}

			if (filterType.equals("prefix")) {
				log.debug("scanner prefix filter, value: " + value);
				scanProperties.getFilterList().addFilter(new PrefixFilter(value.getBytes()));
			} // TODO add more filterType cases here
		}
		return scanProperties;
	}

	/**
	 * Writes the given scan into a Base64 encoded string.
	 *
	 * @param scan
	 *            The scan to write out.
	 * @return The scan saved in a Base64 encoded string.
	 * @throws IOException
	 *             When writing the scan fails.
	 */
	private static String convertScanToString(final Scan scan) throws IOException {
		ByteArrayOutputStream out = new ByteArrayOutputStream();
		DataOutputStream dos = new DataOutputStream(out);
		scan.write(dos);
		return Base64.encodeBytes(out.toByteArray());
	}

}

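For orientation, a minimal usage sketch of the two classes above (not part of this revision; the column family, row-key prefix and caching value are illustrative assumptions):

import java.io.IOException;

import org.apache.hadoop.hbase.filter.PrefixFilter;

import eu.dnetlib.data.hadoop.utils.ScanFactory;
import eu.dnetlib.data.hadoop.utils.ScanProperties;

public class ScanFactoryUsageSketch {

	public static void main(final String[] args) throws IOException {
		// operator name must match an org.apache.hadoop.hbase.filter.FilterList.Operator constant
		final ScanProperties props = new ScanProperties("MUST_PASS_ALL");
		props.setCaching(500); // overrides the default caching of 100
		props.getFamilies().add("result"); // assumed column family
		props.getFilterList().addFilter(new PrefixFilter("50|".getBytes())); // assumed row-key prefix

		// Base64-encoded Scan string, suitable for passing as a job parameter
		final String serializedScan = ScanFactory.getScan(props);
		System.out.println(serializedScan);
	}
}
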
modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/main/java/eu/dnetlib/data/hadoop/hbase/HBaseDeleteFeeder.java
package eu.dnetlib.data.hadoop.hbase;

import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.util.Bytes;

import eu.dnetlib.data.transform.Column;
import eu.dnetlib.data.transform.Row;

/**
 * The Class HBaseDeleteFeeder performs a batch of Delete operations.
 */
public class HBaseDeleteFeeder extends HbaseTableFeeder {

	/**
	 * Logger.
	 */
	private static final Log log = LogFactory.getLog(HBaseDeleteFeeder.class); // NOPMD by marko on 11/24/08 5:02 PM

	/*
	 * (non-Javadoc)
	 *
	 * @see eu.dnetlib.data.hadoop.hbase.HbaseTableFeeder#addOperation(java.util.List, eu.dnetlib.data.transform.Row)
	 */
	@Override
	protected void addOperation(final List<Mutation> buffer, final Row row) {
		final Delete delete = new Delete(Bytes.toBytes(row.getKey()));
		delete.setWriteToWAL(true);

		for (final Column<String, byte[]> col : row) {
			log.debug(String.format("deleting K: '%s' CF:'%s' Q:'%s'", row.getKey(), row.getColumnFamily(), col.getName()));
			delete.deleteColumns(Bytes.toBytes(row.getColumnFamily()), Bytes.toBytes(col.getName()));
		}

		buffer.add(delete);
	}

}

modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/main/java/eu/dnetlib/data/hadoop/hbase/HBaseAdminFactory.java
package eu.dnetlib.data.hadoop.hbase;

import eu.dnetlib.data.hadoop.AbstractHadoopClient;
import eu.dnetlib.data.hadoop.config.ClusterName;
import eu.dnetlib.data.hadoop.rmi.HadoopServiceException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class HBaseAdminFactory extends AbstractHadoopClient {

	private static final Log log = LogFactory.getLog(HBaseAdminFactory.class); // NOPMD by marko on 11/24/08 5:02 PM

	public HBaseAdmin newInstance(final ClusterName clusterName) throws HadoopServiceException {
		try {
			log.info("init hbaseAdmin, cluster: " + clusterName.toString());
			setHadoopUser();
			return new HBaseAdmin(configurationEnumerator.get(clusterName));
		} catch (final Throwable e) {
			throw new HadoopServiceException("unable to initialize hbase client for cluster: " + clusterName.toString(), e);
		}
	}

}

modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/main/java/eu/dnetlib/data/hadoop/hbase/HbaseTableFeeder.java
package eu.dnetlib.data.hadoop.hbase;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import com.google.common.base.Predicates;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import eu.dnetlib.data.hadoop.config.ClusterName;
import eu.dnetlib.data.hadoop.config.ConfigurationEnumerator;
import eu.dnetlib.data.transform.Row;
import eu.dnetlib.data.transform.XsltRowTransformerFactory;
import eu.dnetlib.enabling.resultset.client.ResultSetClientFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Mutation;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Required;

/**
 * The Class HbaseTableFeeder provides abstraction to ship batch operation on an HBase table
 */
public abstract class HbaseTableFeeder {

	/**
	 * The Constant log.
	 */
	private static final Log log = LogFactory.getLog(HbaseTableFeeder.class); // NOPMD by marko on 11/24/08 5:02 PM
	/**
	 * The configuration enumerator.
	 */
	@Autowired
	protected ConfigurationEnumerator configurationEnumerator;
	/**
	 * The batch size.
	 */
	private int batchSize = 100;
	/**
	 * The result set client factory.
	 */
	private ResultSetClientFactory resultSetClientFactory;

	/**
	 * Adds the operation.
	 *
	 * @param buffer the buffer
	 * @param row    the row
	 */
	protected abstract void addOperation(final List<Mutation> buffer, final Row row);

	/**
	 * Feed.
	 *
	 * @param epr         the epr
	 * @param xsl         the xsl
	 * @param clusterName the cluster name
	 * @param tableName   the table name
	 * @param simulation  the simulation
	 * @return the int
	 * @throws IOException          Signals that an I/O exception has occurred.
	 * @throws InterruptedException the interrupted exception
	 */
	public int feed(final String epr, final String xsl, final ClusterName clusterName, final String tableName, final boolean simulation)
			throws IOException, InterruptedException {
		return doWrite(asRows(epr, xsl), getConf(clusterName), tableName, simulation);
	}

	/**
	 * Do writeOnHBase.
	 *
	 * @param rows          the rows
	 * @param configuration the configuration
	 * @param tableName     the table name
	 * @param simulation    the simulation
	 * @return the int
	 * @throws IOException          Signals that an I/O exception has occurred.
	 * @throws InterruptedException the interrupted exception
	 */
	private int doWrite(final Iterable<Row> rows, final Configuration configuration, final String tableName, final boolean simulation)
			throws IOException, InterruptedException {
		final List<Mutation> buffer = Lists.newArrayList();

		int count = 0;
		if (simulation) {
			log.info("running in simulation mode ...");
			log.info(String.format("... simulated import of %d records", Iterables.size(rows)));
		} else {

			final HTable htable = new HTable(configuration, tableName);
			try {
				int i = 0;
				for (final Row row : rows) {
					addOperation(buffer, row);
					if ((++i % getBatchSize()) == 0) {
						flush(tableName, buffer, htable);
						count += buffer.size();
						buffer.clear();
					}
				}
			} finally {
				if (!buffer.isEmpty()) {
					flush(tableName, buffer, htable);
					count += buffer.size();
				}
				htable.flushCommits();
				htable.close();
			}
		}
		return count;
	}

	private void flush(final String tableName, final List<Mutation> buffer, final HTable htable) throws IOException, InterruptedException {
		if (!checkOp(htable.batch(buffer), tableName)) throw new IOException("unable to flush operation on HBase table: " + tableName);
	}

	private boolean checkOp(final Object[] res, final String tableName) throws IOException {
		return Iterables.all(Arrays.asList(res), Predicates.notNull());
	}

	/**
	 * As rows.
	 *
	 * @param epr the epr
	 * @param xsl the xsl
	 * @return the iterable
	 */
	protected Iterable<Row> asRows(final String epr, final String xsl) {
		return Iterables.concat(Iterables.transform(getResultSetClientFactory().getClient(epr), XsltRowTransformerFactory.newInstance(xsl)));
	}

	/**
	 * Gets the conf.
	 *
	 * @param clusterName the cluster name
	 * @return the conf
	 */
	protected Configuration getConf(final ClusterName clusterName) {
		return configurationEnumerator.get(clusterName);
	}

	/**
	 * Gets the batch size.
	 *
	 * @return the batch size
	 */
	public int getBatchSize() {
		return batchSize;
	}

	/**
	 * Sets the batch size.
	 *
	 * @param batchSize the new batch size
	 */
	public void setBatchSize(final int batchSize) {
		this.batchSize = batchSize;
	}

	/**
	 * Gets the result set client factory.
	 *
	 * @return the result set client factory
	 */
	public ResultSetClientFactory getResultSetClientFactory() {
		return resultSetClientFactory;
	}

	/**
	 * Sets the result set client factory.
	 *
	 * @param resultSetClientFactory the new result set client factory
	 */
	@Required
	public void setResultSetClientFactory(final ResultSetClientFactory resultSetClientFactory) {
		this.resultSetClientFactory = resultSetClientFactory;
	}

}

modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/main/java/eu/dnetlib/data/hadoop/hbase/HBasePutFeeder.java
package eu.dnetlib.data.hadoop.hbase;

import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

import eu.dnetlib.data.transform.Column;
import eu.dnetlib.data.transform.Row;

/**
 * The Class HBasePutFeeder performs a batch of Put operations.
 */
public class HBasePutFeeder extends HbaseTableFeeder {

	/**
	 * Logger.
	 */
	private static final Log log = LogFactory.getLog(HBasePutFeeder.class); // NOPMD by marko on 11/24/08 5:02 PM

	/*
	 * (non-Javadoc)
	 *
	 * @see eu.dnetlib.data.hadoop.hbase.HbaseTableFeeder#addOperation(java.util.List, eu.dnetlib.data.transform.Row)
	 */
	@Override
	protected void addOperation(final List<Mutation> buffer, final Row row) {
		final Put put = new Put(Bytes.toBytes(row.getKey()));
		put.setWriteToWAL(true);

		for (final Column<String, byte[]> col : row) {
			log.debug(String.format("adding value to K: '%s' CF:'%s' Q:'%s'", row.getKey(), row.getColumnFamily(), col.getName()));
			put.add(Bytes.toBytes(row.getColumnFamily()), Bytes.toBytes(col.getName()), col.getValue());
		}

		buffer.add(put);
	}

}

modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/main/java/eu/dnetlib/data/hadoop/JobRegistry.java
package eu.dnetlib.data.hadoop;

import java.util.Date;
import java.util.List;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.stream.Collectors;

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import eu.dnetlib.data.hadoop.HadoopJob.Status;
import eu.dnetlib.data.hadoop.config.ClusterName;
import eu.dnetlib.data.hadoop.rmi.HadoopJobDescriptor;
import eu.dnetlib.data.hadoop.rmi.HadoopServiceException;
import eu.dnetlib.data.hadoop.utils.HadoopUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.springframework.beans.factory.annotation.Required;

public class JobRegistry {

	private static final Log log = LogFactory.getLog(JobRegistry.class); // NOPMD by marko on 11/24/08 5:02 PM

	private int maxJobs;

	private final BiMap<String, HadoopJob> jobs = HashBiMap.create();

	public String registerJob(HadoopJob hadoopJob) throws HadoopServiceException {

		if (jobs.containsValue(hadoopJob)) { return jobs.inverse().get(hadoopJob); }

		if (jobs.size() > getMaxJobs()) {
			removeOldestProcess();
		}

		jobs.put(hadoopJob.getId(), hadoopJob);
		log.info("Registered hadoop job " + hadoopJob.getId());
		hadoopJob.startMonitor();

		return hadoopJob.getId();
	}

	public Status getJobStatus(String id) {
		return findJob(id).getStatus();
	}

	public HadoopJob findJob(String id) {
		return jobs.get(id);
	}

	public void unregisterJob(String id) throws HadoopServiceException {

		if (!jobs.containsKey(id)) { throw new HadoopServiceException("unable to unregister job, could not find jobId in registry: " + id); }

		log.info("unregistering job: " + id);
		jobs.get(id).getJobMonitor().kill();
		jobs.remove(id);
	}

	private void removeOldestProcess() throws HadoopServiceException {
		Date oldDate = new Date();
		String oldId = null;

		for (Entry<String, HadoopJob> e : jobs.entrySet()) {
			final HadoopJob hadoopJob = e.getValue();

			if (hadoopJob.isComplete()) {
				final Date date = hadoopJob.getLastActivity();
				if (date.before(oldDate)) {
					oldDate = date;
					oldId = e.getKey();
				}
			}
		}

		if (oldId != null) {
			unregisterJob(oldId);
		}

	}

	public List<HadoopJobDescriptor> listJobs(ClusterName clusterName) {
		return jobs.values().stream()
				.filter(j -> clusterName.equals(j.getClusterName()))
				.map(HadoopUtils.asDescriptor())
				.filter(Objects::nonNull)
				.collect(Collectors.toList());
	}

	@Required
	public void setMaxJobs(final int maxJobs) {
		this.maxJobs = maxJobs;
	}

	public int getMaxJobs() {
		return maxJobs;
	}

}

modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/main/java/eu/dnetlib/data/hadoop/HadoopServiceCore.java
package eu.dnetlib.data.hadoop;

import java.io.IOException;
import java.net.URI;
import java.util.*;
import java.util.Map.Entry;
import java.util.stream.Collectors;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import eu.dnetlib.data.hadoop.config.ClusterName;
import eu.dnetlib.data.hadoop.config.ConfigurationEnumerator;
import eu.dnetlib.data.hadoop.rmi.HadoopServiceException;
import eu.dnetlib.data.hadoop.rmi.hbase.Column;
import eu.dnetlib.data.hadoop.rmi.hbase.HBaseRowDescriptor;
import eu.dnetlib.data.hadoop.rmi.hbase.schema.HBaseTableDescriptor;
import eu.dnetlib.data.hadoop.rmi.hbase.schema.HBaseTableRegionInfo;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Required;

public class HadoopServiceCore {

	private static final Log log = LogFactory.getLog(HadoopServiceCore.class); // NOPMD by marko on 11/24/08 5:02 PM
	@Autowired
	protected ConfigurationEnumerator configurationEnumerator;
	@Autowired
	private HadoopClientMap clients;
	private int maxVersions;

	public List<String> listTables(final ClusterName clusterName) throws IOException, HadoopServiceException {
		try(final HBaseAdmin admin = clients.getHbaseAdmin(clusterName)) {
			return Arrays.asList(admin.listTables())
					.stream()
					.map(HTableDescriptor::getNameAsString)
					.collect(Collectors.toList());
		}
	}

	public String getHBaseTableDescriptor(final ClusterName clusterName, final String tableName) throws HadoopServiceException, IOException {
		try(final HBaseAdmin admin = clients.getHbaseAdmin(clusterName)) {

			if (StringUtils.isBlank(tableName)) throw new HadoopServiceException("Table name cannot be empty or null");

			if (admin == null) throw new HadoopServiceException(String.format("HBase admin not available for cluster: '%s'", clusterName.toString()));

			final List<HRegionInfo> tableRegions = admin.getTableRegions(tableName.getBytes());

			final HTableDescriptor desc = admin.getTableDescriptor(tableName.getBytes());

			final Set<String> columns = Sets.newHashSet();

			for (HColumnDescriptor hColDesc : Arrays.asList(desc.getColumnFamilies())) {
				columns.add(hColDesc.getNameAsString());
			}

			HBaseTableDescriptor htDescriptor = new HBaseTableDescriptor();
			htDescriptor.setColumns(columns);

			List<HBaseTableRegionInfo> regions = Lists.newArrayList();

			for (HRegionInfo info : tableRegions) {
				regions.add(new HBaseTableRegionInfo(new String(info.getStartKey()), new String(info.getEndKey())));
			}
			htDescriptor.setRegions(regions);

			if (log.isDebugEnabled()) {
				log.info("got configuration for table '" + tableName + "': " + htDescriptor.toString());
			}

			return htDescriptor.toString();
		}
	}

	public List<String> describeTable(final ClusterName clusterName, final String table) throws IOException, HadoopServiceException {
		try(final HBaseAdmin admin = clients.getHbaseAdmin(clusterName)) {
			final HTableDescriptor desc = admin.getTableDescriptor(table.getBytes());
			return desc.getFamilies().stream()
					.map(d -> d.getNameAsString())
					.collect(Collectors.toList());
		}
	}

	public void truncateTable(final ClusterName clusterName, final String table) throws IOException, HadoopServiceException {
		try(final HBaseAdmin admin = clients.getHbaseAdmin(clusterName)) {

			if (!admin.tableExists(table)) throw new IllegalStateException("cannot truncate unexisting table");

			final HTableDescriptor desc = admin.getTableDescriptor(table.getBytes());

			log.info("disabling table: " + table);
			admin.disableTable(table);

			log.info("deleting table: " + table);
			admin.deleteTable(table);

			log.info("creating table: " + table);
			admin.createTable(desc);
		}
	}

	public boolean existTable(final ClusterName clusterName, final String table) throws IOException, HadoopServiceException {
		try(final HBaseAdmin admin = clients.getHbaseAdmin(clusterName)) {

			return admin.tableExists(table);
		}
	}

	public void dropTable(final ClusterName clusterName, final String table) throws IOException, HadoopServiceException {
		try(final HBaseAdmin admin = clients.getHbaseAdmin(clusterName)) {

			if (!admin.tableExists(table)) throw new IllegalStateException("cannot drop unexisting table: '" + table + "'");

			log.info("disabling table: " + table);
			admin.disableTable(table);

			log.info("deleting table: " + table);
			admin.deleteTable(table);
		}
	}

	public void createTable(final ClusterName clusterName, final String table, final String tableConfiguration) throws IOException, HadoopServiceException {
		try(final HBaseAdmin admin = clients.getHbaseAdmin(clusterName)) {

			if (admin.tableExists(table)) throw new IllegalStateException("table already exists");

			if (StringUtils.isBlank(tableConfiguration)) throw new HadoopServiceException("empty table configuration");

			final HBaseTableDescriptor tableConf = HBaseTableDescriptor.fromJSON(tableConfiguration);

			doCreateTable(clusterName, table, tableConf.getColumns(), tableConf.getRegions());
		}
	}

	public void createTable(final ClusterName clusterName, final String table, final Set<String> columns) throws IOException, HadoopServiceException {
		try(final HBaseAdmin admin = clients.getHbaseAdmin(clusterName)) {

			if (admin.tableExists(table)) throw new IllegalStateException("table already exists");

			doCreateTable(clusterName, table, columns, null);
		}
	}

	public void doCreateTable(final ClusterName clusterName, final String table, final Set<String> columns, final List<HBaseTableRegionInfo> regions)
			throws IOException, HadoopServiceException {
		try(final HBaseAdmin admin = clients.getHbaseAdmin(clusterName)) {

			if (admin.tableExists(table)) throw new IllegalStateException("table already exists");

			final HTableDescriptor desc = new HTableDescriptor(table);
			for (final String column : columns) {
				final HColumnDescriptor hds = new HColumnDescriptor(column);
				hds.setMaxVersions(getMaxVersions());
				desc.addFamily(hds);
			}

			log.info("creating hbase table: " + table);

			if (regions != null && !regions.isEmpty()) {
				log.debug(String.format("create using %s regions: %s", regions.size(), regions));
				admin.createTable(desc, getSplitKeys(regions));
			} else {
				admin.createTable(desc);
			}

			log.info("created hbase table: [" + table + "]");
			log.debug("descriptor: [" + desc.toString() + "]");
		}
	}

	private byte[][] getSplitKeys(final List<HBaseTableRegionInfo> regions) {
		byte[][] splits = new byte[regions.size() - 1][];
		for (int i = 0; i < regions.size() - 1; i++) {
			splits[i] = regions.get(i).getEndKey().getBytes();
		}
		return splits;
	}

	public void ensureTable(final ClusterName clusterName, final String table, final Set<String> columns) throws IOException, HadoopServiceException {
		try(final HBaseAdmin admin = clients.getHbaseAdmin(clusterName)) {

			if (!admin.tableExists(table)) {
				createTable(clusterName, table, columns);
			} else {
				final HTableDescriptor desc = admin.getTableDescriptor(Bytes.toBytes(table));

				final Set<String> foundColumns = desc.getFamilies().stream()
						.map(d -> d.getNameAsString())
						.collect(Collectors.toCollection(HashSet::new));

				log.info("ensuring columns on table " + table + ": " + columns);
				final Collection<String> missingColumns = Sets.difference(columns, foundColumns);
				if (!missingColumns.isEmpty()) {

					if (admin.isTableEnabled(table)) {
						admin.disableTable(table);
					}

					for (final String column : missingColumns) {
						log.info("hbase table: '" + table + "', adding column: " + column);
						admin.addColumn(table, new HColumnDescriptor(column));
					}

					admin.enableTable(table);
				}
			}
		}
	}

	public void writeOnHBase(final ClusterName clusterName, final String tableName, final List<Put> puts) throws IOException {
		final Configuration conf = configurationEnumerator.get(clusterName);
		final HTable table = new HTable(conf, tableName);

		try {
			table.put(puts);
		} finally {
			table.flushCommits();
			table.close();
		}
	}

	public void deleteFromHBase(final ClusterName clusterName, final String tableName, final List<Delete> deletes) throws IOException {
		final Configuration conf = configurationEnumerator.get(clusterName);
		final HTable table = new HTable(conf, tableName);
		try {
			table.delete(deletes);
		} finally {
			table.flushCommits();
			table.close();
		}
	}

	public void deleteColumnsFromHBase(final ClusterName clusterName, final String tableName, final List<HBaseRowDescriptor> columns) throws IOException {
		final Configuration conf = configurationEnumerator.get(clusterName);
		final HTable table = new HTable(conf, tableName);
		try {
			for(HBaseRowDescriptor desc : columns) {

				final Delete d = new Delete(Bytes.toBytes(desc.getRowKey()));
				d.setWriteToWAL(true);
				for(Column c : desc.getColumns()) {
					for(String qualifier : c.getQualifier()) {
						log.info(String.format("delete from row '%s' cf '%s:%s'", desc.getRowKey(), c.getFamily(), qualifier));
						d.deleteColumns(Bytes.toBytes(c.getFamily()), Bytes.toBytes(qualifier));
					}
				}
				table.delete(d);
			}
		} finally {
			table.flushCommits();
			table.close();
		}
	}

	public Result getRow(final ClusterName clusterName, final String tableName, final byte[] id) throws IOException {
		final Configuration conf = configurationEnumerator.get(clusterName);
		final HTable table = new HTable(conf, tableName);
		try {
			return table.get(new Get(id));
		} finally {
			table.close();
		}
	}

	public Map<String, HBaseRowDescriptor> describeRows(final ClusterName clusterName, final String tableName, final List<String> rowKeys) throws IOException {
		final Map<String, HBaseRowDescriptor> map = Maps.newHashMap();
		for(String rowKey : rowKeys) {
			map.put(rowKey, describeRow(clusterName, tableName, rowKey));
		}
		return map;
	}

	public HBaseRowDescriptor describeRow(final ClusterName clusterName, final String tableName, final String rowKey) throws IOException {
		final Configuration conf = configurationEnumerator.get(clusterName);
		final HTable table = new HTable(conf, tableName);

		final HBaseRowDescriptor desc = new HBaseRowDescriptor();

		try {
			final Result r = table.get(new Get(Bytes.toBytes(rowKey)));

			if (r.isEmpty()) {
				return desc;
			}

			final List<Column> columns = Lists.newArrayList();

			for(Entry<byte[], NavigableMap<byte[], byte[]>> e : r.getNoVersionMap().entrySet()) {
				final Set<byte[]> qualifiers = e.getValue().keySet();
				final String family = new String(e.getKey());
				final Column col = new Column(family);

				for(byte[] q : qualifiers) {
					String qs = new String(q);
					col.getQualifier().add(qs);
				}
				columns.add(col);
			}
			desc.setColumns(columns);
			desc.setRowKey(rowKey);

			return desc;
		} finally {
			table.close();
		}
	}

	public List<Result> getRows(final ClusterName clusterName, final String tableName, final Scan scan) throws IOException {
		final Configuration conf = configurationEnumerator.get(clusterName);
		try(final HTable table = new HTable(conf, tableName)) {
			final ResultScanner rs = table.getScanner(scan);
			try {
				return Lists.newArrayList(rs.iterator());
			} finally {
				rs.close();
			}
		}
	}

	public boolean deleteFromHdfs(final ClusterName clusterName, final String path) throws HadoopServiceException {
		if (StringUtils.isBlank(path))
			throw new HadoopServiceException("Cannot deleteFromHBase an empty HDFS path.");

		final Configuration conf = configurationEnumerator.get(clusterName);

		try(final FileSystem hdfs = FileSystem.get(conf)) {
			final Path absolutePath = new Path(URI.create(conf.get("fs.defaultFS") + path));

			if (hdfs.exists(absolutePath)) {
				log.debug("deleteFromHBase path: " + absolutePath.toString());
				hdfs.delete(absolutePath, true);
				log.info("deleted path: " + absolutePath.toString());
				return true;
			} else {
				log.warn("cannot deleteFromHBase unexisting path: " + absolutePath.toString());
				return false;
			}
		} catch (IOException e) {
			throw new HadoopServiceException(e);
		}
	}

	public boolean createHdfsDir(final ClusterName clusterName, final String path, final boolean force) throws HadoopServiceException {
		if (StringUtils.isBlank(path))
			throw new HadoopServiceException("Cannot create an empty HDFS path.");

		final Configuration conf = configurationEnumerator.get(clusterName);

		try(final FileSystem hdfs = FileSystem.get(conf)) {
			final Path absolutePath = new Path(URI.create(conf.get("fs.defaultFS") + path));
			if (!hdfs.exists(absolutePath)) {
				hdfs.mkdirs(absolutePath);
				log.info("created path: " + absolutePath.toString());
				return true;
			} else if (force) {
				log.info(String.format("found directory '%s', force delete it", absolutePath.toString()));
				hdfs.delete(absolutePath, true);

				hdfs.mkdirs(absolutePath);
				log.info("created path: " + absolutePath.toString());
				return true;
			} else {
				log.info(String.format("directory already exists: '%s', nothing to do", absolutePath.toString()));
				return false;
			}
		} catch (IOException e) {
			throw new HadoopServiceException(e);
		}
	}

	public boolean existHdfsPath(final ClusterName clusterName, final String path) throws HadoopServiceException {
		if (StringUtils.isBlank(path))
			throw new HadoopServiceException("invalid empty path");

		final Configuration conf = configurationEnumerator.get(clusterName);
		try(final FileSystem hdfs = FileSystem.get(conf)) {
			final Path absolutePath = new Path(URI.create(conf.get("fs.defaultFS") + path));
			return hdfs.exists(absolutePath);
		} catch (IOException e) {
			throw new HadoopServiceException(e);
		}
	}

	public Configuration getClusterConiguration(final ClusterName clusterName) {
		return configurationEnumerator.get(clusterName);
	}

	public int getMaxVersions() {
		return maxVersions;
	}

	@Required
	public void setMaxVersions(final int maxVersions) {
		this.maxVersions = maxVersions;
	}

	public HadoopClientMap getClients() {
		return clients;
	}

}

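For orientation, a minimal usage sketch of HadoopServiceCore (not part of this revision; it assumes a Spring context that wires HadoopClientMap and ConfigurationEnumerator, as in the test configuration shown earlier, and the table/family names are illustrative):

import java.io.IOException;
import java.util.List;

import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

import eu.dnetlib.data.hadoop.HadoopServiceCore;
import eu.dnetlib.data.hadoop.config.ClusterName;
import eu.dnetlib.data.hadoop.rmi.HadoopServiceException;

public class HadoopServiceCoreUsageSketch {

	// hadoopServiceCore is assumed to be injected by Spring, as in HBaseTest
	public void writeOneRecord(final HadoopServiceCore hadoopServiceCore) throws HadoopServiceException, IOException {
		// make sure the table exists with the 'result' column family (assumed names)
		hadoopServiceCore.ensureTable(ClusterName.DM, "dnet_test_table", Sets.newHashSet("result"));

		// put a single cell: row "1", family "result", qualifier "body"
		final Put put = new Put(Bytes.toBytes("1"));
		put.add(Bytes.toBytes("result"), Bytes.toBytes("body"), Bytes.toBytes("<record/>"));

		final List<Put> puts = Lists.newArrayList(put);
		hadoopServiceCore.writeOnHBase(ClusterName.DM, "dnet_test_table", puts);
	}
}
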
modules/dnet-hadoop-service/tags/dnet-hadoop-service-2.7.7/src/main/java/eu/dnetlib/data/hadoop/HadoopJob.java
package eu.dnetlib.data.hadoop;

import java.util.Date;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import eu.dnetlib.data.hadoop.action.JobMonitor;
import eu.dnetlib.data.hadoop.config.ClusterName;
import eu.dnetlib.data.hadoop.rmi.HadoopJobDescriptor;
import eu.dnetlib.data.hadoop.rmi.HadoopServiceException;
import eu.dnetlib.data.hadoop.utils.JobProfile;

public class HadoopJob {

	private static final Log log = LogFactory.getLog(HadoopJob.class); // NOPMD by marko on 11/24/08 5:02 PM

	/**
	 * Defines the possible stati of an hadoop job.
	 */
	public static enum Status {
		PREP, RUNNING, SUCCEEDED, KILLED, FAILED, SUSPENDED, UNKNOWN
	}

	private final Executor executor = Executors.newSingleThreadExecutor();

	private final JobMonitor jobMonitor;

	private final JobProfile jobProfile;

	private final ClusterName clusterName;

	private final String id;

	public static HadoopJob newInstance(String id, ClusterName clusterName, JobProfile profile, JobMonitor jobMonitor) {
		return new HadoopJob(id, clusterName, profile, jobMonitor);
	}

	private HadoopJob(String id, ClusterName clusterName, JobProfile jobProfile, JobMonitor jobMonitor) {
		super();
... This diff was truncated because it exceeds the maximum size that can be displayed.
