Revision 57767
Added by Michele De Bonis over 4 years ago
DedupUtils.java | ||
---|---|---|
19 | 19 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
20 | 20 |
import eu.dnetlib.data.transform.xml.AbstractDNetXsltFunctions; |
21 | 21 |
import eu.dnetlib.pace.config.DedupConfig; |
22 |
import org.apache.commons.lang3.StringUtils; |
|
22 | 23 |
import org.apache.hadoop.hbase.io.ImmutableBytesWritable; |
23 | 24 |
import org.apache.hadoop.hbase.util.Bytes; |
24 | 25 |
|
... | ... | |
39 | 40 |
public static String newId(final String id, final String dedupRun) { |
40 | 41 |
if ((dedupRun == null) || (dedupRun.length() != 3)) throw new IllegalArgumentException("wrong dedupRun param"); |
41 | 42 |
|
42 |
- return id.replaceFirst("\\|.*\\:\\:", dedupPrefix(dedupRun));
|
|
43 |
+ return StringUtils.substringBefore(id,"|") + dedupPrefix(dedupRun) + "::" + AbstractDNetXsltFunctions.md5(id);
|
|
43 | 44 |
} |
44 | 45 |
|
45 | 46 |
public static byte[] newIdBytes(final String s, final String dedupRun) { |
Also available in: Unified diff
Commit message: Update in the generation of the master index.