Revision 45625
Added by Eri Katsari about 7 years ago
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/test/java/BuildTest.java | ||
---|---|---|
25 | 25 |
BlockReducer blockReducer = new BlockReducer(); |
26 | 26 |
|
27 | 27 |
Configuration configuration = new Configuration(); |
28 |
configuration.set("lod.sourceMappings", "{\"result\":[\"http://www.w3.org/1999/02/22-rdf-syntax-ns#type\",\"http://purl.org/dc/terms/identifier\",\"http://lod.openaire.eu/vocab/dateOfTransformation\",\"http://lod.openaire.eu/vocab/dateOfCollection\",\"http://purl.org/dc/terms/identifier\",\"http://www.eurocris.org/ontologies/cerif/1.3#name\",\"http://purl.org/dc/terms/dateAccepted\",\"http://purl.org/dc/terms/publisher\",\"http://purl.org/dc/terms/identifier\",\"http://purl.org/dc/terms/language\",\"http://purl.org/dc/terms/date\",\"http://lod.openaire.eu/vocab/resultSubject\",\"http://lod.openaire.eu/vocab/externalReference\",\"http://purl.org/dc/terms/source\",\"http://purl.org/dc/terms/format\",\"http://lod.openaire.eu/vocab/context\",\"http://dbpedia.org/ontology/country\",\"http://purl.org/dc/terms/accessRights\",\"http://purl.org/dc/terms/description\",\"http://lsdis.cs.uga.edu/projects/semdis/opus#journal_name\",\"http://lod.openaire.eu/vocab/dataSourceType\",\"http://lod.openaire.eu/vocab/device\",\"http://lod.openaire.eu/vocab/size\",\"http://lod.openaire.eu/vocab/version\",\"http://lod.openaire.eu/vocab/lastMetadataUpdate\",\"http://lod.openaire.eu/vocab/metadataVersion\",\"http://lod.openaire.eu/vocab/year\",\"http://lod.openaire.eu/vocab/resultType\"],\"project\": 
[\"http://www.w3.org/1999/02/22-rdf-syntax-ns#type\",\"http://purl.org/dc/terms/identifier\",\"http://lod.openaire.eu/vocab/dateOfTransformation\",\"http://lod.openaire.eu/vocab/dateOfCollection\",\"http://purl.org/dc/terms/identifier\",\"http://lod.openaire.eu/vocab/projectCode\",\"http://schema.org/url\",\"http://www.eurocris.org/ontologies/cerif/1.3#acronym\",\"http://www.eurocris.org/ontologies/cerif/1.3#name\",\"http://www.eurocris.org/ontologies/cerif/1.3#startDate\",\"http://www.eurocris.org/ontologies/cerif/1.3#endDate\",\"http://purl.org/cerif/frapo/hasCallIdentifier\",\"http://www.eurocris.org/ontologies/cerif/1.3#keyword\",\"http://www.w3.org/2006/time#hasDurationDescription\",\"http://lod.openaire.eu/vocab/ec_SC39\",\"http://lod.openaire.eu/vocab/contractType\",\"http://lod.openaire.eu/vocab/oaMandatePublications\",\"http://lod.openaire.eu/vocab/projectSubjects\",\"http://od.openaire.eu/vocab/ec_article29-3\",\"http://lod.openaire.eu/vocab/funder\",\"http://lod.openaire.eu/vocab/fundingLevel0\",\"http://lod.openaire.eu/vocab/fundingLevel1\",\"http://lod.openaire.eu/vocab/fundingLevel2\",\"http://lod.openaire.eu/vocab/fundingLevel3\"],\"person\": [\"http://www.w3.org/1999/02/22-rdf-syntax-ns#type\",\"http://purl.org/dc/terms/identifier\",\"http://lod.openaire.eu/vocab/dateOfTransformation\",\"http://lod.openaire.eu/vocab/dateOfCollection\",\"http://purl.org/dc/terms/identifier\", \"http://xmlns.com/foaf/0.1/firstName\",\"http://xmlns.com/foaf/0.1/lastName\", \"http://xmlns.com/foaf/0.1/name\",\"http://schema.org/faxNumber\",\"http://xmlns.com/foaf/0.1/mbox\",\"http://xmlns.com/foaf/0.1/phone\", \"http://schema.org/nationality\",\"http://purl.org/dc/terms/identifier\", \"http://lod.openaire.eu/vocab/trust\"],\"organization\": 
[\"http://www.w3.org/1999/02/22-rdf-syntax-ns#type\",\"http://purl.org/dc/terms/identifier\",\"http://lod.openaire.eu/vocab/dateOfTransformation\",\"http://lod.openaire.eu/vocab/dateOfCollection\",\"http://purl.org/dc/terms/identifier\",\"http://www.w3.org/2004/02/skos/core#altLabel\",\"http://www.w3.org/2004/02/skos/core#prefLabel\",\"http://lod.openaire.eu/vocab/webSiteUrl\",\"http://xmlns.com/foaf/0.1/logo\",\"http://dbpedia.org/ontology/country\",\"http://lod.openaire.eu/vocab/entityType\" ],\"datasource\":[\"http://www.w3.org/1999/02/22-rdf-syntax-ns#type\",\"http://purl.org/dc/terms/identifier\",\"http://lod.openaire.eu/vocab/dateOfTransformation\",\"http://lod.openaire.eu/vocab/dateOfCollection\",\"http://purl.org/dc/terms/identifier\",\"http://lod.openaire.eu/vocab/datasourceType\",\"http://lod.openaire.eu/vocab/openAIRECompatibility\",\"http://dbpedia.org/ontology/officialName\",\"http://lod.openaire.eu/vocab/englishName\",\"http://schema.org/url\",\"http://xmlns.com/foaf/0.1/logo\",\"http://xmlns.com/foaf/0.1/mbox\",\"http://purl.org/vocab/vann/preferredNamespacePrefix\",\"http://www.w3.org/2003/01/geo/wgs84_pos#lat\",\"http://www.w3.org/2003/01/geo/wgs84_pos#long\",\"http://lod.openaire.eu/vocab/dateOfValidity\",\"http://purl.org/dc/terms/description\",\"http://lod.openaire.eu/vocab/subjectList\",\"http://lod.openaire.eu/numberOfItems\",\"http://purl.org/dc/terms/date\",\"http://lod.openaire.eu/vocab/policies\",\"http://lod.openaire.eu/vocab/languages\",\"http://lod.openaire.eu/vocab/contentType\",\"http://lod.openaire.eu/vocab/accessInfoPackage\",\"http://lod.openaire.eu/vocab/releaseStartDate\",\"http://lod.openaire.eu/vocab/releaseEndDate\",\"http://lod.openaire.eu/vocab/missionStatementUrl\",\"http://www.europeana.eu/schemas/edm/dataProvider\",\"http://lod.openaire.eu/vocab/serviceProvider\",\"http://lod.openaire.eu/vocab/databaseAccessType\",\"http://lod.openaire.eu/vocab/dataUploadType\",\"http://lod.openaire.eu/vocab/dataUploadRestrictions\",\"http
://lod.openaire.eu/vocab/versioning\",\"http://lod.openaire.eu/vocab/citationGuidelineUrl\",\"http://lod.openaire.eu/vocab/qualityManagementKind\",\"http://lod.openaire.eu/vocab/pidSystems\",\"http://lod.openaire.eu/vocab/certificates\",\"http://purl.org/dc/terms/accessRights\"]}"); |
|
29 |
configuration.set("lod.redisHost", "194.177.192.118"); |
|
28 |
String sourceMappings = "{\"type\":\"result\", \"fields\":[\"<http://purl.org/dc/terms/identifier>\",\"<http://www.eurocris.org/ontologies/cerif/1.3#name>\",\"<http://lod.openaire.eu/vocab/year>\"]}"; |
|
29 |
configuration.set("lod.redisHost", "83.212.96.39"); |
|
30 |
configuration.set("lod.sourceMappings", sourceMappings); |
|
31 |
|
|
32 |
String stopwords = "a,able,about,above,abst,accordance,according,accordingly,across,act,actually,added,adj,affected,affecting,affects,after,afterwards,again,against,ah,all,almost,alone,along,already,also,although,always,am,among,amongst,an,and,announce,another,any,anybody,anyhow,anymore,anyone,anything,anyway,anyways,anywhere,apparently,approximately,are,aren,arent,arise,around,as,aside,ask,asking,at,auth,available,away,awfully,b,back,be,became,because,become,becomes,becoming,been,before,beforehand,begin,beginning,beginnings,begins,behind,being,believe,below,beside,besides,between,beyond,biol,both,brief,briefly,but,by,c,ca,came,can,cannot,cant,cause,causes,certain,certainly,co,com,come,comes,contain,containing,contains,could,couldnt,d,date,did,didnt,different,do,does,doesnt,doing,done,dont,down,downwards,due,during,e,each,ed,edu,effect,eg,eight,eighty,either,else,elsewhere,end,ending,enough,especially,et,et-al,etc,even,ever,every,everybody,everyone,everything,everywhere,ex,except,f,far,few,ff,fifth,first,five,fix,followed,following,follows,for,former,formerly,forth,found,four,from,further,furthermore,g,gave,get,gets,getting,give,given,gives,giving,go,goes,gone,got,gotten,h,had,happens,hardly,has,hasnt,have,havent,having,he,hed,hence,her,here,hereafter,hereby,herein,heres,hereupon,hers,herself,hes,hi,hid,him,himself,his,hither,home,how,howbeit,however,hundred,i,id,ie,if,ill,im,immediate,immediately,importance,important,in,inc,indeed,index,information,instead,into,invention,inward,is,isnt,it,itd,itll,its,itself,ive,j,just,k,keep, 
keeps,kept,kg,km,know,known,knows,l,largely,last,lately,later,latter,latterly,least,less,lest,let,lets,like,liked,likely,line,little,ll,look,looking,looks,ltd,m,made,mainly,make,makes,many,may,maybe,me,mean,means,meantime,meanwhile,merely,mg,might,million,miss,ml,more,moreover,most,mostly,mr,mrs,much,mug,must,my,myself,n,na,name,namely,nay,nd,near,nearly,necessarily,necessary,need,needs,neither,never,nevertheless,new,next,nine,ninety,no,nobody,non,none,nonetheless,noone,nor,normally,nos,not,noted,nothing,now,nowhere,o,obtain,obtained,obviously,of,off,often,oh,ok,okay,old,omitted,on,once,one,ones,only,onto,or,ord,other,others,otherwise,ought,our,ours,ourselves,out,outside,over,overall,owing,own,p,page,pages,part,particular,particularly,past,per,perhaps,placed,please,plus,poorly,possible,possibly,potentially,pp,predominantly,present,previously,primarily,probably,promptly,proud,provides,put,q,que,quickly,quite,qv,r,ran,rather,rd,re,readily,really,recent,recently,ref,refs,regarding,regardless,regards,related,relatively,research,respectively,resulted,resulting,results,right,run,s,said,same,saw,say,saying,says,sec,section,see,seeing,seem,seemed,seeming,seems,seen,self,selves,sent,seven,several,shall,she,shed,shell,shes,should,shouldnt,show,showed,shown,showns,shows,significant,significantly,similar,similarly,since,six,slightly,so,some,somebody,somehow,someone,somethan,something,sometime,sometimes,somewhat,somewhere,soon,sorry,specifically,specified,specify,specifying,still,stop,strongly,sub,substantially,successfully,such,sufficiently,suggest,sup,sure,t,take,taken,taking,tell,tends,th,than,thank,thanks,thanx,that,thatll,thats,thatve,the,their,theirs,them,themselves,then,thence,there,thereafter,thereby,thered,therefore,therein,therell,thereof,therere,theres,thereto,thereupon,thereve,these,they,theyd,theyll,theyre,theyve,think,this,those,thou,though,thoughh,thousand,throug,through,throughout,thru,thus,til,tip,to,together,too,took,toward,towards,tried,tries,truly,try,trying,
ts,twice,two,u,un,under,unfortunately,unless,unlike,unlikely,until,unto,up,upon,ups,us,use,used,useful,usefully,usefulness,uses,using,usually,v,value,various,ve,very,via,viz,vol,vols,vs,w,want,wants,was,wasnt,way,we,wed,welcome,well,went,were,werent,weve,what,whatever,whatll,whats,when,whence,whenever,where,whereafter,whereas,whereby,wherein,wheres,whereupon,wherever,whether,which,while,whim,whither,who,whod,whoever,whole,wholl,whom,whomever,whos,whose,why,widely,willing,wish,with,within,without,wont,words,world,would,wouldnt,www,x,y,yes,yet,you,youd,youll,your,youre,yours,yourself,yourselves,youve,z,zero\n"; |
|
33 |
configuration.set("lod.stopwords", stopwords); |
|
34 |
|
|
30 | 35 |
configuration.set("lod.redisPort", "6379"); |
31 | 36 |
|
37 |
String targetMappings = "{\"type\":\"publications\",\"fields\":[\"<http://purl.org/dc/terms/issued>\",\"<http://www.w3.org/2000/01/rdf-schema#label>\",\"<http://purl.org/dc/terms/identifier>\"]}"; |
|
38 |
configuration.set("lod.targetMappings", targetMappings); |
|
39 |
|
|
32 | 40 |
sourceMapDriver = MapDriver.newMapDriver(sourceBuildMapper).withConfiguration(configuration); |
33 | 41 |
targetMapDriver = MapDriver.newMapDriver(targetBuildMapper).withConfiguration(configuration); |
34 | 42 |
|
... | ... | |
47 | 55 |
@Test |
48 | 56 |
public void testTargetMapper() throws IOException { |
49 | 57 |
|
50 |
targetMapDriver.withInput(new LongWritable(1), new Text("id,<http://dblp.l3s.de/d2r/resource/publications/journals/advai/Luis-GarciaP16>,<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>,<http://xmlns.com/foaf/0.1/Document>\n"));
|
|
58 |
targetMapDriver.withInput(new LongWritable(1), new Text("<http://dblp.l3s.de/d2r/resource/publications/books/acm/0082477>\t<http://purl.org/dc/terms/issued>\t\"1992\"\t.\t<http://dblp.l3s.de/d2r/resource/publications/books/acm/0082477>\t<http://www.w3.org/2000/01/rdf-schema#label>\t\"The no-nonsense guide to computing careers.\"\t.\t"));
|
|
51 | 59 |
targetMapDriver.run(); |
52 | 60 |
} |
53 | 61 |
|
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/test/java/PreprocessingTest.java | ||
---|---|---|
39 | 39 |
Configuration configuration = new Configuration(); |
40 | 40 |
String sourceMappings = "{\"type\":\"result\", \"fields\":[\"<http://purl.org/dc/terms/identifier>\",\"<http://www.eurocris.org/ontologies/cerif/1.3#name>\",\"<http://lod.openaire.eu/vocab/year>\"]}"; |
41 | 41 |
configuration.set("lod.sourceMappings", sourceMappings); |
42 |
|
|
43 | 42 |
mapDriver = MapDriver.newMapDriver(new SourceMapper()); |
44 | 43 |
mapDriver.withConfiguration(configuration); |
45 | 44 |
mapDriver.withInput(new LongWritable(1), new Text("<http://lod.openaire.eu/data/result/od_______908::bbaecb13949279cda128a66545446b76> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.eurocris.org/ontologies/cerif/1.3#ResultEntity> .\n")) |
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/build/SourceBuildMapper.java | ||
---|---|---|
9 | 9 |
import org.apache.hadoop.io.Text; |
10 | 10 |
import org.apache.hadoop.mapreduce.Mapper; |
11 | 11 |
import org.apache.log4j.Logger; |
12 |
import org.matheclipse.core.reflection.system.E; |
|
12 | 13 |
|
13 | 14 |
import java.io.BufferedWriter; |
14 | 15 |
import java.io.IOException; |
... | ... | |
47 | 48 |
private String uriPrefix; |
48 | 49 |
private String stopWords; |
49 | 50 |
private Map<String, Integer> stopWordsMap = new HashMap(); |
51 |
private static final String LINE_DELIM="\t.\t"; |
|
52 |
private static final String FIELD_DELIM="\t"; |
|
50 | 53 |
|
51 |
|
|
52 | 54 |
public static enum SOURCE_BUILD_COUNTERS { |
53 | 55 |
BLOCKING_KEYS, |
54 | 56 |
REDIS_RECORDS |
... | ... | |
57 | 59 |
|
58 | 60 |
@Override |
59 | 61 |
protected void setup(Context context) throws IOException, InterruptedException { |
60 |
lodConfiguration = new LodConfiguration(); |
|
61 |
lodConfiguration.load(context.getConfiguration().get("lod.sourceMappings")); |
|
62 |
redisHost = context.getConfiguration().get("lod.redisHost"); |
|
63 |
redisPort = Integer.parseInt(context.getConfiguration().get("lod.redisPort")); |
|
64 |
log.debug("Redis connection info : " + "redis://" + redisHost + ":" + redisPort); |
|
65 |
client = RedisClient.create("redis://" + redisHost + ":" + redisPort); |
|
66 |
connection = client.connect(); |
|
67 |
uriPrefix = context.getConfiguration().get("lod.prefix"); |
|
68 |
stopWords = context.getConfiguration().get("lod.stopwords"); |
|
62 |
try { |
|
63 |
lodConfiguration = new LodConfiguration(); |
|
64 |
lodConfiguration.load(context.getConfiguration().get("lod.sourceMappings")); |
|
65 |
redisHost = context.getConfiguration().get("lod.redisHost"); |
|
66 |
redisPort = Integer.parseInt(context.getConfiguration().get("lod.redisPort")); |
|
67 |
log.debug("Redis connection info : " + "redis://" + redisHost + ":" + redisPort); |
|
68 |
client = RedisClient.create("redis://" + redisHost + ":" + redisPort); |
|
69 |
connection = client.connect(); |
|
70 |
uriPrefix = context.getConfiguration().get("lod.prefix"); |
|
71 |
stopWords = context.getConfiguration().get("lod.stopwords"); |
|
69 | 72 |
|
70 |
for (String stopword : stopWords.split(",")) { |
|
71 |
stopWordsMap.put(stopword, 0); |
|
73 |
for (String stopword : stopWords.split(",")) { |
|
74 |
stopWordsMap.put(stopword, 0); |
|
75 |
} |
|
76 |
System.out.println("Stopwords size " + stopWordsMap.size()); |
|
77 |
log.debug("Stopwords size " + stopWordsMap.size()); |
|
78 |
} catch (Exception ex) { |
|
79 |
log.error("An error occured during Mapper Setup " + ex.toString(), ex); |
|
80 |
System.out.println(ex.getCause().toString()); |
|
72 | 81 |
} |
73 | 82 |
} |
74 | 83 |
|
... | ... | |
78 | 87 |
try { |
79 | 88 |
|
80 | 89 |
StringBuilder id = new StringBuilder(); |
81 |
String[] triples = result.toString().split(".");
|
|
82 |
for(String triple:triples){
|
|
83 |
String [] fields=triple.split("\t");
|
|
84 |
if(id.length()<1){
|
|
85 |
id=id.append("source_").append(fields[0]);
|
|
90 |
String[] triples = result.toString().split(LINE_DELIM);
|
|
91 |
for (String triple : triples) {
|
|
92 |
String[] fields = triple.split(FIELD_DELIM);
|
|
93 |
if (id.length() < 1) {
|
|
94 |
id = id.append("source_").append(fields[0]);
|
|
86 | 95 |
} |
87 | 96 |
|
88 |
String property=fields[1];
|
|
89 |
String value=fields[2];
|
|
97 |
String property = fields[1];
|
|
98 |
String value = fields[2];
|
|
90 | 99 |
List<String> blockingKeys = Blocking.tokenBlocking(value, stopWordsMap); |
91 | 100 |
for (String blockingKey : blockingKeys) { |
92 | 101 |
//Write BlockingKey, RecordID to output |
... | ... | |
94 | 103 |
context.getCounter(SOURCE_BUILD_COUNTERS.BLOCKING_KEYS).increment(1); |
95 | 104 |
} |
96 | 105 |
} |
97 |
writeToRedis(id.toString(), result.toString(),context); |
|
106 |
writeToRedis(id.toString(), result.toString(), context);
|
|
98 | 107 |
} catch (Exception e) { |
99 | 108 |
log.error("Error writing entity to M/R output", e); |
100 | 109 |
log.error("result error " + result.toString()); |
... | ... | |
105 | 114 |
} |
106 | 115 |
|
107 | 116 |
|
108 |
private void writeToRedis(String key, String value,Context context) throws Exception { |
|
117 |
private void writeToRedis(String key, String value, Context context) throws Exception {
|
|
109 | 118 |
|
110 | 119 |
try { |
111 | 120 |
connection.set(key, value); |
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/build/TargetBuildMapper.java | ||
---|---|---|
38 | 38 |
private RedisClient client; |
39 | 39 |
private String redisHost; |
40 | 40 |
private Integer redisPort; |
41 |
|
|
42 | 41 |
private FileSystem hdfs; |
43 |
private OutputStream os; |
|
44 |
private BufferedWriter br; |
|
45 |
|
|
46 | 42 |
private String uriPrefix; |
47 | 43 |
private String stopWords; |
48 | 44 |
private Map<String, Integer> stopWordsMap = new HashMap<>(); |
49 |
private String entityType; |
|
45 |
private static final String LINE_DELIM="\t.\t"; |
|
46 |
private static final String FIELD_DELIM="\t"; |
|
50 | 47 |
|
51 | 48 |
public static enum TARGET_BUILD_COUNTERS { |
52 | 49 |
|
... | ... | |
58 | 55 |
@Override |
59 | 56 |
protected void setup(Context context) throws IOException, InterruptedException { |
60 | 57 |
|
61 |
lodConfiguration = new LodConfiguration(); |
|
62 |
lodConfiguration.load(context.getConfiguration().get("lod.targetMappings")); |
|
63 |
redisHost = context.getConfiguration().get("lod.redisHost"); |
|
64 |
redisPort = Integer.parseInt(context.getConfiguration().get("lod.redisPort")); |
|
65 |
client = RedisClient.create("redis://" + redisHost + ":" + redisPort); |
|
66 |
log.debug("Redis connection info : " + "redis://" + redisHost + ":" + redisPort); |
|
58 |
try { |
|
59 |
lodConfiguration = new LodConfiguration(); |
|
67 | 60 |
|
68 |
connection = client.connect(); |
|
69 |
uriPrefix = context.getConfiguration().get("lod.prefix"); |
|
70 |
stopWords = context.getConfiguration().get("lod.stopwords"); |
|
71 |
for (String stopword : stopWords.split(",")) { |
|
72 |
stopWordsMap.put(stopword, 0); |
|
61 |
lodConfiguration.load(context.getConfiguration().get("lod.targetMappings")); |
|
62 |
redisHost = context.getConfiguration().get("lod.redisHost"); |
|
63 |
redisPort = Integer.parseInt(context.getConfiguration().get("lod.redisPort")); |
|
64 |
client = RedisClient.create("redis://" + redisHost + ":" + redisPort); |
|
65 |
log.debug("Redis connection info : " + "redis://" + redisHost + ":" + redisPort); |
|
66 |
|
|
67 |
connection = client.connect(); |
|
68 |
uriPrefix = context.getConfiguration().get("lod.prefix"); |
|
69 |
stopWords = context.getConfiguration().get("lod.stopwords"); |
|
70 |
for (String stopword : stopWords.split(",")) { |
|
71 |
stopWordsMap.put(stopword, 0); |
|
72 |
} |
|
73 |
|
|
74 |
log.info("Stopwords size " + stopWordsMap.size()); |
|
75 |
System.out.println("Stopwords size " + stopWordsMap.size()); |
|
76 |
|
|
77 |
} catch (Exception ex) { |
|
78 |
log.error("An error occured during Mapper Setup " + ex.toString(), ex); |
|
79 |
System.out.println(ex.getCause().toString()); |
|
73 | 80 |
} |
74 | 81 |
|
75 | 82 |
} |
... | ... | |
77 | 84 |
|
78 | 85 |
@Override |
79 | 86 |
protected void map(final LongWritable keyIn, final Text result, final Context context) throws IOException { |
80 |
|
|
81 | 87 |
try { |
82 | 88 |
//get ID |
83 | 89 |
StringBuilder id = new StringBuilder(); |
84 |
String[] triples = result.toString().split("."); |
|
85 |
for(String triple:triples){ |
|
86 |
String [] fields=triple.split("\t"); |
|
87 |
|
|
88 |
if(id.length()<1){ |
|
89 |
id=id.append("target_").append(fields[0]); |
|
90 |
String[] triples = result.toString().split(LINE_DELIM); |
|
91 |
for (String triple : triples) { |
|
92 |
String[] fields = triple.split(FIELD_DELIM); |
|
93 |
if (id.length()<1) { |
|
94 |
id.append("target_").append(fields[0]); |
|
90 | 95 |
} |
91 | 96 |
|
92 |
String value=fields[2];
|
|
97 |
String value = fields[2];
|
|
93 | 98 |
List<String> blockingKeys = Blocking.tokenBlocking(value, stopWordsMap); |
94 | 99 |
for (String blockingKey : blockingKeys) { |
95 | 100 |
//Write BlockingKey, RecordID to output |
... | ... | |
97 | 102 |
context.getCounter(TARGET_BUILD_COUNTERS.BLOCKING_KEYS).increment(1); |
98 | 103 |
} |
99 | 104 |
} |
100 |
writeToRedis(id.toString(), result.toString(),context); |
|
105 |
writeToRedis(id.toString(), result.toString(), context);
|
|
101 | 106 |
} catch (Exception e) { |
102 | 107 |
log.error("Error writing entity to M/R output", e); |
103 | 108 |
log.error("result error " + result.toString()); |
... | ... | |
106 | 111 |
|
107 | 112 |
} |
108 | 113 |
|
109 |
private void writeToRedis(String key, String value,Context context) throws Exception { |
|
114 |
private void writeToRedis(String key, String value, Context context) throws Exception {
|
|
110 | 115 |
|
111 | 116 |
try { |
112 | 117 |
connection.set(key, value); |
modules/dnet-openaire-lodinterlinking/branches/cacheOptimized/src/main/java/eu/dnetlib/data/mapreduce/hbase/lodExport/utils/Blocking.java | ||
---|---|---|
18 | 18 |
|
19 | 19 |
List<String> blockingKeys = new ArrayList<>(); |
20 | 20 |
Map<String, Integer> blockingKeysMap = new HashMap<>(); |
21 |
System.out.println("generated tokens " + tokens); |
|
21 | 22 |
|
22 | 23 |
for (int j = 0; j < tokens.length; j++) { |
23 | 24 |
String currentToken = tokens[j]; |
Also available in: Unified diff
Refactored Build according to new parsing.