Revision 52908
Added by Miriam Baglioni over 5 years ago
modules/dnet-openaireplus-mapping-utils/branches/solr7/src/main/java/eu/dnetlib/data/bulktag/ZenodoCommunity.java | ||
---|---|---|
1 |
package eu.dnetlib.data.bulktag; |
|
2 |
|
|
3 |
import org.dom4j.Node; |
|
4 |
|
|
5 |
/** |
|
6 |
* Created by miriam on 01/08/2018. |
|
7 |
*/ |
|
8 |
public class ZenodoCommunity { |
|
9 |
|
|
10 |
private String zenodoCommunityId; |
|
11 |
|
|
12 |
private SelectionCriteria selCriteria; |
|
13 |
|
|
14 |
public String getZenodoCommunityId() { |
|
15 |
return zenodoCommunityId; |
|
16 |
} |
|
17 |
|
|
18 |
public void setZenodoCommunityId(String zenodoCommunityId) { |
|
19 |
this.zenodoCommunityId = zenodoCommunityId; |
|
20 |
} |
|
21 |
|
|
22 |
public SelectionCriteria getSelCriteria() { |
|
23 |
return selCriteria; |
|
24 |
} |
|
25 |
|
|
26 |
public void setSelCriteria(SelectionCriteria selCriteria) { |
|
27 |
this.selCriteria = selCriteria; |
|
28 |
} |
|
29 |
|
|
30 |
} |
modules/dnet-openaireplus-mapping-utils/branches/solr7/src/main/java/eu/dnetlib/data/bulktag/Community.java | ||
---|---|---|
1 |
package eu.dnetlib.data.bulktag; |
|
2 |
|
|
3 |
import com.google.gson.Gson; |
|
4 |
import org.apache.commons.logging.Log; |
|
5 |
import org.apache.commons.logging.LogFactory; |
|
6 |
|
|
7 |
import java.util.ArrayList; |
|
8 |
import java.util.List; |
|
9 |
|
|
10 |
/** |
|
11 |
* Created by miriam on 01/08/2018. |
|
12 |
*/ |
|
13 |
public class Community { |
|
14 |
|
|
15 |
private static final Log log = LogFactory.getLog(Community.class); |
|
16 |
|
|
17 |
private String id; |
|
18 |
private List<String> subjects = new ArrayList<>(); |
|
19 |
private List<Datasource> datasources = new ArrayList<>(); |
|
20 |
private List<ZenodoCommunity> zenodoCommunities = new ArrayList<>(); |
|
21 |
|
|
22 |
public String toJson() { |
|
23 |
final Gson g = new Gson(); |
|
24 |
return g.toJson(this); |
|
25 |
} |
|
26 |
|
|
27 |
public boolean isValid() { |
|
28 |
return !getSubjects().isEmpty() || !getDatasources().isEmpty() || !getZenodoCommunities().isEmpty(); |
|
29 |
} |
|
30 |
|
|
31 |
public String getId() { |
|
32 |
return id; |
|
33 |
} |
|
34 |
|
|
35 |
public void setId(String id) { |
|
36 |
this.id = id; |
|
37 |
} |
|
38 |
|
|
39 |
public List<String> getSubjects() { |
|
40 |
return subjects; |
|
41 |
} |
|
42 |
|
|
43 |
public void setSubjects(List<String> subjects) { |
|
44 |
this.subjects = subjects; |
|
45 |
} |
|
46 |
|
|
47 |
public List<Datasource> getDatasources() { |
|
48 |
return datasources; |
|
49 |
} |
|
50 |
|
|
51 |
public void setDatasources(List<Datasource> datasources) { |
|
52 |
this.datasources = datasources; |
|
53 |
} |
|
54 |
|
|
55 |
public List<ZenodoCommunity> getZenodoCommunities() { |
|
56 |
return zenodoCommunities; |
|
57 |
} |
|
58 |
|
|
59 |
public void setZenodoCommunities(List<ZenodoCommunity> zenodoCommunities) { |
|
60 |
this.zenodoCommunities = zenodoCommunities; |
|
61 |
} |
|
62 |
|
|
63 |
} |
modules/dnet-openaireplus-mapping-utils/branches/solr7/src/main/java/eu/dnetlib/data/bulktag/CommunityConfigurationFactory.java | ||
---|---|---|
1 |
package eu.dnetlib.data.bulktag; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
import com.google.common.collect.Maps; |
|
5 |
import com.google.gson.Gson; |
|
6 |
import org.apache.commons.lang3.StringUtils; |
|
7 |
import org.apache.commons.logging.Log; |
|
8 |
import org.apache.commons.logging.LogFactory; |
|
9 |
import org.dom4j.Document; |
|
10 |
import org.dom4j.DocumentException; |
|
11 |
import org.dom4j.Node; |
|
12 |
import org.dom4j.io.SAXReader; |
|
13 |
|
|
14 |
import java.io.StringReader; |
|
15 |
import java.util.ArrayList; |
|
16 |
import java.util.List; |
|
17 |
import java.util.Map; |
|
18 |
|
|
19 |
/** |
|
20 |
* Created by miriam on 03/08/2018. |
|
21 |
*/ |
|
22 |
public class CommunityConfigurationFactory { |
|
23 |
|
|
24 |
private static final Log log = LogFactory.getLog(CommunityConfigurationFactory.class); |
|
25 |
|
|
26 |
public static CommunityConfiguration newInstance(final String xml) throws DocumentException { |
|
27 |
|
|
28 |
log.debug(String.format("parsing community configuration from:\n%s", xml)); |
|
29 |
|
|
30 |
final Document doc = new SAXReader().read(new StringReader(xml)); |
|
31 |
|
|
32 |
final Map<String,Community> communities = Maps.newHashMap(); |
|
33 |
|
|
34 |
for(final Object o : doc.selectNodes("//community")) { |
|
35 |
|
|
36 |
final Node node = (Node) o; |
|
37 |
|
|
38 |
final Community community = parseCommunity(node); |
|
39 |
|
|
40 |
if (community.isValid()) { |
|
41 |
communities.put(community.getId(), community); |
|
42 |
} |
|
43 |
} |
|
44 |
|
|
45 |
log.info(String.format("loaded %s community configuration profiles", communities.size())); |
|
46 |
log.debug(String.format("loaded community configuration:\n%s", communities.toString())); |
|
47 |
|
|
48 |
return new CommunityConfiguration(communities); |
|
49 |
} |
|
50 |
|
|
51 |
public static CommunityConfiguration fromJson(final String json) { |
|
52 |
return new Gson().fromJson(json, CommunityConfiguration.class); |
|
53 |
} |
|
54 |
|
|
55 |
private static Community parseCommunity(final Node node) { |
|
56 |
|
|
57 |
final Community c = new Community(); |
|
58 |
|
|
59 |
c.setId(node.valueOf("./@id")); |
|
60 |
|
|
61 |
log.info(String.format("community id: %s", c.getId())); |
|
62 |
|
|
63 |
c.setSubjects(parseSubjects(node)); |
|
64 |
c.setDatasources(parseDatasources(node)); |
|
65 |
c.setZenodoCommunities(parseZenodoCommunities(node)); |
|
66 |
|
|
67 |
return c; |
|
68 |
} |
|
69 |
|
|
70 |
private static List<String> parseSubjects(final Node node) { |
|
71 |
|
|
72 |
final List<String> subjects = Lists.newArrayList(); |
|
73 |
|
|
74 |
final List <Node> list = node.selectNodes("./subjects/subject"); |
|
75 |
|
|
76 |
for(Node n : list){ |
|
77 |
log.debug("text of the node " + n.getText()); |
|
78 |
subjects.add(StringUtils.trim(n.getText())); |
|
79 |
} |
|
80 |
log.info("size of the subject list " + subjects.size()); |
|
81 |
return subjects; |
|
82 |
} |
|
83 |
|
|
84 |
private static List<Datasource> parseDatasources(final Node node) { |
|
85 |
final List <Node> list = node.selectNodes("./datasources/datasource"); |
|
86 |
final List<Datasource> datasourceList = new ArrayList<>(); |
|
87 |
for(Node n : list){ |
|
88 |
Datasource d = new Datasource(); |
|
89 |
d.setOpenaireId(n.selectSingleNode("./openaireId").getText()); |
|
90 |
d.setSelCriteria(new SelectionCriteria(n.selectSingleNode("./selcriteria"))); |
|
91 |
datasourceList.add(d); |
|
92 |
} |
|
93 |
log.info("size of the datasource list " + datasourceList.size()); |
|
94 |
return datasourceList; |
|
95 |
} |
|
96 |
|
|
97 |
private static List<ZenodoCommunity> parseZenodoCommunities(final Node node) { |
|
98 |
|
|
99 |
final List<Node> list = node.selectNodes("./zenodocommunities/zenodocommunity"); |
|
100 |
final List<ZenodoCommunity> zenodoCommunityList = new ArrayList<>(); |
|
101 |
for(Node n : list){ |
|
102 |
ZenodoCommunity zc = new ZenodoCommunity(); |
|
103 |
zc.setZenodoCommunityId(n.selectSingleNode("./zenodoid").getText()); |
|
104 |
zc.setSelCriteria(new SelectionCriteria(n.selectSingleNode("./selcriteria"))); |
|
105 |
|
|
106 |
zenodoCommunityList.add(zc); |
|
107 |
} |
|
108 |
log.info("size of the zenodo community list " + zenodoCommunityList.size()); |
|
109 |
return zenodoCommunityList; |
|
110 |
} |
|
111 |
|
|
112 |
|
|
113 |
|
|
114 |
|
|
115 |
} |
modules/dnet-openaireplus-mapping-utils/branches/solr7/src/main/java/eu/dnetlib/data/bulktag/SelectionCriteria.java | ||
---|---|---|
1 |
package eu.dnetlib.data.bulktag; |
|
2 |
|
|
3 |
import org.dom4j.Node; |
|
4 |
|
|
5 |
/** |
|
6 |
* Created by miriam on 02/08/2018. |
|
7 |
*/ |
|
8 |
public class SelectionCriteria { |
|
9 |
|
|
10 |
public SelectionCriteria(Node n){ |
|
11 |
|
|
12 |
} |
|
13 |
|
|
14 |
} |
modules/dnet-openaireplus-mapping-utils/branches/solr7/src/main/java/eu/dnetlib/data/bulktag/Datasource.java | ||
---|---|---|
1 |
package eu.dnetlib.data.bulktag; |
|
2 |
|
|
3 |
|
|
4 |
import org.dom4j.Node; |
|
5 |
|
|
6 |
/** |
|
7 |
* Created by miriam on 01/08/2018. |
|
8 |
*/ |
|
9 |
public class Datasource { |
|
10 |
|
|
11 |
private String openaireId; |
|
12 |
|
|
13 |
private SelectionCriteria selCriteria; |
|
14 |
|
|
15 |
public String getOpenaireId() { |
|
16 |
return openaireId; |
|
17 |
} |
|
18 |
|
|
19 |
public void setOpenaireId(String openaireId) { |
|
20 |
this.openaireId = openaireId; |
|
21 |
} |
|
22 |
|
|
23 |
public SelectionCriteria getSelCriteria() { |
|
24 |
return selCriteria; |
|
25 |
} |
|
26 |
|
|
27 |
public void setSelCriteria(SelectionCriteria selCriteria) { |
|
28 |
this.selCriteria = selCriteria; |
|
29 |
} |
|
30 |
|
|
31 |
} |
modules/dnet-openaireplus-mapping-utils/branches/solr7/src/main/java/eu/dnetlib/data/bulktag/CommunityConfiguration.java | ||
---|---|---|
1 |
package eu.dnetlib.data.bulktag; |
|
2 |
|
|
3 |
import com.google.common.collect.Lists; |
|
4 |
import com.google.gson.Gson; |
|
5 |
import com.sun.tools.javac.util.Pair; |
|
6 |
import org.apache.commons.logging.Log; |
|
7 |
import org.apache.commons.logging.LogFactory; |
|
8 |
|
|
9 |
import java.util.ArrayList; |
|
10 |
import java.util.HashMap; |
|
11 |
import java.util.List; |
|
12 |
import java.util.Map; |
|
13 |
import java.util.stream.Collectors; |
|
14 |
|
|
15 |
/** |
|
16 |
* Created by miriam on 02/08/2018. |
|
17 |
*/ |
|
18 |
public class CommunityConfiguration { |
|
19 |
|
|
20 |
private static final Log log = LogFactory.getLog(CommunityConfiguration.class); |
|
21 |
|
|
22 |
enum MapModes{ |
|
23 |
SUBJECT_MAP, |
|
24 |
DATASOURCE_MAP, |
|
25 |
ZENODO_COMMUNITY_MAP |
|
26 |
} |
|
27 |
|
|
28 |
private Map<String,Community> communities; |
|
29 |
|
|
30 |
//map subject -> communityid |
|
31 |
private transient final Map<String,List<Pair<String,SelectionCriteria>>> subjectMap = new HashMap<>(); |
|
32 |
//map datasourceid -> communityid |
|
33 |
private transient final Map<String,List<Pair<String,SelectionCriteria>>> datasourceMap = new HashMap<>(); |
|
34 |
//map zenodocommunityid -> communityid |
|
35 |
private transient final Map<String,List<Pair<String,SelectionCriteria>>> zenodocommunityMap = new HashMap<>(); |
|
36 |
|
|
37 |
public CommunityConfiguration(final Map<String, Community> communities) { |
|
38 |
this.communities = communities; |
|
39 |
init(); |
|
40 |
} |
|
41 |
|
|
42 |
private void init() { |
|
43 |
for(Community c: getCommunityList()){ |
|
44 |
//get subjects |
|
45 |
final String id = c.getId(); |
|
46 |
for(String sbj : c.getSubjects()){ |
|
47 |
Pair<String,SelectionCriteria> p = new Pair<>(id,new SelectionCriteria(null)); |
|
48 |
add(sbj.toLowerCase(),p,subjectMap); |
|
49 |
} |
|
50 |
//get datasources |
|
51 |
for(Datasource d: c.getDatasources()){ |
|
52 |
add(d.getOpenaireId(),new Pair<>(id,d.getSelCriteria()),datasourceMap); |
|
53 |
} |
|
54 |
//get zenodo communities |
|
55 |
for(ZenodoCommunity zc : c.getZenodoCommunities()){ |
|
56 |
add(zc.getZenodoCommunityId(),new Pair<>(id,zc.getSelCriteria()),zenodocommunityMap); |
|
57 |
} |
|
58 |
} |
|
59 |
} |
|
60 |
|
|
61 |
private void add(String key,Pair<String,SelectionCriteria> value, Map<String,List<Pair<String,SelectionCriteria>>> map){ |
|
62 |
List<Pair<String,SelectionCriteria>> values = map.get(key); |
|
63 |
|
|
64 |
if (values == null){ |
|
65 |
values = new ArrayList<>(); |
|
66 |
map.put(key,values); |
|
67 |
} |
|
68 |
values.add(value); |
|
69 |
} |
|
70 |
|
|
71 |
public List<Pair<String,SelectionCriteria>> getCommunityForSubject(String sbj){ |
|
72 |
return subjectMap.get(sbj); |
|
73 |
} |
|
74 |
|
|
75 |
public List<Pair<String,SelectionCriteria>> getCommunityForDatasource(String dts){ |
|
76 |
return datasourceMap.get(dts); |
|
77 |
} |
|
78 |
|
|
79 |
public List<Pair<String,SelectionCriteria>> getCommunityForZenodoCommunity(String zc){ |
|
80 |
return zenodocommunityMap.get(zc); |
|
81 |
} |
|
82 |
|
|
83 |
public List<String> getCommunityForSubjectValue(String value) { |
|
84 |
try { |
|
85 |
return subjectMap.get(value.toLowerCase()).stream().map(p -> p.fst).collect(Collectors.toList()); |
|
86 |
}catch(Exception e){ |
|
87 |
return new ArrayList<>(); |
|
88 |
} |
|
89 |
} |
|
90 |
|
|
91 |
public List<String> getCommunityForDatasourceValue(String value) { |
|
92 |
try { |
|
93 |
return datasourceMap.get(value.toLowerCase()).stream().map(p -> p.fst).collect(Collectors.toList()); |
|
94 |
}catch(Exception e){ |
|
95 |
return new ArrayList<>(); |
|
96 |
} |
|
97 |
} |
|
98 |
|
|
99 |
public List<String> getCommunityForZenodoCommunityValue(String value){ |
|
100 |
try { |
|
101 |
return zenodocommunityMap.get(value.toLowerCase()).stream().map(p -> p.fst).collect(Collectors.toList()); |
|
102 |
}catch(Exception e){ |
|
103 |
return new ArrayList<>(); |
|
104 |
|
|
105 |
} |
|
106 |
} |
|
107 |
|
|
108 |
/* |
|
109 |
public SelectionCriteria getSelCriteria(String value, String community, MapModes map_mode){ |
|
110 |
|
|
111 |
Map<String,List<Pair<String,SelectionCriteria>>> map = null; |
|
112 |
if(map_mode == MapModes.DATASOURCE_MAP) |
|
113 |
map = datasourceMap; |
|
114 |
else |
|
115 |
if(map_mode == MapModes.ZENODO_COMMUNITY_MAP) |
|
116 |
map = zenodocommunityMap; |
|
117 |
else |
|
118 |
new Throwable("Impossible to have Selection Criteria over subjects"); |
|
119 |
|
|
120 |
List<Pair<String, SelectionCriteria>> lst = map.get(value); |
|
121 |
List<SelectionCriteria> selectionList = lst.stream().map(p -> { |
|
122 |
if (p.fst == community) |
|
123 |
return p.snd; |
|
124 |
return null; |
|
125 |
}).collect(Collectors.toList());//for each community there will be only one Selection Criteria per datasource or zenodo community |
|
126 |
if(selectionList != null) |
|
127 |
if (selectionList.size()>0) |
|
128 |
return selectionList.get(0); |
|
129 |
return null; |
|
130 |
} |
|
131 |
*/ |
|
132 |
|
|
133 |
public Map<String, Community> getCommunities() { |
|
134 |
return communities; |
|
135 |
} |
|
136 |
|
|
137 |
public void setCommunities(Map<String, Community> communities) { |
|
138 |
this.communities = communities; |
|
139 |
} |
|
140 |
|
|
141 |
public String toJson() { |
|
142 |
final Gson g = new Gson(); |
|
143 |
return g.toJson(this); |
|
144 |
} |
|
145 |
|
|
146 |
public int size() { |
|
147 |
return communities.keySet().size(); |
|
148 |
} |
|
149 |
|
|
150 |
public Community getCommunityById(String id){ |
|
151 |
return communities.get(id); |
|
152 |
} |
|
153 |
|
|
154 |
public List<Community> getCommunityList() { |
|
155 |
return Lists.newLinkedList(communities.values()); |
|
156 |
} |
|
157 |
} |
modules/dnet-openaireplus-workflows/branches/solr7/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/bulktag/ZenodoCommunity.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes.bulktag; |
|
2 |
|
|
3 |
import com.google.gson.Gson; |
|
4 |
|
|
5 |
/** |
|
6 |
* Created by miriam on 01/08/2018. |
|
7 |
*/ |
|
8 |
public class ZenodoCommunity { |
|
9 |
|
|
10 |
private String zenodoCommunityId; |
|
11 |
private String selCriteria; |
|
12 |
|
|
13 |
public String getZenodoCommunityId() { |
|
14 |
return zenodoCommunityId; |
|
15 |
} |
|
16 |
|
|
17 |
public void setZenodoCommunityId(String zenodoCommunityId) { |
|
18 |
this.zenodoCommunityId = zenodoCommunityId; |
|
19 |
} |
|
20 |
|
|
21 |
public String getSelCriteria() { |
|
22 |
return selCriteria; |
|
23 |
} |
|
24 |
|
|
25 |
public void setSelCriteria(String selCriteria) { |
|
26 |
this.selCriteria = selCriteria; |
|
27 |
} |
|
28 |
|
|
29 |
public String getZenodoCommunities(){ |
|
30 |
final Gson g = new Gson(); |
|
31 |
return g.toJson(this); |
|
32 |
} |
|
33 |
} |
modules/dnet-openaireplus-workflows/branches/solr7/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/bulktag/Community.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes.bulktag; |
|
2 |
|
|
3 |
import java.util.ArrayList; |
|
4 |
import java.util.List; |
|
5 |
import com.google.gson.Gson; |
|
6 |
|
|
7 |
/** |
|
8 |
* Created by miriam on 01/08/2018. |
|
9 |
*/ |
|
10 |
public class Community { |
|
11 |
|
|
12 |
private String id; |
|
13 |
private List<String> subjects; |
|
14 |
private List<Datasource> datasources; |
|
15 |
private List<ZenodoCommunity> zenodoCommunities; |
|
16 |
|
|
17 |
public String getId() { |
|
18 |
return id; |
|
19 |
} |
|
20 |
|
|
21 |
public void setId(String id) { |
|
22 |
this.id = id; |
|
23 |
} |
|
24 |
|
|
25 |
|
|
26 |
public List<String> getSubjects() { |
|
27 |
return subjects; |
|
28 |
} |
|
29 |
|
|
30 |
public void setSubjects(List<String> subjects) { |
|
31 |
this.subjects = subjects; |
|
32 |
} |
|
33 |
|
|
34 |
public List<Datasource> getDatasources() { |
|
35 |
return datasources; |
|
36 |
} |
|
37 |
|
|
38 |
public void setDatasources(List<Datasource> datasources) { |
|
39 |
this.datasources = datasources; |
|
40 |
} |
|
41 |
|
|
42 |
public List<ZenodoCommunity> getZenodoCommunities() { |
|
43 |
return zenodoCommunities; |
|
44 |
} |
|
45 |
|
|
46 |
public void setZenodoCommunities(List<ZenodoCommunity> zenodoCommunities) { |
|
47 |
this.zenodoCommunities = zenodoCommunities; |
|
48 |
} |
|
49 |
|
|
50 |
public String getCommunityConf(){ |
|
51 |
final Gson g = new Gson(); |
|
52 |
return g.toJson(this); |
|
53 |
} |
|
54 |
|
|
55 |
} |
modules/dnet-openaireplus-workflows/branches/solr7/src/main/java/eu/dnetlib/msro/openaireplus/workflows/nodes/bulktag/Datasource.java | ||
---|---|---|
1 |
package eu.dnetlib.msro.openaireplus.workflows.nodes.bulktag; |
|
2 |
|
|
3 |
import com.google.gson.Gson; |
|
4 |
|
|
5 |
/** |
|
6 |
* Created by miriam on 01/08/2018. |
|
7 |
*/ |
|
8 |
public class Datasource { |
|
9 |
|
|
10 |
private String openaireId; |
|
11 |
private String selCriteria; |
|
12 |
|
|
13 |
public String getOpenaireId() { |
|
14 |
return openaireId; |
|
15 |
} |
|
16 |
|
|
17 |
public void setOpenaireId(String openaireId) { |
|
18 |
this.openaireId = openaireId; |
|
19 |
} |
|
20 |
|
|
21 |
public String getSelCriteria() { |
|
22 |
return selCriteria; |
|
23 |
} |
|
24 |
|
|
25 |
public void setSelCriteria(String selCriteria) { |
|
26 |
this.selCriteria = selCriteria; |
|
27 |
} |
|
28 |
|
|
29 |
public String getDatasource(){ |
|
30 |
final Gson g= new Gson(); |
|
31 |
|
|
32 |
return g.toJson(this); |
|
33 |
} |
|
34 |
|
|
35 |
} |
Also available in: Unified diff
code bulktag refactoring