Project

General

Profile

1
package eu.dnetlib.iis.importer.converter;
2

    
3
import java.io.IOException;
4
import java.util.ArrayList;
5
import java.util.Collection;
6
import java.util.HashSet;
7
import java.util.List;
8
import java.util.Set;
9

    
10
import org.apache.hadoop.hbase.client.Result;
11
import org.apache.log4j.Logger;
12
import org.json.simple.JSONObject;
13
import org.json.simple.parser.JSONParser;
14
import org.json.simple.parser.ParseException;
15

    
16
import eu.dnetlib.data.proto.FieldTypeProtos.StringField;
17
import eu.dnetlib.data.proto.OafProtos.Oaf;
18
import eu.dnetlib.iis.importer.input.approver.ResultApprover;
19
import eu.dnetlib.iis.importer.schemas.Project;
20

    
21
/**
22
 * HBase {@link Result} to avro {@link Project} converter.
23
 * @author mhorst
24
 *
25
 */
26
public class ProjectConverter extends AbstractAvroConverter<Project> {
27

    
28
	protected static final Logger log = Logger.getLogger(ProjectConverter.class);
29
	
30
	private static final String ELEM_FUNDING_TREE_PARENT = "parent";
31
	private static final String ELEM_FUNDING_TREE_NAME = "name";
32

    
33
	private static final Set<String> ACRONYM_SKIP_LOWERCASED_VALUES = new HashSet<String>();
34
	
35
	{
36
		ACRONYM_SKIP_LOWERCASED_VALUES.add("undefined");
37
		ACRONYM_SKIP_LOWERCASED_VALUES.add("unknown");
38
	}
39
	
40
	/**
41
	 * Default constructor.
42
	 * @param encoding
43
	 * @param resultApprover
44
	 */
45
	public ProjectConverter(String encoding,
46
			ResultApprover resultApprover) {
47
		super(encoding, resultApprover);
48
	}
49

    
50
	@Override
51
	public Project buildObject(Result source, Oaf resolvedOafObject) throws IOException {
52
		eu.dnetlib.data.proto.ProjectProtos.Project sourceProject = resolvedOafObject.getEntity()!=null?
53
				resolvedOafObject.getEntity().getProject():null;
54
		if (sourceProject==null) {
55
			log.error("skipping: no project object " +
56
					"for a row " + new String(source.getRow(), getEncoding()));
57
			return null;
58
		}
59
		if (resolvedOafObject.getEntity().getId()!=null && 
60
				!resolvedOafObject.getEntity().getId().isEmpty()) {
61
			Project.Builder builder = Project.newBuilder();
62
			builder.setId(resolvedOafObject.getEntity().getId());
63
			if (sourceProject.getMetadata()!=null) {
64
				if (isAcronymValid(sourceProject.getMetadata().getAcronym())) {
65
					builder.setProjectAcronym(sourceProject.getMetadata().getAcronym().getValue());
66
				}
67
				if (sourceProject.getMetadata().getCode()!=null &&
68
						sourceProject.getMetadata().getCode().getValue()!=null &&
69
						!sourceProject.getMetadata().getCode().getValue().isEmpty()) {
70
					builder.setProjectGrantId(sourceProject.getMetadata().getCode().getValue());
71
				}
72
				String extractedFundingClass = extractFundingClass(
73
						extractStringValues(sourceProject.getMetadata().getFundingtreeList()));
74
				if (extractedFundingClass!=null && !extractedFundingClass.isEmpty()) {
75
					builder.setFundingClass(extractedFundingClass);	
76
				}
77
			}
78
			return builder.build();	
79
		} else {
80
			log.warn("unable to extract grant number: " +
81
					"unsupported project id: " + resolvedOafObject.getEntity().getId());
82
			return null;
83
		}
84
	}
85

    
86
	/**
87
	 * Extracts string values from {@link StringField} list.
88
	 * @param source
89
	 * @return string values extracted from {@link StringField} list
90
	 */
91
	protected static List<String> extractStringValues(List<StringField> source) {
92
		if (source!=null) {
93
			List<String> results = new ArrayList<String>(source.size());
94
			for (StringField currentField : source) {
95
				results.add(currentField.getValue());
96
			}
97
			return results;
98
		} else {
99
			return null;
100
		}
101
	}
102
	
103
	/**
104
	 * Verifies whether acronym should be considered as valid.
105
	 * @param acronym
106
	 * @return true if valid, false otherwise
107
	 */
108
	public static boolean isAcronymValid(StringField acronym) {
109
		return acronym!=null && acronym.getValue()!=null && !acronym.getValue().isEmpty() && 
110
				!ACRONYM_SKIP_LOWERCASED_VALUES.contains(acronym.getValue().trim().toLowerCase());
111
	}
112
	
113
	/**
114
	 * Extracts funding class from funding tree.
115
	 * @param fundingTreeJson
116
	 * @return extracted funding class
117
	 * @throws IOException 
118
	 */
119
	public static String extractFundingClass(List<String> fundingTreeList) throws IOException {
120
		if (fundingTreeList!=null && !fundingTreeList.isEmpty()) {
121
			for (String currentFundingTreeJson : fundingTreeList) {
122
				if (currentFundingTreeJson!=null && !currentFundingTreeJson.isEmpty()) {
123
					try {
124
						JSONParser parser = new JSONParser();
125
						JSONObject topLevelParent = getTopLevelParent(
126
								(JSONObject) parser.parse(currentFundingTreeJson));
127
						if (topLevelParent!=null) {
128
							@SuppressWarnings("unchecked")
129
							Collection<JSONObject> topLevelParentValues = topLevelParent.values();
130
							for (JSONObject currentValue : topLevelParentValues) {
131
								Object currentName = currentValue.get(ELEM_FUNDING_TREE_NAME);
132
								if (currentName!=null) {
133
									return currentName.toString();
134
								}
135
							}
136
//							fallback
137
							return null;
138
						} else {
139
							return null;
140
						}
141
						
142
					} catch (ParseException e) {
143
						throw new IOException("unable to parse funding tree: " + 
144
					currentFundingTreeJson, e);
145
					}	
146
				}
147
			}		
148
//			fallback
149
			return null;
150
		} else {
151
			return null;
152
		}
153
	}
154
	
155
	private static JSONObject getTopLevelParent(JSONObject parent) {
156
		if (parent!=null) {
157
			@SuppressWarnings("unchecked")
158
			Collection<JSONObject> values = parent.values();
159
			for (JSONObject value : values) {
160
				JSONObject newParent = (JSONObject) value.get(ELEM_FUNDING_TREE_PARENT);
161
				if (newParent!=null && !newParent.isEmpty()) {
162
					return getTopLevelParent(newParent);
163
				}	
164
			}
165
//			fallback
166
			return parent;
167
		} else {
168
			return null;
169
		}
170
		
171
	}
172
	
173
}
(11-11/11)