1
|
package eu.dnetlib.dli.resolver;
|
2
|
|
3
|
import java.util.List;
|
4
|
|
5
|
import com.ximpleware.AutoPilot;
|
6
|
import com.ximpleware.VTDGen;
|
7
|
import com.ximpleware.VTDNav;
|
8
|
import eu.dnetlib.data.transform.VtdUtilityParser;
|
9
|
import eu.dnetlib.dli.resolver.model.DLIResolvedObject;
|
10
|
|
11
|
import eu.dnetlib.pid.resolver.model.ObjectType;
|
12
|
import org.apache.commons.logging.Log;
|
13
|
import org.apache.commons.logging.LogFactory;
|
14
|
|
15
|
/**
|
16
|
* Created by sandro on 9/26/16.
|
17
|
*/
|
18
|
public class NCBINParser {
|
19
|
|
20
|
private static final Log log = LogFactory.getLog(NCBINParser.class);
|
21
|
|
22
|
public DLIResolvedObject parseRecord(final String record) {
|
23
|
try {
|
24
|
if (record == null || record.contains("ERROR"))
|
25
|
return null;
|
26
|
|
27
|
final DLIResolvedObject parsedObject = new DLIResolvedObject();
|
28
|
final VTDGen vg = new VTDGen();
|
29
|
vg.setDoc(record.getBytes());
|
30
|
vg.parse(true);
|
31
|
final VTDNav vn = vg.getNav();
|
32
|
final AutoPilot ap = new AutoPilot(vn);
|
33
|
parsedObject.setTitles(VtdUtilityParser.getTextValue(ap, vn, "//Item[./@Name='Title']"));
|
34
|
List<String> dates = VtdUtilityParser.getTextValue(ap, vn, "//Item[./@Name='CreateDate']");
|
35
|
if(dates!=null && dates.size()>0)
|
36
|
parsedObject.setDate(dates.get(0));
|
37
|
|
38
|
parsedObject.setType(ObjectType.dataset);
|
39
|
return parsedObject;
|
40
|
} catch (Throwable e) {
|
41
|
log.error(String.format("Error on parsing document %s", record), e);
|
42
|
return null;
|
43
|
}
|
44
|
}
|
45
|
|
46
|
}
|