Project

General

Profile

1
package eu.dnetlib.dli.resolver;
2

    
3
import java.util.List;
4

    
5
import com.ximpleware.AutoPilot;
6
import com.ximpleware.VTDGen;
7
import com.ximpleware.VTDNav;
8
import eu.dnetlib.data.transform.VtdUtilityParser;
9
import eu.dnetlib.dli.resolver.model.DLIResolvedObject;
10

    
11
import eu.dnetlib.pid.resolver.model.ObjectType;
12
import org.apache.commons.logging.Log;
13
import org.apache.commons.logging.LogFactory;
14

    
15
/**
16
 * Created by sandro on 9/26/16.
17
 */
18
public class NCBINParser {
19

    
20
	private static final Log log = LogFactory.getLog(NCBINParser.class);
21

    
22
    public DLIResolvedObject parseRecord(final String record) {
23
        try {
24
			if (record == null)
25
				return null;
26
            final DLIResolvedObject parsedObject = new DLIResolvedObject();
27
            final VTDGen vg = new VTDGen();
28
			vg.setDoc(record.getBytes());
29
			vg.parse(true);
30
			final VTDNav vn = vg.getNav();
31
			final AutoPilot ap = new AutoPilot(vn);
32

    
33
			parsedObject.setTitles(VtdUtilityParser.getTextValue(ap, vn, "//GBSeq_definition"));
34

    
35
			final List<String> descriptions = VtdUtilityParser.getTextValue(ap, vn, "//GBSeq_comment");
36
			if (descriptions != null && descriptions.size() > 0) {
37
				parsedObject.setDescription(descriptions.get(0));
38
			}
39

    
40
			parsedObject.setType(ObjectType.dataset);
41
			return parsedObject;
42
		} catch (Throwable e) {
43
			log.error(String.format("Error on parsing document %s", record), e);
44
			return null;
45
		}
46
	}
47

    
48
}
(9-9/15)