Project

General

Profile

« Previous | Next » 

Revision 30986

Added by Marek Horst over 9 years ago

#757: fixing PMID and DOI matching; fixing sourceDocumentId and destinationDocumentId generation

View differences:

ResolvedCitation.java
1 1
package eu.dnetlib.iis.ingest.pmc.citations;
2 2

  
3
import eu.dnetlib.data.proto.TypeProtos.Type;
4
import eu.dnetlib.data.transform.xml.AbstractDNetOafXsltFunctions;
3
import java.util.ArrayList;
4
import java.util.HashMap;
5
import java.util.List;
6
import java.util.Map;
5 7

  
6 8
import org.apache.commons.io.IOUtils;
7 9
import org.apache.commons.lang.StringUtils;
......
12 14
import pl.edu.icm.ceon.scala_commons.xml.XPathEvaluator;
13 15
import scala.collection.JavaConversions;
14 16

  
15
import java.util.ArrayList;
16
import java.util.HashMap;
17
import java.util.List;
18
import java.util.Map;
19

  
20 17
/**
21 18
 * A model of resolved citation from PMC.
22 19
 *
23 20
 * @author Mateusz Fedoryszak (m.fedoryszak@icm.edu.pl)
24 21
 */
25 22
public class ResolvedCitation {
26
    private final String sourcePmid;
23
    private final String sourceOaid;
27 24
    private final int position;
28 25
    private final String rawText;
29 26
    private final Map<String, String> targetIds;
30 27

  
31
    private static final String PUBMED_NS_PREFIX = "od_______908";
32
    private static final String PUBMED_ID_PREFIX = "oai:europepmc.org:";
33 28
    private static final String PMID_KEY = "pmid";
29
    private static final String DOI_KEY = "doi";
34 30

  
35
    public ResolvedCitation(final String sourcePmid, final int position, final String rawText, final Map<String, String> targetIds) {
36
        this.sourcePmid = sourcePmid;
31
    public ResolvedCitation(final String sourceOaid, final int position, final String rawText, final Map<String, String> targetIds) {
32
        this.sourceOaid = sourceOaid;
37 33
        this.position = position;
38 34
        this.rawText = rawText;
39 35
        this.targetIds = targetIds;
40 36
    }
41 37

  
42 38
    /**
43
     * @return Source (citing) document PubMedID
44
     */
45
    public String getSourcePmid() {
46
        return sourcePmid;
47
    }
48

  
49
    /**
50 39
     * @return Source (citing) document OpenAIRE ID (before dedup)
51 40
     */
52 41
    public String getSourceOaid() {
53
        return pmidToOaid(sourcePmid);
42
        return sourceOaid;
54 43
    }
55 44

  
56 45
    public int getPosition() {
......
68 57
        return targetIds.get(PMID_KEY);
69 58
    }
70 59

  
60

  
71 61
    /**
72
     * @return Target (cited) document OpenAIRE ID (before dedup)
62
     * @return Target (cited) document DOI
73 63
     */
74
    public String getTargetOaid() {
75
        final String pmid = getTargetPmid();
76
        if (pmid != null) {
77
            return pmidToOaid(pmid);
78
        } else {
79
            return null;
80
        }
64
    public String getTargetDoi() {
65
        return targetIds.get(DOI_KEY);
81 66
    }
82

  
67
    
83 68
    public String getTargetIdsJson() {
84 69
        return new JSONObject(targetIds).toString();
85 70
    }
86

  
71
    
87 72
    /**
88
     * Transforms PubMed ID into OpenAIRE ID (before dedup)
89
     */
90
    public static String pmidToOaid(final String pmid) {
91
        return AbstractDNetOafXsltFunctions.oafId(
92
        		Type.result.name(), PUBMED_NS_PREFIX, PUBMED_ID_PREFIX + pmid);
93
    }
94

  
95
    /**
96 73
     * Parses NLM file and extracts resolved citations.
97 74
     */
98
    public static List<ResolvedCitation> extractFromNlm(final String text) {
99
        XPathEvaluator evaluator = XPathEvaluator.fromString(text);
100
        String id = evaluator.apply("/article/front/article-meta/article-id[@pub-id-type='pmid']");
75
    public static List<ResolvedCitation> extractFromNlm(final String oaSourceId, final String text) {
76
        XPathEvaluator evaluator = XPathEvaluator.fromInputStream(IOUtils.toInputStream(text));
101 77

  
102 78
        List<ResolvedCitation> result = new ArrayList<ResolvedCitation>();
103 79
        int position = 1;
......
109 85
                final String idValue = citId.getTextContent();
110 86
                targetIds.put(idType, idValue);
111 87
            }
112
            result.add(new ResolvedCitation(id, position, rawTextGenerator(ref), targetIds));
88
            result.add(new ResolvedCitation(oaSourceId, position, rawTextGenerator(ref), targetIds));
113 89

  
114 90
            ++position;
115 91
        }

Also available in: Unified diff