Revision 30986
Added by Marek Horst over 9 years ago
ResolvedCitation.java | ||
---|---|---|
1 | 1 |
package eu.dnetlib.iis.ingest.pmc.citations; |
2 | 2 |
|
3 |
import eu.dnetlib.data.proto.TypeProtos.Type; |
|
4 |
import eu.dnetlib.data.transform.xml.AbstractDNetOafXsltFunctions; |
|
3 |
import java.util.ArrayList; |
|
4 |
import java.util.HashMap; |
|
5 |
import java.util.List; |
|
6 |
import java.util.Map; |
|
5 | 7 |
|
6 | 8 |
import org.apache.commons.io.IOUtils; |
7 | 9 |
import org.apache.commons.lang.StringUtils; |
... | ... | |
12 | 14 |
import pl.edu.icm.ceon.scala_commons.xml.XPathEvaluator; |
13 | 15 |
import scala.collection.JavaConversions; |
14 | 16 |
|
15 |
import java.util.ArrayList; |
|
16 |
import java.util.HashMap; |
|
17 |
import java.util.List; |
|
18 |
import java.util.Map; |
|
19 |
|
|
20 | 17 |
/** |
21 | 18 |
* A model of a resolved citation from PMC. |
22 | 19 |
* |
23 | 20 |
* @author Mateusz Fedoryszak (m.fedoryszak@icm.edu.pl) |
24 | 21 |
*/ |
25 | 22 |
public class ResolvedCitation { |
26 |
private final String sourcePmid;
|
|
23 |
private final String sourceOaid;
|
|
27 | 24 |
private final int position; |
28 | 25 |
private final String rawText; |
29 | 26 |
private final Map<String, String> targetIds; |
30 | 27 |
|
31 |
private static final String PUBMED_NS_PREFIX = "od_______908"; |
|
32 |
private static final String PUBMED_ID_PREFIX = "oai:europepmc.org:"; |
|
33 | 28 |
private static final String PMID_KEY = "pmid"; |
29 |
private static final String DOI_KEY = "doi"; |
|
34 | 30 |
|
35 |
public ResolvedCitation(final String sourcePmid, final int position, final String rawText, final Map<String, String> targetIds) {
|
|
36 |
this.sourcePmid = sourcePmid;
|
|
31 |
public ResolvedCitation(final String sourceOaid, final int position, final String rawText, final Map<String, String> targetIds) {
|
|
32 |
this.sourceOaid = sourceOaid;
|
|
37 | 33 |
this.position = position; |
38 | 34 |
this.rawText = rawText; |
39 | 35 |
this.targetIds = targetIds; |
40 | 36 |
} |
41 | 37 |
|
42 | 38 |
/** |
43 |
* @return Source (citing) document PubMedID |
|
44 |
*/ |
|
45 |
public String getSourcePmid() { |
|
46 |
return sourcePmid; |
|
47 |
} |
|
48 |
|
|
49 |
/** |
|
50 | 39 |
* @return Source (citing) document OpenAIRE ID (before dedup) |
51 | 40 |
*/ |
52 | 41 |
public String getSourceOaid() { |
53 |
return pmidToOaid(sourcePmid);
|
|
42 |
return sourceOaid;
|
|
54 | 43 |
} |
55 | 44 |
|
56 | 45 |
public int getPosition() { |
... | ... | |
68 | 57 |
return targetIds.get(PMID_KEY); |
69 | 58 |
} |
70 | 59 |
|
60 |
|
|
71 | 61 |
/** |
72 |
* @return Target (cited) document OpenAIRE ID (before dedup)
|
|
62 |
* @return Target (cited) document DOI
|
|
73 | 63 |
*/ |
74 |
public String getTargetOaid() { |
|
75 |
final String pmid = getTargetPmid(); |
|
76 |
if (pmid != null) { |
|
77 |
return pmidToOaid(pmid); |
|
78 |
} else { |
|
79 |
return null; |
|
80 |
} |
|
64 |
public String getTargetDoi() { |
|
65 |
return targetIds.get(DOI_KEY); |
|
81 | 66 |
} |
82 |
|
|
67 |
|
|
83 | 68 |
public String getTargetIdsJson() { |
84 | 69 |
return new JSONObject(targetIds).toString(); |
85 | 70 |
} |
86 |
|
|
71 |
|
|
87 | 72 |
/** |
88 |
* Transforms PubMed ID into OpenAIRE ID (before dedup) |
|
89 |
*/ |
|
90 |
public static String pmidToOaid(final String pmid) { |
|
91 |
return AbstractDNetOafXsltFunctions.oafId( |
|
92 |
Type.result.name(), PUBMED_NS_PREFIX, PUBMED_ID_PREFIX + pmid); |
|
93 |
} |
|
94 |
|
|
95 |
/** |
|
96 | 73 |
* Parses NLM file and extracts resolved citations. |
97 | 74 |
*/ |
98 |
public static List<ResolvedCitation> extractFromNlm(final String text) { |
|
99 |
XPathEvaluator evaluator = XPathEvaluator.fromString(text); |
|
100 |
String id = evaluator.apply("/article/front/article-meta/article-id[@pub-id-type='pmid']"); |
|
75 |
public static List<ResolvedCitation> extractFromNlm(final String oaSourceId, final String text) { |
|
76 |
XPathEvaluator evaluator = XPathEvaluator.fromInputStream(IOUtils.toInputStream(text)); |
|
101 | 77 |
|
102 | 78 |
List<ResolvedCitation> result = new ArrayList<ResolvedCitation>(); |
103 | 79 |
int position = 1; |
... | ... | |
109 | 85 |
final String idValue = citId.getTextContent(); |
110 | 86 |
targetIds.put(idType, idValue); |
111 | 87 |
} |
112 |
result.add(new ResolvedCitation(id, position, rawTextGenerator(ref), targetIds));
|
|
88 |
result.add(new ResolvedCitation(oaSourceId, position, rawTextGenerator(ref), targetIds));
|
|
113 | 89 |
|
114 | 90 |
++position; |
115 | 91 |
} |
Also available in: Unified diff
#757: fixing PMID and DOI matching; fixing generation of sourceDocumentId and destinationDocumentId