Project

General

Profile

1
package eu.dnetlib.iis.documentssimilarity.producer;
2

    
3
import java.io.IOException;
4
import java.util.HashMap;
5
import java.util.List;
6
import java.util.Map;
7
import java.util.Random;
8

    
9
import org.apache.hadoop.conf.Configuration;
10
import org.apache.hadoop.fs.FileSystem;
11
import org.apache.hadoop.fs.Path;
12

    
13
import com.google.common.collect.Lists;
14

    
15
import de.svenjacobs.loremipsum.LoremIpsum;
16
import eu.dnetlib.iis.core.java.PortBindings;
17
import eu.dnetlib.iis.core.java.Process;
18
import eu.dnetlib.iis.core.java.io.DataStore;
19
import eu.dnetlib.iis.core.java.io.FileSystemPath;
20
import eu.dnetlib.iis.core.java.porttype.AvroPortType;
21
import eu.dnetlib.iis.core.java.porttype.PortType;
22
import eu.dnetlib.iis.documentssimilarity.schemas.DocumentMetadata;
23
import eu.dnetlib.iis.importer.schemas.Person;
24

    
25
/**
26
 * Produce data stores
27
 *
28
 * @author Mateusz Fedoryszak
29
 */
30
public class DocumentAvroDatastoreProducer implements Process {
31

    
32
    private final static String documentPort = "document";
33

    
34
    public Map<String, PortType> getInputPorts() {
35
        return new HashMap<String, PortType>();
36
    }
37

    
38
    @Override
39
    public Map<String, PortType> getOutputPorts() {
40
        return createOutputPorts();
41
    }
42

    
43
    private static Map<String, PortType> createOutputPorts() {
44
        HashMap<String, PortType> outputPorts =
45
                new HashMap<String, PortType>();
46
        outputPorts.put(documentPort,
47
                new AvroPortType(DocumentMetadata.SCHEMA$));
48
        return outputPorts;
49
    }
50
    
51
    public static Person createPerson(String id, String name) {
52
        return new Person(id, null, null, name);
53
    }
54

    
55
    public static List<DocumentMetadata> getDocumentMetadataList() {
56
        DocumentMetadata doc1 = new DocumentMetadata();
57
        doc1.setId("1");
58
        doc1.setTitle("A new method of something");
59
        doc1.setAbstract$("We present a new method of doing something. We are not sure yet what " +
60
                        "it is actually doing, but it definitely is a worthwhile technique.");
61
        doc1.setKeywords(Lists.<CharSequence>newArrayList("method", "something", "nothing", "anything"));
62
        doc1.setAuthors(Lists.<Person>newArrayList(createPerson("1", "Jan Kowalski")));
63

    
64
        DocumentMetadata doc2 = new DocumentMetadata();
65
        doc2.setId("2");
66
        doc2.setTitle("How to do it?");
67
        doc2.setAbstract$("We are asking some of fundamental engineering questions here. As all " +
68
                "kinds of fundamental questions, they probably have no answers.");
69
        doc2.setKeywords(Lists.<CharSequence>newArrayList(
70
                "doing things", "questioning", "falsificationism", "epistemology"));
71
        doc2.setAuthors(Lists.<Person>newArrayList(createPerson("1", "Jan Kowalski"), createPerson("2", "Zygmunt Nowak")));
72

    
73
        DocumentMetadata doc3 = new DocumentMetadata();
74
        doc3.setId("3");
75
        doc3.setTitle("Our great tool");
76
        doc3.setAbstract$("In this paper we present Our great tool that is capable of doing " +
77
                "anything. First theoretical studies have shown its great potential. Practical " +
78
                "applications are to be investigated in the future.");
79
        doc3.setKeywords(Lists.<CharSequence>newArrayList(
80
                "Our great tool", "perpetuum mobile", "stop problem", "P==NP?"));
81
        doc3.setAuthors(Lists.<Person>newArrayList(createPerson("2", "Zygmunt Nowak")));
82

    
83
        DocumentMetadata doc4 = new DocumentMetadata();
84
        doc4.setId("4");
85
        doc4.setTitle("Big and great system");
86
        doc4.setAbstract$("Worldwide amount of data is growing every year. That is why ever " +
87
                "bigger and greater systems needs to be built. In this paper we present our biggest " +
88
                "and greatest system so far.");
89
        doc4.setKeywords(Lists.<CharSequence>newArrayList(
90
                "big", "enormous", "great", "grand"));
91
        doc4.setAuthors(Lists.<Person>newArrayList(createPerson("2", "Zygmunt Nowak"), createPerson("1", "Jan Kowalski")));
92

    
93
        List<DocumentMetadata> results = Lists.newArrayList(doc1, doc2, doc3, doc4);
94
        
95
//      adding dummy records up to 10 in total
96
        LoremIpsum loremIpsum = new LoremIpsum();
97
        Random rand = new Random();
98
        for(int i = 5; i<=200; i++) {
99
        	DocumentMetadata doc = new DocumentMetadata();
100
            doc.setId(Integer.toString(i));
101
            doc.setTitle(loremIpsum.getWords(10, rand.nextInt(50)));
102
            doc.setAbstract$(loremIpsum.getWords(50, rand.nextInt(50)));
103
            results.add(doc);
104
        }
105

    
106
        return results;
107
    }
108

    
109
    @Override
110
    public void run(PortBindings portBindings, Configuration conf,
111
                    Map<String, String> parameters) throws IOException {
112
        Map<String, Path> output = portBindings.getOutput();
113
        FileSystem fs = FileSystem.get(conf);
114

    
115
        DataStore.create(getDocumentMetadataList(),
116
                new FileSystemPath(fs, output.get(documentPort)));
117
    }
118
    
119
    public static void main(String[] args) {
120
    	Random rand = new Random();
121
    	LoremIpsum loremIpsum = new LoremIpsum();
122
    	System.out.println(loremIpsum.getWords(100));
123
    	
124
	}
125
}
    (1-1/1)