Project

General

Profile

« Previous | Next » 

Revision 43901

added aggregation workflows

View differences:

modules/dnet-isti/trunk/src/main/resources/eu/dnetlib/efg/workflows/repo-hi/isti_aggregation_wf.xml.st
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
    <HEADER>
4
        <RESOURCE_IDENTIFIER value=""/>
5
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
        <RESOURCE_KIND value="WorkflowDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value=""/>
9
    </HEADER>
10
    <BODY>
11
        <WORKFLOW_NAME>$name$</WORKFLOW_NAME>
12
        <WORKFLOW_DESCRIPTION>$desc$</WORKFLOW_DESCRIPTION>
13
        <WORKFLOW_INFO />
14
        <WORKFLOW_FAMILY>aggregator</WORKFLOW_FAMILY>
15
        <WORKFLOW_PRIORITY>$priority$</WORKFLOW_PRIORITY>
16
        <DATASOURCE id="$dsId$" interface="$interface$" />
17

  
18
        <CONFIGURATION status="WAIT_SYS_SETTINGS" start="MANUAL">
19
            <PARAMETERS>
20
				<PARAM name="transformationRuleId"  description="Transformation Rule Identifier"            required="true" managedBy="user"   category="TRANSFORMATION_RULE_ID" type="string" function="listProfiles('TransformationRuleDSResourceType', '//TITLE')"/>
21
            	<PARAM name="collMdstoreId"         description="Store for collected records"               required="true" managedBy="system" category="MDSTORE_ID"/>
22
            	<PARAM name="cleanMdstoreId"        description="Store for transformed records"             required="true" managedBy="system" category="MDSTORE_ID"/>
23
            </PARAMETERS>
24
            <WORKFLOW>
25
                <NODE isStart="true" name="collection" type="LaunchWorkflowTemplate">
26
                    <DESCRIPTION>Collect records</DESCRIPTION>
27
                    <PARAMETERS>
28
                    	<PARAM name="wfTemplateId" value="2c8e722e-c095-4e2c-a45d-a6ff75bf0cc9_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ==" />
29
                    	<PARAM name="wfTemplateParams">
30
                    		<MAP>
31
                    			<ENTRY key="dsId"          value="$dsId$" />
32
 								<ENTRY key="interface"     value="$interface$" />
33
	 							<ENTRY key="collMdstoreId" ref="collMdstoreId" />
34
                    		</MAP>
35
                    	</PARAM>
36
                    </PARAMETERS>
37
                    <ARCS>
38
                        <ARC to="transform"/>
39
                    </ARCS>
40
                </NODE>
41

  
42
                <NODE name="transform" type="LaunchWorkflowTemplate">
43
                    <DESCRIPTION>Clean record</DESCRIPTION>
44
                    <PARAMETERS>
45
                        <PARAM name="wfTemplateId" value="dcd92f5b-9033-46c7-aa7c-2dd367be672b_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ==" />
46
                        <PARAM name="wfTemplateParams">
47
                            <MAP>
48
                                <ENTRY key="dsId"            value="$dsId$" />
49
                                <ENTRY key="interface"       value="$interface$" />
50
                                <ENTRY key="collMdstoreId"   ref="collMdstoreId" />
51
                                <ENTRY key="cleanMdstoreId"  ref="cleanMdstoreId" />
52
                                <ENTRY key="transformRuleId" ref="transformationRuleId" />
53
                            </MAP>
54
                        </PARAM>
55
                    </PARAMETERS>
56
                    <ARCS>
57
                        <ARC to="success"/>
58
                    </ARCS>
59
                </NODE>
60
            </WORKFLOW>
61
            <DESTROY_WORKFLOW_TEMPLATE id="1de73b0e-ccbf-442c-a88d-41b27daa0ba2_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ==">
62
            	<PARAMETERS>
63
	            	<PARAM name="dsId"                  value="$dsId$" />
64
					<PARAM name="interface"             value="$interface$" />
65
					<PARAM name="collMdstoreId"         ref="collMdstoreId" />
66
					<PARAM name="cleanMdstoreId"        ref="cleanMdstoreId" />
67
				</PARAMETERS>
68
            </DESTROY_WORKFLOW_TEMPLATE>
69
        </CONFIGURATION>
70

  
71
        <NOTIFICATIONS/>
72

  
73
        <SCHEDULING enabled="false">
74
            <CRON>9 9 9 ? * *</CRON>
75
            <MININTERVAL>10080</MININTERVAL>
76
        </SCHEDULING>
77
        <STATUS/>
78
    </BODY>
79
</RESOURCE_PROFILE>
modules/dnet-isti/trunk/src/main/java/eu/dnetlib/msro/workflows/nodes/ObtainISTIDataSourceParamsJobNode.java
1
package eu.dnetlib.msro.workflows.nodes;
2

  
3
import javax.annotation.Resource;
4

  
5
import eu.dnetlib.enabling.locators.UniqueServiceLocator;
6
import eu.dnetlib.miscutils.datetime.DateUtils;
7
import eu.dnetlib.msro.workflows.graph.Arc;
8
import eu.dnetlib.msro.workflows.procs.Env;
9
import eu.dnetlib.msro.workflows.util.WorkflowsConstants;
10
import eu.dnetlib.rmi.enabling.ISLookUpService;
11

  
12
/**
13
 * Created by sandro on 9/23/16.
14
 */
15
public class ObtainISTIDataSourceParamsJobNode extends SimpleJobNode {
16
	private String providerId;
17

  
18
	@Resource
19
	private UniqueServiceLocator serviceLocator;
20

  
21
	@Override
22
	protected String execute(final Env env) throws Exception {
23
		final String query = "let $x := /*[.//RESOURCE_IDENTIFIER/@value='" + this.providerId + "']//EXTRA_FIELDS\n"
24
				+ "return concat($x/FIELD[./key='OpenAireDataSourceId']/value, ' @@@ ', $x/FIELD[./key='NamespacePrefix']/value)";
25

  
26
		final String[] arr = this.serviceLocator.getService(ISLookUpService.class).getResourceProfileByQuery(query).split("@@@");
27

  
28
		final String origId = arr[0].trim();
29
		final String nsPrefix = arr[1].trim();
30
		// this is needed by the mdbuilder
31
		// TODO: update mdbuilder to use the env attributes below, whose names are defined in WorkflowConstants
32
		env.setAttribute("parentDatasourceId", origId);
33
		env.setAttribute("namespacePrefix", nsPrefix);
34
		env.setAttribute("dateOfCollection", DateUtils.now_ISO8601());
35

  
36
		// these are needed for validation and fill hostedby
37
		env.setAttribute(WorkflowsConstants.LOG_DATASOURCE_ID, origId);
38
		env.setAttribute(WorkflowsConstants.DATASOURCE_PREFIX, nsPrefix);
39

  
40
		return Arc.DEFAULT_ARC;
41

  
42
	}
43

  
44
	public String getProviderId() {
45
		return this.providerId;
46
	}
47

  
48
	public void setProviderId(final String providerId) {
49
		this.providerId = providerId;
50
	}
51

  
52

  
53
}
modules/dnet-isti/trunk/src/main/resources/eu/dnetlib/isti/workflows/repo-hi/isti_aggregation_wf.xml.st
1
<?xml version="1.0" encoding="UTF-8"?>
2
<RESOURCE_PROFILE>
3
    <HEADER>
4
        <RESOURCE_IDENTIFIER value=""/>
5
        <RESOURCE_TYPE value="WorkflowDSResourceType"/>
6
        <RESOURCE_KIND value="WorkflowDSResources"/>
7
        <RESOURCE_URI value=""/>
8
        <DATE_OF_CREATION value=""/>
9
    </HEADER>
10
    <BODY>
11
        <WORKFLOW_NAME>$name$</WORKFLOW_NAME>
12
        <WORKFLOW_DESCRIPTION>$desc$</WORKFLOW_DESCRIPTION>
13
        <WORKFLOW_INFO />
14
        <WORKFLOW_FAMILY>aggregator</WORKFLOW_FAMILY>
15
        <WORKFLOW_PRIORITY>$priority$</WORKFLOW_PRIORITY>
16
        <DATASOURCE id="$dsId$" interface="$interface$" />
17

  
18
        <CONFIGURATION status="WAIT_SYS_SETTINGS" start="MANUAL">
19
            <PARAMETERS>
20
				<PARAM name="transformationRuleId"  description="Transformation Rule Identifier"            required="true" managedBy="user"   category="TRANSFORMATION_RULE_ID" type="string" function="listProfiles('TransformationRuleDSResourceType', '//TITLE')"/>
21
            	<PARAM name="collMdstoreId"         description="Store for collected records"               required="true" managedBy="system" category="MDSTORE_ID"/>
22
            	<PARAM name="cleanMdstoreId"        description="Store for transformed records"             required="true" managedBy="system" category="MDSTORE_ID"/>
23
            </PARAMETERS>
24
            <WORKFLOW>
25
                <NODE isStart="true" name="collection" type="LaunchWorkflowTemplate">
26
                    <DESCRIPTION>Collect records</DESCRIPTION>
27
                    <PARAMETERS>
28
                    	<PARAM name="wfTemplateId" value="2c8e722e-c095-4e2c-a45d-a6ff75bf0cc9_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ==" />
29
                    	<PARAM name="wfTemplateParams">
30
                    		<MAP>
31
                    			<ENTRY key="dsId"          value="$dsId$" />
32
 								<ENTRY key="interface"     value="$interface$" />
33
	 							<ENTRY key="collMdstoreId" ref="collMdstoreId" />
34
                    		</MAP>
35
                    	</PARAM>
36
                    </PARAMETERS>
37
                    <ARCS>
38
                        <ARC to="transform"/>
39
                    </ARCS>
40
                </NODE>
41

  
42
                <NODE name="transform" type="LaunchWorkflowTemplate">
43
                    <DESCRIPTION>Clean record</DESCRIPTION>
44
                    <PARAMETERS>
45
                        <PARAM name="wfTemplateId" value="dcd92f5b-9033-46c7-aa7c-2dd367be672b_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ==" />
46
                        <PARAM name="wfTemplateParams">
47
                            <MAP>
48
                                <ENTRY key="dsId"            value="$dsId$" />
49
                                <ENTRY key="interface"       value="$interface$" />
50
                                <ENTRY key="collMdstoreId"   ref="collMdstoreId" />
51
                                <ENTRY key="cleanMdstoreId"  ref="cleanMdstoreId" />
52
                                <ENTRY key="transformRuleId" ref="transformationRuleId" />
53
                            </MAP>
54
                        </PARAM>
55
                    </PARAMETERS>
56
                    <ARCS>
57
                        <ARC to="success"/>
58
                    </ARCS>
59
                </NODE>
60
            </WORKFLOW>
61
            <DESTROY_WORKFLOW_TEMPLATE id="1de73b0e-ccbf-442c-a88d-41b27daa0ba2_V29ya2Zsb3dUZW1wbGF0ZURTUmVzb3VyY2VzL1dvcmtmbG93VGVtcGxhdGVEU1Jlc291cmNlVHlwZQ==">
62
            	<PARAMETERS>
63
	            	<PARAM name="dsId"                  value="$dsId$" />
64
					<PARAM name="interface"             value="$interface$" />
65
					<PARAM name="collMdstoreId"         ref="collMdstoreId" />
66
					<PARAM name="cleanMdstoreId"        ref="cleanMdstoreId" />
67
				</PARAMETERS>
68
            </DESTROY_WORKFLOW_TEMPLATE>
69
        </CONFIGURATION>
70

  
71
        <NOTIFICATIONS/>
72

  
73
        <SCHEDULING enabled="false">
74
            <CRON>9 9 9 ? * *</CRON>
75
            <MININTERVAL>10080</MININTERVAL>
76
        </SCHEDULING>
77
        <STATUS/>
78
    </BODY>
79
</RESOURCE_PROFILE>
modules/dnet-isti/trunk/src/main/resources/eu/dnetlib/isti/workflows/nodes/applicationContext-msro-isti.xml
1
<?xml version="1.0" encoding="UTF-8"?>
2
<beans xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
3
       xmlns:p="http://www.springframework.org/schema/p"
4
       xmlns="http://www.springframework.org/schema/beans"
5
       xsi:schemaLocation="http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd">
6

  
7
	<bean id="wfNodeObtainISTIDataSourceParams"
8
	      class="eu.dnetlib.msro.workflows.nodes.ObtainISTIDataSourceParamsJobNode"
9
	      scope="prototype" />
10

  
11
</beans>
modules/dnet-isti/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/RepositoryServiceResources/people.xml
1
<RESOURCE_PROFILE>
2
	<HEADER>
3
		<RESOURCE_IDENTIFIER value="8F08617D-970A-4CA7-8F93-C6C905585B2B_UmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZXMvUmVwb3NpdG9yeVNlcnZpY2VSZXNvdXJjZVR5cGU="/>
4
		<RESOURCE_TYPE value="RepositoryServiceResourceType"/>
5
		<RESOURCE_KIND value="RepositoryServiceResources"/>
6
		<RESOURCE_URI value=""/>
7
		<DATE_OF_CREATION value="2015-07-24T09:38:39+00:00"/>
8
		<PROTOCOL/>
9
	</HEADER>
10
	<BODY>
11
		<CONFIGURATION>
12
			<DATASOURCE_TYPE>pubsrepository::unknown</DATASOURCE_TYPE>
13
			<DATASOURCE_ORIGINAL_ID provenance="ISTI">people</DATASOURCE_ORIGINAL_ID>
14
			<DATASOURCE_AGGREGATED>false</DATASOURCE_AGGREGATED>
15
			<ENVIRONMENTS>
16
				<ENVIRONMENT>OPENAIRE</ENVIRONMENT>
17
			</ENVIRONMENTS>
18
			<TYPOLOGY>OCTOPUS</TYPOLOGY>
19
			<MAX_SIZE_OF_DATASTRUCTURE>0</MAX_SIZE_OF_DATASTRUCTURE>
20
			<AVAILABLE_DISKSPACE>0</AVAILABLE_DISKSPACE>
21
			<MAX_NUMBER_OF_DATASTRUCTURE>0</MAX_NUMBER_OF_DATASTRUCTURE>
22
			<OFFICIAL_NAME>People</OFFICIAL_NAME>
23
			<ENGLISH_NAME>People</ENGLISH_NAME>
24
			<ICON_URI/>
25
			<COUNTRY>IT</COUNTRY>
26
			<LOCATION>
27
				<LONGITUDE>10.42</LONGITUDE>
28
				<LATITUDE>43.71</LATITUDE>
29
				<TIMEZONE>0.0</TIMEZONE>
30
			</LOCATION>
31
			<REPOSITORY_WEBPAGE></REPOSITORY_WEBPAGE>
32
			<REPOSITORY_INSTITUTION>CNR </REPOSITORY_INSTITUTION>
33
			<ADMIN_INFO>stefania.biagioni@isti.cnr.it</ADMIN_INFO>
34
			<INTERFACES>
35
				<INTERFACE active="true" compliance="isti" contentDescription="metadata" id="api_________::people::0" label="pubsrepository::unknown (driver)" removable="false" typology="pubsrepository::unknown">
36
					<ACCESS_PROTOCOL format="oai_dc" set="CDS074">oai</ACCESS_PROTOCOL>
37
					<BASE_URL>http://webtemp.src.cnr.it/oai-pmh/oai2.php</BASE_URL>
38
					<INTERFACE_EXTRA_FIELD name="metadata_identifier_path">//*[local-name()='header']/*[local-name()='identifier']</INTERFACE_EXTRA_FIELD>
39
				</INTERFACE>
40
			</INTERFACES>
41
			<EXTRA_FIELDS>
42
				<FIELD>
43
					<key>OpenAireDataSourceId</key>
44
					<value>people</value>
45
				</FIELD>
46
				<FIELD>
47
					<key>NamespacePrefix</key>
48
					<value>people______</value>
49
				</FIELD>
50
				<FIELD>
51
					<key>VERIFIED</key>
52
					<value>NO</value>
53
				</FIELD>
54
				<FIELD>
55
					<key>aggregatorName</key>
56
					<value>OPENAIRE</value>
57
				</FIELD>
58
				<FIELD>
59
					<key>dateOfValidation</key>
60
					<value/>
61
				</FIELD>
62
				<FIELD>
63
					<key>dateOfCollection</key>
64
					<value>2013-05-07</value>
65
				</FIELD>
66
			</EXTRA_FIELDS>
67
			<REGISTERED_BY/>
68
		</CONFIGURATION>
69
		<STATUS>
70
			<NUMBER_OF_OBJECTS>0</NUMBER_OF_OBJECTS>
71
			<LAST_UPDATE value="2015-07-24T09:38:36Z"/>
72
		</STATUS>
73
		<QOS>
74
			<AVAILABILITY>0</AVAILABILITY>
75
			<CAPACITY/>
76
			<THROUGHPUT>0.0</THROUGHPUT>
77
		</QOS>
78
		<SECURITY_PARAMETERS/>
79
		<BLACKBOARD/>
80
	</BODY>
81
</RESOURCE_PROFILE>
modules/dnet-isti/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/collection.xml
22 22
						<PARAM name="eprParam" value="collected_epr"/>
23 23
					</PARAMETERS>
24 24
					<ARCS>
25
						<ARC to="OBTAIN_PARAMS"/>
26
					</ARCS>
27
				</NODE>
28
				<NODE name="OBTAIN_PARAMS" type="ObtainDliDataSourceParams">
29
					<DESCRIPTION>Obtain datasource parameters</DESCRIPTION>
30
					<PARAMETERS>
31
						<PARAM name="providerId" ref="dsId"/>
32
					</PARAMETERS>
33
					<ARCS>
34 25
						<ARC to="MD_BUILDER"/>
35 26
					</ARCS>
36 27
				</NODE>
modules/dnet-isti/trunk/src/main/resources/eu/dnetlib/bootstrap/profiles/workflows/repo_hi_isti.xml
24 24
				<NODE name="VerifyDatasource" type="VerifyDatasource" isStart="true">
25 25
					<DESCRIPTION>Verify if DS is pending</DESCRIPTION>
26 26
					<PARAMETERS>
27
						<PARAM name="expectedInterfaceTypologyPrefixes" value="isti"/>
27
						<PARAM name="expectedInterfaceTypologyPrefixes" value="pubsrepository::unknown"/>
28 28
						<PARAM name="expectedCompliancePrefixes" value="isti"/>
29 29
					</PARAMETERS>
30 30
					<ARCS>
......
45 45
					<DESCRIPTION>Create Workflow</DESCRIPTION>
46 46
					<PARAMETERS>
47 47
						<PARAM name="wfName" value="Aggregate and Index efg content [Ingestion]"/>
48
						<PARAM name="wfTemplate" value="/eu/dnetlib/efg/workflows/repo-hi/isti_aggregation_wf.xml.st"/>
48
						<PARAM name="wfTemplate" value="/eu/dnetlib/isti/workflows/repo-hi/isti_aggregation_wf.xml.st"/>
49 49
						<PARAM name="description" value="Aggregate and Transform Metadata (publications) from PubsRepository [Ingestion]"/>
50 50
					</PARAMETERS>
51 51
					<ARCS>
......
87 87
							<MAP>
88 88
								<ENTRY key="collMdstoreId" value="harv_id"/>
89 89
								<ENTRY key="cleanMdstoreId" value="clean_id"/>
90
								<ENTRY key="edmMdstoreId" value="edm_id"/>
91
								<ENTRY key="patchMdstoreId" value="patch_id"/>
92
								<ENTRY key="nativeObjectStoreId" value="OSNat_id"/>
93
								<ENTRY key="indexId" value="index_id"/>
94 90
							</MAP>
95 91
						</PARAM>
96 92
					</PARAMETERS>
modules/dnet-isti/trunk/pom.xml
21 21
      <artifactId>dnet-msro-service</artifactId>
22 22
      <version>5.0.0-SNAPSHOT</version>
23 23
    </dependency>
24

  
25
   
26 24
    <dependency>
27
      <groupId>eu.dnetlib</groupId>
28
      <artifactId>dnet-graph-domain</artifactId>
29
      <version>[1.0.0-SNAPSHOT,2.0.0)</version>
30
    </dependency>
31
   
32
    <dependency>
33 25
      <groupId>com.ximpleware</groupId>
34 26
      <artifactId>vtd-xml</artifactId>
35 27
      <version>2.11</version>

Also available in: Unified diff