Revision 36671
Added by Marek Horst about 9 years ago
modules/icm-iis-documentssimilarity/trunk/src/main/resources/eu/dnetlib/iis/documentssimilarity/chain/job.properties | ||
---|---|---|
1 |
input_document=/share/transformers/documentssimilarity/2014-06-05/
|
|
2 |
output_documents_similarity=${workingDir}/documents_similarity
|
|
1 |
input_document=/share/transformers/documentssimilarity/2015-04-23
|
|
2 |
output_documents_similarity=${workingDir}/out
|
|
3 | 3 |
|
4 |
#propozycje Piotrka |
|
5 |
remove_sideproducts=true |
|
6 | 4 |
mapredChildJavaOpts=-Xmx20g |
7 | 5 |
parallel=20 |
8 | 6 |
removal_rate=0.99 |
... | ... | |
11 | 9 |
similarityTopnDocumentPerDocument=20 |
12 | 10 |
sample=1.0 |
13 | 11 |
|
14 |
threshold_num_of_vector_elems_length=3 |
|
15 |
|
|
12 |
remove_sideproducts=false |
modules/icm-iis-documentssimilarity/trunk/src/main/resources/eu/dnetlib/iis/documentssimilarity/chain/oozie_app/workflow.xml | ||
---|---|---|
59 | 59 |
<value>2</value> |
60 | 60 |
<description>vector elements length threshold, when set to less than 2 all documents will be included in similarity matching</description> |
61 | 61 |
</property> |
62 |
<property> |
|
63 |
<name>remove_sideproducts</name> |
|
64 |
<value>true</value> |
|
65 |
<description>flag indicating all documents similarity processing sideproducts should be removed</description> |
|
66 |
</property> |
|
62 | 67 |
</parameters> |
63 | 68 |
|
64 | 69 |
<start to="prepare" /> |
... | ... | |
198 | 203 |
<!-- newly introduced properties --> |
199 | 204 |
<property> |
200 | 205 |
<name>remove_sideproducts</name> |
201 |
<value>true</value>
|
|
206 |
<value>${remove_sideproducts}</value>
|
|
202 | 207 |
</property> |
203 | 208 |
<property> |
204 | 209 |
<name>tfidfMinValue</name> |
Also available in: Unified diff
Defined the remove_sideproducts parameter, set to true by default, among the chain workflow.xml parameters. This way the global remove_sideproducts flag defined in the primary workflow is propagated here; until now, sideproducts were always removed.