Revision 50228
Added by Claudio Atzori over 6 years ago
modules/dnet-pace-core/trunk/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java | ||
---|---|---|
112 | 112 |
// /////////////////////// |
113 | 113 |
|
114 | 114 |
protected String normalize(final String s) { |
115 |
return nfd(s).toLowerCase().replaceAll("(\\W|\\p{InCombiningDiacriticalMarks}|\\p{Punct}|\\d|\\n)+", " ").trim(); |
|
115 |
return nfd(s).toLowerCase() |
|
116 |
.replaceAll("(\\W)+", " ") |
|
117 |
.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") |
|
118 |
.replaceAll("(\\p{Punct})+", " ") |
|
119 |
.replaceAll("(\\d)+", " ") |
|
120 |
.replaceAll("(\\n)+", " ") |
|
121 |
//.replaceAll("(\\W|\\p{InCombiningDiacriticalMarks}|\\p{Punct}|\\d|\\n)+", " ") |
|
122 |
.trim(); |
|
116 | 123 |
} |
117 | 124 |
|
118 | 125 |
private String nfd(final String s) { |
Also available in: Unified diff
Trying to cope with java.util.regex.Pattern bug #3369: the single combined regex in normalize() is split into separate chained replaceAll calls.