Revision 50229
Added by Claudio Atzori over 6 years ago
AbstractPaceFunctions.java | ||
---|---|---|
113 | 113 |
|
114 | 114 |
protected String normalize(final String s) { |
115 | 115 |
return nfd(s).toLowerCase() |
116 |
// do not compact the regexes into a single expression: that would cause a StackOverflowError on large input strings |
|
116 | 117 |
.replaceAll("(\\W)+", " ") |
117 | 118 |
.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ") |
118 | 119 |
.replaceAll("(\\p{Punct})+", " ") |
119 | 120 |
.replaceAll("(\\d)+", " ") |
120 | 121 |
.replaceAll("(\\n)+", " ") |
121 |
//.replaceAll("(\\W|\\p{InCombiningDiacriticalMarks}|\\p{Punct}|\\d|\\n)+", " ") |
|
122 | 122 |
.trim(); |
123 | 123 |
} |
124 | 124 |
|
Also available in: Unified diff
trying to cope with java.util.regex.Pattern bug #3369