Project

General

Profile

« Previous | Next » 

Revision 50228

Trying to cope with java.util.regex.Pattern bug #3369: split the single alternation regex in normalize() into separate, sequential replaceAll calls.

View differences:

modules/dnet-pace-core/trunk/src/main/java/eu/dnetlib/pace/common/AbstractPaceFunctions.java
112 112
	// ///////////////////////
113 113

  
114 114
	protected String normalize(final String s) {
115
		return nfd(s).toLowerCase().replaceAll("(\\W|\\p{InCombiningDiacriticalMarks}|\\p{Punct}|\\d|\\n)+", " ").trim();
115
		return nfd(s).toLowerCase()
116
				.replaceAll("(\\W)+", " ")
117
				.replaceAll("(\\p{InCombiningDiacriticalMarks})+", " ")
118
				.replaceAll("(\\p{Punct})+", " ")
119
				.replaceAll("(\\d)+", " ")
120
				.replaceAll("(\\n)+", " ")
121
				//.replaceAll("(\\W|\\p{InCombiningDiacriticalMarks}|\\p{Punct}|\\d|\\n)+", " ")
122
				.trim();
116 123
	}
117 124

  
118 125
	private String nfd(final String s) {

Also available in: Unified diff