Project

General

Profile

« Previous | Next » 

Revision 28793

Added by Dominika Tkaczyk almost 10 years ago

Madis update

View differences:

text.py
350 350

  
351 351
    def cleanchar(c):
352 352
        c=c.group()[0]
353
        if unicodedata.category(c)[0]=='C':
353
        if c != '\n' and unicodedata.category(c)[0] == 'C':
354 354
            return u''
355 355
        else:
356 356
            return c
......
360 360
        if type(i) in (str,unicode):
361 361
            o+=characters_to_clean.sub(cleanchar, i)
362 362
        else:
363
            o+=unicode(append(i))
363
            o+=unicode(i, errors='replace')
364 364

  
365 365
    return o
366 366

  
......
412 412
        else:
413 413
            return None
414 414

  
415
    if len(args)==3:
416
        return re.sub(args[0], args[2], args[1])
415
    if len(args) == 3:
416
        try:
417
            return re.sub(args[0], args[2], args[1], flags=re.UNICODE)
418
        except TypeError:
419
            return re.sub(args[0], args[2], args[1])
417 420

  
418
regexpr.registered=True
421
regexpr.registered = True
419 422

  
420 423
def regexprfindall(*args):
421 424
    """
......
935 938
hashmodarchdep.registered=True
936 939

  
937 940

  
938
def textreferences(txt,maxlen = 5,pattern = r'(\b|_)(1|2)\d{3,3}(\b|_)' ):
941
def textreferences(txt,maxlen = 5,pattern = r'(\b|_)((1[5-9]\d{2,2})|(20\d{2,2}))(\b|_)' ):
939 942
    """
940 943
    .. function:: textreferences(text, maxlen = 5, pattern = (\b|_)(1|2)\d{3,3}(\b|_))
941 944

  
......
1030 1033
    except:
1031 1034
        threshold = 0
1032 1035

  
1033
    winlen = 0
1034
    win = deque(('' for _ in xrange(maxlen)),maxlen)
1035 1036
    current = 0
1036
    start = 0
1037 1037
    for i in reversedtext2:
1038 1038
        if len(i)>10:
1039
            if  winlen == maxlen and densities[current]>=threshold:
1040
                if start == 1:
1041
                    start = 0
1042
                    for j in xrange(maxlen/2):
1043
                        references.append(win[j])
1044

  
1045
                references.append(win[maxlen/2])
1046
            win.append(i)
1047
            if winlen<maxlen:
1048
                winlen+=1
1049
                if winlen == maxlen:
1050
                    start = 1
1051
            else :
1052
                current+=1
1053

  
1054

  
1039
            if densities[current] >= threshold:
1040
                references.append(i)
1041
            current+=1
1055 1042
    return  '\n'.join(reversed(references))
1056 1043

  
1057 1044
textreferences.registered=True
......
1234 1221

  
1235 1222
    try:
1236 1223
        nextlen = args[3]
1224
        try:
1225
            nextlen = int(nextlen)
1226
        except:
1227
            raise functions.OperatorError('textwindow2s','Third argument should be an integer')
1237 1228
    except IndexError:
1238 1229
        nextlen = 0
1239 1230

  
1240 1231
    if len(args) > 4:
1241
        patt = re.compile(args[4])
1232
        try:
1233
            patt = re.compile(args[4])
1234
        except:
1235
            raise functions.OperatorError('textwindow2s','Fourth argument must be string or compiled pattern')
1242 1236
        for i in xrange(len(g)-middle+1):
1243 1237
            im = i+middle
1244 1238
            mid = ' '.join(g[i:im])

Also available in: Unified diff