Project

General

Profile

« Previous | Next » 

Revision 10278

bin/*: Replaced confusing regexp constructs involving \W inside [] with the much clearer explicit character class [:alnum:]. This avoids adding to or subtracting from an inverted class in order to reach a subset of the corresponding positive class, because the subset can just be named explicitly instead.

View differences:

bin/repl
43 43
        if in_ != '':
44 44
            if text or re.match(r'^\w+$', in_): # match word
45 45
                in_ = (r'(?:^|(?<=[\s,"])|^:|(?<=[\s,"]):)'+re.escape(in_)
46
                    +r'(?![^\W_])')
46
                    +r'(?![[:alnum:]])')
47 47
            repls.append((r'(?m)'+in_, out))
48 48
    stream.close()
49 49
    def repl_all(str_):
bin/filter_out_ci
7 7
import re
8 8
import sys
9 9

  
10
def simplify(str_): return re.sub(r'#.*$|[\W_]+', r'', str_.lower())
10
def simplify(str_): return re.sub(r'#.*$|[^[:alnum:]]+', r'', str_.lower())
11 11

  
12 12
def main():
13 13
    try: _prog_name, col_num, vocab_path = sys.argv
bin/canon
17 17
        if key in self: raise KeyError(key)
18 18
        dict.__setitem__(self, key, value)
19 19

  
20
def simplify(str_): return re.sub(r'[\W_]+', r'', str_.lower())
20
def simplify(str_): return re.sub(r'[^[:alnum:]]+', r'', str_.lower())
21 21

  
22 22
def main():
23 23
    try: _prog_name, col_num, vocab_path = sys.argv

Also available in: Unified diff