Revision 5149
Added by Aaron Marcuse-Kubitza about 12 years ago
lib/tnrs.py | ||
---|---|---|
5 | 5 |
import time |
6 | 6 |
import urllib2 |
7 | 7 |
|
8 |
import csvs |
|
8 | 9 |
import exc |
9 | 10 |
import profiling |
10 | 11 |
import streams |
... | ... | |
59 | 60 |
def gwt_encode(str_):
    '''Returns str_ JSON-encoded, with `|` then escaped by strings.esc_quotes().

    The value is first passed through strings.json_encode(); the result is
    handed to strings.esc_quotes() with `|` as the character to escape and a
    backslash followed by `!` as the escape sequence.
    '''
    json_str = strings.json_encode(str_)
    return strings.esc_quotes(json_str, '|', quote_esc='\!')
61 | 62 |
|
63 |
def make_spliced_decode_map(decode_map):
    '''Converts (from_, to) pairs into (regexp, replacement) pairs.

    Each from_ is stripped of surrounding whitespace and regexp-escaped, then
    wrapped so that it matches when each side is a literal space (which is
    consumed), sits next to a tab (which is not consumed), or is the start/end
    of the string. Each to is passed through strings.regexp_repl_esc().

    @param decode_map iterable of (from_, to) string pairs
    @return list of (pattern, replacement) tuples, in the input's order
    '''
    # Delimiter alternatives on each side of the token
    leading = r'(?: |(?<=\t)|^)'
    trailing = r'(?: |(?=\t)|$)'

    spliced = []
    for from_, to in decode_map:
        pattern = leading + re.escape(from_.strip()) + trailing
        spliced.append((pattern, strings.regexp_repl_esc(to)))
    return spliced
|
66 |
|
|
67 |
# Ordered (character, token) pairs used by encode() to escape TNRS-invalid
# characters. Every token is the `!`-prefixed name padded with one space on
# each side.
# NOTE(review): `!` is listed first, presumably so that the `!` introduced by
# the later tokens is not itself re-escaped; this assumes
# strings.replace_all() applies the entries in order -- confirm.
encode_map = [
    ('!', ' !exc '),  # escape char
    ('\t', ' !tab '),  # tab
    ('\n', ' !nl '),  # newline
    ('\r', ' !cr '),  # carriage return
    ('%', ' !pct '),  # percent sign
    ('&', ' !amp '),  # ampersand
    (';', ' !sem '),  # semicolon
    ('\\', ' !bsl '),  # backslash
]
|
77 |
# Token -> character pairs, derived from encode_map via strings.flip_map().
# NOTE(review): if flip_map() keeps encode_map's order and the decoder applies
# entries sequentially, the ` !exc ` entry is decoded first; an encoded literal
# such as `!tab` (encoded as ` !exc tab`) might then be decoded a second time
# into a tab. Confirm against strings.flip_map()/strings.replace_all_re().
decode_map = strings.flip_map(encode_map)

# Regexp form of decode_map, as consumed by decode()
spliced_decode_map = make_spliced_decode_map(decode_map)
|
79 |
|
|
80 |
def encode(str_):
    '''Returns str_ after applying encode_map via strings.replace_all().'''
    encoded = strings.replace_all(encode_map, str_)
    return encoded
|
81 |
|
|
82 |
def decode(str_):
    '''Returns str_ after applying spliced_decode_map via
    strings.replace_all_re().
    '''
    decoded = strings.replace_all_re(spliced_decode_map, str_)
    return decoded
|
83 |
|
|
84 |
# Like spliced_decode_map, but each decoded character is first passed through
# csvs.tsv_encode_map (via strings.replace_all()) before the regexp map is
# built from it.
decode_for_tsv_map = make_spliced_decode_map([
    (from_, strings.replace_all(csvs.tsv_encode_map, to))
    for from_, to in decode_map
])
|
86 |
|
|
87 |
def decode_for_tsv(str_):
    '''Returns str_ after applying decode_for_tsv_map via
    strings.replace_all_re().
    '''
    decoded = strings.replace_all_re(decode_for_tsv_map, str_)
    return decoded
|
89 |
|
|
90 |
class TnrsOutputStream(streams.FilterStream):
    '''Filter stream for a TNRS response whose names were encoded with
    encode(): it is constructed with decode_for_tsv() as the filter function.
    '''

    def __init__(self, stream):
        '''@param stream the underlying stream, handed to FilterStream'''
        # Explicit base-class call: decode_for_tsv is the filter function
        streams.FilterStream.__init__(self, decode_for_tsv, stream)
|
94 |
|
|
62 | 95 |
def parse_response(name, pattern, str_, response, response_info): |
63 | 96 |
match = re.match(pattern, str_) |
64 | 97 |
if not match: |
Also available in: Unified diff
tnrs.py: Added encode(), decode(), decode_for_tsv(), and TnrsOutputStream to handle escaping TNRS-invalid characters