Revision 5439
Added by Aaron Marcuse-Kubitza about 12 years ago
lib/csvs.py | ||
---|---|---|
8 | 8 |
import util |
9 | 9 |
|
10 | 10 |
delims = ',;\t|`' |
11 |
tab_padded_delims = ['\t|\t'] |
|
11 | 12 |
tsv_delim = '\t' |
12 | 13 |
escape = '\\' |
13 | 14 |
|
... | ... | |
20 | 21 |
line, ending = strings.extract_line_ending(line) |
21 | 22 |
dialect = csv.Sniffer().sniff(line, delims) |
22 | 23 |
|
23 |
# TSVs usually don't quote fields (nor doublequote embedded quotes) |
|
24 |
if is_tsv(dialect): dialect.quoting = csv.QUOTE_NONE |
|
24 |
if is_tsv(dialect): |
|
25 |
# TSVs usually don't quote fields (nor doublequote embedded quotes) |
|
26 |
dialect.quoting = csv.QUOTE_NONE |
|
27 |
|
|
28 |
# Check multi-char delims using \t |
|
29 |
delim = strings.find_any(line, tab_padded_delims) |
|
30 |
if delim: |
|
31 |
dialect.delimiter = delim |
|
32 |
line_suffix = delim.rstrip('\t') |
|
33 |
if line.endswith(line_suffix): ending = line_suffix+ending |
|
25 | 34 |
else: dialect.doublequote = True # Sniffer doesn't turn this on by default |
26 | 35 |
dialect.lineterminator = ending |
27 | 36 |
|
Also available in: Unified diff
csvs.py: sniff(): Support multi-character, tab-padded delimiters, such as the \t|\t used by NCBI. Also support custom line suffixes that precede the line ending, such as the \t| used by NCBI.