Project

General

Profile

« Previous | Next » 

Revision 5439

csvs.py: sniff(): Support tab-padded multi-character delimiters, such as the \t|\t used by NCBI. Also support custom line suffixes, such as the \t| used by NCBI.

View differences:

lib/csvs.py
8 8
import util
9 9

  
10 10
delims = ',;\t|`'
11
tab_padded_delims = ['\t|\t']
11 12
tsv_delim = '\t'
12 13
escape = '\\'
13 14

  
......
20 21
    line, ending = strings.extract_line_ending(line)
21 22
    dialect = csv.Sniffer().sniff(line, delims)
22 23
    
23
    # TSVs usually don't quote fields (nor doublequote embedded quotes)
24
    if is_tsv(dialect): dialect.quoting = csv.QUOTE_NONE
24
    if is_tsv(dialect):
25
        # TSVs usually don't quote fields (nor doublequote embedded quotes)
26
        dialect.quoting = csv.QUOTE_NONE
27
        
28
        # Check multi-char delims using \t
29
        delim = strings.find_any(line, tab_padded_delims)
30
        if delim:
31
            dialect.delimiter = delim
32
            line_suffix = delim.rstrip('\t')
33
            if line.endswith(line_suffix): ending = line_suffix+ending
25 34
    else: dialect.doublequote = True # Sniffer doesn't turn this on by default
26 35
    dialect.lineterminator = ending
27 36
    

Also available in: Unified diff