Project

General

Profile

« Previous | Next » 

Revision 8202

lib/csvs.py: stream_info(): Fixed bug where headers with multiline columns were not supported because only the first line (not the first multiline row) was sniffed for the dialect

View differences:

lib/csvs.py
36 36
    
37 37
    return dialect
38 38

  
39
def has_unbalanced_quotes(str_):
    '''Reports whether str_ contains an odd number of " characters.
    @return bool True if the number of " in str_ is odd, False otherwise'''
    quote_count = str_.count('"')
    return quote_count % 2 != 0 # odd # of "
40

  
41
def has_multiline_column(str_):
    '''Treats unbalanced quotes in str_ as the sign of a multiline column.
    Delegates entirely to has_unbalanced_quotes().
    @return the result of has_unbalanced_quotes(str_)'''
    unbalanced = has_unbalanced_quotes(str_)
    return unbalanced
42

  
39 43
def stream_info(stream, parse_header=False):
40 44
    '''Automatically detects the dialect based on the header line.
41 45
    Uses the Excel dialect if the CSV file is empty.
42 46
    @return NamedTuple {header_line, header, dialect}'''
43 47
    info = util.NamedTuple()
44 48
    info.header_line = stream.readline()
49
    if has_multiline_column(info.header_line): # 1st line not full header
50
        # assume it's a header-only csv with multiline columns
51
        info.header_line += ''.join(stream.readlines()) # use entire file
45 52
    info.header = None
46 53
    if info.header_line != '':
47 54
        info.dialect = sniff(info.header_line)

Also available in: Unified diff