Project

General

Profile

1 1942 aaronmk
#!/usr/bin/env python
2
# Loads a command's CSV output stream into a PostgreSQL table.
3
# The command may be run more than once.
4
5
import csv
6
import os.path
7 1963 aaronmk
import re
8 1942 aaronmk
import subprocess
9
import sys
10
11
sys.path.append(os.path.dirname(__file__)+"/../lib")
12
13
import csvs
14
import exc
15
import opts
16
import sql
17
import streams
18 1963 aaronmk
import strings
19 1942 aaronmk
20
def main():
21
    # Usage
22
    env_names = []
23
    def usage_err():
24
        raise SystemExit('Usage: '+opts.env_usage(env_names)+' '+sys.argv[0]
25
            +' input_cmd [args...]')
26
27
    # Parse args
28
    input_cmd = sys.argv[1:]
29
    if input_cmd == []: usage_err()
30
31
    # Get config from env vars
32
    table = opts.get_env_var('table', None, env_names)
33
    schema = opts.get_env_var('schema', 'public', env_names)
34
    db_config = opts.get_env_vars(sql.db_config_names, None, env_names)
35
    debug = opts.env_flag('debug', False, env_names)
36
    if not (table != None and 'engine' in db_config): usage_err()
37
38
    # Connect to DB
39
    db = sql.connect(db_config)
40
41 1963 aaronmk
    use_copy_from = [True]
42
43
    # Loads data into the table using the currently-selected approach.
44
    def load_():
45 1942 aaronmk
        # Open input stream
46
        proc = subprocess.Popen(input_cmd, stdout=subprocess.PIPE, bufsize=-1)
47
        in_ = proc.stdout
48
49
        # Get format info
50
        info = csvs.stream_info(in_, parse_header=True)
51
        dialect = info.dialect
52 1963 aaronmk
        if csvs.is_tsv(dialect): use_copy_from[0] = False
53 1942 aaronmk
54 1963 aaronmk
        # Select schema and escape names
55 1942 aaronmk
        def esc_name(name): return sql.esc_name(db, name, preserve_case=True)
56 1963 aaronmk
        sql.run_query(db, 'SET search_path TO '+esc_name(schema))
57
        esc_table = esc_name(table)
58 1942 aaronmk
        esc_cols = map(esc_name, info.header)
59
60
        # Create CREATE TABLE statement
61
        pkey = esc_name(table+'_pkey')
62 1963 aaronmk
        create_table = 'CREATE TABLE '+esc_table+' (\n'
63 1942 aaronmk
        create_table += '    row_num serial NOT NULL,\n'
64
        for esc_col in esc_cols: create_table += '    '+esc_col+' text,\n'
65
        create_table += '    CONSTRAINT '+pkey+' PRIMARY KEY (row_num)\n'
66
        create_table += ');\n'
67
        if debug: sys.stderr.write(create_table)
68
69 1963 aaronmk
        # Create table
70
        sql.run_query(db, create_table)
71
72 1942 aaronmk
        # Create COPY FROM statement
73 1963 aaronmk
        if use_copy_from[0]:
74
            cur = db.db.cursor()
75
            copy_from = ('COPY '+esc_table+' ('+(', '.join(esc_cols))
76
                +') FROM STDIN DELIMITER %(delimiter)s NULL %(null)s')
77
            assert not csvs.is_tsv(dialect)
78 1942 aaronmk
            copy_from += ' CSV'
79
            if dialect.quoting != csv.QUOTE_NONE:
80
                copy_from += ' QUOTE %(quotechar)s'
81
                if dialect.doublequote: copy_from += ' ESCAPE %(quotechar)s'
82 1963 aaronmk
            copy_from += ';\n'
83
            copy_from = cur.mogrify(copy_from, dict(delimiter=dialect.delimiter,
84
                null=r'\N', quotechar=dialect.quotechar))
85
            if debug: sys.stderr.write(copy_from)
86 1942 aaronmk
87 1963 aaronmk
        # Load the data
88 1942 aaronmk
        line_in = streams.ProgressInputStream(in_, sys.stderr,
89
            'Processed %d row(s)', n=10000)
90 1963 aaronmk
        try:
91
            if use_copy_from[0]:
92
                sys.stderr.write('Using COPY FROM\n')
93
                db.db.cursor().copy_expert(copy_from, line_in)
94
            else:
95
                sys.stderr.write('Using INSERT\n')
96
                for row in csvs.make_reader(line_in, dialect):
97
                    row = map(strings.to_unicode, row)
98
                    row.insert(0, sql.default) # leave space for autogen row_num
99
                    sql.insert(db, esc_table, row, table_is_esc=True)
100 1942 aaronmk
        finally:
101
            line_in.close() # also closes proc.stdout
102
            proc.wait()
103 1963 aaronmk
    load = lambda: sql.with_savepoint(db, load_)
104 1942 aaronmk
105 1963 aaronmk
    try: load()
106
    except sql.DatabaseErrors, e:
107
        if use_copy_from[0]: # first try
108
            exc.print_ex(e, plain=True)
109
            use_copy_from[0] = False
110
            load() # try again with different approach
111
        else: raise e
112
    db.db.commit()
113 1942 aaronmk
114
main()