Created
December 31, 2016 07:26
-
-
Save vasansr/07420a402a74c1b559ffd6767eb66747 to your computer and use it in GitHub Desktop.
Create SQL commands based on a CSV file (perl)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# | |
use Text::CSV_XS; | |
use IO::Handle; | |
use Getopt::Long; | |
sub usage { | |
print "Usage: csv2sql.pl [OPTIONS] <csvfile>\n"; | |
print "Dump sql statements based on input csv\n"; | |
print "Conventions: \n"; | |
print " Table name is derived from of csv file name by stripping .csv\n"; | |
print " First line of csv file contains column names\n"; | |
print " Columns with names beginning with a # are ignored\n"; | |
print "Options:\n"; | |
print " --update=<n>: update using first n columns as key (the default is to insert)\n"; | |
print " --table=<table>: override default table name derived from file name\n"; | |
} | |
our $opt_update = 0; | |
our $opt_table = ""; | |
GetOptions("update=i", "table=s") or die usage(); | |
my $csvfile = $ARGV[0]; | |
if (!$csvfile) { | |
print "Error: No filename specified\n"; | |
usage(); | |
exit 1; | |
} | |
if ($opt_table) { | |
$table = $opt_table; | |
} else { | |
$table = $csvfile; | |
$table =~ s/\.csv$//; | |
} | |
# get the csv | |
my $csv = Text::CSV_XS->new(); | |
my $f; | |
open ($f, "<", "$csvfile") or die "Could not open $csvfile: $!"; | |
my $col_names = $csv->getline($f); | |
my @column_commented; | |
for (my $i=0; $i<(@$col_names); $i++) { | |
if ($col_names->[$i] =~ /^#/) { | |
$column_commented[$i] = 1; | |
} else { | |
$column_commented[$i] = 0; | |
} | |
} | |
# create the list of columns | |
my $column_list = ""; | |
for (my $i=0; $i<(@$col_names); $i++) { | |
unless ($column_commented[$i]) { | |
comma_append(\$column_list, $col_names->[$i]); | |
} | |
} | |
while (my $row = $csv->getline($f)) { | |
my @fields = @$row; | |
next if ($fields[0] eq '#'); | |
if ($opt_update) { | |
spewUpdate(@fields); | |
} else { | |
spewInsert(@fields); | |
} | |
} | |
sub spewInsert { | |
my @fields = @_; | |
my $values = ""; | |
for (my $i=0; $i<(@$col_names); $i++) { | |
unless ($column_commented[$i]) { | |
comma_append(\$values, "'" . sql_escape($fields[$i]) . "'"); | |
} | |
} | |
print "INSERT INTO $table ($column_list) values($values);\n"; | |
} | |
sub spewUpdate { | |
# opt_update has the number of columns that is to be used for the primary | |
# key. Assumption is that all primary key columns will appear in the beginning | |
# (excluding commented columns) | |
my @fields = @_; | |
my $col_index = 0; | |
my $values = ""; | |
for (my $i=0; $i<(@$col_names); $i++) { | |
unless ($column_commented[$i]) { | |
# exclude pkeys | |
unless ($col_index++ < $opt_update) { | |
comma_append(\$values, $col_names->[$i] . "='" . sql_escape($fields[$i]) . "'"); | |
} | |
} | |
} | |
my $filter = ""; | |
$col_index = 0; | |
for (my $i=0; $i<(@$col_names); $i++) { | |
unless ($column_commented[$i]) { | |
# include only pkeys | |
if ($col_index++ < $opt_update) { | |
and_append(\$filter, $col_names->[$i] . "='" . sql_escape($fields[$i]) . "'"); | |
} else { | |
last; | |
} | |
} | |
} | |
print "UPDATE $table SET $values WHERE $filter;\n"; | |
} | |
sub comma_append { | |
my ($str_ref, $to_append) = @_; | |
if ($$str_ref) { | |
$$str_ref .= ", "; | |
} | |
$$str_ref .= $to_append; | |
} | |
sub and_append { | |
my ($str_ref, $to_append) = @_; | |
if ($$str_ref) { | |
$$str_ref .= " AND "; | |
} | |
$$str_ref .= $to_append; | |
} | |
sub sql_escape { | |
$str = shift; | |
$str =~ s/\'/\'\'/g; | |
return $str; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment