Last active
September 16, 2015 15:45
-
-
Save matthallamew/dc9902bb2fdaa62b0132 to your computer and use it in GitHub Desktop.
Analyze and display user defined columns from a CSV file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
# | |
# Analyze CSV files in order to see only the columns you specify. | |
# Display the headers of the CSV in order to get column names and/or numbers. | |
# Use the column names/numbers in order to only display those columns to standard out. | |
# | |
use strict; | |
use Getopt::Std; | |
my %options=(); | |
getopts("c:laf:s:", \%options); | |
init(); | |
sub init{ | |
if($options{f}){ | |
if($options{l}){ | |
listhead(); | |
} | |
elsif($options{a}){ | |
if($options{c}){ | |
analyze(); | |
} else { die "Did not include -c <column name>,<column name>\n\n"; } | |
} | |
} | |
else { die "Did not include -f <filename>\n\n"; } | |
} | |
############### | |
# | |
# Display the header of the input file | |
# | |
# ./csvTool -l -f file.csv [-s '\|'] | |
# | |
############### | |
sub listhead { | |
my $filName; | |
my $separator; | |
# Check to make sure the proper switch and a filename were provided | |
if($options{f}){ | |
$filName = "$options{f}"; | |
}else { die "Did not include -f <filename>\n\n";} | |
$separator = "$options{s}" || ","; | |
# Read first line to get header row | |
open(FILE1,"<", "$filName") or die "Could not read from $filName!"; | |
my $header = <FILE1>; | |
my @fileFields = split(/$separator/, uc($header)); | |
my $fileFieldsLen = @fileFields; | |
my %index; | |
@index{@fileFields} = (0..$#fileFields); | |
for my $field (@fileFields){ | |
my $idx = $index{$field} + 1; | |
print "$idx $field\n"; | |
} | |
} | |
############### | |
# | |
# Analyze input file | |
# | |
# ./csvTool -a -f file.csv -c 1,2,3 [-s '\t'] | |
# | |
# ./csvTool -a -f file.csv -c id,email [-s ';'] | |
# | |
############### | |
sub analyze { | |
my $filName; | |
my @inFields; | |
my $separator; | |
# Check to make sure the proper switch and a filename were provided | |
if($options{f}){ | |
$filName = "$options{f}"; | |
}else { die "Did not include -f <filename>\n\n"; } | |
if($options{c}){ | |
@inFields = split(/,/,$options{c}); | |
}else { die "Did not include -c <column name>,<column name>\n\n"; } | |
$separator = "$options{s}" || ","; | |
# Read first line to get header row | |
open(FILE1,"<", "$filName") or die "Could not read from $filName!"; | |
my $header = <FILE1>; | |
my @fileFields = split(/$separator/, uc($header)); | |
my $fileFieldsLen = @fileFields; | |
# Get column numbers of each user requested field | |
my $useFields = undef; | |
my %index; | |
@index{@fileFields} = (0..$#fileFields); | |
for my $inField (@inFields){ | |
$inField =~ s/^\s+|\s+$//g; | |
$inField = uc($inField); | |
if($inField =~ /[0-9]/){ | |
for(my $c = 0; $c < $fileFieldsLen; $c++){ | |
if($c eq $inField - 1){ | |
$useFields .= $inField - 1 .","; | |
last; | |
} | |
} | |
} else{ | |
$useFields .= "$index{$inField},"; | |
} | |
} | |
$useFields = join(",",split(/,/,$useFields)); | |
# Read the rest of the file, displaying only those columns that the user requested | |
my $cnt = 0; | |
while (!eof(FILE1)) { | |
my $line = <FILE1>; | |
$line =~ tr/\"//d; | |
chomp($line); | |
my @row = split(/$separator/,$line); | |
my $reportRows = undef; | |
for my $c (split(/,/,$useFields)){ | |
$reportRows = $reportRows . "$row[$c] "; | |
} | |
print "$reportRows\n"; | |
$cnt++; | |
} | |
close FILE1; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment