Last active
May 13, 2016 00:16
-
-
Save martinec/1fe9425eec63e6c7aa430c59465865c3 to your computer and use it in GitHub Desktop.
Parse plain FILE(s) and load them into TLS206_ASCII PATSTAT table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ============================================================================= | |
# tls206_ascii configuration file | |
# ============================================================================= | |
# run 'tls206_ascii --man' for details | |
# ============================================================================= | |
# Database options | |
[database] | |
name = dbname | |
host = 127.0.0.1 | |
port = 3306 | |
user = dbuser | |
pass = dbpass | |
table = tls206_ascii | |
# Connection over ssh tunnel | |
# Uncomment for setup | |
#[ssh] | |
#server = | |
#port = 22 | |
#user = | |
#pass = | |
# Fixed fields ranges = offset;length | |
[field_fix] | |
prof_person_id = 0;9 | |
prof_doc_sn_id = 9;9 | |
prof_appln_id = 18;9 | |
prof_wk_country = 27;7 | |
prof_wk_number = 34;10 | |
prof_wk_kind = 44;2 | |
prof_source = 46;4 | |
prof_a_i_flag = 50;1 | |
prof_seq_nr = 51;4 | |
prof_country = 55;2 | |
prof_nationality = 57;2 | |
persons_in_residence = 59;2 | |
persons_in_uspto_role = 61;2 | |
# Length fields ranges = offset;length | |
[field_len] | |
prof_last_name = 63;3 | |
prof_first_name = 66;3 | |
prof_middle_names = 69;3 | |
prof_street = 72;3 | |
prof_city = 75;3 | |
prof_state = 78;3 | |
prof_zip_code = 81;3 | |
# Variable field = offset;0 | |
[field_var] | |
prof_name_addr = 84;0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SET SQL_MODE="NO_AUTO_VALUE_ON_ZERO"; | |
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; | |
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */; | |
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */; | |
/*!40101 SET NAMES utf8 */; | |
-- -------------------------------------------------------- | |
-- | |
-- Table structure for table `TLS206_ASCII` | |
-- | |
CREATE TABLE IF NOT EXISTS `TLS206_ASCII` ( | |
`ID` int(10) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY, | |
`prof_person_id` int(11) NOT NULL, | |
`prof_doc_sn_id` int(11) NOT NULL, | |
`prof_appln_id` int(11) NOT NULL, | |
`prof_wk_country` char(2) NOT NULL, | |
`prof_wk_number` text NOT NULL, | |
`prof_wk_kind` text NOT NULL, | |
`prof_source` text NOT NULL, | |
`prof_a_i_flag` text NOT NULL, | |
`prof_seq_nr` text NOT NULL, | |
`prof_country` text NOT NULL, | |
`prof_nationality` text NOT NULL, | |
`persons_in_residence` text NOT NULL, | |
`persons_in_uspto_role` text NOT NULL, | |
`prof_last_name` text NOT NULL, | |
`prof_first_name` text NOT NULL, | |
`prof_middle_names` text NOT NULL, | |
`prof_street` text NOT NULL, | |
`prof_city` text NOT NULL, | |
`prof_state` text NOT NULL, | |
`prof_zip_code` text NOT NULL, | |
`prof_name_addr` text NOT NULL | |
) ENGINE=MyISAM DEFAULT CHARSET=utf8; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
# ============================================================================= | |
# tls206_ascii - Parse plain FILE(s) and load them into TLS206_ASCII PATSTAT table. | |
# Author : Cristian Martinez <martinec-at-esiee.fr> | |
# License: GNU General Public License Version 3 or later | |
# ============================================================================= | |
use strict; # Pragma to restrict unsafe constructs | |
use warnings; # Pragma to control optional warnings | |
# ============================================================================= | |
#use lib 'lib'; # Use to manipulate @INC at compile time | |
# ============================================================================= | |
use Module::Load::Conditional qw[can_load];# Looking up module information | |
use Pod::Usage; # Print usage msg from embedded pod doc | |
use File::Basename; # Parse file paths into dir, name and suffix. | |
# ============================================================================= | |
use utf8; # Enables to type utf8 in program code | |
use open IN => ':encoding(iso-8859-1)'; # Input stream legacy ascii 8859 Latin 1 | |
use open OUT => ':encoding(utf8)'; # Output stream legacy utf8 | |
use open ':std'; # STDIN(iso-8859-1), STDOUT(utf8), STDERR(utf8) | |
# ============================================================================= | |
use AppConfig qw(:argcount); # Reading conf files and parsing arguments | |
use AppConfig::Args; # Reading command line arguments | |
use Data::Dumper; # Stringified perl data structures | |
use DBI; # Database independent interface for Perl | |
use Scalar::Util qw(looks_like_number); # Returns true if EXPR is a number | |
# ============================================================================= | |
# ============================================================================= | |
# Environment | |
# ============================================================================= | |
my $config; # setup configuration environment | |
my $template; # for use within unpack function | |
my $field_boundary; # field line boundray | |
my @fields; # fields names to distinguish parsing | |
my $sql_statement; # sql insert statement | |
my $dbh; # database handle object | |
my $sth; # statement handle object | |
my $ssh2; | |
my $channel; | |
# ============================================================================= | |
# ============================================================================= | |
# Constants | |
# ============================================================================= | |
use constant PROG_VER => '1.1'; # Program version | |
use constant CONF_FILE => 'config.ini'; # Configuration file | |
use constant UNTIL_EOL => '*'; # Until end of line symbol | |
use constant STD_INPUT => '-'; # Read from standard input | |
use constant OUT_TYPE => ['db','csv','sql']; # Types of outputs | |
use constant DEF_OUT => OUT_TYPE->[0]; # Default output | |
use constant DEF_HOST => '127.0.0.1' ; # Default host | |
use constant DEF_PORT => '3306'; # Default port | |
use constant DEF_USER => 'root'; # Default user | |
use constant DEF_PASS => ''; # Default password | |
use constant DEF_TABLE => 'tls206_ascii'; # Default table | |
# ============================================================================= | |
# ============================================================================= | |
# init | |
# ============================================================================= | |
sub init() { | |
# place init code here | |
} | |
# ============================================================================= | |
# check_file | |
# ============================================================================= | |
sub check_file{ | |
my $varname = shift; | |
my $file = shift; | |
# get filename without path | |
my $basename = basename($file); | |
# test is stdin | |
return 1 if($file eq STD_INPUT); | |
# test directory | |
error("'%s' is a directory.",$file) if (-d $file); | |
# test exist | |
error("File '%s' does not exist.",$basename) unless(-e $file); | |
# test readable | |
error("Cannot read '%s' file.",$basename) unless(-r $file); | |
# nonzero size | |
error("File '%s' is empty.",$basename) unless(-s $file); | |
# test plain-format | |
error("File '%s' format is not valid.",$basename) unless(-T $file); | |
# return true | |
return 1; | |
} | |
# ============================================================================= | |
# show_config | |
# ============================================================================= | |
sub show_config{ | |
my $value; # current value of a variable | |
$Data::Dumper::Terse = 1; # don't output string '$VAR' | |
$Data::Dumper::Indent = 0; # spews output without any newlines | |
my %varlist = $config->varlist('.*'); # get all confuration options | |
foreach my $varname (sort keys %varlist){ # sort and print names and values | |
$value = Dumper $config->get($varname); # capture readable var structure | |
$varname =~ s/^field_(fix|len|var)_//; # remove some variables prefix | |
printf "%-27s%s\n", $varname, $value; # print padded info to stdout | |
} | |
exit 2; # exit with error 2 | |
} | |
# ============================================================================= | |
# show_version | |
# ============================================================================= | |
sub show_version(){ | |
my $program = basename($0); # set program name to script name | |
printf "$program version %s\n",PROG_VER; | |
exit 0; | |
} | |
# ============================================================================= | |
# error | |
# ============================================================================= | |
sub error { | |
my $format = shift; # format of message | |
my @args = @_; # arguments | |
my $program = basename($0); # set program name to script name | |
if(scalar(@args) > 0){ | |
printf STDERR "$program : " . $format . "\n" , @_; | |
}else{ | |
printf STDERR "$program : " . "%s" . "\n" , $format; | |
} | |
db_connection_close(); # l | |
ssh_tunnel_close(); # l | |
# exit with error | |
exit(1); | |
} | |
# ============================================================================= | |
# get_range(str_range) | |
# return (offset,length) | |
# ============================================================================= | |
sub get_range($) { | |
return split(/;/, shift, 2); # split line by ';' max 2 values | |
} | |
# ============================================================================= | |
# create_parsing_template() | |
# ============================================================================= | |
sub create_parsing_template() { | |
my $offset; # | |
my $length; # | |
my $lastpos = 0; # | |
# get all "^field_*" variables | |
my %field_ranges = ($config->varlist("^field_",1)); | |
if(scalar keys %field_ranges == 0){ | |
error("Your configuration file has no [field_] sections, ". | |
"For instructions, type: 'tls206_ascii --man'."); | |
} | |
# read parse ranges through fields generating a unpack template and | |
# pushing names to the fields array | |
foreach my $value (sort { (get_range($field_ranges{$a}))[0] <=> | |
(get_range($field_ranges{$b}))[0] } | |
keys %field_ranges) | |
{ | |
# split offset and length for parse range | |
($offset,$length) = get_range($field_ranges{$value}); | |
# Test if offset is valid | |
if(!(defined($offset)) || length($offset) == 0 || !(looks_like_number($offset))){ | |
error("The offset of the field '$value' is invalid or was not provided."); | |
} | |
# Test if length is valid | |
if(!(defined($length)) || length($length) == 0 || !(looks_like_number($length))){ | |
error("The length of the field '$value' is invalid or was not provided."); | |
} | |
# Test if parse range is valid | |
if($offset != $lastpos){ | |
error("There was no information to parse the range : [$lastpos;". | |
($offset-$lastpos) ."]\n" . "Please add it to your configuration file."); | |
} | |
# update last position in range | |
$lastpos = $offset + $length; | |
# push field name to fields array | |
push(@fields,$value); | |
# create template to use within 'unpack' function, this improves the | |
# speed in relation to use 'substr' function | |
$template .= "A" . (($length != 0) ? $length : UNTIL_EOL) . " "; | |
} | |
# store field boundary | |
$field_boundary = $lastpos; | |
chop($template); | |
} | |
# ============================================================================= | |
# parse_line_values(str_line, str_template, href_fields) | |
# ============================================================================= | |
sub parse_line_values($){ | |
my $line = shift; # line to parsing | |
my $offset = 0; # string offset to pasing variable field | |
my $length; # string length to pasing variable field | |
chomp($line); # remove any newline character | |
my @values = unpack($template, $line); # parse line based in template | |
my $vfield = $values[$#values]; # variable field is the last in array | |
my $lenvfd = length($vfield); # length of variable field | |
# traverse values to remove leading/trailing spaces and parse variable field | |
for my $i (0..$#values){ | |
$values[$i] =~ s/^\s+//; # remove leading spaces | |
$values[$i] =~ s/\s+$//; # remove trailing spaces | |
# parse variable field based in her field length | |
if($fields[$i] =~ /^len_/ ){ | |
$length = scalar($values[$i]); | |
if(looks_like_number($length)){ | |
$length = ($length + $offset) <= $lenvfd ? | |
$length : $lenvfd - $offset; # warning this ! | |
$values[$i] = $length > 0 ? substr($vfield, $offset, $length) : ""; | |
$values[$i] =~ s/^\s+//; # remove leading spaces | |
$values[$i] =~ s/\s+$//; # remove trailing spaces | |
$offset = $offset + $length; # update offset | |
} | |
} | |
} | |
# maybe is best to remove here the spaces using a map | |
return @values; | |
} | |
# ============================================================================= | |
# process_argfiles() | |
# ============================================================================= | |
sub process_argfiles { | |
# if there are more arguments to be processed | |
if(scalar(@ARGV) > 0){ | |
# treat last arguments as filenames | |
# each step remove a filename from @ARGV | |
while (my $arg = shift @ARGV) { | |
# try to expand the filename, e.g."*.txt" | |
my @filelist = glob($arg); | |
# if glob expansion does not match any file | |
if(scalar(@filelist) == 0){ | |
# intentionally raising an error to exit | |
$config->filelist($arg); | |
} | |
# add each file to filelist using config environment | |
# filelist() automatically call check_file() function | |
foreach my $file (@filelist) { | |
$config->filelist($file); | |
} | |
} | |
# finally put all expanded filenames back in argv | |
@ARGV = map {$_} @{$config->filelist()}; | |
} | |
if(-t STDIN and not @ARGV){ | |
error("No files were selected, try --help to show options"); | |
} | |
} | |
# ============================================================================= | |
# setup_configuration() | |
# ============================================================================= | |
sub setup_configuration() { | |
$config = AppConfig->new( { # | |
CASE => 1, # | |
PEDANTIC => 1, # | |
ERROR => \&error, # | |
CREATE => '^(field_|ssh)', # auto-create 'field_' and 'ssh' section options | |
GLOBAL => { # | |
DEFAULT => "<undef>", # | |
ARGCOUNT => ARGCOUNT_ONE, # | |
} | |
}); | |
# define | |
$config->define("database|D|database_name=s"); | |
$config->define("filelist|f|file=s@" ,{ VALIDATE => \&check_file } ); | |
$config->define("help|?!" ,{ DEFAULT => 0 } ); | |
$config->define("host|h|database_host=s" ,{ DEFAULT => DEF_HOST } ); | |
$config->define("man|m!" ,{ DEFAULT => 0 } ); | |
$config->define("output|o=s" ,{ DEFAULT => DEF_OUT } ); | |
$config->define("password|p|database_pass=s" ,{ DEFAULT => DEF_PASS } ); | |
$config->define("port|P|database_port=s" ,{ DEFAULT => DEF_PORT } ); | |
$config->define("show-config|S|show!" ,{ DEFAULT => 0 } ); | |
$config->define("table|t|database_table=s" ,{ DEFAULT => DEF_TABLE} ); | |
$config->define("user|u|database_user=s" ,{ DEFAULT => DEF_USER } ); | |
$config->define("version|V!" ,{ DEFAULT => 0 } ); | |
} | |
# ============================================================================= | |
# process_options() | |
# ============================================================================= | |
sub process_options { | |
$config->file(CONF_FILE); # load conf from file | |
my $arg_count = scalar(@ARGV); # count args | |
$config->getopt(qw(no_ignore_case)) ; # load conf from @ARGV | |
show_version() if( $config->version); # show program version | |
show_config() if( $config->show); # show configuration | |
pod2usage(-verbose => 2) if( $config->man); # show manual | |
pod2usage(-verbose => 0) if(($config->help) || # show help | |
(-t STDIN and not $arg_count)); # reading from stdin? | |
process_argfiles(); # save the remaining arguments as filenames | |
} | |
# ============================================================================= | |
# output_db() | |
# ============================================================================= | |
sub output_db($) { | |
my $values = shift; | |
eval {$sth->execute(@{$values});}; | |
# raise warn if problem | |
if ($@) { | |
printf STDERR "Warning : $@"; | |
} | |
} | |
# ============================================================================= | |
# output_csv() | |
# ============================================================================= | |
sub output_csv($) { | |
my $values = shift; | |
print "'" . join("','", @{$values}) . "'" . "\n"; | |
return 1; | |
} | |
# ============================================================================= | |
# output_sql() | |
# ============================================================================= | |
sub output_sql($) { | |
my $values = shift; | |
print $sql_statement . " VALUES ('" . join("','", @{$values}) . "');" . "\n"; | |
return 1; | |
} | |
# ============================================================================= | |
# output() | |
# ============================================================================= | |
sub output($) { | |
my $values = shift; | |
# db | |
if($config->output eq OUT_TYPE->[0]){ | |
return output_db($values); | |
} | |
# csv | |
if($config->output eq OUT_TYPE->[1]){ | |
return output_csv($values); | |
} | |
# sql | |
if($config->output eq OUT_TYPE->[2]){ | |
return output_sql($values); | |
} | |
} | |
# ============================================================================= | |
# process_files() | |
# ============================================================================= | |
sub process_files { | |
my @values; | |
while (<>) { | |
my $line = $_; | |
my $fields_count = scalar(@fields); | |
if(defined($line) && length($line) >= $field_boundary){ | |
@values = parse_line_values($line); | |
if(scalar(@values) == $fields_count){ | |
unless(output(\@values)){ | |
} | |
} | |
} | |
} | |
} | |
# ============================================================================= | |
# ssh_tunnel_close() | |
# Tunnel a TCP connection through an SSH session | |
# ============================================================================= | |
sub ssh_tunnel_close(){ | |
$channel->close() if(defined($channel)); | |
$ssh2->disconnect() if(defined($ssh2)); | |
} | |
# ============================================================================= | |
# ssh_tunnel_create() | |
# Tunnel a TCP connection through an SSH session | |
# ============================================================================= | |
sub ssh_tunnel_create{ | |
unless(can_load( modules => {'Net::SSH2' => undef} )) { | |
error("You need Module Net::SSH2 to connect through a SSH session"); | |
} | |
$ssh2 = Net::SSH2->new(trace => -1); | |
$ssh2->debug(1); | |
unless($ssh2->connect('ssh.exemple.org',22)){ | |
error("Could not connect to sshf.exemple.org: $!"); | |
} | |
$ssh2->auth_password("password", "password"); | |
unless($ssh2->auth_ok){ | |
error("failed to authenticate with server ssh.exemple.org"); | |
} | |
$channel = $ssh2->tcpip('subdomain.exemple.org', 3306, '127.0.0.1', 3306); | |
unless($channel){ | |
error("Could not connect to subdomain.exemple.org"); | |
} | |
# must use non-blocking IO hereafter due to the current libssh2 API | |
$ssh2->blocking(0); | |
} | |
# ============================================================================= | |
# db_connection_create() | |
# ============================================================================= | |
sub db_connection_create(){ | |
# db or sql | |
if($config->output eq OUT_TYPE->[0] || | |
$config->output eq OUT_TYPE->[2]){ | |
# ssh_tunnel_create(); | |
# create sql insert statement | |
$sql_statement = sprintf("INSERT INTO %s.%s (", | |
$config->database, $config->table); | |
$sql_statement .= join(",", @fields) . ")" ; | |
$sql_statement =~ s/(fix|len|var)_//g; | |
# db connection and sql statement preparation | |
if($config->output eq OUT_TYPE->[0]){ | |
my $drive = sprintf("dbi:%s:database=%s;host=%s;port=%s", "mysql", | |
$config->database, $config->host, $config->port); | |
my $dbargs = { mysql_enable_utf8 => 1, # UTF-8 flag for all text strings | |
RaiseError => 1, # not die if something goes wrong | |
PrintError => 0 # turn off warns on an error | |
}; | |
# DBI->trace( 1 ); # uncomment for debug | |
# try to connect | |
eval { | |
$dbh = DBI->connect( # connect to database | |
$drive, # drive | |
$config->user, # no user | |
$config->password, # no password | |
$dbargs # arguments | |
);}; | |
# raise if error exists | |
if ($@) { | |
error("$DBI::errstr"); | |
} | |
# check if table exists | |
$sth = $dbh->table_info(undef, undef, $config->table); | |
my $info = $sth->fetchall_hashref("TABLE_NAME"); | |
unless(exists $info->{$config->table}){ | |
error("Table '%s.%s' doesn't exist", $config->database, $config->table); | |
} | |
# ToDo : check if fields exists | |
$sql_statement .= sprintf(" VALUES (%s?)", "?," x $#fields); | |
$sth= $dbh->prepare(qq{$sql_statement}); # prepare | |
} | |
} | |
} | |
# ============================================================================= | |
# db_connection_close() | |
# ============================================================================= | |
sub db_connection_close{ | |
$dbh->disconnect() if(defined($dbh)); | |
} | |
# ============================================================================= | |
# Main program | |
# ============================================================================= | |
init(); # initialize variables | |
setup_configuration(); # setup configuration environment | |
process_options(); # process options and command line arguments | |
create_parsing_template(); # create a string template for parsing | |
db_connection_create(); # init db connection | |
process_files(); # process each file line by line | |
db_connection_close(); # close db connection | |
exit(0); # exit with success | |
# ============================================================================= | |
__END__ | |
=head1 NAME | |
tls206_ascii - Parse plain FILE(s) and load them into I<'TLS206_ASCII'> PATSTAT table. | |
=head1 USAGE | |
B<tls206_ascii> [OPTION]... [FILE]... | |
Options: | |
-D, --database=name database to use | |
-f, --filelist=list list of files to parse and load | |
-?, --help display a help message and exit | |
-h, --host=name connect to host | |
-m, --man display the manual page and exits | |
-o, --output=type type of output : db, csv or sql | |
-p, --password=name password to use when connecting to server | |
-P, --port=# port number to use for connection | |
-S --show-config display active configuration variables and exit | |
-t, --table=name overrides default 'TLS206_ASCII' table name | |
-u, --user=name user for login | |
-V, --version show program version string and exit | |
Default options are read from config.ini file. With no FILE, or when FILE is -, read standard input. | |
For more information about this tool run with --man option. | |
=head1 DESCRIPTION | |
PATSTAT, is the EPO Worldwide Patent Statistical Database, with worldwide | |
coverage. It contains more than 20 tables with bibliographic data, citations | |
and family links of about 70 million applications of more than 80 countries. | |
B<This program> will read the given input iso-8859-1 file(s), usually named as | |
F<tls206_ascii_part_01.txt>, F<tls206_ascii_part_02.txt>, ... and parses each | |
line to load into 'TLS206_ASCII' PATSTAT table. | |
For PATSTAT version 14.24 you can free download F<TLS206_PERSON_ASCII.zip> from | |
L<EPO website|https://publication.epo.org/raw-data/product?productId=1>. | |
=head1 OPTIONS | |
=over 8 | |
=item . B<--database=db_name, -D db_name> | |
The database to use. This is useful primarily in the option file. | |
=item . B<--filelist=file_list, -f file_list> | |
Files to parse and load into I<'TLS206_ASCII'> PATSTAT table. | |
=item . B<--help, -?> | |
Display a help message and exit. | |
=item . B<--host=host_name, -h host_name> | |
Connect to the server on the given host. | |
=item . B<--man, -m> | |
Disaply the manual page and exits. | |
=item . B<--output={db|csv|sql}, -o> | |
Lorem ipsum | |
=over 8 | |
=item B<db> | |
Connecto to | |
=item B<csv> | |
Produce CSV statements to stdout. | |
=item B<sql> | |
Produce auto-generate SQL INSERT statements to stdout. | |
=back | |
=item . B<--password=password, -p password> | |
The password to use when connecting to the server. Specifying a password on the | |
command line should be considered insecure. You can use F<config.ini> to avoid | |
giving the password on the command line. | |
=item . B<--port=port_num, -P port_num> | |
The TCP/IP port number to use for the connection. | |
=item . B<--show-config, -S> | |
Display active configuration variables and exit. | |
=item . B<--table=table_name, -t table_name> | |
Overrides default 'TLS206_ASCII' table name. | |
=item . B<--user=user_name, -u user_name> | |
The user name to use when connecting to the server. | |
=item . B<--version, -V> | |
Show version information and exit. | |
=back | |
=head1 EXAMPLES | |
=over 8 | |
=item B<tls206_ascii> tls206_ascii_*.txt | |
Load tls206_ascii_*.txt files into database_table. | |
=item zcat tls206_ascii_part0*.gz | B<tls206_ascii> | |
uncompressed tls206_ascii_part0*.gz data on standard output and pass to B<tls206_ascii> | |
=back | |
=head1 CONFIGURATION | |
The F<config.ini> file | |
=head2 B<[database]> Database connection section | |
=over 8 | |
=item . B<name> | |
The database to use. | |
=item . B<host> | |
Connect to the server on the given host. | |
=item . B<port> | |
The TCP/IP port number to use. | |
=item . B<user> | |
The user name to use. | |
=item . B<pass> | |
The password to use. | |
=item . B<table> | |
Overrides default 'TLS206_ASCII' table name. | |
=back | |
=head2 B<[field_*]> Column name and parse range sections | |
These sections indicates both the name and parse rank of each of field. | |
+-------+--------+--------+------+---------+-+---++---+-+-+-+-+--+--+--+--+--+--+--+-----------------+ | |
| A | B | C | D | E |F| G || I |J|K|L|M| N| O| P| Q| R| S| T| ...U... | | |
+-------+--------+--------+------+---------+-+---++---+-+-+-+-+--+--+--+--+--+--+--+-----------------+ | |
0 9 18 27 34 44 46 50 55 59 63 66 69 72 75 78 81 84 EOL | |
=head3 B<[field_fix]> Fixed fields section | |
=over 8 | |
=item . (A) B<prof_person_id [0;9]> | |
=item . (B) B<prof_doc_sn_id [9;9]> | |
=item . (C) B<prof_appln_id [18;9]> | |
=item . (D) B<prof_wk_country [27;7]> | |
=item . (E) B<prof_wk_number [34;10]> | |
=item . (F) B<prof_wk_kind [44;2]> | |
=item . (G) B<prof_source [46;4]> | |
=item . (H) B<prof_a_i_flag [50;1]> | |
=item . (I) B<prof_seq_nr [51;4]> | |
=item . (J) B<prof_country [55;2]> | |
=item . (K) B<prof_nationality [57;2]> | |
=item . (L) B<persons_in_residence [59;2]> | |
=item . (M) B<persons_in_uspto_role [61;2]> | |
=back | |
=head3 B<[field_len]> Length fields section | |
=over 8 | |
=item . (N) B<prof_last_name [63;3]> | |
=item . (O) B<prof_first_name [66;3]> | |
=item . (P) B<prof_middle_names [69;3]> | |
=item . (Q) B<prof_street [72;3]> | |
=item . (R) B<prof_city [75;3]> | |
=item . (S) B<prof_state [78;3]> | |
=item . (T) B<prof_zip_code [81;3]> | |
=back | |
=head3 B<[field_var]> Variable field section | |
=over 8 | |
=item . (U) B<prof_name_addr [84;0]> | |
=back | |
=head1 TLS206_ASCII | |
Before running this program, please make sure that I<'TLS206_ASCII'> table | |
exists. If necessary, an example of SQL instruction for create it is: | |
CREATE TABLE IF NOT EXISTS `tls206_ascii` ( | |
`ID` int(10) unsigned NOT NULL AUTO_INCREMENT PRIMARY KEY, | |
`prof_person_id` int(11) NOT NULL, | |
`prof_doc_sn_id` int(11) NOT NULL, | |
`prof_appln_id` int(11) NOT NULL, | |
`prof_wk_country` char(2) NOT NULL, | |
`prof_wk_number` text NOT NULL, | |
`prof_wk_kind` text NOT NULL, | |
`prof_source` text NOT NULL, | |
`prof_a_i_flag` text NOT NULL, | |
`prof_seq_nr` text NOT NULL, | |
`prof_country` text NOT NULL, | |
`prof_nationality` text NOT NULL, | |
`persons_in_residence` text NOT NULL, | |
`persons_in_uspto_role` text NOT NULL, | |
`prof_last_name` text NOT NULL, | |
`prof_first_name` text NOT NULL, | |
`prof_middle_names` text NOT NULL, | |
`prof_street` text NOT NULL, | |
`prof_city` text NOT NULL, | |
`prof_state` text NOT NULL, | |
`prof_zip_code` text NOT NULL, | |
`prof_name_addr` text NOT NULL | |
) ENGINE=MyISAM DEFAULT CHARSET=utf8; | |
You can map the names of columns by modifying F<config.ini> file. | |
See L<CONFIGURATION> for more information on configuration variable values. | |
=head1 DEPENDENCIES | |
The libssh2 library is required by ssh conections. If you don't have it, you can | |
download it from L<http://www.libssh2.org>; you may also need OpenSSL, which can be | |
obtained from L<http://www.openssl.org>. | |
Debian/Ubuntu: sudo aptitude install libssh-dev libssl-dev libcrypto++-dev | |
To do so you need to forward a port on your local machine (3307) to port 3306 on | |
the server which hosts your database. The UNIX ssh syntax for that would be: | |
ssh -TNL 3307:host.with.database:3306 [email protected] -p port | |
If you're using Windows, download I<plink> and use the same syntax as above, | |
substituting "ssh" by "plink". L<http://www.chiark.greenend.org.uk/~sgtatham/putty/download.html> | |
Having done this you must specify "-h 127.0.0.1 -p 3007" | |
=head1 AUTHOR | |
Cristian Martinez, E<lt>martinec-at-esiee.frE<gt> | |
=head1 COPYRIGHT | |
This program is distributed under the GNU General Public License Version 3 or later | |
L<http://gnu.org/licenses/gpl.html>. This is free software: you are free to change | |
and redistribute it. There is NO WARRANTY, to the extent permitted by law. | |
=cut |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment