Skip to content

Instantly share code, notes, and snippets.

@Generalelektrix
Last active September 18, 2019 01:24
Show Gist options
  • Save Generalelektrix/52c2d09fc82c99dd96e4a71242ad5433 to your computer and use it in GitHub Desktop.
Save Generalelektrix/52c2d09fc82c99dd96e4a71242ad5433 to your computer and use it in GitHub Desktop.
#!/usr/bin/perl -w
use strict;
use LWP::Simple;
use URI::URL;

=head1 Synopsis

This script produces a file of RedirectSafe directives for the domains found
in the publicly available DOAJ csv file. Its main concern is to avoid
duplicating any domain that is already used in your own EZproxy config files.

The new config file is called config_doaj.txt and can be included from your
main EZproxy config file this way:

    IncludeFile config_doaj.txt

=cut

=head2 Author

Sébastien Nadeau

Université Laval

=cut

# ---------------------------------------------------------------------------
# Configuration. All file names below are relative to $ezproxy_dir.
# ---------------------------------------------------------------------------

# URL where DOAJ csv file is found
my $url_doaj_csv = qq[https://doaj.org/csv];

# Will we fetch the DOAJ csv file? Set to 0 to reuse the copy that a previous
# run left in $ezproxy_dir.
my $go_get_it_again = 1;

# Temporary DOAJ csv file
my $csv_doaj_filename = qq[csv_doaj.txt];

# New DOAJ config file
my $cfg_doaj_filename = qq[config_doaj.txt];

# Logfile (truncated and rewritten on every run)
my $log_doaj_filename = qq[config_doaj.log];

# List of actual EZproxy config files
# Add more files to this list if necessary
my @ezproxy_cfg_files = (
    "config.txt",
);

# EZproxy base directory (where EZproxy config file is found)
my $ezproxy_dir = qq[/usr/local/ezproxy];

# Three-argument open keeps the mode separate from the path, so a directory
# or file name can never be misread as part of the open mode. The handle
# stays open for the whole run; every later step writes its progress to it.
open my $logfile, '>', qq[$ezproxy_dir/$log_doaj_filename]
    or die qq[Cannot open log file $ezproxy_dir/$log_doaj_filename for writing: $!];
# Download the DOAJ csv file into $ezproxy_dir, unless fetching is disabled,
# and record the outcome in the log.
if ($go_get_it_again) {
    # getstore() saves the response body to the given file and returns the
    # HTTP status code of the request.
    my $response_code = getstore($url_doaj_csv, qq[$ezproxy_dir/$csv_doaj_filename]);
    if (is_error($response_code)) {
        print $logfile "Response code: $response_code -- Aborting.\n";
        # Give the operator the cause on STDERR too, not just "Died".
        die "Fetching $url_doaj_csv failed with response code $response_code\n";
    }
    print $logfile "Fetched $url_doaj_csv successfully.\n";
}
else {
    print $logfile "Won't fetch $url_doaj_csv again.\n";
}
# Blank line separating this step from the next one in the log.
print $logfile "\n";
# Opening DOAJ csv file and reduction to a hashmap of domains.
# Keys are lower-cased "host" (or "host:port" when the port is neither 80 nor
# 443); values count how many csv rows produced that key.
my %doaj_domains;
{
    my $csv_path = qq[$ezproxy_dir/$csv_doaj_filename];
    open my $csv, '<', $csv_path or do {
        # Capture $! first: the print below may overwrite it.
        my $open_error = $!;
        print $logfile "$open_error -- Aborting.\n";
        die "Cannot open $csv_path: $open_error\n";
    };

    LINE:
    while (my $line = <$csv>) {
        chomp $line;

        # The journal URL is taken from the second column, as before. Rows
        # with fewer than two columns are skipped.
        my $journal_url = (_csv_fields($line))[1];
        next LINE unless defined $journal_url and length $journal_url;

        my $url = URI::URL->new($journal_url);

        # Only URL schemes that expose host/port are usable; this also
        # filters out the csv header row and other non-URL values.
        next LINE unless $url->can("host") and $url->can("port");

        my $host = $url->host;
        next LINE unless defined $host and length $host;

        # Host names are case-insensitive, and the EZproxy config values are
        # lower-cased before being compared against these keys.
        my $domain = lc $host;

        my $port = $url->port;
        if (defined $port and $port != 80 and $port != 443) {
            $domain .= ":" . $port;
        }

        $doaj_domains{$domain}++;
    }

    my $number_of_domains = scalar keys %doaj_domains;
    print $logfile "DOAJ list contains $number_of_domains unique domains.\n";
    close $csv;
}

# Splits one csv record into its fields. Double-quoted fields may contain
# commas, and "" inside a quoted field stands for a literal quote. A plain
# split on "," would shift the columns of any row with a quoted comma.
# Limitation: a quoted field that spans several physical lines is not
# reassembled, because the caller feeds this one line at a time.
sub _csv_fields {
    my ($record) = @_;
    my @fields;

    pos($record) = 0;
    while (1) {
        if ($record =~ /\G"((?:[^"]|"")*)"(?=,|\z)/gc) {
            # Well-formed quoted field: unescape doubled quotes.
            my $field = $1;
            $field =~ s/""/"/g;
            push @fields, $field;
        }
        elsif ($record =~ /\G([^,]*)/gc) {
            # Unquoted (possibly empty) field, or a malformed quoted one
            # that is kept verbatim up to the next comma.
            push @fields, $1;
        }

        # A comma means another field follows; anything else ends the record.
        last unless $record =~ /\G,/gc;
    }

    return @fields;
}
# If DOAJ domains are found in EZproxy config, remove them from hashmap
# There are 2 possibilities:
# - domain is already in a RedirectSafe directive
# - domain is found in a database stanza (U/URL, H/HJ/Host/HostJavascript,
#   D/DJ/Domain/DomainJavascript lines)
my $ct_db = 0;    # DOAJ domains removed because of a database stanza
my $ct_rs = 0;    # DOAJ domains removed because of a RedirectSafe directive

foreach my $ezproxy_cfg_filename (@ezproxy_cfg_files) {
    my $cfg_path = qq[$ezproxy_dir/$ezproxy_cfg_filename];
    my $cfg;
    unless (open($cfg, '<', $cfg_path)) {
        # Best effort: a missing config file is logged and skipped.
        print $logfile "Unable to open $ezproxy_cfg_filename: $!\n";
        next;
    }

    LINE:
    while (my $line = <$cfg>) {
        # Strip the line terminator and any trailing blanks. This handles
        # both LF and CRLF files; a stray "\r" would otherwise end up in the
        # captured value and defeat the hash lookups below.
        $line =~ s/\s+\z//;

        if ($line =~ /^redirectsafe\s+(.*)/i) {
            my $domain = lc($1);
            $domain =~ s{/.*$}{};    # drop any path part
            if (exists $doaj_domains{$domain}) {
                delete $doaj_domains{$domain};
                print $logfile "$domain already in redirectsafe directive of config, removed from DOAJ list\n";
                $ct_rs++;
            }
        }
        elsif ($line =~ /^(?:U|URL|H|HJ|Host|HostJavascript|D|DJ|Domain|DomainJavascript)\s+(.*)/i) {
            my $value = lc($1);
            my $domain;
            if ($value =~ m{^\w+://}) {
                # Full URL: keep only its host, when the scheme has one.
                my $url = URI::URL->new($value);
                $domain = $url->can("host") ? $url->host : undef;
            }
            else {
                # Bare host or domain: drop any path part.
                ($domain = $value) =~ s{/.*$}{};
            }
            next LINE unless $domain;

            # A DOAJ domain is considered covered when it equals the config
            # domain or one of its dot-separated parent domains. Checking
            # each suffix with a hash lookup avoids scanning the whole DOAJ
            # list per config line, and avoids interpolating DOAJ domains
            # into a regex where "." would match any character.
            my @labels = split /\./, $domain, -1;
            while (@labels) {
                my $doaj_domain = join '.', @labels;
                if (exists $doaj_domains{$doaj_domain}) {
                    delete $doaj_domains{$doaj_domain};
                    $ct_db++;
                    print $logfile "$domain <==> $doaj_domain already in database stanza, removed from DOAJ list\n";
                }
                shift @labels;
            }
        }
    }
    close $cfg;
}
# Summary of the filtering step.
print $logfile "$ct_rs domains removed from DOAJ list because already in RedirectSafe\n";
print $logfile "$ct_db domains removed from DOAJ list because already in Database Stanza\n";
my $number_of_domains = scalar keys %doaj_domains;
print $logfile "DOAJ list now contains $number_of_domains unique domains.\n";

# Creating new config file with DOAJ domains in RedirectSafe directives,
# one directive per remaining domain, sorted for stable output.
my $doaj_cfg_path = qq[$ezproxy_dir/$cfg_doaj_filename];
if (open my $doaj_cfg, '>', $doaj_cfg_path) {
    foreach my $domain (sort keys %doaj_domains) {
        print $doaj_cfg qq[RedirectSafe $domain\n];
    }

    # Output is buffered, so a write failure (e.g. a full disk) may only
    # show up at close time. Report success only once close has succeeded.
    if (close $doaj_cfg) {
        print $logfile "$ezproxy_dir/$cfg_doaj_filename created successfully.\n";
    }
    else {
        my $close_error = $!;
        print $logfile "$close_error -- Aborting.\n";
        die "Cannot finish writing $doaj_cfg_path: $close_error\n";
    }
}
else {
    # Capture $! first: the print below may overwrite it.
    my $open_error = $!;
    print $logfile "$open_error -- Aborting.\n";
    die "Cannot open $doaj_cfg_path for writing: $open_error\n";
}

close $logfile or die "Cannot close log file: $!\n";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment