Last active
September 15, 2019 22:08
-
-
Save lkwg82/ba39c04cc40eb55ccb9d3836098aa186 to your computer and use it in GitHub Desktop.
anonymize syncthing log with config xml
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!perl
# Anonymize a Syncthing log file: device names/IDs, folder IDs/labels/paths
# read from the Syncthing config XML, plus every IP address found in the log,
# are replaced by numbered placeholders.
use strict;
use warnings;

$| = 1;    # autoflush the currently selected output handle (STDOUT by default)

# use module
use XML::Simple;
use Data::Dumper;
use Regexp::Common qw /net/;

$Data::Dumper::Sortkeys = 1;

my $configFile = "syncthing.config.xml";
my $filename   = "syncthing.logs.since-2019.08.01.log";

# create object (direct class-method call; avoids indirect-object "new XML::Simple")
my $xml = XML::Simple->new;

# read XML file
# ForceArray makes XML::Simple treat these elements as lists even when only a
# single one is present, so they are always folded into a hash keyed by the
# default KeyAttr (name, key, id). Without it, a config with exactly one
# device, folder or ignoredFolder yields a flat attribute hash and the lookups
# below (->{$device}->{'id'}, ->{$folder}->{'label'}, ...) die under strict refs.
my $data = $xml->XMLin(
    $configFile,
    ForceArray => [qw(device folder ignoredFolder)],
);

#~ print Dumper($data);

# Sorted so that the placeholder number given to each device is the same on
# every run (plain hash order is randomized).
my @devices = sort keys %{ $data->{'device'} };

# Category name (e.g. 'device-ids') => { original text => placeholder }.
my $replacements = {};
# Register a placeholder for every configured device: its name, its full ID
# and its short ID (the part of the ID before the first "-").
for my $i ( 0 .. $#devices ) {
    #~ print "$i $devices[$i]";
    my $device   = $devices[$i];
    my $deviceId = $data->{'device'}->{$device}->{'id'};

    # replaced() interpolates every key into a regex; an empty key would match
    # everywhere and mangle each log line, so empty values are never stored.
    if ( length($device) > 0 ) {
        $replacements->{'device-names'}->{$device} = "device-name-$i";
    }

    # A device entry without an id has nothing further to anonymize.
    next unless defined($deviceId) && length($deviceId) > 0;
    $replacements->{'device-ids'}->{$deviceId} = "device-id-$i";

    my ($deviceShortId) = split( /-/, $deviceId );
    if ( defined($deviceShortId) && length($deviceShortId) > 0 ) {
        $replacements->{'device-short-ids'}->{$deviceShortId} = "device-short-id-$i";
    }
    #~ print "\n";
}
# Register a placeholder for every configured folder: its ID, its path and,
# when present, its label. Sorted so the numbering is stable between runs.
my @folders = sort keys %{ $data->{'folder'} };
for my $i ( 0 .. $#folders ) {
    my $folder = $folders[$i];

    # Tolerate a missing label attribute instead of emitting
    # uninitialized-value warnings.
    my $folderLabel = $data->{'folder'}->{$folder}->{'label'};
    $folderLabel = '' unless defined $folderLabel;
    $folderLabel =~ s/(\W)/\\$1/g;    # escape non-word character, because it will be used in regex

    my $folderPath = $data->{'folder'}->{$folder}->{'path'};

    # Empty keys are never stored: replaced() turns each key into a regex and
    # an empty one would match everywhere.
    if ( length($folder) > 0 ) {
        $replacements->{'folder-ids'}->{$folder} = "folder-id-$i";
    }
    if ( defined($folderPath) && length($folderPath) > 0 ) {
        $replacements->{'folder-paths'}->{$folderPath} = "/folder-path-$i";
    }
    if ( length($folderLabel) > 0 ) {
        $replacements->{'folder-labels'}->{$folderLabel} = "folder-label-$i";
    }
}
# respect ignored folders
# Each device entry may list ignoredFolder elements (keyed by folder id). Their
# ids and labels are registered too, unless a placeholder already exists.
#
# Configured folders use the indices 0 .. $#folders for both ids and labels, so
# numbering for additional entries continues from scalar(@folders). Counting
# the label hash instead could hand out an index that is already in use when
# some configured folders have no label.
my $nextFolderIdIndex    = scalar(@folders);
my $nextFolderLabelIndex = scalar(@folders);

for my $device (@devices) {
    #~ print "$i $devices[$i]";
    my $ignoredFolders = $data->{'device'}->{$device}->{'ignoredFolder'};
    next unless defined $ignoredFolders;

    # Sorted so the numbering is stable between runs.
    for my $folderId ( sort keys %{$ignoredFolders} ) {
        my $folderLabel = $ignoredFolders->{$folderId}->{'label'};
        $folderLabel = '' unless defined $folderLabel;
        $folderLabel =~ s/(\W)/\\$1/g;    # escape non-word character, because it will be used in regex
        #~ print "$folderId - $folderLabel\n";

        # Empty ids/labels are skipped: replaced() turns each key into a regex
        # and an empty one would match everywhere in every log line.
        if ( length($folderId) > 0
            && !exists( $replacements->{'folder-ids'}->{$folderId} ) )
        {
            $replacements->{'folder-ids'}->{$folderId} = "folder-id-$nextFolderIdIndex";
            $nextFolderIdIndex++;
        }
        if ( length($folderLabel) > 0
            && !exists( $replacements->{'folder-labels'}->{$folderLabel} ) )
        {
            $replacements->{'folder-labels'}->{$folderLabel} = "folder-label-$nextFolderLabelIndex";
            $nextFolderLabelIndex++;
        }
    }
    #~ print Dumper();
    #~ print "\n";
}
#print Dumper(\@devices); | |
#~ print Dumper($replacements); | |
#grep{ print $_,"\n" } sort( keys %{$replacements->{'folder-ids'}}) | |
#~ exit 0; | |
# Replace every known sensitive token in one log line by its placeholder.
#
# Parameters:
#   $row - a single log line (string).
# Returns:
#   The line with every match from the file-level $replacements tables
#   substituted. Categories are applied in the fixed order listed below
#   (full device ids before short ids, and so on).
#
# A token is only replaced when it is not directly preceded by a word
# character. A lookbehind is used for that check so that
#   * a token at the very start of the line is replaced as well, and
#   * the separator is not consumed, so two tokens separated by a single
#     character are both replaced.
# NOTE(review): as before, nothing is required after the token, so a key that
# is a prefix of a longer word still matches - confirm whether a trailing
# boundary is wanted.
sub replaced {
    my ($row) = @_;

    for my $hashName (
        qw/device-ids device-short-ids device-names folder-ids folder-paths folder-labels ipv4s ipv6s/
      )
    {
        #~ print "hashname $hashName\n";
        my $hash = $replacements->{$hashName};
        next unless defined $hash;

        # Longest keys first, so a key that is a prefix of another one
        # (10.0.0.1 vs 10.0.0.12) cannot clobber the longer match; ties are
        # broken alphabetically to keep the result deterministic.
        my @keys = sort { length($b) <=> length($a) or $a cmp $b } keys %{$hash};

        for my $key (@keys) {

            # An empty pattern would match at every position.
            next if $key eq '';

            my $value = $hash->{$key};
            #~ print "\t$key -> $value\n";

            # Folder labels are stored already regex-escaped by the setup
            # code; every other key is raw text and must be quoted so that
            # characters such as "." or "+" match literally.
            my $pattern = $hashName eq 'folder-labels' ? $key : quotemeta($key);

            $row =~ s{(?<!\w)(?:$pattern)}{$value}g;
        }
    }
    return $row;
}
# Main pass: read the log twice - first to collect every IP address, then to
# write the anonymized copy to "<log file>.anon".
my $anonFile = "$filename.anon";
open( my $fh,  "<", $filename ) or die "Could not open file '$filename' $!";
open( my $fhA, ">", $anonFile ) or die "Could not open file '$anonFile' $!";

my $ipv4s = {};
my $ipv6s = {};

# Compile the address patterns once instead of relying on the obsolete /o flag.
my $ipv4Pattern = qr/($RE{net}{IPv4})/;
my $ipv6Pattern = qr/($RE{net}{IPv6})/;

# scan all ips
while ( my $row = <$fh> ) {
    $ipv4s->{$_} = 1 for ( $row =~ /$ipv4Pattern/g );
    $ipv6s->{$_} = 1 for ( $row =~ /$ipv6Pattern/g );
}

# Sorted so every address gets the same placeholder number on each run.
my @ipv4s = sort keys %{$ipv4s};
my @ipv6s = sort keys %{$ipv6s};
for my $i ( 0 .. $#ipv4s ) { $replacements->{'ipv4s'}->{ $ipv4s[$i] } = "ipv4-$i"; }
for my $i ( 0 .. $#ipv6s ) { $replacements->{'ipv6s'}->{ $ipv6s[$i] } = "ipv6-$i"; }

# start from the beginning
seek( $fh, 0, 0 ) or die "Could not rewind file '$filename' $!";

#~ print Dumper($ipv4s);
#~ print Dumper($ipv6s);
#~ print Dumper($replacements->{'device-names'});

while ( my $row = <$fh> ) {
    chomp $row;
    print {$fhA} replaced($row), "\n"
      or die "Could not write to file '$anonFile' $!";
}

close($fh);

# Buffered write errors (e.g. disk full) only surface when the handle is
# closed, so the result is checked before reporting success.
close($fhA) or die "Could not close file '$anonFile' $!";
print "done\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment