| 1 |
## $Id: Checksum.pm,v 1.5 2004/05/11 19:43:02 joko Exp $ |
| 2 |
## |
| 3 |
## Copyright (c) 2002 Andreas Motl <andreas.motl@ilo.de> |
| 4 |
## |
| 5 |
## See COPYRIGHT section in pod text below for usage and distribution rights. |
| 6 |
## |
| 7 |
## ---------------------------------------------------------------------------------------- |
| 8 |
## $Log: Checksum.pm,v $ |
| 9 |
## Revision 1.5 2004/05/11 19:43:02 joko |
| 10 |
## don't use File::RsyncP::Digest on win32 |
| 11 |
## |
| 12 |
## Revision 1.4 2004/05/06 12:53:07 jonen |
| 13 |
## + added use of File::RsycP::digest |
| 14 |
## |
| 15 |
## Revision 1.3 2003/05/13 08:19:00 joko |
| 16 |
## switched to crc32 |
| 17 |
## |
| 18 |
## Revision 1.2 2003/02/11 09:53:07 joko |
| 19 |
## + metadata-structure-change, fixed some code here |
| 20 |
## |
| 21 |
## Revision 1.1 2003/02/09 05:10:13 joko |
| 22 |
## + initial commit |
| 23 |
## |
| 24 |
## ---------------------------------------------------------------------------------------- |
| 25 |
|
| 26 |
|
| 27 |
=pod |
| 28 |
|
| 29 |
=head1 Todo |
| 30 |
|
| 31 |
o Data::Transfer::Sync::Compare::Slot using Compare::Struct |
| 32 |
o Load checksum algorithm on demand, do some negotiation |
| 33 |
|
| 34 |
=cut |
| 35 |
|
| 36 |
package Data::Transfer::Sync::Compare::Checksum; |
| 37 |
|
| 38 |
use strict; |
| 39 |
use warnings; |
| 40 |
|
| 41 |
use mixin::with qw( Data::Transfer::Sync ); |
| 42 |
|
| 43 |
use shortcuts qw( RUNNING_IN_HELL ); |
| 44 |
use Data::Dumper; |
| 45 |
|
| 46 |
# TODO: Load these appropriatly at runtime. |
| 47 |
use Digest::MD5 qw( md5 md5_hex md5_base64 ); |
| 48 |
use String::CRC32; |
| 49 |
use Data::Mungle::Transform::Deep qw( deep_copy ); |
| 50 |
|
| 51 |
# don't use File::RsyncP::Digest on win32 |
| 52 |
# TODO: enhance here! (e.g. negotiate proper checksum-algorithm first, apply afterwards) |
| 53 |
if (not RUNNING_IN_HELL()) { |
| 54 |
eval "use File::RsyncP::Digest;"; |
| 55 |
} |
| 56 |
|
| 57 |
# get logger instance |
| 58 |
my $logger = Log::Dispatch::Config->instance; |
| 59 |
|
| 60 |
|
| 61 |
# Maybe refactor to shortcuts::checksum? |
| 62 |
# calculate checksum for current object |
| 63 |
sub _calcChecksum { |
| 64 |
|
| 65 |
my $self = shift; |
| 66 |
my $descent = shift; |
| 67 |
my $options = shift; |
| 68 |
|
| 69 |
my $checksum; |
| 70 |
|
| 71 |
my $ident = $self->{node}->{$descent}->{ident}; |
| 72 |
|
| 73 |
# build dump of this node |
| 74 |
my $payload = $self->{node}->{$descent}->{payload}; |
| 75 |
$payload = deep_copy($payload) if $options->{clone}; |
| 76 |
|
| 77 |
# new 2004-06-17: remove attributes if request before dumping/calculating |
| 78 |
map { delete $payload->{$_} } @{$options->{exclude}} if $options->{exclude}; |
| 79 |
|
| 80 |
# various methods to dump this thing |
| 81 |
#my $dump = $ident . "\n" . $item->quickdump(); |
| 82 |
#my $dump = $ident . "\n" . Dumper($item); |
| 83 |
my $dump = $self->_dumpCompact($payload); |
| 84 |
$dump = $ident . $dump if $self->{options}->{ident}; |
| 85 |
|
| 86 |
# TODO: $logger->dump( ... ); |
| 87 |
#$logger->debug( __PACKAGE__ . ": " . $dump ); |
| 88 |
#$logger->dump( __PACKAGE__ . ": " . $dump ); |
| 89 |
|
| 90 |
# calculate checksum from dump |
| 91 |
|
| 92 |
# 1. md5-based fingerprint, base64 encoded (from Digest::MD5) |
| 93 |
#$checksum = md5_base64($dump) . '=='; |
| 94 |
|
| 95 |
# 2. 32-bit integer "hash" value (maybe faster?) (from DBI) |
| 96 |
# Note: The 32-bit integer hash from DBI seems to generate duplicates |
| 97 |
# with small payloads already in ranges of hundreds of items/rows!!! |
| 98 |
# Try to avoid it or try to use it only for payloads greater than, hmmm, let's say 30 chars? |
| 99 |
# (we had about 15 chars average per item (row)) |
| 100 |
# Possible (generic) solution: Just generate checksum, if length(checksum(payload)) < length(payload) |
| 101 |
#$checksum = DBI::hash($dump, 1); |
| 102 |
|
| 103 |
# 3. good old crc32??? |
| 104 |
#$checksum = crc32($dump); |
| 105 |
|
| 106 |
# 4. File::RsyncP::Digest - Perl interface to rsync message digest algorithms |
| 107 |
if (not RUNNING_IN_HELL()) { |
| 108 |
my $rsDigest = new File::RsyncP::Digest; |
| 109 |
$rsDigest->add($dump); |
| 110 |
my $digest = $rsDigest->digest(); |
| 111 |
$checksum = unpack("H*", $digest); |
| 112 |
|
| 113 |
} else { # fallback to Digest::MD5 on win32 |
| 114 |
#print $dump, " ==> "; |
| 115 |
$checksum = md5_base64($dump) . '=='; |
| 116 |
#print $checksum, "\n"; |
| 117 |
} |
| 118 |
|
| 119 |
# 5. some more modern Digest::SHA1 or similar? |
| 120 |
|
| 121 |
# signal good |
| 122 |
return $checksum; |
| 123 |
|
| 124 |
} |
| 125 |
|
| 126 |
|
| 127 |
sub readChecksum { |
| 128 |
my $self = shift; |
| 129 |
|
| 130 |
my $descent = shift; |
| 131 |
|
| 132 |
# signal checksum bad |
| 133 |
if (!$self->{node}->{$descent}) { |
| 134 |
return; |
| 135 |
} |
| 136 |
|
| 137 |
# trace |
| 138 |
#print "desc: $descent", "\n"; |
| 139 |
#print Dumper($self); |
| 140 |
#print Dumper($self->{meta}->{$descent}); |
| 141 |
#exit; |
| 142 |
|
| 143 |
# get checksum for current entry |
| 144 |
# TODO: don't have the checksum column/property hardcoded as "cs" here, make this configurable somehow |
| 145 |
if ($self->{meta}->{$descent}->{isChecksumAuthority}) { |
| 146 |
$self->{node}->{$descent}->{checksum} = $self->_calcChecksum($descent, { ident => 1 }); |
| 147 |
} else { |
| 148 |
$self->{node}->{$descent}->{checksum} = $self->{node}->{$descent}->{payload}->{cs}; |
| 149 |
} |
| 150 |
|
| 151 |
# signal checksum good |
| 152 |
return 1; |
| 153 |
|
| 154 |
} |
| 155 |
|
| 156 |
sub _readLocalChecksum { |
| 157 |
my $self = shift; |
| 158 |
my $descent = shift; |
| 159 |
$self->{node}->{$descent}->{checksum_local_storage} = $self->{node}->{$descent}->{payload}->{cs_local}; |
| 160 |
$self->{node}->{$descent}->{checksum_local_storage} ||= ""; |
| 161 |
} |
| 162 |
|
| 163 |
sub _calcLocalChecksum { |
| 164 |
my $self = shift; |
| 165 |
my $descent = shift; |
| 166 |
# TODO: don't hardcode this (oid, cs, cs_local) |
| 167 |
$self->{node}->{$descent}->{checksum_local_calculated} = |
| 168 |
$self->_calcChecksum($descent, { clone => 1, exclude => [qw( oid cs cs_local )] }); |
| 169 |
$self->{node}->{$descent}->{checksum_local_calculated} ||= ""; |
| 170 |
} |
| 171 |
|
| 172 |
sub handleLocalChecksum { |
| 173 |
my $self = shift; |
| 174 |
my $descent = shift; |
| 175 |
$self->_readLocalChecksum($descent); |
| 176 |
$self->_calcLocalChecksum($descent); |
| 177 |
} |
| 178 |
|
| 179 |
sub _dumpCompact { |
| 180 |
my $self = shift; |
| 181 |
|
| 182 |
my @data = (); |
| 183 |
|
| 184 |
my $count = 0; |
| 185 |
|
| 186 |
# walk through all arguments |
| 187 |
foreach (@_) { |
| 188 |
my $item = {}; |
| 189 |
# assume item is a hashref and walk through all entries |
| 190 |
foreach my $key (keys %$_) { |
| 191 |
my $val = $_->{$key}; |
| 192 |
# dive into tied/hidden types and resolve deep structure inside |
| 193 |
if (ref $val eq 'Set::Object') { |
| 194 |
$item->{$key} = $val->members(); |
| 195 |
} else { |
| 196 |
$item->{$key} = $val; |
| 197 |
} |
| 198 |
} |
| 199 |
push @data, $item; |
| 200 |
$count++; |
| 201 |
} |
| 202 |
|
| 203 |
my $indent_save = $Data::Dumper::Indent; |
| 204 |
$Data::Dumper::Indent = 0; |
| 205 |
my $result = Dumper(@data); |
| 206 |
$Data::Dumper::Indent = $indent_save; |
| 207 |
return $result; |
| 208 |
|
| 209 |
} |
| 210 |
|
| 211 |
1; |
| 212 |
__END__ |