#!/usr/bin/perl
#
# Replace duplicate files with hard links.
#
# File names are read from standard input, one per line.  Files are grouped
# by size; files of equal size are compared byte for byte, and every file
# whose contents match an earlier one is replaced by a hard link to it.

use strict;
use warnings;

use File::Compare ();
use File::Temp;
use File::stat;
use Getopt::Long;

# Set by -q/--quiet; when true, vmsg() prints nothing.
my $quiet;

# Print a short usage summary to STDERR.
#
# NOTE(review): the original usage text was lost from the source this file
# was recovered from; the wording below is a reconstruction based on what the
# script visibly does.  Replace it with the original text if available.
sub usage {
    print STDERR <<"EOF";
Usage: $0 < FILE-LIST

Reads file names from standard input, one per line, and replaces files
that have identical contents with hard links to a single copy.

Options:
  -q, --quiet   suppress verbose progress messages
  -h, --help    show this help and exit
EOF
    return;
}

# Print a verbose message to STDERR unless --quiet was given.
sub vmsg {
    my ($t) = @_;
    print STDERR "$t\n" unless $quiet;
    return;
}

# Print a normal (always shown) message to STDERR.
sub nmsg {
    my ($t) = @_;
    print STDERR "$t\n";
    return;
}

# Encode a string so that it contains no literal TAB characters:
# backslash becomes "\_" and TAB becomes "\t".  Returns the encoded copy.
sub tabquote {
    my ($x) = @_;
    $x =~ s/\\/\\_/g;
    $x =~ s/\t/\\t/g;
    return $x;
}

# Inverse of tabquote().  Returns the decoded copy.
#
# Both escapes are decoded in a single left-to-right pass.  Decoding "\_"
# first and "\t" second (as two separate substitutions) is wrong: the
# encoding of a literal backslash followed by "t" is "\_t", which the first
# pass turns into "\t" and the second pass then turns into a TAB.
sub tabunquote {
    my ($x) = @_;
    $x =~ s/\\([_t])/$1 eq '_' ? "\\" : "\t"/ge;
    return $x;
}

# Read file names (one per line) from the handle $fh and stat each of them.
#
# Returns a list:
#   \%files_by_size  size => [ names with that size, in input order ]
#   \%stat_of        name => File::stat object
#   $count           number of names read
#   $dup_count       number of files sharing their size with an earlier file
#   $dup_bytes       combined size of those files
#
# Dies with "Does not exist: NAME" if a name cannot be stat'ed.
sub read_file_list {
    my ($fh) = @_;

    my (%files_by_size, %stat_of);
    my ($count, $dup_count, $dup_bytes) = (0, 0, 0);

    while (my $name = <$fh>) {
        $count++;
        vmsg "read $count file names..." if $count % 1000 == 0;
        chomp $name;

        # One checked stat() instead of "-e" followed by an unchecked stat():
        # avoids dereferencing undef if the file vanishes in between.
        my $sb = stat($name) or die "Does not exist: $name\n";
        $stat_of{$name} = $sb;

        my $same_size = $files_by_size{ $sb->size } ||= [];
        if (@$same_size) {
            $dup_count++;
            $dup_bytes += $sb->size;
        }
        push @$same_size, $name;
    }

    return (\%files_by_size, \%stat_of, $count, $dup_count, $dup_bytes);
}

# Compare all files of equal size and hard-link the identical ones.
#
#   $files_by_size, $stat_of  as returned by read_file_list()
#   $total_candidates         only used in progress messages
#
# Dies if "ln" fails; comparison errors are reported and the pair is left
# untouched.
sub link_duplicates {
    my ($files_by_size, $stat_of, $total_candidates) = @_;

    my $compared    = 0;
    my $next_report = 100;

    # Largest files first, as they give the biggest savings.
    for my $size (sort { $b <=> $a } keys %$files_by_size) {
        my @files = @{ $files_by_size->{$size} };
        next if @files == 1;
        vmsg "Size = $size";

        # $compared advances by arbitrary steps, so report when it crosses a
        # multiple of 100 rather than only when it lands exactly on one.
        if ($compared >= $next_report) {
            nmsg "compared $compared/$total_candidates files...";
            $next_report += 100 while $next_report <= $compared;
        }
        $compared += @files - 1;

        # Names still waiting to be matched.  Iterating @files (not the hash
        # keys) keeps the earliest-listed copy as the link target, so the
        # outcome does not depend on hash ordering.
        my %pending = map { $_ => 1 } @files;

        for my $keeper (@files) {
            next unless exists $pending{$keeper};

            for my $other (@files) {
                next unless exists $pending{$other};
                next if $keeper eq $other;

                my ($st_k, $st_o) = @{$stat_of}{ $keeper, $other };

                # An inode number is only unique within one device, so both
                # must match.  Inodes are compared as strings (as before) so
                # very large values are never subject to numeric rounding.
                if ($st_k->dev == $st_o->dev && $st_k->ino eq $st_o->ino) {
                    nmsg "Skipping already-linked files $keeper, $other";
                    next;
                }

                # File::Compare and list-form system() are used instead of
                # shell command strings: quotemeta (\Q..\E) is not a shell
                # quoting function and mishandles e.g. newlines in names.
                vmsg "compare $keeper $other";
                my $cmp = File::Compare::compare($keeper, $other);

                if ($cmp == 0) {
                    nmsg "ln -f $keeper $other";
                    # "--" stops a name starting with "-" being read as an
                    # option.
                    system('ln', '-f', '--', $keeper, $other) == 0
                        or die "Error: couldn't link $keeper to $other";
                    delete $pending{$other};
                }
                elsif ($cmp < 0) {
                    nmsg "Could not compare $keeper and $other: $!";
                }
                else {
                    nmsg "$keeper and $other have the same size but are different.";
                }
            }

            delete $pending{$keeper};
        }
    }

    return;
}

# Script entry point: parse options, read the file list from STDIN, link.
sub main {
    my ($help, $bad_args);

    # NOTE(review): only "'h|help' => \$help" and the "\$quiet" target
    # survived in the recovered source; the 'q|quiet' spec is assumed by
    # analogy.  Confirm against the original.
    GetOptions('q|quiet' => \$quiet, 'h|help' => \$help) or $bad_args = 1;

    usage() if $bad_args || $help;
    exit 1 if $bad_args;
    exit 0 if $help;

    # Remaining command-line arguments are ignored; names come from STDIN.
    nmsg "Reading files...";
    my ($files_by_size, $stat_of, $count, $dup_count, $dup_bytes)
        = read_file_list(\*STDIN);

    my $n = scalar keys %$files_by_size;
    nmsg "read $count file names, in $n different sizes";
    nmsg "($dup_count possible duplicates, with a total possible size of $dup_bytes)";

    nmsg "Comparing files...";
    link_duplicates($files_by_size, $stat_of, $dup_count);

    nmsg "Done.";
    return;
}

# Run only when executed as a script, so the helpers above can be loaded
# (e.g. by tests) without reading STDIN or touching any files.
main() unless caller;

1;