#!/usr/bin/perl -w ###################################################################### # This script will attempt to solve Caeser ciphers (and cryptoquips) # through some common-sense English techniques, and a dictionary # search. # # I used to solve cryptoquips ("cute" messages encrypted in a caeser # cipher -- you can usually find them in newspapers or puzzle books) # in High School during history class. I always thought it would be fun # to write a program to help solve them, or even solve them on its own. # This script is somewhere in-between ... It will try a dictionary # attack against a given ciphertext, but you may have to help it along # occasionally via hints for certain letters. It actually works a # lot better than I thought it was going to at first, and I hope I can # improve it more. # # Assuming your dictionary is located in '/usr/dict/words', you can # init your DB dictionary via: # "$0 -w /path/to/db.out -t /usr/dict/words" # # Then run the script with the -w parameter. It will read a ciphertext # from STDIN or a file specified on the commandline. Any lines that # have just "a=b c=d ..." are assumed to have known characters. # Everything else is assumed ciphertext. # # By: Theo Van Dinter (felicity@kluge.net) (c) 2001 # RCS Information: $Id: crypto,v 1.2 2001/05/05 05:37:13 felicity Stab $ # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation; either version 2 of the License, or (at your # option) any later version. # # This program is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # ###################################################################### use strict; use DB_File; use vars qw/ %DICT $opt_t $opt_w %count $opt_m /; use Getopt::Std; use Text::Wrap; $|++; #### Nothing to edit below here #### getopts("w:t:m:") || die "Usage: $0 [ -w , -t , -m ]"; # -w means use the specified dictionary # -t points to the text dictionary file # -m max word options to allow through the filter if ( $opt_m ) { $opt_m--; } else { $opt_m = 4; # max == 5 } # necessary commandline opts # die "Need to specify a dictionary via -w!" unless ( $opt_w ); tie(%DICT, 'DB_File', $opt_w, O_RDWR|O_CREAT, 0644, $DB_BTREE) or die "Can't tie to $opt_w:$!"; # Shove words into the b_tree if necessary # if ( defined $opt_t ) { warn "Inserting words into $opt_w b-tree from $opt_t\n"; open(DICT,"<$opt_t") || die "Can't open dictionary $opt_t:$!"; # Some dictionaries, for whatever reason, don't have common words # like 'an' and 'in' ... We want to make sure we have those along # with the other words from the dictionary. # %DICT = map{chomp; tr/A-Z/a-z/; $_=>1} grep(length>3, ); foreach ( qw/ the did why and but who how are you was all can that here what there with over this many every place ah am an as at be by do go ha he hi if in is it me my no of oh on or ox pi so to up us we /) { $DICT{$_} = 1; } close(DICT); untie %DICT; exit 0; } my $puzz = ""; my %ltrs = (); # Read in the puzzle while ( $_=<> ) { chomp; next unless /\S/; # skip blank lines tr/A-Z/a-z/; if ( /^(\s*[a-z]=[a-z]\s*)+$/ ) { # knowns foreach my $known ( grep(/\S/,split) ) { my($cr,$ct) = split(/=/,$known); die "$cr already known!" if ( $ltrs{$cr} ); $ltrs{$cr} = $ct; } } else { die "Invalid characters in input" if /[^()a-z.,"'?!;: -]/; $puzz .= " $_"; $puzz =~ s/\s+/ /g; } } # make a uniq'd list of words, minus most punctuation my %words = map { tr/a-z'-//cd; $_ => 1 } grep(/[a-z]/,split(/\s+/,$puzz)); # Try to find definite known letters # foreach ( keys %words ) { # Count # of different crypt letters foreach my $ltr ( split(//,$_) ) { $count{$ltr}++; } # Ends in "s'". if ( /(.)'$/ ) { my $ltr = $1; die "Can't have 2 different letters as s ... (x')" if ( $ltrs{$ltr} && $ltrs{$ltr} ne 's'); $ltrs{$ltr} = "s"; delete $words{$_}; s/'$//; $words{$_}=1; print "D: Found $ltr=s\n"; } elsif ( /'(.)\1$/ ) { # ends in "'ll" my $ltr = $1; die "Can't have 2 different letters as l ... ('xx)" if ( $ltrs{$ltr} && $ltrs{$ltr} ne 'l'); $ltrs{$ltr} = "l"; delete $words{$_}; s/'..$//; $words{$_}=1; print "D: Found $ltr=l\n"; } } # Try to solve the cipher. If successful, print out the solution, the letter # map, and any unsolved cryptchars with available cleartext possibilities. # if ( &solve(\%ltrs,\%words,0) == 0 ) { print "\n\n\n"; my $solved = join(" ",map { "$_=".$ltrs{$_} } sort keys %ltrs); &printknown($puzz,\%ltrs); print "\nLetter Map:\n",wrap("\t","\t",$solved),"\n\n"; my %needed = map { $_ => 1 } grep(/[a-z]/,split(//,$puzz)); my %avail = map { $_ => 1 } ('a'..'z'); grep(delete $avail{$_},values %ltrs); my @unknown; foreach ( sort keys %needed ) { push(@unknown,$_) unless $ltrs{$_}; } if ( @unknown ) { print "Unsolvable letters:\n\t",join(" ",@unknown),"\n"; print "Available cleartest:\n\t",join(" ",keys %avail),"\n"; } } # Cleanup and exit ... :) untie %DICT; exit 0; # This is the main routine of the whole script. It actually tries to solve # the cipher via attack of the individual words. # sub solve { my($letters, $words, $level) = @_; my(%letters) = %$letters; # make our own copies my(%words) = %$words; # Find out if any words are completely solved already. If they are, # we don't need to solve them and we can remove them from our list. # foreach my $word ( keys %words ) { my $keep = 0; foreach ( split(//,$word) ) { # If a letter hasn't been found yet, keep this word. # if it's not a letter, skip it. if ( /[a-z]/ && ! exists $letters{$_} ) { $keep = 1; last; } } unless ( $keep ) { # remove the word unless we want to keep it print "$level: Removing $word -- already solved.\n"; delete $words{$word}; } } if ( keys %words == 0 ) { # if there are no words, we're done! print "$level: !! Done !!\n"; %ltrs = %letters; # set the final letter set return 0; } # try standard apostrophe (')-based letters # foreach my $word ( keys %words ) { next unless $word =~ /'/; my %reverse = reverse %letters; # clear => crypt if ( $word =~ /^(.)'(.)$/ ) { # try i'm my($i,$m) = ($1,$2); unless ( exists $reverse{'i'} && $reverse{'i'} ne $i || exists $reverse{'m'} && $reverse{'m'} ne $m || exists $letters{$i} && $letters{$i} ne 'i' || exists $letters{$m} && $letters{$m} ne 'm' ) { delete $words{$word}; print "$level: Trying $word = i'm\n"; $letters{$i} = "i"; $letters{$m} = "m"; &printknown($puzz,\%letters); my $return = &solve(\%letters,\%words,$level+1); return 0 if ( $return == 0 ); %letters = %$letters; # reset %words = %$words; # reset } } if ( $word =~ /(.)'(.)$/ ) { # try n't my($n,$t) = ($1,$2); unless ( exists $reverse{'n'} && $reverse{'n'} ne $n || exists $reverse{'t'} && $reverse{'t'} ne $t || exists $letters{$n} && $letters{$n} ne 'n' || exists $letters{$t} && $letters{$t} ne 't' ) { delete $words{$word}; print "$level: Trying $n=n, $t=t\n"; $word=~s/.'.$//; $letters{$n} = "n"; $letters{$t} = "t"; &printknown($puzz,\%letters); my $return = &solve(\%letters,\%words,$level+1); return 0 if ( $return == 0 ); %letters = %$letters; # reset %words = %$words; # reset } } if ( $word =~ /'(.)$/ ) { # try 's my $s = $1; unless ( exists $reverse{'s'} || exists $letters{$s} && $letters{$s} ne 's' ){ delete $words{$word}; print "$level: Trying $s=s\n"; $word=~s/'.$//; $letters{$s} = "s"; &printknown($puzz,\%letters); my $return = &solve(\%letters,\%words,$level+1); return 0 if ( $return == 0 ); %letters = %$letters; # reset %words = %$words; # reset } } if ( $word =~ /'(.)(.)$/ ) { # try 're and 've my($r,$e) = ($1,$2); unless ( exists $reverse{'r'} && $reverse{'r'} ne $r || exists $reverse{'e'} && $reverse{'e'} ne $e || exists $letters{$r} && $letters{$r} ne 'r' || exists $letters{$e} && $letters{$e} ne 'e' ) { delete $words{$word}; print "$level: Trying $r=r, $e=e\n"; $word=~s/'..$//; $letters{$r} = "r"; $letters{$e} = "e"; &printknown($puzz,\%letters); my $return = &solve(\%letters,\%words,$level+1); return 0 if ( $return == 0 ); %letters = %$letters; # reset %words = %$words; # reset } unless ( exists $reverse{'v'} && $reverse{'v'} ne $r || exists $reverse{'e'} && $reverse{'e'} ne $e || exists $letters{$r} && $letters{$r} ne 'v' || exists $letters{$e} && $letters{$e} ne 'e' ) { delete $words{$word}; print "$level: Trying $r=v, $e=e\n"; $word=~s/'..$//; $letters{$r} = "v"; $letters{$e} = "e"; &printknown($puzz,\%letters); my $return = &solve(\%letters,\%words,$level+1); return 0 if ( $return == 0 ); %letters = %$letters; # reset %words = %$words; # reset } } } # try one letter words (A and I) if ( my(@oltr) = grep(length == 1, keys %words) ) { die "Too many 1 letter words" if ( $#oltr > 1 ); # more than 2 choices? my %reverse = reverse %letters; if ( exists $reverse{a} && exists $reverse{i} ) { print "$level: 1 letter word and a+i already defined.\n"; return 1; } my(@t) = ('a','i','i','a'); for ( 0..1 ) { foreach my $a ( @oltr ) { my $use = shift @t; $use = shift @t if ( exists $reverse{$use} ); $letters{$a} = $use; print "$level: Using $a=",$letters{$a},"\n"; delete $words{$a}; } &printknown($puzz,\%letters); my $return = &solve(\%letters,\%words,$level+1); return 0 if ( $return == 0 ); %letters = %$letters; # reset %words = %$words; # reset } die "Neither A nor I for single letters will solve"; } # Try to figure out if any words only have one choice # this one has to order all possibilities and go in order up the # chain. my(%regexs,%nums,%totry); foreach ( keys %words ) { my $regex = &genregex($_,\%letters); next if ( $regexs{$regex}++ ); print "$level: $_: $regex "; my @returns = grep(/$regex/, keys %DICT); if ( length $_ == 2 && $#returns == -1 ) { # invalid print "- invalid (2 ltr, 0 valid)\n"; return 1; } if ( $#returns == -1 || $#returns > $opt_m ){ # unknown, keep going print "- unknown\n"; next; } print "- ",$#returns+1," matches\n"; $totry{$_} = \@returns; $nums{$_} = $#returns; } # go through each cipher word in order of possibles foreach my $word ( sort { my $w = $nums{$a} <=> $nums{$b}; if ( $w == 0 ) { length $b <=> length $a; } else {$w;}} keys %nums ) { delete $words{$word}; foreach my $a ( @{$totry{$word}} ) { my @t = split(//,$a); print "$level: Trying $a for $word"; my %weset; foreach my $b ( split(//,$word) ) { my $ltr = shift @t; # if we've already set this letter in this word next if ( $weset{$b}++ && $letters{$b} eq $ltr ); # defined already w/ another letter if ( $letters{$b} && $letters{$b} ne $ltr ) { print " -- Invalid!"; undef %letters; last; } $letters{$b} = $ltr; } print "\n"; if ( %letters ) { &printknown($puzz,\%letters); my $return = &solve(\%letters,\%words,$level+1); return 0 if ( $return == 0 ); } %letters = %$letters; # reset } %words = %$words; # reset } # If after trying everything we could, see if there's just some # letters we can't solve. If so, claim that we've finished and # return triumphant. # foreach my $word ( keys %words ) { my $keep = 0; # go through each crypto letter. if the word is completely # solved already (all letters exist or unsolved letters are # loners), remove it from the list. # foreach ( split(//,$word) ) { if ( /[a-z]/ && ! exists $letters{$_} && $count{$_} > 1 ) { $keep = 1; last; } } unless ( $keep ) { print "$level: Removing $word -- unsolvable letter.\n"; delete $words{$word}; } } if ( keys %words == 0 ) { print "$level: !! Done !!\n"; %ltrs = %letters; return 0; } return 1; # 0 means solved, 1 means keep going } # Display the current cleartext and the ciphertext. # sub printknown { my($puzzle,$letters) = @_; my $solved; foreach ( split(//,$puzzle) ) { if ( /[^a-z]/ ) { $solved .= $_; } elsif ( $letters->{$_} ) { $solved .= $letters->{$_}; } else { $solved .= "_"; } } print "\n\n",wrap(""," ",$solved),"\n","="x78,"\n", wrap(""," ",$puzzle),"\n\n"; } # Generate a regular expression to search for from a given ciphertext word. # sub genregex { my($word,$letters) = @_; my($known) = "."; my($regex) = ""; my(%count,%spot,$spot); # If we know any letters, make sure we don't search for words with # them in it... if ( values %$letters ) { $known = "[^".join("",sort values %$letters)."]"; $known =~ tr/a-z/a-z/s; } # Figure out if we'll need to backreference foreach my $ltr ( split(//,$word) ) { $count{$ltr}++; } # Actually run through and make the regex foreach my $ltr ( split(//,$word) ) { if ( $letters->{$ltr} ) { $regex .= $letters->{$ltr}; } elsif ( $count{$ltr} > 1 ) { # remember it if ( $spot{$ltr} ) { $regex .= "\\".$spot{$ltr}; } else { $spot{$ltr} = ++$spot; die "Too many backreferences!" if $spot>9; $regex .= "($known)"; } } else { $regex .= "$known"; } } return qr/^$regex$/; }