#!/usr/local/bin/perl # # An implementation of 'strfile' in Perl. Written for the Perl # Power Tools (PPT) project by Theo Van Dinter (felicity@kluge.net). (C) 2000 # # $Id: strfile,v 1.3 2000/02/20 16:42:53 felicity Exp felicity $ # use Getopt::Std; use strict; use vars qw/$opt_i $opt_o $opt_r $opt_s $opt_x $opt_c $fort $opt_h/; &help unless getopts('hiorsxc:'); &help if ( $opt_h ); my($file) = shift || &help; my($ofile) = shift || "$file.dat"; my($ver) = 1; my($longlen) = 0; my($shortlen) = undef; my($offset) = 0; my(%fortunes) = (); my($flags) = 0; $opt_c ||= "%"; # delimiter &help if ( @ARGV || length($opt_c) > 1 ); if ( $opt_r && $opt_o ) { # Random and ordered?!? warn "-o (ordered) and -r (random) are mutually exclusive.\n"; exit 1; } $flags |= 0x1 if ( $opt_r ); # STR_RANDOM 0x1 /* randomized pointers */ $flags |= 0x2 if ( $opt_o ); # STR_ORDERED 0x2 /* ordered pointers */ $flags |= 0x4 if ( $opt_x ); # STR_ROTATED 0x4 /* rot-13'd text */ open(IN,"<$file") || die "Can't open $file:$!"; # This was originally going to read in the file with a $/= ... delim ... # version, but it had issues with malformed input files, so I had to goto the # standard one line at a time approach. 8( # until ( eof(IN) ) { my($fort); # slurp in the fortune into $fort ... $fort.=$_ while ( defined($_=) && ($_ ne "$opt_c\n") ); my($len) = length($fort); # delim and newline are after it... # Skip empty fortunes. unless ( $len > 0 ) { warn "Empty fortune at offset $offset!\n" unless ( $opt_s ); $offset = tell; next; } # Figure out the shortest/longest fortunes. $shortlen = $len if ( !defined($shortlen) || $len < $shortlen ); $longlen = $len if ( $len > $longlen ); $fort =~ s/^[^0-9a-zA-Z]+//; # remove leading non-alphanums # Duplicate fortune?!? Blah! if ( $fortunes{$fort} ) { warn "Duplicate fortune at offset $offset!\n" unless ( $opt_s ); } else { $fortunes{$fort} = $offset; } $offset = tell; # update w/ new pointer $fort = ""; # blank out fortune holder } close(IN); # we're done with the input file... my($nfort) = scalar(keys %fortunes); # number of fortunes # No fortunes? Just quit, an empty dat file is useless. unless ( $nfort ) { warn "No fortunes found in file! Exiting!\n"; exit 1; } # Start the output phase! # open(OUT,">$ofile") || die "Can't open $ofile:$!"; binmode OUT; # we're writing binary data, right? print OUT pack ("NNNNNaxxx", # Print out the header, format: $ver, # Version (1) $nfort, # # of fortunes $longlen, # length of longest fortunes $shortlen, # length of shortest fortunes $flags, # byte field for flags $opt_c); # delimiter (padded to 32bits) # Print out the offsets in network format # if ( !$flags || $flags & 0x1 ) { # random or normal mode (sorted or not) print OUT map { pack("N", $_); } ($flags & 0x1) ? values %fortunes : sort { $a <=> $b } (values %fortunes); } elsif ( $flags & 0x2 ) { # alphabetical order, case sensitive or not... print OUT map { pack("N", $fortunes{$_}); } sort { ($opt_i) ? lc $a cmp lc $b : $a cmp $b } (keys %fortunes); } print OUT pack("N",$offset); # stick the final offset in there... (EOF ptr) close(OUT); # Print out a summary unless we're in silent mode ... # my($nstr) = ($nfort != 1)?"There were $nfort fortunes":"There was 1 fortune"; my($ostr) = ($flags&0x1)?", ordered randomly":($flags&0x2)? ", ordered alphabetically":""; print qq{"$ofile" created$ostr $nstr Longest string: $longlen bytes Shortest string: $shortlen bytes } unless ( $opt_s ); # Print the darned usage statement! # sub help { print " usage: $0 [-iorsxh] [-c char] source_file [data_file] -o\tOrder pointers in data_file Alphabetically. -i\tCause Alphabetic ordering (-o) to be case-insensitive. -r\tRandomize pointers in data_file. -s\tSilent mode, don't print any messages. -x\tSet the STR_ROTATED (rot13) bit in the data_file header. -c char\tChange the delimiter from the standard \"\%\". -h\tThis help message. "; exit 1; } # Pod Documentation ... =head1 NAME strfile - create a random access file for storing strings =head1 SYNOPSIS B [ B<-iorsxh> ] [ B<-c char> ] I [ I ] =head1 DESCRIPTION strfile reads a file containing groups of lines separated by a line containing a single delimiting character (by default the character is the percent `%' sign) and creates a data file which contains a header structure and a table of file offsets for each group of lines. This allows random access of the strings. data_file, if not specified on the command line, is named source_file.dat. =head1 OPTIONS AND ARGUMENTS =over 8 =item I<-h> Display the usage help message. =item I<-c char> Change the delimiting character from the percent sign to C. =item I<-i> Ignore case when ordering the strings. =item I<-o> Order the strings alphabetically. Any initial non-alphanumeric characters are ignored. This option causes the STR_ORDERED bit in the header str_flags field to be set. =item I<-r> Randomize access to the strings in data_file. Entries in the offset table will be randomly ordered. This option causes the STR_RANDOM bit in the header str_flags field to be set. (It does this by writing out the offsets as returned by Perl's hashing function value().) =item I<-s> Run silently, don't display any messages or the summary. =item I<-x> Set the STR_ROTATED bit in the header str_flags field. This means that the text in the source_file is rotated 13 positions (rot13 encoded). =back =head1 NOTES =head2 data_file Header The format of the header is: #define VERSION 1 unsigned long str_version; /* version number */ unsigned long str_numstr; /* # of strings in the file */ unsigned long str_longlen; /* length of longest string */ unsigned long str_shortlen; /* shortest string length */ #define STR_RANDOM 0x1 /* randomized pointers */ #define STR_ORDERED 0x2 /* ordered pointers */ #define STR_ROTATED 0x4 /* rot-13'd text */ unsigned long str_flags; /* bit field for flags */ char str_delim; /* delimiting character */ All fields are written in network byte order. (The header is C in Perl pack semantics). =head1 RETURN VALUES strfile returns 0 on success or 1 if an error occurred. =head1 HISTORY Perl version written for the Perl Power Tools project from the description of the strfile program in RedHat Linux 6.1. =head1 AUTHOR Theo Van Dinter (felicity@kluge.net) =head1 SEE ALSO fortune(1), unstr(1)