+# Filter the given input file into a temporary CDB data file, and then run
+# cdb to compile that data file into a database named after the input file.
+#
+# The temporary file is named "$input.data" and is created exclusively, so an
+# existing file of that name is never overwritten.  It is removed once cdb
+# succeeds; if writing or the cdb run fails, it is left in place.
+#
+# $in_fh  - Input file handle for the source wordlist
+# $input  - Name of the input file, from which the CDB file name is derived
+# $filter - Reference to sub that returns true to keep a word, false otherwise
+#
+# Returns: undef
+#  Throws: Text exception on output failure or pre-existing temporary file
+sub write_cdb {
+    my ($in_fh, $input, $filter) = @_;
+
+    # Create the temporary file that will hold the CDB input.  O_EXCL makes
+    # the existence check and the creation a single atomic step; a separate
+    # -f test followed by a plain open would race with other processes and
+    # would follow a dangling symlink.  Fcntl is loaded here so that this sub
+    # does not depend on the file's import list.
+    require Fcntl;
+    my $flags = Fcntl::O_WRONLY() | Fcntl::O_CREAT() | Fcntl::O_EXCL();
+    my $tmp   = $input . '.data';
+    my $tmp_fh;
+    if (!sysopen($tmp_fh, $tmp, $flags)) {
+        if ($!{EEXIST}) {
+            die "$0: temporary output file $tmp already exists\n";
+        }
+        die "$0: cannot create output file $tmp: $!\n";
+    }
+
+    # Walk through the input word list and write each word that passes the
+    # filter to the output file handle as CDB data: the word is the key and
+    # the single character "1" is the value.
+    #
+    # NOTE(review): length() counts characters, so this assumes $in_fh
+    # yields byte strings -- confirm against how the caller opens it.
+    while (defined(my $word = <$in_fh>)) {
+        chomp($word);
+        next if !$filter->($word);
+        my $length = length($word);
+
+        # The braces keep the literal "->" visibly separate from the variable.
+        print_fh($tmp_fh, "+$length,1:${word}->1\n");
+    }
+
+    # Add a trailing newline, required by the CDB data format, and close.
+    # Close is checked because buffered write errors surface there.
+    print_fh($tmp_fh, "\n");
+    close($tmp_fh) or die "$0: cannot write to temporary file $tmp: $!\n";
+
+    # Run cdb to turn the result into a CDB database.  Ignore duplicate keys.
+    # The list form of system avoids passing the file names through a shell.
+    system($CDB, '-c', '-u', "$input.cdb", $tmp) == 0
+      or die "$0: cdb -c failed\n";
+
+    # Remove the temporary file and return.
+    unlink($tmp) or die "$0: cannot remove temporary file $tmp: $!\n";
+    return;
+}
+
+# Copy the words accepted by a filter from an input wordlist into a new
+# wordlist file, one word per line.
+#
+# $in_fh  - Input file handle for the source wordlist
+# $output - Output file name to which to write the resulting wordlist
+# $filter - Reference to sub that returns true to keep a word, false otherwise
+#
+# Returns: undef
+#  Throws: Text exception on output failure
+sub write_wordlist {
+    my ($in_fh, $output, $filter) = @_;
+
+    # Create (or truncate) the destination file.
+    my $out_fh;
+    if (!open($out_fh, '>', $output)) {
+        die "$0: cannot create output file $output: $!\n";
+    }
+
+    # Read the source a line at a time, strip the line ending, and write back
+    # only the words for which the filter returns true.
+    while (defined(my $line = readline($in_fh))) {
+        chomp($line);
+        if ($filter->($line)) {
+            print_fh($out_fh, $line . "\n");
+        }
+    }
+
+    # Check close as well, since buffered write errors are reported there.
+    if (!close($out_fh)) {
+        die "$0: cannot write to output file $output: $!\n";
+    }
+    return;
+}
+