From f5d2ad169dfcb08f533b8010f8c5e3d4e1a108f8 Mon Sep 17 00:00:00 2001 From: Russ Allbery Date: Mon, 24 Mar 2014 19:16:12 -0700 Subject: [PATCH] Rename cdbmake-wordlist and add SQLite support cdbmake-wordlist has been renamed to krb5-strength-wordlist. Generating CDB dictionaries now requires the -c option; see the documentation for more information. A SQLite database of dictionary words can now be created instead, using the -s option. --- .gitignore | 1 + Makefile.am | 31 ++- NEWS | 5 + autogen | 8 +- tests/TESTS | 2 +- .../{cdbmake-wordlist-t => wordlist-cdb-t} | 19 +- ...dbmake-wordlist => krb5-strength-wordlist} | 189 ++++++++++++++---- 7 files changed, 194 insertions(+), 61 deletions(-) rename tests/tools/{cdbmake-wordlist-t => wordlist-cdb-t} (83%) rename tools/{cdbmake-wordlist => krb5-strength-wordlist} (56%) diff --git a/.gitignore b/.gitignore index 8013fb4..d723ddf 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ /tests/data/dictionary.* /tests/data/passwords/*.c /tests/data/wordlist.cdb +/tests/data/wordlist.sqlite /tests/plugin/heimdal-t /tests/plugin/mit-t /tests/portable/asprintf-t diff --git a/Makefile.am b/Makefile.am index 6d38876..7fce813 100644 --- a/Makefile.am +++ b/Makefile.am @@ -12,12 +12,13 @@ EXTRA_DIST = .gitignore LICENSE autogen cracklib/HISTORY cracklib/LICENCE \ tests/TESTS tests/data/krb5.conf tests/data/make-krb5-conf \ tests/data/passwords tests/data/perl.conf tests/data/perlcriticrc \ tests/data/perltidyrc tests/data/valgrind.supp tests/data/wordlist \ - tests/data/wordlist.cdb tests/docs/pod-spelling-t tests/docs/pod-t \ - tests/perl/critic-t tests/perl/minimum-version-t \ - tests/perl/strict-t tests/tap/libtap.sh tests/tap/perl/Test/RRA.pm \ + tests/data/wordlist.cdb tests/data/wordlist.sqlite \ + tests/docs/pod-spelling-t tests/docs/pod-t tests/perl/critic-t \ + tests/perl/minimum-version-t tests/perl/strict-t \ + tests/tap/libtap.sh tests/tap/perl/Test/RRA.pm \ tests/tap/perl/Test/RRA/Config.pm \ - 
tests/tap/perl/Test/RRA/Automake.pm tests/tools/cdbmake-wordlist-t \ - tests/tools/heimdal-history-t tests/tools/heimdal-strength-t \ + tests/tap/perl/Test/RRA/Automake.pm tests/tools/heimdal-history-t \ + tests/tools/heimdal-strength-t tests/tools/wordlist-cdb-t \ tests/util/xmalloc-t tools/heimdal-strength.pod # Do this globally. Everything needs to find the Kerberos headers and @@ -82,11 +83,11 @@ tools_heimdal_strength_LDADD += util/libutil.a portable/libportable.la \ $(KRB5_LIBS) $(CDB_LIBS) # Other tools. -dist_bin_SCRIPTS = tools/cdbmake-wordlist tools/heimdal-history +dist_bin_SCRIPTS = tools/heimdal-history tools/krb5-strength-wordlist # Man pages for all tools. dist_man_MANS = tools/heimdal-history.1 tools/heimdal-strength.1 \ - tools/cdbmake-wordlist.1 + tools/krb5-strength-wordlist.1 # Handle the standard stuff that make maintainer-clean should probably remove # but doesn't. This breaks the GNU coding standard, but in this area the GNU @@ -98,7 +99,15 @@ MAINTAINERCLEANFILES = Makefile.in aclocal.m4 build-aux/compile \ build-aux/config.guess build-aux/config.sub build-aux/depcomp \ build-aux/install-sh build-aux/ltmain.sh build-aux/missing \ config.h.in config.h.in~ configure m4/libtool.m4 m4/ltoptions.m4 \ - m4/ltsugar.m4 m4/ltversion.m4 m4/lt~obsolete.m4 + m4/ltsugar.m4 m4/ltversion.m4 m4/lt~obsolete.m4 \ + tests/data/wordlist.cdb tests/data/wordlist.sqlite \ + tools/heimdal-history.1 tools/heimdal-strength.1 \ + tools/krb5-strength-wordlist.1 + +# Also remove the generated *.c files from our JSON test data on +# maintainer-clean. +maintainer-clean-local: + rm -f tests/data/passwords/*.c # A set of flags for warnings. Add -O because gcc won't find some warnings # without optimization turned on. Desirable warnings that can't be turned @@ -183,8 +192,8 @@ check-local: $(check_PROGRAMS) tests/data/dictionary.pwd # Used by maintainers to run the main test suite under valgrind. 
Suppress # the xmalloc and pod-spelling tests because the former won't work properly # under valgrind (due to increased memory usage) and the latter is pointless -# to run under valgrind. Don't try to trace the test for cdbmake-wordlist, -# since it's pure Perl. +# to run under valgrind. Don't try to trace the test for +# krb5-strength-wordlist, since it's pure Perl. check-valgrind: $(check_PROGRAMS) tests/data/dictionary.pwd rm -rf $(abs_top_builddir)/tmp-valgrind mkdir $(abs_top_builddir)/tmp-valgrind @@ -192,5 +201,5 @@ check-valgrind: $(check_PROGRAMS) tests/data/dictionary.pwd --show-reachable=yes --trace-children=yes \ --log-file=$(abs_top_builddir)/tmp-valgrind/log.%p \ --suppressions=tests/data/valgrind.supp \ - --trace-children-skip="/bin/sh,*/cat,*/diff,*/expr,*/grep,*/mkdir,*/rm,*/rmdir,*/sed,*/sleep,*/true,*/wc,*/docs/*-t,*/perl/*-t,*/data/make-krb5-conf,*/tools/cdbmake-wordlist-t" \ + --trace-children-skip="/bin/sh,*/cat,*/diff,*/expr,*/grep,*/mkdir,*/rm,*/rmdir,*/sed,*/sleep,*/true,*/wc,*/docs/*-t,*/perl/*-t,*/data/make-krb5-conf,*/tools/wordlist-*-t" \ tests/runtests -l '$(abs_top_srcdir)/tests/TESTS' diff --git a/NEWS b/NEWS index 0457d3e..96758be 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,11 @@ krb5-strength 3.0 (unreleased) + cdbmake-wordlist has been renamed to krb5-strength-wordlist. + Generating CDB dictionaries now requires the -c option; see the + documentation for more information. A SQLite database of dictionary + words can now be created instead, using the -s option. + A password history implementation for Heimdal is now included. This is a separate Perl program, heimdal-history, that stacks with the external program implementation of strength checking. It is not diff --git a/autogen b/autogen index f53f66c..201d996 100755 --- a/autogen +++ b/autogen @@ -9,12 +9,12 @@ autoreconf -i --force # Generate manual pages. 
version=`grep '^krb5-strength' NEWS | head -1 | cut -d' ' -f2` -pod2man --release="$version" --center='krb5-strength' \ - tools/cdbmake-wordlist > tools/cdbmake-wordlist.1 pod2man --release="$version" --center='krb5-strength' \ tools/heimdal-history > tools/heimdal-history.1 pod2man --release="$version" --center='krb5-strength' \ tools/heimdal-strength.pod > tools/heimdal-strength.1 +pod2man --release="$version" --center='krb5-strength' \ + tools/krb5-strength-wordlist > tools/krb5-strength-wordlist.1 # Generate the C version of our password test data. for data in tests/data/passwords/*.json ; do @@ -23,4 +23,6 @@ for data in tests/data/passwords/*.json ; do done # Generate the CDB database from the test wordlist for plugin tests. -tools/cdbmake-wordlist tests/data/wordlist +rm -f tests/data/wordlist.cdb tests/data/wordlist.sqlite +tools/krb5-strength-wordlist -c tests/data/wordlist.cdb tests/data/wordlist +tools/krb5-strength-wordlist -s tests/data/wordlist.sqlite tests/data/wordlist diff --git a/tests/TESTS b/tests/TESTS index 2ba810c..72157f3 100644 --- a/tests/TESTS +++ b/tests/TESTS @@ -9,9 +9,9 @@ portable/asprintf portable/mkstemp portable/snprintf portable/strndup -tools/cdbmake-wordlist tools/heimdal-history tools/heimdal-strength +tools/wordlist-cdb util/messages util/messages-krb5 util/xmalloc diff --git a/tests/tools/cdbmake-wordlist-t b/tests/tools/wordlist-cdb-t similarity index 83% rename from tests/tools/cdbmake-wordlist-t rename to tests/tools/wordlist-cdb-t index 08d8d91..034a83f 100755 --- a/tests/tools/cdbmake-wordlist-t +++ b/tests/tools/wordlist-cdb-t @@ -1,9 +1,9 @@ #!/bin/sh # -# Test suite for the cdbmake-wordlist utility. +# Test suite for the CDB handling in the krb5-strength-wordlist utility. # # Written by Russ Allbery -# Copyright 2013 +# Copyright 2013, 2014 # The Board of Trustees of the Leland Stanford Junior University # # See LICENSE for licensing terms. 
@@ -32,8 +32,9 @@ chmod 644 "$tmpdir/wordlist" echo 'عربى' >> "$tmpdir/wordlist" # Test generation of the basic cdb file. -cdbmake="$SOURCE/../tools/cdbmake-wordlist" -ok_program 'Database generation' 0 '' "$cdbmake" "$tmpdir/wordlist" +makelist="$SOURCE/../tools/krb5-strength-wordlist" +ok_program 'Database generation' 0 '' \ + "$makelist" -c "$tmpdir/wordlist.cdb" "$tmpdir/wordlist" # Check the contents. ok_program 'Database contains password' 0 '1' \ @@ -47,7 +48,7 @@ ok_program 'Database contains non-ASCII password' 0 '1' \ # Regenerate the database, filtering out short passwords. ok_program 'Database generation with no short passwords' 0 '' \ - "$cdbmake" -l 8 "$tmpdir/wordlist" + "$makelist" -c "$tmpdir/wordlist.cdb" -l 8 "$tmpdir/wordlist" ok_program 'Database still contains password' 0 '1' \ cdb -q "$tmpdir/wordlist.cdb" password ok_program 'Database does not contain one' 100 '' \ @@ -55,7 +56,7 @@ ok_program 'Database does not contain one' 100 '' \ # Regenerate the database, filtering out non-ASCII words. ok_program 'Database generation with no non-ASCII' 0 '' \ - "$cdbmake" -a "$tmpdir/wordlist" + "$makelist" -c "$tmpdir/wordlist.cdb" -a "$tmpdir/wordlist" ok_program 'Database still contains password' 0 '1' \ cdb -q "$tmpdir/wordlist.cdb" password ok_program 'Database does not contain non-ASCII password' 100 '' \ @@ -63,7 +64,7 @@ ok_program 'Database does not contain non-ASCII password' 100 '' \ # Regenerate the database, filtering out long passwords. ok_program 'Database generation with no long passwords' 0 '' \ - "$cdbmake" -L 10 "$tmpdir/wordlist" + "$makelist" -c "$tmpdir/wordlist.cdb" -L 10 "$tmpdir/wordlist" ok_program 'Database still contains bitterbane' 0 '1' \ cdb -q "$tmpdir/wordlist.cdb" bitterbane ok_program 'Database does not contain happenstance' 100 '' \ @@ -71,7 +72,7 @@ ok_program 'Database does not contain happenstance' 100 '' \ # Regenerate the database, filtering out words starting with b or ending in d. 
ok_program 'Database generation with no b passwords' 0 '' \ - "$cdbmake" -x '\Ab' -x '.*d' "$tmpdir/wordlist" + "$makelist" -c "$tmpdir/wordlist.cdb" -x '\Ab' -x '.*d' "$tmpdir/wordlist" ok_program 'Database does not contain bitterbane' 100 '' \ cdb -q "$tmpdir/wordlist.cdb" bitterbane ok_program 'Database still contains happenstance' 0 '1' \ @@ -81,7 +82,7 @@ ok_program 'Database does not contain password' 100 '' \ # Try filtering the wordlist into a new wordlist. ok_program 'Wordlist filtering' 0 '' \ - "$cdbmake" -a -x '.*d' -l 8 -o "$tmpdir/wordlist.new" "$tmpdir/wordlist" + "$makelist" -a -x '.*d' -l 8 -o "$tmpdir/wordlist.new" "$tmpdir/wordlist" ( echo 'bitterbane'; echo 'happenstance' ) > "$tmpdir/wordlist.expected" ok_program 'Filtered wordlist is correct' 0 '' \ cmp "$tmpdir/wordlist.expected" "$tmpdir/wordlist.new" diff --git a/tools/cdbmake-wordlist b/tools/krb5-strength-wordlist similarity index 56% rename from tools/cdbmake-wordlist rename to tools/krb5-strength-wordlist index 74621a7..f92ec0a 100755 --- a/tools/cdbmake-wordlist +++ b/tools/krb5-strength-wordlist @@ -21,6 +21,19 @@ use Getopt::Long qw(GetOptions); # the user's PATH is searched for cdb. my $CDB = 'cdb'; +# The SQL used to create the SQLite database. +my $SQLITE_CREATE = q{ + CREATE TABLE passwords ( + password TEXT UNIQUE NOT NULL, + drowssap TEXT UNIQUE NOT NULL + ) +}; + +# The SQL used to insert passwords into the database. +my $SQLITE_INSERT = q{ + INSERT OR IGNORE INTO passwords (password, drowssap) values (?, ?) +}; + # print with error checking and an explicit file handle. # # $fh - Output file handle @@ -38,16 +51,21 @@ sub print_fh { # cdb to turn that into a database. 
# # $in_fh - Input file handle for the source wordlist -# $input - Name of the input file, from which the CDB file name is derived +# $output - Name of the output CDB file # $filter - Reference to sub that returns true to keep a word, false otherwise # # Returns: undef # Throws: Text exception on output failure or pre-existing temporary file sub write_cdb { - my ($in_fh, $input, $filter) = @_; + my ($in_fh, $output, $filter) = @_; + + # Check that the output CDB file doesn't exist. + if (-f $output) { + die "$0: output file $output already exists\n"; + } # Create a temporary file to write the CDB input into. - my $tmp = $input . '.data'; + my $tmp = $output . '.data'; if (-f $tmp) { die "$0: temporary output file $tmp already exists\n"; } @@ -68,7 +86,7 @@ sub write_cdb { close($tmp_fh) or die "$0: cannot write to temporary file $tmp: $!\n"; # Run cdb to turn the result into a CDB database. Ignore duplicate keys. - system($CDB, '-c', '-u', "$input.cdb", $tmp) == 0 + system($CDB, '-c', '-u', $output, $tmp) == 0 or die "$0: cdb -c failed\n"; # Remove the temporary file and return. @@ -76,6 +94,53 @@ sub write_cdb { return; } +# Filter the given input file and write it to a newly-created SQLite database. +# Requires the DBI and DBD::SQLite modules be installed. The database will +# contain one table, passwords, with two columns, password and drowssap, which +# store the word and the word reversed for each word that passes the filter. +# +# $in_fh - Input file handle for the source wordlist +# $output - Name of the output SQLite database +# $filter - Reference to sub that returns true to keep a word, false otherwise +# +# Returns: undef +# Throws: Text exception on output failure, pre-existing output file, or +# missing Perl modules +sub write_sqlite { + my ($in_fh, $output, $filter) = @_; + + # Check that the output SQLite file doesn't exist. + if (-f $output) { + die "$0: output file $output already exists\n"; + } + + # Load the required modules. 
+ require DBI; + require DBD::SQLite; + + # Open and create the database. + my $options = { PrintError => 0, RaiseError => 1, AutoCommit => 0 }; + my $dbh = DBI->connect("dbi:SQLite:dbname=$output", q{}, q{}, $options); + $dbh->do($SQLITE_CREATE); + + # Prepare the insert statement for each word. + my $sth = $dbh->prepare($SQLITE_INSERT); + + # Walk through the input word list and add each word that passes the + # filter to the database, both as-is and reversed. + while (defined(my $word = <$in_fh>)) { + chomp($word); + next if !$filter->($word); + my $reversed = reverse($word); + $sth->execute($word, $reversed); + } + + # Commit and close the database. + $dbh->commit; + $dbh->disconnect; + return; +} + # Filter the given input file and write the results to a new wordlist. # # $in_fh - Input file handle for the source wordlist @@ -110,14 +175,17 @@ my $fullpath = $0; local $0 = basename($0); # Parse the argument list. -my ($ascii, @exclude, $max_length, $min_length, $manual, $output); +my ($ascii, $cdb, @exclude, $max_length, $min_length, $manual, $output, + $sqlite); Getopt::Long::config('bundling', 'no_ignore_case'); GetOptions( 'ascii|a' => \$ascii, + 'cdb|c=s' => \$cdb, 'max-length|L=i' => \$max_length, 'min-length|l=i' => \$min_length, 'manual|man|m' => \$manual, 'output|o=s' => \$output, + 'sqlite|s=s' => \$sqlite, 'exclude|x=s' => \@exclude, ); if ($manual) { @@ -127,6 +195,11 @@ if ($manual) { if (@ARGV != 1) { die "Usage: cdbmake-wordlist \n"; } +if (defined($cdb) && (defined($output) || defined($sqlite))) { + die "$0: -c cannot be used with -o or -s\n"; +} elsif (defined($output) && defined($sqlite)) { + die "$0: -o cannot be used with -c or -s\n"; +} my $input = $ARGV[0]; # Build a filter from our command-line parameters. 
This is an anonymous sub @@ -159,8 +232,10 @@ open(my $in_fh, '<', $input) or die "$0: cannot open input file $input: $!\n"; if (defined($output)) { write_wordlist($in_fh, $output, $filter); -} else { - write_cdb($in_fh, $input, $filter); +} elsif (defined($cdb)) { + write_cdb($in_fh, $cdb, $filter); +} elsif (defined($sqlite)) { + write_sqlite($in_fh, $sqlite, $filter); } close($in_fh) or die "$0: cannot read all of input file $input: $!\n"; @@ -169,37 +244,53 @@ exit(0); __END__ =for stopwords -cdbmake-wordlist cdb whitespace wordlist lookups lookup sublicense -MERCHANTABILITY NONINFRINGEMENT krb5-strength --ascii Allbery regexes -output-wordlist +krb5-strength-wordlist krb5-strength cdb whitespace lookups lookup +sublicense MERCHANTABILITY NONINFRINGEMENT krb5-strength --ascii Allbery +regexes output-wordlist =head1 NAME -cdbmake-wordlist - Create a cdb database from a wordlist +krb5-strength-wordlist - Create a krb5-strength database from a wordlist =head1 SYNOPSIS -B<cdbmake-wordlist> [B<-am>] [B<-l> I] [B<-L> I] - [B<-o> I] [B<-x> I ...] I +B<krb5-strength-wordlist> [B<-am>] [B<-c> I] [B<-l> I] + [B<-L> I] [B<-o> I] [B<-s> I] + [B<-x> I ...] I =head1 DESCRIPTION -cdb is a format invented by Dan Bernstein for fast, constant databases. +B<krb5-strength-wordlist> converts a word list (a file containing one word +per line) into a database that can be used by the krb5-strength plugin or +B<heimdal-strength> command for checking passwords. Two database formats +are supported, with different features. CDB is more space-efficient and +possibly faster, but supports checking passwords only against exact +matches or simple transformations (removing small numbers of leading and +trailing characters). SQLite creates a much larger database, but supports +rejecting any password within edit distance one of a word in the word +list. + +CDB is a format invented by Dan Bernstein for fast, constant databases. The database is fixed during creation and cannot be changed without -rebuilding it, and is optimized for very fast access.
This program takes -as input a wordlist file (a set of words, possibly including whitespace, -separated by newlines) and turns it into a cdb file with the words as keys -and the constant C<1> as a value. The resulting database is suitable for -fast existence lookups in the wordlist, such as for password dictionary -checks. - -B<cdbmake-wordlist> takes one argument, the input wordlist file. The -output cdb database will have the same name as I but with -C<.cdb> appended. The input wordlist file does not have to be sorted. - -B<cdbmake-wordlist> can, instead of building a CDB file, filter a wordlist -against the criteria given on the command line and generate a new -wordlist. See the B<-o> option for more details. +rebuilding it, and is optimized for very fast access. For cdb, the +database generated by this program will have keys for each word in the +word list and the constant C<1> as the value. + +SQLite stores the word list in a single SQL table containing both each +word and each word reversed. This allows the krb5-strength plugin or +B<heimdal-strength> command to reject passwords within edit distance one +of any word in the word list. (Edit distance one means that the word list +entry can be formed by changing a single character of the password, either +by adding one character, removing one character, or changing one character +to a different character.) However, the SQLite database will be much +larger and lookups may be somewhat slower. + +B<krb5-strength-wordlist> takes one argument, the input wordlist file. +Use the B<-c> option to specify an output CDB file, B<-s> to specify an +output SQLite file, or B<-o> to just filter the word list against the +criteria given on the command line and generate a new word list. +The input wordlist file does not have to be sorted. See the individual +option descriptions for more information. =head1 OPTIONS @@ -211,6 +302,17 @@ Filter all words that contain non-ASCII characters or control characters from the resulting cdb file, leaving only words that consist solely of ASCII non-control characters.
+=item B<-c> I, B<--cdb>=I + +Create a CDB database in I. A temporary file named after +I with C<.data> appended will be created in the same directory +and used to stage the database contents. The actual CDB file will be +built using the B<cdb> command, which must be on the user's path. If +either file already exists, B<krb5-strength-wordlist> will abort with an +error. + +This option cannot be used with B<-o> or B<-s>. + =item B<-L> I, B<--max-length>=I Filter all words of length greater than I from the resulting cdb @@ -241,14 +343,27 @@ C). =item B<-o> I, B<--output>=I -Rather than creating a CDB database, apply the filter rules given by the -other command-line arguments and generate a new wordlist in the file name -given by the I option. This can be used to reduce the size of -a raw wordlist file (such as one taken from Internet sources) by removing -the words that will be filtered out of the CDB file anyway, thus reducing -the size of the source required to regenerate the CDB database. +Rather than creating a database, apply the filter rules given by the other +command-line arguments and generate a new wordlist in the file name given +by the I option. This can be used to reduce the size of a raw +wordlist file (such as one taken from Internet sources) by removing the +words that will be filtered out of the dictionary anyway, thus reducing +the size of the source required to regenerate the dictionary. + +This option cannot be used with B<-c> or B<-s>. + +=item B<-s> I, B<--sqlite>=I + +Create a SQLite database in I. If this file already +exists, B<krb5-strength-wordlist> will abort with an error. The resulting +SQLite database will have one table, C<passwords>, with two columns, +C<password> and C<drowssap>. The first holds a word from the word list, +and the second holds the same word reversed. + +Using this option requires the DBI and DBD::SQLite Perl modules be +installed. -If this option is given, no CDB database will be created. +This option cannot be used with B<-c> or B<-o>.
=item B<-x> I, B<--exclude>=I @@ -265,7 +380,7 @@ Russ Allbery =head1 COPYRIGHT AND LICENSE -Copyright 2013 The Board of Trustees of the Leland Stanford Junior +Copyright 2013, 2014 The Board of Trustees of the Leland Stanford Junior University Permission is hereby granted, free of charge, to any person obtaining a @@ -288,7 +403,7 @@ DEALINGS IN THE SOFTWARE. =head1 SEE ALSO -cdb(1) +cdb(1), L<DBI>, L<DBD::SQLite> The cdb file format is defined at L. -- 2.39.2