download BadWords.pm
Language: Perl
Copyright: (c) 2000 Dirk Koopman
LOC: 84
Project Info
DXSpider DX Cluster System(dxspider)
Server: SourceForge
Type: cvs
...pider\dxspider\spider\perl\
   AGWConnect.pm
   AGWMsg.pm
   AnnTalk.pm
   BadWords.pm
   Bands.pm
   BBS.pm
   Buck.pm
   call.pl
   callbot.pl
   Chain.pm
   cluster.pl
   CmdAlias.pm
   connect.pl
   console.pl
   Console.pm
   convert_users.pl
   convkeps.pl
   create_prefix.pl
   create_qsl.pl
   create_sysop.pl
   create_usdb.pl
   DB0SDX.pm
   DXBearing.pm
   dxcc.pl
   DXChannel.pm
   DXCommandmode.pm
   DXConnect.pm
   DXCron.pm
   DXDb.pm
   DXDebug.pm
   DXDupe.pm
   DXHash.pm
   DXLog.pm
   DXLogPrint.pm
   DXM.pm
   DXMsg.pm
   dxoldtonew.pl
   DXProt.pm
   DXProtout.pm
   DXProtVars.pm
   DXSql.pm
   DXUser.pm
   DXUtil.pm
   DXVars.pm.issue
   DXXml.pm
   Editable.pm
   export_opernam.pl
   ExtMsg.pm
   Filter.pm
   ForkingServer.pm
   gen_usdb_data.pl
   Geomag.pm
   hlptohtml.pl
   importkeps.pl
   importwwv.pl
   Internet.pm
   IntMsg.pm
   Investigate.pm
   IsoTime.pm
   Julian.pm
   K4UTE.pm
   Keps.pm
   Listeners.pm
   Local.pm
   lock_nodes.pl
   log2csv.pl
   LRU.pm
   Minimuf.pm
   MiscLog.pm
   Mrtg.pm
   Msg.pm
   PC.pm
   Prefix.pm
   process_ursa.pl
   Prot.pm
   proto.html
   QRZ.pm
   QSL.pm
   RingBuf.pm
   Route.pm
   RouteDB.pm
   Script.pm
   Spot.pm
   spot2csv.pl
   Sun.pm
   talias.pl
   Thingy.pm
   Timer.pm
   UDPMsg.pm
   update_sysop.pl
   USDB.pm
   VE7CC.pm
   Verify.pm
   WCY.pm
   winclient.pl
   y2k.sh

#
# Search for bad words in strings
#
# Copyright (c) 2000 Dirk Koopman
#
# $Id: BadWords.pm,v 1.16 2005/12/21 18:39:25 minima Exp $
#

package BadWords;

use strict;

use DXUtil;
use DXVars;
use DXHash;
use DXDebug;

use IO::File;

use vars qw($badword $regexcode);

my $oldfn = "$main::data/badwords";
my $regex = "$main::data/badw_regex";
my $bwfn = "$main::data/badword";

# copy issue ones across
filecopy("$regex.gb.issue", $regex) unless -e $regex;
filecopy("$bwfn.issue", $bwfn) unless -e $bwfn;

$badword = new DXHash "badword";

use vars qw($VERSION $BRANCH);
$VERSION = sprintf( "%d.%03d", q$Revision: 1.16 $ =~ /(\d+)\.(\d+)/ );
$BRANCH = sprintf( "%d.%03d", q$Revision: 1.16 $ =~ /\d+\.\d+\.(\d+)\.(\d+)/  || (0,0));
$main::build += $VERSION;
$main::branch += $BRANCH;

# load the badwords file
sub load
{
	my @out;
	my $fh = new IO::File $oldfn;
	
	if ($fh) {
		while (<$fh>) {
			chomp;
			next if /^\s*\#/;
			my @list = split " ";
			for (@list) {
				$badword->add($_);
			}
		}
		$fh->close;
		$badword->put;
		unlink $oldfn;
	}
	push @out, create_regex(); 
	return @out;
}

sub create_regex
{
	my @out;
	my $fh = new IO::File $regex;
	
	if ($fh) {
		my $s = "sub { my \$str = shift; my \@out; \n";
		while (<$fh>) {
			chomp;
			next if /^\s*\#/;
			my @list = split " ";
			for (@list) {
				# create a closure for each word so that it matches stuff with spaces/punctuation
				# and repeated characters in it
				my $w = uc $_;
				my @l = split //, $w;
				my $e = join '+[\s\W]*', @l;
				$s .= "push \@out, \$1 if \$str =~ /\\b($e)/;\n";
			}
		}
		$s .= "return \@out;\n}";
		$regexcode = eval $s;
		dbg($s) if isdbg('badword');
		if ($@) {
			@out = ($@);
			dbg($@);
			return @out;
		}
		$fh->close;
	} else {
		my $l = "can't open $regex $!";
		dbg($l);
		push @out, $l;
	}
	
	return @out;
}

# check the text against the badwords list
sub check
{
	my $s = uc shift;
	my @out;

	push @out, &$regexcode($s) if $regexcode;
	
	return @out if @out;
	
	for (split(/\b/, $s)) {
		push @out, $_ if $badword->in($_);
	}

	return @out;
}

1;

About Koders | Resources | Downloads | Support | Black Duck | Submit Project | Terms of Service | DMCA | Privacy Policy | Site Map| Contact Us