From c8fdacf41ab98ffac5738fd081671bff2fcb6d40 Mon Sep 17 00:00:00 2001 From: Mike Pirnat Date: Wed, 8 Aug 2012 19:39:20 -0400 Subject: Added SpamBayes to requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..24c2fc8 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +spambayes -- cgit v1.2.3 From a530575b4995a28e91f28e08ea211e172beb2607 Mon Sep 17 00:00:00 2001 From: Mike Pirnat Date: Wed, 8 Aug 2012 23:02:15 -0400 Subject: Adds example configuration for spam-fighting --- email_gateway.cfg | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/email_gateway.cfg b/email_gateway.cfg index 31a3691..05ad723 100644 --- a/email_gateway.cfg +++ b/email_gateway.cfg @@ -8,6 +8,9 @@ ; message = First line of the message ; redirect = /contact-thanks.html ; site = http://(?:www\.)?example.com +; spam.check = True +; spam.pickle_file = /etc/email_gateway_spam.pkl +; spam.min_spam_prob = 0.90 ; ====================== ; Required Configuration @@ -36,3 +39,7 @@ ; mailer.subject -- Subject of message ; mailer.message -- Message + +; spam.check -- Check message against spambayes? 
True/False +; spam.pickle_file -- Filename of pickle file +; spam.min_spam_prob -- Minimum probability to consider message to be spam -- cgit v1.2.3 From 775be2a87ad8a744548d1a1d0ae87a7e665ca10e Mon Sep 17 00:00:00 2001 From: Mike Pirnat Date: Wed, 8 Aug 2012 23:03:36 -0400 Subject: Adds rudimentary spam check --- email_gateway.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/email_gateway.py b/email_gateway.py index e73e3e2..fe58fad 100755 --- a/email_gateway.py +++ b/email_gateway.py @@ -6,7 +6,9 @@ import re import urlparse from cStringIO import StringIO from email.mime.text import MIMEText -from ConfigParser import SafeConfigParser as ConfigParser, NoSectionError +from ConfigParser import SafeConfigParser as ConfigParser, \ + NoSectionError, NoOptionError +from spambayes.storage import PickledClassifier config = ConfigParser() @@ -26,6 +28,18 @@ def send_message(text, subject, to, from_email): p.close() +def looks_like_spam(message, config, section): + pickle_filename = config.get(section, 'spam.pickle_file') + min_spam_prob = config.getfloat(section, 'spam.min_spam_prob') + + bayes = PickledClassifier(pickle_filename) + + if bayes.chi2_spamprob(message) >= min_spam_prob: + return True + + return False + + def email_app(environ, start_response): ignored_fields = [] useful_fields = [] @@ -60,6 +74,14 @@ def email_app(environ, start_response): start_response('403 Forbidden', [('Content-Type', 'text/plain')]) return "Invalid send!" + try: + if config.getboolean(form_key, 'spam.check') \ + and looks_like_spam(context["message"], config, form_key): + start_response('403 Forbidden', [('Content-Type', 'text/plain')]) + return "I don't like SPAM!" 
+ except NoOptionError: + pass + useful_fields = ["{0}: {1}".format(*f) for f in useful_fields if f[0] not in ignored_fields] -- cgit v1.2.3 From 82bdf837d993fdf8db5f6e8fa6169be964adb687 Mon Sep 17 00:00:00 2001 From: Mike Pirnat Date: Wed, 8 Aug 2012 23:10:18 -0400 Subject: Rudimentary scripts for training spam and ham from stdin --- train_ham.py | 29 +++++++++++++++++++++++++++++ train_spam.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 train_ham.py create mode 100644 train_spam.py diff --git a/train_ham.py b/train_ham.py new file mode 100644 index 0000000..4989c7c --- /dev/null +++ b/train_ham.py @@ -0,0 +1,29 @@ +""" +A rudimentary way to train additional ham into our pickle file. + +Example usage: + +$ python train_ham.py /path/to/spam.pkl +blah blah blah^D +""" +import sys +from ConfigParser import SafeConfigParser as ConfigParser, \ + NoSectionError, NoOptionError +from spambayes.storage import PickledClassifier + + +config = ConfigParser() +with open("/etc/email_gateway.cfg") as fp: + config.readfp(fp) + + +def main(): + pickle_filename = sys.argv[-1] + bayes = PickledClassifier(pickle_filename) + message = sys.stdin.readlines() + bayes.learn(message, False) + bayes.store() + + +if __name__ == '__main__': + main() diff --git a/train_spam.py b/train_spam.py new file mode 100644 index 0000000..c1f2065 --- /dev/null +++ b/train_spam.py @@ -0,0 +1,29 @@ +""" +A rudimentary way to train additional spam into our pickle file. 
+ +Example usage: + +$ python train_spam.py /path/to/spam.pkl +blah blah blah^D +""" +import sys +from ConfigParser import SafeConfigParser as ConfigParser, \ + NoSectionError, NoOptionError +from spambayes.storage import PickledClassifier + + +config = ConfigParser() +with open("/etc/email_gateway.cfg") as fp: + config.readfp(fp) + + +def main(): + pickle_filename = sys.argv[-1] + bayes = PickledClassifier(pickle_filename) + message = sys.stdin.readlines() + bayes.learn(message, True) + bayes.store() + + +if __name__ == '__main__': + main() -- cgit v1.2.3 From 97a908e50865d9385dff31cfbfb237fb7097f30a Mon Sep 17 00:00:00 2001 From: Mike Pirnat Date: Wed, 8 Aug 2012 23:18:06 -0400 Subject: Adds example pickle file with spambayes data based on training against real-world FPIP contact form input --- example_spam.pkl | Bin 0 -> 1542 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 example_spam.pkl diff --git a/example_spam.pkl b/example_spam.pkl new file mode 100644 index 0000000..a8ccdca Binary files /dev/null and b/example_spam.pkl differ -- cgit v1.2.3