diff options
author | Mike Crute <mcrute@gmail.com> | 2012-08-09 18:53:00 -0700 |
---|---|---|
committer | Mike Crute <mcrute@gmail.com> | 2012-08-09 18:53:00 -0700 |
commit | 645f95c6549bdf4fcc0e465b489b014945377291 (patch) | |
tree | 18b710ed55451c0ccd581b97b07820e35e61454e | |
parent | d4c4aaac0be5a52d5fcca67596fa522d57553acf (diff) | |
parent | 97a908e50865d9385dff31cfbfb237fb7097f30a (diff) | |
download | tiny-webapps-645f95c6549bdf4fcc0e465b489b014945377291.tar.bz2 tiny-webapps-645f95c6549bdf4fcc0e465b489b014945377291.tar.xz tiny-webapps-645f95c6549bdf4fcc0e465b489b014945377291.zip |
Merge pull request #1 from mpirnat/i-hate-spam
I hate spam
-rw-r--r-- | email_gateway.cfg | 7 | ||||
-rwxr-xr-x | email_gateway.py | 24 | ||||
-rw-r--r-- | example_spam.pkl | bin | 0 -> 1542 bytes | |||
-rw-r--r-- | requirements.txt | 1 | ||||
-rw-r--r-- | train_ham.py | 29 | ||||
-rw-r--r-- | train_spam.py | 29 |
6 files changed, 89 insertions, 1 deletions
diff --git a/email_gateway.cfg b/email_gateway.cfg index 31a3691..05ad723 100644 --- a/email_gateway.cfg +++ b/email_gateway.cfg | |||
@@ -8,6 +8,9 @@ | |||
8 | ; message = First line of the message | 8 | ; message = First line of the message |
9 | ; redirect = /contact-thanks.html | 9 | ; redirect = /contact-thanks.html |
10 | ; site = http://(?:www\.)?example.com | 10 | ; site = http://(?:www\.)?example.com |
11 | ; spam.check = True | ||
12 | ; spam.pickle_file = /etc/email_gateway_spam.pkl | ||
13 | ; spam.min_spam_prob = 0.90 | ||
11 | 14 | ||
12 | ; ====================== | 15 | ; ====================== |
13 | ; Required Configuration | 16 | ; Required Configuration |
@@ -36,3 +39,7 @@ | |||
36 | 39 | ||
37 | ; mailer.subject -- Subject of message | 40 | ; mailer.subject -- Subject of message |
38 | ; mailer.message -- Message | 41 | ; mailer.message -- Message |
42 | |||
43 | ; spam.check -- Check message against spambayes? True/False | ||
44 | ; spam.pickle_file -- Filename of pickle file | ||
45 | ; spam.min_spam_prob -- Minimum probability to consider message to be spam | ||
diff --git a/email_gateway.py b/email_gateway.py index e73e3e2..fe58fad 100755 --- a/email_gateway.py +++ b/email_gateway.py | |||
@@ -6,7 +6,9 @@ import re | |||
6 | import urlparse | 6 | import urlparse |
7 | from cStringIO import StringIO | 7 | from cStringIO import StringIO |
8 | from email.mime.text import MIMEText | 8 | from email.mime.text import MIMEText |
9 | from ConfigParser import SafeConfigParser as ConfigParser, NoSectionError | 9 | from ConfigParser import SafeConfigParser as ConfigParser, \ |
10 | NoSectionError, NoOptionError | ||
11 | from spambayes.storage import PickledClassifier | ||
10 | 12 | ||
11 | 13 | ||
12 | config = ConfigParser() | 14 | config = ConfigParser() |
@@ -26,6 +28,18 @@ def send_message(text, subject, to, from_email): | |||
26 | p.close() | 28 | p.close() |
27 | 29 | ||
28 | 30 | ||
31 | def looks_like_spam(message, config, section): | ||
32 | pickle_filename = config.get(section, 'spam.pickle_file') | ||
33 | min_spam_prob = config.getfloat(section, 'spam.min_spam_prob') | ||
34 | |||
35 | bayes = PickledClassifier(pickle_filename) | ||
36 | |||
37 | if bayes.chi2_spamprob(message) >= min_spam_prob: | ||
38 | return True | ||
39 | |||
40 | return False | ||
41 | |||
42 | |||
29 | def email_app(environ, start_response): | 43 | def email_app(environ, start_response): |
30 | ignored_fields = [] | 44 | ignored_fields = [] |
31 | useful_fields = [] | 45 | useful_fields = [] |
@@ -60,6 +74,14 @@ def email_app(environ, start_response): | |||
60 | start_response('403 Forbidden', [('Content-Type', 'text/plain')]) | 74 | start_response('403 Forbidden', [('Content-Type', 'text/plain')]) |
61 | return "Invalid send!" | 75 | return "Invalid send!" |
62 | 76 | ||
77 | try: | ||
78 | if config.getboolean(form_key, 'spam.check') \ | ||
79 | and looks_like_spam(context["message"], config, form_key): | ||
80 | start_response('403 Forbidden', [('Content-Type', 'text/plain')]) | ||
81 | return "I don't like SPAM!" | ||
82 | except NoOptionError: | ||
83 | pass | ||
84 | |||
63 | useful_fields = ["{0}: {1}".format(*f) | 85 | useful_fields = ["{0}: {1}".format(*f) |
64 | for f in useful_fields | 86 | for f in useful_fields |
65 | if f[0] not in ignored_fields] | 87 | if f[0] not in ignored_fields] |
diff --git a/example_spam.pkl b/example_spam.pkl new file mode 100644 index 0000000..a8ccdca --- /dev/null +++ b/example_spam.pkl | |||
Binary files differ | |||
diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..24c2fc8 --- /dev/null +++ b/requirements.txt | |||
@@ -0,0 +1 @@ | |||
spambayes | |||
diff --git a/train_ham.py b/train_ham.py new file mode 100644 index 0000000..4989c7c --- /dev/null +++ b/train_ham.py | |||
@@ -0,0 +1,29 @@ | |||
1 | """ | ||
2 | A rudimentary way to train additional ham into our pickle file. | ||
3 | |||
4 | Example usage: | ||
5 | |||
6 | $ python train_ham.py /path/to/spam.pkl | |
7 | blah blah blah^D | ||
8 | """ | ||
9 | import sys | ||
10 | from ConfigParser import SafeConfigParser as ConfigParser, \ | ||
11 | NoSectionError, NoOptionError | ||
12 | from spambayes.storage import PickledClassifier | ||
13 | |||
14 | |||
15 | config = ConfigParser() | ||
16 | with open("/etc/email_gateway.cfg") as fp: | ||
17 | config.readfp(fp) | ||
18 | |||
19 | |||
20 | def main(): | ||
21 | pickle_filename = sys.argv[-1] | ||
22 | bayes = PickledClassifier(pickle_filename) | ||
23 | message = sys.stdin.readlines() | ||
24 | bayes.learn(message, False) | ||
25 | bayes.store() | ||
26 | |||
27 | |||
28 | if __name__ == '__main__': | ||
29 | main() | ||
diff --git a/train_spam.py b/train_spam.py new file mode 100644 index 0000000..c1f2065 --- /dev/null +++ b/train_spam.py | |||
@@ -0,0 +1,29 @@ | |||
1 | """ | ||
2 | A rudimentary way to train additional spam into our pickle file. | ||
3 | |||
4 | Example usage: | ||
5 | |||
6 | $ python train_spam.py /path/to/spam.pkl | ||
7 | blah blah blah^D | ||
8 | """ | ||
9 | import sys | ||
10 | from ConfigParser import SafeConfigParser as ConfigParser, \ | ||
11 | NoSectionError, NoOptionError | ||
12 | from spambayes.storage import PickledClassifier | ||
13 | |||
14 | |||
15 | config = ConfigParser() | ||
16 | with open("/etc/email_gateway.cfg") as fp: | ||
17 | config.readfp(fp) | ||
18 | |||
19 | |||
20 | def main(): | ||
21 | pickle_filename = sys.argv[-1] | ||
22 | bayes = PickledClassifier(pickle_filename) | ||
23 | message = sys.stdin.readlines() | ||
24 | bayes.learn(message, True) | ||
25 | bayes.store() | ||
26 | |||
27 | |||
28 | if __name__ == '__main__': | ||
29 | main() | ||