summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Crute <mcrute@gmail.com>2012-08-09 18:53:00 -0700
committerMike Crute <mcrute@gmail.com>2012-08-09 18:53:00 -0700
commit645f95c6549bdf4fcc0e465b489b014945377291 (patch)
tree18b710ed55451c0ccd581b97b07820e35e61454e
parentd4c4aaac0be5a52d5fcca67596fa522d57553acf (diff)
parent97a908e50865d9385dff31cfbfb237fb7097f30a (diff)
downloadtiny-webapps-645f95c6549bdf4fcc0e465b489b014945377291.tar.bz2
tiny-webapps-645f95c6549bdf4fcc0e465b489b014945377291.tar.xz
tiny-webapps-645f95c6549bdf4fcc0e465b489b014945377291.zip
Merge pull request #1 from mpirnat/i-hate-spam
I hate spam
-rw-r--r--email_gateway.cfg7
-rwxr-xr-xemail_gateway.py24
-rw-r--r--example_spam.pklbin0 -> 1542 bytes
-rw-r--r--requirements.txt1
-rw-r--r--train_ham.py29
-rw-r--r--train_spam.py29
6 files changed, 89 insertions, 1 deletions
diff --git a/email_gateway.cfg b/email_gateway.cfg
index 31a3691..05ad723 100644
--- a/email_gateway.cfg
+++ b/email_gateway.cfg
@@ -8,6 +8,9 @@
8; message = First line of the message 8; message = First line of the message
9; redirect = /contact-thanks.html 9; redirect = /contact-thanks.html
10; site = http://(?:www\.)?example.com 10; site = http://(?:www\.)?example.com
11; spam.check = True
12; spam.pickle_file = /etc/email_gateway_spam.pkl
13; spam.min_spam_prob = 0.90
11 14
12; ====================== 15; ======================
13; Required Configuration 16; Required Configuration
@@ -36,3 +39,7 @@
36 39
37; mailer.subject -- Subject of message 40; mailer.subject -- Subject of message
38; mailer.message -- Message 41; mailer.message -- Message
42
43; spam.check -- Check message against spambayes? True/False
44; spam.pickle_file -- Filename of pickle file
45; spam.min_spam_prob -- Minimum probability to consider message to be spam
diff --git a/email_gateway.py b/email_gateway.py
index e73e3e2..fe58fad 100755
--- a/email_gateway.py
+++ b/email_gateway.py
@@ -6,7 +6,9 @@ import re
6import urlparse 6import urlparse
7from cStringIO import StringIO 7from cStringIO import StringIO
8from email.mime.text import MIMEText 8from email.mime.text import MIMEText
9from ConfigParser import SafeConfigParser as ConfigParser, NoSectionError 9from ConfigParser import SafeConfigParser as ConfigParser, \
10 NoSectionError, NoOptionError
11from spambayes.storage import PickledClassifier
10 12
11 13
12config = ConfigParser() 14config = ConfigParser()
@@ -26,6 +28,18 @@ def send_message(text, subject, to, from_email):
26 p.close() 28 p.close()
27 29
28 30
def looks_like_spam(message, config, section):
    """Report whether ``message`` scores as spam for a config section.

    Reads ``spam.pickle_file`` and ``spam.min_spam_prob`` from ``section``
    of ``config``, builds a ``PickledClassifier`` from the pickle file and
    compares the classifier's chi-squared spam probability for ``message``
    against the configured minimum.

    Returns True when the probability is greater than or equal to
    ``spam.min_spam_prob``, otherwise False. Lookup errors raised by
    ``config`` for missing options are not handled here.
    """
    classifier_path = config.get(section, 'spam.pickle_file')
    threshold = config.getfloat(section, 'spam.min_spam_prob')

    # NOTE(review): the classifier is rebuilt from the pickle file on every
    # call, and the file is presumably unpickled -- keep it trusted.
    classifier = PickledClassifier(classifier_path)

    # NOTE(review): confirm that ``message`` is in the same token form the
    # training scripts pass to learn() (they pass a list of stdin lines).
    return classifier.chi2_spamprob(message) >= threshold
41
42
29def email_app(environ, start_response): 43def email_app(environ, start_response):
30 ignored_fields = [] 44 ignored_fields = []
31 useful_fields = [] 45 useful_fields = []
@@ -60,6 +74,14 @@ def email_app(environ, start_response):
60 start_response('403 Forbidden', [('Content-Type', 'text/plain')]) 74 start_response('403 Forbidden', [('Content-Type', 'text/plain')])
61 return "Invalid send!" 75 return "Invalid send!"
62 76
77 try:
78 if config.getboolean(form_key, 'spam.check') \
79 and looks_like_spam(context["message"], config, form_key):
80 start_response('403 Forbidden', [('Content-Type', 'text/plain')])
81 return "I don't like SPAM!"
82 except NoOptionError:
83 pass
84
63 useful_fields = ["{0}: {1}".format(*f) 85 useful_fields = ["{0}: {1}".format(*f)
64 for f in useful_fields 86 for f in useful_fields
65 if f[0] not in ignored_fields] 87 if f[0] not in ignored_fields]
diff --git a/example_spam.pkl b/example_spam.pkl
new file mode 100644
index 0000000..a8ccdca
--- /dev/null
+++ b/example_spam.pkl
Binary files differ
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..24c2fc8
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
spambayes
diff --git a/train_ham.py b/train_ham.py
new file mode 100644
index 0000000..4989c7c
--- /dev/null
+++ b/train_ham.py
@@ -0,0 +1,29 @@
1"""
2A rudimentary way to train additional ham into our pickle file.
3
4Example usage:
5
6$ python train_ham.py /path/to/spam.pkl
7blah blah blah^D
8"""
9import sys
10from ConfigParser import SafeConfigParser as ConfigParser, \
11 NoSectionError, NoOptionError
12from spambayes.storage import PickledClassifier
13
14
15config = ConfigParser()
16with open("/etc/email_gateway.cfg") as fp:
17 config.readfp(fp)
18
19
def main():
    """Train the message read from stdin into the pickle file as ham.

    The pickle filename is the last command-line argument. The message is
    read from standard input until EOF, learned as non-spam, and the
    classifier is stored again.

    Raises:
        SystemExit: if no pickle filename was given on the command line.
    """
    # Without arguments sys.argv[-1] is this script's own path; refuse to
    # use (and later overwrite) the script as the classifier database.
    if len(sys.argv) < 2:
        sys.exit("usage: python train_ham.py /path/to/spam.pkl < message")

    pickle_filename = sys.argv[-1]
    bayes = PickledClassifier(pickle_filename)

    # NOTE(review): every stdin line is passed to learn() as one element;
    # confirm this matches the form used when messages are classified.
    message = sys.stdin.readlines()
    bayes.learn(message, False)  # False: train as ham, not spam
    bayes.store()
26
27
28if __name__ == '__main__':
29 main()
diff --git a/train_spam.py b/train_spam.py
new file mode 100644
index 0000000..c1f2065
--- /dev/null
+++ b/train_spam.py
@@ -0,0 +1,29 @@
1"""
2A rudimentary way to train additional spam into our pickle file.
3
4Example usage:
5
6$ python train_spam.py /path/to/spam.pkl
7blah blah blah^D
8"""
9import sys
10from ConfigParser import SafeConfigParser as ConfigParser, \
11 NoSectionError, NoOptionError
12from spambayes.storage import PickledClassifier
13
14
15config = ConfigParser()
16with open("/etc/email_gateway.cfg") as fp:
17 config.readfp(fp)
18
19
def main():
    """Train the message read from stdin into the pickle file as spam.

    The pickle filename is the last command-line argument. The message is
    read from standard input until EOF, learned as spam, and the
    classifier is stored again.

    Raises:
        SystemExit: if no pickle filename was given on the command line.
    """
    # Without arguments sys.argv[-1] is this script's own path; refuse to
    # use (and later overwrite) the script as the classifier database.
    if len(sys.argv) < 2:
        sys.exit("usage: python train_spam.py /path/to/spam.pkl < message")

    pickle_filename = sys.argv[-1]
    bayes = PickledClassifier(pickle_filename)

    # NOTE(review): every stdin line is passed to learn() as one element;
    # confirm this matches the form used when messages are classified.
    message = sys.stdin.readlines()
    bayes.learn(message, True)  # True: train as spam
    bayes.store()
26
27
28if __name__ == '__main__':
29 main()