--- a
+++ b/clusters/scripts/commonwords.py
@@ -0,0 +1,55 @@
+"""Count stemmed word frequencies for each unpacked cluster file.
+
+For every cluster index in range(NUM_CLUSTERS) this script reads
+clust_<n>.txt, splits the lower-cased text into words made of the letters
+a-z and '-', reduces each word to its Porter stem, and writes one
+"<word> <count>" line per distinct stem to words_<n>.txt.  The word shown
+for a stem is the last surface form seen with that stem.
+"""
+import re
+
+from nltk import PorterStemmer
+
+NUM_CLUSTERS = 300
+CLUSTER_PATH = "C:/primes/data/unpackedclusters/clust_%d.txt"
+WORDS_PATH = "C:/primes/data/commonwords/words_%d.txt"
+
+# A word is a maximal run of lower-case ASCII letters and hyphens; every
+# other character (whitespace, digits, punctuation) acts as a separator.
+WORD_RE = re.compile(r"[a-z-]+")
+
+# One stemmer is enough for the whole run; there is no need to build a new
+# instance for every word.
+STEMMER = PorterStemmer()
+
+
+def count_stems(lines, stem=None):
+    """Tally Porter stems over an iterable of text lines.
+
+    lines: iterable of strings.
+    stem:  callable mapping a word to its stem; defaults to STEMMER.stem.
+
+    Returns (counts, surface): counts maps stem -> number of occurrences,
+    surface maps stem -> the last word seen with that stem.  Only non-empty
+    words are counted, so a line that ends with a newline does not add an
+    entry for the empty string.
+    """
+    if stem is None:
+        stem = STEMMER.stem
+    counts = {}
+    surface = {}
+    for line in lines:
+        for word in WORD_RE.findall(line.lower()):
+            key = stem(word)
+            surface[key] = word
+            counts[key] = counts.get(key, 0) + 1
+    return counts, surface
+
+
+for cluster in range(NUM_CLUSTERS):
+    # "with" closes each file even if reading, stemming or writing raises.
+    with open(CLUSTER_PATH % cluster, 'r') as fin:
+        counts, surface = count_stems(fin)
+    with open(WORDS_PATH % cluster, 'w') as fout:
+        for key in counts:
+            fout.write(str(surface[key]) + " " + str(counts[key]) + "\n")
\ No newline at end of file