"""Toy multinomial Naive Bayes text classifier with add-one smoothing.

A handful of tokenised training documents are grouped into two classes
("oppskrift" and "biologi"); ``predict`` scores a tokenised sentence
against each class.
"""
from collections import Counter

# Training documents, already tokenised on whitespace.
# NOTE(review): the "?" inside some tokens looks like a mis-encoded
# character -- confirm against the original data. The tokens are kept
# exactly as they are because training and test data use the same spelling.
o1 = "laks laks skj?re deilig".split()
b1 = "laks lus celle alge alge".split()
o2 = "alge nori laks ris eddik".split()
b2 = "celle laks b?rekraftighet celle".split()
o3 = "laks laks laks laks laks laks laks laks laks laks".split()

# Unlabelled sentences that are classified at the bottom of the script.
d1 = "b?rekraftighet deilig laks ris".split()
d2 = "lus lus celle eddik".split()

alle = [o1, o2, b1, b2, o3]
bio = [b1, b2]
opp = [o1, o2, o3]

# Class name -> list of training documents belonging to that class.
classes = {"oppskrift": opp, "biologi": bio}

# Vocabulary: every distinct token seen in any training document.
V = {w for document in alle for w in document}


def predict(sentence):
    """Score *sentence* against every class, print the scores, return them.

    For each class the score is ``prior * product(likelihoods)`` where the
    prior is the share of training documents in the class and each word
    likelihood is ``(count of word in class + 1) / (tokens in class + |V|)``.
    Words that are not in the vocabulary ``V`` are skipped.

    Args:
        sentence: Iterable of tokens (strings) to classify.

    Returns:
        Dict mapping each class name to its (unnormalised) score. The same
        name/score pairs are also printed, one per line.
    """
    vocab_size = len(V)
    results = {}
    for name, docs in classes.items():
        prior = len(docs) / len(alle)
        # Count every token of the class once instead of rescanning the
        # whole token list for each word of the sentence.
        counts = Counter(w for doc in docs for w in doc)
        # Add-one smoothing denominator; identical for every word.
        denominator = sum(counts.values()) + vocab_size
        # Plain product of probabilities: fine for short sentences, but it
        # would underflow towards 0.0 for very long inputs.
        likelihood = 1
        for w in sentence:
            if w in V:
                likelihood *= (counts[w] + 1) / denominator
        results[name] = prior * likelihood
    for name, score in results.items():
        print(name, score)
    return results


# Demo: classify the two unlabelled sentences when the script is loaded.
predict(d1)
predict(d2)