import numpy as np
import nltk
from nltk.corpus import brown


# Exercise 1
def my_frequency(j):
    """Count how often each item occurs in the iterable ``j``.

    Returns a plain ``dict`` mapping every distinct item to the number of
    times it was seen. Keys appear in first-seen order. Items must be
    hashable because they are used as dictionary keys.
    """
    counts = {}
    for item in j:
        # .get() supplies 0 for an item we have not met before.
        counts[item] = counts.get(item, 0) + 1
    return counts


# Exercise 2
def my_median(l):
    """Return the median of the values in the iterable ``l``.

    The values are sorted first. For an odd number of values the middle
    element itself is returned; for an even number the two middle elements
    are averaged with true division.

    Raises ``IndexError`` when ``l`` is empty.
    """
    ordered = sorted(l)
    half, is_odd = divmod(len(ordered), 2)
    if is_odd:
        return ordered[half]
    return (ordered[half - 1] + ordered[half]) / 2

def my_median_2(li):
    """Return the median of the values in the iterable ``li`` without branching.

    The two indices ``(n - 1) // 2`` and ``n // 2`` coincide for an odd
    number of values and pick the two middle elements for an even number,
    so averaging them covers both cases. Because of the true division the
    result is produced by ``/`` even when the count is odd (e.g. ``2.0``
    rather than ``2`` for ints).

    Raises ``IndexError`` when ``li`` is empty.
    """
    sor = sorted(li)
    # Measure the sorted copy, not the argument: ``li`` may be a generator
    # or another iterable that has no ``len()``.
    n = len(sor)
    return (sor[(n - 1) // 2] + sor[n // 2]) / 2

def my_mean(l):
    """Return the arithmetic mean of the values in ``l``.

    ``l`` must support both ``sum()`` and ``len()``.
    Raises ``ZeroDivisionError`` when ``l`` is empty.
    """
    total = sum(l)
    count = len(l)
    return total / count


# Exercise 3
# How to handle lower case?
labels = ['i', 'we', 'he', 'she', 'they']
# Build the lookup set once instead of once per corpus word.
label_set = set(labels)

# Lower-case every token exactly once, then keep only the pronouns of
# interest, so that e.g. 'He' and 'he' are counted under the same key.
brown_freq = nltk.FreqDist(
    w for w in map(str.lower, brown.words()) if w in label_set)

# To replace 'i' with 'I' as key:
v = brown_freq.pop('i')
brown_freq['I'] = v

brown_freq.tabulate()
brown_freq.plot()


# Exercise 4
# The following does no case folding, so capitalized forms ('He', 'I', ...) are not counted:
# Build the lookup set once instead of once per corpus word.
label_set = set(labels)

# Counts per genre using the tokens exactly as they appear in the corpus;
# only tokens that are already lower case can match the labels.
cond = nltk.ConditionalFreqDist(
    (genre, word)
    for genre in ['news', 'fiction']
    for word in brown.words(categories=genre)
    if word in label_set)

# to get lower case: fold every token once before the membership test
cf_low = nltk.ConditionalFreqDist(
    (genre, word)
    for genre in ['news', 'fiction']
    for word in map(str.lower, brown.words(categories=genre))
    if word in label_set)

# to decorate the result: show the pronoun as 'I' rather than 'i'
cf_low['news']['I'] = cf_low['news'].pop('i')
cf_low['fiction']['I'] = cf_low['fiction'].pop('i')

print("\nTable without case folding:")
cond.tabulate()

print("\nTable with case folding:")
cf_low.tabulate()