CAU-IR-Spring-2019-Project-1/program.py

86 lines
2.0 KiB
Python

#!/usr/bin/python
import os
import sys
from collections import Counter

import matplotlib.pyplot as plt
def sort(dic):
    """Return a new dict with the entries of *dic* ordered by descending value.

    Entries that share the same value keep the relative order they had in
    *dic*. The input mapping is not modified.

    Args:
        dic: Mapping of keys to mutually comparable values (for example
            word counts). May be empty and may contain zero or negative
            values.

    Returns:
        A new ``dict`` whose iteration order runs from the largest value to
        the smallest.
    """
    # sorted() is stable and reverse=True preserves the original order of
    # equal elements, so ties stay in insertion order.
    ranked = sorted(dic.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked)
def get_words(script):
    """Read the file at *script* and return its whitespace-separated words.

    Args:
        script: Path of the text file to read.

    Returns:
        A list with the non-empty tokens of the file in reading order.
        Tokens are delimited by any run of whitespace (spaces, newlines,
        tabs, ...).

    Raises:
        SystemExit: With status 1 when the file does not exist; an error
            message is written to stderr first.
    """
    try:
        # The context manager closes the handle even if read() raises.
        with open(script, "r") as handle:
            content = handle.read()
    except FileNotFoundError:
        sys.stderr.write("Error: " + script + " does not exist!\n")
        sys.exit(1)
    # split() with no separator splits on any whitespace and never yields
    # empty strings, so no extra filtering is needed.
    return content.split()
def get_movie_name(file_path):
    """Derive a display title from the path of a script file.

    The directory part and the final file extension are dropped, and
    underscores in the remaining name are replaced by spaces.

    Args:
        file_path: Path of the script file, e.g. ``"scripts/The_Matrix.txt"``.

    Returns:
        The title string, e.g. ``"The Matrix"``. A path that ends with a
        separator yields an empty string.
    """
    file_name = os.path.basename(file_path)
    # splitext() strips only the last extension, so dots that belong to the
    # title itself (e.g. "L.A._Confidential.txt") are preserved.
    stem, _extension = os.path.splitext(file_name)
    return stem.replace("_", " ")
def get_frequency(words):
    """Count how often each word occurs, most frequent first.

    Args:
        words: Iterable of hashable tokens (typically strings).

    Returns:
        A plain ``dict`` mapping each distinct word to its number of
        occurrences. Iteration order runs from the highest count to the
        lowest; words with equal counts appear in the order they were first
        encountered.
    """
    # most_common() already returns (word, count) pairs sorted by descending
    # count with ties in first-seen order, so no separate sort pass is needed.
    return dict(Counter(words).most_common())
def display_fig(words, movie_name):
    """Draw a bar chart of word counts and show it with ``plt.show()``.

    Args:
        words: Dict whose keys are used as x-axis tick labels and whose
            values are used as bar heights, in the dict's iteration order.
        movie_name: Title text appended to the chart heading.
    """
    heading = "Word frequencies in " + movie_name
    plt.title(heading)
    plt.ylabel("Number of Occurrences")
    plt.xlabel("Words")

    # One bar per entry, placed at consecutive integer positions.
    slots = range(len(words))
    heights = list(words.values())
    plt.bar(slots, heights, align='center')

    tick_labels = list(words.keys())
    plt.xticks(range(len(words)), tick_labels)

    plt.show()
def main(script):
    """Run the whole pipeline for one script file.

    Reads the words of the file, computes their frequencies and displays the
    resulting chart, printing a short progress message before each phase.

    Args:
        script: Path of the movie script file to analyse.
    """
    print("File loading and sorting ...")
    frequencies = get_frequency(get_words(script))

    print("Display result")
    title = get_movie_name(script)
    display_fig(frequencies, title)
if __name__ == "__main__":
    # Exactly one command-line argument (the script path) is expected;
    # sys.argv[0] is the program name and is not counted.
    given = len(sys.argv) - 1
    if given == 1:
        main(sys.argv[1])
    elif given < 1:
        print("Please enter one movie parameter")
    else:
        print("Please enter only one argument")