86 lines
2.0 KiB
Python
86 lines
2.0 KiB
Python
#!/usr/bin/python
|
|
|
|
import os
import sys
from collections import Counter

import matplotlib.pyplot as plt
|
|
|
|
|
|
def sort(dic):
    """Return a new dict holding the entries of *dic* in descending value order.

    Entries whose values compare equal keep the relative order they had in
    *dic* (``sorted`` is stable, also with ``reverse=True``).

    Unlike a selection loop seeded with a maximum of 0, this also works when
    values are zero or negative, and it leaves *dic* itself unmodified.

    Args:
        dic: Mapping of keys to mutually comparable values (e.g. word -> count).

    Returns:
        A plain ``dict`` whose iteration order goes from the largest value to
        the smallest. An empty input yields an empty dict.
    """
    ranked = sorted(dic.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked)
|
|
|
|
|
|
def get_words(script):
    """Read the text file at *script* and return its words as a list.

    A word is any maximal run of non-whitespace characters, so spaces, tabs
    and line breaks all act as separators and no empty strings are returned.

    Args:
        script: Path of the file to read.

    Returns:
        The words of the file, in the order they appear.

    Raises:
        SystemExit: With status 1, after writing an error message to stderr,
            when the file does not exist.
    """
    # Only opening the file can raise FileNotFoundError, so keep the try
    # block tight; the context manager closes the handle even if read() fails.
    try:
        with open(script, "r") as script_file:
            file_content = script_file.read()
    except FileNotFoundError:
        sys.stderr.write("Error: " + script + " does not exist!")
        sys.exit(1)

    # split() without a separator collapses every whitespace run and never
    # yields empty strings, replacing the manual " " / "\n" double split.
    return file_content.split()
|
|
|
|
|
|
def get_movie_name(file_path):
    """Derive a human-readable movie name from the path of its script file.

    The directory part and the final extension are dropped, then underscores
    are turned into spaces. Only the last extension is removed, so dots that
    belong to the title survive (``Mr._Nobody.txt`` -> ``Mr. Nobody``).

    Args:
        file_path: Path to the script file, e.g. ``scripts/The_Matrix.txt``.

    Returns:
        The file's base name without its extension, with ``_`` replaced by
        a space.
    """
    # os.path understands the platform's separators, unlike a bare split("/").
    file_name = os.path.basename(file_path)
    stem, _extension = os.path.splitext(file_name)
    return stem.replace('_', ' ')
|
|
|
|
|
|
def get_frequency(words):
    """Count how often each word occurs, most frequent first.

    Args:
        words: Iterable of hashable items (the words to count).

    Returns:
        A plain ``dict`` mapping each distinct word to its number of
        occurrences, ordered from the highest count to the lowest. Words
        with the same count stay in order of first appearance.
    """
    # Counter does the tallying; most_common() returns (word, count) pairs
    # sorted by descending count with ties kept in first-seen order.
    return dict(Counter(words).most_common())
|
|
|
|
|
|
def display_fig(words, movie_name):
    """Show a bar chart of word counts for one movie.

    Args:
        words: Mapping of word -> number of occurrences. Bars are drawn in
            the mapping's iteration order, one bar per entry.
        movie_name: Name appended to the chart title.
    """
    # One x position per word; reused for both the bars and their labels.
    positions = range(len(words))
    counts = list(words.values())
    labels = list(words.keys())

    plt.title("Word frequencies in " + movie_name)
    plt.ylabel("Number of Occurrences")
    plt.xlabel("Words")
    plt.bar(positions, counts, align='center')
    plt.xticks(positions, labels)
    plt.show()
|
|
|
|
|
|
def main(script):
    """Run the whole pipeline for one script file: load, count, plot.

    Args:
        script: Path of the movie script file to analyse.
    """
    print("File loading and sorting ...")
    frequencies = get_frequency(get_words(script))

    print("Display result")
    movie_name = get_movie_name(script)
    display_fig(frequencies, movie_name)
|
|
|
|
|
|
if __name__ == "__main__":
    # Exactly one argument (the script path) must follow the program name.
    # sys.exit(<str>) writes the message to stderr and exits with status 1,
    # so misuse is reported the same way as a missing file in get_words().
    if len(sys.argv) < 2:
        sys.exit("Please enter one movie parameter")
    if len(sys.argv) > 2:
        sys.exit("Please enter only one argument")
    main(sys.argv[1])
|