diff --git a/CAu_NLP_2019/pom.xml b/CAu_NLP_2019/pom.xml index fdc8ee1..6a4c0d4 100644 --- a/CAu_NLP_2019/pom.xml +++ b/CAu_NLP_2019/pom.xml @@ -35,6 +35,46 @@ 1.8 + + + maven-assembly-plugin + + + package + + single + + + + + + + com.nlp.App + + + + jar-with-dependencies + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + **/log4j.properties + + + + + com.nlp.App + + + + PQMAN diff --git a/CAu_NLP_2019/src/main/java/App.java b/CAu_NLP_2019/src/main/java/com/nlp/App.java similarity index 98% rename from CAu_NLP_2019/src/main/java/App.java rename to CAu_NLP_2019/src/main/java/com/nlp/App.java index aae70b5..c4cc586 100644 --- a/CAu_NLP_2019/src/main/java/App.java +++ b/CAu_NLP_2019/src/main/java/com/nlp/App.java @@ -1,3 +1,5 @@ +package com.nlp; + import com.google.gson.Gson; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -28,8 +30,6 @@ public class App { String[] url = {"https://www.thesun.co.uk/?s=", "https://www.bbc.co.uk/search?q=", "https://www.skysports.com/search?q="}; List> documents = new ArrayList<>(); - List> result = new ArrayList<>(); - List result2 = new ArrayList<>(); String document; List goodUrl = new ArrayList<>(); Article data = new Article(); diff --git a/CAu_NLP_2019/src/main/java/Article.java b/CAu_NLP_2019/src/main/java/com/nlp/Article.java similarity index 95% rename from CAu_NLP_2019/src/main/java/Article.java rename to CAu_NLP_2019/src/main/java/com/nlp/Article.java index 48d30b4..8c1ffa6 100644 --- a/CAu_NLP_2019/src/main/java/Article.java +++ b/CAu_NLP_2019/src/main/java/com/nlp/Article.java @@ -1,63 +1,65 @@ -import java.util.ArrayList; - -public class Article { - - public Article() { - date = new ArrayList(); - Headline = new ArrayList(); - Url = new ArrayList(); - Site = new ArrayList(); - Content = new ArrayList(); - } - - private ArrayList date; - private ArrayList Headline; - private ArrayList Url; - private ArrayList Site; - private ArrayList Content; - - public void setDate(int num) { - date.add(num); - } - - public void setHeadline(String head) { - Headline.add(head); - } - - public void setUrl(String url) { - Url.add(url); - } - - public void setSite(String site) { - Site.add(site); - } - - public void setContent(String content) { - Content.add(content); - } - - - public int getHowManyData() { - return Headline.size(); - } - - public int getDate(int num) { - return date.get(num); - } - - public String getHeadline(int num) { - return Headline.get(num); - } - - public String getUrl(int num) { - return Url.get(num); - } - - public String getSite(int num) { - return Site.get(num); - } - - public String getContent(int num) { - return Content.get(num); - } +package com.nlp; + +import java.util.ArrayList; + +public class Article { + + public Article() { + date = new ArrayList(); + Headline = new ArrayList(); + Url = new ArrayList(); + Site = new ArrayList(); + Content = new ArrayList(); + } + + private ArrayList date; + private ArrayList Headline; + private ArrayList Url; + private ArrayList Site; + private ArrayList Content; + + public void setDate(int num) { + date.add(num); + } + + public void setHeadline(String head) { + Headline.add(head); + } + + public void setUrl(String url) { + Url.add(url); + } + + public void setSite(String site) { + Site.add(site); + } + + public void setContent(String content) { + Content.add(content); + } + + + public int getHowManyData() { + return Headline.size(); + } + + public int getDate(int num) { + return date.get(num); + } + + public String getHeadline(int num) { + return Headline.get(num); + } + + public String getUrl(int num) { + return Url.get(num); + } + + public String getSite(int num) { + return Site.get(num); + } + + public String getContent(int num) { + return Content.get(num); + } } \ No newline at end of file diff --git a/CAu_NLP_2019/src/main/java/Result.java b/CAu_NLP_2019/src/main/java/com/nlp/Result.java similarity index 96% rename from CAu_NLP_2019/src/main/java/Result.java rename to CAu_NLP_2019/src/main/java/com/nlp/Result.java index 89990eb..0143ed5 100644 --- a/CAu_NLP_2019/src/main/java/Result.java +++ b/CAu_NLP_2019/src/main/java/com/nlp/Result.java @@ -1,3 +1,5 @@ +package com.nlp; + public class Result { private String tfidf; diff --git a/CAu_NLP_2019/src/main/java/TFIDF.java b/CAu_NLP_2019/src/main/java/com/nlp/TFIDF.java similarity index 96% rename from CAu_NLP_2019/src/main/java/TFIDF.java rename to CAu_NLP_2019/src/main/java/com/nlp/TFIDF.java index 4e96b67..5471a26 100644 --- a/CAu_NLP_2019/src/main/java/TFIDF.java +++ b/CAu_NLP_2019/src/main/java/com/nlp/TFIDF.java @@ -1,46 +1,48 @@ -import java.util.List; - -public class TFIDF { - /** - * @param doc list of strings - * @param term String represents a term - * @return term frequency of term in document - */ - public double tf(List doc, String term) { - double result = 0; - for (String word : doc) { - if (term.equalsIgnoreCase(word)) - result++; - } - return result / doc.size(); - } - - /** - * @param docs list of list of strings represents the dataset - * @param term String represents a term - * @return the inverse term frequency of term in documents - */ - public double idf(List> docs, String term) { - double n = 0; - for (List doc : docs) { - for (String word : doc) { - if (term.equalsIgnoreCase(word)) { - n++; - break; - } - } - } - return Math.log(docs.size() / n); - } - - /** - * @param doc a text document - * @param docs all documents - * @param term term - * @return the TF-IDF of term - */ - public double tfIdf(List doc, List> docs, String term) { - return tf(doc, term) * idf(docs, term); - - } -} +package com.nlp; + +import java.util.List; + +public class TFIDF { + /** + * @param doc list of strings + * @param term String represents a term + * @return term frequency of term in document + */ + public double tf(List doc, String term) { + double result = 0; + for (String word : doc) { + if (term.equalsIgnoreCase(word)) + result++; + } + return result / doc.size(); + } + + /** + * @param docs list of list of strings represents the dataset + * @param term String represents a term + * @return the inverse term frequency of term in documents + */ + public double idf(List> docs, String term) { + double n = 0; + for (List doc : docs) { + for (String word : doc) { + if (term.equalsIgnoreCase(word)) { + n++; + break; + } + } + } + return Math.log(docs.size() / n); + } + + /** + * @param doc a text document + * @param docs all documents + * @param term term + * @return the TF-IDF of term + */ + public double tfIdf(List doc, List> docs, String term) { + return tf(doc, term) * idf(docs, term); + + } +}