Put the webserver
This commit is contained in:
parent
1c36a2676a
commit
cf33115501
@ -35,6 +35,46 @@
|
||||
<target>1.8</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<plugin>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>package</phase>
|
||||
<goals>
|
||||
<goal>single</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<archive>
|
||||
<manifest>
|
||||
<mainClass>com.nlp.App</mainClass>
|
||||
</manifest>
|
||||
</archive>
|
||||
<descriptorRefs>
|
||||
<descriptorRef>jar-with-dependencies</descriptorRef>
|
||||
</descriptorRefs>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<!-- Make this jar executable -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<configuration>
|
||||
<!-- DO NOT include log4j.properties file in your Jar -->
|
||||
<excludes>
|
||||
<exclude>**/log4j.properties</exclude>
|
||||
</excludes>
|
||||
<archive>
|
||||
<manifest>
|
||||
<!-- Jar file entry point -->
|
||||
<mainClass>com.nlp.App</mainClass>
|
||||
</manifest>
|
||||
</archive>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
<groupId>PQMAN</groupId>
|
||||
|
@ -1,3 +1,5 @@
|
||||
package com.nlp;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
@ -28,8 +30,6 @@ public class App {
|
||||
String[] url = {"https://www.thesun.co.uk/?s=", "https://www.bbc.co.uk/search?q=", "https://www.skysports.com/search?q="};
|
||||
|
||||
List<List<String>> documents = new ArrayList<>();
|
||||
List<List<String>> result = new ArrayList<>();
|
||||
List<String> result2 = new ArrayList<>();
|
||||
String document;
|
||||
List<String> goodUrl = new ArrayList<>();
|
||||
Article data = new Article();
|
@ -1,63 +1,65 @@
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class Article {
|
||||
|
||||
public Article() {
|
||||
date = new ArrayList<Integer>();
|
||||
Headline = new ArrayList<String>();
|
||||
Url = new ArrayList<String>();
|
||||
Site = new ArrayList<String>();
|
||||
Content = new ArrayList<String>();
|
||||
}
|
||||
|
||||
private ArrayList<Integer> date;
|
||||
private ArrayList<String> Headline;
|
||||
private ArrayList<String> Url;
|
||||
private ArrayList<String> Site;
|
||||
private ArrayList<String> Content;
|
||||
|
||||
public void setDate(int num) {
|
||||
date.add(num);
|
||||
}
|
||||
|
||||
public void setHeadline(String head) {
|
||||
Headline.add(head);
|
||||
}
|
||||
|
||||
public void setUrl(String url) {
|
||||
Url.add(url);
|
||||
}
|
||||
|
||||
public void setSite(String site) {
|
||||
Site.add(site);
|
||||
}
|
||||
|
||||
public void setContent(String content) {
|
||||
Content.add(content);
|
||||
}
|
||||
|
||||
|
||||
public int getHowManyData() {
|
||||
return Headline.size();
|
||||
}
|
||||
|
||||
public int getDate(int num) {
|
||||
return date.get(num);
|
||||
}
|
||||
|
||||
public String getHeadline(int num) {
|
||||
return Headline.get(num);
|
||||
}
|
||||
|
||||
public String getUrl(int num) {
|
||||
return Url.get(num);
|
||||
}
|
||||
|
||||
public String getSite(int num) {
|
||||
return Site.get(num);
|
||||
}
|
||||
|
||||
public String getContent(int num) {
|
||||
return Content.get(num);
|
||||
}
|
||||
package com.nlp;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class Article {
|
||||
|
||||
public Article() {
|
||||
date = new ArrayList<Integer>();
|
||||
Headline = new ArrayList<String>();
|
||||
Url = new ArrayList<String>();
|
||||
Site = new ArrayList<String>();
|
||||
Content = new ArrayList<String>();
|
||||
}
|
||||
|
||||
private ArrayList<Integer> date;
|
||||
private ArrayList<String> Headline;
|
||||
private ArrayList<String> Url;
|
||||
private ArrayList<String> Site;
|
||||
private ArrayList<String> Content;
|
||||
|
||||
public void setDate(int num) {
|
||||
date.add(num);
|
||||
}
|
||||
|
||||
public void setHeadline(String head) {
|
||||
Headline.add(head);
|
||||
}
|
||||
|
||||
public void setUrl(String url) {
|
||||
Url.add(url);
|
||||
}
|
||||
|
||||
public void setSite(String site) {
|
||||
Site.add(site);
|
||||
}
|
||||
|
||||
public void setContent(String content) {
|
||||
Content.add(content);
|
||||
}
|
||||
|
||||
|
||||
public int getHowManyData() {
|
||||
return Headline.size();
|
||||
}
|
||||
|
||||
public int getDate(int num) {
|
||||
return date.get(num);
|
||||
}
|
||||
|
||||
public String getHeadline(int num) {
|
||||
return Headline.get(num);
|
||||
}
|
||||
|
||||
public String getUrl(int num) {
|
||||
return Url.get(num);
|
||||
}
|
||||
|
||||
public String getSite(int num) {
|
||||
return Site.get(num);
|
||||
}
|
||||
|
||||
public String getContent(int num) {
|
||||
return Content.get(num);
|
||||
}
|
||||
}
|
@ -1,3 +1,5 @@
|
||||
package com.nlp;
|
||||
|
||||
public class Result {
|
||||
|
||||
private String tfidf;
|
@ -1,46 +1,48 @@
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Computes term-frequency / inverse-document-frequency scores.
 *
 * <p>A document is a list of word tokens and a dataset is a list of documents. Terms are matched
 * against tokens case-insensitively. All methods return a finite number: degenerate inputs
 * (empty document, empty dataset, term found nowhere) score 0 instead of NaN or Infinity.
 */
public class TFIDF {
    /**
     * @param doc  list of strings
     * @param term String represents a term
     * @return term frequency of term in document (matching tokens divided by the document
     *         length), or 0 when the document is empty
     */
    public double tf(List<String> doc, String term) {
        if (doc.isEmpty()) {
            // 0 matches / 0 tokens would evaluate to NaN.
            return 0;
        }
        double result = 0;
        for (String word : doc) {
            if (term.equalsIgnoreCase(word)) {
                result++;
            }
        }
        return result / doc.size();
    }

    /**
     * @param docs list of list of strings represents the dataset
     * @param term String represents a term
     * @return the inverse document frequency of term, {@code ln(docs.size() / n)} where n is the
     *         number of documents containing the term; 0 when no document contains it
     */
    public double idf(List<List<String>> docs, String term) {
        double n = 0;
        for (List<String> doc : docs) {
            for (String word : doc) {
                if (term.equalsIgnoreCase(word)) {
                    n++;
                    // One hit is enough: each document is counted at most once.
                    break;
                }
            }
        }
        if (n == 0) {
            // Dividing by zero would give Infinity (or NaN for an empty dataset).
            return 0;
        }
        return Math.log(docs.size() / n);
    }

    /**
     * @param doc  a text document
     * @param docs all documents
     * @param term term
     * @return the TF-IDF of term; 0 when the term does not occur in {@code doc}
     */
    public double tfIdf(List<String> doc, List<List<String>> docs, String term) {
        double termFrequency = tf(doc, term);
        if (termFrequency == 0) {
            // The product is 0 regardless of the IDF, so skip scanning the dataset.
            return 0;
        }
        return termFrequency * idf(docs, term);
    }
}
|
||||
package com.nlp;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Computes term-frequency / inverse-document-frequency scores.
 *
 * <p>A document is a list of word tokens and a dataset is a list of documents. Terms are matched
 * against tokens case-insensitively. All methods return a finite number: degenerate inputs
 * (empty document, empty dataset, term found nowhere) score 0 instead of NaN or Infinity.
 */
public class TFIDF {
    /**
     * @param doc  list of strings
     * @param term String represents a term
     * @return term frequency of term in document (matching tokens divided by the document
     *         length), or 0 when the document is empty
     */
    public double tf(List<String> doc, String term) {
        if (doc.isEmpty()) {
            // 0 matches / 0 tokens would evaluate to NaN.
            return 0;
        }
        double result = 0;
        for (String word : doc) {
            if (term.equalsIgnoreCase(word)) {
                result++;
            }
        }
        return result / doc.size();
    }

    /**
     * @param docs list of list of strings represents the dataset
     * @param term String represents a term
     * @return the inverse document frequency of term, {@code ln(docs.size() / n)} where n is the
     *         number of documents containing the term; 0 when no document contains it
     */
    public double idf(List<List<String>> docs, String term) {
        double n = 0;
        for (List<String> doc : docs) {
            for (String word : doc) {
                if (term.equalsIgnoreCase(word)) {
                    n++;
                    // One hit is enough: each document is counted at most once.
                    break;
                }
            }
        }
        if (n == 0) {
            // Dividing by zero would give Infinity (or NaN for an empty dataset).
            return 0;
        }
        return Math.log(docs.size() / n);
    }

    /**
     * @param doc  a text document
     * @param docs all documents
     * @param term term
     * @return the TF-IDF of term; 0 when the term does not occur in {@code doc}
     */
    public double tfIdf(List<String> doc, List<List<String>> docs, String term) {
        double termFrequency = tf(doc, term);
        if (termFrequency == 0) {
            // The product is 0 regardless of the IDF, so skip scanning the dataset.
            return 0;
        }
        return termFrequency * idf(docs, term);
    }
}
|
Loading…
x
Reference in New Issue
Block a user