Put the webserver

This commit is contained in:
Mathieu Sanchez 2019-06-12 12:05:50 +09:00
parent 1c36a2676a
commit cf33115501
5 changed files with 156 additions and 110 deletions

View File

@ -35,6 +35,46 @@
<target>1.8</target>
</configuration>
</plugin>
<!-- Assembly plugin: its "single" goal is bound to the package phase below.
     The assembly uses the jar-with-dependencies descriptor and writes
     com.nlp.App into the archive manifest as the main class.
     NOTE(review): no groupId/version is declared for this plugin here;
     confirm they are inherited or pinned elsewhere in the POM. -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
<configuration>
<archive>
<manifest>
<mainClass>com.nlp.App</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>
<!-- Make the regular project jar executable by declaring its main class in the manifest -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<!-- Keep every log4j.properties file (at any directory depth) out of the jar -->
<excludes>
<exclude>**/log4j.properties</exclude>
</excludes>
<archive>
<manifest>
<!-- Jar entry point: same class as the one configured for the assembly plugin -->
<mainClass>com.nlp.App</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
</plugins>
</build>
<groupId>PQMAN</groupId>

View File

@ -1,3 +1,5 @@
package com.nlp;
import com.google.gson.Gson;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
@ -28,8 +30,6 @@ public class App {
String[] url = {"https://www.thesun.co.uk/?s=", "https://www.bbc.co.uk/search?q=", "https://www.skysports.com/search?q="};
List<List<String>> documents = new ArrayList<>();
List<List<String>> result = new ArrayList<>();
List<String> result2 = new ArrayList<>();
String document;
List<String> goodUrl = new ArrayList<>();
Article data = new Article();

View File

@ -1,63 +1,65 @@
import java.util.ArrayList;
public class Article {
public Article() {
date = new ArrayList<Integer>();
Headline = new ArrayList<String>();
Url = new ArrayList<String>();
Site = new ArrayList<String>();
Content = new ArrayList<String>();
}
private ArrayList<Integer> date;
private ArrayList<String> Headline;
private ArrayList<String> Url;
private ArrayList<String> Site;
private ArrayList<String> Content;
public void setDate(int num) {
date.add(num);
}
public void setHeadline(String head) {
Headline.add(head);
}
public void setUrl(String url) {
Url.add(url);
}
public void setSite(String site) {
Site.add(site);
}
public void setContent(String content) {
Content.add(content);
}
public int getHowManyData() {
return Headline.size();
}
public int getDate(int num) {
return date.get(num);
}
public String getHeadline(int num) {
return Headline.get(num);
}
public String getUrl(int num) {
return Url.get(num);
}
public String getSite(int num) {
return Site.get(num);
}
public String getContent(int num) {
return Content.get(num);
}
package com.nlp;
import java.util.ArrayList;
public class Article {
public Article() {
date = new ArrayList<Integer>();
Headline = new ArrayList<String>();
Url = new ArrayList<String>();
Site = new ArrayList<String>();
Content = new ArrayList<String>();
}
private ArrayList<Integer> date;
private ArrayList<String> Headline;
private ArrayList<String> Url;
private ArrayList<String> Site;
private ArrayList<String> Content;
public void setDate(int num) {
date.add(num);
}
public void setHeadline(String head) {
Headline.add(head);
}
public void setUrl(String url) {
Url.add(url);
}
public void setSite(String site) {
Site.add(site);
}
public void setContent(String content) {
Content.add(content);
}
public int getHowManyData() {
return Headline.size();
}
public int getDate(int num) {
return date.get(num);
}
public String getHeadline(int num) {
return Headline.get(num);
}
public String getUrl(int num) {
return Url.get(num);
}
public String getSite(int num) {
return Site.get(num);
}
public String getContent(int num) {
return Content.get(num);
}
}

View File

@ -1,3 +1,5 @@
package com.nlp;
public class Result {
private String tfidf;

View File

@ -1,46 +1,48 @@
import java.util.List;
/**
 * Term-frequency / inverse-document-frequency helpers over tokenized
 * documents. A document is a list of words; a corpus is a list of documents.
 * All term comparisons ignore case.
 */
public class TFIDF {
    /**
     * Computes the share of words in a document that match the term.
     *
     * @param doc list of strings
     * @param term String represents a term
     * @return term frequency of term in document, or 0 for an empty document
     */
    public double tf(List<String> doc, String term) {
        // An empty document would otherwise produce 0/0 = NaN.
        if (doc.isEmpty()) {
            return 0;
        }
        double result = 0;
        for (String word : doc) {
            if (term.equalsIgnoreCase(word)) {
                result++;
            }
        }
        return result / doc.size();
    }

    /**
     * Computes {@code ln(number of documents / number of documents containing the term)}.
     *
     * @param docs list of list of strings represents the dataset
     * @param term String represents a term
     * @return the inverse document frequency of term in documents, or 0 when
     *         no document contains the term (including an empty dataset)
     */
    public double idf(List<List<String>> docs, String term) {
        double n = 0;
        for (List<String> doc : docs) {
            for (String word : doc) {
                if (term.equalsIgnoreCase(word)) {
                    n++;
                    // Count each document at most once.
                    break;
                }
            }
        }
        // Dividing by n == 0 would give Infinity (or NaN for an empty dataset).
        if (n == 0) {
            return 0;
        }
        return Math.log(docs.size() / n);
    }

    /**
     * Computes the product of {@link #tf} and {@link #idf}.
     *
     * @param doc a text document
     * @param docs all documents
     * @param term term
     * @return the TF-IDF of term; 0 when the term does not occur in {@code doc}
     */
    public double tfIdf(List<String> doc, List<List<String>> docs, String term) {
        double termFrequency = tf(doc, term);
        // A zero term frequency makes the product zero, so skip the corpus scan.
        if (termFrequency == 0) {
            return 0;
        }
        return termFrequency * idf(docs, term);
    }
}
package com.nlp;
import java.util.List;
/**
 * Term-frequency / inverse-document-frequency helpers over tokenized
 * documents. A document is a list of words; a corpus is a list of documents.
 * All term comparisons ignore case.
 */
public class TFIDF {
    /**
     * Computes the share of words in a document that match the term.
     *
     * @param doc list of strings
     * @param term String represents a term
     * @return term frequency of term in document, or 0 for an empty document
     */
    public double tf(List<String> doc, String term) {
        // An empty document would otherwise produce 0/0 = NaN.
        if (doc.isEmpty()) {
            return 0;
        }
        double result = 0;
        for (String word : doc) {
            if (term.equalsIgnoreCase(word)) {
                result++;
            }
        }
        return result / doc.size();
    }

    /**
     * Computes {@code ln(number of documents / number of documents containing the term)}.
     *
     * @param docs list of list of strings represents the dataset
     * @param term String represents a term
     * @return the inverse document frequency of term in documents, or 0 when
     *         no document contains the term (including an empty dataset)
     */
    public double idf(List<List<String>> docs, String term) {
        double n = 0;
        for (List<String> doc : docs) {
            for (String word : doc) {
                if (term.equalsIgnoreCase(word)) {
                    n++;
                    // Count each document at most once.
                    break;
                }
            }
        }
        // Dividing by n == 0 would give Infinity (or NaN for an empty dataset).
        if (n == 0) {
            return 0;
        }
        return Math.log(docs.size() / n);
    }

    /**
     * Computes the product of {@link #tf} and {@link #idf}.
     *
     * @param doc a text document
     * @param docs all documents
     * @param term term
     * @return the TF-IDF of term; 0 when the term does not occur in {@code doc}
     */
    public double tfIdf(List<String> doc, List<List<String>> docs, String term) {
        double termFrequency = tf(doc, term);
        // A zero term frequency makes the product zero, so skip the corpus scan.
        if (termFrequency == 0) {
            return 0;
        }
        return termFrequency * idf(docs, term);
    }
}