Put the webserver
This commit is contained in:
parent
1c36a2676a
commit
cf33115501
@ -35,6 +35,46 @@
|
|||||||
<target>1.8</target>
|
<target>1.8</target>
|
||||||
</configuration>
|
</configuration>
|
||||||
</plugin>
|
</plugin>
|
||||||
|
|
||||||
|
<plugin>
|
||||||
|
<artifactId>maven-assembly-plugin</artifactId>
|
||||||
|
<executions>
|
||||||
|
<execution>
|
||||||
|
<phase>package</phase>
|
||||||
|
<goals>
|
||||||
|
<goal>single</goal>
|
||||||
|
</goals>
|
||||||
|
</execution>
|
||||||
|
</executions>
|
||||||
|
<configuration>
|
||||||
|
<archive>
|
||||||
|
<manifest>
|
||||||
|
<mainClass>com.nlp.App</mainClass>
|
||||||
|
</manifest>
|
||||||
|
</archive>
|
||||||
|
<descriptorRefs>
|
||||||
|
<descriptorRef>jar-with-dependencies</descriptorRef>
|
||||||
|
</descriptorRefs>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
|
|
||||||
|
<!-- Make this jar executable -->
|
||||||
|
<plugin>
|
||||||
|
<groupId>org.apache.maven.plugins</groupId>
|
||||||
|
<artifactId>maven-jar-plugin</artifactId>
|
||||||
|
<configuration>
|
||||||
|
<!-- DO NOT include log4j.properties file in your Jar -->
|
||||||
|
<excludes>
|
||||||
|
<exclude>**/log4j.properties</exclude>
|
||||||
|
</excludes>
|
||||||
|
<archive>
|
||||||
|
<manifest>
|
||||||
|
<!-- Jar file entry point -->
|
||||||
|
<mainClass>com.nlp.App</mainClass>
|
||||||
|
</manifest>
|
||||||
|
</archive>
|
||||||
|
</configuration>
|
||||||
|
</plugin>
|
||||||
</plugins>
|
</plugins>
|
||||||
</build>
|
</build>
|
||||||
<groupId>PQMAN</groupId>
|
<groupId>PQMAN</groupId>
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
package com.nlp;
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
import com.google.gson.Gson;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
@ -28,8 +30,6 @@ public class App {
|
|||||||
String[] url = {"https://www.thesun.co.uk/?s=", "https://www.bbc.co.uk/search?q=", "https://www.skysports.com/search?q="};
|
String[] url = {"https://www.thesun.co.uk/?s=", "https://www.bbc.co.uk/search?q=", "https://www.skysports.com/search?q="};
|
||||||
|
|
||||||
List<List<String>> documents = new ArrayList<>();
|
List<List<String>> documents = new ArrayList<>();
|
||||||
List<List<String>> result = new ArrayList<>();
|
|
||||||
List<String> result2 = new ArrayList<>();
|
|
||||||
String document;
|
String document;
|
||||||
List<String> goodUrl = new ArrayList<>();
|
List<String> goodUrl = new ArrayList<>();
|
||||||
Article data = new Article();
|
Article data = new Article();
|
@ -1,63 +1,65 @@
|
|||||||
import java.util.ArrayList;
|
package com.nlp;
|
||||||
|
|
||||||
public class Article {
|
import java.util.ArrayList;
|
||||||
|
|
||||||
public Article() {
|
public class Article {
|
||||||
date = new ArrayList<Integer>();
|
|
||||||
Headline = new ArrayList<String>();
|
public Article() {
|
||||||
Url = new ArrayList<String>();
|
date = new ArrayList<Integer>();
|
||||||
Site = new ArrayList<String>();
|
Headline = new ArrayList<String>();
|
||||||
Content = new ArrayList<String>();
|
Url = new ArrayList<String>();
|
||||||
}
|
Site = new ArrayList<String>();
|
||||||
|
Content = new ArrayList<String>();
|
||||||
private ArrayList<Integer> date;
|
}
|
||||||
private ArrayList<String> Headline;
|
|
||||||
private ArrayList<String> Url;
|
private ArrayList<Integer> date;
|
||||||
private ArrayList<String> Site;
|
private ArrayList<String> Headline;
|
||||||
private ArrayList<String> Content;
|
private ArrayList<String> Url;
|
||||||
|
private ArrayList<String> Site;
|
||||||
public void setDate(int num) {
|
private ArrayList<String> Content;
|
||||||
date.add(num);
|
|
||||||
}
|
public void setDate(int num) {
|
||||||
|
date.add(num);
|
||||||
public void setHeadline(String head) {
|
}
|
||||||
Headline.add(head);
|
|
||||||
}
|
public void setHeadline(String head) {
|
||||||
|
Headline.add(head);
|
||||||
public void setUrl(String url) {
|
}
|
||||||
Url.add(url);
|
|
||||||
}
|
public void setUrl(String url) {
|
||||||
|
Url.add(url);
|
||||||
public void setSite(String site) {
|
}
|
||||||
Site.add(site);
|
|
||||||
}
|
public void setSite(String site) {
|
||||||
|
Site.add(site);
|
||||||
public void setContent(String content) {
|
}
|
||||||
Content.add(content);
|
|
||||||
}
|
public void setContent(String content) {
|
||||||
|
Content.add(content);
|
||||||
|
}
|
||||||
public int getHowManyData() {
|
|
||||||
return Headline.size();
|
|
||||||
}
|
public int getHowManyData() {
|
||||||
|
return Headline.size();
|
||||||
public int getDate(int num) {
|
}
|
||||||
return date.get(num);
|
|
||||||
}
|
public int getDate(int num) {
|
||||||
|
return date.get(num);
|
||||||
public String getHeadline(int num) {
|
}
|
||||||
return Headline.get(num);
|
|
||||||
}
|
public String getHeadline(int num) {
|
||||||
|
return Headline.get(num);
|
||||||
public String getUrl(int num) {
|
}
|
||||||
return Url.get(num);
|
|
||||||
}
|
public String getUrl(int num) {
|
||||||
|
return Url.get(num);
|
||||||
public String getSite(int num) {
|
}
|
||||||
return Site.get(num);
|
|
||||||
}
|
public String getSite(int num) {
|
||||||
|
return Site.get(num);
|
||||||
public String getContent(int num) {
|
}
|
||||||
return Content.get(num);
|
|
||||||
}
|
public String getContent(int num) {
|
||||||
|
return Content.get(num);
|
||||||
|
}
|
||||||
}
|
}
|
@ -1,3 +1,5 @@
|
|||||||
|
package com.nlp;
|
||||||
|
|
||||||
public class Result {
|
public class Result {
|
||||||
|
|
||||||
private String tfidf;
|
private String tfidf;
|
@ -1,46 +1,48 @@
|
|||||||
import java.util.List;
|
package com.nlp;
|
||||||
|
|
||||||
public class TFIDF {
|
import java.util.List;
|
||||||
/**
|
|
||||||
* @param doc list of strings
|
public class TFIDF {
|
||||||
* @param term String represents a term
|
/**
|
||||||
* @return term frequency of term in document
|
* @param doc list of strings
|
||||||
*/
|
* @param term String represents a term
|
||||||
public double tf(List<String> doc, String term) {
|
* @return term frequency of term in document
|
||||||
double result = 0;
|
*/
|
||||||
for (String word : doc) {
|
public double tf(List<String> doc, String term) {
|
||||||
if (term.equalsIgnoreCase(word))
|
double result = 0;
|
||||||
result++;
|
for (String word : doc) {
|
||||||
}
|
if (term.equalsIgnoreCase(word))
|
||||||
return result / doc.size();
|
result++;
|
||||||
}
|
}
|
||||||
|
return result / doc.size();
|
||||||
/**
|
}
|
||||||
* @param docs list of lists of strings representing the dataset
|
|
||||||
* @param term String represents a term
|
/**
|
||||||
* @return the inverse document frequency of term in documents
|
* @param docs list of lists of strings representing the dataset
|
||||||
*/
|
* @param term String represents a term
|
||||||
public double idf(List<List<String>> docs, String term) {
|
* @return the inverse document frequency of term in documents
|
||||||
double n = 0;
|
*/
|
||||||
for (List<String> doc : docs) {
|
public double idf(List<List<String>> docs, String term) {
|
||||||
for (String word : doc) {
|
double n = 0;
|
||||||
if (term.equalsIgnoreCase(word)) {
|
for (List<String> doc : docs) {
|
||||||
n++;
|
for (String word : doc) {
|
||||||
break;
|
if (term.equalsIgnoreCase(word)) {
|
||||||
}
|
n++;
|
||||||
}
|
break;
|
||||||
}
|
}
|
||||||
return Math.log(docs.size() / n);
|
}
|
||||||
}
|
}
|
||||||
|
return Math.log(docs.size() / n);
|
||||||
/**
|
}
|
||||||
* @param doc a text document
|
|
||||||
* @param docs all documents
|
/**
|
||||||
* @param term term
|
* @param doc a text document
|
||||||
* @return the TF-IDF of term
|
* @param docs all documents
|
||||||
*/
|
* @param term term
|
||||||
public double tfIdf(List<String> doc, List<List<String>> docs, String term) {
|
* @return the TF-IDF of term
|
||||||
return tf(doc, term) * idf(docs, term);
|
*/
|
||||||
|
public double tfIdf(List<String> doc, List<List<String>> docs, String term) {
|
||||||
}
|
return tf(doc, term) * idf(docs, term);
|
||||||
}
|
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user