Put the webserver
This commit is contained in:
		@@ -35,6 +35,46 @@
 | 
			
		||||
                    <target>1.8</target>
 | 
			
		||||
                </configuration>
 | 
			
		||||
            </plugin>
 | 
			
		||||
 | 
			
		||||
            <plugin>
 | 
			
		||||
                <artifactId>maven-assembly-plugin</artifactId>
 | 
			
		||||
                <executions>
 | 
			
		||||
                    <execution>
 | 
			
		||||
                        <phase>package</phase>
 | 
			
		||||
                        <goals>
 | 
			
		||||
                            <goal>single</goal>
 | 
			
		||||
                        </goals>
 | 
			
		||||
                    </execution>
 | 
			
		||||
                </executions>
 | 
			
		||||
                <configuration>
 | 
			
		||||
                    <archive>
 | 
			
		||||
                        <manifest>
 | 
			
		||||
                            <mainClass>com.nlp.App</mainClass>
 | 
			
		||||
                        </manifest>
 | 
			
		||||
                    </archive>
 | 
			
		||||
                    <descriptorRefs>
 | 
			
		||||
                        <descriptorRef>jar-with-dependencies</descriptorRef>
 | 
			
		||||
                    </descriptorRefs>
 | 
			
		||||
                </configuration>
 | 
			
		||||
            </plugin>
 | 
			
		||||
 | 
			
		||||
            <!-- Make this jar executable -->
 | 
			
		||||
            <plugin>
 | 
			
		||||
                <groupId>org.apache.maven.plugins</groupId>
 | 
			
		||||
                <artifactId>maven-jar-plugin</artifactId>
 | 
			
		||||
                <configuration>
 | 
			
		||||
                    <!-- DO NOT include log4j.properties file in your Jar -->
 | 
			
		||||
                    <excludes>
 | 
			
		||||
                        <exclude>**/log4j.properties</exclude>
 | 
			
		||||
                    </excludes>
 | 
			
		||||
                    <archive>
 | 
			
		||||
                        <manifest>
 | 
			
		||||
                            <!-- Jar file entry point -->
 | 
			
		||||
                            <mainClass>com.nlp.App</mainClass>
 | 
			
		||||
                        </manifest>
 | 
			
		||||
                    </archive>
 | 
			
		||||
                </configuration>
 | 
			
		||||
            </plugin>
 | 
			
		||||
        </plugins>
 | 
			
		||||
    </build>
 | 
			
		||||
    <groupId>PQMAN</groupId>
 | 
			
		||||
 
 | 
			
		||||
@@ -1,3 +1,5 @@
 | 
			
		||||
package com.nlp;
 | 
			
		||||
 | 
			
		||||
import com.google.gson.Gson;
 | 
			
		||||
import org.jsoup.Jsoup;
 | 
			
		||||
import org.jsoup.nodes.Document;
 | 
			
		||||
@@ -28,8 +30,6 @@ public class App {
 | 
			
		||||
        String[] url = {"https://www.thesun.co.uk/?s=", "https://www.bbc.co.uk/search?q=", "https://www.skysports.com/search?q="};
 | 
			
		||||
 | 
			
		||||
        List<List<String>> documents = new ArrayList<>();
 | 
			
		||||
        List<List<String>> result = new ArrayList<>();
 | 
			
		||||
        List<String> result2 = new ArrayList<>();
 | 
			
		||||
        String document;
 | 
			
		||||
        List<String> goodUrl = new ArrayList<>();
 | 
			
		||||
        Article data = new Article();
 | 
			
		||||
@@ -1,63 +1,65 @@
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
 | 
			
		||||
public class Article {
 | 
			
		||||
 | 
			
		||||
    public Article() {
 | 
			
		||||
        date = new ArrayList<Integer>();
 | 
			
		||||
        Headline = new ArrayList<String>();
 | 
			
		||||
        Url = new ArrayList<String>();
 | 
			
		||||
        Site = new ArrayList<String>();
 | 
			
		||||
        Content = new ArrayList<String>();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private ArrayList<Integer> date;
 | 
			
		||||
    private ArrayList<String> Headline;
 | 
			
		||||
    private ArrayList<String> Url;
 | 
			
		||||
    private ArrayList<String> Site;
 | 
			
		||||
    private ArrayList<String> Content;
 | 
			
		||||
 | 
			
		||||
    public void setDate(int num) {
 | 
			
		||||
        date.add(num);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public void setHeadline(String head) {
 | 
			
		||||
        Headline.add(head);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public void setUrl(String url) {
 | 
			
		||||
        Url.add(url);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public void setSite(String site) {
 | 
			
		||||
        Site.add(site);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public void setContent(String content) {
 | 
			
		||||
        Content.add(content);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    public int getHowManyData() {
 | 
			
		||||
        return Headline.size();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public int getDate(int num) {
 | 
			
		||||
        return date.get(num);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public String getHeadline(int num) {
 | 
			
		||||
        return Headline.get(num);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public String getUrl(int num) {
 | 
			
		||||
        return Url.get(num);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public String getSite(int num) {
 | 
			
		||||
        return Site.get(num);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public String getContent(int num) {
 | 
			
		||||
        return Content.get(num);
 | 
			
		||||
    }
 | 
			
		||||
package com.nlp;
 | 
			
		||||
 | 
			
		||||
import java.util.ArrayList;
 | 
			
		||||
 | 
			
		||||
public class Article {
 | 
			
		||||
 | 
			
		||||
    public Article() {
 | 
			
		||||
        date = new ArrayList<Integer>();
 | 
			
		||||
        Headline = new ArrayList<String>();
 | 
			
		||||
        Url = new ArrayList<String>();
 | 
			
		||||
        Site = new ArrayList<String>();
 | 
			
		||||
        Content = new ArrayList<String>();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private ArrayList<Integer> date;
 | 
			
		||||
    private ArrayList<String> Headline;
 | 
			
		||||
    private ArrayList<String> Url;
 | 
			
		||||
    private ArrayList<String> Site;
 | 
			
		||||
    private ArrayList<String> Content;
 | 
			
		||||
 | 
			
		||||
    public void setDate(int num) {
 | 
			
		||||
        date.add(num);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public void setHeadline(String head) {
 | 
			
		||||
        Headline.add(head);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public void setUrl(String url) {
 | 
			
		||||
        Url.add(url);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public void setSite(String site) {
 | 
			
		||||
        Site.add(site);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public void setContent(String content) {
 | 
			
		||||
        Content.add(content);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    public int getHowManyData() {
 | 
			
		||||
        return Headline.size();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public int getDate(int num) {
 | 
			
		||||
        return date.get(num);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public String getHeadline(int num) {
 | 
			
		||||
        return Headline.get(num);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public String getUrl(int num) {
 | 
			
		||||
        return Url.get(num);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public String getSite(int num) {
 | 
			
		||||
        return Site.get(num);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public String getContent(int num) {
 | 
			
		||||
        return Content.get(num);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@@ -1,3 +1,5 @@
 | 
			
		||||
package com.nlp;
 | 
			
		||||
 | 
			
		||||
public class Result {
 | 
			
		||||
 | 
			
		||||
    private String tfidf;
 | 
			
		||||
@@ -1,46 +1,48 @@
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
/**
 * Computes TF-IDF scores over documents represented as lists of words.
 *
 * <p>Word matching is case-insensitive throughout. Degenerate inputs (an empty
 * document, an empty corpus, or a term that no document contains) score
 * {@code 0.0} instead of producing {@code NaN} or {@code Infinity}, so results
 * can always be compared and sorted.
 */
public class TFIDF {

    /**
     * Computes the term frequency: occurrences of {@code term} divided by the
     * number of words in {@code doc}.
     *
     * @param doc  the document as a list of words
     * @param term the term to count (compared ignoring case)
     * @return the term frequency of {@code term} in {@code doc}, or {@code 0.0}
     *         when {@code doc} is empty
     */
    public double tf(List<String> doc, String term) {
        // An empty document would otherwise evaluate 0.0 / 0, which is NaN.
        if (doc.isEmpty()) {
            return 0.0;
        }
        int occurrences = 0;
        for (String word : doc) {
            if (term.equalsIgnoreCase(word)) {
                occurrences++;
            }
        }
        return (double) occurrences / doc.size();
    }

    /**
     * Computes the inverse document frequency:
     * {@code ln(number of documents / number of documents containing term)}.
     *
     * @param docs the corpus, one list of words per document
     * @param term the term to look for (compared ignoring case)
     * @return the inverse document frequency of {@code term} in {@code docs},
     *         or {@code 0.0} when no document contains the term (which includes
     *         an empty corpus)
     */
    public double idf(List<List<String>> docs, String term) {
        int containing = 0;
        for (List<String> doc : docs) {
            for (String word : doc) {
                if (term.equalsIgnoreCase(word)) {
                    containing++;
                    break; // one hit is enough to count this document
                }
            }
        }
        // Dividing by zero here would yield Infinity (or NaN for an empty
        // corpus) and poison every score derived from it.
        if (containing == 0) {
            return 0.0;
        }
        return Math.log(docs.size() / (double) containing);
    }

    /**
     * Computes the TF-IDF score of {@code term} for {@code doc} within {@code docs}.
     *
     * @param doc  the document being scored
     * @param docs all documents of the corpus
     * @param term the term to score (compared ignoring case)
     * @return {@code tf(doc, term) * idf(docs, term)}; {@code 0.0} when the term
     *         does not occur in {@code doc}
     */
    public double tfIdf(List<String> doc, List<List<String>> docs, String term) {
        double termFrequency = tf(doc, term);
        // A zero term frequency always gives a zero score, so skip the corpus scan.
        if (termFrequency == 0.0) {
            return 0.0;
        }
        return termFrequency * idf(docs, term);
    }
}
 | 
			
		||||
package com.nlp;
 | 
			
		||||
 | 
			
		||||
import java.util.List;
 | 
			
		||||
 | 
			
		||||
/**
 * Computes TF-IDF scores over documents represented as lists of words.
 *
 * <p>Word matching is case-insensitive throughout. Degenerate inputs (an empty
 * document, an empty corpus, or a term that no document contains) score
 * {@code 0.0} instead of producing {@code NaN} or {@code Infinity}, so results
 * can always be compared and sorted.
 */
public class TFIDF {

    /**
     * Computes the term frequency: occurrences of {@code term} divided by the
     * number of words in {@code doc}.
     *
     * @param doc  the document as a list of words
     * @param term the term to count (compared ignoring case)
     * @return the term frequency of {@code term} in {@code doc}, or {@code 0.0}
     *         when {@code doc} is empty
     */
    public double tf(List<String> doc, String term) {
        // An empty document would otherwise evaluate 0.0 / 0, which is NaN.
        if (doc.isEmpty()) {
            return 0.0;
        }
        int occurrences = 0;
        for (String word : doc) {
            if (term.equalsIgnoreCase(word)) {
                occurrences++;
            }
        }
        return (double) occurrences / doc.size();
    }

    /**
     * Computes the inverse document frequency:
     * {@code ln(number of documents / number of documents containing term)}.
     *
     * @param docs the corpus, one list of words per document
     * @param term the term to look for (compared ignoring case)
     * @return the inverse document frequency of {@code term} in {@code docs},
     *         or {@code 0.0} when no document contains the term (which includes
     *         an empty corpus)
     */
    public double idf(List<List<String>> docs, String term) {
        int containing = 0;
        for (List<String> doc : docs) {
            for (String word : doc) {
                if (term.equalsIgnoreCase(word)) {
                    containing++;
                    break; // one hit is enough to count this document
                }
            }
        }
        // Dividing by zero here would yield Infinity (or NaN for an empty
        // corpus) and poison every score derived from it.
        if (containing == 0) {
            return 0.0;
        }
        return Math.log(docs.size() / (double) containing);
    }

    /**
     * Computes the TF-IDF score of {@code term} for {@code doc} within {@code docs}.
     *
     * @param doc  the document being scored
     * @param docs all documents of the corpus
     * @param term the term to score (compared ignoring case)
     * @return {@code tf(doc, term) * idf(docs, term)}; {@code 0.0} when the term
     *         does not occur in {@code doc}
     */
    public double tfIdf(List<String> doc, List<List<String>> docs, String term) {
        double termFrequency = tf(doc, term);
        // A zero term frequency always gives a zero score, so skip the corpus scan.
        if (termFrequency == 0.0) {
            return 0.0;
        }
        return termFrequency * idf(docs, term);
    }
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user