Add server to handle requests from the Angular 8 web app

This commit is contained in:
Mathieu Sanchez 2019-06-11 17:23:49 +09:00
parent 5acf243701
commit 1c36a2676a
7 changed files with 384 additions and 256 deletions

53
.idea/workspace.xml generated
View File

@ -29,21 +29,18 @@
<item name="CAu-Natural-language-processing" type="b2602c69:ProjectViewProjectNode" />
<item name="CAu-Natural-language-processing" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="CAu-Natural-language-processing" type="b2602c69:ProjectViewProjectNode" />
<item name="CAu-Natural-language-processing" type="462c0819:PsiDirectoryNode" />
<item name="CAu_NLP_2019" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
<pane id="Scope" />
<pane id="PackagesPane" />
<pane id="Scope" />
</panes>
</component>
<component name="PropertiesComponent">
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="WebServerToolWindowFactoryState" value="false" />
<property name="aspect.path.notification.shown" value="true" />
<property name="last_opened_file_path" value="$PROJECT_DIR$/CAu_NLP_2019" />
</component>
<component name="RunDashboard">
<option name="ruleStates">
@ -67,23 +64,25 @@
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1559529262635</updated>
<workItem from="1559824815280" duration="54000" />
</task>
<servers />
</component>
<component name="TimeTrackingManager">
<option name="totallyTimeSpent" value="54000" />
</component>
<component name="ToolWindowManager">
<frame x="-8" y="-8" width="1936" height="1096" extended-state="6" />
<frame x="-8" y="-8" width="1936" height="1056" extended-state="6" />
<layout>
<window_info id="Image Layers" />
<window_info id="Designer" />
<window_info id="UI Designer" />
<window_info id="Capture Tool" />
<window_info id="Favorites" side_tool="true" />
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.25" />
<window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.25266525" />
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
<window_info anchor="bottom" id="Version Control" />
<window_info anchor="bottom" id="Terminal" />
<window_info anchor="bottom" id="Event Log" side_tool="true" />
<window_info anchor="bottom" id="Messages" />
<window_info id="Image Layers" order="2" />
<window_info id="Designer" order="3" />
<window_info id="UI Designer" order="4" />
<window_info id="Capture Tool" order="5" />
<window_info id="Favorites" order="6" side_tool="true" />
<window_info anchor="bottom" id="Docker" show_stripe_button="false" />
<window_info anchor="bottom" id="Database Changes" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" />
<window_info anchor="bottom" id="Run" order="2" />
@ -91,16 +90,24 @@
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="6" />
<window_info anchor="right" id="Palette" />
<window_info anchor="right" id="Theme Preview" />
<window_info anchor="right" id="Maven" />
<window_info anchor="right" id="Capture Analysis" />
<window_info anchor="right" id="Palette&#9;" />
<window_info anchor="bottom" id="Terminal" order="7" />
<window_info anchor="bottom" id="Event Log" order="8" side_tool="true" />
<window_info anchor="bottom" id="Version Control" order="9" />
<window_info anchor="bottom" id="Messages" order="10" />
<window_info anchor="right" id="Database" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
<window_info anchor="right" id="Palette" order="3" />
<window_info anchor="right" id="Maven" order="4" />
<window_info anchor="right" id="Theme Preview" order="5" />
<window_info anchor="right" id="Capture Analysis" order="6" />
<window_info anchor="right" id="Palette&#9;" order="7" />
</layout>
</component>
<component name="TypeScriptGeneratedFilesManager">
<option name="version" value="1" />
</component>
<component name="masterDetails">
<states>
<state key="ProjectJDKs.UI">

View File

@ -1,14 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="MavenProjectsManager">
<option name="originalFiles">
<list>
<option value="$PROJECT_DIR$/pom.xml" />
</list>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_11" project-jdk-name="11" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="JavaScriptSettings">
<option name="languageLevel" value="ES6" />
</component>
<component name="MavenProjectsManager">
<option name="originalFiles">
<list>
<option value="$PROJECT_DIR$/pom.xml" />
</list>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_11" project-jdk-name="OpenJDK 11.0.2" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

View File

@ -1,7 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="878e5c11-e08f-43ee-a4df-9d0edd20ed32" name="Default Changelist" comment="" />
<list default="true" id="878e5c11-e08f-43ee-a4df-9d0edd20ed32" name="Default Changelist" comment="">
<change afterPath="$PROJECT_DIR$/src/main/java/Result.java" afterDir="false" />
<change beforePath="$PROJECT_DIR$/../.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/../.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/misc.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/misc.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/pom.xml" beforeDir="false" afterPath="$PROJECT_DIR$/pom.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/src/main/java/App.java" beforeDir="false" afterPath="$PROJECT_DIR$/src/main/java/App.java" afterDir="false" />
<change beforePath="$PROJECT_DIR$/target/classes/App.class" beforeDir="false" afterPath="$PROJECT_DIR$/target/classes/App.class" afterDir="false" />
</list>
<ignored path="$PROJECT_DIR$/out/" />
<ignored path="$PROJECT_DIR$/target/" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
@ -15,20 +23,11 @@
</component>
<component name="FileEditorManager">
<leaf SIDE_TABS_SIZE_LIMIT_KEY="300">
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/pom.xml">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="102">
<caret line="6" column="16" selection-start-line="6" selection-start-column="16" selection-end-line="6" selection-end-column="16" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/src/main/java/App.java">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="156">
<caret line="16" column="54" lean-forward="true" selection-start-line="16" selection-start-column="54" selection-end-line="16" selection-end-column="54" />
<state relative-caret-position="154">
<caret line="135" column="44" lean-forward="true" selection-start-line="135" selection-start-column="44" selection-end-line="135" selection-end-column="44" />
<folding>
<element signature="imports" expanded="true" />
</folding>
@ -36,10 +35,38 @@
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/pom.xml">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="425">
<caret line="25" selection-start-line="25" selection-end-line="25" />
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/src/main/java/Result.java">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="238">
<caret line="14" column="31" selection-start-line="14" selection-start-column="31" selection-end-line="14" selection-end-column="31" />
<folding>
<element signature="e#208#209#0" expanded="true" />
<element signature="e#236#237#0" expanded="true" />
<element signature="e#278#279#0" expanded="true" />
<element signature="e#312#313#0" expanded="true" />
<element signature="e#342#343#0" expanded="true" />
<element signature="e#368#369#0" expanded="true" />
<element signature="e#406#407#0" expanded="true" />
<element signature="e#436#437#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/src/main/java/Article.java">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="493">
<state relative-caret-position="722">
<caret line="59" column="28" selection-start-line="59" selection-start-column="18" selection-end-line="59" selection-end-column="28" />
<folding>
<element signature="e#511#512#0" expanded="true" />
@ -78,16 +105,37 @@
</list>
</option>
</component>
<component name="FindInProjectRecents">
<findStrings>
<find>args</find>
<find>StaticLoggerBinder</find>
<find>changeDate</find>
<find>System.out.println</find>
<find>result</find>
<find>key</find>
</findStrings>
<replaceStrings>
<replace>searchs</replace>
<replace>words</replace>
</replaceStrings>
</component>
<component name="Git.Settings">
<option name="UPDATE_TYPE" value="MERGE" />
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$/.." />
<option name="RECENT_BRANCH_BY_REPOSITORY">
<map>
<entry key="$PROJECT_DIR$/.." value="master" />
</map>
</option>
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/CAu_NLP_2019.iml" />
<option value="$PROJECT_DIR$/pom.xml" />
<option value="$PROJECT_DIR$/src/main/java/Article.java" />
<option value="$PROJECT_DIR$/src/main/java/tfidf.java" />
<option value="$PROJECT_DIR$/src/main/java/Result.java" />
<option value="$PROJECT_DIR$/pom.xml" />
<option value="$PROJECT_DIR$/src/main/java/App.java" />
</list>
</option>
@ -98,13 +146,14 @@
<option name="width" value="1400" />
<option name="height" value="1000" />
</component>
<component name="ProjectLevelVcsManager" settingsEditedManually="true" />
<component name="ProjectLevelVcsManager" settingsEditedManually="true">
<OptionsSetting value="false" id="Update" />
</component>
<component name="ProjectView">
<navigator proportions="" version="1">
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="PackagesPane" />
<pane id="Scope" />
<pane id="ProjectPane">
<subPane>
@ -131,24 +180,27 @@
<item name="main" type="462c0819:PsiDirectoryNode" />
<item name="java" type="462c0819:PsiDirectoryNode" />
</path>
<path>
<item name="CAu_NLP_2019" type="b2602c69:ProjectViewProjectNode" />
<item name="CAu_NLP_2019" type="462c0819:PsiDirectoryNode" />
<item name="target" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
<pane id="PackagesPane" />
</panes>
</component>
<component name="PropertiesComponent">
<property name="SHARE_PROJECT_CONFIGURATION_FILES" value="true" />
<property name="WebServerToolWindowFactoryState" value="false" />
<property name="aspect.path.notification.shown" value="true" />
<property name="last.edited.regexp" value="" />
<property name="last_opened_file_path" value="$PROJECT_DIR$" />
<property name="node.js.detected.package.eslint" value="true" />
<property name="node.js.path.for.package.eslint" value="project" />
<property name="node.js.selected.package.eslint" value="(autodetect)" />
<property name="project.structure.last.edited" value="SDKs" />
<property name="project.structure.proportion" value="0.0" />
<property name="project.structure.side.proportion" value="0.2" />
<property name="settings.editor.selected.configurable" value="preferences.lookFeel" />
<property name="restartRequiresConfirmation" value="false" />
<property name="settings.editor.selected.configurable" value="preferences.keymap" />
</component>
<component name="RunDashboard">
<option name="ruleStates">
@ -173,6 +225,9 @@
<recent_temporary>
<list>
<item itemvalue="Application.App" />
<item itemvalue="Application.App" />
<item itemvalue="Application.App" />
<item itemvalue="Application.App" />
</list>
</recent_temporary>
</component>
@ -186,14 +241,22 @@
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1559476475559</updated>
<workItem from="1559824875684" duration="31000" />
<workItem from="1559824912264" duration="975000" />
<workItem from="1559826436158" duration="669000" />
<workItem from="1560222716538" duration="45000" />
<workItem from="1560238353777" duration="3020000" />
</task>
<servers />
</component>
<component name="TimeTrackingManager">
<option name="totallyTimeSpent" value="4740000" />
</component>
<component name="ToolWindowManager">
<frame x="-8" y="-8" width="1936" height="1096" extended-state="6" />
<frame x="-8" y="-8" width="1936" height="1056" extended-state="6" />
<editor active="true" />
<layout>
<window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.18176973" />
<window_info content_ui="combo" id="Project" order="0" visible="true" weight="0.19189766" />
<window_info id="Structure" order="1" side_tool="true" weight="0.25" />
<window_info id="Image Layers" order="2" />
<window_info id="Designer" order="3" />
@ -202,15 +265,17 @@
<window_info id="Favorites" order="6" side_tool="true" />
<window_info anchor="bottom" id="Message" order="0" />
<window_info anchor="bottom" id="Find" order="1" />
<window_info active="true" anchor="bottom" id="Run" order="2" sideWeight="0.49520257" visible="true" weight="0.32917964" />
<window_info active="true" anchor="bottom" id="Run" order="2" sideWeight="0.49520257" visible="true" weight="0.32827735" />
<window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
<window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
<window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
<window_info anchor="bottom" id="TODO" order="6" />
<window_info anchor="bottom" id="Terminal" order="7" sideWeight="0.49946696" weight="0.32917964" />
<window_info anchor="bottom" id="Event Log" order="8" sideWeight="0.50479746" side_tool="true" visible="true" weight="0.32917964" />
<window_info anchor="bottom" id="Event Log" order="8" sideWeight="0.50479746" side_tool="true" weight="0.32827735" />
<window_info anchor="bottom" id="Version Control" order="9" />
<window_info anchor="bottom" id="Messages" order="10" sideWeight="0.4978678" weight="0.32917964" />
<window_info anchor="bottom" id="Messages" order="10" sideWeight="0.4978678" weight="0.32827735" />
<window_info anchor="bottom" id="Docker" order="11" show_stripe_button="false" />
<window_info anchor="bottom" id="Database Changes" order="12" />
<window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
<window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
@ -219,6 +284,7 @@
<window_info anchor="right" id="Theme Preview" order="5" />
<window_info anchor="right" id="Capture Analysis" order="6" />
<window_info anchor="right" id="Palette&#9;" order="7" />
<window_info anchor="right" id="Database" order="8" />
</layout>
<layout-to-restore>
<window_info id="Image Layers" order="0" />
@ -249,24 +315,30 @@
<window_info anchor="right" content_ui="combo" id="Hierarchy" order="7" weight="0.25" />
</layout-to-restore>
</component>
<component name="TypeScriptGeneratedFilesManager">
<option name="version" value="1" />
</component>
<component name="editorHistoryManager">
<entry file="file://$PROJECT_DIR$/CAu_NLP_2019.iml">
<entry file="file://$PROJECT_DIR$/CAu_NLP_2019.iml" />
<entry file="file://$PROJECT_DIR$/src/main/java/TFIDF.java">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="17">
<caret line="1" column="41" selection-start-line="1" selection-start-column="41" selection-end-line="1" selection-end-column="41" />
<state relative-caret-position="357">
<caret line="21" column="7" lean-forward="true" selection-start-line="21" selection-start-column="7" selection-end-line="21" selection-end-column="7" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/pom.xml">
<entry file="file://$PROJECT_DIR$/target/classes/App.class">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="102">
<caret line="6" column="16" selection-start-line="6" selection-start-column="16" selection-end-line="6" selection-end-column="16" />
<state>
<folding>
<element signature="e#0#7649#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/Article.java">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="493">
<state relative-caret-position="722">
<caret line="59" column="28" selection-start-line="59" selection-start-column="18" selection-end-line="59" selection-end-column="28" />
<folding>
<element signature="e#511#512#0" expanded="true" />
@ -295,17 +367,34 @@
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/TFIDF.java">
<entry file="file://$PROJECT_DIR$/src/main/java/Result.java">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="357">
<caret line="21" column="7" lean-forward="true" selection-start-line="21" selection-start-column="7" selection-end-line="21" selection-end-column="7" />
<state relative-caret-position="238">
<caret line="14" column="31" selection-start-line="14" selection-start-column="31" selection-end-line="14" selection-end-column="31" />
<folding>
<element signature="e#208#209#0" expanded="true" />
<element signature="e#236#237#0" expanded="true" />
<element signature="e#278#279#0" expanded="true" />
<element signature="e#312#313#0" expanded="true" />
<element signature="e#342#343#0" expanded="true" />
<element signature="e#368#369#0" expanded="true" />
<element signature="e#406#407#0" expanded="true" />
<element signature="e#436#437#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/pom.xml">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="425">
<caret line="25" selection-start-line="25" selection-end-line="25" />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/src/main/java/App.java">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="156">
<caret line="16" column="54" lean-forward="true" selection-start-line="16" selection-start-column="54" selection-end-line="16" selection-end-column="54" />
<state relative-caret-position="154">
<caret line="135" column="44" lean-forward="true" selection-start-line="135" selection-start-column="44" selection-end-line="135" selection-end-column="44" />
<folding>
<element signature="imports" expanded="true" />
</folding>
@ -377,6 +466,7 @@
</state>
<state key="ProjectJDKs.UI">
<settings>
<last-edited>OpenJDK 11.0.2</last-edited>
<splitter-proportions>
<option name="proportions">
<list>

View File

@ -4,24 +4,38 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<dependencies>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.11.3</version>
</dependency>
<dependency>
<groupId>com.sparkjava</groupId>
<artifactId>spark-core</artifactId>
<version>2.8.0</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.8.5</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
</plugins>
</build>
<groupId>PQMAN</groupId>
<artifactId>CAu_NLP_2019</artifactId>

View File

@ -1,166 +1,154 @@
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class App {
/**
 * Console entry point.
 *
 * <p>Reads a comma-separated keyword list from standard input, queries the search pages of
 * The Sun, BBC and Sky Sports for every keyword, downloads each article found, scores every
 * article with TF-IDF against the first keyword, and prints the article URLs twice: once with
 * their scores in discovery order and once ordered from highest to lowest score.
 *
 * @param args unused
 * @throws Exception if a search page cannot be fetched or parsed (article-page failures are
 *                   caught and reported per article instead)
 */
public static void main(String[] args) throws Exception {
    String url[] = {"https://www.thesun.co.uk/?s=", "https://www.bbc.co.uk/search?q=", "https://www.skysports.com/search?q="};
    List<List<String>> documents = new ArrayList<>();
    List<List<String>> result = new ArrayList<>();
    List<String> result2 = new ArrayList<>();
    String document;
    List<String> goodUrl = new ArrayList<>();
    Article data = new Article();
    Document doc = null;
    Elements element = null;

    Scanner scanner = new Scanner(System.in);
    System.out.print("Please type keywords : ");
    String key[] = scanner.nextLine().split(",");
    scanner.close();

    // Phase 1: scrape each site's search-result page for every keyword.
    for (int j = 0; j < url.length; j++) {
        for (int i = 0; i < key.length; i++) {
            String urlTmp = url[j] + key[i];
            doc = Jsoup.connect(urlTmp).execute().parse(); // Load the page behind the URL into a Document.
            if (url[j].equals("https://www.thesun.co.uk/?s=")) {
                element = doc.select("div.search-results-wrap");
                for (Element el : element.select(".teaser-item")) {
                    // Only The Sun results are filtered by keyword in the teaser text.
                    if (el.select("p").text().toLowerCase().contains(key[i].toLowerCase())) {
                        data.setHeadline(el.select("p").text());
                        Elements elUrl = el.select(".teaser__copy-container a");
                        data.setUrl(elUrl.first().absUrl("href"));
                        String temp = el.select(".search-date").text();
                        data.setDate(changeDate(temp));
                        data.setSite("The Sun");
                    }
                }
            }
            if (url[j].equals("https://www.bbc.co.uk/search?q=")) {
                element = doc.select("section.search-content");
                for (Element el : element.select("li[data-result-number]")) {
                    data.setHeadline(el.select("h1").select("a").text());
                    Elements elUrl = el.select("a[href]");
                    data.setUrl(elUrl.first().absUrl("href"));
                    String temp = el.select(".display-date").text();
                    data.setDate(changeDate(temp));
                    data.setSite("BBC");
                }
            }
            if (url[j].equals("https://www.skysports.com/search?q=")) {
                element = doc.select("div.news-list");
                for (Element el : element.select("div.news-list__item")) {
                    data.setHeadline(el.select("h4").select("a").text());
                    Elements elUrl = el.select("a[href]");
                    data.setUrl(elUrl.first().absUrl("href"));
                    String temp = el.select(".label__timestamp").text();
                    data.setDate(changeDate2(temp));
                    data.setSite("SKYSPORTS");
                }
            }
        }
    }

    // Phase 2: download every article and turn its paragraph text into a token list.
    for (int i = 0; i < data.getHowManyData(); i++) {
        StringBuilder content = new StringBuilder();
        try {
            doc = Jsoup.connect(data.getUrl(i)).execute().parse();
            String site = data.getSite(i);
            // Compare by value: '==' on strings only tests reference identity.
            if ("The Sun".equals(site)) {
                element = doc.select("div.article__content");
                for (Element el : element.select("p")) {
                    content.append(el.text());
                }
                data.setContent(content.toString());
            } else if ("BBC".equals(site)) {
                element = doc.select("div#story-body");
                for (Element el : element.select("p")) {
                    content.append(el.text());
                }
                data.setContent(content.toString());
            } else if ("SKYSPORTS".equals(site)) {
                element = doc.select("div.article__body");
                for (Element el : element.select("p")) {
                    if (!el.hasClass("widge-marketing__text")) {
                        content.append(el.text());
                    }
                }
                data.setContent(content.toString());
            }
            if (data.getContent(i).length() > 0) {
                // Whitespace runs become commas so one split(",") yields the tokens.
                document = data.getContent((i)).replaceAll("\\s+", ",");
                goodUrl.add(data.getUrl(i));
                documents.add(new ArrayList<String>(Arrays.asList(document.split(","))));
            } else {
                System.out.println("No content: " + data.getUrl(i));
            }
        } catch (Exception e) {
            // Best effort: a single unreachable article must not stop the run.
            System.out.println("Something went wrong.: " + e);
        }
    }

    // Phase 3: score each document against the first keyword only.
    for (int i = 0; i < documents.size(); i++) {
        DecimalFormat df = new DecimalFormat("#.####");
        TFIDF calculator = new TFIDF();
        double tfidf = calculator.tfIdf(documents.get(i), documents, key[0]);
        result.add(Arrays.asList((df.format(tfidf)), goodUrl.get(i)));
    }
    for (int i = 0; i < result.size(); i++) {
        System.out.println("TF-IDF: " + result.get(i).get(0) + " Url: " + result.get(i).get(1));
    }

    // Phase 4: repeatedly pull out the highest remaining score (first one wins on ties),
    // which empties 'result' and leaves the URLs in 'result2' in descending score order.
    double max;
    int pos;
    while (result.size() > 0) {
        pos = 0;
        max = Double.parseDouble(result.get(0).get(0));
        for (int i2 = 0; i2 < result.size(); i2++) {
            if (Double.compare(max, Double.parseDouble(result.get(i2).get(0))) < 0) {
                max = Double.parseDouble(result.get(i2).get(0));
                pos = i2;
            }
        }
        result2.add(result.get(pos).get(1));
        result.remove(pos);
    }
    for (int i = 0; i < result2.size(); i++) {
        System.out.println("Url: " + result2.get(i));
    }
}
/**
 * Converts a day-first timestamp such as {@code "11/06/2019"} into the integer
 * {@code yyyyMMdd} (here {@code 20190611}).
 *
 * <p>The characters at index 2 and index 5 are treated as separators and ignored, whatever
 * they are; anything after the tenth character is ignored as well.
 *
 * @param date timestamp text whose first ten characters are {@code dd?MM?yyyy}
 * @return the date as {@code yyyyMMdd}, or {@code 0} when {@code date} is {@code null},
 *         shorter than ten characters, or not numeric in the day/month/year positions
 */
public static int changeDate2(String date) {
    // A malformed timestamp on one search result should not abort the whole scrape,
    // so fall back to 0 instead of throwing.
    if (date == null || date.length() < 10) {
        return 0;
    }
    String day = date.substring(0, 2);
    String month = date.substring(3, 5);
    String year = date.substring(6, 10);
    try {
        return Integer.parseInt(year + month + day);
    } catch (NumberFormatException e) {
        return 0;
    }
}
/**
 * Placeholder date converter: no parsing is implemented, so every input yields {@code 0}.
 *
 * @param date date text scraped from a search result (currently ignored)
 * @return always {@code 0}
 */
public static int changeDate(String date) {
    // TODO: parse the site-specific date text into yyyyMMdd, mirroring changeDate2.
    return 0;
}
import com.google.gson.Gson;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static spark.Spark.get;
import static spark.Spark.port;
/**
 * Small HTTP service that answers keyword searches for the web front end.
 *
 * <p>{@code GET /search?s=...} scrapes the search pages of The Sun, BBC and Sky Sports,
 * downloads every article found, scores each article with TF-IDF against the first keyword
 * and returns the scored URLs as a JSON array of {@link Result}.
 */
public class App {

    private static final String SUN_SEARCH = "https://www.thesun.co.uk/?s=";
    private static final String BBC_SEARCH = "https://www.bbc.co.uk/search?q=";
    private static final String SKY_SEARCH = "https://www.skysports.com/search?q=";

    private static final String SITE_SUN = "The Sun";
    private static final String SITE_BBC = "BBC";
    private static final String SITE_SKY = "SKYSPORTS";

    /**
     * Starts the server on port 8080 and registers the {@code /search} route.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        port(8080);
        get("/search", (req, res) -> {
            String query = req.queryParams("s");
            // Without this guard a missing parameter surfaces as a NullPointerException (HTTP 500).
            if (query == null || query.trim().isEmpty()) {
                res.status(400);
                return "Missing required query parameter 's'";
            }
            // NOTE(review): "/+" is a regex for one or more slashes. If keywords are meant to be
            // separated by '+', note that a '+' in a query string is normally decoded to a space
            // before it reaches this code - confirm the separator with the front end.
            String[] words = query.split("/+");
            res.type("application/json");
            return new Gson().toJson(getResults(words));
        });
    }

    /**
     * Runs the full search pipeline for the given keywords.
     *
     * @param words keywords to search for; every keyword is sent to every site, but only
     *              {@code words[0]} is used as the TF-IDF term
     * @return one {@link Result} per article whose body text could be extracted, in discovery order
     * @throws Exception if a search-result page cannot be fetched or parsed; failures on
     *                   individual article pages are logged and that article is skipped
     */
    private static ArrayList<Result> getResults(String[] words) throws Exception {
        String[] url = {SUN_SEARCH, BBC_SEARCH, SKY_SEARCH};
        Article data = new Article();

        // Phase 1: collect headline / url / date / site for every search hit.
        for (String s : url) {
            for (String word : words) {
                Document doc = Jsoup.connect(s + word).execute().parse();
                collectHits(doc, s, word, data);
            }
        }

        // Phase 2: download each article and tokenise its body text.
        List<List<String>> documents = new ArrayList<>();
        List<String> goodUrl = new ArrayList<>();
        for (int i = 0; i < data.getHowManyData(); i++) {
            try {
                String articleUrl = data.getUrl(i);
                Document page = Jsoup.connect(articleUrl).execute().parse();
                String text = extractBody(page, data.getSite(i));
                data.setContent(text);
                // Tokenise the text just extracted instead of reading it back from Article by
                // index: a failed fetch stores no content, so an index-based lookup would
                // presumably drift for every later article - verify against Article.
                if (text.length() > 0) {
                    goodUrl.add(articleUrl);
                    // Whitespace runs become commas so one split(",") yields the tokens.
                    String document = text.replaceAll("\\s+", ",");
                    documents.add(new ArrayList<String>(Arrays.asList(document.split(","))));
                } else {
                    System.out.println("No content: " + articleUrl);
                }
            } catch (Exception e) {
                // Best effort: one unreachable article must not fail the whole request.
                System.out.println("Something went wrong.: " + e);
            }
        }

        // Phase 3: score every document against the first keyword.
        ArrayList<Result> results = new ArrayList<>();
        DecimalFormat df = new DecimalFormat("#.####");
        for (int i = 0; i < documents.size(); i++) {
            TFIDF calculator = new TFIDF();
            double tfidf = calculator.tfIdf(documents.get(i), documents, words[0]);
            results.add(new Result(df.format(tfidf), goodUrl.get(i)));
        }
        System.out.println("End Request");
        return results;
    }

    /**
     * Records every hit found on one search-result page into {@code data}.
     * Hits without a link are skipped so that a single odd result cannot abort the request.
     */
    private static void collectHits(Document doc, String searchUrl, String word, Article data) {
        if (searchUrl.equals(SUN_SEARCH)) {
            Elements wrap = doc.select("div.search-results-wrap");
            for (Element el : wrap.select(".teaser-item")) {
                String teaser = el.select("p").text();
                // Only The Sun results are filtered by keyword in the teaser text.
                if (!teaser.toLowerCase().contains(word.toLowerCase())) {
                    continue;
                }
                Element link = el.select(".teaser__copy-container a").first();
                if (link == null) {
                    continue;
                }
                data.setHeadline(teaser);
                data.setUrl(link.absUrl("href"));
                data.setDate(changeDate(el.select(".search-date").text()));
                data.setSite(SITE_SUN);
            }
        }
        if (searchUrl.equals(BBC_SEARCH)) {
            Elements section = doc.select("section.search-content");
            for (Element el : section.select("li[data-result-number]")) {
                Element link = el.select("a[href]").first();
                if (link == null) {
                    continue;
                }
                data.setHeadline(el.select("h1").select("a").text());
                data.setUrl(link.absUrl("href"));
                data.setDate(changeDate(el.select(".display-date").text()));
                data.setSite(SITE_BBC);
            }
        }
        if (searchUrl.equals(SKY_SEARCH)) {
            Elements list = doc.select("div.news-list");
            for (Element el : list.select("div.news-list__item")) {
                Element link = el.select("a[href]").first();
                if (link == null) {
                    continue;
                }
                data.setHeadline(el.select("h4").select("a").text());
                data.setUrl(link.absUrl("href"));
                data.setDate(changeDate2(el.select(".label__timestamp").text()));
                data.setSite(SITE_SKY);
            }
        }
    }

    /**
     * Extracts the article body from a downloaded page using the site-specific container.
     *
     * @return the paragraph texts joined by single spaces, or {@code ""} when nothing matched
     *         or the site is unknown
     */
    private static String extractBody(Document page, String site) {
        Elements container;
        switch (site) {
            case SITE_SUN:
                container = page.select("div.article__content");
                break;
            case SITE_BBC:
                container = page.select("div#story-body");
                break;
            case SITE_SKY:
                container = page.select("div.article__body");
                break;
            default:
                return "";
        }
        StringBuilder content = new StringBuilder();
        for (Element el : container.select("p")) {
            // Sky Sports mixes marketing paragraphs into the article body; drop them.
            if (SITE_SKY.equals(site) && el.hasClass("widge-marketing__text")) {
                continue;
            }
            String paragraph = el.text();
            if (paragraph.isEmpty()) {
                continue;
            }
            // Separate paragraphs with a space; otherwise the last word of one paragraph and
            // the first word of the next fuse into a single bogus token.
            if (content.length() > 0) {
                content.append(' ');
            }
            content.append(paragraph);
        }
        return content.toString();
    }

    /**
     * Converts a day-first timestamp such as {@code "11/06/2019"} into the integer
     * {@code yyyyMMdd}. The characters at index 2 and 5 are treated as separators.
     *
     * @return the date as {@code yyyyMMdd}, or {@code 0} when the text is {@code null}, shorter
     *         than ten characters, or not numeric in the day/month/year positions
     */
    private static int changeDate2(String date) {
        // Called outside any try/catch during scraping, so it must not throw on odd input.
        if (date == null || date.length() < 10) {
            return 0;
        }
        String day = date.substring(0, 2);
        String month = date.substring(3, 5);
        String year = date.substring(6, 10);
        try {
            return Integer.parseInt(year + month + day);
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    /**
     * Placeholder date converter: no parsing is implemented, so every input yields {@code 0}.
     *
     * @param date date text scraped from a search result (currently ignored)
     * @return always {@code 0}
     */
    private static int changeDate(String date) {
        // TODO: parse the site-specific date text into yyyyMMdd, mirroring changeDate2.
        return 0;
    }
}

View File

@ -0,0 +1,26 @@
/**
 * A single scored search hit: the formatted TF-IDF score of an article and the article's URL.
 *
 * <p>Instances are turned into JSON by the search endpoint, so the field names and their
 * declaration order are kept exactly as they are.
 */
public class Result {

    /** TF-IDF score, already formatted as text. */
    private String tfidf;

    /** Absolute URL of the scored article. */
    private String url;

    /**
     * Creates a result from a formatted score and an article URL.
     *
     * @param tfidf formatted TF-IDF score
     * @param url   article URL
     */
    public Result(final String tfidf, final String url) {
        this.tfidf = tfidf;
        this.url = url;
    }

    /** @return the formatted TF-IDF score */
    public String getTfidf() {
        return this.tfidf;
    }

    /** @return the article URL */
    public String getUrl() {
        return this.url;
    }

    /** @param tfidf the formatted TF-IDF score to store */
    public void setTfidf(final String tfidf) {
        this.tfidf = tfidf;
    }

    /** @param url the article URL to store */
    public void setUrl(final String url) {
        this.url = url;
    }
}