From d42abcf3dc63dcac5dbe31e5d86297583dbeee9f Mon Sep 17 00:00:00 2001 From: noelq Date: Wed, 5 Jun 2019 21:46:13 +0900 Subject: [PATCH] Add files via upload MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Récupération du content des articles stockés dans la string "Content" de la classe Article. --- App.java | 184 +++++++++++++++++++++++++++++++++++++++++++++++++++ Article.java | 63 ++++++++++++++++++ 2 files changed, 247 insertions(+) create mode 100644 App.java create mode 100644 Article.java diff --git a/App.java b/App.java new file mode 100644 index 0000000..e69773a --- /dev/null +++ b/App.java @@ -0,0 +1,184 @@ + + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Scanner; +import java.util.StringTokenizer; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +public class App { + public static void main(String[] args) throws Exception { + String url[] = {"https://www.thesun.co.uk/?s=", "https://www.bbc.co.uk/search?q=", "https://www.skysports.com/search?q="}; + //ArrayList key = new ArrayList(); + Article data = new Article(); + //key.add("salah"); + //key.add("Liverpool"); + //key.add("Champions league"); + // 가져오고 싶은 정보가 있는 웹페이지의 url + Document doc = null; + Elements element = null; + + Scanner scanner = new Scanner(System.in); + System.out.print("Please type keywords : "); + String key[] = scanner.nextLine().split(","); + scanner.close(); + for (int j = 0; j < url.length; j++) { + for (int i = 0; i < key.length; i++) { + String urlTmp = url[j] + key[i]; + doc = Jsoup.connect(urlTmp).execute().parse(); // Document에 url 페이지의 데이터를 가져온다. + /* } catch (IOException e) { + e.printStackTrace(); + }*/ + if (url[j].equals("https://www.thesun.co.uk/?s=")) { + element = doc.select("div.search-results-wrap"); + for (Element el : element.select(".teaser-item")) { + if (el.select("p").text().toLowerCase().contains(key[i].toLowerCase())) { + data.setHeadline(el.select("p").text()); + Elements elUrl = el.select(".teaser__copy-container a"); + data.setUrl(elUrl.first().absUrl("href")); + String temp = el.select(".search-date").text(); + data.setDate(changeDate(temp)); + data.setSite("The Sun"); + + + } + } + } + if (url[j].equals("https://www.bbc.co.uk/search?q=")) { + element = doc.select("section.search-content"); + for (Element el : element.select("li[data-result-number]")) { + data.setHeadline(el.select("h1").select("a").text()); + Elements elUrl = el.select("a[href]"); + data.setUrl(elUrl.first().absUrl("href")); + String temp = el.select(".display-date").text(); + data.setDate(changeDate(temp)); + data.setSite("BBC"); + } + } + if (url[j].equals("https://www.skysports.com/search?q=")) { + element = doc.select("div.news-list"); + for (Element el : element.select("div.news-list__item")) { + data.setHeadline(el.select("h4").select("a").text()); + Elements elUrl = el.select("a[href]"); + data.setUrl(elUrl.first().absUrl("href")); + String temp = el.select(".label__timestamp").text(); + data.setDate(changeDate2(temp)); + data.setSite("SKYSPORTS"); + } + } + } + } + + String content = ""; + for(int i = 0; i < data.getHowManyData(); i++) { + content = ""; + doc = Jsoup.connect(data.getUrl(i)).execute().parse(); + if (data.getSite(i) == "The Sun") { + element = doc.select("div.article__content"); + for (Element el : element.select("p")) { + content += el.text(); + } + data.setContent(content); + } + else if (data.getSite(i) == "BBC") { + element = doc.select("div#story-body"); + for (Element el : element.select("p")) { + content += el.text(); + } + data.setContent(content); + } + else if (data.getSite(i) == "SKYSPORTS") { + element = doc.select("div.article__body"); + for (Element el : element.select("p")) { + if (!el.hasClass("widge-marketing__text")) { + content += el.text(); + } + } + data.setContent(content); + } + System.out.println(data.getDate(i)); + System.out.println(data.getHeadline(i)); + System.out.println(data.getUrl(i)); + System.out.println(data.getSite(i)); + System.out.println(data.getContent(i)); + } + + } + + public static int changeDate2(String date) { + date = date.substring(0, 2) + date.substring(2 + 1); + date = date.substring(0, 4) + date.substring(4 + 1); + String year = date.substring(4, 8); + String month = date.substring(2,4); + String day = date.substring(0, 2); + String fdate = year + month + day; + + int mydate = Integer.parseInt(fdate); + return mydate; + } + + public static int changeDate(String date) { + int formdate = 0; + String sp[] = date.split(" "); + formdate += Integer.parseInt(sp[2]) * 10000; + formdate += Integer.parseInt(sp[0]); + + switch (sp[1]) { + case "January": + case "Jan": + formdate += 100; + break; + case "February": + case "Feb": + formdate += 200; + break; + case "March": + case "Mar": + formdate += 300; + break; + case "April": + case "Apr": + formdate += 400; + break; + case "May": + formdate += 500; + break; + case "June": + case "Jun": + formdate += 600; + break; + case "July": + case "Jul": + formdate += 700; + break; + case "August": + case "Aug": + formdate += 800; + break; + case "September": + case "Sep": + formdate += 900; + break; + case "October": + case "Oct": + formdate += 1000; + break; + case "November": + case "Nov": + formdate += 1100; + break; + case "December": + case "Dec": + formdate += 1200; + break; + + } + + return formdate; + + } +} \ No newline at end of file diff --git a/Article.java b/Article.java new file mode 100644 index 0000000..1db8a3c --- /dev/null +++ b/Article.java @@ -0,0 +1,63 @@ +import java.util.ArrayList; + +public class Article { + + public Article() { + date = new ArrayList(); + Headline = new ArrayList(); + Url = new ArrayList(); + Site = new ArrayList(); + Content = new ArrayList(); + } + + private ArrayList date; + private ArrayList Headline; + private ArrayList Url; + private ArrayList Site; + private ArrayList Content; + + public void setDate(int num) { + date.add(num); + } + + public void setHeadline(String head) { + Headline.add(head); + } + + public void setUrl(String url) { + Url.add(url); + } + + public void setSite(String site) { + Site.add(site); + } + + public void setContent(String content) { + Content.add(content); + } + + + public int getHowManyData() { + return Headline.size(); + } + + public int getDate(int num) { + return date.get(num); + } + + public String getHeadline(int num) { + return Headline.get(num); + } + + public String getUrl(int num) { + return Url.get(num); + } + + public String getSite(int num) { + return Site.get(num); + } + + public String getContent(int num) { + return Content.get(num); + } +} \ No newline at end of file