import java.io.IOException; import java.util.ArrayList; import java.util.Scanner; import java.util.StringTokenizer; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class App { public static void main(String[] args) throws Exception { String url[] = {"https://www.thesun.co.uk/?s=", "https://www.bbc.co.uk/search?q=", "https://www.skysports.com/search?q="}; //ArrayList key = new ArrayList(); Article data = new Article(); //key.add("salah"); //key.add("Liverpool"); //key.add("Champions league"); // 가져오고 싶은 정보가 있는 웹페이지의 url Document doc = null; Elements element = null; Scanner scanner = new Scanner(System.in); System.out.print("Please type keywords : "); String key[] = scanner.nextLine().split(","); scanner.close(); for (int j = 0; j < url.length; j++) { for (int i = 0; i < key.length; i++) { String urlTmp = url[j] + key[i]; doc = Jsoup.connect(urlTmp).execute().parse(); // Document에 url 페이지의 데이터를 가져온다. /* } catch (IOException e) { e.printStackTrace(); }*/ if (url[j].equals("https://www.thesun.co.uk/?s=")) { element = doc.select("div.search-results-wrap"); for (Element el : element.select(".teaser-item")) { if (el.select("p").text().toLowerCase().contains(key[i].toLowerCase())) { data.setHeadline(el.select("p").text()); Elements elUrl = el.select(".teaser__copy-container a"); data.setUrl(elUrl.first().absUrl("href")); String temp = el.select(".search-date").text(); data.setDate(changeDate(temp)); data.setSite("The Sun"); } } } if (url[j].equals("https://www.bbc.co.uk/search?q=")) { element = doc.select("section.search-content"); for (Element el : element.select("li[data-result-number]")) { data.setHeadline(el.select("h1").select("a").text()); Elements elUrl = el.select("a[href]"); data.setUrl(elUrl.first().absUrl("href")); String temp = el.select(".display-date").text(); data.setDate(changeDate(temp)); data.setSite("BBC"); } } if (url[j].equals("https://www.skysports.com/search?q=")) { element = doc.select("div.news-list"); for (Element el : element.select("div.news-list__item")) { data.setHeadline(el.select("h4").select("a").text()); Elements elUrl = el.select("a[href]"); data.setUrl(elUrl.first().absUrl("href")); String temp = el.select(".label__timestamp").text(); data.setDate(changeDate2(temp)); data.setSite("SKYSPORTS"); } } } } String content = ""; for(int i = 0; i < data.getHowManyData(); i++) { content = ""; doc = Jsoup.connect(data.getUrl(i)).execute().parse(); if (data.getSite(i) == "The Sun") { element = doc.select("div.article__content"); for (Element el : element.select("p")) { content += el.text(); } data.setContent(content); } else if (data.getSite(i) == "BBC") { element = doc.select("div#story-body"); for (Element el : element.select("p")) { content += el.text(); } data.setContent(content); } else if (data.getSite(i) == "SKYSPORTS") { element = doc.select("div.article__body"); for (Element el : element.select("p")) { if (!el.hasClass("widge-marketing__text")) { content += el.text(); } } data.setContent(content); } System.out.println(data.getDate(i)); System.out.println(data.getHeadline(i)); System.out.println(data.getUrl(i)); System.out.println(data.getSite(i)); System.out.println(data.getContent(i)); } } public static int changeDate2(String date) { date = date.substring(0, 2) + date.substring(2 + 1); date = date.substring(0, 4) + date.substring(4 + 1); String year = date.substring(4, 8); String month = date.substring(2,4); String day = date.substring(0, 2); String fdate = year + month + day; int mydate = Integer.parseInt(fdate); return mydate; } public static int changeDate(String date) { int formdate = 0; String sp[] = date.split(" "); formdate += Integer.parseInt(sp[2]) * 10000; formdate += Integer.parseInt(sp[0]); switch (sp[1]) { case "January": case "Jan": formdate += 100; break; case "February": case "Feb": formdate += 200; break; case "March": case "Mar": formdate += 300; break; case "April": case "Apr": formdate += 400; break; case "May": formdate += 500; break; case "June": case "Jun": formdate += 600; break; case "July": case "Jul": formdate += 700; break; case "August": case "Aug": formdate += 800; break; case "September": case "Sep": formdate += 900; break; case "October": case "Oct": formdate += 1000; break; case "November": case "Nov": formdate += 1100; break; case "December": case "Dec": formdate += 1200; break; } return formdate; } }