Add files via upload

Récupération du content des articles stockés dans la string "Content" de la classe Article.
This commit is contained in:
noelq 2019-06-05 21:46:13 +09:00 committed by GitHub
parent c569fc531d
commit d42abcf3dc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 247 additions and 0 deletions

184
App.java Normal file
View File

@ -0,0 +1,184 @@
import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.StringTokenizer;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Command-line news scraper.
 *
 * <p>Reads a comma-separated list of keywords from standard input, queries the
 * search pages of The Sun, BBC and Sky Sports for each keyword, stores the
 * headline, link, date and site of every hit in an {@link Article}, then
 * downloads each linked page, stores its paragraph text as the article content
 * and prints all collected fields to standard output.
 */
public class App {

    /** Search-page URL prefixes; the (URL-encoded) keyword is appended to them. */
    private static final String SUN_SEARCH_URL = "https://www.thesun.co.uk/?s=";
    private static final String BBC_SEARCH_URL = "https://www.bbc.co.uk/search?q=";
    private static final String SKY_SEARCH_URL = "https://www.skysports.com/search?q=";

    /** Site labels stored in {@link Article} and used to pick the content selector. */
    private static final String SITE_SUN = "The Sun";
    private static final String SITE_BBC = "BBC";
    private static final String SITE_SKY = "SKYSPORTS";

    /** English month names, index 0 = January; used to resolve full and abbreviated names. */
    private static final String[] MONTH_NAMES = {
        "January", "February", "March", "April", "May", "June",
        "July", "August", "September", "October", "November", "December"
    };

    /**
     * Program entry point: collects search hits for the typed keywords, then
     * fetches and prints every article.
     *
     * <p>A request that fails with an {@link IOException} is reported on
     * standard error and skipped instead of aborting the whole run.
     *
     * @param args ignored
     * @throws Exception kept for backward compatibility with the original signature
     */
    public static void main(String[] args) throws Exception {
        String[] searchUrls = {SUN_SEARCH_URL, BBC_SEARCH_URL, SKY_SEARCH_URL};
        Article data = new Article();
        String[] keywords = readKeywords();

        for (String searchUrl : searchUrls) {
            for (String rawKeyword : keywords) {
                String keyword = rawKeyword.trim();
                if (keyword.isEmpty()) {
                    continue;
                }
                Document doc;
                try {
                    // Load the search-result page for this site/keyword pair.
                    // The keyword is encoded so spaces and reserved characters
                    // cannot corrupt the query string.
                    doc = Jsoup.connect(searchUrl + URLEncoder.encode(keyword, "UTF-8"))
                            .execute()
                            .parse();
                } catch (IOException e) {
                    System.err.println("Could not fetch search results from " + searchUrl
                            + " for \"" + keyword + "\": " + e);
                    continue;
                }
                collectResults(searchUrl, keyword, doc, data);
            }
        }

        for (int i = 0; i < data.getHowManyData(); i++) {
            String content = "";
            try {
                Document doc = Jsoup.connect(data.getUrl(i)).execute().parse();
                content = extractContent(doc, data.getSite(i));
            } catch (IOException e) {
                System.err.println("Could not fetch article " + data.getUrl(i) + ": " + e);
            }
            // Always store exactly one content entry per article so that index i
            // stays aligned with the headline/url/date/site lists.
            data.setContent(content);

            System.out.println(data.getDate(i));
            System.out.println(data.getHeadline(i));
            System.out.println(data.getUrl(i));
            System.out.println(data.getSite(i));
            System.out.println(data.getContent(i));
        }
    }

    /** Prompts on standard output and returns the comma-separated keywords typed on standard input. */
    private static String[] readKeywords() {
        try (Scanner scanner = new Scanner(System.in)) {
            System.out.print("Please type keywords : ");
            String line = scanner.hasNextLine() ? scanner.nextLine() : "";
            return line.split(",");
        }
    }

    /** Extracts every search hit from one result page and appends it to {@code data}. */
    private static void collectResults(String searchUrl, String keyword, Document doc, Article data) {
        if (SUN_SEARCH_URL.equals(searchUrl)) {
            String needle = keyword.toLowerCase();
            Elements results = doc.select("div.search-results-wrap");
            for (Element el : results.select(".teaser-item")) {
                String headline = el.select("p").text();
                // The Sun hits are only kept when the teaser text mentions the keyword.
                if (headline.toLowerCase().contains(needle)) {
                    addResult(data, headline, el.select(".teaser__copy-container a"),
                            changeDate(el.select(".search-date").text()), SITE_SUN);
                }
            }
        } else if (BBC_SEARCH_URL.equals(searchUrl)) {
            Elements results = doc.select("section.search-content");
            for (Element el : results.select("li[data-result-number]")) {
                addResult(data, el.select("h1").select("a").text(), el.select("a[href]"),
                        changeDate(el.select(".display-date").text()), SITE_BBC);
            }
        } else if (SKY_SEARCH_URL.equals(searchUrl)) {
            Elements results = doc.select("div.news-list");
            for (Element el : results.select("div.news-list__item")) {
                addResult(data, el.select("h4").select("a").text(), el.select("a[href]"),
                        changeDate2(el.select(".label__timestamp").text()), SITE_SKY);
            }
        }
    }

    /**
     * Stores one hit in {@code data}, or nothing at all when the hit has no usable link.
     * Writing all four fields together keeps the parallel lists in {@link Article} aligned.
     */
    private static void addResult(Article data, String headline, Elements links, int date, String site) {
        Element link = links.first();
        if (link == null) {
            return;
        }
        String url = link.absUrl("href");
        if (url.isEmpty()) {
            return;
        }
        data.setHeadline(headline);
        data.setUrl(url);
        data.setDate(date);
        data.setSite(site);
    }

    /** Concatenates the text of the article-body paragraphs of {@code doc} for the given site label. */
    private static String extractContent(Document doc, String site) {
        String bodySelector;
        if (SITE_SUN.equals(site)) {
            bodySelector = "div.article__content";
        } else if (SITE_BBC.equals(site)) {
            bodySelector = "div#story-body";
        } else if (SITE_SKY.equals(site)) {
            bodySelector = "div.article__body";
        } else {
            return "";
        }
        boolean skySports = SITE_SKY.equals(site);
        StringBuilder content = new StringBuilder();
        for (Element paragraph : doc.select(bodySelector).select("p")) {
            // NOTE(review): "widge-marketing__text" is kept exactly as in the original;
            // it may be a typo for "widget-..." - confirm against the live markup.
            if (skySports && paragraph.hasClass("widge-marketing__text")) {
                continue;
            }
            content.append(paragraph.text());
        }
        return content.toString();
    }

    /**
     * Converts a numeric date of the form {@code dd?MM?yyyy} (for example
     * {@code "05/06/2019"}) into the integer {@code yyyyMMdd}.
     *
     * <p>The two separator characters are ignored whatever they are, and any
     * text after the tenth character is ignored as well.
     *
     * @param date text starting with a two-digit day, a two-digit month and a four-digit year
     * @return the date as {@code yyyyMMdd}, or {@code 0} when {@code date} is
     *         {@code null}, too short, or has non-digits in the day/month/year positions
     */
    public static int changeDate2(String date) {
        if (date == null) {
            return 0;
        }
        String text = date.trim();
        if (text.length() < 10) {
            return 0;
        }
        String digits = text.substring(6, 10) + text.substring(3, 5) + text.substring(0, 2);
        for (int i = 0; i < digits.length(); i++) {
            char c = digits.charAt(i);
            if (c < '0' || c > '9') {
                return 0;
            }
        }
        return Integer.parseInt(digits);
    }

    /**
     * Converts a textual date of the form {@code "<day> <month name> <year>"}
     * (for example {@code "5 June 2019"} or {@code "5 Jun 2019"}) into the
     * integer {@code yyyyMMdd}.
     *
     * <p>The month may be a full English month name or any abbreviation of at
     * least three letters, in any letter case. Tokens after the year are ignored.
     *
     * @param date the date text
     * @return the date as {@code yyyyMMdd}, or {@code 0} when the text cannot be
     *         interpreted (null, fewer than three tokens, unknown month,
     *         non-numeric or out-of-range day/year)
     */
    public static int changeDate(String date) {
        if (date == null) {
            return 0;
        }
        String[] parts = date.trim().split("\\s+");
        if (parts.length < 3) {
            return 0;
        }
        int day = parseNonNegative(parts[0]);
        int month = monthNumber(parts[1]);
        int year = parseNonNegative(parts[2]);
        // Range checks keep each component inside its own decimal slot of yyyyMMdd.
        if (day < 1 || day > 31 || month == 0 || year < 1 || year > 9999) {
            return 0;
        }
        return year * 10000 + month * 100 + day;
    }

    /** Returns the 1-based month for a full or abbreviated (3+ letters) English month name, or 0. */
    private static int monthNumber(String name) {
        if (name.length() < 3) {
            return 0;
        }
        for (int i = 0; i < MONTH_NAMES.length; i++) {
            String full = MONTH_NAMES[i];
            if (name.length() <= full.length() && full.regionMatches(true, 0, name, 0, name.length())) {
                return i + 1;
            }
        }
        return 0;
    }

    /** Parses a non-negative decimal integer; returns -1 when {@code text} is not one. */
    private static int parseNonNegative(String text) {
        try {
            int value = Integer.parseInt(text);
            return value >= 0 ? value : -1;
        } catch (NumberFormatException e) {
            return -1;
        }
    }
}

63
Article.java Normal file
View File

@ -0,0 +1,63 @@
import java.util.ArrayList;
public class Article {
public Article() {
date = new ArrayList<Integer>();
Headline = new ArrayList<String>();
Url = new ArrayList<String>();
Site = new ArrayList<String>();
Content = new ArrayList<String>();
}
private ArrayList<Integer> date;
private ArrayList<String> Headline;
private ArrayList<String> Url;
private ArrayList<String> Site;
private ArrayList<String> Content;
public void setDate(int num) {
date.add(num);
}
public void setHeadline(String head) {
Headline.add(head);
}
public void setUrl(String url) {
Url.add(url);
}
public void setSite(String site) {
Site.add(site);
}
public void setContent(String content) {
Content.add(content);
}
public int getHowManyData() {
return Headline.size();
}
public int getDate(int num) {
return date.get(num);
}
public String getHeadline(int num) {
return Headline.get(num);
}
public String getUrl(int num) {
return Url.get(num);
}
public String getSite(int num) {
return Site.get(num);
}
public String getContent(int num) {
return Content.get(num);
}
}