package me.despawningbone.discordbot.utils; import java.io.IOException; import java.io.InputStream; import java.net.URL; import java.net.URLConnection; import java.util.AbstractMap; import java.util.ArrayList; import java.util.List; import java.util.Map.Entry; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.io.IOUtils; import org.apache.commons.lang3.StringEscapeUtils; @SuppressWarnings("deprecation") public class GoogleSearch { public static List> search(String search, int num) throws IOException { String query = "https://www.google.com/search?q=" + search + "&num=" + num; //String query = "https://www.google.com/search?q=site:https://osu.ppy.sh/+searchbigblack&num=100&gbv=1&sei=u4V8Wo2GIczfvASZtoaQCQ"; String page = getSearchContent(query); return parseLinks(page); } /** * The method will return the search page result in a {@link String} object * * @param path * the google search query * @return the content as {@link String} object * @throws Exception */ public static String getSearchContent(String path) throws IOException { final String agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0)"; //final String agent = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"; URL url = new URL(path); final URLConnection connection = url.openConnection(); /** * User-Agent is mandatory otherwise Google will return HTTP response * code: 403 */ connection.setRequestProperty("User-Agent", agent); final InputStream stream = connection.getInputStream(); return IOUtils.toString(stream, "UTF-8"); } /** * Parse all links * * @param html * the page * @return the list with all URLSs * @throws Exception */ public static List> parseLinks(String html) { List> result = new ArrayList>(); //System.out.println(html); String pattern1 = "

"; String pattern3 = "

"; Pattern p = Pattern.compile(Pattern.quote(pattern1) + "(.*?)" + Pattern.quote(pattern2) + "(.*?)" + Pattern.quote(pattern3)); //result = new ArrayList(Arrays.asList(html.split(" "))).stream().filter(p.asPredicate()).collect(Collectors.toList()); Matcher m = p.matcher(html); while (m.find()) { String section = m.group(0).trim(); String url = "", title = ""; url = section.substring(section.indexOf("/url?q=") + 7); url = StringEscapeUtils.unescapeXml(url.substring(0, url.indexOf("&"))); title = section.substring(section.lastIndexOf("\">") + 2); title = StringEscapeUtils.unescapeXml(title.substring(0, title.lastIndexOf(""))); result.add(new AbstractMap.SimpleEntry(title, url)); } return result; } }