Page MenuHomedesp's stash

GoogleSearch.java
No OneTemporary

GoogleSearch.java

package me.despawningbone.discordbot.utils;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringEscapeUtils;
/**
 * Minimal scraper for the HTML results page of Google web search.
 *
 * <p>All members are static. Search hits are returned in page order as
 * {@link Entry} objects whose key is the result title and whose value is the
 * result URL.
 */
public class GoogleSearch {

    /**
     * User-Agent header sent with every request. According to the original
     * author's note, Google answers with HTTP 403 when the header is missing.
     */
    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0)";

    /** Escaped ampersand that separates the target URL from Google's tracking parameters. */
    private static final String URL_TERMINATOR = "&amp;";

    /** End of an opening tag whose last attribute is quoted. */
    private static final String TAG_CLOSE = "\">";

    /**
     * Matches one result heading: group 1 is everything after {@code /url?q=}
     * inside the href, group 2 is the anchor body. Compiled once because the
     * expression never changes. {@code .} does not match line terminators, so a
     * heading is only recognised when it sits on a single line.
     */
    private static final Pattern RESULT_LINK = Pattern.compile(
            Pattern.quote("<h3 class=\"r\"><a href=\"/url?q=") + "(.*?)"
            + Pattern.quote(TAG_CLOSE) + "(.*?)"
            + Pattern.quote("</a></h3>"));

    /**
     * Runs a Google search and returns the parsed hits.
     *
     * @param search
     *            the query text; it is appended to the request URL verbatim, so
     *            the caller is responsible for URL-encoding it
     * @param num
     *            value sent as the {@code num} request parameter
     * @return the hits as (title, url) entries, in page order
     * @throws IOException
     *             if the results page cannot be fetched
     */
    public static List<Entry<String, String>> search(String search, int num) throws IOException {
        String query = "https://www.google.com/search?q=" + search + "&num=" + num;
        return parseLinks(getSearchContent(query));
    }

    /**
     * Downloads the given URL and returns its body decoded as UTF-8.
     *
     * @param path
     *            the complete Google search URL
     * @return the response body as a {@link String}
     * @throws IOException
     *             if the URL is malformed or the request fails
     */
    public static String getSearchContent(String path) throws IOException {
        final URLConnection connection = new URL(path).openConnection();
        connection.setRequestProperty("User-Agent", USER_AGENT);
        // try-with-resources: the stream used to be left open after reading.
        try (InputStream stream = connection.getInputStream()) {
            return IOUtils.toString(stream, "UTF-8");
        }
    }

    /**
     * Extracts every result heading of the form
     * {@code <h3 class="r"><a href="/url?q=URL&amp;...">TITLE</a></h3>} from a
     * results page.
     *
     * <p>The URL is the href content up to the first {@code &amp;}; when the
     * href contains no {@code &amp;} the whole href content is used. The title
     * is the anchor text following the last {@code ">} inside the anchor body
     * (or the whole body when there is none). Both values are XML-unescaped.
     *
     * @param html
     *            the page source
     * @return the hits as (title, url) entries, in page order; empty if nothing matched
     */
    @SuppressWarnings("deprecation") // StringEscapeUtils is deprecated in commons-lang3
    public static List<Entry<String, String>> parseLinks(String html) {
        List<Entry<String, String>> result = new ArrayList<>();
        Matcher m = RESULT_LINK.matcher(html);
        while (m.find()) {
            // Look for the terminator only inside the href so that a missing
            // "&amp;" can neither throw nor pull title text into the URL.
            String href = m.group(1);
            int urlEnd = href.indexOf(URL_TERMINATOR);
            String url = urlEnd >= 0 ? href.substring(0, urlEnd) : href;

            // Keep only the text after the last opening tag nested in the anchor.
            String anchorBody = m.group(2);
            int lastTagClose = anchorBody.lastIndexOf(TAG_CLOSE);
            String title = lastTagClose >= 0
                    ? anchorBody.substring(lastTagClose + TAG_CLOSE.length())
                    : anchorBody;

            result.add(new AbstractMap.SimpleEntry<>(
                    StringEscapeUtils.unescapeXml(title),
                    StringEscapeUtils.unescapeXml(url)));
        }
        return result;
    }
}

File Metadata

Mime Type
text/html
Expires
Wed, Mar 4, 5:18 PM (1 d, 9 h)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
59/20/0c5bc905e558db6da4d643a49ab2

Event Timeline