Page Menu
Home
desp's stash
Search
Configure Global Search
Log In
Files
F575964
GoogleSearch.java
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
3 KB
Subscribers
None
GoogleSearch.java
View Options
package
me.despawningbone.discordbot.utils
;
import
java.io.IOException
;
import
java.io.InputStream
;
import
java.net.URL
;
import
java.net.URLConnection
;
import
java.util.AbstractMap
;
import
java.util.ArrayList
;
import
java.util.List
;
import
java.util.Map.Entry
;
import
java.util.regex.Matcher
;
import
java.util.regex.Pattern
;
import
org.apache.commons.io.IOUtils
;
import
org.apache.commons.lang3.StringEscapeUtils
;
/**
 * Minimal Google web-search scraper: builds a search URL, downloads the result
 * page and extracts (title, url) pairs from the result markup.
 */
public class GoogleSearch {

    /** Literal markup that opens a result link, up to and including the {@code /url?q=} redirect prefix. */
    private static final String LINK_PREFIX = "<h3 class=\"r\"><a href=\"/url?q=";

    /** Literal markup that closes the opening anchor tag. */
    private static final String TAG_END = "\">";

    /** Literal markup that closes a result link. */
    private static final String LINK_SUFFIX = "</a></h3>";

    /**
     * Matches one result link. Group 1 is everything between the redirect prefix and the
     * first {@code ">}; group 2 is everything between that and the closing markup.
     * Compiled once because the pattern never changes.
     */
    private static final Pattern RESULT_LINK = Pattern.compile(
            Pattern.quote(LINK_PREFIX) + "(.*?)" + Pattern.quote(TAG_END) + "(.*?)" + Pattern.quote(LINK_SUFFIX));

    /**
     * Runs a Google search and returns the parsed results.
     *
     * @param search
     *            the search terms; they are concatenated into the query string verbatim,
     *            so the caller is responsible for any URL encoding
     * @param num
     *            value sent as the {@code num} query parameter
     * @return the results as entries whose key is the title and whose value is the URL,
     *         in page order
     * @throws IOException
     *             if the page cannot be downloaded
     */
    public static List<Entry<String, String>> search(String search, int num) throws IOException {
        String query = "https://www.google.com/search?q=" + search + "&num=" + num;
        // Example of a full query kept from development:
        // "https://www.google.com/search?q=site:https://osu.ppy.sh/+searchbigblack&num=100&gbv=1&sei=u4V8Wo2GIczfvASZtoaQCQ"
        String page = getSearchContent(query);
        return parseLinks(page);
    }

    /**
     * Downloads the search result page and returns it as a {@link String}.
     *
     * @param path
     *            the complete Google search URL
     * @return the response body decoded as UTF-8
     * @throws IOException
     *             if the URL is malformed or the request fails
     */
    public static String getSearchContent(String path) throws IOException {
        final String agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0)";
        // Alternative agent kept from development:
        // "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
        final URLConnection connection = new URL(path).openConnection();
        // User-Agent is mandatory, otherwise Google will return HTTP response code 403.
        connection.setRequestProperty("User-Agent", agent);
        // IOUtils.toString leaves the stream open, so close it here to avoid leaking
        // the underlying connection on every search.
        try (InputStream stream = connection.getInputStream()) {
            return IOUtils.toString(stream, "UTF-8");
        }
    }

    /**
     * Extracts every result link from a result page.
     *
     * @param html
     *            the page markup
     * @return one entry per matched link, key = title, value = URL, both XML-unescaped;
     *         empty if nothing matches
     */
    @SuppressWarnings("deprecation") // StringEscapeUtils is deprecated in newer commons-lang3 releases
    public static List<Entry<String, String>> parseLinks(String html) {
        List<Entry<String, String>> result = new ArrayList<Entry<String, String>>();
        Matcher m = RESULT_LINK.matcher(html);
        while (m.find()) {
            // The target URL is the part of the href before the first '&' (the start of
            // the extra redirect parameters). Look only inside the href group so that an
            // '&' in the title can never be mistaken for the end of the URL, and fall
            // back to the closing quote / whole group instead of throwing when the href
            // carries no extra parameters.
            String href = m.group(1);
            int urlEnd = href.indexOf('&');
            if (urlEnd < 0) {
                urlEnd = href.indexOf('"');
            }
            if (urlEnd < 0) {
                urlEnd = href.length();
            }
            String url = StringEscapeUtils.unescapeXml(href.substring(0, urlEnd));

            // The title is the text after the last '">' of the match. That marker is
            // either inside group 2 (nested tags with attributes) or is the delimiter
            // that precedes group 2, in which case the whole group is the title.
            String inner = m.group(2);
            int lastTagEnd = inner.lastIndexOf(TAG_END);
            String rawTitle = lastTagEnd < 0 ? inner : inner.substring(lastTagEnd + TAG_END.length());
            String title = StringEscapeUtils.unescapeXml(rawTitle);

            result.add(new AbstractMap.SimpleEntry<String, String>(title, url));
        }
        return result;
    }
}
File Metadata
Details
Attached
Mime Type
text/html
Expires
Wed, Mar 4, 5:18 PM (1 d, 9 h)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
59/20/0c5bc905e558db6da4d643a49ab2
Attached To
rDESB despbot
Event Timeline
Log In to Comment