Page Menu · Home · desp's stash

Sauce.java
No One · Temporary

Sauce.java

package me.despawningbone.discordbot.command.anime;
import java.awt.Color;
import java.io.IOException;
import java.net.ConnectException;
import java.net.MalformedURLException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import me.despawningbone.discordbot.command.Command;
import me.despawningbone.discordbot.command.CommandResult;
import me.despawningbone.discordbot.command.CommandResult.CommandResultType;
import net.dv8tion.jda.api.EmbedBuilder;
import net.dv8tion.jda.api.entities.Message;
import net.dv8tion.jda.api.entities.TextChannel;
import net.dv8tion.jda.api.entities.User;
public class Sauce extends Command{
public Sauce() {
this.desc = "Get the source of an anime pic!";
this.usage = "[-d] [imgurl]";
this.alias = Arrays.asList("source", "saucenao", "iqdb");
this.remarks = Arrays.asList("The URL should be a direct link to an image.", "You can also upload an image as an attachment while calling this command instead of using a url.",
" * Specify the `-d` parameter to do a depth search!", " * It is useful for cropped images and edited images, but makes the search much longer,", " * and can sometimes result in a related image instead of the actual source.");
}
@Override
public CommandResult execute(TextChannel channel, User author, Message msg, String[] args) { //allow people to not input url to check sauce of the most recent image like u/2dgt3d?
List<String> amend = new ArrayList<String>(Arrays.asList(args));
int depth = amend.indexOf("-d");
if(depth != -1) amend.subList(depth, depth + 1).clear();
//get url or use attachment if no url found
String url = null;
try {
if(amend.size() < 1) {
if(msg.getAttachments().size() > 0) url = msg.getAttachments().get(0).getUrl();
else throw new MalformedURLException();
} else {
url = amend.get(0).trim();
new URL(url);
}
} catch(MalformedURLException e) {
return new CommandResult(CommandResultType.FAILURE, "Please enter a valid URL!");
}
try {
channel.sendTyping().queueAfter(20, TimeUnit.MILLISECONDS);
if(depth != -1) {
channel.sendMessage("Performing depth search for the picture... (this can take up to 20 seconds)").queue();
String[] urls = yandexSearch(url);
//System.out.println("urls" + Arrays.asList(urls));
url = urls[2] == null ? urls[0] : urls[1] + ";" + urls[2]; //TODO deprecate this wack system
//sync
//List<EmbedBuilder> collect = Arrays.asList(tempSearchSauce(urls[0]),tempSearchSauce(urls[1]));
//async - only throw exception if both errored
CompletableFuture<EmbedBuilder> first = CompletableFuture.supplyAsync(() -> {try {return getSauce(urls[0]);} catch (IOException e){e.printStackTrace(); return null;}});
CompletableFuture<EmbedBuilder> ratio = CompletableFuture.supplyAsync(() -> {try {return getSauce(urls[1]);} catch (IOException e){e.printStackTrace(); return null;}});
List<EmbedBuilder> collect = CompletableFuture.allOf(first, ratio)
.thenApply(future -> Arrays.asList(first.join(), ratio.join())).get();
//checking at whencomplete wouldnt help - the array joining wouldve errored as a whole and returned null regardless; must use individual try catches
//return highest similarity if obtained using closest ratio, else return first found (other size then similar)
channel.sendMessage(collect.stream().filter(e -> e != null)
.sorted((a, b) -> urls[2] == null ? 1 : Double.compare(
Double.parseDouble(b.build().getFooter().getText().replaceAll(".*?([.0-9]*)%.*", "$1")),
Double.parseDouble(a.build().getFooter().getText().replaceAll(".*?([.0-9]*)%.*", "$1"))))
.findFirst().get().build()).queue();
} else {
//direct search
EmbedBuilder eb = getSauce(url);
channel.sendMessage(eb.build()).queue();
}
} catch (IOException | InterruptedException | ExecutionException e) {
Throwable t = e instanceof ExecutionException ? e.getCause() : e;
return new CommandResult(CommandResultType.ERROR, ExceptionUtils.getStackTrace(t));
} catch (NullPointerException | NoSuchElementException e) {
//give possible related images embed info
if(e instanceof NoSuchElementException) {
EmbedBuilder eb = new EmbedBuilder();
String[] urls = url.split(";");
eb.setThumbnail(urls[0]);
eb.setTitle("Possibly related image", urls[0]);
eb.setDescription("Maybe [this](" + urls[urls.length > 1 ? 1 : 0] + ") will help in your search for the delicious marinara?");
channel.sendMessage(eb.build()).queueAfter(20, TimeUnit.MILLISECONDS);
}
return new CommandResult(CommandResultType.NORESULT);
}
return new CommandResult(CommandResultType.SUCCESS);
}
//well known image boards with workable source images that yandex might scrape
private List<String> imageBoards = Arrays.asList("danbooru.donmai.us", "safebooru.org", "gelbooru.com", "zerochan.net", "tbib.org", "pixiv.kurocore.com", "pixiv.net", "chan.sankakucomplex.com", "sankakucomplex.com", "e-shuushuu.net");
//TODO internal cooldown of ~5s when command cooldown system is done - too many requests results in captcha
//returns String[] {<best result from similar image>, <best result from other sites>, <optional url where the image was fetched from>}
private String[] yandexSearch(String url) throws IOException {
Element yandex = Jsoup.connect("https://yandex.com/images/search?url=" + URLEncoder.encode(url, "UTF-8") + "&rpt=imageview").get().body();
//other sites div class new style is slowly rolling out, support both names
String osItem, osSnippet, osPreview, osMeta, osSimilar;
if(!yandex.select(".other-sites__item").isEmpty()) { //old ver
osItem = ".other-sites__item";
osSnippet = ".other-sites__snippet-site-link";
osPreview = ".other-sites__preview-link";
osMeta = ".other-sites__meta";
osSimilar = ".cbir-similar__thumb .cbir-similar__image";
} else { //usually means new
//System.out.println("Sauce: new version of yandex found");
//System.out.println(yandex);
osItem = ".CbirSites-Item";
osSnippet = ".CbirSites-ItemDomain";
osPreview = ".CbirSites-ItemThumb .Thumb";
osMeta = ".CbirSites-ItemThumb .Thumb-Mark";
osSimilar = ".CbirSimilar-Thumb .Thumb-Image";
}
//System.out.println(yandex.select(osItem).stream()
// .filter(o -> imageBoards.contains(o.select(osSnippet).text().toLowerCase())).collect(Collectors.toList()));
//check if image boards are found, if so return the first one found and discard similar results since imageboards are as standard as we can get
//it is possible but unlikely that we would be grabbing the wrong image's source in the lower sections even though there are good sources (that are not imageboards) at the top due to this
String boardRes = yandex.select(osItem).stream()
.filter(o -> imageBoards.contains(o.select(osSnippet).text().toLowerCase()))
.map(o -> o.select(osPreview).attr("href")).findFirst().orElse(null);
if(boardRes != null) {
if(boardRes.startsWith("https://embed.pixiv.net")) { //pixiv images arent full images, need some post processing - using kurocore's database instead
String illustId = boardRes.substring(boardRes.lastIndexOf("illust_id=") + 10);
boardRes = "https://img.kurocore.com/thumbnail/pi/"
+ String.format("%03d", Integer.parseInt(illustId.substring(0, illustId.length() - 6)))
+ "/" + illustId.substring(illustId.length() - 6, illustId.length() - 3)
+ "/" + illustId + "_0.jpg";
}
return new String[] {null, boardRes, null};
}
//else use other options
//gets second image coz usually the first one is just a sharper identical image
Element simThumb = yandex.select(osSimilar).get(1);
String similar = "https:" + (osSimilar.contains(".CbirSimilar") ? simThumb.attr("style").replaceAll(".*url\\((.*?)\\).*", "$1") : simThumb.attr("src")); //use yandex's thumbnails since the actual source might be broken already
//use other sizes if found - second most accurate result (unless theres edits that uses the image in it (memes for example), so still retain similar results)
if(yandex.select(".CbirOtherSizes-Wrapper").size() > 0) { //merge the sort algorithms? //sorting with ratio seems to perform bad for most crops so dont sort anymore
String otherSize = yandex.select(".Tags-Item").first().attr("href");
return new String[] {similar, otherSize, null};
}
//last resort: sort by closest ratio (only first 12 results since typically they deviate from original a ton after 12 results)
String[] size = new String[]{yandex.selectFirst(".CbirPreview-Placeholder").attr("width"), yandex.selectFirst(".CbirPreview-Placeholder").attr("height")};
//fixed ratio - works better with normal dimensions (eg anime scenes), sucks at everything else
//double ratio = Double.parseDouble(size[0]) / Double.parseDouble(size[1]) >= 1 ? 16.0/9 : 9/16.0;
//dynamic ratio - works really well only if the crop has the same ratio as the actual image itself
double ratio = Double.parseDouble(size[0]) / Double.parseDouble(size[1]);
Element eSite = yandex.select(osItem).stream().limit(12).sorted((a, b) -> {
String[] aA = a.select(osMeta).first().text().split("×"), bA = b.select(osMeta).first().text().split("×");
return Double.compare(Math.abs(Double.parseDouble(aA[0]) / Double.parseDouble(aA[1]) - ratio), Math.abs(Double.parseDouble(bA[0]) / Double.parseDouble(bA[1]) - ratio));}).findFirst().get();
return new String[] {similar,
eSite.select(osPreview).first().attr("href"),
eSite.select(osSnippet).first().attr("href")};
//anime checking usually cant be done with depth search since not every scene is screen capped online
}
private EmbedBuilder getSauce(String url) throws IOException {
if(url == null) return null;
EmbedBuilder eb = new EmbedBuilder();
eb.setColor(new Color(29, 29, 29));
try { //search iqdb first for the tags; results usually more organized and formatted
Element iqdb = Jsoup.connect("https://iqdb.org/?url=" + URLEncoder.encode(url, "UTF-8") + "&service[]=1&service[]=2&service[]=3&service[]=4&service[]=5&service[]=11&service[]=13").post().body(); //services excluding eshuushuu since it has no tags and will fallback to saucenao anyways
//post instead of get due to the recent incident making iqdb and saucenao block imgur
if(iqdb.select(".err").size() > 0 && iqdb.select(".err").html().contains("HTTP")) //iqdb errors, most likely broken link so dont fallback
throw new IOException(iqdb.selectFirst(".err").ownText().split("\\.")[0]);
Elements tb = iqdb.select("th:contains(Best match)").get(0).parent().siblingElements();
Element img = tb.get(0).selectFirst("img");
eb.setThumbnail("https://iqdb.org/" + img.attr("src"));
eb.setTitle("Source: " + tb.get(1).selectFirst("td").ownText(), (img.parent().attr("href").contains("http") ? "" : "https:") + img.parent().attr("href"));
String tags = img.attr("alt").split("Tags:")[1];
eb.setDescription(tags.contains(",") ? tags.replaceAll(",", "\n") : tags.replaceAll(" ", "\n").replaceAll("\\_", " "));
eb.setFooter(tb.get(2).select("td").eachText().get(0) + " | " + tb.get(3).select("td").text(), null);
} catch (IndexOutOfBoundsException | SocketTimeoutException | ConnectException e) {
try { //fallback to saucenao, usually pixiv source instead of image boards; also falls back if its anime scenes
Element saucenao = Jsoup.connect("https://saucenao.com/search.php").requestBody("url=" + URLEncoder.encode(url, "UTF-8")).post().body(); //same reason as iqdb post
Element result = saucenao.selectFirst(".resulttable");
if(result == null) return null; //no results
if(result.parent().attr("class").equals("result hidden")) return null; //all low similarity results, ignore
eb.setThumbnail((result.selectFirst("img").attr("src").contains("http") ? "" : "https:") + result.selectFirst("img").attr("src").replaceAll(" ", "%20"));
try { //normal pixiv/deviantart handling
Element source = result.selectFirst("strong:contains(ID:)").nextElementSibling();
eb.setAuthor(result.select(".resulttitle strong").text());
eb.setTitle("Source: " + source.previousElementSibling().text().replaceAll("ID:", "#") + source.text(), source.attr("href"));
Element member = result.select(".linkify").get(2);
eb.setDescription("Author: [" + member.text() + "](" + member.attr("href") + ")");
} catch (NullPointerException e1) { //weird saucenao card formatting (eg episode info)
//change line break tags to \n, and use first line as title
result.select("br").after("\\n");
String[] title = result.selectFirst(".resulttitle") == null ? new String[]{"No title"} : result.selectFirst(".resulttitle").wholeText().replaceAll("\\\\n", "\n").split("\n", 2); //there can be no titles, like 4chan sources
eb.setTitle(title[0], result.selectFirst(".resultmiscinfo a") == null ? null : result.selectFirst(".resultmiscinfo a").attr("href"));
if(title.length > 1) eb.appendDescription(title[1] + "\n");
eb.appendDescription(result.selectFirst(".resultcontentcolumn").wholeText().replaceAll("\\\\n", "\n"));
}
String similarity = result.select(".resultsimilarityinfo").text();
//additional layer above saucenao's low similarity check: discard lower than 65% similarity
if(Double.parseDouble(similarity.substring(0, similarity.length() - 1)) < 65)
return null;
eb.setFooter(similarity + " similarity", null);
} catch (IndexOutOfBoundsException e1) {
return null;
}
}
return eb;
}
}

File Metadata

Mime Type
text/x-java
Expires
Sun, Jul 6, 4:13 PM (1 d, 2 h)
Storage Engine
local-disk
Storage Format
Raw Data
Storage Handle
25/3f/d36e85a8238511d09bb68a68d175

Event Timeline