package me.despawningbone.discordbot.command.anime;

import java.awt.Color;
import java.io.IOException;
import java.net.ConnectException;
import java.net.MalformedURLException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.exception.ExceptionUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import me.despawningbone.discordbot.command.Command;
import me.despawningbone.discordbot.command.CommandResult;
import me.despawningbone.discordbot.command.CommandResult.CommandResultType;
import net.dv8tion.jda.api.EmbedBuilder;
import net.dv8tion.jda.api.entities.Message;
import net.dv8tion.jda.api.entities.TextChannel;
import net.dv8tion.jda.api.entities.User;

/**
 * Reverse image search command.
 *
 * <p>A direct search asks iqdb for the best match and falls back to saucenao.
 * A depth search ({@code -d}) first asks yandex for candidate images (image board hit,
 * other sizes, visually similar, closest aspect ratio) and then runs the direct search
 * on those candidates.
 */
public class Sauce extends Command{

	/** Well known image boards with workable source images that yandex might scrape. */
	private static final List<String> IMAGE_BOARDS = Arrays.asList("danbooru.donmai.us", "safebooru.org", "gelbooru.com", "zerochan.net", "tbib.org", "pixiv.kurocore.com", "pixiv.net", "chan.sankakucomplex.com", "sankakucomplex.com", "e-shuushuu.net");

	/** First number that is directly followed by a percent sign, eg {@code 96%} or {@code 92.35%}. */
	private static final Pattern SIMILARITY = Pattern.compile("([0-9]*\\.?[0-9]+)%");

	/** Illustration id inside a pixiv embed url. */
	private static final Pattern PIXIV_ILLUST_ID = Pattern.compile("illust_id=([0-9]+)");

	/** saucenao results below this similarity (in percent) are discarded. */
	private static final double MIN_SAUCENAO_SIMILARITY = 65;

	public Sauce() {
		this.desc = "Get the source of an anime pic!";
		this.usage = "[-d] [imgurl]";
		this.alias = Arrays.asList("source", "saucenao", "iqdb");
		this.remarks = Arrays.asList("The URL should be a direct link to an image.", "You can also upload an image as an attachment while calling this command instead of using a url.",
				" * Specify the `-d` parameter to do a depth search!", " * It is useful for cropped images and edited images, but makes the search much longer,", " * and can sometimes result in a related image instead of the actual source.");
	}

	/**
	 * Runs the search and posts the resulting embed to {@code channel}.
	 *
	 * @param channel channel the command was issued in; all replies are sent here
	 * @param author  user who issued the command (not used by this command)
	 * @param msg     triggering message; its first attachment is used when no url argument is given
	 * @param args    command arguments: an optional {@code -d} flag and an optional image url
	 * @return {@code FAILURE} for a missing/invalid url, {@code ERROR} (with stack trace) when a lookup
	 *         failed or was interrupted, {@code NORESULT} when nothing was found, {@code SUCCESS} otherwise
	 */
	@Override
	public CommandResult execute(TextChannel channel, User author, Message msg, String[] args) {
		//TODO allow people to not input url to check sauce of the most recent image like u/2dgt3d?
		List<String> params = new ArrayList<>(Arrays.asList(args));
		boolean depthSearch = params.remove("-d");   //removes the first "-d" only, like the index based removal it replaces

		//get url or use attachment if no url found
		String url;
		try {
			url = resolveImageUrl(params, msg);
		} catch(MalformedURLException e) {
			return new CommandResult(CommandResultType.FAILURE, "Please enter a valid URL!");
		}

		try {
			channel.sendTyping().queueAfter(20, TimeUnit.MILLISECONDS);

			EmbedBuilder result;
			if(depthSearch) {
				channel.sendMessage("Performing depth search for the picture... (this can take up to 20 seconds)").queue();

				String[] urls = yandexSearch(url);

				//remember what to show in the "possibly related image" embed if no sauce is found
				//image board hits have no similar image (urls[0] == null), so fall back to the board image itself
				//instead of leaving url null and crashing in the fallback embed
				if(urls[2] != null) url = urls[1] + ";" + urls[2];   //TODO deprecate this wack system
				else url = urls[0] != null ? urls[0] : urls[1];

				result = depthSauce(urls);
			} else {
				//direct search
				result = getSauce(url);
				if(result == null) return new CommandResult(CommandResultType.NORESULT);
			}

			channel.sendMessage(result.build()).queue();
		} catch (InterruptedException e) {
			Thread.currentThread().interrupt();   //keep the interrupt visible to the caller's thread
			return new CommandResult(CommandResultType.ERROR, ExceptionUtils.getStackTrace(e));
		} catch (IOException | ExecutionException e) {
			Throwable t = e instanceof ExecutionException && e.getCause() != null ? e.getCause() : e;
			return new CommandResult(CommandResultType.ERROR, ExceptionUtils.getStackTrace(t));
		} catch (NoSuchElementException e) {
			//nothing usable found: give possible related image embed info
			sendRelatedImage(channel, url);
			return new CommandResult(CommandResultType.NORESULT);
		} catch (NullPointerException e) {
			//the scraped pages did not contain an element the parsers expect; treated as "no result"
			return new CommandResult(CommandResultType.NORESULT);
		}

		return new CommandResult(CommandResultType.SUCCESS);
	}

	/**
	 * Picks the image url to search: the first remaining argument, else the first attachment of {@code msg}.
	 *
	 * @throws MalformedURLException if neither is available or the argument is not a valid url
	 */
	private static String resolveImageUrl(List<String> params, Message msg) throws MalformedURLException {
		if(params.isEmpty()) {
			if(msg.getAttachments().isEmpty()) throw new MalformedURLException("No image url or attachment given");
			return msg.getAttachments().get(0).getUrl();
		}

		String url = params.get(0).trim();
		new URL(url);   //only constructed for validation
		return url;
	}

	/**
	 * Looks up the sauce of both depth search candidates concurrently and returns the preferred one.
	 *
	 * @param urls result of {@link #yandexSearch(String)}
	 * @return the chosen embed, never {@code null}
	 * @throws NoSuchElementException if neither candidate produced a result
	 */
	private EmbedBuilder depthSauce(String[] urls) throws InterruptedException, ExecutionException {
		//async - an IOException of one lookup only drops that candidate
		CompletableFuture<EmbedBuilder> similar = CompletableFuture.supplyAsync(() -> getSauceOrNull(urls[0]));
		CompletableFuture<EmbedBuilder> other = CompletableFuture.supplyAsync(() -> getSauceOrNull(urls[1]));
		CompletableFuture.allOf(similar, other).get();

		List<EmbedBuilder> found = new ArrayList<>();
		for(EmbedBuilder eb : Arrays.asList(similar.join(), other.join())) {
			if(eb != null) found.add(eb);
		}
		if(found.isEmpty()) throw new NoSuchElementException("No sauce found for any depth search candidate");

		//return highest similarity if obtained using closest ratio, else keep the lookup order (similar, then other)
		//NOTE(review): the comparator this replaces always returned 1 when urls[2] == null, which breaks the
		//Comparator contract and only happened to keep this order; its comment said "other size then similar" -
		//confirm which order is actually wanted
		if(urls[2] != null) {
			found.sort((a, b) -> Double.compare(
					parseSimilarity(footerText(b)),
					parseSimilarity(footerText(a))));
		}
		return found.get(0);
	}

	/** {@link #getSauce(String)} for use in lambdas: prints the IOException and returns {@code null} instead. */
	private EmbedBuilder getSauceOrNull(String url) {
		try {
			return getSauce(url);
		} catch (IOException e) {
			e.printStackTrace();
			return null;
		}
	}

	/** Footer text of the embed, or {@code null} if it has no footer. */
	private static String footerText(EmbedBuilder eb) {
		return eb.build().getFooter() == null ? null : eb.build().getFooter().getText();
	}

	/**
	 * Extracts the first percentage from {@code text}.
	 *
	 * @return the number in front of the first {@code %}, or {@code -1} if there is none
	 */
	private static double parseSimilarity(String text) {
		if(text == null) return -1;
		Matcher m = SIMILARITY.matcher(text);
		return m.find() ? Double.parseDouble(m.group(1)) : -1;
	}

	/**
	 * Posts the "possibly related image" embed.
	 *
	 * @param url either a single image url, or {@code image;site} as assembled in {@link #execute}
	 */
	private static void sendRelatedImage(TextChannel channel, String url) {
		if(url == null) return;

		EmbedBuilder eb = new EmbedBuilder();
		String[] urls = url.split(";");
		eb.setThumbnail(urls[0]);
		eb.setTitle("Possibly related image", urls[0]);
		eb.setDescription("Maybe [this](" + urls[urls.length > 1 ? 1 : 0] + ") will help in your search for the delicious marinara?");
		channel.sendMessage(eb.build()).queueAfter(20, TimeUnit.MILLISECONDS);
	}

	//TODO internal cooldown of ~5s when command cooldown system is done - too many requests results in captcha

	/**
	 * Asks yandex image search for candidates related to {@code url}.
	 *
	 * @return {@code String[] {similar, other, site}}:
	 *         <ul>
	 *         <li>image board hit: {@code {null, boardImage, null}}</li>
	 *         <li>other sizes found: {@code {similar, otherSize, null}}</li>
	 *         <li>otherwise: {@code {similar, previewOfClosestRatio, siteOfClosestRatio}}</li>
	 *         </ul>
	 *         {@code similar} is {@code null} when yandex lists no similar images
	 * @throws IOException            if the yandex page cannot be fetched
	 * @throws NoSuchElementException if yandex lists no sites for the last resort ratio sort
	 */
	private String[] yandexSearch(String url) throws IOException {
		Element yandex = Jsoup.connect("https://yandex.com/images/search?url=" + URLEncoder.encode(url, "UTF-8") + "&rpt=imageview").get().body();

		//other sites div class new style is slowly rolling out, support both names
		boolean oldLayout = !yandex.select(".other-sites__item").isEmpty();
		String osItem = oldLayout ? ".other-sites__item" : ".CbirSites-Item";
		String osSnippet = oldLayout ? ".other-sites__snippet-site-link" : ".CbirSites-ItemDomain";
		String osPreview = oldLayout ? ".other-sites__preview-link" : ".CbirSites-ItemThumb .Thumb";
		String osMeta = oldLayout ? ".other-sites__meta" : ".CbirSites-ItemThumb .Thumb-Mark";
		String osSimilar = oldLayout ? ".cbir-similar__thumb .cbir-similar__image" : ".CbirSimilar-Thumb .Thumb-Image";

		//check if image boards are found, if so return the first one found and discard similar results since imageboards are as standard as we can get
		//it is possible but unlikely that we would be grabbing the wrong image's source in the lower sections even though there are good sources (that are not imageboards) at the top due to this
		String boardRes = yandex.select(osItem).stream()
				.filter(o -> IMAGE_BOARDS.contains(o.select(osSnippet).text().toLowerCase()))
				.map(o -> o.select(osPreview).attr("href")).findFirst().orElse(null);

		if(boardRes != null) {
			if(boardRes.startsWith("https://embed.pixiv.net")) boardRes = toKurocoreThumbnail(boardRes);
			return new String[] {null, boardRes, null};
		}

		//else use other options

		//gets second image coz usually the first one is just a sharper identical image; settle for the first (or none) if there is no second
		//use yandex's thumbnails since the actual source might be broken already
		Elements simThumbs = yandex.select(osSimilar);
		String similar = null;
		if(!simThumbs.isEmpty()) {
			Element simThumb = simThumbs.get(simThumbs.size() > 1 ? 1 : 0);
			similar = "https:" + (oldLayout ? simThumb.attr("src") : simThumb.attr("style").replaceAll(".*url\\((.*?)\\).*", "$1"));
		}

		//use other sizes if found - second most accurate result (unless theres edits that uses the image in it (memes for example), so still retain similar results)
		//sorting with ratio seems to perform bad for most crops so dont sort anymore
		if(yandex.select(".CbirOtherSizes-Wrapper").size() > 0) {
			String otherSize = yandex.select(".Tags-Item").first().attr("href");
			return new String[] {similar, otherSize, null};
		}

		//last resort: sort by closest ratio (only first 12 results since typically they deviate from original a ton after 12 results)
		//dynamic ratio - works really well only if the crop has the same ratio as the actual image itself
		//(a fixed 16:9 ratio works better with normal dimensions (eg anime scenes), but sucks at everything else)
		Element placeholder = yandex.selectFirst(".CbirPreview-Placeholder");
		double ratio = Double.parseDouble(placeholder.attr("width")) / Double.parseDouble(placeholder.attr("height"));

		Element eSite = yandex.select(osItem).stream().limit(12)
				.min((a, b) -> Double.compare(ratioDeviation(a, osMeta, ratio), ratioDeviation(b, osMeta, ratio)))
				.orElseThrow(() -> new NoSuchElementException("No other sites listed by yandex"));

		//anime checking usually cant be done with depth search since not every scene is screen capped online
		return new String[] {similar,
				eSite.select(osPreview).first().attr("href"),
				eSite.select(osSnippet).first().attr("href")};
	}

	/** Absolute difference between {@code ratio} and the {@code width×height} ratio in the item's meta text. */
	private static double ratioDeviation(Element item, String metaSelector, double ratio) {
		String[] dim = item.select(metaSelector).first().text().split("×");
		return Math.abs(Double.parseDouble(dim[0]) / Double.parseDouble(dim[1]) - ratio);
	}

	/**
	 * Pixiv embed images arent full images, so point at kurocore's database instead.
	 *
	 * @param embedUrl pixiv embed url containing {@code illust_id=<digits>}
	 * @return the kurocore thumbnail url, or {@code embedUrl} unchanged when it has no illustration id of
	 *         at least 7 digits (the path layout needs the digits before the last six)
	 */
	private static String toKurocoreThumbnail(String embedUrl) {
		Matcher m = PIXIV_ILLUST_ID.matcher(embedUrl);
		if(!m.find()) return embedUrl;

		String illustId = m.group(1);
		int len = illustId.length();
		if(len < 7 || len > 18) return embedUrl;   //upper bound keeps Long.parseLong from overflowing

		return "https://img.kurocore.com/thumbnail/pi/"
				+ String.format("%03d", Long.parseLong(illustId.substring(0, len - 6)))
				+ "/" + illustId.substring(len - 6, len - 3)
				+ "/" + illustId + "_0.jpg";
	}

	/**
	 * Direct search: iqdb first, saucenao when iqdb has no usable match or cannot be reached.
	 *
	 * @param url image url, may be {@code null}
	 * @return the populated embed, or {@code null} if {@code url} is {@code null} or nothing was found
	 * @throws IOException if iqdb reports an HTTP error for the image, or a request fails in a way that
	 *                     does not trigger the fallback
	 */
	private EmbedBuilder getSauce(String url) throws IOException {
		if(url == null) return null;

		try {
			//search iqdb first for the tags; results usually more organized and formatted
			return searchIqdb(url);
		} catch (IndexOutOfBoundsException | SocketTimeoutException | ConnectException e) {
			//fallback to saucenao, usually pixiv source instead of image boards; also falls back if its anime scenes
			//uses its own embed so nothing half filled by the iqdb attempt leaks into the result
			return searchSaucenao(url);
		}
	}

	/** New embed with the colour used for all sauce results. */
	private static EmbedBuilder newSauceEmbed() {
		EmbedBuilder eb = new EmbedBuilder();
		eb.setColor(new Color(29, 29, 29));
		return eb;
	}

	/**
	 * Builds an embed from iqdb's best match.
	 *
	 * @throws IOException               if iqdb shows an HTTP error (most likely broken link so dont fallback)
	 * @throws IndexOutOfBoundsException if the page has no best match or the match has no tags
	 */
	private EmbedBuilder searchIqdb(String url) throws IOException {
		//services excluding eshuushuu since it has no tags and will fallback to saucenao anyways
		//post instead of get due to the recent incident making iqdb and saucenao block imgur
		Element iqdb = Jsoup.connect("https://iqdb.org/?url=" + URLEncoder.encode(url, "UTF-8") + "&service[]=1&service[]=2&service[]=3&service[]=4&service[]=5&service[]=11&service[]=13").post().body();

		Elements err = iqdb.select(".err");
		if(!err.isEmpty() && err.html().contains("HTTP"))
			throw new IOException(err.first().ownText().split("\\.")[0]);

		Elements tb = iqdb.select("th:contains(Best match)").get(0).parent().siblingElements();
		Element img = tb.get(0).selectFirst("img");

		EmbedBuilder eb = newSauceEmbed();
		eb.setThumbnail("https://iqdb.org/" + img.attr("src"));
		eb.setTitle("Source: " + tb.get(1).selectFirst("td").ownText(), (img.parent().attr("href").contains("http") ? "" : "https:") + img.parent().attr("href"));

		String tags = img.attr("alt").split("Tags:")[1];
		eb.setDescription(tags.contains(",") ? tags.replaceAll(",", "\n") : tags.replaceAll(" ", "\n").replaceAll("\\_", " "));
		eb.setFooter(tb.get(2).select("td").eachText().get(0) + " | " + tb.get(3).select("td").text(), null);
		return eb;
	}

	/**
	 * Builds an embed from saucenao's first result.
	 *
	 * @return the embed, or {@code null} if there is no result, only low similarity results,
	 *         a similarity below {@value #MIN_SAUCENAO_SIMILARITY}%, or an unexpected card layout
	 */
	private EmbedBuilder searchSaucenao(String url) throws IOException {
		try {
			Element saucenao = Jsoup.connect("https://saucenao.com/search.php").requestBody("url=" + URLEncoder.encode(url, "UTF-8")).post().body();   //post for the same reason as iqdb
			Element result = saucenao.selectFirst(".resulttable");

			if(result == null) return null;   //no results
			if(result.parent().attr("class").equals("result hidden")) return null;   //all low similarity results, ignore

			EmbedBuilder eb = newSauceEmbed();
			String thumb = result.selectFirst("img").attr("src");
			eb.setThumbnail((thumb.contains("http") ? "" : "https:") + thumb.replaceAll(" ", "%20"));

			try {   //normal pixiv/deviantart handling
				Element source = result.selectFirst("strong:contains(ID:)").nextElementSibling();
				eb.setAuthor(result.select(".resulttitle strong").text());
				eb.setTitle("Source: " + source.previousElementSibling().text().replaceAll("ID:", "#") + source.text(), source.attr("href"));
				Element member = result.select(".linkify").get(2);
				eb.setDescription("Author: [" + member.text() + "](" + member.attr("href") + ")");
			} catch (NullPointerException e1) {   //weird saucenao card formatting (eg episode info)
				//change line break tags to \n, and use first line as title
				result.select("br").after("\\n");
				Element titleElement = result.selectFirst(".resulttitle");
				String[] title = titleElement == null ? new String[]{"No title"} : titleElement.wholeText().replaceAll("\\\\n", "\n").split("\n", 2);   //there can be no titles, like 4chan sources
				Element miscLink = result.selectFirst(".resultmiscinfo a");
				eb.setTitle(title[0], miscLink == null ? null : miscLink.attr("href"));
				if(title.length > 1) eb.appendDescription(title[1] + "\n");
				eb.appendDescription(result.selectFirst(".resultcontentcolumn").wholeText().replaceAll("\\\\n", "\n"));
			}

			String similarity = result.select(".resultsimilarityinfo").text();

			//additional layer above saucenao's low similarity check: discard lower than 65% similarity (or unreadable similarity)
			if(parseSimilarity(similarity) < MIN_SAUCENAO_SIMILARITY)
				return null;

			eb.setFooter(similarity + " similarity", null);
			return eb;
		} catch (IndexOutOfBoundsException e1) {
			return null;
		}
	}
}