+ for link in sum([x[0] for x in weasyprint.document.resolve_links(doc.pages)], []):  # resolve_links yields a (links, anchors) tuple per page; flatten every page's link list into one and iterate
+     if link[0] == 'external':
+         new_url = link[1].split('#')[0]  # strip the fragment identifier, since it points into the same page
+         if new_url not in resolved_urls and re.match(scope, new_url):  # prevent infinite recursion; skip URLs outside the scope
+             # if the server allows multiple URLs for the same page, this may still produce duplicates (e.g. foo/index.html and foo/)
+             resolved_urls.append(new_url)
+             try:
+                 print('from ' + url, end=' ')  # prepend traversal info to the output
+                 doc = merge_pdf(doc, resolve_into_pdf(new_url))
+             except AssertionError:  # we might accidentally run into non-HTML files; it is still recommended to include some kind of check in the scope pattern
+                 pass  # skip resources that fail to render
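
The loop calls two helpers, merge_pdf and resolve_into_pdf, that are not part of this hunk. A minimal sketch of what they might look like, assuming WeasyPrint's documented Document.copy and default_url_fetcher APIs; the names and details here are illustrative, not the patch's actual implementation:

import weasyprint

def merge_pdf(doc, other):
    # Sketch: Document.copy accepts a page list, so concatenating both
    # documents' pages yields a single merged document.
    return doc.copy(list(doc.pages) + list(other.pages))

def resolve_into_pdf(url):
    # Sketch: fetch the URL and assert it is HTML (the AssertionError caught
    # above suggests a check of this kind), then render it to a Document.
    result = weasyprint.default_url_fetcher(url)
    assert 'html' in result.get('mime_type', ''), 'not an HTML document'
    print('resolving ' + url)
    doc = weasyprint.HTML(url).render()
    # ... the link-resolution loop above would go here, merging each
    # in-scope link's document into doc before returning it ...
    return doc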