diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6e3b986
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+*
+!.gitignore
+!crawl2pdf.py
+!crawl2pypdf.py
\ No newline at end of file
diff --git a/crawl2pdf.py b/crawl2pdf.py
new file mode 100644
index 0000000..7d4193a
--- /dev/null
+++ b/crawl2pdf.py
@@ -0,0 +1,85 @@
+import sys
+import weasyprint
+import re
+
+import logging
+
+#config
+override_css = ['override.css'] #user style; recommended to at least set the @page css rule to fit the site you are scraping
+output_pdf = 'output.pdf'
+debug = False
+
+#weasyprint <=52.2 might produce a broken pdf if there are custom fonts, apparently
+
+#crawl2pdf.py <scope> <url1> [url2] [url3]...
+#crawls a site and turns the included pages into a pdf according to the specified regex scope
+
+#init vars
+resolved_urls = []
+try:
+    scope = sys.argv[1] #scope in regex
+
+    urls = sys.argv[2:] #initial urls
+    if(len(urls) == 0):
+        raise IndexError()
+
+    for css in override_css: #check file existence
+        with open(css) as f:
+            continue
+except IOError:
+    print('invalid user css file(s) specified, ignoring...')
+    override_css = None
+except IndexError:
+    print('Please enter both the regex scope and the url to fetch!')
+    print('Syntax: crawl2pdf.py <scope> <url1> [url2] [url3]...')
+    quit()
+
+def merge_pdf(base_doc, new_doc):
+    if(new_doc != None):
+        base_doc.metadata.attachments += new_doc.metadata.attachments
+        base_doc.pages += new_doc.pages
+    return base_doc
+
+#shouldn't need a timeout to avoid rate limits - rendering already takes a lot of time; just do it synchronously
+def resolve_into_pdf(url):
+    print('resolving ' + url + '...')
+    try:
+        doc = weasyprint.HTML(url).render(stylesheets=override_css, optimize_images=True, presentational_hints=True)
+    except weasyprint.urls.URLFetchingError as e:
+        print('failed to fetch - ' + str(e))
+        return None
+
+    for link in sum([x[0] for x in weasyprint.document.resolve_links(doc.pages)], []): #take the link list from each page tuple, concatenate them, then iterate
+        if(link[0] == 'external'):
+            new_url = link[1].split('#')[0] #remove the fragment identifier as it points to the same page
+            if(new_url not in resolved_urls and re.match(scope, new_url)): #prevent infinite recursion; do not resolve urls that are out of scope
+                #if the server allows multiple urls for the same page this might result in duplicates (e.g. foo/index.html and foo/)
+                resolved_urls.append(new_url)
+                try:
+                    print('from ' + url, end=' ') #prepend additional info about the traversal to the output
+                    doc = merge_pdf(doc, resolve_into_pdf(new_url))
+                except AssertionError: #might be a non-html file we accidentally ran into; still, it's recommended to include some sort of check in the scope
+                    continue
+    return doc
+
+
+if debug:
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
+    weasyprint.LOGGER.setLevel(logging.DEBUG)
+    weasyprint.PROGRESS_LOGGER.setLevel(logging.DEBUG)
+    weasyprint.LOGGER.addHandler(handler)
+    weasyprint.PROGRESS_LOGGER.addHandler(handler)
+
+root_doc = resolve_into_pdf(urls[0]) #initial pdf to append pages onto
+
+#resolve additional urls if any
+for url in urls[1:]: #no need to check bounds - slicing does not raise but returns an empty list if out of range
+    root_doc = merge_pdf(root_doc, resolve_into_pdf(url))
+
+print('writing pdf...')
+root_doc.write_pdf(output_pdf)
+
+
+
+
diff --git a/crawl2pypdf.py b/crawl2pypdf.py
new file mode 100644
index 0000000..903bc77
--- /dev/null
+++ b/crawl2pypdf.py
@@ -0,0 +1,107 @@
+import sys
+import weasyprint
+import re
+import PyPDF2
+import io
+
+import logging
+
+#config
+override_css = ['override.css'] #user style; recommended to at least set the @page css rule to fit the site you are scraping
+output_pdf = 'output.pdf'
+debug = False
+
+#weasyprint <=52.2 might produce a broken pdf if there are custom fonts, apparently
+
+#weasyprint merging via doc.pages doesn't always work well - some text gets broken, so this version uses pypdf2 for merging;
+#even then, pypdf2 sometimes needs patches to read the non-standard pdfs generated by weasyprint:
+#utils.py - in b_(s): `r = s.encode('latin-1')` -> `r = s.encode('UTF-8')`
+#generic.py - in getOutlines(self, node=None, outlines=None): wrap `node = node["/Next"]` in a try/except and break on ValueError
+
+#crawl2pypdf.py <scope> <url1> [url2] [url3]...
+#crawls a site and turns the included pages into a pdf according to the specified regex scope, using pypdf2 merging
+
+#init vars
+resolved_urls = []
+try:
+    scope = sys.argv[1] #scope in regex
+
+    urls = sys.argv[2:] #initial urls
+    if(len(urls) == 0):
+        raise IndexError()
+
+    for css in override_css: #check file existence
+        with open(css) as f:
+            continue
+except IOError:
+    print('invalid user css file(s) specified, ignoring...')
+    override_css = None
+except IndexError:
+    print('Please enter both the regex scope and the url to fetch!')
+    print('Syntax: crawl2pypdf.py <scope> <url1> [url2] [url3]...')
+    quit()
+
+#merge two streams at a time to maintain order and to avoid problems with too many pdfs appended to a single merger
+def merge_pdf(base_pdf_stream, new_pdf_stream):
+    if(new_pdf_stream == None):
+        return base_pdf_stream
+
+    merger = PyPDF2.PdfFileMerger(strict=False) #needed or else some pdfs won't be read
+
+    merger.append(base_pdf_stream)
+    merger.append(new_pdf_stream)
+
+    merge_stream = io.BytesIO()
+    merger.write(merge_stream)
+
+    base_pdf_stream.close()
+    new_pdf_stream.close()
+    merger.close()
+
+    return merge_stream
+
+#shouldn't need a timeout to avoid rate limits - rendering already takes a lot of time; just do it synchronously
+def resolve_into_pdf(url):
+    print('resolving ' + url + '...')
+    try:
+        doc = weasyprint.HTML(url).render(stylesheets=override_css, optimize_images=True, presentational_hints=True)
+        stream = io.BytesIO(doc.write_pdf())
+    except weasyprint.urls.URLFetchingError as e:
+        print('failed to fetch - ' + str(e))
+        return None
+
+    for link in sum([x[0] for x in weasyprint.document.resolve_links(doc.pages)], []): #take the link list from each page tuple, concatenate them, then iterate
+        if(link[0] == 'external'):
+            new_url = link[1].split('#')[0] #remove the fragment identifier as it points to the same page
+            if(new_url not in resolved_urls and re.match(scope, new_url)): #prevent infinite recursion; do not resolve urls that are out of scope
+                #if the server allows multiple urls for the same page this might result in duplicates (e.g. foo/index.html and foo/)
+                resolved_urls.append(new_url)
+                try:
+                    print('from ' + url, end=' ') #prepend additional info about the traversal to the output
+                    stream = merge_pdf(stream, resolve_into_pdf(new_url))
+                except AssertionError: #might be a non-html file we accidentally ran into; still, it's recommended to include some sort of check in the scope
+                    continue
+    return stream
+
+
+if debug:
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
+    weasyprint.LOGGER.setLevel(logging.DEBUG)
+    weasyprint.PROGRESS_LOGGER.setLevel(logging.DEBUG)
+    weasyprint.LOGGER.addHandler(handler)
+    weasyprint.PROGRESS_LOGGER.addHandler(handler)
+
+root_stream = resolve_into_pdf(urls[0]) #initial pdf to append pages onto
+
+#resolve additional urls if any
+for url in urls[1:]: #no need to check bounds - slicing does not raise but returns an empty list if out of range
+    root_stream = merge_pdf(root_stream, resolve_into_pdf(url))
+
+print('writing pdf...')
+with open(output_pdf, "wb") as f:
+    f.write(root_stream.getbuffer())
+
+
+
+
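Both scripts take the scope as their first argument and test every discovered link with re.match, so the pattern is effectively anchored at the start of the URL. A minimal sketch of that check in isolation; the scope and candidate URLs below are placeholders, not taken from the scripts:

import re

#hypothetical scope and candidate links - example.com is a placeholder
scope = r'https://docs\.example\.com/en/.*'

candidates = [
    'https://docs.example.com/en/install.html',  #matches the scope - would be rendered and merged
    'https://docs.example.com/fr/install.html',  #outside the scope - skipped
    'https://blog.example.com/post',             #different host - skipped
]

for url in candidates:
    print(url, '->', 'crawl' if re.match(scope, url) else 'skip')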
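The override_css comment in both scripts recommends at least setting an @page rule so the rendered pages fit the target site. A minimal sketch of generating such an override.css from Python; the page size and margin are assumptions to adjust per site, not values from the repo:

#writes a placeholder override.css next to the scripts; tweak the values for the site being crawled
page_css = (
    "@page {\n"
    "    size: A4;      /* or an explicit size such as '1280px 1800px' for wide layouts */\n"
    "    margin: 10mm;\n"
    "}\n"
)

with open('override.css', 'w') as f:
    f.write(page_css)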