import yaml, os, re
from datetime import datetime
from markdown import markdown
from bs4 import BeautifulSoup, Comment
#also requires pymdown-extensions

#note to self: to convert existing ctf diaries, check whether `\n([0-9a-zA-Z]+)` (or `^((?:[a-zA-Z0-9] ?)+)\n- ` for some messier writeups) looks like a chall name, then replace with `\n### $1`
#replace all first-level -s that i use for paragraphs with 2 \ns
#push all ```s to the left without spaces

#TODO aggregate stats (total ctfs, total solves, solve and co-solve amt, category solves breakdown, avg chall per ctf?, avg points?, avg solve count??)
#exclude organized events


#directory holding this script; every input file is looked up relative to it
CWD = os.path.dirname(os.path.realpath(__file__))

#despite the name, this matches runs of anything that is NOT an ascii letter, digit, hyphen or space
alphanum = re.compile(r'[^A-Za-z0-9\- ]+')
#matches runs of whitespace
merge_space = re.compile(r'\s+')

def remove_formatting(str):
  """Normalize a name into a lowercase, hyphen-separated slug (for name formats).

  Characters other than ascii letters, digits, hyphens and spaces are dropped,
  surrounding whitespace is trimmed, every remaining run of whitespace becomes
  a single '-', and the result is lowercased.
  """
  #the parameter keeps its original name so the call signature is unchanged
  kept = alphanum.sub('', str)
  return merge_space.sub('-', kept.strip()).lower()

#timeline entries keyed by name: special events plus one entry per ctf yml
objects = {}

#per ctf: {challenge slug: markdown comment body}
comments = {}

#read special events
#NOTE(review): yaml.Loader can construct arbitrary python objects; fine for files we write ourselves, but switch to yaml.SafeLoader if these ever come from somewhere else
with open(f'{CWD}/ctf-diary/special.yml', encoding='utf-8') as s:
  objects.update(yaml.load(s, Loader=yaml.Loader))

#read all ctfs in record
skipped_cmts = []  #ctfs whose comment file could not be read or parsed
for file in os.listdir(f'{CWD}/ctf-diary/ctfs'):
  if file.endswith('.yml'):
    name = file[:-4]
    #read metadata into objects
    with open(f'{CWD}/ctf-diary/ctfs/{file}', encoding='utf-8') as s:
      objects.update({name: yaml.load(s, Loader=yaml.Loader)})

    #read comments; split according to headers ('### ') which should only be used by chall names
    #each section is the chall name on its first line, followed by the comment body
    #the 3 ### should be good enough to differentiate headers from code comments, we will do sanity check when we actually map the comments anyway
    try:
      with open(f'{CWD}/ctf-diary/ctfs/comments/{name}.md', encoding='utf-8') as c:
        #the first line should already have a challenge; reading 4 chars also discards that first header
        #checked explicitly instead of with `assert`, since `python -O` would strip the assert together with the read inside it
        if c.read(4) != '### ':
          raise AssertionError(f'{name}.md does not start with a "### " challenge header')
        sections = {}
        for section in c.read().split('\n### '):
          #a header without any body has no second part and raises IndexError, which skips the whole file below
          sp = section.split('\n', 1)
          sections[remove_formatting(sp[0])] = sp[1]
        comments.update({name: sections})
    except Exception as e:
      #deliberately best-effort: a missing or malformed comment file must not abort the whole build
      skipped_cmts.append(name)
      if 'challenges' in objects[name] and not all(chall.get('writeup-url') for chall in objects[name]['challenges']): #only print if ctf should have comments
        print(f'Cannot read comments for {file} ({type(e).__name__}), skipping...')

#date is already datetime.date (thanks pyyaml), so the entries can be ordered oldest-first as they are
chronological = sorted(objects.items(), key=lambda item: item[1]['date'])

#add year objects: walk from oldest to newest and drop in a marker whenever the year rolls over
year = None
templist = []
for k, v in chronological:
  if v['date'].year != year:
    year = v['date'].year
    #the oldest year gets no marker: nothing has been collected yet at that point
    if templist:
      #content stays an int; the template's .format turns it into text anyway
      marker = {'style': 'timeline-year', 'content': year, 'date': datetime.fromisoformat(f'{year}-01-01')}
      templist.append((year, marker))
  templist.append((k, v))

#flip to newest-first; dicts keep insertion order so iterating objects follows this order
templist.reverse()
objects = dict(templist)


#load the three html templates: the ctf entry, the special entry and the diary page
with open(f'{CWD}/templates/ctf.html', encoding='utf-8') as cf, \
     open(f'{CWD}/templates/special.html', encoding='utf-8') as sf, \
     open(f'{CWD}/templates/diary.html', encoding='utf-8') as df:
  ctf, special, diary = cf.read(), sf.read(), df.read()


#generation helpers 

#ids ('<ctf>-<chall slug>') of every comment that got linked to a challenge row
added = []
def get_comment(chall, ctf):
  """Return the html attribute that links a challenge row to its writeup or comment.

  chall: mapping describing one challenge; its first value is taken as the challenge name
  ctf: key of the ctf the challenge belongs to, as used in `comments`

  Returns 'href="<url>"' when the challenge has a truthy 'writeup-url',
  'data-comment="<ctf>-<slug>"' when a comment was read for it (the id is also
  recorded in `added`), and '' when neither exists.
  """
  writeup = chall.get('writeup-url')
  if writeup:
    return f'href="{writeup}"'

  #str() since yaml resolves a purely numeric chall name (e.g. 2048) to a number, which the regex helpers cannot take
  name = remove_formatting(str(next(iter(chall.values()))))
  if ctf in comments and name in comments[ctf]:
    added.append(f'{ctf}-{name}')
    return f'data-comment="{ctf}-{name}"'

  if ctf not in skipped_cmts:  #only print if the file is read, otherwise its just redundant
    print(f"{ctf}-{name} has no comments, name mismatch?")
  return ''


#css class suffix for podium placements
ranks = {1: 'first', 2: 'second', 3: 'third'}
def get_ordinal(n):
  """Render a placement as a <div> holding the number and its ordinal suffix.

  n: integer rank; negative means there is no rank

  Returns '<div>N/A</div>' for negative n, a div with class "text-first" /
  "text-second" / "text-third" for ranks 1-3, and a plain div for anything else.
  """
  if n < 0:
    return '<div>N/A</div>'

  #11, 12 and 13 (also 111, 212, ...) take 'th' even though they end in 1, 2 or 3
  if 11 <= n % 100 <= 13:
    suffix = 'th'
  else:
    suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
  formatted = f"{n}<sup>{suffix}</sup>"

  #look the podium class up instead of assuming every n <= 3 has one: 0 is not in ranks and used to raise KeyError
  podium = ranks.get(n)
  if podium is None:
    return '<div>' + formatted + '</div>'
  return '<div class="text-' + podium + '">' + formatted + '</div>'
    

#header cells for the fields whose column title is not just the prettified field name
special_field_names = {
  'name': '<th scope="col">Challenge <a class="text-light" data-toggle="modal" data-target="#help" href="#help" role="button"><i class="fa fa-question-circle"></i></a></th>',
  'writeup-url': '<th scope="col">Full writeup</th>',
  #flags on a challenge rather than real columns: they get no header cell
  'first-blood': '',
  'writeup-prize': '',
}

#emoji shown after the challenge name when the matching key is present on the challenge
chall_decor = {'first-blood': '🩸', 'writeup-prize': '👑'}

#cell renderers; each takes (field value, whole challenge mapping) and returns the html for that cell

def _name_cell(value, chall):
  #row header: the name, a space, then the space-separated decorations whose key appears in the challenge
  decorations = ' '.join(emoji for key, emoji in chall_decor.items() if key in chall)
  return f'<th scope="row">{value} {decorations}</th>'

def _row_header_cell(value, _chall):
  #plain bold row header
  return f'<th scope="row">{value}</th>'

def _writeup_cell(value, _chall):
  #tick when a writeup url is set, cross otherwise
  mark = '✔️' if value else '❌'
  return '<td>' + mark + '</td>'

def _solve_status_cell(value, _chall):
  #a status outside this table raises KeyError
  icons = {'solved': '✔️', 'co-solved': '🤝', 'sniped': '💤', 'unsolved': '👀'}
  return '<td>' + icons[value] + '</td>'

def _no_cell(_value, _chall):
  #flags that are shown as decorations on the name instead of in a cell of their own
  return ''

special_fields = {
  #for now instead of bolding the first item in a row, only these 2 are bolded as row headers
  #the name is still assumed to be in the first column though - commenting breaks if not
  'name': _name_cell,
  'challenge-written': _row_header_cell,
  'writeup-url': _writeup_cell,
  'solve-status': _solve_status_cell,
  #special fields that are not actually fields
  'first-blood': _no_cell,
  'writeup-prize': _no_cell,
}


#generate the page following the template formats
html = diary.format(
  ctfs="".join([
    #special entries (e.g. the year markers): anything that carries a 'content' key
    special.format(style=v['style'], content=v['content'])
  if 'content' in v else
    #actual ctfs
    ctf.format(
      style='timeline-organized' if v['organizer'] else '',
      #class="headings" to have same style but not clickable if url is null, otherwise link
      #also link is non w3c compliant hack around interactive elements inside buttons; who complies anyway :)
      url=f'class="nav-link" onclick="window.location=\'{v["url"]}\'; event.stopPropagation()"' if v['url'] else 'class="headings"',
      name=v['name'],
      #isoformat is the one we want for datetime, but we only need date not time; actual format should be <month-abbr>. <day>
      date=f'<time datetime="{v["date"].isoformat().split("T")[0]}">{v["date"].strftime("%b. %d")}</time>',
      duration=f'{v["duration"]}h',  #currently we are hardcoding hours, but we can always parse
      type=v['type'],
      team=v['team'],
      #organized events show a badge instead of a rank; a string rank is printed as is, a number goes through get_ordinal
      rank='<div class="text-success">Organizer</div>'
        if v['organizer'] else
        (('<div>' + v['rank'] + '</div>' if isinstance(v['rank'], str) else get_ordinal(v['rank'])) +
        ('<span data-toggle="tooltip" title="All challenges cleared">✨</span>' if v['full-clear'] else '')),
      #use the first challenge as header definition
      challengeheader='<thead><tr>'
          + ''.join([
            #normal fields that are named as expected so we can just use it as the headers
              '<th scope="col">' + name.replace('-', ' ').capitalize() + '</th>'
            if name not in special_field_names else
            #special fields that needs renaming
              special_field_names[name]
            for name in v['challenges'][0].keys()])
          + '</tr></thead>'
        if 'challenges' in v else "<div class='text-center'>No specific challenges have been logged; It's all a team effort!</div>",   #allow no challenge specified (e.g. A/D ctfs where its basically fully team effort so no specific challs that i wouldve fully solved)
      #one <tbody> per challenge row; the row carries the link/comment attribute from get_comment
      challenges=''.join(['<tbody><tr ' + get_comment(chall, k) +  '>'
            #assume every chall object follows the same format as the first, or else header mismatches
            + ''.join([
                f'<td>{field}</td>'
              if name not in special_fields else
                special_fields[name](field, chall)
              for name, field in chall.items()])
            #close the tbody here; this used to emit a second opening <tbody> that was never closed
            + '</tr></tbody>'
          for chall in v['challenges']])
        if 'challenges' in v else '',
    )
  for k, v in objects.items()])
)


#lint output: parse the generated page and drop every html comment before writing it out
soup = BeautifulSoup(html, 'html.parser')

#find_all / string= are the current spellings of the deprecated findAll / text=
for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
  comment.extract()

#note: written relative to the current working directory, unlike the inputs which are read relative to CWD
with open('ctf.html', 'w', encoding="utf-8") as out:
  out.write(str(soup))  #the re-serialized tree, now without comments


#write comments into their respective files, under ctf/ in the current working directory
#makedirs with exist_ok avoids the check-then-create race of exists() + mkdir()
os.makedirs('ctf', exist_ok=True)

#markdown setup is the same for every comment, so build it once (also requires pymdown-extensions)
md_extensions = ['pymdownx.highlight', 'pymdownx.superfences', 'pymdownx.tilde', 'pymdownx.inlinehilite', 'pymdownx.emoji', 'pymdownx.magiclink']
md_extension_configs = {
  'pymdownx.highlight': {
    'guess_lang': 'block',
    'pygments_lang_class': True,
  },
}

#ids that get_comment linked to a challenge row; a set keeps the membership check below cheap
linked = set(added)

#loop names chosen so they do not overwrite the `ctf` template string
for ctf_key, challs in comments.items():
  for chall_slug, cmt in challs.items():
    with open(f'ctf/{ctf_key}-{chall_slug}.html', 'w', encoding="utf-8") as out:
      out.write(markdown(cmt, extensions=md_extensions, extension_configs=md_extension_configs))

    #sanity check if we missed any challs in the yml by comparing against added comments
    if f'{ctf_key}-{chall_slug}' not in linked:
      print(f"{ctf_key}-{chall_slug} has a comment but doesn't exist in {ctf_key}.yml - missed definition?")