Merge branch 'monstersyaml-cleanup'

2020-02-20 15:44:02 -06:00 · 2020-02-20 15:44:02 -06:00 · 09b745d3c8
parent 07caa27fce 6560b512ca
commit 09b745d3c8
6 changed files with 128682 additions and 40579 deletions
--- a/data/yaml/basics.yaml
+++ b/data/yaml/basics.yaml
@ -74,11 +74,37 @@ lang_rarity:
  - Uncommon
  - Secret
 movement:
+  - Air Walk (constant)
  - Land
  - Burrow
+  - Burrow (sand only)
+  - Burrow (snow only)
+  - Can't Move
  - Climb
+  - Climb Stone
+  - Ice Climb
+  - Ice Stride
+  - Trickster's Step
+  - Earth Glide
+  - Sand Glide
+  - Glide
+  - Compression
+  - Suction
+  - Freedom of Movement (constant)
+  - Spider Climb (constant)
+  - Unstoppable Burrow
  - Fly
+  - Fly (from fly)
+  - Magma Swim
  - Swim
+  - Swamp Stride
+  - Swiftness
+  - Woodland Stride
+  - Trackless Step
+  - Cloud Walk
+  - Powerful Jumper
+  - Swiftness
+  - Walk in Shadow
 size:
  - name: Tiny
    space_in_ft: 4
--- a/data/yaml/deprecated/clean-up-monsters.py
+++ b/data/yaml/deprecated/clean-up-monsters.py
@ -0,0 +1,489 @@
+import yaml
+import re
+
+def main():
+
+    cleanuphtmlstuff()
+
+    with open("tmp-monsters-html-cleanup.yaml", 'r') as content_file:
+        data = yaml.full_load(content_file)
+
+    counter = 0
+    for i in data:
+        counter += 1
+        #print("{}\t{}".format(counter, i['name']))
+
+        # Set some data points before iteration
+        i['saves_special'] = {}
+
+        # clean up ability mods
+        for k, v in i['ability_mods'].items():
+            i['ability_mods'][k] = int(v)
+            # print(k, i['ability_mods'][k])
+
+        # clean up skills
+        for k, v in i['skills'].items():
+            i['skills'][k] = int(v)
+            # print(k, i['ability_mods'][k])
+
+        # clean up saves
+        for k, v in i['saves'].items():
+            # print(k, v)
+            i['saves_special'][k] = None
+            if ',' in v:
+                # print("WE GOT A COMMA")
+                i['saves'][k] = v.replace(',', '')
+            if ';' in v:
+                # print("WE GOT A SPECIAL")
+                a = v.split('; ')
+                # print(a)
+                i['saves_special'][k] = a[1]
+                i['saves'][k] = a[0]
+            elif '(' in v:
+                #print("we got a parentheses")
+                #print(v)
+                a = v.split(' (')
+                # print(a)
+                i['saves_special'][k] = a[1]
+                i['saves'][k] = a[0]
+            i['saves'][k] = int(i['saves'][k])
+            # print(k, i['saves'][k])
+
+        # clean up immunities
+        if i['immunities'] == "None":
+            i['immunities'] = None
+        else:
+            tmp = i['immunities']
+            splits = tmp.split(',')
+            res = []
+            for z in splits:
+                res.append(z.strip())
+            i['immunities'] = res
+
+        # clean up traits with trailing or leading whitespace
+        traitslist = []
+        #print(i['traits'])
+        for x in i['traits']:
+            traitslist.append(x.strip())
+            #print(x.strip())
+        #print(traitslist)
+        i['traits'] = traitslist
+
+        # clean up traits underneath proactive actions with trailing or leading whitespace
+        #print(i['proactive_abilities'])
+        for x in i['proactive_abilities']:
+            traitslist = []
+            if x['traits'] != None:
+                if len(x['traits']) == 0:
+                    x['traits'] = None
+                else:
+                    for y in x['traits']:
+                        traitslist.append(y.strip())
+                        #print(y.strip())
+                    x['traits'] = traitslist
+
+        # clean up traits underneath automatic_abilities with trailing or leading whitespace
+        for x in i['automatic_abilities']:
+            # this one line fixes leading and trailing whitespace
+            x['description'] = x['description'].strip()
+            traitslist = []
+            if x['traits'] != None:
+                if len(x['traits']) == 0:
+                    x['traits'] = None
+                else:
+                    for y in x['traits']:
+                        traitslist.append(y.strip())
+                        #print(y.strip())
+                    x['traits'] = traitslist
+
+        # clean up traits underneath melee with trailing or leading whitespace
+        for x in i['melee']:
+            traitslist = []
+            if x['traits'] != None:
+                if len(x['traits']) == 0:
+                    x['traits'] = None
+                else:
+                    for y in x['traits']:
+                        traitslist.append(y.strip())
+                        #print(y.strip())
+                    x['traits'] = traitslist
+
+        # clean up traits underneath ranged with trailing or leading whitespace
+        for x in i['ranged']:
+            traitslist = []
+            if x['traits'] != None:
+                if len(x['traits']) == 0:
+                    x['traits'] = None
+                else:
+                    for y in x['traits']:
+                        traitslist.append(y.strip())
+                        #print(y.strip())
+                    x['traits'] = traitslist
+
+        # clean up senses
+
+        senseslist = []
+        if i['senses'] != None:
+            if len(i['senses']) == 0:
+                i['senses'] = None
+            else:
+                for x in i['senses']:
+                    senseslist.append(x.strip())
+                i['senses'] = senseslist
+
+        # clean up spell DC
+        if i['spell_dc'] == "None":
+            i['spell_dc'] = None
+        else:
+            i['spell_dc'] = int(i['spell_dc'])
+
+        # clean up innate_spell levels
+        if 'innate_spells' not in i:
+            i['innate_spells'] = None
+        elif i['innate_spells'] == "None": 
+            i['innate_spells'] = None
+        else:
+            for x in i['innate_spells']:
+                x['level'] = int(x['level'])
+
+        # clean up level
+        i['level'] = int(i['level'])
+
+        # clean up AC and HP and Perception
+        ###print("AC:\t{}".format(i['ac']))
+        ###print("HP:\t{}".format(i['hp']))
+        res = re.match('(\d+)', i['ac'].strip())
+        i['ac'] = int(res.group(1))
+        res = re.match('(\d+)', i['hp'].strip())
+        i['hp'] = int(res.group(1))
+
+        # clean up melee to hit
+        if 'melee' not in i:
+            i['melee'] = None
+        elif i['melee'] == "None":
+            i['melee'] = None
+        else:
+            for x in i['melee']:
+                if 'to_hit' not in x:
+                    x['to_hit'] = None
+                else:
+                    res = re.search('(\d+)', x['to_hit'])
+                    #print("{}\t{}".format(counter, i['name']))
+                    if res:
+                        x['to_hit'] = int(res.group(1))
+
+        # clean up ranged to hit
+        if 'ranged' not in i:
+            i['ranged'] = None
+        elif i['ranged'] == "None":
+            i['ranged'] = None
+        else:
+            for x in i['ranged']:
+                if 'to_hit' not in x:
+                    x['to_hit'] = None
+                else:
+                    res = re.search('(\d+)', x['to_hit'])
+                    #print("{}\t{}".format(counter, i['name']))
+                    if res:
+                        x['to_hit'] = int(res.group(1))
+
+        # clean up spell attack to hit
+        if 'spell_attack_to_hit' not in i:
+            i['spell_attack_to_hit'] = None
+        elif i['spell_attack_to_hit'] == "None":
+            i['spell_attack_to_hit'] = None
+        else:
+            res = re.search('(\d+)', i['spell_attack_to_hit'])
+            #print("{}\t{}".format(counter, i['name']))
+            if res:
+                i['spell_attack_to_hit'] = int(res.group(1))
+
+        ###if i['perception'] == None or i['perception'] == "":
+        ###    print("{}\t{}".format(counter, i['name']))
+        ###    print("\t\t\t\t{}".format(i['perception']))
+        if type(i['perception']) is str:
+            #print("{}\t{}".format(counter, i['name']))
+            res = re.match('(\d+)', i['perception'])
+            #print(res.group(1))
+            i['perception'] = int(res.group(1))
+
+        # clean up nulls for empty lists
+
+        if i['automatic_abilities'] == []:
+            i['automatic_abilities'] = None
+        if i['ranged'] == []:
+            i['ranged'] = None
+        if i['melee'] == []:
+            i['melee'] = None
+
+        # clean up resistances
+
+        # set Nones to null
+        if i['resistances'] == "None":
+            i['resistances'] = None
+
+        # remove trailing ;
+        elif i['resistances'].endswith(";"):
+            i['resistances'] = i['resistances'][:-1]
+            #print("\t{}".format(i['resistances']))
+
+        if i['resistances'] != None:
+            #print("{}\t{}".format(counter, i['name']))
+            #print("\t{}".format(i['resistances']))
+            # print("{}\t{}".format(counter, i['name']))
+            res = processResistances(i['resistances'])
+            i['resistances'] = res
+            #print(res)
+
+        #if type(i['resistances']) == str:
+        #    print("{}\t{}".format(counter, i['name']))
+        #    print("\t\t\t\t{}".format(i['resistances']))
+
+        # clean up speeds
+        newspeed = []
+        if 'land' in i['speed']:
+            # get number
+            res = re.match('(\d+)', i['speed']['land'])
+            amt = int(res.group(1))
+            newspeed.append({"type": "Land", "amount": amt })
+        if 'fly' in i['speed']:
+            if 'from fly' in i['speed']['fly']:
+                # do the from fly version
+                res = re.match('(\d+)', i['speed']['fly'])
+                amt = int(res.group(1))
+                newspeed.append({"type": "Fly (from Fly)", "amount": amt })
+            else:
+                # get number
+                res = re.match('(\d+)', i['speed']['fly'])
+                amt = int(res.group(1))
+                newspeed.append({"type": "Fly", "amount": amt })
+        if 'swim' in i['speed']:
+            # get number
+            res = re.match('(\d+)', i['speed']['swim'])
+            amt = int(res.group(1))
+            newspeed.append({"type": "Swim", "amount": amt })
+        if 'burrow' in i['speed']:
+            # get number
+            res = re.match('(\d+)', i['speed']['burrow'])
+            amt = int(res.group(1))
+            newspeed.append({"type": "Burrow", "amount": amt })
+        if 'climb' in i['speed']:
+            # get number
+            res = re.match('(\d+)', i['speed']['climb'])
+            amt = int(res.group(1))
+            newspeed.append({"type": "Climb", "amount": amt })
+        if 'iceclimb' in i['speed']:
+            # get number
+            res = re.match('(\d+)', i['speed']['iceclimb'])
+            amt = int(res.group(1))
+            newspeed.append({"type": "Ice Climb", "amount": amt })
+        if 'climbstone' in i['speed']:
+            # get number
+            res = re.match('(\d+)', i['speed']['climbstone'])
+            amt = int(res.group(1))
+            newspeed.append({"type": "Climb Stone", "amount": amt })
+        if 'burrowsnowonly' in i['speed']:
+            # get number
+            res = re.match('(\d+)', i['speed']['burrowsnowonly'])
+            amt = int(res.group(1))
+            newspeed.append({"type": "Burrow (snow only)", "amount": amt })
+        if 'burrowsandonly' in i['speed']:
+            # get number
+            res = re.match('(\d+)', i['speed']['burrowsandonly'])
+            amt = int(res.group(1))
+            newspeed.append({"type": "Burrow (sand only)", "amount": amt })
+        if 'special' in i['speed']:
+            for x in i['speed']['special']:
+                if x == "ice stride":
+                    newspeed.append({"type": "Ice Stride", "amount": None })
+                if x == "trickster's step":
+                    newspeed.append({"type": "Trickster's Step", "amount": None })
+                if x == "earth glide":
+                    newspeed.append({"type": "Earth Glide", "amount": None })
+                if x == "sand glide":
+                    newspeed.append({"type": "Sand Glide", "amount": None })
+                if x == "glide":
+                    newspeed.append({"type": "Glide", "amount": None })
+                if x == "compression":
+                    newspeed.append({"type": "Compression", "amount": None })
+                if x == "suction":
+                    newspeed.append({"type": "Suction", "amount": None })
+                if x == "swamp stride":
+                    newspeed.append({"type": "Swamp Stride", "amount": None })
+                if x == "powerful jumper":
+                    newspeed.append({"type": "Powerful Jumper", "amount": None })
+                if x == "woodland stride":
+                    newspeed.append({"type": "Woodland Stride", "amount": None })
+                if x == "trackless step":
+                    newspeed.append({"type": "Trackless Step", "amount": None })
+                if x == "cloud walk":
+                    newspeed.append({"type": "Cloud Walk", "amount": None })
+                if x == "swiftness":
+                    newspeed.append({"type": "Swiftness", "amount": None })
+                if x == "freedom of movement (constant)":
+                    newspeed.append({"type": "Freedom of Movement (constant)", "amount": None })
+                if x == "spider climb (constant)":
+                    newspeed.append({"type": "Spider Climb (constant)", "amount": None })
+                if x == "unstoppable burrow":
+                    newspeed.append({"type": "Unstoppable Burrow", "amount": None })
+                if x == "walk in shadow":
+                    newspeed.append({"type": "Walk in Shadow", "amount": None })
+                if x == "magma swim":
+                    newspeed.append({"type": "Magma Swim", "amount": None })
+                if x == "Can't Move":
+                    newspeed.append({"type": "Can't Move", "amount": None })
+                if x == "Air Walk (constant)":
+                    newspeed.append({"type": "Air Walk (constant)", "amount": None })
+
+        
+        i['speed'] = newspeed
+
+
+    final = yaml.safe_dump(data, allow_unicode=True)
+
+
+    with open("tmp-monsters.yaml", 'w') as f:
+        f.write(final)
+
+def cleanuphtmlstuff():
+    with open("monsters.yaml", 'r') as f:
+        lines = f.readlines()
+    for num, _ in enumerate(lines):
+        lines[num] = cleanupalinks(lines[num])
+        lines[num] = cleanupilinks(lines[num])
+        lines[num] = cleanupblinks(lines[num])
+        lines[num] = cleanupbrlinks(lines[num])
+        lines[num] = cleanupulinks(lines[num])
+        lines[num] = cleanupspellslinks(lines[num])
+        lines[num] = cleanupimglinks(lines[num])
+        lines[num] = cleanuptlinks(lines[num])
+    
+    with open("tmp-monsters-html-cleanup.yaml", 'w') as f:
+        f.writelines(lines)
+
+def cleanupalinks(x):
+    tmp = re.sub('<a.+?>', '', x)
+    tmp2 = re.sub('</a>', '', tmp)
+    return tmp2
+
+def cleanuptlinks(x):
+    tmp = re.sub('<t>', '', x)
+    return tmp
+
+def cleanupimglinks(x):
+    res = re.search('(<img.+?>)', x)
+    if res != None:
+        print(res)
+    if res:
+        if 'Single Action' in res.group(0):
+            print("SingleAction")
+            tmp = re.sub('<img.+?>', '|1|', x)
+            tmp = re.sub('\|1\|\|1\|', '|1|', tmp)
+        elif 'Two Actions' in res.group(0):
+            print("TwoActions")
+            tmp = re.sub('<img.+?>', '|2|', x)
+            tmp = re.sub('\|2\|\|2\|', '|2|', tmp)
+        elif 'Three Actions' in res.group(0):
+            print("ThreeActions")
+            tmp = re.sub('<img.+?>', '|3|', x)
+            tmp = re.sub('\|3\|\|3\|', '|3|', tmp)
+        elif 'Free Action' in res.group(0):
+            print("FreeAction")
+            tmp = re.sub('<img.+?>', '|F|', x)
+            tmp = re.sub('\|F\|\|F\|', '|F|', tmp)
+
+        print(tmp)
+        return tmp
+    else:
+        return x
+
+def cleanupspellslinks(x):
+    tmp = re.sub('<spells.+?>', '', x)
+    tmp2 = re.sub('</spells.+?>', '', tmp)
+    return tmp2
+    
+def cleanupilinks(x):
+    tmp = re.sub('<i>', '*', x)
+    tmp2 = re.sub('</i>', '*', tmp)
+    return tmp2
+
+def cleanupulinks(x):
+    tmp = re.sub('<u>', '', x)
+    tmp2 = re.sub('</u>', '', tmp)
+    return tmp2
+
+def cleanupblinks(x):
+    tmp = re.sub('<b>', '**', x)
+    tmp2 = re.sub('</b>', '**', tmp)
+    return tmp2
+
+def cleanupbrlinks(x):
+    return  re.sub('<br>', '\n', x)
+
+
+def processResistances(r):
+    if '(' in r:
+        # TODO This is what needs to be done
+        #print("\t\tTODO: Need to process with parentheses")
+
+        # this tree does the simple ones without multiple items
+        if ',' not in r:
+            rr = re.split('(\d+)', r)
+            return [{"type": rr[0].strip() + ' ' + rr[2].strip(), "amount": int(rr[1])},]
+        elif ',' in r:
+            # FIGURE OUT BRANCH FOR THE COMMA IS NOT INSIDE THE PARENTHETICAL
+            test = re.search('(\(.+\))', r)
+            tstr = test.group(0)
+            if ',' in tstr:
+                # TODO here's the case for comma IS inside the parenthetical
+                print("\n")
+                print(r)
+                if 'haunted form' not in r:
+                    # DO the majority of them
+                    rr = re.split('(\d+)', r)
+                    print(rr)
+                    return [{"type": rr[0].strip() + ' ' + rr[2].strip(), "amount": int(rr[1])},]
+                elif 'haunted form' in r:
+                    # DO the single haunted form one
+                    ##### NOTE I'm hard coding this in as it will be faster than writing regex
+                    result = [{"type": "all (except force, ghost touch, or positive; double resistance against non-magical)", "amount": 8},
+                               {"type": "haunted form", "amount": 12}]
+                    print(result)
+                    return result
+            else:
+                # TODO Here's the case for comma is NOT inside the parenthetical
+                splits = re.split(',', r)
+                mysplitlist = []
+                for x in splits:
+                    rr = re.split('(\d+)', x)
+                    mysplitlist.append({"type": rr[0].strip() + ' ' + rr[2].strip(), "amount": int(rr[1])})
+                return mysplitlist
+        else:
+            print("else has been run")
+            return None
+    else:
+        #print("\t\tNo parentheses")
+        # split on commas
+        res = r.split(',')
+        #print(r.split(','))
+        #print(res)
+        results_list = []
+        for i in res:
+            #print(i)
+            #tmp = re.search('(\d+)', i)
+            #print(tmp.groups())
+            tmp = re.split('(\d+)', i)
+            #print(tmp)
+            tmpres = {"type": tmp[0].strip(), "amount": int(tmp[1])}
+            results_list.append(tmpres)
+
+        return results_list
+
+
+    return "Something went wrong"
+
+
+if __name__ == "__main__":
+    main()
+
+
--- a/data/yaml/deprecated/monsters.yaml.old
+++ b/data/yaml/deprecated/monsters.yaml.old
--- a/data/yaml/deprecated/scraped-monsters.yaml
+++ b/data/yaml/deprecated/scraped-monsters.yaml
--- a/data/yaml/monsters.yaml
+++ b/data/yaml/monsters.yaml
--- a/scripts/utilities/monsters-html-escaper.py
+++ b/scripts/utilities/monsters-html-escaper.py
@ -0,0 +1,13 @@
+import html
+
+def main():
+
+    # read in the file
+    with open("../data/yaml/monsters.yaml", 'r') as content_file:
+        content = content_file.read()
+    # print(html.unescape(content))
+    with open("../data/yaml/monsters-fixed.yaml", 'w') as outfile:
+        outfile.write(html.unescape(content))
+
+if __name__ == "__main__":
+    main()