Change name to codepoint

2022-04-27 12:45:45 -04:00 · 2022-04-27 12:45:45 -04:00 · 4129d58b6d
parent 97105ebc5f
commit 4129d58b6d
3 changed files with 7394 additions and 118 deletions
--- a/resources/shortcodes.txt
+++ b/resources/shortcodes.txt
@ -1,42 +1,55 @@
-loudly crying face:sob
+1F62D:sob
-face screaming in fear:scream
+1F631:scream
-downcast face with sweat:sweat 
+1F613:sweat 
-grinning face with sweat:sweat_smile
+1F605:sweat_smile
-grinning face with smiling eyes:smile 
+1F604:smile 
-rolling on the floor laughing:rofl 
+1F923:rofl 
-face savoring food:yum
+1F60B:yum
-smiling face with halo:innocent
+1F607:innocent
-smiling face with open hands:hugging_face
+1F917:hugging_face
-high voltage:zap
+26A1:zap
-double exclamation mark:bangbang
+203C FE0F:bangbang
-exclamation question mark:interrobang
+2049 FE0F:interrobang
-red question mark:question 
+2753:question 
-red exclamation mark:exclamation 
+2757:exclamation 
-white question mark:white_question
+2754:white_question
-white exclamation mark:white_exclamation
+2755:white_exclamation
-input latin uppercase:big_abcd
+1F520:big_abcd
-input latin lowercase:abcd 
+1F521:abcd 
-input numbers:1234
+1F522:1234
-input symbols:symbols 
+1F523:symbols 
-input latin letters:abc
+1F524:abc
-smiling face with heart-eyes:heart_eyes
+1F60D:heart_eyes
-smiling face with tear:cry_smile
+1F972:cry_smile
-smiling face with horns:smiling_imp 
+1F608:smiling_imp 
-angry face with horns:imp
+1F47F:imp
-pouting face:rage
+1F621:rage
-face with steam from nose:triumph
+1F624:triumph
-grinning cat:smiley_cat
+1F63A:smiley_cat
-grinning cat with smiling eyes:smile_cat 
+1F638:smile_cat 
-cat with tears of joy:joy_cat
+1F639:joy_cat
-smiling cat with heart-eyes:heart_eyes_cat
+1F63B:heart_eyes_cat
-cat with wry smile:smirk_cat
+1F63C:smirk_cat
-weary cat:scream_cat
+1F640:scream_cat
-hundred points:100
+1F4AF:100
-waving hand:wave
+1F44B:wave
-backhand index pointing left:point_left
+1F448:point_left
-backhand index pointing right:point_right
+1F449:point_right
-backhand index pointing up:point_up_2
+1F446:point_up_2
-backhand index pointing down:point_down
+1F447:point_down
-index pointing up:point_up 
+261D FE0F:point_up 
-index pointing at the viewer:point_you
+1FAF5:point_you
-person shrugging:shrug
+1F937:shrug
 0023 FE0F 20E3:hash 
 002A FE0F 20E3:asterisk 
 0030 FE0F 20E3:zero 
 0031 FE0F 20E3:one 
 0032 FE0F 20E3:two
 0033 FE0F 20E3:three
 0034 FE0F 20E3:four 
 0035 FE0F 20E3:five
 0036 FE0F 20E3:six
 0037 FE0F 20E3:seven
 0038 FE0F 20E3:eight
 0039 FE0F 20E3:nine
 1F51F:ten
--- a/scripts/emoji_codegen.py
+++ b/scripts/emoji_codegen.py
@ -25,37 +25,6 @@ const QVector<Emoji> emoji::Provider::emoji = {
    ''')
    d = dict(kwargs=kwargs)
    print(tmpl.render(d))
 # FIXME: Stop this madness
 def humanize_keypad(num):
    """Return the English word for a keycap symbol.

    The recognised symbols are the strings "0" through "9", "10", "*"
    and "#". Any other value yields None so the caller can decide how
    to handle an unknown keycap.
    """
    # Lookup table from keycap symbol to its spelled-out name.
    keycap_words = {
        "0": "zero",
        "1": "one",
        "2": "two",
        "3": "three",
        "4": "four",
        "5": "five",
        "6": "six",
        "7": "seven",
        "8": "eight",
        "9": "nine",
        "10": "ten",
        "*": "asterisk",
        "#": "hash",
    }
    # dict.get falls back to None for symbols missing from the table.
    return keycap_words.get(num)
 if __name__ == '__main__':
    if len(sys.argv) < 3:
        print('usage: emoji_codegen.py /path/to/emoji-test.txt /path/to/shortcodes.txt')
@ -105,22 +74,23 @@ if __name__ == '__main__':
        code, qualification, charAndName = segments
        # skip unqualified versions of same unicode
-        if qualification != 'fully-qualified' and qualification != 'component' :
+        if qualification != 'fully-qualified':
            continue
        char, name = re.match(r'^(\S+) E\d+\.\d+ (.*)$', charAndName).groups()
        shortname = name
-        
+        # until skin tone is handled, keep them around
        # discard skin tone variants for sanity
        # __contains__ is so stupid i hate prototype languages
-        if name.__contains__("skin tone") and qualification != 'component': 
+        # if name.__contains__("skin tone") and qualification != 'component': 
-            continue
+        #    continue
-        if qualification == 'component' and not name.__contains__("skin tone"): 
+        # if qualification == 'component' and not name.__contains__("skin tone"): 
-            continue
+        #    continue
        #TODO: Handle skintone modifiers in a sane way
-        if shortname in shortcodeDict: 
+        basicallyTheSame = False
-            shortname = shortcodeDict[shortname]
+        if code in shortcodeDict: 
            shortname = shortcodeDict[code]
        else:
            shortname = shortname.lower()
            if shortname.endswith(' (blood type)'): 
@ -141,28 +111,25 @@ if __name__ == '__main__':
                shortname = shortname[:-7] 
            if shortname.endswith(' banknote'): 
                shortname = shortname[:-9]
            keycapmtch = re.match(r'^keycap: (.+)$', shortname)
            if keycapmtch: 
                keycapthing, = keycapmtch.groups()
                type(keycapthing)
                num_name = humanize_keypad(keycapthing) 
                if num_name: 
                    shortname = num_name
                else: 
                    raise Exception("incomplete keycap " + keycapthing + ", fix ur code")
            # FIXME: Is there a better way to do this?
            matchobj = re.match(r'^flag: (.*)$', shortname) 
-            if matchobj: 
+            if shortname.startswith("flag: "): 
-                country, = matchobj.groups() 
+                country = shortname[5:]
                shortname = country + " flag"
            shortname = shortname.replace("u.s.", "us")
            shortname = shortname.replace("&", "and")
            if shortname == name.lower(): 
                basicallyTheSame = True
            shortname = shortname.replace("-", "_")
            shortname = re.sub(r'\W', '_', shortname)
            shortname, = re.match(r'^_*(.+)_*$', shortname).groups()
            shortname = re.sub(r'\W', '_', shortname) 
            shortname = re.sub(r'_{2,}', '_', shortname) 
            shortname = unidecode(shortname)
        # if basicallyTheSame: 
        #    shortname = ""
        categories[current_category].append(Emoji(code, shortname, name))
    # Use xclip to pipe the output to clipboard.
--- a/src/emoji/Provider.cpp
+++ b/src/emoji/Provider.cpp