"""Build a character list from selected Unicode ranges plus hand-picked CJK characters.

Run as a script, it:

1. expands every enabled ``START-END`` range in ``unicode_ranges`` into its
   characters and appends the hand-picked characters in ``EXTRA_CHARS``;
2. reads the file ``test`` and reports every character in it that is not
   part of the resulting character list;
3. writes the character list, UTF-8 encoded, to ``charout.txt``.

To enable or disable a Unicode range, (un)comment its line in the table below.
"""

import json  # only referenced by the commented-out JSON variant in main()

# Enabled ranges, as a comma-terminated list of inclusive hex "START-END" pairs.
unicode_ranges = "0000-007F,"  # Basic Latin
unicode_ranges += "0080-00FF,"  # C1 Controls and Latin-1 Supplement
unicode_ranges += "0100-017F,"  # Latin Extended-A
unicode_ranges += "0180-024F,"  # Latin Extended-B
# unicode_ranges += "0250-02AF,"  # IPA Extensions
# unicode_ranges += "02B0-02FF,"  # Spacing Modifier Letters
unicode_ranges += "0300-036F,"  # Combining Diacritical Marks
unicode_ranges += "0370-03FF,"  # Greek/Coptic
unicode_ranges += "0400-04FF,"  # Cyrillic
# unicode_ranges += "0500-052F,"  # Cyrillic Supplement
unicode_ranges += "0530-058F,"  # Armenian -> broken in powershell
unicode_ranges += "0590-05FF,"  # Hebrew -> broken in powershell
unicode_ranges += "0600-06FF,"  # Arabic -> broken in powershell
# unicode_ranges += "0700-074F,"  # Syriac
# unicode_ranges += "0750-077F,"  # Undefined
# unicode_ranges += "0780-07BF,"  # Thaana
# unicode_ranges += "07C0-08FF,"  # Undefined
# unicode_ranges += "0900-097F,"  # Devanagari
# unicode_ranges += "0980-09FF,"  # Bengali/Assamese
# unicode_ranges += "0A00-0A7F,"  # Gurmukhi
# unicode_ranges += "0A80-0AFF,"  # Gujarati
# unicode_ranges += "0B00-0B7F,"  # Oriya
# unicode_ranges += "0B80-0BFF,"  # Tamil
# unicode_ranges += "0C00-0C7F,"  # Telugu
# unicode_ranges += "0C80-0CFF,"  # Kannada
# unicode_ranges += "0D00-0D7F,"  # Malayalam
# unicode_ranges += "0D80-0DFF,"  # Sinhala
unicode_ranges += "0E00-0E7F,"  # Thai
# unicode_ranges += "0E80-0EFF,"  # Lao
# unicode_ranges += "0F00-0FFF,"  # Tibetan
# unicode_ranges += "1000-109F,"  # Myanmar
unicode_ranges += "10A0-10FF,"  # Georgian
# unicode_ranges += "1100-11FF,"  # Hangul Jamo
# unicode_ranges += "1200-137F,"  # Ethiopic
# unicode_ranges += "1380-139F,"  # Undefined
# unicode_ranges += "13A0-13FF,"  # Cherokee
# unicode_ranges += "1400-167F,"  # Unified Canadian Aboriginal Syllabics
# unicode_ranges += "1680-169F,"  # Ogham
# unicode_ranges += "16A0-16FF,"  # Runic
# unicode_ranges += "1700-171F,"  # Tagalog
# unicode_ranges += "1720-173F,"  # Hanunoo
# unicode_ranges += "1740-175F,"  # Buhid
# unicode_ranges += "1760-177F,"  # Tagbanwa
# unicode_ranges += "1780-17FF,"  # Khmer
# unicode_ranges += "1800-18AF,"  # Mongolian
# unicode_ranges += "18B0-18FF,"  # Undefined
# unicode_ranges += "1900-194F,"  # Limbu
# unicode_ranges += "1950-197F,"  # Tai Le
# unicode_ranges += "1980-19DF,"  # Undefined
# unicode_ranges += "19E0-19FF,"  # Khmer Symbols
# unicode_ranges += "1A00-1CFF,"  # Undefined
# unicode_ranges += "1D00-1D7F,"  # Phonetic Extensions
# unicode_ranges += "1D80-1DFF,"  # Undefined
unicode_ranges += "1E00-1EFF,"  # Latin Extended Additional
# unicode_ranges += "1F00-1FFF,"  # Greek Extended
# unicode_ranges += "2000-206F,"  # General Punctuation
# unicode_ranges += "2070-209F,"  # Superscripts and Subscripts
# unicode_ranges += "20A0-20CF,"  # Currency Symbols
# unicode_ranges += "20D0-20FF,"  # Combining Diacritical Marks for Symbols
# unicode_ranges += "2100-214F,"  # Letterlike Symbols
# unicode_ranges += "2150-218F,"  # Number Forms
# unicode_ranges += "2190-21FF,"  # Arrows
# unicode_ranges += "2200-22FF,"  # Mathematical Operators
# unicode_ranges += "2300-23FF,"  # Miscellaneous Technical
# unicode_ranges += "2400-243F,"  # Control Pictures
# unicode_ranges += "2440-245F,"  # Optical Character Recognition
# unicode_ranges += "2460-24FF,"  # Enclosed Alphanumerics
# unicode_ranges += "2500-257F,"  # Box Drawing
# unicode_ranges += "2580-259F,"  # Block Elements
# unicode_ranges += "25A0-25FF,"  # Geometric Shapes
# unicode_ranges += "2600-26FF,"  # Miscellaneous Symbols
# unicode_ranges += "2700-27BF,"  # Dingbats
# unicode_ranges += "27C0-27EF,"  # Miscellaneous Mathematical Symbols-A
# unicode_ranges += "27F0-27FF,"  # Supplemental Arrows-A
# unicode_ranges += "2800-28FF,"  # Braille Patterns
# unicode_ranges += "2900-297F,"  # Supplemental Arrows-B
# unicode_ranges += "2980-29FF,"  # Miscellaneous Mathematical Symbols-B
# unicode_ranges += "2A00-2AFF,"  # Supplemental Mathematical Operators
# unicode_ranges += "2B00-2BFF,"  # Miscellaneous Symbols and Arrows
# unicode_ranges += "2C00-2E7F,"  # Undefined
# unicode_ranges += "2E80-2EFF,"  # CJK Radicals Supplement
# unicode_ranges += "2F00-2FDF,"  # Kangxi Radicals
# unicode_ranges += "2FE0-2FEF,"  # Undefined
# unicode_ranges += "2FF0-2FFF,"  # Ideographic Description Characters
# unicode_ranges += "3000-303F,"  # CJK Symbols and Punctuation
# unicode_ranges += "3040-309F,"  # Hiragana
# unicode_ranges += "30A0-30FF,"  # Katakana
# unicode_ranges += "3100-312F,"  # Bopomofo
# unicode_ranges += "3130-318F,"  # Hangul Compatibility Jamo
# unicode_ranges += "3190-319F,"  # Kanbun (Kunten)
# unicode_ranges += "31A0-31BF,"  # Bopomofo Extended
# unicode_ranges += "31C0-31EF,"  # Undefined
# unicode_ranges += "31F0-31FF,"  # Katakana Phonetic Extensions
# unicode_ranges += "3200-32FF,"  # Enclosed CJK Letters and Months
# unicode_ranges += "3300-33FF,"  # CJK Compatibility
# unicode_ranges += "3400-4DBF,"  # CJK Unified Ideographs Extension A
# unicode_ranges += "4DC0-4DFF,"  # Yijing Hexagram Symbols
# unicode_ranges += "4E00-9FAF,"  # CJK Unified Ideographs
# unicode_ranges += "9FB0-9FFF,"  # Undefined
# unicode_ranges += "A000-A48F,"  # Yi Syllables
# unicode_ranges += "A490-A4CF,"  # Yi Radicals
# unicode_ranges += "A4D0-ABFF,"  # Undefined
# unicode_ranges += "AC00-D7AF,"  # Hangul Syllables
# unicode_ranges += "D7B0-D7FF,"  # Undefined
# unicode_ranges += "D800-DBFF,"  # High Surrogate Area
# unicode_ranges += "DC00-DFFF,"  # Low Surrogate Area
# unicode_ranges += "E000-F8FF,"  # Private Use Area
# unicode_ranges += "F900-FAFF,"  # CJK Compatibility Ideographs
# unicode_ranges += "FB00-FB4F,"  # Alphabetic Presentation Forms
# unicode_ranges += "FB50-FDFF,"  # Arabic Presentation Forms-A
# unicode_ranges += "FE00-FE0F,"  # Variation Selectors
# unicode_ranges += "FE10-FE1F,"  # Undefined
# unicode_ranges += "FE20-FE2F,"  # Combining Half Marks
# unicode_ranges += "FE30-FE4F,"  # CJK Compatibility Forms
# unicode_ranges += "FE50-FE6F,"  # Small Form Variants
# unicode_ranges += "FE70-FEFF,"  # Arabic Presentation Forms-B
# unicode_ranges += "FF00-FFEF,"  # Halfwidth and Fullwidth Forms
# unicode_ranges += "FFF0-FFFF,"  # Specials
# unicode_ranges += "10000-1007F,"  # Linear B Syllabary
# unicode_ranges += "10080-100FF,"  # Linear B Ideograms
# unicode_ranges += "10100-1013F,"  # Aegean Numbers
# unicode_ranges += "10140-102FF,"  # Undefined
# unicode_ranges += "10300-1032F,"  # Old Italic
# unicode_ranges += "10330-1034F,"  # Gothic
# unicode_ranges += "10380-1039F,"  # Ugaritic
# unicode_ranges += "10400-1044F,"  # Deseret
# unicode_ranges += "10450-1047F,"  # Shavian
# unicode_ranges += "10480-104AF,"  # Osmanya
# unicode_ranges += "104B0-107FF,"  # Undefined
# unicode_ranges += "10800-1083F,"  # Cypriot Syllabary
# unicode_ranges += "10840-1CFFF,"  # Undefined
# unicode_ranges += "1D000-1D0FF,"  # Byzantine Musical Symbols
# unicode_ranges += "1D100-1D1FF,"  # Musical Symbols
# unicode_ranges += "1D200-1D2FF,"  # Undefined
# unicode_ranges += "1D300-1D35F,"  # Tai Xuan Jing Symbols
# unicode_ranges += "1D360-1D3FF,"  # Undefined
# unicode_ranges += "1D400-1D7FF,"  # Mathematical Alphanumeric Symbols
# unicode_ranges += "1D800-1FFFF,"  # Undefined
# unicode_ranges += "20000-2A6DF,"  # CJK Unified Ideographs Extension B
# unicode_ranges += "2A6E0-2F7FF,"  # Undefined
# unicode_ranges += "2F800-2FA1F,"  # CJK Compatibility Ideographs Supplement
# unicode_ranges += "2FAB0-DFFFF,"  # Unused
# unicode_ranges += "E0000-E007F,"  # Tags
# unicode_ranges += "E0080-E00FF,"  # Unused
# unicode_ranges += "E0100-E01EF,"  # Variation Selectors Supplement
# unicode_ranges += "E01F0-EFFFF,"  # Unused
# unicode_ranges += "F0000-FFFFD,"  # Supplementary Private Use Area-A
# unicode_ranges += "FFFFE-FFFFF,"  # Unused
# unicode_ranges += "100000-10FFFD,"  # Supplementary Private Use Area-B

# Individually picked Chinese characters, appended after the range characters
# in exactly this order.
EXTRA_CHARS = (
    # picked chars from chinese
    # simple
    "卧房室客厅洗手间阳台厨灯壁镜前后左右东南西北中空调风扇橱柜控温湿度网关串夜落地阅读水泵一二三四五六七八九十百年月日周星期播放窗帘门开高低家在电暖气器摄像头扫人机组群冷热模式时分秒大小上下多少主书层制浴"
    # trad
    "臥廳間陽廚燈鏡後東調風櫥櫃溫濕網關閱讀簾門開電氣攝頭掃機組熱時書層"
    # chars that were missing simple
    "略闲定戒当色置辅离亮激义自位清行状过送按活条态警速码假除密锁解件运"
    # chars missing trad
    "鎖離狀出目外斷態輔用運判閉碼助啟執轉閒"
)


def parse_ranges(spec):
    """Parse a comma-separated list of hex ``START-END`` ranges.

    Args:
        spec: Text such as ``"0000-007F,0080-00FF,"``. Empty items (for
            example the one produced by a trailing comma) are ignored.

    Returns:
        A list of ``(start, stop)`` integer code points, where ``stop`` is
        exclusive (``END + 1``) so the pair can be passed to ``range``.

    Raises:
        ValueError: If an item is not of the form ``HEX-HEX``.
    """
    bounds = []
    for item in spec.split(","):
        item = item.strip()
        if not item:
            continue
        first, last = item.split("-")
        bounds.append((int(first, 16), int(last, 16) + 1))
    return bounds


def build_char_string(range_spec, extra_chars="", verbose=True):
    """Return all characters of the given ranges followed by ``extra_chars``.

    Args:
        range_spec: Range list in the format accepted by ``parse_ranges``.
        extra_chars: Characters appended verbatim after the range characters.
        verbose: When true, print the ``start`` and exclusive ``end`` code
            point of every range as it is expanded.

    Returns:
        The concatenated characters as one string.
    """
    pieces = []
    for start, end in parse_ranges(range_spec):
        if verbose:
            print(start, end)
        pieces.append("".join(map(chr, range(start, end))))
    pieces.append(extra_chars)
    # Join once instead of growing a string with repeated "+=".
    return "".join(pieces)


def find_missing_chars(text, available):
    """Return the sorted list of characters in ``text`` absent from ``available``."""
    return sorted(set(text) - set(available))


def main(source_path="test", output_path="charout.txt"):
    """Build the character list, check coverage of a file, and write the list.

    Args:
        source_path: Text file whose characters must all be covered.
        output_path: File that receives the UTF-8 encoded character list.

    Returns:
        The sorted list of characters found in ``source_path`` that are not
        part of the character list.
    """
    char_res_string = build_char_string(unicode_ranges, EXTRA_CHARS)

    print("Out: ")
    # print(char_res_string)

    # check if translations.py is covered
    # NOTE(review): the input file is named "test"; presumably it holds a copy
    # of translations.py -- confirm.
    # Decode explicitly as UTF-8: the platform default encoding (not UTF-8 on
    # Windows) would mis-decode or reject the CJK text being checked.
    with open(source_path, "r", encoding="utf-8") as f:
        unique_chars = set(f.read())
        # unique_chars = json.dumps(json.load(f))
    print(len(unique_chars))
    # print(''.join(sorted(unique_chars)))

    # Sorted so that the report is identical from run to run.
    missing = find_missing_chars(unique_chars, char_res_string)
    for char in missing:
        print(f"char {char} missing")

    with open(output_path, "wb") as text_file:
        text_file.write(char_res_string.encode("utf8"))
    return missing


if __name__ == "__main__":
    main()