#
#   Japanese character category map
#
#   $Id: char.def,v 1.4 2006/07/05 16:54:13 taku-ku Exp $;
#

###################################################################################
#
#  CHARACTER CATEGORY DEFINITION
#
#  CATEGORY_NAME INVOKE GROUP LENGTH
#
#   - CATEGORY_NAME: Name of category. You have to define the DEFAULT class.
#   - INVOKE: 1/0:   always invoke unknown word processing, even when the word
#                    can be found in the lexicon
#   - GROUP:  1/0:   make a new word by grouping the same character category
#   - LENGTH: n:     1 to n length new words are added
#
DEFAULT        0 1 0  # DEFAULT is a mandatory category!
SPACE          0 1 0
KANJI          0 0 1
SYMBOL         1 1 0
NUMERIC        1 1 0
ALPHA          1 1 0
HIRAGANA       0 1 2
KATAKANA       1 1 2
#KANJINUMERIC  1 1 0
GREEK          1 1 0
CYRILLIC       1 1 0

###################################################################################
#
# CODE(UCS2) TO CATEGORY MAPPING
#

# SPACE
0x0020 SPACE  # DO NOT REMOVE THIS LINE, 0x0020 is reserved for SPACE
0x000D SPACE  # carriage return (was mistyped as 0x00D0, which lies in the Latin 1 ALPHA range below)
0x0009 SPACE  # horizontal tab
0x000B SPACE  # vertical tab
0x000A SPACE  # line feed

# ASCII
0x0021..0x002F SYMBOL
0x0030..0x0039 NUMERIC
0x003A..0x0040 SYMBOL
0x0041..0x005A ALPHA
0x005B..0x0060 SYMBOL
0x0061..0x007A ALPHA
0x007B..0x007E SYMBOL

# Latin
0x00A1..0x00BF SYMBOL  # Latin 1
0x00C0..0x00FF ALPHA   # Latin 1
0x0100..0x017F ALPHA   # Latin Extended A
0x0180..0x0236 ALPHA   # Latin Extended B
0x1E00..0x1EF9 ALPHA   # Latin Extended Additional

# CYRILLIC
0x0400..0x04F9 CYRILLIC
0x0500..0x050F CYRILLIC  # Cyrillic supplementary

# GREEK
0x0374..0x03FB GREEK  # Greek and Coptic

# HIRAGANA
0x3041..0x309F HIRAGANA

# KATAKANA
0x30A1..0x30FF KATAKANA
0x31F0..0x31FF KATAKANA  # Small KU .. Small RO
# 0x30FC KATAKANA HIRAGANA  # ー
0x30FC KATAKANA

# Half KATAKANA
0xFF66..0xFF9D KATAKANA
0xFF9E..0xFF9F KATAKANA

# KANJI
0x2E80..0x2EF3 KANJI  # CJK Radicals Supplement
0x2F00..0x2FD5 KANJI
0x3005 KANJI
0x3007 KANJI
0x3400..0x4DB5 KANJI  # CJK Unified Ideographs Extension
0x4E00..0x9FA5 KANJI
0xF900..0xFA2D KANJI
0xFA30..0xFA6A KANJI

# KANJI-NUMERIC (一 二 三 四 五 六 七 八 九 十 百 千 万 億 兆)
#0x4E00 KANJINUMERIC KANJI
#0x4E8C KANJINUMERIC KANJI
#0x4E09 KANJINUMERIC KANJI
#0x56DB KANJINUMERIC KANJI
#0x4E94 KANJINUMERIC KANJI
#0x516D KANJINUMERIC KANJI
#0x4E03 KANJINUMERIC KANJI
#0x516B KANJINUMERIC KANJI
#0x4E5D KANJINUMERIC KANJI
#0x5341 KANJINUMERIC KANJI
#0x767E KANJINUMERIC KANJI
#0x5343 KANJINUMERIC KANJI
#0x4E07 KANJINUMERIC KANJI
#0x5104 KANJINUMERIC KANJI
#0x5146 KANJINUMERIC KANJI

# ZENKAKU
0xFF10..0xFF19 NUMERIC
0xFF21..0xFF3A ALPHA
0xFF41..0xFF5A ALPHA
0xFF01..0xFF0F SYMBOL
0xFF1A..0xFF1F SYMBOL
0xFF3B..0xFF40 SYMBOL
0xFF5B..0xFF65 SYMBOL
0xFFE0..0xFFEF SYMBOL  # Halfwidth and Fullwidth Forms

# OTHER SYMBOLS
0x2000..0x206F SYMBOL   # General Punctuation
0x2070..0x209F NUMERIC  # Superscripts and Subscripts
0x20A0..0x20CF SYMBOL   # Currency Symbols
0x20D0..0x20FF SYMBOL   # Combining Diacritical Marks for Symbols
0x2100..0x214F SYMBOL   # Letterlike Symbols
0x2150..0x218F NUMERIC  # Number Forms
0x2100..0x214B SYMBOL   # Letterlike Symbols (NOTE(review): subset of 0x2100..0x214F above; looks redundant)
0x2190..0x21FF SYMBOL   # Arrows
0x2200..0x22FF SYMBOL   # Mathematical Operators
0x2300..0x23FF SYMBOL   # Miscellaneous Technical
0x2460..0x24FF SYMBOL   # Enclosed NUMERICs
0x2501..0x257F SYMBOL   # Box Drawing
0x2580..0x259F SYMBOL   # Block Elements
0x25A0..0x25FF SYMBOL   # Geometric Shapes
0x2600..0x26FE SYMBOL   # Miscellaneous Symbols
0x2700..0x27BF SYMBOL   # Dingbats
0x27F0..0x27FF SYMBOL   # Supplemental Arrows A
0x27C0..0x27EF SYMBOL   # Miscellaneous Mathematical Symbols-A
0x2800..0x28FF SYMBOL   # Braille Patterns
0x2900..0x297F SYMBOL   # Supplemental Arrows B
0x2B00..0x2BFF SYMBOL   # Miscellaneous Symbols and Arrows
0x2A00..0x2AFF SYMBOL   # Supplemental Mathematical Operators
0x3300..0x33FF SYMBOL
0x3200..0x32FE SYMBOL   # Enclosed CJK Letters and Months
0x3000..0x303F SYMBOL   # CJK Symbols and Punctuation
0xFE30..0xFE4F SYMBOL   # CJK Compatibility Forms
0xFE50..0xFE6B SYMBOL   # Small Form Variants

# added 2006/3/13
# NOTE(review): 0x3007 is also mapped to KANJI in the KANJI section; which
# mapping takes effect depends on how the loader handles duplicates - confirm.
0x3007 SYMBOL  # KANJINUMERIC

# END OF TABLE