[ibus-table] Resolves: #856320

Thu Sep 13 13:14:42 UTC 2012

commit 52b91b4b420e7b5f8cd6a1a3d561192f1c7ab863
Author: Mike FABIAN <mfabian at redhat.com>
Date:   Tue Sep 11 20:21:33 2012 +0200

    Resolves: #856320
    
    - Improve the check whether a phrase is simplified or traditional Chinese.
      The improvement is to ignore all non-Han characters when
      doing the check.
      This is to avoid classifying a simplified Chinese string as
      traditional just because it happens to include some non-Chinese
      characters, for example box drawing characters, which cannot be
      converted to gb2312 but happen to be convertible to big5hkscs.
      This fixes the problem that most phrases of the emoji-table
      input method cannot be input at all.

 ...9.20120907-improve-chinese-category-check.patch |   69 ++++++++++++++++++++
 ibus-table.spec                                    |   16 ++++-
 2 files changed, 84 insertions(+), 1 deletions(-)
---

diff --git a/ibus-table-1.4.99.20120907-improve-chinese-category-check.patch b/ibus-table-1.4.99.20120907-improve-chinese-category-check.patch
new file mode 100644
index 0000000..4e4ec8c
--- /dev/null
+++ b/ibus-table-1.4.99.20120907-improve-chinese-category-check.patch
@@ -0,0 +1,69 @@
+From 7a2eed5fb33098b10d67958472a23bb41a3f7b11 Mon Sep 17 00:00:00 2001
+From: Mike FABIAN <mfabian at redhat.com>
+Date: Thu, 13 Sep 2012 12:43:55 +0200
+Subject: [PATCH] Improve check whether a phrase is simplified or traditional
+ Chinese
+
+The improvement is to ignore all non-Han characters when
+doing the check.
+
+This is to avoid classifying a simplified Chinese string as
+traditional just because it happens to include some non-Chinese
+characters, for example box drawing characters, which cannot be
+converted to gb2312 but happen to be convertible to big5hkscs.
+
+This fixes the problem in the emoji-table input method that most
+phrases cannot be input at all.
+
+See: https://bugzilla.redhat.com/show_bug.cgi?id=856320
+---
+ engine/tabsqlitedb.py | 18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+diff --git a/engine/tabsqlitedb.py b/engine/tabsqlitedb.py
+index 7606afb..9e3b50c 100644
+--- a/engine/tabsqlitedb.py
++++ b/engine/tabsqlitedb.py
+@@ -483,17 +483,27 @@ class tabsqlitedb:
+             # make sure that we got a unicode string
+             if type(phrase) != type(u''):
+                 phrase = phrase.decode('utf8')
++            tmp_phrase = ''.join(re.findall(u'['
++                                            + u'\u4E00-\u9FCB'
++                                            + u'\u3400-\u4DB5'
++                                            + u'\uF900-\uFaFF'
++                                            + u'\U00020000-\U0002A6D6'
++                                            + u'\U0002A700-\U0002B734'
++                                            + u'\U0002B740-\U0002B81D'
++                                            + u'\U0002F800-\U0002FA1D'
++                                            + u']+',
++                                            phrase))
+             # first whether in gb2312
+             try:
+-                phrase.encode('gb2312')
++                tmp_phrase.encode('gb2312')
+                 category |= 1
+             except:
+-                if '〇'.decode('utf8') in phrase:
++                if '〇'.decode('utf8') in tmp_phrase:
+                     # we add '〇' into SC as well
+                     category |= 1
+             # second check big5-hkscs
+             try:
+-                phrase.encode('big5hkscs')
++                tmp_phrase.encode('big5hkscs')
+                 category |= 1 << 1
+             except:
+                 # then check whether in gbk,
+@@ -503,7 +513,7 @@ class tabsqlitedb:
+                 else:
+                     # need to check
+                     try:
+-                        phrase.encode('gbk')
++                        tmp_phrase.encode('gbk')
+                         category |= 1
+                     except:
+                         # not in gbk
+-- 
+1.7.11.4
+
diff --git a/ibus-table.spec b/ibus-table.spec
index 816cc3c..7f1a946 100644
--- a/ibus-table.spec
+++ b/ibus-table.spec
@@ -1,12 +1,13 @@
 Name:       ibus-table
 Version:    1.4.99.20120907
-Release:    1%{?dist}
+Release:    2%{?dist}
 Summary:    The Table engine for IBus platform
 License:    LGPLv2+
 Group:      System Environment/Libraries
 URL:        http://code.google.com/p/ibus/
 Source0:    http://mfabian.fedorapeople.org/ibus-table/%{name}-%{version}.tar.gz
 Patch1:     ibus-table-1.3.9.20110827-uppercase-umlauts.patch
+Patch2:     ibus-table-1.4.99.20120907-improve-chinese-category-check.patch
 
 Requires:       ibus > 1.3.0
 BuildRequires:  ibus-devel > 1.3.0
@@ -30,6 +31,7 @@ Development files for %{name}.
 %prep
 %setup -q
 %patch1 -p1 -b .uppercase-umlauts
+%patch2 -p1 -b .improve-chinese-category-check
 
 %build
 %configure --disable-static --disable-additional
@@ -100,6 +102,18 @@ Development files for %{name}.
 %{_datadir}/pkgconfig/%{name}.pc
 
 %changelog
+* Tue Sep 11 2012 Mike FABIAN <mfabian at redhat.com> - 1.4.99.20120907-2
+- Resolves: #856320
+- Improve the check whether a phrase is simplified or traditional Chinese.
+  The improvement is to ignore all non-Han characters when
+  doing the check.
+  This is to avoid classifying a simplified Chinese string as
+  traditional just because it happens to include some non-Chinese
+  characters, for example box drawing characters, which cannot be
+  converted to gb2312 but happen to be convertible to big5hkscs.
+  This fixes the problem that most phrases of the emoji-table
+  input method cannot be input at all.
+
 * Fri Sep 07 2012 Mike FABIAN <mfabian at redhat.com> - 1.4.99.20120907-1
 - Relates: #855250
 - see comment#1 in #855250