[libkkc] Update to 0.3.5-2

Daiki Ueno ueno at fedoraproject.org
Tue Feb 17 06:02:01 UTC 2015


commit 8639facc18df3952fc98d5c067eb44e9c0534b3c
Author: Daiki Ueno <dueno at redhat.com>
Date:   Tue Feb 17 15:01:46 2015 +0900

    Update to 0.3.5-2

 libkkc-try-all.patch |  211 ++++++++++++++++++++++++++++++++++++++++++++++++++
 libkkc.spec          |    7 ++-
 2 files changed, 217 insertions(+), 1 deletions(-)
---
diff --git a/libkkc-try-all.patch b/libkkc-try-all.patch
new file mode 100644
index 0000000..60a5b67
--- /dev/null
+++ b/libkkc-try-all.patch
@@ -0,0 +1,211 @@
+From 1f512da81a71287b13eb0a1f9b31830b16db2107 Mon Sep 17 00:00:00 2001
+From: Daiki Ueno <ueno at gnu.org>
+Date: Tue, 17 Feb 2015 14:47:17 +0900
+Subject: [PATCH] template: Try all possible okuri-gana combinations
+
+Reported by ricky at burg.in.
+---
+ libkkc/state.vala                      | 29 +++++++++++++++++++++--------
+ libkkc/template.vala                   | 22 +++++++++-------------
+ tests/conversions-segment.json         | 14 +++++++-------
+ tests/conversions-user-dictionary.json |  6 +++---
+ tests/template.vala                    |  2 +-
+ 5 files changed, 41 insertions(+), 32 deletions(-)
+
+diff --git a/libkkc/state.vala b/libkkc/state.vala
+index 4ba4c50..7bbf079 100644
+--- a/libkkc/state.vala
++++ b/libkkc/state.vala
+@@ -324,11 +324,14 @@ namespace Kkc {
+                                                 out _candidates)) {
+                 return template.expand (_candidates[0].text);
+             }
+-            template = new OkuriganaTemplate (input);
+-            if (segment_dict.lookup_candidates (template.source,
+-                                                template.okuri,
+-                                                out _candidates)) {
+-                return template.expand (_candidates[0].text);
++            var count = input.char_count ();
++            if (count > 1) {
++                template = new OkuriganaTemplate (input, count - 1);
++                if (segment_dict.lookup_candidates (template.source,
++                                                    template.okuri,
++                                                    out _candidates)) {
++                    return template.expand (_candidates[0].text);
++                }
+             }
+             return null;
+         }
+@@ -385,7 +388,10 @@ namespace Kkc {
+             // 1. Look up candidates from user segment dictionaries.
+             lookup_template (new NumericTemplate (normalized_input), true);
+             lookup_template (new SimpleTemplate (normalized_input), true);
+-            lookup_template (new OkuriganaTemplate (normalized_input), true);
++            for (var i = normalized_input.char_count (); i > 1; i--) {
++                lookup_template (
++                    new OkuriganaTemplate (normalized_input, i - 1), true);
++            }
+ 
+             // 2. Look up the most frequently used unigram from language model.
+             if (normalized_input.char_count () > 1) {
+@@ -405,7 +411,6 @@ namespace Kkc {
+             // 3. Look up candidates from system segment dictionaries.
+             lookup_template (new NumericTemplate (normalized_input), false);
+             lookup_template (new SimpleTemplate (normalized_input), false);
+-            lookup_template (new OkuriganaTemplate (normalized_input), false);
+ 
+             // 4. Do sentence conversion with N-best search.
+ 
+@@ -445,9 +450,17 @@ namespace Kkc {
+                     builder.str);
+                 if (!kana_candidates.contains (sentence))
+                     candidates.add (sentence);
++
++            }
++
++            // 4.3. Look up okuri-ari candidates from system segment
++            // dictionaries, for each possible okurigana combination.
++            for (var i = normalized_input.char_count (); i > 1; i--) {
++                lookup_template (
++                    new OkuriganaTemplate (normalized_input, i - 1), false);
+             }
+ 
+-            // 4.3. Add Kana candidates at the end.
++            // 4.4. Add Kana candidates at the end.
+             candidates.add_all (kana_candidates);
+ 
+             candidates.populated ();
+diff --git a/libkkc/template.vala b/libkkc/template.vala
+index 7768f80..92c9995 100644
+--- a/libkkc/template.vala
++++ b/libkkc/template.vala
+@@ -42,19 +42,15 @@ namespace Kkc {
+ 
+         string? okurigana = null;
+ 
+-        public OkuriganaTemplate (string source) {
+-            var count = source.char_count ();
+-            if (count > 1) {
+-                var last_char_index = source.index_of_nth_char (count - 1);
+-                this.okurigana = source[last_char_index:source.length];
+-                string? prefix = RomKanaUtils.get_okurigana_prefix (
+-                    this.okurigana);
+-                this.source = source[0:last_char_index] + prefix;
+-                this.okuri = true;
+-            } else {
+-                this.source = source;
+-                this.okuri = false;
+-            }
++        public OkuriganaTemplate (string source, int pos) {
++            assert (source.char_count () > 1);
++            assert (0 < pos && pos < source.char_count ());
++
++            var last_char_index = source.index_of_nth_char (pos);
++            this.okurigana = source[last_char_index:source.length];
++            string? prefix = RomKanaUtils.get_okurigana_prefix (this.okurigana);
++            this.source = source[0:last_char_index] + prefix;
++            this.okuri = true;
+         }
+ 
+         public string expand (string text) {
+diff --git a/tests/conversions-segment.json b/tests/conversions-segment.json
+index 63d0b9b..33baadf 100644
+--- a/tests/conversions-segment.json
++++ b/tests/conversions-segment.json
+@@ -122,11 +122,11 @@
+     {
+         "keys": "w a t a s h i n o n a m a e h a n a k a n o d e s u SPC SPC",
+         "input": "わたしのなまえはなかのです",
+-        "segments": "わたしの名前は中野です",
++        "segments": "渡しの名前は中野です",
+         "segments_size": 3,
+         "segments_cursor_pos": 0,
+         "output": "",
+-        "candidates_size": 4,
++        "candidates_size": 5,
+         "input_cursor_pos": -1
+     },
+     {
+@@ -136,7 +136,7 @@
+         "segments_size": 3,
+         "segments_cursor_pos": 0,
+         "output": "",
+-        "candidates_size": 4,
++        "candidates_size": 5,
+         "input_cursor_pos": -1
+     },
+     {
+@@ -152,17 +152,17 @@
+     {
+         "keys": "w a t a s h i n o n a m a e h a n a k a n o d e s u SPC SPC Right",
+         "input": "わたしのなまえはなかのです",
+-        "segments": "わたしの名前は中野です",
++        "segments": "渡しの名前は中野です",
+         "segments_size": 3,
+         "segments_cursor_pos": 1,
+         "output": "",
+-        "candidates_size": 4,
++        "candidates_size": 5,
+         "input_cursor_pos": -1
+     },
+     {
+         "keys": "w a t a s h i n o n a m a e h a n a k a n o d e s u SPC SPC Right SPC",
+         "input": "わたしのなまえはなかのです",
+-        "segments": "わたしのなまえは中野です",
++        "segments": "渡しのなまえは中野です",
+         "segments_size": 3,
+         "segments_cursor_pos": 1,
+         "output": "",
+@@ -172,7 +172,7 @@
+     {
+         "keys": "w a t a s h i n o n a m a e h a n a k a n o d e s u SPC SPC Right SPC SPC",
+         "input": "わたしのなまえはなかのです",
+-        "segments": "わたしのナマエハ中野です",
++        "segments": "渡しのナマエハ中野です",
+         "segments_size": 3,
+         "segments_cursor_pos": 1,
+         "output": "",
+diff --git a/tests/conversions-user-dictionary.json b/tests/conversions-user-dictionary.json
+index 6c52df5..c5ddace 100644
+--- a/tests/conversions-user-dictionary.json
++++ b/tests/conversions-user-dictionary.json
+@@ -29,12 +29,12 @@
+         "segments": "",
+         "segments_size": 0,
+         "segments_cursor_pos": -1,
+-        "output": "わたしの名前はなかのです"
++        "output": "渡しの名前はなかのです"
+     },
+     {
+         "keys": "w a t a s h i n o n a m a e h a n a k a n o d e s u SPC",
+         "input": "わたしのなまえはなかのです",
+-        "segments": "わたしの名前はなかのです",
++        "segments": "渡しの名前はなかのです",
+         "segments_size": 2,
+         "segments_cursor_pos": 0,
+         "output": ""
+@@ -42,7 +42,7 @@
+     {
+         "keys": "w a t a s h i n o n a m a e h a n a k a n o d e s u SPC Right SPC Right Right SPC",
+         "input": "わたしのなまえはなかのです",
+-        "segments": "わたしのなまえはなかのです",
++        "segments": "渡しのなまえはなかのです",
+         "segments_size": 2,
+         "segments_cursor_pos": 1,
+         "output": ""
+diff --git a/tests/template.vala b/tests/template.vala
+index 1f8fb5e..5900cd1 100644
+--- a/tests/template.vala
++++ b/tests/template.vala
+@@ -16,7 +16,7 @@ class TemplateTests : Kkc.TestCase {
+         assert (source == "source");
+         assert (!okuri);
+ 
+-        template = new Kkc.OkuriganaTemplate ("かう");
++        template = new Kkc.OkuriganaTemplate ("かう", 1);
+         template.get ("source", out source,
+                       "okuri", out okuri);
+ 
+-- 
+2.1.0
+
diff --git a/libkkc.spec b/libkkc.spec
index 85396e7..5296b7a 100644
--- a/libkkc.spec
+++ b/libkkc.spec
@@ -2,7 +2,7 @@
 
 Name:		libkkc
 Version:	0.3.5
-Release:	1%{?dist}
+Release:	2%{?dist}
 Summary:	Japanese Kana Kanji conversion library
 
 License:	GPLv3+
@@ -10,6 +10,7 @@ Group:		System Environment/Libraries
 URL:		https://github.com/ueno/libkkc
 Source0:	https://github.com/ueno/libkkc/releases/download/v%{version}/%{name}-%{version}.tar.gz
 #Patch0:		libkkc-HEAD.patch
+Patch1:		libkkc-try-all.patch
 
 BuildRequires:	marisa-devel
 BuildRequires:	vala
@@ -64,6 +65,7 @@ The %{name}-common package contains the arch-independent data that
 %prep
 %setup -q
 #patch0 -p1 -b .HEAD
+%patch1 -p1 -b .try-all
 
 
 %build
@@ -109,6 +111,9 @@ find $RPM_BUILD_ROOT -name '*.la' -exec rm -f {} ';'
 
 
 %changelog
+* Tue Feb 17 2015 Daiki Ueno <dueno at redhat.com> - 0.3.5-2
+- apply libkkc-try-all.patch (try all possible okurigana combinations) for a better candidate list
+
 * Fri Dec 19 2014 Daiki Ueno <dueno at redhat.com> - 0.3.5-1
 - new upstream release
 - switch upstream source location to Github


More information about the scm-commits mailing list