[opengrm-ngram] Initial import.

Jerry James jjames at fedoraproject.org
Fri Dec 21 19:43:03 UTC 2012


commit 1f70ee64ec565ac165b5f085b8a6458b106d2421
Author: Jerry James <jamesjer at betterlinux.com>
Date:   Fri Dec 21 12:42:48 2012 -0700

    Initial import.

 .gitignore                  |    2 +
 opengrm-ngram-getpid.patch  |   10 ++++
 opengrm-ngram-warning.patch |   66 +++++++++++++++++++++++++++
 opengrm-ngram.spec          |  103 +++++++++++++++++++++++++++++++++++++++++++
 sources                     |    2 +
 5 files changed, 183 insertions(+), 0 deletions(-)
---
diff --git a/.gitignore b/.gitignore
index e69de29..1c0b112 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/opengrm-ngram-1.0.3.tar.gz
+/opengrm-ngram-man.tar.xz
diff --git a/opengrm-ngram-getpid.patch b/opengrm-ngram-getpid.patch
new file mode 100644
index 0000000..9890e28
--- /dev/null
+++ b/opengrm-ngram-getpid.patch
@@ -0,0 +1,10 @@
+--- ./src/include/ngram/ngram-randgen.h.orig	2012-02-13 07:45:55.000000000 -0700
++++ ./src/include/ngram/ngram-randgen.h	2012-12-18 08:47:55.296955430 -0700
+@@ -35,6 +35,7 @@
+ 
+ #include <fst/fst.h>
+ #include <fst/randgen.h>
++#include <unistd.h>
+ 
+ namespace ngram {
+ 
diff --git a/opengrm-ngram-warning.patch b/opengrm-ngram-warning.patch
new file mode 100644
index 0000000..8919b4e
--- /dev/null
+++ b/opengrm-ngram-warning.patch
@@ -0,0 +1,66 @@
+--- ./src/bin/ngramapply.cc.orig	2012-03-26 07:45:53.000000000 -0600
++++ ./src/bin/ngramapply.cc	2012-12-18 09:05:55.252919497 -0700
+@@ -63,8 +63,10 @@ int main(int argc, char **argv) {
+     type = EPS;
+   else if (FLAGS_bo_arc_type == "lexicographic")
+     type = LEX_EPS;
+-  else
++  else {
+     LOG(FATAL) << "Unknown backoff arc type: " << FLAGS_bo_arc_type;
++    __builtin_unreachable();
++  }
+ 
+   // TODO(rws): This is temporary to avoid issues having to do with
+   // symbol table compatibility. At some point we need to sanitize all
+--- ./src/lib/ngram-output.cc.orig	2012-03-09 12:37:08.000000000 -0700
++++ ./src/lib/ngram-output.cc	2012-12-18 09:06:26.268384488 -0700
+@@ -479,14 +479,11 @@ void NGramOutput::RandNGramModel(int64 s
+   for (int i = 0; i < 1000; ++i)  // a bit of burn-in
+     rand(); // TODO(riley): huh?
+   for (int sample = 0; sample < samples; ++sample) {
+-    StateId st = GetFst().Start(), hi_state = -1, nextstate;
+-    Label lbl;
++    StateId st = GetFst().Start(), hi_state = -1;
+     bool first_printed = true;
+     while (st >= 0) {
+       double r = rand()/(RAND_MAX + 1.0);
+       double p = SetInitRandProb(hi_state, st, &r);
+-      nextstate = -1;
+-      lbl = -1;
+       st = GetAndShowSymbol(st, p, r, &hi_state,
+ 			    &first_printed, show_backoff);
+     }
+--- ./src/include/ngram/ngram-input.h.orig	2012-03-09 12:47:04.000000000 -0700
++++ ./src/include/ngram/ngram-input.h	2012-12-18 09:05:55.253919512 -0700
+@@ -271,6 +271,7 @@ class NGramInput {
+        return barc.nextstate;
+      } else {
+        LOG(FATAL) << "NGramInput: Lower order prefix n-gram not found: ";
++       __builtin_unreachable();
+      }
+    }
+ 
+@@ -295,6 +296,7 @@ class NGramInput {
+        return ngram_counter->NGramStartState();
+      } else if (endsym) {  // end symbol </s>
+        LOG(FATAL) << "NGramInput: stop symbol occurred in n-gram prefix";
++       __builtin_unreachable();
+      } else {
+        ssize_t arc_id = ngram_counter->FindArc(st, label);
+        return ngram_counter->NGramNextState(arc_id);
+@@ -477,6 +479,7 @@ class NGramInput {
+      }
+      if (!matcher.Find(0)) {
+        LOG(FATAL) << "NGramInput: No backoff probability";
++       __builtin_unreachable();
+      }
+      for (; !matcher.Done(); matcher.Next()) {
+        StdArc arc = matcher.Value();
+@@ -485,6 +488,7 @@ class NGramInput {
+        }
+      }
+      LOG(FATAL) << "NGramInput: No backoff arc found";
++     __builtin_unreachable();
+    }
+ 
+    // Descends backoff arcs to find backoff final cost and set
diff --git a/opengrm-ngram.spec b/opengrm-ngram.spec
new file mode 100644
index 0000000..51e963c
--- /dev/null
+++ b/opengrm-ngram.spec
@@ -0,0 +1,103 @@
+Name:           opengrm-ngram
+Version:        1.0.3
+Release:        1%{?dist}
+Summary:        Library for making and modifying n-gram language models
+
+License:        ASL 2.0
+URL:            http://www.opengrm.org/
+Source0:        http://www.openfst.org/twiki/pub/GRM/NGramDownload/%{name}-%{version}.tar.gz
+# Man pages written by Jerry James.  The text of the man pages is lifted from
+# the sources, so it is under the same copyright and license.  In short, I
+# contributed the formatting, but none of the text.
+Source1:        %{name}-man.tar.xz
+# This patch will not be sent upstream, as it is gcc-specific.  Squelch
+# warnings that may indicate miscompiled code.
+Patch0:         %{name}-warning.patch
+# Patch sent upstream 18 Dec 2012.  Get a missing prototype for getpid().
+Patch1:         %{name}-getpid.patch
+
+BuildRequires:  gsl-devel
+BuildRequires:  openfst-devel
+BuildRequires:  openfst-tools
+
+%description
+The OpenGrm NGram library is used for making and modifying n-gram
+language models encoded as weighted finite-state transducers (FSTs).  It
+makes use of functionality in the OpenFst library to create, access and
+manipulate n-gram models.  Operations for counting, smoothing, pruning,
+applying, and evaluating models are among those provided.
+
+%package devel
+Summary:        Development files for OpenGrm NGram
+Group:          Development/Libraries
+Requires:       %{name}%{?_isa} = %{version}-%{release}
+Requires:       gsl-devel%{?_isa}, openfst-devel%{?_isa}
+
+%description devel
+This package includes the necessary files to develop systems with the
+OpenGrm NGram library.
+
+%package tools
+Summary:        Command-line tools for working with n-gram language models
+Group:          Applications/Multimedia
+Requires:       %{name}%{?_isa} = %{version}-%{release}
+
+%description tools
+This package contains command-line tools that give access to OpenGrm
+NGram library functionality.
+
+%prep
+%setup -q
+%setup -q -T -D -a 1
+%patch0
+%patch1
+
+%build
+%configure CXXFLAGS="%{optflags} -DHAVE_GSL" LIBS="-lfst -lgsl"
+
+# Get rid of undesirable hardcoded rpaths; also work around libtool reordering
+# -Wl,--as-needed after all the libraries.
+sed -e 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' \
+    -e 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' \
+    -e 's|CC="g..|& -Wl,--as-needed|' \
+    -i libtool
+
+make %{?_smp_mflags}
+
+%install
+%make_install
+
+# Remove libtool archives
+rm -f %{buildroot}%{_libdir}/*.la
+
+# Install the man pages
+mkdir -p %{buildroot}%{_mandir}/man1
+cd man
+for f in *.1; do
+  sed -e "s/@VERSION@/%{version}/" $f > %{buildroot}%{_mandir}/man1/$f
+  touch -r $f %{buildroot}%{_mandir}/man1/$f
+done
+cd ..
+
+%check
+LD_LIBRARY_PATH=$PWD/src/lib/.libs make check
+
+%post -p /sbin/ldconfig
+
+%postun -p /sbin/ldconfig
+
+%files
+%doc AUTHORS COPYING NEWS README
+%{_libdir}/*.so.*
+
+%files devel
+%{_includedir}/ngram/
+%{_libdir}/*.so
+
+%files tools
+%{_bindir}/*
+%{_mandir}/man1/*
+
+%changelog
+* Tue Dec 18 2012 Jerry James <loganjerry at gmail.com> - 1.0.3-1
+- Initial RPM
diff --git a/sources b/sources
index e69de29..3309189 100644
--- a/sources
+++ b/sources
@@ -0,0 +1,2 @@
+03bc3e03b7106b3f135a65838324e7a9  opengrm-ngram-1.0.3.tar.gz
+fc429541e290ac9bd66c4391bf23430f  opengrm-ngram-man.tar.xz


More information about the scm-commits mailing list