[strigi] backport upstream patches (as of 20120626)
Rex Dieter
rdieter at fedoraproject.org
Tue Jun 26 13:44:12 UTC 2012
commit e397d92c88f07115beb725ac3f4633a10e0f9854
Author: Rex Dieter <rdieter at fedoraproject.org>
Date: Tue Jun 26 08:47:20 2012 -0500
backport upstream patches (as of 20120626)
...pmfix.patch => 0001-Fix-xpm-and-xbm-index.patch | 18 +-
0001-Minor.-Fix-grammar-typo-in-cmake-output.patch | 26 +
...cknumber-and-track-count-from-a-value-lik.patch | 131 ++++
...nistd.h-header-required-unconditionally-f.patch | 27 +
0003-Fix-return-value-wrong-type.patch | 54 ++
0003-Fixed-indexing-of-m3u-files.patch | 114 ++++
...C-Files-Remove-addtional-db-in-replaygain.patch | 85 +++
...nalizer-was-importing-only-one-artist-tag.patch | 103 +++
...-numeric-genres-in-id3-v2-mp3-are-ignored.patch | 666 +++++++++++++++++++
...g-a-wrong-commited-file-id3endanalyzer.cp.patch | 699 ++++++++++++++++++++
...-of-genre-field-in-id3v2-tags-and-clean-c.patch | 112 ++++
strigi-0.7.7-gcc47.patch | 13 -
strigi.spec | 40 +-
13 files changed, 2061 insertions(+), 27 deletions(-)
---
diff --git a/strigi-0.7.7-xpmfix.patch b/0001-Fix-xpm-and-xbm-index.patch
similarity index 85%
rename from strigi-0.7.7-xpmfix.patch
rename to 0001-Fix-xpm-and-xbm-index.patch
index 5a61cd3..b199ea2 100644
--- a/strigi-0.7.7-xpmfix.patch
+++ b/0001-Fix-xpm-and-xbm-index.patch
@@ -1,10 +1,13 @@
-commit 35cf4a4818a0d1bc7cda07e29f271360e06443a0
-Author: Weng Xuetian <wengxt at gmail.com>
-Date: Mon Dec 12 09:12:35 2011 +0000
+From 35cf4a4818a0d1bc7cda07e29f271360e06443a0 Mon Sep 17 00:00:00 2001
+From: Weng Xuetian <wengxt at gmail.com>
+Date: Mon, 12 Dec 2011 09:12:35 +0000
+Subject: [PATCH 1/8] Fix xpm and xbm index.
- Fix xpm and xbm index.
-
- REVIEW: 103368
+REVIEW: 103368
+---
+ plugins/lineplugins/xpmlineanalyzer.cpp | 10 +++++-----
+ plugins/throughplugins/xbmthroughanalyzer.cpp | 2 +-
+ 2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/plugins/lineplugins/xpmlineanalyzer.cpp b/plugins/lineplugins/xpmlineanalyzer.cpp
index 980b2f7..c87a071 100644
@@ -62,3 +65,6 @@ index e926e84..e3f35cc 100644
return in;
}
+--
+1.7.10.4
+
diff --git a/0001-Minor.-Fix-grammar-typo-in-cmake-output.patch b/0001-Minor.-Fix-grammar-typo-in-cmake-output.patch
new file mode 100644
index 0000000..26881c1
--- /dev/null
+++ b/0001-Minor.-Fix-grammar-typo-in-cmake-output.patch
@@ -0,0 +1,26 @@
+From 24484bfb91b89ab5e0ef799252e69212ce94bde8 Mon Sep 17 00:00:00 2001
+From: Shaun Reich <shaun.reich at kdemail.net>
+Date: Fri, 23 Dec 2011 00:03:20 -0500
+Subject: [PATCH 1/3] Minor. Fix (grammar) typo in cmake output.
+
+SVN_SILENT
+---
+ cmake/FindCppUnit.cmake | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/cmake/FindCppUnit.cmake b/cmake/FindCppUnit.cmake
+index adaaeb6..f983e10 100644
+--- a/cmake/FindCppUnit.cmake
++++ b/cmake/FindCppUnit.cmake
+@@ -33,7 +33,7 @@ ELSE(CPPUNIT_INCLUDE_DIR AND CPPUNIT_LIBRARIES)
+ FIND_PATH(CPPUNIT_CFLAGS cppunit/TestRunner.h PATHS /usr/include /usr/local/include )
+ FIND_LIBRARY(CPPUNIT_LIBRARIES NAMES cppunit PATHS /usr/lib /usr/local/lib )
+ # how can we find cppunit version?
+- MESSAGE (STATUS "Ensure you cppunit installed version is at least ${CPPUNIT_MIN_VERSION}")
++ MESSAGE (STATUS "Ensure your cppunit installed version is at least ${CPPUNIT_MIN_VERSION}")
+ SET (CPPUNIT_INSTALLED_VERSION ${CPPUNIT_MIN_VERSION})
+ ENDIF(CPPUNIT_CONFIG_EXECUTABLE)
+
+--
+1.7.10.4
+
diff --git a/0002-Extract-tracknumber-and-track-count-from-a-value-lik.patch b/0002-Extract-tracknumber-and-track-count-from-a-value-lik.patch
new file mode 100644
index 0000000..3a17cba
--- /dev/null
+++ b/0002-Extract-tracknumber-and-track-count-from-a-value-lik.patch
@@ -0,0 +1,131 @@
+From 92df13d9b5fab8259a85315eb2c277546d20d4a6 Mon Sep 17 00:00:00 2001
+From: Sebastian Trueg <trueg at kde.org>
+Date: Fri, 10 Feb 2012 13:31:31 +0100
+Subject: [PATCH 2/8] Extract tracknumber and track count from a value like
+ "03/16".
+
+REVIEW: 103911
+---
+ lib/endanalyzers/flacendanalyzer.cpp | 16 +++++++++++--
+ lib/throughanalyzers/oggthroughanalyzer.cpp | 33 ++++++++++++++++++++++-----
+ 2 files changed, 41 insertions(+), 8 deletions(-)
+
+diff --git a/lib/endanalyzers/flacendanalyzer.cpp b/lib/endanalyzers/flacendanalyzer.cpp
+index c043872..8877d1d 100644
+--- a/lib/endanalyzers/flacendanalyzer.cpp
++++ b/lib/endanalyzers/flacendanalyzer.cpp
+@@ -52,6 +52,8 @@ const string
+ NMM_DRAFT "musicBrainzAlbumID"),
+ discNumberPropertyName(
+ NMM_DRAFT "setNumber"),
++ albumTrackCountName(
++ NMM_DRAFT "albumTrackCount"),
+
+ musicClassName(
+ NMM_DRAFT "MusicPiece"),
+@@ -226,7 +228,17 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream*
+ const string value(p2+eq+1, size-eq-1);
+
+ if (iter != factory->fields.end()) {
+- indexable.addValue(iter->second, value);
++ // Hack: the tracknumber sometimes contains the track count, too
++ int pos = 0;
++ if(name=="tracknumber" && (pos = value.find_first_of('/')) > 0 ) {
++ // the track number
++ indexable.addValue(iter->second, value.substr(0, pos));
++ // the track count
++ addStatement(indexable, albumUri, albumTrackCountName, value.substr(pos+1));
++ }
++ else {
++ indexable.addValue(iter->second, value);
++ }
+ } else if(name=="artist") {
+ artist = value;
+ } else if(name=="lyrics") {
+@@ -261,7 +273,7 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream*
+ indexable.addTriplet(publisherUri, fullnamePropertyName, value);
+ } else if(name=="performer") {
+ performer = value;
+- }
++ }
+ }
+ } else {
+ m_error = "problem with tag size";
+diff --git a/lib/throughanalyzers/oggthroughanalyzer.cpp b/lib/throughanalyzers/oggthroughanalyzer.cpp
+index 26faa15..741b28e 100644
+--- a/lib/throughanalyzers/oggthroughanalyzer.cpp
++++ b/lib/throughanalyzers/oggthroughanalyzer.cpp
+@@ -37,6 +37,8 @@ const string
+ "http://www.semanticdesktop.org/ontologies/2007/03/22/nco#fullname"),
+ titlePropertyName(
+ "http://www.semanticdesktop.org/ontologies/2007/01/19/nie#title"),
++ albumTrackCountName(
++ NMM_DRAFT "albumTrackCount"),
+
+ musicClassName(
+ NMM_DRAFT "MusicPiece"),
+@@ -68,6 +70,14 @@ OggThroughAnalyzerFactory::registerFields(FieldRegister& r) {
+ fields["type"] = r.typeField;
+ }
+
++inline
++void
++addStatement(AnalysisResult* indexable, string& subject, const string& predicate, const string& object) {
++ if (subject.empty())
++ subject = indexable->newAnonymousUri();
++ indexable->addTriplet(subject, predicate, object);
++}
++
+ void
+ OggThroughAnalyzer::setIndexable(AnalysisResult* i) {
+ indexable = i;
+@@ -129,6 +139,7 @@ OggThroughAnalyzer::connectInputStream(InputStream* in) {
+ // but for the composer in calssical music. Thus, we cache both and make the decision
+ // at the end
+ string artist, performer;
++ string albumUri;
+
+ // read all the comments
+ p2 += 4;
+@@ -151,15 +162,21 @@ OggThroughAnalyzer::connectInputStream(InputStream* in) {
+ = factory->fields.find(name);
+ string value(p2+eq+1, size-eq-1);
+ if (iter != factory->fields.end()) {
+- indexable->addValue(iter->second, value);
++ // Hack: the tracknumber sometimes contains the track count, too
++ int pos = 0;
++ if(name=="tracknumber" && (pos = value.find_first_of('/')) > 0 ) {
++ // the track number
++ indexable->addValue(iter->second, value.substr(0, pos));
++ // the track count
++ addStatement(indexable, albumUri, albumTrackCountName, value.substr(pos+1));
++ }
++ else {
++ indexable->addValue(iter->second, value);
++ }
+ } else if(name=="artist") {
+ artist = value;
+ } else if(name=="album") {
+- string albumUri = indexable->newAnonymousUri();
+-
+- indexable->addValue(factory->albumField, albumUri);
+- indexable->addTriplet(albumUri, typePropertyName, albumClassName);
+- indexable->addTriplet(albumUri, titlePropertyName, value);
++ addStatement(indexable, albumUri, titlePropertyName, value);
+ } else if(name=="composer") {
+ string composerUri = indexable->newAnonymousUri();
+
+@@ -206,6 +223,10 @@ OggThroughAnalyzer::connectInputStream(InputStream* in) {
+ indexable->addTriplet(performerUri, typePropertyName, contactClassName);
+ indexable->addTriplet(performerUri, fullnamePropertyName, performer);
+ }
++ if(!albumUri.empty()) {
++ indexable->addValue(factory->albumField, albumUri);
++ indexable->addTriplet(albumUri, typePropertyName, albumClassName);
++ }
+
+ // set the "codec" value
+ indexable->addValue(factory->fields.find("codec")->second, "Ogg/Vorbis");
+--
+1.7.10.4
+
diff --git a/0002-gcc47-fix-unistd.h-header-required-unconditionally-f.patch b/0002-gcc47-fix-unistd.h-header-required-unconditionally-f.patch
new file mode 100644
index 0000000..b6f9d1e
--- /dev/null
+++ b/0002-gcc47-fix-unistd.h-header-required-unconditionally-f.patch
@@ -0,0 +1,27 @@
+From a8e97f672325557b3fbc84b987299350ec5ac10b Mon Sep 17 00:00:00 2001
+From: Rex Dieter <rdieter at fedoraproject.org>
+Date: Mon, 9 Jan 2012 11:13:25 -0600
+Subject: [PATCH 2/3] gcc47 fix, unistd.h header required unconditionally for
+ 'sleep'
+
+---
+ bin/daemon/eventlistener/eventlistenerqueue.cpp | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/bin/daemon/eventlistener/eventlistenerqueue.cpp b/bin/daemon/eventlistener/eventlistenerqueue.cpp
+index 9fcdcb1..1e97599 100644
+--- a/bin/daemon/eventlistener/eventlistenerqueue.cpp
++++ b/bin/daemon/eventlistener/eventlistenerqueue.cpp
+@@ -27,9 +27,7 @@
+ #include <iostream>
+ #include <stdio.h>
+
+-#if defined(__SUNPRO_CC)
+ #include <unistd.h>
+-#endif
+
+ using namespace std;
+
+--
+1.7.10.4
+
diff --git a/0003-Fix-return-value-wrong-type.patch b/0003-Fix-return-value-wrong-type.patch
new file mode 100644
index 0000000..b6c129a
--- /dev/null
+++ b/0003-Fix-return-value-wrong-type.patch
@@ -0,0 +1,54 @@
+From 1a291699d17fab42aa35505093e09efab8141eb2 Mon Sep 17 00:00:00 2001
+From: Christoph Feck <christoph at maxiom.de>
+Date: Wed, 21 Mar 2012 23:36:56 +0100
+Subject: [PATCH 3/3] Fix return value (wrong type)
+
+---
+ bin/daemon/dbus/dbustest.cpp | 2 +-
+ bin/daemon/socketserver.cpp | 6 +++---
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/bin/daemon/dbus/dbustest.cpp b/bin/daemon/dbus/dbustest.cpp
+index 4f86376..c065f1a 100644
+--- a/bin/daemon/dbus/dbustest.cpp
++++ b/bin/daemon/dbus/dbustest.cpp
+@@ -67,7 +67,7 @@ serverthread(void*) {
+ }
+ if (DBUS_REQUEST_NAME_REPLY_PRIMARY_OWNER != ret) {
+ fprintf(stderr, "Not Primary Owner (%d)\n", ret);
+- return false;
++ return 0;
+ }
+
+ for (int i=0; i<10; ++i) {
+diff --git a/bin/daemon/socketserver.cpp b/bin/daemon/socketserver.cpp
+index 7212b0d..777831e 100644
+--- a/bin/daemon/socketserver.cpp
++++ b/bin/daemon/socketserver.cpp
+@@ -54,7 +54,7 @@ SocketServer::run(void*) {
+ sd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if(sd < 0) {
+ perror("cannot open socket ");
+- return false;
++ return 0;
+ }
+
+ /* set the address */
+@@ -67,12 +67,12 @@ SocketServer::run(void*) {
+ sock.sun_family = AF_UNIX;
+ if (bind(sd, (struct sockaddr *)&sock, sizeof(sock))<0) {
+ perror("cannot bind port ");
+- return false;
++ return 0;
+ }
+
+ if (::listen(sd, 5) < 0) {
+ perror("cannot listen to port");
+- return false;
++ return 0;
+ }
+
+ while (interface->isActive()) {
+--
+1.7.10.4
+
diff --git a/0003-Fixed-indexing-of-m3u-files.patch b/0003-Fixed-indexing-of-m3u-files.patch
new file mode 100644
index 0000000..3d79631
--- /dev/null
+++ b/0003-Fixed-indexing-of-m3u-files.patch
@@ -0,0 +1,114 @@
+From ee49b5e1a8065ae7823c1ee091ace2e7741059e9 Mon Sep 17 00:00:00 2001
+From: Sebastian Trueg <trueg at kde.org>
+Date: Mon, 13 Feb 2012 09:20:47 +0100
+Subject: [PATCH 3/8] Fixed indexing of m3u files.
+
+The analyzer now constructs absolute paths to the containing files and
+creates nie:links relations to them. nie:hasLogicalPart does not make
+much sense since the audio files are not actually contained in the
+playlist, they are just referenced. NFO specifies the usage of rdf:list.
+However, that is not supported in Nepomuk at the moment. Thus, we
+cannot put any order on the linked audio files yet.
+
+REVIEW: 103961
+---
+ lib/lineanalyzers/m3ustreamanalyzer.cpp | 33 ++++++++++++++++++++++++++-----
+ lib/lineanalyzers/m3ustreamanalyzer.h | 4 ++++
+ 2 files changed, 32 insertions(+), 5 deletions(-)
+
+diff --git a/lib/lineanalyzers/m3ustreamanalyzer.cpp b/lib/lineanalyzers/m3ustreamanalyzer.cpp
+index dacc15a..c20b28a 100644
+--- a/lib/lineanalyzers/m3ustreamanalyzer.cpp
++++ b/lib/lineanalyzers/m3ustreamanalyzer.cpp
+@@ -26,13 +26,16 @@
+ #include <string>
+ #include <cstring>
+
++#include <unistd.h>
++#include <stdlib.h>
++
+ // AnalyzerFactory
+ void M3uLineAnalyzerFactory::registerFields(Strigi::FieldRegister& reg)
+ {
+ // track list length is easily obtained via API
+ // tracksField = reg.registerField();
+ trackPathField = reg.registerField(
+- "http://www.semanticdesktop.org/ontologies/2007/01/19/nie#hasLogicalPart");
++ "http://www.semanticdesktop.org/ontologies/2007/01/19/nie#links");
+ m3uTypeField = reg.registerField(
+ "http://freedesktop.org/standards/xesam/1.0/core#formatSubtype");
+ typeField = reg.typeField;
+@@ -43,7 +46,7 @@ void M3uLineAnalyzerFactory::registerFields(Strigi::FieldRegister& reg)
+ }
+
+ // Analyzer
+-void M3uLineAnalyzer::startAnalysis(Strigi::AnalysisResult* i)
++void M3uLineAnalyzer::startAnalysis(Strigi::AnalysisResult* i)
+ {
+ extensionOk = i->extension() == "m3u" || i->extension() == "M3U";
+
+@@ -52,7 +55,24 @@ void M3uLineAnalyzer::startAnalysis(Strigi::AnalysisResult* i)
+ count = 0;
+ }
+
+-void M3uLineAnalyzer::handleLine(const char* data, uint32_t length)
++std::string M3uLineAnalyzer::constructAbsolutePath(const std::string &relative) const
++{
++ if(char* buf = realpath(analysisResult->path().c_str(), 0)) {
++#ifdef _WIN32
++ static const char s_pathSeparator = '\\';
++#else
++ static const char s_pathSeparator = '/';
++#endif
++ std::string path(buf);
++ free(buf);
++ return path.substr(0, path.rfind(s_pathSeparator)+1) + relative;
++ }
++ else {
++ return std::string();
++ }
++}
++
++void M3uLineAnalyzer::handleLine(const char* data, uint32_t length)
+ {
+ if (!extensionOk)
+ return;
+@@ -68,8 +88,11 @@ void M3uLineAnalyzer::handleLine(const char* data, uint32_t length)
+ //if (line == 1)
+ // analysisResult->addValue(factory->m3uTypeField, "simple");
+
+- // TODO: Check for a valid url with QUrl
+- analysisResult->addValue(factory->trackPathField, std::string(data, length));
++ // we create absolute paths and drop links to non-existing files
++ const std::string path = constructAbsolutePath(std::string(data, length));
++ if(!access(path.c_str(), F_OK)) {
++ analysisResult->addValue(factory->trackPathField, path);
++ }
+
+ ++count;
+ } else if (line == 1 && strncmp(data, "#EXTM3U", 7) == 0) {
+diff --git a/lib/lineanalyzers/m3ustreamanalyzer.h b/lib/lineanalyzers/m3ustreamanalyzer.h
+index 461def3..9033f14 100644
+--- a/lib/lineanalyzers/m3ustreamanalyzer.h
++++ b/lib/lineanalyzers/m3ustreamanalyzer.h
+@@ -26,6 +26,8 @@
+ #include <strigi/analyzerplugin.h>
+ #include <strigi/streamlineanalyzer.h>
+
++#include <string>
++
+ class M3uLineAnalyzerFactory;
+
+ class M3uLineAnalyzer : public Strigi::StreamLineAnalyzer
+@@ -37,6 +39,8 @@ private:
+ bool extensionOk;
+ int32_t count;
+
++ std::string constructAbsolutePath(const std::string& relative) const;
++
+ public:
+ M3uLineAnalyzer(const M3uLineAnalyzerFactory* f) : factory(f) {}
+ ~M3uLineAnalyzer() {}
+--
+1.7.10.4
+
diff --git a/0004-Fix-FLAC-Files-Remove-addtional-db-in-replaygain.patch b/0004-Fix-FLAC-Files-Remove-addtional-db-in-replaygain.patch
new file mode 100644
index 0000000..ca18bf4
--- /dev/null
+++ b/0004-Fix-FLAC-Files-Remove-addtional-db-in-replaygain.patch
@@ -0,0 +1,85 @@
+From 0fb8aee587436663cbb60bf743882c70954f8683 Mon Sep 17 00:00:00 2001
+From: Vishesh Handa <handa.vish at gmail.com>
+Date: Tue, 14 Feb 2012 21:50:13 +0530
+Subject: [PATCH 4/8] Fix FLAC Files: Remove addtional 'db' in replaygain
+
+FLAC files have a parameter named "replaygain" which is in the format "<num> dB",
+and because of that Nepomuk cannot parse it as a float.
+
+REVIEW: 103977
+---
+ lib/endanalyzers/flacendanalyzer.cpp | 41 ++++++++++++++++++++++------------
+ 1 file changed, 27 insertions(+), 14 deletions(-)
+
+diff --git a/lib/endanalyzers/flacendanalyzer.cpp b/lib/endanalyzers/flacendanalyzer.cpp
+index 8877d1d..7e34270 100644
+--- a/lib/endanalyzers/flacendanalyzer.cpp
++++ b/lib/endanalyzers/flacendanalyzer.cpp
+@@ -124,6 +124,16 @@ addStatement(AnalysisResult& indexable, string& subject, const string& predicate
+ indexable.addTriplet(subject, predicate, object);
+ }
+
++string
++removeAlphabets(const string& str) {
++ std::string newStr;
++ newStr.reserve(str.length());
++ for( int i=0; i<str.length(); i++ )
++ if( !isalpha(str[i]) )
++ newStr.push_back( str[i] );
++ return newStr;
++}
++
+ signed char
+ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* in) {
+ if(!in)
+@@ -225,20 +235,22 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream*
+ // check if we can handle this field and if so handle it
+ map<string, const RegisteredField*>::const_iterator iter
+ = factory->fields.find(name);
+- const string value(p2+eq+1, size-eq-1);
++ string value(p2+eq+1, size-eq-1);
+
+ if (iter != factory->fields.end()) {
+- // Hack: the tracknumber sometimes contains the track count, too
+- int pos = 0;
+- if(name=="tracknumber" && (pos = value.find_first_of('/')) > 0 ) {
+- // the track number
+- indexable.addValue(iter->second, value.substr(0, pos));
+- // the track count
+- addStatement(indexable, albumUri, albumTrackCountName, value.substr(pos+1));
+- }
+- else {
+- indexable.addValue(iter->second, value);
+- }
++ // Hack: the tracknumber sometimes contains the track count, too
++ int pos = 0;
++ if(name=="tracknumber" && (pos = value.find_first_of('/')) > 0 ) {
++ // the track number
++ indexable.addValue(iter->second, value.substr(0, pos));
++ // the track count
++ addStatement(indexable, albumUri, albumTrackCountName, value.substr(pos+1));
++ }
++ else {
++ if(name == "replaygain_track_gain")
++ value = removeAlphabets(value);
++ indexable.addValue(iter->second, value);
++ }
+ } else if(name=="artist") {
+ artist = value;
+ } else if(name=="lyrics") {
+@@ -256,9 +268,10 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream*
+ } else if(name=="trackcount") {
+ addStatement(indexable, albumUri, albumTrackCountPropertyName, value);
+ } else if(name=="replaygain_album_gain") {
+- addStatement(indexable, albumUri, albumGainPropertyName, value);
++ // the gain is often in the form "number dB", the appending "dB" must be removed
++ addStatement(indexable, albumUri, albumGainPropertyName, removeAlphabets(value));
+ } else if(name=="replaygain_album_peak") {
+- addStatement(indexable, albumUri, albumPeakGainPropertyName, value);
++ addStatement(indexable, albumUri, albumPeakGainPropertyName, removeAlphabets(value));
+ } else if(name=="composer") {
+ const string composerUri( indexable.newAnonymousUri() );
+
+--
+1.7.10.4
+
diff --git a/0005-Fix-flac-analizer-was-importing-only-one-artist-tag.patch b/0005-Fix-flac-analizer-was-importing-only-one-artist-tag.patch
new file mode 100644
index 0000000..7f2849b
--- /dev/null
+++ b/0005-Fix-flac-analizer-was-importing-only-one-artist-tag.patch
@@ -0,0 +1,103 @@
+From f1c837823b6dde8464f46ccb02a2c91eff69bee0 Mon Sep 17 00:00:00 2001
+From: Ignacio Serantes <kde at aynoa.net>
+Date: Thu, 14 Jun 2012 20:15:10 +0200
+Subject: [PATCH 5/8] Fix: flac analizer was importing only one artist tag.
+ REVIEW:105208
+
+---
+ lib/endanalyzers/flacendanalyzer.cpp | 37 +++++++++++++++++++++-------------
+ 1 file changed, 23 insertions(+), 14 deletions(-)
+
+diff --git a/lib/endanalyzers/flacendanalyzer.cpp b/lib/endanalyzers/flacendanalyzer.cpp
+index 7e34270..0a7d680 100644
+--- a/lib/endanalyzers/flacendanalyzer.cpp
++++ b/lib/endanalyzers/flacendanalyzer.cpp
+@@ -30,6 +30,7 @@
+ #include <iostream>
+ #include <cctype>
+ #include <cstring>
++#include <list>
+ using namespace Strigi;
+ using namespace std;
+
+@@ -212,7 +213,7 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream*
+ // in Vorbis comments the "artist" field is used for the performer in modern music
+ // but for the composer in calssical music. Thus, we cache both and make the decision
+ // at the end
+- string artist, performer;
++ list<string> artists, performers;
+
+ // read all the comments
+ p2 += 4;
+@@ -252,7 +253,7 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream*
+ indexable.addValue(iter->second, value);
+ }
+ } else if(name=="artist") {
+- artist = value;
++ artists.push_back(value);
+ } else if(name=="lyrics") {
+ indexable.addText(value.c_str(),
+ (int32_t)value.length());
+@@ -285,7 +286,7 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream*
+ indexable.addTriplet(publisherUri, typePropertyName, contactClassName);
+ indexable.addTriplet(publisherUri, fullnamePropertyName, value);
+ } else if(name=="performer") {
+- performer = value;
++ performers.push_back(value);
+ }
+ }
+ } else {
+@@ -298,8 +299,8 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream*
+ // we now decide how to store the artist and performer as suggested by the Vorbis comments spec
+ const Strigi::RegisteredField* artistField = 0;
+ const Strigi::RegisteredField* performerField = 0;
+- if (!artist.empty()) {
+- if (!performer.empty()) {
++ if (!artists.empty()) {
++ if (!performers.empty()) {
+ artistField = factory->composerField;
+ performerField = factory->performerField;
+ }
+@@ -307,22 +308,30 @@ FlacEndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream*
+ artistField = factory->performerField;
+ }
+ }
+- else if (!performer.empty()) {
++ else if (!performers.empty()) {
+ performerField = factory->performerField;
+ }
+ if (artistField) {
+- const string artistUri( indexable.newAnonymousUri() );
++ list<string>::iterator aIt;
+
+- indexable.addValue(artistField, artistUri);
+- indexable.addTriplet(artistUri, typePropertyName, contactClassName);
+- indexable.addTriplet(artistUri, fullnamePropertyName, artist);
++ for(aIt=artists.begin(); aIt != artists.end(); ++aIt) {
++ const string artistUri( indexable.newAnonymousUri() );
++
++ indexable.addValue(artistField, artistUri);
++ indexable.addTriplet(artistUri, typePropertyName, contactClassName);
++ indexable.addTriplet(artistUri, fullnamePropertyName, *aIt);
++ }
+ }
+ if (performerField) {
+- const string performerUri( indexable.newAnonymousUri() );
++ list<string>::iterator pIt;
+
+- indexable.addValue(performerField, performerUri);
+- indexable.addTriplet(performerUri, typePropertyName, contactClassName);
+- indexable.addTriplet(performerUri, fullnamePropertyName, performer);
++ for(pIt=performers.begin(); pIt != performers.end(); ++pIt) {
++ const string performerUri( indexable.newAnonymousUri() );
++
++ indexable.addValue(performerField, performerUri);
++ indexable.addTriplet(performerUri, typePropertyName, contactClassName);
++ indexable.addTriplet(performerUri, fullnamePropertyName, *pIt);
++ }
+ }
+
+ if(!albumUri.empty()) {
+--
+1.7.10.4
+
diff --git a/0006-Fix-non-numeric-genres-in-id3-v2-mp3-are-ignored.patch b/0006-Fix-non-numeric-genres-in-id3-v2-mp3-are-ignored.patch
new file mode 100644
index 0000000..6fed939
--- /dev/null
+++ b/0006-Fix-non-numeric-genres-in-id3-v2-mp3-are-ignored.patch
@@ -0,0 +1,666 @@
+From 17e9cdbdee9b3b3cdccab416004a99547a7196a7 Mon Sep 17 00:00:00 2001
+From: Ignacio Serantes <kde at aynoa.net>
+Date: Sun, 17 Jun 2012 20:39:36 +0200
+Subject: [PATCH 6/8] Fix: non numeric genres in id3 v2 mp3 are ignored.
+ REVIEW:105242.
+
+---
+ id3endanalyzer.cpp | 646 ++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 646 insertions(+)
+ create mode 100644 id3endanalyzer.cpp
+
+diff --git a/id3endanalyzer.cpp b/id3endanalyzer.cpp
+new file mode 100644
+index 0000000..677ece0
+--- /dev/null
++++ b/id3endanalyzer.cpp
+@@ -0,0 +1,646 @@
++/* This file is part of Strigi Desktop Search
++ *
++ * Copyright (C) 2006 Jos van den Oever <jos at vandenoever.info>
++ * 2009 Evgeny Egorochkin <phreedom.stdin at gmail.com>
++ *
++ * This library is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Library General Public
++ * License as published by the Free Software Foundation; either
++ * version 2 of the License, or (at your option) any later version.
++ *
++ * This library is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ * Library General Public License for more details.
++ *
++ * You should have received a copy of the GNU Library General Public License
++ * along with this library; see the file COPYING.LIB. If not, write to
++ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
++ * Boston, MA 02110-1301, USA.
++ */
++
++#ifdef HAVE_CONFIG_H
++# include "config.h"
++#endif
++
++#include "id3endanalyzer.h"
++#include "analysisresult.h"
++#include "../rdfnamespaces.h"
++#include <strigi/strigiconfig.h>
++#include <strigi/textutils.h>
++#include <strigi/stringstream.h>
++#include <iostream>
++#include <sstream>
++#include <cstring>
++#include <cstdlib>
++#include <iconv.h>
++
++#ifdef ICONV_SECOND_ARGUMENT_IS_CONST
++ #define ICONV_CONST const
++#else
++ #define ICONV_CONST
++#endif
++
++using namespace Strigi;
++using namespace std;
++
++const string
++ typePropertyName(
++ RDF "type"),
++ fullnamePropertyName(
++ NCO "fullname"),
++ titlePropertyName(
++ NIE "title"),
++ albumTrackCountPropertyName(
++ NMM_DRAFT "albumTrackCount"),
++ discNumberPropertyName(
++ NMM_DRAFT "setNumber"),
++ discCountPropertyName(
++ NMM_DRAFT "setSize"),
++
++ musicClassName(
++ NMM_DRAFT "MusicPiece"),
++ audioClassName(
++ NFO "Audio"),
++ albumClassName(
++ NMM_DRAFT "MusicAlbum"),
++ contactClassName(
++ NCO "Contact");
++
++/*
++ENCA autodetection of broken encodings. First, need to make sure it's going to be actually useful.
++ID3v2.0
++play counter:needs nepomuk resolution
++replaygain
+++lyrics
+++Improve:
++ creation date:
++ language: support multiple
++ Genre
++ album art type handling
++VBR detection
++*/
++
++static const string genres[148] = {
++ "Blues",
++ "Classic Rock",
++ "Country",
++ "Dance",
++ "Disco",
++ "Funk",
++ "Grunge",
++ "Hip-Hop",
++ "Jazz",
++ "Metal",
++ "New Age",
++ "Oldies",
++ "Other",
++ "Pop",
++ "R&B",
++ "Rap",
++ "Reggae",
++ "Rock",
++ "Techno",
++ "Industrial",
++ "Alternative",
++ "Ska",
++ "Death Metal",
++ "Pranks",
++ "Soundtrack",
++ "Euro-Techno",
++ "Ambient",
++ "Trip-Hop",
++ "Vocal",
++ "Jazz+Funk",
++ "Fusion",
++ "Trance",
++ "Classical",
++ "Instrumental",
++ "Acid",
++ "House",
++ "Game",
++ "Sound Clip",
++ "Gospel",
++ "Noise",
++ "Alternative Rock",
++ "Bass",
++ "Soul",
++ "Punk",
++ "Space",
++ "Meditative",
++ "Instrumental Pop",
++ "Instrumental Rock",
++ "Ethnic",
++ "Gothic",
++ "Darkwave",
++ "Techno-Industrial",
++ "Electronic",
++ "Pop-Folk",
++ "Eurodance",
++ "Dream",
++ "Southern Rock",
++ "Comedy",
++ "Cult",
++ "Gangsta",
++ "Top 40",
++ "Christian Rap",
++ "Pop/Funk",
++ "Jungle",
++ "Native American",
++ "Cabaret",
++ "New Wave",
++ "Psychedelic",
++ "Rave",
++ "Showtunes",
++ "Trailer",
++ "Lo-Fi",
++ "Tribal",
++ "Acid Punk",
++ "Acid Jazz",
++ "Polka",
++ "Retro",
++ "Musical",
++ "Rock & Roll",
++ "Hard Rock",
++ "Folk",
++ "Folk/Rock",
++ "National Folk",
++ "Swing",
++ "Fusion",
++ "Bebop",
++ "Latin",
++ "Revival",
++ "Celtic",
++ "Bluegrass",
++ "Avantgarde",
++ "Gothic Rock",
++ "Progressive Rock",
++ "Psychedelic Rock",
++ "Symphonic Rock",
++ "Slow Rock",
++ "Big Band",
++ "Chorus",
++ "Easy Listening",
++ "Acoustic",
++ "Humour",
++ "Speech",
++ "Chanson",
++ "Opera",
++ "Chamber Music",
++ "Sonata",
++ "Symphony",
++ "Booty Bass",
++ "Primus",
++ "Porn Groove",
++ "Satire",
++ "Slow Jam",
++ "Club",
++ "Tango",
++ "Samba",
++ "Folklore",
++ "Ballad",
++ "Power Ballad",
++ "Rhythmic Soul",
++ "Freestyle",
++ "Duet",
++ "Punk Rock",
++ "Drum Solo",
++ "A Cappella",
++ "Euro-House",
++ "Dance Hall",
++ "Goa",
++ "Drum & Bass",
++ "Club-House",
++ "Hardcore",
++ "Terror",
++ "Indie",
++ "BritPop",
++ "Negerpunk",
++ "Polsk Punk",
++ "Beat",
++ "Christian Gangsta Rap",
++ "Heavy Metal",
++ "Black Metal",
++ "Crossover",
++ "Contemporary Christian",
++ "Christian Rock",
++ "Merengue",
++ "Salsa",
++ "Thrash Metal",
++ "Anime",
++ "Jpop",
++ "Synthpop"
++};
++
++const uint32_t bitrate [15] = {0, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000, 160000, 192000, 224000, 256000, 320000 };
++
++const uint32_t samplerate[3] = {44100, 48000, 32000};
++
++const char * encodings[5] = {"ISO-8859-1", "UTF-16", "UTF-16BE", "UTF-8", "UTF-16LE"};
++
++#ifndef _GNU_SOURCE
++size_t
++strnlen(const char *s, size_t maxlen) {
++ for(size_t i=0; i<maxlen; i++)
++ if (s[i]==0)
++ return i;
++ return maxlen;
++}
++#endif
++
++class UTF8Convertor {
++ private:
++ iconv_t const conv;
++ char *out;
++ size_t capacity;
++ public:
++ UTF8Convertor(const char *encoding);
++ const string convert(const char *data, size_t len);
++ ~UTF8Convertor();
++};
++UTF8Convertor::UTF8Convertor(const char *encoding) :conv(iconv_open("UTF-8", encoding)), out(0), capacity(0) {
++}
++UTF8Convertor::~UTF8Convertor() {
++ iconv_close(conv);
++ if (out) free(out);
++}
++const string
++UTF8Convertor::convert(const char *data, size_t len) {
++ if (!len)
++ return string();
++ if ( capacity<len*3 || // is the buffer too small or too large?
++ (capacity>10000 && capacity>len*8) ) {
++ capacity = len*3;
++ out = (char*)realloc(out, len*3);
++ }
++
++ char *result = out;
++ size_t reslen = capacity;
++
++ ICONV_CONST char *input = (char *)data;
++ iconv(conv, &input, &len, &result, &reslen);
++
++ return string(out,capacity-reslen);
++}
++
++void
++ID3EndAnalyzerFactory::registerFields(FieldRegister& r) {
++ createdField = r.registerField(NIE "contentCreated");
++ subjectField = r.registerField(NIE "subject");
++ titleField = r.registerField(titlePropertyName);
++ descriptionField = r.registerField(NIE "description");
++ commentField = r.registerField(NIE "comment");
++ albumField = r.registerField(NMM_DRAFT "musicAlbum");
++ genreField = r.registerField(NMM_DRAFT "genre");
++ composerField = r.registerField(NMM_DRAFT "composer");
++ performerField = r.registerField(NMM_DRAFT "performer");
++ lyricistField = r.registerField(NMM_DRAFT "lyricist");
++ publisherField = r.registerField(NCO "publisher");
++ languageField = r.registerField(NIE "language");
++ copyrightField = r.registerField(NIE "copyright");
++ trackNumberField = r.registerField(NMM_DRAFT "trackNumber");
++ discNumberField = r.registerField(discNumberPropertyName);
++ durationField = r.registerField(NFO "duration");
++ typeField = r.typeField;
++
++ bitrateField = r.registerField(NFO "averageBitrate");
++ samplerateField = r.registerField(NFO "sampleRate");
++ codecField = r.registerField(NFO "codec");
++ channelsField = r.registerField(NFO "channels");
++}
++
++inline
++void
++addStatement(AnalysisResult &indexable, string& subject, const string& predicate, const string& object) {
++ if (subject.empty())
++ subject = indexable.newAnonymousUri();
++ indexable.addTriplet(subject, predicate, object);
++}
++
++inline
++int32_t readAsyncSize(const unsigned char* b) {
++ return (((int32_t)b[0])<<21) + (((int32_t)b[1])<<14)
++ + (((int32_t)b[2])<<7) + ((int32_t)b[3]);
++}
++
++int32_t
++readSize(const unsigned char* b, bool async) {
++ const signed char* c = (const signed char*)b;
++ if (async) {
++ if (c[0] < 0 || c[1] < 0 || c[2] < 0 || c[3] < 0)
++ return -1;
++ return readAsyncSize(b);
++ }
++ return readBigEndianInt32(b);
++}
++bool
++ID3EndAnalyzer::checkHeader(const char* header, int32_t headersize) const {
++ const unsigned char* usbuf = (const unsigned char*)header;
++ int32_t i;
++
++ for(i=0; (header[i] == '\0') && (i<headersize); i++);
++ return (headersize>=6+i)
++ && (
++ (strncmp("ID3", header+i, 3) == 0 // check that it's ID3
++ && usbuf[3+i] <= 4 // only handle version <= 4
++ && (usbuf[5+i]&~0x80) == 0) // we're too dumb too handle other flags
++ ||
++ ((unsigned char)header[0+i] == 0xff && ((unsigned char)header[1+i]&0xfe) == 0xfa
++ && (unsigned char)header[2+i]>>4 != 0xf // MP3 frame header is ok too
++ && (((unsigned char)header[2+i]>>2)&3) != 3)
++ );
++
++}
++
++static void trim(string& s,const string& drop = " ")
++{
++ string r = s.erase(s.find_last_not_of(drop)+1);
++ r.erase(0, r.find_first_not_of(drop));
++}
++
++static bool extract_and_trim(const char* buf, int offset, int length, string& s)
++{
++ // We're extracting here the ID3v1 tags and doing some sanity checks:
++ // 1) Strip of all leading and prefixed spaces
++ // 2) Test if string contains at least something
++ if (!buf[offset])
++ return false;
++
++ s = string(buf + offset, strnlen(buf + offset, length));
++ trim(s);
++ // Return true if the extracted value is not empty (read: contains something)
++ return !s.empty();
++}
++
++signed char
++ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* in) {
++ const int max_padding = 1000;
++ if(!in)
++ return -1;
++
++ bool found_title = false, found_artist = false,
++ found_album = false, found_comment = false,
++ found_year = false, found_track = false,
++ found_genre = false, found_tag = false;
++ string albumUri;
++ char albumArtNum = '\0';
++
++ // read 10 byte header
++ const char* buf;
++ int32_t nread = in->read(buf, 10+max_padding, 10+max_padding);
++
++ // parse ID3v2* tag
++
++ if (nread == 10+max_padding && strncmp("ID3", buf, 3) == 0) { // check for ID3 header
++
++ bool async = buf[3] >= 4;
++ bool unsync = (buf[5] & 0x80)!=0;
++
++ // calculate size from 4 syncsafe bytes
++ int32_t size = readAsyncSize((unsigned char*)buf+6);
++ if (size < 0 || size > 5000000)
++ return -1;
++ size += 10+4+max_padding; // add the size of the ID3 header, MP3 frame header and possible padding generated by LAME(workaround)
++
++ // read the entire tag
++ in->reset(0);
++ nread = in->read(buf, size, size);
++ if (nread != size)
++ return -1;
++
++ found_tag = true;
++
++ const char* p = buf + 10;
++ buf += size-4-max_padding;
++ while (p < buf && *p) {
++ size = readSize((unsigned char*)p+4, async);
++ if (size <= 0 || size > (buf-p)-10) {
++ //cerr << "size < 0: " << size << endl;
++ break;
++ }
++
++ string value;
++ uint8_t enc = p[10];
++ const char *encoding = enc>4 ? encodings[0] : encodings[enc] ;
++ UTF8Convertor conv(encoding);
++ const char *decoded_value;
++ int32_t decoded_value_size;
++ string deunsyncbuf;
++ if (unsync) {
++ deunsyncbuf.reserve(size-1);
++ for(int32_t i = 0; i<size-1; i++)
++ if ( (i==0) || (p[11+i]!=0) || (p[10+i]!=0xff) )
++ deunsyncbuf.push_back(p[11+i]);
++ decoded_value = deunsyncbuf.c_str();
++ decoded_value_size = deunsyncbuf.length();
++ } else {
++ decoded_value = p+11;
++ decoded_value_size = size-1;
++ };
++
++ if (strncmp("APIC", p, 4) == 0) {
++ size_t mimelen = strnlen(decoded_value, decoded_value_size);
++ if ((int32_t)mimelen < decoded_value_size-3) {
++ const char *desc = decoded_value+mimelen+1+1;
++// uint8_t pictype = p[11+mimelen+1];
++ size_t desclen = strnlen(desc,decoded_value_size-mimelen-2-1);
++ const char *content = desc + desclen + 1 + (enc == 0 || enc == 3 ? 0:1) ;
++
++ if(content<decoded_value+decoded_value_size) {
++ StringInputStream picstream(content,
++ (uint32_t)(decoded_value+decoded_value_size-content), false);
++ string picname;
++ picname = (char)('0'+albumArtNum++);
++ indexable.indexChild(picname, indexable.mTime(), &picstream);
++
++ if (desclen && indexable.child()) {
++ if (enc == 0 || enc == 3) {
++ indexable.child()->addValue(factory->descriptionField, string(desc, desclen) );
++ } else {
++ indexable.child()->addValue(factory->descriptionField, conv.convert(desc, desclen) );
++ }
++ }
++
++ indexable.finishIndexChild();
++ }
++ }
++ }
++
++ if (enc == 0 || enc == 3) {
++ value = string(decoded_value, strnlen(decoded_value, decoded_value_size));
++ } else {
++ value = conv.convert(decoded_value, decoded_value_size); // FIXME: add similar workaround
++ }
++
++ if (!value.empty()) {
++ if (strncmp("TIT1", p, 4) == 0) {
++ indexable.addValue(factory->subjectField, value);
++ } else if (strncmp("TIT2", p, 4) == 0) {
++ indexable.addValue(factory->titleField, value);
++ found_title = true;
++ } else if (strncmp("TIT3", p, 4) == 0) {
++ indexable.addValue(factory->descriptionField, value);
++ } else if (strncmp("TLAN", p, 4) == 0) {
++ indexable.addValue(factory->languageField, value);
++ } else if (strncmp("TCOP", p, 4) == 0) {
++ indexable.addValue(factory->copyrightField, value);
++ } else if ((strncmp("TDRL", p, 4) == 0) ||
++ (strncmp("TDAT", p, 4) == 0) ||
++ (strncmp("TYER", p, 4) == 0) ||
++ (strncmp("TDRC", p, 4) == 0)) {
++ indexable.addValue(factory->createdField, value);
++ found_year = true;
++ } else if ((strncmp("TPE1", p, 4) == 0) ||
++ (strncmp("TPE2", p, 4) == 0) ||
++ (strncmp("TPE3", p, 4) == 0) ||
++ (strncmp("TPE4", p, 4) == 0)) {
++ string performerUri = indexable.newAnonymousUri();
++
++ indexable.addValue(factory->performerField, performerUri);
++ indexable.addTriplet(performerUri, typePropertyName, contactClassName);
++ indexable.addTriplet(performerUri, fullnamePropertyName, value);
++ found_artist = true;
++ } else if ((strncmp("TPUB", p, 4) == 0) ||
++ (strncmp("TENC", p, 4) == 0)) {
++ string publisherUri = indexable.newAnonymousUri();
++
++ indexable.addValue(factory->publisherField, publisherUri);
++ indexable.addTriplet(publisherUri, typePropertyName, contactClassName);
++ indexable.addTriplet(publisherUri, fullnamePropertyName, value);
++ } else if ((strncmp("TALB", p, 4) == 0) ||
++ (strncmp("TOAL", p, 4) == 0)) {
++ addStatement(indexable, albumUri, titlePropertyName, value);
++ found_album = true;
++ } else if (strncmp("TCON", p, 4) == 0) {
++ // The Genre is stored as (number)
++ if( value[0] == '(' && value[value.length()-1] == ')' ) {
++ //vHanda: Maybe one should check if all the characters in between are digits
++ int genreIndex = atoi( value.substr( 1, value.length()-1 ).c_str() );
++ indexable.addValue(factory->genreField, genres[ genreIndex ]);
++ found_genre = true;
++ } else {
++ // We must not forget that genre could be a string.
++ if (!value.empty()) {
++ indexable.addValue(factory->genreField, value);
++ found_genre = true;
++ }
++ }
++ } else if (strncmp("TLEN", p, 4) == 0) {
++ indexable.addValue(factory->durationField, value);
++ } else if (strncmp("TEXT", p, 4) == 0) {
++ string lyricistUri = indexable.newAnonymousUri();
++
++ indexable.addValue(factory->lyricistField, lyricistUri);
++ indexable.addTriplet(lyricistUri, typePropertyName, contactClassName);
++ indexable.addTriplet(lyricistUri, fullnamePropertyName, value);
++ } else if (strncmp("TCOM", p, 4) == 0) {
++ string composerUri = indexable.newAnonymousUri();
++
++ indexable.addValue(factory->composerField, composerUri);
++ indexable.addTriplet(composerUri, typePropertyName, contactClassName);
++ indexable.addTriplet(composerUri, fullnamePropertyName, value);
++ } else if (strncmp("TRCK", p, 4) == 0) {
++ istringstream ins(value);
++ int tnum;
++ ins >> tnum;
++ if (!ins.fail()) {
++ indexable.addValue(factory->trackNumberField, tnum);
++ found_track = true;
++ ins.ignore(10,'/');
++ int tcount;
++ ins >> tcount;
++ if (!ins.fail()) {
++ ostringstream outs;
++ outs << tcount;
++ addStatement(indexable, albumUri, albumTrackCountPropertyName, outs.str());
++ }
++ }
++ } else if (strncmp("TPOS", p, 4) == 0) {
++ istringstream ins(value);
++ int dnum;
++ ins >> dnum;
++ if (!ins.fail()) {
++ indexable.addValue(factory->discNumberField, dnum);
++ ins.ignore(10,'/');
++ int dcount;
++ ins >> dcount;
++ if (!ins.fail()) {
++ ostringstream outs;
++ outs << dcount;
++ addStatement(indexable, albumUri, discCountPropertyName, outs.str());
++ }
++ }
++ }
++ }
++ p += size + 10;
++ }
++ }
++ // parse MP3 frame header
++
++ int bitrateindex, samplerateindex;
++ int i;
++ for(i=0; (buf[i]=='\0') && (i<max_padding) ; i++);
++ if (((unsigned char)buf[0+i] == 0xff) && (((unsigned char)buf[1+i]&0xfe) == 0xfa)
++ && ((bitrateindex = ((unsigned char)buf[2+i]>>4)) != 0xf)
++ && ((samplerateindex = (((unsigned char)buf[2+i]>>2)&3)) != 3 )) { // is this MP3?
++
++ indexable.addValue(factory->typeField, audioClassName);
++ // FIXME: no support for VBR :(
++ // ideas: compare bitrate from the frame with stream size/duration from ID3 tags
++ // check several consecutive frames to see if bitrate is different
++ // in neither case you can be sure to properly detected VBR :(
++ indexable.addValue(factory->bitrateField, bitrate[bitrateindex]);
++ indexable.addValue(factory->samplerateField, samplerate[samplerateindex]);
++ indexable.addValue(factory->codecField, "MP3");
++ indexable.addValue(factory->channelsField, ((buf[3+i]>>6) == 3 ? 1:2 ) );
++ }
++
++ // Parse ID3v1 tag
++
++ int64_t insize;
++ if ( (insize = in->size()) > (128+nread)) {
++
++ // read the tag and check signature
++ int64_t nskip = insize-128-nread;
++ if (nskip == in->skip(nskip))
++ if (in->read(buf, 128, 128)==128)
++ if (!strncmp("TAG", buf, 3)) {
++
++ found_tag = true;
++
++ std::string s;
++
++ if (!found_title && extract_and_trim(buf, 3, 30, s)) {
++ indexable.addValue(factory->titleField, s);
++ }
++ if (!found_artist && extract_and_trim(buf, 33, 30, s)) {
++ const string performerUri = indexable.newAnonymousUri();
++ indexable.addValue(factory->performerField, performerUri);
++ indexable.addTriplet(performerUri, typePropertyName, contactClassName);
++ indexable.addTriplet(performerUri, fullnamePropertyName, s);
++ }
++ if (!found_album && extract_and_trim(buf, 63, 30, s))
++ addStatement(indexable, albumUri, titlePropertyName, s);
++ if (!found_year && extract_and_trim(buf, 93, 4, s))
++ indexable.addValue(factory->createdField, s);
++ if (!found_comment && extract_and_trim(buf, 97, 30, s)) {
++ indexable.addValue(factory->commentField, s);
++ }
++ if (!found_track && !buf[125] && buf[126]) {
++ indexable.addValue(factory->trackNumberField, (int)(buf[126]));
++ }
++ if (!found_genre && (unsigned char)(buf[127]) < 148)
++ indexable.addValue(factory->genreField, genres[(uint8_t)buf[127]]);
++ }
++ }
++
++ if(!albumUri.empty()) {
++ indexable.addValue(factory->albumField, albumUri);
++ indexable.addTriplet(albumUri, typePropertyName, albumClassName);
++ }
++
++ if (found_tag)
++ indexable.addValue(factory->typeField, musicClassName);
++
++ return 0;
++}
+--
+1.7.10.4
+
diff --git a/0007-Opps-Rmoving-a-wrong-commited-file-id3endanalyzer.cp.patch b/0007-Opps-Rmoving-a-wrong-commited-file-id3endanalyzer.cp.patch
new file mode 100644
index 0000000..f376016
--- /dev/null
+++ b/0007-Opps-Rmoving-a-wrong-commited-file-id3endanalyzer.cp.patch
@@ -0,0 +1,699 @@
+From 907162391395412c058d7339c4f84533ef92023d Mon Sep 17 00:00:00 2001
+From: Ignacio Serantes <kde at aynoa.net>
+Date: Sun, 17 Jun 2012 21:38:31 +0200
+Subject: [PATCH 7/8] Opps! Rmoving a wrong commited file,
+ "id3endanalyzer.cpp", and updating the right one
+ "lib/endanalyzers/id3endanalyzer.cpp".
+
+---
+ id3endanalyzer.cpp | 646 -----------------------------------
+ lib/endanalyzers/id3endanalyzer.cpp | 20 +-
+ 2 files changed, 13 insertions(+), 653 deletions(-)
+ delete mode 100644 id3endanalyzer.cpp
+
+diff --git a/id3endanalyzer.cpp b/id3endanalyzer.cpp
+deleted file mode 100644
+index 677ece0..0000000
+--- a/id3endanalyzer.cpp
++++ /dev/null
+@@ -1,646 +0,0 @@
+-/* This file is part of Strigi Desktop Search
+- *
+- * Copyright (C) 2006 Jos van den Oever <jos at vandenoever.info>
+- * 2009 Evgeny Egorochkin <phreedom.stdin at gmail.com>
+- *
+- * This library is free software; you can redistribute it and/or
+- * modify it under the terms of the GNU Library General Public
+- * License as published by the Free Software Foundation; either
+- * version 2 of the License, or (at your option) any later version.
+- *
+- * This library is distributed in the hope that it will be useful,
+- * but WITHOUT ANY WARRANTY; without even the implied warranty of
+- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+- * Library General Public License for more details.
+- *
+- * You should have received a copy of the GNU Library General Public License
+- * along with this library; see the file COPYING.LIB. If not, write to
+- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+- * Boston, MA 02110-1301, USA.
+- */
+-
+-#ifdef HAVE_CONFIG_H
+-# include "config.h"
+-#endif
+-
+-#include "id3endanalyzer.h"
+-#include "analysisresult.h"
+-#include "../rdfnamespaces.h"
+-#include <strigi/strigiconfig.h>
+-#include <strigi/textutils.h>
+-#include <strigi/stringstream.h>
+-#include <iostream>
+-#include <sstream>
+-#include <cstring>
+-#include <cstdlib>
+-#include <iconv.h>
+-
+-#ifdef ICONV_SECOND_ARGUMENT_IS_CONST
+- #define ICONV_CONST const
+-#else
+- #define ICONV_CONST
+-#endif
+-
+-using namespace Strigi;
+-using namespace std;
+-
+-const string
+- typePropertyName(
+- RDF "type"),
+- fullnamePropertyName(
+- NCO "fullname"),
+- titlePropertyName(
+- NIE "title"),
+- albumTrackCountPropertyName(
+- NMM_DRAFT "albumTrackCount"),
+- discNumberPropertyName(
+- NMM_DRAFT "setNumber"),
+- discCountPropertyName(
+- NMM_DRAFT "setSize"),
+-
+- musicClassName(
+- NMM_DRAFT "MusicPiece"),
+- audioClassName(
+- NFO "Audio"),
+- albumClassName(
+- NMM_DRAFT "MusicAlbum"),
+- contactClassName(
+- NCO "Contact");
+-
+-/*
+-ENCA autodetection of broken encodings. First, need to make sure it's going to be actually useful.
+-ID3v2.0
+-play counter:needs nepomuk resolution
+-replaygain
+-+lyrics
+-+Improve:
+- creation date:
+- language: support multiple
+- Genre
+- album art type handling
+-VBR detection
+-*/
+-
+-static const string genres[148] = {
+- "Blues",
+- "Classic Rock",
+- "Country",
+- "Dance",
+- "Disco",
+- "Funk",
+- "Grunge",
+- "Hip-Hop",
+- "Jazz",
+- "Metal",
+- "New Age",
+- "Oldies",
+- "Other",
+- "Pop",
+- "R&B",
+- "Rap",
+- "Reggae",
+- "Rock",
+- "Techno",
+- "Industrial",
+- "Alternative",
+- "Ska",
+- "Death Metal",
+- "Pranks",
+- "Soundtrack",
+- "Euro-Techno",
+- "Ambient",
+- "Trip-Hop",
+- "Vocal",
+- "Jazz+Funk",
+- "Fusion",
+- "Trance",
+- "Classical",
+- "Instrumental",
+- "Acid",
+- "House",
+- "Game",
+- "Sound Clip",
+- "Gospel",
+- "Noise",
+- "Alternative Rock",
+- "Bass",
+- "Soul",
+- "Punk",
+- "Space",
+- "Meditative",
+- "Instrumental Pop",
+- "Instrumental Rock",
+- "Ethnic",
+- "Gothic",
+- "Darkwave",
+- "Techno-Industrial",
+- "Electronic",
+- "Pop-Folk",
+- "Eurodance",
+- "Dream",
+- "Southern Rock",
+- "Comedy",
+- "Cult",
+- "Gangsta",
+- "Top 40",
+- "Christian Rap",
+- "Pop/Funk",
+- "Jungle",
+- "Native American",
+- "Cabaret",
+- "New Wave",
+- "Psychedelic",
+- "Rave",
+- "Showtunes",
+- "Trailer",
+- "Lo-Fi",
+- "Tribal",
+- "Acid Punk",
+- "Acid Jazz",
+- "Polka",
+- "Retro",
+- "Musical",
+- "Rock & Roll",
+- "Hard Rock",
+- "Folk",
+- "Folk/Rock",
+- "National Folk",
+- "Swing",
+- "Fusion",
+- "Bebop",
+- "Latin",
+- "Revival",
+- "Celtic",
+- "Bluegrass",
+- "Avantgarde",
+- "Gothic Rock",
+- "Progressive Rock",
+- "Psychedelic Rock",
+- "Symphonic Rock",
+- "Slow Rock",
+- "Big Band",
+- "Chorus",
+- "Easy Listening",
+- "Acoustic",
+- "Humour",
+- "Speech",
+- "Chanson",
+- "Opera",
+- "Chamber Music",
+- "Sonata",
+- "Symphony",
+- "Booty Bass",
+- "Primus",
+- "Porn Groove",
+- "Satire",
+- "Slow Jam",
+- "Club",
+- "Tango",
+- "Samba",
+- "Folklore",
+- "Ballad",
+- "Power Ballad",
+- "Rhythmic Soul",
+- "Freestyle",
+- "Duet",
+- "Punk Rock",
+- "Drum Solo",
+- "A Cappella",
+- "Euro-House",
+- "Dance Hall",
+- "Goa",
+- "Drum & Bass",
+- "Club-House",
+- "Hardcore",
+- "Terror",
+- "Indie",
+- "BritPop",
+- "Negerpunk",
+- "Polsk Punk",
+- "Beat",
+- "Christian Gangsta Rap",
+- "Heavy Metal",
+- "Black Metal",
+- "Crossover",
+- "Contemporary Christian",
+- "Christian Rock",
+- "Merengue",
+- "Salsa",
+- "Thrash Metal",
+- "Anime",
+- "Jpop",
+- "Synthpop"
+-};
+-
+-const uint32_t bitrate [15] = {0, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000, 160000, 192000, 224000, 256000, 320000 };
+-
+-const uint32_t samplerate[3] = {44100, 48000, 32000};
+-
+-const char * encodings[5] = {"ISO-8859-1", "UTF-16", "UTF-16BE", "UTF-8", "UTF-16LE"};
+-
+-#ifndef _GNU_SOURCE
+-size_t
+-strnlen(const char *s, size_t maxlen) {
+- for(size_t i=0; i<maxlen; i++)
+- if (s[i]==0)
+- return i;
+- return maxlen;
+-}
+-#endif
+-
+-class UTF8Convertor {
+- private:
+- iconv_t const conv;
+- char *out;
+- size_t capacity;
+- public:
+- UTF8Convertor(const char *encoding);
+- const string convert(const char *data, size_t len);
+- ~UTF8Convertor();
+-};
+-UTF8Convertor::UTF8Convertor(const char *encoding) :conv(iconv_open("UTF-8", encoding)), out(0), capacity(0) {
+-}
+-UTF8Convertor::~UTF8Convertor() {
+- iconv_close(conv);
+- if (out) free(out);
+-}
+-const string
+-UTF8Convertor::convert(const char *data, size_t len) {
+- if (!len)
+- return string();
+- if ( capacity<len*3 || // is the buffer too small or too large?
+- (capacity>10000 && capacity>len*8) ) {
+- capacity = len*3;
+- out = (char*)realloc(out, len*3);
+- }
+-
+- char *result = out;
+- size_t reslen = capacity;
+-
+- ICONV_CONST char *input = (char *)data;
+- iconv(conv, &input, &len, &result, &reslen);
+-
+- return string(out,capacity-reslen);
+-}
+-
+-void
+-ID3EndAnalyzerFactory::registerFields(FieldRegister& r) {
+- createdField = r.registerField(NIE "contentCreated");
+- subjectField = r.registerField(NIE "subject");
+- titleField = r.registerField(titlePropertyName);
+- descriptionField = r.registerField(NIE "description");
+- commentField = r.registerField(NIE "comment");
+- albumField = r.registerField(NMM_DRAFT "musicAlbum");
+- genreField = r.registerField(NMM_DRAFT "genre");
+- composerField = r.registerField(NMM_DRAFT "composer");
+- performerField = r.registerField(NMM_DRAFT "performer");
+- lyricistField = r.registerField(NMM_DRAFT "lyricist");
+- publisherField = r.registerField(NCO "publisher");
+- languageField = r.registerField(NIE "language");
+- copyrightField = r.registerField(NIE "copyright");
+- trackNumberField = r.registerField(NMM_DRAFT "trackNumber");
+- discNumberField = r.registerField(discNumberPropertyName);
+- durationField = r.registerField(NFO "duration");
+- typeField = r.typeField;
+-
+- bitrateField = r.registerField(NFO "averageBitrate");
+- samplerateField = r.registerField(NFO "sampleRate");
+- codecField = r.registerField(NFO "codec");
+- channelsField = r.registerField(NFO "channels");
+-}
+-
+-inline
+-void
+-addStatement(AnalysisResult &indexable, string& subject, const string& predicate, const string& object) {
+- if (subject.empty())
+- subject = indexable.newAnonymousUri();
+- indexable.addTriplet(subject, predicate, object);
+-}
+-
+-inline
+-int32_t readAsyncSize(const unsigned char* b) {
+- return (((int32_t)b[0])<<21) + (((int32_t)b[1])<<14)
+- + (((int32_t)b[2])<<7) + ((int32_t)b[3]);
+-}
+-
+-int32_t
+-readSize(const unsigned char* b, bool async) {
+- const signed char* c = (const signed char*)b;
+- if (async) {
+- if (c[0] < 0 || c[1] < 0 || c[2] < 0 || c[3] < 0)
+- return -1;
+- return readAsyncSize(b);
+- }
+- return readBigEndianInt32(b);
+-}
+-bool
+-ID3EndAnalyzer::checkHeader(const char* header, int32_t headersize) const {
+- const unsigned char* usbuf = (const unsigned char*)header;
+- int32_t i;
+-
+- for(i=0; (header[i] == '\0') && (i<headersize); i++);
+- return (headersize>=6+i)
+- && (
+- (strncmp("ID3", header+i, 3) == 0 // check that it's ID3
+- && usbuf[3+i] <= 4 // only handle version <= 4
+- && (usbuf[5+i]&~0x80) == 0) // we're too dumb too handle other flags
+- ||
+- ((unsigned char)header[0+i] == 0xff && ((unsigned char)header[1+i]&0xfe) == 0xfa
+- && (unsigned char)header[2+i]>>4 != 0xf // MP3 frame header is ok too
+- && (((unsigned char)header[2+i]>>2)&3) != 3)
+- );
+-
+-}
+-
+-static void trim(string& s,const string& drop = " ")
+-{
+- string r = s.erase(s.find_last_not_of(drop)+1);
+- r.erase(0, r.find_first_not_of(drop));
+-}
+-
+-static bool extract_and_trim(const char* buf, int offset, int length, string& s)
+-{
+- // We're extracting here the ID3v1 tags and doing some sanity checks:
+- // 1) Strip of all leading and prefixed spaces
+- // 2) Test if string contains at least something
+- if (!buf[offset])
+- return false;
+-
+- s = string(buf + offset, strnlen(buf + offset, length));
+- trim(s);
+- // Return true if the extracted value is not empty (read: contains something)
+- return !s.empty();
+-}
+-
+-signed char
+-ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* in) {
+- const int max_padding = 1000;
+- if(!in)
+- return -1;
+-
+- bool found_title = false, found_artist = false,
+- found_album = false, found_comment = false,
+- found_year = false, found_track = false,
+- found_genre = false, found_tag = false;
+- string albumUri;
+- char albumArtNum = '\0';
+-
+- // read 10 byte header
+- const char* buf;
+- int32_t nread = in->read(buf, 10+max_padding, 10+max_padding);
+-
+- // parse ID3v2* tag
+-
+- if (nread == 10+max_padding && strncmp("ID3", buf, 3) == 0) { // check for ID3 header
+-
+- bool async = buf[3] >= 4;
+- bool unsync = (buf[5] & 0x80)!=0;
+-
+- // calculate size from 4 syncsafe bytes
+- int32_t size = readAsyncSize((unsigned char*)buf+6);
+- if (size < 0 || size > 5000000)
+- return -1;
+- size += 10+4+max_padding; // add the size of the ID3 header, MP3 frame header and possible padding generated by LAME(workaround)
+-
+- // read the entire tag
+- in->reset(0);
+- nread = in->read(buf, size, size);
+- if (nread != size)
+- return -1;
+-
+- found_tag = true;
+-
+- const char* p = buf + 10;
+- buf += size-4-max_padding;
+- while (p < buf && *p) {
+- size = readSize((unsigned char*)p+4, async);
+- if (size <= 0 || size > (buf-p)-10) {
+- //cerr << "size < 0: " << size << endl;
+- break;
+- }
+-
+- string value;
+- uint8_t enc = p[10];
+- const char *encoding = enc>4 ? encodings[0] : encodings[enc] ;
+- UTF8Convertor conv(encoding);
+- const char *decoded_value;
+- int32_t decoded_value_size;
+- string deunsyncbuf;
+- if (unsync) {
+- deunsyncbuf.reserve(size-1);
+- for(int32_t i = 0; i<size-1; i++)
+- if ( (i==0) || (p[11+i]!=0) || (p[10+i]!=0xff) )
+- deunsyncbuf.push_back(p[11+i]);
+- decoded_value = deunsyncbuf.c_str();
+- decoded_value_size = deunsyncbuf.length();
+- } else {
+- decoded_value = p+11;
+- decoded_value_size = size-1;
+- };
+-
+- if (strncmp("APIC", p, 4) == 0) {
+- size_t mimelen = strnlen(decoded_value, decoded_value_size);
+- if ((int32_t)mimelen < decoded_value_size-3) {
+- const char *desc = decoded_value+mimelen+1+1;
+-// uint8_t pictype = p[11+mimelen+1];
+- size_t desclen = strnlen(desc,decoded_value_size-mimelen-2-1);
+- const char *content = desc + desclen + 1 + (enc == 0 || enc == 3 ? 0:1) ;
+-
+- if(content<decoded_value+decoded_value_size) {
+- StringInputStream picstream(content,
+- (uint32_t)(decoded_value+decoded_value_size-content), false);
+- string picname;
+- picname = (char)('0'+albumArtNum++);
+- indexable.indexChild(picname, indexable.mTime(), &picstream);
+-
+- if (desclen && indexable.child()) {
+- if (enc == 0 || enc == 3) {
+- indexable.child()->addValue(factory->descriptionField, string(desc, desclen) );
+- } else {
+- indexable.child()->addValue(factory->descriptionField, conv.convert(desc, desclen) );
+- }
+- }
+-
+- indexable.finishIndexChild();
+- }
+- }
+- }
+-
+- if (enc == 0 || enc == 3) {
+- value = string(decoded_value, strnlen(decoded_value, decoded_value_size));
+- } else {
+- value = conv.convert(decoded_value, decoded_value_size); // FIXME: add similar workaround
+- }
+-
+- if (!value.empty()) {
+- if (strncmp("TIT1", p, 4) == 0) {
+- indexable.addValue(factory->subjectField, value);
+- } else if (strncmp("TIT2", p, 4) == 0) {
+- indexable.addValue(factory->titleField, value);
+- found_title = true;
+- } else if (strncmp("TIT3", p, 4) == 0) {
+- indexable.addValue(factory->descriptionField, value);
+- } else if (strncmp("TLAN", p, 4) == 0) {
+- indexable.addValue(factory->languageField, value);
+- } else if (strncmp("TCOP", p, 4) == 0) {
+- indexable.addValue(factory->copyrightField, value);
+- } else if ((strncmp("TDRL", p, 4) == 0) ||
+- (strncmp("TDAT", p, 4) == 0) ||
+- (strncmp("TYER", p, 4) == 0) ||
+- (strncmp("TDRC", p, 4) == 0)) {
+- indexable.addValue(factory->createdField, value);
+- found_year = true;
+- } else if ((strncmp("TPE1", p, 4) == 0) ||
+- (strncmp("TPE2", p, 4) == 0) ||
+- (strncmp("TPE3", p, 4) == 0) ||
+- (strncmp("TPE4", p, 4) == 0)) {
+- string performerUri = indexable.newAnonymousUri();
+-
+- indexable.addValue(factory->performerField, performerUri);
+- indexable.addTriplet(performerUri, typePropertyName, contactClassName);
+- indexable.addTriplet(performerUri, fullnamePropertyName, value);
+- found_artist = true;
+- } else if ((strncmp("TPUB", p, 4) == 0) ||
+- (strncmp("TENC", p, 4) == 0)) {
+- string publisherUri = indexable.newAnonymousUri();
+-
+- indexable.addValue(factory->publisherField, publisherUri);
+- indexable.addTriplet(publisherUri, typePropertyName, contactClassName);
+- indexable.addTriplet(publisherUri, fullnamePropertyName, value);
+- } else if ((strncmp("TALB", p, 4) == 0) ||
+- (strncmp("TOAL", p, 4) == 0)) {
+- addStatement(indexable, albumUri, titlePropertyName, value);
+- found_album = true;
+- } else if (strncmp("TCON", p, 4) == 0) {
+- // The Genre is stored as (number)
+- if( value[0] == '(' && value[value.length()-1] == ')' ) {
+- //vHanda: Maybe one should check if all the characters in between are digits
+- int genreIndex = atoi( value.substr( 1, value.length()-1 ).c_str() );
+- indexable.addValue(factory->genreField, genres[ genreIndex ]);
+- found_genre = true;
+- } else {
+- // We must not forget that genre could be a string.
+- if (!value.empty()) {
+- indexable.addValue(factory->genreField, value);
+- found_genre = true;
+- }
+- }
+- } else if (strncmp("TLEN", p, 4) == 0) {
+- indexable.addValue(factory->durationField, value);
+- } else if (strncmp("TEXT", p, 4) == 0) {
+- string lyricistUri = indexable.newAnonymousUri();
+-
+- indexable.addValue(factory->lyricistField, lyricistUri);
+- indexable.addTriplet(lyricistUri, typePropertyName, contactClassName);
+- indexable.addTriplet(lyricistUri, fullnamePropertyName, value);
+- } else if (strncmp("TCOM", p, 4) == 0) {
+- string composerUri = indexable.newAnonymousUri();
+-
+- indexable.addValue(factory->composerField, composerUri);
+- indexable.addTriplet(composerUri, typePropertyName, contactClassName);
+- indexable.addTriplet(composerUri, fullnamePropertyName, value);
+- } else if (strncmp("TRCK", p, 4) == 0) {
+- istringstream ins(value);
+- int tnum;
+- ins >> tnum;
+- if (!ins.fail()) {
+- indexable.addValue(factory->trackNumberField, tnum);
+- found_track = true;
+- ins.ignore(10,'/');
+- int tcount;
+- ins >> tcount;
+- if (!ins.fail()) {
+- ostringstream outs;
+- outs << tcount;
+- addStatement(indexable, albumUri, albumTrackCountPropertyName, outs.str());
+- }
+- }
+- } else if (strncmp("TPOS", p, 4) == 0) {
+- istringstream ins(value);
+- int dnum;
+- ins >> dnum;
+- if (!ins.fail()) {
+- indexable.addValue(factory->discNumberField, dnum);
+- ins.ignore(10,'/');
+- int dcount;
+- ins >> dcount;
+- if (!ins.fail()) {
+- ostringstream outs;
+- outs << dcount;
+- addStatement(indexable, albumUri, discCountPropertyName, outs.str());
+- }
+- }
+- }
+- }
+- p += size + 10;
+- }
+- }
+- // parse MP3 frame header
+-
+- int bitrateindex, samplerateindex;
+- int i;
+- for(i=0; (buf[i]=='\0') && (i<max_padding) ; i++);
+- if (((unsigned char)buf[0+i] == 0xff) && (((unsigned char)buf[1+i]&0xfe) == 0xfa)
+- && ((bitrateindex = ((unsigned char)buf[2+i]>>4)) != 0xf)
+- && ((samplerateindex = (((unsigned char)buf[2+i]>>2)&3)) != 3 )) { // is this MP3?
+-
+- indexable.addValue(factory->typeField, audioClassName);
+- // FIXME: no support for VBR :(
+- // ideas: compare bitrate from the frame with stream size/duration from ID3 tags
+- // check several consecutive frames to see if bitrate is different
+- // in neither case you can be sure to properly detected VBR :(
+- indexable.addValue(factory->bitrateField, bitrate[bitrateindex]);
+- indexable.addValue(factory->samplerateField, samplerate[samplerateindex]);
+- indexable.addValue(factory->codecField, "MP3");
+- indexable.addValue(factory->channelsField, ((buf[3+i]>>6) == 3 ? 1:2 ) );
+- }
+-
+- // Parse ID3v1 tag
+-
+- int64_t insize;
+- if ( (insize = in->size()) > (128+nread)) {
+-
+- // read the tag and check signature
+- int64_t nskip = insize-128-nread;
+- if (nskip == in->skip(nskip))
+- if (in->read(buf, 128, 128)==128)
+- if (!strncmp("TAG", buf, 3)) {
+-
+- found_tag = true;
+-
+- std::string s;
+-
+- if (!found_title && extract_and_trim(buf, 3, 30, s)) {
+- indexable.addValue(factory->titleField, s);
+- }
+- if (!found_artist && extract_and_trim(buf, 33, 30, s)) {
+- const string performerUri = indexable.newAnonymousUri();
+- indexable.addValue(factory->performerField, performerUri);
+- indexable.addTriplet(performerUri, typePropertyName, contactClassName);
+- indexable.addTriplet(performerUri, fullnamePropertyName, s);
+- }
+- if (!found_album && extract_and_trim(buf, 63, 30, s))
+- addStatement(indexable, albumUri, titlePropertyName, s);
+- if (!found_year && extract_and_trim(buf, 93, 4, s))
+- indexable.addValue(factory->createdField, s);
+- if (!found_comment && extract_and_trim(buf, 97, 30, s)) {
+- indexable.addValue(factory->commentField, s);
+- }
+- if (!found_track && !buf[125] && buf[126]) {
+- indexable.addValue(factory->trackNumberField, (int)(buf[126]));
+- }
+- if (!found_genre && (unsigned char)(buf[127]) < 148)
+- indexable.addValue(factory->genreField, genres[(uint8_t)buf[127]]);
+- }
+- }
+-
+- if(!albumUri.empty()) {
+- indexable.addValue(factory->albumField, albumUri);
+- indexable.addTriplet(albumUri, typePropertyName, albumClassName);
+- }
+-
+- if (found_tag)
+- indexable.addValue(factory->typeField, musicClassName);
+-
+- return 0;
+-}
+diff --git a/lib/endanalyzers/id3endanalyzer.cpp b/lib/endanalyzers/id3endanalyzer.cpp
+index d8487b5..677ece0 100644
+--- a/lib/endanalyzers/id3endanalyzer.cpp
++++ b/lib/endanalyzers/id3endanalyzer.cpp
+@@ -512,13 +512,19 @@ ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream*
+ addStatement(indexable, albumUri, titlePropertyName, value);
+ found_album = true;
+ } else if (strncmp("TCON", p, 4) == 0) {
+- // The Genre is stored as (number)
+- if( value[0] == '(' && value[value.length()-1] == ')' ) {
+- //vHanda: Maybe one should check if all the characters in between are digits
+- int genreIndex = atoi( value.substr( 1, value.length()-1 ).c_str() );
+- indexable.addValue(factory->genreField, genres[ genreIndex ]);
+- found_genre = true;
+- }
++ // The Genre is stored as (number)
++ if( value[0] == '(' && value[value.length()-1] == ')' ) {
++ //vHanda: Maybe one should check if all the characters in between are digits
++ int genreIndex = atoi( value.substr( 1, value.length()-1 ).c_str() );
++ indexable.addValue(factory->genreField, genres[ genreIndex ]);
++ found_genre = true;
++ } else {
++ // We must not forget that genre could be a string.
++ if (!value.empty()) {
++ indexable.addValue(factory->genreField, value);
++ found_genre = true;
++ }
++ }
+ } else if (strncmp("TLEN", p, 4) == 0) {
+ indexable.addValue(factory->durationField, value);
+ } else if (strncmp("TEXT", p, 4) == 0) {
+--
+1.7.10.4
+
diff --git a/0008-fix-parsing-of-genre-field-in-id3v2-tags-and-clean-c.patch b/0008-fix-parsing-of-genre-field-in-id3v2-tags-and-clean-c.patch
new file mode 100644
index 0000000..5476527
--- /dev/null
+++ b/0008-fix-parsing-of-genre-field-in-id3v2-tags-and-clean-c.patch
@@ -0,0 +1,112 @@
+From 808a3fafc1d89a9b8ec76bbcc5b2514cefa9345d Mon Sep 17 00:00:00 2001
+From: Sune Vuorela <sune at vuorela.dk>
+Date: Sun, 24 Jun 2012 16:18:31 +0200
+Subject: [PATCH 8/8] fix parsing of genre field in id3v2 tags and clean code
+ up a bit
+
+the genre field of an id3v2 tag might or might not be a number that matches
+entries in a lookup table or alternatively a string. If it is a number,
+then it might or might not be in parentheses.
+Handle all of the above and also handle the fact that some people might
+enjoy adding numbers that are outside the range of the lookup table.
+---
+ lib/endanalyzers/id3endanalyzer.cpp | 62 +++++++++++++++++++++++++++++++----
+ 1 file changed, 55 insertions(+), 7 deletions(-)
+
+diff --git a/lib/endanalyzers/id3endanalyzer.cpp b/lib/endanalyzers/id3endanalyzer.cpp
+index 677ece0..0db3728 100644
+--- a/lib/endanalyzers/id3endanalyzer.cpp
++++ b/lib/endanalyzers/id3endanalyzer.cpp
+@@ -81,7 +81,9 @@ replaygain
+ VBR detection
+ */
+
+-static const string genres[148] = {
++#define ID3_NUMBER_OF_GENRES 148
++
++static const string genres[ID3_NUMBER_OF_GENRES] = {
+ "Blues",
+ "Classic Rock",
+ "Country",
+@@ -372,6 +374,54 @@ static bool extract_and_trim(const char* buf, int offset, int length, string& s)
+ return !s.empty();
+ }
+
++/**
++ * Helper functor that parses a 'genre' string as a number (optionally wrapped
++ * in parentheses) and accepts it only if it is a valid index into genres[]
++ */
++class genre_number_parser {
++ private:
++ bool success;
++ long result;
++ void parse_string( string genre ) {
++ char* endptr;
++ int r = strtol(genre.c_str(),&endptr, 10);
++ if(*endptr == '\0') { //strtol consumed the whole string. NOTE(review): an empty string (e.g. from "()") also passes and yields 0
++ if(r >=0 && r < ID3_NUMBER_OF_GENRES ) { //only accept indices inside the genres lookup table
++ success=true;
++ result=r;
++ }
++ }
++ }
++ public:
++ /**
++ * constructor taking the genre string you want parsed as a number
++ */
++ genre_number_parser(string genre) : success(false), result(-1) {
++ if(genre.size()==0) {
++ //if the string is empty, there is no need to try to parse it
++ return;
++ }
++ //the string might start and end with parentheses; parse only what is between them
++ if(genre[0]=='(' && genre[genre.size()-1]==')') {
++ parse_string(genre.substr(1,genre.length()-2));
++ return;
++ }
++ parse_string(genre);
++ }
++ /**
++ * whether or not parsing was successful
++ */
++ operator bool() {
++ return success;
++ }
++ /**
++ * the actual result of the parsing, or -1 if parsing wasn't successful
++ */
++ operator long() {
++ return result;
++ }
++};
++
+ signed char
+ ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream* in) {
+ const int max_padding = 1000;
+@@ -512,11 +562,9 @@ ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream*
+ addStatement(indexable, albumUri, titlePropertyName, value);
+ found_album = true;
+ } else if (strncmp("TCON", p, 4) == 0) {
+- // The Genre is stored as (number)
+- if( value[0] == '(' && value[value.length()-1] == ')' ) {
+- //vHanda: Maybe one should check if all the characters in between are digits
+- int genreIndex = atoi( value.substr( 1, value.length()-1 ).c_str() );
+- indexable.addValue(factory->genreField, genres[ genreIndex ]);
++ genre_number_parser p(value);
++ if(p) {
++ indexable.addValue(factory->genreField, genres[ p ]);
+ found_genre = true;
+ } else {
+ // We must not forget that genre could be a string.
+@@ -629,7 +677,7 @@ ID3EndAnalyzer::analyze(Strigi::AnalysisResult& indexable, Strigi::InputStream*
+ if (!found_track && !buf[125] && buf[126]) {
+ indexable.addValue(factory->trackNumberField, (int)(buf[126]));
+ }
+- if (!found_genre && (unsigned char)(buf[127]) < 148)
++ if (!found_genre && (unsigned char)(buf[127]) < ID3_NUMBER_OF_GENRES)
+ indexable.addValue(factory->genreField, genres[(uint8_t)buf[127]]);
+ }
+ }
+--
+1.7.10.4
+
diff --git a/strigi.spec b/strigi.spec
index 48a041e..2950aaa 100644
--- a/strigi.spec
+++ b/strigi.spec
@@ -5,9 +5,11 @@
%global clucene 1
%endif
+%define snap 20120626
+
Name: strigi
Version: 0.7.7
-Release: 4%{?dist}
+Release: 5.20120626%{?dist}
Summary: A desktop search program
Group: Applications/Productivity
License: LGPLv2+
@@ -19,12 +21,20 @@ Source1: strigiclient.desktop
Source2: strigi-daemon.desktop
BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
-## upstreamable patches
-Patch50: strigi-0.7.7-gcc47.patch
-
## upstream patches
-# https://projects.kde.org/projects/kdesupport/strigi/libstreamanalyzer/repository/revisions/35cf4a4818a0d1bc7cda07e29f271360e06443a0
-Patch100: strigi-0.7.7-xpmfix.patch
+# strigidaemon
+Patch101: 0001-Minor.-Fix-grammar-typo-in-cmake-output.patch
+Patch102: 0002-gcc47-fix-unistd.h-header-required-unconditionally-f.patch
+Patch103: 0003-Fix-return-value-wrong-type.patch
+# libstreamanalyzer
+Patch201: 0001-Fix-xpm-and-xbm-index.patch
+Patch202: 0002-Extract-tracknumber-and-track-count-from-a-value-lik.patch
+Patch203: 0003-Fixed-indexing-of-m3u-files.patch
+Patch204: 0004-Fix-FLAC-Files-Remove-addtional-db-in-replaygain.patch
+Patch205: 0005-Fix-flac-analizer-was-importing-only-one-artist-tag.patch
+Patch206: 0006-Fix-non-numeric-genres-in-id3-v2-mp3-are-ignored.patch
+Patch207: 0007-Opps-Rmoving-a-wrong-commited-file-id3endanalyzer.cp.patch
+Patch208: 0008-fix-parsing-of-genre-field-in-id3v2-tags-and-clean-c.patch
BuildRequires: bison
BuildRequires: boost-devel
@@ -75,9 +85,20 @@ Strigi search engine libraries
%prep
%setup -q -n %{name}-%{version}%{?pre:-%{pre}}
-%patch50 -p1 -b .gcc47
+pushd strigidaemon
+%patch101 -p1
+%patch102 -p1
+%patch103 -p1
+popd
pushd libstreamanalyzer
-%patch100 -p1 -b .xpmfix
+%patch201 -p1
+%patch202 -p1
+%patch203 -p1
+%patch204 -p1
+%patch205 -p1
+%patch206 -p1
+%patch207 -p1
+%patch208 -p1
popd
@@ -158,6 +179,9 @@ rm -rf %{buildroot}
%changelog
+* Tue Jun 26 2012 Rex Dieter <rdieter at fedoraproject.org> 0.7.7-5.20120626
+- backport upstream patches (as of 20120626)
+
* Wed May 02 2012 Rex Dieter <rdieter at fedoraproject.org> 0.7.7-4
- rebuild (exiv2)
More information about the scm-commits
mailing list