[tika] Initial import (#1002721).

gil gil at fedoraproject.org
Tue Oct 15 15:35:12 UTC 2013


commit 8de4fa0cef2eca665a8bc17fe2fda5280b8b64fb
Author: gil <puntogil at libero.it>
Date:   Tue Oct 15 17:35:05 2013 +0200

    Initial import (#1002721).

 .gitignore                    |    1 +
 sources                       |    1 +
 tika-1.4-fix-build-deps.patch |   53 ++++++++++
 tika.spec                     |  215 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 270 insertions(+), 0 deletions(-)
---
diff --git a/.gitignore b/.gitignore
index e69de29..e05a17f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+/tika-1.4-src.zip
diff --git a/sources b/sources
index e69de29..44f515e 100644
--- a/sources
+++ b/sources
@@ -0,0 +1 @@
+6daa446b1dfb08888169d558263416d7  tika-1.4-src.zip
diff --git a/tika-1.4-fix-build-deps.patch b/tika-1.4-fix-build-deps.patch
new file mode 100644
index 0000000..5ecd4fb
--- /dev/null
+++ b/tika-1.4-fix-build-deps.patch
@@ -0,0 +1,53 @@
+diff -Nru tika-1.4/pom.xml tika-1.4-gil/pom.xml
+--- tika-1.4/pom.xml	2013-06-16 10:31:48.000000000 +0200
++++ tika-1.4-gil/pom.xml	2013-10-10 14:00:01.050417440 +0200
+@@ -194,7 +194,7 @@
+             <dependencies>
+               <dependency>
+                  <groupId>org.apache.ant</groupId>
+-                 <artifactId>ant-nodeps</artifactId>
++                 <artifactId>ant</artifactId>
+                  <version>1.8.1</version>
+                </dependency>
+             </dependencies>
+diff -Nru tika-1.4/tika-parsers/pom.xml tika-1.4-gil/tika-parsers/pom.xml
+--- tika-1.4/tika-parsers/pom.xml	2013-06-16 10:33:20.000000000 +0200
++++ tika-1.4-gil/tika-parsers/pom.xml	2013-10-10 14:01:58.532318716 +0200
+@@ -107,13 +107,13 @@
+          problems with encrypted PDFs. -->
+     <dependency>
+       <groupId>org.bouncycastle</groupId>
+-      <artifactId>bcmail-jdk15</artifactId>
+-      <version>1.45</version>
++      <artifactId>bcmail-jdk16</artifactId>
++      <version>1.46</version>
+     </dependency>
+     <dependency>
+       <groupId>org.bouncycastle</groupId>
+-      <artifactId>bcprov-jdk15</artifactId>
+-      <version>1.45</version>
++      <artifactId>bcprov-jdk16</artifactId>
++      <version>1.46</version>
+     </dependency>
+     <dependency>
+       <groupId>org.apache.poi</groupId>
+@@ -141,8 +141,8 @@
+       </exclusions>
+     </dependency>
+     <dependency>
+-      <groupId>org.apache.geronimo.specs</groupId>
+-      <artifactId>geronimo-stax-api_1.0_spec</artifactId>
++      <groupId>javax.xml.stream</groupId>
++      <artifactId>stax-api</artifactId>
+       <version>1.0.1</version>
+     </dependency>
+     <dependency>
+@@ -152,7 +152,7 @@
+     </dependency>
+     <dependency>
+       <groupId>org.ow2.asm</groupId>
+-      <artifactId>asm-debug-all</artifactId>
++      <artifactId>asm-all</artifactId>
+       <version>4.1</version>
+     </dependency>
+     <dependency>
diff --git a/tika.spec b/tika.spec
new file mode 100644
index 0000000..38636c2
--- /dev/null
+++ b/tika.spec
@@ -0,0 +1,215 @@
+# Conditionals to help break the tika <-> vorbis-java-tika dependency cycle
+%if 0%{?fedora}
+%bcond_with vorbis_tika
+# Disabled only for now
+%bcond_with tika_parsers
+%endif
+
+Name:          tika
+Version:       1.4
+Release:       1%{?dist}
+Summary:       A content analysis toolkit
+License:       ASL 2.0
+Url:           http://tika.apache.org/
+Source0:       http://www.apache.org/dist/tika/%{name}-%{version}-src.zip
+# Fix stax-api gId:aId
+# Replace unavailable org.ow2.asm:asm-debug-all:4.1
+# Replace ant-nodeps with ant
+# Fix bouncycastle aId
+Patch0:        %{name}-1.4-fix-build-deps.patch
+BuildRequires: java-devel
+
+BuildRequires: mvn(biz.aQute:bndlib)
+BuildRequires: mvn(org.apache.ant:ant)
+BuildRequires: mvn(org.osgi:org.osgi.compendium)
+BuildRequires: mvn(org.osgi:org.osgi.core)
+
+%if %{with vorbis_tika}
+BuildRequires: mvn(org.gagravarr:vorbis-java-tika)
+%endif
+
+%if %{with tika_parsers}
+BuildRequires: mvn(com.adobe.xmp:xmpcore)
+BuildRequires: mvn(com.drewnoakes:metadata-extractor:2)
+BuildRequires: mvn(org.gagravarr:vorbis-java-core)
+BuildRequires: mvn(com.googlecode.juniversalchardet:juniversalchardet)
+BuildRequires: mvn(commons-codec:commons-codec)
+BuildRequires: mvn(de.l3s.boilerpipe:boilerpipe)
+BuildRequires: mvn(edu.ucar:netcdf)
+BuildRequires: mvn(javax.xml.stream:stax-api)
+BuildRequires: mvn(org.apache.commons:commons-compress)
+BuildRequires: mvn(org.apache.felix:org.apache.felix.scr.annotations)
+BuildRequires: mvn(org.apache.james:apache-mime4j-core)
+BuildRequires: mvn(org.apache.james:apache-mime4j-dom)
+BuildRequires: mvn(org.apache.james:james-project)
+BuildRequires: mvn(org.apache.pdfbox:pdfbox)
+BuildRequires: mvn(org.apache.poi:poi)
+BuildRequires: mvn(org.apache.poi:poi-scratchpad)
+BuildRequires: mvn(org.apache.poi:poi-ooxml)
+BuildRequires: mvn(org.bouncycastle:bcmail-jdk16)
+BuildRequires: mvn(org.bouncycastle:bcprov-jdk16)
+BuildRequires: mvn(org.ccil.cowan.tagsoup:tagsoup)
+BuildRequires: mvn(org.ow2.asm:asm-all)
+BuildRequires: mvn(rome:rome)
+%endif
+
+%if 0
+# tika-server deps
+BuildRequires: mvn(net.sf.opencsv:opencsv:2.0)
+BuildRequires: mvn(org.apache.cxf:cxf-rt-frontend-jaxrs:2.6.1)
+BuildRequires: mvn(org.apache.cxf:cxf-rt-transports-http-jetty:2.6.1)
+# tika-parser deps
+BuildRequires: mvn(com.googlecode.mp4parser:mp4parser-project:1.0-RC-1)
+BuildRequires: mvn(com.googlecode.mp4parser:isoparser:1.0-RC-1)
+# tika-xmp
+BuildRequires: mvn(org.apache.felix:maven-scr-plugin:1.7.4)
+%endif
+
+# Test deps
+BuildRequires: mvn(junit:junit)
+BuildRequires: mvn(org.mockito:mockito-core)
+BuildRequires: mvn(org.slf4j:slf4j-log4j12)
+
+BuildRequires: maven-local
+BuildRequires: maven-failsafe-plugin
+BuildRequires: maven-plugin-bundle
+BuildRequires: maven-remote-resources-plugin
+BuildRequires: maven-site-plugin
+
+BuildArch:     noarch
+
+%description
+The Apache Tika toolkit detects and extracts meta-data and
+structured text content from various documents using existing
+parser libraries.
+
+%if %{with tika_parsers}
+%package parsers
+Summary:       Apache Tika parsers
+
+%description parsers
+Apache Tika parsers implementation that matches the
+type of the document, once it is known, using
+Mime Type detection.
+
+%package xmp
+Summary:       Apache Tika XMP
+
+%description xmp
+Converts Tika metadata to XMP.
+%endif
+
+%package javadoc
+Summary:       Javadoc for %{name}
+
+%description javadoc
+This package contains javadoc for %{name}.
+
+%prep
+%setup -q
+# Cleanup
+find . -name '*.jar' -delete
+find . -name '*.class' -delete
+# Remove unwanted test resources
+rm -r %{name}-parsers/src/test/resources/test-documents/testLinux-*-*
+rm -r %{name}-parsers/src/test/resources/test-documents/testFreeBSD-*
+rm -r %{name}-parsers/src/test/resources/test-documents/testSolaris-*
+rm -r %{name}-parsers/src/test/resources/test-documents/*.ibooks
+rm -r %{name}-parsers/src/test/resources/test-documents/*.numbers
+rm -r %{name}-parsers/src/test/resources/test-documents/*.pages
+rm -r %{name}-parsers/src/test/resources/test-documents/*.key
+rm -r %{name}-parsers/src/test/resources/test-documents/*.war
+rm -r %{name}-parsers/src/test/resources/test-documents/*.wma
+rm -r %{name}-parsers/src/test/resources/test-documents/*.wmv
+find . -name '*.7z' -delete
+find . -name '*.ar' -delete
+find . -name '*.cpio' -delete
+find . -name '*.ear' -delete
+find . -name '*.exe' -delete
+find . -name '*.mp*' -delete
+find . -name '*.tbz2' -delete
+find . -name '*.tgz' -delete
+find . -name '*.zip' -delete
+%patch0 -p1
+
+%pom_disable_module %{name}-app
+%pom_disable_module %{name}-bundle
+%pom_disable_module %{name}-server
+# Unavailable plugins
+%pom_remove_plugin org.codehaus.mojo:clirr-maven-plugin %{name}-core
+%pom_remove_plugin org.apache.felix:maven-scr-plugin %{name}-xmp
+
+# Requires com.drewnoakes:metadata-extractor:2.6.2, but the Fedora metadata-extractor package is too old;
+# see https://bugzilla.redhat.com/show_bug.cgi?id=947457
+%pom_xpath_set "pom:project/pom:dependencies/pom:dependency[pom:artifactId='metadata-extractor']/pom:version" 2  %{name}-parsers
+# Disable vorbis-java-tika support because it causes a circular dependency
+%if %{without vorbis_tika}
+%pom_remove_dep :vorbis-java-tika %{name}-parsers
+%endif
+
+%if %{without tika_parsers}
+%pom_disable_module %{name}-parsers
+%pom_disable_module %{name}-xmp
+%endif
+
+# Unavailable build dep com.googlecode.mp4parser:isoparser
+# MP4 is non-free; remove support for it
+%pom_remove_dep com.googlecode.mp4parser:isoparser %{name}-parsers
+rm -r %{name}-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java \
+ %{name}-parsers/src/test/java/org/apache/tika/parser/mp4/MP4ParserTest.java
+
+# This test requires network access
+rm %{name}-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
+# These tests fail because of unavailable deps: com.googlecode.mp4parser:isoparser and org.gagravarr:vorbis-java-tika
+rm -r %{name}-parsers/src/test/java/org/apache/tika/parser/mail/RFC822ParserTest.java \
+ %{name}-parsers/src/test/java/org/apache/tika/parser/mbox/MboxParserTest.java
+rm -r %{name}-parsers/src/test/java/org/apache/tika/detect/TestContainerAwareDetector.java \
+ %{name}-parsers/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java \
+ %{name}-parsers/src/test/java/org/apache/tika/parser/asm/ClassParserTest.java \
+ %{name}-parsers/src/test/java/org/apache/tika/parser/pkg/Bzip2ParserTest.java \
+ %{name}-parsers/src/test/java/org/apache/tika/parser/pkg/GzipParserTest.java \
+ %{name}-parsers/src/test/java/org/apache/tika/parser/pkg/TarParserTest.java \
+ %{name}-parsers/src/test/java/org/apache/tika/parser/pkg/ZipParserTest.java
+rm -r %{name}-parsers/src/test/java/org/apache/tika/parser/image/ImageMetadataExtractorTest.java
+# These tests fail because of unavailable test resources
+rm -r %{name}-parsers/src/test/java/org/apache/tika/parser/microsoft/ProjectParserTest.java \
+ %{name}-parsers/src/test/java/org/apache/tika/parser/mp3/Mp3ParserTest.java \
+ %{name}-parsers/src/test/java/org/apache/tika/mime/TestMimeTypes.java \
+ %{name}-parsers/src/test/java/org/apache/tika/parser/iwork/IWorkParserTest.java \
+ %{name}-parsers/src/test/java/org/apache/tika/parser/pkg/ArParserTest.java \
+ %{name}-parsers/src/test/java/org/apache/tika/parser/executable/ExecutableParserTest.java \
+ %{name}-parsers/src/test/java/org/apache/tika/parser/ibooks/iBooksParserTest.java
+
+%build
+# Skip tests for now because there are test failures:
+# Tests which use cglib fail because of an incompatibility with asm4
+# Tests fail because of unavailable build deps: com.googlecode.mp4parser:isoparser
+%mvn_package :%{name} %{name}
+%mvn_package :%{name}-core %{name}
+%mvn_package :%{name}-parent %{name}
+%mvn_build -s -- -Dproject.build.sourceEncoding=UTF-8
+
+%install
+%mvn_install
+
+%files -f .mfiles-%{name}
+%dir %{_javadir}/%{name}
+%doc CHANGES.txt HEADER.txt KEYS LICENSE.txt NOTICE.txt README.txt
+
+%if %{with tika_parsers}
+%files parsers -f .mfiles-%{name}-parsers
+%doc LICENSE.txt NOTICE.txt
+
+%files xmp -f .mfiles-%{name}-xmp
+%doc LICENSE.txt NOTICE.txt
+%endif
+
+%files javadoc -f .mfiles-javadoc
+%doc LICENSE.txt NOTICE.txt
+
+%changelog
+* Thu Aug 29 2013 gil cattaneo <puntogil at libero.it> 1.4-1
+- update to 1.4
+
+* Tue Oct 23 2012 gil cattaneo <puntogil at libero.it> 1.2-1
+- initial rpm
\ No newline at end of file


More information about the scm-commits mailing list