[boilerpipe] Initial import (#1002704).

gil gil at fedoraproject.org
Sat Oct 19 21:50:59 UTC 2013


commit 4207277913f9607d9180793f5ccb6292106ed9b4
Author: gil <puntogil at libero.it>
Date:   Sat Oct 19 23:48:39 2013 +0200

    Initial import (#1002704).

 .gitignore                      |    1 +
 boilerpipe-1.2.0-libdir-patch   |   21 +
 boilerpipe-1.2.0-nekohtml-patch | 2228 +++++++++++++++++++++++++++++++++++++++
 boilerpipe-1.2.0.pom            |   35 +
 boilerpipe.spec                 |  152 +++
 sources                         |    1 +
 6 files changed, 2438 insertions(+), 0 deletions(-)
---
diff --git a/.gitignore b/.gitignore
index e69de29..6741a91 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+/boilerpipe-1.2.0-src.tar.gz
diff --git a/boilerpipe-1.2.0-libdir-patch b/boilerpipe-1.2.0-libdir-patch
new file mode 100644
index 0000000..d4244ab
--- /dev/null
+++ b/boilerpipe-1.2.0-libdir-patch
@@ -0,0 +1,21 @@
+--- build.xml	2011-05-28 16:56:41.000000000 +0200
++++ build.xml-gil	2011-08-15 17:57:57.279492364 +0200
+@@ -53,7 +53,7 @@
+ 
+ 	<property name="build.main" value="${build.dir}/main" />
+ 	<property name="build.demo" value="${build.dir}/demo" />
+-    <property name="lib.dir" value="${app.dir}/lib" />
++    <property name="lib.dir" value="/usr/share/java" />
+ 	<property name="src.main" value="${app.dir}/src/main" />
+ 	<property name="src.demo" value="${app.dir}/src/demo" />
+ 	<property name="dist.dir" value="${app.dir}/dist" />
+@@ -67,7 +67,8 @@
+ 
+    <path id="classpath.libs">
+         <fileset dir="${lib.dir}">
+-            <include name="**/*.jar" />
++            <include name="nekohtml.jar" />
++            <include name="xerces-j2.jar" />
+         </fileset>
+     </path>
+ 
diff --git a/boilerpipe-1.2.0-nekohtml-patch b/boilerpipe-1.2.0-nekohtml-patch
new file mode 100644
index 0000000..5918988
--- /dev/null
+++ b/boilerpipe-1.2.0-nekohtml-patch
@@ -0,0 +1,2228 @@
+diff -Nru boilerpipe-1.2.0/pom.xml boilerpipe-1.2.0-gil/pom.xml
+--- boilerpipe-1.2.0/pom.xml	2013-10-11 11:54:23.418310128 +0200
++++ boilerpipe-1.2.0-gil/pom.xml	2013-10-11 11:51:51.334701196 +0200
+@@ -32,4 +32,13 @@
+       <name>Christian Kohlschütter</name>
+     </developer>
+   </developers>
++
++  <dependencies>
++    <dependency>
++      <groupId>net.sourceforge.nekohtml</groupId>
++      <artifactId>nekohtml</artifactId>
++      <version>1.9.14</version>
++    </dependency>
++  </dependencies>
++
+ </project>
+diff -Nru boilerpipe-1.2.0/src/main/org/cyberneko/html/HTMLElements.java boilerpipe-1.2.0-gil/src/main/org/cyberneko/html/HTMLElements.java
+--- boilerpipe-1.2.0/src/main/org/cyberneko/html/HTMLElements.java	2010-12-16 11:30:06.000000000 +0100
++++ boilerpipe-1.2.0-gil/src/main/org/cyberneko/html/HTMLElements.java	1970-01-01 01:00:00.000000000 +0100
+@@ -1,794 +0,0 @@
+-/* 
+- * Copyright 2002-2009 Andy Clark, Marc Guillemot
+- * 
+- * Licensed under the Apache License, Version 2.0 (the "License");
+- * you may not use this file except in compliance with the License.
+- * You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-package org.cyberneko.html;
+-
+-/**
+- * Collection of HTML element information.
+- *
+- * @author Andy Clark
+- * @author Ahmed Ashour
+- * @author Marc Guillemot
+- *
+- * @version $Id: HTMLElements.java,v 1.12 2005/02/14 07:16:59 andyc Exp $
+- */
+-public class HTMLElements {
+-
+-    //
+-    // Constants
+-    //
+-    
+-    // element codes
+-
+-    // NOTE: The element codes *must* start with 0 and increment in
+-    //       sequence. The parent and closes references depends on 
+-    //       this assumption. -Ac
+-
+-    public static final short A = 0;
+-    public static final short ABBR = A+1;
+-    public static final short ACRONYM = ABBR+1;
+-    public static final short ADDRESS = ACRONYM+1;
+-    public static final short APPLET = ADDRESS+1;
+-    public static final short AREA = APPLET+1;
+-    public static final short B = AREA+1;
+-    public static final short BASE = B+1;
+-    public static final short BASEFONT = BASE+1;
+-    public static final short BDO = BASEFONT+1;
+-    public static final short BGSOUND = BDO+1;
+-    public static final short BIG = BGSOUND+1;
+-    public static final short BLINK = BIG+1;
+-    public static final short BLOCKQUOTE = BLINK+1;
+-    public static final short BODY = BLOCKQUOTE+1;
+-    public static final short BR = BODY+1;
+-    public static final short BUTTON = BR+1;
+-    public static final short CAPTION = BUTTON+1;
+-    public static final short CENTER = CAPTION+1;
+-    public static final short CITE = CENTER+1;
+-    public static final short CODE = CITE+1;
+-    public static final short COL = CODE+1;
+-    public static final short COLGROUP = COL+1;
+-    public static final short COMMENT = COLGROUP+1;
+-    public static final short DEL = COMMENT+1;
+-    public static final short DFN = DEL+1;
+-    public static final short DIR = DFN+1;
+-    public static final short DIV = DIR+1;
+-    public static final short DD = DIV+1;
+-    public static final short DL = DD+1;
+-    public static final short DT = DL+1;
+-    public static final short EM = DT+1;
+-    public static final short EMBED = EM+1;
+-    public static final short FIELDSET = EMBED+1;
+-    public static final short FONT = FIELDSET+1;
+-    public static final short FORM = FONT+1;
+-    public static final short FRAME = FORM+1;
+-    public static final short FRAMESET = FRAME+1;
+-    public static final short H1 = FRAMESET+1;
+-    public static final short H2 = H1+1;
+-    public static final short H3 = H2+1;
+-    public static final short H4 = H3+1;
+-    public static final short H5 = H4+1;
+-    public static final short H6 = H5+1;
+-    public static final short HEAD = H6+1;
+-    public static final short HR = HEAD+1;
+-    public static final short HTML = HR+1;
+-    public static final short I = HTML+1;
+-    public static final short IFRAME = I+1;
+-    public static final short ILAYER = IFRAME+1;
+-    public static final short IMG = ILAYER+1;
+-    public static final short INPUT = IMG+1;
+-    public static final short INS = INPUT+1;
+-    public static final short ISINDEX = INS+1;
+-    public static final short KBD = ISINDEX+1;
+-    public static final short KEYGEN = KBD+1;
+-    public static final short LABEL = KEYGEN+1;
+-    public static final short LAYER = LABEL+1;
+-    public static final short LEGEND = LAYER+1;
+-    public static final short LI = LEGEND+1;
+-    public static final short LINK = LI+1;
+-    public static final short LISTING = LINK+1;
+-    public static final short MAP = LISTING+1;
+-    public static final short MARQUEE = MAP+1;
+-    public static final short MENU = MARQUEE+1;
+-    public static final short META = MENU+1;
+-    public static final short MULTICOL = META+1;
+-    public static final short NEXTID = MULTICOL+1;
+-    public static final short NOBR = NEXTID+1;
+-    public static final short NOEMBED = NOBR+1;
+-    public static final short NOFRAMES = NOEMBED+1;
+-    public static final short NOLAYER = NOFRAMES+1;
+-    public static final short NOSCRIPT = NOLAYER+1;
+-    public static final short OBJECT = NOSCRIPT+1;
+-    public static final short OL = OBJECT+1;
+-    public static final short OPTION = OL+1;
+-    public static final short OPTGROUP = OPTION+1;
+-    public static final short P = OPTGROUP+1;
+-    public static final short PARAM = P+1;
+-    public static final short PLAINTEXT = PARAM+1;
+-    public static final short PRE = PLAINTEXT+1;
+-    public static final short Q = PRE+1;
+-    public static final short RB = Q+1;
+-    public static final short RBC = RB+1;
+-    public static final short RP = RBC+1;
+-    public static final short RT = RP+1;
+-    public static final short RTC = RT+1;
+-    public static final short RUBY = RTC+1;
+-    public static final short S = RUBY+1;
+-    public static final short SAMP = S+1;
+-    public static final short SCRIPT = SAMP+1;
+-    public static final short SELECT = SCRIPT+1;
+-    public static final short SMALL = SELECT+1;
+-    public static final short SOUND = SMALL+1;
+-    public static final short SPACER = SOUND+1;
+-    public static final short SPAN = SPACER+1;
+-    public static final short STRIKE = SPAN+1;
+-    public static final short STRONG = STRIKE+1;
+-    public static final short STYLE = STRONG+1;
+-    public static final short SUB = STYLE+1;
+-    public static final short SUP = SUB+1;
+-    public static final short TABLE = SUP+1;
+-    public static final short TBODY = TABLE+1;
+-    public static final short TD = TBODY+1;
+-    public static final short TEXTAREA = TD+1;
+-    public static final short TFOOT = TEXTAREA+1;
+-    public static final short TH = TFOOT+1;
+-    public static final short THEAD = TH+1;
+-    public static final short TITLE = THEAD+1;
+-    public static final short TR = TITLE+1;
+-    public static final short TT = TR+1;
+-    public static final short U = TT+1;
+-    public static final short UL = U+1;
+-    public static final short VAR = UL+1;
+-    public static final short WBR = VAR+1;
+-    public static final short XML = WBR+1;
+-    public static final short XMP = XML+1;
+-    public static final short UNKNOWN = XMP+1;
+-
+-    // information
+-
+-    /** Element information organized by first letter. */
+-    protected static final Element[][] ELEMENTS_ARRAY = new Element[26][];
+-
+-    /** Element information as a contiguous list. */
+-    protected static final ElementList ELEMENTS = new ElementList();
+-
+-    /** No such element. */
+-    public static final Element NO_SUCH_ELEMENT = new Element(UNKNOWN, "",  Element.CONTAINER, new short[]{BODY,HEAD}/*HTML*/, null);
+-
+-    //
+-    // Static initializer
+-    //
+-
+-    /**
+-     * Initializes the element information.
+-     * <p>
+-     * <strong>Note:</strong>
+-     * The <code>getElement</code> method requires that the HTML elements
+-     * are added to the list in alphabetical order. If new elements are
+-     * added, then they <em>must</em> be inserted in alphabetical order.
+-     */
+-    static {
+-        // <!ENTITY % heading "H1|H2|H3|H4|H5|H6">
+-        // <!ENTITY % fontstyle "TT | I | B | BIG | SMALL">
+-        // <!ENTITY % phrase "EM | STRONG | DFN | CODE | SAMP | KBD | VAR | CITE | ABBR | ACRONYM" >
+-        // <!ENTITY % special "A | IMG | OBJECT | BR | SCRIPT | MAP | Q | SUB | SUP | SPAN | BDO">
+-        // <!ENTITY % formctrl "INPUT | SELECT | TEXTAREA | LABEL | BUTTON">
+-        // <!ENTITY % inline "#PCDATA | %fontstyle; | %phrase; | %special; | %formctrl;">
+-        // <!ENTITY % block "P | %heading; | %list; | %preformatted; | DL | DIV | NOSCRIPT | BLOCKQUOTE | FORM | HR | TABLE | FIELDSET | ADDRESS">
+-        // <!ENTITY % flow "%block; | %inline;">
+-
+-        // initialize array of element information
+-        ELEMENTS_ARRAY['A'-'A'] = new Element[] {
+-            // A - - (%inline;)* -(A)
+-            new Element(A, "A", Element.INLINE, BODY, new short[] {A}),
+-            // ABBR - - (%inline;)*
+-            new Element(ABBR, "ABBR", Element.INLINE, BODY, null),
+-            // ACRONYM - - (%inline;)*
+-            new Element(ACRONYM, "ACRONYM", Element.INLINE, BODY, null),
+-            // ADDRESS - - (%inline;)*
+-            new Element(ADDRESS, "ADDRESS", Element.BLOCK, BODY, null),
+-            // APPLET
+-            new Element(APPLET, "APPLET", 0, BODY, null),
+-            // AREA - O EMPTY
+-            new Element(AREA, "AREA", Element.EMPTY, MAP, null),
+-        };
+-        ELEMENTS_ARRAY['B'-'A'] = new Element[] {
+-            // B - - (%inline;)*
+-            new Element(B, "B", Element.INLINE, BODY, null),
+-            // BASE - O EMPTY
+-            new Element(BASE, "BASE", Element.EMPTY, HEAD, null),
+-            // BASEFONT
+-            new Element(BASEFONT, "BASEFONT", 0, HEAD, null),
+-            // BDO - - (%inline;)*
+-            new Element(BDO, "BDO", Element.INLINE, BODY, null),
+-            // BGSOUND
+-            new Element(BGSOUND, "BGSOUND", Element.EMPTY, HEAD, null),
+-            // BIG - - (%inline;)*
+-            new Element(BIG, "BIG", Element.INLINE, BODY, null),
+-            // BLINK
+-            new Element(BLINK, "BLINK", Element.INLINE, BODY, null),
+-            // BLOCKQUOTE - - (%block;|SCRIPT)+
+-            new Element(BLOCKQUOTE, "BLOCKQUOTE", Element.BLOCK, BODY, new short[]{P}),
+-            // BODY O O (%block;|SCRIPT)+ +(INS|DEL)
+-            new Element(BODY, "BODY", Element.CONTAINER, HTML, new short[]{HEAD}),
+-            // BR - O EMPTY
+-            new Element(BR, "BR", Element.EMPTY, BODY, null),
+-            // BUTTON - - (%flow;)* -(A|%formctrl;|FORM|FIELDSET)
+-            new Element(BUTTON, "BUTTON", 0, BODY, null),
+-        };
+-        ELEMENTS_ARRAY['C'-'A'] = new Element[] {
+-            // CAPTION - - (%inline;)*
+-            new Element(CAPTION, "CAPTION", Element.INLINE, TABLE, null),
+-            // CENTER, 
+-            new Element(CENTER, "CENTER", 0, BODY, null),
+-            // CITE - - (%inline;)*
+-            new Element(CITE, "CITE", Element.INLINE, BODY, null),
+-            // CODE - - (%inline;)*
+-            new Element(CODE, "CODE", Element.INLINE, BODY, null),
+-            // COL - O EMPTY
+-            new Element(COL, "COL", Element.EMPTY, TABLE, null),
+-            // COLGROUP - O (COL)*
+-            new Element(COLGROUP, "COLGROUP", 0, TABLE, new short[]{COL,COLGROUP}),
+-            // COMMENT
+-            new Element(COMMENT, "COMMENT", Element.SPECIAL, HTML, null),
+-        };
+-        ELEMENTS_ARRAY['D'-'A'] = new Element[] {
+-            // DEL - - (%flow;)*
+-            new Element(DEL, "DEL", 0, BODY, null),
+-            // DFN - - (%inline;)*
+-            new Element(DFN, "DFN", Element.INLINE, BODY, null),
+-            // DIR
+-            new Element(DIR, "DIR", 0, BODY, null),
+-            // DIV - - (%flow;)*
+-            new Element(DIV, "DIV", Element.BLOCK, BODY, new short[]{P}),
+-            // DD - O (%flow;)*
+-            new Element(DD, "DD", 0, DL, new short[]{DT,DD}),
+-            // DL - - (DT|DD)+
+-            new Element(DL, "DL", Element.BLOCK, BODY, null),
+-            // DT - O (%inline;)*
+-            new Element(DT, "DT", 0, DL, new short[]{DT,DD}),
+-        };
+-        ELEMENTS_ARRAY['E'-'A'] = new Element[] {
+-            // EM - - (%inline;)*
+-            new Element(EM, "EM", Element.INLINE, BODY, null),
+-            // EMBED
+-            new Element(EMBED, "EMBED", 0, BODY, null),
+-        };
+-        ELEMENTS_ARRAY['F'-'A'] = new Element[] {
+-            // FIELDSET - - (#PCDATA,LEGEND,(%flow;)*)
+-            new Element(FIELDSET, "FIELDSET", 0, BODY, null),
+-            // FONT
+-            new Element(FONT, "FONT", Element.CONTAINER, BODY, null),
+-            // FORM - - (%block;|SCRIPT)+ -(FORM)
+-            new Element(FORM, "FORM", Element.CONTAINER, new short[]{BODY,TD,DIV}, new short[]{BUTTON,P}),
+-            // FRAME - O EMPTY
+-            new Element(FRAME, "FRAME", Element.EMPTY, FRAMESET, null),
+-            // FRAMESET - - ((FRAMESET|FRAME)+ & NOFRAMES?)
+-            new Element(FRAMESET, "FRAMESET", 0, HTML, null),
+-        };
+-        ELEMENTS_ARRAY['H'-'A'] = new Element[] {
+-            // (H1|H2|H3|H4|H5|H6) - - (%inline;)*
+-            new Element(H1, "H1", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
+-            new Element(H2, "H2", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
+-            new Element(H3, "H3", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
+-            new Element(H4, "H4", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
+-            new Element(H5, "H5", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
+-            new Element(H6, "H6", Element.BLOCK, new short[]{BODY,A}, new short[]{H1,H2,H3,H4,H5,H6,P}),
+-            // HEAD O O (%head.content;) +(%head.misc;)
+-            new Element(HEAD, "HEAD", 0, HTML, null),
+-            // HR - O EMPTY
+-            new Element(HR, "HR", Element.EMPTY, BODY, new short[]{P}),
+-            // HTML O O (%html.content;)
+-            new Element(HTML, "HTML", 0, null, null),
+-        };
+-        ELEMENTS_ARRAY['I'-'A'] = new Element[] {
+-            // I - - (%inline;)*
+-            new Element(I, "I", Element.INLINE, BODY, null),
+-            // IFRAME
+-            new Element(IFRAME, "IFRAME", Element.BLOCK, BODY, null),
+-            // ILAYER
+-            new Element(ILAYER, "ILAYER", Element.BLOCK, BODY, null),
+-            // IMG - O EMPTY
+-            new Element(IMG, "IMG", Element.EMPTY, BODY, null),
+-            // INPUT - O EMPTY
+-            new Element(INPUT, "INPUT", Element.EMPTY, BODY, null),
+-            // INS - - (%flow;)*
+-            new Element(INS, "INS", 0, BODY, null),
+-            // ISINDEX
+-            new Element(ISINDEX, "ISINDEX", 0, HEAD, null),
+-        };
+-        ELEMENTS_ARRAY['K'-'A'] = new Element[] {
+-            // KBD - - (%inline;)*
+-            new Element(KBD, "KBD", Element.INLINE, BODY, null),
+-            // KEYGEN
+-            new Element(KEYGEN, "KEYGEN", 0, BODY, null),
+-        };
+-        ELEMENTS_ARRAY['L'-'A'] = new Element[] {
+-            // LABEL - - (%inline;)* -(LABEL)
+-            new Element(LABEL, "LABEL", 0, BODY, null),
+-            // LAYER
+-            new Element(LAYER, "LAYER", Element.BLOCK, BODY, null),
+-            // LEGEND - - (%inline;)*
+-            new Element(LEGEND, "LEGEND", Element.INLINE, FIELDSET, null),
+-            // LI - O (%flow;)*
+-            new Element(LI, "LI", 0, new short[]{BODY,UL,OL}, new short[]{LI}),
+-            // LINK - O EMPTY
+-            new Element(LINK, "LINK", Element.EMPTY, HEAD, null),
+-            // LISTING
+-            new Element(LISTING, "LISTING", 0, BODY, null),
+-        };
+-        ELEMENTS_ARRAY['M'-'A'] = new Element[] {
+-            // MAP - - ((%block;) | AREA)+
+-            new Element(MAP, "MAP", Element.INLINE, BODY, null),
+-            // MARQUEE
+-            new Element(MARQUEE, "MARQUEE", 0, BODY, null),
+-            // MENU
+-            new Element(MENU, "MENU", 0, BODY, null),
+-            // META - O EMPTY
+-            new Element(META, "META", Element.EMPTY, HEAD, new short[]{STYLE,TITLE}),
+-            // MULTICOL
+-            new Element(MULTICOL, "MULTICOL", 0, BODY, null),
+-        };
+-        ELEMENTS_ARRAY['N'-'A'] = new Element[] {
+-            // NEXTID
+-            new Element(NEXTID, "NEXTID", Element.EMPTY, BODY, null),
+-            // NOBR
+-            new Element(NOBR, "NOBR", Element.INLINE, BODY, null),
+-            // NOEMBED
+-            new Element(NOEMBED, "NOEMBED", 0, BODY, null),
+-            // NOFRAMES - - (BODY) -(NOFRAMES)
+-            new Element(NOFRAMES, "NOFRAMES", 0, FRAMESET, null),
+-            // NOLAYER
+-            new Element(NOLAYER, "NOLAYER", 0, BODY, null),
+-            // NOSCRIPT - - (%block;)+
+-            new Element(NOSCRIPT, "NOSCRIPT", 0, new short[]{BODY}, null),
+-        };
+-        ELEMENTS_ARRAY['O'-'A'] = new Element[] {
+-            // OBJECT - - (PARAM | %flow;)*
+-            new Element(OBJECT, "OBJECT", 0, BODY, null),
+-            // OL - - (LI)+
+-            new Element(OL, "OL", Element.BLOCK, BODY, null),
+-            // OPTGROUP - - (OPTION)+
+-            new Element(OPTGROUP, "OPTGROUP", 0, SELECT, new short[]{OPTION}),
+-            // OPTION - O (#PCDATA)
+-            new Element(OPTION, "OPTION", 0, SELECT, new short[]{OPTION}),
+-        };
+-        ELEMENTS_ARRAY['P'-'A'] = new Element[] {
+-            // P - O (%inline;)*
+-            new Element(P, "P", Element.CONTAINER, BODY, new short[]{P}),
+-            // PARAM - O EMPTY
+-            new Element(PARAM, "PARAM", Element.EMPTY, new short[]{OBJECT,APPLET}, null),
+-            // PLAINTEXT
+-            new Element(PLAINTEXT, "PLAINTEXT", Element.SPECIAL, BODY, null),
+-            // PRE - - (%inline;)* -(%pre.exclusion;)
+-            new Element(PRE, "PRE", 0, BODY, null),
+-        };
+-        ELEMENTS_ARRAY['Q'-'A'] = new Element[] {
+-            // Q - - (%inline;)*
+-            new Element(Q, "Q", Element.INLINE, BODY, null),
+-        };
+-        ELEMENTS_ARRAY['R'-'A'] = new Element[] {
+-            // RB
+-            new Element(RB, "RB", Element.INLINE, RUBY, new short[]{RB}),
+-            // RBC
+-            new Element(RBC, "RBC", 0, RUBY, null),
+-            // RP
+-            new Element(RP, "RP", Element.INLINE, RUBY, new short[]{RB}),
+-            // RT
+-            new Element(RT, "RT", Element.INLINE, RUBY, new short[]{RB,RP}),
+-            // RTC
+-            new Element(RTC, "RTC", 0, RUBY, new short[]{RBC}),
+-            // RUBY
+-            new Element(RUBY, "RUBY", 0, BODY, new short[]{RUBY}),
+-        };
+-        ELEMENTS_ARRAY['S'-'A'] = new Element[] {
+-            // S
+-            new Element(S, "S", 0, BODY, null),
+-            // SAMP - - (%inline;)*
+-            new Element(SAMP, "SAMP", Element.INLINE, BODY, null),
+-            // SCRIPT - - %Script;
+-            new Element(SCRIPT, "SCRIPT", Element.SPECIAL, new short[]{HEAD,BODY}, null),
+-            // SELECT - - (OPTGROUP|OPTION)+
+-            new Element(SELECT, "SELECT", Element.CONTAINER, BODY, new short[]{SELECT}),
+-            // SMALL - - (%inline;)*
+-            new Element(SMALL, "SMALL", Element.INLINE, BODY, null),
+-            // SOUND
+-            new Element(SOUND, "SOUND", Element.EMPTY, HEAD, null),
+-            // SPACER
+-            new Element(SPACER, "SPACER", Element.EMPTY, BODY, null),
+-            // SPAN - - (%inline;)*
+-            new Element(SPAN, "SPAN", Element.CONTAINER, BODY, null),
+-            // STRIKE
+-            new Element(STRIKE, "STRIKE", Element.INLINE, BODY, null),
+-            // STRONG - - (%inline;)*
+-            new Element(STRONG, "STRONG", Element.INLINE, BODY, null),
+-            // STYLE - - %StyleSheet;
+-            new Element(STYLE, "STYLE", Element.SPECIAL, new short[]{HEAD,BODY}, new short[]{STYLE,TITLE,META}),
+-            // SUB - - (%inline;)*
+-            new Element(SUB, "SUB", Element.INLINE, BODY, null),
+-            // SUP - - (%inline;)*
+-            new Element(SUP, "SUP", Element.INLINE, BODY, null),
+-        };
+-        ELEMENTS_ARRAY['T'-'A'] = new Element[] {
+-            // TABLE - - (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
+-            new Element(TABLE, "TABLE", Element.BLOCK|Element.CONTAINER, BODY, null),
+-            // TBODY O O (TR)+
+-            new Element(TBODY, "TBODY", 0, TABLE, new short[]{THEAD,TD,TH,TR,COLGROUP}),
+-            // TD - O (%flow;)*
+-            new Element(TD, "TD", Element.CONTAINER, TR, TABLE, new short[]{TD,TH}),
+-            // TEXTAREA - - (#PCDATA)
+-            new Element(TEXTAREA, "TEXTAREA", Element.SPECIAL, BODY, null),
+-            // TFOOT - O (TR)+
+-            new Element(TFOOT, "TFOOT", 0, TABLE, new short[]{THEAD,TBODY,TD,TH,TR}),
+-            // TH - O (%flow;)*
+-            new Element(TH, "TH", Element.CONTAINER, TR, TABLE, new short[]{TD,TH}),
+-            // THEAD - O (TR)+
+-            new Element(THEAD, "THEAD", 0, TABLE, new short[]{COLGROUP}),
+-            // TITLE - - (#PCDATA) -(%head.misc;)
+-            new Element(TITLE, "TITLE", Element.SPECIAL, new short[]{HEAD,BODY}, null),
+-            // TR - O (TH|TD)+
+-            new Element(TR, "TR", Element.BLOCK, new short[]{TBODY, THEAD, TFOOT}, TABLE, new short[]{TD,TH,TR,COLGROUP}),
+-            // TT - - (%inline;)*
+-            new Element(TT, "TT", Element.INLINE, BODY, null),
+-        };
+-        ELEMENTS_ARRAY['U'-'A'] = new Element[] {
+-            // U, 
+-            new Element(U, "U", Element.INLINE, BODY, null),
+-            // UL - - (LI)+
+-            new Element(UL, "UL", Element.BLOCK, BODY, null),
+-        };
+-        ELEMENTS_ARRAY['V'-'A'] = new Element[] {
+-            // VAR - - (%inline;)*
+-            new Element(VAR, "VAR", Element.INLINE, BODY, null),
+-        };
+-        ELEMENTS_ARRAY['W'-'A'] = new Element[] {
+-            // WBR
+-            new Element(WBR, "WBR", Element.EMPTY, BODY, null),
+-        };
+-        ELEMENTS_ARRAY['X'-'A'] = new Element[] {
+-            // XML
+-            new Element(XML, "XML", 0, BODY, null),
+-            // XMP
+-            new Element(XMP, "XMP", Element.SPECIAL, BODY, null),
+-        };
+-
+-        // keep contiguous list of elements for lookups by code
+-        for (int i = 0; i < ELEMENTS_ARRAY.length; i++) {
+-            Element[] elements = ELEMENTS_ARRAY[i];
+-            if (elements != null) {
+-                for (int j = 0; j < elements.length; j++) {
+-                    Element element = elements[j];
+-                    ELEMENTS.addElement(element);
+-                }
+-            }
+-        }
+-        ELEMENTS.addElement(NO_SUCH_ELEMENT);
+-
+-        // initialize cross references to parent elements
+-        for (int i = 0; i < ELEMENTS.size; i++) {
+-            Element element = ELEMENTS.data[i];
+-            if (element.parentCodes != null) {
+-                element.parent = new Element[element.parentCodes.length];
+-                for (int j = 0; j < element.parentCodes.length; j++) {
+-                    element.parent[j] = ELEMENTS.data[element.parentCodes[j]];
+-                }
+-                element.parentCodes = null;
+-            }
+-        }
+-
+-    } // <clinit>()
+-
+-    //
+-    // Public static methods
+-    //
+-
+-    /**
+-     * Returns the element information for the specified element code.
+-     *
+-     * @param code The element code.
+-     */
+-    public static final Element getElement(short code) {
+-        return ELEMENTS.data[code];
+-    } // getElement(short):Element
+-
+-    /**
+-     * Returns the element information for the specified element name.
+-     *
+-     * @param ename The element name.
+-     */
+-    public static final Element getElement(String ename) {
+-        return getElement(ename, NO_SUCH_ELEMENT);
+-    } // getElement(String):Element
+-
+-    /**
+-     * Returns the element information for the specified element name.
+-     *
+-     * @param ename The element name.
+-     * @param element The default element to return if not found.
+-     */
+-    public static final Element getElement(String ename, Element element) {
+-
+-        if (ename.length() > 0) {
+-            int c = ename.charAt(0);
+-            if (c >= 'a' && c <= 'z') {
+-                c = 'A' + c - 'a';
+-            }
+-            if (c >= 'A' && c <= 'Z') {
+-                Element[] elements = ELEMENTS_ARRAY[c - 'A'];
+-                if (elements != null) {
+-                    for (int i = 0; i < elements.length; i++) {
+-                        Element elem = elements[i];
+-                        if (elem.name.equalsIgnoreCase(ename)) {
+-                            return elem;
+-                        }
+-                    }
+-                }
+-            }
+-        }
+-        return element;
+-
+-    } // getElement(String):Element
+-
+-    //
+-    // Classes
+-    //
+-
+-    /**
+-     * Element information.
+-     *
+-     * @author Andy Clark
+-     */
+-    public static class Element {
+-
+-        //
+-        // Constants
+-        //
+-
+-        /** Inline element. */
+-        public static final int INLINE = 0x01;
+-
+-        /** Block element. */
+-        public static final int BLOCK = 0x02;
+-
+-        /** Empty element. */
+-        public static final int EMPTY = 0x04;
+-
+-        /** Container element. */
+-        public static final int CONTAINER = 0x08;
+-
+-        /** Special element. */
+-        public static final int SPECIAL = 0x10;
+-
+-        //
+-        // Data
+-        //
+-
+-        /** The element code. */
+-        public short code;
+-
+-        /** The element name. */
+-        public String name;
+-
+-        /** Informational flags. */
+-        public int flags;
+-
+-        /** Parent elements. */
+-        public short[] parentCodes;
+-
+-        /** Parent elements. */
+-        public Element[] parent;
+-
+-        /** The bounding element code. */
+-        public short bounds;
+-
+-        /** List of elements this element can close. */
+-        public short[] closes;
+-        
+-        /** If set to true, then this element may not be nested, example: "A" **/
+-        boolean nestable = true;
+-
+-        //
+-        // Constructors
+-        //
+-
+-        /** 
+-         * Constructs an element object.
+-         *
+-         * @param code The element code.
+-         * @param name The element name.
+-         * @param flags Informational flags
+-         * @param parent Natural closing parent name.
+-         * @param closes List of elements this element can close.
+-         */
+-        public Element(short code, String name, int flags, 
+-                       short parent, short[] closes) {
+-            this(code, name, flags, new short[]{parent}, (short)-1, closes);
+-        } // <init>(short,String,int,short,short[]);
+-
+-        /** 
+-         * Constructs an element object.
+-         *
+-         * @param code The element code.
+-         * @param name The element name.
+-         * @param flags Informational flags
+-         * @param parent Natural closing parent name.
+-         * @param closes List of elements this element can close.
+-         */
+-        public Element(short code, String name, int flags, 
+-                       short parent, short bounds, short[] closes) {
+-            this(code, name, flags, new short[]{parent}, bounds, closes);
+-        } // <init>(short,String,int,short,short,short[])
+-
+-        /** 
+-         * Constructs an element object.
+-         *
+-         * @param code The element code.
+-         * @param name The element name.
+-         * @param flags Informational flags
+-         * @param parents Natural closing parent names.
+-         * @param closes List of elements this element can close.
+-         */
+-        public Element(short code, String name, int flags, 
+-                       short[] parents, short[] closes) {
+-            this(code, name, flags, parents, (short)-1, closes);
+-        } // <init>(short,String,int,short[],short[])
+-
+-        /** 
+-         * Constructs an element object.
+-         *
+-         * @param code The element code.
+-         * @param name The element name.
+-         * @param flags Informational flags
+-         * @param parents Natural closing parent names.
+-         * @param closes List of elements this element can close.
+-         */
+-        public Element(short code, String name, int flags, 
+-                       short[] parents, short bounds, short[] closes) {
+-            this.code = code;
+-            this.name = name;
+-            this.flags = flags;
+-            this.parentCodes = parents;
+-            this.parent = null;
+-            this.bounds = bounds;
+-            this.closes = closes;
+-            if(closes != null) {
+-                for(int i=0;i<closes.length;i++) {
+-                    if(closes[i] == code) {
+-                        this.nestable = false;
+-                        break;
+-                    }
+-                }
+-            }
+-        } // <init>(short,String,int,short[],short,short[])
+-
+-        //
+-        // Public methods
+-        //
+-
+-        /** Returns true if this element is an inline element. */
+-        public final boolean isInline() {
+-            return (flags & INLINE) != 0;
+-        } // isInline():boolean
+-
+-        /** Returns true if this element is a block element. */
+-        public final boolean isBlock() {
+-            return (flags & BLOCK) != 0;
+-        } // isBlock():boolean
+-
+-        /** Returns true if this element is an empty element. */
+-        public final boolean isEmpty() {
+-            return (flags & EMPTY) != 0;
+-        } // isEmpty():boolean
+-
+-        /** Returns true if this element is a container element. */
+-        public final boolean isContainer() {
+-            return (flags & CONTAINER) != 0;
+-        } // isContainer():boolean
+-
+-        /** 
+-         * Returns true if this element is special -- if its content
+-         * should be parsed ignoring markup.
+-         */
+-        public final boolean isSpecial() {
+-            return (flags & SPECIAL) != 0;
+-        } // isSpecial():boolean
+-
+-        /**
+-         * Returns true if this element can close the specified Element.
+-         *
+-         * @param tag The element.
+-         */
+-        public boolean closes(short tag) {
+-
+-            if (closes != null) {
+-                for (int i = 0; i < closes.length; i++) {
+-                    if (closes[i] == tag) {
+-                        return true;
+-                    }
+-                }
+-            }
+-            return false;
+-
+-        } // closes(short):boolean
+-
+-        //
+-        // Object methods
+-        //
+-
+-        /** Returns a hash code for this object. */
+-        public int hashCode() {
+-            return name.hashCode();
+-        } // hashCode():int
+-
+-        /** Returns true if the objects are equal. */
+-        public boolean equals(Object o) {
+-            return name.equals(o);
+-        } // equals(Object):boolean
+-
+-        /**
+-         * Provides a simple representation to make debugging easier
+-         */
+-        public String toString() {
+-        	return super.toString() + "(name=" + name + ")";
+-        }
+-
+-        /**
+-         * Indicates if the provided element is an accepted parent of current element
+-         * @param element the element to test for "paternity"
+-         * @return <code>true</code> if <code>element</code> belongs to the {@link #parent}
+-         */
+-		public boolean isParent(final Element element) {
+-			if (parent == null)
+-				return false;
+-			else {
+-				for (int i=0; i<parent.length; ++i) {
+-					if (element.code == parent[i].code)
+-						return true;
+-				}
+-			}
+-			return false;
+-		}
+-    } // class Element
+-
+-    /** Unsynchronized list of elements. */
+-    public static class ElementList {
+-
+-        //
+-        // Data
+-        //
+-
+-        /** The size of the list. */
+-        public int size;
+-
+-        /** The data in the list. */
+-        public Element[] data = new Element[120];
+-
+-        //
+-        // Public methods
+-        //
+-
+-        /** Adds an element to list, resizing if necessary. */
+-        public void addElement(Element element) {
+-            if (size == data.length) {
+-                Element[] newarray = new Element[size + 20];
+-                System.arraycopy(data, 0, newarray, 0, size);
+-                data = newarray;
+-            }
+-            data[size++] = element;
+-        } // addElement(Element)
+-
+-    } // class Element
+-
+-} // class HTMLElements
+diff -Nru boilerpipe-1.2.0/src/main/org/cyberneko/html/HTMLTagBalancer.java boilerpipe-1.2.0-gil/src/main/org/cyberneko/html/HTMLTagBalancer.java
+--- boilerpipe-1.2.0/src/main/org/cyberneko/html/HTMLTagBalancer.java	2010-12-16 11:30:06.000000000 +0100
++++ boilerpipe-1.2.0-gil/src/main/org/cyberneko/html/HTMLTagBalancer.java	1970-01-01 01:00:00.000000000 +0100
+@@ -1,1409 +0,0 @@
+-/* 
+- * Copyright 2002-2009 Andy Clark, Marc Guillemot
+- * 
+- * Licensed under the Apache License, Version 2.0 (the "License");
+- * you may not use this file except in compliance with the License.
+- * You may obtain a copy of the License at
+- *
+- *     http://www.apache.org/licenses/LICENSE-2.0
+- *
+- * Unless required by applicable law or agreed to in writing, software
+- * distributed under the License is distributed on an "AS IS" BASIS,
+- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- * See the License for the specific language governing permissions and
+- * limitations under the License.
+- */
+-
+-package org.cyberneko.html;
+-
+-import java.util.ArrayList;
+-import java.util.List;
+-import org.apache.xerces.util.XMLAttributesImpl;
+-import org.apache.xerces.xni.Augmentations;
+-import org.apache.xerces.xni.NamespaceContext;
+-import org.apache.xerces.xni.QName;
+-import org.apache.xerces.xni.XMLAttributes;
+-import org.apache.xerces.xni.XMLDocumentHandler;
+-import org.apache.xerces.xni.XMLLocator;
+-import org.apache.xerces.xni.XMLResourceIdentifier;
+-import org.apache.xerces.xni.XMLString;
+-import org.apache.xerces.xni.XNIException;
+-import org.apache.xerces.xni.parser.XMLComponentManager;
+-import org.apache.xerces.xni.parser.XMLConfigurationException;
+-import org.apache.xerces.xni.parser.XMLDocumentFilter;
+-import org.apache.xerces.xni.parser.XMLDocumentSource;
+-import org.cyberneko.html.HTMLElements.Element;
+-import org.cyberneko.html.filters.NamespaceBinder;
+-import org.cyberneko.html.xercesbridge.XercesBridge;
+-                      
+-/**
+- * Balances tags in an HTML document. This component receives document events
+- * and tries to correct many common mistakes that human (and computer) HTML
+- * document authors make. This tag balancer can:
+- * <ul>
+- * <li>add missing parent elements;
+- * <li>automatically close elements with optional end tags; and
+- * <li>handle mis-matched inline element tags.
+- * </ul>
+- * <p>
+- * This component recognizes the following features:
+- * <ul>
+- * <li>http://cyberneko.org/html/features/augmentations
+- * <li>http://cyberneko.org/html/features/report-errors
+- * <li>http://cyberneko.org/html/features/balance-tags/document-fragment
+- * <li>http://cyberneko.org/html/features/balance-tags/ignore-outside-content
+- * </ul>
+- * <p>
+- * This component recognizes the following properties:
+- * <ul>
+- * <li>http://cyberneko.org/html/properties/names/elems
+- * <li>http://cyberneko.org/html/properties/names/attrs
+- * <li>http://cyberneko.org/html/properties/error-reporter
+- * <li>http://cyberneko.org/html/properties/balance-tags/current-stack
+- * </ul>
+- *
+- * @see HTMLElements
+- *
+- * @author Andy Clark
+- * @author Marc Guillemot
+- *
+- * @version $Id: HTMLTagBalancer.java,v 1.20 2005/02/14 04:06:22 andyc Exp $
+- */
+-public class HTMLTagBalancer
+-    implements XMLDocumentFilter, HTMLComponent {
+-
+-    //
+-    // Constants
+-    //
+-
+-    // features
+-
+-    /** Namespaces. */
+-    protected static final String NAMESPACES = "http://xml.org/sax/features/namespaces";
+-
+-    /** Include infoset augmentations. */
+-    protected static final String AUGMENTATIONS = "http://cyberneko.org/html/features/augmentations";
+-
+-    /** Report errors. */
+-    protected static final String REPORT_ERRORS = "http://cyberneko.org/html/features/report-errors";
+-
+-    /** Document fragment balancing only (deprecated). */
+-    protected static final String DOCUMENT_FRAGMENT_DEPRECATED = "http://cyberneko.org/html/features/document-fragment";
+-
+-    /** Document fragment balancing only. */
+-    protected static final String DOCUMENT_FRAGMENT = "http://cyberneko.org/html/features/balance-tags/document-fragment";
+-
+-    /** Ignore outside content. */
+-    protected static final String IGNORE_OUTSIDE_CONTENT = "http://cyberneko.org/html/features/balance-tags/ignore-outside-content";
+-
+-    /** Recognized features. */
+-    private static final String[] RECOGNIZED_FEATURES = {
+-        NAMESPACES,
+-        AUGMENTATIONS,
+-        REPORT_ERRORS,
+-        DOCUMENT_FRAGMENT_DEPRECATED,
+-        DOCUMENT_FRAGMENT,
+-        IGNORE_OUTSIDE_CONTENT,
+-    };
+-
+-    /** Recognized features defaults. */
+-    private static final Boolean[] RECOGNIZED_FEATURES_DEFAULTS = {
+-        null,
+-        null,
+-        null,
+-        null,
+-        Boolean.FALSE,
+-        Boolean.FALSE,
+-    };
+-
+-    // properties
+-
+-    /** Modify HTML element names: { "upper", "lower", "default" }. */
+-    protected static final String NAMES_ELEMS = "http://cyberneko.org/html/properties/names/elems";
+-
+-    /** Modify HTML attribute names: { "upper", "lower", "default" }. */
+-    protected static final String NAMES_ATTRS = "http://cyberneko.org/html/properties/names/attrs";
+-    
+-    /** Error reporter. */
+-    protected static final String ERROR_REPORTER = "http://cyberneko.org/html/properties/error-reporter";
+-
+-    /**
+-     * <font color="red">EXPERIMENTAL: may change in next release</font><br/>
+-     * Name of the property holding the stack of elements in which context a document fragment should be parsed.
+-     **/
+-    public static final String FRAGMENT_CONTEXT_STACK = "http://cyberneko.org/html/properties/balance-tags/fragment-context-stack";
+-
+-    /** Recognized properties. */
+-    private static final String[] RECOGNIZED_PROPERTIES = {
+-        NAMES_ELEMS,
+-        NAMES_ATTRS,
+-        ERROR_REPORTER,
+-        FRAGMENT_CONTEXT_STACK,
+-    };
+-
+-    /** Recognized properties defaults. */
+-    private static final Object[] RECOGNIZED_PROPERTIES_DEFAULTS = {
+-        null,
+-        null,
+-        null,
+-        null,
+-    };
+-
+-    // modify HTML names
+-
+-    /** Don't modify HTML names. */
+-    protected static final short NAMES_NO_CHANGE = 0;
+-
+-    /** Match HTML element names. */
+-    protected static final short NAMES_MATCH = 0;
+-
+-    /** Uppercase HTML names. */
+-    protected static final short NAMES_UPPERCASE = 1;
+-
+-    /** Lowercase HTML names. */
+-    protected static final short NAMES_LOWERCASE = 2;
+-
+-    // static vars
+-
+-    /** Synthesized event info item. */
+-    protected static final HTMLEventInfo SYNTHESIZED_ITEM = 
+-        new HTMLEventInfo.SynthesizedItem();
+-
+-    //
+-    // Data
+-    //
+-
+-    // features
+-
+-    /** Namespaces. */
+-    protected boolean fNamespaces;
+-
+-    /** Include infoset augmentations. */
+-    protected boolean fAugmentations;
+-    
+-    /** Report errors. */
+-    protected boolean fReportErrors;
+-
+-    /** Document fragment balancing only. */
+-    protected boolean fDocumentFragment;
+-
+-    /** Ignore outside content. */
+-    protected boolean fIgnoreOutsideContent;
+-
+-    // properties
+-
+-    /** Modify HTML element names. */
+-    protected short fNamesElems;
+-
+-    /** Modify HTML attribute names. */
+-    protected short fNamesAttrs;
+-
+-    /** Error reporter. */
+-    protected HTMLErrorReporter fErrorReporter;
+-
+-    // connections
+-
+-    /** The document source. */
+-    protected XMLDocumentSource fDocumentSource;
+-
+-    /** The document handler. */
+-    protected XMLDocumentHandler fDocumentHandler;
+-
+-    // state
+-
+-    /** The element stack. */
+-    protected final InfoStack fElementStack = new InfoStack();
+-
+-    /** The inline stack. */
+-    protected final InfoStack fInlineStack = new InfoStack();
+-
+-    /** True if seen anything. Important for xml declaration. */
+-    protected boolean fSeenAnything;
+-
+-    /** True if root element has been seen. */
+-    protected boolean fSeenDoctype;
+-
+-    /** True if root element has been seen. */
+-    protected boolean fSeenRootElement;
+-
+-    /** 
+-     * True if seen the end of the document element. In other words, 
+-     * this variable is set to false <em>until</em> the end &lt;/HTML&gt; 
+-     * tag is seen (or synthesized). This is used to ensure that 
+-     * extraneous events after the end of the document element do not 
+-     * make the document stream ill-formed.
+-     */
+-    protected boolean fSeenRootElementEnd;
+-
+-    /** True if seen &lt;head&lt; element. */
+-    protected boolean fSeenHeadElement;
+-
+-    /** True if seen &lt;body&lt; element. */
+-    protected boolean fSeenBodyElement;
+-
+-    /** True if a form is in the stack (allow to discard opening of nested forms) */
+-    protected boolean fOpenedForm;
+-
+-    // temp vars
+-
+-    /** A qualified name. */
+-    private final QName fQName = new QName();
+-
+-    /** Empty attributes. */
+-    private final XMLAttributes fEmptyAttrs = new XMLAttributesImpl();
+-
+-    /** Augmentations. */
+-    private final HTMLAugmentations fInfosetAugs = new HTMLAugmentations();
+-
+-    protected HTMLTagBalancingListener tagBalancingListener;
+-    private LostText lostText_ = new LostText();
+-
+-    private boolean forcedStartElement_ = false;
+-    private boolean forcedEndElement_ = false;
+-
+-    /**
+-     * Stack of elements determining the context in which a document fragment should be parsed
+-     */
+-	private QName[] fragmentContextStack_ = null;
+-	private int fragmentContextStackSize_ = 0; // not 0 only when a fragment is parsed and fragmentContextStack_ is set
+-
+-    private List/*ElementEntry*/ endElementsBuffer_ = new ArrayList(); 
+-
+-    //
+-    // HTMLComponent methods
+-    //
+-
+-    /** Returns the default state for a feature. */
+-    public Boolean getFeatureDefault(String featureId) {
+-        int length = RECOGNIZED_FEATURES != null ? RECOGNIZED_FEATURES.length : 0;
+-        for (int i = 0; i < length; i++) {
+-            if (RECOGNIZED_FEATURES[i].equals(featureId)) {
+-                return RECOGNIZED_FEATURES_DEFAULTS[i];
+-            }
+-        }
+-        return null;
+-    } // getFeatureDefault(String):Boolean
+-
+-    /** Returns the default state for a property. */
+-    public Object getPropertyDefault(String propertyId) {
+-        int length = RECOGNIZED_PROPERTIES != null ? RECOGNIZED_PROPERTIES.length : 0;
+-        for (int i = 0; i < length; i++) {
+-            if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
+-                return RECOGNIZED_PROPERTIES_DEFAULTS[i];
+-            }
+-        }
+-        return null;
+-    } // getPropertyDefault(String):Object
+-
+-    //
+-    // XMLComponent methods
+-    //
+-
+-    /** Returns recognized features. */
+-    public String[] getRecognizedFeatures() {
+-        return RECOGNIZED_FEATURES;
+-    } // getRecognizedFeatures():String[]
+-
+-    /** Returns recognized properties. */
+-    public String[] getRecognizedProperties() {
+-        return RECOGNIZED_PROPERTIES;
+-    } // getRecognizedProperties():String[]
+-
+-    /** Resets the component. */
+-    public void reset(XMLComponentManager manager)
+-        throws XMLConfigurationException {
+-
+-        // get features
+-        fNamespaces = manager.getFeature(NAMESPACES);
+-        fAugmentations = manager.getFeature(AUGMENTATIONS);
+-        fReportErrors = manager.getFeature(REPORT_ERRORS);
+-        fDocumentFragment = manager.getFeature(DOCUMENT_FRAGMENT) ||
+-                            manager.getFeature(DOCUMENT_FRAGMENT_DEPRECATED);
+-        fIgnoreOutsideContent = manager.getFeature(IGNORE_OUTSIDE_CONTENT);
+-
+-        // get properties
+-        fNamesElems = getNamesValue(String.valueOf(manager.getProperty(NAMES_ELEMS)));
+-        fNamesAttrs = getNamesValue(String.valueOf(manager.getProperty(NAMES_ATTRS)));
+-        fErrorReporter = (HTMLErrorReporter)manager.getProperty(ERROR_REPORTER);
+-        
+-        fragmentContextStack_ = (QName[]) manager.getProperty(FRAGMENT_CONTEXT_STACK);
+-
+-    } // reset(XMLComponentManager)
+-
+-    /** Sets a feature. */
+-    public void setFeature(String featureId, boolean state)
+-        throws XMLConfigurationException {
+-
+-        if (featureId.equals(AUGMENTATIONS)) {
+-            fAugmentations = state;
+-            return;
+-        }
+-        if (featureId.equals(REPORT_ERRORS)) {
+-            fReportErrors = state;
+-            return;
+-        }
+-        if (featureId.equals(IGNORE_OUTSIDE_CONTENT)) {
+-            fIgnoreOutsideContent = state;
+-            return;
+-        }
+-
+-    } // setFeature(String,boolean)
+-
+-    /** Sets a property. */
+-    public void setProperty(String propertyId, Object value)
+-        throws XMLConfigurationException {
+-    
+-        if (propertyId.equals(NAMES_ELEMS)) {
+-            fNamesElems = getNamesValue(String.valueOf(value));
+-            return;
+-        }
+-
+-        if (propertyId.equals(NAMES_ATTRS)) {
+-            fNamesAttrs = getNamesValue(String.valueOf(value));
+-            return;
+-        }
+-
+-    } // setProperty(String,Object)
+-
+-    //
+-    // XMLDocumentSource methods
+-    //
+-
+-    /** Sets the document handler. */
+-    public void setDocumentHandler(XMLDocumentHandler handler) {
+-        fDocumentHandler = handler;
+-    } // setDocumentHandler(XMLDocumentHandler)
+-
+-    // @since Xerces 2.1.0
+-
+-    /** Returns the document handler. */
+-    public XMLDocumentHandler getDocumentHandler() {
+-        return fDocumentHandler;
+-    } // getDocumentHandler():XMLDocumentHandler
+-
+-    //
+-    // XMLDocumentHandler methods
+-    //
+-
+-    // since Xerces-J 2.2.0
+-
+-    /** Start document. */
+-    public void startDocument(XMLLocator locator, String encoding, 
+-                              NamespaceContext nscontext, Augmentations augs) 
+-        throws XNIException {
+-
+-        // reset state
+-        fElementStack.top = 0;
+-        if (fragmentContextStack_ != null) {
+-        	fragmentContextStackSize_ = fragmentContextStack_.length;
+-        	for (int i=0; i<fragmentContextStack_.length; ++i) {
+-        		final QName name = fragmentContextStack_[i];
+-            	final Element elt = HTMLElements.getElement(name.localpart);
+-            	fElementStack.push(new Info(elt, name));
+-        	}
+-        	
+-        }
+-        else {
+-        	fragmentContextStackSize_ = 0;
+-        }
+-        fSeenAnything = false;
+-        fSeenDoctype = false;
+-        fSeenRootElement = false;
+-        fSeenRootElementEnd = false;
+-        fSeenHeadElement = false;
+-        fSeenBodyElement = false;
+-        
+-
+-        // pass on event
+-        if (fDocumentHandler != null) {
+-        	XercesBridge.getInstance().XMLDocumentHandler_startDocument(fDocumentHandler, locator, encoding, nscontext, augs);
+-        }
+-    
+-    } // startDocument(XMLLocator,String,Augmentations)
+-
+-    // old methods
+-
+-    /** XML declaration. */
+-    public void xmlDecl(String version, String encoding, String standalone,
+-                        Augmentations augs) throws XNIException {
+-        if (!fSeenAnything && fDocumentHandler != null) {
+-            fDocumentHandler.xmlDecl(version, encoding, standalone, augs);
+-        }
+-    } // xmlDecl(String,String,String,Augmentations)
+-
+-    /** Doctype declaration. */
+-    public void doctypeDecl(String rootElementName, String publicId, String systemId,
+-                            Augmentations augs) throws XNIException {
+-        fSeenAnything = true;
+-        if (fReportErrors) {
+-            if (fSeenRootElement) {
+-                fErrorReporter.reportError("HTML2010", null);
+-            }
+-            else if (fSeenDoctype) {
+-                fErrorReporter.reportError("HTML2011", null);
+-            }
+-        }
+-        if (!fSeenRootElement && !fSeenDoctype) {
+-            fSeenDoctype = true;
+-            if (fDocumentHandler != null) {
+-                fDocumentHandler.doctypeDecl(rootElementName, publicId, systemId, augs);
+-            }
+-        }
+-    } // doctypeDecl(String,String,String,Augmentations)
+-
+-    /** End document. */
+-    public void endDocument(Augmentations augs) throws XNIException {
+-
+-    	// </body> and </html> have been buffered to consider outside content
+-    	fIgnoreOutsideContent = true; // endElement should not ignore the elements passed from buffer
+-    	consumeBufferedEndElements();
+-    	
+-        // handle empty document
+-        if (!fSeenRootElement && !fDocumentFragment) {
+-            if (fReportErrors) {
+-                fErrorReporter.reportError("HTML2000", null);
+-            }
+-            if (fDocumentHandler != null) {
+-            	fSeenRootElementEnd = false;
+-                forceStartBody(); // will force <html> and <head></head>
+-                final String body = modifyName("body", fNamesElems);
+-                fQName.setValues(null, body, body, null);
+-                callEndElement(fQName, synthesizedAugs());
+-
+-                final String ename = modifyName("html", fNamesElems);
+-                fQName.setValues(null, ename, ename, null);
+-                callEndElement(fQName, synthesizedAugs());
+-            }
+-        }
+-
+-        // pop all remaining elements
+-        else {
+-            int length = fElementStack.top - fragmentContextStackSize_;
+-            for (int i = 0; i < length; i++) {
+-                Info info = fElementStack.pop();
+-                if (fReportErrors) {
+-                    String ename = info.qname.rawname;
+-                    fErrorReporter.reportWarning("HTML2001", new Object[]{ename});
+-                }
+-                if (fDocumentHandler != null) {
+-                    callEndElement(info.qname, synthesizedAugs());
+-                }
+-            }
+-        }
+-
+-        // call handler
+-        if (fDocumentHandler != null) {
+-            fDocumentHandler.endDocument(augs);
+-        }
+-
+-    } // endDocument(Augmentations)
+-
+-    /**
+-     * Consume elements that have been buffered, like </body></html> that are first consumed
+-     * at the end of document
+-     */
+-	private void consumeBufferedEndElements() {
+-		final List toConsume = new ArrayList(endElementsBuffer_);
+-		endElementsBuffer_.clear();
+-		for (int i=0; i<toConsume.size(); ++i) {
+-    		final ElementEntry entry = (ElementEntry) toConsume.get(i);
+-    		forcedEndElement_ = true;
+-        	endElement(entry.name_, entry.augs_);
+-    	}
+-		endElementsBuffer_.clear();
+-	}
+-
+-    /** Comment. */
+-    public void comment(XMLString text, Augmentations augs) throws XNIException {
+-        fSeenAnything = true;
+-        consumeEarlyTextIfNeeded();
+-        if (fDocumentHandler != null) {
+-            fDocumentHandler.comment(text, augs);
+-        }
+-    } // comment(XMLString,Augmentations)
+-
+-	private void consumeEarlyTextIfNeeded() {
+-		if (!lostText_.isEmpty()) {
+-        	if (!fSeenBodyElement) {
+-        		forceStartBody();
+-        	}
+-            lostText_.refeed(this);
+-        }
+-	}
+-
+-    /** Processing instruction. */
+-    public void processingInstruction(String target, XMLString data,
+-                                      Augmentations augs) throws XNIException {
+-        fSeenAnything = true;
+-        consumeEarlyTextIfNeeded();
+-        if (fDocumentHandler != null) {
+-            fDocumentHandler.processingInstruction(target, data, augs);
+-        }
+-    } // processingInstruction(String,XMLString,Augmentations)
+-
+-    /** Start element. */
+-    public void startElement(final QName elem, XMLAttributes attrs, final Augmentations augs)
+-        throws XNIException {
+-        fSeenAnything = true;
+-        
+-        final boolean isForcedCreation = forcedStartElement_;
+-        forcedStartElement_ = false;
+-
+-        // check for end of document
+-        if (fSeenRootElementEnd) {
+-        	notifyDiscardedStartElement(elem, attrs, augs);
+-            return;
+-        }
+-
+-        // get element information
+-        final HTMLElements.Element element = getElement(elem);
+-        final short elementCode = element.code;
+-
+-        // the creation of some elements like TABLE or SELECT can't be forced. Any others? 
+-        if (isForcedCreation && (elementCode == HTMLElements.TABLE || elementCode == HTMLElements.SELECT)) {
+-        	return; // don't accept creation
+-        }
+-
+-        // ignore multiple html, head, body elements
+-		if (fSeenRootElement && elementCode == HTMLElements.HTML) {
+-        	notifyDiscardedStartElement(elem, attrs, augs);
+-            return;
+-        }
+-        if (elementCode == HTMLElements.HEAD) {
+-            if (fSeenHeadElement) {
+-            	notifyDiscardedStartElement(elem, attrs, augs);
+-                return;
+-            }
+-            fSeenHeadElement = true;
+-        }
+-        else if (elementCode == HTMLElements.FRAMESET) {
+-        	consumeBufferedEndElements(); // </head> (if any) has been buffered
+-        }
+-        else if (elementCode == HTMLElements.BODY) {
+-    		// create <head></head> if none was present
+-    		if (!fSeenHeadElement) {
+-    			final QName head = createQName("head");
+-    			forceStartElement(head, null, synthesizedAugs());
+-    			endElement(head, synthesizedAugs());
+-    		}
+-        	consumeBufferedEndElements(); // </head> (if any) has been buffered
+-    		
+-            if (fSeenBodyElement) {
+-            	notifyDiscardedStartElement(elem, attrs, augs);
+-                return;
+-            }
+-            fSeenBodyElement = true;
+-        }
+-        else if (elementCode == HTMLElements.FORM) {
+-        	if (fOpenedForm) {
+-            	notifyDiscardedStartElement(elem, attrs, augs);
+-        		return;
+-        	}
+-        	fOpenedForm = true;
+-        }
+-        else if (elementCode == HTMLElements.UNKNOWN) {
+-        	consumeBufferedEndElements();
+-        }
+-
+-        // check proper parent
+-        if (element.parent != null) {
+-        	if (!fSeenRootElement && !fDocumentFragment) {
+-                String pname = element.parent[0].name;
+-                pname = modifyName(pname, fNamesElems);
+-                if (fReportErrors) {
+-                    String ename = elem.rawname;
+-                    fErrorReporter.reportWarning("HTML2002", new Object[]{ename,pname});
+-                }
+-                final QName qname = new QName(null, pname, pname, null);
+-                final boolean parentCreated = forceStartElement(qname, null, synthesizedAugs());
+-                if (!parentCreated) {
+-                	if (!isForcedCreation) {
+-                		notifyDiscardedStartElement(elem, attrs, augs);
+-                	}
+-            		return;
+-                }
+-            }
+-        	else {
+-                HTMLElements.Element preferedParent = element.parent[0];
+-                if (preferedParent.code != HTMLElements.HEAD || (!fSeenBodyElement && !fDocumentFragment)) {
+-                    int depth = getParentDepth(element.parent, element.bounds);
+-                    if (depth == -1) { // no parent found
+-                        final String pname = modifyName(preferedParent.name, fNamesElems);
+-                        final QName qname = new QName(null, pname, pname, null);
+-                        if (fReportErrors) {
+-                            String ename = elem.rawname;
+-                            fErrorReporter.reportWarning("HTML2004", new Object[]{ename,pname});
+-                        }
+-                        final boolean parentCreated = forceStartElement(qname, null, synthesizedAugs());
+-                        if (!parentCreated) {
+-                        	if (!isForcedCreation) {
+-                        		notifyDiscardedStartElement(elem, attrs, augs);
+-                        	}
+-                    		return;
+-                        }
+-                    }
+-                }
+-            }
+-        }
+-
+-        // if block element, save immediate parent inline elements
+-        int depth = 0;
+-        if (element.flags == 0) {
+-            int length = fElementStack.top;
+-            fInlineStack.top = 0;
+-            for (int i = length - 1; i >= 0; i--) {
+-                Info info = fElementStack.data[i];
+-                if (!info.element.isInline()) {
+-                    break;
+-                }
+-                fInlineStack.push(info);
+-                endElement(info.qname, synthesizedAugs());
+-            }
+-            depth = fInlineStack.top;
+-        }
+-
+-        // close previous elements
+-        // all elements close a <script>
+-        // in head, no element has children
+-        if ((fElementStack.top > 1 
+-        		&& (fElementStack.peek().element.code == HTMLElements.SCRIPT))
+-        		|| fElementStack.top > 2 && fElementStack.data[fElementStack.top-2].element.code == HTMLElements.HEAD) {
+-            final Info info = fElementStack.pop();
+-            if (fDocumentHandler != null) {
+-                callEndElement(info.qname, synthesizedAugs());
+-            }
+-        }
+-        if (element.closes != null) {
+-            int length = fElementStack.top;
+-            for (int i = length - 1; i >= 0; i--) {
+-                Info info = fElementStack.data[i];
+-
+-                // does it close the element we're looking at?
+-                if (element.closes(info.element.code)) {
+-                    if (fReportErrors) {
+-                        String ename = elem.rawname;
+-                        String iname = info.qname.rawname;
+-                        fErrorReporter.reportWarning("HTML2005", new Object[]{ename,iname});
+-                    }
+-                    for (int j = length - 1; j >= i; j--) {
+-                        info = fElementStack.pop();
+-                        if (fDocumentHandler != null) {
+-                            // PATCH: Marc-Andr� Morissette
+-                            callEndElement(info.qname, synthesizedAugs());
+-                        }
+-                    }
+-                    length = i;
+-                    continue;
+-                }
+-                
+-                // should we stop searching?
+-                if(element.nestable) {
+-                    if (info.element.isBlock() || element.isParent(info.element)) {
+-                    	break;
+-                    }
+-                }
+-            }
+-        }
+-        // TODO: investigate if only table is special here
+-        // table closes all opened inline elements
+-        else if (elementCode == HTMLElements.TABLE) {
+-            for (int i=fElementStack.top-1; i >= 0; i--) {
+-                final Info info = fElementStack.data[i];
+-                if (!info.element.isInline()) {
+-                    break;
+-                }
+-                endElement(info.qname, synthesizedAugs());
+-            }
+-        }
+-
+-        // call handler
+-        fSeenRootElement = true;
+-        if (element != null && element.isEmpty()) {
+-            if (attrs == null) {
+-                attrs = emptyAttributes();
+-            }
+-            if (fDocumentHandler != null) {
+-                fDocumentHandler.emptyElement(elem, attrs, augs);
+-            }
+-        }
+-        else {
+-            boolean inline = element != null && element.isInline();
+-            fElementStack.push(new Info(element, elem, inline ? attrs : null));
+-            if (attrs == null) {
+-                attrs = emptyAttributes();
+-            }
+-            if (fDocumentHandler != null) {
+-                callStartElement(elem, attrs, augs);
+-            }
+-        }
+-
+-        // re-open inline elements
+-        for (int i = 0; i < depth; i++) {
+-            Info info = fInlineStack.pop();
+-            forceStartElement(info.qname, info.attributes, synthesizedAugs());
+-        }
+-
+-        if (elementCode == HTMLElements.BODY) {
+-        	lostText_.refeed(this);
+-        }
+-    } // startElement(QName,XMLAttributes,Augmentations)
+-
+-    /**
+-     * Forces an element start, taking care to set the information to allow startElement to "see" that's
+-     * the element has been forced.
+-     * @return <code>true</code> if creation could be done (TABLE's creation for instance can't be forced)
+-     */
+-    private boolean forceStartElement(final QName elem, XMLAttributes attrs, final Augmentations augs)
+-    throws XNIException {
+-    	
+-    	forcedStartElement_ = true;
+-    	startElement(elem, attrs, augs);
+-    	
+-    	return fElementStack.top > 0 && elem.equals(fElementStack.peek().qname);
+-    }
+-
+-    private QName createQName(String tagName) {
+-		tagName = modifyName(tagName, fNamesElems);
+-		return new QName(null, tagName, tagName, NamespaceBinder.XHTML_1_0_URI);
+-	}
+-
+-	/** Empty element. */
+-    public void emptyElement(final QName element, XMLAttributes attrs, Augmentations augs)
+-        throws XNIException {
+-    	startElement(element, attrs, augs);
+-        // browser ignore the closing indication for non empty tags like <form .../> but not for unknown element
+-        final HTMLElements.Element elem = getElement(element);
+-        if (elem.isEmpty() || elem.code == HTMLElements.UNKNOWN) {
+-        	endElement(element, augs);
+-        }
+-    } // emptyElement(QName,XMLAttributes,Augmentations)
+-
+-	/** Start entity. */
+-    public void startGeneralEntity(String name, 
+-                                   XMLResourceIdentifier id,
+-                                   String encoding,
+-                                   Augmentations augs) throws XNIException {
+-        fSeenAnything = true;
+-
+-        // check for end of document
+-        if (fSeenRootElementEnd) {
+-            return;
+-        }
+-
+-        // insert body, if needed
+-        if (!fDocumentFragment) {
+-            boolean insertBody = !fSeenRootElement;
+-            if (!insertBody) {
+-                Info info = fElementStack.peek();
+-                if (info.element.code == HTMLElements.HEAD ||
+-                    info.element.code == HTMLElements.HTML) {
+-                    String hname = modifyName("head", fNamesElems);
+-                    String bname = modifyName("body", fNamesElems);
+-                    if (fReportErrors) {
+-                        fErrorReporter.reportWarning("HTML2009", new Object[]{hname,bname});
+-                    }
+-                    fQName.setValues(null, hname, hname, null);
+-                    endElement(fQName, synthesizedAugs());
+-                    insertBody = true;
+-                }
+-            }
+-            if (insertBody) {
+-                forceStartBody();
+-            }
+-        }
+-        
+-        // call handler
+-        if (fDocumentHandler != null) {
+-            fDocumentHandler.startGeneralEntity(name, id, encoding, augs);
+-        }
+-
+-    } // startGeneralEntity(String,XMLResourceIdentifier,String,Augmentations)
+-
+-    /**
+-     * Generates a missing <body> (which creates missing <head> when needed)
+-     */
+-	private void forceStartBody() {
+-		final QName body = createQName("body");
+-		if (fReportErrors) {
+-		    fErrorReporter.reportWarning("HTML2006", new Object[]{body.localpart});
+-		}
+-		forceStartElement(body, null, synthesizedAugs());
+-	}
+-
+-    /** Text declaration. */
+-    public void textDecl(String version, String encoding, Augmentations augs)
+-        throws XNIException {
+-        fSeenAnything = true;
+-        
+-        // check for end of document
+-        if (fSeenRootElementEnd) {
+-            return;
+-        }
+-
+-        // call handler
+-        if (fDocumentHandler != null) {
+-            fDocumentHandler.textDecl(version, encoding, augs);
+-        }
+-
+-    } // textDecl(String,String,Augmentations)
+-
+-    /** End entity. */
+-    public void endGeneralEntity(String name, Augmentations augs) throws XNIException {
+-        
+-        // check for end of document
+-        if (fSeenRootElementEnd) {
+-            return;
+-        }
+-
+-        // call handler
+-        if (fDocumentHandler != null) {
+-            fDocumentHandler.endGeneralEntity(name, augs);
+-        }
+-
+-    } // endGeneralEntity(String,Augmentations)
+-
+-    /** Start CDATA section. */
+-    public void startCDATA(Augmentations augs) throws XNIException {
+-        fSeenAnything = true;
+-        
+-        consumeEarlyTextIfNeeded();
+-
+-        // check for end of document
+-        if (fSeenRootElementEnd) {
+-            return;
+-        }
+-
+-        // call handler
+-        if (fDocumentHandler != null) {
+-            fDocumentHandler.startCDATA(augs);
+-        }
+-
+-    } // startCDATA(Augmentations)
+-
+-    /** End CDATA section. */
+-    public void endCDATA(Augmentations augs) throws XNIException {
+-
+-        // check for end of document
+-        if (fSeenRootElementEnd) {
+-            return;
+-        }
+-
+-        // call handler
+-        if (fDocumentHandler != null) {
+-            fDocumentHandler.endCDATA(augs);
+-        }
+-
+-    } // endCDATA(Augmentations)
+-
+-    /** Characters. */
+-    public void characters(final XMLString text, final Augmentations augs) throws XNIException {
+-        // check for end of document
+-        if (fSeenRootElementEnd) {
+-            return;
+-        }
+-
+-    	if (fElementStack.top == 0 && !fDocumentFragment) {
+-    		// character before first opening tag
+-    		lostText_.add(text, augs);
+-    		return;
+-    	}
+-
+-        // is this text whitespace?
+-        boolean whitespace = true;
+-        for (int i = 0; i < text.length; i++) {
+-            if (!Character.isWhitespace(text.ch[text.offset + i])) {
+-                whitespace = false;
+-                break;
+-            }
+-        }
+-
+-        if (!fDocumentFragment) {
+-            // handle bare characters
+-            if (!fSeenRootElement) {
+-                if (whitespace) {
+-                    return;
+-                }
+-                forceStartBody();
+-            }
+-            
+-            if (whitespace && (fElementStack.top < 2 || endElementsBuffer_.size() == 1)) {
+-            	// ignore spaces directly within <html>
+-            	return;
+-            }
+-
+-            // handle character content in head
+-            // NOTE: This frequently happens when the document looks like:
+-            //       <title>Title</title>
+-            //       And here's some text.
+-            else if (!whitespace) {
+-                Info info = fElementStack.peek();
+-                if (info.element.code == HTMLElements.HEAD ||
+-                    info.element.code == HTMLElements.HTML) {
+-                    String hname = modifyName("head", fNamesElems);
+-                    String bname = modifyName("body", fNamesElems);
+-                    if (fReportErrors) {
+-                        fErrorReporter.reportWarning("HTML2009", new Object[]{hname,bname});
+-                    }
+-                    forceStartBody();
+-                }
+-            }
+-        }
+-
+-        // call handler
+-        if (fDocumentHandler != null) {
+-            fDocumentHandler.characters(text, augs);
+-        }
+-
+-    } // characters(XMLString,Augmentations)
+-
+-    /** Ignorable whitespace. */
+-    public void ignorableWhitespace(XMLString text, Augmentations augs)
+-        throws XNIException {
+-        characters(text, augs);
+-    } // ignorableWhitespace(XMLString,Augmentations)
+-    
+-    /** End element. */
+-    public void endElement(final QName element, final Augmentations augs) throws XNIException {
+-    	final boolean forcedEndElement = forcedEndElement_;
+-        // is there anything to do?
+-        if (fSeenRootElementEnd) {
+-        	notifyDiscardedEndElement(element, augs);
+-            return;
+-        }
+-        
+-        // get element information
+-        HTMLElements.Element elem = getElement(element);
+-
+-        // if we consider outside content, just buffer </body> and </html> to consider them at the very end
+-        if (!fIgnoreOutsideContent &&
+-            (elem.code == HTMLElements.BODY || elem.code == HTMLElements.HTML)) {
+-        	endElementsBuffer_.add(new ElementEntry(element, augs));
+-            return;
+-        }
+-
+-        // check for end of document
+-        if (elem.code == HTMLElements.HTML) {
+-            fSeenRootElementEnd = true;
+-        }
+-        else if (elem.code == HTMLElements.FORM) {
+-        	fOpenedForm = false;
+-        }
+-        else if (elem.code == HTMLElements.HEAD && !forcedEndElement) {
+-        	// consume </head> first when <body> is reached to retrieve content lost between </head> and <body>
+-        	endElementsBuffer_.add(new ElementEntry(element, augs));
+-        	return;
+-        }
+-        
+-
+-        // empty element
+-        int depth = getElementDepth(elem);
+-        if (depth == -1) {
+-        	if (elem.code == HTMLElements.P) {
+-        		forceStartElement(element, emptyAttributes(), synthesizedAugs());
+-	            endElement(element, augs);
+-        	}
+-        	else if (!elem.isEmpty()) {
+-            	notifyDiscardedEndElement(element, augs);
+-        	}
+-            return;
+-        }
+-
+-        // find unbalanced inline elements
+-        if (depth > 1 && elem.isInline()) {
+-            final int size = fElementStack.top;
+-            fInlineStack.top = 0;
+-            for (int i = 0; i < depth - 1; i++) {
+-                final Info info = fElementStack.data[size - i - 1];
+-                final HTMLElements.Element pelem = info.element;
+-                
+-                if (pelem.isInline() || pelem.code == HTMLElements.FONT) { // TODO: investigate if only FONT
+-                    // NOTE: I don't have to make a copy of the info because
+-                    //       it will just be popped off of the element stack
+-                    //       as soon as we close it, anyway.
+-                    fInlineStack.push(info);
+-                }
+-            }
+-        }
+-
+-        // close children up to appropriate element
+-        for (int i = 0; i < depth; i++) {
+-            Info info = fElementStack.pop();
+-            
+-            if (fReportErrors && i < depth - 1) {
+-                String ename = modifyName(element.rawname, fNamesElems);
+-                String iname = info.qname.rawname;
+-                fErrorReporter.reportWarning("HTML2007", new Object[]{ename,iname});
+-            }
+-            if (fDocumentHandler != null) {
+-                // PATCH: Marc-Andr\u00e8 Morissette
+-                callEndElement(info.qname, i < depth - 1 ? synthesizedAugs() : augs);
+-            }
+-        }
+-
+-        // re-open inline elements
+-        if (depth > 1) {
+-            int size = fInlineStack.top;
+-            for (int i = 0; i < size; i++) {
+-                Info info = (Info)fInlineStack.pop();
+-                XMLAttributes attributes = info.attributes;
+-                if (fReportErrors) {
+-                    String iname = info.qname.rawname;
+-                    fErrorReporter.reportWarning("HTML2008", new Object[]{iname});
+-                }
+-                forceStartElement(info.qname, attributes, synthesizedAugs());
+-            }
+-        }
+-
+-    } // endElement(QName,Augmentations)
+-
+-    // @since Xerces 2.1.0
+-
+-	/** Sets the document source. */
+-    public void setDocumentSource(XMLDocumentSource source) {
+-        fDocumentSource = source;
+-    } // setDocumentSource(XMLDocumentSource)
+-
+-    /** Returns the document source. */
+-    public XMLDocumentSource getDocumentSource() {
+-        return fDocumentSource;
+-    } // getDocumentSource():XMLDocumentSource
+-
+-    // removed since Xerces-J 2.3.0
+-
+-    /** Start document. */
+-    public void startDocument(XMLLocator locator, String encoding, Augmentations augs) 
+-        throws XNIException {
+-        startDocument(locator, encoding, null, augs);
+-    } // startDocument(XMLLocator,String,Augmentations)
+-
+-    /** Start prefix mapping. */
+-    public void startPrefixMapping(String prefix, String uri, Augmentations augs)
+-        throws XNIException {
+-        
+-        // check for end of document
+-        if (fSeenRootElementEnd) {
+-            return;
+-        }
+-
+-        // call handler
+-        if (fDocumentHandler != null) {
+-        	XercesBridge.getInstance().XMLDocumentHandler_startPrefixMapping(fDocumentHandler, prefix, uri, augs);
+-        }
+-    
+-    } // startPrefixMapping(String,String,Augmentations)
+-
+-    /** End prefix mapping. */
+-    public void endPrefixMapping(String prefix, Augmentations augs)
+-        throws XNIException {
+-        
+-        // check for end of document
+-        if (fSeenRootElementEnd) {
+-            return;
+-        }
+-
+-        // call handler
+-        if (fDocumentHandler != null) {
+-        	XercesBridge.getInstance().XMLDocumentHandler_endPrefixMapping(fDocumentHandler, prefix, augs);
+-        }
+-    
+-    } // endPrefixMapping(String,Augmentations)
+-
+-    //
+-    // Protected methods
+-    //
+-
+-    /** Returns an HTML element. */
+-    protected HTMLElements.Element getElement(final QName elementName) {
+-    	String name = elementName.rawname;
+-        if (fNamespaces && NamespaceBinder.XHTML_1_0_URI.equals(elementName.uri)) {
+-            int index = name.indexOf(':');
+-            if (index != -1) {
+-                name = name.substring(index+1);
+-            }
+-        }
+-        return HTMLElements.getElement(name);
+-    } // getElement(String):HTMLElements.Element
+-
+-    /** Call document handler start element. */
+-    protected final void callStartElement(QName element, XMLAttributes attrs,
+-                                          Augmentations augs) 
+-        throws XNIException {
+-        fDocumentHandler.startElement(element, attrs, augs);
+-    } // callStartElement(QName,XMLAttributes,Augmentations)
+-
+-    /** Call document handler end element. */
+-    protected final void callEndElement(QName element, Augmentations augs) 
+-        throws XNIException {
+-        fDocumentHandler.endElement(element, augs);
+-    } // callEndElement(QName,Augmentations)
+-
+-    /**
+-     * Returns the depth of the open tag associated with the specified
+-     * element name or -1 if no matching element is found.
+-     *
+-     * @param element The element.
+-     */
+-    protected final int getElementDepth(HTMLElements.Element element) {
+-        final boolean container = element.isContainer();
+-        int depth = -1;
+-        for (int i = fElementStack.top - 1; i >=fragmentContextStackSize_; i--) {
+-            Info info = fElementStack.data[i];
+-            if (info.element.code == element.code) {
+-                depth = fElementStack.top - i;
+-                break;
+-            }
+-            if (!container && (element.nestable && info.element.isBlock())) {
+-                break;
+-            }
+-        }
+-        return depth;
+-    } // getElementDepth(HTMLElements.Element)
+-
+-    /**
+-     * Returns the depth of the open tag associated with the specified
+-     * element parent names or -1 if no matching element is found.
+-     *
+-     * @param parents The parent elements.
+-     */
+-    protected int getParentDepth(HTMLElements.Element[] parents, short bounds) {
+-        if (parents != null) {
+-            for (int i = fElementStack.top - 1; i >= 0; i--) {
+-                Info info = fElementStack.data[i];
+-                if (info.element.code == bounds) {
+-                    break;
+-                }
+-                for (int j = 0; j < parents.length; j++) {
+-                    if (info.element.code == parents[j].code) {
+-                        return fElementStack.top - i;
+-                    }
+-                }
+-            }
+-        }
+-        return -1;
+-    } // getParentDepth(HTMLElements.Element[],short):int
+-
+-    /** Returns a set of empty attributes. */
+-    protected final XMLAttributes emptyAttributes() {
+-        fEmptyAttrs.removeAllAttributes();
+-        return fEmptyAttrs;
+-    } // emptyAttributes():XMLAttributes
+-
+-    /** Returns an augmentations object with a synthesized item added. */
+-    protected final Augmentations synthesizedAugs() {
+-        HTMLAugmentations augs = null;
+-        if (fAugmentations) {
+-            augs = fInfosetAugs;
+-            augs.removeAllItems();
+-            augs.putItem(AUGMENTATIONS, SYNTHESIZED_ITEM);
+-        }
+-        return augs;
+-    } // synthesizedAugs():Augmentations
+-
+-    //
+-    // Protected static methods
+-    //
+-
+-    /** Modifies the given name based on the specified mode. */
+-    protected static final String modifyName(String name, short mode) {
+-        switch (mode) {
+-            case NAMES_UPPERCASE: return name.toUpperCase();
+-            case NAMES_LOWERCASE: return name.toLowerCase();
+-        }
+-        return name;
+-    } // modifyName(String,short):String
+-
+-    /**
+-     * Converts HTML names string value to constant value. 
+-     *
+-     * @see #NAMES_NO_CHANGE
+-     * @see #NAMES_LOWERCASE
+-     * @see #NAMES_UPPERCASE
+-     */
+-    protected static final short getNamesValue(String value) {
+-        if (value.equals("lower")) {
+-            return NAMES_LOWERCASE;
+-        }
+-        if (value.equals("upper")) {
+-            return NAMES_UPPERCASE;
+-        }
+-        return NAMES_NO_CHANGE;
+-    } // getNamesValue(String):short
+-
+-    //
+-    // Classes
+-    //
+-
+-    /**
+-     * Element info for each start element. This information is used when
+-     * closing unbalanced inline elements. For example:
+-     * <pre>
+-     * &lt;i>unbalanced &lt;b>HTML&lt;/i> content&lt;/b>
+-     * </pre>
+-     * <p>
+-     * It seems that it is a waste of processing and memory to copy the 
+-     * attributes for every start element even if there are no unbalanced 
+-     * inline elements in the document. However, if the attributes are
+-     * <em>not</em> saved, then important attributes such as style
+-     * information would be lost.
+-     *
+-     * @author Andy Clark
+-     */
+-    public static class Info {
+-
+-        //
+-        // Data
+-        //
+-
+-        /** The element. */
+-        public HTMLElements.Element element;
+-
+-        /** The element qualified name. */
+-        public QName qname;
+-
+-        /** The element attributes. */
+-        public XMLAttributes attributes;
+-
+-        //
+-        // Constructors
+-        //
+-
+-        /**
+-         * Creates an element information object.
+-         * <p>
+-         * <strong>Note:</strong>
+-         * This constructor makes a copy of the element information.
+-         *
+-         * @param element The element qualified name.
+-         */
+-        public Info(HTMLElements.Element element, QName qname) {
+-            this(element, qname, null);
+-        } // <init>(HTMLElements.Element,QName)
+-
+-        /**
+-         * Creates an element information object.
+-         * <p>
+-         * <strong>Note:</strong>
+-         * This constructor makes a copy of the element information.
+-         *
+-         * @param element The element qualified name.
+-         * @param attributes The element attributes.
+-         */
+-        public Info(HTMLElements.Element element,
+-                    QName qname, XMLAttributes attributes) {
+-            this.element = element;
+-            this.qname = new QName(qname);
+-            if (attributes != null) {
+-                int length = attributes.getLength();
+-                if (length > 0) {
+-                    QName aqname = new QName();
+-                    XMLAttributes newattrs = new XMLAttributesImpl();
+-                    for (int i = 0; i < length; i++) {
+-                        attributes.getName(i, aqname);
+-                        String type = attributes.getType(i);
+-                        String value = attributes.getValue(i);
+-                        String nonNormalizedValue = attributes.getNonNormalizedValue(i);
+-                        boolean specified = attributes.isSpecified(i);
+-                        newattrs.addAttribute(aqname, type, value);
+-                        newattrs.setNonNormalizedValue(i, nonNormalizedValue);
+-                        newattrs.setSpecified(i, specified);
+-                    }
+-                    this.attributes = newattrs;
+-                }
+-            }
+-        } // <init>(HTMLElements.Element,QName,XMLAttributes)
+-
+-        /**
+-         * Simple representation to make debugging easier
+-         */
+-        public String toString() {
+-        	return super.toString() + qname;
+-        }
+-    } // class Info
+-
+-    /** Unsynchronized stack of element information. */
+-    public static class InfoStack {
+-
+-        //
+-        // Data
+-        //
+-
+-        /** The top of the stack. */
+-        public int top;
+-
+-        /** The stack data. */
+-        public Info[] data = new Info[10];
+-
+-        //
+-        // Public methods
+-        //
+-
+-        /** Pushes element information onto the stack. */
+-        public void push(Info info) {
+-            if (top == data.length) {
+-                Info[] newarray = new Info[top + 10];
+-                System.arraycopy(data, 0, newarray, 0, top);
+-                data = newarray;
+-            }
+-            data[top++] = info;
+-        } // push(Info)
+-
+-        /** Peeks at the top of the stack. */
+-        public Info peek() {
+-            return data[top-1];
+-        } // peek():Info
+-
+-        /** Pops the top item off of the stack. */
+-        public Info pop() {
+-            return data[--top];
+-        } // pop():Info
+-        
+-        /**
+-         * Simple representation to make debugging easier
+-         */
+-        public String toString() {
+-        	final StringBuffer sb = new StringBuffer("InfoStack(");
+-        	for (int i=top-1; i>=0; --i) {
+-        		sb.append(data[i]);
+-        		if (i != 0)
+-        			sb.append(", ");
+-        	}
+-        	sb.append(")");
+-        	return sb.toString();
+-        }
+-
+-
+-    } // class InfoStack
+-
+-	void setTagBalancingListener(final HTMLTagBalancingListener tagBalancingListener) {
+-		this.tagBalancingListener = tagBalancingListener;
+-	}
+-
+-	/**
+-	 * Notifies the tagBalancingListener (if any) of an ignored start element
+-	 */
+-    private void notifyDiscardedStartElement(final QName elem, final XMLAttributes attrs,
+-    		final Augmentations augs) {
+-    	if (tagBalancingListener != null)
+-    		tagBalancingListener.ignoredStartElement(elem, attrs, augs);
+-	}
+-
+-	/**
+-	 * Notifies the tagBalancingListener (if any) of an ignored end element
+-	 */
+-    private void notifyDiscardedEndElement(final QName element, final Augmentations augs) {
+-    	if (tagBalancingListener != null)
+-    		tagBalancingListener.ignoredEndElement(element, augs);
+-	}
+-
+-    /**
+-     * Structure to hold information about an element placed in buffer to be comsumed later
+-     */
+-    static class ElementEntry {
+-    	private final QName name_;
+-    	private final Augmentations augs_;
+-    	ElementEntry(final QName element, final Augmentations augs) {
+-    		name_ = new QName(element);
+-    		augs_ = (augs == null) ? null : new HTMLAugmentations(augs);
+-    	}
+-    }
+-} // class HTMLTagBalancer
diff --git a/boilerpipe-1.2.0.pom b/boilerpipe-1.2.0.pom
new file mode 100644
index 0000000..cc3f8e2
--- /dev/null
+++ b/boilerpipe-1.2.0.pom
@@ -0,0 +1,35 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>de.l3s.boilerpipe</groupId>
+  <artifactId>boilerpipe</artifactId>
+  <packaging>jar</packaging>
+  <version>1.2.0</version>
+  <url>http://code.google.com/p/boilerpipe/</url>
+  <licenses>
+  	<license>
+  	  <name>Apache License 2.0</name>
+  	</license>
+  </licenses>
+  <name>Boilerpipe -- Boilerplate Removal and Fulltext Extraction from HTML pages</name>
+  <description>The boilerpipe library provides algorithms to detect and remove the surplus "clutter" (boilerplate, templates) around the main textual content of a web page.
+
+The library already provides specific strategies for common tasks (for example: news article extraction) and may also be easily extended for individual problem settings.
+
+Extracting content is very fast (milliseconds), just needs the input document (no global or site-level information required) and is usually quite accurate.
+
+Boilerpipe is a Java library written by Christian Kohlschütter. It is released under the Apache License 2.0.
+
+The algorithms used by the library are based on (and extending) some concepts of the paper "Boilerplate Detection using Shallow Text Features" by Christian Kohlschütter et al., presented at WSDM 2010 -- The Third ACM International Conference on Web Search and Data Mining New York City, NY USA.
+  </description>
+  <scm>
+    <connection>scm:svn:http://boilerpipe.googlecode.com/svn/trunk/</connection>
+    <url>http://code.google.com/p/boilerpipe/source/browse/</url>
+  </scm>
+  <developers>
+    <developer>
+      <name>Christian Kohlschütter</name>
+    </developer>
+  </developers>
+</project>
diff --git a/boilerpipe.spec b/boilerpipe.spec
new file mode 100644
index 0000000..e1a9fd4
--- /dev/null
+++ b/boilerpipe.spec
@@ -0,0 +1,152 @@
+Name:          boilerpipe
+Version:       1.2.0
+Release:       1%{?dist}
+Summary:       Boilerplate Removal and Fulltext Extraction from HTML pages
+License:       ASL 2.0
+Url:           http://code.google.com/p/boilerpipe/
+Source0:       http://boilerpipe.googlecode.com/files/%{name}-%{version}-src.tar.gz
+Source1:       http://boilerpipe.googlecode.com/svn/repo/de/l3s/%{name}/%{name}/%{version}/%{name}-%{version}.pom
+# use system libraries
+Patch0:        %{name}-1.2.0-libdir-patch
+# remove embedded nekohtml
+Patch1:        %{name}-1.2.0-nekohtml-patch
+
+BuildRequires: java-devel
+BuildRequires: javapackages-tools
+
+BuildRequires: ant
+BuildRequires: nekohtml
+BuildRequires: xerces-j2
+
+Requires:      nekohtml
+
+Requires:      java
+Requires:      javapackages-tools
+BuildArch:     noarch
+
+%description
+The boilerpipe library provides algorithms to detect and
+remove the surplus "clutter" (boilerplate, templates)
+around the main textual content of a web page.
+
+The library already provides specific strategies 
+for common tasks (for example: news article extraction) and
+may also be easily extended for individual problem settings.
+
+Extracting content is very fast (milliseconds), just needs the
+input document (no global or site-level information required) and
+is usually quite accurate. 
+
+%package javadoc
+Summary:       Javadoc for %{name}
+
+%description javadoc
+This package contains javadoc for %{name}.
+
+%prep
+%setup -q
+find . -iname '*.jar' -delete
+find . -iname '*.class' -delete
+
+%patch0 -p0
+cp %{SOURCE1} pom.xml
+%patch1 -p1
+
+# fix non ASCII chars: run native2ascii once per file, same path as input and output
+for s in \
+ src/main/de/l3s/boilerpipe/BoilerpipeInput.java \
+ src/main/de/l3s/boilerpipe/BoilerpipeFilter.java \
+ src/main/de/l3s/boilerpipe/BoilerpipeExtractor.java \
+ src/main/de/l3s/boilerpipe/BoilerpipeProcessingException.java \
+ src/main/de/l3s/boilerpipe/conditions/TextBlockCondition.java \
+ src/main/de/l3s/boilerpipe/document/TextBlock.java \
+ src/main/de/l3s/boilerpipe/document/TextDocumentStatistics.java \
+ src/main/de/l3s/boilerpipe/document/TextDocument.java \
+ src/main/de/l3s/boilerpipe/estimators/SimpleEstimator.java \
+ src/main/de/l3s/boilerpipe/extractors/LargestContentExtractor.java \
+ src/main/de/l3s/boilerpipe/extractors/DefaultExtractor.java \
+ src/main/de/l3s/boilerpipe/extractors/NumWordsRulesExtractor.java \
+ src/main/de/l3s/boilerpipe/extractors/KeepEverythingWithMinKWordsExtractor.java \
+ src/main/de/l3s/boilerpipe/extractors/ExtractorBase.java \
+ src/main/de/l3s/boilerpipe/extractors/ArticleSentencesExtractor.java \
+ src/main/de/l3s/boilerpipe/extractors/CommonExtractors.java \
+ src/main/de/l3s/boilerpipe/extractors/CanolaExtractor.java \
+ src/main/de/l3s/boilerpipe/extractors/ArticleExtractor.java \
+ src/main/de/l3s/boilerpipe/extractors/KeepEverythingExtractor.java \
+ src/main/de/l3s/boilerpipe/filters/english/HeuristicFilterBase.java \
+ src/main/de/l3s/boilerpipe/filters/english/KeepLargestFulltextBlockFilter.java \
+ src/main/de/l3s/boilerpipe/filters/english/TerminatingBlocksFinder.java \
+ src/main/de/l3s/boilerpipe/filters/english/IgnoreBlocksAfterContentFilter.java \
+ src/main/de/l3s/boilerpipe/filters/english/IgnoreBlocksAfterContentFromEndFilter.java \
+ src/main/de/l3s/boilerpipe/filters/english/DensityRulesClassifier.java \
+ src/main/de/l3s/boilerpipe/filters/english/MinFulltextWordsFilter.java \
+ src/main/de/l3s/boilerpipe/filters/english/NumWordsRulesClassifier.java \
+ src/main/de/l3s/boilerpipe/filters/heuristics/SimpleBlockFusionProcessor.java \
+ src/main/de/l3s/boilerpipe/filters/heuristics/BlockProximityFusion.java \
+ src/main/de/l3s/boilerpipe/filters/heuristics/KeepLargestBlockFilter.java \
+ src/main/de/l3s/boilerpipe/filters/heuristics/DocumentTitleMatchClassifier.java \
+ src/main/de/l3s/boilerpipe/filters/heuristics/LabelFusion.java \
+ src/main/de/l3s/boilerpipe/filters/heuristics/AddPrecedingLabelsFilter.java \
+ src/main/de/l3s/boilerpipe/filters/heuristics/ExpandTitleToContentFilter.java \
+ src/main/de/l3s/boilerpipe/filters/heuristics/ContentFusion.java \
+ src/main/de/l3s/boilerpipe/filters/simple/MinWordsFilter.java \
+ src/main/de/l3s/boilerpipe/filters/simple/LabelToBoilerplateFilter.java \
+ src/main/de/l3s/boilerpipe/filters/simple/LabelToContentFilter.java \
+ src/main/de/l3s/boilerpipe/filters/simple/InvertedFilter.java \
+ src/main/de/l3s/boilerpipe/filters/simple/MinClauseWordsFilter.java \
+ src/main/de/l3s/boilerpipe/filters/simple/BoilerplateBlockFilter.java \
+ src/main/de/l3s/boilerpipe/filters/simple/SplitParagraphBlocksFilter.java \
+ src/main/de/l3s/boilerpipe/filters/simple/MarkEverythingContentFilter.java \
+ src/main/de/l3s/boilerpipe/labels/DefaultLabels.java \
+ src/main/de/l3s/boilerpipe/labels/ConditionalLabelAction.java \
+ src/main/de/l3s/boilerpipe/labels/LabelAction.java \
+ src/main/de/l3s/boilerpipe/sax/BoilerpipeSAXInput.java \
+ src/main/de/l3s/boilerpipe/sax/HTMLHighlighter.java \
+ src/main/de/l3s/boilerpipe/sax/BoilerpipeHTMLContentHandler.java \
+ src/main/de/l3s/boilerpipe/sax/BoilerpipeHTMLParser.java \
+ src/main/de/l3s/boilerpipe/sax/TagActionMap.java \
+ src/main/de/l3s/boilerpipe/sax/InputSourceable.java \
+ src/main/de/l3s/boilerpipe/sax/HTMLDocument.java \
+ src/main/de/l3s/boilerpipe/sax/CommonTagActions.java \
+ src/main/de/l3s/boilerpipe/sax/DefaultTagActionMap.java \
+ src/main/de/l3s/boilerpipe/sax/HTMLFetcher.java \
+ src/main/de/l3s/boilerpipe/sax/TagAction.java \
+ src/main/de/l3s/boilerpipe/sax/MarkupTagAction.java \
+ src/main/de/l3s/boilerpipe/util/UnicodeTokenizer.java
+do
+  native2ascii -encoding UTF8 "${s}" "${s}"
+done
+
+%build
+
+ant
+
+%install
+
+mkdir -p %{buildroot}%{_javadir}
+install -m 644 dist/%{name}-%{version}.jar \
+  %{buildroot}%{_javadir}/%{name}.jar
+install -m 644 dist/%{name}-demo-%{version}.jar \
+  %{buildroot}%{_javadir}/%{name}-demo.jar
+  
+mkdir -p %{buildroot}%{_mavenpomdir}
+install -pm 644 pom.xml %{buildroot}%{_mavenpomdir}/JPP-%{name}.pom
+%add_maven_depmap
+
+mkdir -p %{buildroot}%{_javadocdir}/%{name}
+cp -rp javadoc/*/* %{buildroot}%{_javadocdir}/%{name}
+
+%files
+%{_javadir}/%{name}.jar
+%{_javadir}/%{name}-demo.jar
+%{_mavenpomdir}/JPP-%{name}.pom
+%{_mavendepmapfragdir}/%{name}
+%doc LICENSE.txt NOTICE.txt
+
+%files javadoc
+%{_javadocdir}/%{name}
+%doc LICENSE.txt NOTICE.txt
+
+%changelog
+* Mon Jan 21 2013 gil cattaneo <puntogil at libero.it> 1.2.0-1
+- initial rpm
\ No newline at end of file
diff --git a/sources b/sources
index e69de29..24c7069 100644
--- a/sources
+++ b/sources
@@ -0,0 +1 @@
+9a90e9857bafcb31e9b21e3875e02701  boilerpipe-1.2.0-src.tar.gz


More information about the scm-commits mailing list