ignatenkobrain pushed to beignet (f22). "update to latest head and fix licensing issues (..more)"

notifications at fedoraproject.org notifications at fedoraproject.org
Tue May 19 10:03:43 UTC 2015


From bbfccf3794ded8b19d0e05a8aefa85c7cf20b61c Mon Sep 17 00:00:00 2001
From: Igor Gnatenko <i.gnatenko.brain at gmail.com>
Date: Tue, 19 May 2015 12:42:33 +0300
Subject: update to latest head and fix licensing issues

Signed-off-by: Igor Gnatenko <i.gnatenko.brain at gmail.com>

diff --git a/0001-Remove-some-LGPL-incompatible-code.patch b/0001-Remove-some-LGPL-incompatible-code.patch
new file mode 100644
index 0000000..b0fb06d
--- /dev/null
+++ b/0001-Remove-some-LGPL-incompatible-code.patch
@@ -0,0 +1,1506 @@
+From 347116ef2855e73c25dc09c0a8ee1c7f58c46099 Mon Sep 17 00:00:00 2001
+From: Zhigang Gong <zhigang.gong at intel.com>
+Date: Tue, 19 May 2015 10:36:03 +0800
+Subject: [PATCH 1/9] Remove some LGPL incompatible code.
+
+Signed-off-by: Zhigang Gong <zhigang.gong at intel.com>
+---
+ backend/src/CMakeLists.txt             |    2 -
+ backend/src/ir/structural_analysis.cpp | 1092 --------------------------------
+ backend/src/ir/structural_analysis.hpp |  342 ----------
+ backend/src/llvm/llvm_to_gen.cpp       |   11 -
+ 4 files changed, 1447 deletions(-)
+ delete mode 100644 backend/src/ir/structural_analysis.cpp
+ delete mode 100644 backend/src/ir/structural_analysis.hpp
+
+diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
+index a6736ec..45f18d8 100644
+--- a/backend/src/CMakeLists.txt
++++ b/backend/src/CMakeLists.txt
+@@ -66,8 +66,6 @@ set (GBE_SRC
+     ir/lowering.hpp
+     ir/printf.cpp
+     ir/printf.hpp
+-    ir/structural_analysis.cpp
+-    ir/structural_analysis.hpp
+     ir/immediate.hpp
+     ir/immediate.cpp
+     backend/context.cpp
+diff --git a/backend/src/ir/structural_analysis.cpp b/backend/src/ir/structural_analysis.cpp
+deleted file mode 100644
+index 101570a..0000000
+--- a/backend/src/ir/structural_analysis.cpp
++++ /dev/null
+@@ -1,1092 +0,0 @@
+-/*
+- * structural_analysis.hpp
+- * This code is derived from the ControlTree.h and ControlTree.cpp of
+- * project gpuocelot by Yongjia Zhang.
+- * The original copyright of gpuocelot appears below in its entirety.
+- */
+-
+-/*
+- * Copyright 2011
+- * GEORGIA TECH RESEARCH CORPORATION
+- * ALL RIGHTS RESERVED
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions are
+- * met:
+- *     * Redistributions of source code must retain the above copyright
+- * notice,   this list of conditions and the following disclaimers.
+- *     * Redistributions in binary form must reproduce the above copyright
+- * notice, this list of conditions and the following disclaimers in the
+- *       documentation and/or other materials provided with the
+- * distribution.
+- *     * Neither the name of GEORGIA TECH RESEARCH CORPORATION nor the
+- * names of  its contributors may be used to endorse or promote
+- * products derived  from this software without specific prior
+- * written permission.
+- *
+- * THIS SOFTWARE IS PROVIDED BY GEORGIA TECH RESEARCH CORPORATION ''AS IS''
+- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GEORGIA TECH RESEARCH
+- * CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+- *
+- * You agree that the Software will not be shipped, transferred, exported,
+- * or re-exported directly into any country prohibited by the United States
+- * Export Administration Act and the regulations thereunder nor will be
+- * used for any purpose prohibited by the Act.
+- */
+-
+-
+-#include "structural_analysis.hpp"
+-
+-namespace analysis
+-{
+-  ControlTree::~ControlTree()
+-  {
+-    NodeVector::iterator iter = nodes.begin();
+-    NodeVector::iterator iter_end = nodes.end();
+-    while(iter != iter_end)
+-    {
+-      delete *iter;
+-      iter++;
+-    }
+-  }
+-  void ControlTree::handleSelfLoopNode(Node *loopnode, ir::LabelIndex& whileLabel)
+-  {
+-              //NodeList::iterator child_iter = (*it)->children.begin();
+-    ir::BasicBlock *pbb = loopnode->getExit();
+-    GBE_ASSERT(pbb->isLoopExit);
+-    ir::BasicBlock::iterator it = pbb->end();
+-    it--;
+-    if (pbb->hasExtraBra)
+-      it--;
+-    ir::BranchInstruction* pinsn = static_cast<ir::BranchInstruction *>(&*it);
+-
+-    if(!pinsn->isPredicated()){
+-      std::cout << "WARNING:" << "endless loop detected!" << std::endl;
+-      return;
+-    }
+-    ir::Register reg = pinsn->getPredicateIndex();
+-    /* since this node is an while node, so we remove the BRA instruction at the bottom of the exit BB of 'node',
+-     * and insert WHILE instead
+-     */
+-    whileLabel = pinsn->getLabelIndex();
+-    ir::Instruction insn = ir::WHILE(whileLabel, reg);
+-    ir::Instruction* p_new_insn = pbb->getParent().newInstruction(insn);
+-    pbb->insertAt(it, *p_new_insn);
+-    pbb->whileLabel = whileLabel;
+-    pbb->erase(it);
+-  }
+-
+-  /* recursive mark the bbs' variable needEndif, the bbs all belong to node.*/
+-  void ControlTree::markNeedIf(Node *node, bool status)
+-  {
+-    if(node->type() == BasicBlock)
+-    {
+-      ir::BasicBlock* bb = ((BasicBlockNode*)node)->getBasicBlock();
+-      bb->needIf = status;
+-      return;
+-    }
+-    NodeList::iterator it = node->children.begin();
+-    while(it != node->children.end())
+-    {
+-      markNeedIf(*it,status);
+-      it++;
+-    }
+-  }
+-
+-  /* recursive mark the bbs' variable needIf, the bbs all belong to node.*/
+-  void ControlTree::markNeedEndif(Node *node, bool status)
+-  {
+-    if(node->type() == BasicBlock)
+-    {
+-      ir::BasicBlock* bb = ((BasicBlockNode*)node)->getBasicBlock();
+-      bb->needEndif = status;
+-      return;
+-    }
+-
+-    NodeList::iterator it = node->children.begin();
+-    while(it != node->children.end())
+-    {
+-      markNeedEndif(*it, status);
+-      it++;
+-    }
+-  }
+-
+-  /* recursive mark the bbs' variable mark, the bbs all belong to node. */
+-  void ControlTree::markStructuredNodes(Node *node, bool status)
+-  {
+-    if(node->type() == BasicBlock)
+-    {
+-      BasicBlockNode* pbb = static_cast<BasicBlockNode *>(node);
+-      pbb->getBasicBlock()->belongToStructure = true;
+-    }
+-    node->mark = status;
+-    NodeList::iterator it = node->children.begin();
+-    while(it != node->children.end())
+-    {
+-      markStructuredNodes(*it, status);
+-      it++;
+-    }
+-  }
+-
+-  void ControlTree::handleIfNode(Node *node, ir::LabelIndex& matchingEndifLabel, ir::LabelIndex& matchingElseLabel)
+-  {
+-    ir::BasicBlock *pbb = node->getExit();
+-    ir::BranchInstruction* pinsn = static_cast<ir::BranchInstruction *>(pbb->getLastInstruction());
+-    ir::Register reg = pinsn->getPredicateIndex();
+-    ir::BasicBlock::iterator it = pbb->end();
+-    it--;
+-    /* since this node is an if node, so we remove the BRA instruction at the bottom of the exit BB of 'node',
+-     * and insert IF instead
+-     */
+-    pbb->erase(it);
+-    ir::Instruction insn = ir::IF(matchingElseLabel, reg, node->inversePredicate);
+-    ir::Instruction* p_new_insn = pbb->getParent().newInstruction(insn);
+-    pbb->append(*p_new_insn);
+-    pbb->matchingEndifLabel = matchingEndifLabel;
+-    pbb->matchingElseLabel = matchingElseLabel;
+-  }
+-
+-  void ControlTree::handleThenNode(Node *node, ir::LabelIndex& endiflabel)
+-  {
+-    ir::BasicBlock *pbb = node->getExit();
+-    ir::BasicBlock::iterator it = pbb->end();
+-    it--;
+-    ir::Instruction *p_last_insn = pbb->getLastInstruction();
+-
+-    endiflabel = fn->newLabel();
+-    //pbb->thisEndifLabel = endiflabel;
+-
+-    ir::Instruction insn = ir::ENDIF(endiflabel);
+-    ir::Instruction* p_new_insn = pbb->getParent().newInstruction(insn);
+-    // we need to insert ENDIF before the BRA(if exists).
+-    bool append_bra = false;
+-    if((*it).getOpcode() == ir::OP_BRA)
+-    {
+-      pbb->erase(it);
+-      append_bra = true;
+-    }
+-    pbb->append(*p_new_insn);
+-    if(append_bra)
+-      pbb->append(*p_last_insn);
+-  }
+-
+-
+-  void ControlTree::handleThenNode2(Node *node, Node *elsenode, ir::LabelIndex elseBBLabel)
+-  {
+-    ir::BasicBlock *pbb = node->getExit();
+-    ir::BasicBlock::iterator it = pbb->end();
+-    it--;
+-    if((*it).getOpcode() == ir::OP_BRA)
+-      pbb->erase(it);
+-
+-    if(node->getExit()->getNextBlock() == elsenode->getEntry())
+-      return;
+-
+-    // Add an unconditional jump to 'else' block
+-    ir::Instruction insn = ir::BRA(elseBBLabel);
+-    ir::Instruction* p_new_insn = pbb->getParent().newInstruction(insn);
+-    pbb->append(*p_new_insn);
+-  }
+-
+-
+-  void ControlTree::handleElseNode(Node* node, ir::LabelIndex& elselabel, ir::LabelIndex& endiflabel)
+-  {
+-    // to insert ENDIF properly
+-    handleThenNode(node, endiflabel);
+-
+-    ir::BasicBlock *pbb = node->getEntry();
+-    ir::BasicBlock::iterator it = pbb->begin();
+-    it++;
+-
+-    elselabel = fn->newLabel();
+-    pbb->thisElseLabel = elselabel;
+-
+-    // insert ELSE properly
+-    ir::Instruction insn = ir::ELSE(endiflabel);
+-    ir::Instruction* p_new_insn = pbb->getParent().newInstruction(insn);
+-
+-    pbb->insertAt(it, *p_new_insn);
+-  }
+-
+-
+-  void ControlTree::handleStructuredNodes()
+-  {
+-    NodeVector::iterator it;
+-    NodeVector::iterator end = nodes.end();
+-    NodeVector::iterator begin = nodes.begin();
+-    it = end;
+-    it--;
+-    NodeVector::reverse_iterator rit = nodes.rbegin();
+-    /* structured bbs only need if and endif insn to handle the execution
+-     * in structure entry and exit BasicBlock, so we process the nodes backward, since
+-     * the node at the back of nodes is always a 'not smaller' structure then
+-     * the ones before it. we mark the nodes which are sub-nodes of the node
+-     * we are dealing with, in order to ensure we are always handling the 'biggest'
+-     * structures */
+-    while(rit != nodes.rend())
+-    {
+-      if((*rit)->type() == IfThen || (*rit)->type() == IfElse|| (*rit)->type() == SelfLoop)
+-      {
+-        if(false == (*rit)->mark && (*rit)->canBeHandled)
+-        {
+-          markStructuredNodes(*rit, true);
+-          /* only the entry bb of this structure needs 'if' at backend and
+-           * only the exit bb of this structure needs 'endif' at backend
+-           * see comment about needEndif and needIf at function.hpp for detail. */
+-          markNeedEndif(*rit, false);
+-          markNeedIf(*rit, false);
+-          ir::BasicBlock* entry = (*rit)->getEntry();
+-          ir::BasicBlock* eexit = (*rit)->getExit();
+-          entry->needIf = true;
+-          eexit->needEndif = true;
+-          entry->endifLabel = fn->newLabel();
+-          eexit->endifLabel = entry->endifLabel;
+-          eexit->isStructureExit = true;
+-          eexit->matchingStructureEntry = entry;
+-        }
+-      }
+-      rit++;
+-    }
+-
+-    rit = nodes.rbegin();
+-    gbe::vector<ir::BasicBlock *> &blocks = fn->getBlocks();
+-    std::vector<ir::BasicBlock *> bbs;
+-    bbs.resize(blocks.size());
+-
+-    /* here insert the bras to the BBs, which would
+-     * simplify the reorder of basic blocks */
+-    for(size_t i = 0; i < blocks.size(); ++i)
+-    {
+-      bbs[i] = blocks[i];
+-      if(i != blocks.size() -1 &&
+-         (bbs[i]->getLastInstruction()->getOpcode() != ir::OP_BRA ||
+-         (bbs[i]->isStructureExit && bbs[i]->isLoopExit)))
+-      {
+-        ir::Instruction insn = ir::BRA(bbs[i]->getNextBlock()->getLabelIndex());
+-        ir::Instruction* pNewInsn = bbs[i]->getParent().newInstruction(insn);
+-        bbs[i]->append(*pNewInsn);
+-        if (bbs[i]->isStructureExit && bbs[i]->isLoopExit)
+-          bbs[i]->hasExtraBra = true;
+-      }
+-    }
+-
+-    /* now, reorder the basic blocks to reduce the unconditional jump we inserted whose
+-     * targets are the 'else' nodes. the algorithm is quite simple, just put the unstructured
+-     * BBs(maybe belong to another structure, but not this one) in front of the entry BB of
+-     * this structure in front of all the others and put the other unstructured BBs at the
+-     * back of the others. the sequence of structured is get through function getStructureSequence.
+-     */
+-    while(rit != nodes.rend())
+-    {
+-      if(((*rit)->type() == IfThen || (*rit)->type() == IfElse || (*rit)->type() == Block ||(*rit)->type() == SelfLoop) &&
+-          (*rit)->canBeHandled && (*rit)->mark == true)
+-      {
+-        markStructuredNodes(*rit, false);
+-        std::set<int> ns = getStructureBasicBlocksIndex(*rit, bbs);
+-        ir::BasicBlock *entry = (*rit)->getEntry();
+-
+-        int entryIndex = *(ns.begin());
+-        for(size_t i=0; i<bbs.size(); ++i)
+-        {
+-          if(bbs[i] == entry)
+-            entryIndex = i;
+-        }
+-
+-        std::set<int>::iterator iter = ns.begin();
+-        int index = *iter;
+-
+-        std::vector<ir::BasicBlock *> unstruSeqHead;
+-        std::vector<ir::BasicBlock *> unstruSeqTail;
+-
+-        iter = ns.begin();
+-        while(iter != ns.end())
+-        {
+-          if(index != *iter)
+-          {
+-            if(index < entryIndex)
+-              unstruSeqHead.push_back(bbs[index]);
+-            else
+-              unstruSeqTail.push_back(bbs[index]);
+-            index++;
+-          }
+-          else
+-          {
+-            index++;
+-            iter++;
+-          }
+-        }
+-
+-        std::vector<ir::BasicBlock *> struSeq;
+-        getStructureSequence(*rit, struSeq);
+-
+-        int firstindex = *(ns.begin());
+-        for(size_t i = 0; i < unstruSeqHead.size(); ++i)
+-          bbs[firstindex++] = unstruSeqHead[i];
+-        for(size_t i = 0; i < struSeq.size(); ++i)
+-          bbs[firstindex++] = struSeq[i];
+-        for(size_t i = 0; i < unstruSeqTail.size(); ++i)
+-          bbs[firstindex++] = unstruSeqTail[i];
+-      }
+-      rit++;
+-    }
+-
+-   /* now, erase the BRAs inserted before whose targets are their fallthrough blocks */
+-    for(size_t i=0; i<bbs.size(); ++i)
+-    {
+-      if(bbs[i]->getLastInstruction()->getOpcode() == ir::OP_BRA &&
+-         !((ir::BranchInstruction*)(bbs[i]->getLastInstruction()))->isPredicated())
+-      {
+-        if(((ir::BranchInstruction *)bbs[i]->getLastInstruction())->getLabelIndex() == bbs[i+1]->getLabelIndex())
+-        {
+-          ir::BasicBlock::iterator it= bbs[i]->end();
+-          it--;
+-
+-          bbs[i]->erase(it);
+-
+-          if (bbs[i]->hasExtraBra)
+-            bbs[i]->hasExtraBra = false;
+-        }
+-      }
+-    }
+-    for(size_t i=0; i<bbs.size(); ++i)
+-      blocks[i] = bbs[i];
+-
+-    fn->sortLabels();
+-    fn->computeCFG();
+-
+-    it = begin;
+-    while(it != end)
+-    {
+-      if((*it)->canBeHandled)
+-      {
+-        switch((*it)->type())
+-        {
+-          case IfThen:
+-            {
+-              NodeList::iterator child_iter = (*it)->children.end();
+-              ir::LabelIndex endiflabel;
+-              child_iter--;
+-              handleThenNode(*child_iter, endiflabel); // this call would pass out the proper endiflabel for handleIfNode's use.
+-              child_iter--;
+-              handleIfNode(*child_iter, endiflabel, endiflabel);
+-            }
+-            break;
+-
+-          case IfElse:
+-            {
+-              NodeList::iterator child_iter = (*it)->children.end();
+-              ir::LabelIndex endiflabel;
+-              ir::LabelIndex elselabel;
+-              NodeList::iterator else_node;
+-              child_iter--;
+-              else_node = child_iter;
+-              handleElseNode(*child_iter, elselabel, endiflabel);
+-              ir::LabelIndex elseBBLabel = (*child_iter)->getEntry()->getLabelIndex();
+-              child_iter--;
+-              handleThenNode2(*child_iter, *else_node, elseBBLabel);
+-              child_iter--;
+-              handleIfNode(*child_iter, endiflabel, elselabel);
+-            }
+-            break;
+-
+-          case SelfLoop:
+-            {
+-              ir::LabelIndex whilelabel;
+-              handleSelfLoopNode(*it, whilelabel);
+-            }
+-            break;
+-
+-          default:
+-            break;
+-        }
+-      }
+-
+-      it++;
+-    }
+-
+-  }
+-
+-  void ControlTree::getStructureSequence(Node *node, std::vector<ir::BasicBlock*> &seq)
+-  {
+-    /* in the control tree, for if-then, if node is before then node; for if-else, the
+-     * stored sequence is if-then-else, for block structure, the stored sequence is just
+-     * their executed sequence. so we could just get the structure sequence by recrusive
+-     * calls getStructureSequence to all the elements in children one by one.
+-     */
+-    if(node->type() == BasicBlock)
+-    {
+-      seq.push_back(((BasicBlockNode *)node)->getBasicBlock());
+-      return;
+-    }
+-
+-    NodeList::iterator iter = node->children.begin();
+-    while(iter != node->children.end())
+-    {
+-      getStructureSequence(*iter, seq);
+-      iter++;
+-    }
+-
+-  }
+-
+-
+-  std::set<int> ControlTree::getStructureBasicBlocksIndex(Node* node, std::vector<ir::BasicBlock *> &bbs)
+-  {
+-    std::set<int> result;
+-    if(node->type() == BasicBlock)
+-    {
+-      for(size_t i=0; i<bbs.size(); i++)
+-      {
+-        if(bbs[i] == ((BasicBlockNode *)node)->getBasicBlock())
+-        {
+-          result.insert(i);
+-          break;
+-        }
+-      }
+-      return result;
+-    }
+-    NodeList::iterator iter = (node->children).begin();
+-    NodeList::iterator end = (node->children).end();
+-    while(iter != end)
+-    {
+-      std::set<int> ret = getStructureBasicBlocksIndex(*iter, bbs);
+-      result.insert(ret.begin(), ret.end());
+-      iter++;
+-    }
+-    return result;
+-  }
+-
+-
+-  std::set<ir::BasicBlock *> ControlTree::getStructureBasicBlocks(Node *node)
+-  {
+-    std::set<ir::BasicBlock *> result;
+-    if(node->type() == BasicBlock)
+-    {
+-      result.insert(((BasicBlockNode *)node)->getBasicBlock());
+-      return result;
+-    }
+-    NodeList::iterator iter = (node->children).begin();
+-    NodeList::iterator end = (node->children).end();
+-    while(iter != end)
+-    {
+-      std::set<ir::BasicBlock *> ret = getStructureBasicBlocks(*iter);
+-      result.insert(ret.begin(), ret.end());
+-      iter++;
+-    }
+-    return result;
+-  }
+-
+-
+-  Node* ControlTree::insertNode(Node *p_node)
+-  {
+-    nodes.push_back(p_node);
+-    return p_node;
+-  }
+-
+-
+-  bool ControlTree::checkForBarrier(const ir::BasicBlock* bb)
+-  {
+-    ir::BasicBlock::const_iterator iter = bb->begin();
+-    ir::BasicBlock::const_iterator iter_end = bb->end();
+-    while(iter != iter_end)
+-    {
+-      if((*iter).getOpcode() == ir::OP_SYNC)
+-        return true;
+-      iter++;
+-    }
+-
+-    return false;
+-  }
+-
+-
+-  void ControlTree::getLiveIn(ir::BasicBlock& bb, std::set<ir::Register>& livein)
+-  {
+-    ir::BasicBlock::iterator iter = bb.begin();
+-    std::set<ir::Register> varKill;
+-    while(iter != bb.end())
+-    {
+-      ir::Instruction& insn = *iter;
+-      const uint32_t srcNum = insn.getSrcNum();
+-      const uint32_t dstNum = insn.getDstNum();
+-      for(uint32_t srcID = 0; srcID < srcNum; ++srcID)
+-      {
+-        const ir::Register reg = insn.getSrc(srcID);
+-        if(varKill.find(reg) == varKill.end())
+-          livein.insert(reg);
+-      }
+-      for(uint32_t dstID = 0; dstID < dstNum; ++dstID)
+-      {
+-        const ir::Register reg = insn.getDst(dstID);
+-        varKill.insert(reg);
+-      }
+-
+-      iter++;
+-    }
+-  }
+-
+-  void ControlTree::calculateNecessaryLiveout()
+-  {
+-    NodeVector::iterator iter = nodes.begin();
+-
+-    while(iter != nodes.end())
+-    {
+-      switch((*iter)->type())
+-      {
+-        case IfElse:
+-        {
+-          std::set<ir::BasicBlock *> bbs;
+-          NodeList::iterator thenIter = (*iter)->children.begin();
+-          thenIter++;
+-          bbs = getStructureBasicBlocks(*thenIter);
+-
+-          Node *elseNode = *((*iter)->children.rbegin());
+-          std::set<ir::Register> livein;
+-          getLiveIn(*(elseNode->getEntry()), livein);
+-
+-          std::set<ir::BasicBlock *>::iterator bbiter = bbs.begin();
+-          while(bbiter != bbs.end())
+-          {
+-            (*bbiter)->liveout.insert(livein.begin(), livein.end());
+-            bbiter++;
+-          }
+-        }
+-
+-        default:
+-          break;
+-      }
+-      iter++;
+-    }
+-  }
+-
+-
+-  void ControlTree::initializeNodes()
+-  {
+-    ir::BasicBlock& tmp_bb = fn->getTopBlock();
+-    ir::BasicBlock* p_tmp_bb = &tmp_bb;
+-    Node* p = NULL;
+-
+-    if(NULL != p_tmp_bb)
+-    {
+-      Node *p_tmp_node = new BasicBlockNode(p_tmp_bb);
+-      p_tmp_node->label = p_tmp_bb->getLabelIndex();
+-
+-      if(checkForBarrier(p_tmp_bb))
+-        p_tmp_node->hasBarrier() = true;
+-
+-      nodes.push_back(p_tmp_node);
+-      bbmap[p_tmp_bb] = p_tmp_node;
+-      p_tmp_bb = p_tmp_bb->getNextBlock();
+-      p = p_tmp_node;
+-    }
+-
+-    while(p_tmp_bb != NULL)
+-    {
+-      Node *p_tmp_node = new BasicBlockNode(p_tmp_bb);
+-      p_tmp_node->label = p_tmp_bb->getLabelIndex();
+-
+-      if(checkForBarrier(p_tmp_bb))
+-        p_tmp_node->hasBarrier() = true;
+-
+-      p->fallthrough() = p_tmp_node;
+-      p = p_tmp_node;
+-      nodes.push_back(p_tmp_node);
+-      bbmap[p_tmp_bb] = p_tmp_node;
+-      p_tmp_bb = p_tmp_bb->getNextBlock();
+-    }
+-
+-    if(NULL != p)
+-      p->fallthrough() = NULL;
+-
+-    p_tmp_bb = &tmp_bb;
+-
+-    this->nodes_entry = bbmap[p_tmp_bb];
+-
+-    while(p_tmp_bb != NULL)
+-    {
+-      ir::BlockSet::const_iterator iter_begin = p_tmp_bb->getPredecessorSet().begin();
+-      ir::BlockSet::const_iterator iter_end = p_tmp_bb->getPredecessorSet().end();
+-      while(iter_begin != iter_end)
+-      {
+-        bbmap[p_tmp_bb]->preds().insert(bbmap[*iter_begin]);
+-        iter_begin++;
+-      }
+-
+-      iter_begin = p_tmp_bb->getSuccessorSet().begin();
+-      iter_end = p_tmp_bb->getSuccessorSet().end();
+-      while(iter_begin != iter_end)
+-      {
+-        bbmap[p_tmp_bb]->succs().insert(bbmap[*iter_begin]);
+-        iter_begin++;
+-      }
+-
+-      p_tmp_bb = p_tmp_bb->getNextBlock();
+-    }
+-  }
+-
+-
+-  void ControlTree::DFSPostOrder(Node *start)
+-  {
+-    visited.insert(start);
+-    NodeSet::iterator y;
+-    NodeSet::iterator iter_begin = start->succs().begin();
+-    NodeSet::iterator iter_end = start->succs().end();
+-    for(y = iter_begin; y != iter_end; ++y )
+-    {
+-      if(visited.find(*y) != visited.end())
+-        continue;
+-      DFSPostOrder(*y);
+-    }
+-    post_order.push_back(start);
+-  }
+-
+-
+-  bool ControlTree::isCyclic(Node* node)
+-  {
+-    if(node->type() == NaturalLoop ||
+-       node->type() == WhileLoop ||
+-       node->type() == SelfLoop)
+-      return true;
+-
+-    return false;
+-  }
+-
+-
+-  bool ControlTree::isBackedge(const Node* head, const Node* tail)
+-  {
+-    const Node* match[] = {head, tail};
+-    NodeList::iterator n = find_first_of(post_order.begin(), post_order.end(), match, match + 2);
+-
+-    if(*n == head)
+-      return true;
+-    if(*n == tail)
+-      return false;
+-
+-    return false;
+-  }
+-
+-
+-  /* this algorithm is from Muchnick's textbook(sec 7.7) (Advanced Compiler Design and Implementation) */
+-  Node* ControlTree::acyclicRegionType(Node* node, NodeSet& nset)
+-  {
+-    nset.clear();
+-    Node *n;
+-    bool p, s, barrier;
+-    NodeList nodes;
+-
+-    n = node;
+-    p = true;
+-    s = (n->succs().size()==1);
+-    barrier = n->hasBarrier();
+-    while(p && s && !barrier)
+-    {
+-      if(nset.insert(n).second)
+-        nodes.push_back(n);
+-      n = *(n->succs().begin());
+-      barrier = n->hasBarrier();
+-      p = (n->preds().size() == 1);
+-      s = (n->succs().size() == 1);
+-    }
+-
+-    if(p && !barrier)
+-    {
+-      if(nset.insert(n).second)
+-        nodes.push_back(n);
+-    }
+-
+-    n = node;
+-    p = (n->preds().size() == 1);
+-    s = true;
+-    barrier = n->hasBarrier();
+-
+-    while(p && s && !barrier)
+-    {
+-      if(nset.insert(n).second)
+-        nodes.push_front(n);
+-      n = *(n->preds().begin());
+-      barrier = n->hasBarrier();
+-      p = (n->preds().size() == 1);
+-      s = (n->succs().size() == 1);
+-    }
+-
+-    if(s && !barrier)
+-    {
+-      if(nset.insert(n).second)
+-        nodes.push_front(n);
+-    }
+-
+-    node = n;
+-
+-    if(nodes.size() >=2 )
+-    {
+-      Node* p = new BlockNode(nodes);
+-      NodeList::iterator iter = nodes.begin();
+-      while(iter != nodes.end())
+-      {
+-        if((*iter)->canBeHandled == false)
+-        {
+-          p->canBeHandled = false;
+-          break;
+-        }
+-        iter++;
+-      }
+-
+-      return insertNode(p);
+-    }
+-
+-    else if(node->succs().size() == 2)
+-    {
+-      Node *m;
+-      m = *(node->succs().begin());
+-      n = *(++(node->succs().begin()));
+-
+-      /* check for if node then n */
+-      if( n->succs().size() == 1 &&
+-         n->preds().size() == 1 &&
+-         *(n->succs().begin()) == m &&
+-         !n->hasBarrier() && !node->hasBarrier())
+-      {
+-        nset.clear();
+-        nset.insert(node);
+-        nset.insert(n);
+-
+-        Node* p = new IfThenNode(node, n);
+-        if(node->fallthrough() == m)
+-          node->inversePredicate = false;
+-
+-        if(node->canBeHandled == false || n->canBeHandled == false)
+-          p->canBeHandled = false;
+-
+-        return insertNode(p);
+-      }
+-
+-      /* check for if node then m */
+-      if(m->succs().size() == 1 &&
+-         m->preds().size() == 1 &&
+-         *(m->succs().begin()) == n &&
+-         !m->hasBarrier() && !node->hasBarrier())
+-      {
+-        nset.clear();
+-        nset.insert(node);
+-        nset.insert(m);
+-
+-        Node* p = new IfThenNode(node, m);
+-        if(node->fallthrough() == n)
+-          node->inversePredicate = false;
+-
+-        if(node->canBeHandled == false || m->canBeHandled == false)
+-          p->canBeHandled = false;
+-
+-        return insertNode(p);
+-      }
+-
+-      /* check for if node then n else m */
+-      if(m->succs().size() == 1 && n->succs().size() == 1 &&
+-         m->preds().size() == 1 && n->preds().size() == 1 &&
+-         *(m->succs().begin()) == *(n->succs().begin()) &&
+-         node->fallthrough() == n && !m->hasBarrier() && !n->hasBarrier() && !node->hasBarrier())
+-      {
+-        nset.clear();
+-        nset.insert(node);
+-        nset.insert(n);
+-        nset.insert(m);
+-
+-        Node* p = new IfElseNode(node, n, m);
+-
+-        if(node->canBeHandled == false ||
+-           m->canBeHandled == false ||
+-           n->canBeHandled == false)
+-          p->canBeHandled = false;
+-
+-        return insertNode(p);
+-      }
+-
+-      /* check for if node then m else n */
+-      if(m->succs().size() == 1 && n->succs().size() == 1 &&
+-         m->preds().size() == 1 && n->preds().size() == 1 &&
+-         *(m->succs().begin()) == *(n->succs().begin()) &&
+-         node->fallthrough() == m && !m->hasBarrier() && !n->hasBarrier() &&!node->hasBarrier())
+-      {
+-        nset.clear();
+-        nset.insert(node);
+-        nset.insert(m);
+-        nset.insert(n);
+-
+-        Node* p = new IfElseNode(node, m, n);
+-
+-        if(node->canBeHandled == false ||
+-           m->canBeHandled == false ||
+-           n->canBeHandled == false)
+-          p->canBeHandled = false;
+-        return insertNode(p);
+-      }
+-    }
+-
+-    return NULL;
+-  }
+-
+-  bool ControlTree::path(Node *from, Node *to, Node *notthrough)
+-  {
+-
+-    if(from == notthrough || visited.find(from) != visited.end())
+-      return false;
+-
+-    if(from == to)
+-      return true;
+-
+-    visited.insert(from);
+-
+-    for(NodeSet::const_iterator s = from->succs().begin(); s != from->succs().end(); s++)
+-    {
+-      if(path(*s, to, notthrough))
+-        return true;
+-    }
+-
+-    return false;
+-  }
+-
+-
+-  Node * ControlTree::cyclicRegionType(Node *node, NodeList &nset)
+-  {
+-    /* check for self-loop */
+-    if(nset.size() == 1)
+-    {
+-      if(node->succs().find(node) != node->succs().end())
+-      {
+-        Node* p = new SelfLoopNode(node);
+-
+-        p->canBeHandled = true;
+-        node->getExit()->isLoopExit = true;
+-        return insertNode(p);
+-      }
+-      else
+-        return NULL;
+-    }
+-
+-    //FIXME: as our IR could only handle self loop, the while loop node
+-    //is disabled to avoid performace regression by the path function.
+-#if 0
+-    /* check for improper region */
+-    for(NodeList::const_iterator m = nset.begin(); m != nset.end(); m++)
+-    {
+-      visited.clear();
+-      if(!path(node, *m))
+-        return NULL;
+-    }
+-
+-    /* check for while loop */
+-    NodeList::iterator m;
+-    for(m = nset.begin(); m != nset.end(); ++m)
+-    {
+-      if(*m == node)
+-        continue;
+-      if(node->succs().size() == 2 && (*m)->succs().size() == 1 &&
+-         node->preds().size() == 2 && (*m)->preds().size() == 1)
+-      {
+-        Node* p = new WhileLoopNode(node, *m);
+-
+-        p->canBeHandled = false;
+-
+-        return insertNode(p);
+-      }
+-    }
+-#endif
+-
+-    return NULL;
+-  }
+-
+-
+-  /* this algorithm is from Muchnick's textbook(sec 7.7) (Advanced Compiler Design and Implementation) */
+-  void ControlTree::reduce(Node* node,  NodeSet nodeSet)
+-  {
+-    NodeSet::iterator n;
+-    for(n = nodeSet.begin(); n != nodeSet.end(); n++)
+-    {
+-      NodeSet::iterator p;
+-      for(p = (*n)->preds().begin(); p != (*n)->preds().end(); p++)
+-      {
+-        if(nodeSet.find(*p) != nodeSet.end())
+-          continue;
+-
+-        (*p)->succs().erase(*n);
+-
+-        (*p)->succs().insert(node);
+-        node->preds().insert(*p);
+-
+-        if((*p)->fallthrough() == *n)
+-          (*p)->fallthrough() = node;
+-      }
+-
+-
+-     NodeSet::iterator s;
+-     for(s = (*n)->succs().begin(); s != (*n)->succs().end(); s++)
+-     {
+-        if(nodeSet.find(*s) != nodeSet.end())
+-          continue;
+-
+-       (*s)->preds().erase(*n);
+-
+-       (*s)->preds().insert(node);
+-       node->succs().insert(*s);
+-
+-       if((*n)->fallthrough() == *s)
+-         node->fallthrough() = *s;
+-     }
+-    }
+-
+-    if(!isCyclic(node))
+-    {
+-      for(n = nodeSet.begin(); n != nodeSet.end(); n++)
+-      {
+-        bool shouldbreak = false;
+-        NodeSet::iterator p;
+-        for(p = (*n)->preds().begin(); p != (*n)->preds().end(); p++)
+-        {
+-          if(nodeSet.find(*p) == nodeSet.end())
+-            continue;
+-
+-          if(isBackedge(*p, *n))
+-          {
+-            node->preds().insert(node);
+-            node->succs().insert(node);
+-
+-            shouldbreak = true;
+-            break;
+-          }
+-        }
+-
+-        if(shouldbreak)
+-          break;
+-      }
+-    }
+-
+-    compact(node, nodeSet);
+-  }
+-
+-
+-  /* this algorithm is from Muchnick's textbook(sec 7.7) (Advanced Compiler Design and Implementation) */
+-  void ControlTree::compact(Node* node,  NodeSet nodeSet)
+-  {
+-    NodeList::iterator n, pos;
+-    for(n = post_order.begin(); n!= post_order.end() && !nodeSet.empty();)
+-    {
+-      if(!nodeSet.erase(*n))
+-      {
+-        n++;
+-        continue;
+-      }
+-
+-      n = post_order.erase(n);
+-      pos = n;
+-    }
+-
+-    post_ctr = post_order.insert(pos, node);
+-  }
+-
+-
+-  /* this algorithm is from Muchnick's textbook(sec 7.7) (Advanced Compiler Design and Implementation) */
+-  void ControlTree::structuralAnalysis(Node *entry)
+-  {
+-    Node* n;
+-    NodeSet nset;
+-    NodeList reachUnder;
+-    bool changed;
+-    do
+-    {
+-      changed = false;
+-      post_order.clear();
+-      visited.clear();
+-
+-      DFSPostOrder(entry);
+-      post_ctr = post_order.begin();
+-
+-      while(post_order.size() > 1 && post_ctr != post_order.end())
+-      {
+-        n = *post_ctr;
+-        Node* region = acyclicRegionType(n, nset);
+-
+-        if( NULL != region)
+-        {
+-          changed = true;
+-
+-          reduce(region, nset);
+-
+-          if(nset.find(entry) != nset.end())
+-            entry = region;
+-        }
+-        else
+-        {
+-          reachUnder.clear();
+-          nset.clear();
+-
+-          //reuse the loop info from llvm gaterLoopInfo.
+-          const gbe::vector<ir::Loop *> &loops = fn->getLoops();
+-          if(loops.size() == 0){
+-            post_ctr++;
+-            continue;
+-          }
+-
+-          Node* loop_header = NULL;
+-          //if n is basic block node, query the llvm loop info to find the loop whoose loop header is n;
+-          if(n->type() == BasicBlock){
+-            for (auto l : loops) {
+-              ir::BasicBlock &a = fn->getBlock(l->bbs[0]);
+-              loop_header = bbmap.find(&a)->second;
+-
+-              if(loop_header == n){
+-                for (auto bb : l->bbs) {
+-                  ir::BasicBlock &tmp = fn->getBlock(bb);
+-                  Node* node_ = bbmap.find(&tmp)->second;
+-                  reachUnder.push_front(node_);
+-                  nset.insert(node_);
+-                }
+-                break;
+-              }
+-            }
+-          }else{
+-          //n is compacted node, it would have a successor pointed to itself for self loop.
+-            if(n->succs().find(n) != n->succs().end())
+-            {
+-                reachUnder.push_front(n);
+-                nset.insert(n);
+-            }
+-          }
+-
+-          region = cyclicRegionType(n, reachUnder);
+-
+-          if(NULL != region)
+-          {
+-            reduce(region, nset);
+-            changed = true;
+-
+-            if(nset.find(entry) != nset.end())
+-              entry = region;
+-          }
+-          else
+-          {
+-            post_ctr++;
+-          }
+-        }
+-      }
+-
+-      if(!changed)
+-        break;
+-
+-    } while(post_order.size()>1);
+-
+-  }
+-
+-  void ControlTree::analyze()
+-  {
+-    initializeNodes();
+-    structuralAnalysis(nodes_entry);
+-    handleStructuredNodes();
+-    calculateNecessaryLiveout();
+-  }
+-}
+diff --git a/backend/src/ir/structural_analysis.hpp b/backend/src/ir/structural_analysis.hpp
+deleted file mode 100644
+index 7aaa533..0000000
+--- a/backend/src/ir/structural_analysis.hpp
++++ /dev/null
+@@ -1,342 +0,0 @@
+-/*
+- * structural_analysis.hpp
+- * This code is derived from the ControlTree.h and ControlTree.cpp of
+- * project gpuocelot by Yongjia Zhang.
+- * The original copyright of gpuocelot appears below in its entirety.
+- */
+-
+-/*
+- * Copyright 2011
+- * GEORGIA TECH RESEARCH CORPORATION
+- * ALL RIGHTS RESERVED
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions are
+- * met:
+- *     * Redistributions of source code must retain the above copyright
+- * notice,   this list of conditions and the following disclaimers.
+- *     * Redistributions in binary form must reproduce the above copyright
+- * notice, this list of conditions and the following disclaimers in the
+- *       documentation and/or other materials provided with the
+- * distribution.
+- *     * Neither the name of GEORGIA TECH RESEARCH CORPORATION nor the
+- * names of  its contributors may be used to endorse or promote
+- * products derived  from this software without specific prior
+- * written permission.
+- *
+- * THIS SOFTWARE IS PROVIDED BY GEORGIA TECH RESEARCH CORPORATION ''AS IS''
+- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GEORGIA TECH RESEARCH
+- * CORPORATION BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+- *
+- * You agree that the Software will not be shipped, transferred, exported,
+- * or re-exported directly into any country prohibited by the United States
+- * Export Administration Act and the regulations thereunder nor will be
+- * used for any purpose prohibited by the Act.
+- */
+-
+-
+-#ifndef __STRUCTURAL_ANALYSIS_HPP__
+-#define __STRUCTURAL_ANALYSIS_HPP__
+-
+-#include "ir/unit.hpp"
+-#include "ir/function.hpp"
+-#include "ir/instruction.hpp"
+-
+-#include <iostream>
+-#include <unordered_set>
+-#include <unordered_map>
+-#include <vector>
+-#include <map>
+-#include <list>
+-#include <algorithm>
+-#include <set>
+-#define TRANSFORM_UNSTRUCTURE
+-
+-namespace analysis
+-{
+-  using namespace std;
+-  using namespace gbe;
+-
+-  enum RegionType
+-  {
+-    BasicBlock = 0,
+-    Block,
+-    IfThen,
+-    IfElse,
+-    SelfLoop,
+-    WhileLoop,
+-    NaturalLoop
+-  } ;
+-
+-  /* control tree virtual node */
+-  class Node;
+-
+-  typedef unordered_set<Node *> NodeSet;
+-  typedef list<Node *> NodeList;
+-  typedef std::vector<Node *> NodeVector;
+-
+-  /* control tree virtual node */
+-  class Node
+-  {
+-  public:
+-    Node(RegionType rtype, const NodeList& children): has_barrier(false), mark(false), canBeHandled(true), inversePredicate(true)
+-    {
+-      this->rtype = rtype;
+-      this->children = children;
+-    }
+-    virtual ~Node() {}
+-    NodeSet& preds() { return pred; }
+-    NodeSet& succs() { return succ; }
+-    Node*& fallthrough() { return fall_through; }
+-    bool& hasBarrier() { return has_barrier; }
+-    RegionType type() { return rtype; }
+-    virtual ir::BasicBlock* getEntry()
+-    {
+-      return (*(children.begin()))->getEntry();
+-    }
+-    virtual ir::BasicBlock* getExit()
+-    {
+-      return (*(children.rbegin()))->getExit();
+-    }
+-
+-  public:
+-    RegionType rtype;
+-    NodeSet pred;
+-    NodeSet succ;
+-    NodeList children;
+-    Node* fall_through;
+-    bool has_barrier;
+-    bool mark;
+-    bool canBeHandled;
+-    //label is for debug
+-    int label;
+-    /* inversePredicate should be false under two circumstance,
+-     * fallthrough is the same with succs:
+-     * (1) n->succs == m && node->fallthrough == m
+-     * node
+-     * | \
+-     * |  \
+-     * m<--n
+-     * (2) m->succs == n && node->fallthrough == n
+-     * node
+-     * | \
+-     * |  \
+-     * m-->n
+-     * */
+-    bool inversePredicate;
+-  };
+-
+-  /* represents basic block */
+-  class BasicBlockNode : public Node
+-  {
+-  public:
+-    BasicBlockNode(ir::BasicBlock *p_bb) : Node(BasicBlock, NodeList()) { this->p_bb = p_bb; }
+-    virtual ~BasicBlockNode() {}
+-    ir::BasicBlock* getBasicBlock() { return p_bb; }
+-    virtual ir::BasicBlock* getEntry() { return p_bb; }
+-    virtual ir::BasicBlock* getExit() { return p_bb; }
+-    virtual ir::BasicBlock* getFirstBB() { return p_bb; }
+-  private:
+-    ir::BasicBlock *p_bb;
+-  };
+-
+-  /* a sequence of nodes */
+-  class BlockNode : public Node
+-  {
+-  public:
+-    BlockNode(NodeList& children) : Node(Block, children) {}
+-    virtual ~BlockNode(){}
+-  };
+-
+-  /* If-Then structure node */
+-  class IfThenNode : public Node
+-  {
+-  public:
+-    IfThenNode(Node* cond, Node* ifTrue) : Node(IfThen, BuildChildren(cond, ifTrue)) {}
+-    virtual ~IfThenNode() {}
+-
+-  private:
+-    const NodeList BuildChildren(Node* cond, Node* ifTrue)
+-    {
+-      NodeList children;
+-      children.push_back(cond);
+-      children.push_back(ifTrue);
+-      return children;
+-    }
+-  };
+-
+-  /* If-Else structure node */
+-  class IfElseNode : public Node
+-  {
+-  public:
+-    IfElseNode(Node* cond, Node* ifTrue, Node* ifFalse) : Node(IfElse, BuildChildren(cond, ifTrue, ifFalse)) {}
+-    virtual ~IfElseNode() {}
+-
+-  private:
+-    const NodeList BuildChildren(Node* cond, Node* ifTrue, Node* ifFalse)
+-    {
+-      NodeList children;
+-      children.push_back(cond);
+-      children.push_back(ifTrue);
+-      children.push_back(ifFalse);
+-      return children;
+-    }
+-  };
+-
+-  /* Self loop structure node */
+-  class SelfLoopNode : public Node
+-  {
+-  public:
+-    SelfLoopNode(Node* node) : Node(SelfLoop, BuildChildren(node)) {}
+-    virtual ~SelfLoopNode() {}
+-    virtual ir::BasicBlock* getEntry()
+-    {
+-      return (*(children.begin()))->getEntry();
+-    }
+-    virtual ir::BasicBlock* getExit()
+-    {
+-      return (*(children.begin()))->getExit();
+-    }
+-
+-  private:
+-    const NodeList BuildChildren(Node *node)
+-    {
+-      NodeList children;
+-      children.push_back(node);
+-      return children;
+-    }
+-  };
+-
+-  /* While loop structure node */
+-  class WhileLoopNode : public Node
+-  {
+-  public:
+-    WhileLoopNode(Node* cond, Node* execute) : Node(WhileLoop, BuildChildren(cond, execute)) {}
+-    virtual ~WhileLoopNode() {}
+-    virtual ir::BasicBlock* getEntry()
+-    {
+-      return (*(children.begin()))->getEntry();
+-    }
+-    virtual ir::BasicBlock* getExit()
+-    {
+-      return (*(children.begin()))->getExit();
+-    }
+-
+-  private:
+-    const NodeList BuildChildren(Node* cond, Node* execute)
+-    {
+-      NodeList children;
+-      children.push_back(cond);
+-      children.push_back(execute);
+-      return children;
+-    }
+-
+-  };
+-
+-  /* Natural loop structure node */
+-  class NaturalLoopNode : public Node
+-  {
+-  public:
+-    NaturalLoopNode(const NodeList& children): Node(NaturalLoop, children){}
+-    virtual ~NaturalLoopNode() {}
+-    virtual ir::BasicBlock* getEntry()
+-    {
+-      //TODO implement it
+-      return NULL;
+-    }
+-    virtual ir::BasicBlock* getExit()
+-    {
+-      //TODO implement it
+-      return NULL;
+-    }
+-  };
+-
+-  /* computes the control tree, and do the structure identification during the computation */
+-  class ControlTree
+-  {
+-  public:
+-    void analyze();
+-
+-    ControlTree(ir::Function* fn) { this->fn = fn; }
+-    ~ControlTree();
+-
+-  private:
+-    /* create a sequence of BasicBlockNodes, which are refered to the basic blocks in the function */
+-    void initializeNodes();
+-    /* insert a new Node, and returns the pointer of the node */
+-    Node* insertNode(Node *);
+-    /* do the structural analysis */
+-    void structuralAnalysis(Node * entry);
+-    /* do the dfs post order traverse of the current CFG */
+-    void DFSPostOrder(Node *start);
+-    /* returns true if there is a (possibly empty) path from m to k that does not pass through n */
+-    bool path(Node *m, Node *k, Node *n = NULL);
+-    /* link region node into abstract flowgraph, adjust the predecessor and successor functions, and augment the control tree */
+-    void reduce(Node* node,  NodeSet nodeSet);
+-    /* adds node to the control tree, inserts node into _post
+-     * at the highest-numbered position of a node in nodeSet, removes
+-     * the nodes in nodeSet from _post, compacts the remaining nodes at
+-     * the beginning of _post, and sets _postCtr to the index of node
+-     * in the resulting postorder */
+-    void compact(Node* node,  NodeSet nodeSet);
+-    Node* getNodesEntry() const  { return nodes_entry;}
+-    /* determines whether node is the entry node of an acyclic
+-     * control structure and returns its region. Stores in nset the set
+-     * of nodes in the identified control structure */
+-    Node* acyclicRegionType(Node* node, NodeSet& nset);
+-    /* determines whether node is the entry node of a cyclic
+-     * control structure and returns its region. Stores in nset the set
+-     * of nodes in the identified control structure */
+-    Node* cyclicRegionType(Node*, NodeList&);
+-    /* is this a cyclic region? */
+-    bool isCyclic(Node*);
+-    /* is this a back edge? */
+-    bool isBackedge(const Node*, const Node*);
+-    /* check if there is a barrier in a basic block */
+-    bool checkForBarrier(const ir::BasicBlock*);
+-    /* insert while instruction at the proper position of Node */
+-    void handleSelfLoopNode(Node *, ir::LabelIndex&);
+-    /* mark all the BasicBlockNodes of the control tree node n as status */
+-    void markStructuredNodes(Node *n, bool status);
+-    /* mark all the ir::BasicBlocks' needEndIf of n as status */
+-    void markNeedEndif(Node * n, bool status);
+-    /* mark all the ir::BasicBlocks' needIf of n as status */
+-    void markNeedIf(Node *, bool);
+-    /* insert IF instruction at the proper position of Node */
+-    void handleIfNode(Node *, ir::LabelIndex&, ir::LabelIndex&);
+-    /* insert ENDIF instruction at the proper position of Node, this Node is
+-       the 'then' node of identified if-then structure */
+-    void handleThenNode(Node *, ir::LabelIndex&);
+-    /* handle the then node of identified if-else structure */
+-    void handleThenNode2(Node *, Node *, ir::LabelIndex);
+-    /* insert ELSE instruction at the proper position of Node */
+-    void handleElseNode(Node *, ir::LabelIndex&, ir::LabelIndex&);
+-    /* this calls other functions to finish the handling of identified structure blocks */
+-    void handleStructuredNodes();
+-    std::set<int> getStructureBasicBlocksIndex(Node *, std::vector<ir::BasicBlock *> &);
+-    std::set<ir::BasicBlock *> getStructureBasicBlocks(Node*);
+-    /* get livein of bb */
+-    void getLiveIn(ir::BasicBlock& bb, std::set<ir::Register>& livein);
+-    /* see comment of BasicBlock::liveout in function.hpp for detail. */
+-    void calculateNecessaryLiveout();
+-    /* get the exectutive sequence of structure n */
+-    void getStructureSequence(Node* n, std::vector<ir::BasicBlock*> &v);
+-  private:
+-    ir::Function *fn;
+-    NodeVector nodes;
+-    Node* nodes_entry;
+-    unordered_map<ir::BasicBlock *, Node *> bbmap;
+-    NodeList post_order;
+-    NodeSet visited;
+-    NodeList::iterator post_ctr;
+-  };
+-}
+-#endif
+diff --git a/backend/src/llvm/llvm_to_gen.cpp b/backend/src/llvm/llvm_to_gen.cpp
+index a4ce4a2..3e11fdb 100644
+--- a/backend/src/llvm/llvm_to_gen.cpp
++++ b/backend/src/llvm/llvm_to_gen.cpp
+@@ -61,7 +61,6 @@
+ #include "sys/cvar.hpp"
+ #include "sys/platform.hpp"
+ #include "ir/unit.hpp"
+-#include "ir/structural_analysis.hpp"
+ 
+ #include <clang/CodeGen/CodeGenAction.h>
+ 
+@@ -308,16 +307,6 @@ namespace gbe
+     // Print the code extra optimization passes
+     OUTPUT_BITCODE(AFTER_GEN, mod);
+ 
+-    const ir::Unit::FunctionSet& fs = unit.getFunctionSet();
+-    ir::Unit::FunctionSet::const_iterator iter = fs.begin();
+-    while(iter != fs.end())
+-    {
+-      analysis::ControlTree *ct = new analysis::ControlTree(iter->second);
+-      ct->analyze();
+-      delete ct;
+-      iter++;
+-    }
+-
+     delete libraryInfo;
+     return true;
+   }
+-- 
+2.4.1
+
diff --git a/0002-GBE-Fix-the-immediate-data-type.patch b/0002-GBE-Fix-the-immediate-data-type.patch
new file mode 100644
index 0000000..27d7582
--- /dev/null
+++ b/0002-GBE-Fix-the-immediate-data-type.patch
@@ -0,0 +1,62 @@
+From ae59e9fd4d2777eb5a2990060d70870597f7ce05 Mon Sep 17 00:00:00 2001
+From: Ruiling Song <ruiling.song at intel.com>
+Date: Thu, 30 Apr 2015 11:49:47 +0800
+Subject: [PATCH 2/9] GBE: Fix the immediate data type
+
+Signed-off-by: Ruiling Song <ruiling.song at intel.com>
+Reviewed-by: Zhigang Gong <zhigang.gong at intel.com>
+---
+ backend/src/ir/immediate.hpp          | 12 ++++++------
+ backend/src/llvm/llvm_gen_backend.cpp |  2 +-
+ 2 files changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/backend/src/ir/immediate.hpp b/backend/src/ir/immediate.hpp
+index 6b27e8b..ff37a29 100644
+--- a/backend/src/ir/immediate.hpp
++++ b/backend/src/ir/immediate.hpp
+@@ -130,11 +130,11 @@ namespace ir {
+     DECL_CONSTRUCTOR(int8_t, s8, TYPE_S8)
+     DECL_CONSTRUCTOR(uint8_t, u8, TYPE_U8)
+     DECL_CONSTRUCTOR(int16_t, s16, TYPE_S16)
+-    DECL_CONSTRUCTOR(uint16_t, u16, TYPE_S16)
++    DECL_CONSTRUCTOR(uint16_t, u16, TYPE_U16)
+     DECL_CONSTRUCTOR(int32_t, s32, TYPE_S32)
+-    DECL_CONSTRUCTOR(uint32_t, u32, TYPE_S32)
++    DECL_CONSTRUCTOR(uint32_t, u32, TYPE_U32)
+     DECL_CONSTRUCTOR(int64_t, s64, TYPE_S64)
+-    DECL_CONSTRUCTOR(uint64_t, u64, TYPE_S64)
++    DECL_CONSTRUCTOR(uint64_t, u64, TYPE_U64)
+     DECL_CONSTRUCTOR(float, f32, TYPE_FLOAT)
+     DECL_CONSTRUCTOR(double, f64, TYPE_DOUBLE)
+ #undef DECL_CONSTRUCTOR
+@@ -155,11 +155,11 @@ namespace ir {
+     DECL_CONSTRUCTOR(int8_t, s8, TYPE_S8, elemNum)
+     DECL_CONSTRUCTOR(uint8_t, u8, TYPE_U8, elemNum)
+     DECL_CONSTRUCTOR(int16_t, s16, TYPE_S16, elemNum)
+-    DECL_CONSTRUCTOR(uint16_t, u16, TYPE_S16, elemNum)
++    DECL_CONSTRUCTOR(uint16_t, u16, TYPE_U16, elemNum)
+     DECL_CONSTRUCTOR(int32_t, s32, TYPE_S32, elemNum)
+-    DECL_CONSTRUCTOR(uint32_t, u32, TYPE_S32, elemNum)
++    DECL_CONSTRUCTOR(uint32_t, u32, TYPE_U32, elemNum)
+     DECL_CONSTRUCTOR(int64_t, s64, TYPE_S64, elemNum)
+-    DECL_CONSTRUCTOR(uint64_t, u64, TYPE_S64, elemNum)
++    DECL_CONSTRUCTOR(uint64_t, u64, TYPE_U64, elemNum)
+     DECL_CONSTRUCTOR(float, f32, TYPE_FLOAT, elemNum)
+     DECL_CONSTRUCTOR(double, f64, TYPE_DOUBLE, elemNum)
+ #undef DECL_CONSTRUCTOR
+diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
+index 0487bcb..c2c016c 100644
+--- a/backend/src/llvm/llvm_gen_backend.cpp
++++ b/backend/src/llvm/llvm_gen_backend.cpp
+@@ -3546,7 +3546,7 @@ namespace gbe
+       if (step != 0) {
+         ir::ImmediateIndex stepImm = ctx.newIntegerImmediate(step, ir::TYPE_U32);
+         ir::Register stepReg = ctx.reg(ctx.getPointerFamily());
+-        ctx.LOADI(ir::TYPE_S32, stepReg, stepImm);
++        ctx.LOADI(ir::TYPE_U32, stepReg, stepImm);
+         ctx.ADD(ir::TYPE_U32, stack, stack, stepReg);
+         ctx.getFunction().pushStackSize(step);
+       }
+-- 
+2.4.1
+
diff --git a/0003-correct-the-src-output-of-alu3-when-OCL_OUTPUT_ASM-1.patch b/0003-correct-the-src-output-of-alu3-when-OCL_OUTPUT_ASM-1.patch
new file mode 100644
index 0000000..11c94fd
--- /dev/null
+++ b/0003-correct-the-src-output-of-alu3-when-OCL_OUTPUT_ASM-1.patch
@@ -0,0 +1,54 @@
+From 0b10bc0dbd201b892299b897ef5038dc3c52c724 Mon Sep 17 00:00:00 2001
+From: Guo Yejun <yejun.guo at intel.com>
+Date: Mon, 4 May 2015 16:47:21 +0800
+Subject: [PATCH 3/9] correct the src output of alu3 when OCL_OUTPUT_ASM=1
+
+Signed-off-by: Guo Yejun <yejun.guo at intel.com>
+Reviewed-by: "Song, Ruiling" <ruiling.song at intel.com>
+---
+ backend/src/backend/gen/gen_mesa_disasm.c | 15 ++++++++++++---
+ 1 file changed, 12 insertions(+), 3 deletions(-)
+
+diff --git a/backend/src/backend/gen/gen_mesa_disasm.c b/backend/src/backend/gen/gen_mesa_disasm.c
+index 4822de3..2615276 100644
+--- a/backend/src/backend/gen/gen_mesa_disasm.c
++++ b/backend/src/backend/gen/gen_mesa_disasm.c
+@@ -844,7 +844,10 @@ static int src0_3src(FILE *file, const void* inst)
+     return 0;
+   if (GEN_BITS_FIELD(inst, bits2.da3src.src0_subreg_nr))
+     format(file, ".%d", GEN_BITS_FIELD(inst, bits2.da3src.src0_subreg_nr));
+-  string(file, "<4,1,1>");
++  if (GEN_BITS_FIELD(inst, bits2.da3src.src0_rep_ctrl))
++    string(file, "<0,1,0>");
++  else
++    string(file, "<8,8,1>");
+   err |= control(file, "src da16 reg type", reg_encoding,
+                  GEN_TYPE_F, NULL);
+   /*
+@@ -889,7 +892,10 @@ static int src1_3src(FILE *file, const void* inst)
+     return 0;
+   if (src1_subreg_nr)
+     format(file, ".%d", src1_subreg_nr);
+-  string(file, "<4,1,1>");
++  if (GEN_BITS_FIELD(inst, bits2.da3src.src1_rep_ctrl))
++    string(file, "<0,1,0>");
++  else
++    string(file, "<8,8,1>");
+   err |= control(file, "src da16 reg type", reg_encoding,
+                  GEN_TYPE_F, NULL);
+   /*
+@@ -931,7 +937,10 @@ static int src2_3src(FILE *file, const void* inst)
+     return 0;
+   if (GEN_BITS_FIELD(inst, bits3.da3src.src2_subreg_nr))
+     format(file, ".%d", GEN_BITS_FIELD(inst, bits3.da3src.src2_subreg_nr));
+-  string(file, "<4,1,1>");
++  if (GEN_BITS_FIELD(inst, bits3.da3src.src2_rep_ctrl))
++    string(file, "<0,1,0>");
++  else
++    string(file, "<8,8,1>");
+   err |= control(file, "src da16 reg type", reg_encoding,
+                  GEN_TYPE_F, NULL);
+   /*
+-- 
+2.4.1
+
diff --git a/0004-Add-a-sanity-test-in-clGetDeviceIDs.patch b/0004-Add-a-sanity-test-in-clGetDeviceIDs.patch
new file mode 100644
index 0000000..a0d77a4
--- /dev/null
+++ b/0004-Add-a-sanity-test-in-clGetDeviceIDs.patch
@@ -0,0 +1,134 @@
+From a7f3944cf5437e7648b43d4aaba8f48616d5dcfc Mon Sep 17 00:00:00 2001
+From: "Rebecca N. Palmer" <rebecca_palmer at zoho.com>
+Date: Sat, 16 May 2015 18:48:37 +0100
+Subject: [PATCH 4/9] Add a sanity test in clGetDeviceIDs
+
+Run a small __local-using kernel in clGetDeviceIDs; if this returns
+the wrong result, return CL_DEVICE_NOT_FOUND.
+
+As far as I can see, there's no way to tell in advance (except
+unreliably with a global version check) whether __local-using batches
+will be accepted...so the easiest solution is probably to just try
+running one and see what result we get.
+
+Signed-off-by: Rebecca Palmer <rebecca_palmer at zoho.com>
+Reviewed-by: "Luo, Xionghu" <xionghu.luo at intel.com>
+Reviewed-by: Zhigang Gong <zhigang.gong at linux.intel.com>
+---
+ src/cl_device_id.c  | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++
+ utests/setenv.sh.in |  2 ++
+ 2 files changed, 84 insertions(+)
+
+diff --git a/src/cl_device_id.c b/src/cl_device_id.c
+index 0fd4a69..0c1c52f 100644
+--- a/src/cl_device_id.c
++++ b/src/cl_device_id.c
+@@ -436,6 +436,74 @@ brw_gt3_break:
+   return ret;
+ }
+ 
++/* Runs a small kernel to check that the device works; returns
++ * 0 for success, 1 for silently wrong result, 2 for error */
++LOCAL cl_int
++cl_self_test(cl_device_id device)
++{
++  cl_int status, ret;
++  cl_context ctx;
++  cl_command_queue queue;
++  cl_program program;
++  cl_kernel kernel;
++  cl_mem buffer;
++  cl_event kernel_finished;
++  size_t n = 3;
++  cl_int test_data[3] = {3, 7, 5};
++  const char* kernel_source = "__kernel void self_test(__global int *buf) {"
++  "  __local int tmp[3];"
++  "  tmp[get_local_id(0)] = buf[get_local_id(0)];"
++  "  barrier(CLK_LOCAL_MEM_FENCE);"
++  "  buf[get_global_id(0)] = tmp[2 - get_local_id(0)] + buf[get_global_id(0)];"
++  "}"; // using __local to catch the "no SLM on Haswell" problem
++  ret = 2;
++  ctx = clCreateContext(NULL, 1, &device, NULL, NULL, &status);
++  if (status == CL_SUCCESS) {
++    queue = clCreateCommandQueue(ctx, device, 0, &status);
++    if (status == CL_SUCCESS) {
++      program = clCreateProgramWithSource(ctx, 1, &kernel_source, NULL, &status);
++      if (status == CL_SUCCESS) {
++        status = clBuildProgram(program, 1, &device, "", NULL, NULL);
++        if (status == CL_SUCCESS) {
++          kernel = clCreateKernel(program, "self_test", &status);
++          if (status == CL_SUCCESS) {
++            buffer = clCreateBuffer(ctx, CL_MEM_COPY_HOST_PTR, n*4, test_data, &status);
++            if (status == CL_SUCCESS) {
++              status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &buffer);
++              if (status == CL_SUCCESS) {
++                status = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &n, &n, 0, NULL, &kernel_finished);
++                if (status == CL_SUCCESS) {
++                  status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, n*4, test_data, 1, &kernel_finished, NULL);
++                  if (status == CL_SUCCESS) {
++                    if (test_data[0] == 8 && test_data[1] == 14 && test_data[2] == 8){
++                      ret = 0;
++                    } else {
++                      ret = 1;
++                      printf("Beignet: self-test failed: (3, 7, 5) + (5, 7, 3) returned (%i, %i, %i)\n"
++                      "See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n",
++                      test_data[0], test_data[1], test_data[2]);
++                    }
++                  }
++                }
++              }
++            }
++            clReleaseMemObject(buffer);
++          }
++          clReleaseKernel(kernel);
++        }
++      }
++      clReleaseProgram(program);
++    }
++    clReleaseCommandQueue(queue);
++  }
++  clReleaseContext(ctx);
++  if (ret == 2) {
++    printf("Beignet: self-test failed: error %i\n"
++    "See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n", status);
++  }
++  return ret;
++}
++
+ LOCAL cl_int
+ cl_get_device_ids(cl_platform_id    platform,
+                   cl_device_type    device_type,
+@@ -447,6 +515,20 @@ cl_get_device_ids(cl_platform_id    platform,
+ 
+   /* Do we have a usable device? */
+   device = cl_get_gt_device();
++  if (device && cl_self_test(device)) {
++    int disable_self_test = 0;
++    // can't use BVAR (backend/src/sys/cvar.hpp) here as it's C++
++    const char *env = getenv("OCL_IGNORE_SELF_TEST");
++    if (env != NULL) {
++      sscanf(env, "%i", &disable_self_test);
++    }
++    if (disable_self_test) {
++      printf("Beignet: Warning - overriding self-test failure\n");
++    } else {
++      printf("Beignet: disabling non-working device\n");
++      device = 0;
++    }
++  }
+   if (!device) {
+     if (num_devices)
+       *num_devices = 0;
+diff --git a/utests/setenv.sh.in b/utests/setenv.sh.in
+index ac06b10..67e3bf1 100644
+--- a/utests/setenv.sh.in
++++ b/utests/setenv.sh.in
+@@ -6,3 +6,5 @@ export OCL_PCH_PATH=@LOCAL_OCL_PCH_OBJECT@
+ export OCL_KERNEL_PATH=@CMAKE_CURRENT_SOURCE_DIR@/../kernels
+ export OCL_GBE_PATH=@LOCAL_GBE_OBJECT_DIR@
+ export OCL_INTERP_PATH=@LOCAL_INTERP_OBJECT_DIR@
++#disable self-test so we can get something more precise than "doesn't work"
++export OCL_IGNORE_SELF_TEST=1
+-- 
+2.4.1
+
diff --git a/0005-Docs-update-clarify-Haswell-issues.patch b/0005-Docs-update-clarify-Haswell-issues.patch
new file mode 100644
index 0000000..9f8f8af
--- /dev/null
+++ b/0005-Docs-update-clarify-Haswell-issues.patch
@@ -0,0 +1,124 @@
+From b19ed97e5c4127f04f1aff05f111162cb96629e6 Mon Sep 17 00:00:00 2001
+From: "Rebecca N. Palmer" <rebecca_palmer at zoho.com>
+Date: Sat, 16 May 2015 18:52:59 +0100
+Subject: [PATCH 5/9] Docs: update/clarify Haswell issues
+
+Reflect recent beignet and Linux changes.
+Signed-off-by: Rebecca Palmer <rebecca_palmer at zoho.com>
+Reviewed-by: "Luo, Xionghu" <xionghu.luo at intel.com>
+Reviewed-by: Zhigang Gong <zhigang.gong at linux.intel.com>
+---
+ docs/Beignet.mdwn  | 40 ++++++++++++++++++++++++++--------------
+ src/cl_device_id.c | 14 +++++++++-----
+ 2 files changed, 35 insertions(+), 19 deletions(-)
+
+diff --git a/docs/Beignet.mdwn b/docs/Beignet.mdwn
+index 57041ba..bd7fd98 100644
+--- a/docs/Beignet.mdwn
++++ b/docs/Beignet.mdwn
+@@ -142,7 +142,7 @@ Supported Targets
+ 
+  * 3rd Generation Intel Core Processors
+  * Intel “Bay Trail” platforms with Intel HD Graphics
+- * 4th Generation Intel Core Processors, need kernel patch currently, see the "Known Issues" section.
++ * 4th Generation Intel Core Processors "Haswell", need kernel patch currently, see the "Known Issues" section.
+  * 5th Generation Intel Core Processors "Broadwell".
+ 
+ Known Issues
+@@ -163,22 +163,34 @@ Known Issues
+   But this command is a little bit dangerous, as if your kernel really hang, then the gpu will lock up
+   forever until a reboot.
+ 
+-* Almost all unit tests fail.
+-  There is a known issue in some versions of linux kernel which enable register whitelist feature
+-  but miss some necessary registers which are required for beignet. For non-HSW platforms, the
+-  problematic version are around 3.15 and 3.16 which have commit f0a346b... but haven't commit
+-  c9224f... If it is the case, you can apply c9224f... manually and rebuild the kernel or just
+-  disable the parse command by invoke the following command (use Ubuntu as an example):
++* "Beignet: self-test failed" and almost all unit tests fail.
++  Linux 3.15 and 3.16 (commits [f0a346b](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=f0a346bdafaf6fc4a51df9ddf1548fd888f860d8)
++  to [c9224fa](https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=c9224faa59c3071ecfa2d4b24592f4eb61e57069))
++  enable the register whitelist by default but miss some registers needed
++  for Beignet.
++  
++  This can be fixed by upgrading Linux, or by disabling the whitelist:
+ 
+   `# echo 0 > /sys/module/i915/parameters/enable_cmd_parser`
+ 
+-  For HSW platforms, this issue exists in all linux kernel version after 3.15. We always need
+-  to execute the above command.
+-
+-* Some unit test cases, maybe 20 to 30, fail on 4th Generation (HSW) platform.
+-  _The 4th Generation Intel Core Processors's support requires some Linux kernel
+-  modification_. You need to apply the patch at:  
+-  [https://01.org/zh/beignet/downloads/linux-kernel-patch-hsw-support](https://01.org/zh/beignet/downloads/linux-kernel-patch-hsw-support)
++  On Haswell hardware, Beignet 1.0.1 to 1.0.3 also required the
++  above workaround on later Linux versions, but this _should not_ be
++  required in current (after [83f8739](http://cgit.freedesktop.org/beignet/commit/?id=83f8739b6fc4893fac60145326052ccb5cf653dc))
++  git master.
++
++* "Beignet: self-test failed" and 15-30 unit tests fail on 4th Generation (Haswell) hardware.
++  On Haswell, shared local memory (\_\_local) does not work at all on
++  Linux <= 4.0, and requires the i915.enable_ppgtt=2 [boot parameter](https://wiki.ubuntu.com/Kernel/KernelBootParameters)
++  on Linux 4.1.
++  
++  This will be fixed in Linux 4.2; older versions can be fixed with
++  [this patch](https://01.org/zh/beignet/downloads/linux-kernel-patch-hsw-support).
++  
++  If you do not need \_\_local, you can override the self-test with
++  
++  `export OCL_IGNORE_SELF_TEST=1`
++  
++  but using \_\_local after this may silently give wrong results.
+ 
+ * Precision issue.
+   Currently Gen does not provide native support of high precision math functions
+diff --git a/src/cl_device_id.c b/src/cl_device_id.c
+index 0c1c52f..9cd7c00 100644
+--- a/src/cl_device_id.c
++++ b/src/cl_device_id.c
+@@ -441,7 +441,7 @@ brw_gt3_break:
+ LOCAL cl_int
+ cl_self_test(cl_device_id device)
+ {
+-  cl_int status, ret;
++  cl_int status;
+   cl_context ctx;
+   cl_command_queue queue;
+   cl_program program;
+@@ -456,7 +456,11 @@ cl_self_test(cl_device_id device)
+   "  barrier(CLK_LOCAL_MEM_FENCE);"
+   "  buf[get_global_id(0)] = tmp[2 - get_local_id(0)] + buf[get_global_id(0)];"
+   "}"; // using __local to catch the "no SLM on Haswell" problem
+-  ret = 2;
++  static int tested = 0;
++  static cl_int ret = 2;
++  if (tested != 0)
++    return ret;
++  tested = 1;
+   ctx = clCreateContext(NULL, 1, &device, NULL, NULL, &status);
+   if (status == CL_SUCCESS) {
+     queue = clCreateCommandQueue(ctx, device, 0, &status);
+@@ -480,8 +484,8 @@ cl_self_test(cl_device_id device)
+                     } else {
+                       ret = 1;
+                       printf("Beignet: self-test failed: (3, 7, 5) + (5, 7, 3) returned (%i, %i, %i)\n"
+-                      "See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n",
+-                      test_data[0], test_data[1], test_data[2]);
++                             "See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n",
++                             test_data[0], test_data[1], test_data[2]);
+                     }
+                   }
+                 }
+@@ -499,7 +503,7 @@ cl_self_test(cl_device_id device)
+   clReleaseContext(ctx);
+   if (ret == 2) {
+     printf("Beignet: self-test failed: error %i\n"
+-    "See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n", status);
++           "See README.md or http://www.freedesktop.org/wiki/Software/Beignet/\n", status);
+   }
+   return ret;
+ }
+-- 
+2.4.1
+
diff --git a/0006-utest_pow-don-t-fail-on-declared-lack-of-denormals.patch b/0006-utest_pow-don-t-fail-on-declared-lack-of-denormals.patch
new file mode 100644
index 0000000..6ef1e34
--- /dev/null
+++ b/0006-utest_pow-don-t-fail-on-declared-lack-of-denormals.patch
@@ -0,0 +1,53 @@
+From 028f5ab22dfa70c1e2d6a71799f41badbd19eafd Mon Sep 17 00:00:00 2001
+From: "Rebecca N. Palmer" <rebecca_palmer at zoho.com>
+Date: Wed, 29 Apr 2015 13:58:13 +0800
+Subject: [PATCH 6/9] utest_pow: don't fail on declared lack of denormals.
+
+0.01**20.5 is denormal; at least Ivy Bridge does not support
+denormals and hence returns 0.  As this is allowed by the
+OpenCL standard, it shouldn't fail the test.
+
+Signed-off-by: Rebecca Palmer <rebecca_palmer at zoho.com>
+Reviewed-by: "Song, Ruiling" <ruiling.song at intel.com>
+---
+ utests/builtin_pow.cpp | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/utests/builtin_pow.cpp b/utests/builtin_pow.cpp
+index a18f31e..a8523d3 100644
+--- a/utests/builtin_pow.cpp
++++ b/utests/builtin_pow.cpp
+@@ -37,6 +37,9 @@ static void builtin_pow(void)
+       input_data2[i*count_input_ori+k] = ori_data[k];
+     }
+ 
++  cl_device_fp_config fp_config;
++  clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config), &fp_config, 0);
++  bool denormals_supported = fp_config & CL_FP_DENORM;
+   const char* env_strict = getenv("OCL_STRICT_CONFORMANCE");
+   float ULPSIZE_FACTOR = 16.0;
+   if (env_strict == NULL || strcmp(env_strict, "0") == 0)
+@@ -75,7 +78,9 @@ static void builtin_pow(void)
+ #if udebug
+       if ( (isinf(cpu_data[index_cur]) && !isinf(gpu_data[index_cur])) ||
+            (isnan(cpu_data[index_cur]) && !isnan(gpu_data[index_cur])) ||
+-           (fabs(gpu_data[index_cur] - cpu_data[index_cur]) > cl_FLT_ULP(cpu_data[index_cur]) * ULPSIZE_FACTOR)   )
++           (fabs(gpu_data[index_cur] - cpu_data[index_cur]) > cl_FLT_ULP(cpu_data[index_cur]) * ULPSIZE_FACTOR
++           && (denormals_supported || gpu_data[index_cur]!=0 || std::fpclassify(cpu_data[index_cur])!=FP_SUBNORMAL) ) )
++
+       {
+         printf_c("%d/%d: x:%f, y:%f -> gpu:%f  cpu:%f\n", k, i, input_data1[k], input_data2[k], gpu_data[index_cur], cpu_data[index_cur]);
+       }
+@@ -88,7 +93,8 @@ static void builtin_pow(void)
+        OCL_ASSERT(isnan(gpu_data[index_cur]));
+      else
+      {
+-       OCL_ASSERT(fabs(gpu_data[index_cur] - cpu_data[index_cur]) < cl_FLT_ULP(cpu_data[index_cur]) * ULPSIZE_FACTOR);
++       OCL_ASSERT((fabs(gpu_data[index_cur] - cpu_data[index_cur]) < cl_FLT_ULP(cpu_data[index_cur]) * ULPSIZE_FACTOR) ||
++       (!denormals_supported && gpu_data[index_cur]==0 && std::fpclassify(cpu_data[index_cur])==FP_SUBNORMAL) );
+      }
+ #endif
+     }
+-- 
+2.4.1
+
diff --git a/0007-Make-tgamma-meet-the-accuracy-standard.patch b/0007-Make-tgamma-meet-the-accuracy-standard.patch
new file mode 100644
index 0000000..b51d40c
--- /dev/null
+++ b/0007-Make-tgamma-meet-the-accuracy-standard.patch
@@ -0,0 +1,135 @@
+From e4c38e56576e58cd384c156e0bcfe6290ff9f33a Mon Sep 17 00:00:00 2001
+From: "Rebecca N. Palmer" <rebecca_palmer at zoho.com>
+Date: Wed, 29 Apr 2015 14:15:09 +0800
+Subject: [PATCH 7/9] Make tgamma meet the accuracy standard.
+
+The old tgamma=exp(lgamma) implementation had high rounding error on
+large outputs, exceeding the 16ulp specification for approx. x>8
+(hence the test failure in strict conformance mode).
+
+Replace this with an implementation based on glibc's
+http://sources.debian.net/src/glibc/2.19-17/sysdeps/ieee754/flt-32/e_gammaf_r.c/
+
+Signed-off-by: Rebecca Palmer <rebecca_palmer at zoho.com>
+Reviewed-by: "Song, Ruiling" <ruiling.song at intel.com>
+---
+ backend/src/libocl/tmpl/ocl_math.tmpl.cl | 96 +++++++++++++++++++++++++++++---
+ 1 file changed, 89 insertions(+), 7 deletions(-)
+
+diff --git a/backend/src/libocl/tmpl/ocl_math.tmpl.cl b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+index fcc60fd..f6e53c3 100644
+--- a/backend/src/libocl/tmpl/ocl_math.tmpl.cl
++++ b/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+@@ -1746,13 +1746,6 @@ OVERLOADABLE float __gen_ocl_internal_exp(float x) {
+   }
+ }
+ 
+-INLINE_OVERLOADABLE float tgamma(float x) {
+-  float y;
+-  int s;
+-  y=lgamma_r(x,&s);
+-  return __gen_ocl_internal_exp(y)*s;
+-}
+-
+ /* erf,erfc from glibc s_erff.c -- float version of s_erf.c.
+  * Conversion to float by Ian Lance Taylor, Cygnus Support, ian at cygnus.com.
+  */
+@@ -2963,6 +2956,95 @@ OVERLOADABLE float __gen_ocl_internal_pow(float x, float y) {
+   return sn*z;
+ }
+ 
++OVERLOADABLE float tgamma (float x)
++{
++  /* based on glibc __ieee754_gammaf_r by Ulrich Drepper <drepper at cygnus.com> */
++
++  unsigned int hx;
++  GEN_OCL_GET_FLOAT_WORD(hx,x);
++  if (hx == 0xff800000)
++    {
++      /* x == -Inf.  According to ISO this is NaN.  */
++      return NAN;
++    }
++  if ((hx & 0x7f800000) == 0x7f800000)
++    {
++      /* Positive infinity (return positive infinity) or NaN (return
++	 NaN).  */
++      return x;
++    }
++  if (x < 0.0f && __gen_ocl_internal_floor (x) == x)
++    {
++      /* integer x < 0 */
++      return NAN;
++    }
++
++  if (x >= 36.0f)
++    {
++      /* Overflow.  */
++      return INFINITY;
++    }
++  else if (x <= 0.0f && x >= -FLT_EPSILON / 4.0f)
++    {
++      return 1.0f / x;
++    }
++  else
++    {
++      float sinpix = __gen_ocl_internal_sinpi(x);
++      if (x <= -42.0f)
++	/* Underflow.  */
++	{return 0.0f * sinpix /*for sign*/;}
++      int exp2_adj = 0;
++      float x_abs = __gen_ocl_fabs(x);
++      float gam0;
++
++      if (x_abs < 4.0f) {
++        /* gamma = exp(lgamma) is only accurate for small lgamma */
++        float prod,x_adj;
++        if (x_abs < 0.5f) {
++          prod = 1.0f / x_abs;
++          x_adj = x_abs + 1.0f;
++        } else if (x_abs <= 1.5f) {
++          prod = 1.0f;
++          x_adj = x_abs;
++        } else if (x_abs < 2.5f) {
++          x_adj = x_abs - 1.0f;
++          prod = x_adj;
++        } else {
++          x_adj = x_abs - 2.0f;
++          prod = x_adj * (x_abs - 1.0f);
++        }
++        gam0 = __gen_ocl_internal_exp (lgamma (x_adj)) * prod;
++      }
++      else {
++        /* Compute gamma (X) using Stirling's approximation,
++  	 starting by computing pow (X, X) with a power of 2
++  	 factored out to avoid intermediate overflow.  */
++        float x_int = __gen_ocl_internal_round (x_abs);
++        float x_frac = x_abs - x_int;
++        int x_log2;
++        float x_mant = frexp (x_abs, &x_log2);
++        if (x_mant < M_SQRT1_2_F)
++          {
++          x_log2--;
++          x_mant *= 2.0f;
++          }
++        exp2_adj = x_log2 * (int) x_int;
++        float ret = (__gen_ocl_internal_pow(x_mant, x_abs)
++  		   * exp2 (x_log2 * x_frac)
++  		   * __gen_ocl_internal_exp (-x_abs)
++  		   * sqrt (2.0f * M_PI_F / x_abs) );
++
++        float x2 = x_abs * x_abs;
++        float bsum = (0x3.403404p-12f / x2 -0xb.60b61p-12f) / x2 + 0x1.555556p-4f;
++        gam0 = ret + ret * __gen_ocl_internal_expm1 (bsum / x_abs);
++      }
++      if (x > 0.0f) {return __gen_ocl_internal_ldexp (gam0, exp2_adj);}
++      float gam1 = M_PI_F / (-x * sinpix * gam0);
++      return __gen_ocl_internal_ldexp (gam1, -exp2_adj);
++    }
++}
++
+ float __gen_ocl_internal_pown(float x, int y) {
+   const float
+   bp[] = {1.0, 1.5,},
+-- 
+2.4.1
+
diff --git a/0008-Allow-building-with-Python-3.patch b/0008-Allow-building-with-Python-3.patch
new file mode 100644
index 0000000..9fa5108
--- /dev/null
+++ b/0008-Allow-building-with-Python-3.patch
@@ -0,0 +1,119 @@
+From 48b2cc4757cef526961f62cd5c41023690b7d2f1 Mon Sep 17 00:00:00 2001
+From: "Rebecca N. Palmer" <rebecca_palmer at zoho.com>
+Date: Wed, 29 Apr 2015 11:26:41 +0100
+Subject: [PATCH 8/9] Allow building with Python 3
+
+Make the build scripts work in both Python 2 and Python 3.
+(CMake prefers Python 2 if both are available, but will use
+Python 3 if only it is installed.)
+
+Signed-off-by: Rebecca Palmer <rebecca_palmer at zoho.com>
+Reviewed-by: Zhigang Gong <zhigang.gong at linux.intel.com>
+---
+ backend/src/libocl/script/gen_vector.py | 23 ++++++++++++-----------
+ utests/utest_generator.py               |  3 ++-
+ 2 files changed, 14 insertions(+), 12 deletions(-)
+
+diff --git a/backend/src/libocl/script/gen_vector.py b/backend/src/libocl/script/gen_vector.py
+index ffc573a..92582f5 100755
+--- a/backend/src/libocl/script/gen_vector.py
++++ b/backend/src/libocl/script/gen_vector.py
+@@ -20,13 +20,14 @@
+ 
+ # This file is to generate inline code to lower down those builtin
+ # vector functions to scalar functions.
++from __future__ import print_function
+ import re
+ import sys
+ import os
+ 
+ if len(sys.argv) != 4:
+-    print "Invalid argument {0}".format(sys.argv)
+-    print "use {0} spec_file_name output_file_name just_proto".format(sys.argv[0])
++    print("Invalid argument {0}".format(sys.argv))
++    print("use {0} spec_file_name output_file_name just_proto".format(sys.argv[0]))
+     raise
+ 
+ all_vector = 1,2,3,4,8,16
+@@ -61,8 +62,8 @@ all_type = all_int_type + all_float_type
+ 
+ # all vector/scalar types
+ for t in all_type:
+-    exec "{0}n = [\"{0}n\", gen_vector_type([\"{0}\"])]".format(t)
+-    exec "s{0} = [\"{0}\", gen_vector_type([\"{0}\"], [1])]".format(t)
++    exec("{0}n = [\"{0}n\", gen_vector_type([\"{0}\"])]".format(t))
++    exec("s{0} = [\"{0}\", gen_vector_type([\"{0}\"], [1])]".format(t))
+ 
+ # Predefined type sets according to the Open CL spec.
+ math_gentype = ["math_gentype", gen_vector_type(all_float_type)]
+@@ -124,8 +125,8 @@ def check_type(types):
+     for t in types:
+         memspace, t = stripMemSpace(t)
+         if not t in type_dict:
+-            print t
+-            raise "found invalid type."
++            print(t)
++            raise TypeError("found invalid type.")
+ 
+ def match_unsigned(dtype):
+     if dtype[0] == 'float':
+@@ -187,8 +188,8 @@ def fixup_type(dstType, srcType, n):
+         if (len(dstType) == len(srcType)):
+             return dstType[n]
+ 
+-    print dstType, srcType
+-    raise "type mispatch"
++    print(dstType, srcType)
++    raise TypeError("type mispatch")
+ 
+ class builtinProto():
+     valueTypeStr = ""
+@@ -226,7 +227,7 @@ class builtinProto():
+ 
+     def init_from_line(self, t):
+         self.append('//{0}'.format(t))
+-        line = filter(None, re.split(',| |\(', t.rstrip(')\n')))
++        line = [_f for _f in re.split(',| |\(', t.rstrip(')\n')) if _f]
+         self.paramCount = 0
+         stripped = 0
+         memSpace = ''
+@@ -310,7 +311,7 @@ class builtinProto():
+                 vtype = fixup_type(vtypeSeq, ptypeSeqs[n], i)
+                 if vtype[1] != ptype[1]:
+                     if ptype[1] != 1:
+-                        raise "parameter is not a scalar but has different width with result value."
++                        raise TypeError("parameter is not a scalar but has different width with result value.")
+                     if isPointer(ptype):
+                         formatStr += '&'
+                     formatStr += 'param{0}'.format(n)
+@@ -333,7 +334,7 @@ class builtinProto():
+ 
+     def output(self):
+         for line in self.outputStr:
+-            print line
++            print(line)
+ 
+     def output(self, outFile):
+         for line in self.outputStr:
+diff --git a/utests/utest_generator.py b/utests/utest_generator.py
+index 510c41a..7d2d3a0 100644
+--- a/utests/utest_generator.py
++++ b/utests/utest_generator.py
+@@ -1,4 +1,5 @@
+ #!/usr/bin/python
++from __future__ import print_function
+ import os,sys,re
+ 
+ FLT_MAX_POSI='0x1.fffffep127f'
+@@ -326,7 +327,7 @@ which can print more values and information to assist debuging the issue.
+     file_object.close()
+ 
+   def nameForCmake(self,content,namesuffix):
+-    print("generated/%s_%s.cpp"%(self.fileName,namesuffix)),
++    print("generated/%s_%s.cpp"%(self.fileName,namesuffix),end=" ")
+ 
+   def utestFunc(self,index):
+     funcLines=[]
+-- 
+2.4.1
+
diff --git a/0009-utests-fix-test-case-builtin_tgamma.patch b/0009-utests-fix-test-case-builtin_tgamma.patch
new file mode 100644
index 0000000..a3191b7
--- /dev/null
+++ b/0009-utests-fix-test-case-builtin_tgamma.patch
@@ -0,0 +1,65 @@
+From a6810c54b5bfa4c88d9fec6fb1f86fdea4af75af Mon Sep 17 00:00:00 2001
+From: "Rebecca N. Palmer" <rebecca_palmer at zoho.com>
+Date: Wed, 29 Apr 2015 16:25:47 +0100
+Subject: [PATCH 9/9] utests: fix test case builtin_tgamma.
+
+Compare with tgamma instead of tgammaf for better accuracy.
+Include negative inputs, and handle the resulting denormals.
+Print maximum error found.
+
+Signed-off-by: Rebecca Palmer <rebecca_palmer at zoho.com>
+Reviewed-by: "Song, Ruiling" <ruiling.song at intel.com>
+Reviewed-by: Zhigang Gong <zhigang.gong at linux.intel.com>
+---
+ utests/builtin_tgamma.cpp | 19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+diff --git a/utests/builtin_tgamma.cpp b/utests/builtin_tgamma.cpp
+index 47cc5f4..b7db69b 100644
+--- a/utests/builtin_tgamma.cpp
++++ b/utests/builtin_tgamma.cpp
+@@ -20,10 +20,15 @@ void builtin_tgamma(void)
+   if (env_strict == NULL || strcmp(env_strict, "0") == 0)
+     ULPSIZE_FACTOR = 10000.;
+ 
+-  for (int j = 0; j < 1024; j ++) {
++  cl_device_fp_config fp_config;
++  clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(cl_device_fp_config), &fp_config, 0);
++  bool denormals_supported = fp_config & CL_FP_DENORM;
++  float max_ulp = 0, max_ulp_at = 0;
++
++  for (int j = 0; j < 128; j ++) {
+     OCL_MAP_BUFFER(0);
+     for (int i = 0; i < n; ++i) {
+-      src[i] = ((float*)buf_data[0])[i] = (j*n+i+1) * 0.001f;
++      src[i] = ((float*)buf_data[0])[i] = j - 64 + i*0.001f;
+     }
+     OCL_UNMAP_BUFFER(0);
+ 
+@@ -32,7 +37,14 @@ void builtin_tgamma(void)
+     OCL_MAP_BUFFER(1);
+     float *dst = (float*)buf_data[1];
+     for (int i = 0; i < n; ++i) {
+-      float cpu = tgammaf(src[i]);
++      float cpu = tgamma(src[i]);
++      if (!denormals_supported && std::fpclassify(cpu)==FP_SUBNORMAL && dst[i]==0) {
++        cpu = 0;
++      }
++      if (fabsf(cpu - dst[i]) > cl_FLT_ULP(cpu) * max_ulp) {
++        max_ulp = fabsf(cpu - dst[i]) / cl_FLT_ULP(cpu);
++        max_ulp_at = src[i];
++      }
+       if (isinf(cpu)) {
+         OCL_ASSERT(isinf(dst[i]));
+       } else if (fabsf(cpu - dst[i]) >= cl_FLT_ULP(cpu) * ULPSIZE_FACTOR) {
+@@ -42,6 +54,7 @@ void builtin_tgamma(void)
+     }
+     OCL_UNMAP_BUFFER(1);
+   }
++  printf("max error=%f ulp at x=%f ", max_ulp, max_ulp_at);
+ }
+ 
+ MAKE_UTEST_FROM_FUNCTION(builtin_tgamma);
+-- 
+2.4.1
+
diff --git a/beignet.spec b/beignet.spec
index cfd6700..8dd4d78 100644
--- a/beignet.spec
+++ b/beignet.spec
@@ -1,6 +1,6 @@
 Name:           beignet
 Version:        1.0.3
-Release:        1%{?dist}
+Release:        2%{?dist}
 Summary:        Open source implementation of the OpenCL for Intel GPUs
 
 License:        LGPLv2+
@@ -9,12 +9,28 @@ Source0:        https://01.org/sites/default/files/%{name}-%{version}-source.tar
 
 BuildRequires:  cmake
 BuildRequires:  llvm-devel >= 3.3 clang-devel >= 3.3 llvm-static >= 3.3
-BuildRequires:  libdrm-devel mesa-libGL-devel mesa-libEGL-devel mesa-libgbm-devel ocl-icd-devel python-devel
+BuildRequires:  libdrm-devel mesa-libGL-devel mesa-libEGL-devel mesa-libgbm-devel ocl-icd-devel
 BuildRequires:  zlib-devel libedit-devel
 BuildRequires:  opencl-headers
 
+%if 0%{?fedora} > 22
+BuildRequires:  python3-devel
+%else
+BuildRequires:  python2-devel
+%endif
+
 Requires:       opencl-filesystem
 
+Patch0: 0001-Remove-some-LGPL-incompatible-code.patch
+Patch1: 0002-GBE-Fix-the-immediate-data-type.patch
+Patch2: 0003-correct-the-src-output-of-alu3-when-OCL_OUTPUT_ASM-1.patch
+Patch3: 0004-Add-a-sanity-test-in-clGetDeviceIDs.patch
+Patch4: 0005-Docs-update-clarify-Haswell-issues.patch
+Patch5: 0006-utest_pow-don-t-fail-on-declared-lack-of-denormals.patch
+Patch6: 0007-Make-tgamma-meet-the-accuracy-standard.patch
+Patch7: 0008-Allow-building-with-Python-3.patch
+Patch8: 0009-utests-fix-test-case-builtin_tgamma.patch
+
 ExclusiveArch:  x86_64 %{ix86}
 
 %description
@@ -34,23 +50,24 @@ Devel package for Beignet is an open source implementation of the OpenCL
 specification - a generic compute oriented API.
 
 %prep
-%autosetup -n Beignet-%{version}-Source
+%autosetup -n Beignet-%{version}-Source -S git
 mkdir build
 
 %build
 pushd build
   %cmake ../ -DLLVM_INSTALL_DIR=%{_bindir}/
-  make %{?_smp_mflags}
+  %make_build
 popd
 
 %install
 pushd build
   %make_install
 popd
-find %{buildroot}%{_includedir}/CL/ -regextype posix-egrep -not -regex ".*(cl_intel.h)" -type f -delete
+find %{buildroot}%{_includedir}/CL/ -not -name "cl_intel.h" -type f -delete
 
 %files
-%doc COPYING README.md
+%license COPYING
+%doc README.md
 %{_libdir}/beignet/
 %{_sysconfdir}/OpenCL/vendors/intel-beignet.icd
 
@@ -59,6 +76,12 @@ find %{buildroot}%{_includedir}/CL/ -regextype posix-egrep -not -regex ".*(cl_in
 %{_includedir}/CL/cl_intel.h
 
 %changelog
+* Tue May 19 2015 Igor Gnatenko <i.gnatenko.brain at gmail.com> - 1.0.3-2
+- Fix licensing issues with not compatipble LGPL code
+- use python3-devel for fedora23+
+- use license macro
+- use make_build macro
+
 * Fri May 08 2015 Igor Gnatenko <i.gnatenko.brain at gmail.com> - 1.0.3-1
 - Update to 1.0.3 (RHBZ #1202329)
 
-- 
cgit v0.10.2


	http://pkgs.fedoraproject.org/cgit/beignet.git/commit/?h=f22&id=bbfccf3794ded8b19d0e05a8aefa85c7cf20b61c


More information about the scm-commits mailing list