From 1a82b34a37b82a2e9bf9e4e993c35eb245114058 Mon Sep 17 00:00:00 2001 From: martin-mann Date: Wed, 31 Jul 2019 10:39:43 +0200 Subject: [PATCH 1/2] + support for length-corrected E optimization and output --- src/IntaRNA/Makefile.am | 2 ++ src/IntaRNA/ObjectiveHandler.cpp | 44 ++++++++++++++++++++++++++++++++ src/IntaRNA/ObjectiveHandler.h | 32 +++++++++++++++++++++++ src/IntaRNA/OutputHandlerCsv.cpp | 7 ++++- src/IntaRNA/OutputHandlerCsv.h | 2 ++ src/IntaRNA/PredictorMfe.cpp | 11 ++++++-- 6 files changed, 95 insertions(+), 3 deletions(-) create mode 100644 src/IntaRNA/ObjectiveHandler.cpp create mode 100644 src/IntaRNA/ObjectiveHandler.h diff --git a/src/IntaRNA/Makefile.am b/src/IntaRNA/Makefile.am index 5f78c9cb..5eee3eb5 100644 --- a/src/IntaRNA/Makefile.am +++ b/src/IntaRNA/Makefile.am @@ -42,6 +42,7 @@ libIntaRNA_a_HEADERS = \ InteractionEnergyVrna.h \ InteractionRange.h \ NussinovHandler.h \ + ObjectiveHandler.h \ OutputConstraint.h \ OutputStreamHandler.h \ OutputStreamHandlerSortedCsv.h \ @@ -107,6 +108,7 @@ libIntaRNA_a_SOURCES = \ InteractionEnergyVrna.cpp \ InteractionRange.cpp \ NussinovHandler.cpp \ + ObjectiveHandler.cpp \ OutputConstraint.cpp \ OutputStreamHandlerSortedCsv.cpp \ OutputHandler.cpp \ diff --git a/src/IntaRNA/ObjectiveHandler.cpp b/src/IntaRNA/ObjectiveHandler.cpp new file mode 100644 index 00000000..9c2dfe19 --- /dev/null +++ b/src/IntaRNA/ObjectiveHandler.cpp @@ -0,0 +1,44 @@ + + +#include "IntaRNA/ObjectiveHandler.h" + +namespace IntaRNA +{ + +ObjectiveHandler::ObjectiveHandler() +{ +} + +ObjectiveHandler::~ObjectiveHandler() +{ +} + +E_type +ObjectiveHandler:: +getLcE( const Interaction & i, const InteractionEnergy & energy ) +{ + const size_t curLength = std::max( + ( 1 + i.basePairs.rbegin()->first - i.basePairs.begin()->first ) + , ( 1 + i.basePairs.begin()->second - i.basePairs.rbegin()->second ) + ); + + return getLcE( curLength, i.energy, energy); +} + +E_type +ObjectiveHandler:: +getLcE( const size_t & curLength, const E_type & fullE, const InteractionEnergy & energy ) +{ +// const size_t maxLength = std::max( std::min(energy.getAccessibility1().getSequence().size(), energy.getAccessibility1().getMaxLength()) +// , std::min(energy.getAccessibility2().getSequence().size(), energy.getAccessibility2().getMaxLength()) ); +LOG_IF(curLength<1, DEBUG) <<"curLength < 1"; + return (E_type)( (double)(fullE) / std::log2(1+(double)curLength) ); +// return (E_type)( (double)(fullE) / std::log(5+(double)curLength) ); +// return (E_type)( (double)(fullE) / std::log(2.0*(double)curLength) ); +// return (fullE) / (E_type)curLength; +// return (fullE * maxLength) / curLength; +} + + + +} /* namespace IntaRNA */ diff --git a/src/IntaRNA/ObjectiveHandler.h b/src/IntaRNA/ObjectiveHandler.h new file mode 100644 index 00000000..a0940a2b --- /dev/null +++ b/src/IntaRNA/ObjectiveHandler.h @@ -0,0 +1,32 @@ +#ifndef INTARNA_OBJECTIVEHANDLER_H_ +#define INTARNA_OBJECTIVEHANDLER_H_ + +#include "IntaRNA/Interaction.h" +#include "IntaRNA/InteractionEnergy.h" + +namespace IntaRNA +{ + +/** + * Defines the optimization objective of Predictor instances + */ +class ObjectiveHandler +{ +public: + ObjectiveHandler(); + virtual ~ObjectiveHandler(); + + + static + E_type + getLcE( const Interaction & i, const InteractionEnergy & energy ); + + static + E_type + getLcE( const size_t & curLength, const E_type & fullE, const InteractionEnergy & energy ); + +}; + +} /* namespace IntaRNA */ + +#endif /* INTARNA_OBJECTIVEHANDLER_H_ */ diff --git a/src/IntaRNA/OutputHandlerCsv.cpp b/src/IntaRNA/OutputHandlerCsv.cpp index 54a3c032..f69c9c17 100644 --- a/src/IntaRNA/OutputHandlerCsv.cpp +++ b/src/IntaRNA/OutputHandlerCsv.cpp @@ -1,5 +1,6 @@ #include "IntaRNA/OutputHandlerCsv.h" +#include "IntaRNA/ObjectiveHandler.h" #if INTARNA_MULITHREADING #include @@ -22,7 +23,7 @@ const std::string OutputHandlerCsv::notAvailable = "NAN"; const OutputHandlerCsv::ColTypeList OutputHandlerCsv::colTypeNumericSort( OutputHandlerCsv::string2list( "start1,end1,start2,end2" - ",E,ED1,ED2,Pu1,Pu2,E_init,E_loops,E_dangleL,E_dangleR,E_endL,E_endR,E_hybrid,E_norm,E_hybridNorm,E_add" + ",E,ED1,ED2,Pu1,Pu2,E_init,E_loops,E_dangleL,E_dangleR,E_endL,E_endR,E_hybrid,E_norm,E_hybridNorm,E_add,lcE" ",seedStart1,seedEnd1,seedStart2,seedEnd2,seedE,seedED1,seedED2,seedPu1,seedPu2" ",Eall,Zall,P_E" )); @@ -242,6 +243,10 @@ add( const Interaction & i, const OutputConstraint & outConstraint ) outTmp < #include @@ -114,7 +115,10 @@ updateOptima( const size_t i1, const size_t j1 // check if we have to care about insertion (curE <= worst E in list) - if (curE > mfeInteractions.rbegin()->energy ) { +// if (curE > mfeInteractions.rbegin()->energy ) { + if (ObjectiveHandler::getLcE( std::max(1+j1-i1,1+j2-i2), curE, energy) + > ObjectiveHandler::getLcE( *(mfeInteractions.rbegin()), energy) ) + { return; } @@ -136,7 +140,10 @@ updateOptima( const size_t i1, const size_t j1 } else { // check for insertion position - InteractionList::iterator insertPos = std::find_if_not( mfeInteractions.begin(), mfeInteractions.end(), [&](Interaction & i){return i < tmp;}); + InteractionList::iterator insertPos = std::find_if_not( mfeInteractions.begin(), mfeInteractions.end(), [&](Interaction & i){ + return ObjectiveHandler::getLcE(i,energy) < ObjectiveHandler::getLcE(tmp,energy); +// return i < tmp; + }); if ( insertPos != mfeInteractions.end() && !( tmp == *insertPos )) { From 9530e12264bfb283a47bb3688214ccd5633a67d6 Mon Sep 17 00:00:00 2001 From: martin-mann Date: Wed, 31 Jul 2019 17:20:17 +0200 Subject: [PATCH 2/2] linear lcE --- src/IntaRNA/ObjectiveHandler.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/IntaRNA/ObjectiveHandler.cpp b/src/IntaRNA/ObjectiveHandler.cpp index 9c2dfe19..cca7c3ee 100644 --- a/src/IntaRNA/ObjectiveHandler.cpp +++ b/src/IntaRNA/ObjectiveHandler.cpp @@ -31,12 +31,11 @@ getLcE( const size_t & curLength, const E_type & fullE, const InteractionEnergy { // const size_t maxLength = std::max( std::min(energy.getAccessibility1().getSequence().size(), energy.getAccessibility1().getMaxLength()) // , std::min(energy.getAccessibility2().getSequence().size(), energy.getAccessibility2().getMaxLength()) ); -LOG_IF(curLength<1, DEBUG) <<"curLength < 1"; - return (E_type)( (double)(fullE) / std::log2(1+(double)curLength) ); +// return (E_type)( (double)(fullE) / std::log2(1+(double)curLength) ); // return (E_type)( (double)(fullE) / std::log(5+(double)curLength) ); // return (E_type)( (double)(fullE) / std::log(2.0*(double)curLength) ); // return (fullE) / (E_type)curLength; -// return (fullE * maxLength) / curLength; + return E_type((float)(fullE) / (float)curLength); }