99
1010import java .io .Serializable ;
1111import java .net .URL ;
12+ import java .util .Arrays ;
1213import java .util .Collections ;
1314import java .util .HashMap ;
1415import java .util .Map ;
1516
1617import edu .illinois .cs .cogcomp .core .datastructures .vectors .*;
18+ import edu .illinois .cs .cogcomp .lbjava .classify .DiscreteConjunctiveFeature ;
1719import edu .illinois .cs .cogcomp .lbjava .classify .Feature ;
20+ import edu .illinois .cs .cogcomp .lbjava .classify .RealConjunctiveFeature ;
1821import edu .illinois .cs .cogcomp .lbjava .util .ByteString ;
1922import edu .illinois .cs .cogcomp .lbjava .util .ClassUtils ;
2023import edu .illinois .cs .cogcomp .lbjava .util .FVector ;
@@ -305,7 +308,7 @@ public boolean contains(Feature f) {
305308 *
306309 * @param f The feature to look up.
307310 * @return The integer key that the feature maps to.
308- **/
311+ **/
309312 public int lookup (Feature f ) {
310313 return lookup (f , false , -1 );
311314 }
@@ -661,6 +664,36 @@ public void discardPrunedFeatures() {
661664 pruneCutoff = -1 ;
662665 }
663666
667+ /**
668+ * Discard features at the provided indices. This operation is performed
669+ * last to first so we can do it in place. This method will sort the input
670+ * array.
671+ * @param dumpthese the indexes of the features to dump.
672+ */
673+ public void discardPrunedFeatures (int [] dumpthese ) {
674+ Arrays .sort (dumpthese );
675+ lexiconInv .remove (dumpthese );
676+
677+ // this compresses the FVector
678+ lexiconInv = new FVector (lexiconInv );
679+ if (lexicon != null ) {
680+
681+ // reconstitute the lexicon.
682+ lexicon .clear ();
683+ for (int i = 0 ; i < lexiconInv .size ();i ++) {
684+ lexicon .put (lexiconInv .get (i ), new Integer (i ));
685+ }
686+
687+ // sanity check, make sure the indices in the lexicon map matches the index in the feature vector
688+ for (int i = 0 ; i < lexiconInv .size ();i ++) {
689+ if (i != ((Integer )lexicon .get (lexiconInv .get (i ))).intValue ()) {
690+ throw new RuntimeException ("After optimization pruning, the index in the lexicon did "
691+ + "not match the inverted index." );
692+ }
693+ }
694+ }
695+ }
696+
664697
665698 /**
666699 * <!-- clone() --> Returns a deep clone of this lexicon implemented as a <code>HashMap</code>.
@@ -742,10 +775,9 @@ public int compare(int i1, int i2) {
742775 ByteString previousBSIdentifier = null ;
743776 out .writeInt (indexes .length );
744777 out .writeInt (pruneCutoff );
745-
746778 for (int i = 0 ; i < indexes .length ; ++i ) {
747779 Feature f = inverse .get (indexes [i ]);
748- previousClassName =
780+ previousClassName =
749781 f .lexWrite (out , this , previousClassName , previousPackage , previousClassifier ,
750782 previousSIdentifier , previousBSIdentifier );
751783 previousPackage = f .getPackage ();
@@ -757,7 +789,6 @@ else if (f.hasByteStringIdentifier())
757789
758790 out .writeInt (indexes [i ]);
759791 }
760-
761792 if (featureCounts == null )
762793 out .writeInt (0 );
763794 else
@@ -801,14 +832,12 @@ public void read(ExceptionlessInputStream in, boolean readCounts) {
801832 pruneCutoff = in .readInt ();
802833 lexicon = null ;
803834 lexiconInv = new FVector (N );
804-
805835 for (int i = 0 ; i < N ; ++i ) {
806836 Feature f =
807837 Feature .lexReadFeature (in , this , previousClass , previousPackage ,
808838 previousClassifier , previousSIdentifier , previousBSIdentifier );
809839 int index = in .readInt ();
810840 lexiconInv .set (index , f );
811-
812841 previousClass = f .getClass ();
813842 previousPackage = f .getPackage ();
814843 previousClassifier = f .getGeneratingClassifier ();
@@ -817,7 +846,7 @@ public void read(ExceptionlessInputStream in, boolean readCounts) {
817846 else if (f .hasByteStringIdentifier ())
818847 previousBSIdentifier = f .getByteStringIdentifier ();
819848 }
820-
849+
821850 if (readCounts ) {
822851 featureCounts = new IVector ();
823852 featureCounts .read (in );
0 commit comments