Skip to content

Commit 37842f8

Browse files
author
khashab2
committed
move files
1 parent 8c76959 commit 37842f8

File tree

265 files changed

+86214
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

265 files changed

+86214
-0
lines changed

lbjava-examples/README

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
This is a sample classification project based on Learning Based Java (LBJava).
2+
Authors: Hao Wu, Parisa Kordjamshidi, Daniel Khashabi
3+
4+
5+
6+
#### HOW TO RUN ####
7+
8+
LINUX
9+
======
10+
Run the following script for the separate classifiers:
11+
./scripts/compileLBJ lbj/LALModel.lbj
12+
This lbj file contains a set of independent classifiers for named-entity types and relations.
13+
14+
To get a joint classifier, run the following command:
15+
./scripts/compileLBJ lbj/ER_JointAll.lbj
16+
This file contains a joint inference model for applying constraints between the labels of
17+
entity classifiers and relation classifiers.
18+
19+
Note: if you want to run training again, first remove the contents of the lbjsrc directory.
20+
rm -r lbjsrc/*
21+
22+
#### EXTERNAL LINKS
23+
The homepage of LBJ: http://cogcomp.cs.illinois.edu/page/software_view/LBJ
24+
If you have any questions, visit http://cogcomp.cs.illinois.edu/

lbjava-examples/pom.xml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<project xmlns="http://maven.apache.org/POM/4.0.0"
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5+
<!-- Parent POM: this module inherits from edu.illinois.cs.cogcomp:lbjava-project:1.2.0. -->
<parent>
6+
<artifactId>lbjava-project</artifactId>
7+
<groupId>edu.illinois.cs.cogcomp</groupId>
8+
<version>1.2.0</version>
9+
</parent>
10+
11+
<modelVersion>4.0.0</modelVersion>
12+
13+
<!-- Artifact id of this examples module; no groupId/version is declared here, so Maven takes them from the parent. -->
<artifactId>LBJavaExamples</artifactId>
14+
15+
<!-- Extra repository; presumably hosts the edu.illinois.cs.cogcomp artifacts declared below (confirm). -->
<repositories>
16+
<repository>
17+
<id>CogcompSoftware</id>
18+
<name>CogcompSoftware</name>
19+
<url>http://cogcomp.cs.illinois.edu/m2repo/</url>
20+
</repository>
21+
</repositories>
22+
23+
<dependencies>
24+
<!-- NOTE(review): LBJava is pinned to 1.0.3 while the parent project is 1.2.0; confirm this mismatch is intentional. -->
<dependency>
25+
<groupId>edu.illinois.cs.cogcomp</groupId>
26+
<artifactId>LBJava</artifactId>
27+
<version>1.0.3</version>
28+
</dependency>
29+
<!-- coreUtilities: the example reader imports edu.illinois.cs.cogcomp.core.io.LineIO; presumably provided by this artifact (confirm). -->
<dependency>
30+
<groupId>edu.illinois.cs.cogcomp</groupId>
31+
<artifactId>coreUtilities</artifactId>
32+
<version>0.2.8</version>
33+
</dependency>
34+
</dependencies>
35+
36+
</project>
Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
package edu.illinois.cs.cogcomp.tutorial;
2+
3+
import edu.illinois.cs.cogcomp.core.io.LineIO;
4+
import edu.illinois.cs.cogcomp.lbjava.parse.Parser;
5+
import edu.illinois.cs.cogcomp.tutorial.datastruct.ConllRawSentence;
6+
import edu.illinois.cs.cogcomp.tutorial.datastruct.ConllRawToken;
7+
import edu.illinois.cs.cogcomp.tutorial.datastruct.ConllRelation;
8+
9+
import java.io.FileNotFoundException;
10+
import java.util.ArrayList;
11+
import java.util.List;
12+
import java.util.Vector;
13+
14+
//import parse.Parser;
15+
//import jointentityrelationextraction.
16+
//import edu.illinois.cs.cogcomp.datastruct;
17+
//import edu.illinois.cs.cogcomp.datastruct.ConllRawSentence;
18+
//import edu.illinois.cs.cogcomp.datastruct.ConllRawToken;
19+
//import edu.illinois.cs.cogcomp.datastruct.ConllRelation;
20+
21+
public class Conll04_InstanceReader implements Parser {
22+
public Vector<ConllRawToken> instances;
23+
public Vector<ConllRawSentence> sentences;
24+
public Vector<ConllRelation> relations;
25+
26+
public String[] entityLabels,relLabels;
27+
private int currentInstanceId;
28+
29+
30+
public Conll04_InstanceReader(String filename){
31+
instances=new Vector<ConllRawToken>();
32+
relations=new Vector<ConllRelation>();
33+
sentences=new Vector<ConllRawSentence>();
34+
entityLabels=new String[0];
35+
relLabels=new String[0];
36+
// }
37+
38+
39+
//public void readData(String filename) throws Exception {
40+
//BufferedReader br=new BufferedReader(new FileReader(filename));
41+
List<String> lines = null;
42+
try {
43+
lines = LineIO.read(filename);
44+
} catch (FileNotFoundException e) {
45+
// TODO Auto-generated catch block
46+
e.printStackTrace();
47+
}
48+
String line;
49+
String[] tokens;
50+
51+
52+
ConllRawToken c=new ConllRawToken();
53+
54+
ConllRelation r;
55+
int currSentId=0;
56+
boolean sentEnd=false;
57+
ConllRawSentence sent=new ConllRawSentence(currSentId);
58+
59+
ArrayList<String> entityal = new ArrayList<String>();
60+
ArrayList<String> relal = new ArrayList<String>();
61+
62+
boolean relationSeen=false;
63+
int sentindex = 0;
64+
while(sentindex < lines.size()){
65+
line = lines.get(sentindex);
66+
sentindex ++;
67+
68+
//System.out.println(sentindex + " " + line);
69+
if(line.isEmpty()){
70+
sentEnd=true;
71+
72+
/* if(!sentEnd){
73+
currSentId++;
74+
sentEnd=true;
75+
76+
sentences.add(sent);
77+
78+
sent=new ConllRawSentence(currSentId);
79+
}*/
80+
continue;
81+
}
82+
83+
tokens=line.split("\t|\n");
84+
int s=tokens.length;
85+
if(s==3){
86+
relationSeen=true;
87+
r=new ConllRelation();
88+
// r.sentId1=currSentId-1;
89+
// r.sentId2=currSentId-1;
90+
r.wordId1=Integer.parseInt(tokens[0]);
91+
r.wordId2=Integer.parseInt(tokens[1]);
92+
r.relType=tokens[2];
93+
relations.add(r);
94+
sent.addRelations(r);
95+
// sentences.elementAt(sentences.size()-1).addRelations(r);
96+
if(!relal.contains(tokens[2])){
97+
relal.add(tokens[2]);
98+
}
99+
}
100+
else{
101+
//System.out.println("tokens[1]="+tokens[1]+"done");
102+
if(sentEnd){
103+
//if(!relationSeen)
104+
{
105+
sentences.add(sent);
106+
/* if(currSentId < 700)
107+
System.out.println("sid:" + currSentId);
108+
else System.out.println("sid:" + (currSentId + 51));
109+
for(int ind = 0;ind < sent.sentTokens.size();ind ++)
110+
System.out.print(sent.sentTokens.get(ind).phrase + " ");
111+
System.out.println();
112+
*/
113+
currSentId++;
114+
}
115+
sent=new ConllRawSentence(currSentId);
116+
}
117+
118+
c=new ConllRawToken();
119+
120+
/* if(currSentId < 700)
121+
assert (currSentId == Integer.parseInt(tokens[0]));
122+
else
123+
{
124+
assert(currSentId == Integer.parseInt(tokens[0]) - 51);
125+
if(currSentId != Integer.parseInt(tokens[0]) - 51)
126+
System.out.println("fuck you here");
127+
}*/
128+
129+
c.entType=tokens[1];
130+
c.sentId=currSentId;
131+
c.wordId=Integer.parseInt(tokens[2]);
132+
c.setPOS(tokens[4]);
133+
c.setPhrase(tokens[5]);
134+
135+
sent.addTokens(c);
136+
if(!tokens[1].trim().equals("O")){
137+
instances.add(c);
138+
sent.setCurrentTokenAsEntity();
139+
if(!entityal.contains(tokens[1])){
140+
entityal.add(tokens[1]);
141+
}
142+
}
143+
144+
sentEnd=false;
145+
relationSeen=false;
146+
}
147+
}
148+
149+
entityLabels=entityal.toArray(entityLabels);
150+
relLabels=relal.toArray(relLabels);
151+
152+
}
153+
154+
155+
public void printData(){
156+
System.out.println("printing total "+sentences.size()+" sentences");
157+
for(int i=0;i<sentences.size();i++){
158+
// sentences.elementAt(i).printSentence();
159+
sentences.elementAt(i).printEntities();
160+
sentences.elementAt(i).printRelations();
161+
}
162+
System.out.println("printing total "+instances.size()+" instances");
163+
for(int i=0;i<instances.size();i++){
164+
instances.elementAt(i).printInstance();
165+
}
166+
System.out.println("printing total "+relations.size()+ " relations");
167+
for(int i=0;i<relations.size();i++){
168+
relations.elementAt(i).printRelation();
169+
}
170+
}
171+
// public static void main(String[] args) throws Exception{
172+
// System.out.println("here");
173+
// Conll04_InstanceReader cr=new Conll04_InstanceReader("./data/conll04.corp");
174+
// //cr.readData("./data/conll04.corp");
175+
// cr.printData();
176+
// }
177+
public void close() {
178+
}
179+
public Object next() {
180+
181+
if (currentInstanceId < instances.size()) {
182+
183+
ConllRawToken file = instances.get(currentInstanceId++);
184+
185+
// String[] split = file.getPath().split(File.separator);
186+
187+
// String label = split[split.length - 2];
188+
189+
return file;//Document(file, label);
190+
} else
191+
return null;
192+
}
193+
194+
public void reset() {
195+
currentInstanceId = 0;
196+
}
197+
198+
public static void main(String[] args) throws Exception{
199+
System.out.println("here");
200+
Conll04_InstanceReader cr=new Conll04_InstanceReader("/Users/parisakordjamshidi/wolfe-0.1.0/LBJ/data/conll04.corp");
201+
202+
//cr.readData("/home/roth/rsamdan2/Project/EMStructuredPrediction/UnsupRelationExtraction/data/conll04.corp");
203+
cr.printData();
204+
205+
}
206+
207+
}

0 commit comments

Comments
 (0)