@@ -18,8 +18,7 @@ public class Element {
1818 //**************************************************************************
1919 //** Constructor
2020 //**************************************************************************
21- /**
22- * @param html HTML used to define a tag (e.g. <div id="1">)
21+ /** @param tagHTML HTML used to define a tag (e.g. <div id="1">)
2322 */
2423 protected Element (String tagHTML ){
2524 this .tagHTML = tagHTML ;
@@ -85,35 +84,132 @@ public String getInnerHTML(){
8584 public String getOuterHTML (){
8685 return outerHTML ;
8786 }
87+ /** Returns the result of Parser.stripHTMLTags() applied to this element's inner HTML. */
88+ public String getInnerText (){
89+ return Parser .stripHTMLTags (innerHTML );
90+ }
91+
92+ //**************************************************************************
93+ //** getAttribute
94+ //**************************************************************************
95+ /** Returns the value for a given attribute, or an empty string if no match
96+ * is found. Delegates to the private _getAttributeValue() helper.
97+ */
98+ public String getAttribute (String attributeName ){
99+ return _getAttributeValue (attributeName );
100+ }
101+
102+
103+ //**************************************************************************
104+ //** getElementByID
105+ //**************************************************************************
106+ /** Returns the HTML Element with a given id. Returns null if the element was
107+ * not found. Shorthand for getElementByAttributes(null, "id", id).
108+ */
109+ public Element getElementByID (String id ){
110+ return getElementByAttributes (null , "id" , id );
111+ }
112+
113+
114+ //**************************************************************************
115+ //** getElementsByTagName
116+ //**************************************************************************
117+ /** Returns an array of HTML Elements with the given tag name.
118+ * The search runs a new Parser over this element's inner HTML. */
119+ public Element [] getElementsByTagName (String tagName ){
120+ return new Parser (innerHTML ).getElementsByTagName (tagName );
121+ }
122+
123+
124+ //**************************************************************************
125+ //** getElementByTagName
126+ //**************************************************************************
127+ /** Returns the first HTML Element with given tag name. Returns null if an
128+ * element was not found.
129+ */
130+ public Element getElementByTagName (String tagName ){
131+ return getElementByAttributes (tagName , null , null );
132+ }
88133
89134
135+ //**************************************************************************
136+ //** getElements
137+ //**************************************************************************
138+ /** Returns an array of HTML Elements with the given tag name, attribute, and
139+ * attribute value (e.g. "div", "class", "panel-header").
140+ * The search runs a new Parser over this element's inner HTML. */
141+ public Element [] getElements (String tagName , String attributeName , String attributeValue ){
142+ return new Parser (innerHTML ).getElements (tagName , attributeName , attributeValue );
143+ }
144+
145+
146+ //**************************************************************************
147+ //** getElementByAttributes
148+ //**************************************************************************
149+ /** Returns the first HTML Element with the given tag name, attribute, and
150+ * attribute value. Returns null if an element was not found.
151+ * The search runs a new Parser over this element's inner HTML. */
152+ public Element getElementByAttributes (String tagName , String attributeName , String attributeValue ){
153+ return new Parser (innerHTML ).getElementByAttributes (tagName , attributeName , attributeValue );
154+ }
155+
156+
157+ //**************************************************************************
158+ //** getImageLinks
159+ //**************************************************************************
160+ /** Returns the image links that Parser.getImageLinks() reports for this
161+ * element's inner HTML. The links may include relative paths. Use the
162+ * Parser.getAbsolutePath() method to resolve them to a fully qualified URL.
163+ */
164+ public String [] getImageLinks (){
165+ return new Parser (innerHTML ).getImageLinks ();
166+ }
167+
168+
169+ //**************************************************************************
170+ //** toString
171+ //**************************************************************************
172+ public String toString (){
173+ return outerHTML ;
174+ }
175+
176+
176+ /** @deprecated Use getInnerText() instead; this method only forwards to it. */
177+ public String stripHTMLTags (){
178+ return getInnerText ();
179+ }
181+
181+ /** @deprecated Use getAttribute() instead; this method only forwards to it. */
182+ public String getAttributeValue (String attributeName ){
183+ return getAttribute (attributeName );
184+ }
186+
187+
188+
90189 //**************************************************************************
91190 //** getAttributeValue
92191 //**************************************************************************
93192 /** Returns the value for a given attribute. If no match is found, returns
94193 * an empty string.
95194 */
96- public String getAttributeValue (String attributeName ){
195+ private String _getAttributeValue (String attributeName ){
97196 try {
98197 org .w3c .dom .Document XMLDoc = DOM .createDocument ("<" + tag + "/>" );
99198 org .w3c .dom .NamedNodeMap attr = XMLDoc .getFirstChild ().getAttributes ();
100199 return DOM .getAttributeValue (attr ,attributeName );
101200 }
102201 catch (Exception e ){
103202 try {
104- return getAttributeValue2 (tag , attributeName );
203+ return _getAttributeValue2 (tag , attributeName );
105204 }
106205 catch (Exception ex ){
107206 return "" ;
108207 }
109208 }
110-
111209 }
112210
113211
114-
115-
116- private String getAttributeValue2 (String tag , String attributeName ){
212+ private String _getAttributeValue2 (String tag , String attributeName ){
117213
118214 tag = tag .trim ();
119215
@@ -128,15 +224,6 @@ private String getAttributeValue2(String tag, String attributeName){
128224 String tagName = orgTag + " " ;
129225 tagName = tagName .substring (0 , tagName .indexOf (" " ));
130226
131- /*
132- if (tagName.equalsIgnoreCase("img")){
133- System.out.println("IMGTAG = " + tag);
134- }
135- else{
136- return "";
137- }
138-
139- */
140227
141228
142229 //compress spaces
@@ -192,81 +279,8 @@ else if (newTag.charAt(i+1)=='\''){
192279
193280 }
194281
195-
196282 }
197-
198-
199283 }
200-
201284 return "" ;
202285 }
203-
204-
205-
206- //**************************************************************************
207- //** getElementByTagName
208- //**************************************************************************
209- /** Returns an array of HTML Elements with given tag name.
210- */
211- public Element [] getElementsByTagName (String tagName ){
212- return new Parser (innerHTML ).getElementsByTagName (tagName );
213- }
214-
215-
216- //**************************************************************************
217- //** getElementByTagName
218- //**************************************************************************
219- /** Returns the first HTML Element with given tag name. Returns null if an
220- * element was not found.
221- */
222- public Element getElementByTagName (String tagName ){
223- return getElementByAttributes (tagName , null , null );
224- }
225-
226-
227- //**************************************************************************
228- //** getElementByID
229- //**************************************************************************
230- /** Returns an HTML Element with given a id. Returns null if the element was
231- * not found.
232- */
233- public Element getElementByID (String id ){
234- return getElementByAttributes (null , "id" , id );
235- }
236-
237-
238- public Element [] getElements (String tagName , String attributeName , String attributeValue ){
239- return new Parser (innerHTML ).getElements (tagName , attributeName , attributeValue );
240- }
241-
242-
243- //**************************************************************************
244- //** getElementByAttributes
245- //**************************************************************************
246- /** Returns the first HTML Element with given tag name and attribute. Returns
247- * null if an element was not found.
248- */
249- public Element getElementByAttributes (String tagName , String attributeName , String attributeValue ){
250- return new Parser (innerHTML ).getElementByAttributes (tagName , attributeName , attributeValue );
251- }
252-
253- public String stripHTMLTags (){
254- return Parser .stripHTMLTags (innerHTML );
255- }
256-
257-
258- //**************************************************************************
259- //** getImageLinks
260- //**************************************************************************
261- /** Returns a list of links to images. The links may include relative paths.
262- * Use the getAbsolutePath method to resolve the relative paths to a fully
263- * qualified url.
264- */
265- public String [] getImageLinks (){
266- return new Parser (innerHTML ).getImageLinks ();
267- }
268-
269- public String toString (){
270- return outerHTML ;
271- }
272286}
0 commit comments