Thursday, March 19, 2015

Image Picker

Imagepicker is a utility that picks out the URLs of all images used on a web page. Just give it a URL and get back a list of image URLs.
It uses HTML Parser to parse the HTML tag nodes.
HTML Parser is a Java library used to parse HTML in either a linear or nested fashion. 
It is primarily used for transformation or extraction.

Maven dependency

<dependency>
  <groupId>org.htmlparser</groupId>
  <artifactId>htmlparser</artifactId>
  <version>1.6</version>
</dependency>

or

download the jar




import java.util.ArrayList;
import java.util.List;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.SimpleNodeIterator;

public class Imagepicker {
/**
* @author Rohan Kamat
* @version 1.0
* @Date Jan 11, 2015
*/
public static void main(String[] argv) throws ParserException {
Imagepicker img = new Imagepicker();
List<String> result = img
.imagelist("http://www.pixelofnature.com/portfolio/");
System.out.println(result);
}

public List<String> imagelist(String url) throws ParserException {
List<String> result = new ArrayList<String>();
Parser parser = new Parser(url);

                //check for img tag in html
NodeList list = parser.parse(new TagNameFilter("IMG"));
               
                //traverse the node to src attribute
for (SimpleNodeIterator iterator = list.elements(); iterator
.hasMoreNodes();) {
Tag tag = (Tag) iterator.nextNode();
System.out.println(tag.getAttribute("src"));
result.add(tag.getAttribute("src"));
}
return result;
}
}

No comments:

Post a Comment