Imagepicker is a utility that collects all image URLs used on a site. Just give it a URL and get back a list of images.
It uses HTML Parser to parse HTML tag nodes.
HTML Parser is a Java library used to parse HTML in either a linear or nested fashion.
Primarily used for transformation or extraction.
Maven dependency
<dependency>
    <groupId>org.htmlparser</groupId>
    <artifactId>htmlparser</artifactId>
    <version>1.6</version>
</dependency>
or
download the jar
import java.util.ArrayList;
import java.util.List;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.SimpleNodeIterator;
/**
 * Collects the {@code src} attribute of every {@code IMG} tag found on a page,
 * using the HTML Parser library ({@code org.htmlparser}).
 *
 * <p>Date: Jan 11, 2015
 *
 * @author Rohan Kamat
 * @version 1.0
 */
public class Imagepicker {

    /**
     * Demo entry point: lists the images of a sample page and prints the
     * resulting list to standard output.
     *
     * @param argv command-line arguments (unused)
     * @throws ParserException propagated from {@link #imagelist(String)}
     */
    public static void main(String[] argv) throws ParserException {
        Imagepicker img = new Imagepicker();
        List<String> result = img
                .imagelist("http://www.pixelofnature.com/portfolio/");
        System.out.println(result);
    }

    /**
     * Returns the {@code src} attribute values of all {@code IMG} tags in the
     * document identified by {@code url}.
     *
     * <p>Values are returned exactly as they appear in the attribute; they are
     * not resolved against the page URL. Tags with a missing or empty
     * {@code src} attribute are skipped, so the result never contains
     * {@code null}.
     *
     * @param url resource handed to the {@link Parser} constructor
     * @return the image sources in the order the parser returned the tags;
     *         empty if no usable {@code IMG} tag was found
     * @throws ParserException if thrown by the underlying parser while
     *         opening or parsing the resource
     */
    public List<String> imagelist(String url) throws ParserException {
        List<String> result = new ArrayList<String>();
        Parser parser = new Parser(url);
        // Keep only the IMG tag nodes of the document.
        NodeList list = parser.parse(new TagNameFilter("IMG"));
        // Walk the matched tags and collect their src attribute.
        for (SimpleNodeIterator iterator = list.elements(); iterator
                .hasMoreNodes();) {
            Tag tag = (Tag) iterator.nextNode();
            String src = tag.getAttribute("src");
            // An <img> without a usable src would otherwise put null (or an
            // empty string) into the list handed back to the caller.
            if (src == null || src.isEmpty()) {
                continue;
            }
            result.add(src);
        }
        return result;
    }
}
No comments:
Post a Comment