使用 Java 采集京东商城行政区划数据示例
package com.test.html;
import com.alibaba.fastjson.JSON;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
/**
 * Example crawler that walks the four address levels exposed by the
 * trade.jd.com consignee endpoints (province, city, county, town) and dumps
 * the resulting tree as a JavaScript assignment ({@code var ds=[...]}).
 */
public class JD_Address
{
    /** Timeout handed to jsoup for every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 300000;

    /** Encoding of the generated file; explicit because the payload is mostly non-ASCII text. */
    private static final String OUTPUT_ENCODING = "UTF-8";

    /** Number of requests issued so far; drives progress output and throttling. */
    private static int count = 0;

    /**
     * Fetches every province, then the cities of each province, the counties
     * of each city and the towns of each county, and finally writes the whole
     * tree to disk as JSON prefixed with {@code var ds=}.
     *
     * @param args unused
     * @throws IOException          if any request or the final file write fails
     * @throws InterruptedException if the throttling sleep is interrupted
     */
    public static void main(String[] args) throws IOException, InterruptedException
    {
        String province_url = "http://trade.jd.com/dynamic/consignee/getProvinces.action";
        String city_url = "http://trade.jd.com/dynamic/consignee/getCitys.action?consigneeParam.provinceId=";
        String county_url = "http://trade.jd.com/dynamic/consignee/getCountys.action?consigneeParam.cityId=";
        String town_url = "http://trade.jd.com/dynamic/consignee/getTowns.action?consigneeParam.countyId=";

        List<Address> provinces = getList(province_url);
        for (Address province : provinces)
        {
            List<Address> citys = getList(city_url + province.getId());
            province.setChildren(citys);
            for (Address city : citys)
            {
                List<Address> countys = getList(county_url + city.getId());
                city.setChildren(countys);
                for (Address county : countys)
                {
                    List<Address> towns = getList(town_url + county.getId());
                    county.setChildren(towns);
                }
            }
        }
        System.out.println("=======");

        // Write with an explicit charset: the two-argument overload falls back to the
        // platform default encoding, which corrupts the Chinese names on non-UTF-8 JVMs.
        FileUtils.writeStringToFile(
            new File("保存位置/address2.js"),
            "var ds=" + JSON.toJSONString(provinces),
            OUTPUT_ENCODING);
    }

    /**
     * Downloads one endpoint and converts each {@code <option>} element whose
     * {@code value} attribute matches the regex {@code \d+} into an
     * {@link Address}. Any {@code *} characters are stripped from the option text.
     *
     * <p>As a side effect the request counter is incremented: it is printed
     * every 100 requests, and every 500 requests the thread sleeps for five
     * seconds before returning.
     *
     * @param url absolute URL of the endpoint to query
     * @return the parsed entries in document order; empty if no option matched
     * @throws IOException          if the page cannot be fetched or parsed
     * @throws InterruptedException if the throttling sleep is interrupted
     */
    private static List<Address> getList(String url) throws IOException, InterruptedException
    {
        List<Address> list = new ArrayList<Address>();
        Document doc = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);
        Elements elements = doc.select("option[value~=\\d+]");
        for (Element element : elements)
        {
            Address address = new Address();
            address.setName(element.text().replace("*", StringUtils.EMPTY));
            address.setId(element.attr("value"));
            list.add(address);
        }

        ++count;
        if (count % 100 == 0)
        {
            // Progress indicator.
            System.out.println(count);
        }
        if (count % 500 == 0)
        {
            // Pause between batches of requests.
            Thread.sleep(5000);
        }
        return list;
    }
}
/**
 * One node of the address tree: an identifier, a display name and an
 * optional list of child nodes one level below it.
 */
class Address
{
    /** Identifier of this node, kept as the raw string it was set with. */
    private String id;

    /** Display name of this node. */
    private String name;

    /** Nodes directly below this one; {@code null} until explicitly set. */
    private List<Address> children;

    /** @return the identifier, or {@code null} if none was set */
    public String getId()
    {
        return this.id;
    }

    /** @param id the identifier to store */
    public void setId(String id)
    {
        this.id = id;
    }

    /** @return the display name, or {@code null} if none was set */
    public String getName()
    {
        return this.name;
    }

    /** @param name the display name to store */
    public void setName(String name)
    {
        this.name = name;
    }

    /** @return the child list exactly as it was set (no copy), or {@code null} */
    public List<Address> getChildren()
    {
        return this.children;
    }

    /** @param children the child list to store; the reference is kept as-is */
    public void setChildren(List<Address> children)
    {
        this.children = children;
    }
}
[JavaScript]代码
[
{"children": [
{"children": [
{"id": "2799", "name": "三环以内"},
{"id": "2819", "name": "三环到四环之间"},
{"id": "2839", "name": "四环到五环之间"},
{"id": "2840", "name": "五环到六环之间"},
{"id": "4137", "name": "管庄"},
{"id": "4139", "name": "北苑"},
{"id": "4211", "name": "定福庄"}
], "id": "72", "name": "朝阳区"},
{"children": [
{"id": "2848", "name": "三环以内"},
{"id": "2849", "name": "三环到四环之间"},
{"id": "2850", "name": "四环到五环之间"},
{"id": "2851", "name": "五环到六环之间"},
{"id": "2852", "name": "六环以外"},
{"id": "4134", "name": "西三旗"},
{"id": "4209", "name": "西二旗"}
], "id": "2800", "name": "海淀区"},
{"children": [
{"id": "2827", "name": "内环到二环里"},
{"id": "2853", "name": "二环到三环"}
], "id": "2801", "name": "西城区"},
{"children": [
{"id": "2821", "name": "内环到三环里"}
], "id": "2802", "name": "东城区"},
{"children": [
{"id": "2829", "name": "一环到二环"},
{"id": "2842", "name": "二环到三环"}
], "id": "2803", "name": "崇文区"},
{"children": [
{"id": "2828", "name": "内环到三环里"}
], "id": "2804", "name": "宣武区"},
{"children": [
{"id": "2832", "name": "四环到五环之间"},
{"id": "2854", "name": "二环到三环"},
{"id": "2855", "name": "三环到四环之间"},
{"id": "34544", "name": "五环到六环之间"},
{"id": "34545", "name": "六环之外"}
], "id": "2805", "name": "丰台区"},
{"children": [
{"id": "2831", "name": "四环到五环内"},
{"id": "4187", "name": "石景山城区"},
{"id": "4188", "name": "八大处科技园区"}
], "id": "2806", "name": "石景山区"},
{"children": [
{"id": "6491", "name": "城区以内"},
{"id": "2843", "name": "郊区"}
], "id": "2807", "name": "门头沟"},
{"children": [
{"id": "6492", "name": "城区以内"},
{"id": "2844", "name": "郊区"}
], "id": "2808", "name": "房山区"},
{"children": [
{"id": "4175", "name": "五环到六环之间"},
{"id": "37643", "name": "六环以外(于家务乡)"},
{"id": "51150", "name": "六环以外(张家湾镇、台湖镇、漷县镇)"},
{"id": "51151", "name": "六环以外(宋庄镇)"},
{"id": "35698", "name": "六环以外(其他地区)"}
], "id": "2809", "name": "通州区"},
{"children": [
{"id": "4194", "name": "四环至五环之间"},
{"id": "6501", "name": "五环至六环之间"},
{"id": "4205", "name": "六环以外"},
{"id": "51081", "name": "亦庄经济开发区"}
], "id": "2810", "name": "大兴区"},
{"children": [
{"id": "51125", "name": "北石槽镇"},
{"id": "51126", "name": "北务镇"},
{"id": "51127", "name": "北小营镇"},
{"id": "51128", "name": "大孙各庄镇"},
{"id": "51129", "name": "高丽营镇"},
{"id": "51130", "name": "光明街道"},
{"id": "51131", "name": "后沙峪地区"},
{"id": "51132", "name": "空港街道"},
{"id": "51133", "name": "李桥镇"},
{"id": "51134", "name": "李遂镇"},
{"id": "51135", "name": "龙湾屯镇"},
{"id": "51136", "name": "马坡地区"},
{"id": "51137", "name": "木林镇"},
{"id": "51138", "name": "南彩镇"},
{"id": "51139", "name": "南法信地区"},
{"id": "51140", "name": "牛栏山地区"},
{"id": "51141", "name": "仁和地区"},
{"id": "51142", "name": "胜利街道"},
{"id": "51143", "name": "石园街道"},
{"id": "51144", "name": "双丰街道"},
{"id": "51145", "name": "天竺地区"},
{"id": "51146", "name": "旺泉街道"},
{"id": "51147", "name": "杨镇地区"},
{"id": "51148", "name": "张镇"},
{"id": "51149", "name": "赵全营镇"}
], "id": "2812", "name": "顺义区"},
{"children": [
{"id": "6115", "name": "城区以内"},
{"id": "2847", "name": "郊区"}
], "id": "2814", "name": "怀柔区"},
{"children": [
{"id": "6667", "name": "城区"},
{"id": "2862", "name": "城区以外"}
], "id": "2816", "name": "密云区"},
{"children": [
{"id": "4135", "name": "六环以内"},
{"id": "4136", "name": "城区"},
{"id": "2906", "name": "城区以外"}
], "id": "2901", "name": "昌平区"},
{"children": [
{"id": "6666", "name": "城区"},
{"id": "2954", "name": "城区以外"}
], "id": "2953", "name": "平谷区"},
{"children": [
{"id": "6009", "name": "百泉路北,京新高速南,康张路东,京银路西"},
{"id": "3066", "name": "百泉路南,京新高速北,康张路西,京银路东"}
], "id": "3065", "name": "延庆县"}
], "id": "1", "name": "北京"},
{"children": [
{"children": [
{"id": "79", "name": "内环以内"},
{"id": "80", "name": "内环中环之间"},
{"id": "81", "name": "中环外环之间"}
], "id": "78", "name": "黄浦区"},
{"children": [], "id": "2811", "name": "卢湾区"},
{"children": [
{"id": "2863", "name": "内环以内"},
{"id": "2865", "name": "内环中环之间"}
], "id": "2813", "name": "徐汇区"},
{"children": [
{"id": "2870", "name": "内环以内"},
{"id": "2871", "name": "内环中环之间"},
{"id": "2872", "name": "中环外环之间"}
], "id": "2815", "name": "长宁区"},
{"children": [
{"id": "2873", "name": "内环以内"},
{"id": "2874", "name": "内环中环之间"},
{"id": "2875", "name": "中环外环之间"}
], "id": "2817", "name": "静安区"},
{"children": [], "id": "2820", "name": "闸北区"},
{"children": [
{"id": "2856", "name": "内环中环之间"},
{"id": "2882", "name": "内环以内"},
{"id": "2883", "name": "中环外环之间"}
], "id": "2822", "name": "虹口区"},
{"children": [], "id": "2823", "name": "杨浦区"},
{"children": [
{"id": "2889", "name": "中环以内"},
{"id": "2890", "name": "中环外环之间"},
{"id": "2891", "name": "外环以外"}
], "id": "2824", "name": "宝山区"},
{"children": [
{"id": "2892", "name": "中环以内"},
{"id": "2893", "name": "外环以外"},
{"id": "2896", "name": "中环外环之间"}
], "id": "2825", "name": "闵行区"},
{"children": [
{"id": "2864", "name": "外环以外"}
], "id": "2826", "name": "嘉定区"},
{"children": [
{"id": "2894", "name": "中环以内"},
{"id": "2895", "name": "外环以外"},
{"id": "2897", "name": "中环外环之间"}
], "id": "2830", "name": "浦东新区"},
{"children": [
{"id": "2869", "name": "外环以外"}
], "id": "2833", "name": "青浦区"},
{"children": [
{"id": "2866", "name": "外环以外"}
], "id": "2834", "name": "松江区"},
{"children": [
{"id": "2868", "name": "外环以外"}
], "id": "2835", "name": "金山区"},
{"children": [
{"id": "2903", "name": "祝桥镇"},
{"id": "2904", "name": "新场镇"},
{"id": "2935", "name": "惠南镇"},
{"id": "2937", "name": "康桥镇"},
{"id": "2938", "name": "宣桥镇"},
{"id": "2939", "name": "书院镇"},
{"id": "2940", "name": "大团镇"},
{"id": "2941", "name": "周浦镇"},
{"id": "2942", "name": "芦潮港镇"},
{"id": "2943", "name": "泥城镇"},
{"id": "2944", "name": "六灶镇"},
{"id": "2945", "name": "航头镇"},
{"id": "2946", "name": "万祥镇"},
{"id": "2947", "name": "老港镇"},
{"id": "4159", "name": "申港街道"},
{"id": "4180", "name": "临港新城"}
], "id": "2836", "name": "南汇区"},
{"children": [
{"id": "2888", "name": "外环以外"}
], "id": "2837", "name": "奉贤区"},
{"children": [
{"id": "2876", "name": "内环以内"},
{"id": "2877", "name": "内环中环之间"},
{"id": "2878", "name": "中环外环之间"},
{"id": "3110", "name": "桃浦新村"}
], "id": "2841", "name": "普陀区"},
{"children": [
{"id": "50779", "name": "堡镇"},
{"id": "50780", "name": "庙镇"},
{"id": "50781", "name": "陈家镇"},
{"id": "50782", "name": "城桥镇"},
{"id": "50783", "name": "东平镇"},
{"id": "50784", "name": "港西镇"},
{"id": "50785", "name": "港沿镇"},
{"id": "50786", "name": "建设镇"},
{"id": "50787", "name": "绿华镇"},
{"id": "50788", "name": "三星镇"},
{"id": "50789", "name": "竖新镇"},
{"id": "50790", "name": "向化镇"},
{"id": "50791", "name": "新海镇"},
{"id": "50792", "name": "新河镇"},
{"id": "50793", "name": "中兴镇"},
{"id": "50794", "name": "长兴乡"},
{"id": "50795", "name": "横沙乡"},
{"id": "50796", "name": "新村乡"}
], "id": "2919", "name": "崇明县"}
], "id": "2", "name": "上海"},
{"children": [
{"children": [
{"id": "39620", "name": "全境"}
], "id": "51035", "name": "东丽区"},
{"children": [
{"id": "2984", "name": "全境"}
], "id": "51036", "name": "和平区"},
{"children": [
{"id": "2987", "name": "全境"}
], "id": "51037", "name": "河北区"},
{"children": [
{"id": "3000", "name": "全境"}
], "id": "51038", "name": "河东区"},
{"children": [
{"id": "2985", "name": "全境"}
], "id": "51039", "name": "河西区"},
{"children": [
{"id": "2986", "name": "全境"}
], "id": "51040", "name": "红桥区"},
{"children": [
{"id": "98", "name": "全境"}
], "id": "51041", "name": "蓟县"},
{"children": [
{"id": "36157", "name": "全境"}
], "id": "51042", "name": "静海县"},
{"children": [
{"id": "2907", "name": "全境"}
], "id": "51043", "name": "南开区"},
{"children": [
{"id": "25708", "name": "全境"}
], "id": "51044", "name": "塘沽区"},
{"children": [
{"id": "25712", "name": "杨柳青,中北,精武,大寺镇,环外海泰及外环内"},
{"id": "25711", "name": "其它地区"}
], "id": "51045", "name": "西青区"},
{"children": [
{"id": "22846", "name": "杨村镇、下朱庄内"},
{"id": "22847", "name": "其它地区"}
], "id": "51046", "name": "武清区"},
{"children": [
{"id": "25704", "name": "咸水沽镇、海河教育园,海河科技园"},
{"id": "36171", "name": "双港,辛庄"},
{"id": "36172", "name": "其他地区"}
], "id": "51047", "name": "津南区"},
{"children": [
{"id": "23672", "name": "汉沽区街里、汉沽开发区"},
{"id": "23673", "name": "其它地区"}
], "id": "51048", "name": "汉沽区"},
{"children": [], "id": "51049", "name": "大港区"},
{"children": [
{"id": "6646", "name": "外环内"},
{"id": "36167", "name": "外环外双街镇,河北工大新校,屈店工业园"},
{"id": "36168", "name": "外环外其它地区"}
], "id": "51050", "name": "北辰区"},
{"children": [
{"id": "22848", "name": "城关镇、马家店开发区、天宝工业园"},
{"id": "22849", "name": "其它地区"}
], "id": "51051", "name": "宝坻区"},
{"children": [
{"id": "23674", "name": "芦台镇、经济开发区、贸易开发区"},
{"id": "23675", "name": "其它地区"}
], "id": "51052", "name": "宁河县"}
], "id": "3", "name": "天津"},
{"children": [
{"children": [
{"id": "9775", "name": "陈家坝街道"},
{"id": "9776", "name": "钟鼓楼街道"},
{"id": "9777", "name": "周家坝街道"},
{"id": "9778", "name": "百安坝街道"},
{"id": "9779", "name": "高笋塘街道"},
{"id": "9780", "name": "双河口街道"},
{"id": "9781", "name": "龙都街道"},
{"id": "9782", "name": "牌楼街道"},
{"id": "9783", "name": "沙河街道"},
{"id": "9784", "name": "太白街道"},
{"id": "9785", "name": "五桥街道"},
{"id": "9786", "name": "白土镇"},
{"id": "9787", "name": "白羊镇"},
{"id": "9788", "name": "大周镇"},
{"id": "9789", "name": "弹子镇"},
{"id": "9790", "name": "分水镇"},
{"id": "9791", "name": "甘宁镇"},
{"id": "9792", "name": "高峰镇"},
{"id": "9793", "name": "高梁镇"},
{"id": "9794", "name": "后山镇"},
{"id": "9795", "name": "李河镇"},
{"id": "9796", "name": "龙驹镇"},
{"id": "9797", "name": "龙沙镇"},
{"id": "9798", "name": "罗田镇"},
{"id": "9799", "name": "孙家镇"},
{"id": "9800", "name": "太安镇"},
{"id": "9801", "name": "太龙镇"},
{"id": "9802", "name": "天城镇"},
{"id": "9803", "name": "武陵镇"},
{"id": "9804", "name": "响水镇"},
{"id": "9805", "name": "小周镇"},
{"id": "9806", "name": "新田镇"},
{"id": "9807", "name": "新乡镇"},
{"id": "9808", "name": "熊家镇"},
{"id": "9809", "name": "余家镇"},
{"id": "9810", "name": "长岭镇"},
{"id": "9811", "name": "长坪镇"},
{"id": "9812", "name": "长滩镇"},
{"id": "9813", "name": "走马镇"},
{"id": "9814", "name": "瀼渡镇"},
{"id": "9815", "name": "茨竹乡"},
{"id": "9816", "name": "柱山乡"},
{"id": "9817", "name": "燕山乡"},
{"id": "9818", "name": "溪口乡"},
{"id": "9819", "name": "普子乡"},
{"id": "9820", "name": "地宝乡"},
{"id": "9821", "name": "铁峰乡"},
{"id": "9822", "name": "黄柏乡"},
{"id": "9823", "name": "九池乡"},
{"id": "9824", "name": "梨树乡"},
{"id": "9825", "name": "郭村乡"},
{"id": "9826", "name": "恒合乡"}
], "id": "113", "name": "万州区"},
{"children": [
{"id": "9893", "name": "荔枝街道"},
{"id": "9894", "name": "敦仁街道"},
{"id": "9895", "name": "江北街道"},
{"id": "9896", "name": "江东街道"},
{"id": "9897", "name": "崇义街道"},
{"id": "9898", "name": "李渡镇"},
{"id": "9899", "name": "白涛镇"},
{"id": "9900", "name": "百胜镇"},
{"id": "9901", "name": "堡子镇"},
{"id": "9902", "name": "焦石镇"},
{"id": "9903", "name": "蔺市镇"},
{"id": "9904", "name": "龙桥镇"},
{"id": "9905", "name": "龙潭镇"},
{"id": "9906", "name": "马武镇"},
{"id": "9907", "name": "南沱镇"},
{"id": "9908", "name": "青羊镇"},
{"id": "9909", "name": "清溪镇"},
{"id": "9910", "name": "石沱镇"},
{"id": "9911", "name": "新妙镇"},
{"id": "9912", "name": "义和镇"},
{"id": "9913", "name": "增福乡"},
{"id": "9914", "name": "珍溪镇"},
{"id": "9915", "name": "镇安镇"},
{"id": "9916", "name": "致韩镇"},
{"id": "9917", "name": "土地坡乡"},
{"id": "9918", "name": "武陵山乡"},
{"id": "9919", "name": "中峰乡"},
{"id": "9920", "name": "梓里乡"},
{"id": "9921", "name": "丛林乡"},
{"id": "9922", "name": "大木乡"},
{"id": "9923", "name": "惠民乡"},
{"id": "9924", "name": "酒店乡"},
{"id": "9925", "name": "聚宝乡"},
{"id": "9926", "name": "卷洞乡"},
{"id": "9927", "name": "两汇乡"},
{"id": "9928", "name": "罗云乡"},
{"id": "9929", "name": "明家乡"},
{"id": "9930", "name": "仁义乡"},
{"id": "9931", "name": "山窝乡"},
{"id": "9932", "name": "石和乡"},
{"id": "9933", "name": "石龙乡"},
{"id": "9934", "name": "太和乡"},
{"id": "9935", "name": "天台乡"},
{"id": "9936", "name": "同乐乡"},
{"id": "9937", "name": "新村乡"}
], "id": "114", "name": "涪陵区"},
{"children": [
{"id": "9938", "name": "梁山镇"},
{"id": "9939", "name": "柏家镇"},
{"id": "9940", "name": "碧山镇"},
{"id": "9941", "name": "大观镇"},
{"id": "9942", "name": "福禄镇"},
{"id": "9943", "name": "合兴镇"},
{"id": "9944", "name": "和林镇"},
{"id": "9945", "name": "虎城镇"},
{"id": "9946", "name": "回龙镇"},
{"id": "9947", "name": "金带镇"},
{"id": "9948", "name": "聚奎镇"},
{"id": "9949", "name": "礼让镇"},
{"id": "9950", "name": "龙门镇"},
{"id": "9951", "name": "明达镇"},
{"id": "9952", "name": "蟠龙镇"},
{"id": "9953", "name": "屏锦镇"},
{"id": "9954", "name": "仁贤镇"},
{"id": "9955", "name": "石安镇"},
{"id": "9956", "name": "文化镇"},
{"id": "9957", "name": "新盛镇"},
{"id": "9958", "name": "荫平镇"},
{"id": "9959", "name": "袁驿镇"},
{"id": "9960", "name": "云龙镇"},
{"id": "9961", "name": "竹山镇"},
{"id": "9962", "name": "安胜乡"},
{"id": "9963", "name": "铁门乡"},
{"id": "9964", "name": "紫照乡"},
{"id": "9965", "name": "曲水乡"},
{"id": "9966", "name": "龙胜乡"},
{"id": "9967", "name": "城北乡"},
{"id": "9968", "name": "城东乡"},
{"id": "9969", "name": "复平乡"},
{"id": "39680", "name": "县城内"}
], "id": "115", "name": "梁平县"},