|
@@ -1,5 +1,7 @@
|
|
package spider;
|
|
package spider;
|
|
|
|
|
|
|
|
+import cn.hutool.core.date.DateUtil;
|
|
|
|
+import cn.hutool.core.thread.ThreadUtil;
|
|
import cn.hutool.db.DbUtil;
|
|
import cn.hutool.db.DbUtil;
|
|
import cn.hutool.db.Entity;
|
|
import cn.hutool.db.Entity;
|
|
import com.alibaba.fastjson.JSONArray;
|
|
import com.alibaba.fastjson.JSONArray;
|
|
@@ -27,6 +29,8 @@ public class YouZy {
|
|
|
|
|
|
@Test
|
|
@Test
|
|
public void test() throws Exception {
|
|
public void test() throws Exception {
|
|
|
|
+ //高校
|
|
|
|
+ getUniversityList();
|
|
// getMajorData();
|
|
// getMajorData();
|
|
// processVocationalHotAndLack();
|
|
// processVocationalHotAndLack();
|
|
/**
|
|
/**
|
|
@@ -54,6 +58,185 @@ public class YouZy {
|
|
// }
|
|
// }
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+
|
|
|
|
+ public void getUniversityList() {
|
|
|
|
+ //64 9 学校 10221, 香港树仁大学
|
|
|
|
+ //91 11 学校 11831, 邯郸幼儿师范高等专科学校
|
|
|
|
+ int count = 0, total = 5000, pageIndex = 1, pageSize=20;
|
|
|
|
+
|
|
|
|
+ do {
|
|
|
|
+ //学校列表
|
|
|
|
+ String requestPath = "/youzy.dms.basiclib.api.college.query";
|
|
|
|
+ String jsonParam="{\"keyword\":\"\",\"provinceNames\":[],\"natureTypes\":[],\"eduLevel\":\"\",\"categories\":[],\"features\":[],\"pageIndex\":%s,\"pageSize\":20,\"sort\":11}";
|
|
|
|
+ jsonParam = String.format(jsonParam,pageIndex);
|
|
|
|
+ Map paramMap =new HashMap<>();
|
|
|
|
+ String res = getRes(requestPath,paramMap,jsonParam);
|
|
|
|
+ if (StringUtils.isEmpty(res)) {
|
|
|
|
+ log.error("res is empty");
|
|
|
|
+ return;
|
|
|
|
+ } else {
|
|
|
|
+ JSONObject resultJsonobject = JSONObject.parseObject(res);
|
|
|
|
+ if (resultJsonobject.getBoolean("isSuccess")) {
|
|
|
|
+ JSONArray items = resultJsonobject.getJSONObject("result").getJSONArray("items");
|
|
|
|
+ Set<Entity> universitySet=new HashSet();
|
|
|
|
+ for (int i=0;i<items.size();i++) {
|
|
|
|
+ long start = System.currentTimeMillis();
|
|
|
|
+
|
|
|
|
+ JSONObject rowUniversity = (JSONObject)items.get(i);
|
|
|
|
+ String universityCode = rowUniversity.getString("code");
|
|
|
|
+ /**
|
|
|
|
+ * 学校详情及存储学校数据
|
|
|
|
+ */
|
|
|
|
+// getUniversityDetail(universityCode,rowUniversity,universitySet);
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * 招生简单
|
|
|
|
+ */
|
|
|
|
+ enrollDetail(universityCode);
|
|
|
|
+
|
|
|
|
+ ThreadUtil.safeSleep(1 * 1000);
|
|
|
|
+ log.error("{} 学校 {}, {}, 耗时{}",i+1,universityCode,rowUniversity.getString("cnName"), DateUtil.formatBetween(System.currentTimeMillis() - start));
|
|
|
|
+
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ log.error("已完成page={}, 开始第{}页" ,pageIndex,(pageIndex+1));
|
|
|
|
+ count = pageIndex * pageSize;
|
|
|
|
+ pageIndex += 1;
|
|
|
|
+ }while (count <= total);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * 学校详情及存储学校数据
|
|
|
|
+ * @param universityCode
|
|
|
|
+ * @param rowUniversity
|
|
|
|
+ * @param universitySet
|
|
|
|
+ */
|
|
|
|
+ private void getUniversityDetail(String universityCode,JSONObject rowUniversity,Set<Entity> universitySet){
|
|
|
|
+ String requestPath = "/youzy.dms.basiclib.api.college.bycode.get";
|
|
|
|
+ String jsonParam = "";
|
|
|
|
+ Map paramMap = new HashMap<>();
|
|
|
|
+ paramMap.put("code",universityCode);
|
|
|
|
+ String res = getRes(requestPath, paramMap, jsonParam);
|
|
|
|
+ if (StringUtils.isEmpty(res)) {
|
|
|
|
+ log.error("res is empty");
|
|
|
|
+ return;
|
|
|
|
+ }else {
|
|
|
|
+ JSONObject resultJsonobject = JSONObject.parseObject(res);
|
|
|
|
+ if (resultJsonobject.getBoolean("isSuccess")) {
|
|
|
|
+ JSONObject universityDetail = resultJsonobject.getJSONObject("result");
|
|
|
|
+// log.info("size:{} rowUniversity is {}",rowUniversity.size(),rowUniversity);
|
|
|
|
+// log.info("size:{} universityDetail is {}",universityDetail.size(),universityDetail);
|
|
|
|
+ rowUniversity.putAll(universityDetail);
|
|
|
|
+// log.info("size:{} universityTotal is {}",rowUniversity.size(),rowUniversity);
|
|
|
|
+
|
|
|
|
+ //处理json数据
|
|
|
|
+ rowUniversity.put("pointsOfBo",rowUniversity.getString("pointsOfBo"));
|
|
|
|
+ rowUniversity.put("bxLevel",rowUniversity.getString("bxLevel"));
|
|
|
|
+ rowUniversity.put("features",rowUniversity.getString("features"));
|
|
|
|
+ rowUniversity.put("entranceType",rowUniversity.getString("entranceType"));
|
|
|
|
+ rowUniversity.put("pointsOfShuo",rowUniversity.getString("pointsOfShuo"));
|
|
|
|
+ rowUniversity.put("academicians",rowUniversity.getString("academicians"));
|
|
|
|
+ rowUniversity.put("alumnis",rowUniversity.getString("alumnis"));
|
|
|
|
+ rowUniversity.put("artFeatures",rowUniversity.getString("artFeatures"));
|
|
|
|
+ rowUniversity.put("shuoShiZBDH",rowUniversity.getString("shuoShiZBDH"));
|
|
|
|
+ rowUniversity.put("bxType",rowUniversity.getString("bxType"));
|
|
|
|
+ rowUniversity.put("zhaoBanDH",rowUniversity.getString("zhaoBanDH"));
|
|
|
|
+ rowUniversity.put("categories",rowUniversity.getString("categories"));
|
|
|
|
+ rowUniversity.put("nameUsed",rowUniversity.getString("nameUsed"));
|
|
|
|
+ rowUniversity.put("address",rowUniversity.getString("address"));
|
|
|
|
+ rowUniversity.put("histories",rowUniversity.getString("histories"));
|
|
|
|
+ rowUniversity.put("awards",rowUniversity.getString("awards"));
|
|
|
|
+
|
|
|
|
+ Date now =new Date();
|
|
|
|
+ rowUniversity.put("update_time",now);
|
|
|
|
+ //存储学校
|
|
|
|
+ Entity universityTable = Entity.create("sy_university");
|
|
|
|
+ universityTable.putAll(rowUniversity);
|
|
|
|
+ universitySet.add(universityTable);
|
|
|
|
+ try {
|
|
|
|
+ DbUtil.use().insertOrUpdate(universityTable, "id");
|
|
|
|
+// DbUtil.use().insert(universityTable);
|
|
|
|
+ } catch (SQLException e) {
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * 招生简单
|
|
|
|
+ * @param collegeCode
|
|
|
|
+ */
|
|
|
|
+ private void enrollDetail(String collegeCode){
|
|
|
|
+
|
|
|
|
+ String requestPath = "/youzy.dms.basiclib.api.college.news.bykeywords.search";
|
|
|
|
+ String jsonParam="{\"collegeCode\":\"%s\",\"pageIndex\":1,\"keywords\":\"\"}";
|
|
|
|
+ jsonParam = String.format(jsonParam,collegeCode);
|
|
|
|
+ Map paramMap =new HashMap<>();
|
|
|
|
+ String res = getRes(requestPath,paramMap,jsonParam);
|
|
|
|
+ if (StringUtils.isEmpty(res)) {
|
|
|
|
+ log.error("res is empty");
|
|
|
|
+ return;
|
|
|
|
+ } else {
|
|
|
|
+ JSONObject resultJsonobject = JSONObject.parseObject(res);
|
|
|
|
+ if (resultJsonobject.getBoolean("isSuccess")) {
|
|
|
|
+ JSONArray items = resultJsonobject.getJSONObject("result").getJSONArray("items");
|
|
|
|
+
|
|
|
|
+ Set<Entity> set=new HashSet();
|
|
|
|
+ for (int i=0;i<items.size();i++) {
|
|
|
|
+ JSONObject row = (JSONObject) items.get(i);
|
|
|
|
+ Integer year = row.getInteger("year");
|
|
|
|
+ //只取2021、2022的数据
|
|
|
|
+ if(year<2021) continue;
|
|
|
|
+ String id = row.getString("id");
|
|
|
|
+
|
|
|
|
+ //处理detail
|
|
|
|
+ requestPath = "/youzy.dms.basiclib.api.college.news.get";
|
|
|
|
+// jsonParam="{\"collegeCode\":\"%s\",\"pageIndex\":1,\"keywords\":\"\"}";
|
|
|
|
+// jsonParam = String.format(jsonParam,collegeCode);
|
|
|
|
+ paramMap =new HashMap<>();
|
|
|
|
+ paramMap.put("id",id);
|
|
|
|
+ paramMap.put("isAddHits",true);
|
|
|
|
+ res = getRes(requestPath,paramMap,"");
|
|
|
|
+ if (StringUtils.isEmpty(res)) {
|
|
|
|
+ log.error("res is empty");
|
|
|
|
+ return;
|
|
|
|
+ } else {
|
|
|
|
+ resultJsonobject = JSONObject.parseObject(res);
|
|
|
|
+ if (resultJsonobject.getBoolean("isSuccess")) {
|
|
|
|
+ JSONObject result = resultJsonobject.getJSONObject("result");
|
|
|
|
+ row.putAll(result);
|
|
|
|
+// log.info("size is {} ,totalData is {}",row.size(),row);
|
|
|
|
+ //sy_enroll_brochure学校招生简单
|
|
|
|
+
|
|
|
|
+ row.remove("createdAt");
|
|
|
|
+ row.remove("updatedAt");
|
|
|
|
+ //处理json
|
|
|
|
+ row.put("tags",row.getString("tags"));
|
|
|
|
+
|
|
|
|
+ Date now =new Date();
|
|
|
|
+ row.put("update_time",now);
|
|
|
|
+ //存储学校
|
|
|
|
+ Entity universityTable = Entity.create("sy_university_enroll_brochure");
|
|
|
|
+ universityTable.putAll(row);
|
|
|
|
+ set.add(universityTable);
|
|
|
|
+ try {
|
|
|
|
+ DbUtil.use().insertOrUpdate(universityTable, "id");
|
|
|
|
+// DbUtil.use().insert(universityTable);
|
|
|
|
+ } catch (SQLException e) {
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+
|
|
/**
|
|
/**
|
|
* @param majorLevel =1 2 3级专业
|
|
* @param majorLevel =1 2 3级专业
|
|
* @param eduLevels(教育水平):"ben","zhuan"
|
|
* @param eduLevels(教育水平):"ben","zhuan"
|
|
@@ -682,6 +865,28 @@ public class YouZy {
|
|
return res;
|
|
return res;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ public String getRes(String path, Map<String, Object> map,String jsonParam) {
|
|
|
|
+ String requestPath = UsignUtils.getRequestPathAndParam(path, map);
|
|
|
|
+ String usign =StringUtils.EMPTY;
|
|
|
|
+ //f6a8052b16fbe01064e4c4e86441a642
|
|
|
|
+ if(StringUtils.isBlank(jsonParam)){
|
|
|
|
+ usign = UsignUtils.getUsign(requestPath);
|
|
|
|
+ }else {
|
|
|
|
+ usign = UsignUtils.getUsign(requestPath,jsonParam);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ String url = host + requestPath;
|
|
|
|
+ String res = HttpUtils.postBody(url, jsonParam, usign);
|
|
|
|
+
|
|
|
|
+ try {
|
|
|
|
+ Thread.sleep(500);
|
|
|
|
+ } catch (InterruptedException e) {
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return res;
|
|
|
|
+ }
|
|
|
|
+
|
|
private void getMajorData(){
|
|
private void getMajorData(){
|
|
String location="福建";
|
|
String location="福建";
|
|
Integer provinceId=50;
|
|
Integer provinceId=50;
|
|
@@ -751,5 +956,14 @@ public class YouZy {
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ public void testUniversity() {
|
|
|
|
+ //u-sign: f6a8052b16fbe01064e4c4e86441a642
|
|
|
|
+ String requestPath = "/youzy.dms.basiclib.api.college.query";
|
|
|
|
+ String jsonParam="{\"keyword\":\"\",\"provinceNames\":[],\"natureTypes\":[],\"eduLevel\":\"\",\"categories\":[],\"features\":[],\"pageIndex\":%s,\"pageSize\":20,\"sort\":11}";
|
|
|
|
+ jsonParam = String.format(jsonParam,2);
|
|
|
|
+ Map paramMap =new HashMap<>();
|
|
|
|
+ System.out.println(getRes(requestPath,paramMap,jsonParam));
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
|
|
}
|
|
}
|