Bladeren bron

优志愿学校数据及招生简章

jinxia.mo 3 jaren geleden
bovenliggende
commit
755413fb18
2 gewijzigde bestanden met toevoegingen van 222 en 1 verwijderingen
  1. 8 1
      src/main/java/com/mingxue/spider/utils/UsignUtils.java
  2. 214 0
      src/test/java/spider/YouZy.java

+ 8 - 1
src/main/java/com/mingxue/spider/utils/UsignUtils.java

@@ -100,11 +100,18 @@ public class UsignUtils {
         String eduLevel = "ben";
         paramMap =new HashMap<>();
         paramMap.put("eduLevel",eduLevel);
-//        System.out.println(getUsign(getRequestPathAndParam(requestPath,paramMap)));
+        System.out.println(getUsign(getRequestPathAndParam(requestPath,paramMap)));
 
         requestPath = "/youzy.dms.basiclib.api.major.tree.query";
         String jsonParam="{\"count\":5,\"rankType\":1,\"eduLevel\":\"ben\"}";
         paramMap =new HashMap<>();
         System.out.println(getUsign(getRequestPathAndParam(requestPath,paramMap),jsonParam));
+
+        //u-sign: f6a8052b16fbe01064e4c4e86441a642
+        requestPath = "/youzy.dms.basiclib.api.college.query";
+        jsonParam="{\"keyword\":\"\",\"provinceNames\":[],\"natureTypes\":[],\"eduLevel\":\"\",\"categories\":[],\"features\":[],\"pageIndex\":%s,\"pageSize\":20,\"sort\":11}";
+        jsonParam = String.format(jsonParam,2);
+        paramMap =new HashMap<>();
+        System.out.println(getUsign(getRequestPathAndParam(requestPath,paramMap),jsonParam));
     }
 }

+ 214 - 0
src/test/java/spider/YouZy.java

@@ -1,5 +1,7 @@
 package spider;
 
+import cn.hutool.core.date.DateUtil;
+import cn.hutool.core.thread.ThreadUtil;
 import cn.hutool.db.DbUtil;
 import cn.hutool.db.Entity;
 import com.alibaba.fastjson.JSONArray;
@@ -27,6 +29,8 @@ public class YouZy {
 
     @Test
     public void test() throws Exception {
+        //高校
+        getUniversityList();
 //        getMajorData();
 //        processVocationalHotAndLack();
         /**
@@ -54,6 +58,185 @@ public class YouZy {
 //        }
     }
 
+
+    /**
+     * Walks the paged college list and, for every college on each page, fetches and stores
+     * its enrollment brochures via {@code enrollDetail}.
+     *
+     * <p>The crawl ends when a list request returns an empty response, when a successful page
+     * contains no items, or when the page accounting ({@code pageIndex * pageSize}) exceeds the
+     * hard-coded {@code total}. A page whose {@code isSuccess} flag is not {@code true} is
+     * skipped and the crawl moves on to the next page.
+     */
+    public void getUniversityList() {
+        // Notes left by the original author (presumably progress markers from earlier runs):
+        //64 9 college 10221, Hong Kong Shue Yan University
+        //91 11 college 11831, Handan Preschool Teachers College
+        int count = 0, total = 5000, pageIndex = 1, pageSize = 20;
+
+        do {
+            // College list for the current page. The request body hard-codes "pageSize":20,
+            // which has to stay in sync with the local pageSize used for the accounting below.
+            String requestPath = "/youzy.dms.basiclib.api.college.query";
+            String jsonParam="{\"keyword\":\"\",\"provinceNames\":[],\"natureTypes\":[],\"eduLevel\":\"\",\"categories\":[],\"features\":[],\"pageIndex\":%s,\"pageSize\":20,\"sort\":11}";
+            jsonParam = String.format(jsonParam, pageIndex);
+            Map<String, Object> paramMap = new HashMap<>();
+            String res = getRes(requestPath, paramMap, jsonParam);
+            if (StringUtils.isEmpty(res)) {
+                log.error("res is empty");
+                return;
+            }
+
+            JSONObject resultJsonobject = JSONObject.parseObject(res);
+            // getBoolean returns a boxed Boolean; compare null-safely so that a response
+            // without the flag is treated as a failure instead of throwing on unboxing.
+            if (resultJsonobject != null && Boolean.TRUE.equals(resultJsonobject.getBoolean("isSuccess"))) {
+                JSONObject result = resultJsonobject.getJSONObject("result");
+                JSONArray items = result == null ? null : result.getJSONArray("items");
+                if (items == null || items.isEmpty()) {
+                    // Nothing on this page: stop instead of requesting every remaining page
+                    // up to the hard-coded total.
+                    log.error("page={} returned no items, stopping", pageIndex);
+                    return;
+                }
+
+                for (int i = 0; i < items.size(); i++) {
+                    long start = System.currentTimeMillis();
+
+                    JSONObject rowUniversity = (JSONObject) items.get(i);
+                    String universityCode = rowUniversity.getString("code");
+
+                    // College detail + storing the college row (currently disabled):
+//                    getUniversityDetail(universityCode, rowUniversity, new HashSet<>());
+
+                    // Enrollment brochures of this college.
+                    enrollDetail(universityCode);
+
+                    // Pause one second between colleges.
+                    ThreadUtil.safeSleep(1 * 1000);
+                    log.error("{} 学校 {}, {}, 耗时{}",i+1,universityCode,rowUniversity.getString("cnName"), DateUtil.formatBetween(System.currentTimeMillis() - start));
+                }
+            }
+
+            log.error("已完成page={}, 开始第{}页" ,pageIndex,(pageIndex+1));
+            count = pageIndex * pageSize;
+            pageIndex += 1;
+        } while (count <= total);
+    }
+
+    /**
+     * Fetches the detail record of one college, merges it into the list row and upserts the
+     * merged row into the {@code sy_university} table (matched on {@code id}).
+     *
+     * <p>Nothing is stored when the response is empty, when {@code isSuccess} is not
+     * {@code true}, or when the response carries no {@code result} object.
+     *
+     * @param universityCode college code, sent as the {@code code} query parameter
+     * @param rowUniversity  list row of the college; it is modified in place (detail fields are
+     *                       merged in and {@code update_time} is set)
+     * @param universitySet  receives the entity built for this college
+     */
+    private void getUniversityDetail(String universityCode,JSONObject rowUniversity,Set<Entity> universitySet){
+        String requestPath = "/youzy.dms.basiclib.api.college.bycode.get";
+        Map<String, Object> paramMap = new HashMap<>();
+        paramMap.put("code", universityCode);
+        String res = getRes(requestPath, paramMap, "");
+        if (StringUtils.isEmpty(res)) {
+            log.error("res is empty");
+            return;
+        }
+
+        JSONObject resultJsonobject = JSONObject.parseObject(res);
+        // Null-safe check: getBoolean returns a boxed Boolean that may be null.
+        if (resultJsonobject == null || !Boolean.TRUE.equals(resultJsonobject.getBoolean("isSuccess"))) {
+            return;
+        }
+        JSONObject universityDetail = resultJsonobject.getJSONObject("result");
+        if (universityDetail == null) {
+            // putAll(null) would throw; a missing result means there is nothing to merge.
+            return;
+        }
+        rowUniversity.putAll(universityDetail);
+
+        // Replace these values by their String form (getString), presumably so that nested
+        // JSON arrays/objects can be written to plain text columns - confirm against the schema.
+        String[] stringifiedFields = {
+                "pointsOfBo", "bxLevel", "features", "entranceType",
+                "pointsOfShuo", "academicians", "alumnis", "artFeatures",
+                "shuoShiZBDH", "bxType", "zhaoBanDH", "categories",
+                "nameUsed", "address", "histories", "awards"
+        };
+        for (String field : stringifiedFields) {
+            rowUniversity.put(field, rowUniversity.getString(field));
+        }
+
+        Date now = new Date();
+        rowUniversity.put("update_time", now);
+
+        // Store the college.
+        Entity universityTable = Entity.create("sy_university");
+        universityTable.putAll(rowUniversity);
+        universitySet.add(universityTable);
+        try {
+            DbUtil.use().insertOrUpdate(universityTable, "id");
+        } catch (SQLException e) {
+            // Keep crawling, but record which college failed together with the cause.
+            log.error("failed to store university {}", universityCode, e);
+        }
+    }
+
+    /**
+     * Fetches the enrollment-brochure news list of one college and upserts every entry from
+     * 2021 onwards into the {@code sy_university_enroll_brochure} table (matched on {@code id}).
+     *
+     * <p>For each qualifying list row the detail endpoint is called and its {@code result} is
+     * merged into the row before it is stored. Only {@code pageIndex} 1 of the list is
+     * requested. Processing of this college stops as soon as a detail request returns an empty
+     * response.
+     *
+     * @param collegeCode college code, sent as {@code collegeCode} in the list request
+     */
+    private void enrollDetail(String collegeCode){
+        String listPath = "/youzy.dms.basiclib.api.college.news.bykeywords.search";
+        String listBody = String.format("{\"collegeCode\":\"%s\",\"pageIndex\":1,\"keywords\":\"\"}", collegeCode);
+        Map<String, Object> listParams = new HashMap<>();
+        String listRes = getRes(listPath, listParams, listBody);
+        if (StringUtils.isEmpty(listRes)) {
+            log.error("res is empty");
+            return;
+        }
+
+        JSONObject listJson = JSONObject.parseObject(listRes);
+        // Null-safe check: getBoolean returns a boxed Boolean that may be null.
+        if (listJson == null || !Boolean.TRUE.equals(listJson.getBoolean("isSuccess"))) {
+            return;
+        }
+        JSONObject listResult = listJson.getJSONObject("result");
+        JSONArray items = listResult == null ? null : listResult.getJSONArray("items");
+        if (items == null) {
+            return;
+        }
+
+        String detailPath = "/youzy.dms.basiclib.api.college.news.get";
+        for (int i = 0; i < items.size(); i++) {
+            JSONObject row = (JSONObject) items.get(i);
+            Integer year = row.getInteger("year");
+            // Only keep entries from 2021 onwards; a row without a year cannot qualify
+            // (and comparing a null Integer would throw on unboxing).
+            if (year == null || year < 2021) {
+                continue;
+            }
+            String id = row.getString("id");
+
+            // Detail of this news entry; the request has query parameters only, no JSON body.
+            Map<String, Object> detailParams = new HashMap<>();
+            detailParams.put("id", id);
+            detailParams.put("isAddHits", true);
+            String detailRes = getRes(detailPath, detailParams, "");
+            if (StringUtils.isEmpty(detailRes)) {
+                log.error("res is empty");
+                return;
+            }
+
+            JSONObject detailJson = JSONObject.parseObject(detailRes);
+            if (detailJson == null || !Boolean.TRUE.equals(detailJson.getBoolean("isSuccess"))) {
+                continue;
+            }
+            JSONObject detail = detailJson.getJSONObject("result");
+            if (detail == null) {
+                // putAll(null) would throw; without a result there is nothing to store.
+                continue;
+            }
+            row.putAll(detail);
+
+            // These two fields are dropped before the row is stored.
+            row.remove("createdAt");
+            row.remove("updatedAt");
+            // Replace "tags" by its String form (getString) before storing.
+            row.put("tags", row.getString("tags"));
+
+            Date now = new Date();
+            row.put("update_time", now);
+
+            // Store the brochure row.
+            Entity brochureTable = Entity.create("sy_university_enroll_brochure");
+            brochureTable.putAll(row);
+            try {
+                DbUtil.use().insertOrUpdate(brochureTable, "id");
+            } catch (SQLException e) {
+                // Keep going with the next entry, but record which one failed and why.
+                log.error("failed to store enroll brochure {} of college {}", id, collegeCode, e);
+            }
+        }
+    }
+
+
+
     /**
      * @param majorLevel =1 2 3级专业
      * @param eduLevels(教育水平):"ben","zhuan"
@@ -682,6 +865,28 @@ public class YouZy {
         return res;
     }
 
+    /**
+     * Builds the signed request for {@code path}, POSTs it and returns the response body.
+     *
+     * <p>The u-sign value is computed from the request path (including the query parameters
+     * in {@code map}) and, when {@code jsonParam} is not blank, from the JSON body as well.
+     * After the request the method sleeps for 500 ms (presumably to throttle the crawl).
+     *
+     * @param path      API path, e.g. {@code /youzy.dms.basiclib.api.college.query}
+     * @param map       query parameters appended to the path
+     * @param jsonParam JSON request body; may be blank for requests without a body
+     * @return whatever {@code HttpUtils.postBody} returns for the request
+     */
+    public String getRes(String path, Map<String, Object> map,String jsonParam) {
+        String requestPath = UsignUtils.getRequestPathAndParam(path, map);
+        // Example u-sign value noted by the author: f6a8052b16fbe01064e4c4e86441a642
+        String usign = StringUtils.isBlank(jsonParam)
+                ? UsignUtils.getUsign(requestPath)
+                : UsignUtils.getUsign(requestPath, jsonParam);
+
+        String url = host + requestPath;
+        String res = HttpUtils.postBody(url, jsonParam, usign);
+
+        try {
+            Thread.sleep(500);
+        } catch (InterruptedException e) {
+            // Restore the interrupt flag so callers can still observe the interruption.
+            Thread.currentThread().interrupt();
+        }
+
+        return res;
+    }
+
     private void getMajorData(){
         String location="福建";
         Integer provinceId=50;
@@ -751,5 +956,14 @@ public class YouZy {
         }
     }
 
+    /**
+     * Manual check: requests page 2 of the college list and prints the raw response to
+     * standard output.
+     */
+    public void testUniversity() {
+        // u-sign noted by the author: f6a8052b16fbe01064e4c4e86441a642
+        final String bodyTemplate = "{\"keyword\":\"\",\"provinceNames\":[],\"natureTypes\":[],\"eduLevel\":\"\",\"categories\":[],\"features\":[],\"pageIndex\":%s,\"pageSize\":20,\"sort\":11}";
+        final String body = String.format(bodyTemplate, 2);
+        final Map<String, Object> noQueryParams = new HashMap<>();
+        final String response = getRes("/youzy.dms.basiclib.api.college.query", noQueryParams, body);
+        System.out.println(response);
+    }
+
 
 }