jinxia.mo 2 rokov pred
rodič
commit
8cc86bc1ca

+ 1 - 1
src/main/java/com/mingxue/spider/utils/ApiUtil.java

@@ -121,7 +121,7 @@ public class ApiUtil {
 
                 return true;
             } else {
-                log.error("{}",res);
+                log.error("账号{} {}",user.getCode(),res);
                 return false;
             }
         } catch (Exception e) {

+ 120 - 5
src/main/resources/users.txt

@@ -1,13 +1,68 @@
-002180054390,10253537
+002180054473,36680365
+002180054472,18508278
+002180054471,24223525
+002180054470,14971017
+002180054469,60248551
+002180054468,84778723
+002180054466,81725092
+002180054465,17131135
+002180054464,86870442
+002180054458,11003994
+002180054457,92451161
+002180054456,27080352
+002180054455,88826291
+002180054436,82398377
+002180054435,28311630
+002180054434,29161329
+002180054430,57417569
+002180054428,27718255
+002180054427,61356610
+002180054426,74567923
+002180054425,84239091
+002180054424,45882217
+002180054423,31227224
+002180054422,1046269
+002180054421,17295510
+002180054417,33145594
+002180054415,56862009
+002180054414,10601003
+002180054412,43162060
+002180054411,41604594
+002180054410,43787056
+002180054406,29717188
+002180054405,20317936
+002180054404,58424981
+002180054403,24902275
+002180054402,26151624
+002180054401,30656209
+002180054399,39515161
+002180054398,29340318
+002180054396,89362505
+002180054394,51313992
+002180054393,55512404
+002180054391,67577625
+002180054389,47306621
 002180054388,59296188
-002180054182,86934322
+002180054387,16944190
+002180054386,88928895
+002180054385,65072001
+002280024693,53643407
+002280024691,56921921
+002280024687,14281218
+002280024679,89432519
+002280024675,71891136
+002280024674,12729092
+002280024670,31082789
+002280024669,78132826
+002280024667,44017234
+002280024663,52185095
 002180054185,55900344
 002180054347,85968924
 002180054186,17464580
 002180054188,86684308
 002180054189,91506868
-002180054192,11088908
-002180054190,94862151
+002180054388,59296188
+002180054182,86934322
 002180054169,19027633
 002180054174,25057765
 002180054175,35491736
@@ -33,4 +88,64 @@
 002180054382,82653711
 002180054392,59831255
 002180054391,67577625
-
+002180054477,56734083
+002180054476,20211784
+002180054474,14426607
+002180054473,36680365
+002180054472,18508278
+002180054471,24223525
+002180054470,14971017
+002180054469,60248551
+002180054468,84778723
+002180054466,81725092
+002180054465,17131135
+002180054464,86870442
+002180054458,11003994
+002180054457,92451161
+002180054456,27080352
+002180054455,88826291
+002180054436,82398377
+002180054435,28311630
+002180054434,29161329
+002180054430,57417569
+002180054428,27718255
+002180054427,61356610
+002180054426,74567923
+002180054425,84239091
+002180054424,45882217
+002180054423,31227224
+002180054422,1046269
+002180054421,17295510
+002180054417,33145594
+002180054415,56862009
+002180054414,10601003
+002180054412,43162060
+002180054411,41604594
+002180054410,43787056
+002180054406,29717188
+002180054405,20317936
+002180054404,58424981
+002180054403,24902275
+002180054402,26151624
+002180054401,30656209
+002180054399,39515161
+002180054398,29340318
+002180054396,89362505
+002180054394,51313992
+002180054393,55512404
+002180054391,67577625
+002180054389,47306621
+002180054388,59296188
+002180054387,16944190
+002180054386,88928895
+002180054385,65072001
+002280024693,53643407
+002280024691,56921921
+002280024687,14281218
+002280024679,89432519
+002280024675,71891136
+002280024674,12729092
+002280024670,31082789
+002280024669,78132826
+002280024667,44017234
+002280024663,52185095

+ 36 - 14
src/test/java/spider/CollegeData.java

@@ -35,6 +35,9 @@ public class CollegeData {
 
     @Test
     public void test() throws Exception {
+        System.out.println("010102".compareTo("030101"));
+        System.out.println("030102".compareTo("030101"));
+        System.out.println("030101".compareTo("030101"));
 //        getMajorData();
 
         /**
@@ -53,7 +56,7 @@ public class CollegeData {
             JSONObject jsonObject = (JSONObject)ll;
             provinceMap.put(jsonObject.getString("provinceName"),JSONObject.parseObject(JSONObject.toJSONString(jsonObject),ProvinceData.class));
         });
-        String processProvinceName= "西";
+        String processProvinceName= "西";
         User currentUser = new User();
         switch (processProvinceName){
             case "湖南":
@@ -107,6 +110,20 @@ public class CollegeData {
                  */
                 currentUser = new User().setCode("13437248803").setPassword("41186990").setIsHn(false).setIsNewGaokao(true).setProvinceName(processProvinceName);
 
+                break;
+            case "河南":
+                /**
+                 * 河南	18574734705	75231013
+                 */
+                currentUser = new User().setCode("13437248803").setPassword("41186990").setIsHn(false).setIsNewGaokao(true).setProvinceName(processProvinceName);
+
+                break;
+            case "广西":
+                /** 河南 湖北 广东 广西
+                 * 广西	18873220395	50277878
+                 */
+                currentUser = new User().setCode("13437248803").setPassword("41186990").setIsHn(false).setIsNewGaokao(true).setProvinceName(processProvinceName);
+
                 break;
         }
 
@@ -148,14 +165,16 @@ public class CollegeData {
                      * 选科数据 xuanke(collegeId);
                      */
                     List<Integer> colledgeIds = Arrays.asList(
-                            2478,2479,2480,2462,2463,2464,2465,2791,2792,2793,2794,2796);
+                            1,3,654,871,1526,718,652,1855);
                     if(colledgeIds.contains(collegeId)){
-//                        continue;
+                        continue;
                     }
-                    if(processColledgeCount>5){
-                        //超过5个学校换个账号
-//                        ApiUtil.getInstance().login();
-//                        processColledgeCount=0;
+                    if("湖南".equalsIgnoreCase(provinceData.getProvinceName())){
+                        if(processColledgeCount>5){
+                            //湖南 超过5个学校换个账号
+                        ApiUtil.getInstance().login();
+                        processColledgeCount=0;
+                        }
                     }
 
                     List<Integer> liberalScienceList= Arrays.asList(0,1);
@@ -180,11 +199,12 @@ public class CollegeData {
                         /**
                          * 专业录取数据
                          */
-//                        byCollege(collegeId,liberalScience,2,Location,ProvinceId);
+                        ThreadUtil.safeSleep(2 * 1000);
+                        byCollege(collegeId,collegeName,liberalScience,2,Location,ProvinceId,provinceData);
                         /**
                          * 招生计划数据
                          */
-                        byCollege(collegeId,collegeName,liberalScience,3,Location,ProvinceId,provinceData);
+//                        byCollege(collegeId,collegeName,liberalScience,3,Location,ProvinceId,provinceData);
                     }
                     processColledgeCount++;
 
@@ -360,7 +380,7 @@ public class CollegeData {
 //                dataJSONObject.put("year",yearLast);
 //                Integer year = dataJSONObject.getInteger("year");
 //            List<Integer> yearList = Arrays.asList(2022,2021,2020,2019);
-            List<Integer> yearList = Arrays.asList(2022);
+            List<Integer> yearList = Arrays.asList(2021);
             for(int year:yearList){
                 sb = new StringBuilder(host);
                 if(queryType==2){
@@ -377,6 +397,7 @@ public class CollegeData {
                 sb.append("&pageSize=").append(100);
                 res = ApiUtil.getInstance().httpSyncGet(sb.toString());
 //        System.out.println("data is "+res);
+                ThreadUtil.safeSleep(1 * 1000);
                 JSONObject rows = res.getJSONObject("data");
                 if(res.getInteger("error")==0){
                     JSONArray tdDatas = rows.getJSONArray("tdData");
@@ -429,17 +450,18 @@ public class CollegeData {
 
                         String tableName = StringUtils.EMPTY;
                         if(queryType==2){
-                            tableName = "sy_colledge_major_enroll_data";
+                            tableName = "sy_colledge_enroll_major_data";
                         }else if(queryType==3){
-                            tableName = "sy_colledge_recruit_data_2022_"+provinceData.getProvinceSpell();
+//                            tableName = "sy_colledge_recruit_data_"+year+"_"+provinceData.getProvinceSpell();
+                            tableName = "sy_colledge_recruit_data";
                         }
                         Entity subjectTable4 = Entity.create(tableName);
                         subjectTable4.putAll(row);
                         subjectList.add(subjectTable4);
                         if(!isBatchInsert){
                             try {
-//                                DbUtil.use().insertOrUpdate(subjectTable4, "ID");
-                                DbUtil.use().insert(subjectTable4);
+                                DbUtil.use().insertOrUpdate(subjectTable4, "ID");
+//                                DbUtil.use().insert(subjectTable4);
                             } catch (SQLException e) {
                                 e.printStackTrace();
                             }

+ 99 - 10
src/test/java/spider/YouZy.java

@@ -12,6 +12,7 @@ import com.mingxue.spider.utils.UsignUtils;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.springframework.util.CollectionUtils;
 
 import java.sql.SQLException;
 import java.util.*;
@@ -26,11 +27,12 @@ public class YouZy {
     private static Logger log = LoggerFactory.getLogger(YouZy.class);
 
     private static final String host = "https://uwf7de983aad7a717eb.youzy.cn";
+    Boolean isBatchInsert = true;
 
     @Test
     public void test() throws Exception {
         //高校
-        getUniversityList();
+//        getUniversityList();
 //        getMajorData();
 //        processVocationalHotAndLack();
         /**
@@ -40,11 +42,13 @@ public class YouZy {
 
 
         /**
-         * 业库
+         * 专业库
          * eduLevel:"ben","zhuan"
          */
 
-//        List<String> majorList = getMajor(3, "ben,zhuan");
+        Integer majorLevel = 3;
+        List<String> majorList = getMajor(majorLevel, "ben,zhuan");
+        log.error("{}级专业编码为 {}",majorLevel,JSONObject.toJSONString(majorList));
 //        System.out.println(JSONObject.toJSONString(majorList));
 
 //        for (String majorCode : majorList) {
@@ -56,12 +60,22 @@ public class YouZy {
 //            }
 //            getCareerProspects(majorCode);
 //        }
+
+        //处理专业开设院校
+        for (String majorCode : majorList) {
+            if(Integer.parseInt(majorCode)<80202){
+                continue;
+            }
+            processUniversityByCode(majorCode,2022);
+        }
+
+
     }
 
 
     public void getUniversityList() {
 //        31 43 52  67
-        int count = 0, total = 5000, pageIndex = 96, pageSize=20;
+        int count = 0, total = 5000, pageIndex = 115, pageSize=20;
         do {
             //学校列表
             String requestPath = "/youzy.dms.basiclib.api.college.query";
@@ -95,7 +109,7 @@ public class YouZy {
                         /**
                          * 招生简单
                          */
-//                        enrollDetail(universityCode);
+                        enrollDetail(universityCode);
 
                         /**
                          * 特色专业
@@ -126,12 +140,12 @@ public class YouZy {
                         /**
                          * 获取学校图片
                          */
-                        getImgs(universityCode);
+//                        getImgs(universityCode);
 
                         /**
                          * VR
                          */
-                        getVR(universityCode);
+//                        getVR(universityCode);
                         /**
                          * 视频:需要将aliId存储到我们自己的vod中
                          */
@@ -149,6 +163,81 @@ public class YouZy {
         }while (count <= total);
     }
 
+    /**
+     * Fetches, page by page, the colleges returned by the
+     * {@code eduranking.mr.search} endpoint for one major code and stores
+     * every returned row in the {@code sy_major_university} table.
+     *
+     * <p>Each row is stripped of its {@code college}, {@code hits} and
+     * {@code isUp} entries and enriched with {@code majorCode}, {@code year},
+     * {@code universityCode}, {@code universityName} and {@code updateTime}
+     * before it is persisted. When {@code isBatchInsert} is true the rows of a
+     * page are inserted with a single call, otherwise one insert per row.
+     *
+     * <p>Paging stops as soon as the response is empty, is not flagged as
+     * successful, carries no items, or all {@code totalCount} rows have been
+     * requested.
+     *
+     * @param code major code; must be parseable as an integer
+     * @param year year sent in the request and written to every stored row
+     */
+    private void processUniversityByCode(String code,Integer year){
+
+        int count = 0, total = 5000, pageIndex = 1, pageSize=20;
+        if(Integer.parseInt(code)==80202){
+            // NOTE(review): looks like a one-off resume point for an interrupted run
+            // (pages 1-9 of major 80202 already stored) - confirm before reusing.
+            pageIndex= 10;
+        }
+        do {
+            String requestPath = "/youzy.dms.basiclib.api.eduranking.mr.search";
+            // year and pageSize are formatted in so the request always matches what is stored / counted below.
+            String jsonParam="{\"year\":\"%s\",\"majorCode\":\"%s\",\"keyword\":\"\",\"collegeProvinceCodes\":[],\"features\":[],\"collegeCategories\":[],\"natureTypes\":[],\"pageIndex\":%s,\"pageSize\":%s}";
+            jsonParam = String.format(jsonParam,year,code,pageIndex,pageSize);
+            // Raw Map kept on purpose: the parameter type of getRes is not visible from here.
+            Map paramMap =new HashMap<>();
+            String res = getRes(requestPath,paramMap,jsonParam);
+            if (StringUtils.isEmpty(res)) {
+                log.error("res is empty");
+                return;
+            }
+
+            JSONObject resultJsonobject = JSONObject.parseObject(res);
+            // A missing flag counts as failure; stop instead of walking on towards the 5000-row default total.
+            if (resultJsonobject == null || !Boolean.TRUE.equals(resultJsonobject.getBoolean("isSuccess"))) {
+                log.error("专业编码 {} 第{}页请求失败: {}",code,pageIndex,res);
+                return;
+            }
+
+            JSONObject result = resultJsonobject.getJSONObject("result");
+            JSONArray items = result == null ? null : result.getJSONArray("items");
+            if (items == null || items.isEmpty()) {
+                // Nothing on this page, so there is nothing further to fetch.
+                log.error("专业编码 {} 第{}页无数据",code,pageIndex);
+                return;
+            }
+            Integer totalCount = result.getInteger("totalCount");
+            if (totalCount != null) {
+                total = totalCount;
+            }
+
+            long start = System.currentTimeMillis();
+            Set<Entity> subjectList=new HashSet<>();
+
+            for (int i=0;i<items.size();i++) {
+                JSONObject rowUniversity = (JSONObject)items.get(i);
+                JSONObject college = rowUniversity.getJSONObject("college");
+                String universityCode = college.getString("code");
+                String universityName = college.getString("cnName");
+                // Drop the entries that are not stored, then flatten the college identity into the row.
+                rowUniversity.remove("college");
+                rowUniversity.remove("hits");
+                rowUniversity.remove("isUp");
+                rowUniversity.put("majorCode",code);
+                rowUniversity.put("year",year);
+                rowUniversity.put("universityCode",universityCode);
+                rowUniversity.put("universityName",universityName);
+
+                Date now =new Date();
+                rowUniversity.put("updateTime",now);
+
+                Entity universityTable = Entity.create("sy_major_university");
+                universityTable.putAll(rowUniversity);
+                subjectList.add(universityTable);
+                if(!isBatchInsert){
+                    try {
+                        DbUtil.use().insert(universityTable);
+                    } catch (SQLException e) {
+                        log.error("专业编码 {} 院校 {} 入库失败",code,universityCode,e);
+                    }
+                }
+            }
+            if(isBatchInsert){
+                try {
+                    if(!CollectionUtils.isEmpty(subjectList)){
+                        DbUtil.use().insert(subjectList);
+                    }
+                } catch (SQLException e) {
+                    log.error("专业编码 {} 第{}页批量入库失败",code,pageIndex,e);
+                }
+            }
+            log.error("完成专业编码 {}, 耗时{}",code, DateUtil.formatBetween(System.currentTimeMillis() - start));
+
+            log.error("专业编码 {}, 已完成page={}, 开始第{}页" ,code,pageIndex,(pageIndex+1));
+            count = pageIndex * pageSize;
+            pageIndex += 1;
+            // Strictly less: once count reaches total every row has been requested.
+        }while (count < total);
+
+    }
+
     private void getVideo(String universityCode,String universityName){
         String requestPath = "/op/Classrooms/Packs/Query";
         String jsonParam = "";
@@ -440,7 +529,7 @@ public class YouZy {
     }
 
     /**
-     * 招生简
+     * 招生简章
      * @param collegeCode
      */
     private void enrollDetail(String collegeCode){
@@ -463,7 +552,7 @@ public class YouZy {
                     JSONObject row = (JSONObject) items.get(i);
                     Integer year = row.getInteger("year");
                     //只取2021、2022的数据
-                    if(year<2021) continue;
+                    if(year<=2021) continue;
                     String id = row.getString("id");
 
                     //处理detail
@@ -493,7 +582,7 @@ public class YouZy {
 
                             Date now =new Date();
                             row.put("createTime",createdAt);
-                            row.put("updateTime",now);
+                            row.put("update_time",now);
                             //存储学校
                             Entity universityTable = Entity.create("sy_university_enroll_brochure");
                             universityTable.putAll(row);