소스 검색

取数据

admin 2 년 전
부모
커밋
15e3484b8f

+ 10 - 0
src/main/java/com/mingxue/spider/dto/ProvinceData.java

@@ -6,6 +6,16 @@ public class ProvinceData {
     private String provinceSpell;
     private Integer id;
     private String provinceName;
+    private Integer queryType;
+
+    public Integer getQueryType() {
+        return queryType;
+    }
+
+    public ProvinceData setQueryType(Integer queryType) {
+        this.queryType = queryType;
+        return this;
+    }
 
     public String getCode() {
         return code;

+ 52 - 12
src/main/java/com/mingxue/spider/youzy/ChooseSubjectService.java

@@ -17,27 +17,56 @@ import cn.hutool.db.Entity;
 
 public class ChooseSubjectService extends AbstractService {
 
-    private String tableName = "yzy_choosesubject_2024_3";
+    private String tableName = "yzy_choosesubject_2024_all";
 
     public void getAll() {
+//        List<String> excludProvinces = Arrays.asList("湖北","广西","河北","安徽","北京","重庆",
+//                "福建","甘肃","广东","黑龙江","海南","湖南","吉林","江西","江苏","辽宁","山东","上海","天津","浙江");
+        List<String> excludProvinces = Arrays.asList("湖南","北京","重庆","广东","河北","湖北","海南","江苏","辽宁","山东","上海","天津");
         String api = "/youzy.dms.datalib.api.choosesubject.config.brief.get";
         JSONArray arr = (JSONArray)postBody(api, Dict.create().set("eduLevel", "ben"), null);
         for (int i = 0; i < arr.size(); i++) {
             JSONObject row = arr.getJSONObject(i);
             String provinceCode = row.getString("provinceCode");
-            String year = row.getJSONArray("years").getString(0);
-            getByProvinceAndYear(provinceCode, year);
+            String provinceName = row.getString("provinceName");
+            if(excludProvinces.contains(provinceName)){
+                continue;
+            }
+
+            JSONArray yearArray = row.getJSONArray("years");
+            //取非2024年的数据
+            if(yearArray.size()==1){
+                continue;
+            }else {
+                for(int j=0;j<yearArray.size();j++){
+                    //2024的过滤
+                    if(j==0){
+                        continue;
+                    }
+                    String year = yearArray.getString(j);
+                    getByProvinceAndYear(provinceCode,provinceName, year);
+                }
+//                String year = row.getJSONArray("years").getString(0);
+//                getByProvinceAndYear(provinceCode,provinceName, year);
+            }
+
         }
     }
 
-    public void getByProvinceAndYear(String provinceCode, String year) {
+    public void getByProvinceAndYear(String provinceCode,String provinceName, String year) {
         long start = System.currentTimeMillis();
         String api = "/youzy.dms.datalib.api.choosesubject.college.query";
         Integer pageIndex = 1;
+
+        String excludeProvinceName = "河北";
+        if(excludeProvinceName.equalsIgnoreCase(provinceName)){
+            pageIndex = 33;//甘肃:开始1-17页
+        }
         while (true) {
             try {
                 // {"eduLevel":"ben","pageIndex":2,"pageSize":20,"collegeKeywords":[],"provinceCodes":[],
                 // "collegeCodes":[],"features":[],"natures":[],"categories":[],"provinceCode":34,"year":"2024"}
+                logger.error("{}开始第{}页",provinceName,pageIndex);
                 Dict body = Dict.create().set("eduLevel", "ben");
                 body.set("pageIndex", pageIndex).set("pageSize", 20);
                 body.set("collegeKeywords", Collections.EMPTY_LIST).set("provinceCodes", Collections.EMPTY_LIST);
@@ -49,22 +78,31 @@ public class ChooseSubjectService extends AbstractService {
                 if (null == arr || arr.size() == 0) {
                     break;
                 }
+                List<String> excludeColleges= Arrays.asList("10476","11079","10786","10320");
+                StringBuilder codes = new StringBuilder();
                 for (int i = 0; i < arr.size(); i++) {
                     long _start = System.currentTimeMillis();
                     JSONObject row = arr.getJSONObject(i);
-                    String code = row.getString("collegeCode");
                     String name = row.getString("collegeName");
+                    String code = row.getString("collegeCode");
+                    codes.append(",").append(code);
+                    if(excludeColleges.contains(code)){
+                        if(excludeProvinceName.equalsIgnoreCase(provinceName)){
+                            continue;
+                        }
+                    }
+
                     int cnt = getByCollege(code, provinceCode, year);
                     String time = DateUtil.formatBetween(System.currentTimeMillis() - _start);
-                    logger.info("处理学校:[{}-{}] ,选科数据记录:[{}].用时:{}", code, name, cnt, time);
-
+                    logger.info("{}{}:第{}页:[{}-{}-{}/{},] ,选科数据记录:[{}].用时:{},{}", provinceName,year,pageIndex,code, name,(i+1),body.getStr("pageSize"), cnt, time,codes);
+//                    ThreadUtil.safeSleep(2000);
                 }
                 pageIndex += 1;
             } catch (Exception e) {
                 e.printStackTrace();
             }
         }
-        logger.info("处理省份:[{}] ,选科数据用时:{}", provinceCode, DateUtil.formatBetween(System.currentTimeMillis() - start));
+        logger.info("处理省份:[{}-{}]完成,选科数据用时:{}", provinceCode,provinceName, DateUtil.formatBetween(System.currentTimeMillis() - start));
     }
 
     private int getByCollege(String collegeCode, String provinceCode, String year) {
@@ -107,10 +145,12 @@ public class ChooseSubjectService extends AbstractService {
             }
         }
         try {
-            DbUtil.use().del(Entity.create(tableName).set("collegeCode", collegeCode).set("provinceCode", provinceCode).set("year", year));
-            if (CollectionUtil.isNotEmpty(records)) {
-                DbUtil.use().insert(records);
-            }
+//            DbUtil.use().del(Entity.create(tableName).set("collegeCode", collegeCode).set("provinceCode", provinceCode).set("year", year));
+//            if (CollectionUtil.isNotEmpty(records)) {
+//                DbUtil.use().insert(records);
+//            }
+
+            DbUtil.use().insert(records);
         } catch (Exception e) {
             e.printStackTrace();
         }

파일이 너무 크기 때문에 변경 상태를 표시하지 않습니다.
+ 0 - 0
src/test/java/spider/CollegeData.java


이 변경점에서 너무 많은 파일들이 변경되어 몇몇 파일들은 표시되지 않았습니다.