Browse Source

用户数据

jinxia.mo 3 years ago
parent
commit
70af1fbc43

+ 6 - 0
pom.xml

@@ -60,5 +60,11 @@
       <artifactId>junit</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.projectlombok</groupId>
+      <artifactId>lombok</artifactId>
+      <version>1.18.12</version>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
 </project>

+ 18 - 4
src/main/java/com/mingxue/spider/utils/ApiUtil.java

@@ -3,6 +3,7 @@ package com.mingxue.spider.utils;
 import java.io.IOException;
 
 import com.mingxue.spider.helper.StringUtils;
+import com.sun.org.apache.xpath.internal.operations.Bool;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -80,8 +81,9 @@ public class ApiUtil {
      * 第一高考网 http://www.diyigaokao.com/
      * @param user
      */
-    public void login(User user) {
+    public Boolean login(User user) {
         try {
+            log.error("user is start login: {}",JSONObject.toJSONString(user));
             String url= StringUtils.EMPTY;
             String host= StringUtils.EMPTY;
             if(user.getIsHn()){
@@ -106,11 +108,14 @@ public class ApiUtil {
                 String token = data.getString("accessToken");
                 requestBuilder.addHeader("accessToken", token);
                 this.user.setProvinceId(provinceId).setToken(token);
+                return true;
             } else {
                 System.out.println(res);
+                return false;
             }
         } catch (Exception e) {
             e.printStackTrace();
+            return false;
         }
     }
 
@@ -119,11 +124,20 @@ public class ApiUtil {
      * @return
      */
     public void login() {
-        login(UserUtil.next());
+        // Keep rotating to the next account until one logs in. The loop itself
+        // is the retry: recursing into login() here would grow the stack on
+        // every failure and, once the nested call succeeded, fall back into
+        // this loop and log in again with yet another account.
+        while (true) {
+            // Boolean.TRUE.equals never unboxes, so a null result counts as failure.
+            if (Boolean.TRUE.equals(login(UserUtil.next()))) {
+                break;
+            }
+        }
     }
 
-    public void login(String code, String password,Boolean isHn) {
-        login(new User().setCode(code).setPassword(password).setIsHn(isHn));
+    /**
+     * Logs in with explicit credentials: wraps them in a new {@link User}
+     * and delegates to {@link #login(User)}.
+     *
+     * @param code     value passed to {@code User#setCode}
+     * @param password value passed to {@code User#setPassword}
+     * @param isHn     value passed to {@code User#setIsHn}
+     * @return the result of {@link #login(User)}
+     */
+    public Boolean login(String code, String password, Boolean isHn) {
+        return login(new User().setCode(code).setPassword(password).setIsHn(isHn));
     }
 
     /**

+ 9 - 1
src/main/java/com/mingxue/spider/utils/UserUtil.java

@@ -8,12 +8,17 @@ import com.mingxue.spider.dto.User;
 
 import cn.hutool.core.text.csv.CsvData;
 import cn.hutool.core.text.csv.CsvUtil;
+import org.springframework.util.CollectionUtils;
 
 public class UserUtil {
 
     private static List<User> list = new ArrayList<>(12);
     private static User current;
     static {
+        initUserInfo();
+    }
+
+    private static void initUserInfo(){
         File file = new File("./users.txt");
         if (!file.exists()) {
             file = new File(UserUtil.class.getResource("/users.txt").getFile());
@@ -24,7 +29,6 @@ public class UserUtil {
                 list.add(new User().setCode(row.get(0)).setPassword(row.get(1)));
             });
         }
-
     }
 
     /**
@@ -32,6 +36,10 @@ public class UserUtil {
      * @return
      */
     public static User next() {
+        if(CollectionUtils.isEmpty(list)){
+            //如果用户信息被使用完了,重新开始
+            initUserInfo();
+        }
         User user = list.remove(0);
         user.setIsHn(true);
         current = user;

+ 34 - 3
src/main/resources/users.txt

@@ -1,5 +1,36 @@
-002180054495,46805392
-002180054494,17988675
+002180054390,10253537
+002180054388,59296188
+002180054182,86934322
+002180054185,55900344
+002180054347,85968924
+002180054186,17464580
+002180054188,86684308
+002180054189,91506868
+002180054192,11088908
+002180054190,94862151
+002180054169,19027633
+002180054174,25057765
+002180054175,35491736
+002180054176,81373283
+002180054177,17193030
+002180054178,23896638
+002280024643,96933881
+002280024703,39408374
+002180054396,89362505
+002180054393,55512404
 002180054496,85603400
+002280024690,42967884
+002180056105,68365038
+002180054494,17988675
+002280024626,24305928
+002180054495,46805392
+002180054497,86080741
 002180054499,41507645
-002180054497,86080741
+002180054344,45023479
+002180054341,61258631
+002180054350,97237331
+002180054349,15936262
+002180054382,82653711
+002180054392,59831255
+002180054391,67577625
+

+ 25 - 0
src/main/resources/users2.txt

@@ -0,0 +1,25 @@
+002180056093,14925938
+002180056092,52692881
+002180056091,71686142
+002180056090,60092632
+002180056089,73756298
+002180056088,38045573
+002180056087,23884334
+002180056086,50726366
+002180056085,10410049
+002180056084,85373334
+002180056083,28070526
+002180056082,19134175
+002180056081,35737009
+002180056080,36807937
+002180056079,72463216
+002180056078,79020302
+002180056077,43400238
+002180056075,27805879
+002180056076,22969816
+002180056074,30145430
+002180056073,71774822
+002180056072,82429152
+002180056071,65012867
+002180056070,68442838
+002180056068,10650817

+ 101 - 6
src/test/java/spider/CollegeData.java

@@ -5,6 +5,7 @@ import java.io.IOException;
 import java.sql.SQLException;
 import java.util.*;
 
+import com.mingxue.spider.helper.StringUtils;
 import org.junit.Test;
 
 import com.alibaba.fastjson.JSONArray;
@@ -17,11 +18,16 @@ import cn.hutool.core.thread.ThreadUtil;
 import cn.hutool.core.util.CharsetUtil;
 import cn.hutool.db.DbUtil;
 import cn.hutool.db.Entity;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.util.CollectionUtils;
 
 public class CollegeData {
     private static String host = "https://apiv4.diyigaokao.com";
 
     private List<String> errors = new ArrayList<>();
+    protected final Logger log = LoggerFactory.getLogger(this.getClass());
+    Boolean isBatchInsert = false;
 
     @Test
     public void test() throws Exception {
@@ -31,7 +37,7 @@ public class CollegeData {
 //        getCategoryMajor();
 //        generateNineDate();
 //         广东账号:18774924158,密码:123456
-//         ApiUtil.getInstance().login("18774924158", "123456",false);
+         ApiUtil.getInstance().login("18774924158", "123456",false);
         // 湖北账号:18674898114, 密码:123456
         // ApiUtil.getInstance().login("18674898114", "123456",false);
         // 江西账号:13203226079,密码:123456
@@ -41,16 +47,20 @@ public class CollegeData {
     }
 
     public void collegeList() throws SQLException {
-        int count = 0, total = 5000, page = 1;
+        int count = 0, total = 5000, page = 74;
+        JSONObject res=new JSONObject();
         do {
             try {
                 StringBuilder sb = new StringBuilder(host);
                 sb.append("/college/list/byMultiple");
                 sb.append("?provinceIds=&yxjbz=&levels=&collegeType=&hotTags=");
                 sb.append("&pageIndex=").append(page).append("&pageSize=15");
-                JSONObject res = ApiUtil.getInstance().httpSyncGet(sb.toString());
+                ThreadUtil.safeSleep(2 * 1000);
+
+                res = ApiUtil.getInstance().httpSyncGet(sb.toString());
                 total = res.getInteger("total");
                 JSONArray data = res.getJSONArray("data");
+                int processColledgeCount=0;
                 for (int i = 0; i < data.size(); i++) {
                     long start = System.currentTimeMillis();
                     JSONObject row = data.getJSONObject(i);
@@ -61,15 +71,43 @@ public class CollegeData {
                         ThreadUtil.safeSleep(1 * 1000);
                     }
                     //选科数据
-                    xuanke(collegeId);
+//                    xuanke(collegeId);
+                    List<Integer> colledgeIds = Arrays.asList(
+                            1,3,2,24,8,13,6,7,4,10,5,18,20,32,27,46,58,30,47,22,94,95,176,274,
+                            271,403,399,427,512,521,571,573,580,652,654,658,653,655,664,667,675,
+                            656,718,720,724,728,722,721,736,741,719,729,731,725,765,871,900,873,
+                            879,972,971,973,1083,1085,1166,1257,1258,1260,1406,1525,1526,1533,
+                            1530,1535,1537,1544,2484,2485,2451,2360,2363,2361,2330,2364,2341,
+                            2141,2145,2082,2084,2083,2065,1978,1945,1946,1855,1854,1856,1858,
+                            1866,1753,153,1091,717,2359,3123,1868);
+                    if(colledgeIds.contains(collegeId)){
+                        continue;
+                    }
+                    if(processColledgeCount>5){
+                        //超过5个学校换个账号
+                        ApiUtil.getInstance().login();
+                        processColledgeCount=0;
+                    }
+                    //院校录取数据
+                    List<Integer> liberalScienceList= Arrays.asList(1,2);
+                    List<Integer> typeList= Arrays.asList(2,3);
+                    for(Integer liberalScience:liberalScienceList){
+                        for(Integer type:typeList){
+                            byHistoryNew(collegeId,liberalScience,type);
+                        }
+                    }
+                    processColledgeCount++;
+
                     ThreadUtil.safeSleep(1 * 1000);
-                    System.out.println(DateUtil.formatBetween(System.currentTimeMillis() - start));
+                    log.error("学校 {}, {}, 耗时{}",collegeId,row.getString("collegeName"),DateUtil.formatBetween(System.currentTimeMillis() - start));
                 }
-                System.out.println("已完成:" + page);
+                log.error("已完成page={}, 开始第{}页" ,page,(page+1));
                 count = page * 15;
                 page += 1;
             } catch (IOException e) {
                 e.printStackTrace();
+                log.error("error res is {}",res);
+                return;
             }
         } while (count <= total);
         File dir = new File("E:/projects/wangmin/evaluation/code/spider/");
@@ -77,6 +115,63 @@ public class CollegeData {
         System.out.println("执行完成!");
     }
 
+    /**
+     * Fetches the historical admission scores of one college and stores every
+     * returned row in the {@code sy_colledge_enroll_data} table.
+     * <p>
+     * Example: {@code https://apiv4.diyigaokao.com/query/collegeScore/byHistoryNew?liberalScience=1&collegeId=1&type=2}.
+     * The API also takes an optional accessToken header and an optional integer phase (batch); phase is not sent here.
+     *
+     * @param collegeId college ID (required by the API)
+     * @param liberalScience subject track: 1 science (physics), 0 liberal arts (history), 2 not divided
+     * @param type omit or pass 1 for ordinary provinces; for the "312" college-major-group provinces
+     *             (Jiangsu, Hunan, Hubei, Guangdong, Fujian) pass 2 for data from 2021 on and 3 for 2020 and earlier
+     * @throws IOException if the HTTP request fails
+     */
+    private void byHistoryNew(Integer collegeId,Integer liberalScience,Integer type) throws IOException {
+        ThreadUtil.safeSleep(2 * 1000);
+        String url = host + "/query/collegeScore/byHistoryNew?liberalScience=" + liberalScience
+                + "&collegeId=" + collegeId + "&type=" + type;
+        JSONObject res = ApiUtil.getInstance().httpSyncGet(url);
+        // getInteger returns null for a missing key, so compare without unboxing.
+        if (res == null || !Integer.valueOf(0).equals(res.getInteger("error"))) {
+            log.error("byHistoryNew res is {}, request is {}  ",res,url);
+            return;
+        }
+        JSONObject dataObject = res.getJSONObject("data");
+        JSONArray tdDatas = dataObject == null ? null : dataObject.getJSONArray("tdData");
+        if (tdDatas == null) {
+            // Successful response without a data/tdData payload: nothing to store.
+            return;
+        }
+        Set<Entity> subjectList = new HashSet<>();
+        for (int i = 0; i < tdDatas.size(); i++) {
+            Entity entity = Entity.create("sy_colledge_enroll_data");
+            entity.putAll(tdDatas.getJSONObject(i));
+            if (isBatchInsert) {
+                subjectList.add(entity);
+                continue;
+            }
+            try {
+                DbUtil.use().insertOrUpdate(entity, "ID");
+            } catch (SQLException e) {
+                log.error("byHistoryNew insertOrUpdate failed, request is {}", url, e);
+            }
+        }
+        if (isBatchInsert && !subjectList.isEmpty()) {
+            try {
+                DbUtil.use().insert(subjectList);
+            } catch (SQLException e) {
+                log.error("byHistoryNew batch insert failed, request is {}", url, e);
+            }
+        }
+    }
+
+    /**
+     * 学校详情
+     * @param id
+     * @param retry
+     */
     private void detail(Integer id, Integer retry) {
         int maxRetry = 2;
         JSONObject res = null;