Browse Source

Merge branch 'master' of http://121.4.203.192:9000/root/spider

* 'master' of http://121.4.203.192:9000/root/spider:
  优志愿数据抓取
admin 2 năm trước cách đây
mục cha
commit
61a6c94cc9

+ 1 - 2
src/main/java/com/mingxue/spider/utils/ApiUtil.java

@@ -2,14 +2,13 @@ package com.mingxue.spider.utils;
 
 import java.io.IOException;
 
-import com.mingxue.spider.helper.StringUtils;
-import com.sun.org.apache.xpath.internal.operations.Bool;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import com.alibaba.fastjson.JSON;
 import com.alibaba.fastjson.JSONObject;
 import com.mingxue.spider.dto.User;
+import com.mingxue.spider.helper.StringUtils;
 
 import okhttp3.Call;
 import okhttp3.Callback;

+ 25 - 6
src/main/java/com/mingxue/spider/youzy/AbstractService.java

@@ -19,9 +19,10 @@ import cn.hutool.core.lang.Dict;
 import cn.hutool.core.util.StrUtil;
 
 public class AbstractService {
-    protected Logger logger=LoggerFactory.getLogger(getClass());
+    protected Logger logger = LoggerFactory.getLogger(getClass());
     private static String host = "https://uwf7de983aad7a717eb.youzy.cn";
     private static String accessToken;
+    private static Integer provinceId;
 
     protected String getAccessToken() {
         return accessToken;
@@ -31,8 +32,12 @@ public class AbstractService {
         AbstractService.accessToken = accessToken;
     }
 
-    protected Object postBody(String api) {
-        return postBody(api, null);
+    public static Integer getProvinceId() { // accessor for the class-level (static) province id shared by all services
+        return provinceId; // null until setProvinceId has been called: the field is declared without an initializer
+    }
+
+    public static void setProvinceId(Integer provinceId) { // stores the province id in the class-level static field
+        AbstractService.provinceId = provinceId; // class-qualified because the parameter shadows the static field
     }
 
     protected Map<String, String> buildProvinceMap() {
@@ -49,9 +54,23 @@ public class AbstractService {
         return map;
     }
 
-    protected Object postBody(String api, Dict body) {
-        String requestPath = UsignUtils.getRequestPathAndParam(api, body);
-        String jsonBody = JSON.toJSONString(body);
+    /**
+     * Calls {@code host + api} synchronously with the current access token and returns the
+     * parsed response.
+     *
+     * <p>NOTE(review): despite the name, the request is sent through
+     * {@code HttpUtils.postBody} with a null body - confirm that this is intended.
+     *
+     * @param api request path including the query string, appended to the configured host
+     * @return the {@code result} object when the response has {@code isSuccess == true},
+     *         otherwise the whole response object
+     * @throws IllegalStateException if the response is null or empty
+     */
+    protected JSONObject httpSyncGet(String api) {
+        String url = host + api;
+        String res = HttpUtils.postBody(url, null, null, accessToken);
+        JSONObject json = JSONObject.parseObject(res);
+        if (null == json) {
+            // parseObject yields null for a null/empty text; fail with context instead of a bare NPE.
+            throw new IllegalStateException("Empty response from " + url);
+        }
+        return json.getBooleanValue("isSuccess") ? json.getJSONObject("result") : json;
+    }
+
+    protected Object postBody(String api) { // convenience overload: same call with neither params nor body
+        return postBody(api, null, null); // delegates to postBody(String, Dict, Dict)
+    }
+
+    protected Object postBody(String api, Dict params, Dict body) {
+        String requestPath = UsignUtils.getRequestPathAndParam(api, params);
+        String jsonBody = null == body ? "{}" : JSON.toJSONString(body);
         String usign = StringUtils.EMPTY;
         if (StringUtils.isBlank(jsonBody)) {
             usign = UsignUtils.getUsign(requestPath);

+ 120 - 0
src/main/java/com/mingxue/spider/youzy/ChooseSubjectService.java

@@ -0,0 +1,120 @@
+package com.mingxue.spider.youzy;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+
+import cn.hutool.core.collection.CollectionUtil;
+import cn.hutool.core.date.DateUtil;
+import cn.hutool.core.lang.Dict;
+import cn.hutool.core.thread.ThreadUtil;
+import cn.hutool.db.DbUtil;
+import cn.hutool.db.Entity;
+
+/**
+ * Scrapes the youzy "choosesubject" data and stores it in the
+ * {@code yzy_choosesubject_2024_3} table.
+ *
+ * <p>Traversal order: province/year configuration, then the colleges of a province (paged),
+ * then the rows of each college (paged). Every remote page is retried a bounded number of
+ * times, so a persistent failure can no longer keep the task spinning forever.
+ */
+public class ChooseSubjectService extends AbstractService {
+
+    /** Maximum number of consecutive failures tolerated for one page before giving up. */
+    private static final int MAX_RETRIES = 5;
+
+    /** Pause between two attempts of the same page, in milliseconds. */
+    private static final long RETRY_DELAY_MS = 500L;
+
+    private final String tableName = "yzy_choosesubject_2024_3";
+
+    /**
+     * Loads the province configuration and scrapes every listed province for the first
+     * year in its {@code years} array.
+     */
+    public void getAll() {
+        String api = "/youzy.dms.datalib.api.choosesubject.config.brief.get";
+        JSONArray arr = (JSONArray)postBody(api, Dict.create().set("eduLevel", "ben"), null);
+        if (null == arr) {
+            logger.warn("No province configuration returned by {}", api);
+            return;
+        }
+        for (int i = 0; i < arr.size(); i++) {
+            JSONObject row = arr.getJSONObject(i);
+            String provinceCode = row.getString("provinceCode");
+            JSONArray years = row.getJSONArray("years");
+            if (null == years || years.isEmpty()) {
+                logger.warn("Province [{}] lists no years, skipped", provinceCode);
+                continue;
+            }
+            // Only the first listed year is scraped, as before.
+            getByProvinceAndYear(provinceCode, years.getString(0));
+        }
+    }
+
+    /**
+     * Pages through the colleges of one province and year and scrapes each college.
+     *
+     * <p>A page that keeps failing is abandoned after {@link #MAX_RETRIES} attempts, which
+     * ends the scan of this province. A failure while handling a single college is logged
+     * and the remaining colleges of the page are still processed.
+     *
+     * @param provinceCode province code sent as {@code provinceCode}
+     * @param year         year sent as {@code year}
+     */
+    public void getByProvinceAndYear(String provinceCode, String year) {
+        long start = System.currentTimeMillis();
+        String api = "/youzy.dms.datalib.api.choosesubject.college.query";
+        int pageIndex = 1;
+        int failures = 0;
+        while (true) {
+            JSONArray arr;
+            try {
+                // Request shape:
+                // {"eduLevel":"ben","pageIndex":2,"pageSize":20,"collegeKeywords":[],"provinceCodes":[],
+                // "collegeCodes":[],"features":[],"natures":[],"categories":[],"provinceCode":34,"year":"2024"}
+                Dict body = Dict.create().set("eduLevel", "ben");
+                body.set("pageIndex", pageIndex).set("pageSize", 20);
+                body.set("collegeKeywords", Collections.emptyList()).set("provinceCodes", Collections.emptyList());
+                body.set("collegeCodes", Collections.emptyList()).set("features", Collections.emptyList());
+                body.set("natures", Collections.emptyList()).set("categories", Collections.emptyList());
+                body.set("provinceCode", provinceCode).set("year", year);
+                JSONObject res = (JSONObject)postBody(api, null, body);
+                arr = res.getJSONArray("items");
+            } catch (Exception e) {
+                failures++;
+                logger.error("Failed to query colleges of province [{}], page {} (attempt {}/{})", provinceCode,
+                    pageIndex, failures, MAX_RETRIES, e);
+                if (failures >= MAX_RETRIES) {
+                    break;
+                }
+                ThreadUtil.safeSleep(RETRY_DELAY_MS);
+                continue;
+            }
+            failures = 0;
+            if (null == arr || arr.isEmpty()) {
+                break;
+            }
+            for (int i = 0; i < arr.size(); i++) {
+                long collegeStart = System.currentTimeMillis();
+                try {
+                    JSONObject row = arr.getJSONObject(i);
+                    String code = row.getString("collegeCode");
+                    String name = row.getString("collegeName");
+                    int cnt = getByCollege(code, provinceCode, year);
+                    String time = DateUtil.formatBetween(System.currentTimeMillis() - collegeStart);
+                    logger.info("处理学校:[{}-{}] ,选科数据记录:[{}].用时:{}", code, name, cnt, time);
+                } catch (Exception e) {
+                    // One malformed row must not force the whole page to be fetched again.
+                    logger.error("Failed to process college #{} on page {} of province [{}]", i, pageIndex,
+                        provinceCode, e);
+                }
+            }
+            pageIndex++;
+        }
+        logger.info("处理省份:[{}] ,选科数据用时:{}", provinceCode, DateUtil.formatBetween(System.currentTimeMillis() - start));
+    }
+
+    /**
+     * Fetches all rows of one college and replaces the stored rows for it.
+     *
+     * <p>The table is only modified after every page was fetched successfully; when the
+     * fetch is abandoned the previously stored rows are left untouched.
+     *
+     * @return number of rows fetched for the college, or 0 when the fetch was abandoned
+     */
+    private int getByCollege(String collegeCode, String provinceCode, String year) {
+        String api = "/youzy.dms.datalib.api.choosesubject.combine.query";
+        int pageIndex = 1;
+        int fetched = 0;
+        int failures = 0;
+        List<Entity> records = new ArrayList<>();
+        while (true) {
+            try {
+                // Request shape:
+                // {"provinceCode":"42","year":"2024","eduLevel":"ben",
+                // "chooseSubjects":["物理","政治","地理"],"majorCodes":[],
+                // "majorKeywords":[],"collegeCodes":["10185"],"collegeKeywords":[],
+                // "isMatch":-1,"pageIndex":1,"pageSize":50}
+                Dict body = Dict.create().set("provinceCode", provinceCode).set("year", year);
+                body.set("eduLevel", "ben").set("chooseSubjects", Arrays.asList("物理", "政治", "地理"));
+                body.set("majorCodes", Collections.emptyList()).set("majorKeywords", Collections.emptyList());
+                body.set("collegeCodes", Collections.singletonList(collegeCode));
+                body.set("collegeKeywords", Collections.emptyList());
+                body.set("isMatch", -1).set("pageIndex", pageIndex).set("pageSize", 50);
+                JSONObject data = (JSONObject)postBody(api, null, body);
+                Integer total = data.getInteger("totalCount");
+                JSONArray arr = data.getJSONArray("items");
+                if (null == arr || arr.isEmpty()) {
+                    break;
+                }
+                List<Entity> page = new ArrayList<>(arr.size());
+                for (int i = 0; i < arr.size(); i++) {
+                    Entity item = Entity.create(tableName).set("collegeCode", collegeCode);
+                    item.putAll(arr.getJSONObject(i));
+                    page.add(item);
+                }
+                // Count the page only once it is fully converted, so a retry cannot count it twice.
+                records.addAll(page);
+                fetched += page.size();
+                failures = 0;
+                if (null != total && fetched >= total) {
+                    break;
+                }
+                pageIndex++;
+            } catch (Exception e) {
+                failures++;
+                logger.error("Failed to query rows of college [{}], page {} (attempt {}/{})", collegeCode, pageIndex,
+                    failures, MAX_RETRIES, e);
+                if (failures >= MAX_RETRIES) {
+                    logger.error("Giving up on college [{}]; stored rows are left untouched", collegeCode);
+                    return 0;
+                }
+                ThreadUtil.safeSleep(RETRY_DELAY_MS);
+            }
+        }
+        try {
+            // NOTE(review): the delete filters on provinceCode and year, which the inserted rows
+            // only carry if the API rows themselves contain those keys - confirm.
+            DbUtil.use().del(Entity.create(tableName).set("collegeCode", collegeCode).set("provinceCode", provinceCode).set("year", year));
+            if (CollectionUtil.isNotEmpty(records)) {
+                DbUtil.use().insert(records);
+            }
+        } catch (Exception e) {
+            logger.error("Failed to store rows of college [{}]", collegeCode, e);
+        }
+        return records.size();
+    }
+
+}

+ 1 - 1
src/main/java/com/mingxue/spider/youzy/PclService.java

@@ -35,7 +35,7 @@ public class PclService extends AbstractService {
     private void getByProvince(String code, String name, Integer yyyy) {
         long start = System.currentTimeMillis();
         Dict body = Dict.create().set("provinceCode", code).set("year", yyyy);
-        Object res = postBody(api, body);
+        Object res = postBody(api,null, body);
         JSONArray arr = (JSONArray)res;
         for (int i = 0; i < arr.size(); i++) {
             JSONObject item = arr.getJSONObject(i);

+ 1 - 1
src/main/java/com/mingxue/spider/youzy/ScoreService.java

@@ -39,7 +39,7 @@ public class ScoreService extends AbstractService {
             for (String batch : batchs) {
                 Dict body = Dict.create().set("collegeCode", collegeCode).set("year", year);
                 body.set("provinceCode", provinceCode).set("course", course).set("batch", batch);
-                JSONObject res = (JSONObject)postBody(api, body);
+                JSONObject res = (JSONObject)postBody(api, null, body);
                 JSONArray rows = res.getJSONArray("uCodes");
                 if (null == rows || rows.size() == 0) {
                     continue;

+ 2 - 1
src/main/java/com/mingxue/spider/youzy/UserService.java

@@ -15,11 +15,12 @@ public class UserService extends AbstractService {
         String api = "/tocusers/youzy.toc.user.login.validate";
         Dict body = Dict.create();
         body.set("username", mobile).set("password", password);
-        JSONObject res = (JSONObject)postBody(api, body);
+        JSONObject res = (JSONObject)postBody(api,null, body);
         if (res.containsKey("validateUserOutput")) {
             JSONObject user = res.getJSONObject("validateUserOutput");
             JSONObject token = user.getJSONObject("userToken");
             setAccessToken(token.getString("accessToken"));
+            //setProvinceId(user);
         } else {
             System.out.println(res);
         }

+ 78 - 0
src/main/java/com/mingxue/spider/youzy/XuankeService.java

@@ -0,0 +1,78 @@
+package com.mingxue.spider.youzy;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.alibaba.fastjson.JSONArray;
+import com.alibaba.fastjson.JSONObject;
+
+import cn.hutool.core.date.DateUtil;
+import cn.hutool.db.DbUtil;
+import cn.hutool.db.Entity;
+
+/**
+ * Scrapes the per-college "xuanke" rows for the province configured through
+ * {@link AbstractService#setProvinceId(Integer)} and stores them in the {@code major} table.
+ *
+ * <p>Stored rows of a college are only replaced after all of its pages were fetched
+ * successfully; a failed fetch leaves the table untouched.
+ */
+public class XuankeService extends AbstractService {
+
+    /** Page size sent to the list endpoint. */
+    private static final int PAGE_SIZE = 30;
+
+    /** Maximum number of consecutive failures tolerated for one page before giving up. */
+    private static final int MAX_RETRIES = 5;
+
+    /** Pause between two attempts of the same page, in milliseconds. */
+    private static final long RETRY_DELAY_MS = 500L;
+
+    String tableName = "major";
+
+    /**
+     * Scrapes every college returned by {@code select id from college}.
+     */
+    public void getAll() {
+        long start = System.currentTimeMillis();
+        Integer provinceId = getProvinceId();
+        // The year list depends only on the province, so it is fetched once for all colleges.
+        List<Integer> years = fetchYears(provinceId);
+        if (null == years) {
+            logger.error("Year list unavailable for province {}; nothing was changed", provinceId);
+        } else {
+            try {
+                List<Entity> coll = DbUtil.use().query("select id from college");
+                for (Entity entity : coll) {
+                    saveCollege(entity.getInt("id"), provinceId, years);
+                }
+            } catch (Exception e) {
+                logger.error("Failed to load or process the college list", e);
+            }
+        }
+        logger.info("获取{}选科数据用时:{}", getProvinceId(), DateUtil.formatBetween(System.currentTimeMillis() - start));
+    }
+
+    /**
+     * Scrapes one college for every year available in the configured province.
+     *
+     * @param collegeId id of the college to scrape
+     */
+    public void getByCollege(Integer collegeId) {
+        Integer provinceId = getProvinceId();
+        List<Integer> years = fetchYears(provinceId);
+        if (null == years) {
+            logger.error("Year list unavailable for province {}; college {} skipped", provinceId, collegeId);
+            return;
+        }
+        saveCollege(collegeId, provinceId, years);
+    }
+
+    /** Returns the years offered for the province, or null when the request failed. */
+    private List<Integer> fetchYears(Integer provinceId) {
+        try {
+            JSONObject res = httpSyncGet("/years?provinceId=" + provinceId + "&queryType=4");
+            JSONArray rows = res.getJSONArray("data");
+            List<Integer> years = new ArrayList<>(rows.size());
+            for (int i = 0; i < rows.size(); i++) {
+                years.add(rows.getJSONObject(i).getInteger("year"));
+            }
+            return years;
+        } catch (Exception e) {
+            logger.error("Failed to fetch the year list of province {}", provinceId, e);
+            return null;
+        }
+    }
+
+    /** Fetches all years of one college and, only if every year succeeded, replaces its stored rows. */
+    private void saveCollege(Integer collegeId, Integer provinceId, List<Integer> years) {
+        List<Entity> records = new ArrayList<>();
+        for (Integer year : years) {
+            if (!fetchYear(collegeId, provinceId, year, records)) {
+                logger.error("Giving up on college {} (year {}); stored rows are left untouched", collegeId, year);
+                return;
+            }
+        }
+        try {
+            // NOTE(review): rows are deleted by "CollegeID" (no province filter) while the
+            // scraper tags new rows with "_collegeId"/"_provinceId" - confirm the intended key.
+            DbUtil.use().del(Entity.create(tableName).set("CollegeID", collegeId));
+            if (!records.isEmpty()) {
+                DbUtil.use().insert(records);
+            }
+        } catch (Exception e) {
+            logger.error("Failed to store rows of college {}", collegeId, e);
+        }
+    }
+
+    /**
+     * Appends all pages of one college and year to {@code records}.
+     *
+     * @return true when every page was fetched, false when a page kept failing
+     */
+    private boolean fetchYear(Integer collegeId, Integer provinceId, Integer year, List<Entity> records) {
+        int page = 1;
+        int failures = 0;
+        while (true) {
+            try {
+                StringBuilder sb = new StringBuilder();
+                sb.append("/xuanke/list/byCollegeId");
+                sb.append("?provinceId=").append(provinceId);
+                sb.append("&collegeId=").append(collegeId);
+                sb.append("&year=").append(year);
+                sb.append("&page=").append(page).append("&pageSize=").append(PAGE_SIZE);
+                JSONObject res = httpSyncGet(sb.toString());
+                JSONArray rows = res.getJSONArray("data");
+                Integer total = res.getInteger("total");
+                if (null == rows || null == total) {
+                    // httpSyncGet hands back the raw response when the call was not successful.
+                    throw new IllegalStateException("Response without data/total");
+                }
+                List<Entity> pageRecords = new ArrayList<>(rows.size());
+                for (int i = 0; i < rows.size(); i++) {
+                    JSONObject row = rows.getJSONObject(i);
+                    row.remove("Row");
+                    Entity record = Entity.create(tableName);
+                    record.putAll(row);
+                    record.set("_provinceId", provinceId).set("_collegeId", collegeId).set("_year", year);
+                    pageRecords.add(record);
+                }
+                records.addAll(pageRecords);
+                // ">=" rather than ">": no extra request when total is a multiple of the page size.
+                if (rows.isEmpty() || page * PAGE_SIZE >= total) {
+                    return true;
+                }
+                page++;
+                failures = 0;
+            } catch (Exception e) {
+                failures++;
+                logger.error("Failed to fetch college {} year {} page {} (attempt {}/{})", collegeId, year, page,
+                    failures, MAX_RETRIES, e);
+                if (failures >= MAX_RETRIES) {
+                    return false;
+                }
+                pause();
+            }
+        }
+    }
+
+    /** Sleeps between retries, restoring the interrupt flag if the thread is interrupted. */
+    private static void pause() {
+        try {
+            Thread.sleep(RETRY_DELAY_MS);
+        } catch (InterruptedException ie) {
+            Thread.currentThread().interrupt();
+        }
+    }
+}

+ 1 - 1
src/main/java/com/mingxue/spider/youzy/YfydService.java

@@ -64,7 +64,7 @@ public class YfydService extends AbstractService {
                 // {"provinceCode":43,"score":"150","year":2022,"course":"历史","bzType":"本专"}
                 Dict body = Dict.create().set("provinceCode", code).set("year", year);
                 body.set("score", 0).set("course", course).set("bzType", type);
-                JSONObject res = (JSONObject)postBody(api, body);
+                JSONObject res = (JSONObject)postBody(api, null, body);
                 JSONArray arr = res.getJSONArray("yfyds");
                 for (int i = 0; i < arr.size(); i++) {
                     JSONObject item = arr.getJSONObject(i);

+ 0 - 13
src/main/java/com/mingxue/spider/youzy/YouZyServer.java

@@ -1,13 +0,0 @@
-package com.mingxue.spider.youzy;
-
-public class YouZyServer {
-
-    private static UserService userService = new UserService();
-
-    private static ScoreService scoreService = new ScoreService();
-
-    public static void main(String[] args) {
-        userService.validate("17307497333", "Xj123456");
-        scoreService.plan(43, "10013", 2021);
-    }
-}

+ 28 - 0
src/main/java/com/mingxue/spider/youzy/YouZyTask.java

@@ -0,0 +1,28 @@
+package com.mingxue.spider.youzy;
+
+/**
+ * Command-line entry point: logs in and then runs the choose-subject scrape.
+ *
+ * <p>Credentials are taken from the first two program arguments or, when absent, from the
+ * {@code YOUZY_USERNAME} / {@code YOUZY_PASSWORD} environment variables. They are no longer
+ * kept in the source.
+ */
+public class YouZyTask {
+
+    private static final UserService userService = new UserService();
+
+    private static final ChooseSubjectService css = new ChooseSubjectService();
+
+    /**
+     * @param args optional {@code <username> <password>}; missing values fall back to the
+     *             environment variables named in the class comment
+     */
+    public static void main(String[] args) {
+        String username = args.length > 0 ? args[0] : System.getenv("YOUZY_USERNAME");
+        String password = args.length > 1 ? args[1] : System.getenv("YOUZY_PASSWORD");
+        if (isBlank(username) || isBlank(password)) {
+            System.err.println("Usage: YouZyTask <username> <password> (or set YOUZY_USERNAME / YOUZY_PASSWORD)");
+            System.exit(1);
+        }
+        userService.validate(username, password);
+        css.getAll();
+        // To scrape a single province instead, call e.g. css.getByProvinceAndYear("42", "2024").
+        // Province codes used so far: Hubei 42, Guangxi 45, Hebei 13, Anhui 34.
+        System.out.println("success");
+    }
+
+    /** Returns true for null, empty or whitespace-only text. */
+    private static boolean isBlank(String text) {
+        return null == text || text.trim().isEmpty();
+    }
+}