|
@@ -0,0 +1,120 @@
|
|
|
|
+package com.mingxue.spider.youzy;
|
|
|
|
+
|
|
|
|
+import java.util.ArrayList;
|
|
|
|
+import java.util.Arrays;
|
|
|
|
+import java.util.Collections;
|
|
|
|
+import java.util.List;
|
|
|
|
+
|
|
|
|
+import com.alibaba.fastjson.JSONArray;
|
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
|
+
|
|
|
|
+import cn.hutool.core.collection.CollectionUtil;
|
|
|
|
+import cn.hutool.core.date.DateUtil;
|
|
|
|
+import cn.hutool.core.lang.Dict;
|
|
|
|
+import cn.hutool.core.thread.ThreadUtil;
|
|
|
|
+import cn.hutool.db.DbUtil;
|
|
|
|
+import cn.hutool.db.Entity;
|
|
|
|
+
|
|
|
|
+public class ChooseSubjectService extends AbstractService {
|
|
|
|
+
|
|
|
|
+ private String tableName = "yzy_choosesubject_2024_3";
|
|
|
|
+
|
|
|
|
+ public void getAll() {
|
|
|
|
+ String api = "/youzy.dms.datalib.api.choosesubject.config.brief.get";
|
|
|
|
+ JSONArray arr = (JSONArray)postBody(api, Dict.create().set("eduLevel", "ben"), null);
|
|
|
|
+ for (int i = 0; i < arr.size(); i++) {
|
|
|
|
+ JSONObject row = arr.getJSONObject(i);
|
|
|
|
+ String provinceCode = row.getString("provinceCode");
|
|
|
|
+ String year = row.getJSONArray("years").getString(0);
|
|
|
|
+ getByProvinceAndYear(provinceCode, year);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ public void getByProvinceAndYear(String provinceCode, String year) {
|
|
|
|
+ long start = System.currentTimeMillis();
|
|
|
|
+ String api = "/youzy.dms.datalib.api.choosesubject.college.query";
|
|
|
|
+ Integer pageIndex = 1;
|
|
|
|
+ while (true) {
|
|
|
|
+ try {
|
|
|
|
+ // {"eduLevel":"ben","pageIndex":2,"pageSize":20,"collegeKeywords":[],"provinceCodes":[],
|
|
|
|
+ // "collegeCodes":[],"features":[],"natures":[],"categories":[],"provinceCode":34,"year":"2024"}
|
|
|
|
+ Dict body = Dict.create().set("eduLevel", "ben");
|
|
|
|
+ body.set("pageIndex", pageIndex).set("pageSize", 20);
|
|
|
|
+ body.set("collegeKeywords", Collections.EMPTY_LIST).set("provinceCodes", Collections.EMPTY_LIST);
|
|
|
|
+ body.set("collegeCodes", Collections.EMPTY_LIST).set("features", Collections.EMPTY_LIST);
|
|
|
|
+ body.set("natures", Collections.EMPTY_LIST).set("categories", Collections.EMPTY_LIST);
|
|
|
|
+ body.set("provinceCode", provinceCode).set("year", year);
|
|
|
|
+ JSONObject res = (JSONObject)postBody(api, null, body);
|
|
|
|
+ JSONArray arr = res.getJSONArray("items");
|
|
|
|
+ if (null == arr || arr.size() == 0) {
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+ for (int i = 0; i < arr.size(); i++) {
|
|
|
|
+ long _start = System.currentTimeMillis();
|
|
|
|
+ JSONObject row = arr.getJSONObject(i);
|
|
|
|
+ String code = row.getString("collegeCode");
|
|
|
|
+ String name = row.getString("collegeName");
|
|
|
|
+ int cnt = getByCollege(code, provinceCode, year);
|
|
|
|
+ String time = DateUtil.formatBetween(System.currentTimeMillis() - _start);
|
|
|
|
+ logger.info("处理学校:[{}-{}] ,选科数据记录:[{}].用时:{}", code, name, cnt, time);
|
|
|
|
+
|
|
|
|
+ }
|
|
|
|
+ pageIndex += 1;
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ logger.info("处理省份:[{}] ,选科数据用时:{}", provinceCode, DateUtil.formatBetween(System.currentTimeMillis() - start));
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ private int getByCollege(String collegeCode, String provinceCode, String year) {
|
|
|
|
+ String api = "/youzy.dms.datalib.api.choosesubject.combine.query";
|
|
|
|
+ Integer total = 1000;
|
|
|
|
+ Integer pageIndex = 1;
|
|
|
|
+ Integer count = 0;
|
|
|
|
+ List<Entity> records = new ArrayList<>();
|
|
|
|
+ while (true) {
|
|
|
|
+ List<Entity> temp = new ArrayList<>();
|
|
|
|
+ try {
|
|
|
|
+ // {"provinceCode":"42","year":"2024","eduLevel":"ben",
|
|
|
|
+ // "chooseSubjects":["物理","政治","地理"],"majorCodes":[],
|
|
|
|
+ // "majorKeywords":[],"collegeCodes":["10185"],"collegeKeywords":[],
|
|
|
|
+ // "isMatch":-1,"pageIndex":1,"pageSize":50}
|
|
|
|
+ Dict body = Dict.create().set("provinceCode", provinceCode).set("year", year);
|
|
|
|
+ body.set("eduLevel", "ben").set("chooseSubjects", Arrays.asList("物理", "政治", "地理"));
|
|
|
|
+ body.set("majorCodes", Collections.EMPTY_LIST).set("majorKeywords", Collections.EMPTY_LIST);
|
|
|
|
+ body.set("collegeCodes", Arrays.asList(collegeCode)).set("collegeKeywords", Collections.EMPTY_LIST);
|
|
|
|
+ body.set("isMatch", -1).set("pageIndex", pageIndex).set("pageSize", 50);
|
|
|
|
+ JSONObject data = (JSONObject)postBody(api, null, body);
|
|
|
|
+ total = data.getInteger("totalCount");
|
|
|
|
+ JSONArray arr = data.getJSONArray("items");
|
|
|
|
+ count += arr.size();
|
|
|
|
+ for (int i = 0; i < arr.size(); i++) {
|
|
|
|
+ Entity item = Entity.create(tableName).set("collegeCode", collegeCode);
|
|
|
|
+ item.putAll(arr.getJSONObject(i));
|
|
|
|
+ temp.add(item);
|
|
|
|
+ }
|
|
|
|
+ if (CollectionUtil.isNotEmpty(temp)) {
|
|
|
|
+ records.addAll(temp);
|
|
|
|
+ }
|
|
|
|
+ if (count >= total || arr.size() == 0) {
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+ pageIndex += 1;
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ ThreadUtil.safeSleep(500);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ try {
|
|
|
|
+ DbUtil.use().del(Entity.create(tableName).set("collegeCode", collegeCode).set("provinceCode", provinceCode).set("year", year));
|
|
|
|
+ if (CollectionUtil.isNotEmpty(records)) {
|
|
|
|
+ DbUtil.use().insert(records);
|
|
|
|
+ }
|
|
|
|
+ } catch (Exception e) {
|
|
|
|
+ e.printStackTrace();
|
|
|
|
+ }
|
|
|
|
+ return records.size();
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+}
|