|
@@ -5,6 +5,7 @@ import java.io.IOException;
|
|
|
import java.sql.SQLException;
|
|
|
import java.util.*;
|
|
|
|
|
|
+import com.mingxue.spider.helper.StringUtils;
|
|
|
import org.junit.Test;
|
|
|
|
|
|
import com.alibaba.fastjson.JSONArray;
|
|
@@ -17,11 +18,16 @@ import cn.hutool.core.thread.ThreadUtil;
|
|
|
import cn.hutool.core.util.CharsetUtil;
|
|
|
import cn.hutool.db.DbUtil;
|
|
|
import cn.hutool.db.Entity;
|
|
|
+import org.slf4j.Logger;
|
|
|
+import org.slf4j.LoggerFactory;
|
|
|
+import org.springframework.util.CollectionUtils;
|
|
|
|
|
|
public class CollegeData {
|
|
|
private static String host = "https://apiv4.diyigaokao.com";
|
|
|
|
|
|
private List<String> errors = new ArrayList<>();
|
|
|
+ protected final Logger log = LoggerFactory.getLogger(this.getClass());
|
|
|
+ Boolean isBatchInsert = false;
|
|
|
|
|
|
@Test
|
|
|
public void test() throws Exception {
|
|
@@ -31,7 +37,7 @@ public class CollegeData {
|
|
|
// getCategoryMajor();
|
|
|
// generateNineDate();
|
|
|
// 广东账号:18774924158,密码:123456
|
|
|
-// ApiUtil.getInstance().login("18774924158", "123456",false);
|
|
|
+ ApiUtil.getInstance().login("18774924158", "123456",false);
|
|
|
// 湖北账号:18674898114, 密码:123456
|
|
|
// ApiUtil.getInstance().login("18674898114", "123456",false);
|
|
|
// 江西账号:13203226079,密码:123456
|
|
@@ -41,16 +47,20 @@ public class CollegeData {
|
|
|
}
|
|
|
|
|
|
public void collegeList() throws SQLException {
|
|
|
- int count = 0, total = 5000, page = 1;
|
|
|
+ int count = 0, total = 5000, page = 74;
|
|
|
+ JSONObject res=new JSONObject();
|
|
|
do {
|
|
|
try {
|
|
|
StringBuilder sb = new StringBuilder(host);
|
|
|
sb.append("/college/list/byMultiple");
|
|
|
sb.append("?provinceIds=&yxjbz=&levels=&collegeType=&hotTags=");
|
|
|
sb.append("&pageIndex=").append(page).append("&pageSize=15");
|
|
|
- JSONObject res = ApiUtil.getInstance().httpSyncGet(sb.toString());
|
|
|
+ ThreadUtil.safeSleep(2 * 1000);
|
|
|
+
|
|
|
+ res = ApiUtil.getInstance().httpSyncGet(sb.toString());
|
|
|
total = res.getInteger("total");
|
|
|
JSONArray data = res.getJSONArray("data");
|
|
|
+ int processColledgeCount=0;
|
|
|
for (int i = 0; i < data.size(); i++) {
|
|
|
long start = System.currentTimeMillis();
|
|
|
JSONObject row = data.getJSONObject(i);
|
|
@@ -61,15 +71,43 @@ public class CollegeData {
|
|
|
ThreadUtil.safeSleep(1 * 1000);
|
|
|
}
|
|
|
//选科数据
|
|
|
- xuanke(collegeId);
|
|
|
+// xuanke(collegeId);
|
|
|
+ List<Integer> colledgeIds = Arrays.asList(
|
|
|
+ 1,3,2,24,8,13,6,7,4,10,5,18,20,32,27,46,58,30,47,22,94,95,176,274,
|
|
|
+ 271,403,399,427,512,521,571,573,580,652,654,658,653,655,664,667,675,
|
|
|
+ 656,718,720,724,728,722,721,736,741,719,729,731,725,765,871,900,873,
|
|
|
+ 879,972,971,973,1083,1085,1166,1257,1258,1260,1406,1525,1526,1533,
|
|
|
+ 1530,1535,1537,1544,2484,2485,2451,2360,2363,2361,2330,2364,2341,
|
|
|
+ 2141,2145,2082,2084,2083,2065,1978,1945,1946,1855,1854,1856,1858,
|
|
|
+ 1866,1753,153,1091,717,2359,3123,1868);
|
|
|
+ if(colledgeIds.contains(collegeId)){
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if(processColledgeCount>5){
|
|
|
+ //超过5个学校换个账号
|
|
|
+ ApiUtil.getInstance().login();
|
|
|
+ processColledgeCount=0;
|
|
|
+ }
|
|
|
+ //院校录取数据
|
|
|
+ List<Integer> liberalScienceList= Arrays.asList(1,2);
|
|
|
+ List<Integer> typeList= Arrays.asList(2,3);
|
|
|
+ for(Integer liberalScience:liberalScienceList){
|
|
|
+ for(Integer type:typeList){
|
|
|
+ byHistoryNew(collegeId,liberalScience,type);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ processColledgeCount++;
|
|
|
+
|
|
|
ThreadUtil.safeSleep(1 * 1000);
|
|
|
- System.out.println(DateUtil.formatBetween(System.currentTimeMillis() - start));
|
|
|
+ log.error("学校 {}, {}, 耗时{}",collegeId,row.getString("collegeName"),DateUtil.formatBetween(System.currentTimeMillis() - start));
|
|
|
}
|
|
|
- System.out.println("已完成:" + page);
|
|
|
+ log.error("已完成page={}, 开始第{}页" ,page,(page+1));
|
|
|
count = page * 15;
|
|
|
page += 1;
|
|
|
} catch (IOException e) {
|
|
|
e.printStackTrace();
|
|
|
+ log.error("error res is {}",res);
|
|
|
+ return;
|
|
|
}
|
|
|
} while (count <= total);
|
|
|
File dir = new File("E:/projects/wangmin/evaluation/code/spider/");
|
|
@@ -77,6 +115,63 @@ public class CollegeData {
|
|
|
System.out.println("执行完成!");
|
|
|
}
|
|
|
|
|
|
+ /*
|
|
|
+ * 院校录取数据
|
|
|
+ * Request URL: https://apiv4.diyigaokao.com/query/collegeScore/byHistoryNew?liberalScience=1&collegeId=1&type=2
|
|
|
+ * collegeId 院校ID true(必传) integer
|
|
|
+ * liberalScience 科类: 1理科(物理),0文科(历史), 2文理不分; true integer
|
|
|
+ * accessToken accessToken令牌 header false string
|
|
|
+ * phase 批次 false integer
|
|
|
+ * type 类型,普通省份不传或传1;312院校专业组省份(江苏、湖南、湖北、广东、福建):2021年以后数据传2,2020年以前数据传3 query false integer
|
|
|
+ */
|
|
|
+ private void byHistoryNew(Integer collegeId,Integer liberalScience,Integer type) throws IOException {
|
|
|
+ ThreadUtil.safeSleep(2 * 1000);
|
|
|
+
|
|
|
+ StringBuilder sb = new StringBuilder(host);
|
|
|
+ sb.append("/query/collegeScore/byHistoryNew");
|
|
|
+ sb.append("?liberalScience=").append(liberalScience);
|
|
|
+ sb.append("&collegeId=").append(collegeId);
|
|
|
+ sb.append("&type=").append(type);
|
|
|
+ JSONObject res = ApiUtil.getInstance().httpSyncGet(sb.toString());
|
|
|
+// System.out.println("data is "+res);
|
|
|
+ if(res.getInteger("error")==0){
|
|
|
+ //data
|
|
|
+ JSONObject dataObject = res.getJSONObject("data");
|
|
|
+ JSONArray tdDatas = dataObject.getJSONArray("tdData");
|
|
|
+ Set<Entity> subjectList=new HashSet();
|
|
|
+ for (int i = 0; i < tdDatas.size(); i++) {
|
|
|
+ //sy_colledge_enroll_data
|
|
|
+ JSONObject row = tdDatas.getJSONObject(i);
|
|
|
+ Entity subjectTable4 = Entity.create("sy_colledge_enroll_data");
|
|
|
+ subjectTable4.putAll(row);
|
|
|
+ subjectList.add(subjectTable4);
|
|
|
+ if(!isBatchInsert){
|
|
|
+ try {
|
|
|
+ DbUtil.use().insertOrUpdate(subjectTable4, "ID");
|
|
|
+ } catch (SQLException e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if(isBatchInsert){
|
|
|
+ try {
|
|
|
+ if(!CollectionUtils.isEmpty(subjectList)){
|
|
|
+ DbUtil.use().insert(subjectList);
|
|
|
+ }
|
|
|
+ } catch (SQLException e) {
|
|
|
+ e.printStackTrace();
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }else {
|
|
|
+ log.error("byHistoryNew res is {}, request is {} ",res,sb.toString());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 学校详情
|
|
|
+ * @param id
|
|
|
+ * @param retry
|
|
|
+ */
|
|
|
private void detail(Integer id, Integer retry) {
|
|
|
int maxRetry = 2;
|
|
|
JSONObject res = null;
|