|
@@ -12,6 +12,7 @@ import com.mingxue.spider.utils.UsignUtils;
|
|
import org.junit.Test;
|
|
import org.junit.Test;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
+import org.springframework.util.CollectionUtils;
|
|
|
|
|
|
import java.sql.SQLException;
|
|
import java.sql.SQLException;
|
|
import java.util.*;
|
|
import java.util.*;
|
|
@@ -26,11 +27,12 @@ public class YouZy {
|
|
private static Logger log = LoggerFactory.getLogger(YouZy.class);
|
|
private static Logger log = LoggerFactory.getLogger(YouZy.class);
|
|
|
|
|
|
private static final String host = "https://uwf7de983aad7a717eb.youzy.cn";
|
|
private static final String host = "https://uwf7de983aad7a717eb.youzy.cn";
|
|
|
|
+ Boolean isBatchInsert = true;
|
|
|
|
|
|
@Test
|
|
@Test
|
|
public void test() throws Exception {
|
|
public void test() throws Exception {
|
|
//高校
|
|
//高校
|
|
- getUniversityList();
|
|
|
|
|
|
+// getUniversityList();
|
|
// getMajorData();
|
|
// getMajorData();
|
|
// processVocationalHotAndLack();
|
|
// processVocationalHotAndLack();
|
|
/**
|
|
/**
|
|
@@ -40,11 +42,13 @@ public class YouZy {
|
|
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
- * 职业库
|
|
|
|
|
|
+ * 专业库
|
|
* eduLevel:"ben","zhuan"
|
|
* eduLevel:"ben","zhuan"
|
|
*/
|
|
*/
|
|
|
|
|
|
-// List<String> majorList = getMajor(3, "ben,zhuan");
|
|
|
|
|
|
+ Integer majorLevel = 3;
|
|
|
|
+ List<String> majorList = getMajor(majorLevel, "ben,zhuan");
|
|
|
|
+ log.error("{}级专业编码为 {}",majorLevel,JSONObject.toJSONString(majorList));
|
|
// System.out.println(JSONObject.toJSONString(majorList));
|
|
// System.out.println(JSONObject.toJSONString(majorList));
|
|
|
|
|
|
// for (String majorCode : majorList) {
|
|
// for (String majorCode : majorList) {
|
|
@@ -56,12 +60,22 @@ public class YouZy {
|
|
// }
|
|
// }
|
|
// getCareerProspects(majorCode);
|
|
// getCareerProspects(majorCode);
|
|
// }
|
|
// }
|
|
|
|
+
|
|
|
|
+ //处理专业开设院校
|
|
|
|
+ for (String majorCode : majorList) {
|
|
|
|
+ if(Integer.parseInt(majorCode)<80202){
|
|
|
|
+ continue;
|
|
|
|
+ }
|
|
|
|
+ processUniversityByCode(majorCode,2022);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
public void getUniversityList() {
|
|
public void getUniversityList() {
|
|
// 31 43 52 67
|
|
// 31 43 52 67
|
|
- int count = 0, total = 5000, pageIndex = 96, pageSize=20;
|
|
|
|
|
|
+ int count = 0, total = 5000, pageIndex = 115, pageSize=20;
|
|
do {
|
|
do {
|
|
//学校列表
|
|
//学校列表
|
|
String requestPath = "/youzy.dms.basiclib.api.college.query";
|
|
String requestPath = "/youzy.dms.basiclib.api.college.query";
|
|
@@ -95,7 +109,7 @@ public class YouZy {
|
|
/**
|
|
/**
|
|
* 招生简单
|
|
* 招生简单
|
|
*/
|
|
*/
|
|
-// enrollDetail(universityCode);
|
|
|
|
|
|
+ enrollDetail(universityCode);
|
|
|
|
|
|
/**
|
|
/**
|
|
* 特色专业
|
|
* 特色专业
|
|
@@ -126,12 +140,12 @@ public class YouZy {
|
|
/**
|
|
/**
|
|
* 获取学校图片
|
|
* 获取学校图片
|
|
*/
|
|
*/
|
|
- getImgs(universityCode);
|
|
|
|
|
|
+// getImgs(universityCode);
|
|
|
|
|
|
/**
|
|
/**
|
|
* VR
|
|
* VR
|
|
*/
|
|
*/
|
|
- getVR(universityCode);
|
|
|
|
|
|
+// getVR(universityCode);
|
|
/**
|
|
/**
|
|
* 视频:需要将aliId存储到我们自己的vod中
|
|
* 视频:需要将aliId存储到我们自己的vod中
|
|
*/
|
|
*/
|
|
@@ -149,6 +163,81 @@ public class YouZy {
|
|
}while (count <= total);
|
|
}while (count <= total);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+    /**
+     * Crawls the universities that offer one major and stores every returned row in the
+     * {@code sy_major_university} table.
+     *
+     * <p>The method pages through {@code /youzy.dms.basiclib.api.eduranking.mr.search}
+     * with a fixed page size of 20 until the {@code totalCount} reported by the service
+     * has been covered. Paging stops early when the response is empty, is not marked
+     * successful, or contains no items. Depending on {@code isBatchInsert}, rows are
+     * inserted either one by one or once per page.
+     *
+     * @param code major code; it is parsed with {@link Integer#parseInt(String)}, so a
+     *             non-numeric code raises {@link NumberFormatException}
+     * @param year ranking year; sent in the request and written to every stored row
+     */
+    private void processUniversityByCode(String code, Integer year) {
+        final String requestPath = "/youzy.dms.basiclib.api.eduranking.mr.search";
+        // Placeholders, in order: year, majorCode, pageIndex, pageSize.
+        final String requestTemplate = "{\"year\":\"%s\",\"majorCode\":\"%s\",\"keyword\":\"\",\"collegeProvinceCodes\":[],\"features\":[],\"collegeCategories\":[],\"natureTypes\":[],\"pageIndex\":%s,\"pageSize\":%s}";
+        final int pageSize = 20;
+
+        // Upper bound used only until the first successful response reports totalCount.
+        int total = 5000;
+        int pageIndex = 1;
+        // NOTE(review): hard-coded resume point kept from the original; it looks like the
+        // checkpoint of an interrupted crawl - confirm whether it is still needed.
+        if (Integer.parseInt(code) == 80202) {
+            pageIndex = 10;
+        }
+
+        int count;
+        do {
+            String jsonParam = String.format(requestTemplate, year, code, pageIndex, pageSize);
+            String res = getRes(requestPath, new HashMap<>(), jsonParam);
+            if (StringUtils.isEmpty(res)) {
+                log.error("res is empty");
+                return;
+            }
+
+            JSONObject resultJsonobject = JSONObject.parseObject(res);
+            JSONObject result = null;
+            if (resultJsonobject != null && Boolean.TRUE.equals(resultJsonobject.getBoolean("isSuccess"))) {
+                result = resultJsonobject.getJSONObject("result");
+            }
+            if (result == null) {
+                // Without a usable result the real total is unknown; stop instead of
+                // requesting pages up to the 5000-row default bound.
+                log.error("major university query failed, majorCode={}, page={}, res={}", code, pageIndex, res);
+                return;
+            }
+
+            Integer totalCount = result.getInteger("totalCount");
+            if (totalCount != null) {
+                total = totalCount;
+            }
+
+            long start = System.currentTimeMillis();
+            JSONArray items = result.getJSONArray("items");
+            if (items == null || items.isEmpty()) {
+                // An empty page means there is nothing further to fetch for this major.
+                log.info("major university query returned no items, majorCode={}, page={}", code, pageIndex);
+                return;
+            }
+
+            Set<Entity> subjectList = new HashSet<>();
+            for (int i = 0; i < items.size(); i++) {
+                Entity universityTable = toMajorUniversityEntity(items.getJSONObject(i), code, year);
+                if (universityTable == null) {
+                    log.error("row without college skipped, majorCode={}, page={}, index={}", code, pageIndex, i);
+                    continue;
+                }
+                subjectList.add(universityTable);
+                if (!isBatchInsert) {
+                    try {
+                        DbUtil.use().insert(universityTable);
+                    } catch (SQLException e) {
+                        log.error("insert into sy_major_university failed, majorCode={}, page={}", code, pageIndex, e);
+                    }
+                }
+            }
+
+            if (isBatchInsert && !CollectionUtils.isEmpty(subjectList)) {
+                try {
+                    DbUtil.use().insert(subjectList);
+                } catch (SQLException e) {
+                    log.error("batch insert into sy_major_university failed, majorCode={}, page={}", code, pageIndex, e);
+                }
+            }
+            log.info("完成专业编码 {}, 耗时{}", code, DateUtil.formatBetween(System.currentTimeMillis() - start));
+
+            log.info("专业编码 {}, 已完成page={}, 开始第{}页", code, pageIndex, (pageIndex + 1));
+            count = pageIndex * pageSize;
+            pageIndex += 1;
+            // Strictly less than: once count reaches total the last page has been read.
+        } while (count < total);
+    }
+
+    /**
+     * Converts one item of the search response into a {@code sy_major_university} row.
+     *
+     * <p>The nested {@code college} object is flattened into {@code universityCode} and
+     * {@code universityName}; {@code college}, {@code hits} and {@code isUp} are removed
+     * from the item before it is copied into the entity. The passed item is modified.
+     *
+     * @param rowUniversity one element of the response {@code items} array
+     * @param code          major code written to the row
+     * @param year          ranking year written to the row
+     * @return the entity to insert, or {@code null} when the item has no {@code college}
+     */
+    private Entity toMajorUniversityEntity(JSONObject rowUniversity, String code, Integer year) {
+        if (rowUniversity == null) {
+            return null;
+        }
+        JSONObject college = rowUniversity.getJSONObject("college");
+        if (college == null) {
+            return null;
+        }
+        String universityCode = college.getString("code");
+        String universityName = college.getString("cnName");
+
+        rowUniversity.remove("college");
+        rowUniversity.remove("hits");
+        rowUniversity.remove("isUp");
+        rowUniversity.put("majorCode", code);
+        rowUniversity.put("year", year);
+        rowUniversity.put("universityCode", universityCode);
+        rowUniversity.put("universityName", universityName);
+        rowUniversity.put("updateTime", new Date());
+
+        Entity universityTable = Entity.create("sy_major_university");
+        universityTable.putAll(rowUniversity);
+        return universityTable;
+    }
|
|
|
|
+
|
|
private void getVideo(String universityCode,String universityName){
|
|
private void getVideo(String universityCode,String universityName){
|
|
String requestPath = "/op/Classrooms/Packs/Query";
|
|
String requestPath = "/op/Classrooms/Packs/Query";
|
|
String jsonParam = "";
|
|
String jsonParam = "";
|
|
@@ -440,7 +529,7 @@ public class YouZy {
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
/**
|
|
- * 招生简单
|
|
|
|
|
|
+ * 招生简章
|
|
* @param collegeCode
|
|
* @param collegeCode
|
|
*/
|
|
*/
|
|
private void enrollDetail(String collegeCode){
|
|
private void enrollDetail(String collegeCode){
|
|
@@ -463,7 +552,7 @@ public class YouZy {
|
|
JSONObject row = (JSONObject) items.get(i);
|
|
JSONObject row = (JSONObject) items.get(i);
|
|
Integer year = row.getInteger("year");
|
|
Integer year = row.getInteger("year");
|
|
//只取2021、2022的数据
|
|
//只取2021、2022的数据
|
|
- if(year<2021) continue;
|
|
|
|
|
|
+ if(year<=2021) continue;
|
|
String id = row.getString("id");
|
|
String id = row.getString("id");
|
|
|
|
|
|
//处理detail
|
|
//处理detail
|
|
@@ -493,7 +582,7 @@ public class YouZy {
|
|
|
|
|
|
Date now =new Date();
|
|
Date now =new Date();
|
|
row.put("createTime",createdAt);
|
|
row.put("createTime",createdAt);
|
|
- row.put("updateTime",now);
|
|
|
|
|
|
+ row.put("update_time",now);
|
|
//存储学校
|
|
//存储学校
|
|
Entity universityTable = Entity.create("sy_university_enroll_brochure");
|
|
Entity universityTable = Entity.create("sy_university_enroll_brochure");
|
|
universityTable.putAll(row);
|
|
universityTable.putAll(row);
|