mojinxia 1 год назад
Сommit
696cac5a83

+ 33 - 0
.gitignore

@@ -0,0 +1,33 @@
+HELP.md
+target/
+!.mvn/wrapper/maven-wrapper.jar
+!**/src/main/**/target/
+!**/src/test/**/target/
+
+### STS ###
+.apt_generated
+.classpath
+.factorypath
+.project
+.settings
+.springBeans
+.sts4-cache
+
+### IntelliJ IDEA ###
+.idea
+*.iws
+*.iml
+*.ipr
+
+### NetBeans ###
+/nbproject/private/
+/nbbuild/
+/dist/
+/nbdist/
+/.nb-gradle/
+build/
+!**/src/main/**/build/
+!**/src/test/**/build/
+
+### VS Code ###
+.vscode/

+ 308 - 0
mvnw

@@ -0,0 +1,308 @@
+#!/bin/sh
+# ----------------------------------------------------------------------------
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#    https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# ----------------------------------------------------------------------------
+
+# ----------------------------------------------------------------------------
+# Apache Maven Wrapper startup batch script, version 3.2.0
+#
+# Required ENV vars:
+# ------------------
+#   JAVA_HOME - location of a JDK home dir
+#
+# Optional ENV vars
+# -----------------
+#   MAVEN_OPTS - parameters passed to the Java VM when running Maven
+#     e.g. to debug Maven itself, use
+#       set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
+#   MAVEN_SKIP_RC - flag to disable loading of mavenrc files
+# ----------------------------------------------------------------------------
+
+if [ -z "$MAVEN_SKIP_RC" ] ; then
+
+  if [ -f /usr/local/etc/mavenrc ] ; then
+    . /usr/local/etc/mavenrc
+  fi
+
+  if [ -f /etc/mavenrc ] ; then
+    . /etc/mavenrc
+  fi
+
+  if [ -f "$HOME/.mavenrc" ] ; then
+    . "$HOME/.mavenrc"
+  fi
+
+fi
+
+# OS specific support.  $var _must_ be set to either true or false.
+cygwin=false;
+darwin=false;
+mingw=false
+case "$(uname)" in
+  CYGWIN*) cygwin=true ;;
+  MINGW*) mingw=true;;
+  Darwin*) darwin=true
+    # Use /usr/libexec/java_home if available, otherwise fall back to /Library/Java/Home
+    # See https://developer.apple.com/library/mac/qa/qa1170/_index.html
+    if [ -z "$JAVA_HOME" ]; then
+      if [ -x "/usr/libexec/java_home" ]; then
+        JAVA_HOME="$(/usr/libexec/java_home)"; export JAVA_HOME
+      else
+        JAVA_HOME="/Library/Java/Home"; export JAVA_HOME
+      fi
+    fi
+    ;;
+esac
+
+if [ -z "$JAVA_HOME" ] ; then
+  if [ -r /etc/gentoo-release ] ; then
+    JAVA_HOME=$(java-config --jre-home)
+  fi
+fi
+
+# For Cygwin, ensure paths are in UNIX format before anything is touched
+if $cygwin ; then
+  [ -n "$JAVA_HOME" ] &&
+    JAVA_HOME=$(cygpath --unix "$JAVA_HOME")
+  [ -n "$CLASSPATH" ] &&
+    CLASSPATH=$(cygpath --path --unix "$CLASSPATH")
+fi
+
+# For Mingw, ensure paths are in UNIX format before anything is touched
+if $mingw ; then
+  [ -n "$JAVA_HOME" ] && [ -d "$JAVA_HOME" ] &&
+    JAVA_HOME="$(cd "$JAVA_HOME" || (echo "cannot cd into $JAVA_HOME."; exit 1); pwd)"
+fi
+
+# JAVA_HOME is unset: try to derive it from the javac found on PATH by
+# resolving symlinks and stripping the trailing "/bin" directory.
+if [ -z "$JAVA_HOME" ]; then
+  javaExecutable="$(which javac)"
+  # Some 'which' implementations (e.g. Solaris) print "no javac in ..." on a
+  # miss, so the first word of the output is checked against "no".
+  # NOTE: inside $( ... ) a backslash-escaped quote is a literal character, so
+  # the path must be passed with plain double quotes only.
+  if [ -n "$javaExecutable" ] && ! [ "$(expr "$javaExecutable" : '\([^ ]*\)')" = "no" ]; then
+    # readlink(1) is not available as standard on Solaris 10.
+    readLink=$(which readlink)
+    if [ ! "$(expr "$readLink" : '\([^ ]*\)')" = "no" ]; then
+      if $darwin ; then
+        # Resolve the physical directory with 'pwd -P' instead of 'readlink -f'.
+        javaHome="$(dirname "$javaExecutable")"
+        javaExecutable="$(cd "$javaHome" && pwd -P)/javac"
+      else
+        javaExecutable="$(readlink -f "$javaExecutable")"
+      fi
+      javaHome="$(dirname "$javaExecutable")"
+      # Keep everything before the last "/bin" path component.
+      javaHome=$(expr "$javaHome" : '\(.*\)/bin')
+      JAVA_HOME="$javaHome"
+      export JAVA_HOME
+    fi
+  fi
+fi
+
+if [ -z "$JAVACMD" ] ; then
+  if [ -n "$JAVA_HOME"  ] ; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+      # IBM's JDK on AIX uses strange locations for the executables
+      JAVACMD="$JAVA_HOME/jre/sh/java"
+    else
+      JAVACMD="$JAVA_HOME/bin/java"
+    fi
+  else
+    JAVACMD="$(\unset -f command 2>/dev/null; \command -v java)"
+  fi
+fi
+
+if [ ! -x "$JAVACMD" ] ; then
+  echo "Error: JAVA_HOME is not defined correctly." >&2
+  echo "  We cannot execute $JAVACMD" >&2
+  exit 1
+fi
+
+if [ -z "$JAVA_HOME" ] ; then
+  echo "Warning: JAVA_HOME environment variable is not set."
+fi
+
+# traverses directory structure from process work directory to filesystem root
+# first directory with .mvn subdirectory is considered project base directory
+find_maven_basedir() {
+  if [ -z "$1" ]
+  then
+    echo "Path not specified to find_maven_basedir"
+    return 1
+  fi
+
+  basedir="$1"
+  wdir="$1"
+  while [ "$wdir" != '/' ] ; do
+    if [ -d "$wdir"/.mvn ] ; then
+      basedir=$wdir
+      break
+    fi
+    # workaround for JBEAP-8937 (on Solaris 10/Sparc)
+    if [ -d "${wdir}" ]; then
+      wdir=$(cd "$wdir/.." || exit 1; pwd)
+    fi
+    # end of workaround
+  done
+  printf '%s' "$(cd "$basedir" || exit 1; pwd)"
+}
+
+# concatenates all lines of a file
+concat_lines() {
+  if [ -f "$1" ]; then
+    # Remove \r in case we run on Windows within Git Bash
+    # and check out the repository with auto CRLF management
+    # enabled. Otherwise, we may read lines that are delimited with
+    # \r\n and produce $'-Xarg\r' rather than -Xarg due to word
+    # splitting rules.
+    tr -s '\r\n' ' ' < "$1"
+  fi
+}
+
+log() {
+  if [ "$MVNW_VERBOSE" = true ]; then
+    printf '%s\n' "$1"
+  fi
+}
+
+BASE_DIR=$(find_maven_basedir "$(dirname "$0")")
+if [ -z "$BASE_DIR" ]; then
+  exit 1;
+fi
+
+MAVEN_PROJECTBASEDIR=${MAVEN_BASEDIR:-"$BASE_DIR"}; export MAVEN_PROJECTBASEDIR
+log "$MAVEN_PROJECTBASEDIR"
+
+##########################################################################################
+# Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
+# This allows using the maven wrapper in projects that prohibit checking in binary data.
+##########################################################################################
+wrapperJarPath="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar"
+if [ -r "$wrapperJarPath" ]; then
+    log "Found $wrapperJarPath"
+else
+    log "Couldn't find $wrapperJarPath, downloading it ..."
+
+    if [ -n "$MVNW_REPOURL" ]; then
+      wrapperUrl="$MVNW_REPOURL/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar"
+    else
+      wrapperUrl="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar"
+    fi
+    while IFS="=" read -r key value; do
+      # Remove '\r' from value to allow usage on windows as IFS does not consider '\r' as a separator ( considers space, tab, new line ('\n'), and custom '=' )
+      safeValue=$(echo "$value" | tr -d '\r')
+      case "$key" in (wrapperUrl) wrapperUrl="$safeValue"; break ;;
+      esac
+    done < "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.properties"
+    log "Downloading from: $wrapperUrl"
+
+    if $cygwin; then
+      wrapperJarPath=$(cygpath --path --windows "$wrapperJarPath")
+    fi
+
+    if command -v wget > /dev/null; then
+        log "Found wget ... using wget"
+        [ "$MVNW_VERBOSE" = true ] && QUIET="" || QUIET="--quiet"
+        if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
+            wget $QUIET "$wrapperUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath"
+        else
+            wget $QUIET --http-user="$MVNW_USERNAME" --http-password="$MVNW_PASSWORD" "$wrapperUrl" -O "$wrapperJarPath" || rm -f "$wrapperJarPath"
+        fi
+    elif command -v curl > /dev/null; then
+        log "Found curl ... using curl"
+        [ "$MVNW_VERBOSE" = true ] && QUIET="" || QUIET="--silent"
+        if [ -z "$MVNW_USERNAME" ] || [ -z "$MVNW_PASSWORD" ]; then
+            curl $QUIET -o "$wrapperJarPath" "$wrapperUrl" -f -L || rm -f "$wrapperJarPath"
+        else
+            curl $QUIET --user "$MVNW_USERNAME:$MVNW_PASSWORD" -o "$wrapperJarPath" "$wrapperUrl" -f -L || rm -f "$wrapperJarPath"
+        fi
+    else
+        log "Falling back to using Java to download"
+        javaSource="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/MavenWrapperDownloader.java"
+        javaClass="$MAVEN_PROJECTBASEDIR/.mvn/wrapper/MavenWrapperDownloader.class"
+        # For Cygwin, switch paths to Windows format before running javac
+        if $cygwin; then
+          javaSource=$(cygpath --path --windows "$javaSource")
+          javaClass=$(cygpath --path --windows "$javaClass")
+        fi
+        if [ -e "$javaSource" ]; then
+            if [ ! -e "$javaClass" ]; then
+                log " - Compiling MavenWrapperDownloader.java ..."
+                ("$JAVA_HOME/bin/javac" "$javaSource")
+            fi
+            if [ -e "$javaClass" ]; then
+                log " - Running MavenWrapperDownloader.java ..."
+                ("$JAVA_HOME/bin/java" -cp .mvn/wrapper MavenWrapperDownloader "$wrapperUrl" "$wrapperJarPath") || rm -f "$wrapperJarPath"
+            fi
+        fi
+    fi
+fi
+##########################################################################################
+# End of extension
+##########################################################################################
+
+# If specified, validate the SHA-256 sum of the Maven wrapper jar file
+wrapperSha256Sum=""
+# '|| [ -n "$key" ]' also processes a final line that lacks a trailing newline;
+# without it a checksum on the last line would be skipped silently.
+while IFS="=" read -r key value || [ -n "$key" ]; do
+  case "$key" in (wrapperSha256Sum)
+    # Remove '\r' so a properties file checked out with CRLF line endings does
+    # not produce a checksum that can never match.
+    wrapperSha256Sum=$(printf '%s' "$value" | tr -d '\r'); break ;;
+  esac
+done < "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.properties"
+if [ -n "$wrapperSha256Sum" ]; then
+  wrapperSha256Result=false
+  # Both tools read "<hash>  <file>" on stdin and verify it with -c.
+  if command -v sha256sum > /dev/null; then
+    if echo "$wrapperSha256Sum  $wrapperJarPath" | sha256sum -c > /dev/null 2>&1; then
+      wrapperSha256Result=true
+    fi
+  elif command -v shasum > /dev/null; then
+    if echo "$wrapperSha256Sum  $wrapperJarPath" | shasum -a 256 -c > /dev/null 2>&1; then
+      wrapperSha256Result=true
+    fi
+  else
+    echo "Checksum validation was requested but neither 'sha256sum' or 'shasum' are available." >&2
+    echo "Please install either command, or disable validation by removing 'wrapperSha256Sum' from your maven-wrapper.properties." >&2
+    exit 1
+  fi
+  if [ $wrapperSha256Result = false ]; then
+    echo "Error: Failed to validate Maven wrapper SHA-256, your Maven wrapper might be compromised." >&2
+    echo "Investigate or delete $wrapperJarPath to attempt a clean download." >&2
+    echo "If you updated your Maven version, you need to update the specified wrapperSha256Sum property." >&2
+    exit 1
+  fi
+fi
+
+MAVEN_OPTS="$(concat_lines "$MAVEN_PROJECTBASEDIR/.mvn/jvm.config") $MAVEN_OPTS"
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin; then
+  [ -n "$JAVA_HOME" ] &&
+    JAVA_HOME=$(cygpath --path --windows "$JAVA_HOME")
+  [ -n "$CLASSPATH" ] &&
+    CLASSPATH=$(cygpath --path --windows "$CLASSPATH")
+  [ -n "$MAVEN_PROJECTBASEDIR" ] &&
+    MAVEN_PROJECTBASEDIR=$(cygpath --path --windows "$MAVEN_PROJECTBASEDIR")
+fi
+
+# Provide a "standardized" way to retrieve the CLI args that will
+# work with both Windows and non-Windows executions.
+MAVEN_CMD_LINE_ARGS="$MAVEN_CONFIG $*"
+export MAVEN_CMD_LINE_ARGS
+
+WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
+
+# shellcheck disable=SC2086 # safe args
+exec "$JAVACMD" \
+  $MAVEN_OPTS \
+  $MAVEN_DEBUG_OPTS \
+  -classpath "$MAVEN_PROJECTBASEDIR/.mvn/wrapper/maven-wrapper.jar" \
+  "-Dmaven.multiModuleProjectDirectory=${MAVEN_PROJECTBASEDIR}" \
+  ${WRAPPER_LAUNCHER} $MAVEN_CONFIG "$@"

+ 205 - 0
mvnw.cmd

@@ -0,0 +1,205 @@
+@REM ----------------------------------------------------------------------------
+@REM Licensed to the Apache Software Foundation (ASF) under one
+@REM or more contributor license agreements.  See the NOTICE file
+@REM distributed with this work for additional information
+@REM regarding copyright ownership.  The ASF licenses this file
+@REM to you under the Apache License, Version 2.0 (the
+@REM "License"); you may not use this file except in compliance
+@REM with the License.  You may obtain a copy of the License at
+@REM
+@REM    https://www.apache.org/licenses/LICENSE-2.0
+@REM
+@REM Unless required by applicable law or agreed to in writing,
+@REM software distributed under the License is distributed on an
+@REM "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+@REM KIND, either express or implied.  See the License for the
+@REM specific language governing permissions and limitations
+@REM under the License.
+@REM ----------------------------------------------------------------------------
+
+@REM ----------------------------------------------------------------------------
+@REM Apache Maven Wrapper startup batch script, version 3.2.0
+@REM
+@REM Required ENV vars:
+@REM JAVA_HOME - location of a JDK home dir
+@REM
+@REM Optional ENV vars
+@REM MAVEN_BATCH_ECHO - set to 'on' to enable the echoing of the batch commands
+@REM MAVEN_BATCH_PAUSE - set to 'on' to wait for a keystroke before ending
+@REM MAVEN_OPTS - parameters passed to the Java VM when running Maven
+@REM     e.g. to debug Maven itself, use
+@REM set MAVEN_OPTS=-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=8000
+@REM MAVEN_SKIP_RC - flag to disable loading of mavenrc files
+@REM ----------------------------------------------------------------------------
+
+@REM Begin all REM lines with '@' in case MAVEN_BATCH_ECHO is 'on'
+@echo off
+@REM set title of command window
+title %0
+@REM enable echoing by setting MAVEN_BATCH_ECHO to 'on'
+@if "%MAVEN_BATCH_ECHO%" == "on"  echo %MAVEN_BATCH_ECHO%
+
+@REM set %HOME% to equivalent of $HOME
+if "%HOME%" == "" (set "HOME=%HOMEDRIVE%%HOMEPATH%")
+
+@REM Execute a user defined script before this one
+if not "%MAVEN_SKIP_RC%" == "" goto skipRcPre
+@REM check for pre script, once with legacy .bat ending and once with .cmd ending
+if exist "%USERPROFILE%\mavenrc_pre.bat" call "%USERPROFILE%\mavenrc_pre.bat" %*
+if exist "%USERPROFILE%\mavenrc_pre.cmd" call "%USERPROFILE%\mavenrc_pre.cmd" %*
+:skipRcPre
+
+@setlocal
+
+set ERROR_CODE=0
+
+@REM To isolate internal variables from possible post scripts, we use another setlocal
+@setlocal
+
+@REM ==== START VALIDATION ====
+if not "%JAVA_HOME%" == "" goto OkJHome
+
+echo.
+echo Error: JAVA_HOME not found in your environment. >&2
+echo Please set the JAVA_HOME variable in your environment to match the >&2
+echo location of your Java installation. >&2
+echo.
+goto error
+
+:OkJHome
+if exist "%JAVA_HOME%\bin\java.exe" goto init
+
+echo.
+echo Error: JAVA_HOME is set to an invalid directory. >&2
+echo JAVA_HOME = "%JAVA_HOME%" >&2
+echo Please set the JAVA_HOME variable in your environment to match the >&2
+echo location of your Java installation. >&2
+echo.
+goto error
+
+@REM ==== END VALIDATION ====
+
+:init
+
+@REM Find the project base dir, i.e. the directory that contains the folder ".mvn".
+@REM Fallback to current working directory if not found.
+
+set MAVEN_PROJECTBASEDIR=%MAVEN_BASEDIR%
+IF NOT "%MAVEN_PROJECTBASEDIR%"=="" goto endDetectBaseDir
+
+set EXEC_DIR=%CD%
+set WDIR=%EXEC_DIR%
+:findBaseDir
+IF EXIST "%WDIR%"\.mvn goto baseDirFound
+cd ..
+IF "%WDIR%"=="%CD%" goto baseDirNotFound
+set WDIR=%CD%
+goto findBaseDir
+
+:baseDirFound
+set MAVEN_PROJECTBASEDIR=%WDIR%
+cd "%EXEC_DIR%"
+goto endDetectBaseDir
+
+:baseDirNotFound
+set MAVEN_PROJECTBASEDIR=%EXEC_DIR%
+cd "%EXEC_DIR%"
+
+:endDetectBaseDir
+
+IF NOT EXIST "%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config" goto endReadAdditionalConfig
+
+@setlocal EnableExtensions EnableDelayedExpansion
+for /F "usebackq delims=" %%a in ("%MAVEN_PROJECTBASEDIR%\.mvn\jvm.config") do set JVM_CONFIG_MAVEN_PROPS=!JVM_CONFIG_MAVEN_PROPS! %%a
+@endlocal & set JVM_CONFIG_MAVEN_PROPS=%JVM_CONFIG_MAVEN_PROPS%
+
+:endReadAdditionalConfig
+
+SET MAVEN_JAVA_EXE="%JAVA_HOME%\bin\java.exe"
+set WRAPPER_JAR="%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.jar"
+set WRAPPER_LAUNCHER=org.apache.maven.wrapper.MavenWrapperMain
+
+set WRAPPER_URL="https://repo.maven.apache.org/maven2/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar"
+
+FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO (
+    IF "%%A"=="wrapperUrl" SET WRAPPER_URL=%%B
+)
+
+@REM Extension to allow automatically downloading the maven-wrapper.jar from Maven-central
+@REM This allows using the maven wrapper in projects that prohibit checking in binary data.
+if exist %WRAPPER_JAR% (
+    if "%MVNW_VERBOSE%" == "true" (
+        echo Found %WRAPPER_JAR%
+    )
+) else (
+    if not "%MVNW_REPOURL%" == "" (
+        SET WRAPPER_URL="%MVNW_REPOURL%/org/apache/maven/wrapper/maven-wrapper/3.2.0/maven-wrapper-3.2.0.jar"
+    )
+    if "%MVNW_VERBOSE%" == "true" (
+        echo Couldn't find %WRAPPER_JAR%, downloading it ...
+        echo Downloading from: %WRAPPER_URL%
+    )
+
+    powershell -Command "&{"^
+		"$webclient = new-object System.Net.WebClient;"^
+		"if (-not ([string]::IsNullOrEmpty('%MVNW_USERNAME%') -and [string]::IsNullOrEmpty('%MVNW_PASSWORD%'))) {"^
+		"$webclient.Credentials = new-object System.Net.NetworkCredential('%MVNW_USERNAME%', '%MVNW_PASSWORD%');"^
+		"}"^
+		"[Net.ServicePointManager]::SecurityProtocol = [Net.SecurityProtocolType]::Tls12; $webclient.DownloadFile('%WRAPPER_URL%', '%WRAPPER_JAR%')"^
+		"}"
+    if "%MVNW_VERBOSE%" == "true" (
+        echo Finished downloading %WRAPPER_JAR%
+    )
+)
+@REM End of extension
+
+@REM If specified, validate the SHA-256 sum of the Maven wrapper jar file
+SET WRAPPER_SHA_256_SUM=""
+FOR /F "usebackq tokens=1,2 delims==" %%A IN ("%MAVEN_PROJECTBASEDIR%\.mvn\wrapper\maven-wrapper.properties") DO (
+    IF "%%A"=="wrapperSha256Sum" SET WRAPPER_SHA_256_SUM=%%B
+)
+IF NOT %WRAPPER_SHA_256_SUM%=="" (
+    powershell -Command "&{"^
+       "$hash = (Get-FileHash \"%WRAPPER_JAR%\" -Algorithm SHA256).Hash.ToLower();"^
+       "If('%WRAPPER_SHA_256_SUM%' -ne $hash){"^
+       "  Write-Output 'Error: Failed to validate Maven wrapper SHA-256, your Maven wrapper might be compromised.';"^
+       "  Write-Output 'Investigate or delete %WRAPPER_JAR% to attempt a clean download.';"^
+       "  Write-Output 'If you updated your Maven version, you need to update the specified wrapperSha256Sum property.';"^
+       "  exit 1;"^
+       "}"^
+       "}"
+    if ERRORLEVEL 1 goto error
+)
+
+@REM Provide a "standardized" way to retrieve the CLI args that will
+@REM work with both Windows and non-Windows executions.
+set MAVEN_CMD_LINE_ARGS=%*
+
+%MAVEN_JAVA_EXE% ^
+  %JVM_CONFIG_MAVEN_PROPS% ^
+  %MAVEN_OPTS% ^
+  %MAVEN_DEBUG_OPTS% ^
+  -classpath %WRAPPER_JAR% ^
+  "-Dmaven.multiModuleProjectDirectory=%MAVEN_PROJECTBASEDIR%" ^
+  %WRAPPER_LAUNCHER% %MAVEN_CONFIG% %*
+if ERRORLEVEL 1 goto error
+goto end
+
+:error
+set ERROR_CODE=1
+
+:end
+@endlocal & set ERROR_CODE=%ERROR_CODE%
+
+if not "%MAVEN_SKIP_RC%"=="" goto skipRcPost
+@REM check for post script, once with legacy .bat ending and once with .cmd ending
+if exist "%USERPROFILE%\mavenrc_post.bat" call "%USERPROFILE%\mavenrc_post.bat"
+if exist "%USERPROFILE%\mavenrc_post.cmd" call "%USERPROFILE%\mavenrc_post.cmd"
+:skipRcPost
+
+@REM pause the script if MAVEN_BATCH_PAUSE is set to 'on'
+if "%MAVEN_BATCH_PAUSE%"=="on" pause
+
+if "%MAVEN_TERMINATE_CMD%"=="on" exit %ERROR_CODE%
+
+cmd /C exit /B %ERROR_CODE%

+ 85 - 0
pom.xml

@@ -0,0 +1,85 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.springframework.boot</groupId>
+        <artifactId>spring-boot-starter-parent</artifactId>
+        <version>3.2.4</version>
+        <relativePath/> <!-- lookup parent from repository -->
+    </parent>
+    <groupId>com.zs</groupId>
+    <artifactId>test</artifactId>
+    <version>0.0.1-SNAPSHOT</version>
+    <name>test</name>
+    <description>test</description>
+    <properties>
+        <java.version>17</java.version>
+    </properties>
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-ooxml</artifactId>
+            <version>4.1.2</version>
+        </dependency>
+        <!-- okhttp is declared exactly once: a second declaration of the same
+             groupId:artifactId triggers Maven's "must be unique" warning and
+             overrides this one. The default (compile) scope is also visible
+             on the test classpath. -->
+        <dependency>
+            <groupId>com.squareup.okhttp3</groupId>
+            <artifactId>okhttp</artifactId>
+            <version>4.9.3</version>
+        </dependency>
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+            <version>2.13.0</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.14.2</version>
+        </dependency>
+        <dependency>
+            <groupId>com.opencsv</groupId>
+            <artifactId>opencsv</artifactId>
+            <version>4.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>org.projectlombok</groupId>
+            <artifactId>lombok</artifactId>
+            <optional>true</optional>
+        </dependency>
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-test</artifactId>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.springframework.boot</groupId>
+                <artifactId>spring-boot-maven-plugin</artifactId>
+                <configuration>
+                    <excludes>
+                        <exclude>
+                            <groupId>org.projectlombok</groupId>
+                            <artifactId>lombok</artifactId>
+                        </exclude>
+                    </excludes>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>

+ 3 - 0
readme_dataprocess.txt

@@ -0,0 +1,3 @@
+TestApplicationTests.contextLoads 把图片识别成 json
+ImagesTest.testExportOriExcel  把json还原成 excel
+ImageTextTest这个是验证多栏处理的测试

+ 13 - 0
src/main/java/com/zs/test/TestApplication.java

@@ -0,0 +1,13 @@
+package com.zs.test;
+
+import org.springframework.boot.SpringApplication;
+import org.springframework.boot.autoconfigure.SpringBootApplication;
+
+@SpringBootApplication
+public class TestApplication {
+
+    public static void main(String[] args) {
+        SpringApplication.run(TestApplication.class, args);
+    }
+
+}

+ 1 - 0
src/main/resources/application.properties

@@ -0,0 +1 @@
+spring.application.name=test

+ 262 - 0
src/test/java/com/zs/test/ImageTextTest.java

@@ -0,0 +1,262 @@
+package com.zs.test;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.DeserializationFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import org.apache.commons.io.FileUtils;
+import org.assertj.core.util.Lists;
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+import java.util.stream.Collectors;
+
+public class ImageTextTest {
+    @Test
+    public void testLoadText() throws IOException {
+        String data = FileUtils.readFileToString(new File("D:\\data\\ocr\\2024\\招生计划扫描图片\\江西\\359b8817fe6a8670e026d2351d46964.json"), "utf-8");
+        ObjectMapper om = new ObjectMapper();
+        om.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+        OcrWordResult wordResult = om.readValue(data, OcrWordResult.class);
+        if (null == wordResult.wordsResultNum || 0 == wordResult.wordsResultNum) {
+            return;
+        }
+        // TODO 根据修正坐标及计数,找出计数最大的列坐标,行坐标的前几个,消除重叠区域,然后求最大面积为表格区域, 列最大不重叠的为行区域 分类
+
+        // 计算平均高度,平均间距,列块起始位及宽度
+        Integer avgHeight = null;
+        Integer avgMidDiff = null;
+        Integer lastMid = null;
+        Map<Integer, Stat> leftMap = new HashMap<>();
+        for (OcrWord word : wordResult.wordList) {
+            OcrLoc l = word.location;
+            if (null == avgHeight) {
+                avgHeight = l.height;
+            } else {
+                Integer newMid = l.top + l.height / 2;
+                if(null == avgMidDiff) {
+                    avgMidDiff = newMid - lastMid;
+                } else {
+                    avgMidDiff = (avgMidDiff + newMid - lastMid) / 2;
+                }
+                avgHeight = (avgHeight + l.height) / 2;
+            }
+            lastMid = l.top + l.height / 2;
+
+            // 算中线
+            word.row = Math.round((l.top + l.height / 2) / 40.0) * 40;
+            word.col = Math.round((l.left + l.width / 2) / 20.0) * 20 ;
+
+            Integer left = (l.left / 10 + 1) * 10;
+            Integer width = (l.width / 10 - 1) * 10;
+            Stat s = leftMap.get(left);
+            if (null == s) {
+                s = new Stat(left, left + width, l.width, 1, word.row, l.top, l.top + l.height);
+                leftMap.put(left, s);
+            } else {
+                s.right = Math.max(s.right, left + width);
+                s.width = Math.max(s.width, l.width);
+                s.midline = (s.midline + word.row) / 2;
+                s.count += 1;
+                s.top = Math.min(s.top, l.top);
+                s.bottom = Math.max(s.bottom, l.top + l.height);
+            }
+        }
+        // 计算最高的三个大小
+        List<Stat> list = new ArrayList<>(leftMap.values());
+        Collections.sort(list, Comparator.comparingInt(o -> -o.count));
+        List<Stat> validStatList = new ArrayList<>();
+        Integer ttop = null, tbottom = null, tleft = null, tright = null;
+        for(Stat s : list) {
+            if (validStatList.size() > 0) {
+                boolean ignore = false;
+                for (Stat st : validStatList) {
+                    if (s.key >= st.key && s.key <= st.right || s.right >= st.key && s.right <= st.right
+                            || st.key >= s.key && st.key <= s.right || st.right >= s.key && st.right <= s.right) {
+                        ignore = true; // 如果说这个与原来的有交集,而且行号相同,应该合并进去
+                        if (Math.abs(s.midline - st.midline) < 10) {
+                            st.right = Math.max(st.right, s.right);
+                        }
+                        break;
+                    }
+                }
+                if (ignore) {
+                    continue;
+                }
+            }
+            if (validStatList.size() == 0) {
+                ttop = s.top;
+                tbottom = s.bottom;
+                tleft = s.key;
+                tright = s.right;
+            } else {
+                ttop = Math.min(s.top, ttop);
+                tbottom = Math.max(s.bottom, tbottom);
+                tleft = Math.min(s.key, tleft);
+                tright = Math.max(s.right, tright);
+            }
+            validStatList.add(s);
+        }
+        ttop -= 20;
+        tright += 20;
+        tleft -= 20;
+        tbottom += 20;
+        Collections.sort(validStatList, Comparator.comparingLong(o -> o.key));
+        // 计算列号
+        for (OcrWord wd : wordResult.wordList) {
+            OcrLoc l = wd.location;
+            Integer left = (l.left / 10 + 2) * 10;
+            Integer right = left + (l.width / 10 - 2) * 10;
+            int i = 0;
+            for (; i < validStatList.size(); i++) {
+                Stat s = validStatList.get(i);
+                if (left >= s.key && left <= s.right && right >= s.key && right <= s.right) {
+                    break;
+                }
+            }
+            wd.part = i; // 行号
+        }
+        Collections.sort(wordResult.wordList, new Comparator<OcrWord>() { // 先左右后上下排序, 是需要的顺序
+            @Override
+            public int compare(OcrWord o1, OcrWord o2) {
+                int iRet;
+                if (0 != (iRet = o1.part.compareTo(o2.part))) {
+                    return iRet;
+                }
+                if (0 != (iRet = o1.row.compareTo(o2.row))) {
+                    return iRet;
+                }
+                if (0 != (iRet = o1.col.compareTo(o2.col))) {
+                    return iRet;
+                }
+                return 0;
+            }
+        });
+        Integer heightLimit = avgHeight / 2 - 4;
+        List<DataRow> dataRowList = new ArrayList<>();
+        Map<Integer, DataRow> colRowMap = new HashMap<>();
+        Integer maxCol = validStatList.size();
+        for (OcrWord wd : wordResult.wordList) {
+            OcrLoc l = wd.location;
+            if (wd.part == maxCol) { // 过滤不在列中的
+                System.out.println("ignore1:" + wd.part + ":" + l.left + ":" + l.top + ":" + wd.words);
+                continue;
+            }
+            if(Math.abs(l.height - avgHeight) > heightLimit) { // 过滤字体太大的
+                System.out.println("ignore2:" + l.top + ":" + l.left + ":" + wd.words);
+                continue;
+            }
+            if (l.left < tleft || l.left > tright || l.top < ttop || l.top > tbottom) {
+                System.out.println("ignore3:" + l.top + ":" + l.left + ":" + wd.words);
+                continue;
+            }
+            Stat s = validStatList.get(wd.part);
+            DataRow dr = colRowMap.get(wd.part);
+            if(null == dr) { // 列首的left不能间隔太远
+                dr = new DataRow(wd.part, wd.row, Lists.newArrayList(wd));
+                colRowMap.put(wd.part, dr);
+            } else if (wd.part.equals(dr.part) && wd.row.equals(dr.row) || l.left - s.key > 40) { // 同行合并
+                dr.wordList.add(wd);
+            } else { // 换行
+                dataRowList.add(dr);
+                dr = new DataRow(wd.part, wd.row, Lists.newArrayList(wd));
+                colRowMap.put(wd.part, dr);
+            }
+        }
+
+        // wordResult.wordList.forEach(t -> System.out.println(t.part + ":" + t.location.top + ":" + t.location.left + ":" + t.words));
+        dataRowList.forEach(t -> System.out.println(t.part + ":" + t.row + ":" + t.wordList.stream().map(u -> u.words).collect(Collectors.joining(","))));
+        /*
+
+            // 过滤初始行距大的单个
+            if(null == lw) {
+                lw = wd;
+                continue;
+            }
+            OcrLoc ll = lw.location;
+            if (Math.abs(l.top - ll.top - avgMidDiff) > 10) {
+                System.out.println("ignore2:" + ll.top + ":" + ll.left + ":" + lw.words);
+                lw = wd;
+                continue;
+            }
+            System.out.println("found:" + ll.top + ":" + ll.left + ":" + lw.words);
+        List<OcrWord> validWordList = new ArrayList<>();
+        for (OcrWord wd : wordResult.wordList) {
+
+        }
+
+
+        // 修正每个字的中线,用于当成划分一行的标准
+        List<String> lineList = new ArrayList<>();
+        OcrWord lw = null;
+        for (OcrWord wd : validWordList) {
+            if (null == lw || Math.abs(lw.row - wd.row) > 8) {
+                if (lineList.size() > 0) {
+                    System.out.println(lw.part + ":" + lw.row + ":" + lw.col + ":" + StringUtils.join(lineList));
+                    lineList.clear();
+                }
+                lw = wd;
+            }
+            lineList.add(wd.words);
+        }
+        if(lineList.size() > 0) {
+            System.out.println(lw.part + ":" + lw.row + ":" + lw.col + ":" + StringUtils.join(lineList));
+            lineList.clear();
+        }*/
+        return ;
+    }
+
+    @Data
+    @AllArgsConstructor
+    @NoArgsConstructor
+    private static class Stat {
+        Integer key; // div 10
+        Integer right; // div 10
+        Integer width; // max
+        Integer count;
+        Long midline; // average
+        Integer top; // min
+        Integer bottom; // max
+    }
+
+    @Data
+    private static class OcrWordResult {
+        @JsonProperty("words_result")
+        List<OcrWord> wordList;
+        @JsonProperty("words_result_num")
+        Integer wordsResultNum;
+        @JsonProperty("log_id")
+        Long logId;
+    }
+
+    @Data
+    private static class OcrWord {
+        Integer part;
+        Long col;
+        Long row;
+        String words;
+        OcrLoc location;
+    }
+
+    @Data
+    private static class OcrLoc {
+        Integer top;
+        Integer left;
+        Integer width;
+        Integer height;
+    }
+
+    @Data
+    @AllArgsConstructor
+    @NoArgsConstructor
+    private static class DataRow {
+        Integer part;
+        Long row;
+        List<OcrWord> wordList;
+    }
+
+}

+ 417 - 0
src/test/java/com/zs/test/ImageTextTest2.java

@@ -0,0 +1,417 @@
+package com.zs.test;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.databind.DeserializationFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.RegExUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.assertj.core.util.Lists;
+import org.junit.jupiter.api.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.stream.Collectors;
+
+public class ImageTextTest2 {
+    /**
+     * Loads one OCR result (JSON) from a hard-coded local file, assigns every word to a
+     * column ("part") and a text line, merges the lines into logical rows, parses the
+     * rows into universities and majors, and prints one line per major.
+     *
+     * <p>Steps: (1) collect per-left-edge statistics to discover column ranges,
+     * (2) assign each word to a column, (3) sort and number the text lines inside each
+     * column, (4) group words into {@link DataRow}s, (5) merge continuation rows,
+     * (6) parse universities and majors.
+     *
+     * @throws IOException if the JSON file cannot be read or parsed
+     */
+    @Test
+    public void testLoadText() throws IOException {
+        String data = FileUtils.readFileToString(new File("D:\\data\\ocr\\2024\\招生计划扫描图片\\江西\\359b8817fe6a8670e026d2351d46964.json"), "utf-8");
+        ObjectMapper om = new ObjectMapper();
+        om.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+        OcrWordResult wordResult = om.readValue(data, OcrWordResult.class);
+        if (null == wordResult.wordsResultNum || 0 == wordResult.wordsResultNum) {
+            return;
+        }
+        // Nothing to process; also keeps minTopLine/avgHeight from staying null below.
+        if (null == wordResult.wordList || wordResult.wordList.isEmpty()) {
+            return;
+        }
+        // Compute the average height, the spacing, and the start position and width of
+        // each column block; these are used to split the page into columns.
+        Long minTopLine = null, maxBottomLine = null;
+        Integer avgHeight = null;
+        Map<Integer, Stat> leftMap = new HashMap<>();
+        Integer id = 1;
+        for (OcrWord word : wordResult.wordList) {
+            word.id = id++;
+            OcrLoc l = word.location;
+            // Midline of the box, rounded to the nearest multiple of 5.
+            word.midline = Math.round((l.top + l.height / 2) / 5.0) * 5;
+
+            if (null == avgHeight) {
+                avgHeight = l.height;
+                minTopLine = word.midline;
+                maxBottomLine = word.midline;
+            } else {
+                // Running blend: every new word contributes half of the new value.
+                avgHeight = (avgHeight + l.height) / 2;
+                minTopLine = Math.min(minTopLine, word.midline);
+                maxBottomLine = Math.max(maxBottomLine, word.midline);
+            }
+            // Shrink the box: move the left edge in and reduce the width, both in steps of 10.
+            Integer left = (l.left / 10 + 1) * 10;
+            Integer width = (l.width / 10 - 1) * 10;
+            Stat s = leftMap.get(left);
+            if (null == s) {
+                s = new Stat(left, left + width, 1);
+                leftMap.put(left, s);
+            } else {
+                s.right = Math.max(s.right, left + width);
+                s.count += 1;
+            }
+        }
+        // Pick the most frequent left edges as column ranges (original note: "the top three sizes").
+        List<Stat> list = new ArrayList<>(leftMap.values());
+        Collections.sort(list, Comparator.comparingInt(o -> -o.count));
+        List<Stat> validStatList = new ArrayList<>();
+        for(Stat s : list) {
+            if (validStatList.size() > 0) {
+                boolean ignore = false;
+                for (Stat st : validStatList) {
+                    if (s.key >= st.key && s.key <= st.right || s.right >= st.key && s.right <= st.right
+                            || st.key >= s.key && st.key <= s.right || st.right >= s.key && st.right <= s.right) {
+                        // Original note: if this range intersects an existing one and the
+                        // line number is the same, it should be merged in. It is dropped here.
+                        ignore = true;
+                        break;
+                    }
+                }
+                if (ignore) {
+                    continue;
+                }
+            }
+            validStatList.add(s);
+        }
+        Collections.sort(validStatList, Comparator.comparingLong(o -> o.key));
+
+        Long ocrMiddleLine = (minTopLine + maxBottomLine) / 2;
+        Integer avgHeightLimit = avgHeight * 3 / 2;
+        for (OcrWord wd : wordResult.wordList) {
+            OcrLoc l = wd.location;
+            Integer left = (l.left / 10 + 2) * 10;
+            Integer right = left + (l.width / 10 - 2) * 10;
+            int i = 0;
+            if(l.height > avgHeightLimit) {
+                // Taller than 1.5x the blended height: treated as belonging to no column.
+                wd.part = i = validStatList.size();
+            } else {
+                for (; i < validStatList.size(); i++) {
+                    Stat s = validStatList.get(i);
+                    if (left >= s.key && left <= s.right && right >= s.key && right <= s.right) {
+                        break;
+                    }
+                }
+                // Index of the enclosing column range, or validStatList.size() when none matched
+                // (the original comment called this the line number).
+                wd.part = i;
+            }
+            if (i == validStatList.size()) {
+                // Words outside every column pull the top/bottom content bounds inwards.
+                if(null == minTopLine) {
+                    minTopLine = wd.midline;
+                    maxBottomLine = wd.midline;
+                } else if(wd.midline > ocrMiddleLine) {
+                    maxBottomLine = Math.min(maxBottomLine, wd.midline);
+                } else {
+                    minTopLine = Math.max(minTopLine, wd.midline);
+                }
+            }
+        }
+        // Sort by column first, then top-to-bottom, then left-to-right; this is the required order.
+        Collections.sort(wordResult.wordList, new Comparator<OcrWord>() {
+            @Override
+            public int compare(OcrWord o1, OcrWord o2) {
+                int iRet;
+                if (0 != (iRet = o1.part.compareTo(o2.part))) {
+                    return iRet;
+                }
+                if (0 != (iRet = o1.midline.compareTo(o2.midline))) {
+                    return iRet;
+                }
+                OcrLoc l1 = o1.location;
+                OcrLoc l2 = o2.location;
+                if (0 != (iRet = l1.left.compareTo(l2.left))) {
+                    return iRet;
+                }
+                return 0;
+            }
+        });
+        // Assign line numbers inside each column.
+        OcrWord lastWord = null;
+        for (OcrWord wd : wordResult.wordList) {
+            // Objects.equals compares the boxed values; '!=' would compare object identity.
+            if(null == lastWord || !Objects.equals(wd.part, lastWord.part)) {
+                wd.line = 1;
+            } else {
+                OcrLoc l = wd.location;
+                Integer midline = l.top + l.height / 2;
+                if (Math.abs(midline - lastWord.midline) > avgHeight) { // new line
+                    wd.line = lastWord.line + 1;
+                } else {
+                    wd.line = lastWord.line;
+                }
+            }
+            lastWord = wd;
+        }
+        // Re-sort: by column, then line number, then left-to-right.
+        Collections.sort(wordResult.wordList, new Comparator<OcrWord>() {
+            @Override
+            public int compare(OcrWord o1, OcrWord o2) {
+                int iRet;
+                if (0 != (iRet = o1.part.compareTo(o2.part))) {
+                    return iRet;
+                }
+                if (0 != (iRet = o1.line.compareTo(o2.line))) {
+                    return iRet;
+                }
+                OcrLoc l1 = o1.location;
+                OcrLoc l2 = o2.location;
+                if (0 != (iRet = l1.left.compareTo(l2.left))) {
+                    return iRet;
+                }
+                return 0;
+            }
+        });
+        // Group the words into rows, one list of rows per column.
+        Map<Integer, List<DataRow>> colRowsMap = new LinkedHashMap<>();
+        DataRow lastDataRow = null;
+        for (OcrWord wd : wordResult.wordList) {
+            if (wd.midline < minTopLine || wd.midline > maxBottomLine) {
+                System.out.println("Ignore:" + wd.part + ":" + wd.line + ":" + wd.words);
+                continue;
+            } else if(wd.midline > maxBottomLine - avgHeight && wd.words.startsWith("·")) {
+                System.out.println("Ignore:" + wd.part + ":" + wd.line + ":" + wd.words);
+                continue;
+            }
+            List<DataRow> dataRowList = colRowsMap.get(wd.part);
+            if (null == dataRowList) {
+                dataRowList = new ArrayList<>();
+                colRowsMap.put(wd.part, dataRowList);
+            }
+            // Start a new row when the column or the line changes. Checking the column as
+            // well keeps the first line of a column from being appended to the previous
+            // column's row when both carry the same line number.
+            if (null == lastDataRow
+                    || !Objects.equals(wd.part, lastDataRow.part)
+                    || !Objects.equals(wd.line, lastDataRow.line)) {
+                lastDataRow = new DataRow(wd.part, wd.line, new ArrayList<>());
+                dataRowList.add(lastDataRow);
+            }
+            lastDataRow.wordList.add(wd);
+        }
+
+        // Merge continuation rows: a row whose first word does not start with two ASCII
+        // letters/digits is appended to the preceding row.
+        List<DataRow> finalRowList = new ArrayList<>();
+        DataRow lastDr = null;
+        // NOTE(review): hard-coded drop of column index 3 - presumably the "no column"
+        // bucket for this particular scan; confirm before reusing with other inputs.
+        colRowsMap.remove(3);
+        for (Integer part : colRowsMap.keySet()) {
+            for(DataRow dr : colRowsMap.get(part)) {
+                if (null == lastDr) {
+                    lastDr = dr;
+                    continue;
+                }
+                String prefix = dr.getWordList().get(0).getWords();
+                if (isAsciiNum(prefix, 2)) {
+                    if (null != lastDr) {
+                        finalRowList.add(lastDr);
+                    }
+                    lastDr = dr;
+                } else {
+                    lastDr.wordList.addAll(dr.getWordList());
+                }
+            }
+        }
+        if (null != lastDr) {
+            finalRowList.add(lastDr);
+        }
+
+        // Parse rows: a row ending in a "校址" word or starting with four digits opens a
+        // new university; a row starting with two ASCII letters/digits is a major.
+        List<University> universityList = new ArrayList<>();
+        University lastUniversity = new University();
+        for (DataRow wd : finalRowList) {
+            List<OcrWord> wordList = wd.getWordList();
+            String fb = wordList.get(0).words;
+            String fe = wordList.get(wordList.size() - 1).words;
+            if (fe.contains("校址") || isNumber(fb, 4)) {
+                if (lastUniversity.majorList.size() > 0) {
+                    universityList.add(lastUniversity);
+                }
+                lastUniversity = University.parse(wd);
+            } else if (isAsciiNum(fb, 2)) {
+                lastUniversity.majorList.add(Major.parse(wd));
+            } else {
+                System.out.println("错误" + ":" + wd.part + ":" + wd.line + ":" + wd.wordList.stream().map(t -> t.words).collect(Collectors.joining(",")));
+            }
+        }
+        if (lastUniversity.majorList.size() > 0) {
+            universityList.add(lastUniversity);
+        }
+        for (University u : universityList) {
+            for (Major m : u.majorList) {
+                System.out.println(u.collegeCode + ":" + u.universityName + ":" + u.count + ":" + u.address + ":" + m.majorCode + ":" + m.majorName + ":" + m.count + ":" + m.xuefei + ":" + m.remark);
+            }
+        }
+    }
+
+    @Data
+    public static class University {
+        String collegeCode;
+        String universityName;
+        String count;
+        String address;
+
+        DataRow dataRow;
+        List<Major> majorList = new ArrayList<>();
+
+        public static University parse(DataRow dr) {
+            List<OcrWord> wordList = dr.wordList;
+            University u = new University();
+            if (wordList.size() == 4) {
+                u.collegeCode = wordList.get(0).words;
+                u.universityName = wordList.get(1).words;
+                u.count = wordList.get(2).words;
+                u.address = wordList.get(3).words;
+            } else if (wordList.size() == 3) {
+                String w = wordList.get(0).words;
+                u.collegeCode = w.substring(0, 4);
+                u.universityName = w.substring(4);
+                u.count = wordList.get(1).words;
+                u.address = wordList.get(2).words;
+            } else {
+                System.out.println("错误学院" + ":" + dr.part + ":" + dr.line + ":" + dr.wordList.stream().map(t -> t.words).collect(Collectors.joining(",")));
+            }
+            u.count = u.count.replaceAll("名", "");
+            u.address = u.address.replaceAll("校址:", "");
+            return u;
+        }
+    }
+
+    /**
+     * One major line below a university: code, name, planned count, tuition ("xuefei")
+     * and free-text remark.
+     */
+    @Data
+    public static class Major {
+        String majorCode;
+        String majorName;
+        String count;
+        String xuefei;
+        String remark;
+
+        DataRow dataRow;
+
+        /**
+         * Parses a major row.
+         *
+         * <p>The first word carries the 2-character code, optionally glued to the name.
+         * If the name ends in digits, those digits are the count and the next word is the
+         * tuition. Otherwise following words are concatenated up to and including the
+         * first one containing "元" and the result is split with a regular expression into
+         * name, count, tuition and remark. Remaining words are appended to the remark.
+         *
+         * @param dr the row to parse; the first word must have at least two characters
+         * @return the parsed major, never null; fields that could not be extracted stay
+         *         null (remark defaults to an empty string)
+         */
+        public static Major parse(DataRow dr) {
+            List<OcrWord> wdList = dr.wordList;
+            Major m = new Major();
+            String f1 = wdList.get(0).words;
+            int index = 1;
+            if(f1.length() == 2) {
+                m.majorCode = f1;
+                // A row that consists of the code only has no name word to read.
+                m.majorName = index < wdList.size() ? wdList.get(index++).words : "";
+            } else {
+                m.majorCode = f1.substring(0, 2);
+                m.majorName = f1.substring(2);
+            }
+            char[] mnChars = m.majorName.toCharArray();
+            int iLen = getNumIdx(mnChars);
+            if (iLen != mnChars.length) { // trailing digits: they are the count
+                m.count = m.majorName.substring(iLen);
+                m.majorName = m.majorName.substring(0, iLen);
+                if (index < wdList.size()) {
+                    m.xuefei = wdList.get(index++).words;
+                }
+            } else {
+                while(wdList.size() > index) {
+                    String ns = wdList.get(index++).words;
+                    m.majorName = m.majorName + ns;
+                    if (ns.contains("元")) {
+                        break;
+                    }
+                }
+                // The name group is reluctant so that the whole digit run lands in the count
+                // group; a greedy "(.+)" would leave only the last digit for "(\\d+)".
+                Matcher matcher = RegExUtils.dotAllMatcher("(.+?)(\\d+)\\((\\d+元)\\)(.*)", m.majorName);
+                if(matcher.find()) {
+                    m.majorName = matcher.group(1);
+                    m.count = matcher.group(2);
+                    m.xuefei = matcher.group(3);
+                    m.remark = matcher.group(4);
+                } else {
+                    System.out.println("专业" + ":" + dr.part + ":" + dr.line + ":" + dr.wordList.stream().map(t -> t.words).collect(Collectors.joining(",")));
+                }
+            }
+            if(null == m.remark) {
+                m.remark = "";
+            }
+            // xuefei stays null when the pattern did not match or the row ran out of words.
+            if (null != m.xuefei) {
+                m.xuefei = m.xuefei.replaceAll("\\(|元|\\)", "");
+            }
+            while(wdList.size() > index) {
+                String ns = wdList.get(index++).words;
+                m.remark = m.remark + ns;
+            }
+            return m;
+        }
+    }
+
+    /**
+     * Finds where the trailing run of ASCII digits starts.
+     *
+     * @param mnChars characters to scan from the end
+     * @return index of the first character of the trailing digit run; equals
+     *         {@code mnChars.length} when the last character is not a digit and 0 when
+     *         every character is a digit (or the array is empty)
+     */
+    private static Integer getNumIdx(char[] mnChars) {
+        int pos = mnChars.length;
+        for (; pos > 0; pos--) {
+            char ch = mnChars[pos - 1];
+            if (ch < '0' || ch > '9') {
+                break;
+            }
+        }
+        return pos;
+    }
+
+    /**
+     * Tells whether the first {@code len} characters of {@code str} are all ASCII
+     * letters or digits.
+     *
+     * @param str text to inspect; must not be null
+     * @param len number of leading characters that have to qualify
+     * @return true when {@code str} has at least {@code len} characters and the first
+     *         {@code len} of them are in A-Z, a-z or 0-9
+     */
+    private boolean isAsciiNum(String str, int len) {
+        char[] chars = str.toCharArray();
+        int matched = 0;
+        for (int k = 0; k < chars.length; k++) {
+            char ch = chars[k];
+            boolean upper = ch >= 'A' && ch <= 'Z';
+            boolean lower = ch >= 'a' && ch <= 'z';
+            boolean digit = ch >= '0' && ch <= '9';
+            if (!upper && !lower && !digit) {
+                return false;
+            }
+            matched++;
+            if (matched == len) {
+                return true;
+            }
+        }
+        return matched == len;
+    }
+
+    /**
+     * Tells whether the first {@code len} characters of {@code str} are all ASCII digits.
+     *
+     * @param str text to inspect; must not be null
+     * @param len number of leading characters that have to be digits
+     * @return true when {@code str} has at least {@code len} characters and the first
+     *         {@code len} of them are in 0-9
+     */
+    private boolean isNumber(String str, int len) {
+        char[] chars = str.toCharArray();
+        int digits = 0;
+        for (int k = 0; k < chars.length; k++) {
+            char ch = chars[k];
+            if (ch < '0' || ch > '9') {
+                return false;
+            }
+            digits++;
+            if (digits == len) {
+                return true;
+            }
+        }
+        return digits == len;
+    }
+
+    /**
+     * Statistics for one left-edge bucket, collected in {@code testLoadText} to discover
+     * column ranges. Lombok generates accessors plus all-args and no-args constructors.
+     */
+    @Data
+    @AllArgsConstructor
+    @NoArgsConstructor
+    private static class Stat {
+        Integer key; // shrunken left edge, a multiple of 10: (left / 10 + 1) * 10
+        Integer right; // largest "shrunken left + shrunken width" seen for this key
+        Integer count; // number of words that share this key
+    }
+
+    /**
+     * Jackson binding for the top-level OCR response object. The JSON property names
+     * are given by the {@code @JsonProperty} annotations on each field.
+     */
+    @Data
+    private static class OcrWordResult {
+        // Bound to the JSON property "words_result".
+        @JsonProperty("words_result")
+        List<OcrWord> wordList;
+        // Bound to the JSON property "words_result_num"; testLoadText returns early when null or 0.
+        @JsonProperty("words_result_num")
+        Integer wordsResultNum;
+        // Bound to the JSON property "log_id".
+        @JsonProperty("log_id")
+        Long logId;
+    }
+
+    /**
+     * One recognised word. {@code words} and {@code location} are read from
+     * {@code testLoadText}'s input; the other fields are filled in by {@code testLoadText}.
+     */
+    @Data
+    private static class OcrWord {
+        // Sequential number in input order, starting at 1.
+        Integer id;
+        // Index of the column range the word falls into, or the number of ranges when it fits none.
+        Integer part;
+        // Vertical centre of the box (top + height / 2), rounded to a multiple of 5.
+        Long midline;
+        // Line number inside the column, starting at 1.
+        Integer line;
+
+        // Recognised text.
+        String words;
+        // Bounding box of the text.
+        OcrLoc location;
+    }
+
+    /**
+     * Bounding box of a recognised word, described by its top-left corner and size.
+     * Units are presumably image pixels - TODO confirm against the OCR service documentation.
+     */
+    @Data
+    private static class OcrLoc {
+        Integer top;
+        Integer left;
+        Integer width;
+        Integer height;
+    }
+
+    /**
+     * The words of one text line inside one column; continuation lines may later be
+     * appended to {@code wordList} by {@code testLoadText}. Lombok generates accessors
+     * plus all-args and no-args constructors.
+     */
+    @Data
+    @AllArgsConstructor
+    @NoArgsConstructor
+    private static class DataRow {
+        // Column index, copied from OcrWord.part.
+        Integer part;
+        // Line number inside the column, copied from OcrWord.line.
+        Integer line;
+        // Words of the row in sorted order.
+        List<OcrWord> wordList;
+    }
+
+}

+ 374 - 0
src/test/java/com/zs/test/ImagesTest.java

@@ -0,0 +1,374 @@
+package com.zs.test;
+
+import com.fasterxml.jackson.databind.DeserializationFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.opencsv.CSVWriter;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.RegExUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.math.NumberUtils;
+import org.assertj.core.util.Lists;
+import org.json.JSONException;
+import org.junit.jupiter.api.Test;
+import org.springframework.beans.BeanUtils;
+
+import javax.imageio.ImageIO;
+import java.awt.Color;
+import java.awt.Font;
+import java.awt.*;
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.List;
+import java.util.*;
+
+public class ImagesTest {
+    @Test
+    public void testDrawImage() throws IOException {
+        BufferedImage oriImage = ImageIO.read(new File("D:\\data\\ocr\\招生报1.jpg"));
+        BufferedImage image = new BufferedImage(oriImage.getWidth(), oriImage.getHeight(), BufferedImage.TYPE_INT_RGB);
+        Graphics2D g2d = image.createGraphics();
+        g2d.setColor(Color.WHITE);
+        g2d.fillRect(0, 0, image.getWidth(), image.getHeight());
+        g2d.setColor(Color.BLACK);
+        // g2d.drawOval(25, 25, 150, 150);
+
+        g2d.setFont(new Font("宋体", Font.BOLD, 24));
+        g2d.drawString("69护理学", 18, 481);
+
+        g2d.dispose();
+        ImageIO.write(image, "png", new File("D:\\data\\ocr\\output_image.png"));
+        System.out.println("图片生成成功!");
+    }
+
+    @Test
+    public void testDrawImage2() throws IOException, JSONException {
+        String data = FileUtils.readFileToString(new File("D:\\data\\ocr\\8888\\20240418000944690_0001.json.txt"), "utf-8");
+        ObjectMapper om = new ObjectMapper();
+        om.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+        OcrModel.OcrTableResult tableResult = om.readValue(data, OcrModel.OcrTableResult.class);
+        if (null == tableResult.tableNum || tableResult.tableNum <= 0) {
+            return;
+        }
+        BufferedImage oriImage = ImageIO.read(new File("D:\\data\\ocr\\8888\\20240418000944690_0001.jpg"));
+        BufferedImage image = new BufferedImage(oriImage.getWidth(), oriImage.getHeight(), BufferedImage.TYPE_INT_RGB);
+        Graphics2D g2d = image.createGraphics();
+        g2d.setColor(Color.WHITE);
+        g2d.fillRect(0, 0, image.getWidth(), image.getHeight());
+        g2d.setColor(Color.BLACK);
+        // g2d.drawOval(25, 25, 150, 150);
+        g2d.setFont(new Font("宋体", Font.BOLD, 16));
+        for(OcrModel.OcrTableCells tbl : tableResult.tables) {
+            for (OcrModel.OcrTableCell cell : tbl.getCells()) {
+                OcrModel.OcrPoint pt = cell.getLocations().get(0);
+                g2d.drawString(cell.getWords(), pt.getX(), pt.getY());
+            }
+        }
+        g2d.dispose();
+        ImageIO.write(image, "png", new File("D:\\data\\ocr\\test2.png"));
+        System.out.println("图片生成成功!");
+    }
+
+
+    /**
+     * Checks the row/column recognition of every OCR result in a directory so that the
+     * completeness of the data can be judged. Writes one CSV line per table (file name,
+     * table number, row span, column span); a file without tables gets a single
+     * "0, 0, 0" line and processing continues with the next file.
+     *
+     * @throws IOException if a JSON file cannot be read or the CSV cannot be written
+     */
+    @Test
+    public void testExtractReport() throws IOException {
+        File targetDir = new File("D:\\data\\ocr\\1111out");
+        // One mapper is enough for all files.
+        ObjectMapper om = new ObjectMapper();
+        om.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+        List<String[]> lineList = new ArrayList<>();
+        lineList.add(new String[]{"文件", "表格序号", "行数", "列数"});
+        for (File file : FileUtils.listFiles(targetDir, new String[]{"json"}, false)) {
+            String data = FileUtils.readFileToString(file, "utf-8");
+            OcrModel.OcrTableResult tableResult = om.readValue(data, OcrModel.OcrTableResult.class);
+            if (null == tableResult.tableNum || tableResult.tableNum <= 0) {
+                lineList.add(new String[]{file.getName(), "0", "0", "0"});
+                // Skip only this file; returning here would drop the whole report.
+                continue;
+            }
+            int i = 1;
+            for (OcrModel.OcrTableCells tbl : tableResult.tables) {
+                // NOTE(review): the minimums start at 0, so they only move for negative
+                // indexes; this presumably relies on tables starting at row/column 0.
+                int rowStart = 0, rowEnd = 0, colStart = 0, colEnd = 0;
+                for (OcrModel.OcrTableCell oc : tbl.getCells()) {
+                    rowStart = Math.min(oc.getRowStart(), rowStart);
+                    rowEnd = Math.max(oc.getRowEnd(), rowEnd);
+                    colStart = Math.min(oc.getColStart(), colStart);
+                    colEnd = Math.max(oc.getColEnd(), colEnd);
+                }
+                lineList.add(new String[]{file.getName(), String.valueOf(i++), String.valueOf(rowEnd - rowStart), String.valueOf(colEnd - colStart)});
+            }
+        }
+        // try-with-resources closes the writer (and the underlying file) even on failure.
+        try (CSVWriter writer = new CSVWriter(new FileWriter("D:\\data\\ocr\\1111out.csv"))) {
+            for (String[] line : lineList) {
+                writer.writeNext(line);
+            }
+            writer.flush();
+        }
+    }
+
+    /**
+     * Exports the raw OCR result to an Excel file for inspection. The workbook is written
+     * next to the JSON input, with the same base name and an ".xlsx" extension.
+     *
+     * @throws IOException if the JSON cannot be read or the workbook cannot be written
+     */
+    @Test
+    public void testExportOriExcel() throws IOException, JSONException {
+        String basePath = "D:\\data\\ocr\\8888\\" + "20240418004338611_0031p4";
+        File jsonFile = new File(basePath + ".json");
+        String json = FileUtils.readFileToString(jsonFile, "utf-8");
+        String excelPath = basePath + ".xlsx";
+        OcrExcelExporter.exportOriData(json, excelPath);
+    }
+
+    /**
+     * Extraction, mode one: reads one OCR table result, orders tables and cells, groups
+     * cells into rows, splits merged rows, classifies every row (university line, header
+     * line, major-group line or major line) and writes the flattened rows to a CSV file.
+     *
+     * <p>The run stops without writing anything when a row does not have the expected
+     * number of cells.
+     *
+     * @throws IOException if the JSON cannot be read or the CSV cannot be written
+     */
+    @Test
+    public void testExtractExcel() throws IOException, JSONException {
+        Integer validCellCount = 5;
+
+        String data = FileUtils.readFileToString(new File("D:\\data\\ocr\\8888\\20240418004338611_0031p4.json"), "utf-8");
+        ObjectMapper om = new ObjectMapper();
+        om.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+        OcrModel.OcrTableResult tableResult = om.readValue(data, OcrModel.OcrTableResult.class);
+        if (null == tableResult.tableNum || tableResult.tableNum <= 0) {
+            return;
+        }
+        // Order tables by the X of their first location point, then by its Y.
+        Collections.sort(tableResult.tables, new Comparator<OcrModel.OcrTableCells>() {
+            @Override
+            public int compare(OcrModel.OcrTableCells o1, OcrModel.OcrTableCells o2) {
+                int iRet;
+                // Compare X with X; comparing o1's X with o2's Y gives an inconsistent order.
+                if (0 != (iRet = o1.getLocations().get(0).getX().compareTo(o2.getLocations().get(0).getX()))) {
+                    return iRet;
+                }
+                if (0 != (iRet = o1.getLocations().get(0).getY().compareTo(o2.getLocations().get(0).getY()))) {
+                    return iRet;
+                }
+                return 0;
+            }
+        });
+        List<OcrModel.MajorRow> finalRowList = Lists.newArrayList();
+        OcrModel.MajorRow headRow = null;
+        OcrModel.MajorRow lastRow = new OcrModel.MajorRow();
+        for (OcrModel.OcrTableCells tbl : tableResult.tables) {
+            System.out.println("process table");
+            // Cells in reading order: by starting row, then by starting column.
+            Collections.sort(tbl.getCells(), new Comparator<OcrModel.OcrTableCell>() {
+                @Override
+                public int compare(OcrModel.OcrTableCell o1, OcrModel.OcrTableCell o2) {
+                    int iRet;
+                    if (0 != (iRet = o1.getRowStart().compareTo(o2.getRowStart()))) {
+                        return iRet;
+                    }
+                    if (0 != (iRet = o1.getColStart().compareTo(o2.getColStart()))) {
+                        return iRet;
+                    }
+                    return 0;
+                }
+            });
+            // Group cells by their starting row, keeping first-seen row order.
+            Map<Integer, OcrModel.OcrTableRow> rowMap = new HashMap<>();
+            List<OcrModel.OcrTableRow> rowList = new ArrayList<>();
+            for (OcrModel.OcrTableCell cell : tbl.getCells()) {
+                if (cell.getRowEnd() - cell.getRowStart() > 1) {
+                    System.out.println("多行错误");
+                }
+                OcrModel.OcrTableRow row = rowMap.get(cell.getRowStart());
+                if (null == row) {
+                    if (0 != cell.getColStart()) {
+                        System.out.println("起行错误");
+                    }
+                    row = new OcrModel.OcrTableRow();
+                    rowList.add(row);
+                    rowMap.put(cell.getRowStart(), row);
+                }
+                if (StringUtils.isNotBlank(cell.getWords())) {
+                    row.setValidCount(row.getValidCount() + 1);
+                }
+                row.getCells().add(cell);
+            }
+            // Handle rows that were recognised stuck together.
+            List<OcrModel.OcrTableRow> newRowList = Lists.newArrayList();
+            for (OcrModel.OcrTableRow row : rowList) {
+                if (row.getCells().size() != validCellCount) {
+                    // Columns stuck together are not handled yet; abort.
+                    System.out.println("有列粘连");
+                    return;
+                }
+                OcrModel.RowSplitter.addRowForSplit(row, newRowList);
+            }
+            for (OcrModel.OcrTableRow row : newRowList) {
+                OcrModel.MajorRow finalRow = new OcrModel.MajorRow();
+                if (row.getValidCount() == 2) {
+                    // Two non-blank cells: university name followed by university count.
+                    for (OcrModel.OcrTableCell tc : row.getCells()) {
+                        if (StringUtils.isNotBlank(tc.getWords())) {
+                            if (null == finalRow.getUniversityName()) {
+                                finalRow.setUniversityName(tc.getWords().replaceAll("\n|\r", ""));
+                            } else {
+                                finalRow.setUniversityCount(tc.getWords().replaceAll("\n|\r", ""));
+                            }
+                        }
+                    }
+                    lastRow = finalRow;
+                    continue;
+                } else if (row.getCells().size() != validCellCount) {
+                    System.out.println("错误行: ");
+                    return;
+                }
+                String name = row.getCells().get(1).getWords();
+                String plan = row.getCells().get(3).getWords().replaceAll("\n|#", "");
+                if (!NumberUtils.isDigits(plan) && plan.contains("数")) {
+                    // Header line: build the CSV header once, ignore repeats.
+                    if (null == headRow) {
+                        headRow = new OcrModel.MajorRow();
+                        headRow.setCollegeCode("院校代码");
+                        headRow.setUniversityName("院校名称");
+                        headRow.setUniversityCount("院校人数");
+                        headRow.setGroup("专业组代码");
+                        headRow.setGroupName("院校专业组");
+                        headRow.setGroupCount("专业组人数");
+                        headRow.setCourse(row.getCells().get(2).getWords().replaceAll("\n|\r", ""));
+                        headRow.setCode("专业代号");
+                        headRow.setName("专业名称及备注");
+                        headRow.setPlanCount("专业人数");
+                        headRow.setXuezhi("学制");
+                        headRow.setXuefei(row.getCells().get(4).getWords().replaceAll("\n|\r", ""));
+                        headRow.setRemark("备注");
+                    } else {
+                        System.out.println("标题重复错新一页");
+                    }
+                    continue;
+                } else if(RegExUtils.dotAllMatcher(".*第\\d+组", name).find()) {
+                    // Major-group line: remember the group on lastRow for the majors that follow.
+                    lastRow.setGroup(row.getCells().get(0).getWords().replaceAll("\n|\r", ""));
+                    lastRow.setGroupName(row.getCells().get(1).getWords().replaceAll("\n|\r", ""));
+                    lastRow.setCourse(row.getCells().get(2).getWords().replaceAll("\n|\r", ""));
+                    lastRow.setGroupCount(plan);
+                    BeanUtils.copyProperties(lastRow, finalRow);
+                } else {
+                    // Major line: inherit university/group data, then fill the major fields.
+                    BeanUtils.copyProperties(lastRow, finalRow);
+                    finalRow.setCode(row.getCells().get(0).getWords().replaceAll("\n|\r", ""));
+                    finalRow.setName(row.getCells().get(1).getWords().replaceAll("\n|\r", ""));
+                    finalRow.setPlanCount(plan);
+                    finalRow.setXuefei(row.getCells().get(4).getWords());
+                }
+                finalRowList.add(finalRow);
+            }
+        }
+        if (null != headRow) {
+            finalRowList.add(0, headRow);
+        }
+        System.out.println("end: " + finalRowList.size());
+        // try-with-resources closes the writer (and the underlying file) even on failure.
+        try (CSVWriter writer = new CSVWriter(new FileWriter("D:\\data\\ocr\\2024\\test1.csv"))) {
+            for (OcrModel.MajorRow row : finalRowList) {
+                writer.writeNext(new String[]{row.collegeCode, row.universityName, row.universityCount,
+                        row.group, row.groupName, row.groupCount, row.course,
+                        row.code, row.name, row.planCount, row.xuezhi, row.xuefei, row.remark});
+            }
+            writer.flush();
+        }
+    }
+
+
+    /**
+     * Currently a no-op: the whole body is commented out. The disabled code is an
+     * earlier variant of the table extraction that expects six cells per row and refers
+     * to types (OcrTableResult, OcrTable, OcrCell, OcrRow, FinalRow) by unqualified
+     * names; it is kept below for reference only.
+     */
+    @Test
+    public void testExtractExcel2() throws IOException, JSONException {
+        /*String data = FileUtils.readFileToString(new File("D:\\data\\ocr\\2024\\autoGrab\\北京大学\\2022-理科-本一-北京大学(1003)((校址北京市)).json"), "utf-8");
+        ObjectMapper om = new ObjectMapper();
+        om.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+        OcrTableResult tableResult = om.readValue(data, OcrTableResult.class);
+        if (null == tableResult.tableNum || tableResult.tableNum <= 0) {
+            return;
+        }
+        Collections.sort(tableResult.tables, new Comparator<OcrTable>() {
+            @Override
+            public int compare(OcrTable o1, OcrTable o2) {
+                int iRet;
+                if (0 != (iRet = o1.getLocations().get(0).x.compareTo(o2.getLocations().get(0).x))) {
+                    return iRet;
+                }
+                if (0 != (iRet = o1.getLocations().get(0).y.compareTo(o2.getLocations().get(0).y))) {
+                    return iRet;
+                }
+                return 0;
+            }
+        });
+        List<FinalRow> finalRowList = Lists.newArrayList();
+        FinalRow headRow = null;
+        FinalRow lastRow = new FinalRow();
+        for (OcrTable tbl : tableResult.tables) {
+            System.out.println("process table");
+            Collections.sort(tbl.getCells(), new Comparator<OcrCell>() {
+                @Override
+                public int compare(OcrCell o1, OcrCell o2) {
+                    int iRet;
+                    if (0 != (iRet = o1.getRowStart().compareTo(o2.getRowStart()))) {
+                        return iRet;
+                    }
+                    if (0 != (iRet = o1.getColStart().compareTo(o2.getColStart()))) {
+                        return iRet;
+                    }
+                    return 0;
+                }
+            });
+            Map<Integer, OcrRow> rowMap = new HashMap<>();
+            List<OcrRow> rowList = new ArrayList<>();
+            for (OcrCell cell : tbl.getCells()) {
+                if (cell.rowEnd - cell.rowStart > 1) {
+                    System.out.println("多行错误");
+                }
+                OcrRow row = rowMap.get(cell.rowStart);
+                if (null == row) {
+                    if (0 != cell.colStart) {
+                        System.out.println("起行错误");
+                    }
+                    row = new OcrRow();
+                    rowList.add(row);
+                    rowMap.put(cell.rowStart, row);
+                }
+                if (StringUtils.isNotBlank(cell.getWords())) {
+                    row.validCount++;
+                }
+                row.getCells().add(cell);
+            }
+            for (OcrRow row : rowList) {
+                FinalRow finalRow = new FinalRow();
+                if (row.validCount == 1) {
+                    for (OcrCell tc : row.cells) {
+                        if (StringUtils.isNotBlank(tc.getWords())) {
+                            finalRow.setUniversityName(tc.getWords().replaceAll("\n|\r", ""));
+                            break;
+                        }
+                    }
+                    lastRow = finalRow;
+                    continue;
+                } else if (row.cells.size() != 6) {
+                    System.out.println("错误行: ");
+                } else if (row.cells.get(3).getWords().contains("计") || row.cells.get(3).getWords().contains("划")) {
+                    if(null == headRow) {
+                        headRow = new FinalRow();
+                        headRow.setUniversityName("院校名称");
+                        headRow.setUniversityCount("院校计划数");
+                        headRow.setCode(row.cells.get(0).getWords().replaceAll("\n|\r", ""));
+                        headRow.setName(row.cells.get(1).getWords().replaceAll("\n|\r", ""));
+                        headRow.setRemark(row.cells.get(2).getWords().replaceAll("\n|\r", ""));
+                        headRow.setPlanCount(row.cells.get(3).getWords().replaceAll("\n|\r", ""));
+                        headRow.setXuezhi(row.cells.get(4).getWords().replaceAll("\n|\r", ""));
+                        headRow.setXuefei(row.cells.get(5).getWords().replaceAll("\n|\r", ""));
+                    }
+                } else {
+                    finalRow.setUniversityName(lastRow.getUniversityName());
+                    finalRow.setUniversityCount(lastRow.universityCount);
+                    finalRow.setCode(row.cells.get(0).getWords());
+                    finalRow.setName(row.cells.get(1).getWords().replaceAll("\n|\r", ""));
+                    finalRow.setRemark(row.cells.get(2).getWords().replaceAll("\n|\r", ""));
+                    finalRow.setPlanCount(row.cells.get(3).getWords());
+                    finalRow.setXuezhi(row.cells.get(4).getWords());
+                    finalRow.setXuefei(row.cells.get(5).getWords());
+                    finalRowList.add(finalRow);
+                }
+            }
+        }
+        if (null != headRow) {
+            finalRowList.add(0, headRow);
+        }
+        System.out.println("end: " + finalRowList.size());
+        CSVWriter writer = new CSVWriter(new FileWriter("D:\\data\\ocr\\testYZY.csv"));
+        for (FinalRow row : finalRowList) {
+            writer.writeNext(new String[]{row.universityName, row.universityCount, row.code, row.name, row.remark, row.planCount, row.xuezhi, row.xuefei});
+        }
+        writer.flush();*/
+    }
+
+}

+ 49 - 0
src/test/java/com/zs/test/OcrExcelExporter.java

@@ -0,0 +1,49 @@
+package com.zs.test;
+
+import com.fasterxml.jackson.databind.DeserializationFeature;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.poi.ss.usermodel.*;
+import org.apache.poi.ss.util.CellRangeAddress;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+/**
+ * Dumps a raw OCR table-recognition response into an .xlsx workbook,
+ * one sheet per table found in the response.
+ */
+public class OcrExcelExporter {
+    /** A cell is merged only when its row span and column span are both strictly between these bounds. */
+    private static final int MIN_MERGE_SPAN = 1;
+    private static final int MAX_MERGE_SPAN = 4;
+
+    /**
+     * Parses {@code data} as an {@link OcrModel.OcrTableResult} and writes every table to
+     * {@code outFilename} as a separate sheet named "Sheet1", "Sheet2", ...
+     *
+     * <p>Each OCR cell is written at (rowStart, colStart) with text wrapping enabled. Nothing is
+     * written when the response reports no tables.
+     *
+     * @param data        JSON text of the OCR response; unknown JSON properties are ignored
+     * @param outFilename path of the .xlsx file to create or overwrite
+     * @throws IOException if the JSON cannot be parsed or the file cannot be written
+     */
+    public static void exportOriData(String data, String outFilename) throws IOException {
+        ObjectMapper om = new ObjectMapper();
+        om.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
+        OcrModel.OcrTableResult tableResult = om.readValue(data, OcrModel.OcrTableResult.class);
+        // readValue yields null for a literal JSON "null"; a missing table list is treated like "no tables".
+        if (null == tableResult || null == tableResult.tableNum || tableResult.tableNum <= 0
+                || null == tableResult.tables) {
+            return;
+        }
+        // try-with-resources closes the workbook even when filling or writing it fails.
+        try (Workbook wb = new XSSFWorkbook()) {
+            CellStyle style = wb.createCellStyle();
+            style.setWrapText(true);
+            int sheetNo = 1;
+            for (OcrModel.OcrTableCells tbl : tableResult.tables) {
+                Sheet sheet = wb.createSheet("Sheet" + sheetNo++);
+                if (null == tbl.getCells()) {
+                    continue; // table without a body: keep the empty sheet
+                }
+                for (OcrModel.OcrTableCell oc : tbl.getCells()) {
+                    Row row = sheet.getRow(oc.getRowStart());
+                    if (null == row) {
+                        row = sheet.createRow(oc.getRowStart());
+                    }
+                    Cell cell = row.createCell(oc.getColStart());
+                    cell.setCellStyle(style);
+                    cell.setCellValue(oc.getWords());
+
+                    int rowRange = oc.getRowEnd() - oc.getRowStart();
+                    int colRange = oc.getColEnd() - oc.getColStart();
+                    // The original condition tested rowRange twice, so the column span was never bounded.
+                    if (isMergeSpan(rowRange) && isMergeSpan(colRange)) {
+                        sheet.addMergedRegion(new CellRangeAddress(
+                                oc.getRowStart(), oc.getRowEnd(), oc.getColStart(), oc.getColEnd()));
+                    }
+                }
+            }
+            try (FileOutputStream os = new FileOutputStream(outFilename)) {
+                wb.write(os);
+            }
+        }
+    }
+
+    /** Tells whether a row/column span is within the range for which a merged region is created. */
+    private static boolean isMergeSpan(int span) {
+        return span > MIN_MERGE_SPAN && span < MAX_MERGE_SPAN;
+    }
+}

+ 195 - 0
src/test/java/com/zs/test/OcrModel.java

@@ -0,0 +1,195 @@
+package com.zs.test;
+
+import com.fasterxml.jackson.annotation.JsonProperty;
+import lombok.Data;
+import org.apache.commons.lang3.RegExUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.math.NumberUtils;
+import org.assertj.core.util.Lists;
+import org.springframework.beans.BeanUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class OcrModel {
+
+    @Data // Lombok generates the accessors (e.g. setCode/getUniversityName) used elsewhere in the tests.
+    public static class FinalRow { // Flat, string-only holder for one output row; fields are package-private.
+        String universityName; // institution name
+        String universityCount; // institution plan count
+        String code; // code column of the row (presumably the major code, as in MajorRow - confirm)
+        String name; // name column of the row (presumably the major name - confirm)
+        String xuezhi; // presumably pinyin for "length of schooling" - TODO confirm
+        String planCount; // plan count of this row
+        String xuefei; // presumably pinyin for "tuition fee" - TODO confirm
+        String remark; // remark column of the row
+    }
+
+    @Data // Lombok generates accessors, equals/hashCode and toString.
+    public static class MajorRow { // String-only holder for one major (program) line with its institution and group context.
+        String collegeCode; // institution code
+        String universityName; // institution name
+        String universityCount; // institution plan count
+        String group; // major group number
+        String groupName; // major group name
+        String groupCount; // major group plan count
+        String course; // additional elective subject(s) of the major group
+        String code; // major code
+        String name; // major name
+        String planCount; // major plan count
+        String xuezhi; // presumably pinyin for "length of schooling" - TODO confirm
+        String xuefei; // presumably pinyin for "tuition fee" - TODO confirm
+        String remark; // NOTE(review): looks like a free-text remark column - confirm
+    }
+
+    @Data
+    public static class OcrTableResult { // Root object of the OCR table response; fields are package-private and bound by JSON name.
+        @JsonProperty("table_num")
+        Integer tableNum; // JSON "table_num"; presumably the number of recognized tables - confirm
+        @JsonProperty("log_id")
+        Long logId; // JSON "log_id"
+        @JsonProperty("tables_result")
+        List<OcrTableCells> tables; // JSON "tables_result": one entry per table
+    }
+
+    @Data
+    public static class OcrTableCells { // One table of the OCR response: its cells plus the table's location points.
+        @JsonProperty("body")
+        private List<OcrTableCell> cells; // JSON "body": the cells of this table
+        @JsonProperty("table_location")
+        private List<OcrPoint> locations; // JSON "table_location"; presumably the table's corner points - confirm
+    }
+
+    @Data
+    public static class OcrTableRow { // A row of cells assembled in code (no JSON bindings on this class).
+        private Integer validCount = 0; // starts at 0; RowSplitter.createRow sets it to the number of cells with non-blank words
+        private List<OcrTableCell> cells = new ArrayList<>(); // cells of the row; mutable and empty by default
+    }
+
+    @Data
+    public static class OcrTableCell { // One recognized table cell: its grid span, text and location points.
+        @JsonProperty("row_start")
+        private Integer rowStart; // JSON "row_start"; used by OcrExcelExporter as the sheet row index
+        @JsonProperty("row_end")
+        private Integer rowEnd; // JSON "row_end"; NOTE(review): inclusive vs. exclusive is not visible here - confirm
+        @JsonProperty("col_start")
+        private Integer colStart; // JSON "col_start"; used by OcrExcelExporter as the sheet column index
+        @JsonProperty("col_end")
+        private Integer colEnd; // JSON "col_end"; NOTE(review): inclusive vs. exclusive is not visible here - confirm
+        private String words; // cell text; may contain line breaks (RowSplitter splits on "\n")
+        @JsonProperty("cell_location")
+        private List<OcrPoint> locations; // JSON "cell_location"; presumably the cell's corner points - confirm
+    }
+
+    @Data
+    public static class OcrPoint { // A single (x, y) point used in table/cell location lists.
+        private Integer x; // x coordinate; presumably in image pixels - confirm
+        private Integer y; // y coordinate; presumably in image pixels - confirm
+    }
+
+
+    /**
+     * Helpers that take an OCR row whose cells hold several newline-separated logical rows
+     * ("stuck together" rows) and split it back into one {@link OcrTableRow} per logical row.
+     */
+    public static class RowSplitter {
+        /** Regex for text that contains a numbered group marker (the characters for "group no. N"). */
+        private static final String GROUP_NAME_REGEX = ".*第\\d+组";
+
+        /**
+         * Appends {@code row} to {@code rowList}, splitting it first when its cells contain
+         * several logical rows separated by line breaks.
+         *
+         * <p>Only cells 0-4 are read: cell 0 and cell 4 are split with {@link #splitNums},
+         * cell 1 with {@link #splitMajors}, cell 3 decides whether a split is needed, and the
+         * text of cell 2 is attached only to the resulting rows whose name is a numbered group.
+         *
+         * @param row     row to add; must hold at least five cells with non-null words
+         * @param rowList list receiving the resulting row(s); modified in place
+         * @return {@code rowList}, or {@code null} when the row cannot be split reliably
+         *         (a message is printed to stdout and {@code rowList} is left unchanged)
+         */
+        public static List<OcrTableRow> addRowForSplit(OcrTableRow row, List<OcrTableRow> rowList) {
+            List<OcrTableCell> cells = row.getCells();
+            // Cell 3 is a column that is certain to carry a value, so its line count tells rows apart.
+            String[] w4 = cells.get(3).getWords().replace("#", "").split("\n");
+            if (w4.length == 1) {
+                rowList.add(row);
+                return rowList;
+            }
+            String w2 = cells.get(2).getWords().replace("\n", "");
+            if (w2.length() > 3 && !w2.endsWith("要求")) {
+                // Cell 2 looks like two groups stuck together; this cannot be separated safely.
+                System.out.println("二组粘连处理不了");
+                return null;
+            }
+
+            List<String> wl1 = splitNums(cells.get(0).getWords().split("\n"));
+            List<String> wl2 = splitMajors(cells.get(1).getWords().split("\n"));
+            List<String> wl4 = splitNums(w4);
+            List<String> wl5 = splitNums(cells.get(4).getWords().split("\n"));
+            // Institution and major group stuck together: with a single code but several names,
+            // one name must be the institution, so its code (and cell-4) slot is left blank.
+            if (wl1.size() == 1 && wl1.size() != wl2.size()) {
+                for (int i = 0; i < wl2.size(); i++) {
+                    if (!wl2.get(i).endsWith("组")) {
+                        insertBlank(wl1, i);
+                        if (wl1.size() != wl5.size()) {
+                            insertBlank(wl5, i);
+                        }
+                    }
+                }
+            }
+            // A single cell-4 value but several codes: only the group entries lack a value,
+            // so a blank is inserted at each group position.
+            if (wl5.size() == 1 && wl1.size() != wl5.size()) {
+                for (int i = 0; i < wl2.size(); i++) {
+                    if (wl2.get(i).endsWith("组")) {
+                        insertBlank(wl5, i);
+                    }
+                }
+            }
+            if (wl1.size() != wl2.size() || wl1.size() != wl4.size() || wl1.size() != wl5.size()) {
+                // The columns do not line up, so the row cannot be split.
+                System.out.println("个数不对处理不了");
+                return null;
+            }
+            for (int i = 0; i < wl1.size(); i++) {
+                // The cell-2 text is kept only on numbered-group rows (two groups must not share it).
+                String tw2 = StringUtils.isNotBlank(w2) && isGroupName(wl2.get(i)) ? w2 : "";
+                rowList.add(createRow(row,
+                        createCell(cells.get(0), wl1.get(i)),
+                        createCell(cells.get(1), wl2.get(i)),
+                        createCell(cells.get(2), tw2),
+                        createCell(cells.get(3), wl4.get(i)),
+                        createCell(cells.get(4), wl5.get(i))));
+            }
+            return rowList;
+        }
+
+        /**
+         * Builds a new row from {@code cells}, copying the remaining bean properties of {@code or}.
+         *
+         * @param or    row whose properties are copied before cells and validCount are replaced
+         * @param cells cells of the new row, in column order
+         * @return a new row whose validCount is the number of cells with non-blank words and whose
+         *         cell list is a mutable copy of {@code cells}
+         */
+        public static OcrTableRow createRow(OcrTableRow or, OcrTableCell... cells) {
+            OcrTableRow nr = new OcrTableRow();
+            BeanUtils.copyProperties(or, nr);
+            nr.setValidCount((int) Arrays.stream(cells).filter(c -> StringUtils.isNotBlank(c.getWords())).count());
+            // Copy into an ArrayList: Arrays.asList alone is fixed-size, unlike OcrTableRow's default list.
+            nr.setCells(new ArrayList<>(Arrays.asList(cells)));
+            return nr;
+        }
+
+        /**
+         * Copies {@code oc} (span and location properties) into a new cell and replaces its text.
+         *
+         * @param oc   cell to copy
+         * @param word text of the new cell
+         * @return the new cell
+         */
+        public static OcrTableCell createCell(OcrTableCell oc, String word) {
+            OcrTableCell nc = new OcrTableCell();
+            BeanUtils.copyProperties(oc, nc);
+            nc.setWords(word);
+            return nc;
+        }
+
+        /**
+         * Groups the lines of a numeric column into logical entries. Every all-digit line is an
+         * entry of its own; consecutive other lines (including empty ones) are joined with "\n"
+         * into a single entry.
+         *
+         * @param word lines of one cell; each line is trimmed before it is inspected
+         * @return the entries in their original order
+         */
+        public static List<String> splitNums(String[] word) {
+            List<String> wordList = new ArrayList<>();
+            List<String> tmpList = new ArrayList<>();
+            for (String w : word) {
+                String tw = StringUtils.trimToEmpty(w);
+                if (NumberUtils.isDigits(tw)) {
+                    if (!tmpList.isEmpty()) {
+                        wordList.add(StringUtils.join(tmpList, "\n"));
+                        tmpList.clear();
+                    }
+                    wordList.add(tw);
+                } else {
+                    tmpList.add(tw);
+                }
+            }
+            if (!tmpList.isEmpty()) {
+                wordList.add(StringUtils.join(tmpList, "\n"));
+            }
+            return wordList;
+        }
+
+        /**
+         * Groups the lines of the name column into logical entries. Lines are accumulated until a
+         * line that ends an entry: the column header line, a line ending with a closing parenthesis
+         * (ASCII or full-width), or a line containing a numbered group marker. Leftover lines form
+         * a final entry.
+         *
+         * @param word lines of one cell; each line is trimmed before it is inspected
+         * @return the entries in their original order, multi-line entries joined with "\n"
+         */
+        public static List<String> splitMajors(String[] word) {
+            List<String> wordList = new ArrayList<>();
+            List<String> tmpList = new ArrayList<>();
+            for (String w : word) {
+                String tw = StringUtils.trimToEmpty(w);
+                tmpList.add(tw);
+                boolean headerLine = tw.contains("专业组") && tw.contains("名称及备注");
+                // OCR output may use either parenthesis form; the original tested the ASCII one twice.
+                boolean closesParen = tw.endsWith(")") || tw.endsWith("\uFF09");
+                if (headerLine || closesParen || isGroupName(tw)) {
+                    wordList.add(StringUtils.join(tmpList, "\n"));
+                    tmpList.clear();
+                }
+            }
+            if (!tmpList.isEmpty()) {
+                wordList.add(StringUtils.join(tmpList, "\n"));
+            }
+            return wordList;
+        }
+
+        /** Tells whether {@code text} contains a numbered group marker. */
+        private static boolean isGroupName(String text) {
+            return RegExUtils.dotAllMatcher(GROUP_NAME_REGEX, text).find();
+        }
+
+        /**
+         * Inserts an empty string at {@code index}, or appends it when the list is shorter than
+         * {@code index}, so a short column ends in the size-mismatch check instead of an exception.
+         */
+        private static void insertBlank(List<String> list, int index) {
+            list.add(Math.min(index, list.size()), "");
+        }
+    }
+}

Разница между файлами не показана из-за своего большого размера
+ 37 - 0
src/test/java/com/zs/test/TestApplicationTests.java


Некоторые файлы не были показаны из-за большого количества измененных файлов