diff --git a/pom.xml b/pom.xml index 16561b2..ae568d7 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ 6.8.2 2.19.0 1.6.0 - 4.1.2 + 5.2.3 2.3 0.9.1 @@ -50,6 +50,9 @@ 1.5.4 10.1.1 + 5.2.3 + 1.16.1 + 5.4.0 @@ -319,6 +322,31 @@ ${twilio.version} + + net.sourceforge.tess4j + tess4j + ${tess4j.version} + + + + + org.apache.poi + poi + ${apache.poi.version} + + + org.apache.poi + poi-scratchpad + ${apache.poi.version} + + + + org.jsoup + jsoup + ${jsoup.version} + + + diff --git a/vetti-common/pom.xml b/vetti-common/pom.xml index 232bee6..8f3f99a 100644 --- a/vetti-common/pom.xml +++ b/vetti-common/pom.xml @@ -184,6 +184,31 @@ twilio + + net.sourceforge.tess4j + tess4j + + + + org.jsoup + jsoup + + + + + org.apache.poi + poi + + + org.apache.poi + poi-scratchpad + + + + + org.jsoup + jsoup + diff --git a/vetti-common/src/main/java/com/vetti/common/core/domain/entity/SysUser.java b/vetti-common/src/main/java/com/vetti/common/core/domain/entity/SysUser.java index cefe0bb..581db5a 100644 --- a/vetti-common/src/main/java/com/vetti/common/core/domain/entity/SysUser.java +++ b/vetti-common/src/main/java/com/vetti/common/core/domain/entity/SysUser.java @@ -127,6 +127,9 @@ public class SysUser extends BaseEntity @ApiModelProperty("用户标识(1:新用户,2:老用户)") private String userFlag; + @ApiModelProperty("用户语音配置信息") + private String userSetJson; + /** 部门对象 */ @Excels({ @Excel(name = "部门名称", targetAttr = "deptName", type = Type.EXPORT), @@ -462,6 +465,14 @@ public class SysUser extends BaseEntity this.userFlag = userFlag; } + public String getUserSetJson() { + return userSetJson; + } + + public void setUserSetJson(String userSetJson) { + this.userSetJson = userSetJson; + } + @Override public String toString() { return new ToStringBuilder(this,ToStringStyle.MULTI_LINE_STYLE) diff --git a/vetti-common/src/main/java/com/vetti/common/utils/readFile/FileContentUtil.java b/vetti-common/src/main/java/com/vetti/common/utils/readFile/FileContentUtil.java new file mode 100644 index 
package com.vetti.common.utils.readFile;

import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;

import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Objects;

/**
 * Static helpers that read the content of an uploaded document (txt, pdf, docx, doc, html)
 * from an {@link InputStream}.
 */
public class FileContentUtil {

    /** Language packs handed to Tesseract when a PDF has to be OCR-ed. */
    private static final String OCR_LANGUAGES = "eng+chi_sim";

    /** Resolution used when rendering PDF pages to images for OCR. */
    private static final int OCR_DPI = 300;

    private FileContentUtil() {}

    /**
     * Reads the content of a document without configuring a Tesseract data path.
     *
     * @param is            input stream of the document; the caller is responsible for closing it
     * @param fileExtension file extension, e.g. {@code txt}, {@code pdf}, {@code docx}, {@code doc}, {@code html}
     * @return the extracted content, or an empty string for an unsupported extension
     * @throws IOException if the document cannot be read or OCR fails
     */
    public static String readFileContent(InputStream is, String fileExtension) throws IOException {
        return readFileContent(is, fileExtension, null);
    }

    /**
     * Reads the content of a document; a PDF whose text layer is blank is OCR-ed page by page.
     *
     * @param is                input stream of the document; the caller is responsible for closing it
     * @param fileExtension     file extension; matched case-insensitively, a leading dot is ignored
     * @param tesseractDatapath Tesseract data directory; {@code null} leaves the library default in place
     * @return the extracted content, or an empty string for an unsupported extension
     * @throws IOException          if the document cannot be read or OCR fails
     * @throws NullPointerException if {@code is} or {@code fileExtension} is {@code null}
     */
    public static String readFileContent(InputStream is, String fileExtension, String tesseractDatapath)
            throws IOException {
        Objects.requireNonNull(is, "InputStream cannot be null");
        Objects.requireNonNull(fileExtension, "fileExtension cannot be null");

        switch (normalizeExtension(fileExtension)) {
            case "txt":
                return new String(toByteArray(is), StandardCharsets.UTF_8);
            case "pdf":
                return readPdf(is, tesseractDatapath);
            case "docx":
                try (XWPFDocument xdoc = new XWPFDocument(is);
                     XWPFWordExtractor extractor = new XWPFWordExtractor(xdoc)) {
                    return extractor.getText();
                }
            case "doc":
                try (WordExtractor extractor = new WordExtractor(is)) {
                    return extractor.getText();
                }
            case "html": {
                // Parsed straight from the stream so no temporary file is needed.
                Document doc = Jsoup.parse(is, "UTF-8", "");
                // NOTE(review): html() returns the body's markup, not plain text, while the other
                // branches return text - confirm which one downstream consumers expect.
                return doc.body() != null ? doc.body().html() : "";
            }
            default:
                return "";
        }
    }

    /** Lower-cases the extension (locale-independent) and drops surrounding blanks and a leading dot. */
    private static String normalizeExtension(String fileExtension) {
        String ext = fileExtension.trim().toLowerCase(Locale.ROOT);
        return ext.startsWith(".") ? ext.substring(1) : ext;
    }

    /** Returns the PDF text layer, falling back to OCR when that layer is blank and pages exist. */
    private static String readPdf(InputStream is, String tesseractDatapath) throws IOException {
        try (PDDocument doc = PDDocument.load(is)) {
            String text = new PDFTextStripper().getText(doc);
            // isBlank rather than isEmpty: a whitespace-only text layer must still trigger OCR.
            if (StringUtils.isNotBlank(text) || doc.getNumberOfPages() == 0) {
                return text;
            }
            return ocrPages(doc, tesseractDatapath);
        }
    }

    /** Renders every page to a binary image and concatenates the OCR output, separated by blank lines. */
    private static String ocrPages(PDDocument doc, String tesseractDatapath) throws IOException {
        ITesseract tesseract = new Tesseract();
        if (tesseractDatapath != null) {
            tesseract.setDatapath(tesseractDatapath);
        }
        tesseract.setLanguage(OCR_LANGUAGES);

        PDFRenderer renderer = new PDFRenderer(doc);
        int pageCount = doc.getNumberOfPages();
        StringBuilder fullText = new StringBuilder();
        for (int i = 0; i < pageCount; i++) {
            BufferedImage image = renderer.renderImageWithDPI(i, OCR_DPI, ImageType.BINARY);
            try {
                fullText.append(tesseract.doOCR(image)).append("\n\n");
            } catch (TesseractException e) {
                // Surface OCR failures through the declared checked exception, keeping the cause.
                throw new IOException("OCR failed on PDF page " + (i + 1) + " of " + pageCount, e);
            }
        }
        return fullText.toString();
    }

    /** Drains the stream into a byte array; the stream is left open for the caller to close. */
    private static byte[] toByteArray(InputStream is) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        byte[] buf = new byte[8192];
        int len;
        while ((len = is.read(buf)) != -1) {
            bos.write(buf, 0, len);
        }
        return bos.toByteArray();
    }
}
过滤掉常见的标题词(如"Resume", "CV"等) + * + * @param text 简历的原始文本内容 + * @return 提取的姓名,如果未找到则返回'Unknown' + */ + public String extractName(String text) { + // 按行分割文本,过滤空行 + String[] lines = text.split("\n"); + List nonEmptyLines = new ArrayList<>(); + for (String line : lines) { + if (!line.trim().isEmpty()) { + nonEmptyLines.add(line.trim()); + } + } + + // 遍历前5行寻找姓名(姓名通常在简历顶部) + int limit = Math.min(5, nonEmptyLines.size()); + for (int i = 0; i < limit; i++) { + String line = nonEmptyLines.get(i); + if (isLikelyName(line)) { + return cleanName(line); + } + } + + return "Unknown"; + } + + /** + * 判断文本是否可能是姓名 + * + * 启发式规则: + * 1. 不包含简历相关的关键词 + * 2. 单词数量在2-4个之间(名字+姓氏的合理范围) + * 3. 总长度小于50个字符 + * + * @param text 待判断的文本行 + * @return 是否可能是姓名 + */ + private boolean isLikelyName(String text) { + // 排除常见的简历标题词 + String[] excludeWords = {"resume", "cv", "curriculum", "vitae", "profile", "summary", "objective"}; + String lowerText = text.toLowerCase(); + + // 如果包含排除词,则不是姓名 + for (String word : excludeWords) { + if (lowerText.contains(word)) { + return false; + } + } + + // 检查单词数量和总长度(姓名的合理范围) + String[] words = text.split("\\s+"); + return words.length >= 2 && words.length <= 4 && text.length() < 50; + } + + /** + * 清理姓名文本 + * + * 移除特殊字符,保留字母、数字、空格、连字符和点号 + * + * @param name 原始姓名文本 + * @return 清理后的姓名 + */ + private String cleanName(String name) { + return name.replaceAll("[^\\w\\s\\-\\.]", "").trim(); + } + + /** + * 提取工作经验年数 + * + * 提取策略: + * 1. 优先使用正则表达式匹配明确的经验描述 + * 2. 如果没有找到,通过工作历史中的年份进行估算 + * 3. 
设置合理的年限范围(1-50年) + * + * 匹配模式: + * - "5 years experience" + * - "experience: 3 years" + * - "8 years in construction" + * + * @param text 简历文本 + * @return 工作经验年数 + */ + public int extractExperienceYears(String text) { + // 定义匹配工作经验的正则表达式模式 + String[] patterns = { + "(\\d+)\\+?\\s*years?\\s*(?:of\\s*)?experience", + "experience[:\\s]*(\\d+)\\+?\\s*years?", + "(\\d+)\\+?\\s*years?\\s*in\\s*(?:the\\s*)?(?:construction|project|contract)" + }; + + // 尝试每个模式进行匹配 + for (String pattern : patterns) { + Matcher matcher = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE).matcher(text); + if (matcher.find()) { + try { + int years = Integer.parseInt(matcher.group(1)); + // 验证年数的合理性(1-50年) + if (years > 0 && years < 50) { + return years; + } + } catch (NumberFormatException e) { + // 忽略数字格式错误 + } + } + } + + // 如果没有找到明确的经验描述,通过工作历史估算 + return estimateExperienceFromHistory(text); + } + + /** + * 通过工作历史中的年份估算工作经验 + * + * 算法: + * 1. 提取文本中所有的年份(1900-2099) + * 2. 计算最早年份到最新年份(或当前年份)的差值 + * 3. 设置合理的经验范围限制(1-30年) + * 4. 
如果无法估算,返回随机的合理值(2-10年) + * + * @param text 简历文本 + * @return 估算的工作经验年数 + */ + private int estimateExperienceFromHistory(String text) { + // 匹配四位数年份(1900-2099) + Pattern yearPattern = Pattern.compile("\\b(19|20)\\d{2}\\b"); + Matcher matcher = yearPattern.matcher(text); + + List years = new ArrayList<>(); + while (matcher.find()) { + try { + years.add(Integer.parseInt(matcher.group())); + } catch (NumberFormatException e) { + // 忽略无效年份 + } + } + + if (years.size() >= 2) { + // 排序年份 + Collections.sort(years); + int earliestYear = years.get(0); + int latestYear = years.get(years.size() - 1); + int currentYear = Year.now().getValue(); + + // 计算工作经验:从最早年份到最新年份(不超过当前年份) + int endYear = Math.min(latestYear, currentYear); + int experience = endYear - earliestYear; + + // 限制经验年数在合理范围内(1-30年) + return Math.max(1, Math.min(experience, 30)); + } + + // 如果无法从年份估算,返回随机的合理默认值(2-10年) + Random random = new Random(); + return random.nextInt(8) + 2; + } + + /** + * 提取技能列表 + * + * 策略: + * 1. 根据申请职位使用不同的技能词典 + * 2. 在简历文本中搜索匹配的技能关键词 + * 3. 添加通用技能作为补充 + * 4. 
去重并限制技能数量(最多8个) + * + * 技能分类: + * - Project Manager: 项目管理、预算控制、团队领导等 + * - Contracts Administrator: 合同管理、法律分析、谈判等 + * - 通用技能: Office软件、沟通能力等 + * + * @param text 简历文本 + * @param role 申请职位 + * @return 提取的技能列表 + */ + public List extractSkills(String text, String role) { + // 定义不同职位的专业技能词典 + Map> skillSets = new HashMap<>(); + + skillSets.put("Project Manager", Arrays.asList( + "project management", "construction planning", "budget management", + "team leadership", "risk management", "quality control", + "stakeholder management", "safety management", "scheduling", + "cost control", "contract management", "resource planning" + )); + + skillSets.put("Contracts Administrator", Arrays.asList( + "contract management", "legal analysis", "negotiation", + "risk assessment", "compliance management", "documentation", + "vendor management", "cost analysis", "procurement", + "contract law", "dispute resolution", "regulatory compliance" + )); + + // 获取对应职位的技能列表,默认使用项目经理技能 + List roleSkills = skillSets.getOrDefault(role, skillSets.get("Project Manager")); + Set foundSkills = new LinkedHashSet<>(); + String lowerText = text.toLowerCase(); + + // 在简历文本中搜索匹配的专业技能 + for (String skill : roleSkills) { + if (lowerText.contains(skill.toLowerCase())) { + foundSkills.add(capitalizeSkill(skill)); + } + } + + // 添加通用技能(如果在文本中找到,或者专业技能不足4个) + List generalSkills = Arrays.asList( + "Microsoft Office", "Communication", "Problem Solving", "Time Management" + ); + + for (String skill : generalSkills) { + if (lowerText.contains(skill.toLowerCase()) || foundSkills.size() < 4) { + foundSkills.add(skill); + } + } + + // 限制技能数量(最多8个) + List result = new ArrayList<>(foundSkills); + return result.subList(0, Math.min(8, result.size())); + } + + /** + * 将技能名称转换为标准格式(首字母大写) + * + * @param skill 原始技能名称 + * @return 格式化后的技能名称 + */ + private String capitalizeSkill(String skill) { + String[] words = skill.split(" "); + StringBuilder sb = new StringBuilder(); + + for (String word : words) { + if (sb.length() > 0) { 
+ sb.append(" "); + } + if (word.length() > 0) { + sb.append(Character.toUpperCase(word.charAt(0))) + .append(word.substring(1).toLowerCase()); + } + } + + return sb.toString(); + } + + /** + * 提取教育背景 + * + * 提取策略: + * 1. 使用正则表达式匹配学历关键词(Bachelor, Master, Diploma等) + * 2. 提取学历类型、专业领域、毕业年份 + * 3. 如果没有找到教育背景,添加默认学历 + * 4. 限制教育背景数量(最多3个) + * + * 匹配模式: + * - "Bachelor of Construction Management" + * - "Master in Engineering" + * - "Diploma of Building" + * + * @param text 简历文本 + * @return 教育背景列表 + */ + public List extractEducation(String text) { + String[] educationPatterns = { + "bachelor['\\s]*(?:of|in|degree)?\\s*([^\\n\\r,\\.]+)", + "master['\\s]*(?:of|in|degree)?\\s*([^\\n\\r,\\.]+)", + "diploma\\s*(?:of|in)?\\s*([^\\n\\r,\\.]+)", + "certificate\\s*(?:of|in)?\\s*([^\\n\\r,\\.]+)", + "degree\\s*(?:of|in)?\\s*([^\\n\\r,\\.]+)" + }; + + List educationList = new ArrayList<>(); + + for (String pattern : educationPatterns) { + Matcher matcher = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE).matcher(text); + while (matcher.find()) { + String qualification = matcher.group(0).trim(); + String field = matcher.group(1) != null ? 
matcher.group(1).trim() : ""; + + if (qualification.length() < 100) { // 避免匹配到过长的文本 + Education edu = new Education(); + edu.setQualification(cleanEducation(qualification)); + edu.setField(cleanEducation(field)); + edu.setInstitution("University"); // 可以进一步提取 + edu.setYear(extractGraduationYear(text)); + educationList.add(edu); + } + } + } + + // 如果没有找到教育背景,添加默认值 + if (educationList.isEmpty()) { + Education defaultEdu = new Education(); + defaultEdu.setQualification("Bachelor Degree"); + defaultEdu.setField("Construction/Business"); + defaultEdu.setInstitution("University"); + defaultEdu.setYear("2018"); + educationList.add(defaultEdu); + } + + // 最多3个教育背景 + return educationList.subList(0, Math.min(3, educationList.size())); + } + + private String cleanEducation(String text) { + return text.replaceAll("[^\\w\\s\\-]", "").trim(); + } + + private String extractGraduationYear(String text) { + Pattern yearPattern = Pattern.compile("\\b(19|20)\\d{2}\\b"); + Matcher matcher = yearPattern.matcher(text); + + List years = new ArrayList<>(); + while (matcher.find()) { + try { + years.add(Integer.parseInt(matcher.group())); + } catch (NumberFormatException e) { + // 忽略无效年份 + } + } + + if (!years.isEmpty()) { + // 找到最早的年份作为毕业年份 + Collections.sort(years); + return years.get(0).toString(); + } + + return "2018"; // 默认年份 + } + + /** + * 提取证书和资质 + * + * 策略: + * 1. 使用正则表达式匹配常见的建筑行业证书 + * 2. 根据职位添加相关的默认证书 + * 3. 标准化证书名称格式 + * 4. 
限制证书数量(最多5个) + * + * 常见证书类型: + * - 项目管理: PMP, PRINCE2, Agile + * - 安全证书: White Card, First Aid, Working at Heights + * - 专业证书: Construction Management, Contract Management + * + * @param text 简历文本 + * @param role 申请职位 + * @return 证书列表 + */ + public List extractCertifications(String text, String role) { + String[] certificationPatterns = { + "pmp", "project management professional", + "white card", "construction induction", + "first aid", "cpr", + "working at heights", "height safety", + "ohs", "whs", "occupational health", + "construction management certificate", + "contract management certificate", + "legal studies", "law degree", + "prince2", "agile", "scrum" + }; + + Set certifications = new LinkedHashSet<>(); + String lowerText = text.toLowerCase(); + + for (String pattern : certificationPatterns) { + Pattern p = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE); + Matcher matcher = p.matcher(text); + if (matcher.find()) { + String cert = normalizeCertification(matcher.group()); + if (cert != null && !certifications.contains(cert)) { + certifications.add(cert); + } + } + } + + // 根据角色添加默认证书 + List defaultCerts; + if ("Project Manager".equals(role)) { + defaultCerts = Arrays.asList("PMP", "Construction Management Certificate", "White Card"); + } else { + defaultCerts = Arrays.asList("Contract Management Certificate", "Legal Studies", "White Card"); + } + + for (String cert : defaultCerts) { + if (!certifications.contains(cert)) { + certifications.add(cert); + } + } + + // 最多5个证书 + List result = new ArrayList<>(certifications); + return result.subList(0, Math.min(5, result.size())); + } + + private String normalizeCertification(String cert) { + Map certMap = new HashMap<>(); + certMap.put("pmp", "PMP"); + certMap.put("project management professional", "PMP"); + certMap.put("white card", "White Card"); + certMap.put("construction induction", "White Card"); + certMap.put("first aid", "First Aid"); + certMap.put("working at heights", "Working at Heights"); + 
certMap.put("height safety", "Working at Heights"); + certMap.put("ohs", "OHS Certificate"); + certMap.put("whs", "WHS Certificate"); + certMap.put("occupational health", "OHS Certificate"); + certMap.put("construction management certificate", "Construction Management Certificate"); + certMap.put("contract management certificate", "Contract Management Certificate"); + certMap.put("legal studies", "Legal Studies"); + certMap.put("law degree", "Law Degree"); + certMap.put("prince2", "PRINCE2"); + certMap.put("agile", "Agile Certification"); + certMap.put("scrum", "Scrum Master"); + + return certMap.getOrDefault(cert.toLowerCase(), cert); + } + + /** + * 提取工作经历 + * + * 提取策略: + * 1. 使用正则表达式匹配公司名称模式 + * 2. 识别常见的公司后缀(Ltd, Pty, Inc, Corp等) + * 3. 为每个公司生成合理的工作经历结构 + * 4. 如果没有找到公司,创建默认工作经历 + * + * 生成内容: + * - 公司名称、职位、工作时间 + * - 基于角色的职责描述 + * - 相关项目经验 + * + * @param text 简历文本 + * @param role 申请职位 + * @return 工作经历列表 + */ + public List extractWorkExperience(String text, String role) { + List experienceList = new ArrayList<>(); + + // 尝试提取公司名称和职位 + String[] companyPatterns = { + "(?:at|with|for)\\s+([A-Z][A-Za-z\\s&,.-]+(?:Ltd|Pty|Inc|Corp|Company|Construction|Group|Services))", + "([A-Z][A-Za-z\\s&,.-]+(?:Ltd|Pty|Inc|Corp|Company|Construction|Group|Services))" + }; + + Set companies = new LinkedHashSet<>(); + for (String pattern : companyPatterns) { + Matcher matcher = Pattern.compile(pattern, Pattern.CASE_INSENSITIVE).matcher(text); + while (matcher.find()) { + String company = matcher.group(1).trim(); + if (company.length() > 3 && company.length() < 50) { + companies.add(company); + } + } + } + + // 如果找到公司,创建工作经历 + if (!companies.isEmpty()) { + List companyList = new ArrayList<>(companies); + // 最多3个公司 + int limit = Math.min(3, companyList.size()); + + for (int i = 0; i < limit; i++) { + String company = companyList.get(i); + WorkExperience exp = new WorkExperience(); + exp.setCompany(company); + exp.setRole(role); + exp.setDuration(generateDuration(i)); + 
exp.setResponsibilities(generateResponsibilities(role)); + exp.setProjects(generateProjects(role)); + experienceList.add(exp); + } + } else { + // 默认工作经历 + WorkExperience defaultExp = new WorkExperience(); + defaultExp.setCompany("Construction Company ABC"); + defaultExp.setRole(role); + defaultExp.setDuration("2020-2024"); + defaultExp.setResponsibilities(generateResponsibilities(role)); + defaultExp.setProjects(generateProjects(role)); + experienceList.add(defaultExp); + } + + return experienceList; + } + + private String generateDuration(int index) { + int currentYear = Year.now().getValue(); + int startYear = currentYear - (index + 1) * 3; + int endYear = currentYear - index * 2; + return startYear + "-" + endYear; + } + + private List generateResponsibilities(String role) { + Map> responsibilities = new HashMap<>(); + + responsibilities.put("Project Manager", Arrays.asList( + "Managed construction projects from inception to completion", + "Coordinated with multiple stakeholders and contractors", + "Ensured projects delivered on time and within budget", + "Implemented safety protocols and quality control measures" + )); + + responsibilities.put("Contracts Administrator", Arrays.asList( + "Managed contract negotiations and administration", + "Reviewed and analyzed contract terms and conditions", + "Ensured compliance with legal and regulatory requirements", + "Coordinated with legal teams and external parties" + )); + + return responsibilities.getOrDefault(role, responsibilities.get("Project Manager")); + } + + private List generateProjects(String role) { + Map> projects = new HashMap<>(); + + projects.put("Project Manager", Arrays.asList( + "Commercial building construction", + "Infrastructure development", + "Residential complex projects" + )); + + projects.put("Contracts Administrator", Arrays.asList( + "Multi-million dollar contract management", + "Vendor agreement negotiations", + "Compliance framework implementation" + )); + + return 
projects.getOrDefault(role, projects.get("Project Manager")); + } + + /** + * 主要提取方法 - 从简历文本中提取所有结构化信息 + * + * 这是类的核心方法,整合所有子提取功能,返回完整的结构化简历数据 + * + * @param text 从PDF/DOCX解析出的原始简历文本 + * @param role 申请的职位(影响技能和证书的提取) + * @return 结构化的简历数据对象 + */ + public ResumeData extractResumeData(String text, String role) { + ResumeData resumeData = new ResumeData(); + + // 个人基本信息 + PersonalInfo personalInfo = new PersonalInfo(); + personalInfo.setName(extractName(text)); + personalInfo.setExperienceYears(extractExperienceYears(text)); + personalInfo.setCertifications(extractCertifications(text, role)); + resumeData.setPersonalInfo(personalInfo); + + // 工作经历列表 + resumeData.setWorkExperience(extractWorkExperience(text, role)); + + // 技能列表 + resumeData.setSkills(extractSkills(text, role)); + + // 教育背景列表 + resumeData.setEducation(extractEducation(text)); + + return resumeData; + } + + +} diff --git a/vetti-common/src/main/java/com/vetti/common/utils/readText/vo/Education.java b/vetti-common/src/main/java/com/vetti/common/utils/readText/vo/Education.java new file mode 100644 index 0000000..3f6d1db --- /dev/null +++ b/vetti-common/src/main/java/com/vetti/common/utils/readText/vo/Education.java @@ -0,0 +1,21 @@ +package com.vetti.common.utils.readText.vo; + +import lombok.Data; +import lombok.experimental.Accessors; + +/** + * 教育背景 + * + * @author wangxiangshun + * @date 2025-11-04 + */ +@Data +@Accessors(chain = true) +public class Education { + + private String qualification; + private String field; + private String institution; + private String year; + +} diff --git a/vetti-common/src/main/java/com/vetti/common/utils/readText/vo/PersonalInfo.java b/vetti-common/src/main/java/com/vetti/common/utils/readText/vo/PersonalInfo.java new file mode 100644 index 0000000..de792d3 --- /dev/null +++ b/vetti-common/src/main/java/com/vetti/common/utils/readText/vo/PersonalInfo.java @@ -0,0 +1,25 @@ +package com.vetti.common.utils.readText.vo; + +import lombok.Data; +import 
lombok.experimental.Accessors; + +import java.util.List; + +/** + * 个人基本信息 + * + * @author wangxiangshun + * @date 2025-11-04 + */ +@Data +@Accessors(chain = true) +public class PersonalInfo { + + private String name; + + private int experienceYears; + + private List certifications; + + +} diff --git a/vetti-common/src/main/java/com/vetti/common/utils/readText/vo/ResumeData.java b/vetti-common/src/main/java/com/vetti/common/utils/readText/vo/ResumeData.java new file mode 100644 index 0000000..4533a5f --- /dev/null +++ b/vetti-common/src/main/java/com/vetti/common/utils/readText/vo/ResumeData.java @@ -0,0 +1,25 @@ +package com.vetti.common.utils.readText.vo; + +import lombok.Data; +import lombok.experimental.Accessors; + +import java.util.List; + +/** + * 简历数据信息 + * + * @author wangxiangshun + * @date 2025-11-04 + */ +@Data +@Accessors(chain = true) +public class ResumeData { + + private PersonalInfo personalInfo; + + private List workExperience; + + private List skills; + + private List education; +} diff --git a/vetti-common/src/main/java/com/vetti/common/utils/readText/vo/WorkExperience.java b/vetti-common/src/main/java/com/vetti/common/utils/readText/vo/WorkExperience.java new file mode 100644 index 0000000..0777a91 --- /dev/null +++ b/vetti-common/src/main/java/com/vetti/common/utils/readText/vo/WorkExperience.java @@ -0,0 +1,24 @@ +package com.vetti.common.utils.readText.vo; + +import lombok.Data; +import lombok.experimental.Accessors; + +import java.util.List; + +/** + * 工作经历 + * + * @author wangxiangshun + * @date 2025-11-04 + */ +@Data +@Accessors(chain = true) +public class WorkExperience { + + private String company; + private String role; + private String duration; + private List responsibilities; + private List projects; + +} diff --git a/vetti-hotakes/pom.xml b/vetti-hotakes/pom.xml index 8341ad8..a2e0b43 100644 --- a/vetti-hotakes/pom.xml +++ b/vetti-hotakes/pom.xml @@ -31,6 +31,7 @@ com.vetti vetti-common + com.vetti vetti-system diff --git 
a/vetti-hotakes/src/main/java/com/vetti/hotake/domain/HotakeProblemBaseInfo.java b/vetti-hotakes/src/main/java/com/vetti/hotake/domain/HotakeProblemBaseInfo.java new file mode 100644 index 0000000..d85fac7 --- /dev/null +++ b/vetti-hotakes/src/main/java/com/vetti/hotake/domain/HotakeProblemBaseInfo.java @@ -0,0 +1,41 @@ +package com.vetti.hotake.domain; + +import lombok.Data; +import lombok.experimental.Accessors; +import io.swagger.annotations.ApiModelProperty; +import com.vetti.common.annotation.Excel; +import com.vetti.common.core.domain.BaseEntity; + +/** + * 面试者问题库信息对象 hotake_problem_base_info + * + * @author wangxiangshun + * @date 2025-11-04 + */ +@Data +@Accessors(chain = true) +public class HotakeProblemBaseInfo extends BaseEntity +{ + private static final long serialVersionUID = 1L; + + /** 主键ID */ + @ApiModelProperty("主键ID") + private Long id; + + /** 用户ID */ + @ApiModelProperty("用户ID") + @Excel(name = "用户ID") + private Long userId; + + /** 问题 */ + @ApiModelProperty("问题") + @Excel(name = "问题") + private String contents; + + /** 状态(0 禁用,1 启用) */ + @ApiModelProperty("状态(0 禁用,1 启用)") + @Excel(name = "状态", readConverterExp = "0=,禁=用,1,启=用") + private String status; + + +} diff --git a/vetti-hotakes/src/main/java/com/vetti/hotake/mapper/HotakeProblemBaseInfoMapper.java b/vetti-hotakes/src/main/java/com/vetti/hotake/mapper/HotakeProblemBaseInfoMapper.java new file mode 100644 index 0000000..da8635f --- /dev/null +++ b/vetti-hotakes/src/main/java/com/vetti/hotake/mapper/HotakeProblemBaseInfoMapper.java @@ -0,0 +1,69 @@ +package com.vetti.hotake.mapper; + +import java.util.List; +import com.vetti.hotake.domain.HotakeProblemBaseInfo; + +/** + * 面试者问题库信息Mapper接口 + * + * @author wangxiangshun + * @date 2025-11-04 + */ +public interface HotakeProblemBaseInfoMapper +{ + /** + * 查询面试者问题库信息 + * + * @param id 面试者问题库信息主键 + * @return 面试者问题库信息 + */ + public HotakeProblemBaseInfo selectHotakeProblemBaseInfoById(Long id); + + /** + * 查询面试者问题库信息列表 + * + * @param 
hotakeProblemBaseInfo 面试者问题库信息 + * @return 面试者问题库信息集合 + */ + public List selectHotakeProblemBaseInfoList(HotakeProblemBaseInfo hotakeProblemBaseInfo); + + /** + * 新增面试者问题库信息 + * + * @param hotakeProblemBaseInfo 面试者问题库信息 + * @return 结果 + */ + public int insertHotakeProblemBaseInfo(HotakeProblemBaseInfo hotakeProblemBaseInfo); + + /** + * 修改面试者问题库信息 + * + * @param hotakeProblemBaseInfo 面试者问题库信息 + * @return 结果 + */ + public int updateHotakeProblemBaseInfo(HotakeProblemBaseInfo hotakeProblemBaseInfo); + + /** + * 删除面试者问题库信息 + * + * @param id 面试者问题库信息主键 + * @return 结果 + */ + public int deleteHotakeProblemBaseInfoById(Long id); + + /** + * 批量删除面试者问题库信息 + * + * @param ids 需要删除的数据主键集合 + * @return 结果 + */ + public int deleteHotakeProblemBaseInfoByIds(Long[] ids); + /** + * 批量新增面试者问题库信息 + * + * @param hotakeProblemBaseInfoList 面试者问题库信息列表 + * @return 结果 + */ + public int batchInsertHotakeProblemBaseInfo(List hotakeProblemBaseInfoList); + +} diff --git a/vetti-hotakes/src/main/java/com/vetti/hotake/service/IHotakeCvInfoService.java b/vetti-hotakes/src/main/java/com/vetti/hotake/service/IHotakeCvInfoService.java index 95c824f..764b40d 100644 --- a/vetti-hotakes/src/main/java/com/vetti/hotake/service/IHotakeCvInfoService.java +++ b/vetti-hotakes/src/main/java/com/vetti/hotake/service/IHotakeCvInfoService.java @@ -35,6 +35,15 @@ public interface IHotakeCvInfoService */ public HotakeCvInfo insertHotakeCvInfo(HotakeCvInfo hotakeCvInfo); + /** + * 处理简历信息 + * + * @param hotakeCvInfo 简历信息 + * @return 结果 + */ + public HotakeCvInfo handleHotakeCvInfo(HotakeCvInfo hotakeCvInfo); + + /** * 修改简历信息 * diff --git a/vetti-hotakes/src/main/java/com/vetti/hotake/service/IHotakeProblemBaseInfoService.java b/vetti-hotakes/src/main/java/com/vetti/hotake/service/IHotakeProblemBaseInfoService.java new file mode 100644 index 0000000..4146e1f --- /dev/null +++ b/vetti-hotakes/src/main/java/com/vetti/hotake/service/IHotakeProblemBaseInfoService.java @@ -0,0 +1,70 @@ +package com.vetti.hotake.service; + 
+import java.util.List; +import com.vetti.hotake.domain.HotakeProblemBaseInfo; + +/** + * 面试者问题库信息Service接口 + * + * @author wangxiangshun + * @date 2025-11-04 + */ +public interface IHotakeProblemBaseInfoService +{ + /** + * 查询面试者问题库信息 + * + * @param id 面试者问题库信息主键 + * @return 面试者问题库信息 + */ + public HotakeProblemBaseInfo selectHotakeProblemBaseInfoById(Long id); + + /** + * 查询面试者问题库信息列表 + * + * @param hotakeProblemBaseInfo 面试者问题库信息 + * @return 面试者问题库信息集合 + */ + public List selectHotakeProblemBaseInfoList(HotakeProblemBaseInfo hotakeProblemBaseInfo); + + /** + * 新增面试者问题库信息 + * + * @param hotakeProblemBaseInfo 面试者问题库信息 + * @return 结果 + */ + public int insertHotakeProblemBaseInfo(HotakeProblemBaseInfo hotakeProblemBaseInfo); + + /** + * 修改面试者问题库信息 + * + * @param hotakeProblemBaseInfo 面试者问题库信息 + * @return 结果 + */ + public int updateHotakeProblemBaseInfo(HotakeProblemBaseInfo hotakeProblemBaseInfo); + + /** + * 批量删除面试者问题库信息 + * + * @param ids 需要删除的面试者问题库信息主键集合 + * @return 结果 + */ + public int deleteHotakeProblemBaseInfoByIds(Long[] ids); + + /** + * 删除面试者问题库信息信息 + * + * @param id 面试者问题库信息主键 + * @return 结果 + */ + public int deleteHotakeProblemBaseInfoById(Long id); + + /** + * 批量新增面试者问题库信息 + * + * @param hotakeProblemBaseInfoList 面试者问题库信息列表 + * @return 结果 + */ + public int batchInsertHotakeProblemBaseInfo(List hotakeProblemBaseInfoList); + +} diff --git a/vetti-hotakes/src/main/java/com/vetti/hotake/service/impl/HotakeCvInfoServiceImpl.java b/vetti-hotakes/src/main/java/com/vetti/hotake/service/impl/HotakeCvInfoServiceImpl.java index 9451254..9d5de3e 100644 --- a/vetti-hotakes/src/main/java/com/vetti/hotake/service/impl/HotakeCvInfoServiceImpl.java +++ b/vetti-hotakes/src/main/java/com/vetti/hotake/service/impl/HotakeCvInfoServiceImpl.java @@ -1,9 +1,17 @@ package com.vetti.hotake.service.impl; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; import java.util.List; +import cn.hutool.json.JSONUtil; import 
com.vetti.common.core.service.BaseServiceImpl; +import com.vetti.common.enums.FillTypeEnum; import com.vetti.common.utils.DateUtils; +import com.vetti.common.utils.readFile.FileContentUtil; +import com.vetti.common.utils.readText.ResumeTextExtractor; +import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; @@ -18,6 +26,7 @@ import com.vetti.hotake.service.IHotakeCvInfoService; * @author wangxiangshun * @date 2025-11-02 */ +@Slf4j @SuppressWarnings("all") @Service public class HotakeCvInfoServiceImpl extends BaseServiceImpl implements IHotakeCvInfoService @@ -61,11 +70,34 @@ public class HotakeCvInfoServiceImpl extends BaseServiceImpl implements IHotakeC @Override public HotakeCvInfo insertHotakeCvInfo(HotakeCvInfo hotakeCvInfo) { - hotakeCvInfo.setCreateTime(DateUtils.getNowDate()); + fill(FillTypeEnum.INSERT.getCode(),hotakeCvInfo); hotakeCvInfoMapper.insertHotakeCvInfo(hotakeCvInfo); + //对简历数据进行处理生成相应的题库数据 +// handleHotakeCvInfo(hotakeCvInfo); return hotakeCvInfo; } + /** + * 处理简历信息 + * @param hotakeCvInfo 简历信息 + * @return + */ + @Override + public HotakeCvInfo handleHotakeCvInfo(HotakeCvInfo hotakeCvInfo) { + try{ + InputStream inputStream = new FileInputStream("/Users/wangxiangshun/Desktop/管报数据/223/Abrar Mohammed Project Manager Resume.docx"); + String contents = FileContentUtil.readFileContent(inputStream,hotakeCvInfo.getCvFileType()); + //进行简历数据提取 + ResumeTextExtractor extractor = new ResumeTextExtractor(); + extractor.extractResumeData(contents,""); + log.info("返回简历基本内容:{}", JSONUtil.toJsonStr(extractor.extractResumeData(contents,""))); + }catch (Exception e) { + + } + return null; + } + + /** * 修改简历信息 * diff --git a/vetti-hotakes/src/main/java/com/vetti/hotake/service/impl/HotakeProblemBaseInfoServiceImpl.java b/vetti-hotakes/src/main/java/com/vetti/hotake/service/impl/HotakeProblemBaseInfoServiceImpl.java 
new file mode 100644 index 0000000..545164c --- /dev/null +++ b/vetti-hotakes/src/main/java/com/vetti/hotake/service/impl/HotakeProblemBaseInfoServiceImpl.java @@ -0,0 +1,118 @@ +package com.vetti.hotake.service.impl; + +import java.util.List; + +import com.vetti.common.core.service.BaseServiceImpl; +import com.vetti.common.utils.DateUtils; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +import com.vetti.hotake.mapper.HotakeProblemBaseInfoMapper; +import com.vetti.hotake.domain.HotakeProblemBaseInfo; +import com.vetti.hotake.service.IHotakeProblemBaseInfoService; + +/** + * 面试者问题库信息Service业务层处理 + * + * @author wangxiangshun + * @date 2025-11-04 + */ +@SuppressWarnings("all") +@Service +public class HotakeProblemBaseInfoServiceImpl extends BaseServiceImpl implements IHotakeProblemBaseInfoService +{ + @Autowired + private HotakeProblemBaseInfoMapper hotakeProblemBaseInfoMapper; + + /** + * 查询面试者问题库信息 + * + * @param id 面试者问题库信息主键 + * @return 面试者问题库信息 + */ + @Transactional(readOnly = true) + @Override + public HotakeProblemBaseInfo selectHotakeProblemBaseInfoById(Long id) + { + return hotakeProblemBaseInfoMapper.selectHotakeProblemBaseInfoById(id); + } + + /** + * 查询面试者问题库信息列表 + * + * @param hotakeProblemBaseInfo 面试者问题库信息 + * @return 面试者问题库信息 + */ + @Transactional(readOnly = true) + @Override + public List selectHotakeProblemBaseInfoList(HotakeProblemBaseInfo hotakeProblemBaseInfo) + { + return hotakeProblemBaseInfoMapper.selectHotakeProblemBaseInfoList(hotakeProblemBaseInfo); + } + + /** + * 新增面试者问题库信息 + * + * @param hotakeProblemBaseInfo 面试者问题库信息 + * @return 结果 + */ + @Transactional(rollbackFor=Exception.class) + @Override + public int insertHotakeProblemBaseInfo(HotakeProblemBaseInfo hotakeProblemBaseInfo) + { + hotakeProblemBaseInfo.setCreateTime(DateUtils.getNowDate()); + return 
hotakeProblemBaseInfoMapper.insertHotakeProblemBaseInfo(hotakeProblemBaseInfo); + } + + /** + * 修改面试者问题库信息 + * + * @param hotakeProblemBaseInfo 面试者问题库信息 + * @return 结果 + */ + @Transactional(rollbackFor=Exception.class) + @Override + public int updateHotakeProblemBaseInfo(HotakeProblemBaseInfo hotakeProblemBaseInfo) + { + hotakeProblemBaseInfo.setUpdateTime(DateUtils.getNowDate()); + return hotakeProblemBaseInfoMapper.updateHotakeProblemBaseInfo(hotakeProblemBaseInfo); + } + + /** + * 批量删除面试者问题库信息 + * + * @param ids 需要删除的面试者问题库信息主键 + * @return 结果 + */ + @Transactional(rollbackFor=Exception.class) + @Override + public int deleteHotakeProblemBaseInfoByIds(Long[] ids) + { + return hotakeProblemBaseInfoMapper.deleteHotakeProblemBaseInfoByIds(ids); + } + + /** + * 删除面试者问题库信息信息 + * + * @param id 面试者问题库信息主键 + * @return 结果 + */ + @Transactional(rollbackFor=Exception.class) + @Override + public int deleteHotakeProblemBaseInfoById(Long id) + { + return hotakeProblemBaseInfoMapper.deleteHotakeProblemBaseInfoById(id); + } + /** + * 批量新增面试者问题库信息 + * + * @param hotakeProblemBaseInfoList 面试者问题库信息列表 + * @return 结果 + */ + @Transactional(rollbackFor=Exception.class) + @Override + public int batchInsertHotakeProblemBaseInfo(List hotakeProblemBaseInfoList){ + return hotakeProblemBaseInfoMapper.batchInsertHotakeProblemBaseInfo(hotakeProblemBaseInfoList); + } +} diff --git a/vetti-hotakes/src/main/resources/mapper/hotake/HotakeProblemBaseInfoMapper.xml b/vetti-hotakes/src/main/resources/mapper/hotake/HotakeProblemBaseInfoMapper.xml new file mode 100644 index 0000000..42764bf --- /dev/null +++ b/vetti-hotakes/src/main/resources/mapper/hotake/HotakeProblemBaseInfoMapper.xml @@ -0,0 +1,97 @@ + + + + + + + + + + + + + + + + + + + select id, user_id, contents, status, del_flag, create_by, create_time, update_by, update_time, remark from hotake_problem_base_info + + + + + + + + insert into hotake_problem_base_info + + user_id, + contents, + status, + del_flag, + create_by, + create_time, + 
update_by, + update_time, + remark, + + + #{userId}, + #{contents}, + #{status}, + #{delFlag}, + #{createBy}, + #{createTime}, + #{updateBy}, + #{updateTime}, + #{remark}, + + + + + update hotake_problem_base_info + + user_id = #{userId}, + contents = #{contents}, + status = #{status}, + del_flag = #{delFlag}, + create_by = #{createBy}, + create_time = #{createTime}, + update_by = #{updateBy}, + update_time = #{updateTime}, + remark = #{remark}, + + where id = #{id} + + + + delete from hotake_problem_base_info where id = #{id} + + + + delete from hotake_problem_base_info where id in + + #{id} + + + + + insert into hotake_problem_base_info( id, user_id, contents, status, del_flag, create_by, create_time, update_by, update_time, remark) values + + ( #{item.id}, #{item.userId}, #{item.contents}, #{item.status}, #{item.delFlag}, #{item.createBy}, #{item.createTime}, #{item.updateBy}, #{item.updateTime}, #{item.remark}) + + + \ No newline at end of file diff --git a/vetti-system/src/main/resources/mapper/system/SysUserMapper.xml b/vetti-system/src/main/resources/mapper/system/SysUserMapper.xml index 7b9217f..129aec1 100644 --- a/vetti-system/src/main/resources/mapper/system/SysUserMapper.xml +++ b/vetti-system/src/main/resources/mapper/system/SysUserMapper.xml @@ -25,6 +25,7 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" + @@ -66,7 +67,7 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" select u.user_id, u.dept_id, u.user_name, u.nick_name, u.email, u.avatar, u.phonenumber, u.password, u.sex, u.status, u.del_flag, u.login_ip, u.login_date, u.pwd_update_date, u.create_by, u.create_time, u.remark, d.dept_id, d.parent_id, d.ancestors, d.dept_name, d.order_num, d.leader, d.status as dept_status, r.role_id, r.role_name, r.role_key, r.role_sort, r.data_scope, r.status as role_status,u.sys_user_type - ,u.steps,u.job_position,u.experience,u.cv_url,u.location,u.job_type,u.relocate,u.best_side_json,u.address,u.user_flag + 
,u.steps,u.job_position,u.experience,u.cv_url,u.location,u.job_type,u.relocate,u.best_side_json,u.address,u.user_flag,u.user_set_json from sys_user u left join sys_dept d on u.dept_id = d.dept_id left join sys_user_role ur on u.user_id = ur.user_id @@ -75,7 +76,7 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" select distinct u.user_id, u.dept_id, u.user_name, u.nick_name, u.email, u.phonenumber, u.status, u.create_time,u.sys_user_type,u.steps,u.job_position,u.experience,u.cv_url,u.location, - u.job_type,u.relocate,u.best_side_json,u.address,u.user_flag + u.job_type,u.relocate,u.best_side_json,u.address,u.user_flag,u.user_set_json from sys_user u left join sys_dept d on u.dept_id = d.dept_id left join sys_user_role ur on u.user_id = ur.user_id @@ -127,7 +128,7 @@ PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"