业务逻辑修改以及完善
This commit is contained in:
@@ -229,19 +229,6 @@
|
||||
<artifactId>openhtmltopdf-pdfbox</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- Optional: routes openhtmltopdf's internal logging through SLF4J (this module does not affect CSS support) -->
|
||||
<dependency>
|
||||
<groupId>com.openhtmltopdf</groupId>
|
||||
<artifactId>openhtmltopdf-slf4j</artifactId>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>net.sourceforge.htmlunit</groupId>
|
||||
<artifactId>htmlunit</artifactId>
|
||||
</dependency>
|
||||
|
||||
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
@@ -13,10 +13,6 @@ import java.io.IOException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.gargoylesoftware.htmlunit.WebClient;
|
||||
import com.gargoylesoftware.htmlunit.html.HtmlPage;
|
||||
|
||||
|
||||
/**
|
||||
* OkHttp第三方库读取网页HTML内容
|
||||
*/
|
||||
@@ -125,33 +121,4 @@ public class ReadHtmlByOkHttp {
|
||||
return noMultipleSpace.trim();
|
||||
}
|
||||
|
||||
|
||||
public static void main(String[] args) {
|
||||
// 1. 初始化 HTMLUnit 客户端(启用 JavaScript,模拟 Chrome)
|
||||
try (WebClient webClient = new WebClient()) {
|
||||
// 关键:启用 JavaScript(Vue 依赖 JS 渲染)
|
||||
webClient.getOptions().setJavaScriptEnabled(true);
|
||||
// 禁用 CSS(无需渲染样式,提升速度)
|
||||
webClient.getOptions().setCssEnabled(false);
|
||||
// 忽略 JS 错误(避免页面 JS 报错中断执行)
|
||||
webClient.getOptions().setThrowExceptionOnScriptError(false);
|
||||
// 设置超时时间
|
||||
webClient.getOptions().setTimeout(15000);
|
||||
|
||||
// 2. 加载页面并等待 JS 渲染
|
||||
String url = "https://vetti.hotake.cn/#/jobs/job/detail?jobId=126";
|
||||
HtmlPage page = webClient.getPage(url);
|
||||
// 等待 Vue 数据渲染(给足够时间执行 JS)
|
||||
webClient.waitForBackgroundJavaScript(5000);
|
||||
|
||||
// 3. 提取页面纯文本
|
||||
String pageText = page.getTextContent();
|
||||
System.out.println("=== HTMLUnit 提取的页面文本 ===");
|
||||
System.out.println(pageText);
|
||||
|
||||
} catch (Exception e) {
|
||||
System.out.println("提取失败:" + e.getMessage());
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user