前兩章記錄了Maven項目開發過程的配置,下面就是簡單的開發過程,這裡是一個定時任務的開發,初衷是能夠定時根據儲存的URL去網上搜尋資源,但沒那麼多時間去搞,算是個半成品吧,算是記錄一下開發的過程。
Web.xml
接上面的配置,在web.xml中配置一個listener:
<?xml version="1.0" encoding="UTF-8"?>
<web-app version="2.4"
xmlns="http://java.sun.com/xml/ns/j2ee"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://java.sun.com/xml/ns/j2ee http://java.sun.com/xml/ns/j2ee/web-app_2_4.xsd">
<!-- Spring application context ===================================== -->
<!--
- Location of the Spring configuration file (the contextConfigLocation
- parameter is the one Spring's ContextLoaderListener looks up)
-->
<context-param>
<param-name>contextConfigLocation</param-name>
<param-value>classpath:/applicationContext.xml</param-value>
</context-param>
<!-- Listeners ====================================================== -->
<!-- Spring listener that loads the application context configured above -->
<listener>
<listener-class>org.springframework.web.context.ContextLoaderListener</listener-class>
</listener>
<!-- Spring listener for request-scoped context -->
<listener>
<listener-class>org.springframework.web.context.request.RequestContextListener</listener-class>
</listener>
<!-- Application listener used as the entry point for the scheduled tasks -->
<listener>
<listener-class>
com.cff.study.TaskContextListener
</listener-class>
</listener>
<!-- Servlet Filters ================================================ -->
<!-- Character-encoding filter configured with UTF-8 and mapped to every URL -->
<filter>
<filter-name>encodingFilter</filter-name>
<filter-class>org.springframework.web.filter.CharacterEncodingFilter</filter-class>
<init-param>
<param-name>encoding</param-name>
<param-value>UTF-8</param-value>
</init-param>
</filter>
<filter-mapping>
<filter-name>encodingFilter</filter-name>
<url-pattern>/*</url-pattern>
</filter-mapping>
<!-- Welcome page ==================================================== -->
<welcome-file-list>
<welcome-file>index.jsp</welcome-file>
</welcome-file-list>
</web-app>
利用listener作為程式的入口。
package com.cff.study;
import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;
/**
 * Servlet context listener used as the program's entry point: it obtains the
 * shared {@link Context} and initializes it when the web application starts,
 * and closes it when the web application is destroyed.
 */
public class TaskContextListener implements ServletContextListener {
    /** Context obtained in {@link #contextInitialized}; {@code null} until then. */
    private Context context = null;

    /**
     * Closes the application context if one was obtained during start-up.
     *
     * @param arg0 the servlet context event supplied by the container
     */
    public void contextDestroyed(ServletContextEvent arg0) {
        // Start-up may have failed before the field was assigned; avoid an NPE on shutdown.
        if (context != null) {
            context.close();
        }
    }

    /**
     * Obtains the application context and initializes it. Initialization
     * failures are reported through the servlet context log; they are not
     * rethrown, so the web application continues to start.
     *
     * @param arg0 the servlet context event supplied by the container
     */
    public void contextInitialized(ServletContextEvent arg0) {
        context = Context.getInstance();
        try {
            context.init();
        } catch (ClassNotFoundException e) {
            logInitFailure(arg0, e);
        } catch (InstantiationException e) {
            logInitFailure(arg0, e);
        } catch (IllegalAccessException e) {
            logInitFailure(arg0, e);
        }
    }

    /** Writes an initialization failure, with its cause, to the servlet context log. */
    private void logInitFailure(ServletContextEvent event, Exception cause) {
        event.getServletContext().log("Failed to initialize the task context", cause);
    }
}
在Context中,定義多個線程,每個線程啟動一個定時任務。這個小項目我只添加了一個任務,利用百度搜尋的方法去網上搜尋資源,並不完整,這裡只稍做介紹。
package com.cff.study;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
import java.util.TimerTask;
import org.apache.commons.logging.*;
import com.cff.study.util.FileUtil;
import com.cff.study.util.Logger;
/**
 * Application-wide singleton that loads {@code /class-config.properties} and
 * starts one {@link TaskThread} per configured task class.
 */
public class Context {
    private static Context instance = null;
    private static Log log = LogFactory.getLog(Context.class);
    /** Task class configuration; entries are looked up under the keys class0, class1, ... */
    Properties classConfig = new Properties();

    /**
     * Returns the single shared instance, creating it on first use.
     *
     * @return the shared context, never {@code null}
     */
    public static synchronized Context getInstance() {
        if (null == instance) {
            log.info("執行個體不存在!正在建立... ");
            instance = new Context();
        }
        // Return the existing instance directly; calling getInstance() again
        // here would recurse without end once the instance exists.
        return instance;
    }

    /**
     * Loads the task class configuration and starts a thread for each entry.
     * The number of entries is taken from the size of the loaded properties,
     * and thread {@code i} is given a start delay of {@code 5 + i}.
     *
     * @throws ClassNotFoundException if an entry {@code class<i>} is missing or
     *         names a class that cannot be found in package {@code com.cff.study}
     * @throws InstantiationException if a task class cannot be instantiated
     * @throws IllegalAccessException if a task class or its no-arg constructor is not accessible
     */
    public void init() throws ClassNotFoundException, InstantiationException, IllegalAccessException {
        log.info("應用上下文初始化中...");
        FileUtil.loadConfig(classConfig, "/class-config.properties");
        int classNum = classConfig.size();
        for (int i = 0; i < classNum; i++) {
            String simpleName = classConfig.getProperty("class" + i);
            Logger.info(simpleName);
            if (simpleName == null) {
                // Fail with a message that names the missing key instead of
                // the confusing "com.cff.study.null".
                throw new ClassNotFoundException(
                        "Missing entry 'class" + i + "' in /class-config.properties");
            }
            Class<?> tempTask = Class.forName("com.cff.study." + simpleName);
            TimerTask currentTask = (TimerTask) tempTask.newInstance();

            TaskThread taskThread = new TaskThread();
            taskThread.setCurrentTask(currentTask);
            taskThread.setTimeDelay(5 + i);
            taskThread.start();
            Logger.info("" + i + "個線程啟動中...");
        }
    }

    /**
     * Logs that the context is shutting down. The threads started by
     * {@link #init()} are not tracked here, so nothing is stopped.
     */
    public void close() {
        log.info("應用上下文正在登出...");
    }
}
任務線程的啟動
package com.cff.study;
import java.util.Calendar;
import java.util.Timer;
import java.util.TimerTask;
import com.cff.study.util.Logger;
/**
 * Thread that schedules one {@link TimerTask} at a fixed rate on its own
 * {@link Timer}. The first execution happens {@code timeDelay} seconds after
 * the thread runs; later executions follow every 30 minutes.
 */
public class TaskThread extends Thread {
    /** Interval between executions, in minutes. */
    private static final int INTERVAL_MINUTES = 30;

    /**
     * Timer owned by this thread. Kept per instance: a shared static field
     * would be overwritten by every thread, so one thread could schedule its
     * task on a timer created by another.
     */
    private Timer importDataTimer = null;
    TimerTask currentTask = null;
    int timeDelay = 5;

    public int getTimeDelay() {
        return timeDelay;
    }

    public void setTimeDelay(int timeDelay) {
        this.timeDelay = timeDelay;
    }

    public TimerTask getCurrentTask() {
        return currentTask;
    }

    public void setCurrentTask(TimerTask currentTask) {
        this.currentTask = currentTask;
    }

    public TaskThread() {
        super();
    }

    /**
     * Creates the timer and schedules the configured task.
     *
     * @throws IllegalStateException if no task was set before the thread was started
     */
    public void run() {
        if (currentTask == null) {
            throw new IllegalStateException("currentTask must be set before the thread is started");
        }
        importDataTimer = new Timer();
        Calendar firstRun = Calendar.getInstance();
        firstRun.add(Calendar.SECOND, timeDelay);
        // Long arithmetic so the period cannot overflow int.
        long periodMillis = INTERVAL_MINUTES * 60L * 1000L;
        importDataTimer.scheduleAtFixedRate(currentTask, firstRun.getTime(), periodMillis);
        Logger.info("定時任務"+timeDelay+"秒鐘後啟動,掃描時間間隔為"+INTERVAL_MINUTES+"分鐘");
    }
}
百度搜尋任務
package com.cff.study;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.TimerTask;
import com.cff.study.search.ContentFromUrl;
import com.cff.study.search.JSoupBaiduSearcher;
import com.cff.study.search.SearchResult;
import com.cff.study.search.Searcher;
import com.cff.study.search.UrlOpen;
import com.cff.study.search.Webpage;
import com.cff.study.util.DateUtil;
import com.cff.study.util.Logger;
import com.cff.study.util.FileUtil;
public class BaiDuSearchTask extends TimerTask {
private int UrlLevel = 5; //Url最大層數
private int UrlStep = 10; //Url每層最大數量
private int ExecuteUrlNum=100;//一次定時任務最大處理量
private int MaxUrls = 100000;
Searcher searcher = null;
SearchResult searchResult = null;
List<Webpage> webpages = null;
String today = "";
List<String> keywords = new ArrayList<String>();
List<Integer> CurrentLocations = new ArrayList<Integer>();
int CurrentUrlIndex = 1;
private Properties contextConfig = new Properties();
private Properties urlCollectionFile = new Properties();
private Properties contentCollectionFile = new Properties();
String contextConfigFileName = "F:/Work_Study/TimerTask/context-config.properties";
String urlCollectionFileName = "F:/Work_Study/TimerTask/url-collect.properties";
String contentCollectionFileName = "F:/Work_Study/TimerTask/content-collect.properties";
/**
 * Creates the task; performs no work beyond the field initializers.
 */
public BaiDuSearchTask() {
}
/**
 * Executes one round of the task: logs the current date, reloads the three
 * property files from their absolute paths and runs the search for the
 * hard-coded search term.
 */
@Override
public void run() {
    today = DateUtil.format(DateUtil.now(), "yyyyMMdd");
    Logger.info("今天是" + today.substring(0, 4) + "年" + today.substring(4, 6)
            + "月" + today.substring(6, 8) + "日,我啟動了一個線程,叫百度搜尋,嘿嘿,厲害吧?");
    Logger.info("搜尋正在啟動,請不要關閉電腦!");
    String searchDest = "陳付菲";
    // A TimerTask instance is run repeatedly, so reset the per-run lists;
    // otherwise each execution appends to what the previous one left behind.
    keywords.clear();
    CurrentLocations.clear();
    keywords.add("偉大的人");
    keywords.add("華中科技大學");
    FileUtil.loadAbsoluteConfig(contextConfig, contextConfigFileName);
    FileUtil.loadAbsoluteConfig(urlCollectionFile, urlCollectionFileName);
    FileUtil.loadAbsoluteConfig(contentCollectionFile, contentCollectionFileName);
    doSearch(searchDest);
}
/**
 * Runs one step of the search/crawl, driven by values in contextConfig.
 * Returns immediately when "endRunning" is 1. When "SearchMethod" is 1 a
 * search is performed and "SearchMethod" is written back as 0; otherwise the
 * stored URLs are processed starting from the saved position, and the updated
 * position is written back.
 * @param searchDest search term passed on to doSearchMethod
 */
private void doSearch(String searchDest) {
// Stop flag: 1 means a previous run decided there is nothing left to do.
int endRunning = Integer.parseInt(contextConfig.getProperty(
"endRunning").trim());
if (1 == endRunning)
return;
int SearchMethod = Integer.parseInt(contextConfig.getProperty(
"SearchMethod").trim());
// maximum number of URL levels
UrlLevel = Integer.parseInt(contextConfig.getProperty("MaxUrlLevel").trim());
// maximum number of URLs
MaxUrls = Integer.parseInt(contextConfig.getProperty("MaxUrls").trim());
// maximum step (URLs per level)
UrlStep = Integer.parseInt(contextConfig.getProperty("MaxUrlStep").trim());
if (1 == SearchMethod){
// Search mode: reset the URL collection file, search, then switch the
// stored mode to 0 so the next run takes the else branch.
FileUtil.clearInfoForFile(urlCollectionFileName);
doSearchMethod(searchDest);
FileUtil.writeAbsoluteProperties(contextConfigFileName,
"SearchMethod", String.valueOf(0));
}
else {
int size = urlCollectionFile.size();
// More URLs loaded than MaxUrls: only advance the stored page number and stop.
if (MaxUrls < size) {
int pageNo = Integer.parseInt(contextConfig.getProperty(
"pageNo").trim());
FileUtil.writeAbsoluteProperties(contextConfigFileName,
"pageNo", String.valueOf(pageNo + 1));
return;
}
readCurrentLocation(CurrentLocations);
int DsetUrlLevel = (int) (UrlLevel-1- Math.log(ExecuteUrlNum) / Math.log(UrlStep));; // URL level at which the recursive processing starts
int CurrentUrlLevel = Integer.parseInt(contextConfig.getProperty("CurrentUrlLevel").trim());// level currently being processed
Logger.info("CurrentLocations size:"+CurrentLocations.size());
// Compare DsetUrlLevel with CurrentUrlLevel: if equal, process from DsetUrlLevel; otherwise process from CurrentUrlLevel
if(DsetUrlLevel==CurrentUrlLevel){
Logger.info("CurrentUrlLevel"+CurrentUrlLevel);
Logger.info("DsetUrlLevel:"+DsetUrlLevel);
int CurrentUrlNo=CurrentLocations.get(CurrentUrlLevel);
doUrlOpenMethod(DsetUrlLevel,CurrentUrlNo);
// Advance the position on this level for the next run.
CurrentLocations.set(CurrentUrlLevel, CurrentUrlNo+1);
writeCurrentLocations(DsetUrlLevel,CurrentLocations);
}
else{
Logger.info("CurrentUrlLevel else"+CurrentUrlLevel);
int CurrentUrlNo=CurrentLocations.get(CurrentUrlLevel);
// result is -1, the level at which an invalid URL was met (or that level + 1
// inside the descent loop), or DsetUrlLevel when the descent completed.
int result = doUrlOpenToDsetMethod(CurrentUrlNo,CurrentUrlLevel,DsetUrlLevel);
if(-1==result){
FileUtil.writeAbsoluteProperties(contextConfigFileName,
"endRunning", String.valueOf(1));
}
else if(DsetUrlLevel==result){
// Descent reached the target level: intermediate levels are set to 0,
// slot 0 of the target level is processed, and its position becomes 1.
for(int i=CurrentUrlLevel+1;i<DsetUrlLevel;i++)
CurrentLocations.set(i, 0);
doUrlOpenMethod(DsetUrlLevel,0);
CurrentLocations.set(DsetUrlLevel, 1);
}
else if(CurrentUrlLevel==result){
// The URL on the current level was rejected: deeper levels are set to -1
// and the position on the current level moves to the next URL.
for(int i=CurrentUrlLevel+1;i<DsetUrlLevel;i++)
CurrentLocations.set(i, -1);
CurrentLocations.set(CurrentUrlLevel, CurrentUrlNo+1);
}
else{
// Descent stopped at an intermediate level. The inner check is always
// true here because the previous branch handled CurrentUrlLevel==result.
if(CurrentUrlLevel!=result){
for(int k=CurrentUrlLevel+1;k<result;k++ ){
CurrentLocations.set(k,0);
}
CurrentLocations.set(result, 1);
for(int i=result+1;i<DsetUrlLevel;i++)
CurrentLocations.set(i, -1);
}
}
writeCurrentLocations(DsetUrlLevel,CurrentLocations);
}
}
}
/**
 * Normalizes the position list and stores it in the context configuration.
 * Walking from the last level towards the first, any entry greater than 9 is
 * reset to -1 and its parent entry is incremented. If the first entry ends up
 * greater than 9, "endRunning" is written as 1 and nothing else is stored.
 * Otherwise the entries are joined with '#' and written as "CurrentLocation",
 * together with "CurrentUrlLevel".
 * @param dsetUrlLevel target level; stored as "CurrentUrlLevel" unless a carry happened
 * @param currentLocations position list, modified in place
 */
private void writeCurrentLocations(int dsetUrlLevel, List<Integer> currentLocations) {
    final int lastIndex = currentLocations.size() - 1;
    int levelToStore = dsetUrlLevel;

    // Carry overflowing positions upwards, deepest level first.
    int idx = lastIndex;
    while (idx > 0) {
        if (currentLocations.get(idx) > 9) {
            currentLocations.set(idx, -1);
            int parentPosition = currentLocations.get(idx - 1);
            currentLocations.set(idx - 1, parentPosition + 1);
            levelToStore = idx - 1;
        }
        idx--;
    }

    if (currentLocations.get(0) > 9) {
        FileUtil.writeAbsoluteProperties(contextConfigFileName,
                "endRunning", String.valueOf(1));
        return;
    }

    StringBuilder joined = new StringBuilder();
    for (int pos = 0; pos <= lastIndex; pos++) {
        if (pos > 0) {
            joined.append("#");
        }
        joined.append(currentLocations.get(pos));
    }
    FileUtil.writeAbsoluteProperties(contextConfigFileName,
            "CurrentLocation", joined.toString());
    FileUtil.writeAbsoluteProperties(contextConfigFileName,
            "CurrentUrlLevel", String.valueOf(levelToStore));
}
/**
 * Placeholder entry point; the body contains no executable statements.
 * @param args command-line arguments (unused)
 */
public static void main(String[] args){
//readCurrentLocation(CurrentLocations);
}
/**
 * Reads the current processing position from the "CurrentLocation" property
 * of contextConfig, a '#'-separated list of integers, and stores it in the
 * given list, replacing whatever the list held before.
 * @param currentLocations list that receives the parsed positions
 * @throws NumberFormatException if a segment is not an integer
 */
private void readCurrentLocation(List<Integer> currentLocations) {
    String location = contextConfig.getProperty("CurrentLocation").trim();
    String[] locations = location.split("#", -1);
    Logger.info(locations.length);
    // The list is reused across runs; without clearing it the stored
    // location would double in length every time it is read and written back.
    currentLocations.clear();
    for (int i = 0; i < locations.length; i++) {
        currentLocations.add(Integer.parseInt(locations[i].trim()));
    }
}
/**
 * Processes URLs level by level, from the current level down to the target level.
 * The first URL is taken from slot CurrentUrlNo of CurrentUrlLevel; every
 * following level uses its slot 0.
 * @param CurrentUrlNo position of the URL within the current level
 * @param CurrentUrlLevel level to start from
 * @param dsetUrlLevel level at which the descent stops
 * @return -1 if the position or level exceeds the configured limits;
 *         CurrentUrlLevel if the first URL is rejected; the level + 1 if a URL
 *         on a deeper level is rejected; otherwise dsetUrlLevel
 */
private int doUrlOpenToDsetMethod(int CurrentUrlNo, int CurrentUrlLevel, int dsetUrlLevel) {
    boolean outOfRange = CurrentUrlNo > UrlStep || CurrentUrlLevel > UrlLevel;
    if (outOfRange) {
        return -1;
    }

    // First step: the URL at the saved position on the starting level.
    String firstKey = "Url" + (CurrentUrlNo + CurrentUrlLevel * UrlStep);
    Logger.info("目前處理層:" + CurrentUrlLevel);
    Logger.info("目前第" + CurrentUrlNo + "個Url");
    Logger.info("Url辨別為:" + firstKey);
    String url = urlCollectionFile.getProperty(firstKey);
    Logger.info("目前Url為:" + url);
    if (!CheckUrlIllegalOrNot(url)) {
        Logger.info("目前Url不合法!");
        return CurrentUrlLevel;
    }
    Logger.info("目前Url合法,可以繼續執行! ");
    doUrlOpenMethodDetail(url, 0, CurrentUrlLevel);

    // Remaining steps: slot 0 of each deeper level until the target level.
    for (int level = CurrentUrlLevel + 1; level != dsetUrlLevel; level++) {
        String levelKey = "Url" + level * UrlStep;
        url = urlCollectionFile.getProperty(levelKey);
        Logger.info("目前處理層:" + level);
        Logger.info("目前第" + 0 + "個Url");
        Logger.info("Url辨別為:" + levelKey);
        Logger.info("目前Url為:" + url);
        if (!CheckUrlIllegalOrNot(url)) {
            Logger.info("目前Url不合法!");
            return level + 1;
        }
        Logger.info("目前Url合法,可以繼續執行! ");
        doUrlOpenMethodDetail(url, 0, level);
    }
    return dsetUrlLevel;
}
/**
 * Opens the URL stored at the given level and position, then recurses into
 * positions 0..UrlStep-1 of the next level. Returns without doing anything
 * when the position or level exceeds the configured limits or when the URL is
 * rejected by CheckUrlIllegalOrNot.
 * @param CurrentUrlLevel level of the URL to open
 * @param CurrentUrlNo position of the URL within that level
 */
private void doUrlOpenMethod(int CurrentUrlLevel,int CurrentUrlNo) {
    if (CurrentUrlNo > UrlStep || CurrentUrlLevel > UrlLevel) {
        return;
    }
    String key = "Url" + (CurrentUrlNo + CurrentUrlLevel * UrlStep);
    String url = urlCollectionFile.getProperty(key);
    Logger.info("目前處理目标層:" + CurrentUrlLevel);
    // Log the position actually being processed (previously always printed 0).
    Logger.info("目前第" + CurrentUrlNo + "個Url");
    Logger.info("Url辨別為:" + key);
    Logger.info("目前Url為:" + url);
    if (!CheckUrlIllegalOrNot(url)) {
        Logger.info("目前Url不合法!");
        return;
    }
    Logger.info("目前Url合法,可以繼續執行! ");
    doUrlOpenMethodDetail(url, CurrentUrlNo, CurrentUrlLevel);
    for (int i = 0; i < UrlStep; i++) {
        doUrlOpenMethod(CurrentUrlLevel + 1, i);
    }
}
/**
 * Runs the search for the page number stored under "pageNo" in contextConfig.
 * When the page number is greater than UrlStep, "endRunning" is written as
 * "1" and no search is made; otherwise the page is searched and "pageNo" is
 * written back incremented by one.
 * @param searchDest search term
 */
private void doSearchMethod(String searchDest) {
    final String storedPage = contextConfig.getProperty("pageNo");
    final int currentPage = Integer.parseInt(storedPage.trim());
    Logger.info("目前搜尋頁碼:" + currentPage);

    final boolean pastLastPage = currentPage > UrlStep;
    if (pastLastPage) {
        FileUtil.writeAbsoluteProperties(contextConfigFileName,
                "endRunning", "1");
    } else {
        doSearchMethodDetail(searchDest, currentPage);
        FileUtil.writeAbsoluteProperties(contextConfigFileName, "pageNo",
                Integer.toString(currentPage + 1));
    }
}
/**
 * Searches for the given term and stores the result URLs under the keys
 * Url0 .. Url(UrlStep-1) of the URL collection file. Slots for which no
 * result exists are filled with a single space. Logs an error when the search
 * returns no result object or no page list.
 * @param searchDest search term
 * @param pageNo result page to request
 */
private void doSearchMethodDetail(String searchDest, int pageNo) {
    searcher = new JSoupBaiduSearcher();
    searchResult = searcher.search(searchDest, pageNo);
    webpages = (searchResult == null) ? null : searchResult.getWebpages();
    if (webpages == null) {
        Logger.error("沒有搜尋到結果");
        return;
    }
    Logger.info("搜尋結果 目前第 " + searchResult.getPage() + " 頁,頁面大小為:"
            + searchResult.getPageSize() + " 共有結果數:"
            + searchResult.getTotal());

    Map<String, String> map = new HashMap<String, String>();
    int i = 0;
    for (Webpage webpage : webpages) {
        // Level 0 owns exactly UrlStep slots; key "Url" + UrlStep already
        // belongs to slot 0 of level 1, so stop before writing it.
        if (i >= UrlStep) {
            break;
        }
        Logger.info("URL:" + webpage.getUrl());
        map.put("Url" + i, webpage.getUrl());
        i++;
    }
    // Pad the remaining slots of this level with a blank placeholder.
    for (int j = map.size(); j < UrlStep; j++) {
        map.put("Url" + j, " ");
    }
    // NOTE(review): assumes writeAbsolutePropertiesFile persists every entry of
    // the map it is given, so one call with the completed map leaves the same
    // file content as rewriting after each put - confirm against FileUtil.
    if (!map.isEmpty()) {
        FileUtil.writeAbsolutePropertiesFile(urlCollectionFileName, map);
    }
}
/**
 * Opens the given URL, stores the links found on each returned page in the
 * slots of the next level (keys Url((level+1)*UrlStep) onwards, at most
 * UrlStep of them, blank-padded), and stores each page's content in the
 * content collection file. Does nothing when the position or level exceeds
 * the configured limits; logs an error when nothing was returned.
 * @param url URL to open
 * @param currentUrlNo position of the URL within its level
 * @param currentUrlLevel level of the URL
 */
private void doUrlOpenMethodDetail(String url, int currentUrlNo, int currentUrlLevel) {
    if (currentUrlNo > UrlStep || currentUrlLevel > UrlLevel) {
        return;
    }
    UrlOpen urlOpen = new ContentFromUrl();
    SearchResult searchResult = urlOpen.urlOpen(url);
    List<Webpage> webpages = (searchResult == null) ? null : searchResult.getWebpages();
    if (webpages == null) {
        Logger.error("沒有搜尋到結果");
        return;
    }
    Logger.info("搜尋結果 目前第 " + searchResult.getPage() + " 頁,頁面大小為:"
            + searchResult.getPageSize() + " 共有結果數:"
            + searchResult.getTotal());

    Map<String, String> map = new HashMap<String, String>();
    // First key of the next level; that level owns exactly UrlStep slots.
    int firstSlot = (currentUrlLevel + 1) * UrlStep;
    for (Webpage webpage : webpages) {
        int urlCount = webpage.getUrls().size();
        // Fill exactly UrlStep slots: real links first, then blank padding.
        // Writing a slot at offset UrlStep would overwrite slot 0 of the
        // level after next.
        for (int k = 0; k < UrlStep; k++) {
            if (k < urlCount) {
                Logger.info("URL:" + webpage.getUrls().get(k));
                map.put("Url" + (firstSlot + k), webpage.getUrls().get(k));
            } else {
                map.put("Url" + (firstSlot + k), " ");
            }
        }
        // NOTE(review): assumes writeAbsolutePropertiesFile persists every entry
        // of the map it is given, so one call per page is equivalent to
        // rewriting after each put - confirm against FileUtil.
        if (!map.isEmpty()) {
            FileUtil.writeAbsolutePropertiesFile(urlCollectionFileName, map);
        }

        String contentKey = "内容" + (contentCollectionFile.size() + 1);
        String content = webpage.getContent();
        FileUtil.writeAbsoluteProperties(contentCollectionFileName, contentKey, content);
        // Record the key in memory as well so the next page, or the next call
        // in the same run, gets a fresh key instead of overwriting this one.
        contentCollectionFile.setProperty(contentKey, content == null ? "" : content);
    }
}
/**
 * Checks whether a URL is acceptable for crawling. Despite the name, the
 * method returns {@code true} when the URL is accepted.
 * A URL is rejected when it is null or empty, does not contain "http:", is
 * shorter than 10 characters, or contains one of the listed script, image,
 * video, icon or stylesheet extensions.
 * @param Url URL to check; may be null
 * @return true if the URL passes all checks, false otherwise
 */
public boolean CheckUrlIllegalOrNot(String Url){
    Logger.info(Url);
    if (Url == null || "".equals(Url)) {
        return false;
    }
    // NOTE(review): URLs that only contain "https:" are rejected by this
    // check - confirm whether that is intended.
    if (!Url.contains("http:")) {
        return false;
    }
    if (Url.length() < 10) {
        return false;
    }
    // ".ico" with the dot, like the other extensions; a bare "ico" would
    // reject any URL that merely contains those letters.
    if (Url.contains(".js") || Url.contains(".jpg")
            || Url.contains(".jpeg") || Url.contains(".mp4")
            || Url.contains(".avi") || Url.contains(".flv")
            || Url.contains(".ico") || Url.contains(".css")) {
        return false;
    }
    return true;
}
}
這個其實沒寫完整,但是不想花心思搞了。百度搜尋利用的是jsoup工具。如果有人感興趣可以留下郵箱,我把源代碼發給他。