|
马上注册,结交更多郧西好友
您需要 登录 才可以下载或查看,没有帐号?注册
x
来之前,先给论坛一个好东西哦~~
1:用 AcquisitionSvcImpl.java 替换原工程项目 com.jeecms.cms.service 包下的对应文件。
2:编译工程即可
3:登录后台配置相关规则,如下所示参数:
====================================
*采集名称: 《排名第一的博客名或者其他网站排名第一的信息名》
*页面编码: UTF-8
动态地址: 〈对应的网络地址〉
页码 从 1 到: 2
内容地址集: <!-- 列表 START -->.*?<!-- 列表END -->
内容地址: target="blank" href="(.*?)">(.*?)</a></span>
标题: <title>(.*?) 〈〈对应的名称〉〉 </title>
内容: <!-- 正文开始 -->(.*?)<!-- 正文结束 -->
package com.jeecms.cms.service;

import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.StatusLine;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.HttpResponseException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import com.jeecms.cms.entity.assist.CmsAcquisition;
import com.jeecms.cms.entity.main.Content;
import com.jeecms.cms.manager.assist.CmsAcquisitionMng;
@Service
public class AcquisitionSvcImpl implements AcquisitionSvc {
/** SLF4J logger shared by the service and its worker threads. */
private static final Logger log = LoggerFactory.getLogger(AcquisitionSvcImpl.class);
/**
 * Launches the acquisition with the given id on a new background thread.
 *
 * @param id identifier of the acquisition to run
 * @return {@code false} when {@code id} is {@code null}, when no acquisition
 *         is found for it, or when its status already equals
 *         {@link CmsAcquisition#START}; {@code true} once the worker thread
 *         has been started
 */
public boolean start(Integer id) {
    // A null id cannot identify an acquisition; treat it like "not found".
    if (id == null) {
        return false;
    }
    CmsAcquisition acqu = cmsAcquisitionMng.findById(id);
    if (acqu == null || acqu.getStatus() == CmsAcquisition.START) {
        return false;
    }
    Thread thread = new AcquisitionThread(acqu);
    thread.start();
    return true;
}
/** Manager used to look up acquisitions and to record their progress. */
private CmsAcquisitionMng cmsAcquisitionMng;

/**
 * Setter used for dependency injection of the acquisition manager.
 *
 * @param cmsAcquisitionMng manager instance to use
 */
@Autowired
public void setCmsAcquisitionMng(CmsAcquisitionMng cmsAcquisitionMng) {
    this.cmsAcquisitionMng = cmsAcquisitionMng;
}
private class AcquisitionThread extends Thread {
/** Acquisition this thread works on; replaced in run() by the instance returned from the manager. */
private CmsAcquisition acqu;

/**
 * Creates a worker thread named {@code <acquisition class name>#<acquisition id>}.
 *
 * @param acqu acquisition to process; must not be {@code null} because the
 *             thread name is derived from it
 */
public AcquisitionThread(CmsAcquisition acqu) {
    super(acqu.getClass().getName() + "#" + acqu.getId());
    this.acqu = acqu;
}
@Override
public void run() {
if (acqu == null) {
return;
}
acqu = cmsAcquisitionMngstart(acqugetId());
String[] plans = acqugetAllPlans();
HttpClient client = new DefaultHttpClient();
CharsetHandler handler = new CharsetHandler(acqugetPageEncoding());
List<String> contentList;
String url;
int currNum = acqugetCurrNum();
int currItem = acqugetCurrItem();
Integer acquId = acqugetId();
for (int i = planslength - currNum; i >= 0; i--)
{
url = plans[i];
contentList = getContentList(client, handler, url, acqugetLinksetStart(), acqugetLinksetEnd(), acqugetLinkStart(), acqugetLinkEnd());
String link;
if(contentList!=null)
{
for (int j = contentListsize() - currItem; j >= 0; j--)
{
if (cmsAcquisitionMngisNeedBreak(acqugetId(), planslength - i, contentListsize() - j, contentListsize()))
{
clientgetConnectionManager()shutdown();
loginfo("Acquisition#{} breaked", acqugetId());
return;
}
if (acqugetPauseTime() > 0)
{
try
{
Threadsleep(acqugetPauseTime());
}
catch (InterruptedException e)
{
logwarn("", e);
}
}
link = contentListget(j);
saveContent(client, handler, acquId, link, acqugetTitleStart(), acqugetTitleEnd(), acqugetContentStart(), acqugetContentEnd());
}
}
currItem = 1;
}
clientgetConnectionManager()shutdown();
cmsAcquisitionMngend(acqugetId());
loginfo("Acquisition#{} complete", acqugetId());
}
private List<String> getContentList(HttpClient client,
CharsetHandler handler, String url, String linksetStart,
String linksetEnd, String linkStart, String linkEnd) {
List<String> list = new ArrayList<String>();
try
{
HttpGet httpget = new HttpGet(new URI(url));
String html = clientexecute(httpget, handler);
Pattern pt = Patterncompile(linksetStarttrim());
Matcher m = ptmatcher(html);
if(mfind())
{
html = mgroup();
}
|
|