AI智能回复搜索中,请稍候...
4 回答
-- ----------------------------
-- Table structure for mysql360
-- ----------------------------
DROP TABLE IF EXISTS `mysql360`;
CREATE TABLE `mysql360` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `title` varchar(255) DEFAULT NULL,
  `messge` text,
  `uername` varchar(255) DEFAULT NULL,
  `views` varchar(11) DEFAULT NULL,
  `add_time` varchar(32) DEFAULT NULL,
  `avatar` varchar(255) DEFAULT NULL,
  `answer_user` varchar(255) DEFAULT NULL,
  `answer_message` text,
  `answer_time` varchar(32) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1901 DEFAULT CHARSET=utf8;

火车头的采集规则我已经找不到了,大家自己写一下吧。采集规则因目标站而异,我这里采集的是360问答。

采集图像的脚本:
'localhost',
'dbname' => 'caiji',
'username' => 'root',
'password' => '123456'
);
// Download every avatar referenced by the mysql360 table into the local
// image/ directory, keeping the remote file name.
$pdo = new pdomysql($config);
$result = $pdo->fetchAll('select * from mysql360');

// Referer header sent with every image request.
$referer = 'http://wenda.haosou.com/';

// file_put_contents() does not create missing directories.
if (!is_dir('image')) {
    mkdir('image', 0755, true);
}

foreach ($result as $key => $val) {
    // Rows without an avatar URL have nothing to download.
    if (trim((string) $val['avatar']) === '') {
        continue;
    }

    $img = curl_get_contents($val['avatar'], $referer, 10);

    // curl_exec() yields false on failure; do not leave an empty file behind.
    if ($img === false || $img === '') {
        continue;
    }

    file_put_contents('image/' . basename($val['avatar']), $img);
    //sleep(2);
}

// The single-URL debug request that used to follow the loop was removed:
// its $url assignment was commented out, so it fetched an undefined URL.
/**
 * Fetch a URL with cURL and return whatever curl_exec() returns.
 *
 * Redirects are followed and a fixed desktop Chrome user agent is sent.
 * When $referer is truthy it is sent as the Referer header; otherwise
 * CURLOPT_AUTOREFERER is switched on instead. For URLs starting with
 * "https://" peer and host verification are disabled.
 *
 * @param string $url     Address to request.
 * @param string $referer Referer header value, or a falsy value for none.
 * @param int    $timeout Value passed to CURLOPT_TIMEOUT (seconds).
 *
 * @return mixed Result of curl_exec(): the body on success, false on failure.
 *
 * @throws Zend_Exception When the cURL extension is not available.
 */
function curl_get_contents($url, $referer, $timeout = 10)
{
    if (!function_exists('curl_init')) {
        throw new Zend_Exception('CURL not support');
    }

    // Option/value pairs, applied one by one in this order.
    $settings = array(
        array(CURLOPT_URL, $url),
        array(CURLOPT_TIMEOUT, $timeout),
        array(CURLOPT_RETURNTRANSFER, TRUE),
        array(CURLOPT_HEADER, FALSE),
        array(CURLOPT_FOLLOWLOCATION, TRUE),
        array(CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36'),
    );

    // Explicit referer when given, automatic referer on redirects otherwise.
    $settings[] = $referer
        ? array(CURLOPT_REFERER, $referer)
        : array(CURLOPT_AUTOREFERER, 1);

    // HTTPS targets are fetched without certificate or host-name checks.
    $isHttps = (substr($url, 0, 8) === 'https://');
    if ($isHttps) {
        $settings[] = array(CURLOPT_SSL_VERIFYPEER, FALSE);
        $settings[] = array(CURLOPT_SSL_VERIFYHOST, FALSE);
        $settings[] = array(CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1);
    }

    $handle = curl_init();
    foreach ($settings as $pair) {
        curl_setopt($handle, $pair[0], $pair[1]);
    }

    $response = curl_exec($handle);
    curl_close($handle);

    return $response;
}
批量入库脚本
/**
 * Import one page of scraped rows from the `caiji` table as questions with
 * one answer each.
 *
 * Every request handles $per_page rows. The page cursor is kept in the file
 * page.data and is advanced only when the fetched page contained rows, so
 * the action can be requested repeatedly to work through the table.
 *
 * For each row it resolves (or registers) the asking and answering users,
 * builds avatar thumbnails from a previously downloaded image when one
 * exists, then publishes the question followed by its answer.
 */
public function caiji_action(){
    // Import in small batches: each visit to this URL inserts 10 rows.
    $per_page = 10;
    // A missing or empty page.data reads as page 0.
    $page = intval(file_get_contents('page.data'));
    ini_set('display_errors','on');
    error_reporting(E_ALL ^ E_NOTICE);

    // NOTE(review): the first run passes page 0 - confirm whether fetch_page()
    // is 0- or 1-based, otherwise the first page may be imported twice.
    $data = $this->model('account')->fetch_page('caiji', '', 'add_time ASC',$page,$per_page);

    // Title keyword => category id, checked in this order; first match wins.
    $category_map = array(
        'php'    => 1,
        'mysql'  => 4,
        'jquery' => 5,
        'html'   => 2,
    );

    foreach($data as $key => $val){
        // Keyword analysis of the title supplies the question's topics.
        $topics = $this->model('system')->analysis_keyword($val['title']);
        print_r($topics);

        // Category: falls back to 1 when no keyword matches.
        $category_id = 1;
        foreach($category_map as $keyword => $mapped_id){
            if(stripos($val['title'], $keyword) !== false){
                $category_id = $mapped_id;
                break;
            }
        }

        // Asking user: reuse the account with that name or register it.
        $val['username'] = trim($val['username']);
        if(!$val['username']){
            $val['username'] = 'no_reg';
        }
        $uid = $this->model('account')->fetch_one('users', 'uid', "user_name = '" . $this->model('account')->quote($val['username']) . "'");
        if(!$uid){
            $uid = $this->model('account')->user_register($val['username'], '7385568', null);
        }
        $val['uid'] = $uid;

        // Answering user: same lookup-or-register, with its own default name.
        $val['answer_user'] = trim($val['answer_user']);
        if(!$val['answer_user']){
            $val['answer_user'] = 'mrliang';
        }
        $answer_uid = $this->model('account')->fetch_one('users', 'uid', "user_name = '" . $this->model('account')->quote($val['answer_user']) . "'");
        if(!$answer_uid){
            $answer_uid = $this->model('account')->user_register($val['answer_user'], '7385568', null);
        }
        $val['answer_uid'] = $answer_uid;

        // Avatar: if the downloaded image exists, generate the thumbnails.
        // $imgname and $thumb_file are reset for every row; previously a row
        // without an avatar reused the image name left over from the row
        // before it and assigned that picture to the wrong user.
        // NOTE(review): the avatar is applied to the answering user - confirm
        // that the `avatar` column really belongs to the answerer.
        $avatar_url = 'caiji/image/';
        $imgname = trim($val['avatar']) ? basename($val['avatar']) : '';
        $thumb_file = array();
        if($imgname){
            $full_path = str_replace('uploads','',get_setting('upload_dir')) . $avatar_url . $imgname;
            echo $full_path . "\r\n";
            // Files of 1 KB or less are ignored.
            if(file_exists($full_path) && filesize($full_path) > 1024){
                make_dir(get_setting('upload_dir') . '/avatar/' . $this->model('account')->get_avatar($answer_uid, '',1));
                foreach(AWS_APP::config()->get('image')->avatar_thumbnail AS $k => $v)
                {
                    $thumb_file[$k] = get_setting('upload_dir') . '/avatar/' . $this->model('account')->get_avatar($answer_uid, $k, 0);
                    AWS_APP::image()->initialize(array(
                        'quality' => 90,
                        'source_image' => $full_path,
                        'new_image' => $thumb_file[$k],
                        'width' => $v['w'],
                        'height' => $v['h']
                    ))->resize();
                }
            }
        }

        // Back-date the question by (100 - $key) * 100 seconds so rows in one
        // batch get distinct, increasing timestamps; the answer follows
        // 100 to 10000 seconds later.
        $now = time() - (100 - $key) * 100;
        $answer_now = $now + mt_rand(100,10000);

        // Users are in place: publish the question.
        // Positional arguments after $topics: anonymous, attach_access_key,
        // ask_user_id, create_topic, from, timestamp.
        $question_id = $this->model('publish')->publish_question($val['title'], $val['message'], $category_id, $uid, $topics, null, null, null, true, null, $now);

        // Without a question id there is nothing to attach the answer to.
        if(!$question_id){
            continue;
        }

        // Question published: post its answer.
        // Positional arguments after $answer_uid: anonymous, attach_access_key,
        // auto_focus, reply_to_openid, timestamp.
        $answer_id = $this->model('publish')->publish_answer($question_id, $val['answer_message'], $answer_uid, null, null, true, true, $answer_now);
    }

    if($data){
        // Persist the cursor only when this page actually had rows.
        file_put_contents('page.data',($page+1));
    }
}