AI智能回复搜索中,请稍候...
-- ----------------------------
-- Table structure for mysql360
-- ----------------------------
-- Staging table for the scraped records: a title/body pair with its author,
-- plus answer_user / answer_message / answer_time columns (presumably one
-- answer stored alongside each question - confirm against the scraper rules).
-- NOTE(review): `messge` and `uername` look like misspellings of
-- "message" / "username"; any code reading this table must use the names
-- exactly as declared here - confirm before renaming anything.
-- WARNING: the DROP below discards any existing data in the table.
DROP TABLE IF EXISTS `mysql360`;
CREATE TABLE `mysql360` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `title` varchar(255) DEFAULT NULL,
  `messge` text,
  `uername` varchar(255) DEFAULT NULL,
  -- view counter, declared as text rather than a numeric type
  `views` varchar(11) DEFAULT NULL,
  -- timestamps are declared as plain strings, not DATETIME/INT
  `add_time` varchar(32) DEFAULT NULL,
  -- the image download script reads this column as the URL to fetch
  `avatar` varchar(255) DEFAULT NULL,
  `answer_user` varchar(255) DEFAULT NULL,
  `answer_message` text,
  `answer_time` varchar(32) DEFAULT NULL,
  PRIMARY KEY (`id`)
-- AUTO_INCREMENT=1901 makes the next generated id start at 1901
) ENGINE=InnoDB AUTO_INCREMENT=1901 DEFAULT CHARSET=utf8;
火车头的采集规则我就找不到了,自己写一下吧,根据目标站不一样,我这是采集360问答的。 采集图像的脚本
// Avatar download script: reads every row of the mysql360 table and saves the
// image behind each row's avatar URL into the local image/ directory.
//
// NOTE(review): the beginning of this script (the PHP open tag, the include
// that provides the pdomysql class, and the opening of the $config array) was
// lost when the post was extracted. The `$config = array('host' =>` prefix
// below is reconstructed - confirm it and restore the missing include.
$config = array(
    'host'     => 'localhost',
    'dbname'   => 'caiji',
    'username' => 'root',
    'password' => '123456'
);

$pdo    = new pdomysql($config);
$result = $pdo->fetchAll('select * from mysql360');

// The request is sent with this referer header.
$referer = 'http://wenda.haosou.com/';

foreach ($result as $val) {
    // Without an avatar URL basename() returns '' and the write below would
    // target the image/ directory itself, so such rows are skipped.
    if (trim((string) $val['avatar']) === '') {
        continue;
    }

    $img = curl_get_contents($val['avatar'], $referer, 10);

    // A failed transfer returns false; do not leave an empty file behind.
    if ($img === false || $img === '') {
        continue;
    }

    // The file name is the last path segment of the avatar URL.
    file_put_contents('image/' . basename($val['avatar']), $img);
    //sleep(2);
}

// Manual single-image check, kept disabled:
//$url = 'http://quc.ssl.qhimg.com/dm/48_48_100/t0108fc371225c67513.jpg';
//$referer = 'http://wenda.haosou.com/';
//header('Content-type:image/jpeg'); echo curl_get_contents($url, $referer, $timeout = 10);

/**
 * Fetch a URL with cURL and return the response body.
 *
 * Redirects are followed and a fixed desktop Chrome user agent is sent.
 *
 * @param string $url     Address to download.
 * @param string $referer Referer header to send; when empty, cURL's automatic
 *                        referer handling for redirects is enabled instead.
 * @param int    $timeout Maximum number of seconds the transfer may take.
 *
 * @return string|false Response body, or false when the transfer failed.
 *
 * @throws Exception When the cURL extension is unavailable (a Zend_Exception
 *                   if that class can be loaded, a RuntimeException otherwise).
 */
function curl_get_contents($url, $referer, $timeout = 10)
{
    if (!function_exists('curl_init')) {
        // Zend_Exception only exists when Zend Framework is loaded; fall back
        // to a built-in exception so a standalone script does not die with
        // "class not found" instead of the intended message.
        $exception_class = class_exists('Zend_Exception') ? 'Zend_Exception' : 'RuntimeException';

        throw new $exception_class('CURL not support');
    }

    $curl = curl_init();

    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_TIMEOUT, $timeout);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, TRUE);
    curl_setopt($curl, CURLOPT_HEADER, FALSE);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, TRUE);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36');

    if ($referer) {
        curl_setopt($curl, CURLOPT_REFERER, $referer);
    } else {
        curl_setopt($curl, CURLOPT_AUTOREFERER, 1);
    }

    // URL schemes are case-insensitive, so compare without regard to case.
    if (stripos($url, 'https://') === 0) {
        // NOTE(review): certificate and host verification are switched off,
        // which leaves HTTPS downloads open to interception. Kept as in the
        // original; enable verification if the target hosts allow it.
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
        curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, FALSE);
        curl_setopt($curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1);
    }

    $result = curl_exec($curl);
    curl_close($curl);

    return $result;
}
批量入库脚本
/**
 * Import one batch of scraped rows as questions with answers.
 *
 * Meant to be triggered by hand by visiting this action's URL: each call
 * reads the page counter from page.data, loads one page (10 rows) of the
 * 'caiji' table ordered by add_time, and advances the counter when rows
 * were returned.
 *
 * For every row it:
 *  - extracts topics from the title and picks a category by keyword,
 *  - looks up the asking and the answering user by name, registering them
 *    when they do not exist yet,
 *  - builds avatar thumbnails for the answering user when a previously
 *    downloaded image is present,
 *  - publishes the question and then its answer with back-dated timestamps.
 *
 * Progress information (topics, avatar paths) is printed as it goes.
 */
public function caiji_action()
{
    ini_set('display_errors', 'on');
    error_reporting(E_ALL ^ E_NOTICE);

    $per_page = 10;

    // A missing counter file simply means nothing has been imported yet.
    $page = is_file('page.data') ? intval(file_get_contents('page.data')) : 0;

    $data = $this->model('account')->fetch_page('caiji', '', 'add_time ASC', $page, $per_page);

    // Title keyword => category id, checked in this order; first hit wins.
    $category_keywords = array(
        'php'    => 1,
        'mysql'  => 4,
        'jquery' => 5,
        'html'   => 2
    );

    // Downloaded avatars live under this path, relative to the upload
    // directory setting with 'uploads' stripped from it.
    $avatar_url = 'caiji/image/';

    foreach ($data as $key => $val) {
        // Keyword extraction on the title yields the question's topics.
        $topics = $this->model('system')->analysis_keyword($val['title']);
        print_r($topics);

        // Category: falls back to 1 when no keyword matches.
        $category_id = 1;
        foreach ($category_keywords as $keyword => $id) {
            if (stripos($val['title'], $keyword) !== false) {
                $category_id = $id;
                break;
            }
        }

        // Asking user: reuse the existing account or register a new one.
        $username = trim($val['username']);
        if (!$username) {
            $username = 'no_reg';
        }

        $uid = $this->model('account')->fetch_one('users', 'uid', "user_name = '" . $this->model('account')->quote($username) . "'");
        if (!$uid) {
            $uid = $this->model('account')->user_register($username, '7385568', null);
        }

        // Answering user: same lookup-or-register procedure.
        $answer_user = trim($val['answer_user']);
        if (!$answer_user) {
            $answer_user = 'mrliang';
        }

        $answer_uid = $this->model('account')->fetch_one('users', 'uid', "user_name = '" . $this->model('account')->quote($answer_user) . "'");
        if (!$answer_uid) {
            $answer_uid = $this->model('account')->user_register($answer_user, '7385568', null);
        }

        // Avatar: the name must be recomputed for every row. It used to be
        // left over from the previous iteration, so a row without an avatar
        // gave its answering user the previous row's picture.
        $imgname = trim($val['avatar']) ? basename($val['avatar']) : '';

        if ($imgname) {
            $full_path = str_replace('uploads', '', get_setting('upload_dir')) . $avatar_url . $imgname;
            echo $full_path . "\r\n";

            // Files of 1024 bytes or less are ignored (presumably failed or
            // placeholder downloads - confirm).
            if (file_exists($full_path) && filesize($full_path) > 1024) {
                make_dir(get_setting('upload_dir') . '/avatar/' . $this->model('account')->get_avatar($answer_uid, '', 1));

                // One resized copy per configured thumbnail size.
                foreach (AWS_APP::config()->get('image')->avatar_thumbnail as $size => $dimensions) {
                    AWS_APP::image()->initialize(array(
                        'quality' => 90,
                        'source_image' => $full_path,
                        'new_image' => get_setting('upload_dir') . '/avatar/' . $this->model('account')->get_avatar($answer_uid, $size, 0),
                        'width' => $dimensions['w'],
                        'height' => $dimensions['h']
                    ))->resize();
                }
            }
        }

        // Back-date the question by (100 - $key) * 100 seconds and place the
        // answer a random 100..10000 seconds after it.
        $now = time() - (100 - $key) * 100;
        $answer_now = $now + mt_rand(100, 10000);

        // Users are in place: publish the question.
        $question_id = $this->model('publish')->publish_question($val['title'], $val['message'], $category_id, $uid, $topics, null, null, null, true, null, $now);

        // Without a question id there is nothing to attach the answer to.
        if (!$question_id) {
            continue;
        }

        // Question is published: post its answer.
        $this->model('publish')->publish_answer($question_id, $val['answer_message'], $answer_uid, null, null, true, true, $answer_now);
    }

    // Advance the counter only when this page actually contained rows.
    if ($data) {
        file_put_contents('page.data', ($page + 1));
    }
}