https://www.shiyanlou.com/contests/lou5/console
創建數據庫表
-- Schema for the course crawler.
-- Every statement is guarded with "if not exists", so the script can be
-- re-run without failing on objects that are already present.
create database if not exists shiyanlou_data
    default character set utf8mb4;
use shiyanlou_data;

-- One row per course fragment extracted by the crawler.
create table if not exists course_data (
    id int(11) not null auto_increment,
    cname varchar(255) default null,              -- course name
    cdesc varchar(255) default null,              -- course description ('' when the page has none)
    ctype varchar(255) default null,              -- course type label ('' when the page has none)
    nlong enum('true','false') default null,      -- 'true' when the crawler judged the name longer than 16
    primary key (id)
) ENGINE=INNODB DEFAULT CHARSET=utf8mb4;
php 代碼
<?php
/**
 * Micro PHP crawler.
 *
 * Reads a saved HTML page from disk, extracts one record per
 * `<div class="course-body">` fragment with regular expressions, and inserts
 * the records into the `course_data` MySQL table.
 *
 * The code structure below is only a reference; it may be reorganised freely
 * as long as the goal of the challenge is achieved.
 */
class Crawler
{
    /** @var string Raw HTML most recently read by loadFile(). */
    private $content = '';

    /** @var array[] Course records accumulated by parseContent(). */
    private $data = [];

    /** @var mysqli Connection shared by all instances; (re)opened by the constructor. */
    static private $mysql;

    /**
     * Opens the database connection as soon as the crawler is created.
     *
     * Terminates the script via die() when the connection fails.
     */
    function __construct()
    {
        echo "開始爬取內(nèi)容...<br>";
        // Connect through the object-oriented mysqli API.
        self::$mysql = new MySQLi('localhost', 'root', '', 'shiyanlou_data', 3306);
        // Stop immediately when the connection could not be established.
        if (self::$mysql->connect_errno) {
            die('連接錯(cuò)誤' . self::$mysql->connect_error);
        }
        // Use the same character set the table is declared with (utf8mb4);
        // MySQL's legacy "utf8" cannot carry 4-byte characters.
        self::$mysql->set_charset('utf8mb4');
    }

    /**
     * Reads the HTML file that will be parsed.
     *
     * @param string $file_path Path of the saved page.
     * @return void
     */
    public function loadFile($file_path)
    {
        echo "正在加載文件...<br>";
        $raw = file_get_contents($file_path);
        // file_get_contents() returns false on failure (PHP already reports
        // the warning); store an empty string so parsing still gets a string.
        $this->content = ($raw === false) ? '' : $raw;
    }

    /**
     * Placeholder from the challenge skeleton; not implemented.
     */
    public function parseCourseBody()
    {
        //TODO
    }

    /**
     * Extracts every course fragment from the loaded HTML and appends one
     * record per fragment to the internal record list.
     *
     * Each record has the keys cname, cdesc, ctype, nlong ('true'/'false')
     * and name_length. Fields that are missing in a fragment become ''.
     * The collected records are printed for inspection.
     *
     * @return void
     */
    public function parseContent()
    {
        echo "開始解析內(nèi)容...<br>";
        // The matches stay in a local variable so the loaded HTML is left
        // intact and the method can safely be called again.
        preg_match_all('#<div class="course-body">[\s\S]*?</div>\s*?</div>#', $this->content, $matches);
        // Index 0 holds the full-pattern matches.
        $fragments = isset($matches[0]) ? $matches[0] : [];

        foreach ($fragments as $item) {
            $name = self::firstGroup('#<div class="course-name">(.*?)</div>#', $item);

            $course = [];
            $course['cname'] = $name;
            $course['cdesc'] = self::firstGroup('#<div class="course-desc">(.*?)</div>#', $item);
            $course['ctype'] = self::firstGroup('#<span class=".*?pull-right">(.*?)</span>#', $item);
            // NOTE(review): strlen() counts bytes, so multi-byte names cross
            // the 16 threshold sooner than their character count suggests —
            // confirm whether a character count (mb_strlen) is intended.
            $course['nlong'] = strlen($name) > 16 ? 'true' : 'false';
            $course['name_length'] = strlen($name);

            $this->data[] = $course;
        }

        echo "<pre>";
        print_r($this->data);
        echo "</pre>";
        echo "解析內(nèi)容結(jié)束! <br>";
    }

    /**
     * Inserts every parsed record into the course_data table.
     *
     * Values are bound to a prepared statement, so quotes or other special
     * characters in the scraped text cannot alter the SQL.
     *
     * @return void
     */
    public function saveData()
    {
        echo "存入數(shù)據(jù)庫(kù)...";
        if (empty($this->data)) {
            return;
        }

        $stmt = self::$mysql->prepare(
            'insert into course_data (ctype,cname,nlong,cdesc) values (?,?,?,?)'
        );
        if (!$stmt) {
            echo '添加失敗' . self::$mysql->error;
            return;
        }

        // bind_param() binds by reference: the variables are bound once and
        // re-assigned for every row before execute().
        $ctype = $cname = $nlong = $cdesc = '';
        $stmt->bind_param('ssss', $ctype, $cname, $nlong, $cdesc);

        foreach ($this->data as $one) {
            $ctype = $one['ctype'];
            $cname = $one['cname'];
            $nlong = $one['nlong'];
            $cdesc = $one['cdesc'];

            if ($stmt->execute()) {
                echo '添加成功';
            } else {
                echo '添加失敗' . $stmt->error;
            }
        }

        $stmt->close();
    }

    /**
     * Placeholder from the challenge skeleton; only prints a progress line.
     */
    public function parseTitle()
    {
        echo "解析課程標(biāo)題...<br>";
        //TODO
    }

    /**
     * Placeholder from the challenge skeleton; only prints a progress line.
     */
    public function parseDesc()
    {
        echo "解析課程簡(jiǎn)介...<br>";
        //TODO
    }

    /**
     * Placeholder from the challenge skeleton; only prints a progress line.
     */
    public function parseType()
    {
        echo "解析課程類型...<br>";
        //TODO
    }

    /**
     * Placeholder from the challenge skeleton; only prints a progress line.
     */
    public function titleIsLong()
    {
        echo "判斷課程名是否超長(zhǎng)...<br>";
        //TODO
    }

    /**
     * Returns the first capture group of $pattern in $subject, or '' when
     * the pattern does not match.
     *
     * @param string $pattern PCRE pattern with at least one capture group.
     * @param string $subject Text to search.
     * @return string
     */
    private static function firstGroup($pattern, $subject)
    {
        if (preg_match($pattern, $subject, $found) === 1 && isset($found[1])) {
            return $found[1];
        }
        return '';
    }
}
// Entry point: declare the response as UTF-8 HTML, then run the pipeline —
// load the saved page, parse the course fragments, and store them.
header("Content-Type: text/html;charset=utf-8");

$crawler = new Crawler();
$crawler->loadFile('shiyanlou.html');
$crawler->parseContent();
$crawler->saveData();