scrapy startproject example
tree
├── example
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── items.py
│   ├── middlewares.py
│   ├── pipelines.py
│   ├── settings.py
│   ├── settings.pyc
│   └── spiders
│       ├── book_spider.py
│       ├── book_spider.pyc
│       ├── __init__.py
│       └── __init__.pyc
cd example
cd spiders
vim book_spider.py
#-*- coding: utf-8 -*-
import scrapy
class BooksSpider(scrapy.Spider):
    """Spider that yields the title and price of every book it finds.

    Starts from the URLs in ``start_urls`` and keeps following the
    "next" pager link until a page has none.
    """

    # Every spider has its own identifier; it is the name passed to
    # `scrapy crawl`.
    name = "book"
    # Starting points of the crawl; more than one URL may be listed.
    start_urls = ['http://books.toscrape.com/']

    def parse(self, response):
        """Extract the books on one listing page, then queue the next page.

        Args:
            response: The downloaded listing page.

        Yields:
            One dict per book with the keys ``'name'`` and ``'price'``,
            followed by a ``scrapy.Request`` for the next page when the
            pager contains a "next" link.
        """
        for book in response.css('article.product_pod'):
            name = book.xpath('./h3/a/@title').extract_first()
            price = book.css('p.price_color::text').extract_first()
            yield {
                'name': name,
                'price': price,
            }

        # Pagination is handled once per page, after all items have been
        # yielded, rather than once per book.
        next_url = response.css('ul.pager li.next a::attr(href)').extract_first()
        if next_url:
            # The href is resolved against the current page URL before it is
            # requested.
            next_url = response.urljoin(next_url)
            yield scrapy.Request(next_url, callback=self.parse)
這個 http://books.toscrape.com/ 可以用來練習爬蟲
scrapy crawl book -o book.csv

