# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
# useful for handling different item types with a single interface
from itemadapter import is_item, ItemAdapter
class Project3DownloaderMiddleware:
    """Downloader middleware that sets a fixed User-Agent on every request.

    Not all methods need to be defined. If a method is not defined,
    Scrapy acts as if the downloader middleware does not modify the
    passed objects.
    """

    # Browser User-Agent string written into each outgoing request's headers.
    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36 Edg/90.0.818.49'

    def process_request(self, request, spider):
        """Overwrite the request's ``User-Agent`` header.

        Called for each request that goes through the downloader
        middleware.

        Must either:
        - return None: continue processing this request
        - or return a Response object
        - or return a Request object
        - or raise IgnoreRequest: process_exception() methods of
          installed downloader middleware will be called

        This implementation always returns None so the request keeps
        flowing through the remaining middleware.
        """
        request.headers['User-Agent'] = self.user_agent
        return None

    def process_response(self, request, response, spider):
        """Pass the downloaded response through unchanged.

        Called with the response returned from the downloader.

        Must either:
        - return a Response object
        - return a Request object
        - or raise IgnoreRequest
        """
        return response

    def process_exception(self, request, exception, spider):
        """Leave the exception for the rest of the chain to handle.

        Called when a download handler or a process_request()
        (from other downloader middleware) raises an exception.

        Must either:
        - return None: continue processing this exception
        - return a Response object: stops process_exception() chain
        - return a Request object: stops process_exception() chain
        """
        # Set a proxy IP (currently disabled):
        # request.meta['proxy'] = 'http://Agent_Adress'
        # return request

        # Explicit return: without any statement the method body would be
        # empty (comments only), which is a syntax error.
        return None
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class Project3Item(scrapy.Item):
    """Container for one scraped record.

    Declares three fields: ``name``, ``rate`` and ``comment_num``.
    Further fields can be added in the same way, e.g.
    ``title = scrapy.Field()``.
    """

    # NOTE(review): field meanings are inferred from their names only;
    # confirm against the spider that fills them in.
    name = scrapy.Field()
    rate = scrapy.Field()
    comment_num = scrapy.Field()
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html

# useful for handling different item types with a single interface
from itemadapter import ItemAdapter