for num inrange(2, 10): form_data = {"searchType": "MulityTermsSearch", "ParamIsNullOrEmpty": "false", "Islegal": "false", "Content": "计算机", "Page": str(num)} for url in self.start_urls: yield scrapy.FormRequest(url=url, formdata=form_data, method='POST', callback=self.parse)
def parse(self, response):
    """Store titles, authors, affiliations and sources from a result page.

    Four XPath queries are run against ``response``; each extracted ``title``
    attribute is inserted as one row into the matching table of the local
    ``cnki`` MySQL database (``article``, ``author``, ``affiliated``,
    ``source``).  For the author attribute only the text before the first
    ``;`` is stored (presumably the first listed author -- confirm against
    the page markup).

    Every row is committed on its own.  A row that the database rejects is
    logged and rolled back, and processing continues with the next row.

    Args:
        response: Scrapy response for one search-result page.

    Returns:
        None.  The method only has the side effect of writing to MySQL.
    """
    import logging

    logger = logging.getLogger(__name__)

    def keep_as_is(value):
        return value

    def before_first_semicolon(value):
        return value.split(";")[0]

    # (xpath, parameterized INSERT, value transform), in insertion order.
    # The %s placeholders are filled in by the driver, which escapes the
    # scraped text; the values must never be interpolated into the SQL
    # string by hand, since they come from an external web page.
    targets = (
        (
            '//div[@class="list-item"]/p[@class="tit clearfix"]//a[1]/@title',
            "INSERT INTO article(title) VALUE (%s)",
            keep_as_is,
        ),
        (
            '//div[@class="list-item"]/p[@class="source"]/span[1]/@title',
            "INSERT INTO author(`name`) VALUE (%s)",
            before_first_semicolon,
        ),
        (
            '//div[@class="list-item"]/p[@class="source"]/span[3]/@title',
            "INSERT INTO affiliated(`name`) VALUE (%s)",
            keep_as_is,
        ),
        (
            '//div[@class="list-item"]/p[@class="source"]/a[1]/span/@title',
            "INSERT INTO source(`name`) VALUE (%s)",
            keep_as_is,
        ),
    )

    # NOTE(review): connection settings and credentials are hard-coded here;
    # consider moving them to the project settings.
    db = MySQLdb.connect(
        "localhost", "root", "123456", "cnki", charset='utf8', port=3306
    )
    try:
        cursor = db.cursor()
        for xpath, sql, transform in targets:
            for raw_value in response.xpath(xpath).extract():
                value = transform(raw_value)
                try:
                    cursor.execute(sql, (value,))
                    db.commit()
                except MySQLdb.Error:
                    # Best effort: skip the offending row, keep the rest.
                    logger.exception(
                        "Insert failed for %r using %r", value, sql
                    )
                    db.rollback()
    finally:
        # Always release the connection, even if extraction or an
        # unexpected error interrupts the loop.
        db.close()