国产成人久久精品激情,欧洲vodafone精品性,а√天堂资源中文最新版地址

代碼如下，不知道為什么一直不能成功登錄

># -*- coding: utf-8 -*-
import scrapy
import re
import requests
#import urllib
from bs4 import BeautifulSoup

from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
from scrapy.loader import ItemLoader
from scrapy.loader.processors import MapCompose, Join
from scrapy.http import Request,FormRequest

from getweibo.items import InformationItem,TweetsItem

loginURL = "https://login.weibo.cn/login/"
#獲得驗證碼等信息
def get_captchainfo(loginURL):
    html = requests.get(loginURL).content
    bs = BeautifulSoup(html,"lxml")
       #print bs
       #注意通過bs.select元素尋找對象，返回的是列表對象
    password_name = (bs.select('input[type="password"]'))[0].get('name')
    vk = (bs.select('input[name="vk"]'))[0].get('value')
    capId = (bs.select('input[name="capId"]'))[0].get('value')
    #print password_name,vk,capId
    captcha_img = bs.find("img", src=re.compile('http://weibo.cn/interface/f/ttt/captcha/')).get('src')
    print captcha_img
    #captchaid可以從驗證碼圖片地址中直接截取獲得
    #urllib.urlretrieve(captcha_img, 'weibo_spider/image/captcha.jpg')
    #print "captcha download success!"
    captcha_input = raw_input("please input the captcha\n>")

    return (captcha_input,password_name,vk,capId)

class WeiboSpider(CrawlSpider):
    name = 'weibo'
    allowed_domains = ['weibo.cn']
    start_urls = ['http://weibo.cn/dafendi']#先暫時確定精分君的微博，之后start_urls可以從文件提取
 
    rules = (
        Rule(LinkExtractor(restrict_xpaths='//*[@id="pagelist"]/form/p/a')),
        Rule(LinkExtractor(restrict_xpaths='//*[contains(@href,"repost")]'),callback='parse_item')
    )
  
    headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, br",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Connection": "keep-alive",
    "Content-Type":" application/x-www-form-urlencoded",
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36",
    "Referer": "https://login.weibo.cn/login/"
    }
    # Start on the welcome page
    def start_requests(self):
        return [
            Request(
                loginURL,
                meta = {'cookiejar': 1},
                headers=self.headers,
                callback=self.parse_login)
        ]

    # Post welcome page's first form with the given user/pass
    def parse_login(self, response):
        print 'Preparing login'
        captcha=get_captchainfo(loginURL)
        print captcha
       
        return FormRequest.from_response(
            response,#from loginURL
            method="POST",
            meta = {'cookiejar' : response.meta['cookiejar']},#獲取cookies
            headers = self.headers,
            formdata = {
                    "mobile": "帳號",
                    captcha[1]: "密碼",
                    "code": captcha[0],
                    "remember":"on",
                    "backurl": "http%3A%2F%2Fweibo.cn",
                    "backtitle":u'手機新浪網(wǎng)',
                    "tryCount":"",
                    "vk": captcha[2],
                    "capId": captcha[3],
                    "submit": u'登錄'},
            callback = self.after_login,
            dont_filter = True
        )
    def after_login(self, response) :
        for url in self.start_urls :
            yield self.make_requests_from_url(url)

    def parse_start_url(self, response):#用來處理初始response
        html =  response.xpath('/html').extract()
        print html
           # Create the loader using the response
       
        l = ItemLoader(item=InformationItem(), response=response)

       # Load fields using XPath expressions
        l.add_xpath('id_', '//title/text()', MapCompose(lambda i:i[0:len(i)-3])),
        l.add_xpath('Info','//span[contains(@class,"ctt")][2]/text()'),
        l.add_xpath('Num_Tweets','//span[contains(@class,"tc")]/text()',MapCompose(lambda i: i[(i.index("[")+1):(i.index("]"))])),
        l.add_xpath('Num_Follows','//a[contains(@href,"follow")]/text()',MapCompose(lambda i: i[(i.index("[")+1):(i.index("]"))])),

        l.add_xpath('Num_Fans','//a[contains(@href,"fans")]/text()',MapCompose(lambda i: i[(i.index("[")+1):(i.index("]"))])),
        return l.load_item()

    def parse_item(self, response):
         l = ItemLoader(item=TweetsItem(), response=response)
         l.add_xpath('Content','//span[contains(@class,"ctt")]/text()')
         #l.add_xpath('')
         return l.load_item()

下邊settins.py的內(nèi)容

ROBOTSTXT_OBEY = False



HTTPERROR_ALLOWED_CODES = [302,]#返回400時按正常的返回對待
REDIRECT_ENABLED = False #關掉重定向，不會重定向到新的地址


DOWNLOAD_DELAY = 3

COOKIES_ENABLED = True
COOKIES_DEBUG = True

下邊是輸出

2017-04-09 15:53:17 [scrapy] DEBUG: Sending cookies to: <POST https://login.weibo.cn/login/?rand=201282002&backURL=http%3A%2F%2Fweibo.cn&backTitle=%E6%89%8B%E6%9C%BA%E6%96%B0%E6%B5%AA%E7%BD%91&vt=4>
Cookie: _T_WM=6348fb8a523fe1bc486f14d1304cf0d2

2017-04-09 15:53:19 [scrapy] DEBUG: Received cookies from: <302 https://login.weibo.cn/login/?rand=201282002&backURL=http%3A%2F%2Fweibo.cn&backTitle=%E6%89%8B%E6%9C%BA%E6%96%B0%E6%B5%AA%E7%BD%91&vt=4>
Set-Cookie: WEIBOCN_FROM=deleted; expires=Thu, 01-Jan-1970 00:00:01 GMT; path=/; domain=.weibo.cn

Set-Cookie: SUB=_2A2517Zg9DeRhGeVG61ER8yrEwzyIHXVXETh1rDV6PUJbkdAKLRXgkW0wSZc8S6dp1d-NlyAraSqa-1-_0Q..; expires=Tue, 09-May-2017 07:53:17 GMT; path=/; domain=.weibo.cn; httponly

Set-Cookie: gsid_CTandWM=4uuCcdef1lRXUEnMtsgL1fXlgec; expires=Tue, 09-May-2017 07:53:19 GMT; path=/; domain=.weibo.cn; httponly

2017-04-09 15:53:19 [scrapy] DEBUG: Crawled (302) <POST https://login.weibo.cn/login/?rand=201282002&backURL=http%3A%2F%2Fweibo.cn&backTitle=%E6%89%8B%E6%9C%BA%E6%96%B0%E6%B5%AA%E7%BD%91&vt=4> (referer: https://login.weibo.cn/login/)
2017-04-09 15:53:20 [scrapy] DEBUG: Received cookies from: <200 http://weibo.cn/dafendi>
Set-Cookie: _T_WM=80e15f38a0dfb65ea7bbcd00ebcaf1c0; expires=Tue, 09-May-2017 07:53:19 GMT; path=/; domain=.weibo.cn; httponly

Set-Cookie: WEIBOCN_FROM=deleted; expires=Thu, 01-Jan-1970 00:00:01 GMT; path=/; domain=.weibo.cn

2017-04-09 15:53:20 [scrapy] DEBUG: Crawled (200) <GET http://weibo.cn/dafendi> (referer: https://login.weibo.cn/login/?rand=201282002&backURL=http%3A%2F%2Fweibo.cn&backTitle=%E6%89%8B%E6%9C%BA%E6%96%B0%E6%B5%AA%E7%BD%91&vt=4)
2017-04-09 15:53:20 [scrapy] DEBUG: Scraped from <200 http://weibo.cn/dafendi>
{'Info': [u'\u8ba4\u8bc1\uff1a\u77e5\u540d\u5e7d\u9ed8\u535a\u4e3b \u5fae\u535a\u7b7e\u7ea6\u81ea\u5a92\u4f53'],
 'Num_Fans': [u'2055326'],
 'Num_Follows': [u'891'],
 'Num_Tweets': [u'1958'],
 'id_': [u'\u7cbe\u5206\u541b']}
2017-04-09 15:53:20 [scrapy] DEBUG: Sending cookies to: <GET http://weibo.cn/repost/EDsDTFqfJ?rl=0&uid=2626948743>
Cookie: _T_WM=80e15f38a0dfb65ea7bbcd00ebcaf1c0

2017-04-09 15:53:20 [scrapy] DEBUG: Sending cookies to: <GET http://weibo.cn/repost/EDxAwrBrG?rl=0&uid=2626948743>
Cookie: _T_WM=80e15f38a0dfb65ea7bbcd00ebcaf1c0

2017-04-09 15:53:20 [scrapy] DEBUG: Sending cookies to: <GET http://weibo.cn/repost/EDBmajRBl?rl=0&uid=2626948743>
Cookie: _T_WM=80e15f38a0dfb65ea7bbcd00ebcaf1c0

2017-04-09 15:53:20 [scrapy] DEBUG: Sending cookies to: <GET http://weibo.cn/repost/CsN9LnQiG?rl=0&uid=2626948743>
Cookie: _T_WM=80e15f38a0dfb65ea7bbcd00ebcaf1c0

2017-04-09 15:53:24 [scrapy] DEBUG: Received cookies from: <200 http://weibo.cn/repost/EDsDTFqfJ?rl=0&uid=2626948743>
Set-Cookie: WEIBOCN_FROM=deleted; expires=Thu, 01-Jan-1970 00:00:01 GMT; path=/; domain=.weibo.cn

2017-04-09 15:53:24 [scrapy] DEBUG: Crawled (200) <GET http://weibo.cn/repost/EDsDTFqfJ?rl=0&uid=2626948743> (referer: http://weibo.cn/dafendi)
2017-04-09 15:53:24 [scrapy] DEBUG: Scraped from <200 http://weibo.cn/repost/EDsDTFqfJ?rl=0&uid=2626948743>
{'Content': [u':',
             u' \u5047\u5982\u4efb\u4f55\u4e8b\u90fd\u80fd\u6210\u4e3a\u804c\u4e1a\uff0c\u4f60\u4f1a\u9009\u62e9\u4ec0\u4e48\u4f5c\u4e3a\u804c\u4e1a\uff1f \u200b\u200b\u200b']}
2017-04-09 15:53:28 [scrapy] DEBUG: Received cookies from: <200 http://weibo.cn/repost/EDxAwrBrG?rl=0&uid=2626948743>
Set-Cookie: WEIBOCN_FROM=deleted; expires=Thu, 01-Jan-1970 00:00:01 GMT; path=/; domain=.weibo.cn

2017-04-09 15:53:28 [scrapy] DEBUG: Crawled (200) <GET http://weibo.cn/repost/EDxAwrBrG?rl=0&uid=2626948743> (referer: http://weibo.cn/dafendi)
2017-04-09 15:53:28 [scrapy] DEBUG: Scraped from <200 http://weibo.cn/repost/EDxAwrBrG?rl=0&uid=2626948743>
{'Content': [u'\u7279\u522b\u7684\u751f\u65e5\u793c\u7269\u3002 \u200b\u200b\u200b']}
2017-04-09 15:53:32 [scrapy] DEBUG: Received cookies from: <200 http://weibo.cn/repost/EDBmajRBl?rl=0&uid=2626948743>
Set-Cookie: WEIBOCN_FROM=deleted; expires=Thu, 01-Jan-1970 00:00:01 GMT; path=/; domain=.weibo.cn

2017-04-09 15:53:32 [scrapy] DEBUG: Crawled (200) <GET http://weibo.cn/repost/EDBmajRBl?rl=0&uid=2626948743> (referer: http://weibo.cn/dafendi)
2017-04-09 15:53:32 [scrapy] DEBUG: Scraped from <200 http://weibo.cn/repost/EDBmajRBl?rl=0&uid=2626948743>
{'Content': [u'\u7231\u7b11\u7684\u5973\u5b69\u5b50\uff0c\u8fd0\u6c14\u4e00\u5b9a\u4e0d\u4f1a\u592a\u597d\u2026\u2026',
             u' \u200b\u200b\u200b']}
2017-04-09 15:53:36 [scrapy] DEBUG: Received cookies from: <200 http://weibo.cn/repost/CsN9LnQiG?rl=0&uid=2626948743>
Set-Cookie: WEIBOCN_FROM=deleted; expires=Thu, 01-Jan-1970 00:00:01 GMT; path=/; domain=.weibo.cn

2017-04-09 15:53:36 [scrapy] DEBUG: Crawled (200) <GET http://weibo.cn/repost/CsN9LnQiG?rl=0&uid=2626948743> (referer: http://weibo.cn/dafendi)
2017-04-09 15:53:36 [scrapy] DEBUG: Scraped from <200 http://weibo.cn/repost/CsN9LnQiG?rl=0&uid=2626948743>
{'Content': [u':\u4e00\u4e2a\u957f\u5fae\u535a\u5408\u96c6\uff0c\u5927\u5bb6\u65e0\u804a\u53c8\u6ca1\u770b\u8fc7\u7684\u8bdd\u53ef\u4ee5\u770b\u770b[\u7f9e\u55d2\u55d2] \u200b\u200b\u200b']}
2017-04-09 15:53:36 [scrapy] INFO: Closing spider (finished)
2017-04-09 15:53:36 [scrapy] INFO: Stored json feed (5 items) in: wanghongmingdan.json
2017-04-09 15:53:36 [scrapy] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 3029,
 'downloader/request_count': 7,
 'downloader/request_method_count/GET': 6,
 'downloader/request_method_count/POST': 1,
 'downloader/response_bytes': 22746,
 'downloader/response_count': 7,
 'downloader/response_status_count/200': 6,
 'downloader/response_status_count/302': 1,
 'finish_reason': 'finished',
 'finish_time': datetime.datetime(2017, 4, 9, 7, 53, 36, 596076),
 'item_scraped_count': 5,
 'log_count/DEBUG': 27,
 'log_count/INFO': 8,
 'log_count/WARNING': 2,
 'request_depth_max': 3,
 'response_received_count': 7,
 'scheduler/dequeued': 7,
 'scheduler/dequeued/memory': 7,
 'scheduler/enqueued': 7,
 'scheduler/enqueued/memory': 7,
 'start_time': datetime.datetime(2017, 4, 9, 7, 53, 2, 180831)}
2017-04-09 15:53:36 [scrapy] INFO: Spider closed (finished)

2017-04-09 20:11:50 [scrapy] DEBUG: Redirecting (302) to <GET http://weibo.cn/crossDomain/?g=4uegcdef1d93rkj4S3ZomfXlgec&t=1491739909&m=9144&r=&u=http%3A%2F%2Fweibo.cn%3Fgsid%3D4uegcdef1d93rkj4S3ZomfXlgec%26PHPSESSID%3D%26vt%3D4&cross=1&st=ST-MzgwMzAzNDg4MA==-1491739909-tc-27ED8C8D7528C9185E75F7986B8050B7-1,ST-MzgwMzAzNDg4MA==-1491739909-tc-BED83CC16AC311D2BBA234E8F08BBD39-1> from <POST https://login.weibo.cn/login/?rand=842328789&backURL=http%3A%2F%2Fweibo.cn&backTitle=%E6%89%8B%E6%9C%BA%E6%96%B0%E6%B5%AA%E7%BD%91&vt=4>
2017-04-09 20:11:50 [scrapy] DEBUG: Redirecting (meta refresh) to <GET http://weibo.cn/> from <GET http://weibo.cn/crossDomain/?g=4uegcdef1d93rkj4S3ZomfXlgec&t=1491739909&m=9144&r=&u=http%3A%2F%2Fweibo.cn%3Fgsid%3D4uegcdef1d93rkj4S3ZomfXlgec%26PHPSESSID%3D%26vt%3D4&cross=1&st=ST-MzgwMzAzNDg4MA==-1491739909-tc-27ED8C8D7528C9185E75F7986B8050B7-1,ST-MzgwMzAzNDg4MA==-1491739909-tc-BED83CC16AC311D2BBA234E8F08BBD39-1>

亚洲国产日韩欧美一区二区三区,精品亚洲国产成人av在线,国产99视频精品免视看7,99国产精品久久久久久久成人热,欧美日韩亚洲国产综合乱