Getting a specific site's cookies from the Chrome browser with Python 3

2020-07-25 · wwm

On Windows, first open the site in Chrome by hand and log in with your account and password;
then use CryptUnprotectData from win32.win32crypt in Python 3 to read and decrypt the cookies from Chrome's local store.
I could not find this library for Python 2.
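
As a quick illustration of the API (a minimal sketch, assuming pywin32 is installed and it runs under the same Windows user account that encrypted the data): CryptUnprotectData returns a (description, bytes) tuple, which is why the script below takes element [1].

import win32crypt

# encrypt a test value with the current user's DPAPI key, then decrypt it again
blob = win32crypt.CryptProtectData(b"hello", "demo")
desc, plain = win32crypt.CryptUnprotectData(blob)
print(desc, plain)   # -> demo b'hello'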


#-*- coding:utf-8 -*-

import sqlite3
import os
#import  win32crypt
import browsercookie
from win32.win32crypt import CryptUnprotectData
import urllib3
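# Note: urllib3 is not used directly below; one common reason to import it alongside
# requests.get(..., verify=False) is to silence the InsecureRequestWarning, e.g.:
#   urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)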
import urllib.request

import requests
from bs4 import BeautifulSoup
import traceback
import hashlib


def getcookiefromchrome(host='.1688.com'):
    try:
        cookiepath=os.environ['LOCALAPPDATA']+r"\Google\Chrome\User Data\Default\Cookies"
        sql="select host_key,name,encrypted_value from cookies where host_key='%s'" % host
        with sqlite3.connect(cookiepath) as conn:
            cu=conn.cursor()        
            cookies={name:CryptUnprotectData(encrypted_value)[1].decode() for host_key,name,encrypted_value in cu.execute(sql).fetchall()}
            #print(cookies)
            return cookies    
    except Exception as e:
        print(e)
        return None

def fetch_page2(url,cookie):
    data = requests.get(url,cookies=cookie,verify=False).text
    print(type(data))
    print(data)
    soup = BeautifulSoup(data,'lxml',from_encoding="gbk")
    for tag in soup.find_all('a', class_='title-link'):
        print(tag.get_text())
    #print(soup)
    with open("E:\\aaaa.html""w",encoding='gb18030'as f:
        f.write(data)
        print("ok")

    return data

BASEPATH ="E:"
def load_page(url):
    response=requests.get(url)
    data=response.content
    return data

def save_pic(url,name):
    #name_str = name.encode("latin1")
    #print(type(name_str))
    #md5 = hashlib.md5('%s' % (name_str)).hexdigest()

    m=hashlib.md5()
    m.update(name.encode(encoding='utf-8'))
    md5 = m.hexdigest()
    print(md5)
    p1=md5[0:2]
    p2=md5[6:8]

    try:
        path = "%s/%s/%s" %(BASEPATH,p1,p2)
        if not os.path.exists(path):
            os.makedirs(path)
        filename ='%s/%s.jpg' % (path,name)
        print(filename)
        img = load_page(url)
        with open(filename, 'wb+') as f:
            f.write(img)
            print("mmmmmmmmmmmmmmmmmm")
                #print filename, name

    except Exception as e:
        traceback.print_exc()
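
# Layout note: save_pic shards images into BASEPATH/<md5[0:2]>/<md5[6:8]>/<name>.jpg so
# that a large crawl does not pile every file into a single directory.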


url ="%24mobileOfferFilter&groupFilter=false&sortType=wangpu_score&pageNum=3#search-bar"
#url = ""
cookie =  getcookiefromchrome(".1688.com")
print(cookie)

rr= fetch_page2(url,cookie)
#save_pic("","wwm")
print(rr)