import sys, os
try:
    raise NotImplementedError("No error")
except Exception as e:
    # Report the exception class, the bare file name of the frame that
    # raised it, and the line number recorded in the traceback.
    exc_type, exc_obj, exc_tb = sys.exc_info()
    fname = os.path.basename(exc_tb.tb_frame.f_code.co_filename)
    print(exc_type, fname, exc_tb.tb_lineno)
# -*- coding: utf-8 -*-
import json
class Dict(dict):
    """Dictionary that creates nested dictionaries on first access.

    Looking up a key that is absent stores a new, empty instance of the
    same class under that key and returns it, so chained assignments such
    as ``d['a']['b'] = 1`` work without creating the levels by hand.
    """

    def __missing__(self, key):
        # Instantiate via type(self) so subclasses nest their own type.
        child = type(self)()
        self[key] = child
        return child
# Create an empty Dict and print it as indented JSON; non-ASCII characters
# are emitted as-is rather than escaped.
row_data = Dict()
print(json.dumps(row_data, indent=4, ensure_ascii=False))
#!/usr/bin/python
#-*- coding: utf-8 -*-
import json, sys
import xlrd
from datetime import date, datetime
from collections import OrderedDict
import json
import codecs
# Python 2-only idiom: switch the interpreter's default string encoding
# to UTF-8 for the rest of the script.
reload(sys)
sys.setdefaultencoding('utf-8')

# Convert every sheet of the workbook into its own JSON file. Output files
# are numbered 1.json, 2.json, ... in sheet order.
wb = xlrd.open_workbook(r'E:\dir\school\2.xlsx')
sheet_list = wb.sheet_names()
filename = 1
for sheet in sheet_list:
    print(sheet)
    convert_list = []
    sh = wb.sheet_by_name(sheet)
    # The first row supplies the column titles used as JSON keys; a sheet
    # without any rows yields an empty JSON list instead of crashing.
    title = sh.row_values(0) if sh.nrows else []
    for rownum in range(1, sh.nrows):
        rowvalue = sh.row_values(rownum)
        # OrderedDict keeps the columns in spreadsheet order.
        single = OrderedDict()
        for colnum in range(0, len(rowvalue)):
            single[title[colnum]] = rowvalue[colnum]
        convert_list.append(single)
    j = json.dumps(convert_list, ensure_ascii=False, indent=2)
    # Raw string keeps the Windows backslashes literal; the with-block
    # closes the file even if the write fails.
    with open(r'E:\dir\school\json\{}.json'.format(filename), 'w') as f:
        f.write(j)
    filename = filename + 1
Python 正则表达式匹配任意字符(包括换行符)的写法
想使用正则表达式来获取一段文本中的任意字符,写出如下匹配规则:
(.*)
结果运行之后才发现,无法获得换行之后的文本。于是查了一下手册,才发现正则表达式中,“.”(点符号)匹配的是除了换行符“\n”以外的所有字符。
以下为正确的正则表达式匹配规则:
([\s\S]*)
同时,也可以用 “([\d\D]*)”、“([\w\W]*)” 来表示
# -*- coding: utf-8 -*-
import base64
import pyDes
def encrypt_3des(clear_text, key):
    """Encrypt text with Triple DES and return the result as Base64 text.

    The key is UTF-8 encoded and then NUL-padded or truncated to exactly
    24 bytes. The cipher runs in ECB mode with PKCS5 padding.

    Args:
        clear_text: Text to encrypt; it is UTF-8 encoded before encryption.
        key: Text key of any length.

    Returns:
        The Base64 encoding of the ciphertext, decoded as UTF-8 text.
    """
    payload = clear_text.encode('utf-8')
    # Pad short keys with NUL bytes, then cut anything beyond 24 bytes.
    fixed_key = key.encode('utf-8').ljust(24, "\0".encode('utf-8'))[:24]
    cipher = pyDes.triple_des(
        fixed_key, pyDes.ECB, IV=None, pad=None, padmode=pyDes.PAD_PKCS5
    )
    encrypted = cipher.encrypt(payload)
    return base64.b64encode(encrypted).decode('utf-8')
def decrypt_3des(data, key):
    """Decrypt Base64-encoded Triple DES ciphertext back to text.

    The key is UTF-8 encoded and then NUL-padded or truncated to exactly
    24 bytes. The cipher runs in ECB mode with PKCS5 padding.

    Args:
        data: Base64 text holding the ciphertext.
        key: Text key of any length.

    Returns:
        The decrypted bytes decoded as UTF-8 text.
    """
    ciphertext = base64.b64decode(data.encode('utf-8'))
    # Pad short keys with NUL bytes, then cut anything beyond 24 bytes.
    fixed_key = key.encode('utf-8').ljust(24, "\0".encode('utf-8'))[:24]
    cipher = pyDes.triple_des(
        fixed_key, pyDes.ECB, IV=None, pad=None, padmode=pyDes.PAD_PKCS5
    )
    plain = cipher.decrypt(ciphertext)
    return plain.decode('utf-8')
大街网登录
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
import time
import json
import sys
import requests
import re
# Shared requests session used for every HTTP call in this script
session = requests.session()
# Default request headers; login() copies these and overrides fields per request
# NOTE(review): the Referer below points at lagou.com although this script
# talks to dajie.com -- login() replaces it before each request; confirm it
# is a copy-paste leftover.
HEADERS = {
'Referer': 'https://passport.lagou.com/login/login.html',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:51.0) Gecko/20100101 Firefox/51.0',
}
def login(username, passwd):
    """Submit the dajie.com login form, then request the job-site auth check.

    The body of each response is printed; nothing is returned.

    Args:
        username: Value sent as the ``email`` form field.
        passwd: Value sent as the ``password`` form field.
    """
    submit_headers = dict(HEADERS)
    submit_headers['Referer'] = 'https://www.dajie.com/'
    submit_headers['x-requested-with'] = 'XMLHttpRequest'
    submit_headers['Host'] = 'www.dajie.com'

    form = {
        'captcha' : '',
        'email' : username,
        'password': passwd,
        'rememberMe': '1'
    }
    submit_url = 'https://www.dajie.com/account/newloginsubmitm?callback=NEW_VERSION_LOGIN_CALLBACK&_CSRFToken=&ajax=1'
    response = session.post(submit_url, data=form, headers=submit_headers)
    print(response.content)

    # Second request goes to the job sub-domain through the same session.
    check_headers = dict(HEADERS)
    check_headers['Host'] = 'job.dajie.com'
    check_headers['Referer'] = 'https://www.dajie.com/'
    response = session.get('https://job.dajie.com/auth/checking', headers=check_headers)
    print(response.text)


if __name__ == "__main__":
    # Credentials are intentionally blank here; fill them in before running.
    username = ''
    passwd = ''
    login(username, passwd)
登录58同城,提供两种版本:
第1种是模拟输入用户名密码登录
第2种是调用58自带的js获取登录参数再登录
# -*- coding: utf-8 -*-
import time, sys, re
import requests
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from PIL import Image
# Log in to 58.com by driving the login form in a headless browser, then
# reuse the browser's cookies for a plain requests call.
username = ''
passwd = ''

driver = webdriver.PhantomJS(executable_path='C:\\Python27\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe')
try:
    driver.get('https://passport.58.com/login')
    # Fixed pauses below presumably give the page time to load/react.
    time.sleep(2)
    pwdLogin = driver.find_element_by_id('pwdLogin')
    pwdLogin.click()

    # Enter the user name
    usernameUser = driver.find_element_by_id('usernameUser')
    usernameUser.send_keys(username)
    time.sleep(1)
    passwordUserText = driver.find_element_by_id('passwordUserText')
    passwordUserText.click()

    # Enter the password
    passwordUser = driver.find_element_by_id('passwordUser')
    passwordUser.send_keys(passwd)

    # Click the login button
    btnSubmitUser = driver.find_element_by_id('btnSubmitUser')
    btnSubmitUser.click()
    time.sleep(3)

    # Collect the cookies held by the browser, both as a name->value dict
    # and as "name=value" fragments for the Cookie header.
    dict1_cookie = {}
    cookie_tmp = []
    for cookie in driver.get_cookies():
        data = "{}={}".format(cookie['name'], cookie['value'])
        dict1_cookie[cookie['name']] = cookie['value']
        cookie_tmp.append(data)
finally:
    # The browser is not needed once the cookies are copied; quitting here
    # also prevents a leaked PhantomJS process when any step above fails.
    driver.quit()

_cookie = ';'.join(cookie_tmp)
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0",
    "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language":"zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
    "Accept-Encoding":"gzip, deflate, br",
    "Connection":"keep-alive",
    "Host":"my.58.com",
    "Cookie":_cookie
}

# Fetch the account page using the cookies taken from the browser
session = requests.session()
response = session.get('https://my.58.com/index', headers=HEADERS)
print(response.text)
第2种方法,调用自带的js进行模拟登录
# -*- coding: utf-8 -*-
import time, sys, re, json
import requests
from selenium import webdriver
from selenium.webdriver import DesiredCapabilities
# Log in to 58.com by calling the login page's own JavaScript helpers to
# build the encrypted password and token, then posting the form with requests.
username = ''
passwd = ''

# Set the browser User-Agent
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
desired_capabilities['phantomjs.page.customHeaders.User-Agent'] = (
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
)
driver = webdriver.PhantomJS(executable_path='C:\\Python27\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe', desired_capabilities=desired_capabilities)
try:
    driver.get('https://passport.58.com/login')
    time.sleep(1)

    # Read the RSA parameters the page exposes in hidden inputs
    rsaModulus = driver.find_element_by_id('rsaModulus').get_attribute('value')
    rsaExponent = driver.find_element_by_id('rsaExponent').get_attribute('value')

    # Encrypt "<millisecond timestamp><password>" with the page's own
    # encryptString(). Values are passed as script arguments instead of
    # being spliced into the JavaScript source, so a password containing
    # quotes or backslashes cannot break (or inject into) the script.
    timespan = str(int(round(time.time() * 1000)))
    encrypt_passwd = driver.execute_script(
        'return encryptString(arguments[0], arguments[1], arguments[2])',
        timespan + passwd, rsaExponent, rsaModulus,
    )
    # Result is not used below; the call is kept because the original made
    # it -- NOTE(review): confirm whether it is needed for its side effects.
    Fingerprint2 = driver.execute_script('return new Fingerprint2().get()')
    getTokenId = driver.execute_script('return getTokenId()')
    fingerprint = driver.find_element_by_id('fingerprint').get_attribute('value')
finally:
    # Everything needed has been read from the page; always shut the
    # PhantomJS process down, even if a lookup above failed.
    driver.quit()

session = requests.session()
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0",
    "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Origin":"https://passport.58.com",
    'Content-Type':'application/x-www-form-urlencoded',
    "Upgrade-Insecure-Requests":"1",
    'Referer':'https://passport.58.com/login?path=http://my.58.com/?pts=' + str(int(round(time.time() * 1000)))
}
postData = {
    "source":"pc-login",
    "path":'http://my.58.com/?pts=' + str(int(round(time.time() * 1000))),
    "password":encrypt_passwd,
    "timesign":'',
    "isremember":"false",
    "callback":"successFun",
    "yzmstate":"",
    "fingerprint":"",
    "finger2":fingerprint,
    "tokenId":getTokenId,
    "username":username,
    "validcode":"",
    "vcodekey":"",
    "btnSubmit":"登录中..."
}
rep = session.post('https://passport.58.com/login/dologin', data=postData, headers=headers)

# The reply wraps a JSON object in parentheses; pull out the object itself.
match = re.search(r'\((\{.*?\})\)', rep.text)
if match:
    res_json = json.loads(match.group(1))
    print(res_json)
    if res_json['code'] == 0:
        print('登陆成功!')
    else:
        print(res_json['msg'])
需要注意:添加 Cookie 时需要设置 path、secure。还有一点需要注意:要先打开一个同域下的网站,再添加 Cookie,然后再打开对应页面。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
import time
import json
import sys
import subprocess
import requests
import hashlib
import re
# Shared requests session used for every HTTP call in this script
session = requests.session()
# Default request headers; login() copies these and adds the anti-forgery fields
HEADERS = {
'Referer': 'https://passport.lagou.com/login/login.html',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:51.0) Gecko/20100101 Firefox/51.0',
}
def encrypt_password(passwd, salt='veenike'):
    """Return the salted double-MD5 hex digest of a password.

    Computes ``md5(salt + md5(passwd) + salt)``, with both digests taken
    as lowercase hex text and all text UTF-8 encoded before hashing.

    Args:
        passwd: Plain-text password.
        salt: Text wrapped around the inner digest. The default,
            ``'veenike'``, is the fixed value the original author found
            hard-coded in the site's JavaScript.

    Returns:
        A 32-character hexadecimal digest string.
    """
    inner_digest = hashlib.md5(passwd.encode('utf-8')).hexdigest()
    salted = salt + inner_digest + salt
    return hashlib.md5(salted.encode('utf-8')).hexdigest()
def get_token():
    """Fetch the login page and extract the anti-forgery token and code.

    Returns:
        A ``(token, code)`` tuple: the first run of five hyphen-separated
        word groups found in the page, and the quoted digits that follow
        ``X_Anti_Forge_Code``.

    Raises:
        ValueError: If either value cannot be found in the page.
    """
    login_page = 'https://passport.lagou.com/login/login.html'
    page = session.get(login_page, headers=HEADERS)
    # Match against the decoded text: on Python 3 ``.content`` is bytes and
    # cannot be searched with a str pattern.
    html = page.text
    token_match = re.search(r'(\w+\-\w+\-\w+\-\w+\-\w+)', html)
    code_match = re.search(r"X_Anti_Forge_Code.*?'(\d+)'", html)
    if token_match is None or code_match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            'anti-forgery token/code not found in {}'.format(login_page)
        )
    return (token_match.group(1), code_match.group(1))
def login(username, passwd):
    """Log in to lagou.com, then request the dashboard page.

    The body of each response is printed; nothing is returned.

    Args:
        username: Account name sent as the ``username`` form field.
        passwd: Plain-text password; it is hashed with encrypt_password()
            before being sent.
    """
    X_Anti_Forge_Token, X_Anti_Forge_Code = get_token()
    login_headers = HEADERS.copy()
    # Header names are sent exactly as the original spelled them ("Anit");
    # presumably that is what the server expects -- verify before renaming.
    login_headers.update({
        'X-Requested-With': 'XMLHttpRequest',
        'X-Anit-Forge-Token': X_Anti_Forge_Token,
        'X-Anit-Forge-Code': X_Anti_Forge_Code,
    })
    postData = {
        'isValidate' : 'true',
        'username' : username,
        'password': encrypt_password(passwd),
        'request_form_verifyCode': '',
        'submit': '',
    }
    response = session.post('https://passport.lagou.com/login/login.json', data=postData, headers=login_headers)
    # print() call form works on both Python 2 and Python 3.
    print(response.content)

    # The dashboard request is sent without the login-only headers.
    del login_headers['Referer']
    del login_headers['X-Requested-With']
    del login_headers['X-Anit-Forge-Token']
    del login_headers['X-Anit-Forge-Code']
    req = session.get('https://easy.lagou.com/dashboard/index.htm?from=gray', headers=login_headers)
    print(req.content)


if __name__ == "__main__":
    # Credentials are intentionally blank here; fill them in before running.
    username = ''
    passwd = ''
    login(username, passwd)
一、介绍
QQBot 是一个用 python 实现的、基于腾讯 SmartQQ 协议的简单 QQ 机器人,可运行在 Linux 、 Windows 和 Mac OSX 平台下。
本项目 github 地址: https://github.com/pandolia/qqbot
你可以通过扩展 QQBot 来实现:
监控、收集 QQ 消息
自动消息推送
聊天机器人
通过 QQ 远程控制你的设备
给你A,B两个文件,各存放50亿条URL,每条URL占用64字节,内存限制是16G,让你找出A,B文件共同的URL。如果是三个乃至n个文件呢?
计数统计相信大家都不陌生,简单地说就是统计某一项出现的次数。实际应用中很多需
求都需要用到这个模型,如检测样本中某一值出现的次数、日志分析某一消息出现的频率、
分析文件中相同字符串出现的概率等。这种类似的需求有很多种实现方法。
Thread 是 threading 模块中最重要的类之一,可以使用它来创建线程。有两种方式来创建线程:一种是通过继承 Thread 类,重写它的 run 方法;另一种是创建一个 threading.Thread 对象,在它的初始化函数(__init__)中将可调用对象作为参数传入。