forked from ls1248659692/python_guide
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgithub_top25.py
More file actions
153 lines (117 loc) · 4.28 KB
/
github_top25.py
File metadata and controls
153 lines (117 loc) · 4.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/usr/bin/python
# coding=utf8
import html
import os
import subprocess
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import requests
from bs4 import BeautifulSoup
from pyquery import PyQuery as pq

from data_visualization.constant import GitHubData
from report_util.email_util import login_smtp_send_mail
__author__ = 'Jam'
__date__ = '2018/12/25 16:53'
# Automatically add, commit and push a file with git.
def git_add_commit_push(date, filename):
    """Stage ``filename``, commit it with ``date`` as the message, and push.

    Runs ``git add``, ``git commit -m`` and ``git push -u origin master`` in
    that order. Every command is attempted even if an earlier one exits with
    a non-zero status, and exit statuses are not inspected.

    Args:
        date: Value used as the commit message (converted with ``str``).
        filename: Path handed to ``git add`` (converted with ``str``).

    Returns:
        None.
    """
    # Argument lists are executed without a shell, so spaces, quotes or shell
    # metacharacters in ``filename`` / ``date`` cannot split or inject into
    # the command line. ``--`` stops git from reading the path as an option.
    commands = (
        ['git', 'add', '--', str(filename)],
        ['git', 'commit', '-m', str(date)],
        ['git', 'push', '-u', 'origin', 'master'],
    )
    for command in commands:
        try:
            subprocess.run(command, check=False)
        except OSError as err:
            # The git executable itself could not be started; the remaining
            # commands would fail the same way, so report once and stop.
            print('Unable to run {}: {}'.format(' '.join(command), err))
            return
def scrape(language, timeout=10):
    """Fetch the GitHub listing page for ``language`` and extract its rows.

    Requests ``GitHubData.TOP25_URL`` formatted with ``language`` and reads
    every ``.Box-row`` element found under ``div.explore-pjax-container``.

    Args:
        language: Value substituted into ``GitHubData.TOP25_URL``.
        timeout: Seconds ``requests`` waits for the server before raising a
            timeout error, so a stalled connection cannot hang the script.

    Returns:
        A list of ``[title, url, description]`` lists on success. ``None``
        when the response is not a 200 with a non-empty body; in that case
        ``GitHubData.ERROR_MSG`` is printed with the status code.
    """
    response = requests.get(
        GitHubData.TOP25_URL.format(language=language),
        headers=GitHubData.HEADERS,
        timeout=timeout,
    )
    if response.status_code != 200 or not response.content:
        print(GitHubData.ERROR_MSG.format(response.status_code))
        return None

    document = pq(response.content.decode('utf-8', 'ignore'))
    result = []
    for row in document('div.explore-pjax-container .Box-row'):
        entry = pq(row)
        link = entry("h1 a")
        href = link.attr("href")
        if href is None:
            # No repository link in this row: there is nothing to build a
            # URL from, and concatenating None would raise TypeError.
            continue
        # The link text is split on '/', keeping only the last segment.
        title = link.text().split('/')[-1].strip()
        description = entry("p.col-9").text()
        result.append([title, "https://github.com" + href, description])
    return result
def send_mail(content_list):
    """Send ``content_list`` as an HTML table by email.

    Subject, sender and recipients are read from ``GitHubData.EMAIL_CONFG``
    (keys ``title``, ``email_address`` and ``to``). The rendered rows are
    substituted into ``GitHubData.MAIL_TEMPLATE`` as ``table_content`` and
    the message is handed to ``login_smtp_send_mail``.

    Args:
        content_list: Iterable of indexable items; elements 0, 1 and 2 of
            each item fill the three columns after the 1-based row number.
    """
    email_config = GitHubData.EMAIL_CONFG
    recipients = email_config.get('to')

    msg = MIMEMultipart()
    msg["Subject"] = email_config.get('title')
    msg["From"] = email_config.get('email_address')
    msg["To"] = recipients
    # "a@x.com, b@x.com" style values would otherwise yield addresses with
    # leading spaces, and a trailing comma would yield an empty address.
    to_addrs = [addr.strip() for addr in recipients.split(',') if addr.strip()]

    row_template = '''<tr><td> {} </td><td>{}</td><td>{}</td><td>{}</td></tr>'''
    # Cell values are escaped so characters such as '<' or '&' in them are
    # shown as text instead of being parsed as markup.
    table_content = "".join(
        row_template.format(
            index,
            html.escape(str(item[0])),
            html.escape(str(item[1])),
            html.escape(str(item[2])),
        )
        for index, item in enumerate(content_list, 1)
    )

    mail_template = GitHubData.MAIL_TEMPLATE.format(
        table_content=table_content
    )
    msg.attach(MIMEText(mail_template, 'html', 'utf-8'))
    login_smtp_send_mail(email_config, to_addrs, msg)
class GitHub:
    """Session-based helper for a few github.com pages.

    All requests go through one ``requests`` session and are sent with
    ``GitHubData.GITHUB_HEADER``.
    """

    # Seconds to wait for github.com before requests raises a timeout error;
    # without a timeout a stalled connection blocks forever.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.session = requests.session()
        # Entries appended by get_timeline(): dicts with 'thing' and 'time'.
        self.timeline = []
        # Set by login(); used by overview() when no user is given.
        self.name = ''
        # Initialised here but not read by any method of this class.
        self.user = ''
        self.passwd = ''

    def login(self):
        """Sign in with ``GitHubData.USERNAME`` / ``GitHubData.PASSWORD``.

        Loads the login page to read the ``authenticity_token`` form field,
        posts the credentials to ``/session`` and stores the text of the
        ``strong.css-truncate-target`` element of the response in
        ``self.name``.

        Raises:
            AttributeError: If either element is missing from the page,
                because ``find`` then returns ``None``.
        """
        login_page = self.session.get(
            'https://github.com/login',
            headers=GitHubData.GITHUB_HEADER,
            timeout=self.REQUEST_TIMEOUT,
        ).text
        authenticity_token = BeautifulSoup(login_page, 'lxml').find(
            'input', {'name': 'authenticity_token'}
        ).get('value')

        data = {
            'commit': "Sign in",
            'utf8': "✓",
            'login': GitHubData.USERNAME,
            'password': GitHubData.PASSWORD,
            'authenticity_token': authenticity_token
        }
        landing_page = self.session.post(
            'https://github.com/session',
            data=data,
            headers=GitHubData.GITHUB_HEADER,
            timeout=self.REQUEST_TIMEOUT,
        ).text
        self.name = BeautifulSoup(landing_page, 'lxml').find(
            'strong', {'class': 'css-truncate-target'}
        ).get_text()

    def get_timeline(self, page=1):
        """Append the events of one dashboard page to ``self.timeline``.

        Args:
            page: Page number placed in the dashboard URL.

        Each ``div.alert`` inside ``div#dashboard`` becomes a dict with
        ``'thing'`` (the title text with line breaks removed) and ``'time'``
        (the ``datetime`` attribute of its ``relative-time`` element).
        """
        dashboard_page = self.session.get(
            'https://github.com/dashboard/index/{page}?utf8=%E2%9C%93'.format(page=page),
            headers=GitHubData.GITHUB_HEADER,
            timeout=self.REQUEST_TIMEOUT,
        ).text
        events = BeautifulSoup(dashboard_page, 'lxml').find(
            'div', id='dashboard'
        ).find_all(
            'div', {'class': 'alert'}
        )
        for event in events:
            title = event.find('div', {'class': 'title'}).get_text()
            self.timeline.append({
                'thing': title.replace('\r', '').replace('\n', ''),
                'time': event.find('relative-time').get('datetime'),
            })

    def show_timeline(self):
        """Print every collected timeline entry framed by ``*`` characters.

        Each line is ``time``, a space and ``thing``, padded with spaces to
        78 characters between the two asterisks; longer text is not cut.
        """
        for line in self.timeline:
            text = line['time'] + ' ' + line['thing']
            print('*' + text.ljust(78) + '*')

    def overview(self, user=None):
        """Return the HTML of a user's profile page.

        Args:
            user: Account name appended to ``https://github.com/``; defaults
                to ``self.name`` when omitted.

        Returns:
            The response body as text, unparsed.
        """
        if user is None:
            user = self.name
        profile_page = self.session.get(
            'https://github.com/' + user,
            headers=GitHubData.GITHUB_HEADER,
            timeout=self.REQUEST_TIMEOUT,
        ).text
        # TODO: need to extract data
        return profile_page
def main():
    """Scrape the 'Python' listing and email the result."""
    result = scrape('Python')
    if result is None:
        # scrape() returns None after printing an error for a failed
        # request; send_mail() would raise TypeError iterating over None.
        return
    send_mail(result)


if __name__ == '__main__':
    main()