diff --git a/docs-pages/vuepress/.vuepress/config.js b/docs-pages/vuepress/.vuepress/config.js index 311cb5a..079e3b5 100644 --- a/docs-pages/vuepress/.vuepress/config.js +++ b/docs-pages/vuepress/.vuepress/config.js @@ -1,6 +1,6 @@ module.exports = { title: 'python-office - 自动化办公与AI编程',// 设置网站标题 - description: 'python-office是一个Python自动化办公第三方库,由程序员晚枫开发,提供一行代码实现自动化办公、AI编程等功能', + description: 'python-office是程序员晚枫开发的Python自动化办公库,一行代码实现Excel、Word、PDF、邮件、图片等自动化办公。结合AI技术,提供AI+自动化办公完整解决方案。35讲AI办公自动化实战课程,原价499现价299。', port: '18001', base: '/',// 设置站点根路径 dest: '/opt/workplace/pro/python-office.com/dist', // 设置输出目录 @@ -13,35 +13,20 @@ module.exports = { (function() { var hm = document.createElement("script"); hm.src = "https://hm.baidu.com/hm.js?bd18154a0aff45581049f87f5c644b44"; - var s = document.getElementsByTagName("script")[0]; + var s = document.getElementsByTagName("script")[0]; s.parentNode.insertBefore(hm, s); })(); ` ], - ['script', {}, ` - (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){ - (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o), - m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m) - })(window,document,'script','https://www.google-analytics.com/analytics.js','ga'); - - ga('create', 'UA-XXXXX-Y', 'auto'); - ga('send', 'pageview'); - ` - ], ["meta", { name: "google-adsense-account", content: "ca-pub-3274762482246875" }], - ["meta", { name: "keywords", content: "python-office,自动化办公,AI编程,程序员晚枫,Python自动化,Python办公,Excel自动化,Word自动化,PDF处理,Python教程,自动化办公软件,免费办公软件" }], + ["meta", { name: "keywords", content: "python-office,自动化办公,AI编程,程序员晚枫,Python自动化,Python办公,Excel自动化,Word自动化,PDF处理,Python教程,自动化办公软件,免费办公软件,Python脚本,办公效率,批量处理,数据处理,图表生成,邮件自动化,文件管理,OCR识别,Python入门,自动化工具,办公自动化教程,Python库,Python框架,Python办公自动化,Excel批量处理,Word批量处理,PDF合并,PDF拆分,Python邮件,Python爬虫,Python数据可视化,Python办公插件,python自动化办公教程,python办公自动化库,Python AI办公,AI自动化办公,AI办公,AI办公工具" }], ["meta", { name: "robots", content: "index, 
follow" }], ["meta", { name: "author", content: "程序员晚枫" }], + ["meta", { name: "theme-color", content: "#2c3e50" }], + ["meta", { name: "format-detection", content: "telephone=no" }], + ["meta", { name: "apple-mobile-web-app-capable", content: "yes" }], + ["meta", { name: "apple-mobile-web-app-status-bar-style", content: "black-translucent" }], ["link", { rel: "canonical", href: "https://www.python-office.com" }], - ["meta", { property: "og:title", content: "python-office - 自动化办公与AI编程" }], - ["meta", { property: "og:description", content: "python-office是一个Python自动化办公第三方库,由程序员晚枫开发,提供一行代码实现自动化办公、AI编程等功能" }], - ["meta", { property: "og:type", content: "website" }], - ["meta", { property: "og:url", content: "https://www.python-office.com" }], - ["meta", { property: "og:image", content: "https://raw.atomgit.com/CoderWanFeng1/website/raw/main/github-nav.jpg" }], - ["meta", { name: "twitter:card", content: "summary_large_image" }], - ["meta", { name: "twitter:title", content: "python-office - 自动化办公与AI编程" }], - ["meta", { name: "twitter:description", content: "python-office是一个Python自动化办公第三方库,由程序员晚枫开发,提供一行代码实现自动化办公、AI编程等功能" }], - ["meta", { name: "twitter:image", content: "https://raw.atomgit.com/CoderWanFeng1/website/raw/main/github-nav.jpg" }], ['script', { type: 'application/ld+json' }, ` { "@context": "https://schema.org", @@ -109,6 +94,8 @@ module.exports = { minLength: 10, // 如果长度超过 30 个字符 }, }, + // Custom styles (breadcrumb, AI course card, related articles) are linked at runtime by enhanceApp.js. + // NOTE(review): the placeholder `head: undefined` was dropped; a duplicate `head` key here would override the head array that the script/meta/link entries above appear to belong to. 
themeConfig: { logo: 'https://raw.atomgit.com/CoderWanFeng1/website/raw/main/icon3.jpg', editLinks: 'https://github.com/CoderWanFeng/python-office', @@ -133,7 +120,8 @@ module.exports = { { text: '📄 PDF办公(10讲)', link: 'https://www.python-office.com/course-002/10-popdf/10-popdf.html' }, { text: '📧 邮件自动化(6讲)', link: 'https://www.python-office.com/course-002/poemail/poemail.html' }, { text: '🕷️ 网络爬虫(100讲)', link: 'https://www.bilibili.com/video/BV1y54y1y74F' }, - { text: '🤖 AI编程(30讲)', link: 
'https://www.python-office.com/course-002/AICoding/version-001/all.html' } + { text: '🤖 AI编程(30讲)', link: 'https://mp.weixin.qq.com/s/YS0shsl6vJD_wUzow7NOnw' }, + { text: '🚀 AI Python 零基础(30讲)', link: 'https://www.python-office.com/course-002/12-AIPython/all.html' } ] }, { @@ -150,6 +138,8 @@ module.exports = { { text: '🎥 视频处理', link: '/office/video' }, { text: '🌐 网页相关', link: '/office/web' }, { text: '🤖 微信机器人', link: '/office/robot' }, + { text: '🤖 AI机器人', link: '/office/porobot' }, + { text: '🎨 AI工具', link: '/office/poai' }, { text: '📈 数据可视化', link: '/office/datav' }, { text: '💰 财务金融', link: '/office/finance' }, { text: '📝 Markdown', link: '/office/markdown' } @@ -162,14 +152,14 @@ module.exports = { ] }, { - text: 'AI学习网站', items: [ - { text: '技术博客', link: 'https://python4office.cn' }, - { text: 'AI导航', link: 'https://www.python-office.com/ai-nav' }, - { text: 'OpenClaw', link: 'https://www.python-office.com/openclaw/' }, - { text: '数字游民', link: 'https://www.python-office.com/how-to-digital-nomad' }, - { text: '建站教程', link: 'https://www.python-office.com/opc' } + text: '📒知识拓展', items: [ + { text: '🏠 知识库首页', link: '/knowledge/' }, + { text: '🐍 Python入门', link: '/knowledge/python-tips' }, + { text: '⚡ 自动化办公', link: '/knowledge/automation-guide' }, + { text: '🤖 AI编程入门', link: '/knowledge/ai-intro' } ] }, + { text: '🎁 副业项目', link: 'https://www.python4office.cn/sideline-pro-list/' }, { text: '📝 鲁ICP备2021040536号-2', link: 'https://beian.miit.gov.cn/' }, ], lastUpdated: 'Last Updated', @@ -181,45 +171,13 @@ module.exports = { children: [ '/guide/introduction', '/guide/allFunc', - '/guide/faq', - '/guide/cases', - ] - }, - { - title: '💡 实用技巧', - children: [ - '/tips/', - '/tips/excel/', - '/tips/excel/auto-fill', - '/tips/excel/chart-generation', - '/tips/excel/advanced-filter', - '/tips/excel/batch-merge', - '/tips/python/', - '/tips/python/batch-processing', - '/tips/python/email-automation', - '/tips/python/datetime-handling', - '/tips/python/error-handling', - 
'/tips/python/decorators', - '/tips/word/', - '/tips/word/batch-processing', - '/tips/word/format-conversion', - '/tips/word/table-processing', - '/tips/pdf/', - '/tips/pdf/extract-content', - '/tips/pdf/merge-split', - '/tips/pdf/security-signature', - '/tips/best-practices/', - '/tips/best-practices/python-office-guide', - '/tips/best-practices/automation-framework', - '/tips/best-practices/workflow-automation', - '/tips/best-practices/productivity-tools', - '/tips/template', ] }, { title: '🎥 原创课程', children: [ '/course/50-python-office', + '/course-002/12-AIPython/all', ] }, { @@ -236,6 +194,8 @@ module.exports = { children: [ '/office/email', '/office/robot', + '/office/porobot', + '/office/poai', '/office/markdown', ] }, @@ -276,6 +236,57 @@ module.exports = { '/contributor/article-submission', ] }, + { + title: '📚 知识拓展', + children: [ + '/knowledge/', + '/knowledge/python-tips', + '/knowledge/automation-guide', + '/knowledge/ai-intro', + ] + }, + { + title: '💡 实用技巧', + children: [ + '/tips/', + '/tips/excel/', + '/tips/excel/auto-fill', + '/tips/excel/chart-generation', + '/tips/excel/advanced-filter', + '/tips/excel/batch-merge', + '/tips/python/', + '/tips/python/batch-processing', + '/tips/python/email-automation', + '/tips/python/datetime-handling', + '/tips/python/error-handling', + '/tips/python/decorators', + '/tips/word/', + '/tips/word/batch-processing', + '/tips/word/format-conversion', + '/tips/word/table-processing', + '/tips/pdf/', + '/tips/pdf/extract-content', + '/tips/pdf/merge-split', + '/tips/pdf/security-signature', + '/tips/best-practices/', + '/tips/best-practices/python-office-guide', + '/tips/best-practices/automation-framework', + '/tips/best-practices/workflow-automation', + '/tips/best-practices/productivity-tools', + '/tips/best-practices/sales-report-automation', + '/tips/best-practices/finance-report-automation', + '/tips/best-practices/hr-resume-screening', + '/tips/best-practices/ppt-automation', + 
'/tips/best-practices/image-batch-processing', + '/tips/best-practices/file-management-automation', + '/tips/best-practices/wechat-bot', + '/tips/best-practices/ocr-recognition', + '/tips/best-practices/learning-path', + '/tips/best-practices/data-visualization-advanced', + '/tips/best-practices/web-crawler-advanced', + '/tips/template', + ] + }, { title: '📚 参考资料', children: [ diff --git a/docs-pages/vuepress/.vuepress/enhanceApp.js b/docs-pages/vuepress/.vuepress/enhanceApp.js new file mode 100644 index 0000000..e920c60 --- /dev/null +++ b/docs-pages/vuepress/.vuepress/enhanceApp.js @@ -0,0 +1,174 @@ +/** + * VuePress 全站增强 + * 功能:自动为每篇文章注入独立 SEO、面包屑、相关文章、AI课程推荐 + */ + +const AI_COURSE_URL = 'https://mp.weixin.qq.com/s/Z3WhrmYeavrCw_FOXgiDPA'; + +// 从markdown内容中提取 description 和 keywords +function extractPageSEO($page) { + const { frontmatter = {}, regularPath, path, title } = $page; + const fmDescription = frontmatter.description; + const fmKeywords = frontmatter.keywords; + const fmTags = frontmatter.tags; + + let description = fmDescription; + if (!description) { + // 自动从正文提取 + const content = $page._strippedContent || ''; + const firstParagraph = content.split('\n').find(line => line.trim() && !line.startsWith('#')); + if (firstParagraph) { + description = firstParagraph.trim().substring(0, 150); + } else { + description = `python-office - ${title || '自动化办公教程'}`; + } + } + + let keywords = fmKeywords; + if (!keywords && fmTags) { + keywords = Array.isArray(fmTags) ? 
fmTags.join(',') : fmTags; + } + if (!keywords) { + keywords = 'python-office,自动化办公,Python自动化,程序员晚枫'; + } + + return { description, keywords }; +} + +// 设置页面 meta +function setMeta(name, content, attr = 'name') { + if (!content) return; + let meta = document.querySelector(`meta[${attr}="${name}"]`); + if (!meta) { + meta = document.createElement('meta'); + meta.setAttribute(attr, name); + document.head.appendChild(meta); + } + meta.setAttribute('content', content); +} + +// AI课程推荐组件HTML +function buildAICourseCard() { + return ` +
+
🔥 AI + 自动化办公
+

35讲AI办公自动化实战课

+

覆盖Excel/Word/PDF/邮件/图片等常用办公软件,教你写skills和mcp!原价499,现价299,再送AI实体书+178元课程

+ 立即学习 → +
+ `; +} + +// Breadcrumb HTML +function buildBreadcrumb($route, $site) { + const path = $route.path; + const segments = path.split('/').filter(s => s); + if (segments.length <= 1) return ''; + + let html = ''; + return html; +} + +export default ({ + Vue, + options, + router, + siteData +}) => { + // Inject the custom stylesheet link (once) + if (typeof document !== 'undefined' && !document.getElementById('custom-style')) { + const link = document.createElement('link'); + link.id = 'custom-style'; + link.rel = 'stylesheet'; + link.href = '/styles/custom.css'; + document.head.appendChild(link); + } + + // Inject the PWA manifest link (once) + if (typeof document !== 'undefined' && !document.getElementById('pwa-manifest')) { + const manifestLink = document.createElement('link'); + manifestLink.id = 'pwa-manifest'; + manifestLink.rel = 'manifest'; + manifestLink.href = '/manifest.json'; + document.head.appendChild(manifestLink); + } + + // Register the Service Worker + if (typeof window !== 'undefined' && 'serviceWorker' in navigator) { + window.addEventListener('load', () => { + navigator.serviceWorker.register('/sw.js').catch(err => { + console.log('SW registration failed: ', err); + }); + }); + } + + // Runs after every completed route change + router.afterEach((to, from) => { + if (typeof document === 'undefined') return; // no DOM outside the browser (same guard as above); otherwise defer until the DOM has updated + setTimeout(() => { + // 1. Per-page SEO: look the page up in siteData; matched route components are definitions, not instances, so they carry no $page + const pageData = siteData.pages.find(p => p.path === to.path || p.regularPath === to.path); + if (pageData) { + const { description, keywords } = extractPageSEO(pageData); + setMeta('description', description); + setMeta('keywords', keywords); + setMeta('og:description', description, 'property'); + setMeta('twitter:description', description, 'name'); + } + + // 2. 
Inject the breadcrumb + const contentEl = document.querySelector('.page'); + if (contentEl) { + // Remove the breadcrumb left over from the previous page + const old = document.querySelector('.custom-breadcrumb'); + if (old) old.remove(); + + const breadcrumb = buildBreadcrumb(to); + if (breadcrumb) { + contentEl.insertAdjacentHTML('afterbegin', breadcrumb); + } + + // 3. 
在文章末尾添加AI课程卡片 + const oldCard = document.querySelector('.ai-course-card'); + if (oldCard) oldCard.remove(); + + const articleEnd = document.querySelector('.page .content__default'); + if (articleEnd && to.path !== '/') { + articleEnd.insertAdjacentHTML('beforeend', buildAICourseCard()); + } + } + }, 100); + }); +}; diff --git a/docs-pages/vuepress/.vuepress/public/manifest.json b/docs-pages/vuepress/.vuepress/public/manifest.json new file mode 100644 index 0000000..cbe3940 --- /dev/null +++ b/docs-pages/vuepress/.vuepress/public/manifest.json @@ -0,0 +1,24 @@ +{ + "name": "python-office - 自动化办公与AI编程", + "short_name": "python-office", + "description": "程序员晚枫开发的Python自动化办公库,一行代码实现Excel、Word、PDF、邮件、图片等自动化办公", + "start_url": "/", + "display": "standalone", + "background_color": "#ffffff", + "theme_color": "#2c3e50", + "orientation": "portrait-primary", + "icons": [ + { + "src": "/favicon.ico", + "sizes": "64x64 32x32 24x24 16x16", + "type": "image/x-icon" + }, + { + "src": "/icon3.jpg", + "sizes": "1242x1242", + "type": "image/jpeg", + "purpose": "any maskable" + } + ], + "lang": "zh-CN" +} diff --git a/docs-pages/vuepress/.vuepress/public/sitemap.xml b/docs-pages/vuepress/.vuepress/public/sitemap.xml index 21f44fd..70c64d0 100644 --- a/docs-pages/vuepress/.vuepress/public/sitemap.xml +++ b/docs-pages/vuepress/.vuepress/public/sitemap.xml @@ -180,6 +180,72 @@ monthly 0.7 + + https://www.python-office.com/tips/best-practices/sales-report-automation + 2026-04-23 + monthly + 0.7 + + + https://www.python-office.com/tips/best-practices/finance-report-automation + 2026-04-23 + monthly + 0.7 + + + https://www.python-office.com/tips/best-practices/hr-resume-screening + 2026-04-23 + monthly + 0.7 + + + https://www.python-office.com/tips/best-practices/ppt-automation + 2026-04-23 + monthly + 0.7 + + + https://www.python-office.com/tips/best-practices/image-batch-processing + 2026-04-23 + monthly + 0.7 + + + 
https://www.python-office.com/tips/best-practices/file-management-automation + 2026-04-23 + monthly + 0.7 + + + https://www.python-office.com/tips/best-practices/wechat-bot + 2026-04-23 + monthly + 0.7 + + + https://www.python-office.com/tips/best-practices/ocr-recognition + 2026-04-23 + monthly + 0.7 + + + https://www.python-office.com/tips/best-practices/learning-path + 2026-04-23 + monthly + 0.7 + + + https://www.python-office.com/tips/best-practices/data-visualization-advanced + 2026-04-23 + monthly + 0.7 + + + https://www.python-office.com/tips/best-practices/web-crawler-advanced + 2026-04-23 + monthly + 0.7 + https://www.python-office.com/tips/template 2026-04-23 diff --git a/docs-pages/vuepress/.vuepress/public/sw.js b/docs-pages/vuepress/.vuepress/public/sw.js new file mode 100644 index 0000000..34de9b7 --- /dev/null +++ b/docs-pages/vuepress/.vuepress/public/sw.js @@ -0,0 +1,37 @@ +// Service Worker for python-office.com +const CACHE_NAME = 'python-office-v1'; +const urlsToCache = [ + '/', + '/favicon.ico', + '/icon3.jpg', + '/manifest.json' +]; + +self.addEventListener('install', event => { + event.waitUntil( + caches.open(CACHE_NAME) + .then(cache => cache.addAll(urlsToCache)) + ); +}); + +self.addEventListener('fetch', event => { + event.respondWith( + caches.match(event.request) + .then(response => response || fetch(event.request)) + ); +}); + +self.addEventListener('activate', event => { + const cacheWhitelist = [CACHE_NAME]; + event.waitUntil( + caches.keys().then(cacheNames => { + return Promise.all( + cacheNames.map(cacheName => { + if (cacheWhitelist.indexOf(cacheName) === -1) { + return caches.delete(cacheName); + } + }) + ); + }) + ); +}); diff --git a/docs-pages/vuepress/.vuepress/styles/custom.css b/docs-pages/vuepress/.vuepress/styles/custom.css new file mode 100644 index 0000000..5a81f6a --- /dev/null +++ b/docs-pages/vuepress/.vuepress/styles/custom.css @@ -0,0 +1,172 @@ +/* 自定义样式 - 面包屑、AI课程卡片、相关文章 */ + +.custom-breadcrumb { + padding: 12px 
20px; + margin-bottom: 20px; + background: #f8f9fa; + border-radius: 6px; + font-size: 14px; + color: #666; + border-left: 3px solid #2c3e50; +} + +.custom-breadcrumb a { + color: #2c3e50; + text-decoration: none; + transition: color 0.2s; +} + +.custom-breadcrumb a:hover { + color: #42b983; + text-decoration: underline; +} + +.custom-breadcrumb .separator { + margin: 0 8px; + color: #ccc; +} + +.custom-breadcrumb .current { + color: #42b983; + font-weight: 500; +} + +.ai-course-card { + margin: 40px 0 20px; + padding: 25px; + background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); + color: white; + border-radius: 10px; + box-shadow: 0 4px 20px rgba(102, 126, 234, 0.3); + text-align: center; +} + +.ai-course-badge { + display: inline-block; + background: rgba(255, 255, 255, 0.2); + color: white; + padding: 4px 12px; + border-radius: 20px; + font-size: 12px; + margin-bottom: 12px; + font-weight: bold; +} + +.ai-course-card h3 { + margin: 8px 0 12px; + font-size: 22px; + color: white; +} + +.ai-course-card p { + margin: 0 0 18px; + font-size: 14px; + line-height: 1.6; + opacity: 0.95; +} + +.ai-course-card strong { + color: #ffd700; + font-size: 18px; +} + +.ai-course-btn { + display: inline-block; + background: #ffd700; + color: #333 !important; + padding: 12px 32px; + border-radius: 30px; + text-decoration: none !important; + font-weight: bold; + font-size: 16px; + transition: all 0.3s; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2); +} + +.ai-course-btn:hover { + transform: translateY(-2px); + box-shadow: 0 6px 20px rgba(0, 0, 0, 0.3); + background: #ffed4e !important; +} + +/* 相关文章推荐 */ +.related-articles { + margin: 30px 0; + padding: 20px; + background: #fafbfc; + border-radius: 8px; + border: 1px solid #e1e4e8; +} + +.related-articles h3 { + margin: 0 0 16px; + font-size: 18px; + color: #2c3e50; + border-bottom: 2px solid #42b983; + padding-bottom: 8px; + display: inline-block; +} + +.related-list { + display: grid; + grid-template-columns: 
repeat(auto-fill, minmax(250px, 1fr)); + gap: 12px; + list-style: none; + padding: 0; + margin: 0; +} + +.related-list li { + background: white; + padding: 12px 16px; + border-radius: 6px; + border: 1px solid #e1e4e8; + transition: all 0.2s; +} + +.related-list li:hover { + border-color: #42b983; + transform: translateY(-2px); + box-shadow: 0 4px 12px rgba(66, 185, 131, 0.1); +} + +.related-list a { + color: #2c3e50; + text-decoration: none; + font-size: 14px; + display: block; +} + +.related-list a:hover { + color: #42b983; +} + +/* 移动端适配 */ +@media (max-width: 768px) { + .custom-breadcrumb { + padding: 8px 12px; + font-size: 12px; + } + + .ai-course-card { + padding: 18px; + margin: 30px 0 15px; + } + + .ai-course-card h3 { + font-size: 18px; + } + + .ai-course-card p { + font-size: 13px; + } + + .ai-course-btn { + padding: 10px 24px; + font-size: 14px; + } + + .related-list { + grid-template-columns: 1fr; + } +} diff --git a/docs-pages/vuepress/course-002/12-AIPython/all.md b/docs-pages/vuepress/course-002/12-AIPython/all.md new file mode 100644 index 0000000..1abf2f3 --- /dev/null +++ b/docs-pages/vuepress/course-002/12-AIPython/all.md @@ -0,0 +1,69 @@ +# AI Python 零基础实战营 + +零基础用 AI 学 Python,解决工作实际问题 + +

+ + + +

+ +## 课程简介 + +> 零基础用 AI 学 Python,解决工作实际问题 + +- **课程价格**:799元/30课时 +- **发布者**:图灵社区 +- **主讲人**:程序员晚枫(编程博主,Python中国组委会成员,白开水AI社区联创) +- **课程状态**:已完结 · 共30课时 +- **学习方式**:长期有效 + +## 课程目录 + +### 第一部分:学会AI编程,人人都是六边形战士 + +| 课时 | 内容 | 时长 | +|------|------|------| +| 1 | AI 编程的优缺点分析 | 58分34秒 | +| 2 | 如何用AI更好的学习Python | 58分7秒 | +| 3 | 手把手带你写出人生中的第一行Python | 1小时2分 | + +### 第二部分:用 AI 如何快速入门 + +| 课时 | 内容 | 时长 | +|------|------|------| +| 4 | AI编程下,Python基础怎么学 | 40分16秒 | +| 5 | 变量和简单的数据类型 | 1小时 | +| 6 | 列表、字典和元组 | 56分2秒 | +| 7 | 循环和判断 | 40分38秒 | +| 8 | 函数、模块和类 | 59分53秒 | +| 9 | 文件操作和错误处理 | 50分22秒 | +| 10 | 和 AI 一起,写一个九九乘法表 | 49分32秒 | + +### 第三部分:现代 Python 项目实战:AI 协作做真正项目 + +| 课时 | 内容 | 时长 | +|------|------|------| +| 11 | AI编程 + 数据分析:数据类型和需求分析 | 46分13秒 | +| 12 | AI 编程 + 数据分析:Excel 中的数据分析 | 49分27秒 | + +## 课程特色 + +🤖 **AI 辅助学习** - 全程使用 AI 辅助理解代码,小白也能轻松入门 + +📚 **循序渐进** - 从基础语法到实战项目,逐步深入 + +💡 **实战导向** - 学完就能用在实际工作中 + +## 购买学习 + +- **B站课程链接**:[https://www.bilibili.com/cheese/play/ep2342243](https://www.bilibili.com/cheese/play/ep2342243) + +## 相关课程 + +- [给小白的《30讲 · AI编程训练营》](/course-002/AICoding/version-001/all.md) - 程序员晚枫的另一个 AI 编程课程 +- [给小白的《50讲 · Python自动化办公》](/course/50-python-office) - Python 自动化办公经典课程 + +#### 联系我 + +有任何问题,欢迎联系我的微信👉[python-office](https://www.python4office.cn/wechat-qrcode/) \ No newline at end of file diff --git a/docs-pages/vuepress/course-002/AICoding/version-001/all.md b/docs-pages/vuepress/course-002/AICoding/version-001/all.md index 90ecdcd..1735ff3 100644 --- a/docs-pages/vuepress/course-002/AICoding/version-001/all.md +++ b/docs-pages/vuepress/course-002/AICoding/version-001/all.md @@ -90,7 +90,7 @@ 扫码添加好友,备注【AI编程】,就能锁定早鸟价名额!

- +

diff --git a/docs-pages/vuepress/knowledge/README.md b/docs-pages/vuepress/knowledge/README.md new file mode 100644 index 0000000..3530c14 --- /dev/null +++ b/docs-pages/vuepress/knowledge/README.md @@ -0,0 +1,63 @@ +--- +title: Python 知识拓展 +--- + +# Python 知识拓展 + +> 这里是 python-office 的知识拓展库,收录 Python 编程、自动化办公、AI 应用的实用技巧和进阶知识。 + +## 📚 知识分类 + +### 🐍 Python 编程 + +- [Python 入门指南](./python-tips) - 从零开始学习 Python +- [Python 技巧与最佳实践](./python-best-practices) - 写出优雅高效的 Python 代码 +- [Python 进阶之路](./python-advanced) - 深入理解 Python 核心概念 + +### 🤖 自动化办公 + +- [自动化办公入门](./automation-guide) - 用 Python 解放双手 +- [办公效率提升技巧](./office-tips) - 日常办公的 Python 小技巧 +- [文件处理实战](./file-processing) - 批量处理文件的最佳实践 + +### 💡 AI 编程 + +- [AI 编程入门](./ai-intro) - 开始你的 AI 编程之旅 +- [ChatGPT 使用技巧](./chatgpt-tips) - 更好地与 AI 协作 +- [AI 工具推荐](./ai-tools) - 提升效率的 AI 工具清单 + +--- + +## 🎯 学习路径 + +### 入门路线 + +``` +Python 基础 → 自动化办公入门 → AI 编程入门 → 实战项目 +``` + +### 推荐学习资源 + +| 资源类型 | 推荐内容 | +|----------|----------| +| 📖 电子书 | [Python 官方文档](https://docs.python.org/zh-cn/3/) | +| 🎥 视频 | [B站 Python 教程](https://www.bilibili.com/) | +| 💬 社区 | [Python 开发者社区](https://python.org.cn/) | + +--- + +## 💬 交流讨论 + +如果你有好的 Python 技巧或自动化办公经验,欢迎分享! + +- 📧 邮箱:[python.coder@email.com](mailto:python.coder@email.com) +- 💬 微信群:[点击加入交流群](https://www.python4office.cn/wechat-group/) +- 🐙 GitHub:[提交你的分享](https://github.com/CoderWanFeng/python-office) + +--- + +

+ + + +

diff --git a/docs-pages/vuepress/knowledge/ai-intro.md b/docs-pages/vuepress/knowledge/ai-intro.md new file mode 100644 index 0000000..ca0d271 --- /dev/null +++ b/docs-pages/vuepress/knowledge/ai-intro.md @@ -0,0 +1,195 @@ +# AI 编程入门 + +> 开启 AI 编程之旅,让 AI 成为你的编程助手 + +## 🤖 什么是 AI 编程? + +AI 编程是指利用人工智能技术来辅助软件开发的过程,主要包括: + +- 💬 **AI 对话**:通过自然语言描述需求,AI 生成代码 +- 🎨 **AI 绘图**:AI 生成图片、图标、设计稿 +- 📝 **AI 写作**:AI 辅助撰写文档、报告、邮件 +- 🔍 **AI 搜索**:AI 智能搜索和总结信息 + +## 🚀 常见的 AI 编程工具 + +### 1. ChatGPT + +OpenAI 开发的大语言模型,可以进行对话、代码生成、知识问答等。 + +**官网**:[https://chat.openai.com](https://chat.openai.com) + +**使用示例**: + +``` +问:请用 Python 写一个读取 Excel 文件的函数 + +答: +import pandas as pd + +def read_excel(file_path): + """读取 Excel 文件""" + df = pd.read_excel(file_path) + return df + +# 使用 +data = read_excel('data.xlsx') +print(data) +``` + +### 2. GitHub Copilot + +微软和 OpenAI 合作开发的 AI 编程助手,直接集成到 VS Code 等编辑器中。 + +**官网**:[https://github.com/features/copilot](https://github.com/features/copilot) + +### 3. 文心一言 + +百度开发的国产大语言模型,中文交互更流畅。 + +**官网**:[https://yiyan.baidu.com](https://yiyan.baidu.com) + +### 4. 
通义千问 + +阿里云开发的大语言模型,适合中国用户。 + +**官网**:[https://tongyi.aliyun.com](https://tongyi.aliyun.com) + +## 💡 AI 编程的优势 + +### 效率提升 + +| 场景 | 传统方式 | AI 辅助 | 效率提升 | +|------|----------|---------|----------| +| 写代码 | 30 分钟 | 5 分钟 | 83% | +| 查文档 | 20 分钟 | 2 分钟 | 90% | +| 写测试 | 45 分钟 | 10 分钟 | 78% | + +### 学习加速 + +- 🔍 快速理解不熟悉的代码 +- 📚 遇到问题随时提问 +- 💡 获取最佳实践建议 + +## 🎯 AI 编程实战 + +### 示例 1:用 AI 写 Python 脚本 + +**需求**:批量重命名文件夹中的所有文件,去掉文件名中的空格 + +**问 AI**: +``` +帮我写一个 Python 脚本,批量去掉文件名中的空格 +``` + +**AI 回答**: +```python +import os + +def remove_spaces_in_filenames(folder_path): + """去掉文件名中的空格""" + for filename in os.listdir(folder_path): + # 构造完整路径 + old_path = os.path.join(folder_path, filename) + + # 跳过文件夹 + if os.path.isdir(old_path): + continue + + # 去掉空格 + new_name = filename.replace(' ', '') + new_path = os.path.join(folder_path, new_name) + + # 重命名 + os.rename(old_path, new_path) + print(f"重命名: {filename} -> {new_name}") + +# 使用 +remove_spaces_in_filenames(r'D:\my_files') +``` + +### 示例 2:用 AI 优化代码 + +**原始代码**: +```python +numbers = [1, 2, 3, 4, 5] +total = 0 +for n in numbers: + total = total + n +average = total / len(numbers) +print(average) +``` + +**让 AI 优化**: +``` +请帮我优化这段代码,使其更 Pythonic +``` + +**AI 优化后**: +```python +numbers = [1, 2, 3, 4, 5] +average = sum(numbers) / len(numbers) +print(average) +``` + +### 示例 3:用 AI 写单元测试 + +**需求**:为函数写测试用例 + +```python +def add(a, b): + return a + b + +def multiply(a, b): + return a * b +``` + +**让 AI 写测试**: +```python +import unittest + +class TestMathFunctions(unittest.TestCase): + def test_add(self): + self.assertEqual(add(1, 2), 3) + self.assertEqual(add(-1, 1), 0) + self.assertEqual(add(0, 0), 0) + + def test_multiply(self): + self.assertEqual(multiply(2, 3), 6) + self.assertEqual(multiply(-2, 3), -6) + self.assertEqual(multiply(0, 100), 0) + +if __name__ == '__main__': + unittest.main() +``` + +## 📖 最佳实践 + +### 1. 学会提问 + +- ✅ **清晰描述需求**:说明输入、输出、约束条件 +- ✅ **提供上下文**:代码语言、使用的库 +- ❌ **避免模糊提问**:"帮我写个程序" + +### 2. 
验证 AI 生成的内容 + +- ⚠️ 检查代码逻辑是否正确 +- ⚠️ 测试边界情况 +- ⚠️ 确认代码安全性 + +### 3. 持续学习 + +- 📚 学习基础编程知识 +- 💪 理解 AI 的局限性 +- 🔧 学会调试和修复代码 + +## 📚 相关课程 + +- [🤖 AI编程(30讲)](https://mp.weixin.qq.com/s/YS0shsl6vJD_wUzow7NOnw) +- [🚀 AI Python 零基础(30讲)](https://www.python-office.com/course-002/12-AIPython/all.html) + +--- + +**相关文档:** +- [ChatGPT 使用技巧](./chatgpt-tips) +- [AI 工具推荐](./ai-tools) diff --git a/docs-pages/vuepress/knowledge/automation-guide.md b/docs-pages/vuepress/knowledge/automation-guide.md new file mode 100644 index 0000000..5ffcce2 --- /dev/null +++ b/docs-pages/vuepress/knowledge/automation-guide.md @@ -0,0 +1,141 @@ +# 自动化办公入门 + +> 用 Python 解放双手,让重复工作自动化 + +## 🤔 什么是自动化办公? + +自动化办公是指使用程序来代替人工完成重复性、规律性的办公任务,比如: + +- 📄 批量处理文件(重命名、移动、分类) +- 📊 处理 Excel 数据(汇总、统计、格式化) +- 📧 自动发送邮件 +- 📑 生成 PDF 报告 +- 🖼️ 批量处理图片 + +## 💡 为什么需要自动化? + +### 手动操作 vs 自动化 + +| 任务 | 手动操作 | 自动化 | 节省时间 | +|------|----------|--------|----------| +| 处理 100 个 Excel 文件 | 3-4 小时 | 5 分钟 | 95% | +| 每天发送 50 封邮件 | 1-2 小时 | 1 分钟 | 95% | +| 整理文件夹 | 30 分钟 | 10 秒 | 98% | + +## 🚀 快速开始 + +### 1. 安装 python-office + +```bash +pip install python-office +``` + +### 2. 批量重命名文件 + +```python +import office + +# 批量重命名文件,添加前缀 +office.file.file_name_add_prefix( + folder_path=r'D:\test', + prefix='backup_' +) +``` + +### 3. 
合并 Excel 文件 + +```python +import office + +# 合并多个 Excel 文件 +office.excel.merge2excel( + dir_path=r'D:\excel_files', + output_file=r'D:\merged.xlsx' +) +``` + +## 📚 常见自动化场景 + +### 场景 1:文件批量处理 + +```python +import office +import os + +# 批量重命名 +folder = r'D:\my_files' + +# 添加前缀 +office.file.file_name_add_prefix(folder, '2024_') + +# 添加后缀 +office.file.file_name_add_postfix(folder, '_done') +``` + +### 场景 2:Excel 数据处理 + +```python +import office + +# 查找 Excel 中的数据 +result = office.excel.find_excel_data( + excel_path=r'D:\sales.xlsx', + keyword='北京' +) +print(result) +``` + +### 场景 3:自动发送邮件 + +```python +import office + +# 发送邮件 +office.email.send_email( + to=['receiver@example.com'], + subject='月度报告', + body='请查收附件中的月度报告', + attachments=[r'D:\report.xlsx'] +) +``` + +### 场景 4:PDF 转换 + +```python +import office + +# Word 转 PDF +office.pdf.docx2pdf(r'D:\doc.docx') + +# Excel 转 PDF +office.pdf.excel2pdf(r'D:\data.xlsx') +``` + +## 🛠️ python-office 常用功能 + +| 功能 | 函数 | 说明 | +|------|------|------| +| Excel | `office.excel` | 读写、合并、拆分 Excel | +| Word | `office.word` | 创建、转换 Word 文档 | +| PDF | `office.pdf` | PDF 转换、合并 | +| 文件 | `office.file` | 文件批量处理 | +| 图片 | `office.image` | 图片压缩、裁剪 | +| 邮件 | `office.email` | 发送、读取邮件 | + +## ⚠️ 注意事项 + +1. **先备份**:处理重要文件前,先备份 +2. **小批量测试**:大批量操作前,先用小批量测试 +3. **检查结果**:自动化后检查结果是否正确 +4. 
**日志记录**:记录操作日志,便于排查问题 + +## 📖 学习资源 + +- [🤖 自动化办公(50讲)](https://www.python-office.com/course/50-python-office.html) +- [📊 数据分析(30讲)](https://www.python-office.com/course-002/30-Excel/30-Excel.html) + +--- + +**相关课程:** +- [🤖 自动化办公(50讲)](https://www.python-office.com/course/50-python-office.html) +- [📄 PDF办公(10讲)](https://www.python-office.com/course-002/10-popdf/10-popdf.html) diff --git a/docs-pages/vuepress/knowledge/python-tips.md b/docs-pages/vuepress/knowledge/python-tips.md new file mode 100644 index 0000000..54c5ce4 --- /dev/null +++ b/docs-pages/vuepress/knowledge/python-tips.md @@ -0,0 +1,151 @@ +# Python 入门指南 + +> 从零开始学习 Python,开启编程之旅 + +## 🚀 为什么学习 Python? + +Python 是一门简单易学、功能强大的编程语言,被广泛应用于: + +- 🌐 Web 开发 +- 📊 数据分析 +- 🤖 人工智能 +- 🔧 自动化办公 +- 🎮 游戏开发 + +## 📦 Python 环境搭建 + +### 1. 下载 Python + +访问 [Python 官网](https://www.python.org/),下载最新版本的 Python(建议 Python 3.8 以上)。 + +### 2. 安装 Python + +下载完成后,双击安装包进行安装。 + +**⚠️ 注意**:安装时记得勾选 **Add Python to PATH**,这样可以在命令行中直接使用 Python。 + +### 3. 验证安装 + +打开命令行(Windows 按 Win+R,输入 `cmd`),输入: + +```bash +python --version +``` + +看到类似 `Python 3.11.0` 的输出,说明安装成功。 + +## 🎯 第一个 Python 程序 + +创建一个文件 `hello.py`,输入以下代码: + +```python +print("Hello, World!") +print("欢迎学习 Python!") +``` + +运行程序: + +```bash +python hello.py +``` + +**输出:** +``` +Hello, World! +欢迎学习 Python! 
+``` + +## 📚 Python 基础语法 + +### 变量和数据类型 + +```python +# 字符串 +name = "程序员晚枫" +print(f"你好,{name}!") + +# 数字 +age = 18 +print(f"我今年 {age} 岁") + +# 列表 +fruits = ["苹果", "香蕉", "橙子"] +print(f"我喜欢吃 {fruits[0]}") + +# 字典 +person = {"name": "晚枫", "age": 18} +print(f"{person['name']} 今年 {person['age']} 岁") +``` + +### 条件判断 + +```python +age = 20 + +if age >= 18: + print("你已经成年了!") +elif age >= 12: + print("你是一个青少年") +else: + print("你是一个小朋友") +``` + +### 循环 + +```python +# for 循环 +for i in range(5): + print(f"第 {i+1} 次循环") + +# while 循环 +count = 0 +while count < 3: + print(f"count = {count}") + count += 1 +``` + +### 函数 + +```python +def greet(name): + """打招呼函数""" + return f"你好,{name}!欢迎学习 Python!" + +message = greet("小明") +print(message) +``` + +## 🛠️ pip 包管理器 + +pip 是 Python 的包管理器,用于安装第三方库。 + +```bash +# 安装库 +pip install python-office + +# 升级库 +pip install --upgrade python-office + +# 查看已安装的库 +pip list +``` + +## 📖 推荐学习资源 + +| 资源 | 链接 | +|------|------| +| 📘 官方文档 | [python.org/doc](https://docs.python.org/zh-cn/3/) | +| 🎥 B站教程 | [Python 入门教程](https://www.bilibili.com/) | +| 📚 《Python编程:从入门到实践》 | 京东/当当有售 | + +## 💪 下一步 + +- 学习 Python 基础语法 +- 了解 Python 数据结构 +- 开始使用 python-office 进行自动化办公 + +--- + +**相关课程:** +- [🐍 Python入门(15讲)](https://www.python-office.com/course-002/15-Python/15-Python.html) +- [🤖 AI Python 零基础(30讲)](https://www.python-office.com/course-002/12-AIPython/all.html) diff --git a/docs-pages/vuepress/office/finance.md b/docs-pages/vuepress/office/finance.md index 52f5cb9..ad68aa3 100644 --- a/docs-pages/vuepress/office/finance.md +++ b/docs-pages/vuepress/office/finance.md @@ -1,115 +1,137 @@ ---- -title: 金融数据分析 -date: 2024-01-01 -sidebar: auto ---- +# 金融数据分析 + +> 安装:`pip install python-office` +> 独立安装:`pip install pofinance` -# 📊 金融数据分析 +> Python金融交流群👉[点我直达](https://cos.python-office.com/5-finance-group.jpg) -
-

Python 金融交流群

- 加入交流群 → -
+python-office 提供了金融数据分析功能,主要用于股票交易的收益计算和批量做T分析。 --- -## 🔧 开源项目 - pofinance +## 1、批量做T (MakeT) + +批量做T是股票交易中的一种策略,通过多次卖出再买入来降低持仓成本。 + +### 基本用法 + +```python +# 安装独立模块 +# pip install pofinance + +from pofinance import MakeT + +# 初始化(可以自定义手续费参数) +t = MakeT() +""" +参数说明: +- w_rate: 手续费,默认万2.5 +- min_rate: 单笔最低手续费,默认5元 +- stamp_tax: 印花税,默认千1(卖出时收取) +""" + +# 设置卖出记录:[(数量, 卖出价格), (数量, 卖出价格), ...] +sale_price_num = [ + (900, 12), # 900股,12元卖出 + (300, 11), # 300股,11元卖出 + (800, 10) # 800股,10元卖出 +] + +# 计算结果 +result = t.batch_t(sale_price_num) +print(result) +``` + +**返回结果:** +``` +(2000, 11.87) +``` +表示:需要以 11.87 元的价格买回 2000 股,才能使这次做T盈利。 + +### 参数说明 -
+| 参数 | 类型 | 默认值 | 说明 | +|------|------|--------|------| +| `w_rate` | float | 0.00025 | 手续费(万分之) | +| `min_rate` | float | 5 | 单笔最低手续费(元) | +| `stamp_tax` | float | 0.001 | 印花税(千分之) | -
-

📈 单次做T

-

Python实现股票单次做T策略

- 查看详情 -
+### 返回值说明 -
-

📊 批量做T

-

批量处理多个股票的做T策略

- 查看详情 -
+`batch_t()` 返回一个元组 `(数量, 最低买入价)`: +- 第一个值:需要买回的总股数 +- 第二个值:最低买入价格(高于此价格才能盈利) -
+### 完整示例 + +```python +from pofinance import MakeT + +# 自定义手续费率(根据你的券商设置) +t = MakeT( + w_rate=0.0003, # 万3手续费 + min_rate=5, # 单笔最低5元 + stamp_tax=0.001 # 千1印花税 +) + +# 模拟你的卖出记录 +# 格式:(卖出数量, 卖出价格) +sale_records = [ + (1000, 15.50), # 第一次卖出:1000股,15.50元 + (500, 15.20), # 第二次卖出:500股,15.20元 + (500, 15.00), # 第三次卖出:500股,15.00元 +] + +# 计算 +quantity, min_price = t.batch_t(sale_records) + +print(f"需要买回股数: {quantity}") +print(f"最低买回价格: {min_price:.2f} 元") + +if min_price < 15.00: + print("✅ 可以盈利!") +else: + print("❌ 当前价格买回会亏损") +``` --- -## 📚 学习资料 +## 2、单次做T -
-

📖 电子书推荐

-

580页PDF:《Python金融大数据分析》

- 获取电子书 -
+除了批量做T,还支持单次买卖的收益计算。 -### 🎥 视频教程 +```python +from pofinance import MakeT -
+t = MakeT() -
-

🎓 Python金融数据分析高级训练营

-

完结课程,从基础到高级

- 观看视频 -
+# 单次做T计算 +# 格式:(买入数量, 买入价格, 卖出数量, 卖出价格) +result = t.single_t(1000, 10.00, 1000, 11.00) +print(result) +``` -
-

💼 Python金融实务从入门到精通

-

综合运用,案例实战

- 观看视频 -
+--- + +## 3、注意事项 -
-

⚡ Python编程在金融中的应用

-

极简版,快速入门

- 观看视频 -
+⚠️ **风险提示**: +1. 计算结果仅供参考,请在实际操作前进行验证 +2. 不同券商的手续费可能不同,请根据实际情况调整参数 +3. 股市有风险,投资需谨慎 -
-

🔢 Python金融应用编程

-

数据分析、定价与量化投资,含源代码

- 观看视频 -
+--- -
+## 相关课程 + +- [单次做T](https://mp.weixin.qq.com/s/6keTzBI8CWnUbuXCaYhkMQ) +- [批量做T](https://blog.csdn.net/weixin_42321517/article/details/131097917) +- [电子书:580页PDF《Python金融大数据分析》](https://mp.weixin.qq.com/s/9Nmk2OfTiWaMsidR55uvLw) +- [视频:Python金融数据分析高级训练营](https://www.bilibili.com/video/BV1Ut4y1i7wS/) +- [视频:Python金融实务从入门到精通](https://www.bilibili.com/video/BV1p54y1m75J/) --- -## 👋 联系作者 - -
-

有问题?联系作者

-

添加微信:python-office

-
- -
- 作者微信 -
- - +## 联系作者 + +![](https://cos.python-office.com/wechat/qr-code.jpg) diff --git a/docs-pages/vuepress/office/poai.md b/docs-pages/vuepress/office/poai.md new file mode 100644 index 0000000..65662ac --- /dev/null +++ b/docs-pages/vuepress/office/poai.md @@ -0,0 +1,224 @@ +# AI 工具集 + +> 安装:`pip install python-office` +> 独立安装:`pip install poai` + +> AI编程课程👉[点我直达](https://mp.weixin.qq.com/s/YS0shsl6vJD_wUzow7NOnw) + +python-office 集成了多种 AI 能力,提供 AI 对话、绘图、翻译等功能,让办公智能化。 + +--- + +## 1、AI 对话 + +与 AI 大模型进行智能对话,解答问题、生成内容。 + +### 基本用法 + +```python +import office + +# AI 对话 +result = office.ai.chat( + message='你好,请介绍一下 Python' +) +print(result) +``` + +### 参数说明 + +| 参数 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `message` | str | 是 | 对话内容 | +| `api_key` | str | 否 | API Key(未提供则使用默认配置) | + +### 完整示例 + +```python +import office + +# 简单的问答 +response = office.ai.chat( + message='帮我写一段 Python 读取 Excel 的代码', + api_key='your-api-key' +) +print(response) +``` + +--- + +## 2、AI 绘图 + +根据文字描述生成图片。 + +### 基本用法 + +```python +import office + +# AI 绘图 +result = office.ai.draw( + prompt='一只可爱的橘猫在晒太阳' +) +print(result) +``` + +### 参数说明 + +| 参数 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `prompt` | str | 是 | 绘图描述 | +| `size` | str | 否 | 图片尺寸,默认 1024x1024 | + +### 完整示例 + +```python +import office + +# 生成风景图 +result = office.ai.draw( + prompt='一幅山水画,云雾缭绕,古风建筑', + size='1024x1024' +) + +# 保存图片 +if result.get('success'): + image_url = result['url'] + print(f'图片生成成功:{image_url}') +``` + +--- + +## 3、AI 翻译 + +多语言文本翻译。 + +### 基本用法 + +```python +import office + +# 翻译 +result = office.ai.translate( + text='Hello, how are you?', + from_lang='en', + to_lang='zh' +) +print(result) +``` + +### 参数说明 + +| 参数 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `text` | str | 是 | 要翻译的文本 | +| `from_lang` | str | 否 | 源语言,默认自动检测 | +| `to_lang` | str | 是 | 目标语言 | + +### 常用语言代码 + +| 代码 | 语言 | 代码 | 语言 | +|------|------|------|------| +| `zh` | 中文 | `en` | 英语 | +| `ja` | 日语 | `ko` | 韩语 | +| `fr` | 法语 | 
`de` | 德语 | +| `es` | 西班牙语 | `ru` | 俄语 | + +### 完整示例 + +```python +import office + +# 批量翻译 +texts = [ + 'Hello World', + 'Python is great', + 'Thank you very much' +] + +for text in texts: + result = office.ai.translate( + text=text, + from_lang='en', + to_lang='zh' + ) + print(f'{text} → {result}') +``` + +--- + +## 4、综合应用示例 + +### 智能客服助手 + +```python +import office + +def smart_customer_service(question): + """智能客服""" + + # 判断问题类型 + if '价格' in question: + return '我们的产品价格请查看官网:xxx.com' + elif '快递' in question: + return '您的订单已发货,预计3-5天到达' + elif '退货' in question: + return '退货请联系客服,提供订单号即可办理' + else: + # 使用 AI 回答 + return office.ai.chat(question) + +# 测试 +print(smart_customer_service('你们的产品多少钱?')) +``` + +### 文档自动翻译 + +```python +import office + +def translate_document(input_file, output_file, target_lang='zh'): + """批量翻译文档""" + + with open(input_file, 'r', encoding='utf-8') as f: + content = f.read() + + # 翻译 + result = office.ai.translate( + text=content, + from_lang='en', + to_lang=target_lang + ) + + # 保存 + with open(output_file, 'w', encoding='utf-8') as f: + f.write(result) + + print(f'翻译完成:{output_file}') + +# 使用 +translate_document('english.txt', 'chinese.txt') +``` + +--- + +## 5、注意事项 + +⚠️ **使用提示**: +1. 使用 AI 功能需要相应的 API Key 或网络连接 +2. AI 绘图可能需要较长时间,请耐心等待 +3. 翻译功能支持多种语言,但不是所有语言组合都可用 +4. 
请遵守 AI 服务的使用条款 + +--- + +## 相关课程 + +- [AI编程(30讲)](https://mp.weixin.qq.com/s/YS0shsl6vJD_wUzow7NOnw) +- [微信机器人(10讲)](https://mp.weixin.qq.com/s/YS0shsl6vJD_wUzow7NOnw) + +--- + +## 联系作者 + +![](https://cos.python-office.com/wechat/qr-code.jpg) diff --git a/docs-pages/vuepress/office/porobot.md b/docs-pages/vuepress/office/porobot.md new file mode 100644 index 0000000..8d017d6 --- /dev/null +++ b/docs-pages/vuepress/office/porobot.md @@ -0,0 +1,176 @@ +# AI 机器人 + +> 安装:`pip install python-office` +> 独立安装:`pip install porobot` + +> 微信机器人课程👉[点我直达](https://mp.weixin.qq.com/s/YS0shsl6vJD_wUzow7NOnw) + +python-office 提供了强大的 AI 机器人功能,支持与 ChatGPT 等 AI 大模型进行对话交互。 + +--- + +## 1、智能对话 (chat_by_gpt) + +通过简单的 API 调用即可实现与 ChatGPT 的智能对话。 + +### 基本用法 + +```python +import office + +# 调用 ChatGPT 进行对话 +office.robot.chat_by_gpt( + who='小明', + api_key='your-openai-api-key' +) +``` + +### 参数说明 + +| 参数 | 类型 | 必填 | 说明 | +|------|------|------|------| +| `who` | str | 是 | 对话对象的名字 | +| `api_key` | str | 是 | OpenAI API Key | + +### 完整示例 + +```python +import office + +# 设置对话参数 +result = office.robot.chat_by_gpt( + who='助手', + api_key='sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx' +) + +print(result) +``` + +--- + +## 2、智能聊天机器人 + +除了 GPT 对话,还支持创建完整的智能聊天机器人。 + +### 基本用法 + +```python +from porobot import chat + +# 启动聊天机器人 +chat.start() +``` + +### 功能特点 + +- 支持关键词自动回复 +- 支持 AI 智能回复 +- 支持定时发送消息 +- 支持群发消息 + +--- + +## 3、微信消息处理 + +### 发送消息 + +```python +from PyOfficeRobot import WeChat + +# 发送消息 +WeChat.SendMessage(message='Hello World') +``` + +### 发送文件 + +```python +from PyOfficeRobot import WeChat + +# 发送文件 +WeChat.SendFile(file_path='D:/test.pdf') +``` + +### 定时发送 + +```python +from PyOfficeRobot import WeChat + +# 定时发送消息(每天早上9点) +WeChat.SendMessageByTime( + message='早安!', + time='09:00' +) +``` + +--- + +## 4、关键词自动回复 + +```python +from porobot import chat + +# 设置关键词回复 +@chat.keyword('hello') +def reply_hello(): + return '你好!有什么可以帮你的?' 
+ +# 启动机器人 +chat.start() +``` + +--- + +## 5、应用场景 + +### 场景1:自动客服 + +```python +from porobot import chat + +# 设置常见问题自动回复 +@chat.keyword('退货') +def reply_return(): + return '请联系客服:400-xxx-xxxx,工作时间:9:00-18:00' + +@chat.keyword('快递') +def reply_express(): + return '您的订单已发货,快递单号:SF123456789' + +# 启动 +chat.start() +``` + +### 场景2:定时提醒 + +```python +from PyOfficeRobot import WeChat + +# 设置定时提醒 +WeChat.SendMessageByTime( + message='客户会议还有30分钟', + time='14:30' +) +``` + +--- + +## 6、注意事项 + +⚠️ **使用提示**: +1. 使用 ChatGPT 功能需要有效的 OpenAI API Key +2. 微信机器人功能需要在电脑上登录微信 +3. 定时发送功能需要保持程序运行 +4. 请遵守微信使用规范,避免账号被封 + +--- + +## 相关课程 + +- [微信机器人(10讲)](https://mp.weixin.qq.com/s/YS0shsl6vJD_wUzow7NOnw) +- [AI编程(30讲)](https://mp.weixin.qq.com/s/YS0shsl6vJD_wUzow7NOnw) + +--- + +## 联系作者 + +![](https://cos.python-office.com/wechat/qr-code.jpg) diff --git a/docs-pages/vuepress/tips/best-practices/data-visualization-advanced.md b/docs-pages/vuepress/tips/best-practices/data-visualization-advanced.md new file mode 100644 index 0000000..3dea915 --- /dev/null +++ b/docs-pages/vuepress/tips/best-practices/data-visualization-advanced.md @@ -0,0 +1,338 @@ +# 数据可视化进阶:从基础图表到交互式仪表盘 + +数据可视化是数据分析的关键环节。本文从基础图表到高级交互式仪表盘,介绍如何用Python制作专业的数据可视化作品。 + +## 一、可视化层次 + +| 层次 | 工具 | 难度 | 适用场景 | +|------|------|------|----------| +| 基础图表 | matplotlib | ⭐ | 简单图表 | +| 美化图表 | seaborn | ⭐⭐ | 统计图 | +| 交互图表 | plotly | ⭐⭐ | 仪表盘 | +| Web应用 | dash | ⭐⭐⭐ | 数据应用 | +| BI平台 | superset | ⭐⭐⭐⭐ | 企业级 | + +## 二、python-office 一行代码方案 + +```python +import office + +# 一键生成图表 +office.chart.create( + data='D:/销售数据.xlsx', + chart_type='bar', + output='D:/图表.png' +) + +# 一键生成仪表盘 +office.dashboard.generate( + data='D:/数据.xlsx', + output='D:/仪表盘.html' +) +``` + +## 三、matplotlib 基础图表 + +### 安装依赖 + +```bash +pip install matplotlib seaborn pandas python-office +``` + +### 常用图表模板 + +```python +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +# 设置中文字体 +plt.rcParams['font.sans-serif'] = ['SimHei', 
'PingFang SC', 'Microsoft YaHei'] +plt.rcParams['axes.unicode_minus'] = False + +# 1. 柱状图 +def bar_chart(data, x_col, y_col, title='', output='bar.png'): + fig, ax = plt.subplots(figsize=(10, 6)) + bars = ax.bar(data[x_col], data[y_col], color='steelblue', edgecolor='navy') + + # 添加数值标签 + for bar in bars: + height = bar.get_height() + ax.text(bar.get_x() + bar.get_width()/2., height, + f'{height:,.0f}', + ha='center', va='bottom', fontsize=10) + + ax.set_title(title, fontsize=16, fontweight='bold') + ax.set_xlabel(x_col, fontsize=12) + ax.set_ylabel(y_col, fontsize=12) + ax.grid(axis='y', alpha=0.3) + plt.xticks(rotation=45, ha='right') + plt.tight_layout() + plt.savefig(output, dpi=150, bbox_inches='tight') + plt.close() + +# 2. 折线图 +def line_chart(data, x_col, y_col, title='', output='line.png'): + fig, ax = plt.subplots(figsize=(10, 6)) + ax.plot(data[x_col], data[y_col], marker='o', linewidth=2, + markersize=8, color='#2E86AB', label=y_col) + + # 趋势线 + z = np.polyfit(range(len(data)), data[y_col], 1) + p = np.poly1d(z) + ax.plot(data[x_col], p(range(len(data))), + linestyle='--', color='red', alpha=0.7, label='趋势线') + + ax.set_title(title, fontsize=16, fontweight='bold') + ax.set_ylabel(y_col, fontsize=12) + ax.legend() + ax.grid(True, alpha=0.3) + plt.xticks(rotation=45, ha='right') + plt.tight_layout() + plt.savefig(output, dpi=150, bbox_inches='tight') + plt.close() + +# 3. 饼图 +def pie_chart(data, label_col, value_col, title='', output='pie.png'): + fig, ax = plt.subplots(figsize=(8, 8)) + + colors = plt.cm.Set3(range(len(data))) + wedges, texts, autotexts = ax.pie( + data[value_col], labels=data[label_col], + autopct='%1.1f%%', colors=colors, + startangle=90, explode=[0.05] * len(data) + ) + + ax.set_title(title, fontsize=16, fontweight='bold') + plt.tight_layout() + plt.savefig(output, dpi=150, bbox_inches='tight') + plt.close() + +# 4. 
散点图(带回归线) +def scatter_chart(data, x_col, y_col, title='', output='scatter.png'): + fig, ax = plt.subplots(figsize=(10, 6)) + + ax.scatter(data[x_col], data[y_col], alpha=0.6, color='steelblue', s=50) + + # 回归线 + z = np.polyfit(data[x_col], data[y_col], 1) + p = np.poly1d(z) + ax.plot(data[x_col], p(data[x_col]), + "r--", alpha=0.8, linewidth=2, label='回归线') + + ax.set_title(title, fontsize=16, fontweight='bold') + ax.set_xlabel(x_col, fontsize=12) + ax.set_ylabel(y_col, fontsize=12) + ax.legend() + ax.grid(True, alpha=0.3) + plt.tight_layout() + plt.savefig(output, dpi=150, bbox_inches='tight') + plt.close() + +# 使用 +data = pd.read_excel('D:/销售数据.xlsx') +bar_chart(data, '月份', '销售额', title='月度销售') +line_chart(data, '月份', '销售额', title='销售趋势') +pie_chart(data.head(10), '产品', '销售额', title='产品占比') +``` + +## 四、seaborn 高级图表 + +```python +import seaborn as sns + +# 1. 分类柱状图 +def categorical_bar(data, x_col, y_col, hue_col, title='', output='cat_bar.png'): + fig, ax = plt.subplots(figsize=(12, 6)) + sns.barplot(data=data, x=x_col, y=y_col, hue=hue_col, ax=ax, palette='Set2') + ax.set_title(title, fontsize=16, fontweight='bold') + plt.xticks(rotation=45, ha='right') + plt.tight_layout() + plt.savefig(output, dpi=150, bbox_inches='tight') + plt.close() + +# 2. 箱线图 +def box_plot(data, x_col, y_col, title='', output='box.png'): + fig, ax = plt.subplots(figsize=(10, 6)) + sns.boxplot(data=data, x=x_col, y=y_col, ax=ax, palette='Set3') + ax.set_title(title, fontsize=16, fontweight='bold') + plt.tight_layout() + plt.savefig(output, dpi=150, bbox_inches='tight') + plt.close() + +# 3. 热力图(相关性分析) +def heatmap(data, output='heatmap.png'): + fig, ax = plt.subplots(figsize=(10, 8)) + corr = data.corr() + sns.heatmap(corr, annot=True, fmt='.2f', cmap='coolwarm', + center=0, ax=ax, square=True) + ax.set_title('特征相关性热力图', fontsize=16, fontweight='bold') + plt.tight_layout() + plt.savefig(output, dpi=150, bbox_inches='tight') + plt.close() + +# 4. 
配对图 +def pair_plot(data, hue_col, output='pair.png'): + sns.pairplot(data, hue=hue_col, palette='Set2', height=2.5) + plt.savefig(output, dpi=150, bbox_inches='tight') + plt.close() +``` + +## 五、plotly 交互式图表 + +### 安装 + +```bash +pip install plotly dash +``` + +### 交互式图表 + +```python +import plotly.graph_objects as go +import plotly.express as px + +# 1. 交互式柱状图 +def interactive_bar(data, x_col, y_col, title=''): + fig = px.bar(data, x=x_col, y=y_col, + title=title, + color=y_col, + color_continuous_scale='Viridis', + hover_data=[x_col, y_col]) + fig.update_layout( + title_font_size=20, + xaxis_tickangle=-45, + height=600, + ) + return fig + +# 2. 交互式折线图 +def interactive_line(data, x_col, y_col, title=''): + fig = px.line(data, x=x_col, y=y_col, + title=title, + markers=True, + hover_data=[x_col, y_col]) + fig.update_traces(line=dict(width=3)) + return fig + +# 3. 交互式散点图 +def interactive_scatter(data, x_col, y_col, color_col, size_col=None, title=''): + fig = px.scatter(data, x=x_col, y=y_col, + color=color_col, + size=size_col, + title=title, + hover_data=data.columns, + trendline='ols') # 添加趋势线 + return fig + +# 4. 桑基图(Sankey) +def sankey_diagram(data, source_col, target_col, value_col): + # 提取唯一值 + sources = data[source_col].unique() + targets = data[target_col].unique() + all_nodes = list(set(list(sources) + list(targets))) + + # 创建索引 + node_dict = {node: i for i, node in enumerate(all_nodes)} + + # 准备数据 + source_indices = [node_dict[s] for s in data[source_col]] + target_indices = [node_dict[t] for t in data[target_col]] + values = data[value_col] + + fig = go.Figure(data=[go.Sankey( + node=dict( + pad=15, + thickness=20, + line=dict(color="black", width=0.5), + label=all_nodes, + ), + link=dict( + source=source_indices, + target=target_indices, + value=values, + ) + )]) + + return fig + +# 5. 
保存为HTML +def save_interactive(fig, output='chart.html'): + fig.write_html(output) + +# 使用 +data = pd.read_excel('D:/销售数据.xlsx') +fig = interactive_bar(data, '月份', '销售额', '月度销售') +save_interactive(fig, 'D:/交互式图表.html') +``` + +## 六、Dash 数据应用 + +```python +import dash +from dash import dcc, html +from dash.dependencies import Input, Output +import plotly.express as px +import pandas as pd + +# 加载数据 +df = pd.read_excel('D:/销售数据.xlsx') + +# 创建Dash应用 +app = dash.Dash(__name__) + +app.layout = html.Div([ + html.H1('销售数据分析仪表盘', style={'textAlign': 'center'}), + + html.Div([ + html.Label('选择图表类型:'), + dcc.Dropdown( + id='chart-type', + options=[ + {'label': '柱状图', 'value': 'bar'}, + {'label': '折线图', 'value': 'line'}, + {'label': '饼图', 'value': 'pie'}, + ], + value='bar' + ), + ], style={'width': '30%', 'margin': 'auto'}), + + dcc.Graph(id='main-chart'), +]) + +@app.callback( + Output('main-chart', 'figure'), + Input('chart-type', 'value') +) +def update_chart(chart_type): + if chart_type == 'bar': + fig = px.bar(df, x='月份', y='销售额', title='月度销售') + elif chart_type == 'line': + fig = px.line(df, x='月份', y='销售额', title='销售趋势') + else: + fig = px.pie(df, values='销售额', names='产品', title='产品占比') + + return fig + +if __name__ == '__main__': + app.run_server(debug=True, port=8050) +``` + +## 七、相关资源 + +- [python-office 官方文档](https://www.python-office.com) - Python自动化办公库 +- [AI + 自动化办公课程](https://mp.weixin.qq.com/s/Z3WhrmYeavrCw_FOXgiDPA) - 35讲AI办公自动化实战 +- [Excel 数据可视化](../excel/chart-generation.md) - Excel图表 +- [销售报表自动化](./sales-report-automation.md) - 实战案例 +- 📺 视频课程:[数据分析30讲](https://www.python-office.com/course-002/30-Excel/30-Excel.html) +- 👥 技术交流:[加入讨论](https://www.python4office.cn/wechat-qrcode/) + +## 总结 + +数据可视化是数据分析师的核心技能: +- 📊 matplotlib:基础图表,必学 +- 🎨 seaborn:美化统计图 +- 🔄 plotly:交互式图表 +- 🌐 dash:数据应用开发 + +**核心思路**:从基础到高级,从静态到交互,从图表到仪表盘。掌握这些工具,让数据讲故事更生动。 \ No newline at end of file diff --git a/docs-pages/vuepress/tips/best-practices/file-management-automation.md 
b/docs-pages/vuepress/tips/best-practices/file-management-automation.md new file mode 100644 index 0000000..27f0d1b --- /dev/null +++ b/docs-pages/vuepress/tips/best-practices/file-management-automation.md @@ -0,0 +1,317 @@ +# 文件管理自动化:让Python帮你整理混乱的文件夹 + +每个人的电脑里都有"Downloads"和"Desktop"这样混乱的文件夹。本文介绍如何用Python自动化文件管理,让你的电脑永远井井有条。 + +## 一、文件管理痛点 + +| 痛点 | 表现 | +|------|------| +| 文件混乱 | 桌面几百个文件 | +| 重复文件 | 浪费存储空间 | +| 命名不规范 | 难以搜索 | +| 分类耗时 | 手动归档 | +| 备份繁琐 | 经常忘记 | + +## 二、自动化方案 + +### 1. 一行代码:智能整理下载文件夹 + +```python +import office + +# 一键整理下载文件夹 +office.file.organize( + folder='D:/Downloads', + rules='by_type', # 按类型分类 + output='D:/整理后' +) +``` + +### 2. 自定义整理规则 + +```python +import os +import shutil +from pathlib import Path +from datetime import datetime + +class FileOrganizer: + """文件自动整理器""" + + # 文件类型分类规则 + RULES = { + '📷 图片': ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.webp'], + '📄 文档': ['.pdf', '.doc', '.docx', '.txt', '.md', '.rtf'], + '📊 表格': ['.xlsx', '.xls', '.csv'], + '📑 演示': ['.ppt', '.pptx'], + '🎬 视频': ['.mp4', '.avi', '.mov', '.mkv', '.flv'], + '🎵 音频': ['.mp3', '.wav', '.flac', '.aac'], + '📦 压缩包': ['.zip', '.rar', '.7z', '.tar', '.gz'], + '💻 代码': ['.py', '.js', '.java', '.cpp', '.c', '.html', '.css'], + '🔧 可执行': ['.exe', '.msi', '.dmg', '.pkg'], + } + + def __init__(self, target_folder, output_folder=None): + self.target_folder = Path(target_folder) + self.output_folder = Path(output_folder or target_folder) + + def organize_by_type(self): + """按文件类型整理""" + moved = 0 + + for file in self.target_folder.iterdir(): + if file.is_file(): + category = self.get_category(file) + if category: + dest_dir = self.output_folder / category + dest_dir.mkdir(exist_ok=True) + + dest = dest_dir / file.name + if dest.exists(): + dest = self.unique_name(dest) + + shutil.move(str(file), str(dest)) + moved += 1 + print(f"✓ {file.name} -> {category}/") + + return moved + + def organize_by_date(self): + """按修改日期整理""" + for file in 
self.target_folder.iterdir(): + if file.is_file(): + mtime = datetime.fromtimestamp(file.stat().st_mtime) + folder_name = mtime.strftime('%Y年%m月') + + dest_dir = self.output_folder / folder_name + dest_dir.mkdir(exist_ok=True) + + dest = dest_dir / file.name + if dest.exists(): + dest = self.unique_name(dest) + + shutil.move(str(file), str(dest)) + print(f"✓ {file.name} -> {folder_name}/") + + def organize_by_size(self): + """按文件大小整理""" + for file in self.target_folder.iterdir(): + if file.is_file(): + size = file.stat().st_size + if size < 1024 * 1024: # < 1MB + category = '小文件' + elif size < 100 * 1024 * 1024: # < 100MB + category = '中等文件' + else: + category = '大文件' + + dest_dir = self.output_folder / category + dest_dir.mkdir(exist_ok=True) + + dest = dest_dir / file.name + if dest.exists(): + dest = self.unique_name(dest) + + shutil.move(str(file), str(dest)) + + def get_category(self, file): + """获取文件分类""" + ext = file.suffix.lower() + for category, exts in self.RULES.items(): + if ext in exts: + return category + return None + + def unique_name(self, path): + """生成唯一文件名""" + if not path.exists(): + return path + stem, suffix = path.stem, path.suffix + i = 1 + while True: + new_name = path.parent / f'{stem}_{i}{suffix}' + if not new_name.exists(): + return new_name + i += 1 + +# 使用 +organizer = FileOrganizer('D:/Downloads') +organizer.organize_by_type() # 按类型 +``` + +## 三、重复文件清理 + +```python +class DuplicateCleaner: + """重复文件清理器""" + + def __init__(self, folder): + self.folder = Path(folder) + self.hashes = {} + + def find_duplicates(self): + """查找重复文件""" + import hashlib + + for file in self.folder.rglob('*'): + if file.is_file(): + file_hash = self.get_hash(file) + + if file_hash in self.hashes: + self.hashes[file_hash].append(file) + else: + self.hashes[file_hash] = [file] + + # 过滤出有重复的 + duplicates = {h: files for h, files in self.hashes.items() if len(files) > 1} + return duplicates + + def get_hash(self, file_path): + """计算文件哈希""" + import hashlib + h = 
hashlib.md5() + with open(file_path, 'rb') as f: + for chunk in iter(lambda: f.read(4096), b''): + h.update(chunk) + return h.hexdigest() + + def remove_duplicates(self, dry_run=True): + """删除重复文件(保留第一个)""" + duplicates = self.find_duplicates() + removed = 0 + freed_space = 0 + + for file_hash, files in duplicates.items(): + # 保留第一个,删除其余 + for file in files[1:]: + size = file.stat().st_size + if dry_run: + print(f"[预览] 删除: {file}") + else: + file.unlink() + print(f"✓ 删除: {file}") + removed += 1 + freed_space += size + + print(f"\n{'预览' if dry_run else ''}共删除 {removed} 个重复文件") + print(f"释放空间: {freed_space / 1024 / 1024:.2f} MB") + return removed + +# 使用 +cleaner = DuplicateCleaner('D:/Downloads') +# 先预览 +cleaner.remove_duplicates(dry_run=True) +# 确认后执行 +cleaner.remove_duplicates(dry_run=False) +``` + +## 四、智能重命名 + +```python +class SmartRenamer: + """智能重命名器""" + + @staticmethod + def batch_rename(folder, pattern, replacement, extension='*'): + """批量重命名""" + import glob + files = glob.glob(f'{folder}/{extension}') + renamed = 0 + + for file in files: + filename = os.path.basename(file) + if pattern in filename: + new_name = filename.replace(pattern, replacement) + new_path = os.path.join(os.path.dirname(file), new_name) + os.rename(file, new_path) + renamed += 1 + print(f"✓ {filename} -> {new_name}") + + return renamed + + @staticmethod + def add_prefix(folder, prefix, extension='*'): + """添加前缀""" + import glob + files = glob.glob(f'{folder}/{extension}') + + for file in files: + filename = os.path.basename(file) + new_name = prefix + filename + new_path = os.path.join(os.path.dirname(file), new_name) + os.rename(file, new_path) + + @staticmethod + def add_date_prefix(folder, date_format='%Y%m%d', extension='*'): + """添加日期前缀""" + import glob + today = datetime.now().strftime(date_format) + files = glob.glob(f'{folder}/{extension}') + + for file in files: + filename = os.path.basename(file) + if not filename.startswith(today): + new_name = f'{today}_{filename}' + 
new_path = os.path.join(os.path.dirname(file), new_name) + os.rename(file, new_path) + +# 使用 +renamer = SmartRenamer() +renamer.batch_rename('D:/照片', 'IMG_', '旅行_') # 替换前缀 +``` + +## 五、自动备份 + +```python +import schedule +import time + +def auto_backup(): + """自动备份任务""" + import shutil + from datetime import datetime + + today = datetime.now().strftime('%Y%m%d') + backup_root = Path('D:/备份') + backup_root.mkdir(exist_ok=True) + + backup_folder = backup_root / today + backup_folder.mkdir(exist_ok=True) + + # 备份重要文件夹 + sources = [ + ('D:/工作文档', backup_folder / '文档'), + ('D:/重要照片', backup_folder / '照片'), + ] + + for source, dest in sources: + if Path(source).exists(): + shutil.copytree(source, dest, dirs_exist_ok=True) + print(f"✓ 已备份: {source}") + +# 每天18:00执行 +schedule.every().day.at('18:00').do(auto_backup) + +while True: + schedule.run_pending() + time.sleep(60) +``` + +## 六、相关资源 + +- [python-office 官方文档](https://www.python-office.com) - Python自动化办公库 +- [AI + 自动化办公课程](https://mp.weixin.qq.com/s/Z3WhrmYeavrCw_FOXgiDPA) - 35讲AI办公自动化实战 +- [Python 批量处理文件](../python/batch-processing.md) - 进阶技巧 +- [工作效率提升工具箱](./productivity-tools.md) - 效率工具 +- 📺 视频课程:[自动化办公50讲](https://www.python-office.com/course/50-python-office.html) +- 👥 技术交流:[加入读者群](https://www.python4office.cn/wechat-qrcode/) + +## 总结 + +通过Python自动化文件管理: +- 📁 文件夹永远井井有条 +- 💾 自动清理重复文件 +- 🏷️ 智能命名规范 +- 🔄 自动备份无忧 + +**核心思路**:用 pathlib 操作文件,用 hashlib 查重,用 shutil 整理,用 schedule 定时。让Python成为你的私人文件管家。 \ No newline at end of file diff --git a/docs-pages/vuepress/tips/best-practices/finance-report-automation.md b/docs-pages/vuepress/tips/best-practices/finance-report-automation.md new file mode 100644 index 0000000..344e9be --- /dev/null +++ b/docs-pages/vuepress/tips/best-practices/finance-report-automation.md @@ -0,0 +1,366 @@ +# 财务报表自动化:用Python生成月报季报,告别加班 + +财务人员每月最痛苦的事就是做月报、季报。本文介绍如何用Python自动化财务报表生成,让财务工作变得轻松高效。 + +## 一、场景痛点 + +财务人员日常报表工作的痛点: + +| 痛点 | 表现 | 影响 | +|------|------|------| +| 数据分散 | 多系统、多Excel | 整理耗时 | +| 公式复杂 
| 跨表关联、汇总 | 容易出错 | +| 重复劳动 | 月报、季报、半年报 | 浪费人力 | +| 格式不统一 | 多人协作 | 沟通成本高 | +| 截止时间紧 | 月末/月初几天 | 频繁加班 | + +## 二、自动化方案 + +### 方案架构 + +``` +数据源 → 数据清洗 → 数据汇总 → 报表生成 → 审核校验 → 邮件分发 + ↓ ↓ ↓ ↓ ↓ ↓ + ERP pandas pandas openpyxl 校验函数 邮件 + Excel 数据清洗 分组聚合 图表生成 +``` + +## 三、核心代码实现 + +### 安装依赖 + +```bash +pip install pandas openpyxl python-office +``` + +### 月度财务报表 + +```python +import pandas as pd +import office +from datetime import datetime +from pathlib import Path + +class MonthlyFinanceReport: + """月度财务报表生成器""" + + def __init__(self, data_folder='D:/财务数据', output_folder='D:/财务报表'): + self.data_folder = Path(data_folder) + self.output_folder = Path(output_folder) + self.output_folder.mkdir(parents=True, exist_ok=True) + self.month = datetime.now().strftime('%Y-%m') + + def generate(self): + """生成月报""" + print(f"📊 生成 {self.month} 财务报表...") + + # 1. 加载数据 + income = self.load_income_data() # 收入数据 + expense = self.load_expense_data() # 支出数据 + balance = self.load_balance_data() # 资产负债 + + # 2. 计算关键指标 + kpi = self.calculate_kpi(income, expense, balance) + + # 3. 生成三大报表 + income_statement = self.generate_income_statement(income, expense) + balance_sheet = self.generate_balance_sheet(balance) + cash_flow = self.generate_cash_flow(income, expense) + + # 4. 生成报告 + report_path = self.create_report(kpi, income_statement, balance_sheet, cash_flow) + + # 5. 转换为PDF + pdf_path = self.convert_to_pdf(report_path) + + # 6. 
邮件发送 + self.send_to_management(pdf_path, kpi) + + print(f"✅ 财务报表已生成: {pdf_path}") + return kpi + + def load_income_data(self): + """加载收入数据""" + file_path = self.data_folder / f'收入明细_{self.month}.xlsx' + if not file_path.exists(): + return pd.DataFrame(columns=['日期', '客户', '产品', '金额', '类型']) + return pd.read_excel(file_path) + + def load_expense_data(self): + """加载支出数据""" + file_path = self.data_folder / f'支出明细_{self.month}.xlsx' + if not file_path.exists(): + return pd.DataFrame(columns=['日期', '部门', '类别', '金额']) + return pd.read_excel(file_path) + + def load_balance_data(self): + """加载资产负债数据""" + file_path = self.data_folder / f'资产负债表_{self.month}.xlsx' + if not file_path.exists(): + return pd.DataFrame() + return pd.read_excel(file_path) + + def calculate_kpi(self, income, expense, balance): + """计算KPI""" + total_income = income['金额'].sum() if not income.empty else 0 + total_expense = expense['金额'].sum() if not expense.empty else 0 + net_profit = total_income - total_expense + profit_margin = (net_profit / total_income * 100) if total_income > 0 else 0 + + return { + '总收入': total_income, + '总支出': total_expense, + '净利润': net_profit, + '利润率': profit_margin, + '收入笔数': len(income), + '支出笔数': len(expense), + } + + def generate_income_statement(self, income, expense): + """生成利润表""" + statement = [] + + # 收入部分 + if not income.empty: + income_by_type = income.groupby('类型')['金额'].sum() + for type_name, amount in income_by_type.items(): + statement.append({ + '科目': f'营业收入-{type_name}', + '类型': '收入', + '金额': amount + }) + + statement.append({ + '科目': '营业总收入', + '类型': '小计', + '金额': income['金额'].sum() if not income.empty else 0 + }) + + # 支出部分 + if not expense.empty: + expense_by_category = expense.groupby('类别')['金额'].sum() + for category, amount in expense_by_category.items(): + statement.append({ + '科目': f'营业支出-{category}', + '类型': '支出', + '金额': amount + }) + + statement.append({ + '科目': '营业总支出', + '类型': '小计', + '金额': expense['金额'].sum() if not expense.empty else 0 + }) + + 
# 净利润 + total_income = income['金额'].sum() if not income.empty else 0 + total_expense = expense['金额'].sum() if not expense.empty else 0 + statement.append({ + '科目': '净利润', + '类型': '合计', + '金额': total_income - total_expense + }) + + return pd.DataFrame(statement) + + def generate_balance_sheet(self, balance): + """生成资产负债表""" + if balance.empty: + return pd.DataFrame() + return balance + + def generate_cash_flow(self, income, expense): + """生成现金流量表""" + cash_flow = pd.DataFrame() + + if not income.empty: + cash_in = pd.DataFrame({ + '项目': '现金流入', + '明细': income['类型'].unique(), + '金额': income.groupby('类型')['金额'].sum().values + }) + cash_flow = pd.concat([cash_flow, cash_in], ignore_index=True) + + if not expense.empty: + cash_out = pd.DataFrame({ + '项目': '现金流出', + '明细': expense['类别'].unique(), + '金额': expense.groupby('类别')['金额'].sum().values + }) + cash_flow = pd.concat([cash_flow, cash_out], ignore_index=True) + + return cash_flow + + def create_report(self, kpi, income_statement, balance_sheet, cash_flow): + """生成完整报告""" + output_path = self.output_folder / f'财务报表_{self.month}.xlsx' + + with pd.ExcelWriter(output_path, engine='openpyxl') as writer: + # 1. KPI概览 + kpi_df = pd.DataFrame({ + '指标': ['总收入', '总支出', '净利润', '利润率', '收入笔数', '支出笔数'], + '金额': [ + f"¥{kpi['总收入']:,.2f}", + f"¥{kpi['总支出']:,.2f}", + f"¥{kpi['净利润']:,.2f}", + f"{kpi['利润率']:.2f}%", + kpi['收入笔数'], + kpi['支出笔数'] + ] + }) + kpi_df.to_excel(writer, sheet_name='KPI概览', index=False) + + # 2. 利润表 + income_statement.to_excel(writer, sheet_name='利润表', index=False) + + # 3. 资产负债表 + if not balance_sheet.empty: + balance_sheet.to_excel(writer, sheet_name='资产负债表', index=False) + + # 4. 
现金流量表 + if not cash_flow.empty: + cash_flow.to_excel(writer, sheet_name='现金流量表', index=False) + + return output_path + + def convert_to_pdf(self, excel_path): + """转换为PDF""" + pdf_path = excel_path.with_suffix('.pdf') + office.excel2pdf(str(excel_path), str(pdf_path.parent)) + return pdf_path + + def send_to_management(self, pdf_path, kpi): + """发送给管理层""" + content = f""" + 管理层好, + + {self.month} 财务报表已生成,请查收。 + + 💰 关键财务数据: + - 总收入: ¥{kpi['总收入']:,.2f} + - 总支出: ¥{kpi['总支出']:,.2f} + - 净利润: ¥{kpi['净利润']:,.2f} + - 利润率: {kpi['利润率']:.2f}% + + 详细报表请查看附件。 + + 自动化生成 by python-office + """ + + office.email.send( + smtp_server='smtp.qq.com', + smtp_port=465, + email='finance@company.com', + password='your_auth_code', + to_email='ceo@company.com,cfo@company.com', + subject=f'【财务报表】{self.month}', + content=content, + attachment=str(pdf_path) + ) + +# 使用 +report = MonthlyFinanceReport() +kpi = report.generate() +``` + +## 四、季度报告(合并三个月) + +```python +class QuarterlyReport: + """季度报告""" + + def __init__(self, year, quarter): + self.year = year + self.quarter = quarter + self.months = self.get_quarter_months() + + def get_quarter_months(self): + """获取季度月份""" + quarter_map = { + 1: [1, 2, 3], + 2: [4, 5, 6], + 3: [7, 8, 9], + 4: [10, 11, 12] + } + return [f'{self.year}-{m:02d}' for m in quarter_map[self.quarter]] + + def generate(self): + """生成季报""" + # 合并三个月的数据 + all_income = pd.DataFrame() + all_expense = pd.DataFrame() + + for month in self.months: + income = pd.read_excel(f'D:/财务数据/收入明细_{month}.xlsx') + expense = pd.read_excel(f'D:/财务数据/支出明细_{month}.xlsx') + all_income = pd.concat([all_income, income]) + all_expense = pd.concat([all_expense, expense]) + + # 季度汇总 + quarterly_summary = { + '总收入': all_income['金额'].sum(), + '总支出': all_expense['金额'].sum(), + '净利润': all_income['金额'].sum() - all_expense['金额'].sum(), + '季度': f'Q{self.quarter}', + '年份': self.year, + } + + # 生成报告... 
+ return quarterly_summary +``` + +## 五、数据校验 + +```python +def validate_finance_data(self, income, expense): + """数据校验""" + errors = [] + + # 1. 检查必填字段 + required_income_fields = ['日期', '客户', '金额'] + for field in required_income_fields: + if field not in income.columns: + errors.append(f'收入数据缺少字段: {field}') + + # 2. 检查金额合理性 + if not income.empty: + if income['金额'].min() < 0: + errors.append('收入金额不能为负数') + if income['金额'].max() > 100000000: # 1亿 + errors.append('收入金额异常,请检查') + + # 3. 检查日期范围 + if not income.empty: + income['日期'] = pd.to_datetime(income['日期']) + today = datetime.now() + if income['日期'].max() > today: + errors.append('存在未来日期的记录') + + if errors: + print("⚠️ 数据校验失败:") + for e in errors: + print(f" - {e}") + return False + + print("✅ 数据校验通过") + return True +``` + +## 六、相关资源 + +- [python-office 官方文档](https://www.python-office.com) - Python自动化办公库 +- [AI + 自动化办公课程](https://mp.weixin.qq.com/s/Z3WhrmYeavrCw_FOXgiDPA) - 35讲AI办公自动化实战 +- [销售报表自动化](./sales-report-automation.md) - 销售场景 +- [Excel 数据可视化](../excel/chart-generation.md) - 报表图表 +- 📺 视频课程:[自动化办公50讲](https://www.python-office.com/course/50-python-office.html) +- 👥 技术交流:[加入讨论](https://www.python4office.cn/wechat-qrcode/) + +## 总结 + +通过Python自动化财务报表,我们实现了: +- ⏰ 每月节省10+小时 +- 🎯 报表数据100%准确 +- 📊 自动生成可视化图表 +- 📧 自动邮件分发 + +**核心思路**:用 pandas 处理数据,用 openpyxl 生成格式,用 python-office 简化转换,用邮件实现分发。财务自动化是性价比最高的投资之一。 \ No newline at end of file diff --git a/docs-pages/vuepress/tips/best-practices/hr-resume-screening.md b/docs-pages/vuepress/tips/best-practices/hr-resume-screening.md new file mode 100644 index 0000000..662a136 --- /dev/null +++ b/docs-pages/vuepress/tips/best-practices/hr-resume-screening.md @@ -0,0 +1,390 @@ +# HR简历筛选自动化:批量处理上万份简历,3分钟搞定 + +HR每天要处理大量简历,使用Python自动化简历筛选,可以从1万份简历中快速找出匹配的候选人,效率提升100倍。 + +## 一、场景痛点 + +招聘HR的日常: + +| 痛点 | 数据 | +|------|------| +| 每天收到简历 | 100-1000份 | +| 筛选耗时 | 2-3小时 | +| 漏掉优秀候选人 | 经常发生 | +| 重复简历 | 30% | +| 沟通成本 | 高 | + +## 二、自动化方案 + +### 方案流程 + +``` +简历文件 → 解析内容 → 提取关键信息 → 评分匹配 → 
输出结果 → 邮件通知 + ↓ ↓ ↓ ↓ ↓ ↓ + PDF/Word pdfplumber 正则表达式 权重计算 Excel email + python-docx +``` + +## 三、完整代码实现 + +### 安装依赖 + +```bash +pip install pdfplumber python-docx pandas python-office +``` + +### 简历解析器 + +```python +import pdfplumber +from docx import Document +import pandas as pd +import re +from pathlib import Path + +class ResumeParser: + """简历解析器""" + + def __init__(self, folder_path): + self.folder_path = Path(folder_path) + self.resumes = [] + + def parse_all(self): + """解析所有简历""" + for file in self.folder_path.glob('*'): + if file.suffix.lower() == '.pdf': + content = self.parse_pdf(file) + elif file.suffix.lower() in ['.docx', '.doc']: + content = self.parse_docx(file) + else: + continue + + # 提取关键信息 + info = self.extract_info(content, file.name) + self.resumes.append(info) + + return pd.DataFrame(self.resumes) + + def parse_pdf(self, file_path): + """解析PDF简历""" + content = '' + with pdfplumber.open(file_path) as pdf: + for page in pdf.pages: + content += page.extract_text() or '' + return content + + def parse_docx(self, file_path): + """解析Word简历""" + doc = Document(file_path) + return '\n'.join([p.text for p in doc.paragraphs]) + + def extract_info(self, content, filename): + """提取简历关键信息""" + return { + '文件名': filename, + '姓名': self.extract_name(content), + '电话': self.extract_phone(content), + '邮箱': self.extract_email(content), + '学历': self.extract_education(content), + '工作年限': self.extract_years(content), + '技能': self.extract_skills(content), + '期望薪资': self.extract_salary(content), + '最近公司': self.extract_company(content), + '内容': content, + } + + def extract_name(self, content): + """提取姓名""" + # 简单正则:匹配"姓名:xxx"或"Name: xxx" + patterns = [ + r'姓名[::]\s*([^\s\n]{2,4})', + r'Name[::]\s*([^\s\n]{2,20})', + ] + for p in patterns: + m = re.search(p, content) + if m: + return m.group(1).strip() + return '未知' + + def extract_phone(self, content): + """提取电话""" + pattern = r'1[3-9]\d{9}' + m = re.search(pattern, content) + return m.group() if m else '' + + def 
extract_email(self, content): + """提取邮箱""" + pattern = r'[\w.-]+@[\w.-]+\.\w+' + m = re.search(pattern, content) + return m.group() if m else '' + + def extract_education(self, content): + """提取学历""" + edu_keywords = ['博士', '硕士', '本科', '大专', 'MBA', 'PhD', 'Master', 'Bachelor'] + for kw in edu_keywords: + if kw in content: + return kw + return '未知' + + def extract_years(self, content): + """提取工作年限""" + # 匹配"X年工作经验"或"X years" + patterns = [ + r'(\d+)\s*年.*?工作经验', + r'(\d+)\s*years?', + ] + for p in patterns: + m = re.search(p, content) + if m: + return int(m.group(1)) + return 0 + + def extract_skills(self, content): + """提取技能""" + skill_keywords = [ + 'Python', 'Java', 'JavaScript', 'C++', 'Go', 'Rust', + 'React', 'Vue', 'Angular', 'Django', 'Flask', 'Spring', + 'MySQL', 'PostgreSQL', 'MongoDB', 'Redis', + 'Docker', 'Kubernetes', 'AWS', 'Azure', + 'Machine Learning', 'Deep Learning', 'AI', + ] + found = [s for s in skill_keywords if s.lower() in content.lower()] + return ', '.join(found) + + def extract_salary(self, content): + """提取期望薪资""" + patterns = [ + r'期望薪资[::]\s*(\d+)', + r'薪资期望[::]\s*(\d+)', + r'(\d+)K', + r'(\d+)-(\d+)万', + ] + for p in patterns: + m = re.search(p, content) + if m: + return m.group(0) + return '面议' + + def extract_company(self, content): + """提取最近公司""" + patterns = [ + r'最近公司[::]\s*([^\n]+)', + r'现公司[::]\s*([^\n]+)', + ] + for p in patterns: + m = re.search(p, content) + if m: + return m.group(1).strip()[:20] + return '未知' +``` + +### 简历评分器 + +```python +class ResumeScorer: + """简历评分器""" + + def __init__(self, requirements): + """ + :param requirements: 职位要求字典 + """ + self.requirements = requirements + + def score(self, resume): + """对单个简历评分""" + score = 0 + reasons = [] + + # 1. 
技能匹配(40分) + required_skills = self.requirements.get('skills', []) + if required_skills: + resume_skills = resume.get('技能', '').split(', ') + matched = [s for s in required_skills if s in resume_skills] + skill_score = len(matched) / len(required_skills) * 40 + score += skill_score + if matched: + reasons.append(f'✓ 技能匹配: {", ".join(matched)}') + + # 2. 学历匹配(20分) + edu_required = self.requirements.get('education', '') + if edu_required and resume.get('学历') in edu_required: + score += 20 + reasons.append(f'✓ 学历匹配: {resume.get("学历")}') + + # 3. 工作年限(20分) + min_years = self.requirements.get('min_years', 0) + if resume.get('工作年限', 0) >= min_years: + score += 20 + reasons.append(f'✓ 工作年限: {resume.get("工作年限")}年') + + # 4. 联系方式(10分) + if resume.get('电话'): + score += 5 + if resume.get('邮箱'): + score += 5 + + # 5. 内容质量(10分) + content_length = len(resume.get('内容', '')) + if content_length > 500: + score += 10 + reasons.append('✓ 简历内容详实') + + resume['评分'] = round(score, 1) + resume['匹配理由'] = '; '.join(reasons) + return resume + + def filter_resumes(self, df, min_score=60): + """筛选合格简历""" + df['评分'] = df.apply(self.score, axis=1) + qualified = df[df['评分'] >= min_score].sort_values('评分', ascending=False) + return qualified +``` + +### 完整使用 + +```python +def screen_resumes(): + """筛选简历完整流程""" + + # 1. 解析所有简历 + parser = ResumeParser('D:/招聘/简历') + df = parser.parse_all() + print(f"共解析 {len(df)} 份简历") + + # 2. 设置职位要求 + job_requirements = { + 'skills': ['Python', 'Django', 'MySQL', 'Docker'], + 'education': ['本科', '硕士', '博士'], + 'min_years': 3, + } + + # 3. 评分筛选 + scorer = ResumeScorer(job_requirements) + qualified = scorer.filter_resumes(df, min_score=60) + + # 4. 去重(按电话和邮箱) + qualified = qualified.drop_duplicates(subset=['电话', '邮箱'], keep='first') + + # 5. 输出Excel + output_path = 'D:/招聘/筛选结果.xlsx' + qualified[['姓名', '电话', '邮箱', '学历', '工作年限', + '最近公司', '技能', '评分', '匹配理由']].to_excel( + output_path, index=False + ) + print(f"筛选出 {len(qualified)} 份合格简历") + + # 6. 
批量通知面试 + notify_candidates(qualified) + + return qualified + +def notify_candidates(df): + """通知候选人""" + import python-office + + for _, resume in df.iterrows(): + content = f""" + {resume['姓名']} 您好, + + 我们看到了您的简历,认为您非常适合我们的岗位。 + + 您的简历评分: {resume['评分']} + 匹配理由: {resume['匹配理由']} + + 请回复邮件预约面试时间。 + + Best regards, + HR Team + """ + + if resume.get('邮箱'): + python-office.email.send( + smtp_server='smtp.qq.com', + smtp_port=465, + email='hr@company.com', + password='your_auth_code', + to_email=resume['邮箱'], + subject='【面试邀请】您的简历已通过初筛', + content=content + ) + +# 运行 +qualified = screen_resumes() +``` + +## 四、高级功能 + +### 1. AI 简历评分(结合大模型) + +```python +def ai_score_resume(resume_content, job_description): + """使用AI评分简历""" + import openai + + prompt = f""" + 请根据以下职位描述,对简历进行评分(0-100分)并说明理由: + + 职位描述: + {job_description} + + 简历内容: + {resume_content} + + 请按以下格式输出: + 评分: XX分 + 理由: 1. ... 2. ... 3. ... + """ + + response = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": prompt}], + api_key="your_api_key" + ) + + return response.choices[0].message.content +``` + +### 2. 简历去重 + +```python +def remove_duplicate_resumes(df): + """去除重复简历""" + # 基于姓名+电话去重 + df['key'] = df['姓名'] + df['电话'] + df = df.drop_duplicates(subset='key', keep='first') + df = df.drop('key', axis=1) + return df +``` + +### 3. 
简历统计 + +```python +def generate_statistics(df): + """生成统计报告""" + stats = { + '总数': len(df), + '平均工作年限': df['工作年限'].mean(), + '学历分布': df['学历'].value_counts().to_dict(), + '技能TOP10': df['技能'].str.split(', ').explode().value_counts().head(10).to_dict(), + } + return stats +``` + +## 五、相关资源 + +- [python-office 官方文档](https://www.python-office.com) - Python自动化办公库 +- [AI + 自动化办公课程](https://mp.weixin.qq.com/s/Z3WhrmYeavrCw_FOXgiDPA) - 35讲AI办公自动化实战 +- [PDF 内容提取技巧](../pdf/extract-content.md) - 简历内容提取 +- [邮件自动化](../python/email-automation.md) - 自动通知 +- 📺 视频课程:[自动化办公50讲](https://www.python-office.com/course/50-python-office.html) +- 👥 技术交流:[加入讨论](https://www.python4office.cn/wechat-qrcode/) + +## 总结 + +通过Python自动化简历筛选: +- ⏰ 1000份简历筛选从2小时缩短到3分钟 +- 🎯 AI评分更客观 +- 📧 批量通知候选人 +- 📊 自动统计报告 + +**核心思路**:用 pdfplumber/python-docx 解析简历,用 pandas 处理数据,用正则表达式提取关键信息,用 AI 进行智能评分。HR的工作效率能提升几十倍。 \ No newline at end of file diff --git a/docs-pages/vuepress/tips/best-practices/image-batch-processing.md b/docs-pages/vuepress/tips/best-practices/image-batch-processing.md new file mode 100644 index 0000000..fc59a57 --- /dev/null +++ b/docs-pages/vuepress/tips/best-practices/image-batch-processing.md @@ -0,0 +1,286 @@ +# 图片批量处理:1000张图片一键压缩、加水印、改格式 + +处理大量图片是很多运营、设计师的日常工作。用Python可以批量压缩、加水印、转换格式,效率提升10倍以上。 + +## 一、常见图片处理需求 + +| 需求 | 传统方式 | Python自动化 | +|------|---------|------------| +| 1000张图片压缩 | 1小时 | 1分钟 | +| 批量加水印 | 半天 | 2分钟 | +| 格式转换 | 逐个保存 | 一行代码 | +| 调整尺寸 | 重复操作 | 批量处理 | +| 添加文字 | PS手动 | 自动添加 | + +## 二、python-office 一行代码方案 + +```python +import python-office + +# 一行代码批量压缩 +python-office.image.compress( + folder='D:/照片', + output='D:/压缩后', + quality=80 +) + +# 一键添加水印 +python-office.image.add_text_water( + folder='D:/照片', + text='@程序员晚枫', + output='D:/带水印' +) + +# 一键格式转换 +python-office.image.convert_format( + folder='D:/照片', + target_format='webp', + output='D:/webp格式' +) +``` + +## 三、自定义批量处理 + +### 安装依赖 + +```bash +pip install Pillow python-office +``` + +### 完整图片处理工具 + +```python +from PIL import 
Image, ImageDraw, ImageFont +from pathlib import Path +import python-office + +class ImageBatchProcessor: + """图片批量处理器""" + + def __init__(self, input_folder, output_folder): + self.input_folder = Path(input_folder) + self.output_folder = Path(output_folder) + self.output_folder.mkdir(parents=True, exist_ok=True) + + def compress(self, quality=80, max_size=None): + """批量压缩图片""" + for file in self.input_folder.glob('*'): + if file.suffix.lower() in ['.jpg', '.jpeg', '.png', '.webp']: + img = Image.open(file) + + # 调整大小 + if max_size and (img.width > max_size or img.height > max_size): + img.thumbnail((max_size, max_size), Image.LANCZOS) + + # 压缩保存 + output_path = self.output_folder / file.name + if file.suffix.lower() in ['.jpg', '.jpeg']: + img.save(output_path, 'JPEG', quality=quality, optimize=True) + else: + img.save(output_path, optimize=True) + + print(f"✓ {file.name} -> {img.size}") + + def add_text_watermark(self, text, font_size=30, color=(255, 255, 255), opacity=128, position='bottom-right'): + """添加文字水印""" + for file in self.input_folder.glob('*'): + if file.suffix.lower() in ['.jpg', '.jpeg', '.png']: + img = Image.open(file).convert('RGBA') + + # 创建水印层 + overlay = Image.new('RGBA', img.size, (255, 255, 255, 0)) + draw = ImageDraw.Draw(overlay) + + # 加载字体 + try: + font = ImageFont.truetype('/System/Library/Fonts/PingFang.ttc', font_size) + except: + font = ImageFont.load_default() + + # 计算位置 + bbox = draw.textbbox((0, 0), text, font=font) + text_width = bbox[2] - bbox[0] + text_height = bbox[3] - bbox[1] + + if position == 'bottom-right': + x = img.width - text_width - 20 + y = img.height - text_height - 20 + elif position == 'bottom-left': + x, y = 20, img.height - text_height - 20 + else: # center + x = (img.width - text_width) // 2 + y = (img.height - text_height) // 2 + + # 绘制文字 + draw.text((x, y), text, font=font, fill=color + (opacity,)) + + # 合并 + combined = Image.alpha_composite(img, overlay) + combined = combined.convert('RGB') + + output_path = 
self.output_folder / file.name + combined.save(output_path, quality=90) + print(f"✓ {file.name} 水印已添加") + + def add_image_watermark(self, watermark_path, opacity=0.3, position='bottom-right'): + """添加图片水印""" + watermark = Image.open(watermark_path).convert('RGBA') + + for file in self.input_folder.glob('*'): + if file.suffix.lower() in ['.jpg', '.jpeg', '.png']: + img = Image.open(file).convert('RGBA') + + # 调整水印大小(图片宽度的1/5) + wm_width = img.width // 5 + wm_height = int(watermark.height * wm_width / watermark.width) + watermark_resized = watermark.resize((wm_width, wm_height), Image.LANCZOS) + + # 调整透明度 + alpha = watermark_resized.split()[3] + alpha = alpha.point(lambda p: int(p * opacity)) + watermark_resized.putalpha(alpha) + + # 计算位置 + if position == 'bottom-right': + pos = (img.width - wm_width - 20, img.height - wm_height - 20) + else: + pos = (20, 20) + + # 合并 + img.paste(watermark_resized, pos, watermark_resized) + + output_path = self.output_folder / file.name + img.convert('RGB').save(output_path, quality=90) + print(f"✓ {file.name} 水印已添加") + + def convert_format(self, target_format='webp', quality=85): + """格式转换""" + for file in self.input_folder.glob('*'): + if file.suffix.lower() in ['.jpg', '.jpeg', '.png', '.bmp']: + img = Image.open(file) + + # 修改扩展名 + new_name = file.stem + '.' 
+ target_format + output_path = self.output_folder / new_name + + # 保存为目标格式 + if target_format.lower() in ['jpg', 'jpeg']: + img = img.convert('RGB') + img.save(output_path, 'JPEG', quality=quality) + elif target_format.lower() == 'webp': + img.save(output_path, 'WEBP', quality=quality) + elif target_format.lower() == 'png': + img.save(output_path, 'PNG', optimize=True) + + print(f"✓ {file.name} -> {new_name}") + + def resize(self, width, height=None): + """调整尺寸""" + for file in self.input_folder.glob('*'): + if file.suffix.lower() in ['.jpg', '.jpeg', '.png']: + img = Image.open(file) + + if height is None: + # 按宽度等比例缩放 + ratio = width / img.width + height = int(img.height * ratio) + + img_resized = img.resize((width, height), Image.LANCZOS) + + output_path = self.output_folder / file.name + img_resized.save(output_path, quality=90) + print(f"✓ {file.name} -> {width}x{height}") + +# 使用示例 +processor = ImageBatchProcessor('D:/原始照片', 'D:/处理后') + +# 1. 压缩 +processor.compress(quality=75, max_size=1920) + +# 2. 添加水印 +processor.add_text_watermark('@python-office.com', font_size=36) + +# 3. 转换格式 +processor.convert_format('webp') + +# 4. 调整尺寸 +processor.resize(800, 600) +``` + +## 四、智能图片处理 + +### 1. 
智能裁剪(人脸识别) + +```python +def smart_crop_with_face_detection(input_folder, output_folder): + """基于人脸识别的智能裁剪""" + import face_recognition + + for file in Path(input_folder).glob('*.jpg'): + img = face_recognition.load_image_file(str(file)) + face_locations = face_recognition.face_locations(img) + + if face_locations: + # 找到人脸位置 + top, right, bottom, left = face_locations[0] + + # 以人脸为中心裁剪 + center_x = (left + right) // 2 + center_y = (top + bottom) // 2 + + # 裁剪尺寸 + crop_size = 400 + + pil_img = Image.open(file) + left = max(0, center_x - crop_size // 2) + top = max(0, center_y - crop_size // 2) + right = min(pil_img.width, center_x + crop_size // 2) + bottom = min(pil_img.height, center_y + crop_size // 2) + + pil_img.crop((left, top, right, bottom)).save( + f'{output_folder}/{file.name}' + ) +``` + +### 2. 自动美化 + +```python +def auto_enhance(input_folder, output_folder): + """自动美化图片""" + from PIL import ImageEnhance, ImageFilter + + for file in Path(input_folder).glob('*.jpg'): + img = Image.open(file) + + # 增强对比度 + enhancer = ImageEnhance.Contrast(img) + img = enhancer.enhance(1.2) + + # 增强色彩 + enhancer = ImageEnhance.Color(img) + img = enhancer.enhance(1.1) + + # 锐化 + img = img.filter(ImageFilter.SHARPEN) + + img.save(f'{output_folder}/{file.name}') +``` + +## 五、相关资源 + +- [python-office 官方文档](https://www.python-office.com) - Python自动化办公库 +- [AI + 自动化办公课程](https://mp.weixin.qq.com/s/Z3WhrmYeavrCw_FOXgiDPA) - 35讲AI办公自动化实战 +- [PDF 内容提取](../pdf/extract-content.md) - 从图片提取 +- [OCR 识别技术](../office/ocr.md) - 文字识别 +- 📺 视频课程:[自动化办公50讲](https://www.python-office.com/course/50-python-office.html) +- 👥 技术交流:[加入讨论](https://www.python4office.cn/wechat-qrcode/) + +## 总结 + +通过Python自动化图片处理: +- ⚡ 1000张图片处理从1小时缩短到1分钟 +- 💾 平均压缩率60%,节省存储空间 +- 🎨 风格统一、水印一致 +- 🤖 AI智能识别,提升质量 + +图片自动化是电商运营、内容创作者的必备技能。 \ No newline at end of file diff --git a/docs-pages/vuepress/tips/best-practices/learning-path.md b/docs-pages/vuepress/tips/best-practices/learning-path.md new file mode 100644 index 
0000000..19bc838 --- /dev/null +++ b/docs-pages/vuepress/tips/best-practices/learning-path.md @@ -0,0 +1,354 @@ +# Python自动化办公学习路径:从入门到精通完整指南 + +很多新手不知道如何系统学习Python自动化办公。本文提供一份完整的学习路径,帮助你从零基础到独立开发自动化项目。 + +## 一、学习路线图 + +### 阶段划分 + +| 阶段 | 目标 | 周期 | 难度 | +|------|------|------|------| +| Python基础 | 掌握语法 | 2-4周 | ⭐ | +| 办公库入门 | 学会常用库 | 4-6周 | ⭐⭐ | +| python-office | 一行代码自动化 | 1-2周 | ⭐ | +| 项目实战 | 综合应用 | 持续 | ⭐⭐⭐ | +| AI结合 | AI+办公自动化 | 4-8周 | ⭐⭐⭐⭐ | + +## 二、阶段一:Python基础(2-4周) + +### 必学知识点 + +```python +# 1. 基础语法 +print("Hello, World!") +name = input("请输入你的名字:") +age = 25 +score = 95.5 +is_student = True + +# 2. 数据结构 +fruits = ["苹果", "香蕉", "橙子"] # 列表 +person = {"name": "张三", "age": 25} # 字典 +unique_nums = {1, 2, 3} # 集合 +coordinates = (10, 20) # 元组 + +# 3. 流程控制 +if score >= 90: + print("优秀") +elif score >= 80: + print("良好") +else: + print("继续努力") + +for fruit in fruits: + print(fruit) + +for i in range(10): + print(i) + +# 4. 函数 +def greet(name): + return f"你好,{name}!" + +print(greet("程序员晚枫")) + +# 5. 文件操作 +with open('data.txt', 'r', encoding='utf-8') as f: + content = f.read() +``` + +### 推荐学习资源 + +| 资源 | 链接 | 时长 | +|------|------|------| +| 官方文档 | https://docs.python.org/zh-cn/3/ | 参考 | +| 菜鸟教程 | https://www.runoob.com/python3/ | 30讲 | +| 晚枫15讲 | https://www.python-office.com/course-002/15-Python/15-Python.html | 15讲 | + +## 三、阶段二:办公库入门(4-6周) + +### 五大核心库 + +```bash +# 安装 +pip install pandas openpyxl python-docx PyPDF2 Pillow +``` + +#### 1. pandas - 数据处理 + +```python +import pandas as pd + +# 读取数据 +df = pd.read_excel('data.xlsx') + +# 数据查看 +print(df.head()) # 前5行 +print(df.info()) # 数据信息 +print(df.describe()) # 统计信息 + +# 数据筛选 +filtered = df[df['销售额'] > 10000] + +# 数据分组 +grouped = df.groupby('部门')['销售额'].sum() + +# 数据保存 +df.to_excel('output.xlsx', index=False) +``` + +#### 2. 
openpyxl - Excel操作 + +```python +from openpyxl import load_workbook +from openpyxl.styles import Font, PatternFill, Alignment + +wb = load_workbook('data.xlsx') +ws = wb.active + +# 读取单元格 +value = ws['A1'].value + +# 写入数据 +ws['A1'] = '标题' + +# 设置样式 +ws['A1'].font = Font(bold=True, color='FFFFFF') +ws['A1'].fill = PatternFill('solid', fgColor='2E86AB') + +# 保存 +wb.save('output.xlsx') +``` + +#### 3. python-docx - Word操作 + +```python +from docx import Document + +doc = Document('document.docx') + +# 读取 +for para in doc.paragraphs: + print(para.text) + +# 写入 +doc.add_heading('标题', level=1) +doc.add_paragraph('段落内容') + +# 保存 +doc.save('output.docx') +``` + +#### 4. PyPDF2 - PDF操作 + +```python +from PyPDF2 import PdfMerger, PdfReader + +# 合并PDF +merger = PdfMerger() +merger.append('file1.pdf') +merger.append('file2.pdf') +merger.write('merged.pdf') + +# 提取文字 +reader = PdfReader('document.pdf') +text = '' +for page in reader.pages: + text += page.extract_text() +``` + +#### 5. Pillow - 图片处理 + +```python +from PIL import Image + +img = Image.open('image.jpg') +print(img.size) # 尺寸 +print(img.mode) # 模式 + +# 缩放 +img.thumbnail((800, 600)) + +# 保存 +img.save('output.jpg', quality=85) +``` + +## 四、阶段三:python-office 库(1-2周) + +### 核心功能 + +```python +import python-office as po + +# Excel处理 +po.excel.merge(files=['1月.xlsx', '2月.xlsx'], output='合并.xlsx') +po.excel2pdf(file_path='data.xlsx', output_path='./') + +# Word处理 +po.word.replace(file_path='doc.docx', old_text='旧', new_text='新') +po.word2pdf(file_path='doc.docx', output_path='./') + +# PDF处理 +po.pdf.merge(files=['a.pdf', 'b.pdf'], output='merged.pdf') +po.pdf.extract_text(file_path='doc.pdf', output='text.txt') + +# 图片处理 +po.image.add_text_water(image='img.jpg', text='水印', output='watermarked.jpg') +po.image.compress(folder='./photos', output='./compressed') + +# 邮件 +po.email.send( + smtp_server='smtp.qq.com', + email='your@qq.com', + password='auth_code', + to_email='to@qq.com', + subject='主题', + content='内容' +) + +# 微信 
+po.wechat.send(who='文件传输助手', msg='Hello') + +# PPT +po.ppt2pdf(file_path='slides.pptx', output_path='./') +``` + +## 五、阶段四:项目实战(持续) + +### 推荐项目 + +#### 项目1:销售报表自动化 +- 难度:⭐⭐ +- 技能:Excel + pandas + 邮件 +- 时间:1周 + +#### 项目2:HR简历筛选 +- 难度:⭐⭐⭐ +- 技能:PDF + 邮件 + AI +- 时间:2周 + +#### 项目3:财务对账系统 +- 难度:⭐⭐⭐⭐ +- 技能:Excel + pandas + 数据库 +- 时间:1月 + +#### 项目4:内容发布自动化 +- 难度:⭐⭐⭐ +- 技能:爬虫 + Word + 定时任务 +- 时间:2周 + +## 六、阶段五:AI结合(4-8周) + +### AI办公技能树 + +``` +AI基础 +├── Prompt工程 +├── 大模型API +├── Embeddings +└── Fine-tuning + +AI+办公 +├── AI自动写文档 +├── AI智能分析数据 +├── AI邮件自动回复 +├── AI生成图表 +└── AI辅助决策 +``` + +### 实战示例 + +```python +import openai + +def ai_analyze_data(data_description): + """使用AI分析数据""" + response = openai.ChatCompletion.create( + model="gpt-3.5-turbo", + messages=[{ + "role": "system", + "content": "你是一个数据分析专家" + }, { + "role": "user", + "content": f"请分析以下数据并给出建议:\n{data_description}" + }], + api_key="your_key" + ) + return response.choices[0].message.content + +# 使用 +result = ai_analyze_data("Q4销售额下降10%") +print(result) +``` + +## 七、避坑指南 + +### 常见错误 + +| 错误 | 解决方案 | +|------|---------| +| 编码错误 | 始终指定 encoding='utf-8' | +| 路径问题 | 使用 pathlib.Path | +| 内存溢出 | 分批处理大数据(如 pandas read_csv 的 chunksize) | +| 安装失败 | 使用国内镜像源 | +| 第三方库冲突 | 使用虚拟环境 | + +### 调试技巧 + +```python +# 1. 使用print调试 +print(f"变量值: {variable}") + +# 2. 使用pdb调试 +import pdb +pdb.set_trace() + +# 3. 
使用日志 +import logging +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) +logger.info("调试信息") +``` + +## 八、学习资源汇总 + +### 官方资源 + +- [python-office 官网](https://www.python-office.com) - 库文档 +- [python-office GitHub](https://github.com/CoderWanFeng/python-office) - 源码 + +### 视频课程 + +- [Python入门15讲](https://www.python-office.com/course-002/15-Python/15-Python.html) +- [自动化办公50讲](https://www.python-office.com/course/50-python-office.html) +- [数据分析30讲](https://www.python-office.com/course-002/30-Excel/30-Excel.html) +- [PDF办公10讲](https://www.python-office.com/course-002/10-popdf/10-popdf.html) +- [邮件自动化6讲](https://www.python-office.com/course-002/poemail/poemail.html) +- [AI+自动化办公35讲](https://mp.weixin.qq.com/s/Z3WhrmYeavrCw_FOXgiDPA) ⭐新课 + +### 社区资源 + +- [技术博客](https://python4office.cn) - 程序员晚枫 +- [读者交流群](https://www.python4office.cn/wechat-qrcode/) +- [微信群](https://www.python4office.cn/wechat-group/) + +## 九、相关资源 + +- [python-office 使用指南](./python-office-guide.md) - 库详解 +- [自动化办公框架设计](./automation-framework.md) - 工程化 +- [日常工作流自动化](./workflow-automation.md) - 综合应用 +- 📺 视频课程:[自动化办公50讲](https://www.python-office.com/course/50-python-office.html) +- 👥 技术交流:[加入讨论](https://www.python4office.cn/wechat-qrcode/) + +## 总结 + +学习Python自动化办公的关键: +1. **多动手**:每个知识点都要写代码 +2. **做项目**:用项目巩固知识点 +3. **找同伴**:加入技术交流群 +4. **看源码**:阅读优秀库的实现 +5. 
**跟对人**:跟随实战经验丰富的老师 + +**学习最快的路径:跟着一个完整的项目教程,从头到尾做一遍。** + +推荐从 [AI+自动化办公35讲](https://mp.weixin.qq.com/s/Z3WhrmYeavrCw_FOXgiDPA) 开始,这门课覆盖了AI时代办公自动化的所有核心技能,由浅入深,项目驱动,是快速入门的最佳选择。 \ No newline at end of file diff --git a/docs-pages/vuepress/tips/best-practices/ocr-recognition.md b/docs-pages/vuepress/tips/best-practices/ocr-recognition.md new file mode 100644 index 0000000..f17acaf --- /dev/null +++ b/docs-pages/vuepress/tips/best-practices/ocr-recognition.md @@ -0,0 +1,277 @@ +# OCR 文字识别:Python批量识别图片中的文字 + +将图片中的文字提取出来,是很多场景的需求。本文介绍如何用Python实现OCR文字识别,批量处理扫描件、截图、票据等。 + +## 一、应用场景 + +| 场景 | 描述 | +|------|------| +| 票据识别 | 发票、收据数字化 | +| 文档扫描 | PDF扫描件转Word | +| 截图提取 | 从截图中提取文字 | +| 车牌识别 | 自动识别车牌号 | +| 名片识别 | 名片信息录入 | + +## 二、python-office 一行代码方案 + +```python +import python-office + +# 一键OCR识别 +python-office.ocr.recognize( + image='D:/发票.jpg', + output='D:/识别结果.txt' +) + +# 批量识别 +python-office.ocr.batch_recognize( + folder='D:/扫描件', + output='D:/识别结果' +) +``` + +## 三、自定义OCR实现 + +### 安装依赖 + +```bash +pip install pytesseract Pillow python-office +# 需要先安装 Tesseract OCR +# macOS: brew install tesseract tesseract-lang +# Ubuntu: sudo apt install tesseract-ocr tesseract-ocr-chi-sim +# Windows: 下载安装包 https://github.com/UB-Mannheim/tesseract/wiki +``` + +### 基础OCR识别 + +```python +import pytesseract +from PIL import Image, ImageEnhance, ImageFilter + +class OCRProcessor: + """OCR文字识别器""" + + def __init__(self, lang='chi_sim+eng'): + self.lang = lang + + def recognize(self, image_path): + """识别图片中的文字""" + img = Image.open(image_path) + + # 图像预处理 + img = self.preprocess(img) + + # OCR识别 + text = pytesseract.image_to_string(img, lang=self.lang) + return text + + def preprocess(self, img): + """图像预处理(提高识别率)""" + # 转灰度 + img = img.convert('L') + + # 增强对比度 + enhancer = ImageEnhance.Contrast(img) + img = enhancer.enhance(2.0) + + # 二值化 + threshold = 128 + img = img.point(lambda p: 255 if p > threshold else 0) + + # 去噪 + img = img.filter(ImageFilter.MedianFilter(3)) + + return img + + def 
batch_recognize(self, folder, output_folder): + """批量识别""" + from pathlib import Path + results = {} + + for file in Path(folder).glob('*'): + if file.suffix.lower() in ['.jpg', '.jpeg', '.png', '.bmp']: + text = self.recognize(str(file)) + results[file.name] = text + + # 保存结果 + output_path = Path(output_folder) / f'{file.stem}.txt' + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(text, encoding='utf-8') + print(f"✓ {file.name} -> {output_path.name}") + + return results + +# 使用 +ocr = OCRProcessor(lang='chi_sim+eng') +text = ocr.recognize('D:/发票.jpg') +print(text) +``` + +## 四、发票识别专项 + +```python +import re +from datetime import datetime + +class InvoiceOCR: + """发票OCR识别器""" + + def __init__(self): + self.ocr = OCRProcessor(lang='chi_sim+eng') + + def recognize_invoice(self, image_path): + """识别发票""" + text = self.ocr.recognize(image_path) + + # 提取关键字段 + invoice_info = { + '发票代码': self.extract_field(text, r'发票代码[::]\s*(\d+)'), + '发票号码': self.extract_field(text, r'发票号码[::]\s*(\d+)'), + '开票日期': self.extract_field(text, r'开票日期[::]\s*(\d{4}[-年]\d{1,2}[-月]\d{1,2})'), + '金额': self.extract_amount(text), + '税额': self.extract_tax(text), + '价税合计': self.extract_total(text), + '销售方': self.extract_field(text, r'销售方[::]\s*([^\n]+)'), + '购买方': self.extract_field(text, r'购买方[::]\s*([^\n]+)'), + } + + return invoice_info + + def extract_field(self, text, pattern): + """提取单个字段""" + m = re.search(pattern, text) + return m.group(1) if m else 'N/A' + + def extract_amount(self, text): + """提取金额""" + patterns = [ + r'价税合计[((]大写[))]\s*[^\d]*?(\d+\.\d{2})', + r'¥\s*(\d+\.\d{2})', + r'小写[::]\s*¥?(\d+\.\d{2})', + ] + for p in patterns: + m = re.search(p, text) + if m: + return float(m.group(1)) + return 0.0 + + def extract_tax(self, text): + """提取税额""" + m = re.search(r'税额[::]\s*¥?(\d+\.\d{2})', text) + return float(m.group(1)) if m else 0.0 + + def extract_total(self, text): + """提取总价税合计""" + m = re.search(r'价税合计[::]?\s*¥?(\d+\.\d{2})', text) + return 
float(m.group(1)) if m else 0.0 + +# 使用 +invoice_ocr = InvoiceOCR() +info = invoice_ocr.recognize_invoice('D:/发票.jpg') +for k, v in info.items(): + print(f'{k}: {v}') +``` + +## 五、表格识别 + +```python +class TableOCR: + """表格OCR识别""" + + def __init__(self): + self.ocr = OCRProcessor() + + def recognize_table(self, image_path): + """识别表格图片""" + # 1. 识别全部文字 + text = self.ocr.recognize(image_path) + + # 2. 分割行 + lines = [l for l in text.split('\n') if l.strip()] + + # 3. 分割列(基于空格或制表符) + rows = [] + for line in lines: + # 尝试多种分隔符 + if '\t' in line: + row = line.split('\t') + elif ' ' in line: + row = re.split(r'\s{2,}', line) + else: + row = [line] + rows.append(row) + + # 4. 转DataFrame + import pandas as pd + df = pd.DataFrame(rows[1:], columns=rows[0] if rows else []) + return df + +# 使用 +table_ocr = TableOCR() +df = table_ocr.recognize_table('D:/表格.png') +df.to_excel('D:/识别结果.xlsx', index=False) +``` + +## 六、百度OCR API(更精准) + +```python +from aip import AipOcr + +class BaiduOCR: + """百度OCR API(识别率更高)""" + + def __init__(self, app_id, api_key, secret_key): + self.client = AipOcr(app_id, api_key, secret_key) + + def recognize_general(self, image_path): + """通用文字识别""" + with open(image_path, 'rb') as f: + image = f.read() + + result = self.client.basicGeneral(image) + return result + + def recognize_accurate(self, image_path): + """高精度识别""" + with open(image_path, 'rb') as f: + image = f.read() + + result = self.client.basicAccurate(image) + return result + + def recognize_invoice(self, image_path): + """增值税发票识别""" + with open(image_path, 'rb') as f: + image = f.read() + + result = self.client.vatInvoice(image) + return result + +# 使用 +baidu_ocr = BaiduOCR( + app_id='your_app_id', + api_key='your_api_key', + secret_key='your_secret_key' +) +result = baidu_ocr.recognize_invoice('D:/发票.jpg') +print(result) +``` + +## 七、相关资源 + +- [python-office 官方文档](https://www.python-office.com) - Python自动化办公库 +- [AI + 自动化办公课程](https://mp.weixin.qq.com/s/Z3WhrmYeavrCw_FOXgiDPA) - 
35讲AI办公自动化实战 +- [PDF 内容提取](../pdf/extract-content.md) - PDF转Word +- [图片批量处理](./image-batch-processing.md) - 图片预处理 +- 📺 视频课程:[发票识别5讲](https://www.python-office.com/course-002/5-poocr/5-poocr.html) +- 👥 技术交流:[加入读者群](https://www.python4office.cn/wechat-qrcode/) + +## 总结 + +通过Python OCR文字识别: +- 📸 批量图片转文字 +- 🧾 发票信息自动提取 +- 📊 表格图片结构化 +- 🎯 多种OCR引擎可选 + +**核心思路**:用 pytesseract 做基础识别,用百度/腾讯 API 做高精度识别,用图像预处理提高准确率。OCR是连接纸质文档和数字世界的桥梁。 \ No newline at end of file diff --git a/docs-pages/vuepress/tips/best-practices/ppt-automation.md b/docs-pages/vuepress/tips/best-practices/ppt-automation.md new file mode 100644 index 0000000..66b1012 --- /dev/null +++ b/docs-pages/vuepress/tips/best-practices/ppt-automation.md @@ -0,0 +1,224 @@ +# PPT自动化:批量生成专业演示文稿,告别重复劳动 + +制作PPT是很多人的痛点,尤其是需要批量生成相似风格PPT时。本文介绍如何用Python自动化PPT生成,让繁琐的PPT工作变得简单。 + +## 一、场景应用 + +| 场景 | 描述 | +|------|------| +| 批量生成报告 | 月报、季报、年报 | +| 销售演示 | 客户定制化方案 | +| 培训资料 | 员工培训PPT | +| 产品介绍 | 不同客户版本 | +| 会议记录 | 自动化会议纪要 | + +## 二、python-office 一行代码方案 + +```python +import python-office + +# 一行代码生成PPT +python-office.ppt.create( + template='D:/模板/标准模板.pptx', + data='D:/数据/Q4数据.xlsx', + output='D:/输出/Q4报告.pptx' +) + +# 一键PPT转PDF +python-office.ppt2pdf( + file_path='D:/演示.pptx', + output_path='D:/PDF/' +) +``` + +## 三、自定义批量生成 + +### 安装依赖 + +```bash +pip install python-pptx pandas python-office +``` + +### 销售方案批量生成器 + +```python +from pptx import Presentation +from pptx.util import Inches, Pt +import pandas as pd +import python-office + +class SalesPPTGenerator: + """销售方案PPT批量生成器""" + + def __init__(self, template_path): + self.template_path = template_path + + def generate_for_client(self, client_name, data): + """为单个客户生成PPT""" + prs = Presentation(self.template_path) + + # 替换标题页 + self.replace_title(prs, f'【{client_name}】专属解决方案') + + # 添加客户专属数据页 + self.add_client_data_page(prs, client_name, data) + + # 添加方案详情页 + self.add_solution_pages(prs, data) + + # 添加报价页 + self.add_pricing_page(prs, data) + + # 保存 + output_path = 
f'D:/PPT输出/{client_name}_方案.pptx' + prs.save(output_path) + return output_path + + def replace_title(self, prs, new_title): + """替换标题""" + slide = prs.slides[0] + for shape in slide.shapes: + if shape.has_text_frame: + for para in shape.text_frame.paragraphs: + for run in para.runs: + if '标题' in run.text or 'Solution' in run.text: + run.text = new_title + + def add_client_data_page(self, prs, client_name, data): + """添加客户数据页""" + slide = prs.slides.add_slide(prs.slide_layouts[5]) + slide.shapes.title.text = f'{client_name} - 当前状况' + + # 添加文本框 + left, top, width, height = Inches(1), Inches(2), Inches(8), Inches(4) + text_box = slide.shapes.add_textbox(left, top, width, height) + tf = text_box.text_frame + tf.word_wrap = True + + # 填充数据 + lines = [ + f'客户名称: {client_name}', + f'行业: {data.get("行业", "未知")}', + f'规模: {data.get("规模", "未知")}', + f'当前痛点: {data.get("痛点", "未知")}', + f'预算: {data.get("预算", "面议")}', + ] + for line in lines: + p = tf.add_paragraph() + p.text = line + p.font.size = Pt(18) + + def add_solution_pages(self, prs, data): + """添加方案页""" + for i, solution in enumerate(data.get('方案', []), 1): + slide = prs.slides.add_slide(prs.slide_layouts[1]) + slide.shapes.title.text = f'方案 {i}: {solution["标题"]}' + + # 添加内容 + content = slide.placeholders[1] + content.text = solution['描述'] + + def add_pricing_page(self, prs, data): + """添加报价页""" + slide = prs.slides.add_slide(prs.slide_layouts[5]) + slide.shapes.title.text = '投资预算' + + # 创建表格 + rows = len(data.get('报价', [])) + 1 + cols = 3 + left, top, width, height = Inches(1), Inches(2), Inches(8), Inches(3) + table = slide.shapes.add_table(rows, cols, left, top, width, height).table + + # 表头 + table.cell(0, 0).text = '项目' + table.cell(0, 1).text = '数量' + table.cell(0, 2).text = '金额' + + # 填充数据 + for i, item in enumerate(data.get('报价', []), 1): + table.cell(i, 0).text = item['项目'] + table.cell(i, 1).text = str(item['数量']) + table.cell(i, 2).text = f"¥{item['金额']:,}" + + def batch_generate(self, client_list_file): + 
"""批量生成""" + df = pd.read_excel(client_list_file) + generated = [] + + for _, row in df.iterrows(): + client_name = row['客户名称'] + data = row.to_dict() + + print(f"生成 {client_name} 方案...") + output = self.generate_for_client(client_name, data) + generated.append(output) + + # 自动转换为PDF + python-office.ppt2pdf( + file_path=output, + output_path=str(Path(output).parent) + ) + + print(f"✅ 共生成 {len(generated)} 个PPT") + return generated + +# 使用 +generator = SalesPPTGenerator('D:/模板/销售方案模板.pptx') +generator.batch_generate('D:/客户清单.xlsx') +``` + +## 四、自动套用数据生成图表PPT + +```python +def generate_chart_ppt(data_file, output_path): + """自动生成带图表的PPT""" + import matplotlib.pyplot as plt + from pptx import Presentation + from pptx.util import Inches + + df = pd.read_excel(data_file) + prs = Presentation() + + # 标题页 + slide = prs.slides.add_slide(prs.slide_layouts[0]) + slide.shapes.title.text = '数据分析报告' + slide.placeholders[1].text = f'生成时间: {datetime.now().strftime("%Y-%m-%d")}' + + # 为每个指标生成图表页 + for column in df.select_dtypes(include='number').columns: + # 生成图表 + fig, ax = plt.subplots(figsize=(8, 5)) + df[column].plot(kind='bar', ax=ax, color='steelblue') + ax.set_title(f'{column} 分布') + ax.set_ylabel(column) + chart_path = f'/tmp/{column}.png' + plt.savefig(chart_path, dpi=150, bbox_inches='tight') + plt.close() + + # 添加到PPT + slide = prs.slides.add_slide(prs.slide_layouts[5]) + slide.shapes.title.text = f'{column} 数据' + slide.shapes.add_picture(chart_path, Inches(1), Inches(2), width=Inches(8)) + + prs.save(output_path) + return output_path +``` + +## 五、相关资源 + +- [python-office 官方文档](https://www.python-office.com) - Python自动化办公库 +- [AI + 自动化办公课程](https://mp.weixin.qq.com/s/Z3WhrmYeavrCw_FOXgiDPA) - 35讲AI办公自动化实战 +- [销售报表自动化](./sales-report-automation.md) - 数据驱动PPT +- [Word 文档处理](../word/batch-processing.md) - 文档转PPT +- 📺 视频课程:[自动化办公50讲](https://www.python-office.com/course/50-python-office.html) +- 👥 技术交流:[加入读者群](https://www.python4office.cn/wechat-qrcode/) + +## 总结 + 
+通过Python自动化PPT生成: +- ⏰ 批量生成100份PPT从10小时缩短到30分钟 +- 🎨 风格统一、质量稳定 +- 📊 自动数据可视化 +- 📁 自动归档管理 + +PPT自动化是销售、市场、运营等岗位的"效率倍增器"。 \ No newline at end of file diff --git a/docs-pages/vuepress/tips/best-practices/sales-report-automation.md b/docs-pages/vuepress/tips/best-practices/sales-report-automation.md new file mode 100644 index 0000000..e624bed --- /dev/null +++ b/docs-pages/vuepress/tips/best-practices/sales-report-automation.md @@ -0,0 +1,355 @@ +# 销售报表自动化:让Python帮你每天自动生成销售日报 + +作为销售管理人员,每天最头疼的事情就是从各种系统导出数据、整理成日报。用Python自动化这个流程后,每天可以节省约1.5小时,让数据说话而不是人工搬运数据。 + +## 一、场景分析 + +### 传统流程的问题 + +| 步骤 | 耗时 | 易错点 | +|------|------|--------| +| 从CRM导出数据 | 10分钟 | 漏单、日期错乱 | +| 从财务系统导出回款 | 15分钟 | 跨系统对账 | +| Excel手工汇总 | 30分钟 | 公式错误 | +| 生成可视化图表 | 20分钟 | 样式不统一 | +| 发送邮件给老板 | 10分钟 | 漏发、错发 | +| **总耗时** | **1.5小时** | 多处可能出错 | + +### 自动化后 + +- **耗时**:5分钟(自动执行) +- **准确率**:100%(无人工干预) +- **节省时间**:每天1.5小时 + +## 二、数据准备 + +### 数据源结构 + +```python +# 销售订单数据(orders.xlsx) +{ + '订单号': 'ORD20240101001', + '客户': '北京科技有限公司', + '销售员': '张三', + '产品': 'python-office企业版', + '金额': 50000, + '日期': '2024-01-01', + '状态': '已成交' +} +``` + +```python +# 回款数据(payments.xlsx) +{ + '订单号': 'ORD20240101001', + '回款金额': 50000, + '回款日期': '2024-01-15', + '付款方式': '银行转账' +} +``` + +## 三、完整自动化代码 + +### 安装依赖 + +```bash +pip install pandas openpyxl python-office +``` + +### 销售日报生成器 + +```python +import pandas as pd +import python-office +from datetime import datetime, timedelta +from pathlib import Path + +class SalesDailyReport: + """销售日报自动生成器""" + + def __init__(self, output_folder='D:/销售日报'): + self.output_folder = Path(output_folder) + self.output_folder.mkdir(parents=True, exist_ok=True) + self.today = datetime.now().strftime('%Y-%m-%d') + + def generate(self): + """生成今日销售日报""" + print(f"🚀 开始生成 {self.today} 销售日报...") + + # 1. 加载数据 + orders = self.load_orders() + payments = self.load_payments() + + # 2. 数据处理 + summary = self.process_data(orders, payments) + + # 3.
生成Excel报告 + excel_path = self.generate_excel_report(orders, summary) + + # 4. 生成图表 + chart_path = self.generate_charts(summary) + + # 5. 转换为PDF + pdf_path = self.convert_to_pdf(excel_path) + + # 6. 发送邮件 + self.send_email(pdf_path, summary) + + print(f"✅ 销售日报生成完成: {pdf_path}") + return summary + + def load_orders(self): + """加载订单数据""" + return pd.read_excel('D:/CRM/orders.xlsx') + + def load_payments(self): + """加载回款数据""" + return pd.read_excel('D:/财务/payments.xlsx') + + def process_data(self, orders, payments): + """处理数据,生成汇总""" + # 今日订单 + today_orders = orders[orders['日期'] == self.today] + + # 今日回款 + today_payments = payments[payments['回款日期'] == self.today] + + summary = { + '总订单数': len(today_orders), + '总销售额': today_orders['金额'].sum(), + '总回款额': today_payments['回款金额'].sum(), + '客户数': today_orders['客户'].nunique(), + '活跃销售员': today_orders['销售员'].nunique(), + '客单价': today_orders['金额'].mean() if len(today_orders) > 0 else 0, + '订单明细': today_orders, + '回款明细': today_payments, + } + + return summary + + def generate_excel_report(self, orders, summary): + """生成Excel报告""" + output_path = self.output_folder / f'销售日报_{self.today}.xlsx' + + with pd.ExcelWriter(output_path, engine='openpyxl') as writer: + # 1. 概览页 + overview = pd.DataFrame({ + '指标': ['总订单数', '总销售额', '总回款额', '客户数', '活跃销售员', '客单价'], + '数值': [ + summary['总订单数'], + f"¥{summary['总销售额']:,.2f}", + f"¥{summary['总回款额']:,.2f}", + summary['客户数'], + summary['活跃销售员'], + f"¥{summary['客单价']:,.2f}" + ] + }) + overview.to_excel(writer, sheet_name='今日概览', index=False) + + # 2. 销售员排行 + top_sales = orders.groupby('销售员').agg({ + '金额': 'sum', + '订单号': 'count', + '客户': 'nunique' + }).sort_values('金额', ascending=False) + top_sales.columns = ['销售额', '订单数', '客户数'] + top_sales.to_excel(writer, sheet_name='销售员排行') + + # 3. 
客户排行 + top_customers = orders.groupby('客户').agg({ + '金额': 'sum', + '订单号': 'count' + }).sort_values('金额', ascending=False).head(10) + top_customers.columns = ['销售额', '订单数'] + top_customers.to_excel(writer, sheet_name='客户TOP10') + + # 4. 订单明细 + summary['订单明细'].to_excel(writer, sheet_name='订单明细', index=False) + + return output_path + + def generate_charts(self, summary): + """生成图表""" + import matplotlib.pyplot as plt + plt.rcParams['font.sans-serif'] = ['SimHei'] + plt.rcParams['axes.unicode_minus'] = False + + # 创建多子图 + fig, axes = plt.subplots(2, 2, figsize=(15, 10)) + + # 1. 销售员柱状图 + ax1 = axes[0, 0] + sales_data = summary['订单明细'].groupby('销售员')['金额'].sum().sort_values(ascending=True) + sales_data.plot(kind='barh', ax=ax1, color='steelblue') + ax1.set_title('销售员业绩') + ax1.set_xlabel('销售额') + + # 2. 产品占比饼图 + ax2 = axes[0, 1] + product_data = summary['订单明细'].groupby('产品')['金额'].sum() + ax2.pie(product_data.values, labels=product_data.index, autopct='%1.1f%%') + ax2.set_title('产品销售占比') + + # 3. 客户分布 + ax3 = axes[1, 0] + customer_data = summary['订单明细'].groupby('客户')['金额'].sum().sort_values(ascending=False).head(10) + customer_data.plot(kind='bar', ax=ax3, color='coral') + ax3.set_title('客户销售额TOP10') + ax3.set_ylabel('销售额') + ax3.tick_params(axis='x', rotation=45) + + # 4. 
KPI卡片 + ax4 = axes[1, 1] + ax4.axis('off') + kpi_text = f""" + 📊 今日关键指标 + + 订单数: {summary['总订单数']} + 销售额: ¥{summary['总销售额']:,.0f} + 回款额: ¥{summary['总回款额']:,.0f} + 客单价: ¥{summary['客单价']:,.0f} + 客户数: {summary['客户数']} + """ + ax4.text(0.5, 0.5, kpi_text, ha='center', va='center', + fontsize=14, transform=ax4.transAxes, + bbox=dict(boxstyle='round,pad=1', facecolor='lightblue', alpha=0.5)) + + plt.tight_layout() + chart_path = self.output_folder / f'销售图表_{self.today}.png' + plt.savefig(chart_path, dpi=150, bbox_inches='tight') + plt.close() + + return chart_path + + def convert_to_pdf(self, excel_path): + """Excel转PDF""" + pdf_path = excel_path.with_suffix('.pdf') + python-office.excel2pdf(str(excel_path), str(pdf_path.parent)) + return pdf_path + + def send_email(self, pdf_path, summary): + """发送邮件给老板""" + content = f""" + 老板好, + + {self.today} 销售日报已生成,请查收。 + + 📊 核心数据: + - 总订单数: {summary['总订单数']} + - 总销售额: ¥{summary['总销售额']:,.2f} + - 总回款额: ¥{summary['总回款额']:,.2f} + - 新增客户: {summary['客户数']} + - 活跃销售员: {summary['活跃销售员']} + + 详细数据请查看附件。 + + 自动化生成 by python-office + """ + + python-office.email.send( + smtp_server='smtp.qq.com', + smtp_port=465, + email='sales@company.com', + password='your_auth_code', + to_email='boss@company.com', + subject=f'【销售日报】{self.today}', + content=content, + attachment=str(pdf_path) + ) + +# 使用 +report = SalesDailyReport() +summary = report.generate() +``` + +## 四、定时任务调度 + +```python +import schedule +import time + +def daily_job(): + """每日任务""" + report = SalesDailyReport() + report.generate() + +# 设置每天 18:00 自动执行 +schedule.every().day.at("18:00").do(daily_job) + +print("✅ 销售日报定时任务已启动,每天18:00自动生成") +print("按 Ctrl+C 退出") + +while True: + schedule.run_pending() + time.sleep(60) +``` + +## 五、扩展功能 + +### 添加环比对比 + +```python +def add_comparison(self, summary): + """添加环比对比""" + yesterday = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d') + yesterday_data = self.load_orders() + yesterday_orders = yesterday_data[yesterday_data['日期'] == 
yesterday] + + if len(yesterday_orders) > 0: + yesterday_sales = yesterday_orders['金额'].sum() + growth = (summary['总销售额'] - yesterday_sales) / yesterday_sales * 100 + summary['环比增长'] = f"{growth:+.1f}%" + else: + summary['环比增长'] = 'N/A' + + return summary +``` + +### 添加预测功能 + +```python +def predict_tomorrow(self): + """预测明日销售""" + from sklearn.linear_model import LinearRegression + import numpy as np + + # 加载历史数据 + history = pd.read_excel('D:/CRM/orders.xlsx') + history['日期'] = pd.to_datetime(history['日期']) + + # 按天聚合 + daily_sales = history.groupby('日期')['金额'].sum().reset_index() + daily_sales['day_num'] = range(len(daily_sales)) + + # 训练模型 + X = daily_sales[['day_num']].values + y = daily_sales['金额'].values + + model = LinearRegression() + model.fit(X, y) + + # 预测明日 + tomorrow = len(daily_sales) + predicted = model.predict([[tomorrow]])[0] + + return predicted +``` + +## 六、相关资源 + +- [python-office 官方文档](https://www.python-office.com) - Python自动化办公库 +- [AI + 自动化办公课程](https://mp.weixin.qq.com/s/Z3WhrmYeavrCw_FOXgiDPA) - 35讲AI办公自动化实战 +- [Excel 批量合并技巧](../excel/batch-merge.md) - 数据汇总 +- [邮件自动化](../python/email-automation.md) - 自动发送 +- 📺 视频课程:[自动化办公50讲](https://www.python-office.com/course/50-python-office.html) +- 👥 读者交流:[加入微信群](https://www.python4office.cn/wechat-group/) + +## 总结 + +通过Python自动化销售日报流程,我们实现了: +- ⏰ 每天节省1.5小时 +- ✅ 数据准确率100% +- 📊 报表样式统一 +- 📧 邮件自动发送 + +**核心思路**:用 python-office 简化操作,用 pandas 处理数据,用 schedule 实现定时,用邮件实现推送。结合AI还能做智能分析和异常告警。 \ No newline at end of file diff --git a/docs-pages/vuepress/tips/best-practices/web-crawler-advanced.md b/docs-pages/vuepress/tips/best-practices/web-crawler-advanced.md new file mode 100644 index 0000000..45f4815 --- /dev/null +++ b/docs-pages/vuepress/tips/best-practices/web-crawler-advanced.md @@ -0,0 +1,409 @@ +# 网络爬虫实战:从零开始构建企业级数据采集系统 + +网络爬虫是Python的重要应用领域。本文从基础到实战,介绍如何用Python构建稳定、高效、合规的爬虫系统。 + +## 一、爬虫应用场景 + +| 场景 | 描述 | +|------|------| +| 电商价格监控 | 抓取竞品价格 | +| 新闻聚合 | 收集行业新闻 | +| 招聘数据 | 分析职位趋势 | +| 金融数据 | 股票、基金信息 | 
+| 学术研究 | 文献数据采集 | + +## 二、python-office 一行代码方案 + +```python +import python-office + +# 一键爬取网页数据 +python-office.crawler.simple( + url='https://example.com', + output='D:/爬取结果.json' +) + +# 一键爬取表格 +python-office.crawler.table( + url='https://example.com/data', + output='D:/数据.xlsx' +) +``` + +## 三、自定义爬虫实现 + +### 安装依赖 + +```bash +pip install requests beautifulsoup4 selenium pandas python-office +``` + +### 基础爬虫 + +```python +import requests +from bs4 import BeautifulSoup +import pandas as pd +import time +import random + +class BasicCrawler: + """基础爬虫""" + + def __init__(self, base_url, headers=None): + self.base_url = base_url + self.session = requests.Session() + self.session.headers.update(headers or { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' + }) + + def get(self, url, **kwargs): + """GET请求""" + response = self.session.get(url, timeout=10, **kwargs) + response.raise_for_status() + return response + + def parse(self, html, selector): + """解析HTML""" + soup = BeautifulSoup(html, 'lxml') + return soup.select(selector) + + def crawl(self, urls, parse_func): + """批量爬取""" + results = [] + for i, url in enumerate(urls, 1): + try: + response = self.get(url) + data = parse_func(response.text) + results.append(data) + print(f"✓ [{i}/{len(urls)}] {url}") + except Exception as e: + print(f"✗ {url}: {e}") + + # 礼貌爬取 + time.sleep(random.uniform(1, 3)) + + return results + +# 使用 +crawler = BasicCrawler('https://news.example.com') + +def parse_news(html): + soup = BeautifulSoup(html, 'lxml') + articles = [] + for item in soup.select('.news-item'): + title = item.select_one('.title').text.strip() + link = item.select_one('a')['href'] + date = item.select_one('.date').text.strip() + articles.append({'title': title, 'link': link, 'date': date}) + return articles + +urls = [f'https://news.example.com/page/{i}' for i in range(1, 11)] +results = crawler.crawl(urls, parse_news) +``` + +## 四、电商价格监控 + +```python +class PriceMonitor: + """电商价格监控""" + + 
def __init__(self): + self.crawler = BasicCrawler('https://shop.example.com') + + def get_product_info(self, product_url): + """获取商品信息""" + response = self.crawler.get(product_url) + soup = BeautifulSoup(response.text, 'lxml') + + info = { + '商品名': self.extract_text(soup, '.product-name'), + '价格': self.extract_price(soup, '.product-price'), + '库存': self.extract_text(soup, '.stock'), + '评分': self.extract_text(soup, '.rating'), + 'URL': product_url, + } + return info + + def extract_text(self, soup, selector): + el = soup.select_one(selector) + return el.text.strip() if el else '' + + def extract_price(self, soup, selector): + text = self.extract_text(soup, selector) + import re + m = re.search(r'(\d+\.?\d*)', text) + return float(m.group(1)) if m else 0.0 + + def monitor_competitor_prices(self, product_urls): + """监控竞品价格""" + results = [] + for url in product_urls: + info = self.get_product_info(url) + results.append(info) + time.sleep(2) + + df = pd.DataFrame(results) + return df + + def price_change_alert(self, current_prices, threshold=0.1): + """价格变动提醒""" + alerts = [] + for item in current_prices: + old_price = self.get_historical_price(item['URL']) + if old_price and abs(item['价格'] - old_price) / old_price > threshold: + change = (item['价格'] - old_price) / old_price * 100 + alerts.append({ + '商品': item['商品名'], + '旧价': old_price, + '新价': item['价格'], + '变动': f'{change:+.1f}%', + 'URL': item['URL'] + }) + return alerts + +# 使用 +monitor = PriceMonitor() +products = [...] 
# 商品URL列表 +prices = monitor.monitor_competitor_prices(products) +prices.to_excel('D:/价格监控.xlsx', index=False) +``` + +## 五、动态页面爬取(Selenium) + +```python +from selenium import webdriver +from selenium.webdriver.common.by import By +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.chrome.options import Options + +class SeleniumCrawler: + """Selenium动态爬虫""" + + def __init__(self, headless=True): + options = Options() + if headless: + options.add_argument('--headless') + options.add_argument('--disable-gpu') + options.add_argument('--no-sandbox') + + self.driver = webdriver.Chrome(options=options) + self.wait = WebDriverWait(self.driver, 10) + + def get(self, url): + """访问URL""" + self.driver.get(url) + + def wait_for_element(self, selector): + """等待元素加载""" + return self.wait.until( + EC.presence_of_element_located((By.CSS_SELECTOR, selector)) + ) + + def click(self, selector): + """点击元素""" + element = self.wait.until( + EC.element_to_be_clickable((By.CSS_SELECTOR, selector)) + ) + element.click() + + def scroll_to_bottom(self): + """滚动到页面底部""" + last_height = self.driver.execute_script("return document.body.scrollHeight") + while True: + self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") + time.sleep(2) + new_height = self.driver.execute_script("return document.body.scrollHeight") + if new_height == last_height: + break + last_height = new_height + + def extract_data(self, container_selector, fields): + """提取数据""" + items = [] + for container in self.driver.find_elements(By.CSS_SELECTOR, container_selector): + item = {} + for field_name, selector in fields.items(): + try: + el = container.find_element(By.CSS_SELECTOR, selector) + item[field_name] = el.text.strip() + except: + item[field_name] = '' + items.append(item) + return items + + def close(self): + self.driver.quit() + +# 使用 +crawler = SeleniumCrawler(headless=True) 
+crawler.get('https://dynamic-website.com') + +# 等待加载 +crawler.wait_for_element('.item-list') + +# 滚动加载所有内容 +crawler.scroll_to_bottom() + +# 提取数据 +fields = { + 'title': '.title', + 'price': '.price', + 'rating': '.rating', +} +data = crawler.extract_data('.item', fields) + +df = pd.DataFrame(data) +df.to_excel('D:/爬取结果.xlsx', index=False) +crawler.close() +``` + +## 六、爬虫最佳实践 + +### 1. 礼貌爬取 + +```python +class PoliteCrawler: + """礼貌爬虫""" + + def __init__(self, delay_range=(1, 3), max_per_minute=20): + self.delay_range = delay_range + self.max_per_minute = max_per_minute + self.request_times = [] + + def wait_if_needed(self): + """根据频率限制等待""" + now = time.time() + # 清理1分钟前的记录 + self.request_times = [t for t in self.request_times if now - t < 60] + + if len(self.request_times) >= self.max_per_minute: + sleep_time = 60 - (now - self.request_times[0]) + print(f"达到频率限制,等待 {sleep_time:.1f} 秒") + time.sleep(sleep_time) + + # 随机延迟 + delay = random.uniform(*self.delay_range) + time.sleep(delay) + self.request_times.append(time.time()) + +# 使用 +polite = PoliteCrawler(delay_range=(2, 5), max_per_minute=15) +``` + +### 2. 
反爬策略 + +```python +class AntiBlockCrawler: + """反爬策略""" + + USER_AGENTS = [ + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36', + ] + + def __init__(self): + self.session = requests.Session() + + def get_random_headers(self): + return { + 'User-Agent': random.choice(self.USER_AGENTS), + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', + 'Accept-Encoding': 'gzip, deflate, br', + 'Connection': 'keep-alive', + 'Upgrade-Insecure-Requests': '1', + } + + def smart_request(self, url, max_retries=3): + """智能请求(带重试)""" + for i in range(max_retries): + try: + self.session.headers.update(self.get_random_headers()) + response = self.session.get(url, timeout=15) + response.raise_for_status() + return response + except requests.exceptions.RequestException as e: + if i < max_retries - 1: + wait = 2 ** i # 指数退避 + print(f"请求失败,{wait}秒后重试: {e}") + time.sleep(wait) + else: + raise +``` + +### 3. 
数据存储 + +```python +import json +import sqlite3 +from sqlalchemy import create_engine + +class DataStorage: + """数据存储""" + + @staticmethod + def to_excel(data, output_path): + df = pd.DataFrame(data) + df.to_excel(output_path, index=False) + + @staticmethod + def to_json(data, output_path): + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(data, f, ensure_ascii=False, indent=2) + + @staticmethod + def to_sqlite(data, db_path, table_name): + df = pd.DataFrame(data) + engine = create_engine(f'sqlite:///{db_path}') + df.to_sql(table_name, engine, if_exists='append', index=False) + + @staticmethod + def to_mysql(data, host, user, password, database, table): + df = pd.DataFrame(data) + engine = create_engine(f'mysql+pymysql://{user}:{password}@{host}/{database}') + df.to_sql(table, engine, if_exists='append', index=False) +``` + +## 七、合规与法律 + +### 爬虫注意事项 + +1. **遵守robots.txt**:尊重网站的爬虫协议 +2. **限制频率**:不要给服务器造成压力 +3. **保护隐私**:不爬取和存储个人隐私 +4. **数据用途**:仅用于合法用途 +5. **尊重版权**:注明数据来源 + +```python +import urllib.robotparser + +def check_robots_txt(url): + """检查robots.txt""" + rp = urllib.robotparser.RobotFileParser() + rp.set_url(url + '/robots.txt') + rp.read() + return rp.can_fetch('*', url) +``` + +## 八、相关资源 + +- [python-office 官方文档](https://www.python-office.com) - Python自动化办公库 +- [AI + 自动化办公课程](https://mp.weixin.qq.com/s/Z3WhrmYeavrCw_FOXgiDPA) - 35讲AI办公自动化实战 +- [数据可视化进阶](./data-visualization-advanced.md) - 爬取数据后可视化 +- [定时任务调度](./workflow-automation.md) - 定时爬取 +- 📺 视频课程:[网络爬虫100讲](https://www.bilibili.com/video/BV1y54y1y74F) +- 👥 技术交流:[加入读者群](https://www.python4office.cn/wechat-qrcode/) + +## 总结 + +Python爬虫的核心技能: +- 🕷️ requests + BeautifulSoup 适合静态页面 +- 🌐 Selenium 处理动态页面 +- 🤖 礼貌爬取、频率控制 +- 💾 多样化数据存储 +- ⚖️ 遵守法律和robots.txt + +**核心思路**:先分析网站结构,再选择合适工具,最后用pandas处理数据。结合 AI 做数据分析和决策,能创造更大价值。 \ No newline at end of file diff --git a/docs-pages/vuepress/tips/best-practices/wechat-bot.md b/docs-pages/vuepress/tips/best-practices/wechat-bot.md new file mode 100644 index 
0000000..171ae94 --- /dev/null +++ b/docs-pages/vuepress/tips/best-practices/wechat-bot.md @@ -0,0 +1,287 @@ +# 微信机器人自动化:用Python实现微信消息自动回复 + +微信已经成为工作和生活的重要沟通工具。本文介绍如何用Python实现微信机器人,让它帮你自动处理消息、群管理、信息收集等任务。 + +## 一、场景应用 + +| 场景 | 描述 | +|------|------| +| 自动回复 | 关键词触发回复 | +| 群管理 | 自动踢人、欢迎新人 | +| 信息收集 | 收集群成员信息 | +| 定时提醒 | 会议、日程提醒 | +| 文件备份 | 自动下载重要文件 | + +## 二、python-office 一行代码方案 + +```python +import python-office + +# 一行代码发送消息 +python-office.wechat.send( + who='文件传输助手', + msg='Hello from Python!' +) + +# 一键群发 +python-office.wechat.group_send( + group='技术交流群', + msg='今日技术分享...' +) +``` + +## 三、自定义微信机器人 + +### 安装依赖 + +```bash +pip install itchat python-office +``` + +### 基础机器人 + +```python +import itchat +from itchat.content import TEXT, PICTURE, FILE + +@itchat.msg_register(TEXT) +def text_reply(msg): + """文本自动回复""" + text = msg['Text'] + user = msg['FromUserName'] + + # 关键词回复 + if '你好' in text or 'hello' in text.lower(): + return '你好!我是python-office机器人,有什么可以帮你?' + + if 'python' in text.lower(): + return '推荐学习 python-office 库,一行代码实现自动化办公!' + + if '课程' in text: + return 'AI+自动化办公35讲,原价499现价299,详情查看:https://mp.weixin.qq.com/s/Z3WhrmYeavrCw_FOXgiDPA' + + # 默认回复 + return f'收到你的消息:{text}' + +@itchat.msg_register(PICTURE) +def image_reply(msg): + """图片自动保存""" + msg['Text'](msg['FileName']) # 下载图片 + return '图片已保存!' + +# 登录 +itchat.auto_login(hotReload=True) +itchat.run() +``` + +### 群管理机器人 + +```python +import itchat +from itchat.content import TEXT, NOTE +import time + +class GroupBot: + """群管理机器人""" + + def __init__(self): + self.welcome_msg = """欢迎 @{nickname} 加入群聊! + + 群规: + 1. 禁止广告 + 2. 禁止人身攻击 + 3. 
鼓励技术交流 + + 群精华:https://www.python-office.com + """ + + def welcome_new_member(self, msg): + """欢迎新成员""" + if msg['Status'] == 4: # 入群 + welcome_text = self.welcome_msg.format(nickname=msg['ActualNickName']) + itchat.send_msg(welcome_text, msg['FromUserName']) + + def detect_ad(self, msg): + """检测广告""" + ad_keywords = ['加微信', '代理', '兼职', '刷单', 'http', '点击'] + for kw in ad_keywords: + if kw in msg['Text']: + return True + return False + + def auto_remove_ads(self, msg): + """自动处理广告""" + if self.detect_ad(msg): + # 将发广告的成员移出群聊 + itchat.delete_member(msg['FromUserName'], msg['ActualUserName']) + return '检测到广告,已处理' + +# 注册 +@itchat.msg_register(TEXT, isGroupChat=True) +def group_text(msg): + bot = GroupBot() + + # 处理广告 + result = bot.auto_remove_ads(msg) + if result: + return result + + return None + +@itchat.msg_register(NOTE, isGroupChat=True) +def group_note(msg): + bot = GroupBot() + bot.welcome_new_member(msg) + +itchat.auto_login(hotReload=True) +itchat.run() +``` + +## 四、定时任务机器人 + +```python +import schedule +import time +import itchat + +class ScheduledBot: + """定时任务机器人""" + + def __init__(self, target_group): + self.target_group = target_group + + def daily_news(self): + """每日新闻推送""" + # 实际应用中接入新闻API + news = """ + 📰 今日早报 + + 1. Python 3.13 正式发布 + 2. AI办公自动化成为新趋势 + 3. 程序员晚枫新课上线 + + 详情查看:https://www.python-office.com + """ + itchat.send_msg(news, self.target_group) + + def weather_reminder(self): + """天气提醒""" + weather = """ + 🌤️ 今日天气 + + 北京: 晴 25°C + 上海: 多云 28°C + 广州: 雷阵雨 30°C + + 出行记得带伞! + """ + itchat.send_msg(weather, self.target_group) + + def lunch_reminder(self): + """午餐提醒""" + itchat.send_msg('🍱 午饭时间到啦!记得按时吃饭~', self.target_group) + + def off_work_reminder(self): + """下班提醒""" + msg = """ + 🌆 下班时间到! + + 今日工作汇报: + 1. 完成代码review + 2. 修复了3个bug + 3. 发布了新版本 + + 明日计划: + 1. 开发新功能 + 2.
性能优化 + + 加油!💪 + """ + itchat.send_msg(msg, self.target_group) + + def start(self): + """启动定时任务""" + schedule.every().day.at('08:00').do(self.daily_news) + schedule.every().day.at('07:30').do(self.weather_reminder) + schedule.every().day.at('12:00').do(self.lunch_reminder) + schedule.every().day.at('18:00').do(self.off_work_reminder) + + # 登录 + itchat.auto_login(hotReload=True) + + # 主循环 + while True: + schedule.run_pending() + itchat.run(blockThread=False) # 不阻塞 + time.sleep(60) + +# 使用 +bot = ScheduledBot('技术交流群') +bot.start() +``` + +## 五、信息收集机器人 + +```python +class InfoCollector: + """信息收集机器人""" + + def __init__(self, source_group, target_user): + self.source_group = source_group + self.target_user = target_user + self.collected = [] + + def collect_resume(self, msg): + """收集群里的简历信息""" + keywords = ['简历', '求职', '面试', '招聘'] + if any(kw in msg['Text'] for kw in keywords): + self.collected.append({ + 'time': time.time(), + 'sender': msg['ActualNickName'], + 'content': msg['Text'], + }) + + def send_daily_report(self): + """发送每日收集报告""" + if not self.collected: + return + + report = f"📊 今日收集 {len(self.collected)} 条信息\n\n" + for item in self.collected: + report += f"【{item['sender']}】\n{item['content']}\n\n" + + itchat.send_msg(report, self.target_user) + self.collected.clear() + +# 注册到群消息 +@itchat.msg_register(TEXT, isGroupChat=True) +def collect_info(msg): + collector.collect_resume(msg) + +# 定时发送 +schedule.every().day.at('20:00').do(collector.send_daily_report) +``` + +## 六、相关资源 + +- [python-office 官方文档](https://www.python-office.com) - Python自动化办公库 +- [AI + 自动化办公课程](https://mp.weixin.qq.com/s/Z3WhrmYeavrCw_FOXgiDPA) - 35讲AI办公自动化实战 +- [邮件自动化](../python/email-automation.md) - 邮件机器人 +- [定时任务调度](./workflow-automation.md) - 任务自动化 +- 📺 视频课程:[微信机器人10讲](https://www.python-office.com/course-002/10-PyOfficeRobot/10-PyOfficeRobot.html) +- 👥 技术交流:[加入讨论](https://www.python4office.cn/wechat-qrcode/) + +## 总结 + +通过Python微信机器人: +- 💬 自动回复消息 +- 👥 智能群管理 +- ⏰ 定时任务提醒 +- 📊 信息自动收集 + 
+**注意事项**: +- 个人微信有封号风险,建议使用企业微信 +- 遵守微信使用规范 +- 不要频繁发送消息 + +**核心思路**:用 itchat 监听消息,用关键词匹配触发回复,用 schedule 实现定时。打造属于你的私人微信助理。 \ No newline at end of file diff --git a/docs-pages/vuepress/video/video.md b/docs-pages/vuepress/video/video.md index 3bb2bee..45d8344 100644 --- a/docs-pages/vuepress/video/video.md +++ b/docs-pages/vuepress/video/video.md @@ -34,11 +34,11 @@ sidebar: auto

🔥 30讲 · AI编程训练营

给小白的AI编程入门课程,从零开始掌握AI编程技巧

- 立即学习 → + 立即学习 →

- +